Example #1
def main():

    experiment_name = "streetview_test_move_forward_test"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    logging.basicConfig(filename=experiment + '/baseline_info.log',
                        level=logging.INFO)
    logging.info(
        "----------------------------------------------------------------")
    logging.info(
        "                    STARING NEW EXPERIMENT                      ")
    logging.info(
        "----------------------------------------------------------------")

    with open("data/streetview/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))

    setup_validator = StreetViewSetupValidator()
    setup_validator.validate(config, constants)

    # log core experiment details
    logging.info("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        logging.info("    %s --- %r" % (k, v))
    logging.info("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        logging.info("    %s --- %r" % (k, v))
    logging.info("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            logging.info(">>> " + line.strip())
    logging.info("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"])
    meta_data_util = MetaDataUtil()

    # Build the vocabulary: token id -> token, with the final id
    # reserved for the unknown-word token
    vocab = dict()
    with open(config["vocab_file"]) as f:
        vocab_list = f.readlines()
    for i, tk in enumerate(vocab_list):
        vocab[i] = tk.strip().lower()
    vocab[len(vocab_list)] = "$UNK$"
    config["vocab_size"] = len(vocab_list) + 1

    # Create the server
    server = StreetViewServer(config,
                              action_space,
                              forward_setting_strict=False)

    # Read the dataset
    test_data = DatasetParser.parse("data/streetview/navigation_test.json",
                                    config)

    agent = Agent(Agent.MOST_FREQUENT, server, action_space, meta_data_util,
                  constants)
    agent.test(test_data)
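
The vocabulary above maps token id to token, with the final id reserved for
unknown words. Anything that encodes an instruction needs the inverse
lookup; a minimal sketch, assuming whitespace tokenization (the
encode_instruction helper is illustrative, not part of the original script):

def encode_instruction(instruction, vocab):
    """Map an instruction string to token ids using the id -> token dict.

    Unknown words fall back to the final id, which the script above
    reserves for "$UNK$".
    """
    token_to_id = {token: i for i, token in vocab.items()}
    unk_id = len(vocab) - 1  # the "$UNK$" id
    return [token_to_id.get(w.lower(), unk_id) for w in instruction.split()]
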
Example #2
def main():

    experiment_name = "train-streetview-deepmind-mixture-learning-repeat"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log(
        "                    STARING NEW EXPERIMENT                      ")
    master_logger.log(
        "----------------------------------------------------------------")

    with open("data/streetview/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = StreetViewSetupValidator()
    setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"])
    meta_data_util = MetaDataUtil()

    # Learning algorithm
    parser = argparse.ArgumentParser(description='Parser Values')
    parser.add_argument('--num_processes',
                        type=int,
                        default=6,
                        help='num of process')
    parser.add_argument('--learning_alg',
                        type=str,
                        default="cb",
                        help='learning alg ("cb", "sup", "mix")')
    args = parser.parse_args()

    num_processes = args.num_processes
    learning_alg = args.learning_alg
    master_logger.log("Num processes %r, Learning Algorithm %r " %
                      (num_processes, learning_alg))

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = TmpStreetviewIncrementalModelDeepMindPolicyNetwork
        # model_type = TmpStreetviewIncrementalModelConcatRecurrentPolicyNetwork
        # model_type = IncrementalModelChaplot
        shared_model = model_type(config, constants)
        shared_model.init_weights()

        # make the shared model use share memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        train_split = DatasetParser.parse(
            "data/streetview/navigation_train.json", config)
        tune_split = DatasetParser.parse("data/streetview/navigation_dev.json",
                                         config)
        master_logger.log("Created train dataset of size %d " %
                          len(train_split))
        master_logger.log("Created tuning dataset of size %d " %
                          len(tune_split))

        processes = []

        # Split the train and tune data evenly between processes
        # (note: the final len(split) % num_processes items are dropped)
        train_split_process_chunks = []
        tune_split_process_chunks = []
        train_chunk_size = int(len(train_split) / num_processes)
        tune_chunk_size = int(len(tune_split) / num_processes)
        train_pad = 0
        tune_pad = 0
        for i in range(0, num_processes):
            train_split_process_chunks.append(train_split[train_pad:train_pad +
                                                          train_chunk_size])
            tune_split_process_chunks.append(tune_split[tune_pad:tune_pad +
                                                        tune_chunk_size])
            train_pad += train_chunk_size
            tune_pad += tune_chunk_size

        # Start the training process(es)
        for i in range(0, num_processes):
            train_chunk = train_split_process_chunks[i]
            tmp_tune_split = tune_split_process_chunks[i]
            print("Client " + str(i) + " getting a validation set of size ",
                  len(tmp_tune_split))
            server = StreetViewServer(config,
                                      action_space,
                                      forward_setting_strict=False)
            client_logger = multiprocess_logging_manager.get_logger(i)

            if learning_alg == "cb" or (learning_alg == "mix"
                                        and i < num_processes - 2):
                p = mp.Process(
                    target=TmpStreetViewAsynchronousContextualBandit.do_train,
                    args=(shared_model, config, action_space, meta_data_util,
                          constants, train_chunk, tmp_tune_split, experiment,
                          experiment_name, i, server, client_logger,
                          model_type))
            elif learning_alg == "sup" or (learning_alg == "mix"
                                           and i >= num_processes - 2):
                p = mp.Process(
                    target=TmpStreetViewAsynchronousSupervisedLearning.
                    do_train,
                    args=(shared_model, config, action_space, meta_data_util,
                          constants, train_chunk, tmp_tune_split, experiment,
                          experiment_name, i, server, client_logger,
                          model_type))
            else:
                raise NotImplementedError()
            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
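
One caveat in the chunking above: with an even split of
train_chunk_size = len(train_split) // num_processes, up to
num_processes - 1 trailing data points are never assigned to any process.
Example #5 below avoids this by handing the remainder to the last chunk; a
minimal standalone version of that remainder-safe split (the chunk_evenly
name is ours, not from the original scripts):

def chunk_evenly(items, num_chunks):
    """Split items into num_chunks contiguous chunks without dropping any.

    Every chunk gets len(items) // num_chunks elements; the last chunk
    also absorbs the remainder.
    """
    chunk_size = len(items) // num_chunks
    chunks = []
    for i in range(num_chunks):
        start = i * chunk_size
        end = start + chunk_size if i < num_chunks - 1 else len(items)
        chunks.append(items[start:end])
    assert sum(len(c) for c in chunks) == len(items)
    return chunks
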
Example #3
def main():

    experiment_name = "debug_oracle_trajectory"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log(
        "                    STARING NEW EXPERIMENT                      ")
    master_logger.log(
        "----------------------------------------------------------------")

    with open("data/streetview/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = StreetViewSetupValidator()
    setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"])

    try:

        # Read the dataset
        train_split = DatasetParser.parse(
            "data/streetview/navigation_train.json", config)
        server = StreetViewServer(config,
                                  action_space,
                                  forward_setting_strict=False)

        for data_point_ix, data_point in enumerate(train_split):

            _, metadata = server.reset_receive_feedback(data_point)
            trajectory = server.get_trajectory_exact(data_point.trajectory)
            trajectory = trajectory[:min(len(trajectory), constants["horizon"])]
            traj_node_ids = [
                server.fsa.panorama_to_node_dict[pano_id]
                for pano_id in data_point.trajectory
            ]
            total_reward = 0

            master_logger.log("Route ID: %r " % traj_node_ids)
            node_ix = 0

            for action in trajectory:
                route_id = traj_node_ids[node_ix]
                _, reward, metadata = server.send_action_receive_feedback(
                    action)
                total_reward += reward
                master_logger.log("Reward %r, Action %r, Metadata %r" %
                                  (reward, action, metadata))

                # The current panorama should be either the same route node or the next
                if route_id != metadata["panorama_id"]:
                    if node_ix >= len(traj_node_ids) - 1:
                        master_logger.log(
                            "Failed. Went to a node beyond the trajectory")
                        raise AssertionError()
                    elif traj_node_ids[node_ix + 1] != metadata["panorama_id"]:
                        master_logger.log(
                            "Supposed to go to %r but went to %r " %
                            (traj_node_ids[node_ix + 1],
                             metadata["panorama_id"]))
                        raise AssertionError()
                    else:
                        node_ix += 1

            _, reward, metadata = server.halt_and_receive_feedback()
            total_reward += reward
            master_logger.log("Reward %r, Action stop, Metadata %r" %
                              (reward, metadata))
            master_logger.log("Total reward %r, Nav Error %r " %
                              (total_reward, metadata["navigation_error"]))

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
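
The replay loop above enforces a single invariant per step: after each
action, the agent's panorama must map to either the current route node or
the one after it. The same check, distilled into a standalone helper (the
advance_on_route name is ours):

def advance_on_route(traj_node_ids, node_ix, current_node):
    """Return the updated route index, or raise if the agent left the route.

    The agent may remain on traj_node_ids[node_ix] or advance to
    traj_node_ids[node_ix + 1]; any other node is off-route.
    """
    if current_node == traj_node_ids[node_ix]:
        return node_ix
    if (node_ix + 1 < len(traj_node_ids)
            and current_node == traj_node_ids[node_ix + 1]):
        return node_ix + 1
    raise AssertionError("Off route: reached %r while expecting %r or its successor"
                         % (current_node, traj_node_ids[node_ix]))
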
Example #4
def main():

    experiment_name = "train_a3c_ga_chaplot_baseline_streetview"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    parser = argparse.ArgumentParser(description='Gated-Attention for Grounding')

    # Environment arguments
    parser.add_argument('-l', '--max-episode-length', type=int, default=50,
                        help='maximum length of an episode (default: 50)')
    parser.add_argument('-d', '--difficulty', type=str, default="hard",
                        help="""Difficulty of the environment,
                        "easy", "medium" or "hard" (default: hard)""")
    parser.add_argument('--living-reward', type=float, default=0,
                        help="""Default reward at each time step (default: 0,
                        change to -0.005 to encourage shorter paths)""")
    parser.add_argument('--frame-width', type=int, default=300,
                        help='Frame width (default: 300)')
    parser.add_argument('--frame-height', type=int, default=168,
                        help='Frame height (default: 168)')
    parser.add_argument('-v', '--visualize', type=int, default=0,
                        help="""Visualize the envrionment (default: 0,
                        use 0 for faster training)""")
    parser.add_argument('--sleep', type=float, default=0,
                        help="""Sleep between frames for better
                        visualization (default: 0)""")
    parser.add_argument('--scenario-path', type=str, default="maps/room.wad",
                        help="""Doom scenario file to load
                        (default: maps/room.wad)""")
    parser.add_argument('--interactive', type=int, default=0,
                        help="""Interactive mode enables human to play
                        (default: 0)""")
    parser.add_argument('--all-instr-file', type=str,
                        default="data/instructions_all.json",
                        help="""All instructions file
                        (default: data/instructions_all.json)""")
    parser.add_argument('--train-instr-file', type=str,
                        default="data/instructions_train.json",
                        help="""Train instructions file
                        (default: data/instructions_train.json)""")
    parser.add_argument('--test-instr-file', type=str,
                        default="data/instructions_test.json",
                        help="""Test instructions file
                        (default: data/instructions_test.json)""")
    parser.add_argument('--object-size-file', type=str,
                        default="data/object_sizes.txt",
                        help='Object size file (default: data/object_sizes.txt)')

    # A3C arguments
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 0.001)')
    parser.add_argument('--gamma', type=float, default=0.99, metavar='G',
                        help='discount factor for rewards (default: 0.99)')
    parser.add_argument('--tau', type=float, default=1.00, metavar='T',
                        help='parameter for GAE (default: 1.00)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('-n', '--num-processes', type=int, default=6, metavar='N',
                        help='how many training processes to use (default: 6)')
    parser.add_argument('--num-steps', type=int, default=20, metavar='NS',
                        help='number of forward steps in A3C (default: 20)')
    parser.add_argument('--load', type=str, default="0",
                        help='model path to load, 0 to not reload (default: 0)')
    parser.add_argument('-e', '--evaluate', type=int, default=0,
                        help="""0:Train, 1:Evaluate MultiTask Generalization
                        2:Evaluate Zero-shot Generalization (default: 0)""")
    parser.add_argument('--dump-location', type=str, default="./saved/",
                        help='path to dump models and log (default: ./saved/)')

    args = parser.parse_args()

    with open("data/streetview/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = StreetViewSetupValidator()
    setup_validator.validate(config, constants)
    args.input_size = config['vocab_size'] + 2

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"])
    meta_data_util = MetaDataUtil()

    try:
        # create tensorboard
        tensorboard = None  # Tensorboard(experiment_name)

        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = a3c_lstm_ga_default
        shared_model = model_type(args, config=config, final_image_height=3, final_image_width=3)

        # make the shared model use share memory
        shared_model.share_memory()

        lstm_size = 256
        if isinstance(shared_model, a3c_lstm_ga_concat_gavector):
            lstm_size *= 3
        contextual_bandit = False
        model = ChaplotBaselineStreetView(args, shared_model, config, constants, tensorboard,
                                          use_contextual_bandit=contextual_bandit, lstm_size=lstm_size)

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        train_split = DatasetParser.parse("data/streetview/navigation_train.json", config)
        tune_split = DatasetParser.parse("data/streetview/navigation_dev.json", config)
        master_logger.log("Created train dataset of size %d " % len(train_split))
        master_logger.log("Created tuning dataset of size %d " % len(tune_split))

        processes = []

        # Split the train and tune data evenly between processes
        # (note: the final len(split) % num_processes items are dropped)
        train_split_process_chunks = []
        tune_split_process_chunks = []
        train_chunk_size = int(len(train_split) / args.num_processes)
        tune_chunk_size = int(len(tune_split) / args.num_processes)
        train_pad = 0
        tune_pad = 0
        for i in range(0, args.num_processes):
            train_split_process_chunks.append(train_split[train_pad: train_pad + train_chunk_size])
            tune_split_process_chunks.append(tune_split[tune_pad: tune_pad + tune_chunk_size])
            train_pad += train_chunk_size
            tune_pad += tune_chunk_size

        # Start the training process(es)
        for i in range(args.num_processes):
            train_chunk = train_split_process_chunks[i]
            tune_chunk = tune_split_process_chunks[i]
            print ("Client " + str(i) + " receives train-split of size %d and tune-split of size %d " %
                   (len(train_chunk), len(tune_chunk)))
            server = StreetViewServer(config, action_space, forward_setting_strict=False)
            client_logger = multiprocess_logging_manager.get_logger(i)
            p = mp.Process(target=ChaplotBaselineStreetView.do_train, args=(model, shared_model, config,
                                                                            action_space, meta_data_util, args,
                                                                            constants, train_chunk, tune_chunk,
                                                                            experiment, experiment_name, i, server,
                                                                            client_logger, model_type,
                                                                            contextual_bandit))

            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
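
Example #4 disables Tensorboard by passing tensorboard = None, which makes
every consumer responsible for None checks. A common alternative is a no-op
stub with the same call surface, so downstream code can log unconditionally;
a minimal sketch (NullTensorboard and its log_scalar method are
illustrative, not the project's actual Tensorboard interface):

class NullTensorboard(object):
    """Drop-in placeholder that silently discards all logging calls."""

    def log_scalar(self, tag, value, step):
        pass  # intentionally a no-op

# tensorboard = NullTensorboard()  # instead of: tensorboard = None
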
Example #5
def main():

    # Learning algorithm
    parser = argparse.ArgumentParser(description='Parser Values')
    parser.add_argument('--name', type=str, help='name of the experiment')
    parser.add_argument('--num_processes',
                        type=int,
                        default=6,
                        help='num of process')
    parser.add_argument('--split',
                        type=str,
                        help='data split ("train", "dev", "test")')
    parser.add_argument('--model',
                        type=str,
                        help='model ("chaplot", "concat")')
    parser.add_argument('--learning_alg',
                        type=str,
                        help='learning alg ("cb", "sup", "mix")')
    args = parser.parse_args()

    experiment_name = args.name
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/test_baseline_%s.log' % args.split
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log(
        "                    STARING NEW EXPERIMENT                      ")
    master_logger.log(
        "----------------------------------------------------------------")

    with open("data/streetview/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = StreetViewSetupValidator()
    setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"])
    meta_data_util = MetaDataUtil()

    num_processes = args.num_processes
    model_name = args.model
    data_split = args.split
    learning_alg = args.learning_alg

    # Log the run settings
    master_logger.log("Num processes %r, Model %r, Alg %r, Split %r " %
                      (num_processes, model_name, learning_alg, data_split))

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        if model_name == "concat":
            model_type = TmpStreetviewIncrementalModelDeepMindPolicyNetwork
        elif model_name == "chaplot":
            model_type = IncrementalModelChaplot
        else:
            raise AssertionError("Model name not known. %r " % model_name)

        shared_model = model_type(config, constants)
        shared_model.init_weights()

        if model_name == "concat":
            if learning_alg == "sup":
                shared_model.load_saved_model(
                    "./results/train-streetview-deepmind-supervised-learning/supervised_learning0_epoch_13"
                )
            elif learning_alg == "cb":
                shared_model.load_saved_model(
                    "./results/train-streetview-deepmind-cb/contextual_bandit_0_epoch_38"
                )
            elif learning_alg == "mix":
                shared_model.load_saved_model(
                    "./results/train-streetview-deepmind-mixture-algorithm/supervised_learning5_epoch_54"
                )
            else:
                raise AssertionError("Unregistered learning algorithm %r " %
                                     learning_alg)
        elif model_name == "chaplot":
            if learning_alg == "sup":
                shared_model.load_saved_model(
                    "./results/train-streetview-chaplot-supervised-learning/supervised_learning0_epoch_36"
                )
            elif learning_alg == "cb":
                shared_model.load_saved_model(
                    "./results/train-streetview-chaplot-cb/contextual_bandit_0_epoch_66"
                )
            elif learning_alg == "mix":
                shared_model.load_saved_model(
                    "./results/train-streetview-chaplot-mixture-repeat2/contextual_bandit_0_epoch_34"
                )
            else:
                raise AssertionError("Unregistered learning algorithm %r " %
                                     learning_alg)
        else:
            raise AssertionError("Unregistered model %r " % model_name)

        # make the shared model use share memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        test_split = DatasetParser.parse(
            "data/streetview/navigation_%s.json" % data_split, config)

        master_logger.log("Created tuning dataset of size %d " %
                          len(test_split))

        processes = []

        # Split the test data between processes; the last process
        # also receives the remainder
        test_split_process_chunks = []
        test_chunk_size = int(len(test_split) / num_processes)
        test_pad = 0
        for i in range(0, num_processes):
            if i < num_processes - 1:
                test_split_process_chunks.append(
                    test_split[test_pad:test_pad + test_chunk_size])
            else:
                test_split_process_chunks.append(test_split[test_pad:])
            test_pad += test_chunk_size

        assert sum([
            len(chunk) for chunk in test_split_process_chunks
        ]) == len(test_split), "Test dataset not properly partitioned."

        # Start the test process(es)
        for i in range(0, num_processes):
            test_chunk = test_split_process_chunks[i]
            print("Client " + str(i) + " getting a test set of size ",
                  len(test_chunk))
            server = StreetViewServer(config,
                                      action_space,
                                      forward_setting_strict=False)
            client_logger = multiprocess_logging_manager.get_logger(i)
            p = mp.Process(
                target=TmpStreetViewAsynchronousContextualBandit.do_test,
                args=(shared_model, config, action_space, meta_data_util,
                      constants, test_chunk, experiment_name, i, server,
                      client_logger, model_type))
            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
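
All of these scripts fork workers with mp.Process, so the standard
entry-point guard applies when running them directly: under spawn-style
start methods each child re-imports the module, and an unguarded main()
would be re-executed in every worker. A sketch of the usual pattern
(assuming mp is Python's multiprocessing or torch.multiprocessing):

if __name__ == "__main__":
    # Optionally pin an explicit start method before any process is created.
    # mp.set_start_method("spawn")
    main()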