def main():
    """Evaluate the MOST_FREQUENT baseline agent on the StreetView test split.

    Loads config/constants, builds the vocabulary, creates the simulator
    server and runs ``Agent.test`` on the test dataset. Results are logged to
    ``./results/<experiment_name>/baseline_info.log``.
    """
    experiment_name = "streetview_test_move_forward_test"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    logging.basicConfig(filename=experiment + '/baseline_info.log',
                        level=logging.INFO)
    logging.info("----------------------------------------------------------------")
    # FIX: corrected "STARING" -> "STARTING" typo in the log banner.
    logging.info("                    STARTING NEW EXPERIMENT                     ")
    logging.info("----------------------------------------------------------------")

    with open("data/streetview/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = StreetViewSetupValidator()
    setup_validator.validate(config, constants)

    # log core experiment details
    logging.info("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        logging.info("    %s --- %r" % (k, v))
    logging.info("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        logging.info("    %s --- %r" % (k, v))
    logging.info("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            logging.info(">>> " + line.strip())
    logging.info("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"])
    meta_data_util = MetaDataUtil()

    # Build the vocabulary: one lower-cased token per line, plus an $UNK$
    # entry appended at index len(vocab_list).
    # FIX: use a context manager so the vocab file handle is not leaked
    # (the original called open(...).readlines() without closing).
    vocab = dict()
    with open(config["vocab_file"]) as vocab_f:
        vocab_list = vocab_f.readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        vocab[i] = token
    vocab[len(vocab_list)] = "$UNK$"
    config["vocab_size"] = len(vocab_list) + 1

    # Create the server
    server = StreetViewServer(config, action_space, forward_setting_strict=False)

    # Read the dataset
    test_data = DatasetParser.parse("data/streetview/navigation_test.json", config)

    agent = Agent(Agent.MOST_FREQUENT, server, action_space,
                  meta_data_util, constants)
    agent.test(test_data)
def main():
    """Train the DeepMind-style StreetView policy network with multiple
    worker processes, using contextual bandit ("cb"), supervised learning
    ("sup"), or a mixture of both ("mix").

    A shared model (in shared memory) is updated asynchronously by
    ``num_processes`` workers, each owning a disjoint chunk of the train
    and dev splits.
    """
    experiment_name = "train-streetview-deepmind-mixture-learning-repeat"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    # FIX: corrected "STARING" -> "STARTING" typo in the log banner.
    master_logger.log("                    STARTING NEW EXPERIMENT                     ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/streetview/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = StreetViewSetupValidator()
    setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"])
    meta_data_util = MetaDataUtil()

    # Learning algorithm
    parser = argparse.ArgumentParser(description='Parser Values')
    parser.add_argument('--num_processes', type=int, default=6,
                        help='num of process')
    # FIX: closed the unbalanced parenthesis in the help string.
    parser.add_argument('--learning_alg', type=str, default="cb",
                        help='learning alg ("cb", "sup", "mix")')
    args = parser.parse_args()
    num_processes = args.num_processes
    learning_alg = args.learning_alg

    master_logger.log("Num processes %r, Learning Algorithm %r " %
                      (num_processes, learning_alg))

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = TmpStreetviewIncrementalModelDeepMindPolicyNetwork
        shared_model = model_type(config, constants)
        shared_model.init_weights()

        # make the shared model use share memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        train_split = DatasetParser.parse(
            "data/streetview/navigation_train.json", config)
        tune_split = DatasetParser.parse(
            "data/streetview/navigation_dev.json", config)
        master_logger.log("Created train dataset of size %d " % len(train_split))
        master_logger.log("Created tuning dataset of size %d " % len(tune_split))

        processes = []

        # Split the train data between processes
        train_split_process_chunks = []
        tune_split_process_chunks = []
        train_chunk_size = int(len(train_split) / num_processes)
        tune_chunk_size = int(len(tune_split) / num_processes)
        train_pad = 0
        tune_pad = 0
        for i in range(0, num_processes):
            if i < num_processes - 1:
                train_split_process_chunks.append(
                    train_split[train_pad:train_pad + train_chunk_size])
                tune_split_process_chunks.append(
                    tune_split[tune_pad:tune_pad + tune_chunk_size])
            else:
                # FIX: the last process takes the remainder so no datapoint is
                # silently dropped when the split size is not divisible by
                # num_processes (matches the partitioning in the test script).
                train_split_process_chunks.append(train_split[train_pad:])
                tune_split_process_chunks.append(tune_split[tune_pad:])
            train_pad += train_chunk_size
            tune_pad += tune_chunk_size

        # Start the training thread(s)
        for i in range(0, num_processes):
            train_chunk = train_split_process_chunks[i]
            # FIX: the original if/else here ("Don't want each client to do
            # testing.") had byte-identical branches; collapsed to one
            # assignment. NOTE(review): the comment suggests only some clients
            # were meant to run validation -- confirm the intended behavior.
            tmp_tune_split = tune_split_process_chunks[i]
            print("Client " + str(i) + " getting a validation set of size ",
                  len(tmp_tune_split))
            server = StreetViewServer(config, action_space,
                                      forward_setting_strict=False)
            client_logger = multiprocess_logging_manager.get_logger(i)
            # In "mix" mode the last two workers run supervised learning and
            # the rest run the contextual bandit.
            if learning_alg == "cb" or (learning_alg == "mix"
                                        and i < num_processes - 2):
                p = mp.Process(
                    target=TmpStreetViewAsynchronousContextualBandit.do_train,
                    args=(shared_model, config, action_space, meta_data_util,
                          constants, train_chunk, tmp_tune_split, experiment,
                          experiment_name, i, server, client_logger, model_type))
            elif learning_alg == "sup" or (learning_alg == "mix"
                                           and i >= num_processes - 2):
                p = mp.Process(
                    target=TmpStreetViewAsynchronousSupervisedLearning.do_train,
                    args=(shared_model, config, action_space, meta_data_util,
                          constants, train_chunk, tmp_tune_split, experiment,
                          experiment_name, i, server, client_logger, model_type))
            else:
                raise NotImplementedError()
            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
def main():
    """Replay the oracle (gold) trajectory for every training datapoint and
    check that the simulator visits exactly the annotated panorama sequence.

    For each datapoint the exact action trajectory is executed against the
    server; an AssertionError is raised (and logged) if the agent ever lands
    on a panorama that is neither the current nor the next node of the
    annotated route.
    """
    experiment_name = "debug_oracle_trajectory"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    # FIX: corrected "STARING" -> "STARTING" typo in the log banner.
    master_logger.log("                    STARTING NEW EXPERIMENT                     ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/streetview/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = StreetViewSetupValidator()
    setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"])

    try:
        # Read the dataset
        train_split = DatasetParser.parse(
            "data/streetview/navigation_train.json", config)
        server = StreetViewServer(config, action_space,
                                  forward_setting_strict=False)

        # FIX: dropped the unused enumerate index (data_point_ix was never read).
        for data_point in train_split:
            _, metadata = server.reset_receive_feedback(data_point)
            trajectory = server.get_trajectory_exact(data_point.trajectory)
            # Truncate the oracle trajectory to the episode horizon.
            # FIX: a plain slice already clamps at len(trajectory), so the
            # min(len(trajectory), horizon) wrapper was redundant.
            trajectory = trajectory[:constants["horizon"]]
            traj_node_ids = [server.fsa.panorama_to_node_dict[pano_id]
                             for pano_id in data_point.trajectory]
            total_reward = 0
            master_logger.log("Route ID: %r " % traj_node_ids)
            node_ix = 0
            for action in trajectory:
                route_id = traj_node_ids[node_ix]
                _, reward, metadata = server.send_action_receive_feedback(action)
                total_reward += reward
                master_logger.log("Reward %r, Action %r, Metadata %r" %
                                  (reward, action, metadata))
                # current node id should be either same or next
                if route_id != metadata["panorama_id"]:  # hopefully updated
                    if node_ix >= len(traj_node_ids) - 1:
                        master_logger.log(
                            "Failed. Went to a node beyond the trajectory")
                        raise AssertionError()
                    elif traj_node_ids[node_ix + 1] != metadata["panorama_id"]:
                        master_logger.log(
                            "Supposed to go to %r but went to %r " %
                            (traj_node_ids[node_ix + 1], metadata["panorama_id"]))
                        raise AssertionError()
                    else:
                        node_ix += 1

            _, reward, metadata = server.halt_and_receive_feedback()
            total_reward += reward
            master_logger.log("Reward %r, Action stop, Metadata %r" %
                              (reward, metadata))
            master_logger.log("Total reward %r, Nav Error %r " %
                              (total_reward, metadata["navigation_error"]))

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
def main():
    """Train the A3C gated-attention (Chaplot) baseline on StreetView.

    Builds a shared ``a3c_lstm_ga_default`` model and spawns
    ``args.num_processes`` workers, each training on a disjoint chunk of the
    train/dev splits via ``ChaplotBaselineStreetView.do_train``.
    """
    experiment_name = "train_a3c_ga_chaplot_baseline_streetview"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    # FIX: corrected "STARING" -> "STARTING" typo in the log banner.
    master_logger.log("                    STARTING NEW EXPERIMENT                     ")
    master_logger.log("----------------------------------------------------------------")

    parser = argparse.ArgumentParser(description='Gated-Attention for Grounding')

    # Environment arguments
    # FIX: help text said "default: 40" while the actual default is 50.
    parser.add_argument('-l', '--max-episode-length', type=int, default=50,
                        help='maximum length of an episode (default: 50)')
    parser.add_argument('-d', '--difficulty', type=str, default="hard",
                        help="""Difficulty of the environment,
                        "easy", "medium" or "hard" (default: hard)""")
    parser.add_argument('--living-reward', type=float, default=0,
                        help="""Default reward at each time step (default: 0,
                        change to -0.005 to encourage shorter paths)""")
    parser.add_argument('--frame-width', type=int, default=300,
                        help='Frame width (default: 300)')
    parser.add_argument('--frame-height', type=int, default=168,
                        help='Frame height (default: 168)')
    parser.add_argument('-v', '--visualize', type=int, default=0,
                        help="""Visualize the envrionment (default: 0,
                        use 0 for faster training)""")
    parser.add_argument('--sleep', type=float, default=0,
                        help="""Sleep between frames for better
                        visualization (default: 0)""")
    parser.add_argument('--scenario-path', type=str, default="maps/room.wad",
                        help="""Doom scenario file to load
                        (default: maps/room.wad)""")
    parser.add_argument('--interactive', type=int, default=0,
                        help="""Interactive mode enables human to play
                        (default: 0)""")
    parser.add_argument('--all-instr-file', type=str,
                        default="data/instructions_all.json",
                        help="""All instructions file
                        (default: data/instructions_all.json)""")
    parser.add_argument('--train-instr-file', type=str,
                        default="data/instructions_train.json",
                        help="""Train instructions file
                        (default: data/instructions_train.json)""")
    parser.add_argument('--test-instr-file', type=str,
                        default="data/instructions_test.json",
                        help="""Test instructions file
                        (default: data/instructions_test.json)""")
    parser.add_argument('--object-size-file', type=str,
                        default="data/object_sizes.txt",
                        help='Object size file (default: data/object_sizes.txt)')

    # A3C arguments
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 0.001)')
    parser.add_argument('--gamma', type=float, default=0.99, metavar='G',
                        help='discount factor for rewards (default: 0.99)')
    parser.add_argument('--tau', type=float, default=1.00, metavar='T',
                        help='parameter for GAE (default: 1.00)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('-n', '--num-processes', type=int, default=6,
                        metavar='N',
                        help='how many training processes to use (default: 6)')
    parser.add_argument('--num-steps', type=int, default=20, metavar='NS',
                        help='number of forward steps in A3C (default: 20)')
    parser.add_argument('--load', type=str, default="0",
                        help='model path to load, 0 to not reload (default: 0)')
    parser.add_argument('-e', '--evaluate', type=int, default=0,
                        help="""0:Train, 1:Evaluate MultiTask Generalization
                        2:Evaluate Zero-shot Generalization (default: 0)""")
    parser.add_argument('--dump-location', type=str, default="./saved/",
                        help='path to dump models and log (default: ./saved/)')

    args = parser.parse_args()

    with open("data/streetview/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = StreetViewSetupValidator()
    setup_validator.validate(config, constants)
    # +2 for the extra $UNK$/padding entries expected by the encoder.
    # NOTE(review): confirm the offset against the model's embedding size.
    args.input_size = config['vocab_size'] + 2

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"])
    meta_data_util = MetaDataUtil()

    try:
        # create tensorboard
        tensorboard = None  # Tensorboard(experiment_name)

        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = a3c_lstm_ga_default
        shared_model = model_type(args, config=config,
                                  final_image_height=3, final_image_width=3)

        # make the shared model use share memory
        shared_model.share_memory()

        # The concat-gavector variant carries a 3x wider LSTM state.
        lstm_size = 256
        if isinstance(shared_model, a3c_lstm_ga_concat_gavector):
            lstm_size *= 3
        contextual_bandit = False
        model = ChaplotBaselineStreetView(
            args, shared_model, config, constants, tensorboard,
            use_contextual_bandit=contextual_bandit, lstm_size=lstm_size)

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        train_split = DatasetParser.parse(
            "data/streetview/navigation_train.json", config)
        tune_split = DatasetParser.parse(
            "data/streetview/navigation_dev.json", config)
        master_logger.log("Created train dataset of size %d " % len(train_split))
        master_logger.log("Created tuning dataset of size %d " % len(tune_split))

        processes = []

        # Split the train data between processes
        train_split_process_chunks = []
        tune_split_process_chunks = []
        train_chunk_size = int(len(train_split) / args.num_processes)
        tune_chunk_size = int(len(tune_split) / args.num_processes)
        train_pad = 0
        tune_pad = 0
        for i in range(0, args.num_processes):
            if i < args.num_processes - 1:
                train_split_process_chunks.append(
                    train_split[train_pad:train_pad + train_chunk_size])
                tune_split_process_chunks.append(
                    tune_split[tune_pad:tune_pad + tune_chunk_size])
            else:
                # FIX: the last process takes the remainder so no datapoint is
                # silently dropped when the split size is not divisible by
                # num_processes (matches the partitioning in the test script).
                train_split_process_chunks.append(train_split[train_pad:])
                tune_split_process_chunks.append(tune_split[tune_pad:])
            train_pad += train_chunk_size
            tune_pad += tune_chunk_size

        # Start the training thread(s)
        for i in range(args.num_processes):
            train_chunk = train_split_process_chunks[i]
            tune_chunk = tune_split_process_chunks[i]
            print("Client " + str(i) +
                  " receives train-split of size %d and tune-split of size %d "
                  % (len(train_chunk), len(tune_chunk)))
            server = StreetViewServer(config, action_space,
                                      forward_setting_strict=False)
            client_logger = multiprocess_logging_manager.get_logger(i)
            p = mp.Process(target=ChaplotBaselineStreetView.do_train,
                           args=(model, shared_model, config, action_space,
                                 meta_data_util, args, constants, train_chunk,
                                 tune_chunk, experiment, experiment_name, i,
                                 server, client_logger, model_type,
                                 contextual_bandit))
            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
def main():
    """Evaluate a trained StreetView model on a chosen data split.

    Command-line arguments select the experiment name, the model family
    ("concat" or "chaplot"), the learning algorithm whose checkpoint should
    be loaded ("cb", "sup", "mix"), the data split ("train", "dev", "test"),
    and the number of worker processes. The split is partitioned across
    workers (last worker takes the remainder) and each runs
    ``TmpStreetViewAsynchronousContextualBandit.do_test``.
    """
    # Learning algorithm
    parser = argparse.ArgumentParser(description='Parser Values')
    parser.add_argument('--name', type=str, help='name of the experiment')
    parser.add_argument('--num_processes', type=int, default=6,
                        help='num of process')
    # FIX: help text wrongly described --split as a learning algorithm.
    parser.add_argument('--split', type=str,
                        help='data split ("train", "dev", "test")')
    parser.add_argument('--model', type=str,
                        help='model ("chaplot", "concat")')
    # FIX: closed the unbalanced parenthesis in the help string.
    parser.add_argument('--learning_alg', type=str,
                        help='learning alg ("cb", "sup", "mix")')
    args = parser.parse_args()

    experiment_name = args.name
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/test_baseline_%s.log' % args.split
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    # FIX: corrected "STARING" -> "STARTING" typo in the log banner.
    master_logger.log("                    STARTING NEW EXPERIMENT                     ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/streetview/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = StreetViewSetupValidator()
    setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"])
    meta_data_util = MetaDataUtil()

    num_processes = args.num_processes
    model_name = args.model
    data_split = args.split
    learning_alg = args.learning_alg

    # Number of processes
    master_logger.log("Num processes %r, Model %r, Alg %r, Split %r " %
                      (num_processes, model_name, learning_alg, data_split))

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        if model_name == "concat":
            model_type = TmpStreetviewIncrementalModelDeepMindPolicyNetwork
        elif model_name == "chaplot":
            model_type = IncrementalModelChaplot
        else:
            raise AssertionError("Model name not known. %r " % model_name)

        shared_model = model_type(config, constants)
        shared_model.init_weights()

        # Load the checkpoint matching the (model, learning algorithm) pair.
        if model_name == "concat":
            if learning_alg == "sup":
                shared_model.load_saved_model(
                    "./results/train-streetview-deepmind-supervised-learning/supervised_learning0_epoch_13")
            elif learning_alg == "cb":
                shared_model.load_saved_model(
                    "./results/train-streetview-deepmind-cb/contextual_bandit_0_epoch_38")
            elif learning_alg == "mix":
                shared_model.load_saved_model(
                    "./results/train-streetview-deepmind-mixture-algorithm/supervised_learning5_epoch_54")
            else:
                raise AssertionError("Unregistered learning algorithm %r " % learning_alg)
        elif model_name == "chaplot":
            if learning_alg == "sup":
                shared_model.load_saved_model(
                    "./results/train-streetview-chaplot-supervised-learning/supervised_learning0_epoch_36")
            elif learning_alg == "cb":
                shared_model.load_saved_model(
                    "./results/train-streetview-chaplot-cb/contextual_bandit_0_epoch_66")
            elif learning_alg == "mix":
                shared_model.load_saved_model(
                    "./results/train-streetview-chaplot-mixture-repeat2/contextual_bandit_0_epoch_34")
            else:
                raise AssertionError("Unregistered learning algorithm %r " % learning_alg)
        else:
            raise AssertionError("Unregistered model %r " % model_name)

        # make the shared model use share memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        test_split = DatasetParser.parse(
            "data/streetview/navigation_%s.json" % data_split, config)
        master_logger.log("Created tuning dataset of size %d " % len(test_split))

        processes = []

        # Split the train data between processes; the last chunk absorbs the
        # remainder so every datapoint is evaluated exactly once.
        test_split_process_chunks = []
        tune_chunk_size = int(len(test_split) / num_processes)
        tune_pad = 0
        for i in range(0, num_processes):
            if i < num_processes - 1:
                test_split_process_chunks.append(
                    test_split[tune_pad:tune_pad + tune_chunk_size])
            else:
                test_split_process_chunks.append(test_split[tune_pad:])
            tune_pad += tune_chunk_size

        assert sum([len(chunk) for chunk in test_split_process_chunks]) \
            == len(test_split), "Test dataset not properly partitioned."

        # Start the training thread(s)
        for i in range(0, num_processes):
            test_chunk = test_split_process_chunks[i]
            print("Client " + str(i) + " getting a test set of size ",
                  len(test_chunk))
            server = StreetViewServer(config, action_space,
                                      forward_setting_strict=False)
            client_logger = multiprocess_logging_manager.get_logger(i)
            p = mp.Process(
                target=TmpStreetViewAsynchronousContextualBandit.do_test,
                args=(shared_model, config, action_space, meta_data_util,
                      constants, test_chunk, experiment_name, i, server,
                      client_logger, model_type))
            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)