def train_supervised():
    initialize_experiment()

    setup = get_current_parameters()["Setup"]
    supervised_params = get_current_parameters()["Supervised"]
    num_epochs = supervised_params["num_epochs"]

    model, model_loaded = load_model()

    print("Loading data")
    train_envs, dev_envs, test_envs = get_all_env_id_lists(max_envs=setup["max_envs"])

    if "split_train_data" in supervised_params and supervised_params["split_train_data"]:
        split_name = supervised_params["train_data_split"]
        split = load_env_split()[split_name]
        train_envs = [env_id for env_id in train_envs if env_id in split]
        print("Using " + str(len(train_envs)) + " envs from dataset split: " + split_name)

    filename = "supervised_" + setup["model"] + "_" + setup["run_name"]
    start_filename = "tmp/" + filename + "_epoch_" + str(supervised_params["start_epoch"])
    if supervised_params["start_epoch"] > 0:
        if file_exists(start_filename):
            load_pytorch_model(model, start_filename)
        else:
            print("Couldn't continue training. Model file doesn't exist at:")
            print(start_filename)
            exit(-1)

    if setup["restore_weights_name"]:
        restore_pretrained_weights(model, setup["restore_weights_name"], setup["fix_restored_weights"])

    trainer = Trainer(model, epoch=supervised_params["start_epoch"], name=setup["model"], run_name=setup["run_name"])

    print("Beginning training...")
    best_test_loss = 1000
    for epoch in range(num_epochs):
        train_loss = trainer.train_epoch(train_data=None, train_envs=train_envs, eval=False)

        trainer.model.correct_goals = 0
        trainer.model.total_goals = 0

        test_loss = trainer.train_epoch(train_data=None, train_envs=dev_envs, eval=True)

        print("GOALS: ", trainer.model.correct_goals, trainer.model.total_goals)

        if test_loss < best_test_loss:
            best_test_loss = test_loss
            save_pytorch_model(trainer.model, filename)
            print("Saved model in:", filename)

        print("Epoch", epoch, "train_loss:", train_loss, "test_loss:", test_loss)
        save_pytorch_model(trainer.model, "tmp/" + filename + "_epoch_" + str(epoch))
        if hasattr(trainer.model, "save"):
            trainer.model.save(epoch)
        save_pretrained_weights(trainer.model, setup["run_name"])
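# Illustrative sketch of the parameter structure train_supervised() expects from
# get_current_parameters(). The keys are exactly those read above; the concrete values
# and the mechanism that supplies them (the project's own config loading) are
# assumptions, shown here only to document the expected schema.
EXAMPLE_SUPERVISED_PARAMETERS = {
    "Setup": {
        "model": "pvn_stage1",          # placeholder model name
        "run_name": "example_run",      # placeholder run name
        "max_envs": None,               # limit on environment ids, or None for all
        "restore_weights_name": None,   # falsy value skips restore_pretrained_weights()
        "fix_restored_weights": False,
    },
    "Supervised": {
        "num_epochs": 50,               # placeholder epoch budget
        "start_epoch": 0,               # > 0 resumes from a tmp/ checkpoint
        "split_train_data": False,
        "train_data_split": "train",    # only read when split_train_data is True
    },
}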
def train_top_down_pred(args, max_epoch=SUPERVISED_EPOCHS):
    initialize_experiment(args.run_name, args.setup_name)

    model, model_loaded = load_model()

    # TODO: Get batch size from global parameter server when it exists
    batch_size = 1 if \
        args.model == "top_down" or \
        args.model == "top_down_prior" or \
        args.model == "top_down_sm" or \
        args.model == "top_down_pretrain" or \
        args.model == "top_down_goal_pretrain" or \
        args.model == "top_down_nav" or \
        args.model == "top_down_cond" \
        else BATCH_SIZE

    lr = 0.001  # * batch_size

    trainer = Trainer(model, epoch=args.start_epoch, name=args.model, run_name=args.run_name)

    train_envs, dev_envs, test_envs = get_all_env_id_lists(max_envs=args.max_envs)

    filename = "top_down_" + args.model + "_" + args.run_name

    if args.restore_weights_name is not None:
        restore_pretrained_weights(model, args.restore_weights_name, args.fix_restored_weights)

    print("Beginning training...")
    best_test_loss = 1000

    validation_loss = []

    for epoch in range(SUPERVISED_EPOCHS):
        train_loss = -1
        if not args.eval_pretrain:
            train_loss = trainer.train_epoch(train_envs=train_envs, eval=False)

        test_loss = trainer.train_epoch(train_envs=dev_envs, eval=True)
        validation_loss.append([epoch, test_loss])

        if not args.eval_pretrain:
            if test_loss < best_test_loss:
                best_test_loss = test_loss
                save_pytorch_model(trainer.model, filename)
                print("Saved model in:", filename)

            print("Epoch", epoch, "train_loss:", train_loss, "test_loss:", test_loss)
            save_pytorch_model(trainer.model, "tmp/" + filename + "_epoch_" + str(epoch))
            save_pretrained_weights(trainer.model, args.run_name)
        else:
            break

        if max_epoch is not None and epoch > max_epoch:
            print("Reached epoch limit!")
            break

    test_loss_dir = get_model_dir() + "/test_loss/" + filename + "_test_loss.csv"
    validation_loss = pd.DataFrame(validation_loss, columns=["epoch", "test_loss"])
    validation_loss.to_csv(test_loss_dir, index=False)
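# Hypothetical invocation sketch for train_top_down_pred(). The attribute names mirror
# exactly what the function reads from `args` above; the project presumably builds this
# namespace from its own argument parser, so the values below are placeholders only.
from argparse import Namespace

example_args = Namespace(
    run_name="example_run",        # placeholder run name
    setup_name="example_setup",    # placeholder setup name
    model="top_down",              # one of the top_down* variants handled above
    start_epoch=0,
    max_envs=None,
    restore_weights_name=None,
    fix_restored_weights=False,
    eval_pretrain=False,           # True runs a single evaluation pass and stops
)
# train_top_down_pred(example_args)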
def train_supervised():
    initialize_experiment()

    setup = get_current_parameters()["Setup"]
    supervised_params = get_current_parameters()["Supervised"]
    num_epochs = supervised_params["num_epochs"]

    model, model_loaded = load_model()
    # Debugging aid: run the model once on a pickled batch of inputs
    # import pickle
    # with open('/storage/dxsun/model_input.pickle', 'rb') as f:
    #     data = pickle.load(f)
    # g = model(data['images'], data['states'], data['instructions'], data['instr_lengths'],
    #           data['has_obs'], data['plan'], data['save_maps_only'], data['pos_enc'],
    #           data['noisy_poses'], data['start_poses'], data['firstseg'])
    print("model:", model)
    print("model type:", type(model))

    print("Loading data")
    train_envs, dev_envs, test_envs = get_all_env_id_lists(max_envs=setup["max_envs"])

    if "split_train_data" in supervised_params and supervised_params["split_train_data"]:
        split_name = supervised_params["train_data_split"]
        split = load_env_split()[split_name]
        train_envs = [env_id for env_id in train_envs if env_id in split]
        print("Using " + str(len(train_envs)) + " envs from dataset split: " + split_name)

    filename = "supervised_" + setup["model"] + "_" + setup["run_name"]

    # The paths are asymmetric here because load_pytorch_model adds ".pytorch" to the end
    # of the path it is given, but file_exists doesn't.
    model_path = "tmp/" + filename + "_epoch_" + str(supervised_params["start_epoch"])
    model_path_with_extension = model_path + ".pytorch"
    print("model path:", model_path_with_extension)
    if supervised_params["start_epoch"] > 0:
        if file_exists(model_path_with_extension):
            print("Resuming from existing checkpoint:", model_path_with_extension)
            load_pytorch_model(model, model_path)
        else:
            print("Couldn't continue training. Model file doesn't exist at:")
            print(model_path_with_extension)
            exit(-1)

    # If you just want to use the pretrained model:
    # load_pytorch_model(model, "supervised_pvn_stage1_train_corl_pvn_stage1")

    # all_train_data, all_test_data = data_io.train_data.load_supervised_data(max_envs=100)
    if setup["restore_weights_name"]:
        restore_pretrained_weights(model, setup["restore_weights_name"], setup["fix_restored_weights"])

    # Add a tensorboard logger to the model and trainer
    tensorboard_dir = get_current_parameters()['Environment']['tensorboard_dir']
    logger = Logger(tensorboard_dir)
    model.logger = logger
    if hasattr(model, "goal_good_criterion"):
        print("gave logger to goal evaluator")
        model.goal_good_criterion.logger = logger

    trainer = Trainer(model, epoch=supervised_params["start_epoch"], name=setup["model"], run_name=setup["run_name"])
    trainer.logger = logger

    print("Beginning training...")
    best_test_loss = 1000

    continue_epoch = supervised_params["start_epoch"] + 1 if supervised_params["start_epoch"] > 0 else 0
    rng = range(0, num_epochs)
    print("filename:", filename)

    for epoch in rng:
        train_loss = trainer.train_epoch(train_data=None, train_envs=train_envs, eval=False)
        # train_loss = trainer.train_epoch(train_data=all_train_data, train_envs=train_envs, eval=False)

        trainer.model.correct_goals = 0
        trainer.model.total_goals = 0

        test_loss = trainer.train_epoch(train_data=None, train_envs=dev_envs, eval=True)

        print("GOALS: ", trainer.model.correct_goals, trainer.model.total_goals)

        if test_loss < best_test_loss:
            best_test_loss = test_loss
            save_pytorch_model(trainer.model, filename)
            print("Saved model in:", filename)

        print("Epoch", epoch, "train_loss:", train_loss, "test_loss:", test_loss)
        save_pytorch_model(trainer.model, "tmp/" + filename + "_epoch_" + str(epoch))
        if hasattr(trainer.model, "save"):
            trainer.model.save(epoch)
        save_pretrained_weights(trainer.model, setup["run_name"])
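# Hypothetical helper (not part of the project) that makes the path asymmetry noted in
# train_supervised() explicit: load_pytorch_model() appends ".pytorch" to the path it is
# given, while the existence check has to look at the literal file on disk. This sketch
# assumes plain filesystem paths; the project's own file_exists() may resolve paths
# differently.
import os

def resolve_epoch_checkpoint(filename, epoch):
    """Return (path to pass to load_pytorch_model, whether the checkpoint exists on disk)."""
    model_path = "tmp/" + filename + "_epoch_" + str(epoch)
    return model_path, os.path.isfile(model_path + ".pytorch")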