import time

import numpy as np
from torch.utils.data import DataLoader


def train_lstm(lstm, dataset_name, max_iter=1000, load_path=None):
    dataset = LSTMDataset(name=dataset_name)
    dataloader = DataLoader(dataset, batch_size=1, collate_fn=collate_fn)
    if load_path is not None:
        load_model(load_path, lstm)

    n_iter = 0
    while n_iter < max_iter:
        train_loss = []
        for idx, (encoded, actions) in enumerate(dataloader):
            x = {'encoded': encoded, 'actions': actions}
            loss = train_batch(lstm, x)
            train_loss.append(loss)
            if n_iter % 5 == 0:
                print("[TRAIN] current iteration: {}, loss: {}".format(n_iter, loss))
            # Checkpoint every 500 iterations.
            if (n_iter + 1) % 500 == 0:
                dir_path = './saved_models/'
                save_model(dir_path, lstm, 'lstm', str(n_iter), str(int(time.time())), {})
            n_iter += 1
            # Iterations are counted per batch, so stop exactly at max_iter
            # instead of overshooting by up to one full pass.
            if n_iter >= max_iter:
                break
        print("[TRAIN] Average backward pass loss: {}".format(np.mean(train_loss)))
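
# collate_fn and train_batch are used above but not defined in this excerpt.
# A minimal sketch of what they are assumed to do, with hypothetical tensor
# shapes and an MSE next-latent objective standing in for the model's actual
# loss (an LSTM-MDN would instead minimise a mixture-density NLL):
import torch
import torch.nn.functional as F


def collate_fn(batch):
    # Assumed: each dataset item is an (encoded, actions) pair of tensors
    # with shapes (T, latent_size) and (T, action_size).
    encoded = torch.stack([item[0] for item in batch])  # (B, T, latent_size)
    actions = torch.stack([item[1] for item in batch])  # (B, T, action_size)
    return encoded, actions


def train_batch(lstm, x):
    # Assumed: the optimizer is cached on the model so the call site's
    # two-argument signature still works.
    if not hasattr(lstm, 'optimizer'):
        lstm.optimizer = torch.optim.Adam(lstm.parameters(), lr=1e-3)
    # Predict the next latent code from the current latent + action.
    inputs = torch.cat([x['encoded'][:, :-1], x['actions'][:, :-1]], dim=-1)
    target = x['encoded'][:, 1:]
    pred, _ = lstm(inputs)  # assumed to return (output, hidden) like nn.LSTM
    loss = F.mse_loss(pred, target)
    lstm.optimizer.zero_grad()
    loss.backward()
    lstm.optimizer.step()
    return loss.item()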
def get_discriminator(mode):
    # Build the 2-D discriminator (an alternative DiscNet() is left commented out).
    model = get_Dnet_2D()  # DiscNet()
    if mode == 'train':
        if cfg.continue_train:
            model = load_last_model(model, model_type='D')
        else:
            model.apply(init_weights)
        model.train()
    elif mode == 'test':
        model = load_model(model, cfg.test_model_epoch, model_type='D')
        model.eval()
    return model.cuda()
def get_refiner(mode):
    # Build the 2-D refiner/generator, mirroring get_discriminator above.
    model = get_Gnet_2D()
    if mode == 'train':
        if cfg.continue_train:
            model = load_last_model(model, model_type='R')
        else:
            model.apply(init_weights)
        model.train()
    elif mode == 'test':
        model = load_model(model, cfg.refine_model_epoch, model_type='R')
        model.eval()
    return model.cuda()
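
# Both factories hand init_weights to model.apply(), which walks the module
# tree and calls the function once on every submodule. The repo's initialiser
# is not shown in this excerpt; the DCGAN-style normal init below is offered
# only as an assumption of what it might look like:
import torch.nn as nn


def init_weights(m):
    # Invoked per submodule by model.apply(); this scheme is a common GAN
    # default, not necessarily the repo's actual choice.
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0.0)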
best_param = solver_state.result[0]
controller_test = Controller(LATENT_SIZE, HIDDEN_SIZE, ACTION_SIZE, ONLY_VAE)
# controller_test.load_state_dict(controllers[0].state_dict())
load_parameters(best_param, controller_test)

# Load the VAE checkpoint on CPU.
device = torch.device("cpu")
vae_file = '../checkpoints/random/model_7.pth'
vae = ConvVAE()
vae.load_state_dict(torch.load(vae_file, map_location=device))

# The LSTM-MDN world model is only needed when the controller also consumes
# a hidden state, i.e. when not running in VAE-only mode.
if not ONLY_VAE:
    lstm_model_path = "../src/saved_models/lstm/49500/1576236505.pth.tar"
    lstm_mdn = LSTM_MDN(seq_size=1)
    load_model(lstm_model_path, lstm_mdn)

# env = gym.make('MountainCar-v0')
env = gym.make('CarRacing-v0')
obs = env.reset()
counter = 0
# s = controller.Controller   # will not work because there are no inputs
# s.action_rand()
# s.action(z, h)

# Just initialising the rollout state.
reward = 0
done = False
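
# The reward/done/counter initialisation suggests a rollout loop follows.
# A minimal sketch of the VAE-only control loop under stated assumptions:
# ConvVAE exposes an encode() returning (mu, logvar), the controller maps a
# latent to an action, and frames are downsampled to 64x64; none of these
# details are confirmed by the excerpt above.
import torch.nn.functional as F

with torch.no_grad():
    while not done:
        env.render()
        # (H, W, C) uint8 frame -> (1, C, 64, 64) float tensor in [0, 1];
        # the 64x64 input size is an assumption about ConvVAE.
        frame = torch.tensor(obs.copy(), dtype=torch.float32)
        frame = frame.permute(2, 0, 1).unsqueeze(0) / 255.0
        frame = F.interpolate(frame, size=(64, 64))
        mu, logvar = vae.encode(frame)  # assumed ConvVAE API
        action = controller_test(mu)    # assumed Controller forward (VAE-only mode)
        obs, r, done, _ = env.step(action.squeeze(0).numpy())
        reward += r
        counter += 1

print("[TEST] episode finished after {} steps, total reward: {}".format(counter, reward))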