import numpy as np
import torch

is_disc_action = len(env.action_space.shape) == 0

if use_running_state:
    # Running estimate of the state mean and std, used to normalize observations
    running_state = ZFilter((state_dim,), clip=5)
else:
    running_state = None
# running_reward = ZFilter((1,), demean=False, clip=10)

"""seeding"""
np.random.seed(args.seed)
torch.manual_seed(args.seed)
env.seed(args.seed)

"""create neural process"""
policy_np = NeuralProcess(state_dim, action_dim, r_dim, z_dim, h_dim).to(device_np)
optimizer = torch.optim.Adam(policy_np.parameters(), lr=3e-4)
np_trainer = NeuralProcessTrainerRL(device_np, policy_np, optimizer,
                                    num_context_range=(400, 500),
                                    num_extra_target_range=(400, 500),
                                    print_freq=100)

"""create replay memory"""
replay_memory = ReplayMemoryDataset(replay_memory_size)

"""create agent"""
agent = Agent(env, policy_np, device_np, running_state=running_state,
              render=args.render, num_threads=args.num_threads)
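# `ZFilter` above comes from the surrounding repo and is not defined in this
# snippet. A minimal sketch of such a running-normalization filter, assuming
# it keeps an online mean/std estimate (Welford's algorithm) and clips the
# normalized value; the actual implementation may differ:

class ZFilter:
    """Normalize inputs with a running mean/std estimate, then clip."""

    def __init__(self, shape, demean=True, clip=10.0):
        self.demean = demean
        self.clip = clip
        self.n = 0
        self.mean = np.zeros(shape)
        self.m2 = np.zeros(shape)  # running sum of squared deviations

    def __call__(self, x):
        x = np.asarray(x, dtype=np.float64)
        # Welford's online update of mean and variance
        self.n += 1
        delta = x - self.mean
        self.mean += delta / self.n
        self.m2 += delta * (x - self.mean)
        std = np.sqrt(self.m2 / max(self.n - 1, 1)) + 1e-8
        out = (x - self.mean) if self.demean else x
        return np.clip(out / std, -self.clip, self.clip)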
import torch
from torch.utils.data import DataLoader

from neural_process import NeuralProcess
from training import NeuralProcessTrainer

x_dim = 1
y_dim = 1
r_dim = 50  # Dimension of representation of context points
z_dim = 50  # Dimension of sampled latent variable
h_dim = 50  # Dimension of hidden layers in encoder and decoder

neuralprocess = NeuralProcess(x_dim, y_dim, r_dim, z_dim, h_dim)

batch_size = 2
num_context = 4
num_target = 4

# `dataset` is assumed to be defined earlier, e.g. a 1D regression dataset
# yielding (x, y) pairs.
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
optimizer = torch.optim.Adam(neuralprocess.parameters(), lr=3e-4)
np_trainer = NeuralProcessTrainer(device, neuralprocess, optimizer,
                                  num_context_range=(num_context, num_context),
                                  num_extra_target_range=(num_target, num_target),
                                  print_freq=200)

neuralprocess.training = True
np_trainer.train(data_loader, 30)
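# Once trained, the neural process can be queried for a predictive
# distribution over new target points given a handful of context points.
# A minimal sketch, assuming forward(x_context, y_context, x_target) returns
# a distribution object in evaluation mode and that `dataset` yields (x, y)
# tensors of shape (num_points, 1); names and shapes here are illustrative:

from math import pi

neuralprocess.training = False

x, y = dataset[0]
x_context = x[:num_context].unsqueeze(0)  # (1, num_context, 1)
y_context = y[:num_context].unsqueeze(0)
x_target = torch.linspace(-pi, pi, 100).reshape(1, 100, 1)

p_y_pred = neuralprocess(x_context, y_context, x_target)
mu = p_y_pred.loc.detach()  # predictive mean at the target locations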
import json

import torch
from torch.utils.data import DataLoader

# The dataset constructor is truncated in the original; only its trailing
# arguments survive: shift_range=(-.5, .5), num_points=400, num_samples=800
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Alternative image datasets, disabled here:
# if config["dataset"] == "mnist":
#     data_loader, _ = mnist(batch_size=batch_size, size=img_size[1])
# elif config["dataset"] == "celeba":
#     data_loader = celeba(batch_size=batch_size, size=img_size[1])
# np_img = NeuralProcessImg(img_size, r_dim, z_dim, h_dim).to(device)

# Recurrent encoder; the arguments are presumably
# (input_dim, hidden_dim, output_dim, n_layers).
gru = GRUNet(50, 256, 50, 2)
hidden = gru.init_hidden(batch_size)
neural_process = NeuralProcess(1, 1, 50, 50, 50, gru, hidden)

optimizer = torch.optim.Adam(neural_process.parameters(), lr=config["lr"])
np_trainer = NeuralProcessTrainer(device, neural_process, optimizer,
                                  num_context_range, num_extra_target_range,
                                  print_freq=100)

for epoch in range(epochs):
    print("Epoch {}".format(epoch + 1))
    np_trainer.train(data_loader, 1)

    # Save losses at every epoch
    with open(directory + '/losses.json', 'w') as f:
        json.dump(np_trainer.epoch_loss_history, f)
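# `GRUNet` is not defined in this snippet. A minimal sketch, assuming the
# constructor signature (input_dim, hidden_dim, output_dim, n_layers) implied
# by the call above; the actual class in the source repo may differ:

import torch.nn as nn

class GRUNet(nn.Module):
    """GRU followed by a linear read-out, with a zero-initialized hidden state."""

    def __init__(self, input_dim, hidden_dim, output_dim, n_layers):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.gru = nn.GRU(input_dim, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, h):
        # x: (batch, seq_len, input_dim), h: (n_layers, batch, hidden_dim)
        out, h = self.gru(x, h)
        return self.fc(out), h

    def init_hidden(self, batch_size):
        return torch.zeros(self.n_layers, batch_size, self.hidden_dim)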