# Held-out GP dataset/loader plus the DKL and NP models under comparison.
test_dataset = MultiGPData(mean, kernel, num_samples=args.batch_size,
                           amplitude_range=x_range, num_points=200)
test_data_loader = DataLoader(test_dataset, batch_size=1, shuffle=True)

# Grab a single batch and split it into context/target; the context points
# seed the DKL model's initial inputs below.
data_init = next(iter(data_loader))
x_init, y_init = data_init
x_init, y_init, _, _ = context_target_split(x_init[0:1], y_init[0:1],
                                            args.num_context, args.num_target)
print('dataset created', x_init.size())

# create model
likelihood = gpytorch.likelihoods.GaussianLikelihood().to(device)
model_dkl = GPRegressionModel(x_init, y_init.squeeze(0).squeeze(-1), likelihood,
                              args.h_dim_dkl, args.z_dim_dkl,
                              name_id='DKL').to(device)
if anp:
    model_np = AttentiveNeuralProcess(args.x_dim, args.y_dim, args.r_dim_np,
                                      args.z_dim_np, args.h_dim_np, args.a_dim_np,
                                      use_self_att=True,
                                      fixed_sigma=None).to(device)
else:
    model_np = NeuralProcess(args.x_dim, args.y_dim, args.r_dim_np,
                             args.z_dim_np, args.h_dim_np,
                             fixed_sigma=None).to(device)

# Separate parameter groups so every DKL component trains at lr=0.01.
optimizer_dkl = torch.optim.Adam(
    [{'params': model_dkl.feature_extractor.parameters()},
     {'params': model_dkl.covar_module.parameters()},
     {'params': model_dkl.mean_module.parameters()},
     {'params': model_dkl.likelihood.parameters()}],
    lr=0.01)
trainer_dkl = DKMTrainer(device, model_dkl, optimizer_dkl, args,
                         print_freq=args.print_freq)

optimizer_np = torch.optim.Adam(model_np.parameters(), lr=learning_rate)
np_trainer = NeuralProcessTrainer(
    device, model_np, optimizer_np,
    num_context_range=(args.num_context, args.num_context),
    num_extra_target_range=(args.num_target, args.num_target),
    print_freq=args.print_freq)
def test():
    """Visualize weight initializations and the policies an (A)NP decodes
    from z ~ N(0, 1), for the default init and several InitFunc variants."""

    def plot_weights_and_policy(policy, plot_id):
        """Plot per-layer mean weights (top) and decoded policies (bottom).

        policy  -- a (Attentive)NeuralProcess exposing named_parameters()
                   and xz_to_y(x, z).
        plot_id -- label used in the title and the saved-figure filename.
        """
        named_parameters = policy.named_parameters()
        fig = plt.figure(figsize=(16, 10))
        # fig.suptitle(plot_id, fontsize=18)
        fig.tight_layout()

        ax_w = fig.add_subplot(211)
        ax_w.set_title(plot_id + " Weights initialization", fontsize=18)
        for n, p in named_parameters:
            # Random color per parameter so layers are distinguishable.
            color = '#%06X' % randint(0, 0xFFFFFF)
            if 'weight' in n and 'layer_norm' not in n:
                try:
                    weights = p.mean(dim=1)
                except Exception:
                    # Parameters without a second dim can't be averaged over
                    # dim=1; skip them instead of falling through and plotting
                    # the PREVIOUS layer's values under this layer's label
                    # (the original bare except did exactly that).
                    print(n)
                    continue
                ax_w.plot(np.arange(len(weights)), weights.detach().numpy(),
                          alpha=0.5, color=color,
                          label=n.replace('.weight', ''))
        ax_w.legend(loc="upper right")
        ax_w.set_xlabel("Weights")
        ax_w.set_ylabel("initialization")
        ax_w.set_ylim(-2, 2)

        ax_policy = fig.add_subplot(212)
        max_std = 0
        min_std = 0
        for z_sample in z_samples:
            mu, sigma = policy.xz_to_y(x, z_sample)
            ax_policy.plot(x[0, :, 0].numpy(), mu[0, :, 0].detach().numpy(),
                           color='b')
            # Track the widest +/- sigma band so the y-limits fit every sample.
            std_h = mu + sigma
            max_std = max(max_std, std_h.max().detach())
            std_l = mu - sigma
            min_std = min(min_std, std_l.min().detach())
            ax_policy.fill_between(x[0, :, 0].detach(), std_l[0, :, 0].detach(),
                                   std_h[0, :, 0].detach(), alpha=0.01,
                                   color='b')
        ax_policy.set_xlabel('x')
        ax_policy.set_ylabel('y')
        ax_policy.set_ylim(min(min_std, -1), max(max_std, 1))
        ax_policy.set_title('Policies sampled with z ~ N(0,1)')
        plt.grid(True)
        plt.show()
        fig.savefig('/home/francesco/PycharmProjects/MasterThesis/plots/NP&ANP/1D/weights/'+plot_id+'64')

    z_dim = 128
    dims = 128
    num_points = 100
    # x grid shaped (1, num_points, 1); each z sample is repeated along the
    # points dimension so xz_to_y sees one z per x location.
    x = torch.linspace(-1, 1, num_points).unsqueeze(1).unsqueeze(0)
    z_samples = []
    for _ in range(16):
        z_samples.append(torch.randn((1, z_dim*2)).unsqueeze(1).repeat(1, num_points, 1))
    neural_process = AttentiveNeuralProcess(1, 1, dims, z_dim, dims, z_dim)
    plot_weights_and_policy(neural_process, ' default')
    for init_func in [InitFunc.init_xavier, InitFunc.init_normal,
                      InitFunc.init_zero, InitFunc.init_kaiming,
                      InitFunc.init_sparse]:
        # Apply the initializer before plotting; the original left this line
        # commented out, so init_policy was an undefined name (NameError).
        init_policy = neural_process.apply(init_func)
        plot_weights_and_policy(init_policy, init_func.__name__)
color = 'b' if use_attention: mdl = 'cross attention ' + mdl color = 'r' if use_self_att: mdl = 'self & ' + mdl color = 'g' id = mdl + time.ctime() + '{}e_{}b_{}c{}t_{}lr_{}r_{}z_{}a'.format( epochs, batch_size, num_context, num_target, l, r_dim, z_dim, a_dim) # create and train np if use_attention: neuralprocess = AttentiveNeuralProcess( x_dim, y_dim, r_dim, z_dim, h_dim, a_dim, use_self_att=use_self_att).to(device) first = False else: neuralprocess = NeuralProcess(x_dim, y_dim, r_dim, z_dim, h_dim).to(device) t0 = time.time() optimizer = torch.optim.Adam(neuralprocess.parameters(), lr=learning_rate) np_trainer = NeuralProcessTrainer(device, neuralprocess, optimizer, num_context_range=num_context, num_extra_target_range=num_target,
fig.savefig(parent_dir + folder_name + name, dpi=250)
plt.close(fig)


def sample_context(x, y, num_context=100):
    """Pick num_context random points (without replacement) along the
    second (points) dimension of x and y and return the context slices."""
    num_points = x.shape[1]
    # Sample locations of context and target points
    locations = np.random.choice(num_points, size=num_context, replace=False)
    x_context = x[:, locations[:num_context], :]
    y_context = y[:, locations[:num_context], :]
    return x_context, y_context


if use_attention:
    neuralprocess = AttentiveNeuralProcess(args.x_dim, args.y_dim, args.r_dim,
                                           args.z_dim, args.h_dim, args.a_dim,
                                           use_self_att=True).to(device)
else:
    neuralprocess = NeuralProcess(args.x_dim, args.y_dim, args.r_dim,
                                  args.z_dim, args.h_dim).to(device)

optimizer = torch.optim.Adam(neuralprocess.parameters(), lr=3e-4)
np_trainer = NeuralProcessTrainerRL(device, neuralprocess, optimizer,
                                    num_context_range=(400, 500),
                                    num_extra_target_range=(400, 500),
                                    print_freq=2)


def get_dataset(i_iter):
    """Load the pickled replay memory saved for iteration i_iter."""
    # NOTE(review): pickle.load on a file path built from env_name — fine for
    # locally produced memories, unsafe on untrusted input.
    file_name = memory_dir + str(i_iter) + '^iter_' + env_name
    with open(file_name, 'rb') as file_m:
        memory_iter = pickle.load(file_m)
    # memory_iter.memory to access list of transitions
) # running list of states that allows to access precise mean and std else: running_state = None # running_reward = ZFilter((1,), demean=False, clip=10) """seeding""" np.random.seed(args.seed) torch.manual_seed(args.seed) env.seed(args.seed) max_episode_len = env._max_episode_steps '''create neural process''' ep_frq = 50 if args.use_attentive_np: policy_np = AttentiveNeuralProcess(state_dim, action_dim, args.r_dim, args.z_dim, args.h_dim, args.a_dim, use_self_att=False).to(device_np) else: policy_np = NeuralProcess(state_dim, action_dim, args.r_dim, args.z_dim, args.h_dim).to(device_np) optimizer = torch.optim.Adam(policy_np.parameters(), lr=3e-4) np_trainer = NeuralProcessTrainerRL(device_np, policy_np, optimizer, (1, max_episode_len // 2), print_freq=ep_frq) '''create MKI model''' mi_model = MeanInterpolator(state_dim, args.h_mi_dim,
print('Use NP anyways')
policy_np = NeuralProcessAblated(state_dim, action_dim, args.r_dim,
                                 args.h_dim).to(args.device_np)
optimizer = torch.optim.Adam(policy_np.parameters(), lr=3e-4)
np_trainer = TrainerAblated(args.device_np, policy_np, optimizer,
                            print_freq=50)

if args.v_use_attentive_np:
    # NOTE(review): the attention dim here is args.z_dim while every other
    # value-network dim uses the v_ prefix — confirm this isn't meant to be
    # a v_-prefixed argument.
    value_np = AttentiveNeuralProcess(state_dim, 1, args.v_r_dim,
                                      args.v_z_dim, args.v_h_dim, args.z_dim,
                                      use_self_att=False).to(args.device_np)
else:
    value_np = NeuralProcess(state_dim, 1, args.v_r_dim, args.v_z_dim,
                             args.v_h_dim).to(args.device_np)
value_optimizer = torch.optim.Adam(value_np.parameters(), lr=3e-4)
value_np_trainer = NeuralProcessTrainerRL(
    args.device_np, value_np, value_optimizer,
    num_context_range=(num_context_points, num_context_points),
    num_extra_target_range=(args.num_testing_points, args.num_testing_points),
    print_freq=50)

"""create replay memory"""