# Create models.
# NOTE(review): reconstructed/reformatted from a whitespace-mangled paste;
# statement order and all values preserved exactly.

# --- Neural Process (optionally attentive) ---
if args.use_attention:
    model_np = AttentiveNeuralProcess(args.x_dim, args.y_dim, args.r_dim_np,
                                      args.z_dim_np, args.h_dim_np, args.a_dim_np,
                                      att_type='multihead').to(device)
else:
    model_np = NeuralProcess(args.x_dim, args.y_dim, args.r_dim_np,
                             args.z_dim_np, args.h_dim_np).to(device)
optimizer_np = torch.optim.Adam(model_np.parameters(), lr=learning_rate)
np_trainer = NeuralProcessTrainer(device, model_np, optimizer_np,
                                  num_context_range=args.context_range,
                                  num_extra_target_range=args.num_points,
                                  print_freq=5040)

# --- Mean Interpolator (double precision; separate param groups share one lr) ---
model_mi = MeanInterpolator(1, args.h_dim_mi, args.z_dim_mi).to(device).double()
optimizer_mi = torch.optim.Adam([
    {'params': model_mi.feature_extractor.parameters(), 'lr': learning_rate},
    {'params': model_mi.interpolator.parameters(), 'lr': learning_rate},
])
trainer_mi = MITrainer(device, model_mi, optimizer_mi,
                       num_context=args.test_context,
                       num_target=args.num_points-args.test_context,
                       print_freq=10)

# --- Deep Kernel Learning GP ---
# Grab a single batch just to initialize the GP with one context/target split.
for data_init in data_loader:
    break
x_init, y_init = data_init
x_init, y_init, _, _ = context_target_split_CinT(x_init[0:1], y_init[0:1],
                                                 args.test_context,
                                                 args.num_points-args.test_context)
# alternative considered: noise_constraint=gpytorch.constraints.GreaterThan(5e-2)
likelihood = gpytorch.likelihoods.GaussianLikelihood().to(device)
model_dkl = GPRegressionModel(x_init, y_init.squeeze(0).squeeze(-1), likelihood,
                              args.h_dim_dkl, args.z_dim_dkl,
                              name_id='DKL').to(device)
# NOTE(review): this line is a whitespace-mangled paste. It contains the TAIL of a
# function (presumably `sample_initial_context_normal`, whose `def` header is not
# visible here): random states are drawn per timestep, actions are sampled from a
# zero-mean Normal with std `sigma`, and [states, actions, length] episodes are
# appended before `return initial_episodes`. The nesting of the append inside the
# function cannot be recovered from this view — reformat against the original file.
# After the `return`, top-level script code builds the MeanInterpolator policy
# model with one Adam param group per sub-module (both at args.lr_nn), an
# MITrainer, and a ReplayMemoryDataset. Code left byte-identical.
states = torch.zeros([1, max_episode_len, state_dim]) for i in range(max_episode_len): states[:, i, :] = torch.randn( state_dim) # torch.from_numpy(env.observation_space.sample()) actions_init = Normal( torch.zeros([1, max_episode_len, action_dim]), sigma * torch.ones([1, max_episode_len, action_dim])).sample() initial_episodes.append([states, actions_init, max_episode_len]) return initial_episodes '''create policy model''' improved_context_list = sample_initial_context_normal(args.num_ensembles) model = MeanInterpolator(state_dim, args.h_dim, args.z_dim, scaling=args.scaling).to(device).double() optimizer = torch.optim.Adam([{ 'params': model.feature_extractor.parameters(), 'lr': args.lr_nn }, { 'params': model.interpolator.parameters(), 'lr': args.lr_nn }]) # train model_trainer = MITrainer(device, model, optimizer, args, print_freq=30) """create replay memory""" replay_memory = ReplayMemoryDataset(args.replay_memory_size, use_mean=True) """create agent"""
# NOTE(review): whitespace-mangled paste, truncated at BOTH ends (begins mid-call
# with `num_samples=...` keyword args, ends mid-call at `MITrainer(device,`).
# Builds train/test GPData2D datasets and DataLoaders, a MeanInterpolator, and an
# Adam optimizer with per-module param groups. Two likely defects to confirm
# against the original file:
#   1. `test_dataset = dataset = GPData2D(...)` rebinds `dataset` AFTER the train
#      DataLoader was created, so train and test datasets are distinct draws but
#      the name `dataset` no longer refers to the training set — probably meant
#      `test_dataset = GPData2D(...)` only.
#   2. The interpolator lr is hard-coded `1e-2` while the feature extractor uses
#      `learning_rate` — verify this asymmetry is intentional.
# Also, `try: os.mkdir(...) except FileExistsError: pass` could be
# `os.makedirs(..., exist_ok=True)`. Code left byte-identical.
num_samples=args.num_tot_samples, grid_bounds=grid_bounds, grid_size=args.grid_size) data_loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True) test_dataset = dataset = GPData2D('constant', kernel, num_samples=args.num_tot_samples, grid_bounds=grid_bounds, grid_size=args.grid_size) test_data_loader = DataLoader(test_dataset, batch_size=1, shuffle=True) print('dataset created') # create model model = MeanInterpolator(args.x_dim, args.h_dim, args.z_dim, scaling=args.scaling).to(device) optimizer = torch.optim.Adam([{ 'params': model.feature_extractor.parameters(), 'lr': learning_rate }, { 'params': model.interpolator.parameters(), 'lr': 1e-2 }]) try: os.mkdir(args.directory_path) except FileExistsError: pass model_trainer = MITrainer(device,