# Visualize data samples: plot the first 64 (x, y) pairs of `dataset`
# (defined elsewhere) as semi-transparent blue curves on one shared axes.
for i in range(64):
    x, y = dataset[i]  # x, y expose .numpy(), so presumably torch tensors — TODO confirm
    plt.plot(x.numpy(), y.numpy(), c='b', alpha=0.5)
    plt.xlim(-pi, pi)  # fix the x-axis to [-pi, pi]; `pi` is imported elsewhere

    # NOTE(review): this import sits inside the loop body — it re-executes
    # (as a cached no-op) every iteration; it likely belongs at top level.
    from neural_process import NeuralProcess

# Hyperparameters for a 1-D -> 1-D neural process.
x_dim = 1   # input dimensionality
y_dim = 1   # output dimensionality
r_dim = 50  # Dimension of representation of context points
z_dim = 50  # Dimension of sampled latent variable
h_dim = 50  # Dimension of hidden layers in encoder and decoder

# Instantiate the (untrained) neural process model.
neuralprocess = NeuralProcess(x_dim, y_dim, r_dim, z_dim, h_dim)

from torch.utils.data import DataLoader
from training import NeuralProcessTrainer

# Context/target split sizes used by the trainer for each sampled function.
batch_size = 2
num_context = 4
num_target = 4

# Shuffled loader over the dataset built above; Adam with the 3e-4 learning
# rate used consistently across these scripts.
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
optimizer = torch.optim.Adam(neuralprocess.parameters(), lr=3e-4)
np_trainer = NeuralProcessTrainer(device,
                                  neuralprocess,
                                  optimizer,
                                  num_context_range=(num_context, num_context),
                                  num_extra_target_range=(num_target,
Ejemplo n.º 2
0
    """get advantage estimation from the trajectories"""
    advantages, returns = estimate_advantages(rewards, masks, values, args.gamma, args.tau, device)

    """perform TRPO update"""
    trpo_step(policy_net, value_net, states, actions, returns, advantages, args.max_kl_trpo, args.damping, args.l2_reg)




'''create neural process'''
# Policy network: neural process mapping states -> actions, optionally with
# cross-attention over context points, placed on the NP device.
if args.use_attentive_np:
    policy_np = AttentiveNeuralProcess(state_dim, action_dim, args.r_dim, args.z_dim, args.h_dim,
                                       args.a_dim, use_self_att=False).to(args.device_np)
else:
    policy_np = NeuralProcess(state_dim, action_dim, args.r_dim, args.z_dim, args.h_dim).to(args.device_np)

optimizer = torch.optim.Adam(policy_np.parameters(), lr=3e-4)
# Leave-one-out trainer with a fixed context size and a fixed number of
# extra target points per update.
np_trainer = NeuralProcessTrainerLoo(args.device_np, policy_np, optimizer,
                                     num_context_range=(num_context_points, num_context_points),
                                     num_extra_target_range=(args.num_testing_points, args.num_testing_points),
                                     print_freq=50)

# Value network: scalar-output neural process (y_dim = 1).
if args.v_use_attentive_np:
    # BUGFIX: the hidden-dim argument previously received args.v_r_dim;
    # the non-attentive branch below and the parallel construction elsewhere
    # in this codebase pass v_h_dim in that slot, so use args.v_h_dim here.
    value_np = AttentiveNeuralProcess(state_dim, 1, args.v_r_dim, args.v_z_dim, args.v_h_dim,
                                      args.a_dim, use_self_att=False).to(args.device_np)
else:
    value_np = NeuralProcess(state_dim, 1, args.v_r_dim, args.v_z_dim, args.v_h_dim).to(args.device_np)
value_optimizer = torch.optim.Adam(value_np.parameters(), lr=3e-4)
value_np_trainer = NeuralProcessTrainerLoo(args.device_np, value_np, value_optimizer,
                                          num_context_range=(num_context_points, num_context_points),
Ejemplo n.º 3
0
    running_state = None
"""seeding"""
np.random.seed(args.seed)
torch.manual_seed(args.seed)
env.seed(args.seed)

if args.use_attentive_np:
    value_np = AttentiveNeuralProcess(state_dim,
                                      1,
                                      args.v_r_dim,
                                      args.v_z_dim,
                                      args.v_h_dim,
                                      args.v_z_dim,
                                      use_self_att=False).to(args.device)
else:
    value_np = NeuralProcess(state_dim, 1, args.v_r_dim, args.v_z_dim,
                             args.v_h_dim).to(args.device)
value_optimizer = torch.optim.Adam(value_np.parameters(), lr=3e-4)
if args.loo:
    value_np_trainer = NeuralProcessTrainerLoo(
        args.device,
        value_np,
        value_optimizer,
        num_context_range=(num_context_points, num_context_points),
        num_extra_target_range=(args.num_testing_points,
                                args.num_testing_points),
        print_freq=50)
else:
    value_np_trainer = NeuralProcessTrainerRL(
        args.device,
        value_np,
        value_optimizer,
# Grab a single batch from the loader to initialize the models.
# (Replaces the original `for ...: break` idiom with the direct equivalent;
# both crash later if the loader is empty, so behavior is unchanged.)
data_init = next(iter(data_loader))
x_init, y_init = data_init
# Keep only the first sequence of the batch and split it into context/target;
# just the context halves are used below.
x_init, y_init, _, _ = context_target_split(x_init[0:1], y_init[0:1], args.num_context, args.num_target)
print('dataset created', x_init.size())

# create model
# DKL branch: Gaussian likelihood + deep-kernel GP regression model,
# initialized on the context points drawn above.
likelihood = gpytorch.likelihoods.GaussianLikelihood().to(device)
model_dkl = GPRegressionModel(x_init, y_init.squeeze(0).squeeze(-1), likelihood,
                              args.h_dim_dkl, args.z_dim_dkl, name_id='DKL').to(device)
# NP branch: attentive or plain neural process, sigma left unconstrained.
if anp:
    model_np = AttentiveNeuralProcess(args.x_dim, args.y_dim, args.r_dim_np, args.z_dim_np, args.h_dim_np, args.a_dim_np,
                                      use_self_att=True, fixed_sigma=None).to(device)
else:
    model_np = NeuralProcess(args.x_dim, args.y_dim, args.r_dim_np, args.z_dim_np, args.h_dim_np, fixed_sigma=None).to(device)

# One Adam over all DKL sub-modules (feature extractor, kernel, mean, likelihood).
optimizer_dkl = torch.optim.Adam([
    {'params': model_dkl.feature_extractor.parameters()},
    {'params': model_dkl.covar_module.parameters()},
    {'params': model_dkl.mean_module.parameters()},
    {'params': model_dkl.likelihood.parameters()}], lr=0.01)
trainer_dkl = DKMTrainer(device, model_dkl, optimizer_dkl, args, print_freq=args.print_freq)

optimizer_np = torch.optim.Adam(model_np.parameters(), lr=learning_rate)
np_trainer = NeuralProcessTrainer(device, model_np, optimizer_np,
                                  num_context_range=(args.num_context, args.num_context),
                                  num_extra_target_range=(args.num_target, args.num_target),
                                  print_freq=args.print_freq)
# train
print('start dkl training')
Ejemplo n.º 5
0
# Environment dimensions; Box action spaces have a non-empty shape, so an
# empty shape tuple marks a discrete action space.
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
is_disc_action = len(env.action_space.shape) == 0
if args.use_running_state:
    running_state = ZFilter((state_dim,), clip=5)  # running list of states that allows to access precise mean and std
else:
    running_state = None
# running_reward = ZFilter((1,), demean=False, clip=10)

"""seeding"""
# Seed numpy, torch and the env RNGs for reproducibility.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
env.seed(args.seed)

'''create neural process'''
# Policy NP (states -> actions) with its RL trainer; context/target counts
# are sampled uniformly from [400, 500] each update.
policy_np = NeuralProcess(state_dim, action_dim, args.r_dim, args.z_dim, args.h_dim).to(args.device_np)
optimizer = torch.optim.Adam(policy_np.parameters(), lr=3e-4)
np_trainer = NeuralProcessTrainerRL(args.device_np, policy_np, optimizer,
                                    num_context_range=(400, 500),
                                    num_extra_target_range=(400, 500),
                                    print_freq=10)

# Value NP (states -> scalar value) with an identically configured trainer.
value_np = NeuralProcess(state_dim, 1, args.v_r_dim, args.v_z_dim, args.v_h_dim).to(args.device_np)
value_optimizer = torch.optim.Adam(value_np.parameters(), lr=3e-4)
value_np_trainer = NeuralProcessTrainerRL(args.device_np, value_np, value_optimizer,
                                          num_context_range=(400, 500),
                                          num_extra_target_range=(400, 500),
                                          print_freq=10)
"""create replay memory"""
# Separate replay buffers for policy transitions and value targets.
replay_memory = ReplayMemoryDataset(args.replay_memory_size)
value_replay_memory = ValueReplay(args.v_replay_memory_size)
Ejemplo n.º 6
0
# Environment dimensions; an empty action-space shape marks a discrete space.
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
is_disc_action = len(env.action_space.shape) == 0
if use_running_state:
    running_state = ZFilter(
        (state_dim, ), clip=5
    )  # running list of states that allows to access precise mean and std
else:
    running_state = None
# running_reward = ZFilter((1,), demean=False, clip=10)
"""seeding"""
# Seed numpy, torch and the env RNGs for reproducibility.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
env.seed(args.seed)
'''create neural process'''
# Policy NP (states -> actions) and its RL trainer; context/target counts
# are drawn uniformly from [400, 500] each update.
policy_np = NeuralProcess(state_dim, action_dim, r_dim, z_dim,
                          h_dim).to(device_np)
optimizer = torch.optim.Adam(policy_np.parameters(), lr=3e-4)
np_trainer = NeuralProcessTrainerRL(device_np,
                                    policy_np,
                                    optimizer,
                                    num_context_range=(400, 500),
                                    num_extra_target_range=(400, 500),
                                    print_freq=100)
"""create replay memory"""
replay_memory = ReplayMemoryDataset(replay_memory_size)
"""create agent"""
agent = Agent(env,
              policy_np,
              device_np,
              running_state=running_state,
              render=args.render,
# Persist the data plot named after the kernel combination, then start fresh.
plt.savefig(plots_path + '-'.join(kernel) + '_data')
plt.close()
# create and train np
if use_attention:
    neuralprocess = AttentiveNeuralProcess(x_dim,
                                           y_dim,
                                           r_dim,
                                           z_dim,
                                           h_dim,
                                           a_dim,
                                           use_self_att=use_self_att,
                                           fixed_sigma=fix_sigma).to(device)
else:
    neuralprocess = NeuralProcess(x_dim,
                                  y_dim,
                                  r_dim,
                                  z_dim,
                                  h_dim,
                                  fixed_sigma=fix_sigma).to(device)

optimizer = torch.optim.Adam(neuralprocess.parameters(), lr=learning_rate)
np_trainer = NeuralProcessTrainer(device,
                                  neuralprocess,
                                  optimizer,
                                  num_context_range=num_context,
                                  num_extra_target_range=num_target,
                                  print_freq=5040)
# NOTE(review): sets the `training` attribute directly instead of calling
# .train(); on an nn.Module this does not propagate to submodules — confirm
# NeuralProcess only checks the top-level flag (same pattern recurs elsewhere
# in this codebase, so it may be the project convention).
neuralprocess.training = True
np_trainer.train(data_loader, epochs)

plt.figure(2)
plt.title('average loss over epochs')
Ejemplo n.º 8
0
    id = mdl + time.ctime() + '{}e_{}b_{}c{}t_{}lr_{}r_{}z_{}a'.format(
        epochs, batch_size, num_context, num_target, l, r_dim, z_dim, a_dim)
    # create and train np
    if use_attention:
        neuralprocess = AttentiveNeuralProcess(
            x_dim,
            y_dim,
            r_dim,
            z_dim,
            h_dim,
            a_dim,
            use_self_att=use_self_att).to(device)
        first = False
    else:
        neuralprocess = NeuralProcess(x_dim, y_dim, r_dim, z_dim,
                                      h_dim).to(device)

    t0 = time.time()
    optimizer = torch.optim.Adam(neuralprocess.parameters(), lr=learning_rate)
    np_trainer = NeuralProcessTrainer(device,
                                      neuralprocess,
                                      optimizer,
                                      num_context_range=num_context,
                                      num_extra_target_range=num_target,
                                      print_freq=50000)
    neuralprocess.training = True
    np_trainer.train(data_loader, epochs, early_stopping=0)
    '''plot training epochs'''
    n_ep = len(np_trainer.epoch_loss_history)
    ax_epoch.plot(np.linspace(0, n_ep - 1, n_ep),
                  np_trainer.epoch_loss_history,
Ejemplo n.º 9
0
def sample_context(x, y, num_context=100):
    """Randomly subsample matching context points from batched sequences.

    Parameters
    ----------
    x : array-like of shape (batch, num_points, x_dim)
    y : array-like of shape (batch, num_points, y_dim)
    num_context : int
        Number of points drawn without replacement; np.random.choice
        raises ValueError if num_context > num_points.

    Returns
    -------
    (x_context, y_context)
        Shapes (batch, num_context, x_dim) and (batch, num_context, y_dim),
        sliced at the SAME point locations so pairs stay aligned.
    """
    num_points = x.shape[1]
    # Sample distinct locations of context and target points, shared by x and y.
    locations = np.random.choice(num_points,
                                 size=num_context,
                                 replace=False)
    # `locations` already holds exactly num_context indices, so index with it
    # directly (the original re-sliced with [:num_context], which was redundant).
    x_context = x[:, locations, :]
    y_context = y[:, locations, :]
    return x_context, y_context

# Build the neural process (attentive with self-attention, or plain) and its
# RL trainer; context/target counts are drawn uniformly from [400, 500].
if use_attention:
    neuralprocess = AttentiveNeuralProcess(args.x_dim, args.y_dim, args.r_dim, args.z_dim,
                                            args.h_dim, args.a_dim, use_self_att=True).to(device)
else:
    neuralprocess = NeuralProcess(args.x_dim, args.y_dim, args.r_dim, args.z_dim, args.h_dim).to(device)

optimizer = torch.optim.Adam(neuralprocess.parameters(), lr=3e-4)
np_trainer = NeuralProcessTrainerRL(device, neuralprocess, optimizer,
                                    num_context_range=(400, 500),
                                    num_extra_target_range= (400, 500),
                                    print_freq=2)


def get_dataset(i_iter):
    file_name = memory_dir + str(i_iter) + '^iter_' + env_name

    with open(file_name, 'rb') as file_m:
        memory_iter = pickle.load(file_m)  # memory_iter.memory to access list of transitions

    dataset = MemoryDataset(memory_iter.memory, max_len=999)
Ejemplo n.º 10
0
                    default=1,
                    metavar='N',
                    help='interval between training status logs (default: 10)')
parser.add_argument(
    '--save-model-interval',
    type=int,
    default=0,
    metavar='N',
    help="interval between saving model (default: 0, means don't save)")
parser.add_argument('--gpu-index', type=int, default=0, metavar='N')

args = parser.parse_args()

# Policy NP with hard-coded 2-D input / 1-D output and unconstrained sigma.
policy_np = NeuralProcess(2,
                          1,
                          args.r_dim,
                          args.z_dim,
                          args.h_dim,
                          fixed_sigma=None).to(args.device_np)
optimizer = torch.optim.Adam(policy_np.parameters(), lr=3e-4)
# RL trainer; context/target counts are drawn uniformly from [400, 500].
np_trainer = NeuralProcessTrainerRL(args.device_np,
                                    policy_np,
                                    optimizer,
                                    num_context_range=(400, 500),
                                    num_extra_target_range=(400, 500),
                                    print_freq=10)
env = gym.make(args.env_name)


def sample_initial_context_normal(num_episodes):
    initial_episodes = []
    max_episode_len = 999
Ejemplo n.º 11
0
# Synthetic sine dataset: 800 curves, 400 points each, random amplitude/shift.
dataset = SineData(amplitude_range=(-1., 1.),
                   shift_range=(-.5, .5),
                   num_points=400,
                   num_samples=800)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
#if config["dataset"] == "mnist":
#   data_loader, _ = mnist(batch_size=batch_size, size=img_size[1])
#elif config["dataset"] == "celeba":
#   data_loader = celeba(batch_size=batch_size, size=img_size[1])

#np_img = NeuralProcessImg(img_size, r_dim, z_dim, h_dim).to(device)

# GRU-backed neural process: a 2-layer GRU (50 -> 256 hidden -> 50) is passed
# into the NP constructor together with its initial hidden state.
gru = GRUNet(50, 256, 50, 2)
hidden = gru.init_hidden(batch_size)
input_data = NeuralProcess(1, 1, 50, 50, 50, gru, hidden)

optimizer = torch.optim.Adam(input_data.parameters(), lr=config["lr"])

np_trainer = NeuralProcessTrainer(device,
                                  input_data,
                                  optimizer,
                                  num_context_range,
                                  num_extra_target_range,
                                  print_freq=100)

# Train one epoch at a time so per-epoch work (e.g. saving losses) can run
# between calls.
for epoch in range(epochs):
    print("Epoch {}".format(epoch + 1))
    np_trainer.train(data_loader, 1)

    # Save losses at every epoch
# Seed numpy, torch and the env RNGs for reproducibility.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
env.seed(args.seed)
# NOTE(review): reads the private gym attribute _max_episode_steps.
max_episode_len = env._max_episode_steps
'''create neural process'''
ep_frq = 50  # trainer print frequency
if args.use_attentive_np:
    policy_np = AttentiveNeuralProcess(state_dim,
                                       action_dim,
                                       args.r_dim,
                                       args.z_dim,
                                       args.h_dim,
                                       args.a_dim,
                                       use_self_att=False).to(device_np)
else:
    policy_np = NeuralProcess(state_dim, action_dim, args.r_dim, args.z_dim,
                              args.h_dim).to(device_np)
optimizer = torch.optim.Adam(policy_np.parameters(), lr=3e-4)
# Context range is passed positionally here: (1, max_episode_len // 2).
np_trainer = NeuralProcessTrainerRL(device_np,
                                    policy_np,
                                    optimizer, (1, max_episode_len // 2),
                                    print_freq=ep_frq)
'''create MKI model'''

# Mean-interpolator baseline, run in double precision on the NP device.
mi_model = MeanInterpolator(state_dim,
                            args.h_mi_dim,
                            args.z_mi_dim,
                            scaling=args.scaling).to(device_np).double()

optimizer_mi = torch.optim.Adam([{
    'params':
    mi_model.feature_extractor.parameters(),
Ejemplo n.º 13
0
#  # Create dataset
# NOTE(review): `dataset`/`data_loader` are only bound when args.x_dim == 1;
# later code that uses them presumably assumes the 1-D case — confirm.
if args.x_dim == 1:
    dataset = MultiGPData(args.mean, args.kernel, num_samples=args.num_tot_samples, amplitude_range=args.x_range[0], num_points=args.num_points)
    data_loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True)

    # Held-out set: a single function drawn over a doubled amplitude range.
    test_dataset = MultiGPData(args.mean, args.kernel, num_samples=1, amplitude_range=[v*2 for v in args.x_range[0]], num_points=args.num_points)
    test_data_loader = DataLoader(test_dataset, batch_size=1, shuffle=True)


#  # Create models

# NP
if args.use_attention:
    model_np = AttentiveNeuralProcess(args.x_dim, args.y_dim, args.r_dim_np, args.z_dim_np, args.h_dim_np, args.a_dim_np, att_type='multihead').to(device)
else:
    model_np = NeuralProcess(args.x_dim, args.y_dim, args.r_dim_np, args.z_dim_np, args.h_dim_np).to(device)
optimizer_np = torch.optim.Adam(model_np.parameters(), lr=learning_rate)
np_trainer = NeuralProcessTrainer(device, model_np, optimizer_np,
                                  num_context_range=args.context_range,
                                  num_extra_target_range=args.num_points,
                                  print_freq=5040)

# MI
# Mean interpolator in double precision; its two sub-modules share one Adam.
model_mi = MeanInterpolator(1, args.h_dim_mi, args.z_dim_mi).to(device).double()
optimizer_mi = torch.optim.Adam([
    {'params': model_mi.feature_extractor.parameters(), 'lr': learning_rate},
    {'params': model_mi.interpolator.parameters(), 'lr': learning_rate}])
trainer_mi = MITrainer(device, model_mi, optimizer_mi, num_context=args.test_context,
                       num_target=args.num_points-args.test_context, print_freq=10)

# DKL