import numpy as np
import torch

# Project-specific imports (ZFilter, NeuralProcessTrainerRL, ReplayMemoryDataset,
# Agent) and the env/args/dimension setup are assumed to happen earlier in the script.
# Discrete Gym action spaces have scalar shape ().
is_disc_action = len(env.action_space.shape) == 0
if use_running_state:
    running_state = ZFilter(
        (state_dim, ), clip=5
    )  # maintains a running mean and std of states for online normalization
else:
    running_state = None
# running_reward = ZFilter((1,), demean=False, clip=10)
"""seeding"""
np.random.seed(args.seed)
torch.manual_seed(args.seed)
env.seed(args.seed)
"""create neural process"""
policy_np = NeuralProcess(state_dim, action_dim, r_dim, z_dim,
                          h_dim).to(device_np)
optimizer = torch.optim.Adam(policy_np.parameters(), lr=3e-4)
np_trainer = NeuralProcessTrainerRL(device_np,
                                    policy_np,
                                    optimizer,
                                    num_context_range=(400, 500),
                                    num_extra_target_range=(400, 500),
                                    print_freq=100)
"""create replay memory"""
replay_memory = ReplayMemoryDataset(replay_memory_size)
"""create agent"""
agent = Agent(env,
              policy_np,
              device_np,
              running_state=running_state,
              render=args.render,
              num_threads=args.num_threads)
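
# Interaction sketch (an assumption: a PyTorch-RL-style collect_samples API,
# which this script's Agent/ZFilter setup mirrors; adapt to the real interface):
#   batch, log = agent.collect_samples(args.min_batch_size)
#   replay_memory.add(batch)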

from neural_process import NeuralProcess

x_dim = 1
y_dim = 1
r_dim = 50  # Dimension of representation of context points
z_dim = 50  # Dimension of sampled latent variable
h_dim = 50  # Dimension of hidden layers in encoder and decoder

neuralprocess = NeuralProcess(x_dim, y_dim, r_dim, z_dim, h_dim)

from torch.utils.data import DataLoader
from training import NeuralProcessTrainer

batch_size = 2
num_context = 4
num_target = 4

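# The DataLoader below expects a dataset of 1D functions; a minimal sketch
# assuming the repo's SineData (random sine curves), since this snippet's own
# dataset creation is not shown.
from datasets import SineData

dataset = SineData(amplitude_range=(-1., 1.),
                   shift_range=(-.5, .5),
                   num_points=100)
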
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
optimizer = torch.optim.Adam(neuralprocess.parameters(), lr=3e-4)
np_trainer = NeuralProcessTrainer(device,
                                  neuralprocess,
                                  optimizer,
                                  num_context_range=(num_context, num_context),
                                  num_extra_target_range=(num_target,
                                                          num_target),
                                  print_freq=200)

neuralprocess.training = True
np_trainer.train(data_loader, 30)
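
# After training, sample functions from the learned prior (a sketch following
# the repo's 1D example; xz_to_y is assumed to be the NeuralProcess decoder).
from math import pi
import matplotlib.pyplot as plt

neuralprocess.training = False
x_target = torch.linspace(-pi, pi, 100).view(1, 100, 1)
for _ in range(64):
    z_sample = torch.randn((1, z_dim))  # draw one latent function
    mu, _ = neuralprocess.xz_to_y(x_target, z_sample)
    plt.plot(x_target.numpy()[0], mu.detach().numpy()[0], c='b', alpha=0.5)
plt.xlim(-pi, pi)
plt.show()
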
# Dataset for the sequential run (the head of this call was lost; SineData and
# the amplitude_range value are assumptions based on the surviving kwargs).
dataset = SineData(amplitude_range=(-1., 1.),
                   shift_range=(-.5, .5),
                   num_points=400,
                   num_samples=800)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
#if config["dataset"] == "mnist":
#   data_loader, _ = mnist(batch_size=batch_size, size=img_size[1])
#elif config["dataset"] == "celeba":
#   data_loader = celeba(batch_size=batch_size, size=img_size[1])

#np_img = NeuralProcessImg(img_size, r_dim, z_dim, h_dim).to(device)
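
# Assumed setup for the sequential run below: the original script's config/json
# plumbing is not shown, so every value here is a placeholder.
import json
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
config = {"lr": 3e-4}
num_context_range = (4, 32)
num_extra_target_range = (4, 32)
epochs = 30
directory = "results"
os.makedirs(directory, exist_ok=True)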

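# GRUNet is used below but not defined in this excerpt; a minimal sketch
# matching the call signature GRUNet(input_dim, hidden_dim, output_dim, n_layers)
# and the init_hidden(batch_size) usage.
import torch.nn as nn


class GRUNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers):
        super(GRUNet, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.gru = nn.GRU(input_dim, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, h):
        # Encode the sequence and project each step's hidden state.
        out, h = self.gru(x, h)
        return self.fc(out), h

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        return weight.new_zeros(self.n_layers, batch_size, self.hidden_dim)
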
# Sequence-aware Neural Process: a GRU encodes ordered context points. This
# variant of NeuralProcess takes the GRU and its initial hidden state in
# addition to the usual x, y, r, z, h dimensions.
gru = GRUNet(50, 256, 50, 2)  # input_dim, hidden_dim, output_dim, n_layers
hidden = gru.init_hidden(batch_size)
np_rnn = NeuralProcess(1, 1, 50, 50, 50, gru, hidden)

optimizer = torch.optim.Adam(np_rnn.parameters(), lr=config["lr"])

np_trainer = NeuralProcessTrainer(device,
                                  np_rnn,
                                  optimizer,
                                  num_context_range,
                                  num_extra_target_range,
                                  print_freq=100)

for epoch in range(epochs):
    print("Epoch {}".format(epoch + 1))
    np_trainer.train(data_loader, 1)

    # Save losses at every epoch
    with open(directory + '/losses.json', 'w') as f:
        json.dump(np_trainer.epoch_loss_history, f)
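
# Persist the trained weights alongside the loss history (a sketch; the original
# save step is not shown and the filename is a placeholder).
torch.save(np_rnn.state_dict(), directory + '/model.pt')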