Example 1
def test(args=get_args()):
    torch.set_num_threads(1)  # only one thread is needed for the NN

    env = gym_make()

    model_path = os.path.join(args.logdir, args.task, 'ddpg/policy.pth')
    layer = [1024, 512, 512, 512]
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    state_shape = env.observation_space.shape or env.observation_space.n
    action_shape = env.action_space.shape or env.action_space.n
    action_range = [env.action_space.low, env.action_space.high]
    actor = Actor(
        layer, state_shape, action_shape,
        action_range, device
    ).to(device)
    critic = Critic(
        layer, state_shape, action_shape, device
    ).to(device)

    actor_optim = torch.optim.Adam(actor.parameters())
    critic_optim = torch.optim.Adam(critic.parameters())
    policy = DDPGPolicy(
        actor, actor_optim, critic, critic_optim,
        action_range=action_range)
    policy.load_state_dict(torch.load(model_path, map_location=device))
    obs = env.reset()
    # env.state[0] = -30.0
    # env.goal[0] = 30.0
    env.render()
    print(env.goal)
    while True:
        action, _ = policy.actor(obs.reshape(1, -1), eps=0.01)
        action = action.detach().cpu().numpy()[0]
        
        obs, reward, done, info = env.step(action)
        # print(env.state)
        # print(reward)
        # print(info)
        env.render()
        if done:
            break
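
The snippet assumes several project-specific helpers (get_args, gym_make, Actor, Critic, DDPGPolicy) and imports that are not shown. A minimal sketch of the missing imports and argument parsing might look like the following; the flag names and defaults are assumptions, not the project's actual code:

import argparse
import os

import gym
import torch


def get_args():
    # hypothetical flags; the real project likely defines many more options
    parser = argparse.ArgumentParser()
    parser.add_argument('--task', type=str, default='Pendulum-v0')
    parser.add_argument('--logdir', type=str, default='log')
    return parser.parse_args()


def gym_make(task='Pendulum-v0'):
    # assumed: the project builds its environment from a task name
    return gym.make(task)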
Example 2
# custom weight initialization, applied to both nets below via .apply()
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

# create the two networks and their optimizers
generator = Generator(latent=opt.latent, channels=opt.channels, num_filters=opt.num_filters)
critic = Critic(channels=opt.channels, num_filters=opt.num_filters)
optimizer_G = torch.optim.RMSprop(generator.parameters(), lr=opt.learning_rate)
optimizer_C = torch.optim.RMSprop(critic.parameters(), lr=opt.learning_rate)

# move the nets to the GPU if one is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
generator, critic = generator.to(device), critic.to(device)
generator.apply(weights_init)
critic.apply(weights_init)
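
Generator and Critic are defined elsewhere in the project. A DCGAN-style sketch consistent with the constructor arguments used above (latent, channels, num_filters) and 32x32 images such as CIFAR-10 might look like this; the layer layout is an assumption, not the project's actual architecture:

import torch.nn as nn

class Generator(nn.Module):
    # hypothetical DCGAN-style generator for 32x32 images
    def __init__(self, latent, channels, num_filters):
        super().__init__()
        self.net = nn.Sequential(
            nn.ConvTranspose2d(latent, num_filters * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(num_filters * 8),
            nn.ReLU(True),
            nn.ConvTranspose2d(num_filters * 8, num_filters * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(num_filters * 4),
            nn.ReLU(True),
            nn.ConvTranspose2d(num_filters * 4, num_filters * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(num_filters * 2),
            nn.ReLU(True),
            nn.ConvTranspose2d(num_filters * 2, channels, 4, 2, 1, bias=False),
            nn.Tanh(),
        )

    def forward(self, z):
        # z has shape (N, latent, 1, 1)
        return self.net(z)


class Critic(nn.Module):
    # hypothetical critic: no sigmoid, since a WGAN critic outputs a raw score
    def __init__(self, channels, num_filters):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(channels, num_filters, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(num_filters, num_filters * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(num_filters * 2),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(num_filters * 2, num_filters * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(num_filters * 4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(num_filters * 4, 1, 4, 1, 0, bias=False),
        )

    def forward(self, x):
        return self.net(x).view(-1)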

if opt.dataset == 'cifar10':
    print(ROOT_DIR + "/cifar")
    if not os.path.isdir(ROOT_DIR + "/cifar"):
        os.mkdir(ROOT_DIR + "/cifar")
elif opt.dataset == 'LSUN':
    if not os.path.isdir(ROOT_DIR + "/bedrooms"):
        os.mkdir(ROOT_DIR + "/bedrooms")
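
The dataset loading itself is not shown; a typical torchvision setup for the CIFAR-10 branch would be something like the following (the batch size option and the normalization values are assumptions):

import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # map images to [-1, 1] for the Tanh output
])
dataset = CIFAR10(root=ROOT_DIR + "/cifar", train=True, download=True, transform=transform)
dataloader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=True, num_workers=2)  # opt.batch_size is assumed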

# start training
current_epoch = 0
gen_iterations = 0
for epoch in range(opt.n_epochs):
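    # The loop body is cut off in the source; what follows is a hedged sketch of
    # one WGAN-style update consistent with the setup above (RMSprop critic plus
    # weight clipping). `dataloader`, `opt.latent`, `opt.n_critic` and
    # `opt.clip_value` are assumptions, not part of the original snippet.
    for i, (real_imgs, _) in enumerate(dataloader):
        real_imgs = real_imgs.to(device)

        # critic update: maximize E[C(x)] - E[C(G(z))]
        optimizer_C.zero_grad()
        z = torch.randn(real_imgs.size(0), opt.latent, 1, 1, device=device)
        fake_imgs = generator(z).detach()
        loss_C = -(critic(real_imgs).mean() - critic(fake_imgs).mean())
        loss_C.backward()
        optimizer_C.step()

        # weight clipping keeps the critic within a bounded Lipschitz constant
        for p in critic.parameters():
            p.data.clamp_(-opt.clip_value, opt.clip_value)

        # generator update once every opt.n_critic critic steps
        if i % opt.n_critic == 0:
            optimizer_G.zero_grad()
            gen_imgs = generator(torch.randn(real_imgs.size(0), opt.latent, 1, 1, device=device))
            loss_G = -critic(gen_imgs).mean()
            loss_G.backward()
            optimizer_G.step()
            gen_iterations += 1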