Example #1
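# Evaluation script for a trained DDPG agent: the actor/critic networks are
# wrapped in nn.DataParallel, moved to the compute device, restored from saved
# checkpoints, and run in a rendered evaluation loop.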
        agent.actor_perturbed = nn.DataParallel(agent.actor_perturbed)
        agent.critic = nn.DataParallel(agent.critic)
        agent.critic_target = nn.DataParallel(agent.critic_target)
    agent.actor.to(device)
    agent.actor_target.to(device)
    agent.actor_perturbed.to(device)
    agent.critic.to(device)
    agent.critic_target.to(device)

end_str = "_{}_{}".format(args.env_name, args.model_suffix)
agent.load_model("models/ddpg_actor" + end_str, "models/ddpg_critic" + end_str)

while True:
    episode_reward = 0
    state = torch.Tensor([env.reset()]).to(device)
    env.render()
    while True:
        action = agent.select_action(state, None, None)
        next_state, reward, done, _ = env.step(action.cpu().numpy()[0])
        env.render()
        episode_reward += reward

        #action = torch.Tensor(action).to(device)
        mask = torch.Tensor([not done]).to(device)  # not used in this evaluation loop
        next_state = torch.Tensor([next_state]).to(device)
        reward = torch.Tensor([reward]).to(device)

        state = next_state
        print("Reward: {}; Episode reward: {}".format(reward, episode_reward))

        if done:
            break
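
# For reference, a minimal sketch of the checkpoint restore that
# agent.load_model is assumed to perform above (hypothetical; the real
# agent class may differ):
import torch

class Agent:
    def load_model(self, actor_path, critic_path):
        # Restore saved state dicts onto the already-constructed networks.
        if actor_path is not None:
            self.actor.load_state_dict(torch.load(actor_path))
        if critic_path is not None:
            self.critic.load_state_dict(torch.load(critic_path))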
Example #2
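# Model-based RL training loop: exploration noise is added to the next action,
# each transition is pushed into a model replay buffer, and the learned
# dynamics model is updated once the buffer holds more than one batch.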
                next_action = next_action + npr.normal(
                    0., eps, size=(action_dim, ))

            model_replay_buffer.push(state, action, reward, next_state,
                                     next_action, done)

            if len(model_replay_buffer) > batch_size:
                model_optim.update_model(batch_size, mini_iter=args.model_iter)

            state = next_state
            action = next_action
            episode_reward += reward
            frame_idx += 1

            if args.render:
                env.render("human")

            if frame_idx % (max_frames // 10) == 0:
                last_reward = rewards[-1][1] if len(rewards) > 0 else 0
                print('frame : {}/{}, \t last rew: {}'.format(
                    frame_idx, max_frames, last_reward))

                # pickle.dump(rewards, open(path + 'reward_data' + '.pkl', 'wb'))
                # torch.save(policy_net.state_dict(), path + 'policy_' + str(frame_idx) + '.pt')
                # torch.save(model.state_dict(), path + 'model_' + str(frame_idx) + '.pt')

            if args.done_util:
                if done:
                    break
        test_reward = evaluate(env, policy_net, args.max_steps)
        print('ep rew', ep_num, episode_reward, test_reward)
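
# A minimal sketch of the evaluate helper called above, assuming it runs a
# single noise-free rollout and returns the episode return (hypothetical:
# policy_net.get_action and the single-episode behavior are assumptions, not
# the repository's actual API):
def evaluate(env, policy_net, max_steps):
    state = env.reset()
    total_reward = 0.0
    for _ in range(max_steps):
        action = policy_net.get_action(state)  # assumed greedy policy query
        state, reward, done, _ = env.step(action)
        total_reward += reward
        if done:
            break
    return total_reward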
Example #3
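# Rollout loop for an MPC planner: each planned action is repeated for
# frame_skip simulator steps, with optional rendering, and the episode
# return is recorded at the end.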
    frame_idx = 0
    rewards = []

    ep_num = 0
    state = env.reset()
    mpc_planner.reset()

    episode_reward = 0
    done = False
    for step in range(max_steps):

        action = mpc_planner.update(state)
        # Repeat the planned action for frame_skip simulator steps.
        for _ in range(frame_skip):
            state, reward, done, _ = env.step(action.copy())
            if done:
                break
        episode_reward += reward  # only the last executed frame's reward is added
        frame_idx += 1

        if args.render:
            env.render("rgb_array", width=320 * 2, height=240 * 2)

        if args.done_util:
            if done:
                break

    print('ep rew', ep_num, episode_reward)
    rewards.append([frame_idx, episode_reward])
    ep_num += 1
    env.close()
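
# The frame-skip pattern above can be factored into a reusable helper. A
# minimal sketch (hypothetical) that, unlike the loop above, also accumulates
# reward over the skipped frames:
def step_with_frame_skip(env, action, frame_skip):
    total_reward, done, info = 0.0, False, {}
    state = None
    for _ in range(frame_skip):
        state, reward, done, info = env.step(action.copy())
        total_reward += reward
        if done:
            break
    return state, total_reward, done, info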
Example #4
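# Variant of Example #2 that, when logging is enabled, periodically saves the
# reward history and the dynamics-model weights to disk.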
                next_action = next_action + np.random.normal(
                    0., eps, size=(action_dim, ))

            model_replay_buffer.push(state, action, reward, next_state,
                                     next_action, done)

            if len(model_replay_buffer) > batch_size:
                model_optim.update_model(batch_size, mini_iter=args.model_iter)

            state = next_state
            action = next_action
            episode_reward += reward
            frame_idx += 1

            if args.render:
                env.render('human')

            if frame_idx % (max_frames // 10) == 0:
                last_reward = rewards[-1][1] if len(rewards) > 0 else 0
                print('frame : {}/{}, \t last rew: {}'.format(
                    frame_idx, max_frames, last_reward))

                if args.log:
                    print('saving model and reward')
                    pickle.dump(rewards,
                                open(path + 'reward_data' + '.pkl', 'wb'))
                    torch.save(model.state_dict(),
                               path + 'model_' + str(frame_idx) + '.pt')

            if args.done_util:
                if done:
                    break
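
# A minimal sketch of the replay-buffer interface that model_replay_buffer in
# Examples #2 and #4 is assumed to expose (hypothetical; the actual
# implementation may differ):
import random
from collections import deque

class ModelReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, next_action, done):
        # Store one (s, a, r, s', a', done) transition for model learning.
        self.buffer.append((state, action, reward, next_state, next_action, done))

    def sample(self, batch_size):
        # Uniformly sample a batch and unzip it into per-field lists.
        batch = random.sample(list(self.buffer), batch_size)
        return tuple(map(list, zip(*batch)))

    def __len__(self):
        return len(self.buffer)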