Example #1
    agent.actor.to(device)
    agent.actor_target.to(device)
    agent.actor_perturbed.to(device)
    agent.critic.to(device)
    agent.critic_target.to(device)

end_str = "_{}_{}".format(args.env_name, args.model_suffix)
agent.load_model("models/ddpg_actor" + end_str, "models/ddpg_critic" + end_str)

while True:
    episode_reward = 0
    state = torch.Tensor([env.reset()]).to(device)
    env.render()
    while True:
        action = agent.select_action(state, None, None)
        next_state, reward, done, _ = env.step(action.cpu().numpy()[0])
        env.render()
        episode_reward += reward

        #action = torch.Tensor(action).to(device)
        # Tensor conversions carried over from the training loop; the mask is
        # not actually used during this evaluation rollout.
        mask = torch.Tensor([not done]).to(device)
        next_state = torch.Tensor([next_state]).to(device)
        reward = torch.Tensor([reward]).to(device)

        state = next_state
        print("Reward: {}; Episode reward: {}".format(reward, episode_reward))

        if done:
            break

env.close()
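
Example #1 calls `agent.load_model(...)` after moving every network to `device`. A minimal sketch of what such a helper typically does, assuming the checkpoints are `state_dict`s saved with `torch.save`; the body below is illustrative, not the repository's actual implementation:

    import torch

    def load_model(agent, actor_path, critic_path, device):
        # Restore actor/critic weights; map_location lets a checkpoint
        # saved on GPU load on a CPU-only machine.
        agent.actor.load_state_dict(torch.load(actor_path, map_location=device))
        agent.critic.load_state_dict(torch.load(critic_path, map_location=device))
        agent.actor.eval()
        agent.critic.eval()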
Example #2
        '''
        #############
        The DDPG part
        #############
        '''
        state = torch.Tensor([env.reset()])  # algo line 6
        ounoise.scale = (args.noise_scale - args.final_noise_scale) * max(
            0, args.exploration_end -
            i_episode) / args.exploration_end + args.final_noise_scale
        ounoise.reset()
        episode_reward = 0

        for t in range(args.num_steps):  # line 7
            # forward pass through the actor network
            action = agent.select_action(state, ounoise)  # line 8
            next_state, reward, done, _ = env.step(action.numpy()[0])  # line 9
            episode_reward += reward

            action = torch.Tensor(action)
            mask = torch.Tensor([not done])
            next_state = torch.Tensor([next_state])
            reward = torch.Tensor([reward])

            # if i_episode % 10 == 0:
            #     env.render()

            memory.push(state, action, mask, next_state, reward)  # line 10

            state = next_state

            if len(memory) > args.batch_size * 5:
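
The `ounoise.scale` assignment above anneals the exploration noise linearly from `args.noise_scale` down to `args.final_noise_scale` over the first `args.exploration_end` episodes, then holds it constant. A small stand-alone sketch of that schedule (the function name is illustrative):

    def ou_scale(i_episode, noise_scale, final_noise_scale, exploration_end):
        # Fraction of the exploration budget still remaining, clipped at 0.
        frac = max(0, exploration_end - i_episode) / exploration_end
        return (noise_scale - final_noise_scale) * frac + final_noise_scale

    # e.g. with noise_scale=0.3, final_noise_scale=0.0, exploration_end=100:
    # ou_scale(0, 0.3, 0.0, 100)   -> 0.3
    # ou_scale(50, 0.3, 0.0, 100)  -> 0.15
    # ou_scale(150, 0.3, 0.0, 100) -> 0.0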
Example #3
            action = agent.select_action(state)  # Sample action from policy
        if len(memory) > args.start_steps:
            # Number of updates per step in environment
            for i in range(args.updates_per_step):
                # Update parameters of all the networks
                (critic_1_loss, critic_2_loss, policy_loss,
                 _, _, policy_info) = agent.update_parameters(memory, args.batch_size, updates)
                updates += 1

        else:
            pass

        next_state, reward, done, _ = env.step(action) # Step
        episode_steps += 1
        total_numsteps += 1
        eval_steps += 1
        ckpt_steps += 1
        map_steps += 1
        episode_reward += reward

        mask = 1 if episode_steps == max_episode_steps else float(not done)

        memory.push(state, action, reward, next_state, mask) # Append transition to memory

        state = next_state

    elapsed = round(time.time() - start_time + offset_time, 2)
    logging("Episode: {}"
Example #4
for i_episode in range(args.num_episodes):
    state = torch.Tensor([env.reset()])

    if args.ou_noise:
        ounoise.scale = (args.noise_scale - args.final_noise_scale) * max(
            0, args.exploration_end - i_episode) / args.exploration_end + args.final_noise_scale
        ounoise.reset()

    if args.param_noise and args.algo == "DDPG":
        agent.perturb_actor_parameters(param_noise)

    episode_reward = 0
    while True:
        action = agent.select_action(state, ounoise, param_noise)
        next_state, reward, done, _ = env.step(action.numpy()[0])
        total_numsteps += 1
        episode_reward += reward

        action = torch.Tensor(action)
        mask = torch.Tensor([not done])
        next_state = torch.Tensor([next_state])
        reward = torch.Tensor([reward])

        memory.push(state, action, mask, next_state, reward)

        state = next_state

        if len(memory) > args.batch_size:
            for _ in range(args.updates_per_step):
                transitions = memory.sample(args.batch_size)
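
Once the buffer holds more than `args.batch_size` transitions, the loop samples a batch `args.updates_per_step` times per environment step. Below is a minimal replay buffer with the `push`/`sample`/`__len__` interface used here, keeping the field order of `memory.push(state, action, mask, next_state, reward)`; an illustrative stand-in, not necessarily the repository's `ReplayMemory`:

    import random
    from collections import namedtuple

    Transition = namedtuple('Transition',
                            ('state', 'action', 'mask', 'next_state', 'reward'))

    class ReplayMemory:
        def __init__(self, capacity):
            self.capacity = capacity
            self.buffer = []
            self.position = 0

        def push(self, *args):
            # Overwrite the oldest entry once the buffer is full (ring buffer).
            if len(self.buffer) < self.capacity:
                self.buffer.append(None)
            self.buffer[self.position] = Transition(*args)
            self.position = (self.position + 1) % self.capacity

        def sample(self, batch_size):
            return random.sample(self.buffer, batch_size)

        def __len__(self):
            return len(self.buffer)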
Example #5
    frame_skip = args.frame_skip

    frame_idx = 0
    rewards = []

    ep_num = 0
    state = env.reset()
    mpc_planner.reset()

    episode_reward = 0
    done = False
    for step in range(max_steps):

        action = mpc_planner.update(state)
        for _ in range(frame_skip):
            state, reward, done, _ = env.step(action.copy())
            if done: break
        episode_reward += reward
        frame_idx += 1

        if args.render:
            env.render("rgb_array", width=320 * 2, height=240 * 2)

        if args.done_util:
            if done:
                break

    print('ep rew', ep_num, episode_reward)
    rewards.append([frame_idx, episode_reward])
    ep_num += 1
    env.close()
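
Note that the frame-skip loop above repeats the planner's action `frame_skip` times but adds only the last step's reward to `episode_reward`. A common alternative accumulates the reward over the repeated frames; a sketch of such a helper for a gym-style environment (the function name is illustrative):

    def step_with_frame_skip(env, action, frame_skip):
        # Repeat the same action frame_skip times, summing rewards,
        # and stop early if the episode terminates.
        total_reward = 0.0
        state, done, info = None, False, {}
        for _ in range(frame_skip):
            state, reward, done, info = env.step(action.copy())
            total_reward += reward
            if done:
                break
        return state, total_reward, done, info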
Example #6
                                                                      i_episode) / args.exploration_end + args.final_noise_scale
        ounoise.reset()

    if args.param_noise and args.algo == "DDPG" and param_noise is not None:
        agent.perturb_actor_parameters(param_noise)
    '''
    episode_reward = 0
    state = state.to(device)
    action, action_probs, entropy = agent.select_action(
        state, ounoise, param_noise)
    #print(action.cpu().numpy())
    if args.discrete:
        use_action = action.squeeze(1).cpu().numpy()
    else:
        use_action = action.cpu().numpy()[0]
    next_state, reward, done, _ = env.step(use_action)
    total_numsteps += 1
    episode_reward += reward

    #action = torch.LongTensor(action)
    reward = torch.Tensor(reward).to(device).unsqueeze(1)
    episode_rewards += reward
    mask = torch.Tensor([[1.0] if not x else [0.0] for x in done]).to(device)
    final_rewards *= mask
    final_rewards += (1 - mask) * episode_rewards
    episode_rewards *= mask

    next_state = torch.Tensor(next_state).to(device)
    for i in range(args.num_processes):
        if args.discrete:
            memory.push(state[i], action_probs[i], mask[i], next_state[i],
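
The three tensor operations on `final_rewards` and `episode_rewards` above implement the usual vectorized-environment bookkeeping: when a sub-environment finishes (`mask == 0`), its accumulated return is copied into `final_rewards` and its running total is reset. A small numeric illustration with two environments (values are made up):

    import torch

    episode_rewards = torch.tensor([[5.0], [3.0]])   # running returns of 2 envs
    final_rewards   = torch.tensor([[0.0], [9.0]])   # returns of the last finished episodes
    mask            = torch.tensor([[1.0], [0.0]])   # env 1 just finished

    final_rewards *= mask                            # -> [[0.], [0.]] keep only still-running slots
    final_rewards += (1 - mask) * episode_rewards    # -> [[0.], [3.]] record the finished return
    episode_rewards *= mask                          # -> [[5.], [0.]] reset the finished accumulator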