Example #1
import random

import numpy as np
import torch

# Agent and Scores are the project's own classes; they are assumed to be importable in this module.


def train(env, hparams):
    # randomness (https://pytorch.org/docs/stable/notes/randomness.html)

    random_seed = hparams['seed']
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(random_seed)
    random.seed(random_seed)



    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    scores_hparams = hparams['scores']
    scores = Scores(scores_hparams['expectation'],
                    size=scores_hparams['window_size'],
                    check_solved=scores_hparams['check_solved'])

    env_info = env.reset(train_mode=True)[brain_name]     # reset the environment    
    # number of agents
    num_agents = len(env_info.agents)

    # size of each action
    action_size = brain.vector_action_space_size
    states = env_info.vector_observations                  # get the current state (for each agent)
    state_size = states.shape[1]

    
    # hyperparameters shared by all agents are set once on the class; each instance only needs the action size
    Agent.set_hparams(state_size, action_size, hparams)
    agents = [Agent(action_size) for _ in range(num_agents)]
    
    prefix = f'result/{hparams["output"]}'

    for epoch in range(hparams['epoch']):
        env_info = env.reset(train_mode=True)[brain_name]   # reset the environment
        for agent in agents:
            agent.reset()

        states = env_info.vector_observations               # get the current state (for each agent)

        # initialize the score (for each agent)
        epoch_score = np.zeros(num_agents)
        for t in range(1, hparams['t_max']+1):
            actions = np.array([agents[i].act(states[i]) for i in range(num_agents)])
            env_info = env.step(actions)[brain_name]           # send all actions to the environment
            next_states = env_info.vector_observations         # get next state (for each agent)
            dones = env_info.local_done                        # see if episode finished
    
            for i in range(num_agents):
                agents[i].step(t, states[i], actions[i], env_info.rewards[i], next_states[i], dones[i]) 

            states = next_states
            epoch_score += env_info.rewards 

            if t % 20 == 0:
                print('\rTimestep {}\tScore: {:.2f}\tmin: {:.2f}\tmax: {:.2f}'.format(
                    t, np.mean(epoch_score), np.min(epoch_score), np.max(epoch_score)), end='')

            if np.any(dones):
                break
        print('')
        if scores.AddScore(np.mean(epoch_score)):
            break

    Agent.save(prefix)
    scores.FlushLog(prefix, False)
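
A minimal sketch of how train() might be invoked. The hyperparameter keys mirror the lookups inside the function; the concrete values, the Agent-specific keys, and the UnityEnvironment file name are assumptions.

from unityagents import UnityEnvironment

hparams = {
    'seed': 0,
    'epoch': 200,
    't_max': 1000,
    'output': 'ddpg_run',                    # results are written under result/<output>
    'scores': {'expectation': 30.0,          # placeholder values
               'window_size': 100,
               'check_solved': True},
    # ...plus whatever Agent.set_hparams() expects (learning rates, buffer size, etc.)
}

env = UnityEnvironment(file_name='Reacher.app')   # environment binary path is an assumption
train(env, hparams)
env.close()
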
Example #2
    #

    b_agent = Agent(args.model_name, state_size, action_size)
    try:
        b_agent.load()  # try to load to continue training
    except Exception:
        pass    # no saved model yet; start training from scratch

    for epx in range(1, args.episodes + 1):
        at_step = 0
        env_info = env.reset(train_mode=False)[brain_name]
        state = env_info.vector_observations[0]     # initial state for the single agent
        b_agent.reset_episode()
        while True:
            action = b_agent.act(state)
            env_info = env.step(action)[brain_name]
            at_step += 1
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]
            if at_step % 100 == 0:
                log.info("ep:{} step:{} r:{} l:{}".format(
                    epx, at_step, b_agent.cum_rewards(), b_agent.ave_loss()))
            b_agent.sense(state, action, reward, next_state, done)  # store the transition, including the terminal one
            state = next_state
            if done:
                break
        print("{},{}".format(epx, b_agent.cum_rewards()))
        b_agent.save()

    log.info("finished.")
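
The loop above assumes env, brain_name, state_size, action_size, args, and log were set up earlier. A minimal sketch of such a preamble, following the same unityagents pattern as Example #1; the argument names, defaults, and environment file name are assumptions.

import argparse
import logging

from unityagents import UnityEnvironment

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

parser = argparse.ArgumentParser()
parser.add_argument("--model_name", default="banana_dqn")   # hypothetical defaults
parser.add_argument("--episodes", type=int, default=1000)
args = parser.parse_args()

env = UnityEnvironment(file_name="Banana.app")   # environment binary path is an assumption
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

env_info = env.reset(train_mode=True)[brain_name]
state_size = len(env_info.vector_observations[0])
action_size = brain.vector_action_space_size
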
Example #3
    agent = Agent(state_space, HIDDEN_SIZE, action_dim, 1,
                  seed=SEED, buffer_size=MEMORY_BUFFER_SIZE,
                  actor_lr=ACTOR_LR, actor_hidden_sizes=ACTOR_HIDDEN_UNITS, actor_weight_decay=ACTOR_WEIGHT_DECAY,
                  critic_lr=CRITIC_LR, critic_hidden_sizes=CRITIC_HIDDEN_UNITS, critic_weight_decay=CRITIC_WEIGHT_DECAY,
                  batch_size=BATCH_SIZE, gamma=GAMMA, tau=TAU
                  )
    print(agent)
    agent.load()

    scores, actor_losses, critic_losses = run_ddpg(n_episodes=N_EPISODES, is_training=is_training,
                                                   eps_start=EPS_START if is_training else EPS_END,
                                                   eps_decay=EPS_DECAY, eps_end=EPS_END,
                                                   max_t=MAX_STEPS, learn_every_step=LEARN_EVERY_STEP)

    if is_training:
        agent.save()

    fig = plt.figure()
    ax1 = fig.add_subplot(311)
    ax1.plot(np.arange(1, len(scores) + 1), scores)
    ax1.set_ylabel('Score')
    ax1.set_xlabel('Episode #')

    ax2 = fig.add_subplot(312)
    ax2.plot(np.arange(1, len(actor_losses) + 1), actor_losses)
    # ax2.legend()
    ax2.set_ylabel('Actor Loss')
    ax2.set_xlabel('Episode #')

    ax3 = fig.add_subplot(313)
    ax3.plot(np.arange(1, len(critic_losses) + 1), critic_losses)
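
To complete the third panel in the same pattern as the first two (the label text and the final display call are assumptions):

    ax3.set_ylabel('Critic Loss')
    ax3.set_xlabel('Episode #')

    plt.show()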