def training():
    """Train one agent per step index, plot the rewards and save the agents.

    Configuration is read from module-level names (``no_iter``, ``no_trials``,
    ``gamma``, ``epsilon``, ``batch_size``, ``m``, ``lr``).  For every trial a
    fresh ``Environment`` is created and stepped ``no_iter`` times; step ``k``
    is always handled by ``agents[k]``, which chooses the action, stores the
    transition and learns from it.  After all trials the per-trial scores and
    their moving average are saved as a PNG under ``./pic/``, the collected
    ``CodeRunner`` results are passed to ``env.show_result`` and the agents
    are persisted through ``handle_agents(opt='save', ...)``.

    Returns:
        None.
    """
    import os  # local import: only needed to make sure the output dir exists

    print('___________TRAINING______________')

    # One independent agent per step position inside an episode.
    agents = [
        Agent(gamma=gamma,
              epsilon=epsilon,
              batch_size=batch_size,
              n_actions=m,
              eps_end=0.01,
              input_dims=[m],
              lr=lr)
        for _ in range(no_iter)
    ]

    scores = []
    results = []
    trials = []
    env = None  # stays None when no_trials == 0
    start_time = time.time()

    for trial in range(no_trials):
        env = Environment(trial, status='train')
        score = 0
        observation = env.reset()

        # NOTE(review): the episode always runs for all `no_iter` steps; the
        # `done` flag returned by env.step is stored but never ends the
        # episode early. Kept as in the original -- confirm this is intended.
        for agent in agents:
            action = agent.choose_action(observation)
            observation_, reward, done, _info = env.step(action)
            score += reward
            # `store_trainsition` (sic) is the method name exposed by Agent.
            agent.store_trainsition(observation, action, reward,
                                    observation_, done)
            agent.learn()
            observation = observation_

        results.append(env.CodeRunner(trial))
        scores.append(score)
        trials.append(trial)
        avg_score = np.mean(scores)
        # Report the epsilon of the last step's agent (NaN if there is none).
        last_epsilon = agents[-1].epsilon if agents else float('nan')
        print('episode', trial, 'score %.2f' % score,
              'average score %.2f' % avg_score,
              'epsilon %.4f' % last_epsilon)

    mvn_score = moving_average(scores, 250)
    fig = plt.figure(figsize=(5, 5), dpi=100)
    plt.plot(trials, scores, label='Episodic Average')
    plt.plot(mvn_score, label='Moving Average')
    plt.xlabel('Trials')
    plt.ylabel('Reward')
    plt.grid()
    plt.title('Training trial vs reward')
    plt.legend()  # without this the curve labels above are never displayed
    timestamp = time.strftime("_%Y_%m_%d-%H__%M_%S")
    os.makedirs('./pic', exist_ok=True)  # savefig does not create directories
    plt.savefig('./pic/TR_DQN_' + str(no_trials) + '_' + timestamp + '.png')
    plt.close(fig)  # release the figure so repeated runs do not accumulate

    print("--- %s TRAINING seconds ---" % (time.time() - start_time))
    if env is not None:
        env.show_result(results)
    handle_agents(opt='save', agents=agents)
Exemple #2
0
def testing():
    """Run the loaded agent on test environments and plot the rewards.

    The agent is obtained from ``handle_agents(opt='load')``.  For each of the
    ``no_trials`` trials a fresh ``Environment(trial, status='test')`` is
    stepped with the agent's chosen actions until the environment reports
    ``done``; no learning calls are made.  Per-trial scores and their moving
    average are saved as a PNG under ``./pic/`` and the collected
    ``CodeRunner`` results are passed to ``env.show_result``.

    Returns:
        None.
    """
    import os  # local import: only needed to make sure the output dir exists

    print('___________TESTING_____________')
    # NOTE(review): the sibling training() saves a *list* of per-step agents;
    # confirm that handle_agents(opt='load') returns a single object exposing
    # choose_action/epsilon, as this function assumes.
    agent = handle_agents(opt='load')

    scores = []
    results = []
    trials = []
    env = None  # stays None when no_trials == 0
    start_time = time.time()

    for trial in range(no_trials):
        env = Environment(trial, status='test')
        score = 0
        done = False
        observation = env.reset()

        # The episode length is decided solely by the environment.
        while not done:
            action = agent.choose_action(observation)
            observation, reward, done, _info = env.step(action)
            score += reward

        results.append(env.CodeRunner(trial))
        scores.append(score)
        trials.append(trial)
        # Running average over the 100 most recent trials.
        avg_score = np.mean(scores[-100:])
        print('episode', trial, 'score %.2f' % score,
              'average score %.2f' % avg_score,
              'epsilon %.4f' % agent.epsilon)

    mvn_score = moving_average(scores, 250)

    fig = plt.figure(figsize=(5, 5), dpi=100)
    plt.plot(trials, scores, label='Episodic Average')
    plt.plot(mvn_score, label='Moving Average')
    plt.xlabel('Trials')
    plt.ylabel('Reward')
    plt.grid()
    plt.title('Testing trial vs reward')
    plt.legend()  # without this the curve labels above are never displayed
    timestamp = time.strftime("_%Y_%m_%d-%H__%M_%S")
    os.makedirs('./pic', exist_ok=True)  # savefig does not create directories
    plt.savefig('./pic/TstS_DQN_' + str(no_trials) + '_' + timestamp + '.png')
    plt.close(fig)  # release the figure so repeated runs do not accumulate

    print("--- %s seconds ---" % (time.time() - start_time))
    if env is not None:
        env.show_result(results)
Exemple #3
0
def training():
    """Train DQN agent(s) on the 'BP' environment, then run a test pass.

    Configuration is read from module-level names (``env_name``,
    ``single_nn``, ``param.m``, ``MAX_EPISODES``, ``MAX_STEPS``,
    ``EPS_START``, ``EPS_DECAY``, ``EPS_MIN``, ``PRINT_EVERY``).

    With ``single_nn`` one ``DQNAgent`` handles every step; otherwise one
    agent is created per step index (``MAX_STEPS`` agents in total).  The
    training phase starts from ``eps = EPS_START`` and calls ``agent.step``
    after every environment step; the test phase reuses the same agents with
    ``eps = 0`` and never calls ``agent.step``.  Each phase saves a score plot
    under ``./pic/`` and hands the collected ``CodeRunner`` results to
    ``env.show_result``.

    Returns:
        None.

    Raises:
        ValueError: if ``env_name`` is not ``'BP'``.  Only that environment is
            set up here; previously any other value failed later with an
            unbound-local error.
    """
    if env_name != 'BP':
        raise ValueError(
            "training() only supports env_name == 'BP', got {!r}".format(
                env_name))

    print('----------TRAIN---------')
    print('Episode num: ', MAX_EPISODES)
    start = time.time()

    state_size = action_size = param.m
    # Either a single shared network or one network per step position.
    agent_count = 1 if single_nn else MAX_STEPS
    agents = [DQNAgent(state_size, action_size, seed=0)
              for _ in range(agent_count)]

    env, episodes, scores, results = _run_dqn_episodes(
        agents, eps=EPS_START, learn=True)
    _save_score_plot(episodes, scores, 'DQN Training', 'TR_DQN_')
    if env is not None:  # None when MAX_EPISODES < 1
        env.show_result(results, MAX_EPISODES)
    print('TRAINING {} seconds'.format(time.time() - start))

    print('----------TEST---------')
    print('Episode num: ', MAX_EPISODES)
    start = time.time()

    env, episodes, scores, results = _run_dqn_episodes(
        agents, eps=0, learn=False)
    _save_score_plot(episodes, scores, 'DQN Testing', 'TS_DQN_')
    if env is not None:
        env.show_result(results, MAX_EPISODES)
    print('TESTING {} seconds'.format(time.time() - start))


def _run_dqn_episodes(agents, eps, learn):
    """Run MAX_EPISODES episodes and return (last_env, episodes, scores, results).

    Args:
        agents: list holding either one shared agent (``single_nn``) or one
            agent per step index.
        eps: exploration value passed to ``agent.act``.
        learn: when true, ``agent.step`` is called after every environment
            step and ``eps`` is decayed after every non-terminal step; when
            false, ``eps`` stays constant and no ``agent.step`` call is made.

    Returns:
        Tuple ``(env, episodes, scores, results)`` where ``env`` is the last
        environment created (``None`` if no episode ran), ``episodes`` the
        1-based episode numbers, ``scores`` the summed reward per episode and
        ``results`` the value of ``env.CodeRunner(episode)`` per episode.
    """
    scores, episodes, results = [], [], []
    scores_window = deque(maxlen=100)
    env = None

    for episode in range(1, MAX_EPISODES + 1):
        # NOTE(review): both phases build the environment with
        # status='train', as the original did -- confirm whether the test
        # phase should use a different status.
        env = Environment(episode, status='train', state_asnp=True)
        state = env.reset()
        score = 0

        for step in range(MAX_STEPS):
            agent = agents[0] if single_nn else agents[step]
            action = agent.act(state, eps)
            next_state, reward, done, _info = env.step(action)
            if learn:
                agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break

            # Decay and progress output happen per non-terminal step.
            if learn:
                eps = max(eps * EPS_DECAY, EPS_MIN)
            if episode % PRINT_EVERY == 0:
                # Avoid numpy's empty-slice warning before any score exists.
                if scores_window:
                    mean_score = np.mean(scores_window)
                else:
                    mean_score = float('nan')
                print('\r Progress {}/{}, average score:{:.2f}'.format(
                    episode, MAX_EPISODES, mean_score),
                      end="")

        results.append(env.CodeRunner(episode))
        scores_window.append(score)
        scores.append(score)
        episodes.append(episode)
        print('epi ', episode, ' score ', score, 'avg score ', np.mean(scores))

    return env, episodes, scores, results


def _save_score_plot(episodes, scores, title, file_prefix):
    """Plot scores and their 100-episode rolling mean, saved under ./pic/."""
    import os  # local import: only needed to make sure the output dir exists

    fig = plt.figure(figsize=(10, 6))
    plt.plot(episodes, scores, label='Score')
    # Use the episode numbers as x so both curves share the same axis
    # (the Series' implicit index would be shifted by one).
    plt.plot(episodes, pd.Series(scores).rolling(100).mean(),
             label='Rolling mean (100)')
    plt.title(title)
    plt.xlabel('# of episodes')
    plt.ylabel('score')
    plt.grid()
    plt.legend()
    timestamp = time.strftime("_%Y_%m_%d-%H__%M_%S")
    os.makedirs('./pic', exist_ok=True)  # savefig does not create directories
    plt.savefig('./pic/' + file_prefix + str(MAX_EPISODES) + '_' + timestamp +
                '.png')
    plt.close(fig)  # release the figure so repeated runs do not accumulate