import time
from collections import deque

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Agent, DQNAgent, Environment, and the configuration values (gamma, epsilon,
# batch_size, m, lr, no_iter, no_trials, env_name, single_nn, param,
# MAX_EPISODES, MAX_STEPS, EPS_START, EPS_DECAY, EPS_MIN, PRINT_EVERY) are
# assumed to come from the rest of the repo. Hedged stand-ins for the
# moving_average and handle_agents helpers are sketched further down.


def training():
    print('___________TRAINING______________')
    # one independent DQN agent per decision step of the episode
    agents = []
    for idx_done in range(no_iter):
        agent = Agent(gamma=gamma, epsilon=epsilon, batch_size=batch_size,
                      n_actions=m, eps_end=0.01, input_dims=[m], lr=lr)
        agents.append(agent)
    scores, eps_history = [], []
    start_time = time.time()
    results = []
    trials = []
    for trial in range(no_trials):
        env = Environment(trial, status='train')
        score = 0
        done = False
        observation = env.reset()
        i = 0
        # fixed horizon: agent idx_done acts at step idx_done of the episode
        for idx_done in range(no_iter):
            i += 1
            action = agents[idx_done].choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            agents[idx_done].store_transition(observation, action, reward,
                                              observation_, done)
            agents[idx_done].learn()
            observation = observation_
            # print('epsilon %.2f' % agents[idx_done].epsilon)
        result = env.CodeRunner(trial)
        results.append(result)
        scores.append(score)
        # eps_history.append(agent.epsilon)
        avg_score = np.mean(scores)
        trials.append(trial)
        print('episode', trial, 'score %.2f' % score,
              'average score %.2f' % avg_score,
              'epsilon %.4f' % agents[idx_done].epsilon)
    mvn_score = moving_average(scores, 250)
    plt.figure(figsize=(5, 5), dpi=100)
    plt.plot(trials, scores, label='Episodic Average')
    plt.plot(mvn_score, label='Moving Average')
    plt.xlabel('Trials')
    plt.ylabel('Reward')
    plt.grid()
    plt.title('Training trial vs reward')
    timestamp = time.strftime("_%Y_%m_%d-%H__%M_%S")
    plt.savefig('./pic/TR_DQN_' + str(no_trials) + '_' + timestamp + '.png')
    print("--- %s TRAINING seconds ---" % (time.time() - start_time))
    env.show_result(results)
    handle_agents(opt='save', agents=agents)
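
# moving_average() is called above but defined elsewhere in the repo. A
# minimal stand-in consistent with its call sites (a trailing window over the
# episodic scores) is sketched here; the real helper's edge handling may
# differ, and the window of 250 used above assumes at least that many trials.
def moving_average(values, window):
    """Simple moving average; returns len(values) - window + 1 points."""
    return np.convolve(values, np.ones(window) / window, mode='valid')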
def testing():
    print('___________TESTING_____________')
    # reload the agent(s) saved at the end of training()
    agent = handle_agents(opt='load')
    scores, eps_history = [], []
    start_time = time.time()
    results = []
    trials = []
    for trial in range(no_trials):
        env = Environment(trial, status='test')
        score = 0
        done = False
        observation = env.reset()
        i = 0
        while not done:
            i += 1
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            observation = observation_
        result = env.CodeRunner(trial)
        results.append(result)
        scores.append(score)
        avg_score = np.mean(scores[-100:])
        trials.append(trial)
        print('episode', trial, 'score %.2f' % score,
              'average score %.2f' % avg_score,
              'epsilon %.4f' % agent.epsilon)
    mvn_score = moving_average(scores, 250)
    plt.figure(figsize=(5, 5), dpi=100)
    plt.plot(trials, scores, label='Episodic Average')
    plt.plot(mvn_score, label='Moving Average')
    plt.xlabel('Trials')
    plt.ylabel('Reward')
    plt.grid()
    plt.title('Testing trial vs reward')
    timestamp = time.strftime("_%Y_%m_%d-%H__%M_%S")
    plt.savefig('./pic/TstS_DQN_' + str(no_trials) + '_' + timestamp + '.png')
    print("--- %s seconds ---" % (time.time() - start_time))
    env.show_result(results)
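
# handle_agents() is another repo helper whose implementation is not shown.
# From its two call sites -- saving the list of trained agents, and loading
# something that exposes choose_action() and epsilon -- a plausible
# pickle-based stand-in follows. The path './agents.pkl' and the
# unwrap-a-list behaviour are assumptions, not the repo's actual format.
import pickle


def handle_agents(opt, agents=None, path='./agents.pkl'):
    """Hedged stand-in: persist or restore the trained agent(s)."""
    if opt == 'save':
        with open(path, 'wb') as f:
            pickle.dump(agents, f)
    elif opt == 'load':
        with open(path, 'rb') as f:
            loaded = pickle.load(f)
        # testing() uses the result as a single agent, so unwrap a list
        return loaded[0] if isinstance(loaded, list) else loaded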
def train_and_test():
    """Combined DQNAgent train/test pipeline (distinct from training()/testing() above)."""
    print('----------TRAIN---------')
    print('Episode num: ', MAX_EPISODES)
    start = time.time()
    scores = []
    episodes = []
    results = []
    scores_window = deque(maxlen=100)
    eps = EPS_START
    if env_name == 'BP':
        actions = range(param.m)
        state_size = param.m
        action_size = param.m
    dqn_agentS = []
    if single_nn:
        # a single network is reused at every decision step
        dqn_agent = DQNAgent(state_size, action_size, seed=0)
    else:
        # one network per decision step of the episode
        for t in range(MAX_STEPS):
            dqn_agent = DQNAgent(state_size, action_size, seed=0)
            dqn_agentS.append(dqn_agent)
    for episode in range(1, MAX_EPISODES + 1):
        if env_name == 'BP':
            env = Environment(episode, status='train', state_asnp=True)
        state = env.reset()
        score = 0
        for t in range(MAX_STEPS):
            if single_nn:
                action = dqn_agent.act(state, eps)
            else:
                action = dqn_agentS[t].act(state, eps)
            next_state, reward, done, info = env.step(action)
            if single_nn:
                dqn_agent.step(state, action, reward, next_state, done)
            else:
                dqn_agentS[t].step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break
        eps = max(eps * EPS_DECAY, EPS_MIN)
        if episode % PRINT_EVERY == 0:
            mean_score = np.mean(scores_window)
            print('\r Progress {}/{}, average score:{:.2f}'.format(
                episode, MAX_EPISODES, mean_score), end="")
        result = env.CodeRunner(episode)
        results.append(result)
        scores_window.append(score)
        scores.append(score)
        episodes.append(episode)
        print('epi ', episode, ' score ', score, 'avg score ', np.mean(scores))
    plt.figure(figsize=(10, 6))
    plt.plot(episodes, scores)
    plt.plot(pd.Series(scores).rolling(100).mean())
    plt.title('DQN Training')
    plt.xlabel('# of episodes')
    plt.ylabel('score')
    # plt.show()
    plt.grid()
    timestamp = time.strftime("_%Y_%m_%d-%H__%M_%S")
    plt.savefig('./pic/TR_DQN_' + str(MAX_EPISODES) + '_' + timestamp + '.png')
    env.show_result(results, MAX_EPISODES)
    print('TRAINING {} seconds'.format(time.time() - start))

    print('----------TEST---------')
    print('Episode num: ', MAX_EPISODES)
    start = time.time()
    scores = []
    episodes = []
    results = []
    scores_window = deque(maxlen=100)
    eps = 0  # act greedily during evaluation
    for episodeT in range(1, MAX_EPISODES + 1):
        if env_name == 'BP':
            # evaluate on the held-out test split
            env = Environment(episodeT, status='test', state_asnp=True)
            actions = range(param.m)
            state_size = param.m
            action_size = param.m
        state = env.reset()
        score = 0
        for tT in range(MAX_STEPS):
            if single_nn:
                action = dqn_agent.act(state, eps)
            else:
                action = dqn_agentS[tT].act(state, eps)
            next_state, reward, done, info = env.step(action)
            # no learning step here: the trained weights stay frozen
            state = next_state
            score += reward
            if done:
                break
        if episodeT % PRINT_EVERY == 0:
            mean_score = np.mean(scores_window)
            print('\r Progress {}/{}, average score:{:.2f}'.format(
                episodeT, MAX_EPISODES, mean_score), end="")
        result = env.CodeRunner(episodeT)
        results.append(result)
        scores_window.append(score)
        scores.append(score)
        episodes.append(episodeT)
        print('epi ', episodeT, ' score ', score, 'avg score ', np.mean(scores))
    plt.figure(figsize=(10, 6))
    plt.plot(episodes, scores)
    plt.plot(pd.Series(scores).rolling(100).mean())
    plt.title('DQN Testing')
    plt.xlabel('# of episodes')
    plt.ylabel('score')
    # plt.show()
    plt.grid()
    timestamp = time.strftime("_%Y_%m_%d-%H__%M_%S")
    plt.savefig('./pic/TS_DQN_' + str(MAX_EPISODES) + '_' + timestamp + '.png')
    env.show_result(results, MAX_EPISODES)
    print('TESTING {} seconds'.format(time.time() - start))
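
# A minimal entry point sketching how these routines might be invoked; which
# pipeline a given experiment uses depends on the repo's configuration, so
# treat the selection below as illustrative rather than the project's actual
# driver.
if __name__ == '__main__':
    training()          # per-step Agent ensemble: train and save
    testing()           # reload the saved agent(s) and evaluate
    # train_and_test()  # alternative DQNAgent pipeline (train + greedy test)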