import datetime
from collections import deque

import numpy as np
import torch

# Env, ExperienceReplay, AgentDDQN, Net, and MAX_SCORE_COUNT come from the
# project's own modules (MAX_SCORE_COUNT is the window size for the running
# average, presumably 100 given the 'avg-100' label below).


def learning(episode_count):
    env = Env()
    actions = 8
    input_dim = (2, 84, 84)
    scores = []
    avg_scores = deque([])
    gamma = 0.9
    lr = 0.0001
    exp_buffer_size = 25000
    batch_size = 256
    buffer = ExperienceReplay(exp_buffer_size, input_dim)
    agent = AgentDDQN(gamma, actions, Net(), buffer, lr, update_steps=1000,
                      batch_size=batch_size, path="trained_models/sc2_beacon",
                      epsilon_dec=1e-5)
    d_now = datetime.datetime.now()
    for i in range(episode_count):
        score = 0
        terminal = False
        state = env.reset()
        state = torch.from_numpy(state).double()
        while not terminal:
            action = agent.choose_action(state)
            state_, reward, terminal = env.step(action)
            state_ = torch.from_numpy(state_).double()
            score += reward
            agent.store(state, action, reward, state_, terminal)
            agent.learn()
            state = state_
        # Keep only the most recent MAX_SCORE_COUNT scores for the windowed average.
        avg_scores.append(score)
        if len(avg_scores) > MAX_SCORE_COUNT:
            avg_scores.popleft()
        scores.append(score)
        print('episode: ', i,
              '\t\tscore: ', round(score, 3),
              '\t\tcumulative score:', round(np.average(scores), 3),
              '\t\tavg-100 score:', round(np.average(avg_scores), 3),
              '\t\tepsilon: ', round(agent.epsilon, 4))
        if i % 100 == 0:
            print('time: ', datetime.datetime.now() - d_now)
            agent.save_model()
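# The training loop above only touches the replay buffer through store(); a
# minimal ring-buffer sketch compatible with that usage is shown below. This
# is an illustration only: the class name, array layout, and sample()
# signature are assumptions, not the repo's actual ExperienceReplay code.
import numpy as np


class ExperienceReplaySketch:
    def __init__(self, capacity, state_shape):
        self.capacity = capacity
        self.counter = 0
        self.states = np.zeros((capacity, *state_shape), dtype=np.float64)
        self.next_states = np.zeros((capacity, *state_shape), dtype=np.float64)
        self.actions = np.zeros(capacity, dtype=np.int64)
        self.rewards = np.zeros(capacity, dtype=np.float64)
        self.terminals = np.zeros(capacity, dtype=bool)

    def store(self, state, action, reward, next_state, terminal):
        idx = self.counter % self.capacity  # overwrite the oldest entry once full
        self.states[idx] = state
        self.actions[idx] = action
        self.rewards[idx] = reward
        self.next_states[idx] = next_state
        self.terminals[idx] = terminal
        self.counter += 1

    def sample(self, batch_size):
        size = min(self.counter, self.capacity)
        idx = np.random.choice(size, batch_size, replace=False)
        return (self.states[idx], self.actions[idx], self.rewards[idx],
                self.next_states[idx], self.terminals[idx])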
from collections import deque

import numpy as np
import torch


def animation():
    env = Env()
    actions = 2 * 64 * 64
    input_dim = (3, 64, 64)
    scores = []
    avg_scores = deque([])
    gamma = 0.9
    lr = 0.0001
    exp_buffer_size = 0  # evaluation only: nothing is stored or learned here
    batch_size = 0
    buffer = ExperienceReplay(exp_buffer_size, input_dim)
    agent = AgentDDQN(gamma, actions, Net(), buffer, lr, update_steps=1000,
                      batch_size=batch_size, epsilon=0.01,
                      path="trained_models/sc2_collect_shards")
    agent.load_model()
    i = 0
    while True:
        score = 0
        terminal = False
        state = env.reset()
        state = torch.from_numpy(state).double()
        i += 1
        while not terminal:
            action = agent.choose_action(state)
            state_, reward, terminal = env.step(action)
            state = torch.from_numpy(state_).double()
            score += reward
        avg_scores.append(score)
        if len(avg_scores) > MAX_SCORE_COUNT:
            avg_scores.popleft()
        scores.append(score)
        print('episode: ', i,
              '\t\tcumulative score:', round(np.average(scores), 3),
              '\t\tavg-100 score:', round(np.average(avg_scores), 3))
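# With epsilon=0.01 the loaded agent acts almost greedily. choose_action is
# assumed to be standard epsilon-greedy over the online network's Q-values;
# the attribute names (q_online, n_actions) are guesses for illustration, not
# the repo's actual AgentDDQN internals.
import numpy as np
import torch


def choose_action_sketch(agent, state):
    if np.random.random() < agent.epsilon:
        return np.random.randint(agent.n_actions)  # explore
    with torch.no_grad():
        q_values = agent.q_online(state.unsqueeze(0))  # add a batch dimension
    return int(q_values.argmax(dim=1).item())  # exploit: highest-valued action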
import datetime

import gym
import numpy as np
import torch


def learning():
    env = gym.make('LunarLander-v2')
    actions = 4
    state_dim = (8,)
    games_count = 1000
    scores = []
    experience_replay = ExperienceReplay(10000, state_dim)
    agent = AgentDDQN(0.99, actions, Net(), experience_replay, 0.001)
    d_now = datetime.datetime.now()
    for i in range(1, games_count):
        score = 0
        terminal = False
        state = env.reset()
        state = torch.from_numpy(state).double()
        while not terminal:
            action = agent.choose_action(state)
            state_, reward, terminal, _ = env.step(action)
            state_ = torch.from_numpy(state_).double()
            agent.store(state, action, reward, state_, terminal)
            agent.learn()
            state = state_
            score += reward
        scores.append(score)
        print('episode: ', i,
              '\t\tscore: ', score,
              '\t\taverage score:', np.average(scores[-100:]),
              '\t\tepsilon: ', agent.epsilon)
        if i % 10 == 0:
            print('time: ', datetime.datetime.now() - d_now)
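# agent.learn() is where the double-DQN update happens; a sketch of the
# standard double Q-learning step is below (the online net *selects* the next
# action, the periodically synced target net *evaluates* it). Attribute names
# such as q_online, q_target, optimizer, gamma, and buffer are assumptions
# about AgentDDQN's internals, used here only for illustration.
import torch
import torch.nn.functional as F


def learn_sketch(agent):
    if agent.buffer.counter < agent.batch_size:
        return  # not enough transitions collected yet
    s, a, r, s_, done = agent.buffer.sample(agent.batch_size)
    s = torch.as_tensor(s, dtype=torch.double)
    a = torch.as_tensor(a, dtype=torch.long)
    r = torch.as_tensor(r, dtype=torch.double)
    s_ = torch.as_tensor(s_, dtype=torch.double)
    done = torch.as_tensor(done, dtype=torch.bool)
    q_pred = agent.q_online(s).gather(1, a.unsqueeze(1)).squeeze(1)
    with torch.no_grad():
        best_a = agent.q_online(s_).argmax(dim=1, keepdim=True)   # online net selects a'
        q_next = agent.q_target(s_).gather(1, best_a).squeeze(1)  # target net evaluates a'
        q_next[done] = 0.0  # no bootstrapping past terminal states
        target = r + agent.gamma * q_next
    loss = F.mse_loss(q_pred, target)
    agent.optimizer.zero_grad()
    loss.backward()
    agent.optimizer.step()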
import datetime

import numpy as np
import torch


def learning(num):
    env = make_env('PongNoFrameskip-v4')
    actions = 6
    input_dim = (4, 80, 80)
    scores = []
    experience_replay = ExperienceReplay(25000, input_dim)
    agent = AgentDDQN(0.99, actions, Net(), experience_replay, 0.0001,
                      update_steps=1000, batch_size=32,
                      epsilon_min=0.02, epsilon=1.0, epsilon_dec=1e-4)
    i = 0
    step = 0
    d_start = datetime.datetime.now()
    print('start ', d_start)
    # Train on a fixed 180-minute wall-clock budget rather than an episode count.
    d_end = datetime.datetime.now() + datetime.timedelta(minutes=180)
    text = 'episode,score,time,step,epsilon'
    while d_end > datetime.datetime.now():
        score = 0
        terminal = False
        state = env.reset()
        state = torch.from_numpy(state).double()
        i += 1
        while not terminal:
            step += 1
            action = agent.choose_action(state)
            state_, reward, terminal, _ = env.step(action)
            state_ = torch.from_numpy(state_).double()
            score += reward
            agent.store(state, action, reward, state_, terminal)
            agent.learn()
            state = state_
        scores.append(score)
        avg = np.average(scores[-100:])
        d = datetime.datetime.now() - d_start
        # Append one CSV row and rewrite the log file after every episode.
        text += '\n' + ','.join(str(v) for v in (i, avg, d, step, agent.epsilon))
        write_to_file(text, 'logs/pong/log_1_' + str(num) + '_DDQN.txt')
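# write_to_file is assumed to be a small helper that rewrites the whole log
# file on each call (the loop above passes the full accumulated CSV text every
# episode, so append mode would duplicate rows). A minimal sketch:
def write_to_file(text, path):
    with open(path, 'w') as f:
        f.write(text)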