import msvcrt  # Windows-only: used by the manual-control debug branch below

game = GAME()
game.set_timer(100)
network = Network(n_output, lr_rate)
agent = Agent(network, gamma, batch_size, n_action, input_dims, mem_size)

scores = []
avg_rewards = []
score_max = 0
n_total = 0

for n in range(n_games):
    if n != 0:
        # Log the finished episode and train on its transitions before resetting
        scores.append(score)
        avg_rewards.append(total_reward / n_moves)
        agent.learn(n_total, n_total + n_moves)
        n_total += n_moves  # accumulate before n_moves is reset below
    score = 0
    game.score = 0
    total_reward = 0
    n_moves = 0
    done = False
    observation = game.get_states()
    while not done:
        action = agent.choose_action(observation)
        if n < -1:  # dead debug branch: flip the condition to control the game by keyboard
            game.draw()
            action = int(msvcrt.getch().decode("utf-8"))
        observation_, reward, done, score = game.spin_once(action, n_moves)
        agent.store_data(observation, action, reward, observation_, done)
        # Per-step bookkeeping implied by the counters used above
        observation = observation_
        total_reward += reward
        n_moves += 1
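# The Agent class is not shown above. A DQN-style store_data()/learn() pair is
# usually backed by a fixed-size experience-replay buffer; the sketch below is a
# minimal illustration under that assumption (the ReplayBuffer class and all of
# its field names are hypothetical, not taken from the code above).
import numpy as np

class ReplayBuffer:
    """Hypothetical fixed-size replay memory of the kind store_data() likely fills."""
    def __init__(self, mem_size, input_dims):
        self.mem_size = mem_size
        self.mem_cntr = 0
        self.state_memory = np.zeros((mem_size, input_dims), dtype=np.float32)
        self.new_state_memory = np.zeros((mem_size, input_dims), dtype=np.float32)
        self.action_memory = np.zeros(mem_size, dtype=np.int64)
        self.reward_memory = np.zeros(mem_size, dtype=np.float32)
        self.terminal_memory = np.zeros(mem_size, dtype=np.bool_)

    def store(self, state, action, reward, state_, done):
        idx = self.mem_cntr % self.mem_size  # overwrite the oldest entry once full
        self.state_memory[idx] = state
        self.new_state_memory[idx] = state_
        self.action_memory[idx] = action
        self.reward_memory[idx] = reward
        self.terminal_memory[idx] = done
        self.mem_cntr += 1

    def sample(self, batch_size):
        # Sample a random minibatch from the filled portion of the buffer
        max_mem = min(self.mem_cntr, self.mem_size)
        batch = np.random.choice(max_mem, batch_size, replace=False)
        return (self.state_memory[batch], self.action_memory[batch],
                self.reward_memory[batch], self.new_state_memory[batch],
                self.terminal_memory[batch])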
import numpy as np

score_record = []
score_record_step = []
count_record = []
count_record_step = []
time_record = []
time_record_step = []

for i in range(800):
    done = False
    score = 0
    obs = env.get_init_state()  # tasks not yet fully allocated
    while not done:
        act = MECSnet.choose_action(obs)
        new_state, reward, done = env.step(act)
        MECSnet.remember(obs, act, reward, new_state, int(done))
        MECSnet.learn()
        score += reward
        obs = new_state
        # print('reward is: {}'.format(reward))
    # Append this episode's total reward to the list
    score_record.append(score)
    # print('episode ', i, 'score %.2f' % score, '100 game average %.2f' % np.mean(score_record[-100:]))
    print('episode ', i, 'score %.2f' % score, ' wrong: ', env.count_wrong)
    count_record.append(1 - env.count_wrong / num_task)
    time_record.append(env.time)
    if i % 25 == 0:
        # UEnet.save_models()
        MECSnet.save_models()
        # Checkpoint the running averages every 25 episodes
        score_record_step.append(np.mean(score_record))
        count_record_step.append(np.mean(count_record))
        time_record_step.append(np.mean(time_record))
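# MECSnet.choose_action() is likewise not shown. DQN agents commonly use
# epsilon-greedy selection; the helper below is a minimal sketch under that
# assumption (the function and its parameter names are hypothetical).
import numpy as np

def epsilon_greedy_action(q_values, epsilon, n_actions):
    """Explore with probability epsilon, otherwise act greedily on Q-values."""
    if np.random.random() < epsilon:
        return np.random.randint(n_actions)  # explore: uniform random action
    return int(np.argmax(q_values))          # exploit: highest estimated Q-value

# Example usage, assuming a q_net callable that maps a state to Q-values:
# act = epsilon_greedy_action(q_net(obs), epsilon=0.1, n_actions=n_action)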