# NOTE(review): this fragment is one step of a DQN-style training loop; the
# enclosing per-step loop header (which `break` exits, and which defines
# `learning_step`, `epoch`, `games_cnt`) lies outside this chunk. The source
# arrived whitespace-mangled, so the indentation below is reconstructed —
# verify nesting against the original file before relying on it.
action = agent.act(state)                        # agent picks an action for the current state
img_state, reward, done = game.make_action(action)
if not done:
    state_new = img_state
else:
    state_new = None                             # terminal transition: successor state stored as None
agent.add_transition(state, action, reward, state_new, done)  # push into replay memory
state = state_new
if learning_step % UPDATE_FREQUENCY == 0:
    agent.learn_from_memory()                    # train from replay every UPDATE_FREQUENCY steps
if learning_step % COPY_FREQUENCY == 0:
    updateTarget(targetOps, SESSION)             # sync target-network weights every COPY_FREQUENCY steps
if done:
    print("Epoch %d Train Game %d get %.1f" % (epoch, games_cnt, game.get_total_reward()))
    break                                        # episode over — exit the (unseen) per-step loop
if SAVE_MODEL and games_cnt % 10 == 0:
    saver.save(SESSION, model_savefile)
    # NOTE(review): "weigths" is a typo in the user-facing message; left
    # byte-identical here because this edit only adds comments.
    print("Saving the network weigths to:", model_savefile)
print("\nTesting...")
test_scores = []
for test_step in range(EPISODES_TO_TEST):
    game.reset()
    agent.reset_cell_state()                     # presumably clears recurrent cell state between episodes — TODO confirm
    # NOTE(review): `is_terminared` looks misspelled, but it matches the
    # project API called throughout this file; do not rename it here alone.
    while not game.is_terminared():
        state = game.get_state()
        action = agent.act(state, train=False)   # train=False: evaluation-mode action selection
        game.make_action(action)
# Evaluation section: restore a trained model, play EPISODES_TO_TEST episodes
# in evaluation mode, persist each episode's score, then append a summary
# line (mean ± std, min, max) to the reward text file.
# NOTE(review): source arrived whitespace-mangled; indentation reconstructed.
trainables = tf.trainable_variables()
targetOps = updateTargetGraph(trainables, TAU)  # build target-network copy ops
print("Loading model from: ", model_savefile)
saver.restore(SESSION, model_savefile)
##########################################
print("\nTesting...")
test_scores = []
for test_step in range(EPISODES_TO_TEST):
    game.reset()
    agent.reset_cell_state()  # presumably clears recurrent cell state between episodes — TODO confirm
    # NOTE(review): `is_terminared` looks misspelled, but it matches the
    # project API called throughout this file; renaming requires fixing the
    # callee as well, so it is kept as-is.
    while not game.is_terminared():
        state = game.get_state()
        action = agent.act(state, train=False)  # train=False: evaluation-mode action selection
        game.make_action(action)
    now_score = game.get_total_reward()
    saveScore(now_score)                        # persist the per-episode score
    test_scores.append(now_score)
test_scores = np.array(test_scores)
# Fix: use a context manager so the reward file is closed even if the write
# raises, instead of a bare open()/close() pair.
with open(reward_savefile, 'a') as my_file:  # Name and path of the reward text file
    my_file.write("%.1f (±%.1f) min:%.1f max:%.1f\n" %
                  (test_scores.mean(), test_scores.std(),
                   test_scores.min(), test_scores.max()))
# NOTE(review): duplicate/variant of the training-step fragment above, cut
# one statement later (the `agent.act` call is missing, so `action` comes
# from outside this chunk, as do `learning_step`, `epoch`, `games_cnt` and
# the loop that `break` exits). Source arrived whitespace-mangled; the
# indentation below is reconstructed and should be verified.
img_state, reward, done = game.make_action(action)
if not done:
    state_new = img_state
else:
    state_new = None                             # terminal transition: successor state stored as None
agent.add_transition(state, action, reward, state_new, done)  # replay-memory insert
state = state_new
if learning_step % UPDATE_FREQUENCY == 0:
    agent.learn_from_memory()                    # periodic learning step from replay memory
if learning_step % COPY_FREQUENCY == 0:
    updateTarget(targetOps, SESSION)             # periodic target-network weight sync
if done:
    print("Epoch %d Train Game %d get %.1f" % (epoch, games_cnt, game.get_total_reward()))
    break                                        # episode over — exit the (unseen) per-step loop
if SAVE_MODEL and games_cnt % 10 == 0:
    saver.save(SESSION, model_savefile)
    # NOTE(review): "weigths" is a typo in this message; left byte-identical
    # because this edit only adds comments.
    print("Saving the network weigths to:", model_savefile)
print("\nTesting...")
test_scores = []
for test_step in range(EPISODES_TO_TEST):
    game.reset()
    agent.reset_cell_state()                     # presumably resets recurrent cell state — TODO confirm
    # NOTE(review): `is_terminared` spelling matches the project API used
    # throughout this file; do not rename it in isolation.
    while not game.is_terminared():
        state = game.get_state()
        action = agent.act(state, train=False)   # evaluation-mode action selection
        game.make_action(action)
# NOTE(review): a third variant of the training step — this one resets the
# game in place when an episode ends instead of breaking out of the loop.
# The enclosing loop header (defining `learning_step`, `action`,
# `train_scores`, `train_episodes_finished`) is outside this chunk, and the
# source arrived whitespace-mangled, so indentation is reconstructed.
s, reward, d = game.make_action(action)          # `s` and `d` are never read; `done` is re-queried below — TODO confirm intent
done = game.is_terminared()                      # spelling matches the project API used elsewhere in this file
if not done:
    state_new = preprocess(game.get_state())     # preprocess the raw observation into the agent's input format — presumably
else:
    state_new = None                             # terminal transition: successor state stored as None
agent.add_transition(state, action, reward, state_new, done)
state = state_new
if learning_step % UPDATE_FREQUENCY == 0:
    agent.learn_from_memory()
    # NOTE(review): placing updateTarget under the same condition as
    # learn_from_memory is an assumption — the mangled source does not show
    # whether it shared this `if`; verify against the original file.
    updateTarget(targetOps, SESSION)
if done:
    train_scores.append(game.get_total_reward()) # record the finished episode's score
    train_episodes_finished += 1
    game.reset()                                 # start the next episode immediately
    agent.reset_cell_state()
    state = preprocess(game.get_state())
print("%d training episodes played." % train_episodes_finished)
train_scores = np.array(train_scores)
print(
    "Results: mean: %.1f±%.1f," % (train_scores.mean(), train_scores.std()),
    "min: %.1f," % train_scores.min(),
    "max: %.1f," % train_scores.max())
print("\nTesting...")