def final(self, state):
    """Called once at the end of each episode; records the outcome and logs stats."""
    self.terminal_state = True
    self.params[EPISODES] += 1
    # Process the final transition before computing episode statistics.
    self.observationFunction(state)
    if self.won:
        self.params[WINS] += 1
        self.last_100_wins_avg.add(1)
    else:
        self.last_100_wins_avg.add(0)
    self.last_100_reward_avg.add(self.ep_reward)
    info("Episode #%d | Frames: %d | Wins: %d | Won: %s | Score: %d | "
         "Epsilon: %5f | Q: %5f | 100 Wins Avg: %3f | 100 Reward Avg: %3f" %
         (self.params[EPISODES], self.params[FRAMES], self.params[WINS],
          self.won, state.getScore(), self.params[RL_EPSILON_CURRENT],
          self.best_q, self.last_100_wins_avg.avg(),
          self.last_100_reward_avg.avg()))
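# final() relies on a CappedMovingAverage helper that is not shown in this
# file. A minimal sketch of how it could look, assuming it averages only the
# most recent `cap` values; the actual implementation may differ.
from collections import deque


class CappedMovingAverage(object):
    def __init__(self, cap):
        # A deque with maxlen drops the oldest value once `cap` is reached.
        self.values = deque(maxlen=cap)

    def add(self, value):
        self.values.append(value)

    def avg(self):
        # Return 0.0 before any value is recorded to avoid division by zero.
        return sum(self.values) / float(len(self.values)) if self.values else 0.0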
def _save_model(self):
    """Persists parameters, replay memory, and network weights when warranted."""
    if self.params[NO_TRAIN]:
        return
    wins = self.last_100_wins_avg.avg()
    if self._should_train() and self._should_save_model():
        model = "%s_%s" % (self.params[LAYOUT], self.run_id)
    elif wins in self.wins_save_threshold:
        model = "%s_%s_100_wins_%2f" % (self.params[LAYOUT], self.run_id, wins)
        info("Reached a new wins threshold of %2f. Saving model..." % wins)
        # Remove the reached threshold so each one triggers at most one checkpoint.
        self.wins_save_threshold.remove(wins)
    else:
        return
    info("Saving model [%s]..." % model)
    self.params.save(model)
    self.replay_memory.save(model)
    self.dqn.save(model)
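# The save()/load() persistence these methods rely on is not shown in this
# file. A hypothetical pickle-based sketch consistent with how load() is
# called below (returning None when no saved file exists); the
# "<model>.<ext>" naming scheme is an assumption.
import os
import pickle


def save(obj, model, ext):
    with open("%s.%s" % (model, ext), "wb") as f:
        pickle.dump(obj, f)


def load(model, ext):
    path = "%s.%s" % (model, ext)
    if not os.path.exists(path):
        return None
    with open(path, "rb") as f:
        return pickle.load(f)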
def __init__(self, args):
    Agent.__init__(self)
    info("Initializing DQN Agent...")
    tf.reset_default_graph()
    self.session = tf.Session()
    self.params = _init_dqn_params(args)
    self.replay_memory = _init_replay_memory(args)
    self.frame_stack = FrameStack(self.params[FRAME_STACK_SIZE],
                                  self.params[FRAME_WIDTH],
                                  self.params[FRAME_HEIGHT])
    self.dqn = DeepQNetwork(self.params, self.session, 'online')
    # In evaluation mode the online network doubles as the target network;
    # during training a separate target network is created and synced.
    self.target_dqn = self.dqn
    if not self.params[NO_TRAIN]:
        self.target_dqn = DeepQNetwork(self.params, self.session, 'target', False)
        self.target_dqn.assign(self.dqn)
    self.run_id = get_time()
    self.first_move = True
    self.current_state = None
    self.last_state = None
    self.last_action = None
    self.last_score = None
    self.last_reward = None
    self.ep_reward = None
    self.terminal_state = None
    self.won = None
    self.best_q = np.nan
    self.last_100_wins_avg = CappedMovingAverage(100)
    self.last_100_reward_avg = CappedMovingAverage(100)
    self.wins_save_threshold = [0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1]
    info("Done initializing DQN Agent.")
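# A free-function sketch of what target_dqn.assign(self.dqn) might do,
# assuming each network's variables live under the scope name it was
# constructed with ('online' / 'target'). Hypothetical; the real
# DeepQNetwork may wire this differently.
import tensorflow as tf


def assign_target_weights(session, source_scope='online', target_scope='target'):
    source_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=source_scope)
    target_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=target_scope)
    # Pair variables by creation order, which matches when both networks
    # are built by the same graph-construction code.
    copy_ops = [tf.assign(t, s) for s, t in zip(source_vars, target_vars)]
    session.run(copy_ops)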
def _init_replay_memory(args):
    """Loads the replay memory saved with the given model, or starts empty."""
    if MODEL not in args:
        info("Using empty replay memory.")
        return ReplayMemory(DEFAULT_PARAMS[REPLAY_MEMORY_SIZE])
    replay_memory = load(args[MODEL], REPLAY_MEMORY_EXT)
    if replay_memory is None:
        info("Using empty replay memory.")
        return ReplayMemory(DEFAULT_PARAMS[REPLAY_MEMORY_SIZE])
    info("Successfully loaded saved replay memory of model %s." % args[MODEL])
    return replay_memory
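# A minimal deque-backed sketch of ReplayMemory matching the interface used
# above (fixed-capacity constructor). The transition format and sampling
# strategy are assumptions.
import random
from collections import deque


class ReplayMemory(object):
    def __init__(self, size):
        # Oldest transitions are evicted automatically once `size` is reached.
        self.transitions = deque(maxlen=size)

    def add(self, state, action, reward, next_state, terminal):
        self.transitions.append((state, action, reward, next_state, terminal))

    def sample(self, batch_size):
        # Uniform random minibatch, the standard choice for vanilla DQN.
        return random.sample(self.transitions, batch_size)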
def _load_saved_network_data(self):
    """Restores network weights from the saved model, if one was specified."""
    if self.params[MODEL] is None:
        info("Using random network weights.")
        return False
    network_file_path = _generate_model_file_path(self.params[MODEL])
    try:
        self.session_saver.restore(self.session, network_file_path)
    except Exception:
        info("Model %s does not exist. Using random network weights." % network_file_path)
        return False
    info("Successfully loaded saved weights of model %s." % self.params[MODEL])
    return True
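# A sketch of the checkpoint plumbing _load_saved_network_data relies on,
# assuming a plain tf.train.Saver and a name-to-path helper. The directory
# name and the saver's construction site are assumptions, not confirmed by
# this file.
import os
import tensorflow as tf

CHECKPOINT_DIR = "saves"  # assumed checkpoint location


def _generate_model_file_path(model):
    # Saver.restore expects the checkpoint path prefix, not a single file.
    return os.path.join(CHECKPOINT_DIR, model)


# Presumably created once in __init__, after both networks are built:
#   self.session_saver = tf.train.Saver()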
def _init_dqn_params(args):
    """Loads saved DQN parameters when a model is given, else uses defaults."""
    if MODEL not in args:
        info("Using default DQN parameters.")
        params = DQNParameters()
    else:
        params = load(args[MODEL], DQN_PARAMETERS_EXT)
        if params is None:
            info("Using default DQN parameters.")
            params = DQNParameters()
        else:
            info("Successfully loaded saved parameters of model %s." % args[MODEL])
    params[NO_TRAIN] = NO_TRAIN in args
    params[FRAME_WIDTH] = args[FRAME_WIDTH]
    params[FRAME_HEIGHT] = args[FRAME_HEIGHT]
    params[LAYOUT] = args[LAYOUT]
    # .get avoids a KeyError when no model was supplied; downstream code
    # checks params[MODEL] for None.
    params[MODEL] = args.get(MODEL)
    debug("Using the following parameters: ")
    for param in params.keys():
        debug("\t%s:%s" % (param, params[param]))
    return params
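# A hypothetical sketch of DQNParameters consistent with its use above: a
# dict pre-seeded with DEFAULT_PARAMS plus a pickle-backed save(). Both the
# extension value and the storage format are assumptions.
import pickle

DQN_PARAMETERS_EXT = "params"  # placeholder; the project defines the real value
DEFAULT_PARAMS = {}            # placeholder; the project defines the real defaults


class DQNParameters(dict):
    def __init__(self):
        dict.__init__(self, DEFAULT_PARAMS)

    def save(self, model):
        with open("%s.%s" % (model, DQN_PARAMETERS_EXT), "wb") as f:
            pickle.dump(dict(self), f)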