    def final(self, state):
        # Called once at the end of each episode: record the outcome, update
        # the rolling win/reward averages and log a summary line.
        self.terminal_state = True
        self.params[EPISODES] += 1
        self.observationFunction(state)
        if self.won:
            self.params[WINS] += 1
            self.last_100_wins_avg.add(1)
        else:
            self.last_100_wins_avg.add(0)
        self.last_100_reward_avg.add(self.ep_reward)

        info(
            "Episode #%d | Frames: %d | Wins: %d | Won: %s | Score: %d | Epsilon: %.5f | Q: %.5f | 100 Wins Avg: %.3f | 100 Reward Avg: %.3f"
            % (self.params[EPISODES],
               self.params[FRAMES], self.params[WINS], self.won,
               state.getScore(), self.params[RL_EPSILON_CURRENT], self.best_q,
               self.last_100_wins_avg.avg(), self.last_100_reward_avg.avg()))
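# CappedMovingAverage is used above but not defined in these snippets. A
# minimal standalone sketch, assuming it only exposes add() and avg() over the
# last `size` samples; the project's real helper may differ:
from collections import deque

class CappedMovingAverage:
    def __init__(self, size):
        self.values = deque(maxlen=size)  # oldest sample drops out once full

    def add(self, value):
        self.values.append(value)

    def avg(self):
        # Average of whatever has been seen so far, 0.0 before any sample.
        if not self.values:
            return 0.0
        return float(sum(self.values)) / len(self.values)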

    def _save_model(self):
        if self.params[NO_TRAIN]:
            return
        wins = self.last_100_wins_avg.avg()
        if self._should_train() and self._should_save_model():
            model = "%s_%s" % (self.params[LAYOUT], self.run_id)
        elif wins in self.wins_save_threshold:
            model = "%s_%s_100_wins_%2f" % (self.params[LAYOUT], self.run_id,
                                            wins)
            info("Reached a new wins threshold of %2f. Saving model..." % wins)
            self.wins_save_threshold.remove(wins)
        else:
            return

        info("Saving model [%s]..." % model)
        self.params.save(model)
        self.replay_memory.save(model)
        self.dqn.save(model)
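# The save()/load() calls in these snippets work with a model name plus a file
# extension, and load() returns None when nothing was saved. A pickle-based
# sketch of that contract; note that in the source save() is a method on each
# object, and the "saves" directory name here is an assumption:
import os
import pickle

SAVES_DIR = "saves"  # hypothetical location

def save(obj, model, ext):
    with open(os.path.join(SAVES_DIR, "%s.%s" % (model, ext)), "wb") as f:
        pickle.dump(obj, f)

def load(model, ext):
    path = os.path.join(SAVES_DIR, "%s.%s" % (model, ext))
    if not os.path.exists(path):
        return None  # callers fall back to defaults on None
    with open(path, "rb") as f:
        return pickle.load(f)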

    def __init__(self, args):
        Agent.__init__(self)

        info("Initializing DQN Agent...")
        tf.reset_default_graph()
        self.session = tf.Session()

        self.params = _init_dqn_params(args)
        self.replay_memory = _init_replay_memory(args)
        self.frame_stack = FrameStack(self.params[FRAME_STACK_SIZE],
                                      self.params[FRAME_WIDTH],
                                      self.params[FRAME_HEIGHT])
        self.dqn = DeepQNetwork(self.params, self.session, 'online')
        # In no-train (evaluation) mode the online network doubles as the
        # target; a separate target network is only built for training.
        self.target_dqn = self.dqn

        if not self.params[NO_TRAIN]:
            self.target_dqn = DeepQNetwork(self.params, self.session, 'target',
                                           False)
            self.target_dqn.assign(self.dqn)

        self.run_id = get_time()

        self.first_move = True
        self.current_state = None
        self.last_state = None
        self.last_action = None
        self.last_score = None
        self.last_reward = None
        self.ep_reward = None
        self.terminal_state = None
        self.won = None
        self.best_q = np.nan
        self.last_100_wins_avg = CappedMovingAverage(100)
        self.last_100_reward_avg = CappedMovingAverage(100)
        # Checkpoint thresholds for the 100-episode win average; each is saved
        # once and then removed from the list in _save_model.
        self.wins_save_threshold = [0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0]

        info("Done initializing DQN Agent.")


def _init_replay_memory(args):
    if MODEL not in args:
        info("Using empty replay memory.")
        return ReplayMemory(DEFAULT_PARAMS[REPLAY_MEMORY_SIZE])

    replay_memory = load(args[MODEL], REPLAY_MEMORY_EXT)
    if replay_memory is None:
        info("Using empty replay memory.")
        return ReplayMemory(DEFAULT_PARAMS[REPLAY_MEMORY_SIZE])
    info("Successfully loaded saved replay memory of model %s." % args[MODEL])
    return replay_memory
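# ReplayMemory is only constructed and saved in these snippets. A minimal
# ring-buffer sketch of such a class; add() and sample() are assumptions about
# the rest of its interface:
import random
from collections import deque

class ReplayMemory:
    def __init__(self, size):
        self.transitions = deque(maxlen=size)  # oldest entries are evicted

    def add(self, state, action, reward, next_state, terminal):
        self.transitions.append((state, action, reward, next_state, terminal))

    def sample(self, batch_size):
        return random.sample(self.transitions, batch_size)

    def save(self, model):
        save(self, model, REPLAY_MEMORY_EXT)  # pickle helper sketched above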
Example 5
    def _load_saved_network_data(self):
        if self.params[MODEL] is None:
            info("Using random network weights.")
            return False

        network_file_path = _generate_model_file_path(self.params[MODEL])
        try:
            self.session_saver.restore(self.session, network_file_path)
        except Exception:
            info("Model %s does not exist. Using random network weights." %
                 network_file_path)
            return False

        info("Successfully loaded saved weights of model %s." %
             self.params[MODEL])

        return True
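# session_saver and _generate_model_file_path come from elsewhere in the agent.
# With TF1's tf.train.Saver the missing pieces would look roughly like this;
# the "models" directory is an assumption:
import os
import tensorflow as tf

def _generate_model_file_path(model_name):
    # tf.train.Saver appends its own .index/.data-* suffixes to this prefix.
    return os.path.join("models", model_name)

# Created once in __init__, after the graph is built:
# self.session_saver = tf.train.Saver()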


def _init_dqn_params(args):
    if MODEL not in args:
        info("Using default DQN parameters.")
        params = DQNParameters()
    else:
        params = load(args[MODEL], DQN_PARAMETERS_EXT)
        if params is None:
            info("Using default DQN parameters.")
            params = DQNParameters()
        else:
            info("Successfully loaded saved parameters of model %s." %
                 args[MODEL])

    params[NO_TRAIN] = NO_TRAIN in args

    params[FRAME_WIDTH] = args[FRAME_WIDTH]
    params[FRAME_HEIGHT] = args[FRAME_HEIGHT]
    params[LAYOUT] = args[LAYOUT]
    params[MODEL] = args[MODEL]

    debug("Using the following parameters: ")
    for param in params.keys():
        debug("\t%s:%s" % (param, params[param]))
    return params
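# DQNParameters is used like a dict with project defaults (params[KEY],
# params.keys(), params.save(model) in _save_model). A minimal sketch under
# that assumption, reusing DEFAULT_PARAMS from _init_replay_memory above:
class DQNParameters(dict):
    def __init__(self):
        dict.__init__(self, DEFAULT_PARAMS)  # start from the project defaults

    def save(self, model):
        save(self, model, DQN_PARAMETERS_EXT)  # pickle helper sketched earlier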