Example #1
    def move(self, environment):
        BaseDDQNGameModel.move(self, environment)

        # Epsilon-greedy exploration: with small probability take a random action
        if np.random.rand() < 0.01:
            action_vector = Action.action_from_vector(
                random.randrange(self.action_space))
        else:
            # Otherwise act greedily on the Q-values predicted for the current state
            state = environment.state()
            q_values = self.ddqn.predict(
                np.expand_dims(np.asarray(state).astype(np.float64), axis=0),
                batch_size=1)
            action_vector = Action.action_from_vector(np.argmax(q_values[0]))
        return Action.normalized_action(environment.snake_action,
                                        action_vector)
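
The move above selects actions epsilon-greedily: a random action with small probability, otherwise the argmax of the network's Q-values. Below is a minimal, self-contained sketch of just that selection step, assuming only a Keras-style model exposing predict; epsilon_greedy_action and q_model are illustrative names, not from the source.

import random

import numpy as np


def epsilon_greedy_action(q_model, state, action_space, epsilon=0.01):
    """Return an action index: random with probability epsilon, otherwise argmax of Q."""
    if np.random.rand() < epsilon:
        # Explore: uniformly random action index.
        return random.randrange(action_space)
    # Exploit: query the network for the Q-values of the current state.
    q_values = q_model.predict(
        np.expand_dims(np.asarray(state, dtype=np.float64), axis=0), batch_size=1)
    return int(np.argmax(q_values[0]))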
Example #2
    def _ddqn(self,
              total_step_limit=10000000,
              total_run_limit=None,
              clip=True):
        run = 0
        total_step = 0
        scores = []
        while True:
            if total_run_limit is not None and run >= total_run_limit:
                print "Reached total run limit of: " + str(total_run_limit)
                exit(0)

            run += 1
            env = self.prepare_training_environment()
            current_state = env.state()
            step = 0
            score = env.reward()
            while True:
                if total_step >= total_step_limit:
                    print "Reached total step limit of: " + str(
                        total_step_limit)
                    exit(0)
                total_step += 1
                step += 1

                # Choose an action for the current state and normalize it against
                # the snake's current action before stepping the environment
                action = self._predict_move(current_state)
                action_vector = Action.action_from_vector(action)
                normalized_action = Action.normalized_action(
                    env.snake_action, action_vector)
                next_state, reward, terminal = env.full_step(normalized_action)
                if clip:
                    # Clip the reward to its sign so all rewards lie in {-1, 0, 1}
                    reward = np.sign(reward)
                score += reward
                # Store the transition in replay memory
                self._remember(current_state, action, reward, next_state,
                               terminal)
                current_state = next_state

                self._step_update(total_step)

                if terminal:
                    scores.append(score)
                    if len(scores) % SCORE_LOGGING_FREQUENCY == 0:
                        self.log_score(mean(scores))
                        print('{{"metric": "score", "value": {}}}'.format(
                            mean(scores)))
                        print('{{"metric": "run", "value": {}}}'.format(run))
                        scores = []
                    break
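
Example #2 relies on helpers that are not shown on this page (_predict_move, _remember, _step_update). The following is a rough sketch of what the replay memory and the double-DQN update behind such helpers typically look like, assuming Keras-style predict/fit models; ReplayBuffer and ddqn_train_step are illustrative names, not taken from the source.

import random
from collections import deque

import numpy as np


class ReplayBuffer:
    """Bounded FIFO memory of (state, action, reward, next_state, terminal) tuples."""

    def __init__(self, capacity=100000):
        self.memory = deque(maxlen=capacity)

    def remember(self, state, action, reward, next_state, terminal):
        # Oldest transitions are dropped automatically once capacity is reached.
        self.memory.append((state, action, reward, next_state, terminal))

    def sample(self, batch_size=32):
        # Uniform random minibatch for one training step.
        return random.sample(self.memory, min(batch_size, len(self.memory)))


def ddqn_train_step(online_q, target_q, buffer, gamma=0.99, batch_size=32):
    """One double-DQN update: the online net selects the next action, the target net scores it."""
    batch = buffer.sample(batch_size)
    states = np.asarray([t[0] for t in batch], dtype=np.float64)
    next_states = np.asarray([t[3] for t in batch], dtype=np.float64)

    q_current = online_q.predict(states)                          # Q_online(s, .)
    best_next = np.argmax(online_q.predict(next_states), axis=1)  # argmax_a Q_online(s', a)
    q_next = target_q.predict(next_states)                        # Q_target(s', .)

    for i, (_, action, reward, _, terminal) in enumerate(batch):
        q_current[i][action] = reward if terminal else reward + gamma * q_next[i][best_next[i]]

    # Move the online network towards the double-DQN targets.
    online_q.fit(states, q_current, verbose=0)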