Code example #1
    # run_simulation assumes the project's Display, Map and max_first helpers are
    # available from the surrounding code; only deepcopy is standard library.
    from copy import deepcopy

    def run_simulation(game_map: Map, Q: dict, epsilon_greedy=max_first):
        display = Display(game_map)
        game_map.reset()
        state = deepcopy(game_map.get_init_state())
        run = True
        score = 0

        display.render()
        while run:
            # pick an action with the (epsilon-)greedy policy learned during training
            action = epsilon_greedy(Q, state,
                                    game_map.get_available_actions(state), 0.3,
                                    "softmax")
            # accumulate the reward so is_final() sees the current score
            state, reward = game_map.apply_action(action)
            score += reward
            run = not display.is_close() and not game_map.is_final(score)
            display.render()

        display.close_display()

        if game_map.win():
            print("Game Win!!!")
        else:
            print("Game Lose!!!")
Code example #2
# Standard-library imports plus the project's ACTIONS constant; Display, print_q
# and Q_VALUE are assumed to come from the same project (not shown here).
import sys
from copy import deepcopy
from statistics import mean

from src.map import ACTIONS


def sarsa(game_map,
          epsilon_greedy,
          Q=None,
          learning_rate=0.1,
          discovering_factor=0.9,
          epsilon=0.3,
          training_episode=1000,
          evaluation_episode=10,
          strategy="default",
          verbose=False):
    # avoid the mutable-default-argument pitfall: create a fresh Q-table per call
    if Q is None:
        Q = {}

    train_scores = []
    eval_scores = []
    initial_state = game_map.get_init_state()
    win_game = 0
    q_values = []
    number_cell_visited = []

    if verbose:
        display = Display(game_map)

    for train_ep in range(1, training_episode + 1):
        game_map.reset()
        score = 0

        state = deepcopy(initial_state)
        actions = game_map.get_available_actions_mouse()
        action = epsilon_greedy(Q, state, actions, epsilon, strategy)

        if verbose:
            print_q(Q, game_map.height, game_map.width, ACTIONS)
            display.render()
            sys.stdin.readline()

            if display.is_close():
                sys.exit(0)

        while not game_map.is_final(score):
            # apply the action and get the next state and the reward
            next_state, reward = game_map.apply_action(action)
            score += reward

            # Q stores (q_value, visit_count) tuples keyed by (state, action)
            state_q_value, number_visited = Q.get((state, action), (0, 0))
            # SARSA is on-policy: the next action is chosen with the same
            # epsilon-greedy policy and used in the update below
            action_next_state = epsilon_greedy(
                Q, next_state, game_map.get_available_actions(next_state),
                epsilon, strategy)
            next_state_q_value, _ = Q.get((next_state, action_next_state),
                                          (0, 0))

            # SARSA update: Q(s, a) += alpha * (r + gamma * Q(s', a') - Q(s, a)),
            # where discovering_factor plays the role of the discount factor gamma
            q_value = state_q_value + learning_rate * (
                reward + discovering_factor * next_state_q_value -
                state_q_value)

            Q[(state, action)] = (q_value, number_visited + 1)
            state = next_state
            action = action_next_state

            if verbose:
                print_q(Q, game_map.height, game_map.width, ACTIONS)
                display.render()
                sys.stdin.readline()

                if display.is_close():
                    sys.exit(0)

        q_values.append(sum(x[Q_VALUE] for x in Q.values()))
        win_game += int(game_map.win())
        train_scores.append(score)
        number_cell_visited.append(len(Q))

        print("Episode {}/{}, score : {}, win : {}".format(
            train_ep, training_episode, score, game_map.win()))

        if train_ep % evaluation_episode == 0:
            # average training score over the last evaluation window
            avg_score = mean(train_scores[train_ep -
                                          evaluation_episode:train_ep])

            eval_scores.append(avg_score)

    # fraction of training episodes that ended in a win
    win_game /= training_episode

    return Q, train_scores, eval_scores, win_game, q_values, number_cell_visited
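
Putting the two pieces together, a training-then-replay run might look like the following; the Map constructor and the map file path are placeholders (assumptions) for whatever the project actually uses to load a level.

    from src.map import Map  # assumed location of the Map class

    game_map = Map("maps/level1.txt")  # hypothetical map file / constructor
    Q, train_scores, eval_scores, win_rate, q_values, visited = sarsa(
        game_map, max_first, epsilon=0.3, training_episode=1000,
        strategy="softmax")
    print("win rate over training:", win_rate)

    # replay one game with the learned Q-table
    run_simulation(game_map, Q)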