# 예제 #1 (Example #1)
# 0
def play(g: Game,
         ddqn: DoubleDeepQNetwork,
         NPC_Memory: NPC_History,
         use_NPC=False):
    """Run the main game loop for two players until the game is terminal.

    Player 1 always issues ATTACK_CLOSEST_TARGET (the DDQN prediction call
    is currently commented out). When ``use_NPC`` is True, a sliding window
    of the last 5 actions plus a summary of the game state is recorded into
    ``NPC_Memory``; once more than 100 observations are collected, a
    CatBoost NPC model is trained exactly once.

    Args:
        g: Game instance to run.
        ddqn: DoubleDeepQNetwork; currently unused because both
            ``predict_action`` and ``train`` calls are commented out.
        NPC_Memory: NPC_History buffer accumulating (state, action)
            observations for NPC training.
        use_NPC: if True, collect NPC observations and train the NPC model.
    """
    # Initial 2 players
    player1 = g.get_players(1)[0]
    player2 = g.get_players(2)[0]
    player1.main_player = True

    # Module-level sliding window of player 1's last 5 actions.
    global action_list
    if use_NPC:
        action_list = collections.deque(maxlen=5)

    start_game(g)

    state = g.capture_grey_scale()
    g.prev_stat = g.get_state_stat()

    while True:
        if g.is_terminal():
            g.stop()
            if use_NPC:
                action_list.clear()
            g.game_result()
            print("Game over")
            break

        # Debug print: location / health / gold for both players.
        print("Player 1 : " + str(player1.location) + " - " +
              str(player1.health_p) + " - " + str(player1.gold))
        print("Player 2 : " + str(player2.location) + " - " +
              str(player2.health_p) + " - " + str(player2.gold))

        # Player 1 action by model
        # action = ddqn.predict_action(state)
        action = 1
        player1.do_action(ATTACK_CLOSEST_TARGET)

        # Only for NPC as current model is no good
        if use_NPC:
            npc_action = np.random.randint(2)

            # Only build features once the action window is full.
            if len(action_list) >= 5:
                # TODO dummy_get_npc_state
                player_health = player1.health_p
                resource_matrix = g.get_resource_matrix()
                x, y = g.get_closest_enemy_location(player1.location[0],
                                                    player1.location[1], 1)
                # Manhattan distance from player 1 to its closest enemy.
                # FIX: the y-term previously used player2's location, which
                # made the distance a mix of both players' positions.
                dist_closest_enemy = (np.abs(player1.location[0] - x) +
                                      np.abs(player1.location[1] - y))
                reward_value = g.get_reward_value()
                number_of_enemies = len(g.teams[Game.OPPONENTS[1]].players)
                NPC_state = [
                    action_list[0], action_list[1], action_list[2],
                    action_list[3], action_list[4], player_health,
                    np.sum(np.sum(resource_matrix, axis=0), axis=0),
                    dist_closest_enemy, reward_value, number_of_enemies
                ]
                NPC_Memory.Add_Observation(NPC_state, npc_action)
                print(NPC_Memory.num_obs)
                # Train the NPC model once, after enough observations.
                if NPC_Memory.num_obs > 100 and NPC_Memory.do_once_flag:
                    print("GOT HERE __________----------")
                    # FIX: clear the flag on this buffer instance, not on the
                    # NPC_History class (which would affect all instances).
                    NPC_Memory.do_once_flag = False
                    player_net = NPC_CatBoost('Follower')
                    player_net.train_(NPC_Memory.CAT_State,
                                      NPC_Memory.CAT_Action)
                    player_net.eval_train()
            # Record the action in the sliding window (a redundant inner
            # use_NPC check was removed — we are already inside that guard).
            action_list.append(action)

        # Player 2 random action
        # player2.do_action(np.random.randint(1, 4))

        update_with_skip_rate(g, SKIP_RATE)
        g.caption()  # Show Window caption
        g.update_state()  # Update states to new model
        g.view()  # View the game state in the pygame window

        next_state = g.capture_grey_scale()
        reward = g.get_reward_value()

        # ddqn.train(state, action, reward, next_state, g.is_terminal())

        state = next_state