Example #1
def predict(self, game: Game, player: Player):
    # Build (action, distance-to-target) candidates for each objective
    actions = []
    e_x, e_y = game.get_closest_enemy_location(player.location[0], player.location[1], player.team_id)
    actions.append((ATTACK_CLOSEST_TARGET, self.distance((e_x, e_y), player.location)))
    r_x, r_y = game.get_nearest_resource_index(player.location[0], player.location[1])
    actions.append((HARVEST_CLOSEST_RESOURCE, self.distance((r_x, r_y), player.location)))
    b_x, b_y = game.get_enemy_base(player.team_id)
    actions.append((ATTACK_BASE, self.distance((b_x, b_y), player.location)))
    # Choose the action whose target is closest to the player
    return min(actions, key=lambda x: x[1])[0]
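
Example #1 calls a `self.distance` helper that is not shown above. A minimal sketch of what it could compute, assuming the same Manhattan metric that Example #2 uses inline (the standalone name `manhattan_distance` is hypothetical):

def manhattan_distance(a, b):
    # Hypothetical helper, not part of the source: Manhattan (grid) distance
    # between two (x, y) positions, mirroring the inline computation in Example #2
    return abs(a[0] - b[0]) + abs(a[1] - b[1])
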
Example #2
import collections
import numpy as np


def play(g: Game,
         ddqn: DoubleDeepQNetwork,
         NPC_Memory: NPC_History,
         use_NPC=False):
    # Get the two initial players
    player1 = g.get_players(1)[0]
    player2 = g.get_players(2)[0]
    player1.main_player = True

    # Set up the rolling window of recent actions (used for NPC state features)
    global action_list
    if use_NPC:
        action_list = collections.deque(maxlen=5)

    start_game(g)

    state = g.capture_grey_scale()
    g.prev_stat = g.get_state_stat()

    while True:
        if g.is_terminal():
            g.stop()
            if use_NPC:
                action_list.clear()
            g.game_result()
            print("Game over")
            break

        # Report each player's position, health, and gold
        print(f"Player 1 : {player1.location} - {player1.health_p} - {player1.gold}")
        print(f"Player 2 : {player2.location} - {player2.health_p} - {player2.gold}")

        # Player 1 action: the model is not used yet, so the action index is
        # hard-coded and the attack command is issued directly
        # action = ddqn.predict_action(state)
        action = 1
        player1.do_action(ATTACK_CLOSEST_TARGET)

        # NPC data collection only, since the current model is not yet usable
        if use_NPC:
            npc_action = np.random.randint(2)

            if len(action_list) >= 5:
                # TODO dummy_get_npc_state (a factored helper sketch follows this example)
                player_health = player1.health_p
                resource_matrix = g.get_resource_matrix()
                # print(resource_matrix.shape)
                x, y = g.get_closest_enemy_location(player1.location[0],
                                                    player1.location[1], 1)
                # Manhattan distance from player 1 to its closest enemy
                dist_closest_enemy = (np.abs(player1.location[0] - x) +
                                      np.abs(player1.location[1] - y))
                reward_value = g.get_reward_value()
                number_of_enemies = len(g.teams[Game.OPPONENTS[1]].players)
                # NPC state features: last 5 actions, player health, total
                # resources on the map, distance to the closest enemy,
                # current reward, and number of enemies
                NPC_state = [
                    action_list[0], action_list[1], action_list[2],
                    action_list[3], action_list[4], player_health,
                    np.sum(resource_matrix),
                    dist_closest_enemy, reward_value, number_of_enemies
                ]
                # print(NPC_state)
                # NPC_Memory.Add_Observation(NPC_state,action)
                NPC_Memory.Add_Observation(NPC_state, npc_action)
                print(NPC_Memory.num_obs)
                # Train the CatBoost NPC model once enough observations exist
                if NPC_Memory.num_obs > 100 and NPC_Memory.do_once_flag:
                    print("Training NPC CatBoost model")
                    NPC_Memory.do_once_flag = False
                    player_net = NPC_CatBoost('Follower')
                    player_net.train_(NPC_Memory.CAT_State,
                                      NPC_Memory.CAT_Action)
                    player_net.eval_train()
            # Record this frame's action in the rolling window
            action_list.append(action)

        # Player 2 random action (currently disabled)
        # player2.do_action(np.random.randint(1, 4))

        update_with_skip_rate(g, SKIP_RATE)
        g.caption()  # Show Window caption
        g.update_state()  # Update states to new model
        g.view()  # View the game state in the pygame window

        next_state = g.capture_grey_scale()
        reward = g.get_reward_value()

        # ddqn.train(state, action, reward, next_state, g.is_terminal())

        state = next_state
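
The `# TODO dummy_get_npc_state` block in Example #2 builds the NPC feature vector inline. A minimal sketch of how it could be factored into a helper, assuming the same Game/Player API used in the loop (the function name `get_npc_state` is hypothetical):

import numpy as np


def get_npc_state(g, player, action_list):
    # Hypothetical refactor of the inline feature-vector code above: the last 5
    # actions, player health, total resources on the map, Manhattan distance to
    # the closest enemy, current reward, and number of remaining enemies.
    x, y = g.get_closest_enemy_location(player.location[0], player.location[1],
                                        player.team_id)
    dist_closest_enemy = (np.abs(player.location[0] - x) +
                          np.abs(player.location[1] - y))
    return list(action_list) + [
        player.health_p,
        np.sum(g.get_resource_matrix()),
        dist_closest_enemy,
        g.get_reward_value(),
        len(g.teams[Game.OPPONENTS[player.team_id]].players),
    ]

With such a helper, the inline block in the loop could reduce to a single call like NPC_state = get_npc_state(g, player1, action_list).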