def play(g: Game, ann: ANN):
    """Run one ANN-driven training game.

    Player 1 follows an epsilon-greedy policy over the network's
    predictions; players 2-4 repeat a fixed scripted action. The network
    is trained online on every observed transition.

    Args:
        g:   the Game instance to start, step, and stop here.
        ann: the network used for action prediction and online training.
    """
    # One learner on team 1, three scripted opponents on team 2.
    player1 = g.get_players(1)[0]
    player2 = g.get_players(2)[0]
    player3 = g.get_players(2)[1]
    player4 = g.get_players(2)[2]
    player1.main_player = True

    # Start the game (flag) and put it in training mode.
    g.start()
    g.set_train(True)
    g.update()
    g.update_state()
    g.prev_stat = g.get_state_stat()

    while True:
        # Terminal check: natural game end or a hard 3000-tick cap.
        if g.is_game_terminal() or g.get_ticks() > 3000:
            # Context manager guarantees the log is closed even if
            # game_result raises (original left the handle open on error).
            with open("./logs/evaluation.txt", 'a+') as file:
                g.game_result(file)
            g.stop()
            print("Game over")
            break

        # Player 1: epsilon-greedy selection over the model's prediction.
        state = g.get_state()
        predicted_action = ann.predict_action(state)
        random_action = np.random.randint(2)
        # Exploration rate decays as epsilon**tick, floored at 0.01.
        if np.random.random() < max(0.01, np.power(ann.epsilon, ann.tick)):
            action = random_action
        else:
            action = predicted_action
        player1.do_action(action)

        # Opponents: fixed scripted action.
        player2.do_action(2)
        player3.do_action(2)
        player4.do_action(2)

        update_with_skip_rate(g, SKIP_RATE)
        g.render()        # Draw the game state to graphics
        g.caption()       # Show Window caption
        g.update_state()  # Update states to new model
        g.view()          # View the game state in the pygame window

        # Train on the observed transition.
        next_state = g.get_state()
        reward = g.get_reward_value()
        # NOTE(review): the loop exit tests g.is_game_terminal() but the
        # terminal flag passed to train() is g.is_terminal() — confirm
        # these two predicates agree in the Game API.
        ann.train(state, action, reward, next_state, g.is_terminal())
        state = next_state
def play(g: Game, ac: Actor_Critic, game_num):
    """Run one game with player 1 controlled by the actor-critic agent.

    Players 2 and 3 are scripted opponents (random / fixed action). When
    the module-level TRAIN flag is set, the agent learns on every
    transition and results are appended to the evaluation CSV; otherwise
    the loop is slowed down for viewing and results go to stdout.

    Args:
        g:        the Game instance to start, step, and stop here.
        ac:       the actor-critic agent (predict_action / learn).
        game_num: index of this game, forwarded to game_result for logging.
    """
    # One learner on team 1, two scripted opponents on team 2.
    player1 = g.get_players(1)[0]
    player2 = g.get_players(2)[0]
    player3 = g.get_players(2)[1]
    player1.main_player = True

    # Start the game (flag) and put it in training mode.
    g.start()
    g.set_train(True)
    g.update()
    g.update_state()

    # Both factions begin with a town hall before the loop starts.
    player1.build_town_hall()
    player3.build_town_hall()
    update_with_skip_rate(g, 10)
    g.update_state()
    g.prev_stat = g.get_state_stat()

    state = g.get_state()
    while True:
        if not TRAIN:
            time.sleep(0.5)  # slow the loop down for human viewing
        # If the game is in a terminal state, log the result and stop.
        if g.is_game_terminal():
            if TRAIN:
                # Context manager closes the CSV even if game_result
                # raises (original left the handle open on error).
                with open("./logs_ac/evaluation_ac.csv", 'a+') as file:
                    g.game_result(file, game_num)
            else:
                g.game_result(None, game_num)
            g.stop()
            print("Game over")
            break

        # Player 1 action by model (actor output, no epsilon-greedy here).
        action = ac.predict_action(state)
        player1.do_action(action)

        # Opponents: player 2 random, player 3 fixed action 4.
        random_action = np.random.randint(4)
        player2.do_action(random_action)
        player3.do_action(4)

        update_with_skip_rate(g, SKIP_RATE)
        g.update_state()  # Update states to new model
        g.view()          # View the game state in the pygame window

        # Learn from the observed transition when training.
        reward = g.get_reward_value()
        next_state = g.get_state()
        if TRAIN:
            ac.learn(state, action, reward, next_state, g.is_game_terminal())
        state = next_state
def play(g: Game, ddqn: DoubleDeepQNetwork, NPC_Memory: NPC_History, use_NPC=False):
    """Run one game, optionally collecting NPC observations for CatBoost.

    The DDQN is currently bypassed (commented out below): player 1 always
    executes ATTACK_CLOSEST_TARGET while a fixed action id is recorded.
    With use_NPC=True, a sliding window of the last 5 recorded actions
    plus hand-crafted game features is appended to NPC_Memory, and a
    CatBoost follower is trained once enough observations accumulate.

    Args:
        g:          the Game instance to start, step, and stop here.
        ddqn:       the (currently unused) DDQN agent.
        NPC_Memory: observation buffer with Add_Observation / do_once_flag.
        use_NPC:    when True, collect NPC observations and train CatBoost.
    """
    # One main player on team 1, one opponent on team 2.
    player1 = g.get_players(1)[0]
    player2 = g.get_players(2)[0]
    player1.main_player = True

    # Sliding window of the last 5 actions, shared at module level.
    global action_list
    if use_NPC:
        action_list = collections.deque(maxlen=5)

    start_game(g)
    state = g.capture_grey_scale()
    g.prev_stat = g.get_state_stat()

    while True:
        if g.is_terminal():
            g.stop()
            if use_NPC:
                action_list.clear()
            g.game_result()
            print("Game over")
            break

        # Debug: position / health / gold of both players.
        print("Player 1 : " + str(player1.location) + " - " + str(player1.health_p) + " - " + str(player1.gold))
        print("Player 2 : " + str(player2.location) + " - " + str(player2.health_p) + " - " + str(player2.gold))

        # Player 1 action by model — disabled because the current model is
        # no good; a fixed action id is recorded for the NPC window while
        # attack-closest is what actually executes.
        # action = ddqn.predict_action(state)
        action = 1
        player1.do_action(ATTACK_CLOSEST_TARGET)

        # Only for NPC as current model is no good.
        if use_NPC is True:
            npc_action = np.random.randint(2)
            # Need a full window of 5 past actions before recording.
            if len(action_list) >= 5:
                player_health = player1.health_p
                resource_matrix = g.get_resource_matrix()
                x, y = g.get_closest_enemy_location(player1.location[0], player1.location[1], 1)
                # BUG FIX: the Manhattan distance previously used
                # player2.location[1] on the y-axis, mixing the opponent's
                # position into player1's distance to its closest enemy.
                dist_closest_enemy = (np.abs(player1.location[0] - x)
                                      + np.abs(player1.location[1] - y))
                reward_value = g.get_reward_value()
                number_of_enemies = len(g.teams[Game.OPPONENTS[1]].players)
                # Feature vector: 5 past actions + hand-crafted game stats.
                NPC_state = [
                    action_list[0],
                    action_list[1],
                    action_list[2],
                    action_list[3],
                    action_list[4],
                    player_health,
                    np.sum(np.sum(resource_matrix, axis=0), axis=0),
                    dist_closest_enemy,
                    reward_value,
                    number_of_enemies,
                ]
                NPC_Memory.Add_Observation(NPC_state, npc_action)
                print(NPC_Memory.num_obs)
                # One-shot CatBoost training once enough observations exist.
                if NPC_Memory.num_obs > 100 and NPC_Memory.do_once_flag is True:
                    print("GOT HERE __________----------")
                    # BUG FIX: clear the flag on the instance that the guard
                    # above reads; the original assigned to the NPC_History
                    # class attribute, which leaves an instance-level flag
                    # True and retrains every tick.
                    NPC_Memory.do_once_flag = False
                    player_net = NPC_CatBoost('Follower')
                    player_net.train_(NPC_Memory.CAT_State, NPC_Memory.CAT_Action)
                    player_net.eval_train()

        if use_NPC is True:
            action_list.append(action)

        # Player 2 random action (disabled).
        # player2.do_action(np.random.randint(1, 4))

        update_with_skip_rate(g, SKIP_RATE)
        g.caption()       # Show Window caption
        g.update_state()  # Update states to new model
        g.view()          # View the game state in the pygame window

        next_state = g.capture_grey_scale()
        reward = g.get_reward_value()
        # DDQN training disabled along with the model action above.
        # ddqn.train(state, action, reward, next_state, g.is_terminal())
        state = next_state
def play(g: Game, ann: ANN, game_num):
    """Run one game with player 1 driven by the ANN (epsilon-greedy).

    Players 2 and 3 are scripted opponents. When the module-level TRAIN
    flag is set, the network trains on every transition and results are
    appended to a CSV named after the replay scheme (PER vs ER);
    otherwise the loop is slowed down for viewing.

    Args:
        g:        the Game instance to start, step, and stop here.
        ann:      the network (predict_action / train, epsilon, tick, PER).
        game_num: index of this game, forwarded to game_result for logging.
    """
    # One learner on team 1, two scripted opponents on team 2.
    player1 = g.get_players(1)[0]
    player2 = g.get_players(2)[0]
    player3 = g.get_players(2)[1]
    player1.main_player = True

    # Start the game (flag) and put it in training mode.
    g.start()
    g.set_train(True)
    g.update()
    g.update_state()

    # Both factions begin with a town hall before the loop starts.
    player1.build_town_hall()
    player3.build_town_hall()
    update_with_skip_rate(g, 10)
    g.update_state()
    g.prev_stat = g.get_state_stat()

    state = g.get_state()
    while True:
        if not TRAIN:
            time.sleep(0.3)  # slow the loop down for human viewing
        # If the game is in a terminal state, log the result and stop.
        if g.is_game_terminal():
            if TRAIN:
                # Log file name encodes the replay scheme in use.
                ex = "PER" if ann.PER else "ER"
                # Context manager closes the CSV even if game_result
                # raises (original left the handle open on error).
                with open("./logs_ann/evaluation_" + ex + ".csv", 'a+') as file:
                    g.game_result(file, game_num)
            else:
                g.game_result(None, game_num)
            g.stop()
            print("Game over")
            break

        # Player 1: epsilon-greedy selection over the model's prediction.
        predicted_action = ann.predict_action(state)
        random_action = np.random.randint(4)
        # Exploration rate decays as epsilon**tick, floored at 0.01.
        if np.random.random() < max(0.01, np.power(ann.epsilon, ann.tick)):
            action = random_action
        else:
            action = predicted_action
        player1.do_action(action)

        # Player 2 reuses the same random draw (it acts randomly either
        # way); player 3 always performs action 4.
        player2.do_action(random_action)
        player3.do_action(4)

        update_with_skip_rate(g, SKIP_RATE)
        g.update_state()  # Update states to new model
        g.view()          # View the game state in the pygame window

        # Train on the observed transition.
        next_state = g.get_state()
        reward = g.get_reward_value()
        ann.train(state, action, reward, next_state, g.is_game_terminal())
        state = next_state