def get_players(state):
    # Unpack a flat state tuple (blue_x, blue_y, red_x, red_y) into two Entity objects.
    blue_player = Entity()
    blue_player.x = state[0]
    blue_player.y = state[1]
    red_player = Entity()
    red_player.x = state[2]
    red_player.y = state[3]
    return blue_player, red_player
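
# ------------------------------------------------------------------------------------------------------------~
# Illustrative sketch (not part of the original code): the inverse of get_players(), packing two
# Entity objects back into the flat (blue_x, blue_y, red_x, red_y) tuple used above. Handy when a
# state key has to be rebuilt after the entities have been moved.
def players_to_state(blue_player, red_player):
    return (blue_player.x, blue_player.y, red_player.x, red_player.y)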
def can_escape_by_one_step(point1, point2):
    # Place red at point1 and blue at point2, then check whether this configuration is already a win for blue.
    env = Environment()
    env.red_player = Entity()
    env.red_player.x = point1[0]
    env.red_player.y = point1[1]
    env.blue_player = Entity()
    env.blue_player.x = point2[0]
    env.blue_player.y = point2[1]
    win_stat = env.compute_terminal()
    return win_stat == WinEnum.Blue
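
# ------------------------------------------------------------------------------------------------------------~
# Illustrative sketch (not part of the original code): checking whether blue can escape from any of its
# one-step neighbours. The 8-connected move set below is an assumption; the real action set is defined
# by the environment's constants, and boundary checks are omitted for brevity.
def can_escape_from_any_neighbour(blue_pos, red_pos):
    one_step_moves = [(-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 1), (1, -1), (1, 0), (1, 1)]
    for dx, dy in one_step_moves:
        neighbour = (blue_pos[0] + dx, blue_pos[1] + dy)
        if can_escape_by_one_step(red_pos, neighbour):
            return True
    return False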
def learn_decision_maker(decision_maker, n_samples=20, save_to_file=False):
    # Estimate the behaviour policy of `decision_maker` by sampling its actions in every state.
    env = Environment()
    blue_decision_maker = decision_maker  # the agent whose behaviour policy we want to learn
    red_decision_maker = decision_maker   # the imaginary opponent, controlled by the same agent
    env.blue_player = Entity(blue_decision_maker)
    env.red_player = Entity(red_decision_maker)
    agent_name = type(decision_maker).__name__  # identifier used for the output file name
    policy_counts = {}

    # go over each state = (blue position, red position)
    for state in state_generator():
        if is_terminal_state(state):
            continue

        # set the positions of the players in the environment
        set_env_state(env, state)

        # get the observation for the blue player
        observation_for_blue: State = env.get_observation_for_blue()

        # the agent is not deterministic, so we sample repeatedly to estimate the distribution p(a|s)
        for _ in range(n_samples):
            # get the action chosen by the player
            action_blue = blue_decision_maker.get_action(observation_for_blue)
            a = action_blue - 1  # change to 0-based index
            update_pol_cnts(state, a, policy_counts)

    print('Finished learning the enemy')
    if save_to_file:
        with open(f'learned_{agent_name}_enemy', 'wb') as myfile:
            pickle.dump([agent_name, policy_counts, n_samples], myfile)
    return policy_counts
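
# ------------------------------------------------------------------------------------------------------------~
# Illustrative sketch (not part of the original code): turning the raw counts collected by
# learn_decision_maker() into an empirical behaviour policy p(a|s). This assumes update_pol_cnts()
# (defined in the utils module, not shown here) stores, per state, a length-n_actions list of how
# many times each 0-based action was sampled; each non-terminal state is sampled n_samples times.
def counts_to_policy(policy_counts, n_samples):
    policy = {}
    for state, counts in policy_counts.items():
        policy[state] = [c / n_samples for c in counts]
    return policy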
import pickle
import numpy as np
import json

from Arena.Environment import Environment
from Arena.Entity import Entity
from RafaelPlayer.RafaelDecisionMaker import RafaelDecisionMaker
from Arena.constants import WIN_REWARD, MOVE_PENALTY, MAX_STEPS_PER_EPISODE, HARD_AGENT
from misharon_utils import state_action_generator, get_Q_vals, set_env_state, is_terminal_state, derive_greedy_policy
from misharon_learn_the_enemy import n_actions

# ------------------------------------------------------------------------------------------------------------~
# define dummy players, just so we can use the class functions
dummy_blue = Entity(RafaelDecisionMaker(HARD_AGENT))
dummy_red = Entity(RafaelDecisionMaker(HARD_AGENT))
env = Environment()
env.blue_player = dummy_blue
env.red_player = dummy_red
# ------------------------------------------------------------------------------------------------------------~

# ------------------------------------------------------------------------------------------------------------~
def get_reward(env, state):
    # Place the players according to `state` and return the reward from the blue player's perspective.
    set_env_state(env, state)
    reward_blue, reward_red = env.handle_reward()
    return reward_blue
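
# ------------------------------------------------------------------------------------------------------------~
# Illustrative sketch (not part of the original code): pre-computing get_reward() over a collection of
# states so that repeated lookups (e.g. during planning or value iteration) do not re-run the environment.
# The `states` iterable is assumed to yield the same flat state tuples that set_env_state() expects.
def build_reward_table(env, states):
    reward_table = {}
    for state in states:
        reward_table[state] = get_reward(env, state)
    return reward_table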
if __name__ == '__main__':

    env = Environment(IS_TRAINING)

    print("Starting Blue player")
    blue_decision_maker = DQNAgent_keras.DQNAgent_keras()
    # blue_decision_maker = DQNAgent_keras.DQNAgent_keras(UPDATE_CONTEXT=True, path_model_to_load='conv1(6_6_1_256)_conv2(4_4_256_128)_conv3(3_3_128_128)_flatten_fc__blue_202001_ 0.95max_ -0.04avg_ -3.10min__1620558885.model')

    print("Starting red player")
    ### Red Decision Maker
    red_decision_maker = Greedy_player.Greedy_player()

    env.blue_player = Entity(blue_decision_maker)
    env.red_player = Entity(red_decision_maker)

    print_start_of_game_info(blue_decision_maker, red_decision_maker)

    NUM_OF_EPISODES = env.NUMBER_OF_EPISODES
    for episode in tqdm(range(1, NUM_OF_EPISODES + 1), ascii=True, unit='episodes'):

        EVALUATE = evaluate(episode)
        current_episode = Episode(episode, EVALUATE, show_always=not IS_TRAINING)

        # set a new start position for the players
        env.reset_game(episode)

        # get the initial observation for the blue player
        observation_for_blue_s0: State = env.get_observation_for_blue()
        action_blue = -1  # sentinel: no action chosen yet