def get_players(state):
    # unpack a flat state vector (blue_x, blue_y, red_x, red_y) into two entities
    blue_player = Entity()
    blue_player.x = state[0]
    blue_player.y = state[1]
    red_player = Entity()
    red_player.x = state[2]
    red_player.y = state[3]
    return blue_player, red_player
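
A minimal usage sketch, assuming the flat state layout (blue_x, blue_y, red_x, red_y) implied by the indexing above; the coordinate values here are hypothetical:

# hypothetical example values for a 4-element state
state = (2, 3, 7, 5)
blue, red = get_players(state)
assert (blue.x, blue.y) == (2, 3)
assert (red.x, red.y) == (7, 5)
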
Example #2
def can_escape_by_one_step(point1, point2):
    # place red at point1 and blue at point2, then check whether
    # this configuration is already terminal with a blue win
    env = Environment()

    env.red_player = Entity()
    env.red_player.x = point1[0]
    env.red_player.y = point1[1]

    env.blue_player = Entity()
    env.blue_player.x = point2[0]
    env.blue_player.y = point2[1]

    win_stat = env.compute_terminal()
    return win_stat == WinEnum.Blue
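
A hedged usage sketch: point1 places the red player and point2 the blue player, both assumed to be (x, y) pairs; the coordinates are hypothetical:

# hypothetical positions; True iff this placement is already a blue win
if can_escape_by_one_step((4, 4), (0, 0)):
    print('blue escapes')
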
Example #3
def learn_decision_maker(decision_maker, n_samples=20, save_to_file=False):

    env = Environment()
    blue_decision_maker = decision_maker   # the agent whose behaviour policy we want to learn
    red_decision_maker = decision_maker    # the opponent, modelled with the same decision maker
    env.blue_player = Entity(blue_decision_maker)
    env.red_player = Entity(red_decision_maker)
    policy_counts = {}

    # iterate over every state = (blue position, red position)
    for state in state_generator():

        if is_terminal_state(state):
            continue

        # set the players' positions in the environment
        set_env_state(env, state)

        # get observation
        observation_for_blue: State = env.get_observation_for_blue()

        # the agent is not deterministic, so estimate the distribution p(a|s) by sampling
        for i_samp in range(n_samples):
            # get the action chosen by each player
            action_blue = blue_decision_maker.get_action(observation_for_blue)
            a = action_blue - 1   # change to 0-based index
            update_pol_cnts(state, a, policy_counts)

    print('Finished learning the enemy')
    if save_to_file:
        # the original snippet referenced an undefined `agent_name`; deriving it
        # from the decision maker's class name is an assumption made here
        agent_name = type(decision_maker).__name__
        with open(f'learned_{agent_name}_enemy', 'wb') as myfile:
            pickle.dump([agent_name, policy_counts, n_samples], myfile)

    return policy_counts
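
A sketch of turning the raw policy_counts into the estimated distribution p(a|s). It assumes update_pol_cnts stores, per state, a dict mapping action index to the number of times it was sampled (so each state's counts sum to n_samples):

def estimate_policy(policy_counts, n_samples):
    # normalize the sampled action counts into per-state probabilities
    return {state: {a: cnt / n_samples for a, cnt in counts.items()}
            for state, counts in policy_counts.items()}
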
Example #4
import pickle
import numpy as np
import json

from Arena.Environment import Environment
from Arena.Entity import Entity
from RafaelPlayer.RafaelDecisionMaker import RafaelDecisionMaker
from Arena.constants import WIN_REWARD, MOVE_PENALTY, MAX_STEPS_PER_EPISODE, HARD_AGENT

from misharon_utils import state_action_generator, get_Q_vals, set_env_state, is_terminal_state, derive_greedy_policy
from misharon_learn_the_enemy import n_actions

#------------------------------------------------------------------------------------------------------------~

# define dummy players, just so we can use the class functions
dummy_blue = Entity(RafaelDecisionMaker(HARD_AGENT))
dummy_red = Entity(RafaelDecisionMaker(HARD_AGENT))
env = Environment()
env.blue_player = dummy_blue
env.red_player = dummy_red

#------------------------------------------------------------------------------------------------------------~

def get_reward(env, state):
    # place the players according to `state` and return blue's reward for it
    set_env_state(env, state)
    reward_blue, reward_red = env.handle_reward()
    return reward_blue
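
A small sketch of how get_reward composes with the helpers used in Example #3, e.g. to tabulate blue's reward over all non-terminal states (assuming state_generator and is_terminal_state behave as they do there):

rewards = {state: get_reward(env, state)
           for state in state_generator()
           if not is_terminal_state(state)}
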
Example #5
if __name__ == '__main__':


    env = Environment(IS_TRAINING)

    print("Starting Blue player")

    blue_decision_maker = DQNAgent_keras.DQNAgent_keras()
    #blue_decision_maker = DQNAgent_keras.DQNAgent_keras(UPDATE_CONTEXT=True, path_model_to_load='conv1(6_6_1_256)_conv2(4_4_256_128)_conv3(3_3_128_128)_flatten_fc__blue_202001_   0.95max_  -0.04avg_  -3.10min__1620558885.model')

    print("Starting red player")
    ### Red Decision Maker
    red_decision_maker = Greedy_player.Greedy_player()


    env.blue_player = Entity(blue_decision_maker)
    env.red_player = Entity(red_decision_maker)

    print_start_of_game_info(blue_decision_maker, red_decision_maker)

    NUM_OF_EPISODES = env.NUMBER_OF_EPISODES
    for episode in tqdm(range(1, NUM_OF_EPISODES + 1), ascii=True, unit='episodes'):

        EVALUATE = evaluate(episode)
        current_episode = Episode(episode, EVALUATE, show_always=not IS_TRAINING)

        # set new start position for the players
        env.reset_game(episode)
        # get observation
        observation_for_blue_s0: State = env.get_observation_for_blue()
        action_blue = -1