Example #1
def update_variables():
    MAX_HIT_POINTS = int(get_config("MainInfo")['max_hit_points'])
    MAX_ANGLE = int(get_config("MainInfo")['max_angle']) * math.pi / 180
    MAX_VELOCITY = int(get_config("MainInfo")['max_velocity'])
    BOARD_WIDTH = int(get_config("MainInfo")['board_width'])
    BOARD_HEIGHT = int(get_config("MainInfo")['board_height'])
    return MAX_HIT_POINTS, MAX_ANGLE, MAX_VELOCITY, BOARD_WIDTH, BOARD_HEIGHT
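
Every snippet here leans on a get_config helper that the examples never show. Below is a minimal sketch of what it might look like, assuming an INI-style file parsed with the standard-library configparser; the settings.ini file name is an assumption, and a real implementation would likely cache the parsed file instead of re-reading it on every call:

# Hypothetical sketch of the get_config helper used throughout these examples.
import configparser

def get_config(section):
    config = configparser.ConfigParser()
    config.read('settings.ini')  # assumed file name
    return config[section]  # a SectionProxy, so main_info.getboolean(...) also works
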
Example #2
def update_variables():
    Zombie.BOARD_HEIGHT = int(get_config("MainInfo")['board_height'])
    Zombie.BOARD_WIDTH = int(get_config("MainInfo")['board_width'])
    Zombie.LIGHT_SIZE = int(get_config("MainInfo")['light_size'])
    Zombie.DT = int(get_config("MainInfo")['dt'])
    Zombie.ANGLE = float(get_config("MainInfo")['max_angle'])
    Zombie.START_POSITIONS = calculate_start_positions(Zombie.BOARD_WIDTH, Zombie.BOARD_HEIGHT, Zombie.ANGLE)
Example #3
def __init__(self, device, agent_type):
    super().__init__(strategy=EpsilonGreedyStrategy(), agent_type=agent_type)  # use the 'EpsilonGreedyStrategy' strategy
    # load values from config
    self.LIGHT_SIZE = int(get_config("MainInfo")['light_size'])
    ddqn_info = get_config('DdqnAgentInfo')
    self.batch_size = int(ddqn_info['batch_size'])
    self.gamma = float(ddqn_info['gamma'])
    self.memory_size = int(ddqn_info['memory_size'])
    self.target_update = int(ddqn_info['target_update'])
    self.lr = float(ddqn_info['lr'])
    # init networks
    self.num_actions, self.target_net, self.policy_net = create_networks(device, agent_type, self.possible_actions)
    # other fields
    self.optimizer = optim.Adam(params=self.policy_net.parameters(), lr=self.lr)
    self.memory = ReplayMemory()
    self.current_step = 0
    self.device = device
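
The constructor above references a ReplayMemory class that is not shown (note that it loads memory_size from the config yet instantiates ReplayMemory() without arguments). Here is a minimal sketch of a compatible uniform-sampling buffer; the method names push/sample/can_provide_sample and the default capacity are assumptions:

# Hypothetical sketch of a replay buffer compatible with the constructor above.
import random
from collections import deque

class ReplayMemory:
    def __init__(self, capacity=100000):  # default capacity is an assumption
        self.memory = deque(maxlen=capacity)  # drops the oldest experience when full

    def push(self, experience):
        # experience could be a (state, action, next_state, reward) tuple
        self.memory.append(experience)

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def can_provide_sample(self, batch_size):
        return len(self.memory) >= batch_size
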
Example #4
    def __init__(self, device, agent_type):
        BasicMCTSAgent.MAX_HIT_POINTS, BasicMCTSAgent.MAX_ANGLE, BasicMCTSAgent.MAX_VELOCITY, BasicMCTSAgent.BOARD_WIDTH, BasicMCTSAgent.BOARD_HEIGHT, BasicMCTSAgent.C = update_variables()
        super().__init__(EpsilonGreedyStrategy(), agent_type)
        self.possible_actions = list(range(Game.BOARD_HEIGHT)) if self.agent_type == 'zombie' else list(range(Game.BOARD_HEIGHT * Game.BOARD_WIDTH))
        self.root = Node([], self.possible_actions)
        self.temporary_root = self.root  # TODO - change its name to something like: real world state-node
        self.current_step = 0
        self.simulation_reward = 0
        self.simulation_num = int(get_config("TreeAgentInfo")['simulation_num'])  # number of simulations in the simulation phase
        self.simulation_depth = int(get_config("TreeAgentInfo")['simulation_depth'])  # number of times to expand a node in single simulation
        self.episode_reward = 0
        self.tree_depth = 0

        self.pool = mp.Pool(mp.cpu_count())

        main_info = get_config('MainInfo')
        self.steps_per_episodes = int(main_info['zombies_per_episode']) + int(main_info['board_width'])
        self.total_episodes = int(main_info['num_train_episodes']) + int(main_info['num_test_episodes'])
Example #5
class RandomAgent(Agent):
    BOARD_WIDTH = int(get_config("MainInfo")['board_width'])
    BOARD_HEIGHT = int(get_config("MainInfo")['board_height'])

    def __init__(self, device, agent_type):
        RandomAgent.BOARD_WIDTH, RandomAgent.BOARD_HEIGHT = update_variables()
        super(RandomAgent, self).__init__(EpsilonGreedyStrategy(), agent_type)
        self.current_step = 0
        self.possible_actions = list(range(RandomAgent.BOARD_HEIGHT)) if self.agent_type == 'zombie' else list(
            range(RandomAgent.BOARD_HEIGHT * RandomAgent.BOARD_WIDTH))

    def select_action(self, state, alive_zombies):
        rate = self.strategy.get_exploration_rate(current_step=self.current_step)
        self.current_step += 1
        return random.sample(self.possible_actions, 1)[0], rate, self.current_step

    def learn(self, state, action, next_state, reward):
        pass

    def reset(self):
        pass
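
A short usage sketch for the agent above. The device argument is unused by RandomAgent, and select_action ignores its inputs, so placeholder values suffice (all values here are illustrative):

# Hypothetical usage of the RandomAgent defined above.
agent = RandomAgent(device=None, agent_type='zombie')
# the action is drawn uniformly from agent.possible_actions
action, epsilon, step = agent.select_action(state=None, alive_zombies=[])
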
Example #6
def update_variables():
    MAX_HIT_POINTS = int(get_config("MainInfo")['max_hit_points'])
    MAX_ANGLE = int(get_config("MainInfo")['max_angle'])
    MAX_VELOCITY = int(get_config("MainInfo")['max_velocity'])
    BOARD_WIDTH = int(get_config("MainInfo")['board_width'])
    BOARD_HEIGHT = int(get_config("MainInfo")['board_height'])
    C = float(get_config("TreeAgentInfo")['exploration_const'])
    return MAX_HIT_POINTS, MAX_ANGLE, MAX_VELOCITY, BOARD_WIDTH, BOARD_HEIGHT, C
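
For reference, here is a sample of what the configuration file behind these snippets might contain, covering the sections and keys the examples read; every value below is an illustrative assumption:

[MainInfo]
max_hit_points = 3
max_angle = 30
max_velocity = 1
board_width = 15
board_height = 30
light_size = 2
dt = 1
interactive_mode = no
display_width = 450
display_height = 900
zombies_per_episode = 50
check_point = 100
num_train_episodes = 1000
num_test_episodes = 100

[TreeAgentInfo]
exploration_const = 1.4
simulation_num = 20
simulation_depth = 10

[DdqnAgentInfo]
batch_size = 32
gamma = 0.99
memory_size = 100000
target_update = 10
lr = 0.001
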
Example #7
def create_networks(device, agent_type, possible_actions):
    main_info = get_config('MainInfo')
    h = int(main_info['board_height'])
    w = int(main_info['board_width'])
    # create networks
    neurons_number = h * w if agent_type == 'light' else h * w // 2  # integer division - nn.Linear needs an int layer size
    input_size = 2 * h * w if agent_type == 'light' else h * w  # the light agents get extra information
    num_actions = len(possible_actions)
    target_net = DQN(input_size, num_actions, neurons_number).to(device)
    policy_net = DQN(input_size, num_actions, neurons_number).to(device)
    # initialize the target network with the same weights as the policy network
    target_net.load_state_dict(policy_net.state_dict())
    target_net.eval()
    return num_actions, target_net, policy_net
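
create_networks assumes a DQN module with the constructor signature DQN(input_size, num_actions, neurons_number). A minimal fully connected sketch that would satisfy that signature follows; the two-hidden-layer layout is an assumption:

# Hypothetical DQN matching the DQN(input_size, num_actions, neurons_number) calls above.
import torch.nn as nn

class DQN(nn.Module):
    def __init__(self, input_size, num_actions, neurons_number):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(int(input_size), int(neurons_number)),
            nn.ReLU(),
            nn.Linear(int(neurons_number), int(neurons_number)),
            nn.ReLU(),
            nn.Linear(int(neurons_number), num_actions),  # one Q-value per action
        )

    def forward(self, x):
        return self.net(x)
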
Example #8
def __init__(self, device, agent_zombie, agent_light):
    Game.MAX_HIT_POINTS, Game.MAX_ANGLE, Game.MAX_VELOCITY, Game.BOARD_WIDTH, Game.BOARD_HEIGHT = update_variables()
    main_info = get_config("MainInfo")
    self.grid = GameGrid()
    self.light_size = int(main_info['light_size'])
    self.max_angle = int(main_info['max_angle'])
    self.start_positions = self.calculate_start_positions()
    if len(self.start_positions) < 2:
        print("The angle is too wide!")
        sys.exit()
    # set interactive mode
    self.interactive_mode = main_info.getboolean('interactive_mode')
    if self.interactive_mode:
        pygame.init()
        pygame.display.set_caption('pickleking')
        self.display_width = int(main_info['display_width'])
        self.display_height = int(main_info['display_height'])
        self.game_display = pygame.display.set_mode((self.display_width, self.display_height))
        self.zombie_image, self.light_image, self.grid_image = self.set_up()
        self.clock = pygame.time.Clock()
    else:
        os.environ["SDL_VIDEODRIVER"] = "dummy"  # not strictly necessary, just makes sure no window pops up
    # set our agents
    self.agent_zombie = agent_zombie(device, 'zombie')
    self.agent_light = agent_light(device, 'light')
    # load main info
    self.steps_per_episodes = int(main_info['zombies_per_episode']) + int(main_info['board_width']) - 1
    self.zombies_per_episode = int(main_info['zombies_per_episode'])
    self.check_point = int(main_info['check_point'])
    self.total_episodes = int(main_info['num_train_episodes']) + int(main_info['num_test_episodes'])
    # other fields
    self.max_hit_points = Game.MAX_HIT_POINTS
    self.current_time = 0
    self.alive_zombies = []  # list of the currently alive zombies
    self.all_zombies = []  # list of all zombies (from all time)
    self.max_velocity = int(main_info['max_velocity'])
    self.dt = int(main_info['dt'])
    self.device = device
    self.current_screen = None
    self.done = False
    self.writer = SummaryWriter(log_dir='../runs')
Example #9
class Agent:
    BOARD_WIDTH = int(get_config("MainInfo")['board_width'])

    def __init__(self, strategy, agent_type):
        self.agent_type = agent_type
        self.strategy = strategy

    @abstractmethod
    def select_action(self, state):
        raise NotImplementedError

    @abstractmethod
    def learn(self, state, action, next_state, reward):
        raise NotImplementedError

    def reset(self):
        raise NotImplementedError
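
Each agent passes an EpsilonGreedyStrategy into this base class and calls strategy.get_exploration_rate(current_step=...). A minimal sketch with exponential decay follows; the start/end/decay defaults are assumptions:

# Hypothetical sketch of the strategy object used by the agents above.
import math

class EpsilonGreedyStrategy:
    def __init__(self, start=1.0, end=0.01, decay=0.001):  # assumed defaults
        self.start = start
        self.end = end
        self.decay = decay

    def get_exploration_rate(self, current_step):
        # exponential decay from start down to end
        return self.end + (self.start - self.end) * math.exp(-current_step * self.decay)
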
Example #10
class Zombie:
    @staticmethod
    def update_variables():
        Zombie.BOARD_HEIGHT = int(get_config("MainInfo")['board_height'])
        Zombie.BOARD_WIDTH = int(get_config("MainInfo")['board_width'])
        Zombie.LIGHT_SIZE = int(get_config("MainInfo")['light_size'])
        Zombie.DT = int(get_config("MainInfo")['dt'])
        Zombie.ANGLE = float(get_config("MainInfo")['max_angle'])
        Zombie.START_POSITIONS = calculate_start_positions(
            Zombie.BOARD_WIDTH, Zombie.BOARD_HEIGHT, Zombie.ANGLE)

    # static fields
    ZOMBIE_NUM = 1
    BOARD_HEIGHT = int(get_config("MainInfo")['board_height'])
    BOARD_WIDTH = int(get_config("MainInfo")['board_width'])
    LIGHT_SIZE = int(get_config("MainInfo")['light_size'])
    MAX_HIT_POINT = int(get_config("MainInfo")['max_hit_points'])
    DT = int(get_config("MainInfo")['dt'])
    ANGLE = float(get_config("MainInfo")['max_angle'])
    START_POSITIONS = calculate_start_positions(BOARD_WIDTH, BOARD_HEIGHT,
                                                ANGLE)

    def __init__(self, angle, velocity, state):
        """
        :param id: int
        :param angle: float, radians
        :param velocity: float, unit/sec
        :param y: float
        :param env: env_manager - when creating a zombie, we must specify in which env_manager he is born
        """
        self.id = Zombie.set_id()
        self.angle = angle
        self.velocity = velocity
        self.hit_points = 0  # number of accumulated hits (capped at MAX_HIT_POINT)
        # x,y are the real coordinates of the zombie
        self.x = 0  # every zombie starts at the left side
        self.v_x = self.velocity * np.cos(self.angle)
        self.y = Zombie.START_POSITIONS[state] / Zombie.BOARD_WIDTH  # every zombie starts at an arbitrary position drawn from the start distribution
        self.v_y = self.velocity * np.sin(self.angle)
        self.current_state = state
        # self.history = [(self.env.current_time, int(self.current_state[0]))]  # tuples of (timestamp, pos)
        self.heal_epsilon = HEAL_EPSILON
        self.just_born = True

    @staticmethod
    def set_id():
        new_zombie_id = Zombie.ZOMBIE_NUM
        Zombie.ZOMBIE_NUM += 1
        return new_zombie_id

    @staticmethod
    def reset_id():
        Zombie.ZOMBIE_NUM = 1

    def update_hit_points(self, light_action):
        light_x = int(np.mod(light_action, Zombie.BOARD_WIDTH))
        light_y = int(light_action / Zombie.BOARD_WIDTH)
        # include only the start (the end is outside the light)
        if (light_x <= self.x < light_x + Zombie.LIGHT_SIZE) and \
                (light_y <= self.y < light_y + Zombie.LIGHT_SIZE):
            # in the case of a hit, increase the zombie's hit points by 1
            if self.hit_points < self.MAX_HIT_POINT:
                self.hit_points += 1
        #else:
        #    # heal the zombie by (1-epsilon)
        #    self.hit_points *= (1 - self.heal_epsilon)

    def move(self, light_action):
        """
        1. punish/heal the zombie by the position of the light
        2. update the zombie's current position by its angle and velocity
        3. append history
        """
        if self.just_born:
            # if the zombie was just born, don't punish it; wait until the next turn to avoid double punishment  # TODO - check whether this is necessary
            # alternative idea: if the zombie was just born, punish it without moving it forward
            self.just_born = False
        else:
            # next step, move forward and punish
            self.x += self.v_x * Zombie.DT
            self.y += self.v_y * Zombie.DT
            self.current_state = self.x + self.y * Zombie.BOARD_WIDTH
        # hit/heal the zombie
        self.update_hit_points(light_action)
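
A short, hypothetical driver for the Zombie class above; the angle, velocity, state and light actions are placeholder values:

# Hypothetical usage of the Zombie class above.
z = Zombie(angle=0.0, velocity=1, state=0)  # spawns at the left edge, start row 0
for light_action in [0, 5, 5]:  # placeholder flat cell indices for the light
    z.move(light_action)  # the first call skips the forward step (just_born) but still applies the light
print(z.x, z.y, z.hit_points)
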
Example #11
class Game:
    MAX_HIT_POINTS = int(get_config("MainInfo")['max_hit_points'])
    MAX_ANGLE = int(get_config("MainInfo")['max_angle']) * math.pi / 180
    MAX_VELOCITY = int(get_config("MainInfo")['max_velocity'])
    BOARD_WIDTH = int(get_config("MainInfo")['board_width'])
    BOARD_HEIGHT = int(get_config("MainInfo")['board_height'])

    def __init__(self, device, agent_zombie, agent_light):
        Game.MAX_HIT_POINTS, Game.MAX_ANGLE, Game.MAX_VELOCITY, Game.BOARD_WIDTH, Game.BOARD_HEIGHT = update_variables()
        main_info = get_config("MainInfo")
        self.grid = GameGrid()
        self.light_size = int(main_info['light_size'])
        self.max_angle = int(main_info['max_angle'])
        self.start_positions = self.calculate_start_positions()
        if len(self.start_positions) < 2:
            print("The angle is too wide!")
            sys.exit()
        # set interactive mode
        self.interactive_mode = main_info.getboolean('interactive_mode')
        if self.interactive_mode:
            pygame.init()
            pygame.display.set_caption('pickleking')
            self.display_width = int(main_info['display_width'])
            self.display_height = int(main_info['display_height'])
            self.game_display = pygame.display.set_mode(
                (self.display_width, self.display_height))
            self.zombie_image, self.light_image, self.grid_image = self.set_up()
            self.clock = pygame.time.Clock()
        else:
            os.environ["SDL_VIDEODRIVER"] = "dummy"  # not strictly necessary, just makes sure no window pops up
        # set our agents
        self.agent_zombie = agent_zombie(device, 'zombie')
        self.agent_light = agent_light(device, 'light')
        # load main info
        self.steps_per_episodes = int(main_info['zombies_per_episode']) + int(
            main_info['board_width']) - 1
        self.zombies_per_episode = int(main_info['zombies_per_episode'])
        self.check_point = int(main_info['check_point'])
        self.total_episodes = int(main_info['num_train_episodes']) + int(
            main_info['num_test_episodes'])
        # other fields
        self.max_hit_points = Game.MAX_HIT_POINTS
        self.current_time = 0
        self.alive_zombies = []  # list of the currently alive zombies
        self.all_zombies = []  # list of all zombies (from all time)
        self.max_velocity = int(main_info['max_velocity'])
        self.dt = int(main_info['dt'])
        self.device = device
        self.current_screen = None
        self.done = False
        self.writer = SummaryWriter(log_dir='../runs')

    def calculate_start_positions(self):
        zombie_home_length = int(self.grid.get_height() -
                                 2 * self.grid.get_width() *
                                 math.tan(self.max_angle * math.pi / 180))
        zombie_home_start_pos = int(
            self.grid.get_height() - zombie_home_length -
            self.grid.get_width() *
            math.tan(self.max_angle * math.pi / 180))  # m-n-b
        return np.multiply(
            list(
                range(zombie_home_start_pos,
                      zombie_home_start_pos + zombie_home_length)),
            self.grid.get_width())

    def reset(self):
        self.current_time = 0
        Zombie.reset_id()
        self.alive_zombies = []  # list of the currently alive zombies
        self.all_zombies = []  # list of all zombies (from all time)
        self.current_screen = None
        self.agent_light.reset()
        self.agent_zombie.reset()

    def play_zero_sum_game(self, path):
        episodes_dict = {'episode_rewards': [], 'episode_durations': []}
        steps_dict_light = {'epsilon': [], 'action': [], 'step': []}
        steps_dict_zombie = {'epsilon': [], 'action': [], 'step': []}

        for episode in range(self.total_episodes):
            self.reset()
            state_zombie, state_light = self.get_state()
            zombie_master_reward = 0
            episode_start_time = time.time()
            for time_step in count():
                action_zombie, rate, current_step = self.agent_zombie.select_action(
                    state_zombie, self.alive_zombies, self.writer)
                action_light, rate, current_step = self.agent_light.select_action(
                    state_light, self.alive_zombies, self.writer)

                # update dict
                steps_dict_light['epsilon'].append(rate)
                steps_dict_light['action'].append(
                    int(action_light // self.grid.get_width()))
                steps_dict_light['step'].append(time_step)
                steps_dict_zombie['epsilon'].append(rate)
                steps_dict_zombie['action'].append(int(action_zombie))
                steps_dict_zombie['step'].append(time_step)

                reward = self.apply_actions(action_zombie, action_light)
                if reward > 0:
                    zombie_master_reward += reward
                next_state_zombie, next_state_light = self.get_state()

                self.agent_zombie.learn(state_zombie.unsqueeze(0),
                                        action_zombie,
                                        next_state_zombie.unsqueeze(0), reward,
                                        self.writer)
                self.agent_light.learn(
                    state_light.unsqueeze(0), action_light,
                    next_state_light.unsqueeze(0), reward * -1,
                    self.writer)  # agent_light gets the opposite

                state_zombie, state_light = next_state_zombie, next_state_light

                if self.done:  # if the episode is done, store its reward and plot the moving average
                    episodes_dict['episode_rewards'].append(
                        zombie_master_reward)
                    episodes_dict['episode_durations'].append(
                        time.time() - episode_start_time)
                    break

            # plotting the moving average
            if episode % self.check_point == 0:
                plot_progress(path, episodes_dict, self.check_point)

        plot_progress(path, episodes_dict, self.check_point)

        return episodes_dict, steps_dict_light, steps_dict_zombie

    def action_space(self):
        light_action_space = self.grid.get_height() * self.grid.get_width()
        zombie_action_space = len(self.start_positions)
        return light_action_space, zombie_action_space

    def apply_actions(self, zombie_action, light_action):
        """
        This method steps the game forward one step and
        shoots a bubble at the given angle.
        Parameters
        ----------
        zombie_action : int
            The action is an angle between 0 and 180 degrees, that
            decides the direction of the bubble.
        light_action
        Returns
        -------
        ob, reward, episode_over, info : tuple
            ob (object) :
                an environment-specific object representing the
                state of the environment.
            reward (float) :
                amount of reward achieved by the previous action.
            episode_over (bool) :
                whether it's time to reset the environment again.
        """
        self.current_time += 1
        # update display in case of interactive mode
        if self.interactive_mode:
            self.update(light_action)

        # add new zombie
        if len(self.all_zombies) < self.zombies_per_episode:
            new_zombie = Game.create_zombie(zombie_action)
            self.alive_zombies.append(new_zombie)
            self.all_zombies.append(new_zombie)

        # move all zombies one step and calc reward
        reward, self.alive_zombies = Game.calc_reward_and_move_zombies(
            self.alive_zombies, light_action)

        self.done = self.current_time > self.steps_per_episodes  # TODO - maybe pick another terminal condition of the game and assign it to done (as True/False)
        return reward

    @staticmethod
    def calc_reward_and_move_zombies(alive_zombies, light_action):
        """
        moving all zombies while aggregating and outputting current reward
        :return all alive zombies (haven't step out of the grid)
        """
        # temp list for later be equal to self.alive_zombies list, it's here just for the for loop (NECESSARY!)
        new_alive_zombies = list(copy.deepcopy(alive_zombies))
        reward = 0
        indices_to_keep = list(range(len(new_alive_zombies)))
        for index, zombie in enumerate(new_alive_zombies):
            zombie.move(light_action)
            if 0 >= zombie.y or zombie.y >= Game.BOARD_HEIGHT:
                indices_to_keep.remove(index)
            elif zombie.x >= Game.BOARD_WIDTH:
                # decide whether to keep the zombie alive; if so, give the zombie master a reward
                if Game.keep_alive(zombie.hit_points):
                    reward += 1
                else:
                    reward -= 1
                indices_to_keep.remove(index)  # delete a zombie that reached the border
        return reward, list(np.array(new_alive_zombies)[indices_to_keep])

    @staticmethod
    def keep_alive(h):
        if h >= Game.MAX_HIT_POINTS:  # the zombie sustained too much damage
            return False
        else:  # otherwise decide stochastically -> if the threshold is below the random draw, keep it alive; else kill it (no reward for the zombie master)
            """
            the idea is: the closer the hit points get to MAX_HIT_POINTS, the closer the threshold is to 1 ->
            -> there is a small chance of keeping the zombie alive and therefore rewarding the zombie master.
            For example, if the zombie's hit points equal MAX_HIT_POINTS, the threshold is 1 -> always return False
            (the random draw will never be greater than 1).
            In the past: sin(h * pi / (2 * max_hit_points)) < random.random()
            """
            # return np.power(h / Game.MAX_HIT_POINTS, 1 / 3) < random.random()
            return True

    def get_state(self):
        zombie_grid = self.grid.get_values()
        zombie_grid = zombie_grid.astype(np.float32)
        zombie_grid.fill(0)
        health_grid = copy.deepcopy(zombie_grid)
        for i in self.alive_zombies:
            zombie_grid[int(i.y), int(i.x)] = 1
            health_grid[int(i.y), int(i.x)] = i.hit_points
        return torch.from_numpy(zombie_grid).flatten(), torch.from_numpy(
            np.concatenate((zombie_grid, health_grid))).flatten()

    def get_pygame_window(self):
        return pygame.surfarray.array3d(pygame.display.get_surface())

    @staticmethod
    def create_zombie(position):
        if Game.MAX_ANGLE == 0:
            angle = Game.MAX_ANGLE
        else:
            angle = random.uniform(-Game.MAX_ANGLE, Game.MAX_ANGLE)
        return Zombie(angle, Game.MAX_VELOCITY, position)

    def set_up(self):
        # create the gameUtils directory if it doesn't exist
        path = os.path.join(
            os.path.abspath(os.path.join(os.path.dirname(__file__),
                                         os.pardir)), "gameUtils")
        if not os.path.exists(path):
            os.mkdir(path)
            os.chmod(path, 0o777)  # note: must be an octal literal, not decimal 777
        # get images
        zombie_image = Image.open(os.path.join(path, 'zombie.png'))
        light_image = Image.open(os.path.join(path, 'light.png'))
        # resize (light_image is scaled by light_size so it covers light_size x light_size cells)
        zombie_image = zombie_image.resize(
            (int(self.display_width / self.grid.get_width()),
             int(self.display_height / self.grid.get_height())), 0)
        light_image = light_image.resize(
            (int(self.display_width / self.grid.get_width()) * self.light_size,
             int(self.display_height / self.grid.get_height()) *
             self.light_size), 0)
        # save
        zombie_image.save(os.path.join(path, 'zombie_image.png'))
        light_image.save(os.path.join(path, 'light_image.png'))
        # draw and save the grid
        self.draw_grid()
        # return the images in the pygame format
        return (pygame.image.load(os.path.join(path, 'zombie_image.png')),
                pygame.image.load(os.path.join(path, 'light_image.png')),
                pygame.image.load(os.path.join(path, 'grid.jpeg')))

    def update(self, light_action):
        event = pygame.event.get()
        self.game_display.blit(self.grid_image, (0, 0))
        x_adjustment = int(self.display_width / self.grid.get_width())
        y_adjustment = int(self.display_height / self.grid.get_height())
        self.game_display.blit(
            self.light_image,
            (int(np.mod(light_action, self.grid.get_width()) * x_adjustment),
             int(light_action / self.grid.get_width()) * y_adjustment))
        for z in self.alive_zombies:
            self.game_display.blit(self.zombie_image,
                                   (z.x * x_adjustment, z.y * y_adjustment))
        pygame.display.update()  # better than pygame.display.flip because it can update a given region instead of the whole window
        self.clock.tick(30)  # the number of frames per second

    def draw_grid(self):
        x_size = self.display_width / self.grid.get_width()  # x size of a grid block
        y_size = self.display_height / self.grid.get_height()  # y size of a grid block
        for x in range(self.display_width):
            for y in range(self.display_height):
                rect = pygame.Rect(x * x_size, y * y_size, x_size, y_size)
                pygame.draw.rect(self.game_display, (255, 255, 255), rect, 1)
        # draw the start line
        y_adjustment = int(self.display_height / self.grid.get_height())
        pygame.draw.rect(self.game_display, (0, 200, 50), [
            0,
            int((min(self.start_positions))) / self.grid.get_width() *
            y_adjustment, 10,
            int((max(self.start_positions) + np.diff(self.start_positions)[0] -
                 min(self.start_positions))) / self.grid.get_width() *
            y_adjustment
        ])

        path = os.path.join(
            os.path.abspath(os.path.join(os.path.dirname(__file__),
                                         os.pardir)), "gameUtils")
        pygame.image.save(self.game_display, os.path.join(path, 'grid.jpeg'))

    def end_game(self):
        pygame.quit()
        quit()

    def just_starting(self):
        # the current screen is set to None at the beginning and at the end of an episode
        return self.current_screen is None

    def get_state_old(self):
        if self.just_starting() or self.done:
            self.current_screen = self.get_processed_screen()
            black_screen = torch.zeros_like(self.current_screen)
            return black_screen
        else:
            s1 = self.current_screen
            s2 = self.get_processed_screen()
            self.current_screen = s2
            return s2 - s1

    def get_processed_screen(self):
        screen = self.get_pygame_window().transpose(
            (2, 0, 1))  # PyTorch expects CHW
        screen = self.crop_screen(screen)
        return self.transform_screen_data(screen)

    def crop_screen(self, screen):
        screen_height = screen.shape[1]

        # Strip off top and bottom
        top = int(screen_height * 0)
        bottom = int(screen_height * 1)
        screen = screen[:, top:bottom, :]
        return screen

    def transform_screen_data(self, screen):
        # Convert to float, rescale, convert to tensor
        screen = np.ascontiguousarray(screen, dtype=np.float32) / 255
        screen = torch.from_numpy(screen)

        # Use torchvision package to compose image transforms
        resize = T.Compose([T.ToPILImage(), T.Resize((60, 30)), T.ToTensor()])

        return resize(screen).unsqueeze(0).to(
            self.device)  # add a batch dimension (BCHW)
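
A minimal, hypothetical driver for the Game class above. Note that the constructor expects agent classes (it instantiates them itself), and that play_zero_sum_game calls select_action with a writer argument, whereas Example #5's RandomAgent takes only state and alive_zombies - so the agent classes must match the Game version in use. The device and output path are placeholders:

# Hypothetical driver for the Game class above.
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
game = Game(device, RandomAgent, RandomAgent)  # any compatible agent classes
episodes_dict, steps_light, steps_zombie = game.play_zero_sum_game('../plots')  # placeholder output path
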
Example #12
def update_variables():
    BOARD_WIDTH = int(get_config("MainInfo")['board_width'])
    BOARD_HEIGHT = int(get_config("MainInfo")['board_height'])
    return BOARD_WIDTH, BOARD_HEIGHT
Example #13
class BasicMCTSAgent(Agent):
    MAX_HIT_POINTS = int(get_config("MainInfo")['max_hit_points'])
    MAX_ANGLE = int(get_config("MainInfo")['max_angle'])
    MAX_VELOCITY = int(get_config("MainInfo")['max_velocity'])
    BOARD_WIDTH = int(get_config("MainInfo")['board_width'])
    BOARD_HEIGHT = int(get_config("MainInfo")['board_height'])
    C = float(get_config("TreeAgentInfo")['exploration_const'])

    def __init__(self, device, agent_type):
        BasicMCTSAgent.MAX_HIT_POINTS, BasicMCTSAgent.MAX_ANGLE, BasicMCTSAgent.MAX_VELOCITY, BasicMCTSAgent.BOARD_WIDTH, BasicMCTSAgent.BOARD_HEIGHT, BasicMCTSAgent.C = update_variables()
        super().__init__(EpsilonGreedyStrategy(), agent_type)
        self.possible_actions = list(range(Game.BOARD_HEIGHT)) if self.agent_type == 'zombie' else list(range(Game.BOARD_HEIGHT * Game.BOARD_WIDTH))
        self.root = Node([], self.possible_actions)
        self.temporary_root = self.root  # TODO - change its name to something like: real world state-node
        self.current_step = 0
        self.simulation_reward = 0
        self.simulation_num = int(get_config("TreeAgentInfo")['simulation_num'])  # number of simulations in the simulation phase
        self.simulation_depth = int(get_config("TreeAgentInfo")['simulation_depth'])  # number of times to expand a node in single simulation
        self.episode_reward = 0
        self.tree_depth = 0

        self.pool = mp.Pool(mp.cpu_count())

        main_info = get_config('MainInfo')
        self.steps_per_episodes = int(main_info['zombies_per_episode']) + int(main_info['board_width'])
        self.total_episodes = int(main_info['num_train_episodes']) + int(main_info['num_test_episodes'])

    def select_action(self, state, alive_zombies):
        rate = self.strategy.get_exploration_rate(current_step=self.current_step)
        self.current_step += 1

        # selection phase
        selected_child = self.selection()
        assert selected_child.num_children == 0 or selected_child.is_terminal

        # expansion phase: here we select the action from which we will simulate the selected_child's play-out
        # keep in mind that in this phase we expand a node that is NOT the temporary root; the expansion action is unrelated to the real action we are taking
        # action = self.expansion_all_children(selected_child)
        if selected_child == self.root:
            # if the selected child is the root, expand all its children
            expanded_child = self.expansion_all_children(selected_child)
        elif selected_child.parent is not None and selected_child.parent.num_children != len(self.possible_actions):
            # if the selected child is missing siblings (we reached it through a real action), expand all its siblings and choose one
            expanded_child = self.expansion_all_children(selected_child.parent)
        elif selected_child.visits == 0:
            # if we have never visited that node, start the roll-out from there
            expanded_child = selected_child
        else:
            # the node is a leaf, but we have already been there
            expanded_child = self.expansion_all_children(selected_child)

        assert expanded_child.num_children == 0
        assert selected_child.num_children == 0 or selected_child.num_children == len(self.possible_actions)

        # simulation phase
        self.simulation(expanded_child)

        # select next action
        action = self.select_expansion_action(self.temporary_root, self.possible_actions)
        self.expansion_all_children(self.temporary_root)
        self.temporary_root = self.temporary_root.children[action]
        assert self.temporary_root.num_children == len(self.possible_actions) or self.temporary_root.num_children == 0
        # self.PrintTree()

        # when the game ends - close the pool to avoid memory explosion
        if self.current_step == self.total_episodes * self.steps_per_episodes:
            self.pool.close()
            self.pool.join()

        return action, rate, self.current_step

    def learn(self, _, action, __, reward):
        # back-propagation phase, start back-propagating from the current real world node
        # self.episode_reward += reward
        # self.back_propagation(self.temporary_root, reward, self.root)
        pass

    def selection(self):
        """
        The selection Phase in the MCTS algorithm.
        selects leaf by following the UCT algorithm
        :return:
        """
        selected_child = self.temporary_root

        # check if child nodes exist
        has_child = selected_child.num_children > 0

        while has_child:
            # select the best child unless there is an unexpanded child on the way - this is why the select_child method is needed
            selected_child = self.select_child(selected_child)
            if selected_child.num_children == 0 or selected_child.is_terminal:
                has_child = False

        return selected_child

    def select_child(self, node: Node) -> Node:
        """
        Given a node, selects a random unvisited child node,
        or, if all children are visited, selects the child with the greatest UCT value.
        @note: we must start the selection from here - when a child is expanded, all its siblings are expanded immediately too.
        On the next turn we may want to start simulating from one of those siblings instead of always picking the same child with 'select_best_child'
        (after one sibling has been evaluated with the 'eval_utc' method, that sibling would otherwise always be selected via 'select_best_child').
        :param node: node from which to select a child.
        """
        if node.num_children == 0:
            return node

        # check whether 'node' has any unvisited children (children from which no roll-out has started yet)
        not_visited_actions = []
        assert node.num_children == 0 or node.num_children == len(self.possible_actions)
        for action, child in node.children.items():
            if child.visits == 0:
                not_visited_actions.append(action)
        # choose one of the unvisited children - if there are any
        if len(not_visited_actions) > 0:
            action = random.sample(not_visited_actions, 1)[0]
            return node.children[action]

        return BasicMCTSAgent.select_best_child(node)

    @staticmethod
    def select_best_child(node):
        """
        Selects the best child of a node
        :param node: Node to select one of its children
        :return: highest UCT valued child
        """
        selected_child = node

        if node.num_children == 0:
            return node

        max_weight = 0.0
        possible_children = []
        for child in list(filter(None, node.children.values())):
            weight = child.uct
            if len(possible_children) == 0:
                possible_children.append(child)
                max_weight = weight
            elif weight == max_weight:
                possible_children.append(child)
            elif weight > max_weight:
                possible_children = [child]
                max_weight = weight
        if len(possible_children) > 0:
            selected_child = random.sample(possible_children, 1)[0]
        return selected_child

    def expansion_all_children(self, leaf):
        self.eval_children(leaf, self.possible_actions)
        return random.sample(list(leaf.children.values()), 1)[0]

    def expansion_one_child(self, leaf):
        action = self.select_expansion_action(leaf, self.possible_actions)
        self.eval_children(leaf, [action])
        return action

    def eval_children(self, node, actions):
        """
        Evaluates all the possible children states given a node state
        :param node: node from which to evaluate children.
        :param actions: list of all possible actions to choose from
        :return: returns the possible children Nodes
        """
        assert node.num_children == len(self.possible_actions) or node.num_children == 0
        if node.num_children == 0:
            for action in actions:
                _, alive_zombies = BasicMCTSAgent.simulate_action(node.state, self.agent_type, action)
                node.add_child(alive_zombies, action)

        return node.children

    def select_expansion_action(self, node, possible_actions):
        """
        Wisely selects a child node.
        :param node: the selected node to expand child from
        :param possible_actions: list of all possible actions to choose from
        :return: the selected action
        """
        selected_child = self.select_best_child(node)
        assert selected_child is not None

        selected_action = None
        if selected_child == node:
            selected_action = random.sample(self.possible_actions, 1)[0]
        else:
            for key, value in node.children.items():
                if value == selected_child:
                    selected_action = key
        assert selected_action is not None

        return selected_action

    @staticmethod
    def select_simulation_action(alive_zombies, possible_actions):
        # randomly selects an action for the play-out
        return random.sample(possible_actions, 1)[0]

    def simulation(self, selected_child):
        """
        Simulates play-outs from previous states and actions.
        This phase happens right after the expansion and runs from the selected child.
        :param selected_child: node from which to perform the simulation.
        :return:
        """
        # Perform simulation.
        list_of_objects = []
        simulation_state = selected_child.state

        for _ in range(self.simulation_num):
            obj = CostlySimulation(self.simulation_depth, simulation_state, self.possible_actions, self.agent_type)
            list_of_objects.append(obj)

        list_of_results = self.pool.map(BasicMCTSAgent.worker, ((obj, BasicMCTSAgent.BOARD_HEIGHT, BasicMCTSAgent.BOARD_WIDTH) for obj in list_of_objects))
        assert np.max(list_of_results) <= self.simulation_depth

        average_total_reward = np.average(list_of_results) if self.agent_type == 'zombie' else -1 * np.average(list_of_results)

        # back-prop from the expanded child (the child of the selected node)
        BasicMCTSAgent.back_propagation(selected_child, average_total_reward, self.root)

    @staticmethod
    def worker(arg):
        return arg[0].costly_simulation(arg[1], arg[2])

    @staticmethod
    def simulate_action(alive_zombies, agent_type, action):
        """
        Simulates one future step by an agent's action.
        :param alive_zombies: all zombies alive in the real world
        :param agent_type: 'zombie' or 'light' agent
        :param action: the action to simulate
        :return: the reward of the simulated step and the resulting alive zombies
        """
        new_alive_zombies = list(copy.deepcopy(alive_zombies))  # make a copy of all zombies - we do not want to act on the real world

        # set the zombie and light agents' actions
        if agent_type == 'zombie':
            zombie_action = action
            # the light action could be sampled from the light agent's action space; here it is fixed
            light_action = 0  # np.random.randint(0, BasicMCTSAgent.BOARD_HEIGHT * BasicMCTSAgent.BOARD_WIDTH)
        else:
            light_action = action
            # sample from the zombie agent's action space
            zombie_action = np.random.randint(0, BasicMCTSAgent.BOARD_HEIGHT)

        # simulate and aggregate reward
        total_reward = 0
        new_zombie = Game.create_zombie(zombie_action)
        new_alive_zombies.append(new_zombie)
        reward, final_alive_zombies = Game.calc_reward_and_move_zombies(new_alive_zombies, light_action)
        total_reward += reward

        return total_reward, final_alive_zombies

    @staticmethod
    def back_propagation(node, result, root):
        current_node = node

        # Update node's weight.
        BasicMCTSAgent.eval_utc(current_node, result)

        # keep updating until the desired root
        while current_node.level != root.level:
            # Update parent node's weight.
            current_node = current_node.parent
            BasicMCTSAgent.eval_utc(current_node, result)

    @staticmethod
    def eval_utc(node, result):
        node.wins += result
        node.visits += 1

        node.uct = node.wins / node.visits + BasicMCTSAgent.evaluate_exploration(node)

    @staticmethod
    def evaluate_exploration(node):
        n = node.visits
        if node.parent is None:
            t = node.visits
        else:
            t = node.parent.visits

        # avoid log of 0 with: 't or 1'
        return BasicMCTSAgent.C * np.sqrt(np.log(t or 1) / n)

    @staticmethod
    def has_parent(node):
        return node.parent is not None

    def reset(self):
        # BasicMCTSAgent.back_propagation(self.temporary_root, self.episode_reward)
        self.temporary_root = self.root
        # self.episode_reward = 0
        # if self.agent_type == 'zombie':
        #     self.PrintTree()

    def print_tree(self):
        """
        Prints the tree to file.
        :return:
        """
        path = os.path.join(os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)), 'Tree.txt')
        with open(path, 'w') as f:
            self.print_node(f, self.root, "")

    def print_node(self, file, node, indent):
        """
        Prints the tree node and its details to file.
        :param file: file to write into
        :param node: node to print.
        :param indent: Indent character.
        :return:
        """
        file.write(indent)
        file.write("|-")
        indent += "| "

        string = str(node.level) + " ("
        string += "W: " + str(node.wins) + ", N: " + str(node.visits) + ", UCT: " + str(node.uct) + ") \n"
        file.write(string)

        for child in list(filter(None, list(node.children.values()))):
            self.print_node(file, child, indent)
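
BasicMCTSAgent leans on a Node class that is not shown: it needs state, parent, level, children, num_children, visits, wins, uct, is_terminal and add_child. Below is a minimal sketch consistent with those accesses; the is_terminal handling and the exact field layout are assumptions:

# Hypothetical sketch of the tree node used by BasicMCTSAgent above.
class Node:
    def __init__(self, state, possible_actions, parent=None):
        self.state = state              # alive zombies represented by this node
        self.possible_actions = possible_actions
        self.parent = parent
        self.level = 0 if parent is None else parent.level + 1
        self.children = {}              # action -> child Node
        self.visits = 0
        self.wins = 0.0
        self.uct = 0.0
        self.is_terminal = False        # assumption: set externally when the game ends

    @property
    def num_children(self):
        return len(self.children)

    def add_child(self, state, action):
        self.children[action] = Node(state, self.possible_actions, parent=self)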