import numpy as np

# VisualDebugger is provided by the SerpentAI framework (import path assumed)
from serpent.visual_debugger.visual_debugger import VisualDebugger


class GameFrameBuffer:

    def __init__(self, size=5):
        self.size = size
        self.frames = list()

        self.visual_debugger = VisualDebugger()

    @property
    def full(self):
        return len(self.frames) >= self.size

    @property
    def previous_game_frame(self):
        return self.frames[0] if len(self.frames) else None

    def add_game_frame(self, game_frame):
        if self.full:
            self.frames = [game_frame] + self.frames[:-1]
        else:
            self.frames = [game_frame] + self.frames

    def to_visual_debugger(self):
        for i, game_frame in enumerate(self.frames):
            self.visual_debugger.store_image_data(
                np.array(game_frame.frame * 255, dtype="uint8"),
                game_frame.frame.shape,
                f"frame_{i + 1}"
            )
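
A minimal usage sketch for the buffer above. SerpentAI's GameFrame objects expose a .frame ndarray, so a tiny stand-in object is enough to exercise the API here; the stand-in class and the random frames are purely illustrative, and constructing GameFrameBuffer still requires SerpentAI's VisualDebugger to be importable.

import numpy as np

class FakeGameFrame:
    # Stand-in for serpent's GameFrame: only the .frame attribute is used below
    def __init__(self, frame):
        self.frame = frame

buffer = GameFrameBuffer(size=3)

for _ in range(5):
    buffer.add_game_frame(FakeGameFrame(np.random.rand(64, 64)))

print(buffer.full)                              # True once 3 frames are held
print(buffer.previous_game_frame.frame.shape)   # (64, 64); frames[0] is the newest frame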
Example #2
class T4TFEnv(py_environment.PyEnvironment):

    def __init__(self, fake=False, metrics_key='001'):
        with open('running', 'w') as f:
            f.write(str(os.getpid()))
        
        self._episode_ended = False

        self.game = serpent.initialize_game('T4TF1')

        game_frame = self.game.screen_regions['GAME_REGION']
        self.width = 10
        self.height = 10

        self.state_shape = (int(self.height / 2), int(self.width / 2), 1)
        self._action_spec = array_spec.BoundedArraySpec(
            shape=(), dtype=np.int32, minimum=0, maximum=1, name='action')
        self._observation_spec = array_spec.BoundedArraySpec(
            shape=self.state_shape, dtype=np.float32, minimum=0.0, name='observation')


        self._state = np.zeros(self.state_shape).astype(np.float32)

        if fake:
            return
        self.interrupted = False

        self.game.launch()
        self.game.start_frame_grabber()
        self.input_controller = InputController(game=self.game)
        # self.input_proc = 

        self.frame_buffer = FrameGrabber.get_frames([0])
        self.frame_buffer = self.extract_game_area(self.frame_buffer)

        self.width = self.frame_buffer[0].shape[1]
        self.height = self.frame_buffer[0].shape[0]
        print('width: %d' % self.width)
        print('height: %d' % self.height)
        self.state_shape = (self.height, self.width, 3)
        self._action_spec = array_spec.BoundedArraySpec(
            shape=(), dtype=np.int32, minimum=0, maximum=1, name='action')
        self._observation_spec = array_spec.BoundedArraySpec(
            shape=self.state_shape, dtype=np.float32, minimum=0.0, name='observation')

        self._state = np.zeros(self.state_shape).astype(np.float32)

        # print('created input with pid: %s' % self.input_proc.pid)
        self.sell_keys = [KeyboardKey.KEY_LEFT_SHIFT, KeyboardKey.KEY_LEFT_CTRL, KeyboardKey.KEY_S]
        self.buy_keys = [KeyboardKey.KEY_LEFT_SHIFT, KeyboardKey.KEY_LEFT_CTRL, KeyboardKey.KEY_B]
        self.step_keys = [KeyboardKey.KEY_LEFT_SHIFT, KeyboardKey.KEY_LEFT_CTRL, KeyboardKey.KEY_F]


        self.visual_debugger = VisualDebugger()

        self.scraper = T4Scraper(game=self.game, visual_debugger=self.visual_debugger)
        frame = self.game.grab_latest_frame()
        self.scraper.current_frame = frame
        self.pl = 0
        self.working_trade = 0
        self.current_action = ''
        self.held = False
        self.fill_count = 0

        self.window_controller = WindowController()
        self.window_id = self.window_controller.locate_window(".*Mini-Dow .*")
        # self.window_id = self.window_controller.locate_window(".*S&P .*")

        self.keys = RedisKeys(metrics_key)
#         self.redis = redis.Redis(port=6001)
    
        self.number_of_trades = 0
        self.number_of_wins = 0
        self.buys = 0
        self.sells = 0
        self.holds = 0
        self.history = list()
        self.actions = 0
        self.last_action = ''

        self.previous_write = -1
        self.get_metadata()
        
        self.active_frame = None
        
        self.start_time = time.time()
        
        self.step_read_time = 0
        self.step_write_time = 0
        

    def get_state(self, zeros=False):
        if zeros:
            # Reset the stacked frame history before returning an all-zero state
            self.frame_history = [
                np.zeros((int(self.width / 4), int(self.height / 4))).astype(np.float32)
                for _ in range(4)
            ]
            return np.zeros(self.state_shape).astype(np.float32)

        st = np.stack(
            self.frame_history,
            axis=2
        ).astype(np.float32)

        print(st.shape)
        return st

    
    def is_focused(self):
        return self.window_controller.is_window_focused(self.window_id)

    def display_frame(self):
        frame = self.game.grab_latest_frame()
        self.visual_debugger.store_image_data(
            frame.frame,
            frame.frame.shape,
            str(2)
        )

    def action_spec(self):
        return self._action_spec

    def observation_spec(self):
        return self._observation_spec

    def _reset(self):
        print('RESTARTING')
        self._state = np.zeros(self.state_shape).astype(np.float32)
        self._episode_ended = False
        return ts.restart(self._state)


    def update_scraper_frame(self):
        self.scraper.current_frame = self.game.grab_latest_frame()
        self.visual_debugger.store_image_data(
            self.scraper.current_frame.frame,
            self.scraper.current_frame.frame.shape,
            str(2)
        )

    def stop(self):
        # input_proc is never started (its assignment in __init__ is commented out),
        # so guard against the missing attribute before killing it
        if getattr(self, 'input_proc', None) is not None:
            self.input_proc.kill()
        self.game.stop_frame_grabber()
        return ts.termination(self._state, 0)
    
    def write_order(self, order_type):
        write_start = time.time()
#         if order_type is self.previous_write: return
        with open('/home/dan/.wine/drive_c/input.txt', 'w') as f:
            f.write('%d' % (order_type))
        
        self.step_write_time += (time.time() - write_start)
        self.previous_write = order_type

    def step_forward(self):
        self.write_order(3)
        
    def add_to_history(self, frame, action, reward):
        history_start = time.time()
        im = Image.fromarray(frame)
        im.save('history/%d_%d_%f.jpg' % (int(datetime.now().timestamp() * 100), action, reward))
        print("history add time: %s seconds" % (time.time() - history_start))
#     def log(self, string):
#         if self.actions % 200 == 0:
#             print(string)
#      
    def _step(self, action):
        self.step_read_time = 0
        self.step_write_time = 0
        
        if self.interrupted:
            return self.stop()

        print('----')
        if self._episode_ended:
            # The last action ended the episode. Ignore the current action and
            # start a new episode.
            return self.reset()

        if action == 0:
            # perform buy
            self.current_action = 'buy'
            self.working_trade = True
            # self.input_controller.tap_keys(self.buy_keys, duration=0.001)
            self.write_order(action)

        elif action == 1:
            # perform sell
            self.current_action = 'sell'
            self.working_trade = True
            # self.input_controller.tap_keys(self.sell_keys, duration=0.001)
            self.write_order(action)
        elif action == 2:
            print('hold')
            self.current_action = 'hold'
            for _ in range(3):
#                 self.write_order(action)
                self.step_forward()
                sleep(0.1)
                
        self.actions += 1

        if action < 2:
            self.number_of_trades += 1
        
            while not self.has_open_positions():
                sleep(0.01)

                if self.interrupted:
                    return self.stop()


            while self.has_open_positions():
                self.step_forward()
                sleep(0.01)

                if self.interrupted:
                    return self.stop()
        
        
        self.write_order(4)
        
        
        sleep(0.2)
        reward = self.reward_agent()

        if self.actions > 1:
            self.add_to_history(self.active_frame, action, reward)

        start_grab_frame = time.time()
        self.frame_buffer = FrameGrabber.get_frames([0])
        self.frame_buffer = self.extract_game_area(self.frame_buffer)
        print("frame grab time: %s seconds" % (time.time() - start_grab_frame))
        self.active_frame = self.frame_buffer[0]
#         for i, game_frame in enumerate(self.frame_buffer):
#             if i >= 3: break
#             self.visual_debugger.store_image_data(
#                 game_frame,
#                 game_frame.shape,
#                 str(i)
#             )
        print(self.frame_buffer[0].shape)    

#         self.frame_history.insert(0, self.frame_buffer[0])
        
#         self._states = self.get_state()
        states_shape = self.state_shape
        print(states_shape)
        self._states = np.reshape(self.frame_buffer[0], (states_shape[0], states_shape[1], states_shape[2])).astype(np.float32)
        print(self._states.shape)
        if self.number_of_trades > 0:
            self.push_metadata(action=self.current_action, reward=reward)
        
            print('Wins: %d%% - %d / %d' % ((self.number_of_wins / self.number_of_trades * 100), self.number_of_wins, self.number_of_trades))
            
        
        # print history
        
        if len(self.history) > 0:
            historical_wins = len(list(filter(lambda x: x[1] > 0, self.history)))
            print('Wins (last %d): %d%%' % (len(self.history), (historical_wins / len(self.history) * 100)))
        
        print('Buys: %d' % self.buys)
        print('Sells: %d' % self.sells)
        print('Holds: %d' % self.holds)
        
        print('Step read time: %s' % self.step_read_time)
        print('Step write time: %s' % self.step_write_time)
        # print(states.shape)
        return ts.transition(self._states, reward=reward, discount=1.0)

    def read_position_and_pl(self):
        read_start = time.time()
        result = ['','']
        while len(result[0]) < 1 or len(result[1]) < 1:
            with open('/home/dan/.wine/drive_c/output.txt', 'r') as f:
                result = [x.strip() for x in f.read().split(',')]
        
        self.step_read_time += (time.time() - read_start)
        return (int(result[0]), int(result[1]))

    def has_open_positions(self):
        result = self.read_position_and_pl()
        pos = result[0]
        pl = result[1]
        
        # if self.working_trade:
        #     if pos != 0:
        #         self.working_trade = False
        #     else:
        #         if pl != self.pl:
        #             self.working_trade = False
        #             return False
        #         else:
        #             return True
        
        if pos != 0:
            return True
        return False

    def reward_agent(self):
        # get pl for last trade

        newPL = self.read_position_and_pl()[1]
        print('old pl: %d' % self.pl)
        print('new pl: %d' % newPL)
        if newPL > self.pl:
            reward = 1.0
        else:
            reward = -1.0

        if self.current_action == 'hold':
            reward = -0.25
        else:
            if reward > 0:
                if self.last_action == 'hold':
                    reward = 1.25
        # if reward is 1.0:
        #     if self.current_action is 'buy' and self.buys > self.sells:
        #         reward = 0.8
        #     elif self.current_action is 'sell' and self.sells > self.buys:
        #         reward = 0.8
        # elif reward is -1.0:
        #     if self.current_action is 'sell' and self.buys > self.sells:
        #         reward = -0.8
        #     elif self.current_action is 'buy' and self.sells > self.buys:
        #         reward = -0.8
                
        
        self.last_action = self.current_action
        print('REWARD: %f' % reward)
        self.pl = newPL
        return reward
        
    def extract_game_area(self, frame_buffer):
        game_area_buffer = []
#         for game_frame in frame_buffer.frames:
#             game_area = cv.extract_region_from_image(
#                 game_frame.grayscale_frame,
#                 self.game.screen_regions["GAME_REGION"]
#             )

        frame = frame_buffer.frames[0].quarter_resolution_frame
#         frame = FrameTransformer.rescale(frame_buffer.frames[0].grayscale_frame, 0.5)
        game_area_buffer.append(frame)

        return game_area_buffer


    def get_metadata(self):
        # Redis-backed metadata restore is currently disabled; drop this early
        # return (and re-enable self.redis in __init__) to turn it back on.
        return
        if self.redis.exists(self.keys.trades):
            self.number_of_trades = self.redis.llen(self.keys.trades)
            
            self.history = list()
            history_strings = self.redis.lrange(self.keys.trades, -100, 100)
            for s in history_strings:
                d = json.loads(s)
                self.history.append((d['action'], float(d['reward'])))
        
        if self.redis.exists(self.keys.wins):
            self.number_of_wins = int(self.redis.get(self.keys.wins))
            
        if self.redis.exists(self.keys.buys):
            self.buys = int(self.redis.get(self.keys.buys))
        
        if self.redis.exists(self.keys.sells):
            self.sells = int(self.redis.get(self.keys.sells))
        
        if self.redis.exists(self.keys.holds):
            self.holds = int(self.redis.get(self.keys.holds))

    def push_metadata(self, action, reward, reset=False):
        # Redis-backed metrics are currently disabled; drop this early
        # return (and re-enable self.redis in __init__) to turn them back on.
        return
        if action is None:
            return
            
        # last 100
        if action != 'hold':
            while len(self.history) >= 100:
                self.history.pop(0)    
    
            self.history.append((action, reward))

        
        obj = {
            'timestamp':str(datetime.now().timestamp()),
            'action': action,
            'reward': str(reward)
        }
        
        self.redis.rpush(self.keys.trades, json.dumps(obj))
        
        if action == 'buy':
            self.buys += 1
            self.redis.incr(self.keys.buys)
        elif action == 'sell':
            self.sells += 1
            self.redis.incr(self.keys.sells)
        elif action == 'hold':
            self.holds += 1
            self.redis.incr(self.keys.holds)
            
        if reward > 0:
            self.number_of_wins += 1
            self.redis.incr(self.keys.wins)
            
        if reset:
            self.redis.set(self.keys.buys, "0")
            self.redis.set(self.keys.sells, "0")
            self.redis.set(self.keys.wins, "0")
            self.redis.set(self.keys.trades, "0")
            self.redis.set(self.keys.holds, "0")
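
The reward shaping in reward_agent above boils down to a small rule: +1 when the profit/loss improved, -1 otherwise, a flat -0.25 penalty for holding, and a 1.25 bonus for a winning trade that immediately follows a hold. A standalone sketch of that rule, with an illustrative function name and example values:

def shape_reward(old_pl, new_pl, current_action, last_action):
    # Mirrors the logic of T4TFEnv.reward_agent above
    reward = 1.0 if new_pl > old_pl else -1.0

    if current_action == 'hold':
        reward = -0.25
    elif reward > 0 and last_action == 'hold':
        reward = 1.25

    return reward

# A winning trade placed right after a hold earns the bonus
print(shape_reward(old_pl=0, new_pl=50, current_action='buy', last_action='hold'))  # 1.25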
Example #3
class DQN:

    def __init__(
        self,
        input_shape=None,
        input_mapping=None,
        replay_memory_size=10000,
        batch_size=32,
        action_space=None,
        max_steps=1000000,
        observe_steps=None,
        initial_epsilon=1.0,
        final_epsilon=0.1,
        gamma=0.99,
        model_file_path=None,
        model_learning_rate=2.5e-4,
        override_epsilon=False
    ):
        self.type = "DQN"
        self.input_shape = input_shape
        self.replay_memory = ReplayMemory(memory_size=replay_memory_size)
        self.batch_size = batch_size
        self.action_space = action_space
        self.action_count = len(self.action_space.combinations)
        self.action_input_mapping = self._generate_action_space_combination_input_mapping(input_mapping)
        self.frame_stack = None
        self.max_steps = max_steps
        self.observe_steps = observe_steps or (0.1 * replay_memory_size)
        self.current_observe_step = 0
        self.current_step = 0
        self.initial_epsilon = initial_epsilon
        self.final_epsilon = final_epsilon
        self.previous_epsilon = initial_epsilon
        self.epsilon_greedy_q_policy = EpsilonGreedyQPolicy(
            initial_epsilon=self.initial_epsilon,
            final_epsilon=self.final_epsilon,
            max_steps=self.max_steps
        )
        self.gamma = gamma
        self.current_action = None
        self.current_action_index = None
        self.current_action_type = None
        self.first_run = True
        self.mode = "OBSERVE"

        self.model_learning_rate = model_learning_rate
        self.model = self._initialize_model()

        if model_file_path is not None:
            self.load_model_weights(model_file_path, override_epsilon)

        self.model_loss = 0

        self.visual_debugger = VisualDebugger()

    def enter_train_mode(self):
        if self.previous_epsilon is not None:
            self.epsilon_greedy_q_policy.epsilon = self.previous_epsilon
            self.previous_epsilon = None

        self.mode = "TRAIN"

    def enter_run_mode(self):
        self.previous_epsilon = self.epsilon_greedy_q_policy.epsilon
        self.epsilon_greedy_q_policy.epsilon = 0.01
        self.mode = "RUN"

    def next_step(self):
        if self.mode == "TRAIN":
            self.current_step += 1
        elif self.mode == "OBSERVE":
            self.current_observe_step += 1

        if self.mode == "OBSERVE" and self.current_observe_step >= self.observe_steps:
            self.mode = "TRAIN"

    def build_frame_stack(self, game_frame):
        frame_stack = np.stack((
            game_frame,
            game_frame,
            game_frame,
            game_frame
        ), axis=2)

        self.frame_stack = frame_stack.reshape((1,) + frame_stack.shape)

    def update_frame_stack(self, game_frame_buffer):
        game_frames = [game_frame.eighth_resolution_grayscale_frame for game_frame in game_frame_buffer.frames]
        frame_stack = np.stack(game_frames, axis=2)

        self.frame_stack = frame_stack.reshape((1,) + frame_stack.shape)

    def append_to_replay_memory(self, game_frame_buffer, reward, terminal=False):
        previous_frame_stack = self.frame_stack
        self.update_frame_stack(game_frame_buffer)

        observation = [
            previous_frame_stack,
            self.current_action_index,
            reward,
            self.frame_stack,
            terminal
        ]

        self.replay_memory.add(self.calculate_target_error(observation), observation)

    def calculate_target_error(self, observation):
        previous_target = self.model.predict(observation[0])[0][observation[1]]

        if observation[4]:
            target = observation[2]
        else:
            target = observation[2] + self.gamma * np.max(self.model.predict(observation[3]))

        return np.abs(target - previous_target)

    def pick_action(self, action_type=None):
        if action_type is None:
            self.compute_action_type()
        else:
            self.current_action_type = action_type

        qs = self.model.predict(self.frame_stack)

        if self.current_action_type == "RANDOM":
            self.current_action_index = random.randrange(self.action_count)
            self.maximum_future_rewards = None
        elif self.current_action_type == "PREDICTED":
            self.current_action_index = np.argmax(qs)
            self.maximum_future_rewards = qs

    def compute_action_type(self):
        use_random = self.epsilon_greedy_q_policy.use_random()
        self.current_action_type = "RANDOM" if use_random else "PREDICTED"

    def erode_epsilon(self, factor=1):
        if self.mode == "TRAIN":
            self.epsilon_greedy_q_policy.erode(factor=factor)

    def generate_mini_batch(self):
        if self.mode == "OBSERVE":
            return None

        return self.replay_memory.sample(self.batch_size)

    def train_on_mini_batch(self):
        mini_batch = self.generate_mini_batch()

        flashback_indices = random.sample(range(self.batch_size), 6)

        for i in range(0, len(mini_batch)):
            if i in flashback_indices:
                flashback_image = np.squeeze(mini_batch[i][1][3][:, :, :, 1])

                self.visual_debugger.store_image_data(
                    np.array(flashback_image * 255, dtype="uint8"),
                    flashback_image.shape,
                    f"flashback_{flashback_indices.index(i) + 1}"
                )

                del flashback_image

            previous_frame_stack = mini_batch[i][1][0]
            action_index = mini_batch[i][1][1]
            reward = mini_batch[i][1][2]
            frame_stack = mini_batch[i][1][3]
            terminal = mini_batch[i][1][4]

            # model.predict returns a batch of size 1, so index into row 0
            target = self.model.predict(previous_frame_stack)
            previous_target = target[0][action_index]

            projected_future_rewards = self.model.predict(frame_stack)

            if terminal:
                target[0][action_index] = reward
            else:
                target[0][action_index] = reward + self.gamma * np.max(projected_future_rewards)

            error = np.abs(target[0][action_index] - previous_target)
            self.replay_memory.update(mini_batch[i][0], error)

            self.model.fit(previous_frame_stack, target, epochs=1, verbose=0)

    def generate_action(self):
        self.current_action = self.action_space.combinations[self.current_action_index]

    def get_action_for_index(self, action_index):
        return [action_input.upper() for action_input in self.action_input_mapping[self.action_space.combinations[action_index]]]

    def get_input_values(self):
        return self.action_input_mapping[self.current_action]

    def save_model_weights(self, file_path_prefix="datasets/model_", is_checkpoint=False):
        epsilon = self.epsilon_greedy_q_policy.epsilon

        if is_checkpoint:
            file_path = f"{file_path_prefix}_dqn_{self.current_step}_{epsilon}_.h5"
        else:
            file_path = f"{file_path_prefix}_dqn_{epsilon}_.h5"

        self.model.save_weights(file_path, overwrite=True)

    def load_model_weights(self, file_path, override_epsilon):
        self.model.load_weights(file_path)
        self.model.compile(loss="logcosh", optimizer=Adam(lr=self.model_learning_rate, clipvalue=10))

        *args, steps, epsilon, extension = file_path.split("_")
        self.current_step = int(steps)

        if override_epsilon:
            self.previous_epsilon = float(epsilon)
            self.epsilon_greedy_q_policy.epsilon = float(epsilon)

    def output_step_data(self):
        if self.mode in ["TRAIN", "OBSERVE"]:
            print(f"CURRENT MODE: {self.mode}")
        else:
            cprint(f"CURRENT MODE: {self.mode}", "grey", "on_yellow", attrs=["dark"])

        print(f"CURRENT STEP: {self.current_step}")

        if self.mode == "OBSERVE":
            print(f"CURRENT OBSERVE STEP: {self.current_observe_step}")
            print(f"OBSERVE STEPS: {self.observe_steps}")

        print(f"CURRENT EPSILON: {round(self.epsilon_greedy_q_policy.epsilon, 6)}")
        print(f"CURRENT RANDOM ACTION PROBABILITY: {round(self.epsilon_greedy_q_policy.epsilon * 100.0, 2)}%")
        print(f"LOSS: {self.model_loss}")

    def _initialize_model(self):
        input_layer = Input(shape=self.input_shape)

        tower_1 = Convolution2D(16, 1, 1, border_mode="same", activation="elu")(input_layer)
        tower_1 = Convolution2D(16, 3, 3, border_mode="same", activation="elu")(tower_1)

        tower_2 = Convolution2D(16, 1, 1, border_mode="same", activation="elu")(input_layer)
        tower_2 = Convolution2D(16, 3, 3, border_mode="same", activation="elu")(tower_2)
        tower_2 = Convolution2D(16, 3, 3, border_mode="same", activation="elu")(tower_2)

        tower_3 = MaxPooling2D((3, 3), strides=(1, 1), border_mode="same")(input_layer)
        tower_3 = Convolution2D(16, 1, 1, border_mode="same", activation="elu")(tower_3)

        merged_layer = merge([tower_1, tower_2, tower_3], mode="concat", concat_axis=1)

        output = AveragePooling2D((7, 7), strides=(8, 8))(merged_layer)
        output = Flatten()(output)
        output = Dense(self.action_count)(output)

        model = Model(input=input_layer, output=output)
        model.compile(rmsprop(lr=self.model_learning_rate, clipvalue=1), "mse")

        return model

    def _generate_action_space_combination_input_mapping(self, input_mapping):
        action_input_mapping = dict()

        for combination in self.action_space.combinations:
            combination_values = self.action_space.values_for_combination(combination)
            input_values = [input_mapping[combination_value] for combination_value in combination_values if combination_value is not None]

            action_input_mapping[combination] = list(itertools.chain.from_iterable(input_values))

        return action_input_mapping
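
Both calculate_target_error and train_on_mini_batch above use the standard Q-learning target: the raw reward for terminal transitions, otherwise the reward plus the discounted maximum predicted value of the next frame stack, with the absolute TD error feeding the prioritised replay memory. A NumPy-only sketch of that computation; the Q-value arrays here are made up for illustration:

import numpy as np

def q_target(reward, terminal, next_q_values, gamma=0.99):
    # Terminal transitions are valued by the reward alone; otherwise bootstrap
    # from the best predicted action value of the next state
    if terminal:
        return reward
    return reward + gamma * np.max(next_q_values)

previous_q = np.array([0.2, -0.1, 0.4])   # Q-values for the previous frame stack
next_q = np.array([0.1, 0.5, 0.0])        # Q-values for the next frame stack

target = q_target(reward=1.0, terminal=False, next_q_values=next_q)
td_error = abs(target - previous_q[1])    # error used to prioritise this sample
print(target, td_error)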
Example #4
class SerpentBombermanGameAgent(GameAgent):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.frame_handlers['PLAY'] = self.handle_play

        self.frame_handler_setups['PLAY'] = self.setup_play

        self.value = None
        #print('Sprites')
        #print(type(self.game.sprites))
        #print('game')
        #print(self.game)
        #print('game type')
        #print(type(self.game))

        self.spriteGO = self.game.sprites.get('SPRITE_GAME_OVER')
        self.spriteWO = self.game.sprites.get('SPRITE_GAME_WON')
        self.spriteGirl = self.game.sprites.get('SPRITE_BETTY_0')

        self.printer = TerminalPrinter()
        self.visual_debugger = VisualDebugger()
        self.gamestate = Game()

    def setup_play(self):

        game_inputs = {
            "MoveUp": [KeyboardKey.KEY_UP],
            "MoveDown": [KeyboardKey.KEY_DOWN],
            "MoveLeft": [KeyboardKey.KEY_LEFT],
            "MoveRight": [KeyboardKey.KEY_RIGHT],
            "LeaveBomb": [KeyboardKey.KEY_SPACE],
            "None": [0]
        }
        self.game_inputs = game_inputs
        self.game_actions = [
            KeyboardKey.KEY_UP, KeyboardKey.KEY_DOWN, KeyboardKey.KEY_LEFT,
            KeyboardKey.KEY_RIGHT, KeyboardKey.KEY_SPACE, None
        ]

        ##120, 137
        self.dqn_agent = KerasAgent(shape=(104, 136, 1),
                                    action_size=len(self.game_actions))
        #load model
        #self.ppo_agent.restore_model()

        self.first_run = True

        ## training state
        self.epoch = 1
        self.total_reward = 0

        ##state & action
        self.prev_state = None
        self.prev_action = None
        self.prev_reward = 0

        print("Enter - Auto Save")
        self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
        self.gamestate.restartState()
        time.sleep(2)

    def extract_game_area(self, frame_buffer):
        game_area_buffer = []

        for game_frame in frame_buffer.frames:
            game_area = \
                serpent.cv.extract_region_from_image(game_frame.grayscale_frame,self.game.screen_regions['GAME_REGION'])

            frame = FrameTransformer.rescale(game_area, 0.25)
            game_area_buffer.append(frame)
        print(np.array(game_area_buffer).shape)
        return np.array(game_area_buffer)

    def convert_to_rgba(self, matrix):
        #print(matrix)
        new_matrix = []
        for x in range(0, len(matrix)):
            line = []
            for y in range(0, len(matrix[x])):
                #pixel
                pixel = matrix[x][y]
                new_pixel = [pixel[0], pixel[1], pixel[2], 255]
                line.append(new_pixel)
            new_matrix.append(line)
        return np.array(new_matrix)

    def update_game_state(self, frame):
        game_area = \
                serpent.cv.extract_region_from_image(frame,self.game.screen_regions['GAME_REGION'])
        #game ...
        # 0,0
        # 32,32
        game_squares = [[None for j in range(0, 11)] for i in range(0, 15)]
        const_offset = 8
        const = 32
        #game variables
        self.gamestate.bombs = []  #{x, y}
        self.gamestate.enemies = []  #{x,y}
        #force girl to die if not found
        girl_found = False
        for i in range(0, 15):
            for j in range(0, 11):
                izq = ((j + 1) * const - const_offset,
                       (i + 1) * const - const_offset)
                der = ((j + 2) * const + const_offset,
                       (i + 2) * const + const_offset)
                reg = (izq[0], izq[1], der[0], der[1])
                square = serpent.cv.extract_region_from_image(game_area, reg)
                square = self.convert_to_rgba(square)
                sprite_to_locate = Sprite("QUERY",
                                          image_data=square[..., np.newaxis])
                sprite = self.sprite_identifier.identify(
                    sprite_to_locate, mode="SIGNATURE_COLORS")
                game_squares[i][j] = sprite
                if ("SPRITE_BETTY" in sprite):
                    self.girl = {"x": i, "y": j}
                    girl_found = True
                elif ("SPRITE_GEORGE" in sprite):
                    self.gamestate.enemies.append({"x": i, "y": j})
                elif ("SPRITE_BOMB" in sprite):
                    self.gamestate.bombs.append({"x": i, "y": j})
        self.gamestate.girl_alive = girl_found
        self.gamestate.done = not girl_found
        return game_squares

    def handle_play(self, game_frame):
        #self.printer.add("")
        #self.printer.add("BombermanAI")
        #self.printer.add("Reinforcement Learning: Training a PPO Agent")
        #self.printer.add("")
        #self.printer.add(f"Stage Started At: {self.started_at}")
        #self.printer.add(f"Current Run: #{self.current_attempts}")
        #self.printer.add("")
        #self.check_game_state(game_frame)

        #####################CHECK STATE###########################
        #game over?
        locationGO = None
        sprite_to_locate = Sprite("QUERY", image_data=self.spriteGO.image_data)
        sprite_locator = SpriteLocator()
        locationGO = sprite_locator.locate(sprite=sprite_to_locate,
                                           game_frame=game_frame)
        #print("Location Game over:",locationGO)

        #won game?
        locationWO = None
        sprite_to_locate = Sprite("QUERY", image_data=self.spriteWO.image_data)
        sprite_locator = SpriteLocator()
        locationWO = sprite_locator.locate(sprite=sprite_to_locate,
                                           game_frame=game_frame)
        #print("Location Game won:",locationWO)

        self.gamestate.victory = locationWO is not None
        self.gamestate.lose = locationGO is not None
        self.gamestate.girl_alive = (locationGO is None and locationWO is None)
        self.gamestate.done = not self.gamestate.girl_alive

        print(f"Is alive? {self.gamestate.girl_alive}")
        print(f"Game over? {self.gamestate.lose}")
        print(f"Won? {self.gamestate.victory}")
        #####################VISUAL DEBUGGER###########################
        for i, game_frame in enumerate(self.game_frame_buffer.frames):
            self.visual_debugger.store_image_data(game_frame.frame,
                                                  game_frame.frame.shape,
                                                  str(i))

        #####################MODEL###########################
        #get buffer
        frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3],
                                               frame_type="PIPELINE")
        game_frame_buffer = self.extract_game_area(frame_buffer)
        state = game_frame_buffer.reshape(4, 104, 136, 1)

        if (self.gamestate.done):
            print(f"Game over, attemp {self.epoch}")
            if (self.epoch % 10) == 0:
                print("saving model")
                self.dqn_agent.save_model(
                    f"bombergirl_epoch_{self.epoch}.model")
                self.printer.save_file()
            self.printer.add(
                f"{self.gamestate.victory},{self.gamestate.lose},{self.epoch},{self.gamestate.time},{self.total_reward}"
            )
            self.total_reward = 0
            self.dqn_agent.remember(self.prev_state, self.prev_action,
                                    self.prev_reward, state, True)
            self.dqn_agent.replay()
            self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
            self.epoch += 1
            self.total_reward = 0
            self.gamestate.restartState()
            self.prev_state = None
            self.prev_action = None
        else:
            #update time
            self.gamestate.updateTime()

            #print(np.stack(game_frame_buffer,axis=1).shape)
            #print(game_frame_buffer.shape)
            #print(state.shape)
            if (not (self.prev_state is None)
                    and not (self.prev_action is None)):
                self.dqn_agent.remember(self.prev_state, self.prev_action,
                                        self.prev_reward, state, False)

            #do something
            action_index = self.dqn_agent.act(state)
            #get key
            action = self.game_actions[action_index]
            #get random frame from buffer
            game_frame_rand = random.choice(frame_buffer.frames).frame
            # update environment according to frame
            ###################FUN UPDATE STATE#########################################
            game_area = \
                    serpent.cv.extract_region_from_image(game_frame_rand,self.game.screen_regions['GAME_REGION'])
            #game ...
            # 0,0
            # 32,32
            game_squares = [[None for j in range(0, 11)] for i in range(0, 15)]
            const_offset = 8
            const = 32
            #game variables
            self.gamestate.bombs = []  #{x, y}
            self.gamestate.enemies = []  #{x,y}
            #force girl to die if not found
            girl_found = False
            for i in range(0, 15):
                for j in range(0, 11):
                    izq = ((j + 1) * const - const_offset,
                           (i + 1) * const - const_offset)
                    der = ((j + 2) * const + const_offset,
                           (i + 2) * const + const_offset)
                    reg = (izq[0], izq[1], der[0], der[1])
                    square = serpent.cv.extract_region_from_image(
                        game_area, reg)
                    square = self.convert_to_rgba(square)
                    sprite_to_locate = Sprite("QUERY",
                                              image_data=square[...,
                                                                np.newaxis])
                    sprite = self.sprite_identifier.identify(
                        sprite_to_locate, mode="SIGNATURE_COLORS")
                    game_squares[i][j] = sprite
                    if ("SPRITE_BETTY" in sprite):
                        self.girl = {"x": i, "y": j}
                        girl_found = True
                    elif ("SPRITE_GEORGE" in sprite):
                        self.gamestate.enemies.append({"x": i, "y": j})
                    elif ("SPRITE_BOMB" in sprite):
                        self.gamestate.bombs.append({"x": i, "y": j})
                    elif ("SPRITE_BONUSES" in sprite):
                        self.gamestate.bonus.append({"x": i, "y": j})
            #####################CHECK STATE###########################
            #game over?
            locationGO = None
            sprite_to_locate = Sprite("QUERY",
                                      image_data=self.spriteGO.image_data)
            sprite_locator = SpriteLocator()
            locationGO = sprite_locator.locate(sprite=sprite_to_locate,
                                               game_frame=game_frame)
            #print("Location Game over:",locationGO)

            #won game?
            locationWO = None
            sprite_to_locate = Sprite("QUERY",
                                      image_data=self.spriteWO.image_data)
            sprite_locator = SpriteLocator()
            locationWO = sprite_locator.locate(sprite=sprite_to_locate,
                                               game_frame=game_frame)
            #print("Location Game won:",locationWO)

            self.gamestate.lose = locationGO is not None
            self.gamestate.victory = locationWO is not None
            self.gamestate.girl_alive = (locationGO is None
                                         and locationWO is None)
            self.gamestate.done = not self.gamestate.girl_alive

            print(f"Is alive? {self.gamestate.girl_alive}")
            print(f"Game over? {self.gamestate.lose}")
            print(f"Won? {self.gamestate.victory}")

            ###################REWARD#########################################

            #get reward
            reward = self.gamestate.getReward(action_index)
            self.total_reward += reward
            self.prev_state = state
            self.prev_action = action_index
            self.prev_reward = reward

            if (action):
                self.input_controller.tap_key(
                    action, 0.15 if action_index < 4 else 0.01)
            print(
                f"Action: {self.gamestate.game_inputs[action_index]}, reward: {reward}, total_reward: {self.total_reward}"
            )
            #action, label, value = self.ppo_agent.generate_action(game_frame_buffer)
            #print(action, label, value)
            #key, value = random.choice(list(self.game_inputs.items()))
            #if(value[0]):
            #    self.input_controller.tap_key(value[0])
        #game_squares = self.extract_game_squares(game_frame.frame)

    def check_game_state(self, game_frame):
        #game over?
        locationGO = None
        sprite_to_locate = Sprite("QUERY", image_data=self.spriteGO.image_data)
        sprite_locator = SpriteLocator()
        locationGO = sprite_locator.locate(sprite=sprite_to_locate,
                                           game_frame=game_frame)
        print("Location Game over:", locationGO)
        #won game?
        locationWO = None
        sprite_to_locate = Sprite("QUERY", image_data=self.spriteWO.image_data)
        sprite_locator = SpriteLocator()
        locationWO = sprite_locator.locate(sprite=sprite_to_locate,
                                           game_frame=game_frame)
        print("Location Game won:", locationWO)

        self.gamestate.girl_alive = (locationGO is None and locationWO is None)
        self.gamestate.done = not self.gamestate.girl_alive
        self.gamestate.victory = locationWO is not None

        print(f"Is alive? {self.gamestate.girl_alive}")
        print(f"Game over? {self.gamestate.lose}")
        print(f"Won? {self.gamestate.victory}")