    def get_q_value(self, state_as_dict: StateAsDict, action: Action) -> float:
        assert self.network is not None, 'You must create network first.'
        self.set_corrent_session()
        vector_of_state = vectorize_state(state_as_dict)
        vector_of_action = vectorize_action(action)
        # Concatenate the state and action encodings into the 597-dimensional
        # input vector the network expects.
        input_vec = np.array(vector_of_state + vector_of_action)
        return self.network.predict(x=input_vec.reshape(1, 597))[0]
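    # Note: if `self.network` is a single-output Keras model, predict() on a
    # (1, 597) batch returns a (1, 1) array, so the value returned above is a
    # length-1 vector rather than a bare float; callers that need a plain
    # float should unwrap it, e.g. by wrapping the call in float(...).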
    def choose_act(self, mode) -> Action:

        # The observation has already been loaded into the agent's private
        # environment; read the active player's current score from it.
        current_points = (self.env.current_state_of_the_game
                          .active_players_hand().number_of_my_points())

        if len(self.env.action_space.list_of_actions) > 0:
            actions = []
            # Evaluations must beat this threshold, otherwise the action is
            # considered not worth taking.
            potential_reward_max = self.action_to_avoid
            numerator = self.depth - 1
            primary_state = StateAsDict(self.env.current_state_of_the_game)
            # Snapshot the current state so it can be restored after each
            # simulated action.
            self.env_dict[numerator] = StateAsDict(
                self.env.current_state_of_the_game)
            for action in self.env.action_space.list_of_actions:
                ae = action.evaluate(self.env.current_state_of_the_game)
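                # Weighted hand-crafted evaluation of the action's immediate
                # effects (`ae` appears to hold card statistics, nobles gained
                # and gem flow); the floor term rewards crossing POINTS_TO_WIN.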
                potential_reward = (
                    np.floor((current_points + ae["card"][2]) / POINTS_TO_WIN) * self.weight[0]
                    + self.weight[1] * ae["card"][2]
                    + self.weight[2] * ae["nobles"]
                    + self.weight[3] * ae["card"][0]
                    + self.weight[4] * sum(ae["gems_flow"]))

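                # Minimax-style lookahead: subtract the decay-discounted value
                # of the opponent's best reply, computed recursively by
                # deep_evaluation, then restore the saved state.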
                potential_reward -= self.decay * self.deep_evaluation(
                    action, numerator - 1, mode)
                self.restore_env(numerator)

                if self.collect_stats:
                    # DataFrame.append was removed in pandas 2.0; build a
                    # one-row frame and concatenate instead.
                    self.stats_dataframe = pd.concat(
                        [self.stats_dataframe,
                         pd.DataFrame([{
                             'state': primary_state,
                             'action': action.to_dict(),
                             'evaluation': potential_reward
                         }])],
                        ignore_index=True)
                    self.stats_dataframe_vectorized = pd.concat(
                        [self.stats_dataframe_vectorized,
                         pd.DataFrame([{
                             'state_vector': vectorize_state(primary_state),
                             'action_vector': vectorize_action(action),
                             'evaluation': potential_reward
                         }])],
                        ignore_index=True)

                if potential_reward > potential_reward_max:
                    # Strictly better evaluation: restart the candidate list.
                    potential_reward_max = potential_reward
                    actions = [action]
                elif potential_reward == potential_reward_max:
                    # Collect ties so one can be chosen at random below.
                    actions.append(action)

            # Restore the environment to the pre-search state before acting.
            self.env.reset()
            self.env.load_state_from_dict(self.env_dict[numerator])

            return random.choice(actions)

        else:
            return None
    def evaluate_list(self, state_as_dict: StateAsDict, list_of_actions: List[Action]):
        assert self.network is not None, 'You must create network first.'
        self.set_corrent_session()
        X = []
        if len(list_of_actions) > 0:
            # Vectorize the state once and pair it with each candidate action,
            # so all Q-values are predicted in a single batch.
            vector_of_state = vectorize_state(state_as_dict)
            for action in list_of_actions:
                state_action_concat = vector_of_state + vectorize_action(action)
                X.append(state_action_concat)
            X = np.array(X)
            q_values_predicted = self.network.predict(X)
            return q_values_predicted
        else:
            return None
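    # Usage sketch (hypothetical variable names; assumes a trained evaluator
    # `qvf` and the legal actions of the current state):
    #
    #     q_values = qvf.evaluate_list(state_as_dict, legal_actions)
    #     best_action = legal_actions[int(np.argmax(q_values))]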
    def run_one_game_and_collect_data(self, debug_info=True):

        # Value estimate each player produced on their previous turn; used as
        # `old_value` when computing the updated value target below.
        last_value_player_0 = None
        last_value_player_1 = None
        old_value = None
        self.env.reset()
        observation = self.env.show_observation('deterministic')
        is_done = False
        number_of_moves = 0

        debug_collected_data = pd.DataFrame(
            columns=('new_value', 'active_player_id', 'winner_id', 'reward',
                     'best_value', 'old_value', 'pa_points'))
        collected_data = pd.DataFrame(columns=('state_as_vector', 'value'))
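        # Run one extra loop iteration after the game ends, so the final
        # position is also recorded for the player who did not make the
        # winning move.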
        extra_move_done = False

        while not (is_done and extra_move_done) and number_of_moves < MAX_NUMBER_OF_MOVES:

            if is_done:
                extra_move_done = True

            action, best_value = self.agent.choose_action(observation, [None], info=True)
            observation, reward, is_done, info = self.env.step('deterministic', action)
            previous_player_id = self.env.previous_player_id()
            winner_id = info['winner_id']

            current_state_as_dict = StateAsDict(self.env.current_state_of_the_game)

            # Retrieve the value this player produced on their previous turn.
            if previous_player_id == 0:
                old_value = last_value_player_0
            if previous_player_id == 1:
                old_value = last_value_player_1

            if debug_info:
                debug_collected_data = pd.concat(
                    [debug_collected_data,
                     pd.DataFrame([{
                         'new_value': self.new_value_formula(old_value, best_value,
                                                             winner_id, reward, alpha=0.1),
                         'active_player_id': previous_player_id,
                         'winner_id': winner_id,
                         'reward': reward,
                         'best_value': best_value,
                         'old_value': old_value,
                         'pa_points': self.env.previous_players_hand().number_of_my_points()
                     }])],
                    ignore_index=True)

            collected_data = pd.concat(
                [collected_data,
                 pd.DataFrame([{
                     'state_as_vector': vectorize_state(current_state_as_dict),
                     'value': self.new_value_formula(old_value, best_value,
                                                     winner_id, reward, alpha=0.1)
                 }])],
                ignore_index=True)

            # Remember the value just produced, for this player's next update.
            if previous_player_id == 0:
                last_value_player_0 = best_value
            if previous_player_id == 1:
                last_value_player_1 = best_value

            # The loop now repeats with the other player as the active one.
            number_of_moves += 1

        if debug_info:
            debug_collected_data.to_csv('debug_info.csv')
        # Drop the first two rows: on each player's first move there is no
        # previous value estimate, so those targets are not meaningful.
        collected_data = collected_data.iloc[2:]
        return collected_data
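    # `new_value_formula` is defined elsewhere in the class. A minimal sketch
    # of a TD(0)-style blend with this signature (an assumption for
    # illustration, not the repository's actual implementation):
    #
    #     def new_value_formula(self, old_value, best_value, winner_id, reward, alpha):
    #         target = reward if winner_id is not None else best_value
    #         if old_value is None:
    #             return target
    #         return old_value + alpha * (target - old_value)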
    def run_one_game_and_collect_data(self, debug_info=True):

        there_was_no_action = False
        self.agent.train_mode()
        # Per-player bookkeeping from each player's previous turn: the
        # evaluation of the action actually taken, the state it was taken in,
        # and that action's vector encoding. These become the training rows.
        last_actual_player_0 = None
        last_actual_player_1 = None
        last_state_player_0 = None
        last_state_player_1 = None
        last_action_vec_player_0 = None
        last_action_vec_player_1 = None
        old_value = None
        old_state = None
        old_action_vec = None
        self.env.reset()
        observation = self.env.show_observation('deterministic')
        is_done = False
        number_of_moves = 0

        debug_collected_data = pd.DataFrame(
            columns=('state_ex', 'state_vec', 'new_value', 'active_player_id',
                     'winner_id', 'reward', 'best_eval', 'actual_eval',
                     'old_value', 'pa_points'))
        collected_data = pd.DataFrame(columns=('state_as_vector', 'action_vector', 'value'))
        extra_move_done = False

        while not (is_done and extra_move_done) and number_of_moves < MAX_NUMBER_OF_MOVES:

            if is_done:
                extra_move_done = True

            current_state_as_dict = StateAsDict(self.env.current_state_of_the_game)

            actual_action, actual_eval, best_eval = self.agent.choose_action(observation, [None])
            if actual_action is None:
                # The agent found no legal action; abandon the game.
                there_was_no_action = True
                break
            observation, reward, is_done, info = self.env.step('deterministic', actual_action)
            previous_player_id = self.env.previous_player_id()
            winner_id = info['winner_id']

            # Retrieve what this player stored on their previous turn.
            if previous_player_id == 0:
                old_value = last_actual_player_0
                old_state = last_state_player_0
                old_action_vec = last_action_vec_player_0
            if previous_player_id == 1:
                old_value = last_actual_player_1
                old_state = last_state_player_1
                old_action_vec = last_action_vec_player_1

            if debug_info:
                state_status = old_state.__repr__() if old_state is not None else 'NONE'
                state_vector = vectorize_state(old_state) if old_state is not None else 'NONE'
                debug_collected_data = pd.concat(
                    [debug_collected_data,
                     pd.DataFrame([{
                         'state_ex': state_status,
                         'state_vec': state_vector,
                         'new_value': self.new_value_formula(old_value, best_eval,
                                                             winner_id, reward, self.alpha),
                         'active_player_id': previous_player_id,
                         'winner_id': winner_id,
                         'reward': reward,
                         'best_eval': best_eval,
                         'actual_eval': actual_eval,
                         'old_value': old_value,
                         'pa_points': self.env.previous_players_hand().number_of_my_points()
                     }])],
                    ignore_index=True)

            # Only record a training row once this player has a previous
            # (state, action) pair to attach the updated value to.
            if old_state is not None:
                collected_data = pd.concat(
                    [collected_data,
                     pd.DataFrame([{
                         'state_as_vector': vectorize_state(old_state),
                         'action_vector': old_action_vec,
                         'value': self.new_value_formula(old_value, best_eval,
                                                         winner_id, reward, self.alpha)
                     }])],
                    ignore_index=True)

            # Remember this turn's evaluation, state and action vector for the
            # player's next update.
            if previous_player_id == 0:
                last_actual_player_0 = actual_eval
                last_state_player_0 = current_state_as_dict
                last_action_vec_player_0 = vectorize_action(actual_action)
            if previous_player_id == 1:
                last_actual_player_1 = actual_eval
                last_state_player_1 = current_state_as_dict
                last_action_vec_player_1 = vectorize_action(actual_action)

            # The loop now repeats with the other player as the active one.
            number_of_moves += 1

        if debug_info:
            debug_collected_data.to_csv('debug_info.csv')
        # Unlike the value-based variant above, no warm-up rows are dropped
        # here: rows are only recorded once old_state is available.
        self.agent.test_mode()
        return collected_data
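    # Usage sketch (hypothetical names; assumes a trainer instance wired to a
    # Splendor environment and a trainable agent):
    #
    #     trainer = QValueTrainer(...)          # hypothetical constructor
    #     data = trainer.run_one_game_and_collect_data(debug_info=False)
    #     # each row pairs a vectorized (state, action) with an updated value
    #     # target, ready for supervised fitting of the Q-network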