Example #1
0
class HeuristicValuePolicy(EvaluationPolicy):
    """Evaluation policy that scores a state as a difference of two heuristics.

    The score is the heuristic value of the given state minus the heuristic
    value of a copy of that state in which the active player has been switched.
    """

    def __init__(self):
        """Register the policy name and create the underlying ValueFunction."""
        super().__init__(name='HeuristicValuePolicy')
        self.value_function = ValueFunction()

    def evaluate_state(self,
                       state: State,
                       list_of_actions: List[Action] = None) -> float:
        """Return ``evaluate(state) - evaluate(state with players swapped)``.

        Args:
            state: State to score. It is not modified; the player switch is
                applied to a copy rebuilt through ``StateAsDict``.
            list_of_actions: Accepted for interface compatibility; not used
                by this implementation.
        """
        # Work on a rebuilt copy so the caller's state keeps its active player.
        swapped_view = StateAsDict(state).to_state()
        swapped_view.change_active_player()

        score_as_given = self.value_function.evaluate(state)
        score_swapped = self.value_function.evaluate(swapped_view)
        return score_as_given - score_swapped
Example #2
0
def evaluate_states(files_dir, dump_dir):
    """Label every pickled batch in ``files_dir`` with heuristic values.

    Each regular file in ``files_dir`` is unpickled into a 2-tuple ``(X, _)``;
    the second element is discarded. Every item of ``X`` is rebuilt with
    ``StateAsDict(x).to_state()`` and scored with ``ValueFunction.evaluate``.
    The pair ``(X, Y)`` is then pickled into ``dump_dir`` under the same file
    name, and ``len(X)`` is printed as a progress indicator.

    Args:
        files_dir: Directory containing the input pickle files. Entries that
            are not regular files (e.g. sub-directories) are skipped.
        dump_dir: Directory receiving the output files; created if missing.
    """
    evaluator = ValueFunction()

    # Create the target up front so a missing directory does not surface only
    # after a whole batch has already been evaluated. An empty string means
    # "current directory" for os.path.join, so there is nothing to create.
    if dump_dir:
        os.makedirs(dump_dir, exist_ok=True)

    for file_name in os.listdir(files_dir):
        source_path = os.path.join(files_dir, file_name)
        if not os.path.isfile(source_path):
            # os.listdir also returns sub-directories, which open() rejects.
            continue

        # NOTE(security): pickle.load can execute arbitrary code; only point
        # this function at directories whose contents are trusted.
        with open(source_path, 'rb') as f:
            X, _ = pickle.load(f)

        Y = [evaluator.evaluate(StateAsDict(x).to_state()) for x in X]

        with open(os.path.join(dump_dir, file_name), 'wb') as f:
            pickle.dump((X, Y), f)
        print(len(X))

        # Release the batch before the next file is loaded to limit peak memory.
        del X, Y
Example #3
0
class ValueFunctionAgent(Agent):
    """Agent that greedily picks the action with the highest heuristic value."""

    def __init__(self):
        """Set the agent name and create the ValueFunction used for scoring."""
        super().__init__()
        self.name = 'Value function Agent'
        self.evaluator = ValueFunction()

    def set_weights(self, weights):
        """Forward ``weights`` to the underlying evaluator."""
        self.evaluator.set_weights(weights)

    def show_weights(self):
        """Return the evaluator's current ``weights`` attribute."""
        return self.evaluator.weights

    def choose_act(self, mode, info=False):
        """Choose the available action whose resulting state scores highest.

        For each action in ``self.env.action_space.list_of_actions`` a fresh
        copy of the current state is built, the action is executed on it, the
        active player is switched, and the copy is scored by the evaluator.

        Args:
            mode: Not used by this implementation.
            info: When truthy, also return the value of the chosen action.

        Returns:
            The best action, or ``(best_action, best_value)`` if ``info`` is
            truthy. With no available actions: ``None``, or ``(None, -1)`` if
            ``info`` is truthy.
        """
        snapshot = StateAsDict(self.env.current_state_of_the_game)
        candidates = self.env.action_space.list_of_actions

        if not candidates:
            return (None, -1) if info else None

        top_action, top_value = None, -float('inf')
        for candidate in candidates:
            # Each candidate is tried on its own copy of the current state.
            successor = snapshot.to_state()
            candidate.execute(successor)
            successor.change_active_player()
            value = self.evaluator.evaluate(successor)
            # Strict comparison: on ties the earliest action is kept.
            if value > top_value:
                top_action, top_value = candidate, value

        return (top_action, top_value) if info else top_action
Example #4
0
class HeuraEvaluator(EvaluationPolicy):
    """Evaluation policy with fixed scores for won/lost states.

    States in which neither hand has reached ``POINTS_TO_WIN`` are scored by
    a ``ValueFunction``.
    """

    def __init__(self):
        """Register the policy name and create the underlying ValueFunction."""
        super().__init__(name='Heura Value evaluator')
        self.evaluator = ValueFunction()

    def evaluate_state(self,
                       state: State,
                       list_of_actions: List[Action] = None) -> float:
        """Score ``state``.

        Args:
            state: State to score.
            list_of_actions: Accepted for interface compatibility; not used
                by this implementation.

        Returns:
            ``-1`` if the active player's hand has at least ``POINTS_TO_WIN``
            points, ``1`` if the other player's hand does, otherwise the
            value returned by the evaluator.
        """
        # Terminal checks come first; the active player's hand takes priority.
        active_points = state.active_players_hand().number_of_my_points()
        if active_points >= POINTS_TO_WIN:
            return -1

        other_points = state.other_players_hand().number_of_my_points()
        if other_points >= POINTS_TO_WIN:
            return 1

        return self.evaluator.evaluate(state)
Example #5
0
 def __init__(self):
     """Initialise the parent with the policy name and create the evaluator."""
     # The parent initialiser receives the name 'HeuristicValuePolicy'.
     super().__init__(name='HeuristicValuePolicy')
     # ValueFunction instance kept on the object as ``value_function``.
     self.value_function = ValueFunction()
Example #6
0
import pickle
import matplotlib.pyplot as plt
from nn_models.value_function_heura.value_function import ValueFunction

# Load one pickled validation epoch, score every entry with ValueFunction,
# print the count / minimum / maximum of the scores and show their histogram.
VALIDATION_EPOCH_PATH = '/home/tomasz/ML_Research/splendor/gym-splendor/supervised_data/test0/valid_epochs/epoch_0.pickle'

with open(VALIDATION_EPOCH_PATH, 'rb') as f:
    # The pickle holds a 2-tuple; only the first element is used here.
    X, _ = pickle.load(f)

vf = ValueFunction()
Y = list(map(vf.evaluate, X))

# Same order as before: number of values, then smallest, then largest.
for summarise in (len, min, max):
    print(summarise(Y))

plt.hist(Y, bins=60)
plt.show()

Example #7
0
 def __init__(self):
     """Initialise the parent, set the agent name and create the evaluator."""
     super().__init__()
     # Assigned after the parent initialiser has run.
     self.name = 'Value function Agent'
     # ValueFunction instance kept on the object as ``evaluator``.
     self.evaluator = ValueFunction()
Example #8
0
    def __init__(self):
        """Initialise the parent with the policy name and create the evaluator."""

        # The parent initialiser receives the name 'Heura Value evaluator'.
        super().__init__(name='Heura Value evaluator')
        # ValueFunction instance kept on the object as ``evaluator``.
        self.evaluator = ValueFunction()