class ValueEvaluator(EvaluationPolicy):

    def __init__(self, model=None, weights_file=None):
        super().__init__(name='Value average pool evaluator')
        if model is None:
            # Build the default network: an average-pool state encoder with a value head.
            final_layer = ValueRegressor()
            data_transformer = IdentityTransformer()
            self.model = StateEncoder(final_layer=final_layer, data_transformer=data_transformer)
            if weights_file is not None:
                self.model.load_weights(file_name=weights_file)
        else:
            self.model = model

    def load_weights(self, weights_file):
        self.model.load_weights(file_name=weights_file)

    def dump_weights(self, weights_file):
        self.model.dump_weights(file_name=weights_file)

    def evaluate_state(self, state: State, list_of_actions: List[Action] = None) -> float:
        # Check if the state is terminal; if so, return a fixed value instead of querying the model.
        if state.active_players_hand().number_of_my_points() >= POINTS_TO_WIN:
            return -1
        elif state.other_players_hand().number_of_my_points() >= POINTS_TO_WIN:
            return 1
        else:
            return self.model.get_value(state)
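
# A minimal usage sketch (hypothetical setup; assumes a trained weights file such as
# the one used in go() below exists):
#   evaluator = ValueEvaluator(weights_file='archive/weights_tt1/epoch_41.h5')
#   value = evaluator.evaluate_state(State())
# Terminal states map to -1 / 1; all other states are scored by the value network.
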
def go():
    arena = MultiArena()
    data_transformer = IdentityTransformer()
    model = StateEncoder(final_layer=ValueRegressor(),
                         data_transformer=data_transformer)
    model.load_weights('archive/weights_tt1/epoch_41.h5')
    value_policy = ValueEvaluator(model=model, weights_file=None)
    mcts_agent = SingleMCTSAgent(50,
                                 value_policy,
                                 0.41,
                                 create_visualizer=False,
                                 show_unvisited_nodes=False,
                                 log_to_neptune=False)
    opp = MinMaxAgent()
    results = arena.run_many_duels('deterministic', [mcts_agent, opp], 10, 1,
                                   True)
    print(results)
def run_experiment_1_2():
    gin.parse_config_file('nn_models/experiments/series_1/experiment_2/params.gin')
    final_layer = ValueRegressor()
    data_transformer = IdentityTransformer()
    model = StateEncoder(final_layer=final_layer, data_transformer=data_transformer)
    model.train_network_on_many_sets(TRAIN_DIR, VALID_FILE, epochs=5, test_games=10)
    model.dump_weights('/net/archive/groups/plggluna/plgtodrzygozdz/lvl1/weights/w1.h5')
import gin

from gym_splendor_code.envs.mechanics.state import State
from nn_models.architectures.average_pool_v0 import StateEncoder, ValueRegressor

gin.parse_config_file(
    '/home/tomasz/ML_Research/splendor/gym-splendor/nn_models/experiments/series_1/experiment_1/params.gin'
)
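# gin.parse_config_file loads the parameter bindings from params.gin; StateEncoder is
# presumably gin-configurable, which is why it can be constructed below without
# explicitly passing final_layer or data_transformer.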

x = StateEncoder()

f = State()
print(x.get_value(f))
def run_experiment_1():
    gin.parse_config_file('params.gin')
    final_layer = ValueRegressor()
    data_transformer = DataTransformerExp(0.2)
    model = StateEncoder(final_layer=final_layer, data_transformer=data_transformer)
    model.train_network_on_many_sets(TRAIN_DIR, VALID_FILE, epochs=1000, test_games=100)
class MCTS_value_trainer:
    def __init__(self, weights=None):
        self.data_transformer = IdentityTransformer()
        self.model = StateEncoder(final_layer=ValueRegressor(),
                                  data_transformer=self.data_transformer)
        if weights is not None:
            self.model.load_weights(weights)
        self.value_policy = ValueEvaluator(model=self.model, weights_file=None)
        self.opponent_value_policy = ValueEvaluator(model=self.model,
                                                    weights_file=None)
        self.data_collector = TreeDataCollector()
        self.params = {}
        self.arena = MultiArena()
        self.params_files = []
        self.replay_buffer = ReplayBuffer()
        # Only the MPI root (main_process) keeps a copy of the initial weights on disk.
        if main_process:
            self.model.dump_weights('initial_weights.h5')
            self.initial_weights_file = 'initial_weights.h5'

    def reset_weights(self):
        self.model.load_weights(self.initial_weights_file)

    def create_neptune_experiment(self, experiment_name, source_files):
        if main_process:
            neptune.init(
                project_qualified_name=NEPTUNE_PROJECT_NAME_NN_TRAINING,
                api_token=NEPTUNE_API_TOKEN)
            neptune.create_experiment(name=experiment_name,
                                      description='training MCTS value',
                                      params=self.params,
                                      upload_source_files=source_files)

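    # flatten_data merges the per-rank dictionaries returned by mpi_communicator.gather
    # into a single {'state': [...], 'mcts_value': [...]} training set.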
    def flatten_data(self, gathered_data):
        comm_states = {'state': [], 'mcts_value': []}
        for local_data in gathered_data:
            comm_states['state'] += local_data['state']
            comm_states['mcts_value'] += local_data['mcts_value']
        return comm_states

    def include_params_file(self, file):
        self.params_files.append(file)

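    # parse_params_files logs every 'key = value' line of the included files as an
    # experiment parameter, e.g. (hypothetical contents of a .gin params file):
    #   mcts_passes = 50
    #   exploration_coefficient = 0.41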
    def parse_params_files(self):
        for file in self.params_files:
            with open(file) as inputfile:
                for line in inputfile:
                    to_log = line.split('=')
                    if len(to_log) == 2:
                        self.params[to_log[0].strip()] = to_log[1].strip()

    def run_training_games_multi_process(
            self,
            opponent_to_train,
            baselines,
            epochs,
            n_test_games,
            mcts_passes,
            exploration_coefficient,
            experiment_name: str = 'MCTS value training',
            weights_path=None,
            confidence_threshold: float = 0.1,
            confidence_limit: int = 2,
            count_ratio: float = 6,
            replay_buffer_n_games: int = 10,
            neural_network_train_epochs: int = 2,
            reset_network: bool = True,
            create_visualizer: bool = True,
            use_neptune: bool = True,
            tags=['experiment'],
            source_files=None):

        count_threshold = int(count_ratio * mcts_passes)
        if main_process:
            self.params['mcts passes'] = mcts_passes
            self.params['exploration coefficient'] = exploration_coefficient
            self.params['n test games'] = n_test_games
            self.params['n proc'] = comm_size
            self.params['replay buffer games'] = replay_buffer_n_games
            self.params['opponent name'] = (opponent_to_train.name
                                            if opponent_to_train != 'self' else 'self-play')
            self.params['train_epochs'] = neural_network_train_epochs
            self.params['count threshold'] = count_threshold
            self.parse_params_files()

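        # As in the go() example above, the positional arguments of SingleMCTSAgent are
        # the number of MCTS passes, the evaluation policy and the exploration coefficient.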
        self.mcts_agent = SingleMCTSAgent(mcts_passes,
                                          self.value_policy,
                                          exploration_coefficient,
                                          create_visualizer=create_visualizer,
                                          show_unvisited_nodes=False,
                                          log_to_neptune=(main_process
                                                          and use_neptune))

        if opponent_to_train == 'self':
            self.opponent = SingleMCTSAgent(mcts_passes,
                                            self.opponent_value_policy,
                                            exploration_coefficient,
                                            create_visualizer=False,
                                            show_unvisited_nodes=False,
                                            log_to_neptune=False)
            self.opponent.name = 'MCTS - opponent'
        else:
            self.opponent = opponent_to_train

        if main_process and use_neptune:
            self.create_neptune_experiment(experiment_name=experiment_name,
                                           source_files=source_files)
            if opponent_to_train == 'self':
                tags = tags + ['self-play']  # avoid mutating the shared default list
            neptune.append_tag(tags)

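        # Main training loop: optionally evaluate against the baselines, run comm_size
        # MCTS training games, gather the collected tree data on the root rank, retrain
        # the value network on the replay buffer and share the new weights with all ranks.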
        for epoch_idx in range(epochs):

            if n_test_games > 0:

                for baseline in baselines:
                    results_with_baseline = self.arena.run_many_duels(
                        'deterministic', [self.mcts_agent, baseline],
                        n_games=n_test_games,
                        n_proc_per_agent=1,
                        shuffle=False)

                    if main_process:
                        print(results_with_baseline)
                        _, _, baseline_win_rate, baseline_victory_points = results_with_baseline.return_stats(
                        )
                        neptune.send_metric(f'Win rate vs {baseline.name}',
                                            x=epoch_idx + 1,
                                            y=baseline_win_rate / n_test_games)
                        neptune.send_metric(f'Win points vs {baseline.name}',
                                            x=epoch_idx + 1,
                                            y=baseline_victory_points /
                                            n_test_games)

            if main_process:
                print('============ \n Running MCTS games \n============')
            results = self.arena.run_many_duels(
                'deterministic', [self.mcts_agent, self.opponent],
                n_games=comm_size,
                n_proc_per_agent=1,
                shuffle=False)
            if main_process:
                print(results)
            self.data_collector.setup_root(
                self.mcts_agent.mcts_algorithm.original_root)
            local_data_for_training = self.data_collector.generate_all_tree_data_as_list(
                confidence_threshold, count_threshold, confidence_limit)
            combined_data = mpi_communicator.gather(local_data_for_training,
                                                    root=0)
            if main_process:
                data_from_this_epoch = self.flatten_data(combined_data)
                self.replay_buffer.add_game(data_from_this_epoch)
                data_for_training = self.replay_buffer.data_from_last_games(
                    replay_buffer_n_games)
                _, _, mcts_win_rate, mcts_victory_points = results.return_stats(
                )
                if use_neptune:
                    neptune.log_metric('MCTS train win rate',
                                       x=epoch_idx,
                                       y=mcts_win_rate / comm_size)
                    neptune.log_metric('MCTS train victory points',
                                       x=epoch_idx,
                                       y=mcts_victory_points / comm_size)
                plt.hist(data_for_training['mcts_value'], bins=100)
                plt.savefig('epoch_histogram.png')
                plt.clf()
                img_histogram = Image.open('epoch_histogram.png')
                if use_neptune:
                    neptune.send_image(
                        f'Train set histogram epoch = {epoch_idx}',
                        img_histogram)
                self.data_collector.clean_memory()
                if reset_network:
                    self.reset_weights()
                fit_history = self.model.train_on_mcts_data(
                    data_for_training,
                    train_epochs=neural_network_train_epochs)
                if use_neptune:
                    neptune.send_metric('training set size',
                                        x=epoch_idx,
                                        y=len(data_for_training['mcts_value']))
                    neptune.send_metric('loss',
                                        x=epoch_idx,
                                        y=fit_history.history['loss'][0])
                self.mcts_agent.dump_weights(weights_file=weights_path +
                                             f'epoch_{epoch_idx}.h5')

            # The broadcast must be executed by every rank (it doubles as a barrier), so
            # it cannot live inside the main_process branch: it ensures the new weights
            # file is on disk before the other ranks try to load it below.
            saved = main_process
            weights_saved = mpi_communicator.bcast(saved, root=0)

            if not main_process:
                self.mcts_agent.load_weights(weights_file=weights_path +
                                             f'epoch_{epoch_idx}.h5')
                self.opponent.load_weights(weights_file=weights_path +
                                           f'epoch_{epoch_idx}.h5')

        if main_process and use_neptune:
            neptune.stop()