Example #3
def go():
    arena = MultiArena()
    data_transformer = IdentityTransformer()
    model = StateEncoder(final_layer=ValueRegressor(),
                         data_transformer=data_transformer)
    model.load_weights('archive/weights_tt1/epoch_41.h5')
    value_policy = ValueEvaluator(model=model, weights_file=None)
    mcts_agent = SingleMCTSAgent(50,
                                 value_policy,
                                 0.41,
                                 create_visualizer=False,
                                 show_unvisited_nodes=False,
                                 log_to_neptune=False)
    opp = MinMaxAgent()
    results = arena.run_many_duels('deterministic', [mcts_agent, opp],
                                   n_games=10, n_proc_per_agent=1, shuffle=True)
    print(results)
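
# Illustrative entry point (an assumption, not part of the original snippet): under mpi4py
# the script would typically be started on every rank, e.g. `mpiexec -n 4 python duel.py`,
# and MultiArena then spreads the 10 duels across the available processes.
if __name__ == '__main__':
    go()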
class MultiVLearningVTrainer:

    def __init__(self):
        self.multi_arena = MultiArena()
        self.local_trainer = VLearningTrainer()
        token = None
        if main_process:
            token = self.local_trainer._get_token()
            print('Current token = {}'.format(token))
            self.local_trainer._save_weights()

        token = comm.bcast(token, root=0)
        self.local_trainer._set_token(token)



    def combine_dataframes(self, list_of_dataframes):
        # Concatenate the per-worker dataframes gathered over MPI into a single frame.
        if not list_of_dataframes:
            return pd.DataFrame(columns=('state_as_vector', 'value'))
        return pd.concat(list_of_dataframes, ignore_index=True)


    def run_one_episode(self, epochs):

        if not main_process:
            self.local_trainer._load_weights()

        collected_data = self.local_trainer.run_one_game_and_collect_data()
        #gather data:
        gathered_data = comm.gather(collected_data, root=0)

        network_updated = False

        if main_process:
            combined_data = self.combine_dataframes(gathered_data)
            self.local_trainer.train_network(combined_data, epochs=epochs)
            self.local_trainer._save_weights()
            network_updated = True

        # broadcast confirmation that the weights were saved, so that workers only
        # reload them once the main process has finished training:
        network_updated = comm.bcast(network_updated, root=0)
        assert network_updated

    def run_test(self, opponent: Agent):
        results = self.multi_arena.run_many_duels('deterministic', [self.local_trainer.agent, opponent], n_games=comm.Get_size(),
                                        n_proc_per_agent=1, shuffle=True)
        if main_process:
            print(results)

    def run_full_training(self, n_iterations, opponent):
        for i in range(n_iterations):
            if main_process:
                print('Game number = {}'.format(i))
            self.run_one_episode(epochs=2)
            if i % 2 == 0:
                self.run_test(opponent=opponent)
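

# A minimal launch sketch, not part of the original snippet: it assumes the surrounding
# module provides `comm`, `main_process` and `RandomAgent`, and the opponent and iteration
# count below are illustrative choices only.
if __name__ == '__main__':
    trainer = MultiVLearningVTrainer()
    trainer.run_full_training(n_iterations=10,
                              opponent=RandomAgent(distribution='first_buy'))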
Example #6
    def full_training(self, n_repetitions, alpha, epochs):

        self.prepare_training()
        for i in range(n_repetitions):
            if main_process:
                print('Game number = {}'.format(i))
            self.run_self_play('deterministic', alpha=alpha, epochs=epochs)
            agent_to_test = self.mcts_agent
            arena = MultiArena()
            results = arena.run_many_duels(
                'deterministic',
                [agent_to_test,
                 RandomAgent(distribution='first_buy')], 1, 24)
            if main_process:
                self.eval_policy.model.save_weights(
                    'Weights_i = {}.h5'.format(i))
                with open("Results_{}.txt".format(i), "w") as text_file:
                    text_file.write(repr(results))
class MultiQLearningTrainer:

    def __init__(self, alpha):
        if USE_NEPTUNE and main_process:
            neptune.create_experiment('Q learning M alpha = {}'.format(alpha))

        self.multi_arena = MultiArena()
        self.local_trainer = QLearningTrainer(alpha=alpha)
        token = None
        if main_process:
            token = self.local_trainer._get_token()
            print('Current token = {}'.format(token))
            if USE_NEPTUNE:
                neptune.send_text('weights token', x=token)
            self.local_trainer._save_weights()

        token = comm.bcast(token, root=0)
        self.local_trainer._set_token(token)



    def combine_dataframes(self, list_of_dataframes):
        # Concatenate the per-worker dataframes gathered over MPI into a single frame.
        if not list_of_dataframes:
            return pd.DataFrame(columns=('state_as_vector', 'value'))
        return pd.concat(list_of_dataframes, ignore_index=True)


    def run_one_episode(self, epochs):

        if not main_process:
            self.local_trainer._load_weights()

        collected_data = self.local_trainer.run_one_game_and_collect_data()
        #gather data:
        gathered_data = comm.gather(collected_data, root=0)

        network_updated = False

        if main_process:
            combined_data = self.combine_dataframes(gathered_data)
            self.local_trainer.train_network(combined_data, epochs=epochs)
            self.local_trainer._save_weights()
            network_updated = True

        # broadcast confirmation that the weights were saved, so that workers only
        # reload them once the main process has finished training:
        network_updated = comm.bcast(network_updated, root=0)
        assert network_updated

    def run_test(self, opponent: Agent, x_coord):
        results = self.multi_arena.run_many_duels('deterministic', [self.local_trainer.agent, opponent], n_games=2*comm.Get_size(),
                                        n_proc_per_agent=1, shuffle=True)


        if main_process:
            print(results)
            if USE_NEPTUNE:
                for pair in results.data.keys():
                    neptune.send_metric(pair[0] + '_wins', x=x_coord, y=results.data[pair].wins)
                    neptune.send_metric(pair[0] + '_reward', x=x_coord, y=results.data[pair].reward)
                    neptune.send_metric(pair[0] + '_victory_points', x=x_coord, y=results.data[pair].victory_points)


    def run_full_training(self, n_iterations, opponent):
        for i in range(n_iterations):
            if main_process:
                print('Game number = {}'.format(i))
            self.run_one_episode(epochs=2)
            if i % 2 == 0:
                self.run_test(opponent=opponent, x_coord=i)

        if USE_NEPTUNE and main_process:
            neptune.stop()
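

# Analogous launch sketch for the Q-learning variant (the alpha value and iteration count
# are illustrative assumptions):
if __name__ == '__main__':
    trainer = MultiQLearningTrainer(alpha=0.1)
    trainer.run_full_training(n_iterations=20,
                              opponent=RandomAgent(distribution='first_buy'))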
class MCTS_value_trainer:
    def __init__(self, weights=None):
        self.data_transformer = IdentityTransformer()
        self.model = StateEncoder(final_layer=ValueRegressor(),
                                  data_transformer=self.data_transformer)
        if weights is not None:
            self.model.load_weights(weights)
        self.value_policy = ValueEvaluator(model=self.model, weights_file=None)
        self.opponent_value_policy = ValueEvaluator(model=self.model,
                                                    weights_file=None)
        self.data_collector = TreeDataCollector()
        self.params = {}
        self.arena = MultiArena()
        self.params_files = []
        if main_process:
            self.model.dump_weights('initial_weights.h5')
            self.initial_weights_file = 'initial_weights.h5'
        self.replay_buffer = ReplayBuffer()

    def reset_weights(self):
        self.model.load_weights(self.initial_weights_file)

    def create_neptune_experiment(self, experiment_name, source_files):
        if main_process:
            neptune.init(
                project_qualified_name=NEPTUNE_PROJECT_NAME_NN_TRAINING,
                api_token=NEPTUNE_API_TOKEN)
            neptune.create_experiment(name=experiment_name,
                                      description='training MCTS value',
                                      params=self.params,
                                      upload_source_files=source_files)

    def flatten_data(self, gathered_data):
        comm_states = {'state': [], 'mcts_value': []}
        for local_data in gathered_data:
            comm_states['state'] += local_data['state']
            comm_states['mcts_value'] += local_data['mcts_value']
        return comm_states
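
    # Data-layout note (inferred from flatten_data above): each MPI rank contributes a dict
    # of parallel lists, e.g. {'state': [s0, s1, ...], 'mcts_value': [v0, v1, ...]}, and
    # flatten_data concatenates those lists across ranks into a single training set.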

    def include_params_file(self, file):
        self.params_files.append(file)

    def parse_params_files(self):
        for file in self.params_files:
            with open(file) as inputfile:
                for line in inputfile:
                    to_log = line.split('=')
                    if len(to_log) == 2:
                        key, value = to_log
                        # strip whitespace and the trailing newline from the parsed pair
                        self.params[key.strip()] = value.strip()
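
    # Expected params-file format (inferred from the parser above): plain text with one
    # `key=value` pair per line; lines that do not contain exactly one '=' are skipped.
    # The keys below are purely illustrative:
    #
    #     mcts_passes=50
    #     exploration_coefficient=0.41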

    def run_training_games_multi_process(
            self,
            opponent_to_train,
            baselines,
            epochs,
            n_test_games,
            mcts_passes,
            exploration_ceofficient,
            experiment_name: str = 'MCTS value training',
            weights_path=None,
            confidence_threshold: float = 0.1,
            confidence_limit: int = 2,
            count_ratio: float = 6,
            replay_buffer_n_games: int = 10,
            neural_network_train_epochs: int = 2,
            reset_network: bool = True,
            create_visualizer: bool = True,
            use_neptune: bool = True,
            tags=['experiment'],
            source_files=None):

        count_threshold = int(count_ratio * mcts_passes)
        if main_process:
            self.params['mcts passes'] = mcts_passes
            self.params['exploration coefficient'] = exploration_ceofficient
            self.params['n test games'] = n_test_games
            self.params['n proc'] = comm_size
            self.params['replay buffer games'] = replay_buffer_n_games
            self.params['opponent name'] = (opponent_to_train.name
                                            if opponent_to_train != 'self'
                                            else 'self-play')
            self.params['train_epochs'] = neural_network_train_epochs
            self.params['count threshold'] = count_threshold
            self.parse_params_files()

        self.mcts_agent = SingleMCTSAgent(mcts_passes,
                                          self.value_policy,
                                          exploration_ceofficient,
                                          create_visualizer=create_visualizer,
                                          show_unvisited_nodes=False,
                                          log_to_neptune=(main_process
                                                          and use_neptune))

        if opponent_to_train == 'self':
            self.opponent = SingleMCTSAgent(mcts_passes,
                                            self.opponent_value_policy,
                                            exploration_ceofficient,
                                            create_visualizer=False,
                                            show_unvisited_nodes=False,
                                            log_to_neptune=False)
            self.opponent.name = 'MCTS - opponent'
        else:
            self.opponent = opponent_to_train

        if main_process and use_neptune:
            self.create_neptune_experiment(experiment_name=experiment_name,
                                           source_files=source_files)
            # copy so the shared default `tags` list is not mutated between calls:
            tags = list(tags)
            if opponent_to_train == 'self':
                tags.append('self-play')
            neptune.append_tag(tags)

        for epoch_idx in range(epochs):

            if n_test_games > 0:

                for baseline in baselines:
                    results_with_baseline = self.arena.run_many_duels(
                        'deterministic', [self.mcts_agent, baseline],
                        n_games=n_test_games,
                        n_proc_per_agent=1,
                        shuffle=False)

                    if main_process:
                        print(results_with_baseline)
                        _, _, baseline_win_rate, baseline_victory_points = \
                            results_with_baseline.return_stats()
                        if use_neptune:
                            neptune.send_metric(f'Win rate vs {baseline.name}',
                                                x=epoch_idx + 1,
                                                y=baseline_win_rate / n_test_games)
                            neptune.send_metric(f'Win points vs {baseline.name}',
                                                x=epoch_idx + 1,
                                                y=baseline_victory_points / n_test_games)

            if main_process:
                print('============ \n Running MCTS games \n============')
            results = self.arena.run_many_duels(
                'deterministic', [self.mcts_agent, self.opponent],
                n_games=comm_size,
                n_proc_per_agent=1,
                shuffle=False)
            if main_process:
                print(results)
            self.data_collector.setup_root(
                self.mcts_agent.mcts_algorithm.original_root)
            local_data_for_training = self.data_collector.generate_all_tree_data_as_list(
                confidence_threshold, count_threshold, confidence_limit)
            combined_data = mpi_communicator.gather(local_data_for_training,
                                                    root=0)
            if main_process:
                data_from_this_epoch = self.flatten_data(combined_data)
                self.replay_buffer.add_game(data_from_this_epoch)
                data_for_training = self.replay_buffer.data_from_last_games(
                    replay_buffer_n_games)
                _, _, mcts_win_rate, mcts_victory_points = results.return_stats()
                if use_neptune:
                    neptune.log_metric('MCTS train win rate',
                                       x=epoch_idx,
                                       y=mcts_win_rate / comm_size)
                    neptune.log_metric('MCTS train victory points',
                                       x=epoch_idx,
                                       y=mcts_victory_points / comm_size)
                plt.hist(data_for_training['mcts_value'], bins=100)
                plt.savefig('epoch_histogram.png')
                plt.clf()
                img_histogram = Image.open('epoch_histogram.png')
                if use_neptune:
                    neptune.send_image(
                        f'Train set histogram epoch = {epoch_idx}',
                        img_histogram)
                self.data_collector.clean_memory()
                if reset_network:
                    self.reset_weights()
                fit_history = self.model.train_on_mcts_data(
                    data_for_training,
                    train_epochs=neural_network_train_epochs)
                if use_neptune:
                    neptune.send_metric('training set size',
                                        x=epoch_idx,
                                        y=len(data_for_training['mcts_value']))
                    neptune.send_metric('loss',
                                        x=epoch_idx,
                                        y=fit_history.history['loss'][0])
                self.mcts_agent.dump_weights(weights_file=weights_path +
                                             f'epoch_{epoch_idx}.h5')

            # synchronization point: every rank must take part in this broadcast, so it sits
            # outside the `if main_process:` block above; worker processes wait here until
            # the freshly trained weights have been written to disk.
            saved = main_process
            weights_saved = mpi_communicator.bcast(saved, root=0)

            if not main_process:
                self.mcts_agent.load_weights(weights_file=weights_path +
                                             f'epoch_{epoch_idx}.h5')
                self.opponent.load_weights(weights_file=weights_path +
                                           f'epoch_{epoch_idx}.h5')

        if main_process and use_neptune:
            neptune.stop()
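

# Hedged usage sketch, not part of the original code. Assumptions: an MPI launch such as
# `mpiexec -n 8 python train_mcts.py`, module-level `comm_size`, `main_process` and
# `RandomAgent` provided by the surrounding project, and illustrative values for `epochs`
# and the weights directory. The MCTS passes (50) and exploration coefficient (0.41)
# mirror the values used in go() above.
if __name__ == '__main__':
    trainer = MCTS_value_trainer()
    trainer.run_training_games_multi_process(
        opponent_to_train='self',
        baselines=[RandomAgent(distribution='first_buy')],
        epochs=5,
        n_test_games=comm_size,
        mcts_passes=50,
        exploration_ceofficient=0.41,
        weights_path='weights/',
        create_visualizer=False,
        use_neptune=False)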