Example #1
    def update(self, memory_buffer):
        """ Calculates the average gradients and applies them
            :param memory_buffer: An instance of memory buffer (for distributed training) or None for non-distributed.
            :type memory_buffer: diplomacy_research.models.self_play.memory_buffer.MemoryBuffer
            :return: A dictionary of results mapping each evaluation tag to a list of values
        """
        assert memory_buffer is not None or self.cluster_config is None, 'Memory buffer required for dist. training'
        yield self.sample(queue_name='reinforce_policy')

        # Non-distributed
        if not self.cluster_config:
            yield self._update('reinforce_update')
            return self.get_results()

        # Distributed - Non Chief
        if not self.cluster_config.is_chief:
            set_barrier_status(memory_buffer, 'train', value=1)
            return {}

        # Distributed - Chief
        nb_learners = self.cluster_config.count('learner')
        wait_for_barrier(memory_buffer,
                         barrier_name='train',
                         job_name='learner',
                         min_value=1,
                         min_done=nb_learners - 1)
        yield self._update('reinforce_update')
        return self.get_results()
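All five examples coordinate distributed learners with the same barrier primitives: set_barrier_status, workers_on_barrier, wait_for_barrier, and clear_barrier. Their implementation is not shown in these snippets. Below is a minimal single-process sketch of the semantics the call sites appear to rely on; the signatures are simplified and hypothetical (the real helpers take the Redis-backed MemoryBuffer instance and a job_name, and workers_on_barrier returns a (done, incomplete) pair):

import time

# Hypothetical, single-process stand-ins for the Redis-backed barrier helpers.
# Maps barrier_name -> {worker_id: reported value}
_BARRIERS = {}

def set_barrier_status(barrier_name, worker_id, value):
    """ Records this worker's progress value on the named barrier """
    _BARRIERS.setdefault(barrier_name, {})[worker_id] = value

def workers_on_barrier(barrier_name, min_value=1):
    """ Counts the workers that have reported at least min_value """
    return sum(1 for val in _BARRIERS.get(barrier_name, {}).values() if val >= min_value)

def wait_for_barrier(barrier_name, min_value=1, min_done=1, poll_secs=0.1):
    """ Blocks until at least min_done workers have reported min_value """
    while workers_on_barrier(barrier_name, min_value) < min_done:
        time.sleep(poll_secs)

def clear_barrier(barrier_name):
    """ Resets the barrier so it can be reused for the next epoch """
    _BARRIERS.pop(barrier_name, None)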
Example #2
    def update(self, memory_buffer):
        """ Calculates the average gradients and applies them
            :param memory_buffer: An instance of memory buffer (for distributed training) or None for non-distributed.
            :type memory_buffer: diplomacy_research.models.self_play.memory_buffer.MemoryBuffer
            :return: A dictionary of results mapping each evaluation tag to a list of values
        """
        assert memory_buffer is not None or self.cluster_config is None, 'Memory buffer required for dist. training'
        for epoch_ix in range(self.hparams['nb_mini_epochs']):
            yield self.sample(queue_name='ppo_policy_baseline',
                              wait_per_mini_batch=True)

            # Not distributed - Continuing
            if not self.cluster_config:
                continue

            # Distributed - Non Chief
            # Setting status on barrier and waiting for all learners to complete epoch
            if not self.cluster_config.is_chief:
                nb_learners = self.cluster_config.count('learner')
                set_barrier_status(memory_buffer, 'train', value=epoch_ix + 1)
                wait_for_barrier(memory_buffer,
                                 barrier_name='train',
                                 job_name='learner',
                                 min_value=epoch_ix + 1,
                                 min_done=nb_learners)
                continue

            # Distributed - Chief
            # Waiting for all learners to have completed, then clear barrier
            nb_learners = self.cluster_config.count('learner')
            wait_for_barrier(memory_buffer,
                             barrier_name='train',
                             job_name='learner',
                             min_value=epoch_ix + 1,
                             min_done=nb_learners - 1)
            set_barrier_status(memory_buffer, 'train', value=epoch_ix + 1)

        # Increasing version if non-distributed or if chief
        if not self.cluster_config or self.cluster_config.is_chief:
            yield self.queue_dataset.get_results('ppo_increase_version',
                                                 item={})
            return self.get_results()

        # Returning empty results if distributed and non-chief
        return {}
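Note the asymmetry in min_done above: each non-chief learner reports its mini-epoch and then waits for all nb_learners (the chief reports last, which releases everyone), while the chief waits only for the other nb_learners - 1 before reporting its own value. A hypothetical walk-through with three learners on the first mini-epoch (epoch_ix == 0, so min_value == 1), written against the simplified stand-ins sketched after Example #1:

# Three learners, first mini-epoch
set_barrier_status('train', 'learner-1', 1)   # non-chief 1 finished the mini-epoch
set_barrier_status('train', 'learner-2', 1)   # non-chief 2 finished the mini-epoch
# Chief: wait_for_barrier('train', min_value=1, min_done=2) now returns
set_barrier_status('train', 'chief', 1)       # chief reports last, releasing the others
# Non-chiefs: wait_for_barrier('train', min_value=1, min_done=3) now returns
assert workers_on_barrier('train') == 3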
Example #3
def run_training_epoch(trainer):
    """ Runs a training epoch
        :param trainer: A reinforcement trainer instance.
        :type trainer: diplomacy_research.models.training.reinforcement.trainer.ReinforcementTrainer
    """
    from diplomacy_research.utils.tensorflow import tf
    print('\n=========== TRAINING ===========')

    # Variables
    epoch_results = {}
    nb_learners = trainer.cluster_config.count('learner')
    nb_missing_transitions = -1
    partial_games_only = bool(trainer.flags.update_interval > 0)
    set_barrier_status(trainer.memory_buffer, 'train', value=0)

    # Clearing transition buffers
    yield trainer.algorithm.clear_buffers()

    # Sampling new replay samples from the memory buffer
    if trainer.algorithm_constructor.can_do_experience_replay:
        trainer.replay_samples = get_replay_samples(trainer)

    # Getting games from the memory buffer
    learned_games_proto, game_ids = get_online_games(trainer.memory_buffer, all_games=False)
    saved_games_proto, power_phases_ix = extract_games_and_power_phases(trainer,
                                                                        learned_games_proto,
                                                                        trainer.replay_samples,
                                                                        partial_games_only=partial_games_only)
    yield trainer.algorithm.learn(saved_games_proto, power_phases_ix, trainer.advantage_fn)

    # Waiting for additional games - We don't have enough transitions to proceed
    # Each learner only needs its share (1 / nb_learners) of the transitions per update
    nb_transitions_required = trainer.flags.nb_transitions_per_update // nb_learners
    while len(trainer.algorithm.transition_buffer) < nb_transitions_required:
        if nb_missing_transitions != nb_transitions_required - len(trainer.algorithm.transition_buffer):
            nb_missing_transitions = nb_transitions_required - len(trainer.algorithm.transition_buffer)
            LOGGER.info('Waiting for additional games. Missing %d transitions.', nb_missing_transitions)
        yield gen.sleep(1.)

        # Getting additional games
        new_games_proto, new_game_ids = get_online_games(trainer.memory_buffer, excluding=game_ids, all_games=False)
        learned_games_proto += new_games_proto
        game_ids += new_game_ids

        # Learning from those games
        saved_games_proto, power_phases_ix = extract_games_and_power_phases(trainer,
                                                                            new_games_proto,
                                                                            [],
                                                                            partial_games_only=partial_games_only)
        yield trainer.algorithm.learn(saved_games_proto, power_phases_ix, trainer.advantage_fn)

    # Proceeding
    LOGGER.info('[Train] Retrieved %d games from Redis', len(learned_games_proto))
    try:
        epoch_results = yield trainer.algorithm.update(trainer.memory_buffer)
    except (TimeoutError, tf.errors.DeadlineExceededError):
        LOGGER.warning('learn/update took more than %d ms to run. Timeout error.', SESSION_RUN_TIMEOUT)
    LOGGER.info('[Train] Done updating the model version.')

    # Updating priorities
    yield update_priorities(trainer, learned_games_proto, trainer.replay_samples)
    trainer.replay_samples = []

    # Marking games as processed
    mark_games_as_processed(trainer.memory_buffer, game_ids)

    # Non-Chief - Can just wait for chief and return
    if not trainer.cluster_config.is_chief:
        set_barrier_status(trainer.memory_buffer, 'train', value=1)
        wait_for_barrier(trainer.memory_buffer, 'train')
        complete_epoch(trainer)
        return

    # Chief - Marking learner as done
    set_barrier_status(trainer.memory_buffer, 'train', value=1)

    # Updating the model used by the actors
    new_version_id = get_version_id(trainer)
    save_version_model(trainer)
    set_version_id(trainer.memory_buffer, new_version_id)
    LOGGER.info('[Train] *** New version available: Version %d ***.', new_version_id)

    # Clearing barrier
    clear_barrier(trainer.memory_buffer, 'train')

    # Compiling stats (every 60s by default)
    if (time.time() - trainer.last_stats_time) >= trainer.stats_every:
        stats_games_proto = load_games_from_folder(trainer,
                                                   target_dir=get_version_directory(trainer, 'player'),
                                                   pattern='games_learned*.pbz',
                                                   minimum=trainer.cluster_config.count('actor'),
                                                   timeout=60)
        update_stats(trainer, 'train', stats_games_proto, epoch_results=epoch_results)
        compile_stats(trainer, 'train')
        save_stats(trainer)
        trainer.last_stats_time = int(time.time())

    # Displaying progress and completing
    display_progress(trainer)
    complete_epoch(trainer)
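The wait loop in run_training_epoch gates each learner on its share of the transitions rather than on the global total. With hypothetical numbers:

# Hypothetical numbers: 512 transitions per update split across 4 learners
nb_transitions_per_update, nb_learners = 512, 4
per_learner_target = nb_transitions_per_update // nb_learners
assert per_learner_target == 128   # each learner proceeds once it holds 128 transitions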
Example #4
def run_validation_epoch(trainer, sess):
    """ Runs a validation epoch
        :param trainer: A supervised trainer instance.
        :param sess: The TensorFlow session
        :type trainer: diplomacy_research.models.training.supervised.trainer.SupervisedTrainer
    """
    from diplomacy_research.utils.tensorflow import tf

    # Initializing the dataset to use the validation set
    if trainer.supervised_dataset.training_mode != TrainingMode.VALIDATION:
        trainer.supervised_dataset.start_validation_mode(sess)
    elif not trainer.supervised_dataset.iterator_initialized:
        trainer.supervised_dataset.initialize_iterator(sess)

    # If there are no batches in the validation set, aborting
    nb_batches = trainer.supervised_dataset.nb_validation_steps_per_epoch
    if not nb_batches:
        return

    # Variables for stats
    results, detailed_results = OrderedDict(), OrderedDict()
    batch_results, batch_detailed_results = OrderedDict(), OrderedDict()
    trainer.progress = trainer.supervised_dataset.get_progress()

    # Indicating to the barrier that we are starting validation
    if trainer.cluster_config.is_chief:
        clear_barrier(trainer.memory_buffer, __TRAIN_BARRIER__)
    set_barrier_status(trainer.memory_buffer, __EVAL_BARRIER__, 0)
    done, incomplete = workers_on_barrier(trainer.memory_buffer, __EVAL_BARRIER__)
    LOGGER.info('Starting validation - Barrier Status: Done %d - Incomplete: %d', done, incomplete)

    # For each loop in the validation set
    for eval_loop_ix in range(trainer.model.nb_evaluation_loops):
        last_status_perc = 0.

        # Making sure the dataset is initialized
        if not trainer.supervised_dataset.iterator_initialized or trainer.supervised_dataset.is_done:
            trainer.supervised_dataset.initialize_iterator(sess)

        # ---- Starting Validation Epoch -----
        print('-' * 80)

        # Running each batch sequentially
        for batch_ix in range(nb_batches):

            # Checking if we need to stop training, or are done with the current eval_loop_ix
            if hasattr(sess, 'should_stop') and sess.should_stop():
                trainer.supervised_dataset.close()
                break
            if trainer.supervised_dataset.is_done:
                break

            # Running single batch
            try:
                batch_results, batch_detailed_results = run_next_decoded_validation_batch(trainer, sess, eval_loop_ix)
                trainer.progress = trainer.progress[0], (batch_ix + 1) / max(1, nb_batches)

                # Storing batch results
                for result_name, result_value in batch_results.items():
                    results.setdefault(result_name, [])
                    if isinstance(result_value, list):
                        results[result_name] += result_value
                    else:
                        results[result_name] += [result_value]
                for result_name, result_value in batch_detailed_results.items():
                    assert isinstance(result_value, list), 'Detailed results must be a list.'
                    detailed_results.setdefault(result_name, [])
                    detailed_results[result_name] += result_value

            except tf.errors.OutOfRangeError:
                trainer.supervised_dataset.mark_as_done()
            except tf.errors.DeadlineExceededError:
                LOGGER.warning('Validation took more than %d ms to run. Timeout error.', SESSION_RUN_TIMEOUT)

            # Printing status every 10% completed
            current_perc_completed = (batch_ix + 1) / max(1., nb_batches)
            if current_perc_completed > last_status_perc + 0.10:
                last_status_perc = round(current_perc_completed, 1)
                display_validation_stats(trainer, sess, batch_results,
                                         batch_ix, eval_loop_ix)
                trainer.supervised_dataset.save_status()

    # Post-processing eval detailed results
    detailed_results = trainer.model.post_process_results(detailed_results)

    # Printing final validation status, and freezing graph
    display_final_validation_stats(trainer,
                                   sess,
                                   results,
                                   detailed_results,
                                   aggregated=False)
    if trainer.cluster_config.is_chief:
        save_model(trainer, sess)
    trainer.supervised_dataset.mark_as_done()

    # ---- Done Validation Epoch -----
    print('-' * 80)

    # Stopping barrier and dumping results to disk for chief to aggregate
    set_barrier_status(trainer.memory_buffer, __EVAL_BARRIER__, 1)
    save_results_to_disk(trainer, results, detailed_results)

    # Non-Chief - Wait for chief to do aggregation
    if not trainer.cluster_config.is_chief:
        done, incomplete = workers_on_barrier(trainer.memory_buffer, __EVAL_BARRIER__)
        LOGGER.info('Waiting for barrier Status: Done %d - Incomplete: %d', done, incomplete)
        wait_for_barrier(trainer.memory_buffer, __EVAL_BARRIER__)
        return

    # Chief - Performs aggregation across all workers
    aggregate_results(trainer, sess)
    print('-' * 80)
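In the batch loop above, results accumulates a flat list of per-batch values under each evaluation tag (scalar batch results are wrapped into single-element lists) before being dumped to disk for the chief. aggregate_results itself is not shown; a hypothetical single-worker reduction over such a dictionary, for illustration only:

from statistics import mean

def summarize_results(results):
    """ Hypothetical helper: collapses each tag's list of batch values to its mean.
        The real aggregate_results presumably also merges the per-worker results saved to disk.
    """
    return {tag: mean(values) for tag, values in results.items() if values}

print(summarize_results({'valid_accuracy': [0.71, 0.74, 0.69], 'valid_loss': [1.21, 1.13]}))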
Example #5
def run_training_epoch(trainer, sess):
    """ Runs a training epoch
        :param trainer: A supervised trainer instance.
        :param sess: The TensorFlow session
        :type trainer: diplomacy_research.models.training.supervised.trainer.SupervisedTrainer
    """
    from diplomacy_research.utils.tensorflow import tf

    # Initializing the dataset to use the training set
    if trainer.supervised_dataset.is_done:
        trainer.supervised_dataset.start_training_mode(sess)
    elif not trainer.supervised_dataset.iterator_initialized:
        trainer.supervised_dataset.initialize_iterator(sess)

    # If another dataset was loaded from load_status(), skipping Training
    if trainer.supervised_dataset.training_mode != TrainingMode.TRAINING:
        return

    # Indicating to the barrier that we are starting training
    if trainer.cluster_config.is_chief:
        clear_barrier(trainer.memory_buffer, __EVAL_BARRIER__)
    set_barrier_status(trainer.memory_buffer, __TRAIN_BARRIER__, 0)
    done, incomplete = workers_on_barrier(trainer.memory_buffer, __TRAIN_BARRIER__)
    LOGGER.info('Starting training - Barrier Status: Done %d - Incomplete: %d', done, incomplete)

    # For the first epoch, we only want to run validation to get an idea of performance with random weights
    nb_epochs_completed, _ = trainer.supervised_dataset.get_progress()
    if not nb_epochs_completed:
        LOGGER.info('Only running validation for the first epoch (to get pre-training performance).')
        trainer.supervised_dataset.mark_as_done()

    # Variables for stats and synchronization
    done_training = False
    done_time = 0
    nb_long_steps = 0
    nb_workers = trainer.cluster_config.count('worker')
    trainer.status_time = int(time.time())
    trainer.step_last_status = 0
    last_barrier_status = 0.

    # Running epoch
    while True:
        current_time = int(time.time())

        # For barriers, printing status every 30 secs
        print_barrier_status = False
        if time.time() > (last_barrier_status + 30):
            last_barrier_status = time.time()
            print_barrier_status = True

        # Checking if we need to stop training, or are done with the current epoch
        if hasattr(sess, 'should_stop') and sess.should_stop():
            trainer.supervised_dataset.close()
            break
        if trainer.supervised_dataset.is_done:
            if not done_training:
                set_barrier_status(trainer.memory_buffer, __TRAIN_BARRIER__, 1)
                done, incomplete = workers_on_barrier(trainer.memory_buffer, __TRAIN_BARRIER__)
                LOGGER.info('Waiting for barrier Status: Done %d - Incomplete: %d', done, incomplete)
                done_training = True
                done_time = time.time()

            # For the first epoch, everyone blocks until all workers have signaled the barrier
            # This is to prevent a worker from training before we first run the evaluation loop
            if not nb_epochs_completed:
                wait_for_barrier(trainer.memory_buffer,
                                 __TRAIN_BARRIER__,
                                 job_name='worker',
                                 min_done=nb_workers)
                break

            # Chief can break if everyone has marked as done
            if trainer.cluster_config.is_chief and can_proceed_through_barrier(
                    trainer.memory_buffer,
                    __TRAIN_BARRIER__,
                    job_name='worker',
                    min_done=nb_workers,
                    print_status=print_barrier_status):
                break

            # Others can only break when the barrier is cleared
            if can_proceed_through_barrier(trainer.memory_buffer,
                                           __TRAIN_BARRIER__,
                                           last_cleared=done_time,
                                           print_status=print_barrier_status):
                break

        # OutOfRangeError is thrown when we reach the end of the dataset
        try:
            run_gradient_step(trainer, sess)
        except tf.errors.OutOfRangeError:
            trainer.supervised_dataset.mark_as_done()
        except tf.errors.DeadlineExceededError:
            nb_long_steps += 1
            if nb_long_steps >= 5 or not done_training:
                LOGGER.warning('run_gradient_step took more than %d ms to run. Timeout error.', SESSION_RUN_TIMEOUT)

        # Printing status
        if (current_time - trainer.status_time) > trainer.status_every \
                or (trainer.supervised_dataset.is_done and not done_training):

            # Updating stats
            elapsed_time = current_time - trainer.status_time
            elapsed_steps = trainer.supervised_dataset.steps_in_current_mode - trainer.step_last_status
            trainer.status_time = current_time
            trainer.step_last_status = trainer.supervised_dataset.steps_in_current_mode
            prev_nb_epochs_completed = trainer.progress[0]
            trainer.progress = trainer.supervised_dataset.get_progress()
            epoch_eta = 0
            if elapsed_steps > 0:
                epoch_eta = int(trainer.supervised_dataset.nb_total_steps_per_epoch * elapsed_time / elapsed_steps)

            # Decaying rates
            decay_rates(trainer, sess)

            # Displaying status
            try:
                results, _ = run_next_decoded_training_batch(trainer, sess)
                display_training_stats(trainer, sess, results, epoch_eta)
            except tf.errors.OutOfRangeError:
                trainer.supervised_dataset.mark_as_done()

            # Saving dataset status to disk (to be able to resume)
            trainer.supervised_dataset.save_status()

            # Saving model in infinite training
            if trainer.supervised_dataset.do_infinite_training \
                    and trainer.cluster_config.is_chief \
                    and nb_epochs_completed > prev_nb_epochs_completed:
                save_model(trainer, sess, start_of_epoch=True)
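The epoch ETA in the status block above is simple proportional scaling: the measured seconds-per-step over the last status interval, multiplied by the number of steps in a full epoch. With hypothetical numbers:

# Hypothetical: the last 300 steps took 120 seconds, and an epoch is 9,000 steps
nb_total_steps_per_epoch, elapsed_time, elapsed_steps = 9000, 120, 300
epoch_eta = int(nb_total_steps_per_epoch * elapsed_time / elapsed_steps)
assert epoch_eta == 3600   # a full epoch would take about one hour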