Ejemplo n.º 1
0
 def restore(self, sess, restore_from=None):
     """Restore model weights into ``sess`` from a checkpoint path.

     Falls back to the path recorded by the most recent ``save`` call
     when ``restore_from`` is not given.
     """
     checkpoint = self._restore_path if restore_from is None else restore_from
     if self.print_log:
         print_and_log('saver: {} restoring model from {}'.format(
             self.name, checkpoint))
     self._saver.restore(sess, checkpoint)
Ejemplo n.º 2
0
 def collect_test_data(self, top_level, is_challenging=False):
     """Play the fixed evaluation episodes for ``top_level``.

     Uses the challenging start-goal set when ``is_challenging`` is True,
     otherwise the regular fixed set, and delegates to ``collect_data``
     in test mode (is_train=False).
     """
     pairs = (self.hard_fixed_start_goal_pairs
              if is_challenging else self.fixed_start_goal_pairs)
     print_and_log('collecting {} test episodes of level {}'.format(
         len(pairs), top_level))
     return self.collect_data(pairs, top_level, False)
Ejemplo n.º 3
0
 def collect_data(self, start_goal_pairs, top_level, is_train):
     """Run episodes for the given start-goal pairs and summarize them.

     Returns the ``(successes, accumulated_cost, dataset,
     endpoints_by_path)`` tuple produced by ``_process_episode_results``.
     """
     results = self.episode_runner.play_episodes(start_goal_pairs,
                                                 top_level, is_train)
     processed = self._process_episode_results(results, top_level)
     successes, accumulated_cost, dataset, endpoints_by_path = processed
     print_and_log(
         'data collection done, success rate is {}, accumulated cost is {}'.
         format(successes, accumulated_cost))
     return successes, accumulated_cost, dataset, endpoints_by_path
Ejemplo n.º 4
0
 def save(self, sess, global_step):
     """Persist the current model state and remember the restore path.

     Also snapshots the assertion variables from ``sess`` so they can be
     compared after a later restore.
     """
     self.assertion_w_dictionary = self.get_assertion_vars(sess)
     save_target = os.path.join(self.saver_dir, self.name)
     self._restore_path = self._saver.save(sess,
                                           save_target,
                                           global_step=global_step)
     if self.print_log:
         print_and_log(
             'saver {}: saved model from global step {} to {}'.format(
                 self.name, global_step, self._restore_path))
Ejemplo n.º 5
0
 def policy_function(starts, goals, level, is_train):
     """Query the network policy for subgoal predictions.

     Logs the distance of the first prediction head from the start-goal
     midpoints, warns when any NaN appears in the output, and returns the
     raw network result unchanged.
     """
     res = network.predict_policy(starts, goals, level, sess, is_train)
     midpoints = (np.array(starts) + np.array(goals)) * 0.5
     distance = np.linalg.norm(res[0] - midpoints, axis=1)
     print(
         f'distance from mean: mean {distance.mean()} min {distance.min()} max {distance.max()}'
     )
     if np.any(np.isnan(res)):
         print_and_log(
             '######################## Nan predictions detected...')
     return res
Ejemplo n.º 6
0
 def collect_data(self,
                  count,
                  is_train=True,
                  use_fixed_start_goal_pairs=False):
     """Collect ``count`` episodes, random or from the fixed pair set.

     Returns ``(successes, accumulated_cost, dataset,
     endpoints_by_path)``; training runs also advance
     ``train_episodes_counter`` by the number of collected paths.
     """
     mode = 'train' if is_train else 'test'
     print_and_log('collecting {} {} episodes'.format(count, mode))
     if use_fixed_start_goal_pairs:
         results = self.episode_runner.play_fixed_episodes(is_train)
     else:
         results = self.episode_runner.play_random_episodes(count, is_train)
     processed = self._process_episode_results(results)
     successes, accumulated_cost, dataset, endpoints_by_path = processed
     print_and_log(
         'data collection done, success rate is {}, accumulated cost is {}'.
         format(successes, accumulated_cost))
     if is_train:
         self.train_episodes_counter += len(endpoints_by_path)
     return successes, accumulated_cost, dataset, endpoints_by_path
Ejemplo n.º 7
0
def end_of_level_test(best_cost, best_cost_global_step, best_curriculum_coefficient, best_saver, sess,
                      test_trajectories_dir, trainer):
    """Restore the best model and run the final evaluation passes.

    Runs the full fixed test set and the challenging set, writing the
    resulting endpoints to 'all.txt' and 'challenging.txt' under
    *test_trajectories_dir*.

    NOTE(review): the call site in run_for_config passes an extra
    current_level argument this signature does not accept, and
    trainer.collect_test_data is invoked here without a top_level
    argument although another variant of that method requires one —
    confirm which trainer/function versions belong together.
    """
    print_and_log('end of run. best: {} from step: {}'.format(best_cost, best_cost_global_step))
    restore_best(sess, best_saver, best_curriculum_coefficient, trainer)
    # test all
    test_trajectories_file = os.path.join(test_trajectories_dir, 'all.txt')
    # [-1] keeps only endpoints_by_path from the returned tuple
    endpoints_by_path = trainer.collect_test_data(is_challenging=False)[-1]
    serialize_compress(endpoints_by_path, test_trajectories_file)
    print_and_log(os.linesep)
    # test hard
    test_trajectories_file = os.path.join(test_trajectories_dir, 'challenging.txt')
    endpoints_by_path = trainer.collect_test_data(is_challenging=True)[-1]
    serialize_compress(endpoints_by_path, test_trajectories_file)
    print_and_log(os.linesep)
Ejemplo n.º 8
0
def run_for_config(config):
    """Train the subgoal (SGT) policy described by *config*, level by level.

    Sets up output/log directories, builds the game, network and model
    savers, then for each level runs training cycles with periodic
    testing, best-model tracking, learn-rate decay and curriculum
    raising. Returns the best test cost observed (for the last level).
    """
    # set the name of the model
    model_name = config['general']['name']
    now = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y_%m_%d_%H_%M_%S')
    # timestamp-prefixed so repeated runs never collide on disk
    model_name = now + '_' + model_name if model_name is not None else now

    # where we save all the outputs
    scenario = config['general']['scenario']
    working_dir = os.path.join(get_base_directory(), 'sgt', scenario)
    init_dir(working_dir)

    saver_dir = os.path.join(working_dir, 'models', model_name)
    init_dir(saver_dir)
    init_log(log_file_path=os.path.join(saver_dir, 'log.txt'))
    copy_config(config, os.path.join(saver_dir, 'config.yml'))
    episodic_success_rates_path = os.path.join(saver_dir, 'results.txt')
    test_trajectories_dir = os.path.join(working_dir, 'test_trajectories',
                                         model_name)
    init_dir(test_trajectories_dir)

    # generate game
    game = _get_game(config)

    network = Network(config, game)
    network_variables = network.get_all_variables()

    # save model: 'latest' keeps the 2 most recent checkpoints,
    # 'best' keeps only the single best-performing one
    latest_saver = ModelSaver(os.path.join(saver_dir, 'latest_model'),
                              2,
                              'latest',
                              variables=network_variables)
    best_saver = ModelSaver(os.path.join(saver_dir, 'best_model'),
                            1,
                            'best',
                            variables=network_variables)

    summaries_collector = SummariesCollector(
        os.path.join(working_dir, 'tensorboard', model_name), model_name)

    with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(
            gpu_options=tf.compat.v1.GPUOptions(
                per_process_gpu_memory_fraction=config['general']
                ['gpu_usage']))) as sess:
        sess.run(tf.compat.v1.global_variables_initializer())

        def policy_function(starts, goals, level, is_train):
            """Query the policy network; log midpoint distance and NaNs."""
            res = network.predict_policy(starts, goals, level, sess, is_train)
            means = 0.5 * (np.array(starts) + np.array(goals))
            distance = np.linalg.norm(res[0] - means, axis=1)
            print(
                f'distance from mean: mean {distance.mean()} min {distance.min()} max {distance.max()}'
            )
            if np.any(np.isnan(res)):
                print_and_log(
                    '######################## Nan predictions detected...')
            return res

        episode_runner = EpisodeRunnerSubgoal(config, game, policy_function)
        trainer = TrainerSubgoal(
            model_name,
            config,
            working_dir,
            network,
            sess,
            episode_runner,
            summaries_collector,
            curriculum_coefficient=get_initial_curriculum(config))

        decrease_learn_rate_if_static_success = config['model'][
            'decrease_learn_rate_if_static_success']
        stop_training_after_learn_rate_decrease = config['model'][
            'stop_training_after_learn_rate_decrease']
        reset_best_every = config['model']['reset_best_every']

        global_step = 0
        best_curriculum_coefficient = None

        for current_level in range(config['model']['starting_level'],
                                   config['model']['levels'] + 1):

            # per-level best-model and patience tracking
            best_cost, best_cost_global_step = None, None
            no_test_improvement, consecutive_learn_rate_decrease = 0, 0

            if config['model']['init_from_lower_level'] and current_level > 1:
                print_and_log('initiating level {} from previous level'.format(
                    current_level))
                network.init_policy_from_lower_level(sess, current_level)

            for cycle in range(config['general']['training_cycles_per_level']):
                print_and_log('starting cycle {}, level {}'.format(
                    cycle, current_level))

                new_global_step, success_ratio = trainer.train_policy_at_level(
                    current_level, global_step)
                # unchanged step means no training data was produced this cycle
                if new_global_step == global_step:
                    print_and_log(
                        'no data found in training cycle {} global step still {}'
                        .format(cycle, global_step))
                    continue
                else:
                    global_step = new_global_step

                if (cycle + 1) % config['policy']['decrease_std_every'] == 0:
                    network.decrease_base_std(sess, current_level)
                    print_and_log('new base stds {}'.format(
                        network.get_base_stds(sess, current_level)))

                print_and_log('done training cycle {} global step {}'.format(
                    cycle, global_step))

                # save every now and then
                if cycle % config['general']['save_every_cycles'] == 0:
                    latest_saver.save(sess, global_step=global_step)

                if cycle % config['general']['test_frequency'] == 0:
                    # do test
                    test_successes, test_cost, _, endpoints_by_path = trainer.collect_test_data(
                        current_level, False)
                    summaries_collector.write_test_success_summaries(
                        sess, global_step, test_successes, test_cost,
                        trainer.curriculum_coefficient)
                    with open(episodic_success_rates_path, 'a') as f:
                        f.write('{} {} {} {} {}'.format(
                            current_level, trainer.train_episodes_counter,
                            test_successes, test_cost, os.linesep))

                    # decide how to act next
                    print_and_log('old cost was {} at step {}'.format(
                        best_cost, best_cost_global_step))
                    print_and_log('current learn rates {}'.format(
                        network.get_learn_rates(sess, current_level)))
                    print_and_log('current base stds {}'.format(
                        network.get_base_stds(sess, current_level)))
                    # lower test cost is better
                    if best_cost is None or test_cost < best_cost:
                        print_and_log('new best cost {} at step {}'.format(
                            test_cost, global_step))
                        best_cost, best_cost_global_step = test_cost, global_step
                        best_curriculum_coefficient = trainer.curriculum_coefficient
                        no_test_improvement, consecutive_learn_rate_decrease = 0, 0
                        best_saver.save(sess, global_step)
                        test_trajectories_file = os.path.join(
                            test_trajectories_dir,
                            '{}.txt'.format(global_step))
                        serialize_compress(endpoints_by_path,
                                           test_trajectories_file)
                    else:
                        print_and_log(
                            'new model is not the best with cost {} at step {}'
                            .format(test_cost, global_step))
                        no_test_improvement += 1
                        print_and_log('no improvement count {} of {}'.format(
                            no_test_improvement,
                            decrease_learn_rate_if_static_success))
                        if reset_best_every > 0 and no_test_improvement % reset_best_every == reset_best_every - 1:
                            # restore the model every once in a while if did not find a better solution in a while
                            restore_best(sess, best_saver,
                                         best_curriculum_coefficient, trainer)
                        if no_test_improvement == decrease_learn_rate_if_static_success:
                            # restore the best model
                            if config['model']['restore_on_decrease']:
                                restore_best(sess, best_saver,
                                             best_curriculum_coefficient,
                                             trainer)
                            # decrease learn rates
                            network.decrease_learn_rates(sess, current_level)
                            no_test_improvement = 0
                            consecutive_learn_rate_decrease += 1
                            print_and_log(
                                'decreasing learn rates {} of {}'.format(
                                    consecutive_learn_rate_decrease,
                                    stop_training_after_learn_rate_decrease))
                            print_and_log('new learn rates {}'.format(
                                network.get_learn_rates(sess, current_level)))
                            # give up on this level after too many decreases
                            if consecutive_learn_rate_decrease == stop_training_after_learn_rate_decrease:
                                break

                if trainer.curriculum_coefficient is not None:
                    # raise the curriculum difficulty once training succeeds often enough
                    if success_ratio > config['curriculum'][
                            'raise_when_train_above']:
                        print_and_log(
                            'current curriculum coefficient {}'.format(
                                trainer.curriculum_coefficient))
                        trainer.curriculum_coefficient *= config['curriculum'][
                            'raise_times']
                        print_and_log(
                            'curriculum coefficient raised to {}'.format(
                                trainer.curriculum_coefficient))

                # mark in log the end of cycle
                print_and_log(os.linesep)

            # if we finished because we ran out of cycles, we still need to make one more test
            # NOTE(review): passes 8 args incl. current_level — confirm the
            # end_of_level_test in scope accepts it (a 7-param variant exists).
            end_of_level_test(best_cost, best_cost_global_step,
                              best_curriculum_coefficient, best_saver, sess,
                              test_trajectories_dir, trainer, current_level)

        print_and_log('trained all levels - needs to stop')
        close_log()
        # best_cost refers to the final level's best (reset per level)
        return best_cost
Ejemplo n.º 9
0
 def policy_function(starts, goals, is_train):
     """Forward the policy query to the network.

     Logs a warning if any prediction is NaN and returns the raw
     network output unchanged.
     """
     res = network.predict_policy(starts, goals, sess, is_train)
     has_nan = np.any(np.isnan(res))
     if has_nan:
         print_and_log('######################## Nan predictions detected...')
     return res
Ejemplo n.º 10
0
 def collect_train_data(self, count, top_level):
     """Sample ``count`` start-goal pairs and collect training episodes.

     The sampling respects the current curriculum coefficient and the
     episodes are collected for ``top_level`` in train mode.
     """
     print_and_log('collecting {} train episodes of level {}'.format(
         count, top_level))
     pairs = self.episode_runner.game.get_start_goals(
         count, self.curriculum_coefficient, get_free_states=True)
     return self.collect_data(pairs, top_level, True)
Ejemplo n.º 11
0
def run_for_config(config):
    """Train the sequential policy described by *config*.

    Sets up output/log directories, builds the game, network and model
    savers, then runs training cycles with periodic testing, best-model
    tracking, learn-rate decay and curriculum raising, finishing with a
    final test of the restored best model. Returns the best test cost.
    """
    # set the name of the model
    model_name = config['general']['name']
    now = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y_%m_%d_%H_%M_%S')
    # timestamp-prefixed so repeated runs never collide on disk
    model_name = now + '_' + model_name if model_name is not None else now

    # where we save all the outputs
    scenario = config['general']['scenario']
    working_dir = os.path.join(get_base_directory(), 'sequential', scenario)
    init_dir(working_dir)

    saver_dir = os.path.join(working_dir, 'models', model_name)
    init_dir(saver_dir)
    init_log(log_file_path=os.path.join(saver_dir, 'log.txt'))
    copy_config(config, os.path.join(saver_dir, 'config.yml'))
    episodic_success_rates_path = os.path.join(saver_dir, 'results.txt')
    weights_log_dir = os.path.join(saver_dir, 'weights_logs')
    init_dir(weights_log_dir)
    test_trajectories_dir = os.path.join(working_dir, 'test_trajectories',
                                         model_name)
    init_dir(test_trajectories_dir)

    # generate game
    game = _get_game(config)

    network = NetworkSequential(config,
                                game.get_state_space_size(),
                                game.get_action_space_size(),
                                is_rollout_agent=False)
    network_variables = network.get_all_variables()

    # save model: 'latest' keeps the 2 most recent checkpoints,
    # 'best' keeps only the single best-performing one
    latest_saver = ModelSaver(os.path.join(saver_dir, 'latest_model'),
                              2,
                              'latest',
                              variables=network_variables)
    best_saver = ModelSaver(os.path.join(saver_dir, 'best_model'),
                            1,
                            'best',
                            variables=network_variables)

    summaries_collector = SummariesCollector(
        os.path.join(working_dir, 'tensorboard', model_name), model_name)

    with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(
            gpu_options=tf.compat.v1.GPUOptions(
                per_process_gpu_memory_fraction=config['general']
                ['gpu_usage']))) as sess:
        sess.run(tf.compat.v1.global_variables_initializer())

        episode_runner = EpisodeRunnerSequential(
            config,
            game,
            curriculum_coefficient=get_initial_curriculum(config))

        trainer = TrainerSequential(model_name, config, working_dir, network,
                                    sess, episode_runner, summaries_collector)

        decrease_learn_rate_if_static_success = config['model'][
            'decrease_learn_rate_if_static_success']
        stop_training_after_learn_rate_decrease = config['model'][
            'stop_training_after_learn_rate_decrease']
        reset_best_every = config['model']['reset_best_every']

        # best-model and patience tracking across all cycles
        global_step = 0
        best_cost, best_cost_global_step, best_curriculum_coefficient = None, None, None
        no_test_improvement, consecutive_learn_rate_decrease = 0, 0

        for cycle in range(config['general']['training_cycles']):
            print_and_log('starting cycle {}'.format(cycle))

            global_step, success_ratio = trainer.train_policy(global_step)

            if (cycle + 1) % config['policy']['decrease_std_every'] == 0:
                network.decrease_base_std(sess)
                print_and_log('new base stds {}'.format(
                    network.get_base_std(sess)))

            print_and_log('done training cycle {} global step {}'.format(
                cycle, global_step))

            # save every now and then
            if cycle % config['general']['save_every_cycles'] == 0:
                latest_saver.save(sess, global_step=global_step)

            if cycle % config['general']['test_frequency'] == 0:
                # do test
                test_successes, test_cost, _, endpoints_by_path = trainer.collect_data(
                    config['general']['test_episodes'],
                    is_train=False,
                    use_fixed_start_goal_pairs=True)
                summaries_collector.write_test_success_summaries(
                    sess, global_step, test_successes, test_cost,
                    episode_runner.curriculum_coefficient)
                with open(episodic_success_rates_path, 'a') as f:
                    f.write('{} {} {} {}'.format(
                        trainer.train_episodes_counter, test_successes,
                        test_cost, os.linesep))

                # decide how to act next
                print_and_log('old cost was {} at step {}'.format(
                    best_cost, best_cost_global_step))
                print_and_log('current learn rates {}'.format(
                    network.get_learn_rate(sess)))
                print_and_log('current base stds {}'.format(
                    network.get_base_std(sess)))
                # lower test cost is better
                if best_cost is None or test_cost < best_cost:
                    print_and_log('new best cost {} at step {}'.format(
                        test_cost, global_step))
                    best_cost, best_cost_global_step = test_cost, global_step
                    best_curriculum_coefficient = episode_runner.curriculum_coefficient
                    no_test_improvement = 0
                    consecutive_learn_rate_decrease = 0
                    best_saver.save(sess, global_step)
                    test_trajectories_file = os.path.join(
                        test_trajectories_dir, '{}.txt'.format(global_step))
                    serialize_compress(endpoints_by_path,
                                       test_trajectories_file)
                else:
                    print_and_log(
                        'new model is not the best with cost {} at step {}'.
                        format(test_cost, global_step))
                    no_test_improvement += 1
                    print_and_log('no improvement count {} of {}'.format(
                        no_test_improvement,
                        decrease_learn_rate_if_static_success))
                    if reset_best_every > 0 and no_test_improvement % reset_best_every == reset_best_every - 1:
                        # restore the model every once in a while if did not find a better solution in a while
                        best_saver.restore(sess)
                        episode_runner.curriculum_coefficient = best_curriculum_coefficient
                    if no_test_improvement == decrease_learn_rate_if_static_success:
                        # restore the best model
                        if config['model']['restore_on_decrease']:
                            best_saver.restore(sess)
                            episode_runner.curriculum_coefficient = best_curriculum_coefficient
                        network.decrease_learn_rates(sess)

                        no_test_improvement = 0
                        consecutive_learn_rate_decrease += 1
                        print_and_log('decreasing learn rates {} of {}'.format(
                            consecutive_learn_rate_decrease,
                            stop_training_after_learn_rate_decrease))
                        print_and_log('new learn rates {}'.format(
                            network.get_learn_rate(sess)))
                        # stop training entirely after too many decreases
                        if consecutive_learn_rate_decrease == stop_training_after_learn_rate_decrease:
                            print_and_log('needs to stop')
                            best_saver.restore(sess)
                            break

            if episode_runner.curriculum_coefficient is not None:
                # raise the curriculum difficulty once training succeeds often enough
                if success_ratio > config['curriculum'][
                        'raise_when_train_above']:
                    print_and_log('current curriculum coefficient {}'.format(
                        episode_runner.curriculum_coefficient))
                    episode_runner.curriculum_coefficient *= config[
                        'curriculum']['raise_times']
                    print_and_log('curriculum coefficient raised to {}'.format(
                        episode_runner.curriculum_coefficient))

            # mark in log the end of cycle
            print_and_log(os.linesep)

        print_and_log('end of run best: {} from step: {}'.format(
            best_cost, best_cost_global_step))
        print_and_log('testing on a new set of start-goal pairs')
        best_saver.restore(sess)
        test_trajectories_file = os.path.join(test_trajectories_dir, '-1.txt')
        # [-1] keeps only endpoints_by_path from the returned tuple
        endpoints_by_path = trainer.collect_data(
            config['general']['test_episodes'],
            is_train=False,
            use_fixed_start_goal_pairs=True)[-1]
        serialize_compress(endpoints_by_path, test_trajectories_file)

        close_log()
        return best_cost