Example 1
    def test_logger(self):
        args = SimpleNamespace()
        args.args_data = ""

        logger.set_snapshot_dir("exp")
        logger.save_itr_params(1, {})
        logger.log_parameters_lite("exp-log", args)
        logger.log_variant("exp-log", {})
        logger.record_tabular_misc_stat("key", 1)
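The test above only exercises the logger's setup calls. As a complementary illustration, here is a minimal sketch of the per-iteration tabular cycle that the remaining examples rely on (the loop and key names are illustrative, not taken from the test):

from garage.misc import logger

logger.add_tabular_output('progress.csv')       # register a CSV tabular output
for itr in range(3):                            # hypothetical training loop
    logger.record_tabular('Iteration', itr)     # queue key/value pairs for this row
    logger.record_tabular('AverageReturn', 0.0)
    logger.dump_tabular()                       # flush the queued row to all tabular outputs
logger.remove_tabular_output('progress.csv')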
Example 2
with open(osp.join(args.log_dir, 'total_result.csv'), mode='w') as csv_file:
    fieldnames = ['step_count']
    for i in range(top_k):
        fieldnames.append('reward ' + str(i))
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()

    for trial in range(args.n_trial):
        # Create the logger
        log_dir = args.log_dir + '/' + str(trial)

        tabular_log_file = osp.join(log_dir, 'process.csv')
        text_log_file = osp.join(log_dir, 'text.txt')
        params_log_file = osp.join(log_dir, 'args.txt')

        logger.set_snapshot_dir(log_dir)
        logger.set_snapshot_mode(args.snapshot_mode)
        logger.set_snapshot_gap(args.snapshot_gap)
        logger.log_parameters_lite(params_log_file, args)
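        # The logger writes to every registered output, so the previous trial's
        # text/tabular files must be removed before the new ones are added;
        # otherwise each trial would keep appending to all earlier files as well.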
        if trial > 0:
            old_log_dir = args.log_dir + '/' + str(trial - 1)
            logger.pop_prefix()
            logger.remove_text_output(osp.join(old_log_dir, 'text.txt'))
            logger.remove_tabular_output(osp.join(old_log_dir, 'process.csv'))
        logger.add_text_output(text_log_file)
        logger.add_tabular_output(tabular_log_file)
        logger.push_prefix("[" + args.exp_name + '_trial ' + str(trial) + "]")

        np.random.seed(trial)

        # Instantiate the garage objects
Example 3
def run_experiment(argv):
    default_log_dir = config.LOG_DIR
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # avoid name clashes when running distributed jobs
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')

    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=("Number of parallel workers to perform rollouts. "
              "0 => don't start any workers"))
    parser.add_argument(
        '--exp_name',
        type=str,
        default=default_exp_name,
        help='Name of the experiment.')
    parser.add_argument(
        '--log_dir',
        type=str,
        default=None,
        help='Path to save the log and iteration snapshot.')
    parser.add_argument(
        '--snapshot_mode',
        type=str,
        default='all',
        help='Mode to save the snapshot. Can be one of "all" '
        '(all iterations will be saved), "last" (only '
        'the last iteration will be saved), "gap" (every '
        '`snapshot_gap` iterations are saved), or "none" '
        '(do not save snapshots).')
    parser.add_argument(
        '--snapshot_gap',
        type=int,
        default=1,
        help='Gap between snapshot iterations.')
    parser.add_argument(
        '--tabular_log_file',
        type=str,
        default='progress.csv',
        help='Name of the tabular log file (in csv).')
    parser.add_argument(
        '--text_log_file',
        type=str,
        default='debug.log',
        help='Name of the text log file (in pure text).')
    parser.add_argument(
        '--tensorboard_step_key',
        type=str,
        default=None,
        help=("Name of the step key in tensorboard_summary."))
    parser.add_argument(
        '--params_log_file',
        type=str,
        default='params.json',
        help='Name of the parameter log file (in json).')
    parser.add_argument(
        '--variant_log_file',
        type=str,
        default='variant.json',
        help='Name of the variant log file (in json).')
    parser.add_argument(
        '--resume_from',
        type=str,
        default=None,
        help='Name of the pickle file to resume experiment from.')
    parser.add_argument(
        '--plot',
        type=ast.literal_eval,
        default=False,
        help='Whether to plot the iteration results')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format)')
    parser.add_argument('--seed', type=int, help='Random seed for numpy')
    parser.add_argument(
        '--args_data', type=str, help='Pickled data for objects')
    parser.add_argument(
        '--variant_data',
        type=str,
        help='Pickled data for variant configuration')
    parser.add_argument(
        '--use_cloudpickle', type=ast.literal_eval, default=False)

    args = parser.parse_args(argv[1:])

    if args.seed is not None:
        set_seed(args.seed)

    # SIGINT is blocked for all processes created in parallel_sampler to avoid
    # the creation of sleeping and zombie processes.
    #
    # If the user interrupts run_experiment, there's a chance some processes
    # won't die due to a deadlock condition in which one of the children in the
    # parallel sampler exits without releasing a lock after catching SIGINT.
    #
    # Later the parent tries to acquire the same lock to proceed with its
    # cleanup, but it stays asleep waiting for the lock to be released.
    # In the meantime, all the processes in the parallel sampler remain in the
    # zombie state since the parent cannot proceed with their cleanup.
    with mask_signals([signal.SIGINT]):
        if args.n_parallel > 0:
            parallel_sampler.initialize(n_parallel=args.n_parallel)
            if args.seed is not None:
                parallel_sampler.set_seed(args.seed)

    if not args.plot:
        garage.plotter.Plotter.disable()
        garage.tf.plotter.Plotter.disable()

    if args.log_dir is None:
        log_dir = osp.join(default_log_dir, args.exp_name)
    else:
        log_dir = args.log_dir
    tabular_log_file = osp.join(log_dir, args.tabular_log_file)
    text_log_file = osp.join(log_dir, args.text_log_file)
    params_log_file = osp.join(log_dir, args.params_log_file)

    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = osp.join(log_dir, args.variant_log_file)
        logger.log_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    if not args.use_cloudpickle:
        logger.log_parameters_lite(params_log_file, args)

    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)
    logger.set_tensorboard_dir(log_dir)
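    # Remember the current snapshot settings so they can be restored once the
    # experiment finishes (see the teardown at the end of this function).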
    prev_snapshot_dir = logger.get_snapshot_dir()
    prev_mode = logger.get_snapshot_mode()
    logger.set_snapshot_dir(log_dir)
    logger.set_snapshot_mode(args.snapshot_mode)
    logger.set_snapshot_gap(args.snapshot_gap)
    logger.set_log_tabular_only(args.log_tabular_only)
    logger.set_tensorboard_step_key(args.tensorboard_step_key)
    logger.push_prefix("[%s] " % args.exp_name)

    if args.resume_from is not None:
        data = joblib.load(args.resume_from)
        assert 'algo' in data
        algo = data['algo']
        algo.train()
    else:
        # Decode and run the task passed via --args_data
        if args.use_cloudpickle:
            import cloudpickle
            method_call = cloudpickle.loads(base64.b64decode(args.args_data))
            try:
                method_call(variant_data)
            except BaseException:
                children = garage.plotter.Plotter.get_plotters()
                children += garage.tf.plotter.Plotter.get_plotters()
                if args.n_parallel > 0:
                    children += [parallel_sampler]
                child_proc_shutdown(children)
                raise
        else:
            data = pickle.loads(base64.b64decode(args.args_data))
            maybe_iter = concretize(data)
            if is_iterable(maybe_iter):
                for _ in maybe_iter:
                    pass

    logger.set_snapshot_mode(prev_mode)
    logger.set_snapshot_dir(prev_snapshot_dir)
    logger.remove_tabular_output(tabular_log_file)
    logger.remove_text_output(text_log_file)
    logger.pop_prefix()
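A minimal sketch of how this entry point might be driven directly, inferred from the --args_data/--use_cloudpickle flags above (the task function and argument values are hypothetical; in garage this command line is normally assembled by the experiment launcher rather than written by hand):

import base64

import cloudpickle


def my_task(variant):
    # Placeholder task; a real experiment would build and train an algorithm here.
    print('running with variant', variant)


encoded_task = base64.b64encode(cloudpickle.dumps(my_task)).decode('ascii')
run_experiment([
    'run_experiment',               # argv[0] is skipped by parse_args(argv[1:])
    '--use_cloudpickle', 'True',
    '--args_data', encoded_task,
    '--n_parallel', '0',
    '--seed', '1',
])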
Example 4
def run_task(*_):
    # Configure TF session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config).as_default() as tf_session:
        ## Load data from itr_N.pkl
        with open(snapshot_file, 'rb') as file:
            saved_data = dill.load(file)


        ## Construct PathTrie and find missing skill description
        # This is basically ASA.decide_new_skill
        min_length = 3
        max_length = 5
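        # Maps discrete action indices to one-character labels so that frequent
        # subpaths can be printed as short strings (see 'actions_text' below).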
        action_map = {0: 's', 1: 'L', 2: 'R'}
        min_f_score = 1
        max_results = 10
        aggregations = []  # sublist of ['mean', 'most_freq', 'nearest_mean', 'medoid'] or 'all'

        paths = saved_data['paths']
        path_trie = PathTrie(saved_data['hrl_policy'].num_skills)
        for path in paths:
            actions = path['actions'].argmax(axis=1).tolist()
            observations = path['observations']
            path_trie.add_all_subpaths(
                    actions,
                    observations,
                    min_length=min_length,
                    max_length=max_length
            )
        logger.log('Searched {} rollouts'.format(len(paths)))

        frequent_paths = path_trie.items(
                action_map=action_map,
                min_count=10,  # len(paths) * 2
                min_f_score=min_f_score,
                max_results=max_results,
                aggregations=aggregations
        )
        logger.log('Found {} frequent paths: [index, actions, count, f-score]'.format(len(frequent_paths)))
        for i, f_path in enumerate(frequent_paths):
            logger.log('    {:2}: {:{pad}}\t{}\t{:.3f}'.format(
                i,
                f_path['actions_text'],
                f_path['count'],
                f_path['f_score'],
                pad=max_length))

        top_subpath = frequent_paths[0]
        start_obss = top_subpath['start_observations']
        end_obss   = top_subpath['end_observations']



        ## Prepare elements for training
        # Environment
        base_env = saved_data['env'].env.env  # <NormalizedEnv<MinibotEnv instance>>
        skill_learning_env = TfEnv(
                SkillLearningEnv(
                    # base env that was wrapped in HierarchizedEnv (not fully unwrapped - may be normalized!)
                    env=base_env,
                    start_obss=start_obss,
                    end_obss=end_obss
                )
        )

        # Skill policy
        hrl_policy = saved_data['hrl_policy']
        new_skill_policy, new_skill_id = hrl_policy.create_new_skill(
                end_obss=end_obss
        )

        # Baseline - clone baseline specified in low_algo_kwargs, or top-algo's baseline
        low_algo_kwargs = dict(saved_data['low_algo_kwargs'])
        baseline_to_clone = low_algo_kwargs.get('baseline', saved_data['baseline'])
        baseline = Serializable.clone(  # to create blank baseline
                obj=baseline_to_clone,
                name='{}Skill{}'.format(type(baseline_to_clone).__name__, new_skill_id)
        )
        low_algo_kwargs['baseline'] = baseline
        low_algo_cls = saved_data['low_algo_cls']

        # Set custom training params (should've been set in asa_basic_run)
        low_algo_kwargs['batch_size'] = 2500
        low_algo_kwargs['max_path_length'] = 50
        low_algo_kwargs['n_itr'] = 500

        # Algorithm
        algo = low_algo_cls(
            env=skill_learning_env,
            policy=new_skill_policy,
            **low_algo_kwargs
        )

        # Logger parameters
        logger_snapshot_dir_before = logger.get_snapshot_dir()
        logger_snapshot_mode_before = logger.get_snapshot_mode()
        logger_snapshot_gap_before = logger.get_snapshot_gap()
        # No need to change the snapshot dir in this script; that is done in ASA-algo.create_and_train_new_skill()
        # logger.set_snapshot_dir(os.path.join(
        #         logger_snapshot_dir_before,
        #         'skill{}'.format(new_skill_id)
        # ))
        logger.set_snapshot_mode('none')
        logger.set_tensorboard_step_key('Iteration')


        ## Train new skill
        with logger.prefix('Skill {} | '.format(new_skill_id)):
            algo.train(sess=tf_session)



        ## Save new policy and its end_obss (we'll construct the skill stopping function
        #  from them manually in asa_resume_with_new_skill.py)
        out_file = os.path.join(logger.get_snapshot_dir(), 'final.pkl')
        with open(out_file, 'wb') as file:
            out_data = {
                    'policy': new_skill_policy,
                    'subpath': top_subpath
            }
            dill.dump(out_data, file)

        # Restore logger parameters
        logger.set_snapshot_dir(logger_snapshot_dir_before)
        logger.set_snapshot_mode(logger_snapshot_mode_before)
        logger.set_snapshot_gap(logger_snapshot_gap_before)
Example 5
def run_experiment(argv):
    default_log_dir = config.LOG_DIR
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # avoid name clashes when running distributed jobs
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')

    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=("Number of parallel workers to perform rollouts. "
              "0 => don't start any workers"))
    parser.add_argument(
        '--exp_name',
        type=str,
        default=default_exp_name,
        help='Name of the experiment.')
    parser.add_argument(
        '--log_dir',
        type=str,
        default=None,
        help='Path to save the log and iteration snapshot.')
    parser.add_argument(
        '--snapshot_mode',
        type=str,
        default='all',
        help='Mode to save the snapshot. Can be one of "all" '
        '(all iterations will be saved), "last" (only '
        'the last iteration will be saved), "gap" (every '
        '`snapshot_gap` iterations are saved), or "none" '
        '(do not save snapshots).')
    parser.add_argument(
        '--snapshot_gap',
        type=int,
        default=1,
        help='Gap between snapshot iterations.')
    parser.add_argument(
        '--tabular_log_file',
        type=str,
        default='progress.csv',
        help='Name of the tabular log file (in csv).')
    parser.add_argument(
        '--text_log_file',
        type=str,
        default='debug.log',
        help='Name of the text log file (in pure text).')
    parser.add_argument(
        '--tensorboard_step_key',
        type=str,
        default=None,
        help=("Name of the step key in tensorboard_summary."))
    parser.add_argument(
        '--params_log_file',
        type=str,
        default='params.json',
        help='Name of the parameter log file (in json).')
    parser.add_argument(
        '--variant_log_file',
        type=str,
        default='variant.json',
        help='Name of the variant log file (in json).')
    parser.add_argument(
        '--resume_from',
        type=str,
        default=None,
        help='Name of the pickle file to resume experiment from.')
    parser.add_argument(
        '--plot',
        type=ast.literal_eval,
        default=False,
        help='Whether to plot the iteration results')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format)')
    parser.add_argument('--seed', type=int, help='Random seed for numpy')
    parser.add_argument(
        '--args_data', type=str, help='Pickled data for stub objects')
    parser.add_argument(
        '--variant_data',
        type=str,
        help='Pickled data for variant configuration')
    parser.add_argument(
        '--use_cloudpickle', type=ast.literal_eval, default=False)

    args = parser.parse_args(argv[1:])

    assert (os.environ.get("JOBLIB_START_METHOD", None) == "forkserver")
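    # The joblib start method must be 'forkserver' here; presumably this avoids
    # forking a parent that already holds TensorFlow/CUDA state into the workers.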
    if args.seed is not None:
        set_seed(args.seed)

    if args.n_parallel > 0:
        from garage.sampler import parallel_sampler
        parallel_sampler.initialize(n_parallel=args.n_parallel)
        if args.seed is not None:
            parallel_sampler.set_seed(args.seed)

    if not args.plot:
        garage.plotter.Plotter.disable()
        garage.tf.plotter.Plotter.disable()

    if args.log_dir is None:
        log_dir = osp.join(default_log_dir, args.exp_name)
    else:
        log_dir = args.log_dir
    tabular_log_file = osp.join(log_dir, args.tabular_log_file)
    text_log_file = osp.join(log_dir, args.text_log_file)
    params_log_file = osp.join(log_dir, args.params_log_file)

    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = osp.join(log_dir, args.variant_log_file)
        logger.log_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    if not args.use_cloudpickle:
        logger.log_parameters_lite(params_log_file, args)

    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)
    logger.set_tensorboard_dir(log_dir)
    prev_snapshot_dir = logger.get_snapshot_dir()
    prev_mode = logger.get_snapshot_mode()
    logger.set_snapshot_dir(log_dir)
    logger.set_snapshot_mode(args.snapshot_mode)
    logger.set_snapshot_gap(args.snapshot_gap)
    logger.set_log_tabular_only(args.log_tabular_only)
    logger.set_tensorboard_step_key(args.tensorboard_step_key)
    logger.push_prefix("[%s] " % args.exp_name)

    if args.resume_from is not None:
        data = joblib.load(args.resume_from)
        assert 'algo' in data
        algo = data['algo']
        algo.train()
    else:
        # Decode and run the task passed via --args_data
        if args.use_cloudpickle:
            import cloudpickle
            method_call = cloudpickle.loads(base64.b64decode(args.args_data))
            try:
                method_call(variant_data)
            except BaseException:
                if args.n_parallel > 0:
                    parallel_sampler.terminate()
                raise
        else:
            data = pickle.loads(base64.b64decode(args.args_data))
            maybe_iter = concretize(data)
            if is_iterable(maybe_iter):
                for _ in maybe_iter:
                    pass

    logger.set_snapshot_mode(prev_mode)
    logger.set_snapshot_dir(prev_snapshot_dir)
    logger.remove_tabular_output(tabular_log_file)
    logger.remove_text_output(text_log_file)
    logger.pop_prefix()
Example 6
from sandbox.asa.envs import MinibotEnv, HierarchizedEnv
from sandbox.asa.policies import MinibotForwardPolicy, MinibotRightPolicy, MinibotLeftPolicy
from sandbox.asa.policies import HierarchicalPolicy
from sandbox.asa.sampler import skill_rollout

from garage.misc import logger
from garage.tf.envs import TfEnv
from garage.tf.policies import GaussianMLPPolicy, CategoricalMLPPolicy



base_env = MinibotEnv(use_maps=[1])
step = MinibotForwardPolicy(base_env.spec)
left = MinibotLeftPolicy(base_env.spec)
right = MinibotRightPolicy(base_env.spec)
logger.set_snapshot_dir('/home/h/holas3/garage/data/local/minibot-env-playground')


# Skill policies, operating in base environment
tf_base_env = TfEnv(base_env)
trained_skill_policies = [
    MinibotForwardPolicy(env_spec=base_env.spec),
    MinibotLeftPolicy(env_spec=base_env.spec)
]
trained_skill_policies_stop_funcs = [
    lambda path: len(path['actions']) >= 5,  # 5 steps to move 1 tile
    lambda path: len(path['actions']) >= 3  # 3 steps to rotate 90°
]
skill_policy_prototype = GaussianMLPPolicy(
    env_spec=tf_base_env.spec,
    hidden_sizes=(64, 64)
)
Example 7
from sandbox.asa.envs import GridworldGathererEnv
from sandbox.asa.policies import GridworldTargetPolicy, GridworldStepPolicy
from sandbox.asa.sampler import skill_rollout

from garage.tf.envs import TfEnv
from garage.misc import logger

env = TfEnv(
    GridworldGathererEnv(
        plot={'visitation': {
            'save': False,
            'live': True,
            'alpha': 1
        }}))
policy = GridworldStepPolicy(
    env_spec=env.spec,
    # target=(3, 5)
    direction='up')

logger.set_snapshot_dir(
    '/home/h/holas3/garage/data/local/gridworld-env-playground')
path = skill_rollout(env=env,
                     agent=policy,
                     skill_stopping_func=policy.skill_stopping_func,
                     max_path_length=20,
                     reset_start_rollout=True)

env.unwrapped.log_diagnostics([path])
input('< Press Enter to quit >')  # Prevent plots from closing
Example 8
    def create_and_train_new_skill(self, skill_subpath):
        """
        Create and train a new skill based on the given subpath. The new skill policy
        and its ID are returned, and also saved in self._hrl_policy.
        """
        ## Prepare elements for training
        # Environment
        skill_learning_env = TfEnv(
                SkillLearningEnv(
                    # base env that was wrapped in HierarchizedEnv (not fully unwrapped - may be normalized!)
                    env=self.env.env.env,
                    start_obss=skill_subpath['start_observations'],
                    end_obss=skill_subpath['end_observations']
                )
        )

        # Skill policy
        new_skill_pol, new_skill_id = self._hrl_policy.create_new_skill(skill_subpath['end_observations'])  # blank policy to be trained

        # Baseline - clone baseline specified in low_algo_kwargs, or top-algo's baseline
        #   We need to clone baseline, as each skill policy must have its own instance
        la_kwargs = dict(self._low_algo_kwargs)
        baseline_to_clone = la_kwargs.get('baseline', self.baseline)
        baseline = Serializable.clone(  # to create blank baseline
                obj=baseline_to_clone,
                name='{}Skill{}'.format(type(baseline_to_clone).__name__, new_skill_id)
        )
        la_kwargs['baseline'] = baseline

        # Algorithm
        algo = self._low_algo_cls(
                env=skill_learning_env,
                policy=new_skill_pol,
                **la_kwargs
        )

        # Logger parameters
        logger.dump_tabular(with_prefix=False)
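        # dump_tabular() flushes any tabular data recorded by the parent algorithm
        # before the logger is re-pointed at the new skill's training run.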
        logger.log('Launching training of the new skill')
        logger_snapshot_dir_before = logger.get_snapshot_dir()
        logger_snapshot_mode_before = logger.get_snapshot_mode()
        logger_snapshot_gap_before = logger.get_snapshot_gap()
        logger.set_snapshot_dir(os.path.join(
                logger_snapshot_dir_before,
                'skill{}'.format(new_skill_id)
        ))
        logger.set_snapshot_mode('none')
        # logger.set_snapshot_gap(max(1, np.floor(la_kwargs['n_itr'] / 10)))
        logger.push_tabular_prefix('Skill{}/'.format(new_skill_id))
        logger.set_tensorboard_step_key('Iteration')

        # Train new skill
        with logger.prefix('Skill {} | '.format(new_skill_id)):
            algo.train(sess=self._tf_sess)

        # Restore logger parameters
        logger.pop_tabular_prefix()
        logger.set_snapshot_dir(logger_snapshot_dir_before)
        logger.set_snapshot_mode(logger_snapshot_mode_before)
        logger.set_snapshot_gap(logger_snapshot_gap_before)
        logger.log('Training of the new skill finished')

        return new_skill_pol, new_skill_id