def adapt_and_test():
    import os
    import dill
    from playground.maml.maml_torch.maml_multi_step import FunctionalMLP

    logger.configure(log_directory=Args.log_dir, prefix=Args.log_prefix)
    logger.log_params(Args=vars(Args))

    # load weights
    with open(os.path.join(Args.log_dir, Args.log_prefix, Args.weight_path),
              'rb') as f:
        weights = dill.load(f)
    model = FunctionalMLP(1, 1)

    losses = DefaultBear(list)
    for amp, task in amp_tasks:
        model.params.update({
            k: t.tensor(v, requires_grad=True, dtype=t.double).to(device)
            for k, v in weights[0].items()
        })
        sgd = t.optim.SGD(model.parameters(), lr=Args.learning_rate)
        proper = t.tensor(task.proper()).to(device)
        samples = t.tensor(task.samples(Args.k_shot)).to(device)

        for grad_ind in range(Args.grad_steps):
            with t.no_grad():
                xs, labels = proper
                ys = model(xs.unsqueeze(-1))
                loss = model.criteria(ys, labels.unsqueeze(-1))
                logger.log(grad_ind,
                           loss=loss.item(),
                           silent=grad_ind != Args.grad_steps - 1)
                losses[f"amp-{amp:.2f}-loss"].append(loss.item())

            xs, labels = samples
            ys = model(xs.unsqueeze(-1))
            loss = model.criteria(ys, labels.unsqueeze(-1))
            sgd.zero_grad()
            loss.backward()
            sgd.step()
        # losses = np.array([v for k, v in losses.items()])

    import matplotlib.pyplot as plt
    fig = plt.figure()
    plt.title(f'Learning Curves')
    for amp, task in amp_tasks:
        plt.plot(losses[f"amp-{amp:.2f}-loss"], label=f"amp {amp:.2f}")
    plt.legend()
    logger.log_pyplot(None, key=f"losses/learning_curves_amp.png", fig=fig)
    plt.close()

    average_losses = np.array(
        [losses[f"amp-{amp:.2f}-loss"] for amp, task in amp_tasks])
    fig = plt.figure()
    plt.title(f'Learning Curves Averaged amp ~ [5 - 10]')
    plt.plot(average_losses.mean(0))
    plt.ylim(0, 28)
    logger.log_pyplot(None, key=f"losses/learning_curves_amp_all.png", fig=fig)
    plt.close()
    def __init__(self,
                 exp_prefix,
                 est_params,
                 sim_params,
                 observations,
                 keys_of_interest,
                 n_mc_samples=10**7,
                 n_x_cond=5,
                 n_seeds=5,
                 use_gpu=True,
                 tail_measures=True):

        assert est_params and exp_prefix and sim_params and keys_of_interest
        assert observations.all()

        # every simulator configuration will be run multiple times with different randomness seeds
        sim_params = _add_seeds_to_sim_params(n_seeds, sim_params)

        self.observations = observations
        self.n_mc_samples = n_mc_samples
        self.n_x_cond = n_x_cond
        self.keys_of_interest = keys_of_interest
        self.exp_prefix = exp_prefix
        self.use_gpu = use_gpu
        self.tail_measures = tail_measures

        logger.configure(log_directory=config.DATA_DIR,
                         prefix=exp_prefix,
                         color='green')
        ''' ---------- Either load or generate the configs ----------'''
        config_pkl_path = os.path.join(logger.log_directory, logger.prefix,
                                       EXP_CONFIG_FILE)

        if os.path.isfile(config_pkl_path):
            logger.log("{:<70s} {:<30s}".format(
                "Loading experiment previous configs from file: ",
                config_pkl_path))
            self.configs = logger.load_pkl(EXP_CONFIG_FILE)
        else:
            logger.log("{:<70s} {:<30s}".format(
                "Generating and storing experiment configs under: ",
                config_pkl_path))
            self.configs = self._generate_configuration_variants(
                est_params, sim_params)
            logger.dump_pkl(data=self.configs, path=EXP_CONFIG_FILE)
        ''' ---------- Either load already existing results or start a new result collection ---------- '''
        results_pkl_path = os.path.join(logger.log_directory, logger.prefix,
                                        RESULTS_FILE)
        if os.path.isfile(results_pkl_path):
            logger.log_line("{:<70s} {:<30s}".format("Continue with: ",
                                                     results_pkl_path))
            self.gof_single_res_collection = dict(
                logger.load_pkl_log(RESULTS_FILE))

        else:  # start from scratch
            self.gof_single_res_collection = {}

        self.gof_results = GoodnessOfFitResults(self.gof_single_res_collection)
Example #3
def test_metrics_prefix(setup):
    from ml_logger import logger

    logger.remove("metrics.pkl")

    with logger.Prefix(metrics="evaluate/", sep=""):
        logger.log(loss=0.5, flush=True)

    assert logger.read_metrics("evaluate/loss", )[0] == 0.5
    def _fit_by_cv_ml_eval(model_dict_tuple, train_valid_set_tuple):
        estimator_key, conf_dict = model_dict_tuple
        X_train, Y_train, X_valid, Y_valid = train_valid_set_tuple

        estimator = _initialize_model_cv_ml(conf_dict)
        estimator.fit(X_train, Y_train)
        score = estimator.score(X_valid, Y_valid)

        result_dict[estimator_key].append(score)
        logger.log('%s: score: %.4f'%(estimator_key, score))
Example #5
def test(setup):
    d = Color(3.1415926, 'red')
    s = "{:.1}".format(d)
    print(s)

    logger.log_params(G=dict(some_config="hey"))
    logger.log(step=0, some=Color(0.1, 'yellow'))
    logger.log(step=1, some=Color(0.28571, 'yellow', lambda v: "{:.5f}%".format(v * 100)))
    logger.log(step=2, some=Color(0.85, 'yellow', percent))
    logger.log({"some_var/smooth": 10}, some=Color(0.85, 'yellow', percent), step=3)
    logger.log(step=4, some=Color(10, 'yellow'))
def run_benchmark_train_test_fit_cv_ml(dataset, model_dict, seed=27, n_train_valid_splits=1, shuffle_splits=True,
                                       n_jobs_outer=-1):

    if logger.log_directory is None:
        logger.configure(log_directory='/tmp/ml-logger')

    rds = np.random.RandomState(seed)

    logger.log("\n------------------  empirical cv_ml benchmark with %s ----------------------" % str(dataset))

    datasets = zip(*dataset.get_train_valid_splits(valid_portion=0.2, n_splits=n_train_valid_splits,
                                                  shuffle=shuffle_splits, random_state=rds))

    exps = list(zip(*itertools.product(model_dict.items(), datasets)))

    manager = Manager()
    result_dict = manager.dict()

    for estimator_key in model_dict.keys():
        result_dict[estimator_key] = manager.list()

    def _fit_by_cv_ml_eval(model_dict_tuple, train_valid_set_tuple):
        estimator_key, conf_dict = model_dict_tuple
        X_train, Y_train, X_valid, Y_valid = train_valid_set_tuple

        estimator = _initialize_model_cv_ml(conf_dict)
        estimator.fit(X_train, Y_train)
        score = estimator.score(X_valid, Y_valid)

        result_dict[estimator_key].append(score)
        logger.log('%s: score: %.4f'%(estimator_key, score))

    executor = AsyncExecutor(n_jobs=n_jobs_outer)
    executor.run(_fit_by_cv_ml_eval, *exps)

    # convert result_dict into normal python dict containing lists
    result_dict = dict([(key, list(value)) for key, value in result_dict.items()])

    pprint(result_dict)

    # rearrange results as pandas df
    final_results_dict = {'scores_mean': [], 'scores_std': [], 'dataset': []}
    for estimator_key, scores in result_dict.items():
        final_results_dict['scores_mean'].append(np.nanmean(scores))
        final_results_dict['scores_std'].append(np.nanstd(scores))
        final_results_dict['dataset'].append(str(dataset))

    df = pd.DataFrame.from_dict(data=final_results_dict, orient='columns')
    df.index = list(model_dict.keys())

    logger.log('\n' + str(df))
    return df
Example #7
def experiment():
    logger.configure(log_directory=config.DATA_DIR,
                     prefix=EXP_PREFIX,
                     color='green')

    # 1) EUROSTOXX
    dataset = datasets.EuroStoxx50()

    result_df = run_benchmark_train_test_fit_cv(dataset,
                                                model_dict,
                                                n_train_valid_splits=3,
                                                n_eval_seeds=5,
                                                shuffle_splits=False,
                                                n_folds=5,
                                                seed=22,
                                                n_jobs_inner=-1,
                                                n_jobc_outer=3)

    # 2) NYC Taxi
    for n_samples in [10000]:
        dataset = datasets.NCYTaxiDropoffPredict(n_samples=n_samples)

    df = run_benchmark_train_test_fit_cv(dataset,
                                         model_dict,
                                         n_train_valid_splits=3,
                                         n_eval_seeds=5,
                                         shuffle_splits=True,
                                         n_folds=5,
                                         seed=22,
                                         n_jobs_inner=-1,
                                         n_jobc_outer=3)
    result_df = pd.concat([result_df, df], ignore_index=True)

    # 3) UCI
    for dataset_class in [
            datasets.BostonHousing, datasets.Conrete, datasets.Energy
    ]:
        dataset = dataset_class()
        df = run_benchmark_train_test_fit_cv(dataset,
                                             model_dict,
                                             n_train_valid_splits=3,
                                             n_eval_seeds=5,
                                             shuffle_splits=True,
                                             n_folds=5,
                                             seed=22,
                                             n_jobs_inner=-1,
                                             n_jobc_outer=3)
        result_df = pd.concat([result_df, df], ignore_index=True)

    logger.log('\n', str(result_df))
    logger.log('\n', result_df.to_latex())
Example #8
    def run_configurations(self,
                           dump_models=False,
                           multiprocessing=True,
                           n_workers=None):
        """
    Runs the given configurations, i.e.
    1) fits the estimator to the simulation and
    2) executes goodness-of-fit (currently: e.g. kl-divergence, wasserstein-distance etc.) tests
    Every successful run yields a result object of type GoodnessOfFitResult which contains
    information on both estimator, simulator and chosen hyperparameters

    such as n_samples, see GoodnessOfFitResult documentation for more information.

      Args:
        estimator_filter: a parameter to decide whether to execute just a specific type of estimator, e.g. "KernelMixtureNetwork",
                          must be one of the density estimator class types
        limit: limit the number of (potentially filtered) tasks
        dump_models: (boolean) whether to save/dump the fitted estimators

      Returns:
         returns two objects: (result_list, full_df)
          1) a GoodnessOfFitResults object containing all configurations as GoodnessOfFitSingleResult objects, carrying information about the
          estimator and simulator hyperparameters as well as n_obs, n_x_cond, n_mc_samples and the statistic results.
          2) a full pandas dataframe of the csv
          Additionally, if export_pickle is True, the path to the pickle file will be returned, i.e. return values are (results_list, full_df, path_to_pickle)

    """
        self.dump_models = dump_models
        ''' Asserts '''
        assert len(self.configs) > 0
        tasks = self.configs
        ''' Run the configurations '''

        logger.log("{:<70s} {:<30s}".format(
            "Number of total tasks in pipeline:", str(len(self.configs))))
        logger.log("{:<70s} {:<30s}".format(
            "Number of aleady finished tasks (found in results pickle): ",
            str(len(self.gof_single_res_collection))))

        iters = range(len(tasks))

        if multiprocessing:
            executor = AsyncExecutor(n_jobs=n_workers)
            executor.run(self._run_single_task, iters, tasks)

        else:
            for i, task in zip(iters, tasks):
                self._run_single_task(i, task)
Example #9
def sgd_baseline(lr=0.001):
    from playground.maml.maml_torch.tasks import Sine
    task = Sine()
    model = StandardMLP(1, 1) if G.debug else FunctionalMLP(1, 1)

    adam = t.optim.Adam([p for p in model.parameters()], lr=lr)
    mse = t.nn.MSELoss()
    for ep_ind in range(1000):
        xs, labels = h.const(task.proper())
        ys = model(xs.unsqueeze(-1))
        loss = mse(ys, labels.unsqueeze(-1))
        logger.log(ep_ind, loss=loss.item(), silent=ep_ind % 50)
        adam.zero_grad()
        loss.backward()
        adam.step()
    logger.flush()
def _initialize_model_cv(model_key, conf_dict, verbose=False):
    ''' make cartesian product of listed parameters per model '''
    assert 'estimator' in conf_dict.keys()
    estimator = conf_dict.pop('estimator')
    param_dict_cv = {}
    param_dict_init = {}
    for param_key, param_value in conf_dict.items():
        if type(param_value) in (list, tuple):
            param_dict_cv[param_key] = param_value
            param_dict_init[param_key] = param_value[0]
        else:
            param_dict_init[param_key] = param_value

    param_dict_init['name'] = model_key

    if verbose: logger.log('initialize %s'%model_key)

    estimator_instance = globals()[estimator](**param_dict_init)

    return estimator_instance, param_dict_cv, param_dict_init
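
A quick illustration of the convention `_initialize_model_cv` implements above: list- or tuple-valued entries in `conf_dict` become the cross-validation grid (with their first element used for initialization), while scalar entries are passed through unchanged. The estimator name and parameter keys below are made up for this sketch.

# hypothetical config; 'SomeEstimator', 'n_centers' and 'x_noise_std' are illustrative names
conf_dict = {'estimator': 'SomeEstimator',
             'n_centers': [20, 50, 100],   # list   -> CV grid, initialized with 20
             'x_noise_std': 0.1}           # scalar -> fixed init value

estimator_name = conf_dict.pop('estimator')
param_dict_cv = {k: v for k, v in conf_dict.items() if type(v) in (list, tuple)}
param_dict_init = {k: v[0] if type(v) in (list, tuple) else v for k, v in conf_dict.items()}

print(param_dict_cv)    # {'n_centers': [20, 50, 100]}
print(param_dict_init)  # {'n_centers': 20, 'x_noise_std': 0.1}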
Example #11
def regular_sgd_baseline(model, Task, n_epochs, batch_n, k_shot=100, **_):
    problem = Task()
    # simple gradient descent
    for ep_ind in trange(n_epochs, desc="Epochs", ncols=50, leave=False):
        loss = 0
        for _ in range(batch_n):
            xs, ys = problem.samples(k_shot)
            output = model(t.tensor(xs).unsqueeze(dim=-1))
            targets = t.tensor(ys).unsqueeze(dim=-1)
            loss += model.criteria(output, targets)

        loss /= batch_n

        model.zero_grad()
        loss.backward()
        model.step(lr=G.alpha)

        logger.log(ep_ind, loss=loss.item())

        if ep_ind % 100 == 0 or ep_ind == (n_epochs - 1):
            pass
        def _fit_by_cv_and_eval(estimator_key, conf_dict):
            estimator, param_grid, param_dict_init = _initialize_model_cv(estimator_key, conf_dict, verbose=True)

            # 1) perform cross-validation hyperparam search to select params
            selected_params = estimator.fit_by_cv(X_train, Y_train, param_grid=param_grid, n_folds=n_folds,
                                                  n_jobs=n_jobs_inner, random_state=rds)

            logger.log("%s selected params:"%estimator_key)
            logger.log_params(**selected_params)
            # 2) evaluate selected params with different initializations
            param_dict_init.update(selected_params)

            logger.log("evaluating %s parameters with %i seeds"%(estimator_key, len(eval_seeds)))
            scores = _evaluate_params(estimator.__class__, param_dict_init, X_train, Y_train, X_valid, Y_valid,
                                      seeds=eval_seeds)

            cv_result_dict[estimator_key] = {'selected_params': selected_params, 'scores': scores, 'eval_seeds': eval_seeds}
            logger.log("evaluation scores for %s: %s" % (estimator_key, str(scores)))
Example #13
def train_maml(*, n_tasks: int, tasks: MetaRLTasks, maml: E_MAML):
    if not G.inner_alg.startswith("BC"):
        path_gen = path_gen_fn(env=tasks.envs, policy=maml.runner.policy, start_reset=G.reset_on_start)
        next(path_gen)

    meta_path_gen = path_gen_fn(env=tasks.envs, policy=maml.meta_runner.policy, start_reset=G.reset_on_start)
    next(meta_path_gen)

    if G.load_from_checkpoint:
        # todo: add variable to checkpoint
        # todo: set the epoch_ind starting point here.
        logger.load_variables(G.load_from_checkpoint)

    if G.meta_sgd:
        assert maml.alpha is not None, "Coding Mistake if meta_sgd is truthy but maml.alpha is None."

    max_episode_length = tasks.spec.max_episode_steps

    sess = tf.get_default_session()
    epoch_ind, prefix = G.epoch_init - 1, ""
    while epoch_ind < G.epoch_init + G.n_epochs:
        logger.flush()
        logger.split()

        is_bc_test = (prefix != "test/" and G.eval_interval and epoch_ind % G.eval_interval == 0)
        prefix = "test/" if is_bc_test else ""
        epoch_ind += 0 if is_bc_test else 1

        if G.meta_sgd:
            alpha_lr = sess.run(maml.alpha)  # only used in the runner.
            logger.log(metrics={f"alpha_{i}/{stem(t.name, 2)}": a
                                for i, a_ in enumerate(alpha_lr)
                                for t, a in zip(maml.runner.trainables, a_)}, silent=True)
        else:
            alpha_lr = G.alpha.send(epoch_ind) if isinstance(G.alpha, Schedule) else np.array(G.alpha)
            logger.log(alpha=metrify(alpha_lr), epoch=epoch_ind, silent=True)

        beta_lr = G.beta.send(epoch_ind) if isinstance(G.beta, Schedule) else np.array(G.beta)
        clip_range = G.clip_range.send(epoch_ind) if isinstance(G.clip_range, Schedule) else np.array(G.clip_range)
        logger.log(beta=metrify(beta_lr), clip_range=metrify(clip_range), epoch=epoch_ind, silent=True)

        batch_timesteps = G.batch_timesteps.send(epoch_ind) \
            if isinstance(G.batch_timesteps, Schedule) else G.batch_timesteps

        # Compute updates for each task in the batch
        # 0. save value of variables
        # 1. sample
        # 2. gradient descent
        # 3. repeat step 1., 2. until all gradient steps are exhausted.
        batch_data = defaultdict(list)

        maml.save_weight_cache()
        load_ops = [] if DEBUG.no_weight_reset else [maml.cache.load]

        if G.checkpoint_interval and epoch_ind % G.checkpoint_interval == 0 \
                and not is_bc_test and epoch_ind >= G.start_checkpoint_after_epoch:
            cp_path = f"checkpoints/variables_{epoch_ind:04d}.pkl"
            logger.log_line(f'saving checkpoint {cp_path}')
            # note: of course I don't know that these are all of the trainables at the moment.
            logger.save_variables(tf.trainable_variables(), path=cp_path)

        feed_dict = {}
        for task_ind in range(n_tasks if is_bc_test else G.n_tasks):
            graph_branch = maml.graphs[0] if G.n_graphs == 1 else maml.graphs[task_ind]
            if G.n_graphs == 1:
                gradient_sum_op = maml.gradient_sum.set_op if task_ind == 0 else maml.gradient_sum.add_op

            print(f"task_ind {task_ind}...")
            if not DEBUG.no_task_resample:
                if not is_bc_test:
                    print(f'L250: sampling task')
                    tasks.sample()
                elif task_ind < n_tasks:
                    task_spec = dict(index=task_ind % n_tasks)
                    print(f'L254: sampling task {task_spec}')
                    tasks.sample(**task_spec)
                else:
                    raise RuntimeError('should never hit here.')

            for k in range(G.n_grad_steps + 1):  # 0 - 10 <== last one being the maml policy.
                _is_new = False
                # for imitation inner loss, we still sample trajectory for evaluation purposes, but
                # replace it with the demonstration data for learning
                if k < G.n_grad_steps:
                    if G.inner_alg.startswith("BC"):
                        p = p if G.single_sampling and k > 0 else \
                            bc.sample_demonstration_data(tasks.task_spec, key=("eval" if is_bc_test else None))
                    else:
                        p, _is_new = path_gen.send(batch_timesteps), True
                elif k == G.n_grad_steps:
                    if G.meta_alg.startswith("BC"):
                        # note: use meta bc samples.
                        p = bc.sample_demonstration_data(tasks.task_spec, key="meta")
                    else:
                        p, _is_new = meta_path_gen.send(batch_timesteps), True
                else:
                    raise Exception('Implementation error. Should never reach this line.')

                if k in G.eval_grad_steps:
                    _ = path_gen if k < G.n_grad_steps else meta_path_gen
                    p_eval = p if _is_new else _.send(G.eval_timesteps)
                    # reporting on new trajectory samples
                    avg_r = p_eval['ep_info']['reward'] if G.normalize_env else np.mean(p_eval['rewards'])
                    episode_r = avg_r * max_episode_length  # default horizon for HalfCheetah

                    if episode_r < G.term_reward_threshold:  # todo: make this batch-based instead of on single episode
                        logger.log_line("episode reward is too low: ", episode_r, "terminating training.", flush=True)
                        raise RuntimeError('AVERAGE REWARD TOO LOW. Terminating the experiment.')

                    batch_data[prefix + f"grad_{k}_step_reward"].append(avg_r if Reporting.report_mean else episode_r)
                    if k in G.eval_grad_steps:
                        logger.log_key_value(prefix + f"task_{task_ind}_grad_{k}_reward", episode_r, silent=True)

                _p = {k: v for k, v in p.items() if k != "ep_info"}

                if k < G.n_grad_steps:
                    # note: under meta-SGD mode, the runner needs the k^th learning rate.
                    _lr = alpha_lr[k] if G.meta_sgd else alpha_lr

                    # clip_range is not used in BC mode. but still passed in.
                    runner_feed_dict = \
                        path_to_feed_dict(inputs=maml.runner.inputs, paths=_p, lr=_lr,
                                          baseline=G.baseline, gamma=G.gamma, use_gae=G.use_gae, lam=G.lam,
                                          horizon=max_episode_length, clip_range=clip_range)
                    # todo: optimize `maml.meta_runner` if k >= G.n_grad_steps.
                    loss, *_, __ = maml.runner.optim.run_optimize(feed_dict=runner_feed_dict)
                    runner_feed_dict.clear()

                    for key, value in zip(maml.runner.model.reports.keys(), [loss, *_]):
                        batch_data[prefix + f"grad_{k}_step_{key}"].append(value)
                        logger.log_key_value(prefix + f"task_{task_ind}_grad_{k}_{key}", value, silent=True)

                    if loss > G.term_loss_threshold:  # todo: make this batch-based instead of on single episode
                        logger.log_line(prefix + "episode loss blew up:", loss, "terminating training.", flush=True)
                        raise RuntimeError('loss is TOO HIGH. Terminating the experiment.')

                    # done: has bug when using fixed learning rate. Needs the learning rate as input.
                    feed_dict.update(  # do NOT pass in the learning rate because the graph already includes those.
                        path_to_feed_dict(inputs=graph_branch.workers[k].inputs, paths=_p,
                                          lr=None if G.meta_sgd else alpha_lr,  # but do with fixed alpha
                                          horizon=max_episode_length,
                                          baseline=G.baseline, gamma=G.gamma, use_gae=G.use_gae, lam=G.lam,
                                          clip_range=clip_range))

                elif k == G.n_grad_steps:
                    yield_keys = dict(
                        movie=epoch_ind >= G.start_movie_after_epoch and epoch_ind % G.record_movie_interval == 0,
                        eval=is_bc_test
                    )
                    if np.fromiter(yield_keys.values(), bool).any():
                        yield yield_keys, epoch_ind, tasks.task_spec
                    if is_bc_test:
                        if load_ops:  # we need to reset the weights. Otherwise the world would be on fire.
                            tf.get_default_session().run(load_ops)
                        continue  # do NOT meta learn from test samples.

                    # we don't treat the meta_input the same way even though we could. This is more clear to read.
                    # note: feed in the learning rate only later.
                    feed_dict.update(  # do NOT need learning rate
                        path_to_feed_dict(inputs=graph_branch.meta.inputs, paths=_p,
                                          horizon=max_episode_length,
                                          baseline=G.baseline, gamma=G.gamma, use_gae=G.use_gae, lam=G.lam,
                                          clip_range=clip_range))

                    if G.n_graphs == 1:
                        # load from checkpoint before computing the meta gradient, then run the gradient-sum operation
                        if load_ops:
                            tf.get_default_session().run(load_ops)
                        # note: meta reporting should be run here. Not supported for simplicity. (need to reduce across
                        # note: tasks, and cannot be done outside individual task graphs.)
                        if G.meta_sgd is None:  # note: copied from train_supervised_maml, not tested
                            feed_dict[maml.alpha] = alpha_lr
                        tf.get_default_session().run(gradient_sum_op, feed_dict)
                        feed_dict.clear()

                    if load_ops:
                        tf.get_default_session().run(load_ops)

        if is_bc_test:
            continue  # do NOT meta learn from test samples.

        # note: copied from train_supervised_maml, not tested
        if G.meta_sgd is None:
            feed_dict[maml.alpha] = alpha_lr

        if G.n_graphs == 1:
            assert G.meta_n_grad_steps == 1, "ERROR: Can only run 1 meta gradient step with a single graph."
            # note: remove meta reporting b/c meta report should be in each task in this case.
            tf.get_default_session().run(maml.meta_update_ops[0], {maml.beta: beta_lr})
        else:
            assert feed_dict, "ERROR: It is likely that you jumped here from L:178."
            feed_dict[maml.beta] = beta_lr
            for i in range(G.meta_n_grad_steps):
                update_op = maml.meta_update_ops[0 if G.reuse_meta_optimizer else i]
                *reports, _ = tf.get_default_session().run(maml.meta_reporting + [update_op], feed_dict)
                if i not in (0, G.meta_n_grad_steps - 1):
                    continue
                for key, v in zip(maml.meta_reporting_keys, reports):
                    logger.log_key_value(prefix + f"grad_{G.n_grad_steps + i}_step_{key}", v, silent=True)

            feed_dict.clear()

        tf.get_default_session().run(maml.cache.save)

        # Now compute the meta gradients.
        # note: runner shares variables with the MAML graph. Reload from state_dict
        # note: if max_grad_step is the same as n_grad_steps then no need here.

        dt = logger.split()
        logger.log_line('Timer Starts...' if dt is None else f'{dt:0.2f} sec/epoch')
        logger.log(dt_epoch=dt or np.nan, epoch=epoch_ind)

        for key, arr in batch_data.items():
            reduced = np.array(arr).mean()
            logger.log_key_value(key, reduced)

        logger.flush()
Example #14
def train_supervised_maml(*, k_tasks=1, maml: E_MAML):
    # env used for evaluation purposes only.
    if G.meta_sgd:
        assert maml.alpha is not None, "Coding Mistake if meta_sgd is truthy but maml.alpha is None."

    assert G.n_tasks >= k_tasks, f"Is this intended? You probably want to have " \
                                 f"meta-batch({G.n_tasks}) >= k_tasks({k_tasks})."

    sess = tf.get_default_session()

    epoch_ind, pref = -1, ""
    while epoch_ind < G.n_epochs:
        # for epoch_ind in range(G.n_epochs + 1):
        logger.flush()
        logger.split()

        is_bc_test = (pref != "test/" and G.eval_interval and epoch_ind % G.eval_interval == 0)
        pref = "test/" if is_bc_test else ""
        epoch_ind += 0 if is_bc_test else 1

        if G.meta_sgd:
            alpha_lr = sess.run(maml.alpha)  # only used in the runner.
            logger.log(metrics={f"alpha_{i}/{stem(t.name, 2)}": a
                                for i, a_ in enumerate(alpha_lr)
                                for t, a in zip(maml.runner.trainables, a_)}, silent=True)
        else:
            alpha_lr = G.alpha.send(epoch_ind) if isinstance(G.alpha, Schedule) else np.array(G.alpha)
            logger.log(alpha=metrify(alpha_lr), epoch=epoch_ind, silent=True)

        beta_lr = G.beta.send(epoch_ind) if isinstance(G.beta, Schedule) else np.array(G.beta)
        logger.log(beta=metrify(beta_lr), epoch=epoch_ind, silent=True)

        if G.checkpoint_interval and epoch_ind % G.checkpoint_interval == 0:
            yield "pre-update-checkpoint", epoch_ind

        # Compute updates for each task in the batch
        # 0. save value of variables
        # 1. sample
        # 2. gradient descent
        # 3. repeat step 1., 2. until all gradient steps are exhausted.
        batch_data = defaultdict(list)

        maml.save_weight_cache()
        load_ops = [] if DEBUG.no_weight_reset else [maml.cache.load]

        feed_dict = {}
        for task_ind in range(k_tasks if is_bc_test else G.n_tasks):
            graph_branch = maml.graphs[0] if G.n_graphs == 1 else maml.graphs[task_ind]
            if G.n_graphs == 1:
                gradient_sum_op = maml.gradient_sum.set_op if task_ind == 0 else maml.gradient_sum.add_op

            """
            In BC mode, we don't have an environment. The sampling is handled here then fed to the sampler.
            > task_spec = dict(index=0)
            
            Here we make the testing more efficient.
            """
            if not DEBUG.no_task_resample:
                if not is_bc_test:
                    task_spec = dict(index=np.random.randint(0, k_tasks))
                elif task_ind < k_tasks:
                    task_spec = dict(index=task_ind % k_tasks)
                else:
                    raise RuntimeError('should never hit here.')

            for k in range(G.n_grad_steps + 1):  # 0 - 10 <== last one being the maml policy.

                # for imitation inner loss, we still sample trajectory for evaluation purposes, but
                # replace it with the demonstration data for learning
                if k < G.n_grad_steps:
                    p = p if G.single_sampling and k > 0 else \
                        bc.sample_demonstration_data(task_spec, key=("eval" if is_bc_test else None))
                elif k == G.n_grad_steps:
                    # note: use meta bc samples.
                    p = bc.sample_demonstration_data(task_spec, key="meta")
                else:
                    raise Exception('Implementation error. Should never reach this line.')

                _p = {k: v for k, v in p.items() if k != "ep_info"}

                if k < G.n_grad_steps:
                    # note: under meta-SGD mode, the runner needs the k^th learning rate.
                    _lr = alpha_lr[k] if G.meta_sgd else alpha_lr

                    runner_feed_dict = \
                        path_to_feed_dict(inputs=maml.runner.inputs, paths=_p, lr=_lr)
                    # todo: optimize `maml.meta_runner` if k >= G.n_grad_steps.
                    loss, *_, __ = maml.runner.optim.run_optimize(feed_dict=runner_feed_dict)
                    runner_feed_dict.clear()

                    for key, value in zip(maml.runner.model.reports.keys(), [loss, *_]):
                        batch_data[pref + f"grad_{k}_step_{key}"].append(value)
                        logger.log_key_value(pref + f"task_{task_ind}_grad_{k}_{key}", value, silent=True)

                    if loss > G.term_loss_threshold:  # todo: make this batch-based instead of on single episode
                        err = pref + "episode loss blew up:", loss, "terminating training."
                        logger.log_line(colored(err, "red"), flush=True)
                        raise RuntimeError('loss is TOO HIGH. Terminating the experiment.')

                    # fixit: has bug when using fixed learning rate. Still needs to get learning rate from placeholder
                    feed_dict.update(path_to_feed_dict(inputs=graph_branch.workers[k].inputs, paths=_p))
                elif k == G.n_grad_steps:
                    yield_keys = dict(
                        movie=G.record_movie_interval and epoch_ind >= G.start_movie_after_epoch and
                              epoch_ind % G.record_movie_interval == 0,
                        eval=is_bc_test
                    )
                    if np.fromiter(yield_keys.values(), bool).any():
                        yield yield_keys, epoch_ind, task_spec
                    if is_bc_test:
                        if load_ops:
                            tf.get_default_session().run(load_ops)
                        continue  # do NOT meta learn from test samples.

                    # we don't treat the meta_input the same way even though we could. This is more clear to read.
                    # note: feed in the learning rate only later.
                    feed_dict.update(path_to_feed_dict(inputs=graph_branch.meta.inputs, paths=_p))

                    if G.n_graphs == 1:
                        # load from checkpoint before computing the meta gradient, then run the gradient-sum operation
                        if load_ops:
                            tf.get_default_session().run(load_ops)
                        # note: meta reporting should be run here. Not supported for simplicity. (need to reduce across
                        # note: tasks, and cannot be done outside individual task graphs.)
                        if G.meta_sgd is None:
                            feed_dict[maml.alpha] = alpha_lr
                        tf.get_default_session().run(gradient_sum_op, feed_dict)
                        feed_dict.clear()

                    if load_ops:
                        tf.get_default_session().run(load_ops)

        if is_bc_test:
            continue  # do NOT meta learn from test samples.

        if G.meta_sgd is None:
            feed_dict[maml.alpha] = alpha_lr

        if G.n_graphs == 1:
            assert G.meta_n_grad_steps == 1, "ERROR: Can only run 1 meta gradient step with a single graph."
            # note: remove meta reporting b/c meta report should be in each task in this case.
            tf.get_default_session().run(maml.meta_update_ops[0], {maml.beta: beta_lr})
        else:
            assert feed_dict, "ERROR: It is likely that you jumped here from L:178."
            feed_dict[maml.beta] = beta_lr
            for i in range(G.meta_n_grad_steps):
                update_op = maml.meta_update_ops[0 if G.reuse_meta_optimizer else i]
                *reports, _ = tf.get_default_session().run(maml.meta_reporting + [update_op], feed_dict)
                if i not in (0, G.meta_n_grad_steps - 1):
                    continue
                for key, v in zip(maml.meta_reporting_keys, reports):
                    logger.log_key_value(pref + f"grad_{G.n_grad_steps + i}_step_{key}", v, silent=True)

            feed_dict.clear()

        tf.get_default_session().run(maml.cache.save)

        # Now compute the meta gradients.
        # note: runner shares variables with the MAML graph. Reload from state_dict
        # note: if max_grad_step is the same as n_grad_steps then no need here.

        dt = logger.split()
        logger.log_line('Timer Starts...' if dt is None else f'{dt:0.2f} sec/epoch')
        logger.log(dt_epoch=dt or np.nan, epoch=epoch_ind)

        for key, arr in batch_data.items():
            reduced = np.array(arr).mean()
            logger.log_key_value(key, reduced)
Example #15
    def train(self, *, tasks, maml: E_MAML, plot_fn=None, test_tasks=None):
        max_grad_steps = max(G.n_grad_steps, *G.eval_grad_steps)
        for epoch_ind in range(G.n_epochs):

            is_the_end = (epoch_ind == G.n_epochs)
            should_plot = (epoch_ind % Reporting.plot_interval
                           == 0) if Reporting.plot_interval else False
            should_save = (epoch_ind % Reporting.save_interval
                           == 0) if Reporting.save_interval else False
            should_test = (epoch_ind % G.eval_test_interval
                           == 0) if G.eval_test_interval else False

            frac = 1.0 - (epoch_ind - 1.0) / G.n_epochs
            alpha_lr = G.alpha * frac
            beta_lr = G.beta * frac
            clip_range = G.clip_range * frac

            # Compute updates for each task in the batch
            # 0. save value of variables
            # 1. sample
            # 2. gradient descent
            # 3. repeat step 1., 2. until all gradient steps are exhausted.

            batch_data = defaultdict(list)

            if DEBUG.debug_params:
                debug_tensor_key = 'runner_network/MlpPolicy/pi/b:0'
                runner_state_dict = {}
                meta_state_dict = {}
                runner_grads = defaultlist(dict)
                meta_grads = defaultlist(dict)

            all_grads = []
            if not DEBUG.no_weight_reset:
                # M.white('<--- save weights')
                maml.save_checkpoint()

            feed_dict = {}
            for task_ind, meta_branch in enumerate(maml.task_graphs):
                if not DEBUG.no_task_resample or (task_ind == 0
                                                  and epoch_ind == 0):
                    # M.white('===> re-sample tasks', end='')
                    env = tasks.sample()
                if task_ind != 0 and not DEBUG.no_weight_reset:
                    # M.white('---> resetting weights for worker sampling')
                    maml.load_checkpoint()
                else:
                    # M.white('---> Do NOT reset for first worker')
                    pass

                worker_paths = defaultlist(
                    None)  # get paths for the first update.
                for k in range(
                        max_grad_steps +
                        1):  # 0 - 10 <== last one being the maml policy.
                    # debug code
                    if DEBUG.debug_params:
                        runner_state_dict[k] = maml.runner.policy.state_dict
                        print("k =", k, debug_tensor_key, ": ", end='')
                        print(runner_state_dict[k][debug_tensor_key])

                    # collect samples from the environment
                    if G.single_sampling:
                        if k == 0 or k == G.n_grad_steps:
                            # M.print('$!#$@#$ sample from environment')
                            worker_paths[k] = p = self.sample_from_env(
                                env, maml.runner.policy, render=False)
                        else:
                            # M.print('^^^^^^^ copy previous sample')
                            worker_paths[k] = p
                    else:
                        # M.print('$!#$@#$ sample from environment')
                        worker_paths[k] = p = self.sample_from_env(
                            env, maml.runner.policy, render=False)

                    avg_r = np.mean(p['rewards'])
                    episode_r = avg_r * tasks.spec.max_episode_steps  # default horizon for HalfCheetah

                    if k in G.eval_grad_steps:
                        batch_data['grad_{}_step_reward'.format(k)].append(
                            avg_r if Reporting.report_mean else episode_r)

                    if episode_r < G.term_reward_threshold:
                        # todo: make this based on batch instead of a single episode.
                        print(episode_r)
                        raise RuntimeError(
                            'AVERAGE REWARD TOO LOW. Terminating the experiment.'
                        )

                    _p = {k: v for k, v in p.items() if k != "ep_infos"}

                    # Here we do gradient descent on the same data only once. In the future, we could explore cases
                    # involving more updates.
                    if k < max_grad_steps:
                        # M.red('....... Optimize Model')
                        runner_feed_dict = \
                            path_to_feed_dict(inputs=maml.runner.inputs, paths=_p, lr=alpha_lr, clip_range=clip_range)

                        if not DEBUG.debug_params:
                            maml.runner.optim.run_optimize(
                                feed_dict=runner_feed_dict)

                        if DEBUG.debug_params:
                            _grads, *_ = maml.runner.model.run_grads(
                                feed_dict=runner_feed_dict)
                            runner_grads[k] = {
                                t.name: g
                                for t, g in zip(maml.runner.policy.trainables,
                                                _grads)
                            }
                            print('runner_grads:',
                                  runner_grads[k][debug_tensor_key])
                            if DEBUG.debug_apply_gradient:
                                maml.runner.optim.run_apply_grads(grads=_grads,
                                                                  lr=alpha_lr)
                            else:
                                maml.runner.optim.run_optimize(
                                    feed_dict=runner_feed_dict)

                    if k < G.n_grad_steps:
                        feed_dict.update(
                            path_to_feed_dict(
                                inputs=meta_branch.workers[k].inputs,
                                paths=_p,
                                lr=alpha_lr,
                                clip_range=clip_range))
                    elif k == G.n_grad_steps:
                        # we don't treat the meta_input the same way even though we could. This is more clear to read.
                        # note: feed in the learning rate only later.
                        feed_dict.update(
                            path_to_feed_dict(inputs=meta_branch.meta.inputs,
                                              paths=_p,
                                              clip_range=clip_range))

            # Now compute the gradients.
            # note: runner shares variables with the MAML graph. Reload from state_dict
            # note: should use variable placeholders for these inputs.
            if not DEBUG.no_weight_reset:
                from moleskin import moleskin as M
                M.green('---> resetting weights for meta gradient')
                maml.load_checkpoint()

            feed_dict[maml.beta] = beta_lr
            maml.optim.run_optimize(feed_dict=feed_dict)

            for key in batch_data.keys():
                reduced = np.array(batch_data[key]).mean()
                logger.log_keyvalue(epoch_ind, key, reduced)

            if should_test and test_tasks is not None:
                maml.save_checkpoint()
                print(test_tasks.spec)
                test_envs = test_tasks.envs
                test_envs.reset()
                p = self.sample_from_env(
                    test_envs,
                    maml.runner.policy,
                    timestep_limit=test_tasks.spec.timestep_limit)
                logger.log(epoch_ind, pre_update_rewards=np.mean(p['rewards']))
                p = self.sample_from_env(test_envs, maml.runner.policy)
                runner_feed_dict = \
                    path_to_feed_dict(inputs=maml.runner.inputs, paths=p, lr=alpha_lr, clip_range=clip_range)
                maml.runner.model.run_optimize(feed_dict=runner_feed_dict)
                p = self.sample_from_env(
                    test_envs,
                    maml.runner.policy,
                    timestep_limit=test_tasks.spec.timestep_limit)
                logger.log(epoch_ind,
                           post_update_rewards=np.mean(p['rewards']))
                maml.load_checkpoint()

            if should_plot and callable(plot_fn):
                plot_fn(save=True if should_save or is_the_end else False,
                        lr=beta_lr)
    def _run_single_task(self, i, task):
        start_time = time.time()
        try:
            task_hash = _hash_task_dict(
                task)  # generate SHA256 hash of task dict as identifier

            # skip task if it has already been completed
            if task_hash in self.gof_single_res_collection.keys():
                logger.log("Task {:<1} {:<63} {:<10} {:<1} {:<1} {:<1}".format(
                    i + 1, "has already been completed:", "Estimator:",
                    task['estimator_name'], " Simulator: ",
                    task["simulator_name"]))
                return None

            # run task when it has not been completed
            else:
                logger.log("Task {:<1} {:<63} {:<10} {:<1} {:<1} {:<1}".format(
                    i + 1, "running:", "Estimator:", task['estimator_name'],
                    " Simulator: ", task["simulator_name"]))

                tf.reset_default_graph()
                ''' build simulator and estimator model given the specified configurations '''

                simulator = globals()[task['simulator_name']](
                    **task['simulator_config'])

                t = time.time()
                estimator = globals()[task['estimator_name']](
                    task['task_name'], simulator.ndim_x, simulator.ndim_y,
                    **task['estimator_config'])
                time_to_initialize = time.time() - t

                # if desired hide gpu devices
                if not self.use_gpu:
                    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

                with tf.Session() as sess:
                    sess.run(tf.global_variables_initializer())
                    ''' train the model '''
                    gof = GoodnessOfFit(estimator=estimator,
                                        probabilistic_model=simulator,
                                        X=task['X'],
                                        Y=task['Y'],
                                        n_observations=task['n_obs'],
                                        n_mc_samples=task['n_mc_samples'],
                                        x_cond=task['x_cond'],
                                        task_name=task['task_name'],
                                        tail_measures=self.tail_measures)

                    t = time.time()
                    gof.fit_estimator(print_fit_result=True)
                    time_to_fit = time.time() - t

                    if self.dump_models:
                        logger.dump_pkl(data=gof.estimator,
                                        path="model_dumps/{}.pkl".format(
                                            task['task_name']))
                        logger.dump_pkl(data=gof.probabilistic_model,
                                        path="model_dumps/{}.pkl".format(
                                            task['task_name'] + "_simulator"))
                    ''' perform tests with the fitted model '''
                    t = time.time()
                    gof_results = gof.compute_results()
                    time_to_evaluate = time.time() - t

                    gof_results.task_name = task['task_name']

                    gof_results.hash = task_hash

                logger.log_pkl(data=(task_hash, gof_results),
                               path=RESULTS_FILE)
                logger.flush(file_name=RESULTS_FILE)
                del gof_results

                task_duration = time.time() - start_time
                logger.log(
                    "Finished task {:<1} in {:<1.4f} {:<43} {:<10} {:<1} {:<1} {:<2} | {:<1} {:<1.2f} {:<1} {:<1.2f} {:<1} {:<1.2f}"
                    .format(i + 1, task_duration, "sec:", "Estimator:",
                            task['estimator_name'], " Simulator: ",
                            task["simulator_name"], "t_init:",
                            time_to_initialize, "t_fit:", time_to_fit,
                            "t_eval:", time_to_evaluate))

        except Exception as e:
            logger.log("error in task: ", str(i + 1))
            logger.log(str(e))
            traceback.print_exc()
Example #17
def test_configuration(log_dir):
    logger.configure(log_dir, prefix='main_test_script', color='green')
    logger.log("This is a unittest")
    logger.log("Some stats", reward=0.05, kl=0.001)
    logger.flush()
Example #18
from ml_logger import logger

### First configure the logger to log to a directory (or a server)
logger.configure('/tmp/ml-logger-debug')
# outputs ~>
# logging data to /tmp/ml-logger-debug

# We can log individual keys
for i in range(1):
    logger.log(metrics={
        'some_val/smooth': 10,
        'status': f"step ({i})"
    },
               reward=20,
               timestep=i)
    ### flush the data, otherwise the value would be overwritten with new values in the next iteration.
    logger.flush()
# outputs ~>
# ╒════════════════════╤════════════════════════════╕
# │       reward       │             20             │
# ├────────────────────┼────────────────────────────┤
# │      timestep      │             0              │
# ├────────────────────┼────────────────────────────┤
# │  some val/smooth   │             10             │
# ├────────────────────┼────────────────────────────┤
# │       status       │          step (0)          │
# ├────────────────────┼────────────────────────────┤
# │      timestamp     │'2018-11-04T11:37:03.324824'│
# ╘════════════════════╧════════════════════════════╛

for i in range(100):
    # (the original example is truncated here; a minimal body keeps the loop runnable)
    logger.log(reward=0.9 ** i, timestep=i, flush=True)
Example #19
def train():
    from moleskin import moleskin as M

    M.tic('Full Run')
    if G.model == "lenet":
        model = Conv2d()
    elif G.model == 'mlp':
        model = Mlp()
    else:
        raise NotImplementedError('only lenet and mlp are allowed')
    model.train()
    print(model)

    G.log_prefix = f"mnist_{type(model).__name__}"
    logger.configure(log_directory=G.log_dir, prefix=G.log_prefix)
    logger.log_params(G=vars(G), Model=dict(architecture=str(model)))

    from torchvision import datasets, transforms

    trans = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, ), (1.0, ))])
    train_set = datasets.MNIST(root=G.data_dir,
                               train=True,
                               transform=trans,
                               download=True)
    test_set = datasets.MNIST(root=G.data_dir,
                              train=False,
                              transform=trans,
                              download=True)
    train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                               batch_size=G.batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                              batch_size=G.batch_size,
                                              shuffle=False)

    celoss = nn.CrossEntropyLoss()
    adam = optim.SGD(model.parameters(), lr=G.learning_rate, momentum=0.9)
    for epoch in range(G.n_epochs):
        for it, (x, target) in enumerate(train_loader):
            adam.zero_grad()
            ys = model(x)
            loss = celoss(ys, target)
            loss.backward()
            adam.step()

            if it % G.test_interval == 0:
                with h.Eval(model), torch.no_grad():
                    accuracy = h.Average()
                    for x, label in test_loader:
                        acc = h.cast(
                            h.one_hot_to_int(model(x).detach()) == label,
                            float).sum() / len(x)
                        accuracy.add(acc.detach().numpy())
                logger.log(float(epoch) + it / len(train_loader),
                           accuracy=accuracy.value)

        M.split("epoch")
        # logger.log(epoch, it=it, loss=loss.detach().numpy())
    M.toc('Full Run')
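
The accuracy computation in the test loop above leans on project helpers (`h.one_hot_to_int`, `h.cast`, `h.Average`). A minimal plain-PyTorch sketch of the same per-batch calculation, with made-up logits and labels, looks like this:

import torch

logits = torch.tensor([[2.0, 0.1, -1.0],   # stands in for model(x) on a batch of 2
                       [0.3, 1.5, 0.2]])
labels = torch.tensor([0, 1])
# argmax over the class dimension plays the role of h.one_hot_to_int here
acc = (logits.argmax(dim=-1) == labels).float().sum() / len(labels)
print(acc.item())  # 1.0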
def run_benchmark_train_test_fit_cv(dataset, model_dict, seed=27, n_jobs_inner=-1, n_jobc_outer=1, n_train_valid_splits=1,
                                    shuffle_splits=True, n_eval_seeds=1, n_folds=5):

    if logger.log_directory is None:
        logger.configure(log_directory='/tmp/ml-logger')

    rds = np.random.RandomState(seed)
    eval_seeds = list(rds.randint(0, 10**7, size=n_eval_seeds))

    logger.log("\n------------------  empirical benchmark with %s ----------------------"%str(dataset))

    for model_key in model_dict:
        model_dict[model_key].update({'ndim_x': dataset.ndim_x, 'ndim_y': dataset.ndim_y})

    # run experiments
    cv_result_dicts = []

    datasets = zip(*dataset.get_train_valid_splits(valid_portion=0.2, n_splits=n_train_valid_splits,
                                                   shuffle=shuffle_splits, random_state=rds))

    for i, (X_train, Y_train, X_valid, Y_valid) in enumerate(datasets):
        logger.log("--------  train-valid split %i --------"%i)


        manager = Manager()
        cv_result_dict = manager.dict()

        def _fit_by_cv_and_eval(estimator_key, conf_dict):
            estimator, param_grid, param_dict_init = _initialize_model_cv(estimator_key, conf_dict, verbose=True)

            # 1) perform cross-validation hyperparam search to select params
            selected_params = estimator.fit_by_cv(X_train, Y_train, param_grid=param_grid, n_folds=n_folds,
                                                  n_jobs=n_jobs_inner, random_state=rds)

            logger.log("%s selected params:"%estimator_key)
            logger.log_params(**selected_params)
            # 2) evaluate selected params with different initializations
            param_dict_init.update(selected_params)

            logger.log("evaluating %s parameters with %i seeds"%(estimator_key, len(eval_seeds)))
            scores = _evaluate_params(estimator.__class__, param_dict_init, X_train, Y_train, X_valid, Y_valid,
                                      seeds=eval_seeds)

            cv_result_dict[estimator_key] = {'selected_params': selected_params, 'scores': scores, 'eval_seeds': eval_seeds}
            logger.log("evaluation scores for %s: %s" % (estimator_key, str(scores)))


        executor = AsyncExecutor(n_jobs=n_jobc_outer)
        executor.run(_fit_by_cv_and_eval, model_dict.keys(), model_dict.values())

        cv_result_dicts.append(dict(cv_result_dict))

    pprint(cv_result_dicts)

    # rearrange results as pandas df
    final_results_dict = {'scores_mean':[], 'scores_std':[], 'dataset':[]}
    for estimator_key in model_dict.keys():
        scores = []
        for result_dict in cv_result_dicts:
            scores.extend(result_dict[estimator_key]['scores'])

        final_results_dict['scores_mean'].append(np.mean(scores))
        final_results_dict['scores_std'].append(np.std(scores))
        final_results_dict['dataset'].append(str(dataset))

    df = pd.DataFrame.from_dict(data=final_results_dict, orient='columns')
    df.index = list(model_dict.keys())

    logger.log('\n' + str(df))
    return df
Example #21
def maml_supervised(model, Task, n_epochs, task_batch_n, npts, k_shot,
                    n_gradient_steps, **_):
    """

    :param model:
    :param Task:
    :param n_epochs:
    :param task_batch_n:
    :param npts: the total number of samples for the sinusoidal task
    :param k_shot:
    :param n_gradient_steps:
    :param _:
    :return:
    """
    import playground.maml.maml_torch.paper_metrics as metrics

    device = t.device('cuda' if t.cuda.is_available() else 'cpu')
    model.to(device)

    alpha = 0.01
    beta = 0.01

    ps = list(model.parameters())

    # for ep_ind in trange(n_epochs, desc='Epochs', ncols=50, leave=False):
    for ep_ind in range(n_epochs):
        M.split('epoch')
        meta_grads = defaultdict(lambda: 0)
        theta = copy.deepcopy(model.state_dict())
        tasks = [Task(npts=npts) for _ in range(task_batch_n)]
        for task_ind, task in enumerate(tasks):  # sample a new problem
            # todo: this part is highly-parallelizable
            if task_ind != 0:
                model.load_state_dict(theta)

            task_grads = defaultdict(deque)
            proper = t.tensor(task.proper()).to(device)
            samples = t.tensor(task.samples(k_shot)).to(device)

            for grad_ind in range(n_gradient_steps):
                # done: ready to be repackaged
                loss, _ = metrics.comp_loss(*samples, model)
                model.zero_grad()
                # back-propagate once, retain graph.
                loss.backward(t.ones(1).to(device), retain_graph=True)

                # done: need to use gradient descent, plus creating a meta graph.
                U, grad_outputs = [], []
                for p in model.parameters():
                    U.append(p - alpha * p.grad)  # meta update
                    grad_outputs.append(t.ones(1).to(device).expand_as(p))

                # t.autograd.grad returns sum of gradient between all U and all grad_outputs
                # note: this is the row sum of \partial theta_prime / \partial theta, which is a matrix.
                dU = t.autograd.grad(outputs=U,
                                     grad_outputs=grad_outputs,
                                     inputs=model.parameters())

                # Now update the params
                for p, updated_p, du in zip(ps, U, dU):
                    p.data = updated_p.data  # these are leaf nodes, so we can directly manipulate the data attribute.
                    task_grads[p].append(du)

                # note: evaluate the 1-grad loss
                if grad_ind == 0:
                    with t.no_grad():
                        _loss, _ = metrics.comp_loss(*proper, model)
                    logger.log_keyvalue(ep_ind,
                                        key=f"1-grad-loss-{task_ind:02d}",
                                        value=loss.item(),
                                        silent=True)

            # compute Loss_theta_prime
            samples = t.tensor(task.samples(k_shot)).to(
                device)  # sample from this problem
            loss, _ = metrics.comp_loss(*samples, model)
            model.zero_grad()
            loss.backward()

            for i, grad in enumerate(
                    model.gradients()):  # Now accumulate the gradient
                p = ps[i]
                task_grads[p].append(grad)
                meta_grads[p] += t.prod(t.cat(list(
                    map(lambda d: d.unsqueeze(dim=-1), task_grads[p])),
                                              dim=-1),
                                        dim=-1)

        # theta_prime = copy.deepcopy(model.state_dict())
        model.load_state_dict(theta)
        for p in ps:
            p.grad = t.tensor(
                (meta_grads[p] / task_batch_n).detach()).to(device)

        model.meta_step(lr=beta)

        with t.no_grad():
            _loss, _ = metrics.comp_loss(*proper, model)
        logger.log(ep_ind, meta_loss=_loss.item())
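
The `t.autograd.grad(outputs=U, grad_outputs=..., inputs=...)` call above computes a vector-Jacobian product; with all-ones `grad_outputs` it returns, for each parameter, the row sum of the Jacobian of the inner update, as the comment notes. A self-contained toy sketch of that behaviour (tensors unrelated to the model above):

import torch

theta = torch.tensor([1.0, 2.0], requires_grad=True)
U = theta ** 2                              # a differentiable "update" of theta
ones = torch.ones_like(U)
(row_sum,) = torch.autograd.grad(outputs=U, inputs=theta, grad_outputs=ones)
print(row_sum)                              # tensor([2., 4.]) == d(sum U)/d theta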
Example #22
import os
from ml_logger import logger

logger.configure(log_directory=os.path.expanduser("~/ml-logger-debug"),
                 prefix='episodeyang/demo-project/first-run')
for i in range(100):
    logger.log(loss=0.9**i, step=i, flush=True)
logger.log_text('charts: [{"yKey": "loss", "xKey": "step"}]', ".charts.yml")