Example #1
def execution_plan(workers, config):
    # Sync workers with meta policy
    workers.sync_weights()

    # Sample new tasks and assign them to the workers
    use_meta_env = config["use_meta_env"]
    set_worker_tasks(workers, use_meta_env)

    # Metric Collector
    metric_collect = CollectMetrics(
        workers,
        min_history=config["metrics_smoothing_episodes"],
        timeout_seconds=config["collect_metrics_timeout"])

    # Iterator for Inner Adaptation Data gathering (from pre->post adaptation)
    inner_steps = config["inner_adaptation_steps"]

    def inner_adaptation_steps(itr):
        buf = []
        split = []
        metrics = {}
        for samples in itr:

            # Processing Samples (Standardize Advantages)
            split_lst = []
            for sample in samples:
                sample["advantages"] = standardized(sample["advantages"])
                split_lst.append(sample.count)

            buf.extend(samples)
            split.append(split_lst)

            adapt_iter = len(split) - 1
            metrics = post_process_metrics(adapt_iter, workers, metrics)
            if len(split) > inner_steps:
                out = SampleBatch.concat_samples(buf)
                out["split"] = np.array(split)
                buf = []
                split = []

                # Report the adaptation reward difference (post- minus pre-adaptation)
                ep_rew_pre = metrics["episode_reward_mean"]
                ep_rew_post = metrics["episode_reward_mean_adapt_" +
                                      str(inner_steps)]
                metrics["adaptation_delta"] = ep_rew_post - ep_rew_pre
                yield out, metrics
                metrics = {}
            else:
                inner_adaptation(workers, samples)

    rollouts = from_actors(workers.remote_workers())
    rollouts = rollouts.batch_across_shards()
    rollouts = rollouts.transform(inner_adaptation_steps)

    # Meta-update step
    train_op = rollouts.for_each(
        MetaUpdate(workers, config["maml_optimizer_steps"], metric_collect,
                   use_meta_env))
    return train_op
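
The execution-plan snippets on this page are excerpted from RLlib trainer modules and omit their imports. A minimal sketch of the imports they assume, using the Ray/RLlib 1.x module layout (paths differ across versions); helpers such as set_worker_tasks, inner_adaptation, post_process_metrics, post_process_samples, standardized, MetaUpdate, fit_dynamics, sync_ensemble, and sync_stats are defined alongside the plan in the MAML/MBMPO trainer modules and are not reproduced here:

# Core Ray/RLlib utilities used by the plans on this page
# (Ray 1.x module paths; an assumption).
import numpy as np

from ray.rllib.evaluation.metrics import collect_episodes
from ray.rllib.execution.metric_ops import CollectMetrics
from ray.rllib.policy.sample_batch import SampleBatch
from ray.util.iter import from_actors, LocalIterator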
Example #2
def execution_plan(workers, config):
    # Sync workers with meta policy
    workers.sync_weights()

    # Sample new tasks and assign them to the workers
    set_worker_tasks(workers)

    # Metric Collector
    metric_collect = CollectMetrics(
        workers,
        min_history=config["metrics_smoothing_episodes"],
        timeout_seconds=config["collect_metrics_timeout"])

    # Iterator for Inner Adaptation Data gathering (from pre->post adaptation)
    rollouts = from_actors(workers.remote_workers())
    rollouts = rollouts.batch_across_shards()
    rollouts = rollouts.combine(
        InnerAdaptationSteps(workers, config["inner_adaptation_steps"],
                             metric_collect))

    # Meta-update step
    train_op = rollouts.for_each(
        MetaUpdate(workers, config["maml_optimizer_steps"], metric_collect))
    return train_op
Example #3
    def execution_plan(workers: WorkerSet, config: AlgorithmConfigDict,
                       **kwargs) -> LocalIterator[dict]:
        assert (
            len(kwargs) == 0
        ), "MBMPO execution_plan does NOT take any additional parameters"

        # Train TD Models on the driver.
        workers.local_worker().foreach_policy(fit_dynamics)

        # Sync driver's policy with workers.
        workers.sync_weights()

        # Sync TD Models and normalization stats with workers
        sync_ensemble(workers)
        sync_stats(workers)

        # Dropping metrics from the first iteration
        _, _ = collect_episodes(workers.local_worker(),
                                workers.remote_workers(), [],
                                timeout_seconds=9999)

        # Metrics Collector.
        metric_collect = CollectMetrics(
            workers,
            min_history=0,
            timeout_seconds=config["metrics_episode_collection_timeout_s"],
        )

        num_inner_steps = config["inner_adaptation_steps"]

        def inner_adaptation_steps(itr):
            buf = []
            split = []
            metrics = {}
            for samples in itr:
                print("Collecting Samples, Inner Adaptation {}".format(
                    len(split)))
                # Processing Samples (Standardize Advantages)
                samples, split_lst = post_process_samples(samples, config)

                buf.extend(samples)
                split.append(split_lst)

                adapt_iter = len(split) - 1
                prefix = "DynaTrajInner_" + str(adapt_iter)
                metrics = post_process_metrics(prefix, workers, metrics)

                if len(split) > num_inner_steps:
                    out = SampleBatch.concat_samples(buf)
                    out["split"] = np.array(split)
                    buf = []
                    split = []

                    yield out, metrics
                    metrics = {}
                else:
                    inner_adaptation(workers, samples)

        # Iterator for Inner Adaptation Data gathering (from pre->post
        # adaptation).
        rollouts = from_actors(workers.remote_workers())
        rollouts = rollouts.batch_across_shards()
        rollouts = rollouts.transform(inner_adaptation_steps)

        # Meta update step with outer combine loop for multiple MAML
        # iterations.
        train_op = rollouts.combine(
            MetaUpdate(
                workers,
                config["num_maml_steps"],
                config["maml_optimizer_steps"],
                metric_collect,
            ))
        return train_op
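
Examples #3-#5 move the advantage standardization of Example #1 into a post_process_samples helper that is not shown on this page. A minimal sketch of what such a helper could look like, reconstructed from the inline loop in Example #1 (the actual RLlib helper may do more; the body below is an assumption):

def post_process_samples(samples, config):
    # Standardize advantages in each sample batch and record its size,
    # mirroring the inline logic in Example #1. config is accepted only
    # for signature parity and is unused in this sketch.
    split_lst = []
    for sample in samples:
        adv = np.asarray(sample["advantages"], dtype=np.float32)
        sample["advantages"] = (adv - adv.mean()) / max(1e-4, adv.std())
        split_lst.append(sample.count)
    return samples, split_lst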
Example #4
def execution_plan(workers: WorkerSet,
                   config: TrainerConfigDict) -> LocalIterator[dict]:
    """Execution plan of the PPO algorithm. Defines the distributed dataflow.

    Args:
        workers (WorkerSet): The WorkerSet for training the Polic(y/ies)
            of the Trainer.
        config (TrainerConfigDict): The trainer's configuration dict.

    Returns:
        LocalIterator[dict]: The Policy class to use with PPOTrainer.
            If None, use `default_policy` provided in build_trainer().
    """
    # Train TD Models on the driver.
    workers.local_worker().foreach_policy(fit_dynamics)

    # Sync driver's policy with workers.
    workers.sync_weights()

    # Sync TD Models and normalization stats with workers
    sync_ensemble(workers)
    sync_stats(workers)

    # Dropping metrics from the first iteration
    _, _ = collect_episodes(workers.local_worker(),
                            workers.remote_workers(), [],
                            timeout_seconds=9999)

    # Metrics Collector.
    metric_collect = CollectMetrics(
        workers,
        min_history=0,
        timeout_seconds=config["collect_metrics_timeout"])

    num_inner_steps = config["inner_adaptation_steps"]

    def inner_adaptation_steps(itr):
        buf = []
        split = []
        metrics = {}
        for samples in itr:
            print("Collecting Samples, Inner Adaptation {}".format(len(split)))
            # Processing Samples (Standardize Advantages)
            samples, split_lst = post_process_samples(samples, config)

            buf.extend(samples)
            split.append(split_lst)

            adapt_iter = len(split) - 1
            prefix = "DynaTrajInner_" + str(adapt_iter)
            metrics = post_process_metrics(prefix, workers, metrics)

            if len(split) > num_inner_steps:
                out = SampleBatch.concat_samples(buf)
                out["split"] = np.array(split)
                buf = []
                split = []

                yield out, metrics
                metrics = {}
            else:
                inner_adaptation(workers, samples)

    # Iterator for Inner Adaptation Data gathering (from pre->post adaptation).
    rollouts = from_actors(workers.remote_workers())
    rollouts = rollouts.batch_across_shards()
    rollouts = rollouts.transform(inner_adaptation_steps)

    # Meta update step with outer combine loop for multiple MAML iterations.
    train_op = rollouts.combine(
        MetaUpdate(workers, config["num_maml_steps"],
                   config["maml_optimizer_steps"], metric_collect))
    return train_op
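
The docstring above refers to build_trainer(); in RLlib 1.x an execution plan like this is handed to build_trainer to assemble the Trainer class. A rough sketch under that assumption (DEFAULT_CONFIG and MBMPOTorchPolicy below are placeholders for the trainer's actual default config and policy class):

from ray.rllib.agents.trainer_template import build_trainer

MBMPOTrainer = build_trainer(
    name="MBMPO",
    default_config=DEFAULT_CONFIG,
    default_policy=MBMPOTorchPolicy,
    execution_plan=execution_plan,
)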
Example #5
def execution_plan(workers, config):
    # Train TD Models
    workers.local_worker().foreach_policy(fit_dynamics)

    # Sync the driver's policy with the workers
    workers.sync_weights()

    # Sync TD Models and normalization stats with workers
    sync_ensemble(workers)
    sync_stats(workers)

    # Dropping metrics from the first iteration
    episodes, to_be_collected = collect_episodes(
        workers.local_worker(),
        workers.remote_workers(), [],
        timeout_seconds=9999)

    # Metrics Collector
    metric_collect = CollectMetrics(
        workers,
        min_history=0,
        timeout_seconds=config["collect_metrics_timeout"])

    inner_steps = config["inner_adaptation_steps"]

    def inner_adaptation_steps(itr):
        buf = []
        split = []
        metrics = {}
        for samples in itr:
            print("Collecting Samples, Inner Adaptation {}".format(len(split)))
            # Processing Samples (Standardize Advantages)
            samples, split_lst = post_process_samples(samples, config)

            buf.extend(samples)
            split.append(split_lst)

            adapt_iter = len(split) - 1
            prefix = "DynaTrajInner_" + str(adapt_iter)
            metrics = post_process_metrics(prefix, workers, metrics)

            if len(split) > inner_steps:
                out = SampleBatch.concat_samples(buf)
                out["split"] = np.array(split)
                buf = []
                split = []

                yield out, metrics
                metrics = {}
            else:
                inner_adaptation(workers, samples)

    # Iterator for Inner Adaptation Data gathering (from pre->post adaptation)
    rollouts = from_actors(workers.remote_workers())
    rollouts = rollouts.batch_across_shards()
    rollouts = rollouts.transform(inner_adaptation_steps)

    # Meta-update step with outer combine loop for multiple MAML iterations
    train_op = rollouts.combine(
        MetaUpdate(workers, config["num_maml_steps"],
                   config["maml_optimizer_steps"], metric_collect))
    return train_op
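
The train_op returned by each of these plans is a LocalIterator[dict]; the trainer drives it by pulling one metrics dict per training iteration. A rough usage sketch, assuming workers and config are already constructed as in the examples above:

train_op = execution_plan(workers, config)

# Each next() call runs one full pass through the plan (inner adaptation
# plus meta-update) and returns the collected metrics dict.
result = next(train_op)
print(result)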