def main():
    """Plot the final goal distance on Gym Pusher3D for five algorithms.

    For every algorithm the matching trials are loaded with ``get_trials``,
    each learning curve is smoothed with ``sliding_mean(..., window=10)``,
    the curves are truncated to the shortest one, and the mean curve is
    plotted with a +/- one standard deviation band. The figure is saved to
    ``results/iclr2018/gym-pusher-3d.jpg`` and then displayed.

    Raises:
        ValueError: if no trial matches the criteria of some algorithm.
    """
    # All five algorithms were logged into the same experiment directory.
    data_dir = (
        "/home/vitchyr/git/railrl/data/doodads3/01-03-final-gym-pusher3d/"
    )
    tdm_trials = get_trials(
        data_dir,
        criteria={
            'exp_id': '16',
            'algorithm': 'DDPG-TDM',
        })
    mb_trials = get_trials(
        data_dir,
        criteria={
            'exp_id': '2',
            'algorithm': 'Model-Based-Dagger',
        })
    ddpg_trials = get_trials(
        data_dir,
        criteria={
            'exp_id': '7',
            'algorithm': 'DDPG',
        })
    her_trials = get_trials(
        data_dir,
        criteria={
            'exp_id': '10',
            'algorithm': 'HER-Andrychowicz',
        })
    ddpg_sparse_trials = get_trials(
        data_dir,
        criteria={
            'exp_id': '8',
            'algorithm': 'DDPG-Sparse',
        })

    MAX_ITERS = 1000000

    plt.figure()
    # The logged column names use underscores instead of spaces.
    key = 'Multitask Final L2 distance to goal Mean'.replace(" ", "_")
    for trials, name in [
        (tdm_trials, 'TDMs'),
        (ddpg_trials, 'DDPG'),
        (ddpg_sparse_trials, 'DDPG-Sparse'),
        (her_trials, 'HER'),
        (mb_trials, 'Model-Based'),
    ]:
        all_values = []
        for trial in trials:
            try:
                values_ts = trial.data[key]
            except Exception:
                # Keep the interactive breakpoint for inspecting the trial,
                # but re-raise afterwards: continuing would either hit an
                # undefined `values_ts` or silently reuse the previous
                # trial's curve.
                import ipdb
                ipdb.set_trace()
                raise
            all_values.append(sliding_mean(values_ts, window=10))
        if not all_values:
            raise ValueError("No trials found for {}".format(name))
        # Trials may have run for different lengths; compare them only on
        # the prefix that every trial has.
        min_len = min(map(len, all_values))
        costs = np.vstack([values[:min_len] for values in all_values])
        costs = costs[:, :MAX_ITERS]
        mean = np.mean(costs, axis=0)
        std = np.std(costs, axis=0)
        epochs = np.arange(costs.shape[1])
        plt.fill_between(epochs, mean - std, mean + std, alpha=0.1)
        plt.plot(epochs, mean, label=name)

    plt.xlabel("Environment Samples (x1,000)")
    plt.ylabel("Final Euclidean Distance to Goal Position")
    plt.legend()
    plt.savefig('results/iclr2018/gym-pusher-3d.jpg')
    plt.show()
def main():
    """Plot the final goal distance on the Ant max-distance-6 task.

    For every algorithm the matching trials are loaded with ``get_trials``
    and each learning curve is smoothed with
    ``sliding_mean(..., window=10)``. Shorter curves are padded with NaN up
    to the longest one, so the NaN-aware mean and standard deviation at
    each step use only the trials that reached that step. The figure is
    saved to ``results/iclr2018/ant-max-distance-6.jpg`` and then displayed.

    Raises:
        ValueError: if no trial matches the criteria of some algorithm.
    """
    # Every algorithm except DDPG was logged into this directory.
    final_dir = (
        "/home/vitchyr/git/railrl/data/doodads3/"
        "01-03-final-ant-max-distance-6/"
    )
    tdm_trials = get_trials(
        final_dir,
        criteria={
            'algorithm': 'DDPG-TDM',
            'exp_id': '4',
        }
    )
    ddpg_trials = get_trials(
        "/home/vitchyr/git/railrl/data/doodads3/01-04-ddpg-ant-max-d-6-post-sweep/",
        criteria={
            'algorithm': 'DDPG',
            'exp_id': '7',
        }
    )
    ddpg_sparse_trials = get_trials(
        final_dir,
        criteria={
            'algorithm': 'DDPG-Sparse',
            'exp_id': '0',
        }
    )
    her_trials = get_trials(
        final_dir,
        criteria={
            'algorithm': 'HER-Andrychowicz',
            'exp_id': '13',
        }
    )
    mb_trials = get_trials(
        final_dir,
        criteria={
            'algorithm': 'Model-Based-Dagger',
            'exp_id': '2',
        }
    )

    MAX_ITERS = 1000000

    plt.figure()
    # The logged column names use underscores instead of spaces.
    # NOTE: 'Multitask Final L2 distance to goal Mean' was the alternative
    # key considered for this plot.
    key = 'Final Distance to goal Mean'.replace(" ", "_")
    for trials, name in [
        (tdm_trials, 'TDM'),
        (ddpg_trials, 'DDPG'),
        (her_trials, 'HER'),
        (ddpg_sparse_trials, 'DDPG-Sparse'),
        (mb_trials, 'Model-Based'),
    ]:
        all_values = []
        for trial in trials:
            try:
                values_ts = trial.data[key]
            except Exception:
                # Keep the interactive breakpoint for inspecting the trial,
                # but re-raise afterwards: continuing would either hit an
                # undefined `values_ts` or silently reuse the previous
                # trial's curve.
                import ipdb
                ipdb.set_trace()
                raise
            all_values.append(sliding_mean(values_ts, window=10))
        if not all_values:
            raise ValueError("No trials found for {}".format(name))
        # Pad every curve with NaN to the longest length so that they can be
        # stacked; the nan-aware statistics below ignore the padding.
        max_len = max(map(len, all_values))
        costs = np.vstack([
            np.pad(values, (0, max_len - len(values)), 'constant',
                   constant_values=np.nan)
            for values in all_values
        ])
        costs = costs[:, :MAX_ITERS]
        mean = np.nanmean(costs, axis=0)
        std = np.nanstd(costs, axis=0)
        epochs = np.arange(costs.shape[1])
        plt.fill_between(epochs, mean - std, mean + std, alpha=0.1)
        plt.plot(epochs, mean, label=name)

    plt.xlabel("Environment Samples (x1,000)")
    plt.ylabel("Final Distance to Goal Position")
    plt.legend()
    plt.savefig('results/iclr2018/ant-max-distance-6.jpg')
    plt.show()
Example #3
0
import os
from railrl.misc.data_processing import get_trials
from subprocess import call


# Destination folder that receives a copy of every selected trial directory.
base_dir = '/home/vitchyr/git/railrl/data/papers/nips2018/for-ashvin' \
           '/reacher-main-results-ours'


# Select the trials whose variant matches all of these settings.
trials = get_trials(
    '/home/vitchyr/git/railrl/data/papers/nips2018/reacher-abalation-resample-strategy/',
    criteria={
        'replay_kwargs.fraction_resampled_goals_are_env_goals': 0.5,
        'replay_kwargs.fraction_goals_are_rollout_goals': 0.2,
        'algo_kwargs.num_updates_per_env_step': 5,
    }
)
output_dir = base_dir

print("Making dir", output_dir)
os.makedirs(output_dir, exist_ok=True)
for trial in trials:
    # NOTE(review): index 2 is presumably the trial's log directory --
    # confirm against the tuple returned by get_trials.
    trial_dir = trial[2]  # named `trial_dir` to avoid shadowing builtin dir()
    print("cp -r {} {}".format(trial_dir, output_dir))
    return_code = call(["cp", "-r", trial_dir, output_dir])
    if return_code != 0:
        # `call` does not raise on failure, so report a failed copy.
        print("WARNING: cp exited with status {} for {}".format(
            return_code, trial_dir))
Example #4
0
import matplotlib.pyplot as plt

from railrl.misc.data_processing import get_trials
from railrl.visualization.plot_util import plot_trials, padded_ma_filter

plt.style.use("ggplot")

# Experiment directories, one per method.
# (An earlier VAE directory, kept for reference:
#  '/home/vitchyr/git/railrl/data/doodads3/05-12-sawyer-reach-vae-rl-log-prob-rewards-2')
vae_dir = '/home/vitchyr/git/railrl/data/doodads3/05-14-paper-sawyer-reach-vae-rl-lprob-rewards-min-var-after-fact/'
state_her_td3_dir = '/home/vitchyr/git/railrl/data/doodads3/05-13-full-state-sawyer-reach-2/'

# Variant settings a trial must match to be selected. Note that the two
# sweeps name the replay-buffer settings differently
# ('replay_kwargs' vs. 'replay_buffer_kwargs').
vae_criteria = {
    'replay_kwargs.fraction_resampled_goals_are_env_goals': 0.5,
    'replay_kwargs.fraction_goals_are_rollout_goals': 0.2,
    'reward_params.min_variance': 1,
    'vae_wrapped_env_kwargs.sample_from_true_prior': False,
}
state_her_td3_criteria = {
    'replay_buffer_kwargs.fraction_resampled_goals_are_env_goals': 0.5,
    'replay_buffer_kwargs.fraction_goals_are_rollout_goals': 0.2,
    'exploration_type': 'ou',
}

vae_trials = get_trials(vae_dir, criteria=vae_criteria)
state_her_td3 = get_trials(state_her_td3_dir, criteria=state_her_td3_criteria)
state_tdm_ddpg = get_trials(
    '/home/vitchyr/git/railrl/data/doodads3/05-14-tdm-ddpg-reach-sweep-2/',
    criteria={
        'env_class.$class':
        'railrl.envs.mujoco.sawyer_gripper_env.SawyerXYEnv',
        'algo_kwargs.base_kwargs.num_updates_per_env_step': 10,
        'algo_kwargs.tdm_kwargs.max_tau': 5,
    },
Example #5
0
def main():
    """Plot the final goal distance on the pusher task for five algorithms.

    Trials of each algorithm are loaded with ``get_trials``. The curves of
    one algorithm are truncated to their shortest common length and to at
    most ``MAX_ITERS`` entries, and the mean curve is plotted with a +/-
    one standard deviation band. The HER curves are additionally passed
    through ``sliding_mean(costs, 20)``. The figure is saved to
    ``results/iclr2018/pusher.jpg`` and then displayed.

    Raises:
        ValueError: if no trial matches the criteria of some algorithm.
    """
    her_andry_trials = get_trials(
        "/home/vitchyr/git/railrl/data/doodads3/12-24-her-andrychowicz-pusher-rebutal/",
        criteria={
            'exp_id': '11',
        },
    )

    ddpg_path = "/mnt/data-backup-12-02-2017/doodads3/10-25-ddpg-pusher-again-baseline-with-reward-bonus/"
    ddpg_criteria = {
        'algo_params.num_updates_per_env_step': 5,
        'algo_params.scale_reward': 1,
        'algo_params.tau': 0.01,
    }
    mb_path = "/mnt/data-backup-12-02-2017/doodads3/10-25-abhishek-mb-baseline-pusher-again-shaped/"
    mb_criteria = None
    our_path = "/mnt/data-backup-12-02-2017/doodads3/11-02-get-results-handxyxy-small-sweep"
    our_criteria = {
        'algo_params.num_updates_per_env_step': 5,
        'epoch_discount_schedule_params.value': 5,
        'algo_params.tau': 0.001,
    }
    ddpg_trials = get_trials(ddpg_path, criteria=ddpg_criteria)
    mb_trials = get_trials(mb_path, criteria=mb_criteria)
    our_trials = get_trials(our_path, criteria=our_criteria)
    ddpg_sparse_trials = get_trials(
        "/home/vitchyr/git/railrl/data/doodads3/12-23-ddpg-sparse-sweep-4/",
        criteria={
            'env_class.$class': 'railrl.envs.multitask.pusher2d.CylinderXYPusher2DEnv',
            'ddpg_tdm_kwargs.base_kwargs.num_updates_per_env_step': 1,
        }
    )
    MAX_ITERS = 100

    base_key = 'Final_Euclidean_distance_to_goal_Mean'
    plt.figure()
    # The experiments logged the distance under different column names, so
    # each entry carries its own key.
    for trials, name, key in [
        (our_trials, 'TDM', 'test_'+base_key),
        (ddpg_trials, 'DDPG', base_key),
        (her_andry_trials, 'HER', "Final Distance object to goal Mean"),
        (ddpg_sparse_trials, 'DDPG-Sparse', "Final Distance object to goal Mean"),
        (mb_trials, 'Model Based', base_key),
    ]:
        key = key.replace(" ", "_")
        all_values = [trial.data[key] for trial in trials]
        if not all_values:
            # Fail loudly instead of breaking into a debugger and then
            # continuing with an undefined (or previous algorithm's) `costs`.
            raise ValueError("No trials found for {}".format(name))
        # Compare the trials only on the prefix that every trial has.
        min_len = min(map(len, all_values))
        costs = np.vstack([values[:min_len] for values in all_values])
        costs = costs[:, :MAX_ITERS]
        if name == 'HER':
            costs = sliding_mean(costs, 20)
        mean = np.mean(costs, axis=0)
        std = np.std(costs, axis=0)
        epochs = np.arange(0, len(costs[0]))
        plt.fill_between(epochs, mean - std, mean + std, alpha=0.1)
        plt.plot(epochs, mean, label=name)

    # plt.xscale('log')
    plt.xlabel("Environment Samples (x1000)")
    plt.ylabel("Final Distance to Goal Position")
    plt.legend()
    plt.savefig('results/iclr2018/pusher.jpg')
    plt.show()
import matplotlib.pyplot as plt

from railrl.misc.data_processing import get_trials
from railrl.visualization.plot_util import plot_trials

plt.style.use("ggplot")

# Two experiment directories: one logged from full state, one from the VAE.
state_dir = '/home/vitchyr/git/railrl/data/doodads3/05-12-sawyer-push-and-reach-easy/'
vae_rl_dir = '/home/vitchyr/git/railrl/data/doodads3/05-12-sawyer-push-and-reach-easy-vae-rl'

# Variant settings a trial must match to be selected. Note that the two
# directories name the replay-buffer settings differently
# ('replay_buffer_kwargs' vs. 'replay_kwargs').
state_criteria = {
    'replay_buffer_kwargs.fraction_resampled_goals_are_env_goals': 0.5,
    'replay_buffer_kwargs.fraction_goals_are_rollout_goals': 0.2,
    'exploration_type': 'ou'
}
td3_criteria = {
    'replay_buffer_kwargs.fraction_goals_are_rollout_goals': 1.0,
    'exploration_type': 'ou'
}
my_criteria = {
    'replay_kwargs.fraction_resampled_goals_are_env_goals': 0.5,
    'replay_kwargs.fraction_goals_are_rollout_goals': 0.2,
}
vae_td3_criteria = {
    'replay_kwargs.fraction_goals_are_rollout_goals': 1.,
}

state_trials = get_trials(state_dir, criteria=state_criteria)
td3_trials = get_trials(state_dir, criteria=td3_criteria)
my_trials = get_trials(vae_rl_dir, criteria=my_criteria)
vae_td3_trials = get_trials(vae_rl_dir, criteria=vae_td3_criteria)
Example #7
0
from railrl.misc.data_processing import get_trials
import matplotlib.pyplot as plt
import numpy as np

# HER (Andrychowicz) runs: a single experiment id from the sweep.
her_andry_dir = '/home/vitchyr/git/railrl/data/doodads3/12-24-her-andrychowicz-reacher-rebutal/'
her_andry_criteria = {
    'exp_id': '7',
}
her_andry_trials = get_trials(
    her_andry_dir,
    criteria=her_andry_criteria,
)

mb_path = "/home/vitchyr/git/railrl/data/doodads3/12-24-model-based-reacher-multitask-fixed-2/"

# Sparse-reward DDPG runs, restricted to the 7-DoF reacher environment class.
ddpg_indicator_dir = "/home/vitchyr/git/railrl/data/doodads3/12-23-ddpg-sparse-sweep-4/"
ddpg_indicator_criteria = {
    'env_class.$class':
    "railrl.envs.multitask.reacher_7dof.Reacher7DofXyzGoalState",
    'ddpg_tdm_kwargs.base_kwargs.num_updates_per_env_step': 1,
}
ddpg_indicator_trials = get_trials(
    ddpg_indicator_dir,
    criteria=ddpg_indicator_criteria,
)

# TDM runs, filtered by the variant settings below.
tdm_dir = "/mnt/data-backup-12-02-2017/doodads3/10-27-sdql-reacher-get-long-results"
tdm_criteria = {
    'epoch_discount_schedule_params.value': 15,
    'eval_with_oc_policy': False,
    'algo_params.num_updates_per_env_step': 25,
}
tdm_trials = get_trials(tdm_dir, criteria=tdm_criteria)

ddpg_trials = get_trials(
    "/mnt/data-backup-12-02-2017/doodads3/10-25-ddpg-reacher-pusher-baseline/",
Example #8
0
import os
import shutil
from railrl.misc.data_processing import get_trials
from subprocess import call


# Destination folder that receives a copy of every selected trial directory.
base_dir = '/home/vitchyr/git/railrl/data/papers/nips2018/for-ashvin' \
           '/reacher-baseline-oracle'

# Select the trials whose variant matches all of these settings.
trials = get_trials(
    '/home/vitchyr/git/railrl/data/doodads3/05-16-paper-reacher-results-full-state-oracle-ish/',
    criteria={
        'replay_buffer_kwargs.fraction_resampled_goals_are_env_goals': 1.0,
        'algo_kwargs.num_updates_per_env_step': 5,
        'exploration_type': 'epsilon',
    })
output_dir = base_dir

print("Making dir", output_dir)
os.makedirs(output_dir, exist_ok=True)
for trial in trials:
    # NOTE(review): index 2 is presumably the trial's log directory --
    # confirm against the tuple returned by get_trials.
    trial_dir = trial[2]  # named `trial_dir` to avoid shadowing builtin dir()
    print("cp -r {} {}".format(trial_dir, output_dir))
    return_code = call(["cp", "-r", trial_dir, output_dir])
    if return_code != 0:
        # `call` does not raise on failure, so report a failed copy.
        print("WARNING: cp exited with status {} for {}".format(
            return_code, trial_dir))
    vitchyr_base_dir,
    format_func,
    configure_matplotlib,
)
import matplotlib.pyplot as plt
from railrl.visualization import plot_util as plot
from railrl.misc import data_processing as dp

configure_matplotlib(matplotlib)

pusher_dir = vitchyr_base_dir + 'papers/nips2018/pusher-online-vae'

# Both selections use rdim 250; they differ only in the VAE training
# schedule function recorded in the variant.
online_pusher_criteria = {
    'rdim': 250,
    'algo_kwargs.should_train_vae.$function':
    'railrl.torch.vae.vae_schedules.every_three',
}
offline_pusher_criteria = {
    'rdim': 250,
    'algo_kwargs.should_train_vae.$function':
    'railrl.torch.vae.vae_schedules.never_train',
}

online_pusher = dp.get_trials(pusher_dir, criteria=online_pusher_criteria)
offline_pusher = dp.get_trials(pusher_dir, criteria=offline_pusher_criteria)
plt.figure(figsize=(6, 5))
plot.plot_trials(
    OrderedDict([
        ("Online", online_pusher),
Example #10
0
def get_fanova_info(
    base_dir,
    params_to_ignore=('seed', 'exp_id', 'unique_id', 'exp_name'),
    ylabel='AverageReturn',
):
    """Build a fANOVA model relating hyperparameters to a final metric.

    Loads all trials under ``base_dir``, uses the last logged value of
    ``ylabel`` of each trial as the target, and uses the hyperparameters
    that remain after ``remove_keys_with_nonunique_values`` as features.

    Args:
        base_dir: Experiment directory passed to ``get_trials``.
        params_to_ignore: Variant keys forwarded to
            ``remove_keys_with_nonunique_values`` to be left out of the
            features.
        ylabel: Name of the logged column used as the target. Spaces are
            replaced by underscores and dashes are removed before lookup.

    Returns:
        A ``FanovaInfo`` holding the fitted ``fANOVA`` object, the config
        space, the feature matrix ``X``, the targets ``Y``, the categorical
        remapping, and the variants of the trials that had data.

    Raises:
        ValueError: if no trials are found, or if ``ylabel`` is not one of
            the logged columns.
    """
    data_and_variants = list(get_trials(base_dir))
    if not data_and_variants:
        raise ValueError("No trials found in: {}".format(base_dir))
    experiment_data_list, variants_list = zip(*data_and_variants)
    ylabel = ylabel.replace(' ', '_').replace('-', '')
    if ylabel not in experiment_data_list[0].dtype.names:
        print("Possible ylabels:")
        for name in experiment_data_list[0].dtype.names:
            print(" - {}".format(name))
        raise ValueError("Invalid ylabel: {}".format(ylabel))

    # Keep only the trials that logged at least one value for `ylabel`,
    # filtering data and variants together so that they stay aligned.
    trials_with_data = [
        (exp, variant)
        for exp, variant in zip(experiment_data_list, variants_list)
        if exp[ylabel].size >= 1
    ]
    if len(trials_with_data) != len(experiment_data_list):
        print("WARNING: Skipping some experiments. Probably because they "
              "have no data points.")
    valid_experiment_data_list = [exp for exp, _ in trials_with_data]
    variants_list = [variant for _, variant in trials_with_data]

    # Target: the last logged value. A column holding a single value is
    # converted with float() instead of being indexed.
    Y = np.array([
        exp[ylabel][-1] if exp[ylabel].size > 1 else np.array(
            float(exp[ylabel]), dtype=np.double)
        for exp in valid_experiment_data_list
    ])
    filtered_variants_list = remove_keys_with_nonunique_values(
        variants_list, params_to_ignore=params_to_ignore)
    filtered_variants_to_values = get_dict_key_to_values(
        filtered_variants_list)
    names = list(filtered_variants_list[0].keys())
    X_raw = _extract_features(filtered_variants_list, names)
    config_space, X, categorical_remapping = (
        _get_config_space_and_new_features(
            X_raw,
            names,
            filtered_variants_to_values,
        ))

    # Not sure why, but config_space shuffles the order of the
    # hyperparameters, so reorder the features to match it.
    new_name_order = [
        config_space.get_hyperparameter_by_idx(i) for i in range(len(names))
    ]
    new_order = [names.index(name) for name in new_name_order]
    X = [X[i] for i in new_order]
    # X is [feature_dim x batch_size], but fANOVA expects the transpose.
    X = np.array(X, dtype=object).T
    return FanovaInfo(
        fANOVA(X, Y, config_space=config_space),
        config_space,
        X,
        Y,
        categorical_remapping,
        variants_list,
    )
Example #11
0
from visualization.grill.config import (
    output_dir,
    vitchyr_base_dir,
    format_func,
    configure_matplotlib,
)
import matplotlib.pyplot as plt
from railrl.visualization import plot_util as plot
from railrl.misc import data_processing as dp

configure_matplotlib(matplotlib)

reacher_dir = vitchyr_base_dir + 'papers/nips2018/reacher_online_vae'

# The two selections differ only in the VAE training schedule function
# recorded in the variant.
online_reacher_criteria = {
    'algo_kwargs.should_train_vae.$function':
    'railrl.torch.vae.vae_schedules.always_train',
}
offline_reacher_criteria = {
    'algo_kwargs.should_train_vae.$function':
    'railrl.torch.vae.vae_schedules.never_train',
}

online_reacher = dp.get_trials(reacher_dir, criteria=online_reacher_criteria)
offline_reacher = dp.get_trials(reacher_dir, criteria=offline_reacher_criteria)

plt.figure(figsize=(6, 5))
plot.plot_trials(
    OrderedDict([
        ("Online", online_reacher),
        ("Offline", offline_reacher),
    ]),
def main():
    """Plot a final metric against every numeric hyperparameter of a sweep.

    Command-line arguments:
        expdir: experiment directory passed to ``get_trials``.
        --ylabel: logged column to plot (default ``AverageReturn``).

    For each hyperparameter that takes numeric values, the last logged
    ``ylabel`` value of every trial is grouped by the hyperparameter value
    and plotted as mean with standard-deviation error bars, one subplot per
    hyperparameter. Afterwards the non-swept parameters and the three
    highest mean values, with the parameter setting that produced each, are
    printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("expdir",
                        help="experiment dir, e.g., /tmp/experiments")
    parser.add_argument("--ylabel", default='AverageReturn')
    args = parser.parse_args()
    y_label = args.ylabel

    # Load data
    trials = get_trials(args.expdir)
    if len(trials) == 0:
        print("No trials found in: {}".format(args.expdir))
        return

    first_data = trials[0][0]
    if y_label not in first_data.dtype.names:
        print("Invalid ylabel. Valid ylabels:")
        for name in sorted(first_data.dtype.names):
            print(name)
        return

    # Get the unique parameters
    _, all_variants = zip(*trials)
    unique_param_to_values = get_unique_param_to_values(all_variants)
    unique_numeric_param_to_values = {
        k: unique_param_to_values[k]
        for k in unique_param_to_values
        if is_numeric(list(unique_param_to_values[k])[0])
    }
    # TODO(vitchyr): Use bar plot if xlabel is not numeric, rather than just
    # ignoring it
    value_to_unique_params = defaultdict(dict)

    # Plot results
    num_params = len(unique_numeric_param_to_values)
    if num_params == 0:
        print("No numeric parameter varies across the trials; "
              "nothing to plot.")
        return
    fig, axes = plt.subplots(num_params)
    if num_params == 1:
        # Wrap the single returned axes so it can be indexed like the
        # multi-subplot case.
        axes = [axes]
    for i, x_label in enumerate(unique_numeric_param_to_values):
        x_value_to_y_values = defaultdict(list)
        for data, variant in trials:
            if len(data[y_label]) > 0:
                x_value_to_y_values[variant[x_label]].append(data[y_label][-1])
            else:
                # Only warn for trials that really have no value logged.
                print(
                    "WARNING. data is missing this label: {}".format(y_label))
        y_means = []
        y_stds = []
        x_values = []
        for x_value, y_values in x_value_to_y_values.items():
            y_mean = np.mean(y_values)
            x_values.append(x_value)
            y_means.append(y_mean)
            y_stds.append(np.std(y_values))
            value_to_unique_params[y_mean][x_label] = x_value

        x_values, y_means, y_stds = sort_by_first(x_values, y_means, y_stds)

        axes[i].errorbar(x_values, y_means, yerr=y_stds)
        axes[i].set_ylabel(y_label)
        axes[i].set_xlabel(x_label)

    # Display information about the best parameters
    value_and_unique_params = sorted(value_to_unique_params.items(),
                                     key=lambda v_and_params: -v_and_params[0])
    unique_params = list(unique_numeric_param_to_values.keys())
    # The non-swept parameters are read from the last trial's variant,
    # referenced explicitly rather than through a leaked loop variable.
    last_variant = all_variants[-1]
    default_params = {
        k: last_variant[k] for k in last_variant if k not in unique_params
    }
    print("Default Param", default_params)
    print("Top 3 params")
    for value, params in value_and_unique_params[:3]:
        for k, v in params.items():
            print("\t{}: {}".format(k, v))
        print("Value", value)

    plt.show()