def main():
    """Plot the ant TDM updates-per-step sweep, once per relabeling setting.

    Two ``Experiment`` views of the same sweep directory are built
    (``relabel`` True and False, both with reward scale 1). For each view a
    new figure is drawn with one curve per ``num_updates_per_env_step``
    value: the ``Final_Distance_to_goal_Mean`` series of every matching trial
    is smoothed with ``sliding_mean(window=10)``, cut to the shortest trial
    and to at most ``MAX_ITERS`` points, and drawn as the mean with a
    +/- one standard deviation band. Each figure is saved under
    ``results/iclr2018/`` and then shown.

    Raises:
        ValueError: if a sweep setting matches no trials, or if looking up
            the plotted series in a trial fails with ``KeyError``,
            ``ValueError`` or ``IndexError``.
    """
    sweep_dir = (
        "/home/vitchyr/git/railrl/data/doodads3/01-02-ddpg-tdm-ant-nupo-sweep/"
    )
    relabel_exp = Experiment(
        sweep_dir,
        criteria={
            'relabel': True,
            'ddpg_tdm_kwargs.base_kwargs.reward_scale': 1,
        })
    no_relabel_exp = Experiment(
        sweep_dir,
        criteria={
            'relabel': False,
            'ddpg_tdm_kwargs.base_kwargs.reward_scale': 1,
        })

    MAX_ITERS = 100
    plot_key = 'Final Distance to goal Mean'.replace(' ', '_')

    for exp, name in [
        (relabel_exp, 'Relabel'),
        (no_relabel_exp, 'No Relabel'),
    ]:
        plt.figure()
        for nupo in [1, 5, 10, 20, 30]:
            trials = exp.get_trials(
                {'ddpg_tdm_kwargs.base_kwargs.num_updates_per_env_step': nupo})
            all_values = []
            for trial in trials:
                # Fail loudly: the previous bare ``except`` opened a debugger
                # and then appended a stale (or undefined) series.
                try:
                    raw_values = trial.data[plot_key]
                except (KeyError, ValueError, IndexError) as err:
                    raise ValueError(
                        "'{}' series not found in a '{}' trial with {} "
                        "updates per step".format(plot_key, name, nupo)
                    ) from err
                all_values.append(sliding_mean(raw_values, window=10))
            if not all_values:
                raise ValueError(
                    "No '{}' trials found with {} updates per step".format(
                        name, nupo))
            # Trials may have different lengths; align on the shortest one.
            min_len = min(map(len, all_values))
            costs = np.vstack([values[:min_len] for values in all_values])
            costs = costs[:, :MAX_ITERS]
            mean = np.mean(costs, axis=0)
            std = np.std(costs, axis=0)
            epochs = np.arange(costs.shape[1])
            plt.fill_between(epochs, mean - std, mean + std, alpha=0.1)
            plt.plot(epochs, mean, label="{} updates per step".format(nupo))

        plt.xlabel("Environment Samples (x1,000)")
        plt.ylabel("Final Distance to Goal Position")
        plt.legend()
        plt.savefig('results/iclr2018/ant-nupo-sweep-{}.jpg'.format(
            name.lower().replace(' ', '-')))
        plt.show()
Example #2
0
def main():
    """Compare TDM, DDPG and model-based trials on the ant distance task.

    Trials are loaded from two experiment directories using the ``exp_id`` /
    ``algorithm`` criteria below. For each method the
    ``Final_Distance_to_goal_Mean`` series of every trial is smoothed with
    ``sliding_mean(window=10)``, cut to the shortest trial and to at most
    ``MAX_ITERS`` points, and drawn as the mean with a +/- one standard
    deviation band. The figure is saved to
    ``results/iclr2018/ant-distance-3-to-5.jpg`` and then shown.

    Raises:
        ValueError: if a method has no trials, or if looking up the plotted
            series in a trial fails with ``KeyError``, ``ValueError`` or
            ``IndexError``.
    """
    tdm_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/01-02-ddpg-tdm-ant-nupo-sweep/",
        criteria={
            'exp_id': '27',  # 23 for NUPO = 20, 27 for NUPO = 10
        }
    ).get_trials()
    mb_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/01-02-ant-distance-3-to-5/",
        criteria={
            'exp_id': '0',
            'algorithm': 'Model-Based-Dagger',
        }
    ).get_trials()
    ddpg_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/01-02-ant-distance-3-to-5/",
        criteria={
            'exp_id': '3',
            'algorithm': 'DDPG',
        }
    ).get_trials()

    MAX_ITERS = 1000000
    # Every method is plotted from the same logged series.
    key = 'Final Distance to goal Mean'.replace(" ", "_")

    plt.figure()
    for trials, name in [
        (tdm_trials, 'TDMs'),
        (ddpg_trials, 'DDPG'),
        (mb_trials, 'Model-Based'),
    ]:
        all_values = []
        for trial in trials:
            # Fail loudly: the previous bare ``except`` opened a debugger and
            # then smoothed a stale (or undefined) series.
            try:
                raw_values = trial.data[key]
            except (KeyError, ValueError, IndexError) as err:
                raise ValueError(
                    "'{}' series not found in a '{}' trial".format(key, name)
                ) from err
            all_values.append(sliding_mean(raw_values, window=10))
        if not all_values:
            raise ValueError("No trials found for '{}'".format(name))
        # Trials may have different lengths; align on the shortest one.
        min_len = min(map(len, all_values))
        costs = np.vstack([values[:min_len] for values in all_values])
        costs = costs[:, :MAX_ITERS]
        mean = np.mean(costs, axis=0)
        std = np.std(costs, axis=0)
        epochs = np.arange(costs.shape[1])
        plt.fill_between(epochs, mean - std, mean + std, alpha=0.1)
        plt.plot(epochs, mean, label=name)

    plt.xlabel("Environment Samples (x1,000)")
    plt.ylabel("Final Euclidean Distance to Goal Position")
    plt.legend()
    plt.savefig('results/iclr2018/ant-distance-3-to-5.jpg')
    plt.show()
def main():
    """Compare TDM, model-based, DDPG and TRPO trials on the walker task.

    One set of trials is loaded per method from its own experiment directory
    using the ``exp_id`` filters below. For each method the
    ``Final_xpos_errors_Mean`` series of every trial is cut to the shortest
    trial and to at most ``MAX_ITERS`` points (no smoothing is applied), and
    drawn as the mean with a +/- one standard deviation band. The figure is
    saved to ``results/iclr2018/walker.jpg`` and then shown.

    Raises:
        ValueError: if a method has no trials, or if looking up the plotted
            series in a trial fails with ``KeyError``, ``ValueError`` or
            ``IndexError``.
    """
    tdm_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-25-tdm-ddpg-walker-position-long/"
    ).get_trials({
        'exp_id': '1',
    })
    trpo_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-25-trpo-walker-position-long/"
    ).get_trials({
        'exp_id': '4',
    })
    mb_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-25-mb-dagger-walker-position-long-take2/"
    ).get_trials({
        'exp_id': '2',
    })
    ddpg_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-25-ddpg-walker-position-long-take2/"
    ).get_trials({
        'exp_id': '1',
    })

    MAX_ITERS = 10001
    # Every method is plotted from the same logged series.
    key = 'Final xpos errors Mean'.replace(" ", "_")

    plt.figure()
    for trials, name in [
        (tdm_trials, 'TDMs'),
        (mb_trials, 'Model-Based'),
        (ddpg_trials, 'DDPG'),
        (trpo_trials, 'TRPO'),
    ]:
        all_values = []
        for trial in trials:
            # Fail loudly: the previous bare ``except`` opened a debugger and
            # then appended a stale (or undefined) series.
            try:
                all_values.append(trial.data[key])
            except (KeyError, ValueError, IndexError) as err:
                raise ValueError(
                    "'{}' series not found in a '{}' trial".format(key, name)
                ) from err
        if not all_values:
            raise ValueError("No trials found for '{}'".format(name))
        # Trials may have different lengths; align on the shortest one.
        min_len = min(map(len, all_values))
        costs = np.vstack([values[:min_len] for values in all_values])
        costs = costs[:, :MAX_ITERS]
        mean = np.mean(costs, axis=0)
        std = np.std(costs, axis=0)
        epochs = np.arange(costs.shape[1])
        plt.fill_between(epochs, mean - std, mean + std, alpha=0.1)
        plt.plot(epochs, mean, label=name)

    plt.xlabel("Environment Samples (x10,000)")
    plt.ylabel("Final Distance to Goal Position")
    plt.legend()
    plt.savefig('results/iclr2018/walker.jpg')
    plt.show()
Example #4
0
def main():
    """Compare DDPG and model-based trials on the pusher3d task.

    DDPG trials are loaded with the ``reward_coefs`` / ``exp_id`` criteria
    below; the model-based directory is loaded without criteria. For each
    method the ``Final_Distance_to_goal_Mean`` series of every trial is cut
    to the shortest trial and to at most ``MAX_ITERS`` points (no smoothing
    is applied), and drawn as the mean with a +/- one standard deviation
    band. The figure is saved to ``results/iclr2018/pusher3d.jpg`` and then
    shown.

    Raises:
        ValueError: if a method has no trials, or if looking up the plotted
            series in a trial fails with ``KeyError``, ``ValueError`` or
            ``IndexError``.
    """
    ddpg_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-29-find-pusher3d-mismatch-2",
        criteria={
            'env_kwargs.reward_coefs': [1, 0, 0],
            'exp_id': '1',
        }).get_trials()
    mb_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-30-mb-dagger-pusher3d-fixed-2/",
    ).get_trials()

    MAX_ITERS = 10000
    # Both methods are plotted from the same logged series.
    key = 'Final Distance to goal Mean'.replace(" ", "_")

    plt.figure()
    for trials, name in [
        (ddpg_trials, 'DDPG'),
        (mb_trials, 'Model-Based'),
    ]:
        all_values = []
        for trial in trials:
            # Fail loudly: the previous bare ``except`` opened a debugger and
            # then appended a stale (or undefined) series.
            try:
                all_values.append(trial.data[key])
            except (KeyError, ValueError, IndexError) as err:
                raise ValueError(
                    "'{}' series not found in a '{}' trial".format(key, name)
                ) from err
        if not all_values:
            raise ValueError("No trials found for '{}'".format(name))
        # Trials may have different lengths; align on the shortest one.
        min_len = min(map(len, all_values))
        costs = np.vstack([values[:min_len] for values in all_values])
        costs = costs[:, :MAX_ITERS]
        mean = np.mean(costs, axis=0)
        std = np.std(costs, axis=0)
        epochs = np.arange(costs.shape[1])
        plt.fill_between(epochs, mean - std, mean + std, alpha=0.1)
        plt.plot(epochs, mean, label=name)

    plt.xlabel("Environment Samples (x1,000)")
    plt.ylabel("Final Euclidean Distance to Goal Position")
    plt.legend()
    plt.savefig('results/iclr2018/pusher3d.jpg')
    plt.show()
from railrl.misc.data_processing import Experiment
import matplotlib.pyplot as plt
import numpy as np

from railrl.misc.visualization_util import sliding_mean

# Load the two sets of trials to compare. Both come from the same experiment
# directory and are selected by the criteria dict given to Experiment
# (presumably matched against each trial's variant settings — TODO confirm).
mb_trials = Experiment(
    "/home/vitchyr/git/railrl/data/doodads3/12-30-cheetah-xpos-increase-distance/",
    criteria={
        'algorithm': 'Model-Based-Dagger',
        'env_kwargs.max_distance': 40,
    },
).get_trials()
ddpg_trials = Experiment(
    "/home/vitchyr/git/railrl/data/doodads3/12-30-cheetah-xpos-increase-distance/",
    criteria={
        'algorithm': 'DDPG',
        'env_kwargs.max_distance': 40,
        'exp_id': '10',
    },
).get_trials()

# NOTE(review): MAX_ITERS is not used in the visible part of this script;
# presumably it caps the number of plotted points further down — confirm.
MAX_ITERS = 10000
plt.figure()
base_key = 'Final_Distance_to_goal_Mean'
# Each entry is (trials, legend name, data key); both methods use base_key.
for trials, name, key in [
    (mb_trials, 'Model Based', base_key),
    (ddpg_trials, 'DDPG', base_key),
]:
    # Per-method accumulators, reset on every iteration.
    all_values = []
    # Starts at infinity so any real series length will be smaller.
    min_len = np.inf
Example #6
0
def main():
    """Draw the watermaze memory-ablation bar chart.

    For four ablation variants (combinations of the
    ``do_not_load_initial_memories`` and ``ignore_memory`` flags) and six
    subtrajectory lengths, the last ``AverageReturn`` value of every matching
    trial is collected. The chart shows, per subtrajectory length, one bar
    per variant with the mean of those final returns and the standard
    deviation as the error bar. The figure is written to ``test.png`` and
    then shown.
    """
    data_dir = "/home/vitchyr/git/rllab-rail/railrl/data/papers/icml2017/watermaze/ablation2"
    experiment = Experiment(data_dir)

    subtraj_lengths = [1, 5, 10, 15, 20, 25]
    # (do_not_load_initial_memories, ignore_memory, legend label)
    ablations = (
        (False, False, 'Our Method'),
        (True, False, 'No Memory State Loaded'),
        (False, True, 'No Memory State for Critic'),
        (True, True, 'No Memory State (Truncated BPTT)'),
    )

    # label -> list (one entry per subtrajectory length) of final-return arrays
    scores_by_version = OrderedDict()
    for skip_loading, critic_ignores, label in ablations:
        per_length = []
        for length in subtraj_lengths:
            matched = experiment.get_trials({
                'algo_params.do_not_load_initial_memories': skip_loading,
                'qf_params.ignore_memory': critic_ignores,
                'algo_params.subtraj_length': length,
            })
            per_length.append(
                np.array([run.data['AverageReturn'][-1] for run in matched]))
        scores_by_version[label] = per_length

    # One (fill color, hatch pattern) pair per variant, in insertion order.
    plasma = matplotlib.cm.get_cmap('plasma')
    bar_styles = [
        (plasma(0), ''),
        (plasma(0.33), '/'),
        (plasma(0.66), '.'),
        (plasma(1.), 'x'),
    ]

    bar_width = 0.2
    positions = np.arange(len(subtraj_lengths))
    fig, ax = plt.subplots(figsize=(32.0, 20.0))
    handles = []
    labels = []
    for offset, (label, per_length) in enumerate(scores_by_version.items()):
        fill, hatch = bar_styles[offset]
        heights = [np.mean(scores) for scores in per_length]
        spreads = [np.std(scores) for scores in per_length]
        assert len(heights) == len(spreads) == len(subtraj_lengths)
        # Shift each variant by one bar width so the bars sit side by side.
        bars = ax.bar(
            positions + bar_width * offset,
            heights,
            bar_width,
            color=fill,
            yerr=spreads,
            hatch=hatch,
            ecolor='red',
            capsize=10,
            linewidth=10,
        )
        handles.append(bars[0])
        labels.append(label)

    fontsize = 50
    ax.set_xticks(positions + bar_width / 2)
    ax.set_xticklabels(subtraj_lengths)
    ax.legend(handles, labels, prop={'size': 30},
              bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
              ncol=2, mode="expand", borderaxespad=0.)
    plt.xlabel("Subtrajectory Length", fontsize=fontsize)
    plt.ylabel("Average Return", fontsize=fontsize)
    plt.xticks(fontsize=fontsize)
    plt.yticks(fontsize=fontsize)

    # Only the first legend entry's text is resized here.
    legend_texts = plt.gca().get_legend().get_texts()
    plt.setp(legend_texts[0], fontsize=fontsize)
    plt.savefig("test.png", bbox_inches='tight')

    plt.show()
Example #7
0
from railrl.misc.data_processing import Experiment, get_trials
import matplotlib.pyplot as plt
import numpy as np

# Experiment directory whose trials are split by algorithm class below.
path = "/mnt/data-backup-12-02-2017/doodads3/10-21-sdql-compare-vectorized-delta-normal-big-sweep/"
exp = Experiment(path)
# Criteria shared by every query: only trials run on this environment class.
base_criteria = {
    'env_class.$class':
        "railrl.envs.multitask.reacher_7dof.Reacher7DofFullGoalState"
}
# Fully qualified algorithm class names to compare.
algos = [
    'railrl.algos.state_distance.state_distance_q_learning.HorizonFedStateDistanceQLearning',
    'railrl.algos.state_distance.vectorized_sdql.VectorizedTauSdql',
]
# Maps each algorithm class name to the trials returned for it.
algo_to_trials = {}
for algo in algos:
    # Copy so the shared base criteria are not mutated between queries.
    criteria = base_criteria.copy()
    criteria['algo_class.$class'] = algo
    algo_to_trials[algo] = exp.get_trials(criteria)

# Name of the data series read from each trial (presumably the plotted
# metric — the rest of the script is not visible here).
key = 'Final_Euclidean_distance_to_goal_Mean'
# NOTE(review): not used in the visible part of this script; presumably an
# upper bound on the number of plotted iterations — confirm.
MAX_ITERS = 50
for algo, trials in algo_to_trials.items():
    if algo == 'railrl.algos.state_distance.state_distance_q_learning' \
               '.HorizonFedStateDistanceQLearning':
        name = 'Scalar'
    else:
        name = 'Vector'
    all_values = []
    min_len = np.inf
    for trial in trials:
def _collect_returns(experiment, criteria, num_epochs):
    """Return the first ``num_epochs`` AverageReturn values of each trial.

    Trials are fetched with ``experiment.get_trials(criteria,
    ignore_missing_keys=True)``. Trials whose ``AverageReturn`` series has
    fewer than ``num_epochs`` entries are dropped (some runs crashed).
    """
    trials = experiment.get_trials(criteria, ignore_missing_keys=True)
    return [
        t.data['AverageReturn'][:num_epochs] for t in trials
        if len(t.data['AverageReturn']) >= num_epochs
    ]


def _plot_return_curves(ax, method_to_data, xlabel, markerscale, fontsize):
    """Draw one ``sns.tsplot`` curve per method on ``ax`` and add the legend.

    ``method_to_data`` maps a legend label to a list of equal-length return
    series. Colors are taken from the 'plasma' colormap at ``i / n`` for the
    i-th of n methods; line styles cycle through a fixed list of four, so at
    most four methods are supported.

    Raises:
        ValueError: if a method has no series to plot.
    """
    linestyles = ('-', '--', ':', '-.')
    cmap = matplotlib.cm.get_cmap('plasma')
    num_methods = len(method_to_data)
    for i, (method, data) in enumerate(method_to_data.items()):
        if not data:
            # np.vstack would fail on an empty list with a less useful message.
            raise ValueError(
                "No complete trials found for '{}'".format(method))
        # NOTE(review): tsplot is deprecated in newer seaborn releases in
        # favor of lineplot — confirm the seaborn version this runs against.
        sns.tsplot(
            data=np.vstack(data),
            color=cmap(i / num_methods),
            linestyle=linestyles[i],
            condition=method,
            ax=ax,
        )
    ax.set_ylabel("Average Return", fontsize=fontsize)
    ax.set_xlabel(xlabel, fontsize=fontsize)
    ax.legend(list(method_to_data), bbox_to_anchor=(0., 1.02, 1., .102),
              loc=3, ncol=2, mode="expand", borderaxespad=0.,
              markerscale=markerscale)


def main():
    """Plot the three-panel watermaze comparison figure.

    Panel 1 shows "Our Method" for several subtrajectory lengths (first 50
    epochs), panel 2 the DDPG-based baselines (first 50 epochs) and panel 3
    the TRPO-based baselines (first 100 epochs); both baseline panels query
    subtrajectory length 25. Trials shorter than the required number of
    epochs are ignored. The figure is saved as ``comparison.png``,
    ``comparison.eps`` and ``comparison.svg`` and then shown.

    Raises:
        ValueError: if any plotted method has no sufficiently long trial.
    """
    fontsize = 12
    plt.rc('legend', fontsize=fontsize)
    plt.rc('xtick', labelsize=fontsize)
    plt.rc('ytick', labelsize=fontsize)
    base_dir = "/home/vitchyr/git/rllab-rail/railrl/data/papers/icml2017" \
               "/watermaze/watermaze-memory"
    experiment = Experiment(base_dir)
    fig, axes = plt.subplots(3, 1, figsize=(10.0, 7.5))

    # Panel 1: our method at different subtrajectory lengths.
    method_to_our_data = OrderedDict()
    for subtraj_length in [1, 10, 20, 25]:
        name = "Our Method, Subtrajectory Length = {}".format(subtraj_length)
        method_to_our_data[name] = _collect_returns(
            experiment,
            {
                'algo_params.subtraj_length': subtraj_length,
                'version': "Our Method",
            },
            50,
        )
    _plot_return_curves(
        axes[0], method_to_our_data, "Environment samples (x100)",
        markerscale=10, fontsize=fontsize,
    )

    # Panel 2: DDPG-based baselines.
    method_to_ddpg_data = OrderedDict()
    for name in ['DDPG', 'Memory States + DDPG', 'Recurrent DPG']:
        method_to_ddpg_data[name] = _collect_returns(
            experiment,
            {
                'algo_params.subtraj_length': 25,
                'version': name,
            },
            50,
        )
    _plot_return_curves(
        axes[1], method_to_ddpg_data, "Environment samples (x100)",
        markerscale=100, fontsize=fontsize,
    )

    # Panel 3: TRPO-based baselines, plotted over 100 epochs.
    method_to_trpo_data = OrderedDict()
    for name in ['TRPO', 'Memory States + TRPO', 'Recurrent TRPO']:
        method_to_trpo_data[name] = _collect_returns(
            experiment,
            {
                'algo_params.subtraj_length': 25,
                'version': name,
            },
            100,
        )
    _plot_return_curves(
        axes[2], method_to_trpo_data, "Environment samples (x1000)",
        markerscale=10, fontsize=fontsize,
    )

    # Extra vertical space so the legends above each panel do not overlap.
    fig.subplots_adjust(hspace=1)
    plt.savefig("comparison.png", bbox_inches='tight', dpi=1000)
    plt.savefig("comparison.eps", bbox_inches='tight')
    plt.savefig("comparison.svg", bbox_inches='tight')
    plt.show()
Example #9
0
from railrl.misc.data_processing import Experiment
import matplotlib.pyplot as plt
import numpy as np

# Load one set of trials per method. Each Experiment is given a data
# directory and a criteria dict (presumably matched against each trial's
# variant settings — TODO confirm), then get_trials() returns the matches.
mb_trials = Experiment(
    "/home/vitchyr/git/railrl/data/doodads3/12-24-dagger-mb-ant-cheetah-pos-and-vel/",
    criteria={
        'exp_id': '0',
    },
).get_trials()
# DDPG and DDPG-TDM trials come from the same sweep directory and are told
# apart by exp_id and algorithm name.
ddpg_trials = Experiment(
    "/home/vitchyr/git/railrl/data/doodads3/12-12-tdm-half-cheetah-short-epoch-nupo-sweep/",
    criteria={
        'exp_id': '5',
        'algorithm': 'DDPG',
    },
).get_trials()
tdm_trials = Experiment(
    "/home/vitchyr/git/railrl/data/doodads3/12-12-tdm-half-cheetah-short-epoch-nupo-sweep/",
    criteria={
        'exp_id': '8',
        'algorithm': 'DDPG-TDM',
    }).get_trials()
ddpg_indicator_trials = Experiment(
    "/home/vitchyr/git/railrl/data/doodads3/12-24-ddpg-sparse-no-relabel-cheetah-xvel/",
    criteria={
        'exp_id': '7',
    }).get_trials()
her_andry_trials = Experiment(
    "/home/vitchyr/git/railrl/data/doodads3/12-24-her-andrychowicz-cheetah-xvel-rebutal/",
    criteria={
Example #10
0
from railrl.misc.data_processing import Experiment
import matplotlib.pyplot as plt
import numpy as np

# Experiment directory whose trials are grouped by tau value below.
path = "/mnt/data-backup-12-02-2017/doodads3/10-27-get-results-handxyxy-best-hp-no-oc-sampling-nspe1000/"
exp = Experiment(path)
# Criteria shared by every query.
base_criteria = {
    'algo_params.num_updates_per_env_step': 25,
}
# Maps each tau to the full criteria dict used to select its trials.
tau_to_criteria = {}
taus = [1, 5, 15, 50]
for tau in taus:
    # Copy so the shared base criteria are not mutated between taus.
    criteria = base_criteria.copy()
    criteria['epoch_discount_schedule_params.value'] = tau
    tau_to_criteria[tau] = criteria

# Maps each tau to the trials returned for its criteria.
tau_to_trials = {}
for tau in taus:
    tau_to_trials[tau] = exp.get_trials(tau_to_criteria[tau])

# key = 'Final_Euclidean_distance_to_goal_Mean'
key = 'test_Final_Euclidean_distance_to_goal_Mean'
# NOTE(review): not used in the visible part of this script; presumably an
# upper bound on the number of plotted iterations — confirm.
MAX_ITERS = 35
for tau in taus:
    trials = tau_to_trials[tau]
    # Collect every trial's series and track the shortest series length.
    all_values = []
    min_len = np.inf
    for trial in trials:
        values_ts = trial.data[key]
        min_len = min(min_len, len(values_ts))
        all_values.append(values_ts)
def main():
    """Compare TDM, model-based, DDPG, HER and sparse-DDPG trials on ant.

    One set of trials is loaded per method using the criteria below. For
    each method the ``Final_Distance_to_goal_Mean`` series of every trial is
    cut to the shortest trial and to at most ``MAX_ITERS`` points (no
    smoothing is applied), and drawn as the mean with a +/- one standard
    deviation band. The figure is saved to ``results/iclr2018/ant.jpg``.

    Raises:
        ValueError: if a method has no trials, or if looking up the plotted
            series in a trial fails with ``KeyError``, ``ValueError`` or
            ``IndexError``.
    """
    ddpg_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-23-ddpg-nupo-sweep-ant/",
        criteria={
            'exp_id': '16',
        },
    ).get_trials()
    her_andrychowicz_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-23-her-andrychowicz-ant-rebutal/",
        criteria={
            'exp_id': '14',
        },
    ).get_trials()
    # Ant results with batch size of 128
    tdm_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-24-ddpg-nupo-sweep-ant/",
        criteria={
            'exp_id': '16',
        }
    ).get_trials()
    # Alternative TDM data source, kept for reference.
    # Accidentally called this pusher, but it's really ant
    # Here, x-axis is 10k steps.
    # tdm_trials = Experiment(
    #     "/home/vitchyr/git/railrl/data/doodads3/12-27-pusher-reward-scale-tau-uniform-or-truncated-geo-sweep-2/",
    #     criteria={
    #         'ddpg_tdm_kwargs.base_kwargs.reward_scale': 100,
    #         'ddpg_tdm_kwargs.tdm_kwargs.tau_sample_strategy':
    #             'truncated_geometric',
    #     }
    # ).get_trials()
    ddpg_indicator_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-23-ddpg-sparse-sweep-4/",
        criteria={
            'env_class.$class': 'railrl.envs.multitask.ant_env.GoalXYPosAnt',
            'ddpg_tdm_kwargs.base_kwargs.num_updates_per_env_step': 1,
        },
    ).get_trials()
    mb_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-24-dagger-mb-ant-cheetah-pos-and-vel/",
        criteria={
            'exp_id': '1',
        },
    ).get_trials()

    MAX_ITERS = 200
    # Every method is plotted from the same logged series.
    key = 'Final Distance to goal Mean'.replace(" ", "_")

    plt.figure()
    for trials, name in [
        (tdm_trials, 'TDM'),
        (mb_trials, 'Model-Based'),
        (ddpg_trials, 'DDPG'),
        (her_andrychowicz_trials, 'HER'),
        (ddpg_indicator_trials, 'DDPG-Sparse'),
    ]:
        all_values = []
        for trial in trials:
            # Fail loudly: the previous bare ``except`` opened a debugger and
            # then appended a stale (or undefined) series.
            try:
                all_values.append(trial.data[key])
            except (KeyError, ValueError, IndexError) as err:
                raise ValueError(
                    "'{}' series not found in a '{}' trial".format(key, name)
                ) from err
        if not all_values:
            raise ValueError("No trials found for '{}'".format(name))
        # Trials may have different lengths; align on the shortest one.
        min_len = min(map(len, all_values))
        costs = np.vstack([values[:min_len] for values in all_values])
        costs = costs[:, :MAX_ITERS]
        mean = np.mean(costs, axis=0)
        std = np.std(costs, axis=0)
        epochs = np.arange(costs.shape[1])
        plt.fill_between(epochs, mean - std, mean + std, alpha=0.1)
        plt.plot(epochs, mean, label=name)

    plt.xlabel("Environment Samples (x1,000)")
    plt.ylabel("Final Euclidean Distance to Goal Position")
    plt.legend()
    plt.savefig('results/iclr2018/ant.jpg')