Ejemplo n.º 1
0
def raw_lifelong_plot(
        dfs,
        agents,
        path,
        n_tasks,
        n_episodes,
        confidence=None,
        open_plot=False,
        plot_title=True,
        plot_legend=True,
        legend_at_bottom=False,
        ma=False,
        ma_width=10,
        latex_rendering=False
):
    """
    Plot, for each agent, one return curve per task as a function of the episode number.

    Two figures are produced per agent through `plot_color_bars`: one for the
    'return' column and one for the 'discounted_return' column. Their file names
    are 'lifelong-<agent>-return' and 'lifelong-<agent>-discounted-return'
    (lower-cased).

    :param dfs: sequence of data-frames, dfs[i] holding the data of agents[i]; each
        must provide the columns 'task', 'episode', 'return' and 'discounted_return'
    :param agents: sequence of agents; str(agent) is used for the title and file name
    :param path: forwarded to plot_color_bars
    :param n_tasks: tasks 1..n_tasks are plotted
    :param n_episodes: episodes 1..n_episodes are plotted
    :param confidence: forwarded to mean_confidence_interval
    :param open_plot: unused by this function (kept for interface compatibility)
    :param plot_title: forwarded to plot_color_bars
    :param plot_legend: unused by this function; plot_color_bars is always called
        with plot_legend=False
    :param legend_at_bottom: forwarded to plot_color_bars
    :param ma: forwarded to plot_color_bars
    :param ma_width: forwarded to plot_color_bars
    :param latex_rendering: forwarded to plot_color_bars
    :return: None
    """
    x = np.array(range(1, n_episodes + 1))
    x_label = r'Episode number'
    labels = ['Task ' + str(t) for t in range(1, n_tasks + 1)]

    for i, agent in enumerate(agents):
        agent_df = dfs[i]

        # Only the first element returned by mean_confidence_interval is kept: the
        # figures are drawn with y_lo=None and y_up=None, so the interval bounds
        # are never used.
        # NOTE(review): the first element is presumably the mean -- confirm against
        # mean_confidence_interval.
        tr_per_task, dr_per_task = [], []
        for j in range(1, n_tasks + 1):
            task_j = agent_df.loc[agent_df['task'] == j]
            tr, dr = [], []
            for k in range(1, n_episodes + 1):
                task_j_episodes_k = task_j.loc[task_j['episode'] == k]
                tr.append(mean_confidence_interval(task_j_episodes_k['return'], confidence)[0])
                dr.append(mean_confidence_interval(task_j_episodes_k['discounted_return'], confidence)[0])
            tr_per_task.append(tr)
            dr_per_task.append(dr)

        agent_name = str(agent)
        pdf_name = ('lifelong-' + agent_name).lower()

        # Same figure layout for both quantities; only suffix, data and label differ.
        figures = (
            ('-return', tr_per_task, 'Return'),
            ('-discounted-return', dr_per_task, 'Discounted return'),
        )
        for suffix, y, y_label in figures:
            plot_color_bars(path, pdf_name=pdf_name + suffix, x=x, y=y, y_lo=None, y_up=None, cb_min=1,
                            cb_max=n_tasks + 1, cb_step=1, x_label=x_label,
                            y_label=y_label, title_prefix='', labels=labels, cbar_label='Task number',
                            title=agent_name, plot_title=plot_title, plot_markers=False, plot_legend=False,
                            legend_at_bottom=legend_at_bottom, ma=ma, ma_width=ma_width,
                            latex_rendering=latex_rendering)
Ejemplo n.º 2
0
def plot_results(path, lrmax_path, n_run, confidence=0.9, open_plot=False):
    """
    Plot the percentage of Lipschitz bound use against the prior, one point per run.

    :param path: forwarded to my_plot
    :param lrmax_path: path of the CSV file to read; it must provide the columns
        'run_number', 'prior' and 'ratio_lip_bound_use'
    :param n_run: runs 0..n_run-1 are selected through the 'run_number' column
    :param confidence: forwarded to mean_confidence_interval
    :param open_plot: forwarded to my_plot
    :return: None
    """
    all_runs = pd.read_csv(lrmax_path)

    priors = []
    means, lowers, uppers = [], [], []

    for run in range(n_run):
        run_df = all_runs.loc[all_runs['run_number'] == run]

        # The prior of a run is read from its first row
        priors.append(run_df.iloc[0]['prior'])

        # Ratio of Lipschitz bound use, expressed as a percentage
        percent_use = 100. * np.array(run_df.ratio_lip_bound_use)
        mean, lower, upper = mean_confidence_interval(percent_use, confidence=confidence)
        means.append(mean)
        lowers.append(lower)
        uppers.append(upper)

    my_plot(
        path=path,
        pdf_name='exp-result',
        x=priors,
        label_data_dict={r'Ratio lip bound use': (means, lowers, uppers)},
        open_plot=open_plot
    )
Ejemplo n.º 3
0
def save_result(results, root_path, name):
    """
    Write the prior-use ratios of several runs to two CSV files.

    The first file (path given by get_path_computation_number) holds, for each row
    index i, the three values returned by mean_confidence_interval over the second
    field of the i-th row of every run that has at least i + 1 rows.
    The second file (path given by get_path_time_step) holds the first two fields
    of every row of every run, run after run.

    :param results: sequence of runs; each run is a sequence of rows whose first
        two fields are written under the headers 'time_step' and 'prior_use_ratio'
    :param root_path: forwarded to the path helpers
    :param name: forwarded to the path helpers
    :return: None
    """
    summary_path = get_path_computation_number(root_path, name)
    csv_write(
        ['prior_use_ratio_mean', 'prior_use_ratio_lo', 'prior_use_ratio_up'],
        summary_path,
        mode='w')

    # max() raises ValueError on an empty sequence; with no run at all, both files
    # are still produced with their header only.
    run_lengths = [len(r) for r in results]
    length = max(run_lengths) if run_lengths else 0

    for i in range(length):
        # Runs may have different lengths: only those reaching row i contribute
        data_i = [r[i][1] for r in results if len(r) > i]
        mean, lo, up = mean_confidence_interval(data_i)
        csv_write([mean, lo, up], summary_path, mode='a')

    raw_path = get_path_time_step(root_path, name)
    csv_write(['time_step', 'prior_use_ratio'], raw_path, mode='w')
    for r in results:
        for row in r:
            csv_write([row[0], row[1]], raw_path, mode='a')
Ejemplo n.º 4
0
def compute_speed_up(m,
                     lo,
                     up,
                     lrmax_df,
                     rmax_df,
                     confidence=0.9,
                     rmax_m_lrmax=True):
    """
    Append the mean and confidence bounds of a relative gain (in percent) to m, lo, up.

    For each index i, the gain is 100 * (rmax[i] - lrmax[i]) / rmax[i], with the
    sign of the difference flipped when rmax_m_lrmax is False. RMax values below
    1e-10 are treated as zero, and the gain is set to 100 whenever the RMax value
    is zero.

    :param m: list receiving the mean (modified in place)
    :param lo: list receiving the lower bound (modified in place)
    :param up: list receiving the upper bound (modified in place)
    :param lrmax_df: array-like of LRMax values
    :param rmax_df: array-like of RMax values, indexed like lrmax_df
    :param confidence: forwarded to mean_confidence_interval
    :param rmax_m_lrmax: True to use (rmax - lrmax), False to use (lrmax - rmax)
    :return: the same three lists m, lo, up
    """
    lrmax_values = np.array(lrmax_df)
    rmax_values = np.array(rmax_df)
    percent_gain = np.zeros(shape=rmax_values.shape)

    for idx in range(len(percent_gain)):
        # Clamp tiny (or negative) baselines to exactly zero
        if rmax_values[idx] < 1e-10:
            rmax_values[idx] = 0.

        gap = rmax_values[idx] - lrmax_values[idx]
        if not rmax_m_lrmax:
            gap = -gap

        if rmax_values[idx] == 0.:
            # No meaningful ratio with a zero baseline: report a 100 % gain
            percent_gain[idx] = 100.
        else:
            percent_gain[idx] = 100. * gap / rmax_values[idx]

    mean, lower, upper = mean_confidence_interval(percent_gain,
                                                  confidence=confidence)
    m.append(mean)
    lo.append(lower)
    up.append(upper)
    return m, lo, up
Ejemplo n.º 5
0
def plot_bound_use(path,
                   lrmax_path,
                   rmax_path,
                   n_run,
                   confidence=0.9,
                   open_plot=False):
    """
    Plot Lipschitz bound use, convergence speed-up and return gain against the prior.

    For each run 0..n_run-1 (selected through the 'run_number' column of both
    files), the prior is read from the first LRMax row, the percentage of
    Lipschitz bound use is summarized with mean_confidence_interval, and LRMax is
    compared to RMax on several columns with compute_speed_up.

    `confidence` is applied to every interval. Previously it reached only the
    Lipschitz-ratio interval; the compute_speed_up calls silently fell back to
    their own default level.

    :param path: forwarded to my_plot_bound_use
    :param lrmax_path: CSV file of the LRMax runs
    :param rmax_path: CSV file of the RMax runs
    :param n_run: number of runs
    :param confidence: confidence level of all the plotted intervals
    :param open_plot: forwarded to my_plot_bound_use
    :return: None
    """
    lrmax_df = pd.read_csv(lrmax_path)
    rmax_df = pd.read_csv(rmax_path)

    x = []
    rlbu_m, rlbu_lo, rlbu_up = [], [], []

    # (series key, compared column, rmax_m_lrmax flag given to compute_speed_up).
    # Time-step columns use (rmax - lrmax); return columns use (lrmax - rmax).
    comparisons = (
        ('su', 'n_time_steps_cv', True),  # total speed-up
        ('su_t2', 'avg_ts_l2', True),  # average speed-up 2 ts
        ('su_t5', 'avg_ts_l5', True),  # average speed-up 5 ts
        ('su_t10', 'avg_ts_l10', True),  # average speed-up 10 ts
        ('su_t50', 'avg_ts_l50', True),  # average speed-up 50 ts
        ('tr', 'total_return', False),  # total return
        ('dr', 'discounted_return', False),  # discounted return
    )
    series = dict((key, ([], [], [])) for key, _, _ in comparisons)

    for i in range(n_run):
        ldf = lrmax_df.loc[lrmax_df['run_number'] == i]
        rdf = rmax_df.loc[rmax_df['run_number'] == i]

        # Prior
        x.append(ldf.iloc[0]['prior'])

        # Ratio Lipschitz bound use (percentage)
        _rlbu_m, _rlbu_lo, _rlbu_up = mean_confidence_interval(
            100. * np.array(ldf.ratio_lip_bound_use), confidence=confidence)
        rlbu_m.append(_rlbu_m)
        rlbu_lo.append(_rlbu_lo)
        rlbu_up.append(_rlbu_up)

        # LRMax vs RMax comparisons
        for key, column, rmax_m_lrmax in comparisons:
            m, lo, up = series[key]
            series[key] = compute_speed_up(m,
                                           lo,
                                           up,
                                           ldf[column],
                                           rdf[column],
                                           confidence=confidence,
                                           rmax_m_lrmax=rmax_m_lrmax)

    # Only three series are drawn; the others ('su', 'su_t2', 'su_t5', 'su_t10',
    # 'dr') are computed and left available for alternative figures.
    label_data_dict = {
        r'$\rho_{Lip}$ (\% use Lipschitz bound)': (rlbu_m, rlbu_lo, rlbu_up),
        r'$\rho_{Speed-up}$ (\% convergence speed-up)': tuple(series['su_t50']),
        r'$\rho_{Return}$ (\% total return gain)': tuple(series['tr']),
    }

    my_plot_bound_use(path=path,
                      pdf_name='bounds_comparison',
                      x=x,
                      label_data_dict=label_data_dict,
                      open_plot=open_plot)
Ejemplo n.º 6
0
def bounds_comparison_experiment(verbose=False):
    """
    Compare LRMax and RMax on two small grid-worlds for every prior in PRIORS.

    The experiment is repeated N_INSTANCES times. In each repetition, and for each
    prior, an LRMaxCTExp agent is first run alone on mdp1 (goal reward 0.8), then
    run together with an RMaxExp agent on mdp2 (goal reward 1.0). The temporary
    CSV files are then read back and, for each row, the percentage of Lipschitz
    bound use and the percentage of convergence speed-up are computed. Finally the
    per-prior statistics over all repetitions are written to SAVE_PATH.

    NOTE(review): the agents presumably append their own statistics to the `path`
    they are given; this function only writes the headers of the temporary files.

    :param verbose: if True, print the raw per-repetition results at the end
    :return: None
    """
    # MDP
    sz = 2
    mdp1 = GridWorld(width=sz,
                     height=sz,
                     init_loc=(1, 1),
                     goal_locs=[(sz, sz)],
                     goal_reward=0.8)
    mdp2 = GridWorld(width=sz,
                     height=sz,
                     init_loc=(1, 1),
                     goal_locs=[(sz, sz)],
                     goal_reward=1.0)

    results = []

    for _ in range(N_INSTANCES):
        # Reset the temporary files (mode 'w') with their headers
        csv_write([
            'prior', 'ratio_rmax_bound_use', 'ratio_lip_bound_use',
            'lrmax_n_time_steps', 'lrmax_n_time_steps_cv'
        ], LRMAX_TMP_SAVE_PATH, 'w')
        csv_write(['rmax_n_time_steps', 'rmax_n_time_steps_cv'],
                  RMAX_TMP_SAVE_PATH, 'w')

        for prior in PRIORS:
            lrmaxct = LRMaxCTExp(actions=mdp1.get_actions(),
                                 gamma=GAMMA,
                                 count_threshold=1,
                                 prior=prior,
                                 path=LRMAX_TMP_SAVE_PATH)
            rmaxvi = RMaxExp(actions=mdp1.get_actions(),
                             gamma=GAMMA,
                             count_threshold=1,
                             path=RMAX_TMP_SAVE_PATH)

            # Run twice: LRMax alone on the first MDP, then both agents on the second
            run_agents_on_mdp([lrmaxct],
                              mdp1,
                              instances=1,
                              episodes=100,
                              steps=30,
                              reset_at_terminal=True,
                              verbose=False,
                              open_plot=False)

            run_agents_on_mdp([lrmaxct, rmaxvi],
                              mdp2,
                              instances=1,
                              episodes=100,
                              steps=30,
                              reset_at_terminal=True,
                              verbose=False,
                              open_plot=False)

        # Retrieve data: both files are placed side by side (column-wise concat)
        df_lrmax = pd.read_csv(LRMAX_TMP_SAVE_PATH)
        df_rmax = pd.read_csv(RMAX_TMP_SAVE_PATH)
        df = pd.concat([df_lrmax, df_rmax], axis=1, sort=False)
        result = []
        for _, row in df.iterrows():
            prior = row['prior']
            ratio_lip_bound_use = 100. * row['ratio_lip_bound_use']
            # NOTE(review): no guard against rmax_n_time_steps_cv == 0 here
            speed_up = 100. * (
                row['rmax_n_time_steps_cv'] -
                row['lrmax_n_time_steps_cv']) / row['rmax_n_time_steps_cv']
            result.append([prior, ratio_lip_bound_use, speed_up])
        results.append(result)

    # Gather results.
    # The header follows the order in which the values are written below, i.e. the
    # order returned by mean_confidence_interval. Every other call site unpacks it
    # as (mean, lower, upper); the former header labelled the columns
    # mean/upper/lower, swapping the two bounds.
    # NOTE(review): confirm the return order of mean_confidence_interval.
    csv_write([
        'prior', 'ratio_lip_bound_use_mean', 'ratio_lip_bound_use_lower',
        'ratio_lip_bound_use_upper', 'speed_up_mean', 'speed_up_lower',
        'speed_up_upper'
    ], SAVE_PATH, 'w')
    for i in range(len(PRIORS)):
        # Row i of every repetition corresponds to PRIORS[i]
        rlbu = [result[i][1] for result in results]
        su = [result[i][2] for result in results]
        rlbu_mci = mean_confidence_interval(rlbu)
        su_mci = mean_confidence_interval(su)
        csv_write([PRIORS[i]] + list(rlbu_mci) + list(su_mci), SAVE_PATH, 'a')
    if verbose:
        for result in results:
            print(result)
Ejemplo n.º 7
0
def _floored_mean_returns(df, floor=.001):
    """Return (mean 'return', mean 'discounted_return') of df, each floored at `floor`."""
    return max(df['return'].mean(), floor), max(df['discounted_return'].mean(), floor)


def averaged_lifelong_plot(
        dfs,
        agents,
        path,
        n_tasks,
        n_episodes,
        confidence,
        open_plot,
        plot_title,
        norm_ag=None,  # normalize everything w.r.t. the agent of the specified index
        which_norm_ag=0,  # 0: normalize everything; 1: normalize w.r.t. episodes; 2: normalize w.r.t. tasks
        plot_legend=0,
        legend_at_bottom=False,
        episodes_moving_average=False,
        episodes_ma_width=10,
        tasks_moving_average=False,
        tasks_ma_width=10,
        latex_rendering=False,
        norm_min_episode=12
):
    """
    Plot the average (discounted) return of each agent against episodes and against tasks.

    Four figures are produced through `plot`: 'return_vs_episode',
    'discounted_return_vs_episode', 'return_vs_task' and
    'discounted_return_vs_task'. Each point is the value returned by
    mean_confidence_interval over the selected rows, optionally divided by a
    normalization constant computed from the data of agent `norm_ag`.

    :param dfs: sequence of data-frames, dfs[i] holding the data of agents[i]; each
        must provide the columns 'task', 'episode', 'return' and 'discounted_return'
    :param agents: sequence of agents, forwarded to plot
    :param path: forwarded to plot
    :param n_tasks: only tasks 1..n_tasks are considered
    :param n_episodes: only episodes 1..n_episodes are considered
    :param confidence: forwarded to mean_confidence_interval
    :param open_plot: forwarded to plot
    :param plot_title: forwarded to plot
    :param norm_ag: index in dfs of the reference agent, or None for no normalization
    :param which_norm_ag: 0 normalizes all figures, 1 only the episode figures,
        2 only the task figures
    :param plot_legend: 1 shows the legend on the episode figures, 2 on the task
        figures, 3 on both; any other value shows none
    :param legend_at_bottom: forwarded to plot for the episode figures only (the
        task figures always use legend_at_bottom=True)
    :param episodes_moving_average: forwarded to plot (`ma`) for the episode figures
    :param episodes_ma_width: forwarded to plot (`ma_width`) for the episode figures
    :param tasks_moving_average: forwarded to plot (`ma`) for the task figures
    :param tasks_ma_width: forwarded to plot (`ma_width`) for the task figures
    :param latex_rendering: forwarded to plot
    :param norm_min_episode: first episode of the reference agent taken into account
        by the episode-wise normalization constant; earlier episodes are discarded.
        The default (12) is the value that used to be hard-coded.
    :return: None
    """
    n_agents = len(agents)

    # Normalization constants depend only on the reference agent, so they are
    # computed once here instead of once per agent and per episode.
    episode_norms = (1., 1.)
    task_norms = [(1., 1.)] * n_tasks
    normalize_episodes = norm_ag is not None and which_norm_ag in [0, 1]
    normalize_tasks = norm_ag is not None and which_norm_ag in [0, 2]
    if n_agents > 0 and (normalize_episodes or normalize_tasks):
        ref = dfs[norm_ag]
        ref = ref.loc[ref['episode'] <= n_episodes]  # remove extra episodes
        ref = ref.loc[ref['task'] <= n_tasks]  # remove extra tasks
        if normalize_episodes:
            episode_norms = _floored_mean_returns(ref.loc[ref['episode'] >= norm_min_episode])
        if normalize_tasks:
            task_norms = [_floored_mean_returns(ref.loc[ref['task'] == j]) for j in range(1, n_tasks + 1)]

    # Extract data
    tre, dre, trt, drt = [], [], [], []
    for i in range(n_agents):
        agent_df = dfs[i]

        # Statistics per episode (over tasks <= n_tasks)
        tr_norm, dr_norm = episode_norms
        tre_i, dre_i = [], []
        for j in range(1, n_episodes + 1):
            df = agent_df.loc[agent_df['episode'] == j]  # data-frame for agent i, episode j
            df = df.loc[df['task'] <= n_tasks]  # only select tasks <= n_tasks

            tre_i.append(mean_confidence_interval(df['return'] / tr_norm, confidence))
            dre_i.append(mean_confidence_interval(df['discounted_return'] / dr_norm, confidence))
        tre.append(tre_i)
        dre.append(dre_i)

        # Statistics per task (over episodes <= n_episodes)
        trt_i, drt_i = [], []
        for j in range(1, n_tasks + 1):
            tr_norm, dr_norm = task_norms[j - 1]

            df = agent_df.loc[agent_df['task'] == j]  # data-frame for agent i, task j
            df = df.loc[df['episode'] <= n_episodes]  # only select episodes <= n_episodes

            trt_i.append(mean_confidence_interval(df['return'] / tr_norm, confidence))
            drt_i.append(mean_confidence_interval(df['discounted_return'] / dr_norm, confidence))
        trt.append(trt_i)
        drt.append(drt_i)

    # x-axis
    x_e = np.array(range(1, n_episodes + 1))
    x_t = np.array(range(1, n_tasks + 1))

    # Unzip everything for confidence intervals
    tre, tre_lo, tre_up = unzip(tre)
    dre, dre_lo, dre_up = unzip(dre)
    trt, trt_lo, trt_up = unzip(trt)
    drt, drt_lo, drt_up = unzip(drt)

    # Labels
    x_label_e = r'Episode number'
    x_label_t = r'Task number'
    if norm_ag is None:
        y_labels = [r'Average Return', r'Average Discounted Return', r'Average Return', r'Average Discounted Return']
    else:
        y_labels = [
            r'Average Relative Return' if
            which_norm_ag in [0, 1] else r'Average Return',
            r'Average Relative Discounted Return' if
            which_norm_ag in [0, 1] else r'Average Discounted Return',
            r'Average Relative Return' if
            which_norm_ag in [0, 2] else r'Average Return',
            r'Average Relative Discounted Return' if
            which_norm_ag in [0, 2] else r'Average Discounted Return'
        ]

    # Plots w.r.t. episodes
    _lgd = plot_legend in [1, 3]
    plot(path, pdf_name='return_vs_episode', agents=agents, x=x_e, y=tre, y_lo=tre_lo, y_up=tre_up,
         x_label=x_label_e, y_label=y_labels[0], title_prefix=r'Average Return: ', open_plot=open_plot,
         plot_title=plot_title, plot_legend=_lgd, legend_at_bottom=legend_at_bottom,
         ma=episodes_moving_average, ma_width=episodes_ma_width, latex_rendering=latex_rendering,
         x_cut=None, plot_markers=False)
    plot(path, pdf_name='discounted_return_vs_episode', agents=agents, x=x_e, y=dre, y_lo=dre_lo, y_up=dre_up,
         x_label=x_label_e, y_label=y_labels[1], title_prefix=r'Average Discounted Return: ',
         open_plot=open_plot, plot_title=plot_title, plot_legend=_lgd, legend_at_bottom=legend_at_bottom,
         ma=episodes_moving_average, ma_width=episodes_ma_width, latex_rendering=latex_rendering,
         x_cut=None, plot_markers=False)

    # Plots w.r.t. tasks (legend always at the bottom, custom layout)
    _lgd = plot_legend in [2, 3]
    _lgd_btm = True
    _cst = True
    plot(path, pdf_name='return_vs_task', agents=agents, x=x_t, y=trt, y_lo=trt_lo, y_up=trt_up,
         x_label=x_label_t, y_label=y_labels[2], title_prefix=r'Average Return: ', open_plot=open_plot,
         plot_title=plot_title, plot_legend=_lgd, legend_at_bottom=_lgd_btm,
         ma=tasks_moving_average, ma_width=tasks_ma_width, latex_rendering=latex_rendering, custom=_cst)
    plot(path, pdf_name='discounted_return_vs_task', agents=agents, x=x_t, y=drt, y_lo=drt_lo, y_up=drt_up,
         x_label=x_label_t, y_label=y_labels[3], title_prefix=r'Average Discounted Return: ',
         open_plot=open_plot, plot_title=plot_title, plot_legend=_lgd, legend_at_bottom=_lgd_btm,
         ma=tasks_moving_average, ma_width=tasks_ma_width, latex_rendering=latex_rendering, custom=_cst)