def raw_lifelong_plot(
        dfs,
        agents,
        path,
        n_tasks,
        n_episodes,
        confidence=None,
        open_plot=False,
        plot_title=True,
        plot_legend=True,
        legend_at_bottom=False,
        ma=False,
        ma_width=10,
        latex_rendering=False
):
    """
    Plot the raw lifelong-learning results of each agent: one curve per task showing the
    (discounted) return against the episode number, the task index being encoded by a
    color bar.
    """
    x = np.array(range(1, n_episodes + 1))
    x_label = r'Episode number'
    labels = ['Task ' + str(t) for t in range(1, n_tasks + 1)]

    for i in range(len(agents)):
        tr_per_task, tr_per_task_lo, tr_per_task_up = [], [], []
        dr_per_task, dr_per_task_lo, dr_per_task_up = [], [], []

        for j in range(1, n_tasks + 1):
            task_j = dfs[i].loc[dfs[i]['task'] == j]
            # n_instances = task_j['instance'].nunique()
            tr, tr_lo, tr_up = [], [], []
            dr, dr_lo, dr_up = [], [], []
            for k in range(1, n_episodes + 1):
                task_j_episodes_k = task_j.loc[task_j['episode'] == k]
                tr_mci = mean_confidence_interval(task_j_episodes_k['return'], confidence)
                dr_mci = mean_confidence_interval(task_j_episodes_k['discounted_return'], confidence)
                tr.append(tr_mci[0])
                tr_lo.append(tr_mci[1])
                tr_up.append(tr_mci[2])
                dr.append(dr_mci[0])
                dr_lo.append(dr_mci[1])
                dr_up.append(dr_mci[2])
            tr_per_task.append(tr)
            tr_per_task_lo.append(tr_lo)
            tr_per_task_up.append(tr_up)
            dr_per_task.append(dr)
            dr_per_task_lo.append(dr_lo)
            dr_per_task_up.append(dr_up)

        agent_name = str(agents[i])
        pdf_name = ('lifelong-' + agent_name).lower()

        plot_color_bars(path, pdf_name=pdf_name + '-return', x=x, y=tr_per_task, y_lo=None, y_up=None,
                        cb_min=1, cb_max=n_tasks + 1, cb_step=1, x_label=x_label, y_label='Return',
                        title_prefix='', labels=labels, cbar_label='Task number', title=agent_name,
                        plot_title=plot_title, plot_markers=False, plot_legend=False,
                        legend_at_bottom=legend_at_bottom, ma=ma, ma_width=ma_width,
                        latex_rendering=latex_rendering)
        plot_color_bars(path, pdf_name=pdf_name + '-discounted-return', x=x, y=dr_per_task, y_lo=None, y_up=None,
                        cb_min=1, cb_max=n_tasks + 1, cb_step=1, x_label=x_label, y_label='Discounted return',
                        title_prefix='', labels=labels, cbar_label='Task number', title=agent_name,
                        plot_title=plot_title, plot_markers=False, plot_legend=False,
                        legend_at_bottom=legend_at_bottom, ma=ma, ma_width=ma_width,
                        latex_rendering=latex_rendering)
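# Illustrative sketch (not part of the original module): mean_confidence_interval() is
# used throughout this file but defined elsewhere; the code above only relies on it
# returning a (mean, lower, upper) triple for a given confidence level. The Student-t
# implementation below and the handling of None / tiny samples are assumptions. numpy is
# assumed to already be imported as np at module level, as in the rest of the file.
import scipy.stats


def _mean_confidence_interval_sketch(data, confidence=0.9):
    """Return (mean, lower, upper) bounds of a Student-t confidence interval on the mean."""
    a = 1.0 * np.array(data)
    n = len(a)
    if n < 2:
        m = float(a.mean()) if n == 1 else 0.
        return m, m, m
    confidence = 0.9 if confidence is None else confidence
    m = a.mean()
    h = scipy.stats.sem(a) * scipy.stats.t.ppf((1. + confidence) / 2., n - 1)
    return m, m - h, m + h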
def plot_results(path, lrmax_path, n_run, confidence=0.9, open_plot=False):
    lrmax_df = pd.read_csv(lrmax_path)

    x = []
    rlbu_m, rlbu_lo, rlbu_up = [], [], []

    for i in range(n_run):
        df = lrmax_df.loc[lrmax_df['run_number'] == i]

        # Prior
        prior = df.iloc[0]['prior']
        x.append(prior)

        # Ratio Lipschitz bound use
        _rlbu_m, _rlbu_lo, _rlbu_up = mean_confidence_interval(
            100. * np.array(df.ratio_lip_bound_use), confidence=confidence)
        rlbu_m.append(_rlbu_m)
        rlbu_lo.append(_rlbu_lo)
        rlbu_up.append(_rlbu_up)

    label_data_dict = {r'Ratio lip bound use': (rlbu_m, rlbu_lo, rlbu_up)}

    my_plot(path=path, pdf_name='exp-result', x=x, label_data_dict=label_data_dict, open_plot=open_plot)
def save_result(results, root_path, name):
    # Mean and confidence bounds of the prior-use ratio, aggregated per computation number
    path = get_path_computation_number(root_path, name)
    csv_write(['prior_use_ratio_mean', 'prior_use_ratio_lo', 'prior_use_ratio_up'], path, mode='w')
    length = max(len(r) for r in results)
    for i in range(length):
        data_i = []
        for r in results:
            if len(r) > i:
                data_i.append(r[i][1])
        mean, lo, up = mean_confidence_interval(data_i)
        csv_write([mean, lo, up], path, mode='a')

    # Raw (time_step, prior_use_ratio) records
    path = get_path_time_step(root_path, name)
    csv_write(['time_step', 'prior_use_ratio'], path, mode='w')
    for r in results:
        for row in r:
            csv_write([row[0], row[1]], path, mode='a')
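# Illustrative sketch (not part of the original module): csv_write() is assumed to write a
# single row to a CSV file, creating or appending to it depending on `mode`. Only the
# (row, path, mode) call signature is taken from the code above; the body is a guess based
# on the standard csv module.
import csv


def _csv_write_sketch(row, path, mode='a'):
    """Write one row to the CSV file at `path` ('w' truncates, 'a' appends)."""
    with open(path, mode, newline='') as f:
        csv.writer(f).writerow(row)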
def compute_speed_up(m, lo, up, lrmax_df, rmax_df, confidence=0.9, rmax_m_lrmax=True):
    """
    Append to (m, lo, up) the mean and confidence bounds of the per-instance gain of LRMax
    over RMax, in percent: 100 * (rmax - lrmax) / rmax if rmax_m_lrmax is True, and
    100 * (lrmax - rmax) / rmax otherwise (used for return gains, where higher is better).
    A zero RMax reference value is counted as a 100% gain.
    """
    lrmax_data = np.array(lrmax_df)
    rmax_data = np.array(rmax_df)
    speed_up = np.zeros(shape=rmax_data.shape)
    for i in range(len(speed_up)):
        if rmax_data[i] < 1e-10:
            rmax_data[i] = 0.
        diff = (rmax_data[i] - lrmax_data[i]) if rmax_m_lrmax else -(rmax_data[i] - lrmax_data[i])
        speed_up[i] = 100. if rmax_data[i] == 0. else 100. * diff / rmax_data[i]
    _su_m, _su_lo, _su_up = mean_confidence_interval(speed_up, confidence=confidence)
    m.append(_su_m)
    lo.append(_su_lo)
    up.append(_su_up)
    return m, lo, up
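# Toy usage sketch of compute_speed_up(): the column name and the values are made up, only
# the call pattern mirrors plot_bound_use() below. Each instance yields
# 100 * (rmax - lrmax) / rmax, i.e. 20%, 10% and 15% here; their mean and confidence
# bounds are appended to the three accumulator lists.
def _compute_speed_up_demo():
    ldf = pd.DataFrame({'avg_ts_l50': [80., 90., 85.]})     # hypothetical LRMax time-steps
    rdf = pd.DataFrame({'avg_ts_l50': [100., 100., 100.]})  # hypothetical RMax time-steps
    return compute_speed_up([], [], [], ldf.avg_ts_l50, rdf.avg_ts_l50)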
def plot_bound_use(path, lrmax_path, rmax_path, n_run, confidence=0.9, open_plot=False):
    lrmax_df = pd.read_csv(lrmax_path)
    rmax_df = pd.read_csv(rmax_path)

    x = []
    rlbu_m, rlbu_lo, rlbu_up = [], [], []
    su_m, su_lo, su_up = [], [], []
    su_t2_m, su_t2_lo, su_t2_up = [], [], []
    su_t5_m, su_t5_lo, su_t5_up = [], [], []
    su_t10_m, su_t10_lo, su_t10_up = [], [], []
    su_t50_m, su_t50_lo, su_t50_up = [], [], []
    tr_m, tr_lo, tr_up = [], [], []
    dr_m, dr_lo, dr_up = [], [], []

    for i in range(n_run):
        ldf = lrmax_df.loc[lrmax_df['run_number'] == i]
        rdf = rmax_df.loc[rmax_df['run_number'] == i]

        # Prior
        prior = ldf.iloc[0]['prior']
        x.append(prior)

        # Ratio Lipschitz bound use
        _rlbu_m, _rlbu_lo, _rlbu_up = mean_confidence_interval(
            100. * np.array(ldf.ratio_lip_bound_use), confidence=confidence)
        rlbu_m.append(_rlbu_m)
        rlbu_lo.append(_rlbu_lo)
        rlbu_up.append(_rlbu_up)

        # Total speed-up
        su_m, su_lo, su_up = compute_speed_up(su_m, su_lo, su_up, ldf.n_time_steps_cv, rdf.n_time_steps_cv)

        # Average speed-up 2 ts
        su_t2_m, su_t2_lo, su_t2_up = compute_speed_up(su_t2_m, su_t2_lo, su_t2_up, ldf.avg_ts_l2, rdf.avg_ts_l2)

        # Average speed-up 5 ts
        su_t5_m, su_t5_lo, su_t5_up = compute_speed_up(su_t5_m, su_t5_lo, su_t5_up, ldf.avg_ts_l5, rdf.avg_ts_l5)

        # Average speed-up 10 ts
        su_t10_m, su_t10_lo, su_t10_up = compute_speed_up(
            su_t10_m, su_t10_lo, su_t10_up, ldf.avg_ts_l10, rdf.avg_ts_l10)

        # Average speed-up 50 ts
        su_t50_m, su_t50_lo, su_t50_up = compute_speed_up(
            su_t50_m, su_t50_lo, su_t50_up, ldf.avg_ts_l50, rdf.avg_ts_l50)

        # Total return
        tr_m, tr_lo, tr_up = compute_speed_up(tr_m, tr_lo, tr_up, ldf.total_return, rdf.total_return,
                                              rmax_m_lrmax=False)

        # Discounted return
        dr_m, dr_lo, dr_up = compute_speed_up(dr_m, dr_lo, dr_up, ldf.discounted_return, rdf.discounted_return,
                                              rmax_m_lrmax=False)

    label_data_dict = {
        r'$\rho_{Lip}$ (\% use Lipschitz bound)': (rlbu_m, rlbu_lo, rlbu_up),
        # r'\% time-steps to convergence gained': (su_m, su_lo, su_up),
        # r'\% average speed-up 2': (su_t2_m, su_t2_lo, su_t2_up),
        # r'\% average speed-up 5': (su_t5_m, su_t5_lo, su_t5_up),
        # r'\% average speed-up 10': (su_t10_m, su_t10_lo, su_t10_up),
        r'$\rho_{Speed-up}$ (\% convergence speed-up)': (su_t50_m, su_t50_lo, su_t50_up),
        # r'\% average speed-up 50': (su_t50_m, su_t50_lo, su_t50_up),
        r'$\rho_{Return}$ (\% total return gain)': (tr_m, tr_lo, tr_up),
        # r'\% discounted return gained': (dr_m, dr_lo, dr_up)
    }

    # Debug print of the collected data:
    # for key, val in label_data_dict.items():
    #     print(key)
    #     print(val[0])
    #     print(val[1])
    #     print(val[2])
    # exit()

    my_plot_bound_use(path=path, pdf_name='bounds_comparison', x=x, label_data_dict=label_data_dict,
                      open_plot=open_plot)
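# Hypothetical usage sketch for plot_bound_use(): the file names, output directory and toy
# values are assumptions; the column names are the ones accessed above, with one block of
# rows per 'run_number' (one row per instance). my_plot_bound_use() from this module is
# assumed to be available.
def _plot_bound_use_demo(out_dir='results'):
    import os
    os.makedirs(out_dir, exist_ok=True)
    n_run, n_instances = 2, 3
    lrmax_rows, rmax_rows = [], []
    for run in range(n_run):
        for _ in range(n_instances):
            lrmax_rows.append({'run_number': run, 'prior': 0.1 * (run + 1), 'ratio_lip_bound_use': 0.4,
                               'n_time_steps_cv': 80, 'avg_ts_l2': 70, 'avg_ts_l5': 72, 'avg_ts_l10': 75,
                               'avg_ts_l50': 78, 'total_return': 55., 'discounted_return': 30.})
            rmax_rows.append({'run_number': run, 'n_time_steps_cv': 100, 'avg_ts_l2': 90, 'avg_ts_l5': 92,
                              'avg_ts_l10': 95, 'avg_ts_l50': 98, 'total_return': 50., 'discounted_return': 25.})
    lrmax_path = os.path.join(out_dir, 'lrmax_demo.csv')
    rmax_path = os.path.join(out_dir, 'rmax_demo.csv')
    pd.DataFrame(lrmax_rows).to_csv(lrmax_path, index=False)
    pd.DataFrame(rmax_rows).to_csv(rmax_path, index=False)
    plot_bound_use(path=out_dir, lrmax_path=lrmax_path, rmax_path=rmax_path, n_run=n_run)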
def bounds_comparison_experiment(verbose=False):
    """
    Compare the Lipschitz bound of LRMax with the R-Max bound on two small grid-world
    tasks: for each prior, record the ratio of Lipschitz-bound use and the convergence
    speed-up of LRMax w.r.t. R-Max, then save the aggregated results to SAVE_PATH.
    """
    # MDPs: two grid-worlds differing only by their goal reward
    sz = 2
    mdp1 = GridWorld(width=sz, height=sz, init_loc=(1, 1), goal_locs=[(sz, sz)], goal_reward=0.8)
    mdp2 = GridWorld(width=sz, height=sz, init_loc=(1, 1), goal_locs=[(sz, sz)], goal_reward=1.0)

    results = []
    for _ in range(N_INSTANCES):
        csv_write(['prior', 'ratio_rmax_bound_use', 'ratio_lip_bound_use', 'lrmax_n_time_steps',
                   'lrmax_n_time_steps_cv'], LRMAX_TMP_SAVE_PATH, 'w')
        csv_write(['rmax_n_time_steps', 'rmax_n_time_steps_cv'], RMAX_TMP_SAVE_PATH, 'w')

        for prior in PRIORS:
            lrmaxct = LRMaxCTExp(actions=mdp1.get_actions(), gamma=GAMMA, count_threshold=1, prior=prior,
                                 path=LRMAX_TMP_SAVE_PATH)
            rmaxvi = RMaxExp(actions=mdp1.get_actions(), gamma=GAMMA, count_threshold=1, path=RMAX_TMP_SAVE_PATH)

            # Run LRMax on mdp1 first, then both agents on mdp2
            run_agents_on_mdp([lrmaxct], mdp1, instances=1, episodes=100, steps=30, reset_at_terminal=True,
                              verbose=False, open_plot=False)
            run_agents_on_mdp([lrmaxct, rmaxvi], mdp2, instances=1, episodes=100, steps=30, reset_at_terminal=True,
                              verbose=False, open_plot=False)

        # Retrieve data
        df_lrmax = pd.read_csv(LRMAX_TMP_SAVE_PATH)
        df_rmax = pd.read_csv(RMAX_TMP_SAVE_PATH)
        df = pd.concat([df_lrmax, df_rmax], axis=1, sort=False)

        result = []
        for _, row in df.iterrows():
            prior = row['prior']
            ratio_lip_bound_use = 100. * row['ratio_lip_bound_use']
            speed_up = 100. * (row['rmax_n_time_steps_cv'] - row['lrmax_n_time_steps_cv']) \
                / row['rmax_n_time_steps_cv']
            result.append([prior, ratio_lip_bound_use, speed_up])
        results.append(result)

    # Gather results. mean_confidence_interval() returns (mean, lo, up); the column names
    # follow that order.
    csv_write(['prior',
               'ratio_lip_bound_use_mean', 'ratio_lip_bound_use_lo', 'ratio_lip_bound_use_up',
               'speed_up_mean', 'speed_up_lo', 'speed_up_up'], SAVE_PATH, 'w')
    for i in range(len(PRIORS)):
        rlbu = []
        su = []
        for result in results:
            rlbu.append(result[i][1])
            su.append(result[i][2])
        rlbu_mci = mean_confidence_interval(rlbu)
        su_mci = mean_confidence_interval(su)
        csv_write([PRIORS[i]] + list(rlbu_mci) + list(su_mci), SAVE_PATH, 'a')

    if verbose:
        for result in results:
            print(result)
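# Hypothetical entry point: bounds_comparison_experiment() is self-contained and only
# relies on the module-level constants referenced above (N_INSTANCES, PRIORS, GAMMA and
# the *_SAVE_PATH files). Wrapping the call in a function keeps this sketch from running
# at import time; the aggregated per-prior means and confidence bounds end up in SAVE_PATH.
def _run_bounds_comparison_demo():
    bounds_comparison_experiment(verbose=True)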
def averaged_lifelong_plot(
        dfs,
        agents,
        path,
        n_tasks,
        n_episodes,
        confidence,
        open_plot,
        plot_title,
        norm_ag=None,  # normalize everything w.r.t. the agent of the specified index
        which_norm_ag=0,  # 0: normalize everything; 1: normalize w.r.t. episodes; 2: normalize w.r.t. tasks
        plot_legend=0,
        legend_at_bottom=False,
        episodes_moving_average=False,
        episodes_ma_width=10,
        tasks_moving_average=False,
        tasks_ma_width=10,
        latex_rendering=False
):
    """
    Plot the lifelong-learning results averaged across tasks (return vs. episode) and
    across episodes (return vs. task), optionally normalized by the performance of the
    agent at index `norm_ag`.
    """
    # Extract data
    n_agents = len(agents)
    tre, dre, trt, drt = [], [], [], []
    for i in range(n_agents):
        tre_i, dre_i = [], []
        for j in range(1, n_episodes + 1):
            tr_norm, dr_norm = 1., 1.
            if norm_ag is not None and which_norm_ag in [0, 1]:
                df = dfs[norm_ag]
                # TODO set this param (1500 for tight and 12 for corridor)
                df = df.loc[df['episode'] >= 12]  # remove extra episodes
                df = df.loc[df['episode'] <= n_episodes]  # remove extra episodes
                df = df.loc[df['task'] <= n_tasks]  # remove extra tasks
                tr_norm = max(df['return'].mean(), .001)
                dr_norm = max(df['discounted_return'].mean(), .001)
            df = dfs[i].loc[dfs[i]['episode'] == j]  # data-frame for agent i, episode j
            df = df.loc[df['task'] <= n_tasks]  # only select tasks <= n_tasks
            tre_i.append(mean_confidence_interval(df['return'] / tr_norm, confidence))
            dre_i.append(mean_confidence_interval(df['discounted_return'] / dr_norm, confidence))
        tre.append(tre_i)
        dre.append(dre_i)

        trt_i, drt_i = [], []
        for j in range(1, n_tasks + 1):
            tr_norm, dr_norm = 1., 1.
            if norm_ag is not None and which_norm_ag in [0, 2]:
                df = dfs[norm_ag]
                df = df.loc[df['episode'] <= n_episodes]  # remove extra episodes
                df = df.loc[df['task'] <= n_tasks]  # remove extra tasks
                df = df.loc[df['task'] == j]  # only keep task j
                tr_norm = max(df['return'].mean(), .001)
                dr_norm = max(df['discounted_return'].mean(), .001)
            df = dfs[i].loc[dfs[i]['task'] == j]  # data-frame for agent i, task j
            df = df.loc[df['episode'] <= n_episodes]  # only select episodes <= n_episodes
            trt_i.append(mean_confidence_interval(df['return'] / tr_norm, confidence))
            drt_i.append(mean_confidence_interval(df['discounted_return'] / dr_norm, confidence))
        trt.append(trt_i)
        drt.append(drt_i)

    # x-axis
    x_e = np.array(range(1, n_episodes + 1))
    x_t = np.array(range(1, n_tasks + 1))

    # Unzip everything for confidence intervals
    tre, tre_lo, tre_up = unzip(tre)
    dre, dre_lo, dre_up = unzip(dre)
    trt, trt_lo, trt_up = unzip(trt)
    drt, drt_lo, drt_up = unzip(drt)

    # Labels
    x_label_e = r'Episode number'
    x_label_t = r'Task number'
    if norm_ag is None:
        y_labels = [r'Average Return', r'Average Discounted Return',
                    r'Average Return', r'Average Discounted Return']
    else:
        y_labels = [
            r'Average Relative Return' if which_norm_ag in [0, 1] else r'Average Return',
            r'Average Relative Discounted Return' if which_norm_ag in [0, 1] else r'Average Discounted Return',
            r'Average Relative Return' if which_norm_ag in [0, 2] else r'Average Return',
            r'Average Relative Discounted Return' if which_norm_ag in [0, 2] else r'Average Discounted Return'
        ]

    # Plots w.r.t. episodes
    _lgd = plot_legend in [1, 3]
    plot(path, pdf_name='return_vs_episode', agents=agents, x=x_e, y=tre, y_lo=tre_lo, y_up=tre_up,
         x_label=x_label_e, y_label=y_labels[0], title_prefix=r'Average Return: ', open_plot=open_plot,
         plot_title=plot_title, plot_legend=_lgd, legend_at_bottom=legend_at_bottom,
         ma=episodes_moving_average, ma_width=episodes_ma_width, latex_rendering=latex_rendering,
         x_cut=None, plot_markers=False)
    plot(path, pdf_name='discounted_return_vs_episode', agents=agents, x=x_e, y=dre, y_lo=dre_lo, y_up=dre_up,
         x_label=x_label_e, y_label=y_labels[1], title_prefix=r'Average Discounted Return: ', open_plot=open_plot,
         plot_title=plot_title, plot_legend=_lgd, legend_at_bottom=legend_at_bottom,
         ma=episodes_moving_average, ma_width=episodes_ma_width, latex_rendering=latex_rendering,
         x_cut=None, plot_markers=False)

    # Plots w.r.t. tasks
    _lgd = plot_legend in [2, 3]
    _lgd_btm = True
    _cst = True
    plot(path, pdf_name='return_vs_task', agents=agents, x=x_t, y=trt, y_lo=trt_lo, y_up=trt_up,
         x_label=x_label_t, y_label=y_labels[2], title_prefix=r'Average Return: ', open_plot=open_plot,
         plot_title=plot_title, plot_legend=_lgd, legend_at_bottom=_lgd_btm,
         ma=tasks_moving_average, ma_width=tasks_ma_width, latex_rendering=latex_rendering, custom=_cst)
    plot(path, pdf_name='discounted_return_vs_task', agents=agents, x=x_t, y=drt, y_lo=drt_lo, y_up=drt_up,
         x_label=x_label_t, y_label=y_labels[3], title_prefix=r'Average Discounted Return: ', open_plot=open_plot,
         plot_title=plot_title, plot_legend=_lgd, legend_at_bottom=_lgd_btm,
         ma=tasks_moving_average, ma_width=tasks_ma_width, latex_rendering=latex_rendering, custom=_cst)
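# Hypothetical usage sketch for the two lifelong plots defined in this module. The agent
# name, the toy data-frame and the output directory are assumptions; the only requirement
# taken from the code above is one row per (task, episode, instance) with 'return' and
# 'discounted_return' columns.
def _lifelong_plot_demo(out_dir='lifelong_plots'):
    rng = np.random.default_rng(0)
    n_tasks, n_episodes, n_instances = 3, 20, 5
    rows = []
    for task in range(1, n_tasks + 1):
        for episode in range(1, n_episodes + 1):
            for instance in range(1, n_instances + 1):
                r = rng.normal(loc=episode / n_episodes, scale=0.1)
                rows.append({'task': task, 'episode': episode, 'instance': instance,
                             'return': r, 'discounted_return': 0.9 * r})
    df = pd.DataFrame(rows)
    agents = ['toy-agent']  # str() of each agent is used for titles and file names
    raw_lifelong_plot([df], agents, out_dir, n_tasks, n_episodes, confidence=0.9)
    averaged_lifelong_plot([df], agents, out_dir, n_tasks, n_episodes, confidence=0.9,
                           open_plot=False, plot_title=True)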