def plot_every_shallow(df, env, rep):
    """Plot every individual run and the group mean for one (env, rep) pair.

    Left panel: each run's smoothed, normalized reward curve.
    Right panel: mean +/- s.e.m. across runs.
    Rewards are rescaled by (r + 2.5)/(avg_max_rwd + 2.5), i.e. fraction of
    the environment's average maximum reward (per analysis_specs).
    """
    # pad every run out to this many episodes so curves can be averaged
    upper_limit = 30000
    scaling_factor = analysis_specs['avg_max_rwd'][env + '1']
    fig, ax = plt.subplots(1, 2, sharey='col', sharex='col')
    ftsz = 8
    groups_to_split = ['env_name', 'representation']
    df_gb = df.groupby(groups_to_split)["save_id"]
    id_list = list(df_gb.get_group((env + '1', rep)))
    print(env, rep, len(id_list))
    total_avg_reward = []
    for i, id_num in enumerate(id_list):
        with open(parent_path + f'results/{id_num}_data.p', 'rb') as f:
            dats = pickle.load(f)
        scaled_ = (np.asarray(dats['total_reward']) + 2.5) / (scaling_factor + 2.5)
        if len(scaled_) < upper_limit:
            print('hello', len(scaled_))
            num_extras = upper_limit - len(scaled_)
            # stats of the final 200 episodes (used by the commented-out
            # filler strategy below)
            last_200_mean = np.mean(scaled_[-200:])
            last_200_std = np.std(scaled_[-200:])
            filler = np.random.normal(last_200_mean, last_200_std, num_extras)
            nans = np.zeros(num_extras)
            nans[:] = np.nan
            #if last_200_mean > 0.95:
            #    scaled_ = np.concatenate((scaled_, filler))
            #else:
            # runs with no 'load_from' entry (blank = trained from scratch)
            # are padded at the end; retrained runs are shifted right and
            # offset by +0.15 — NOTE(review): the +0.15 offset looks like a
            # plotting hack, confirm it is intentional
            if list(df.loc[df['save_id'] == id_num]['load_from'])[0] == ' ':
                scaled_ = np.concatenate((scaled_, nans))
            else:
                scaled_ = np.concatenate((nans, scaled_ + 0.15))
        else:
            print(len(scaled_), 'len scaled')
        total_avg_reward.append(scaled_)
    # nan-aware mean/std so the padding does not bias the average; rm() is
    # the running-mean smoother used throughout this file
    mean = rm(np.nanmean(total_avg_reward, axis=0), 200)
    stand = rm(np.nanstd(total_avg_reward, axis=0), 200) / np.sqrt(
        len(total_avg_reward))
    print(len(mean), 'len mean')
    ax[0].set_ylim([0, 1.1])
    ax[0].set_yticks([0, 1])
    ax[0].set_yticklabels([0, 100], fontsize=ftsz)
    ax[0].set_ylabel('Performance \n(% Optimal)', fontsize=ftsz)
    for index, x in enumerate(total_avg_reward):
        ax[0].plot(rm(x, 200), label=f'{id_list[index][0:8]}')
    ax[0].legend(loc=0)
    ax[1].plot(mean, color=col_to_plot[rep])
    ax[1].fill_between(np.arange(len(mean)),
                       mean - stand,
                       mean + stand,
                       color=col_to_plot[rep],
                       alpha=0.2)
    plt.show()
def plot_pref_dir(data):
    """Plot a run's performance curve plus preferred-direction maps at
    three policy snapshots.

    Top row (merged into one wide axis): smoothed normalized reward.
    Bottom row: preferred-direction maps from policy snapshots.
    NOTE(review): relies on globals `pct`, `env_name`, `env`, `rep`, and
    colormap `fade` defined elsewhere in this file — confirm before reuse.
    """
    # snapshot indices into data['P_snap'] for the non-'mf' case
    indices = [5000, 10000, 14999]
    colors = {
        100: LINCLAB_COLS['red'],
        75: LINCLAB_COLS['orange'],
        50: LINCLAB_COLS['green'],
        25: LINCLAB_COLS['purple'],
        'mf': 'black'
    }
    fig, ax = plt.subplots(2, len(indices))
    gs = ax[0, 0].get_gridspec()
    # remove the individual top-row axes and replace with one spanning axis
    for a in ax[0, :]:
        a.remove()
    big_ax = fig.add_subplot(gs[0, :])
    if pct == 'mf':
        # model-free baseline: reward from the retraining phase
        raw_score = data['total_reward'][5000:20000]
    else:
        raw_score = data['bootstrap_reward'][0:15000]
    normalization = analysis_specs['avg_max_rwd'][env_name[0:22]]
    transformed = rm((np.asarray(raw_score) + 2.5) / (normalization + 2.5), 100)
    big_ax.plot(transformed, color=colors[pct])
    big_ax.set_ylim([-0.1, 1.1])
    for i, index in enumerate(indices):
        if pct == 'mf':
            print(len(data['P_snap']))
            # mf runs have fewer snapshots; remap to their own indices
            index = [75, 100, 149][i]
            print(index)
            pol_array = data['P_snap'][index].flatten()
        else:
            pol_array = data['P_snap'][index].flatten()
            # zero out the policy on obstacle cells so they render blank
            for x in env.obstacle:
                pol_array[x] = tuple(np.zeros(4))
        dir_map = convert_pol_array_to_pref_dir(pol_array.flatten())
        a = ax[1, i].imshow(dir_map, cmap=fade, vmin=0, vmax=360)
        # dashed box = old reward location, solid box = new reward location
        old_loc = plt.Rectangle((4.49, 4.47), width=1, height=1,
                                edgecolor='w', fill=False, linestyle='--')
        new_loc = plt.Rectangle((13.49, 13.47), width=1, height=1,
                                edgecolor='w', fill=False, linestyle='-')
        ax[1, i].add_patch(old_loc)
        ax[1, i].add_patch(new_loc)
        ax[1, i].get_xaxis().set_visible(False)
        ax[1, i].get_yaxis().set_visible(False)
    plt.colorbar(a, ax=ax[1, 2])
    plt.savefig(f'../figures/CH3/pref_dir_{rep}_{pct}.svg')
    plt.show()
def plot_each(env_name, rep, cutoff=25000, smoothing=500):
    """Plot the smoothed reward trace of every run recorded in
    master_dict for one environment/representation pair.

    Each curve is truncated to the first `cutoff` episodes and smoothed
    with a running mean of width `smoothing`; legend labels show the
    first 8 characters of each run id.
    """
    plt.figure()
    for run_id in master_dict[env_name][rep]:
        with open(data_dir + f'{run_id}_data.p', 'rb') as f:
            run_data = pickle.load(f)
        curve = rm(run_data['total_reward'][0:cutoff], smoothing)
        plt.plot(curve, label=run_id[0:8])
    plt.legend(loc='upper center', bbox_to_anchor=(0.1, 1.1))
    plt.ylim([-4, 12])
    plt.show()

#plot_all(cutoff=25000)
#plot_each(envs[2],reps[1])
def get_train_test(env, rep):
    """Build the combined training+retraining performance curve for a group.

    Every run in the (env, rep) group shares the same training agent (taken
    from the first run's 'load_from' entry). The shared training curve is
    concatenated with each run's retraining curve, rescaled to fraction of
    the environment's average max reward, and smoothed.

    Returns:
        (mean_performance, std_e_mean): nan-aware mean and standard error
        of the mean across runs.
    """
    norm = analysis_specs['avg_max_rwd'][env]
    run_ids = list(gb.get_group((env, rep)))
    # all runs in the group were retrained from the same saved agent
    source_id = list(df.loc[df['save_id'] == run_ids[0]]['load_from'])[0]
    with open(data_dir + f'{source_id}_data.p', 'rb') as f:
        train_curve = pickle.load(f)['total_reward']
    print(len(train_curve))
    smoothed_runs = []
    for run_id in run_ids:
        with open(data_dir + f'{run_id}_data.p', 'rb') as f:
            retrain_curve = pickle.load(f)['total_reward']
        combined = train_curve + retrain_curve
        rescaled = (np.asarray(combined) + 2.5) / (norm + 2.5)
        smoothed_runs.append(rm(rescaled, 200))
    mean_performance = np.nanmean(smoothed_runs, axis=0)
    std_e_mean = np.nanstd(smoothed_runs, axis=0) / np.sqrt(len(smoothed_runs))
    return mean_performance, std_e_mean
def plot_reward_loss():
    """Plot reward and loss traces for the global `run` experiment.

    Panel 0: total reward (black) and, if present, bootstrap reward (red).
    Panel 1: policy losses; Panel 2: value losses. Model-free losses are
    added when the run logged them.
    """
    window = 10
    data = run.data
    fig, panels = plt.subplots(3, 1, sharex=True)
    panels[0].plot(rm(data['total_reward'], window), 'k', alpha=0.5)
    if "bootstrap_reward" in data.keys():
        panels[0].plot(rm(data['bootstrap_reward'], window), 'r')
    panels[1].plot(rm(data['loss'][0], window), label='ec_p')
    panels[2].plot(rm(data['loss'][1], window), label='ec_v')
    if "mf_loss" in data.keys():
        panels[1].plot(rm(data['mf_loss'][0], window), label='mf_p')
        panels[2].plot(rm(data['mf_loss'][1], window), label='mf_v')
    panels[1].legend(loc=0)
    panels[2].legend(loc=0)
    plt.show()
    plt.close()
def plot_compare_conv_retraining(envs_to_plot, pcts_to_plot):
    """Compare conv-network retraining against EC-bootstrapped retraining.

    Per environment row: column 0 shows the gridworld layout, column 1 the
    episodic-control performance per cache size, column 2 the model-free
    (bootstrap) performance with the MF-only baseline in black. Rewards are
    normalized by the environment's average max reward (analysis_specs).

    Fix vs. original: the bare `except:` around the per-pct block swallowed
    every exception (including KeyboardInterrupt and typo-level NameErrors);
    it now catches only the expected missing-group / missing-file cases.
    """
    fig, ax = plt.subplots(len(envs_to_plot), 3, figsize=(10, 12))
    for e, env in enumerate(envs_to_plot):
        # reward-location markers differ for the '5' environment variant
        if env[-1] == '5':
            rwd_colrow0 = (3, 9)
            rwd_colrow1 = (16, 9)
        else:
            rwd_colrow0 = (5, 5)
            rwd_colrow1 = (14, 14)
        rect0 = plt.Rectangle(rwd_colrow0, 1, 1, facecolor='b',
                              edgecolor=None, alpha=0.3)
        rect1 = plt.Rectangle(rwd_colrow1, 1, 1, facecolor='g',
                              edgecolor=None, alpha=0.3)
        ax[e, 0].pcolor(grids[envs_to_plot.index(env)], cmap='bone_r',
                        edgecolors='k', linewidths=0.1)
        ax[e, 0].axis(xmin=0, xmax=20, ymin=0, ymax=20)
        ax[e, 0].set_aspect('equal')
        #ax[e,0].add_patch(rect0)
        ax[e, 0].add_patch(rect1)
        ax[e, 0].get_xaxis().set_visible(False)
        ax[e, 0].get_yaxis().set_visible(False)
        ax[e, 0].invert_yaxis()

        # --- MF-only baseline (conv agents retrained from scratch) ---
        rep = 'conv'
        id_list = gb_base.get_group((env, 'conv', 'x'))
        mf_retrain = []
        print('MF data')
        for id_num in id_list:
            with open(data_dir + f'{id_num}_data.p', 'rb') as f:
                dats = pickle.load(f)
            raw_score = dats['total_reward'][0:5000]
            normalization = analysis_specs['avg_max_rwd'][env[0:22]]
            transformed = rm((np.asarray(raw_score) + 2.5) /
                             (normalization + 2.5), 200)
            mf_retrain.append(transformed)
        means = np.nanmean(mf_retrain, axis=0)
        sem = np.nanstd(mf_retrain, axis=0) / np.sqrt(len(mf_retrain))
        maxes = means + sem
        mins = means - sem
        ax[e, 2].plot(means, 'k', alpha=0.7)
        ax[e, 2].fill_between(np.arange(len(means)), mins, maxes,
                              color='k', alpha=0.2)

        # --- EC-bootstrapped runs at each cache-size percentage ---
        print('EC bootstrapped data')
        for p, pct in enumerate(pcts_to_plot):
            print(pct)
            ec_performance = []
            mf_bootstrap = []
            try:
                id_list = gb.get_group(
                    (env, rep, int(cache_limits[env][100] * (pct / 100))))
                for i, id_num in enumerate(id_list):
                    with open(data_dir + f'{id_num}_data.p', 'rb') as f:
                        dats = pickle.load(f)
                    raw_score = dats['bootstrap_reward'][0:5000]
                    normalization = analysis_specs['avg_max_rwd'][env[0:22]]
                    transformed = rm((np.asarray(raw_score) + 2.5) /
                                     (normalization + 2.5), 200)
                    mf_bootstrap.append(transformed)
                    raw_score = dats['total_reward'][0:5000]
                    normalization = analysis_specs['avg_max_rwd'][env[0:22]]
                    transformed = rm((np.asarray(raw_score) + 2.5) /
                                     (normalization + 2.5), 200)
                    ec_performance.append(transformed)
                means = np.nanmean(ec_performance, axis=0)
                ax[e, 1].plot(means, label=f'{pct}')
                means = np.nanmean(mf_bootstrap, axis=0)
                ax[e, 2].plot(means, label=f'{pct}')
            except (KeyError, FileNotFoundError):
                # no runs recorded for this (env, rep, cache-size) combo
                print(f'no data for EC{env}{rep}{int(cache_limits[env][100]*(pct/100))}')
        ax[e, 1].legend(loc=0)
        ax[e, 2].legend(loc=0)
        ax[e, 1].set_ylim(0, 1.1)
        ax[e, 2].set_ylim(0, 1.1)
    plt.show()
# Build an environment + actor-critic agent with episodic memory, run one
# experiment, and plot the (smoothed) reward curves.
# Fix vs. original: `network_id == None` -> `network_id is None` (identity
# comparison is the correct idiom for None checks).
env = gym.make(env_name)
plt.close()

# generate network
if network_id is None:
    # generate parameters for network from environment observation shape
    params = nets.fc_params(env)
    params.lr = 0.001
    params.temp = 1.1
    print(params.__dict__)
    network = nets.ActorCritic(params)
else:
    # reload a previously saved agent network
    network = torch.load(f=f'./Data/agents/load_agents/{network_id}.pt')

memtemp = 1
memory = Memory.EpisodicMemory(cache_limit=400,
                               entry_size=env.action_space.n,
                               mem_temp=memtemp)
agent = Agent(network, memory=memory)
run = expt(agent, env)
ntrials = 1000
run.run(NUM_TRIALS=ntrials, NUM_EVENTS=100)
#run.record_log(f'mf_ec_t{memtemp}', env_name, n_trials=ntrials)

smoothing = 10
plt.figure()
plt.plot(rm(run.data['total_reward'], smoothing), c='k', alpha=0.5)
# bootstrap reward is only logged for episodic-control runs
if 'bootstrap_reward' in run.data.keys():
    plt.plot(rm(run.data['bootstrap_reward'], smoothing), c='r')
plt.show()
# Grid of example runs: for each (representation, cache-pct) cell plot the
# first run's total reward (dotted), its bootstrap reward (black), and the
# MF baseline for the same (env, rep) in cyan.
# Fix vs. original: the bare `except:` swallowed every exception (including
# KeyboardInterrupt); it now catches only the expected missing-group /
# missing-file cases.
ec_results = []
mf_results = []
colors = [
    LINCLAB_COLS['red'], LINCLAB_COLS['blue'], LINCLAB_COLS['green'],
    LINCLAB_COLS['purple'], LINCLAB_COLS['orange'], LINCLAB_COLS['grey']
]
fig, ax = plt.subplots(2, 4, sharey=True)
for r, rep in enumerate(reps_to_plot):
    for p, pct in enumerate(pcts_to_plot):
        print(rep, pct)
        try:
            id_list = gb.get_group(
                (env, rep, int(cache_limits[env][100] * (pct / 100)), 5000))
            for i, id_num in enumerate(id_list):
                # only the first run of the group is shown
                if i == 0:
                    with open(data_dir + f'{id_num}_data.p', 'rb') as f:
                        dats = pickle.load(f)
                    ax[r, p].plot(rm(dats['total_reward'], 200),
                                  linestyle=':', color=colors[i])
                    ax[r, p].plot(rm(dats['bootstrap_reward'], 200),
                                  color='k')
            baseline_id = list(gb_base.get_group((env, rep)))[0]
            with open(data_dir + f'{baseline_id}_data.p', 'rb') as f:
                dats = pickle.load(f)
            filt = rm(dats['total_reward'][0:5000], 200)
            ax[r, p].plot(filt, color='cyan')
        except (KeyError, FileNotFoundError, IndexError):
            print('no data')
ax[0, 0].set_ylim([-2.5, 10])
plt.show()
# Train an agent with episodic memory for 1000 trials, log the run to
# MFtraining.csv, and plot reward plus policy/value losses (episodic-control
# and model-free variants on shared axes).
# NOTE(review): assumes `env`, `network`, `Agent`, `Memory`, `expt`, `rm`
# are defined earlier in this file.
memtemp = 1
memory = Memory.EpisodicMemory(cache_limit=400,
                               entry_size=env.action_space.n,
                               mem_temp=memtemp)
agent = Agent(network, memory=memory)
run = expt(agent, env)
ntrials = 1000
nevents = 250
run.run(NUM_TRIALS=ntrials, NUM_EVENTS=nevents)
run.record_log(file='MFtraining.csv',
               expt_type=f'{type(run).__name__}',
               env_name=env_name,
               n_trials=ntrials,
               n_steps=nevents)

smoothing = 10
fig, ax = plt.subplots(3, 1, sharex=True)
# panel 0: rewards (total in black, bootstrap in red)
ax[0].plot(rm(run.data['total_reward'], smoothing), 'k', alpha=0.5)
ax[0].plot(rm(run.data['bootstrap_reward'], smoothing), 'r')
# panel 1: policy losses; panel 2: value losses
ax[1].plot(rm(run.data['loss'][0], smoothing), label='ec_p')
ax[1].plot(rm(run.data['mf_loss'][0], smoothing), label='mf_p')
ax[1].legend(loc=0)
ax[2].plot(rm(run.data['loss'][1], smoothing), label='ec_v')
ax[2].plot(rm(run.data['mf_loss'][1], smoothing), label='mf_v')
ax[2].legend(loc=0)
plt.show()
# Plot normalized training curves (episodes 0..N) and test curves (offset
# to start at episode 5000) for each representation.
# NOTE(review): fragment — references `env`, `e`, `ax`, `train_gb`,
# `test_gb` defined in surrounding code not shown here.
for r, rep in enumerate(['unstructured', 'structured']):
    train_avg_reward = []
    test_avg_reward = []
    train_ids = list(train_gb.get_group((env, rep, 5000)))
    test_ids = list(test_gb.get_group((env + '1', rep, 10000)))
    print(env, rep, len(train_ids), len(test_ids))
    ax[0, r].set_title(f'{rep}')
    for i, id_num in enumerate(train_ids):
        with open(parent_path + f'results/{id_num}_data.p', 'rb') as f:
            dats = pickle.load(f)
        raw_score = dats['total_reward']
        # rescale to fraction of environment's average max reward
        normalization = analysis_specs['avg_max_rwd'][env + '1']
        transformed = (np.asarray(raw_score) + 2.5) / (normalization + 2.5)
        train_avg_reward.append(transformed)
        ax[e, r].plot(rm(transformed, 200))
    #train_mean = np.mean(train_avg_reward,axis=0)
    #train_maxes = train_mean+np.std(train_avg_reward,axis=0)/np.sqrt(len(train_avg_reward))
    #train_mins = train_mean-np.std(train_avg_reward,axis=0)/np.sqrt(len(train_avg_reward))
    for i, id_num in enumerate(test_ids):
        with open(parent_path + f'results/{id_num}_data.p', 'rb') as f:
            dats = pickle.load(f)
        raw_score = dats['total_reward']
        normalization = analysis_specs['avg_max_rwd'][env + '1']
        transformed = (np.asarray(raw_score) + 2.5) / (normalization + 2.5)
        test_avg_reward.append(transformed)
        # test curves are drawn starting at x=5000, after the training phase
        ax[e, r].plot(np.arange(5000, 5000 + len(rm(transformed, 200))),
                      rm(transformed, 200))
# Load one hand-picked MF-only run per environment and plot (a) its smoothed
# reward curve and (b) the log-ratio of its state-occupancy map against the
# uniform distribution over usable states.
env = 'gridworld:gridworld-v51'
e = gym.make(env)
plt.close()
rep = 'structured'
# one representative run id per environment
id_list = {
    'gridworld:gridworld-v11': 'a886a36b-77af-4845-b950-71e64506190c',
    'gridworld:gridworld-v31': '22d9a5cc-13e4-4fe3-9e92-9f25c5ba9b18',
    'gridworld:gridworld-v41': '5f0f1b3f-db3e-4a19-9d19-cf817e9aeee3',
    'gridworld:gridworld-v51': 'ac9a6807-9ecb-405c-b31e-3ff4ccaa2bfd'
}
id_num = id_list[env]
with open(parent_path + f'results/{id_num}_data.p', 'rb') as f:
    dats = pickle.load(f)
print(id_num, [(x, len(dats[x])) for x in dats.keys()])
start_ind = 0
end_ind = -1
# total visit counts per state, summed over the episode window
MF_map = np.nansum(np.asarray(dats['MF_occupancy'][start_ind:end_ind]),
                   axis=0)
MF_visits = np.nansum(np.asarray(MF_map))
MF = MF_map / MF_visits
# log occupancy relative to uniform over the usable (non-obstacle) states;
# assumes a 20x20 grid
MF_occ = np.log(MF.reshape(20, 20) / (1 / len(e.useable)))
fig, ax = plt.subplots(2, 1)
ax[0].plot(rm(dats['total_reward'][start_ind:end_ind], 200))
ax[1].imshow(MF_occ, cmap='RdBu_r', vmin=-4, vmax=4)
plt.savefig(f'../figures/CH3/mf_only_{env[-2:]}_state_occ.svg')
plt.show()
# Fragment: normalize one run's reward, pad it to `upper_limit` episodes
# (with resampled filler if the run had converged, NaNs otherwise), then
# plot the nan-aware group mean +/- s.e.m.
# NOTE(review): this chunk starts mid-loop and ends mid-call — `dats`,
# `upper_limit`, `total_avg_reward`, `ax`, `e`, `r`, `smoothing`, `chop_`
# come from surrounding code not visible here.
raw_score = dats['total_reward']
normalization = analysis_specs['avg_max_rwd'][env[0:22]]
scaled_ = (np.asarray(raw_score) + 2.5) / (normalization + 2.5)
if len(scaled_) < upper_limit:
    num_extras = upper_limit - len(scaled_)
    # despite the names, these use the final 5000 episodes
    last_200_mean = np.mean(scaled_[-5000:])
    last_200_std = np.std(scaled_[-5000:])
    filler = np.random.normal(last_200_mean, last_200_std, num_extras)
    nans = np.zeros(num_extras)
    nans[:] = np.nan
    # converged runs (>0.9 of optimal) are extended with noise matching
    # their tail statistics; unconverged runs are padded with NaNs
    if last_200_mean > 0.9:
        scaled_ = np.concatenate((scaled_, filler))
    else:
        scaled_ = np.concatenate((scaled_, nans))
total_avg_reward.append(rm(scaled_, smoothing))
mean = np.nanmean(total_avg_reward, axis=0)
maxes = mean + np.nanstd(total_avg_reward, axis=0) / np.sqrt(
    len(total_avg_reward))
mins = mean - np.nanstd(total_avg_reward, axis=0) / np.sqrt(
    len(total_avg_reward))
mean = chop_(mean)
maxes = chop_(maxes)
mins = chop_(mins)
# vertical marker at the train/test boundary (shifted by the smoothing lag)
ax[e, r].axvline(x=5000 - smoothing + 1, linestyle=":", color='gray')
ax[e, r].plot(np.arange(len(mean)), mean, color='k', alpha=0.7)
# NOTE(review): this call is truncated in the visible chunk — its remaining
# arguments continue in code not shown here.
ax[e, r].fill_between(np.arange(len(mean)), mins, maxes,
def plot_single_retraining(env, pcts_to_plot, rep, index):
    """Plot one example retraining run per cache-size percentage.

    Panel (0,0): episodic-control performance of the run at position
    `index` in each (env, rep, cache-size, 15000) group. Panel (0,1): the
    same run's bootstrap (model-free) performance, with the MF-only
    baseline (first three runs, mean +/- s.e.m.) in black.

    Fix vs. original: the bare `except:` swallowed every exception
    (including KeyboardInterrupt); it now catches only the expected
    missing-group / bad-index / missing-file cases.
    """
    fig, ax = plt.subplots(2, 2, figsize=(10, 12))
    ## get MF only -- baseline
    id_list = gb_base.get_group((env[0:22], rep))
    # NaN filler used to length-match shorter baseline runs
    filler = np.zeros(5000)
    filler[:] = np.nan
    mf_retrain = []
    for id_num in id_list[0:3]:
        print(id_num)
        with open(parent_path + f'results/{id_num}_data.p', 'rb') as f:
            dats = pickle.load(f)
        raw_score = dats['total_reward'][5000:20000]
        normalization = analysis_specs['avg_max_rwd'][env[0:22]]
        transformed = rm(
            (np.asarray(raw_score) + 2.5) / (normalization + 2.5), 200)
        # 9801 = 10000-episode run after smoothing window of 200; pad to
        # match the 15000-episode runs
        if len(transformed) == 9801:
            transformed = np.concatenate((transformed, filler))
        mf_retrain.append(transformed)
    lens = [len(x) for x in mf_retrain]
    print(lens, len(mf_retrain), len(mf_retrain[0]))
    means = np.nanmean(np.asarray(mf_retrain), axis=0)
    sem = np.nanstd(mf_retrain, axis=0) / np.sqrt(len(mf_retrain))
    maxes = means + sem
    mins = means - sem
    ax[0, 1].plot(means, 'k', alpha=0.7)
    ax[0, 1].fill_between(np.arange(len(means)), mins, maxes,
                          color='k', alpha=0.2)

    print('EC bootstrapped data')
    for p, pct in enumerate(pcts_to_plot):
        print(pct)
        ec_performance = []
        mf_bootstrap = []
        try:
            current_id_list = gb.get_group(
                (env, rep, int(cache_limits[env][100] * (pct / 100)), 15000))
            print(env, pct, len(current_id_list), 'helloooooo')
            print(current_id_list)
            # pick the single example run requested by `index`
            id_num = list(current_id_list)[index]
            print(id_num)
            with open(parent_path + f'results/{id_num}_data.p', 'rb') as f:
                dats = pickle.load(f)
            raw_score = dats['bootstrap_reward'][0:15000]
            normalization = analysis_specs['avg_max_rwd'][env[0:22]]
            transformed = rm(
                (np.asarray(raw_score) + 2.5) / (normalization + 2.5), 200)
            mf_bootstrap.append(transformed)
            raw_score = dats['total_reward'][0:15000]
            normalization = analysis_specs['avg_max_rwd'][env[0:22]]
            transformed = rm(
                (np.asarray(raw_score) + 2.5) / (normalization + 2.5), 200)
            ec_performance.append(transformed)
            ax[0, 0].plot(ec_performance[0], label=f'{pct}',
                          color=colors[pct])
            ax[0, 1].plot(mf_bootstrap[0], label=f'{pct}',
                          color=colors[pct])
        except (KeyError, IndexError, FileNotFoundError):
            print(
                f'no data for EC{env}{rep}{int(cache_limits[env][100]*(pct/100))}'
            )
    ax[0, 0].legend(loc=0)
    ax[0, 1].legend(loc=0)
    ax[0, 0].set_title('EC perf')
    ax[0, 1].set_title('Bootstrap Perf')
    ax[0, 0].set_ylim(0, 1.1)
    ax[0, 1].set_ylim(0, 1.1)
    #plt.savefig(f'../figures/CH3/example_bootstrap.svg')
    plt.show()
def plot_each_env_retraining(envs_to_plot, pcts_to_plot, rep):
    """Plot EC vs MF retraining performance (mean +/- s.e.m.) per environment.

    Per environment row: column 0 = episodic control, column 1 = model-free
    control (bootstrap), each at every cache-size percentage, with the
    MF-only baseline in black on the MF panel.

    Fix vs. original: the bare `except:` swallowed every exception
    (including KeyboardInterrupt); it now catches only the expected
    missing-group / missing-file cases.
    """
    fig, ax = plt.subplots(len(envs_to_plot), 2, figsize=(14, 10),
                           sharex=True, sharey=True)
    for e, env in enumerate(envs_to_plot):
        ## get MF only -- baseline
        id_list = gb_base.get_group((env[0:22], rep, 30000))
        mf_retrain = []
        print(f'numMF retraining on pol = {len(id_list)}')
        for id_num in id_list:
            with open(parent_path + f'results/{id_num}_data.p', 'rb') as f:
                dats = pickle.load(f)
            # retraining window of the baseline run
            raw_score = dats['total_reward'][5000:20000]
            normalization = analysis_specs['avg_max_rwd'][env[0:22]]
            transformed = rm(
                (np.asarray(raw_score) + 2.5) / (normalization + 2.5), 200)
            mf_retrain.append(transformed)
        means = np.nanmean(mf_retrain, axis=0)
        sem = np.nanstd(mf_retrain, axis=0) / np.sqrt(len(mf_retrain))
        maxes = means + sem
        mins = means - sem
        ax[e, 1].plot(means, 'k', alpha=0.7)
        ax[e, 1].fill_between(np.arange(len(means)), mins, maxes,
                              color='k', alpha=0.2)

        print('EC bootstrapped data')
        for p, pct in enumerate(pcts_to_plot):
            print(pct)
            ec_performance = []
            mf_bootstrap = []
            try:
                id_list = gb.get_group(
                    (env, rep,
                     int(cache_limits[env][100] * (pct / 100)), 15000))
                print(env, pct, len(id_list))
                for i, id_num in enumerate(id_list):
                    with open(parent_path + f'results/{id_num}_data.p',
                              'rb') as f:
                        dats = pickle.load(f)
                    raw_score = dats['bootstrap_reward'][0:15000]
                    normalization = analysis_specs['avg_max_rwd'][env[0:22]]
                    transformed = rm((np.asarray(raw_score) + 2.5) /
                                     (normalization + 2.5), 200)
                    mf_bootstrap.append(transformed)
                    raw_score = dats['total_reward'][0:15000]
                    normalization = analysis_specs['avg_max_rwd'][env[0:22]]
                    transformed = rm((np.asarray(raw_score) + 2.5) /
                                     (normalization + 2.5), 200)
                    ec_performance.append(transformed)
                means = np.nanmean(ec_performance, axis=0)
                sem = (np.nanstd(ec_performance, axis=0) /
                       np.sqrt(len(ec_performance)))
                maxes = means + sem
                mins = means - sem
                ax[e, 0].plot(means, label=f'{pct}', color=colors[pct])
                ax[e, 0].fill_between(np.arange(len(means)), mins, maxes,
                                      color=colors[pct], alpha=0.2)
                means = np.nanmean(mf_bootstrap, axis=0)
                sem = (np.nanstd(mf_bootstrap, axis=0) /
                       np.sqrt(len(mf_bootstrap)))
                maxes = means + sem
                mins = means - sem
                ax[e, 1].plot(means, label=f'{pct}', color=colors[pct])
                ax[e, 1].fill_between(np.arange(len(means)), mins, maxes,
                                      color=colors[pct], alpha=0.2)
            except (KeyError, FileNotFoundError):
                print(
                    f'no data for EC{env}{rep}{int(cache_limits[env][100]*(pct/100))}'
                )
    ax[0, 0].set_title('Episodic Control')
    ax[0, 1].set_title('Model-Free Control')
    for r in range(len(envs_to_plot)):
        ax[r, 0].set_ylabel(f'Performance (% Optimal)')
        ax[r, 0].set_ylim(0, 1.1)
        ax[r, 1].set_ylim(0, 1.1)
        ax[r, 0].set_xlabel('Episodes')
        ax[r, 1].set_xlabel('Episodes')
    plt.savefig(f'../figures/CH3/example_bootstrap_{rep}_all_env.svg')
    plt.show()
def plot_perceptron(df, envs_to_plot, reps_to_plot):
    """Plot perceptron (FC) train+test performance per environment.

    Column 0: gridworld layout with old/new reward locations marked.
    Column 1: smoothed mean +/- s.e.m. of each run's training curve
    (looked up via its 'load_from' id) concatenated with its test curve.
    NOTE(review): uses the global groupby `df_gb`, not a local one —
    confirm it matches the (env+'1', rep, 10000) key structure used here.
    """
    grids = get_grids(envs_to_plot)
    fig, ax = plt.subplots(len(envs_to_plot), 2, sharey='col', sharex='col')
    for e, env in enumerate(envs_to_plot):
        scaling_factor = analysis_specs['avg_max_rwd'][env + '1']
        # reward-location markers differ for the '5' environment variant
        if env[-1] == '5':
            rwd_colrow0 = (3, 9)
            rwd_colrow1 = (16, 9)
        else:
            rwd_colrow0 = (5, 5)
            rwd_colrow1 = (14, 14)
        rect0 = plt.Rectangle(rwd_colrow0, 1, 1, facecolor='gray',
                              edgecolor=None, alpha=0.3)
        rect1 = plt.Rectangle(rwd_colrow1, 1, 1, facecolor='g',
                              edgecolor=None, alpha=0.3)
        ax[e, 0].pcolor(grids[e], cmap='bone_r', edgecolors='k',
                        linewidths=0.1)
        ax[e, 0].axis(xmin=0, xmax=20, ymin=0, ymax=20)
        ax[e, 0].set_aspect('equal')
        ax[e, 0].add_patch(rect0)
        ax[e, 0].add_patch(rect1)
        ax[e, 0].get_xaxis().set_visible(False)
        ax[e, 0].get_yaxis().set_visible(False)
        ax[e, 0].invert_yaxis()
        for r, rep in enumerate(reps_to_plot):
            id_list = list(df_gb.get_group((env + '1', rep, 10000)))
            print(env, rep, len(id_list))
            total_avg_reward = []
            for i, id_num in enumerate(id_list):
                # get training data
                train_dat_id = list(
                    df.loc[df['save_id'] == id_num]['load_from'])[0]
                with open(parent_path + f'results/{train_dat_id}_data.p',
                          'rb') as f:
                    dats = pickle.load(f)
                raw_score = dats['total_reward'][0:5000]
                training_transformed = (np.asarray(raw_score) +
                                        2.5) / (scaling_factor + 2.5)
                # get testing data for this run
                with open(parent_path + f'results/{id_num}_data.p',
                          'rb') as f:
                    dats = pickle.load(f)
                scaled_ = (np.asarray(dats['total_reward']) +
                           2.5) / (scaling_factor + 2.5)
                train_test = np.concatenate((training_transformed, scaled_))
                total_avg_reward.append(rm(train_test, 200))
            mean = np.mean(total_avg_reward, axis=0)
            stand = np.std(total_avg_reward, axis=0) / np.sqrt(
                len(total_avg_reward))
            print(len(mean))
            #for j in total_avg_reward:
            #    ax[e,r].plot(j)
            ax[e, 1].set_ylim([0, 1.1])
            ax[e, 1].set_yticks([0, 1])
            ax[e, 1].set_yticklabels([0, 100])
            ax[e, 1].set_ylabel('Performance \n(% Optimal)')
            ax[e, 1].plot(mean, color=col_to_plot[rep])
            ax[e, 1].fill_between(np.arange(len(mean)),
                                  mean - stand,
                                  mean + stand,
                                  color=col_to_plot[rep],
                                  alpha=0.2)
    #ax[0,r].set_title(rep)
    plt.savefig('../figures/CH1/perceptron_FC.svg')
    plt.show()
def plot_shallow(
    df,
    envs_to_plot,
    reps_to_plot,
):
    """Plot shallow (FC) agent performance per environment.

    Column 0: gridworld layout with reward locations marked.
    Column 1: mean +/- s.e.m. across runs of the normalized reward,
    with short runs padded to `upper_limit` episodes (resampled filler
    if converged above 0.95 of optimal, NaNs otherwise).
    """
    # pad every run out to this many episodes so curves can be averaged
    upper_limit = 30000
    grids = get_grids(envs_to_plot)
    groups_to_split = ['env_name', 'representation', 'extra_info']
    df_gb = df.groupby(groups_to_split)["save_id"]
    fig, ax = plt.subplots(len(envs_to_plot), 2, sharey='col', sharex='col')
    ftsz = 8
    for e, env in enumerate(envs_to_plot):
        scaling_factor = analysis_specs['avg_max_rwd'][env + '1']
        # reward-location markers differ for the '5' environment variant
        if env[-1] == '5':
            rwd_colrow0 = (3, 9)
            rwd_colrow1 = (16, 9)
        else:
            rwd_colrow0 = (5, 5)
            rwd_colrow1 = (14, 14)
        rect0 = plt.Rectangle(rwd_colrow0, 1, 1, facecolor='gray',
                              edgecolor=None, alpha=0.5)
        rect1 = plt.Rectangle(rwd_colrow1, 1, 1, facecolor='g',
                              edgecolor=None, alpha=0.3)
        ax[e, 0].pcolor(grids[e], cmap='bone_r', edgecolors='k',
                        linewidths=0.1)
        ax[e, 0].axis(xmin=0, xmax=20, ymin=0, ymax=20)
        ax[e, 0].set_aspect('equal')
        ax[e, 0].add_patch(rect0)
        ax[e, 0].add_patch(rect1)
        ax[e, 0].get_xaxis().set_visible(False)
        ax[e, 0].get_yaxis().set_visible(False)
        ax[e, 0].invert_yaxis()
        for r, rep in enumerate(reps_to_plot):
            id_list = list(df_gb.get_group((env, rep, 'x')))
            print(env, rep, len(id_list))
            total_avg_reward = []
            for i, id_num in enumerate(id_list):
                with open(parent_path + f'results/{id_num}_data.p',
                          'rb') as f:
                    dats = pickle.load(f)
                scaled_ = (np.asarray(dats['total_reward']) +
                           2.5) / (scaling_factor + 2.5)
                if len(scaled_) < upper_limit:
                    print('hello', len(scaled_))
                    num_extras = upper_limit - len(scaled_)
                    last_200_mean = np.mean(scaled_[-200:])
                    last_200_std = np.std(scaled_[-200:])
                    filler = np.random.normal(last_200_mean, last_200_std,
                                              num_extras)
                    nans = np.zeros(num_extras)
                    nans[:] = np.nan
                    # converged runs are extended with noise matching their
                    # tail statistics; unconverged runs get NaN padding
                    if last_200_mean > 0.95:
                        scaled_ = np.concatenate((scaled_, filler))
                    else:
                        scaled_ = np.concatenate((scaled_, nans))
                else:
                    print(len(scaled_))
                total_avg_reward.append(scaled_)
            # nan-aware statistics so the padding does not bias the average
            mean = rm(np.nanmean(total_avg_reward, axis=0), 200)
            stand = rm(
                np.nanstd(total_avg_reward, axis=0) /
                np.sqrt(len(total_avg_reward)), 200)
            print(len(mean))
            ax[e, 1].set_ylim([0, 1.1])
            ax[e, 1].set_yticks([0, 1])
            ax[e, 1].set_yticklabels([0, 100], fontsize=ftsz)
            ax[e, 1].set_ylabel('Performance \n(% Optimal)', fontsize=ftsz)
            #for index, x in enumerate(total_avg_reward):
            #    ax[e,r+1].plot(rm(x,200), label=f'{id_list[index][0:8]}')
            #ax[e,r+1].legend(loc=0)
            ax[e, 1].plot(mean, color=col_to_plot[rep])
            ax[e, 1].fill_between(np.arange(len(mean)),
                                  mean - stand,
                                  mean + stand,
                                  color=col_to_plot[rep],
                                  alpha=0.2)
            ax[e, 1].set_xlabel('Episodes', fontsize=ftsz)
            ax[e, 1].set_xticks([0, 10000, 20000, 30000])
            ax[e, 1].set_xticklabels([0, 10000, 20000, 30000], fontsize=ftsz)
    plt.savefig('../figures/CH1/shallow_FC.svg')
    plt.show()
# Compare the structured-rep baseline mean reward against each cache-size
# percentage for one environment (v41).
# NOTE(review): this chunk is truncated — the `try:` below has its matching
# `except` in code that continues past the visible region.
envs_to_plot = ['gridworld:gridworld-v11', 'gridworld:gridworld-v41',
                'gridworld:gridworld-v31', 'gridworld:gridworld-v51']
pcts_to_plot = [100, 75, 50, 25]
reps_to_plot = ['unstructured', 'structured']
grids = get_grids(envs_to_plot)
env = envs_to_plot[1]
pct = 100
rep = 'structured'
# baseline runs for this (env, rep)
id_list = list(bl_gb.get_group((env, rep)))
print(id_list)
total_avg_reward = []
for i, id_num in enumerate(id_list):
    with open(parent_path + f'results/{id_num}_data.p', 'rb') as f:
        dats = pickle.load(f)
    total_avg_reward.append(rm(dats['total_reward'], 100))
mean = np.mean(total_avg_reward, axis=0)
print(len(mean))
plt.plot(mean)
for pct in pcts_to_plot:
    try:
        id_list = list(
            df_gb.get_group(
                (env, rep, int(cache_limits[env][100] * (pct / 100)), 5000)))
        print(id_list)
        total_avg_reward = []
        for i, id_num in enumerate(id_list):
            with open(parent_path + f'results/{id_num}_data.p', 'rb') as f:
                dats = pickle.load(f)
            total_avg_reward.append(rm(dats['total_reward'], 100))
        mean = np.mean(total_avg_reward, axis=0)
        print(len(mean))
# NOTE(review): stray '}' closes an `ids` dict literal whose opening lines
# are outside the visible region.
}
# Load each referenced run's reward curve and compare model-free vs episodic
# control under onehot and place-cell state representations.
total_reward = {}
for key, value in ids.items():
    # empty string means no run was recorded for this condition
    if value == '':
        pass
    else:
        load_id = value
        with open(data_dir + f'results/{load_id}_data.p', 'rb') as f:
            total_reward[key] = pickle.load(f)['total_reward']

smoothing = 100
plt.figure()
plt.plot(rm(total_reward['oh_test'], smoothing), ':', c='C0',
         label='model free control (onehot)')
plt.plot(rm(total_reward['oh_test_ec'], smoothing), c='C0',
         label='episodic control (onehot)')
plt.plot(rm(total_reward['pc_test'], smoothing), ':', c='C1',
         label='model free control (place cell)')
plt.plot(rm(total_reward['pc_test_ec'], smoothing), c='C1',
         label='episodic control (place cell)')
plt.xlim([0, 1900])
plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=2)
plt.show()
# Average MF shallow train+test curves per (env, rep); 15000-episode runs
# are NaN-padded (via `empties`, allocated out of view) so all runs share
# one length before averaging.
empties[:] = np.nan
for e, env in enumerate(envs_to_plot):
    for r, rep in enumerate(reps_to_plot):
        id_list = list(df_gb.get_group((env, rep)))
        print(env, rep, len(id_list))
        total_avg_reward = []
        ax[0, r].set_title(f'{rep}')
        for i, id_num in enumerate(id_list):
            with open(parent_path + f'results/{id_num}_data.p', 'rb') as f:
                dats = pickle.load(f)
            raw_score = dats['total_reward']
            print(len(raw_score))
            # pad short (15000-episode) runs with NaNs to full length
            if len(raw_score) == 15000:
                raw_score = np.concatenate((raw_score, empties))
            normalization = analysis_specs['avg_max_rwd'][env + '1']
            transformed = rm(
                (np.asarray(raw_score) + 2.5) / (normalization + 2.5), 200)
            total_avg_reward.append(transformed)
            print(len(transformed))
            #ax[e,r].plot(transformed,label=f'{id_num[0:8]}')
        mean = np.nanmean(total_avg_reward, axis=0)
        maxes = mean + np.nanstd(total_avg_reward, axis=0) / np.sqrt(
            len(total_avg_reward))
        mins = mean - np.nanstd(total_avg_reward, axis=0) / np.sqrt(
            len(total_avg_reward))
        # train/test boundary: 5000 episodes minus the smoothing lag (199)
        ax[e, r].axvline(x=4801, linestyle=":", color='gray')
        ax[e, r].plot(np.arange(len(mean)), mean, LINCLAB_COLS['green'])
        ax[e, r].fill_between(np.arange(len(mean)), mins, maxes,
                              color=LINCLAB_COLS['green'], alpha=0.2)
        ax[e, r].set_ylim(0, 1.1)
plt.savefig(f'../figures/CH1/MFshallow_traintest.svg')
plt.show()
def plot_train_test(df, df_gb, envs, reps, save=False):
    """Plot concatenated train+test performance per environment and rep.

    For each test run, its training curve is looked up via 'load_from',
    concatenated ahead of the test curve, normalized and smoothed; the
    per-rep mean +/- s.e.m. is drawn on the right panel next to the
    gridworld layout.
    NOTE(review): `save` is accepted but unused — the figure is always
    saved to the fixed path below.
    """
    fig, ax = plt.subplots(len(envs), 2, sharex='col')
    for e, env in enumerate(envs):
        # reward-location markers differ for the '5' environment variant
        if env[-1] == '5':
            rwd_colrow0 = (3, 9)
            rwd_colrow1 = (16, 9)
        else:
            rwd_colrow0 = (5, 5)
            rwd_colrow1 = (14, 14)
        rect0 = plt.Rectangle(rwd_colrow0, 1, 1, facecolor='b',
                              edgecolor=None, alpha=0.3)
        rect1 = plt.Rectangle(rwd_colrow1, 1, 1, facecolor='g',
                              edgecolor=None, alpha=0.3)
        ax[e, 0].pcolor(grids[e], cmap='bone_r', edgecolors='k',
                        linewidths=0.1)
        ax[e, 0].axis(xmin=0, xmax=20, ymin=0, ymax=20)
        ax[e, 0].set_aspect('equal')
        ax[e, 0].add_patch(rect0)
        ax[e, 0].add_patch(rect1)
        ax[e, 0].get_xaxis().set_visible(False)
        ax[e, 0].get_yaxis().set_visible(False)
        ax[e, 0].invert_yaxis()
        for r, rep in enumerate(reps):
            train_test_array = []
            id_list = list(df_gb.get_group((env, rep)))
            print(env, rep)
            for i, id_num in enumerate(id_list):
                # get training data
                train_dat_id = list(
                    df.loc[df['save_id'] == id_num]['load_from'])[0]
                with open(data_dir + f'{train_dat_id}_data.p', 'rb') as f:
                    dats = pickle.load(f)
                raw_score = dats['total_reward'][0:5000]
                normalization = analysis_specs['avg_max_rwd'][env + '1']
                training_transformed = (np.asarray(raw_score) +
                                        2.5) / (normalization + 2.5)
                # get testing data
                with open(data_dir + f'{id_num}_data.p', 'rb') as f:
                    dats = pickle.load(f)
                raw_score = dats['total_reward']
                normalization = analysis_specs['avg_max_rwd'][env + '1']
                testing_transformed = (np.asarray(raw_score) +
                                       2.5) / (normalization + 2.5)
                train_test_data = rm(
                    np.concatenate(
                        (training_transformed, testing_transformed)), 200)
                train_test_array.append(train_test_data)
                print('done', id_num)
            mean_perf = np.mean(train_test_array, axis=0)
            std_perf = np.std(train_test_array, axis=0) / np.sqrt(
                len(train_test_array))
            mins = mean_perf - std_perf
            maxes = mean_perf + std_perf
            ax[e, 1].plot(np.arange(len(mean_perf)),
                          mean_perf,
                          color=LINCLAB_COLS[rep_to_col[rep]],
                          label=labels_for_plot[rep])
            ax[e, 1].fill_between(np.arange(len(mean_perf)), mins, maxes,
                                  color=LINCLAB_COLS[rep_to_col[rep]],
                                  alpha=0.2)
        ax[e, 1].set_ylim(0, 1.1)
        # train/test boundary: 5000 episodes minus the smoothing lag (199)
        ax[e, 1].axvline(x=4801, linestyle=":", color='gray')
        ax[e, 1].set_ylabel('Performance \n(% Optimal)')
        ax[e, 1].set_xlabel('Episodes')
    ax[0, 1].legend(loc='upper center', bbox_to_anchor=(0.5, 1.1))
    plt.savefig('../figures/CH1/conv_net_retrain.svg')
    plt.show()
def plot_train_test(df, envs, reps, save=False):
    """Plot train+test performance using NaN-dummy alignment.

    Unlike the pairwise-concatenation variant, training runs (from
    conv_mf_training.csv) and test runs are loaded independently: each
    training curve is extended with a 25000-long NaN tail and each test
    curve is prefixed with a 5000-long NaN head, so nan-aware averaging
    yields one continuous mean curve across both phases.
    NOTE(review): `save` is accepted but unused — the figure is always
    saved to the fixed path below. This def shadows the earlier
    plot_train_test definition in this file.
    """
    fig, ax = plt.subplots(4, 2, sharex='col')
    ftsz = 8
    groups_to_split = ['env_name', 'representation', 'extra_info']
    training_df = pd.read_csv('../../Data/conv_mf_training.csv')
    tr_gb = training_df.groupby(groups_to_split)['save_id']
    df_gb = df.groupby(groups_to_split)["save_id"]
    for e, env in enumerate(envs):
        # reward-location markers differ for the '5' environment variant
        if env[-1] == '5':
            rwd_colrow0 = (3, 9)
            rwd_colrow1 = (16, 9)
        else:
            rwd_colrow0 = (5, 5)
            rwd_colrow1 = (14, 14)
        rect0 = plt.Rectangle(rwd_colrow0, 1, 1, facecolor='gray',
                              edgecolor=None, alpha=0.5)
        rect1 = plt.Rectangle(rwd_colrow1, 1, 1, facecolor='g',
                              edgecolor=None, alpha=0.3)
        ax[e, 0].pcolor(grids[e], cmap='bone_r', edgecolors='k',
                        linewidths=0.1)
        ax[e, 0].axis(xmin=0, xmax=20, ymin=0, ymax=20)
        ax[e, 0].set_aspect('equal')
        ax[e, 0].add_patch(rect0)
        ax[e, 0].add_patch(rect1)
        ax[e, 0].get_xaxis().set_visible(False)
        ax[e, 0].get_yaxis().set_visible(False)
        ax[e, 0].invert_yaxis()
        for r, rep in enumerate(reps):
            print(env, rep)
            train_test_array = []
            id_array = []
            # get training
            # NaN dummies pad each curve to the common (5000+25000) length
            test_dummy = np.zeros(25000)
            test_dummy[:] = np.nan
            train_dummy = np.zeros(5000)
            train_dummy[:] = np.nan
            train_ids = list(tr_gb.get_group((env, rep, 'x')))
            for i, id_num in enumerate(train_ids):
                with open(data_dir + f'{id_num}_data.p', 'rb') as f:
                    dats = pickle.load(f)
                raw_score = dats['total_reward'][0:5000]
                normalization = analysis_specs['avg_max_rwd'][env + '1']
                training_transformed = (np.asarray(raw_score) +
                                        2.5) / (normalization + 2.5)
                run_info = np.concatenate(
                    (training_transformed, test_dummy))
                id_array.append(id_num)
                train_test_array.append(run_info)
            print('training')
            # get testing
            test_ids = list(df_gb.get_group((env, rep, 'x')))
            for i, id_num in enumerate(test_ids):
                with open(data_dir + f'{id_num}_data.p', 'rb') as f:
                    dats = pickle.load(f)
                raw_score = dats['total_reward']
                normalization = analysis_specs['avg_max_rwd'][env + '1']
                testing_transformed = (np.asarray(raw_score) +
                                       2.5) / (normalization + 2.5)
                run_info = np.concatenate((train_dummy, testing_transformed))
                id_array.append(id_num)
                train_test_array.append(run_info)
            print('testing')
            # nan-aware stats: the NaN dummies drop out of the averages
            mean_perf = rm(np.nanmean(train_test_array, axis=0), 200)
            std_perf = rm(np.nanstd(train_test_array, axis=0),
                          200) / np.sqrt(len(train_test_array))
            mins = mean_perf - std_perf
            maxes = mean_perf + std_perf
            ax[e, 1].plot(np.arange(len(mean_perf)),
                          mean_perf,
                          color=LINCLAB_COLS[rep_to_col[rep]],
                          label=labels_for_plot[rep])
            ax[e, 1].fill_between(np.arange(len(mean_perf)), mins, maxes,
                                  color=LINCLAB_COLS[rep_to_col[rep]],
                                  alpha=0.2)
        ax[e, 1].set_ylim(0, 1.1)
        ax[e, 1].set_yticks([0, 1])
        ax[e, 1].set_yticklabels([0, 100])
        # train/test boundary: 5000 episodes minus the smoothing lag (199)
        ax[e, 1].axvline(x=4801, linestyle=":", color='gray')
        ax[e, 1].tick_params(axis='both', which='major', labelsize=8)
        ax[e, 1].set_ylabel('Performance \n(% Optimal)', fontsize=ftsz)
        ax[e, 1].set_xlabel('Episodes', fontsize=ftsz)
    ax[0, 1].legend(loc='upper center', bbox_to_anchor=(0.5, 1.1))
    plt.savefig('../figures/CH1/conv_net_retrain.svg')
    plt.show()
def compare_conv_fc(env, conv_gb, fc_gb):
    """Compare conv-network and fully-connected agent performance.

    Left panel: gridworld layout with the old reward location marked.
    Right panel: mean +/- s.e.m. curves for the conv runs (gray) and the
    two FC representations (colored), normalized to fraction of optimal.
    """
    fig, ax = plt.subplots(1, 2, sharex='col')
    # reward-location markers differ for the '5' environment variant
    if env[-1] == '5':
        rwd_colrow0 = (3, 9)
        rwd_colrow1 = (16, 9)
    else:
        rwd_colrow0 = (5, 5)
        rwd_colrow1 = (14, 14)
    rect0 = plt.Rectangle(rwd_colrow0, 1, 1, facecolor='b',
                          edgecolor=None, alpha=0.3)
    rect1 = plt.Rectangle(rwd_colrow1, 1, 1, facecolor='g',
                          edgecolor=None, alpha=0.3)
    ax[0].pcolor(grids[envs.index(env)], cmap='bone_r', edgecolors='k',
                 linewidths=0.1)
    ax[0].axis(xmin=0, xmax=20, ymin=0, ymax=20)
    ax[0].set_aspect('equal')
    ax[0].add_patch(rect0)
    #ax[0].add_patch(rect1)
    ax[0].get_xaxis().set_visible(False)
    ax[0].get_yaxis().set_visible(False)
    ax[0].invert_yaxis()
    # show CONV Results
    id_list = list(conv_gb.get_group((env, 'reward_conv')))
    train_array = []
    for i, id_num in enumerate(id_list):
        with open(data_dir + f'{id_num}_data.p', 'rb') as f:
            dats = pickle.load(f)
        raw_score = dats['total_reward']  #[0:5000]
        print(len(raw_score))
        normalization = analysis_specs['avg_max_rwd'][env + '1']
        training_transformed = rm(
            (np.asarray(raw_score) + 2.5) / (normalization + 2.5), 200)
        train_array.append(training_transformed)
        print('done', id_num)
    mean_perf = np.mean(train_array, axis=0)
    std_perf = np.std(train_array, axis=0) / np.sqrt(len(train_array))
    mins = mean_perf - std_perf
    maxes = mean_perf + std_perf
    ax[1].plot(np.arange(len(mean_perf)), mean_perf, color="gray",
               label='Image (Conv)')
    ax[1].fill_between(np.arange(len(mean_perf)), mins, maxes,
                       color='gray', alpha=0.2)
    # show FC Results
    for rep in ['unstructured', 'structured']:
        id_list = list(fc_gb.get_group((env, rep)))
        train_array = []
        # NaN filler used to length-match shorter FC runs
        filler = np.zeros(10000)
        filler[:] = np.nan
        for i, id_num in enumerate(id_list):
            with open(data_dir + f'{id_num}_data.p', 'rb') as f:
                dats = pickle.load(f)
            # FC runs are scored on their retraining phase only
            raw_score = dats['total_reward'][5000:-1]
            if len(raw_score) == 9999:
                raw_score += list(filler)
            print(len(raw_score))
            normalization = analysis_specs['avg_max_rwd'][env + '1']
            training_transformed = rm(
                (np.asarray(raw_score) + 2.5) / (normalization + 2.5), 200)
            train_array.append(training_transformed)
            print('done', id_num)
        # nan-aware stats so the NaN padding drops out of the averages
        mean_perf = np.nanmean(train_array, axis=0)
        std_perf = np.nanstd(train_array, axis=0) / np.sqrt(len(train_array))
        mins = mean_perf - std_perf
        maxes = mean_perf + std_perf
        ax[1].plot(np.arange(len(mean_perf)), mean_perf,
                   color=LINCLAB_COLS[rep_to_col[rep]],
                   label=f'{rep} (FC)')
        ax[1].fill_between(np.arange(len(mean_perf)), mins, maxes,
                           color=LINCLAB_COLS[rep_to_col[rep]], alpha=0.2)
    ax[1].set_ylim(0, 1.1)
    ax[1].set_yticks([0, 1])
    ax[1].set_yticklabels([0, 100])
    ax[1].set_ylabel('Performance \n(% Optimal)')
    ax[1].set_xlabel('Episodes')
    ax[1].legend(loc='upper center', bbox_to_anchor=(0.5, 1.1))
    plt.savefig(f'../figures/CH1/compare_conv_fc{env[-2:]}.svg')
    plt.show()