def plot_rewards(data, smoothing=10):
    """Plot reward, loss, and (optionally) gradient-norm curves from one run log.

    Args:
        data: dict-like run log with at least 'total_reward' and 'loss'
            (loss[0] = policy loss, loss[1] = value loss); may also contain
            'bootstrap_reward', 'mf_loss', and per-layer 'weights'.
        smoothing: running-mean window passed to ``rm`` (default 10,
            matching the previously hard-coded value).
    """
    fig, ax = plt.subplots(3, 1, sharex=True)

    # Top panel: episodic-control reward, plus model-free bootstrap reward if logged.
    ax[0].plot(rm(data['total_reward'], smoothing), c='k', alpha=0.5, label='EC')
    if 'bootstrap_reward' in data:  # idiomatic membership test (was `in data.keys()`)
        ax[0].plot(rm(data['bootstrap_reward'], smoothing), c='r', label='MF')
    ax[0].set_ylabel('Reward')
    ax[0].legend(loc=0)

    # Middle panel: policy/value losses for the EC head and, when present, the MF head.
    ax[1].plot(rm(data['loss'][0], smoothing), label='ec_p')
    ax[1].plot(rm(data['loss'][1], smoothing), ':', label='ec_v')
    if 'mf_loss' in data:
        ax[1].plot(rm(data['mf_loss'][0], smoothing), label='mf_p')
        ax[1].plot(rm(data['mf_loss'][1], smoothing), ':', label='mf_v')
    ax[1].set_ylabel('Loss')
    ax[1].legend(loc=0)

    # Bottom panel: per-layer gradient norms, if the run recorded them.
    if 'weights' in data:
        ax[2].plot(rm(data['weights']['h0'], smoothing), ':', label='h0', c='k', alpha=0.5)
        ax[2].plot(rm(data['weights']['h1'], smoothing), label='h1', c='k', alpha=0.7)
        ax[2].plot(rm(data['weights']['p'], smoothing), label='p', c='r')
        ax[2].plot(rm(data['weights']['v'], smoothing), label='v', c='g')
        ax[2].set_ylabel('Gradient \nNorm')
        ax[2].legend(loc=0)
    plt.show()
# Aggregate per-run reward traces into mean/std curves across runs.
# NOTE(review): pre_e_reward / pre_b_reward are assumed to be built earlier
# in the file (per-run EC and bootstrap traces) — confirm against context.
pre_m_reward = [run['total_reward'] for run in modelfree_data]

# Stack each collection once, then reduce along the run axis
# (the original recomputed np.vstack for both the mean and the std).
e_stack = np.vstack(pre_e_reward)
e_reward = np.mean(e_stack, axis=0)
e_std = np.std(e_stack, axis=0)
b_stack = np.vstack(pre_b_reward)
b_reward = np.mean(b_stack, axis=0)
b_std = np.std(b_stack, axis=0)
m_stack = np.vstack(pre_m_reward)
m_reward = np.mean(m_stack, axis=0)
m_std = np.std(m_stack, axis=0)

# Running-mean smoothing before plotting.
smoothing = 100
smooth_e = rm(e_reward, smoothing)
smooth_e_std = rm(e_std, smoothing)
smooth_m = rm(m_reward, smoothing)
smooth_m_std = rm(m_std, smoothing)
smooth_b = rm(b_reward, smoothing)
smooth_b_std = rm(b_std, smoothing)

# Model-free curve with a ±1 std shaded band.
plt.figure(0, figsize=(5, 5))
plt.plot(smooth_m, label='model free', color='gray')
plt.fill_between(np.arange(len(smooth_m)), smooth_m - smooth_m_std,
                 smooth_m + smooth_m_std, alpha=0.5, color='gray')
# Route one run's pickled results into novel_R / moved_R by environment type.
# NOTE(review): `i` is presumably a row index from an enclosing loop over
# id_data — the loop header is not visible in this chunk; confirm.
run_id = id_data.iloc[[i]]['run_id'].item()
env_type = id_data.iloc[[i]]['env_type'].item()
expt_type = id_data.iloc[[i]]['expt_type'].item()
with open(f'../Data/results/{run_id}_data.p', 'rb') as f:
    data = pickle.load(f)
if env_type == 'gym_grid:gridworld-v1': #reward at 5,5
    novel_R[expt_type].append(data)
elif env_type == 'gym_grid:gridworld-v11': # reward at 10,10, MF trained on 5,5
    moved_R[expt_type].append(data)
else:
    raise Exception('Env Type Error')

# Plot smoothed (window 50) reward traces for each model-free run
# on the original reward location.
for x in novel_R['MF']:
    dat = rm(x['total_reward'], 50)
    plt.plot(dat, alpha=0.5)
plt.show()

# NOTE(review): the triple-quoted block below is commented-out scratch code;
# its closing quotes are not visible in this chunk — do not edit past here
# without confirming where the string ends.
'''
pre_e_reward = []
pre_b_reward = []
for x in range(len(bootstrap_data)):
    print(x)
    pre_e_reward.append(bootstrap_data[x]['total_reward'])
    pre_b_reward.append(bootstrap_data[x]['bootstrap_reward'])

pre_m_reward = []
for x in range(len(modelfree_data)):
    pre_m_reward.append(modelfree_data[x]['total_reward'])
# Smoke test: train a model-free actor-critic agent on CartPole and plot results.
#env = gym.make('FrozenLake-v0', is_slippery=False)
env = gym.make('CartPole-v0')

# Sanity-check one transition through the environment API.
first_obs = env.reset()
action = env.action_space.sample()
next_obs, reward, done, __ = env.step(action)
print(first_obs, action, next_obs, reward, done)

## write network parameters
params = basic_agent_params(env)
print(vars(params))
params.hidden_types = ['linear', 'linear']
params.hidden_dims = [50, 50]
params.lr = 0.001

network = nets.ActorCritic(params)
print(network)

# Episodic memory disabled for this run — pure model-free control.
memory = None  # Memory(entry_size=params.action_dims, cache_limit=400)
agent = Agent(network, memory=memory)
agent.get_action = agent.MF_action

run = ex(agent, env)
run.run(5000, 250, printfreq=100, render=False)
#run.record_log(expt_type='test',env_name='FrozenLake-V0',n_trials = 0, dir='../Data/', file='test_environments.csv')

# Smoothed reward on top, raw policy/value losses below.
fig, ax = plt.subplots(2, 1, sharex=True)
ax[0].plot(rm(run.data['total_reward'], 100))
#ax[0].set_ylim([0,1])
for curve, tag in zip(run.data['loss'], ('p', 'v')):
    ax[1].plot(curve, label=tag)
ax[1].legend(bbox_to_anchor=(1.05, 0.95))
plt.show()
from basic.modules.Utils import running_mean as rm
from basic.modules.Utils.gridworld_plotting import plot_polmap, plot_pref_pol, plot_valmap, plot_world
from basic.Analysis.vis_bootstrap_pol_maps import daves_idea, plot_pol_evol, trajectories, plot_maps, plot_rewards

# Compare model-free vs EC+MF bootstrap runs on the linear track,
# one reward curve per learning rate.
filename = '../Data/linear_track.csv'
df = pd.read_csv(filename)

rewards = {'mf': {}, 'ecmf': {}}
for x in range(len(df)):
    run_id = df['run_id'].loc[x]
    lr = df['lr'].loc[x]
    expt = df['expt_type'].loc[x]
    # startswith replaces the original slice test expt[0:9] == 'Bootstrap'.
    if expt.startswith('Bootstrap'):
        dkey = 'ecmf'
    elif expt.startswith('gridworld'):
        dkey = 'mf'
    else:
        # Previously an unmatched expt_type silently reused the prior dkey
        # (or raised NameError on the first row); fail loudly instead.
        raise ValueError(f'Unrecognized expt_type: {expt}')
    with open(f'../Data/results/{run_id}_data.p', 'rb') as f:
        data = pickle.load(f)
    rewards[dkey][str(lr)] = data['total_reward']

# Smoothed curves: model-free on top, EC+MF bootstrap below.
smoothing = 30
fig, ax = plt.subplots(2, 1, sharex=True)
for i in rewards['mf']:
    ax[0].plot(rm(rewards['mf'][i], smoothing), label=i)
for i in rewards['ecmf']:
    ax[1].plot(rm(rewards['ecmf'][i], smoothing), label=i)
ax[0].legend(loc=0)
ax[1].legend(loc=0)
plt.show()
] big_dict = {'oh':oh, 'pc_01':pc_01, 'pc_02':pc_02, 'pc_03':pc_03} results_dict = {} for key, value in big_dict.items(): print(key) results_dict[key] = [] results = [] for id_num in value: print(id_num) file_addr = data_dir + f'{id_num}_data.p' print(f"-- loc: {file_addr}") with open(file_addr, 'rb') as f: try: print(data_dir) dats = pickle.load(f) except: print("!!!!!! !!!!! errors") reward_info = dats['total_reward'] results.append(reward_info) pp = np.vstack(results) smoothing = 100 avg_ = rm(np.mean(pp, axis=0),smoothing)[0:2000] std_ = rm(np.std(pp, axis=0), smoothing)[0:2000] results_dict[key].append(avg_) results_dict[key].append(std_)
# Collect per-controller reward traces for the selected environment, then
# plot smoothed mean ± std bands (index 0 = model-free, index 1 = EC).
for i, cont in enumerate(controllers):
    mask = ((df['environment'] == f'gym_grid:gridworld-v{env_types[env]}')
            & (df['controller'] == cont)
            & (df['n_trials'] == 5000))
    idx = np.where(mask)
    for run_id in df.loc[idx]['id']:
        with open(f'../../Data/results/{run_id}_data.p', 'rb') as f:
            record = pickle.load(f)
        trace = record['total_reward']
        print(run_id, len(trace))
        r_data[i].append(trace)

smoothing = 30
mf_stack = np.vstack(r_data[0])
ec_stack = np.vstack(r_data[1])
mf_r_avg = rm(np.mean(mf_stack, axis=0), smoothing)
mf_r_std = rm(np.std(mf_stack, axis=0), smoothing)
ec_r_avg = rm(np.mean(ec_stack, axis=0), smoothing)
ec_r_std = rm(np.std(ec_stack, axis=0), smoothing)

plt.plot(mf_r_avg)
plt.fill_between(np.arange(len(mf_r_std)), mf_r_avg - mf_r_std,
                 mf_r_avg + mf_r_std, alpha=0.5)
plt.plot(ec_r_avg)
plt.fill_between(np.arange(len(ec_r_std)), ec_r_avg - ec_r_std,
                 ec_r_avg + ec_r_std, alpha=0.5)
plt.show()