Example #1
import matplotlib.pyplot as plt

# `rm` is the running-mean smoothing helper (imported in Example #5 as
# basic.modules.Utils.running_mean).
def plot_rewards(data):
    # three stacked panels: reward, loss traces, gradient norms
    fig, ax = plt.subplots(3, 1, sharex=True)
    smoothing = 10
    ax[0].plot(rm(data['total_reward'], smoothing),
               c='k',
               alpha=0.5,
               label='EC')
    if 'bootstrap_reward' in data:
        ax[0].plot(rm(data['bootstrap_reward'], smoothing), c='r', label='MF')
    ax[0].set_ylabel('Reward')
    ax[0].legend(loc=0)

    ax[1].plot(rm(data['loss'][0], smoothing), label='ec_p')
    ax[1].plot(rm(data['loss'][1], smoothing), ':', label='ec_v')
    if 'mf_loss' in data:
        ax[1].plot(rm(data['mf_loss'][0], smoothing), label='mf_p')
        ax[1].plot(rm(data['mf_loss'][1], smoothing), ':', label='mf_v')
    ax[1].set_ylabel('Loss')
    ax[1].legend(loc=0)

    if 'weights' in data:
        ax[2].plot(rm(data['weights']['h0'], smoothing),
                   ':',
                   label='h0',
                   c='k',
                   alpha=0.5)
        ax[2].plot(rm(data['weights']['h1'], smoothing),
                   label='h1',
                   c='k',
                   alpha=0.7)
        ax[2].plot(rm(data['weights']['p'], smoothing), label='p', c='r')
        ax[2].plot(rm(data['weights']['v'], smoothing), label='v', c='g')
        ax[2].set_ylabel('Gradient\nNorm')
        ax[2].legend(loc=0)

    plt.show()
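Every example here smooths its traces with rm, which Example #5 imports as running_mean from basic.modules.Utils. A minimal sketch of such a helper, assuming a plain sliding-window average (the repo's exact implementation may differ):

import numpy as np

def running_mean(arr, window):
    # mean over a sliding window of length `window`
    cumsum = np.cumsum(np.insert(np.asarray(arr, dtype=float), 0, 0))
    return (cumsum[window:] - cumsum[:-window]) / window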
Example #2
import numpy as np
import matplotlib.pyplot as plt

# Per-run reward traces; bootstrap_data and modelfree_data are lists of
# result dicts loaded from pickle files (see Example #3).
pre_e_reward = [run['total_reward'] for run in bootstrap_data]
pre_b_reward = [run['bootstrap_reward'] for run in bootstrap_data]
pre_m_reward = [run['total_reward'] for run in modelfree_data]

e_reward = np.mean(np.vstack(pre_e_reward), axis=0)
e_std = np.std(np.vstack(pre_e_reward), axis=0)

b_reward = np.mean(np.vstack(pre_b_reward), axis=0)
b_std = np.std(np.vstack(pre_b_reward), axis=0)

m_reward = np.mean(np.vstack(pre_m_reward), axis=0)
m_std = np.std(np.vstack(pre_m_reward), axis=0)

smoothing = 100
smooth_e = rm(e_reward, smoothing)
smooth_e_std = rm(e_std, smoothing)

smooth_m = rm(m_reward, smoothing)
smooth_m_std = rm(m_std, smoothing)

smooth_b = rm(b_reward, smoothing)
smooth_b_std = rm(b_std, smoothing)

plt.figure(0, figsize=(5, 5))
plt.plot(smooth_m, label='model free', color='gray')
plt.fill_between(np.arange(len(smooth_m)),
                 smooth_m - smooth_m_std,
                 smooth_m + smooth_m_std,
                 alpha=0.5,
                 color='gray')
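The example is cut off after the model-free curve; the episodic and bootstrap traces computed above would presumably be plotted the same way (labels and styling below are assumed):

plt.plot(smooth_e, label='episodic')
plt.fill_between(np.arange(len(smooth_e)),
                 smooth_e - smooth_e_std,
                 smooth_e + smooth_e_std,
                 alpha=0.5)
plt.plot(smooth_b, label='bootstrap')
plt.fill_between(np.arange(len(smooth_b)),
                 smooth_b - smooth_b_std,
                 smooth_b + smooth_b_std,
                 alpha=0.5)
plt.legend(loc=0)
plt.show()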
Example #3
import pickle
import matplotlib.pyplot as plt

# The snippet begins mid-loop; a plausible setup (dict keys assumed from
# the 'MF' lookup below) iterates over the rows of id_data:
novel_R = {'MF': [], 'EC': []}
moved_R = {'MF': [], 'EC': []}
for i in range(len(id_data)):
    run_id = id_data.iloc[[i]]['run_id'].item()
    env_type = id_data.iloc[[i]]['env_type'].item()
    expt_type = id_data.iloc[[i]]['expt_type'].item()

    with open(f'../Data/results/{run_id}_data.p', 'rb') as f:
        data = pickle.load(f)

    if env_type == 'gym_grid:gridworld-v1':  #reward at 5,5
        novel_R[expt_type].append(data)
    elif env_type == 'gym_grid:gridworld-v11':  # reward at 10,10, MF trained on 5,5
        moved_R[expt_type].append(data)
    else:
        raise ValueError(f'unexpected env_type: {env_type}')

for x in novel_R['MF']:
    dat = rm(x['total_reward'], 50)
    plt.plot(dat, alpha=0.5)

plt.show()
# The source file comments out (via an unterminated triple-quoted string)
# the setup that builds the per-run reward lists used in Example #2:
pre_e_reward = [run['total_reward'] for run in bootstrap_data]
pre_b_reward = [run['bootstrap_reward'] for run in bootstrap_data]

pre_m_reward = [run['total_reward'] for run in modelfree_data]
Example #4
import gym
import matplotlib.pyplot as plt

# basic_agent_params, nets, Agent, ex, and rm are project helpers
# (see the imports in Example #5).
#env = gym.make('FrozenLake-v0', is_slippery=False)
env = gym.make('CartPole-v0')
s_ = env.reset()
a = env.action_space.sample()
s, r, done, _ = env.step(a)  # classic gym step API: (obs, reward, done, info)
print(s_, a, s, r, done)

## set up network parameters
params = basic_agent_params(env)
print(params.__dict__)
params.hidden_types = ['linear', 'linear']
params.hidden_dims = [50, 50]
params.lr = 0.001

network = nets.ActorCritic(params)
print(network)
memory = None  #Memory(entry_size=params.action_dims, cache_limit=400)
agent = Agent(network, memory=memory)
agent.get_action = agent.MF_action
run = ex(agent, env)

run.run(5000, 250, printfreq=100, render=False)  # presumably n_trials=5000, 250 steps per trial
#run.record_log(expt_type='test',env_name='FrozenLake-V0',n_trials = 0, dir='../Data/', file='test_environments.csv')

fig, ax = plt.subplots(2, 1, sharex=True)
ax[0].plot(rm(run.data['total_reward'], 100))
#ax[0].set_ylim([0,1])
ax[1].plot(run.data['loss'][0], label='p')
ax[1].plot(run.data['loss'][1], label='v')
ax[1].legend(bbox_to_anchor=(1.05, 0.95))
plt.show()
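The commented-out record_log call above shows how a finished run gets appended to the experiment log; adapted to this run it would presumably look like the following (env_name and n_trials here are assumptions):

run.record_log(expt_type='test', env_name='CartPole-v0', n_trials=5000,
               dir='../Data/', file='test_environments.csv')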
Example #5
import pickle
import pandas as pd
import matplotlib.pyplot as plt

from basic.modules.Utils import running_mean as rm
from basic.modules.Utils.gridworld_plotting import plot_polmap, plot_pref_pol, plot_valmap, plot_world
from basic.Analysis.vis_bootstrap_pol_maps import daves_idea, plot_pol_evol, trajectories, plot_maps, plot_rewards

filename = '../Data/linear_track.csv'
df = pd.read_csv(filename)
rewards = {'mf':{}, 'ecmf':{}}
for x in range(len(df)):
    run_id = df['run_id'].loc[x]
    lr = df['lr'].loc[x]
    if df['expt_type'].loc[x].startswith('Bootstrap'):
        dkey = 'ecmf'
    elif df['expt_type'].loc[x].startswith('gridworld'):
        dkey = 'mf'
    else:
        raise ValueError(f"unexpected expt_type: {df['expt_type'].loc[x]}")
    with open(f'../Data/results/{run_id}_data.p', 'rb') as f:
        data = pickle.load(f)

    rewards[dkey][str(lr)] = data['total_reward']

smoothing = 30
fig, ax = plt.subplots(2, 1, sharex=True)
for i in rewards['mf'].keys():
    ax[0].plot(rm(rewards['mf'][i], smoothing), label=i)
for i in rewards['ecmf'].keys():
    ax[1].plot(rm(rewards['ecmf'][i], smoothing), label=i)
ax[0].legend(loc=0)
ax[1].legend(loc=0)
plt.show()


Example #6
import pickle
import numpy as np

# oh, pc_01, pc_02, pc_03 are lists of run ids (their definitions are
# truncated above); data_dir points at the results folder,
# e.g. '../Data/results/'.
big_dict = {'oh': oh, 'pc_01': pc_01, 'pc_02': pc_02, 'pc_03': pc_03}

results_dict = {}
for key, value in big_dict.items():
    print(key)
    results_dict[key] = []
    results = []

    for id_num in value:
        print(id_num)
        file_addr = data_dir + f'{id_num}_data.p'
        print(f"--  loc:  {file_addr}")
        with open(file_addr, 'rb') as f:
            try:
                dats = pickle.load(f)
            except Exception as err:
                print(f"!!  failed to load {file_addr}: {err}")
                continue
        results.append(dats['total_reward'])

    pp = np.vstack(results)

    smoothing = 100
    avg_ = rm(np.mean(pp, axis=0), smoothing)[0:2000]
    std_ = rm(np.std(pp, axis=0), smoothing)[0:2000]
    results_dict[key].append(avg_)
    results_dict[key].append(std_)
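results_dict now maps each condition to [smoothed mean, smoothed std], so it can be plotted with the same mean-plus/minus-std pattern used in Examples #2 and #7; a minimal sketch (legend labels taken from the dict keys):

import matplotlib.pyplot as plt

for key, (avg_, std_) in results_dict.items():
    plt.plot(avg_, label=key)
    plt.fill_between(np.arange(len(avg_)), avg_ - std_, avg_ + std_, alpha=0.3)
plt.legend(loc=0)
plt.show()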
Example #7

import pickle
import numpy as np
import matplotlib.pyplot as plt

# Assumes df (the experiment log), controllers (presumably ['MF', 'EC'],
# matching the r_data indexing below), env_types, env, and r_data (one
# empty list per controller) are defined above.
for i, cont in enumerate(controllers):
    idx = np.where((df['environment'] == f'gym_grid:gridworld-v{env_types[env]}')
                   & (df['controller'] == cont)
                   & (df['n_trials'] == 5000))


    for value in df.loc[idx]['id']:
        with open(f'../../Data/results/{value}_data.p', 'rb') as f:
            d = pickle.load(f)
            r = d['total_reward']
            print(value, len(r))
            r_data[i].append(r)

smoothing = 30

mf_r_avg = rm(np.mean(np.vstack(r_data[0]), axis=0), smoothing)
mf_r_std = rm(np.std(np.vstack(r_data[0]), axis=0), smoothing)

ec_r_avg = rm(np.mean(np.vstack(r_data[1]), axis=0), smoothing)
ec_r_std = rm(np.std(np.vstack(r_data[1]), axis=0), smoothing)

plt.plot(mf_r_avg, label='MF')
plt.fill_between(np.arange(len(mf_r_std)), mf_r_avg - mf_r_std, mf_r_avg + mf_r_std, alpha=0.5)

plt.plot(ec_r_avg, label='EC')
plt.fill_between(np.arange(len(ec_r_std)), ec_r_avg - ec_r_std, ec_r_avg + ec_r_std, alpha=0.5)

plt.legend(loc=0)

plt.show()