예제 #1
0
def plot_outputs(entries):
    """
    Plot a smoothed, bounded reward curve for every model in a DataFrame.

    Parameters
    ----------
    entries : pandas.DataFrame
        Must contain the columns 'model', 'ts', 'rewards' and 'color';
        each 'ts'/'rewards' cell is a pandas Series for one training run.

    Returns
    -------
    None.  All models are drawn onto a single shared axis via
    ``smooth_bounded_curve``.
    """
    all_models = np.unique(np.array(entries['model']))
    ax = None  # shared axis so every model lands on the same figure
    for model in all_models:
        entries_of_model = entries.loc[entries['model'] == model]

        # Overlapping time range of all runs for THIS model.  (The previous
        # version cached ts_min/ts_max via `'x' not in locals()`, which made
        # the values leak across iterations of the model loop and mixed the
        # time ranges of different models.)
        ts_min = max(int(run.iloc[0]) for run in entries_of_model['ts'])
        ts_max = min(int(run.iloc[-1]) for run in entries_of_model['ts'])
        ts = np.linspace(ts_min, ts_max, num=1000)

        # Resample every run's rewards onto the common time grid, then stack
        # into a (num_runs, 1000) matrix.  Accumulating in a list replaces the
        # old bare try/except-vstack control flow.
        rows = []
        for run_ts, run_rew in zip(entries_of_model['ts'], entries_of_model['rewards']):
            rew_f = interp1d(run_ts.to_numpy(dtype=float), run_rew.to_numpy(dtype=float))
            rows.append(rew_f(ts))
        rew = np.vstack(rows)  # always 2-D, so no expand_dims fixup is needed

        rew[np.isnan(rew)] = 0  # zero out interpolation artifacts
        col = entries_of_model['color'].iloc[0]
        fig, ax = smooth_bounded_curve(data=np.transpose(rew), time_steps=ts,
                                       label=model, ax=ax, color=col, alpha=0.1)
예제 #2
0
    # NOTE(review): tail of a directory-scan loop whose header is outside this
    # chunk -- logs each scanned path and collects the loaded workspace/model
    # pair for the plotting code below.
    print(entry.path)
    ws_list.append(ws)
    model_list.append(model)

# Flush the previous figure, then build a (max_size, n_runs) reward matrix;
# runs shorter than max_size are left zero-padded at the tail.
plt.show()
plt.figure()
rewards = np.zeros((max_size, len(ws_list)))

for col, workspace in enumerate(ws_list):
    history = np.array(workspace["raw_rew_hist"])
    rewards[:history.shape[0], col] = history

fig, ax = smooth_bounded_curve(rewards, window=100)
plt.show()

# %%

#ws = ws_list[-1]
#model = model_list[-1]

# One figure per logged quantity: return as black dots, policy loss as a line.
for key, title, fmt in (('raw_rew_hist', 'Return', 'ko'),
                        ('pol_loss_hist', 'Policy loss', 'k')):
    plt.plot(ws[key], fmt)
    plt.title(title)
    plt.show()
예제 #3
0
파일: ugh.py 프로젝트: sgillen/ssac
# Load every workspace under `directory`, tracking the shortest reward
# history so all runs can later be truncated to a common length.
for entry in os.scandir(directory):
    model, env, args, ws = load_workspace(entry.path)
    min_length = min(min_length, len(ws["raw_rew_hist"]))
    ws_list.append(ws)
    model_list.append(model)

# Stack the truncated histories into a (min_length, n_runs) matrix.
min_length = int(min_length)
rewards = np.zeros((min_length, len(ws_list)))
for col, workspace in enumerate(ws_list):
    rewards[:, col] = np.array(workspace["raw_rew_hist"][:min_length])

print("seagul", rewards[-1, :].mean(), rewards[-1, :].std())
fig, ax = smooth_bounded_curve(rewards)
ssac_size = rewards.shape[0]

# Overlay baseline results (one sub-directory per algorithm) on the same plot,
# cycling through a fixed color list.
color_iter = iter(['b', 'g', 'y', 'm', 'c'])
log_dir = jup_dir + 'ssac/rl-baselines-zoo/baseline_log2/'
for algo in os.scandir(log_dir):
    try:
        df_list = []
        min_length = float('inf')  # shortest run, for common truncation later

        for entry in os.scandir(algo.path):
            df = load_results(entry.path)

            if len(df['r']) < min_length:
                min_length = len(df['r'])
        # NOTE(review): loop body and the matching except clause continue past
        # this chunk; left unchanged.
예제 #4
0
# Collect every trial's progress.csv under the PPO tune directory; trials
# that died before writing a progress file are skipped.
directory = script_dir + "/data/tune/euler_but_working/PPO"

df_list = []
for entry in os.scandir(directory):
    try:
        df_list.append(pd.read_csv(entry.path + "/progress.csv"))
    except FileNotFoundError:
        pass

# Assumes every surviving trial logged the same number of epochs as the first.
rewards = np.zeros((df_list[0]['episode_reward_mean'].shape[0], len(df_list)))
for col, df in enumerate(df_list):
    rewards[:, col] = df['episode_reward_mean']

smooth_bounded_curve(rewards)

# %%


def do_rollout(init_point):
    """
    Roll out one episode in a freshly constructed env, starting from
    *init_point*.

    NOTE(review): the definition is truncated in this chunk -- the episode
    loop and return statement lie outside the visible region.  Also assumes
    ``env.reset`` accepts a positional start state (non-standard gym API) --
    TODO confirm against the env implementation.
    """
    env = gym.make(env_name, **config['env_config'])
    obs = env.reset(init_point)

    # Per-step trajectory buffers filled by the (unseen) episode loop.
    action_hist = []
    m_act_hist = []
    obs_hist = []
    reward_hist = []

    done = False
예제 #5
0
        "init_noise_max": 10,
    }

    alg_config = {
        "env_name": env_name,
        "model": model,
        "total_steps": 2e6,
        "epoch_batch_size": 1024,
        "sgd_batch_size": 512,
        "lam": .2,
        "gamma": .95,
        "env_config": env_config,
        "sgd_epochs": 30,
        "reward_stop": 300
    }

    seeds = np.random.randint(0,2**32,8)
    pool = Pool(processes=8)


#    results = run_and_test(seeds[0])
    results = pool.map(run_and_test, seeds)


    results = chop_returns(results)
    results = np.array(results).transpose(1,0)

    smooth_bounded_curve(results)
    plt.show()

예제 #6
0
        # NOTE(review): tail of an episode loop whose header is outside this
        # chunk; cur_step counts environment steps.
        cur_step += 1

    # Stack the per-step lists into episode-shaped tensors.
    ep_obs1 = torch.stack(obs1_list)
    ep_acts = torch.stack(acts_list).reshape(-1, act_size)  # (T, act_size)
    ep_rews = torch.stack(rews_list).reshape(-1, 1)         # (T, 1)
    ep_obs2 = torch.stack(obs2_list)
    ep_path = torch.tensor(path_list).reshape(-1, 1)

    # NOTE(review): ep_obs2 is built but never returned -- possibly intentional,
    # possibly a bug; left unchanged since the full function is not visible.
    return ep_obs1, ep_acts, ep_rews, ep_path


# %%

# Reward curve for the 50k_slow_longer needle trials: one column per trial,
# logged every 50 environment steps.
ws_list, model_list, rewards = load_trials(
    "seagul/seagul/notebooks/switching/data_needle/50k_slow_longer")
steps = [50 * idx for idx in range(rewards.shape[0])]
fig, ax = smooth_bounded_curve(rewards, time_steps=steps)
plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
ax.ticklabel_format(axis='x', style='sci')
ax.set_title('Reward Curve')
plt.show()

#fig.savefig('reward_curve.pdf')

# Inspect the most recent trial.
ws = ws_list[-1]
model = model_list[-1]

#%%
obs_hist, act_hist, rew_hist, lqr_on = do_rollout()
print(lqr_on)

# Time axis at 0.2 s per control step.
t = np.array([idx * .2 for idx in range(act_hist.shape[0])])
예제 #7
0
    # NOTE(review): tail of a workspace-loading loop whose header is outside
    # this chunk; accumulates workspaces/models for the plotting code below.
    ws_list.append(ws)
    model_list.append(model)

# Truncate every run to the shortest one and stack into (min_length, n_runs).
min_length = int(min_length)
rewards = np.zeros((min_length, len(ws_list)))
for col, workspace in enumerate(ws_list):
    rewards[:, col] = np.array(workspace["raw_rew_hist"][:min_length])

print("seagul", rewards[-1, :].mean(), rewards[-1, :].std())
ssac_size = rewards.shape[0]

# Shift the curve to start at the 1e6-step mark on a 2e6-step axis (51 env
# steps per logged epoch); cells outside the run stay NaN so they aren't drawn.
offset = int(1e6 / 51)
shifted_reward = np.nan * np.ones((int(2e6 / 51), 8))
shifted_reward[offset:offset + ssac_size] = rewards
fig, ax = smooth_bounded_curve(
    shifted_reward,
    time_steps=[51 * step for step in range(shifted_reward.shape[0])])

# Overlay rl-baselines-zoo results, one color per algorithm directory.
color_iter = iter(['b', 'g', 'y', 'm', 'c'])
log_dir = script_path + '../rl-baselines-zoo/baseline_log2/'
for algo in os.scandir(log_dir):
    try:
        df_list = []
        min_length = float('inf')  # shortest run, for common truncation later

        for entry in os.scandir(algo.path):
            df = load_results(entry.path)

            if len(df['r']) < min_length:
                min_length = len(df['r'])
        # NOTE(review): loop body and the matching except clause continue past
        # this chunk; left unchanged.
예제 #8
0
# %% md

# Needle sac can be made to work well

## Observation: Bigger networks and longer runs improve performance (shocking...)

## worth noting the one successful rllib trial from last week was a [256, 256] network, and trying to replicate those results with a [32,32] failed

# %%

# Side-by-side comparison of small vs large hidden layers on the needle task.
fig, ax = plt.subplots(1, 2, figsize=(16, 6))

for axis, path, title in (
        (ax[0],
         "seagul/seagul/notebooks/switching2/data_needle/long_small_strong",
         'Hidden sizes: (32,32)'),
        (ax[1],
         "seagul/seagul/notebooks/switching2/data_needle/less_hack",
         'Hidden sizes: (256,256)')):
    ws_list, model_list, rewards = load_trials(path)
    smooth_bounded_curve(rewards, ax=axis)
    axis.set_title(title)

# Keep the most recent trial (from the last directory loaded) for inspection.
ws = ws_list[-1]
model = model_list[-1]

# %% md

# Again, reasonably robust to initial conditions

### This time environment is reset normally, but with randomized initial *position* (initial velocity is always zero, learning degrades dramatically with nonzero starting velocities)