def plot_tactical():
    """Plot the smoothed per-episode reward from the OpenAI tactical CSV log."""
    # CSV rows: (step count, episode reward).
    tactical_data = np.genfromtxt(
        '/home/jazz/Documents/openai-results/open-ai-tactical.csv',
        delimiter=',')
    steps = [row[0] for row in tactical_data]
    smoothed_rewards = pu.smooth([row[1] for row in tactical_data], radius=1)
    plt.plot(steps, smoothed_rewards,
             linewidth=1,
             linestyle=lines[0],
             color=colors[0])
    plt.xlabel('Number of steps')
    plt.ylabel('Reward per episode')
    plt.show()
Beispiel #2
0
def main(logs='logs-arm', n_arm=3, smooth=True):
    """Plot test success rate vs. cumulative epochs for every run under logs/<n_arm>."""
    run_dirs = glob.glob(join(logs, str(n_arm), '*'))
    for run_dir in run_dirs:
        result = pu.load_results(run_dir)[0]
        epochs = np.cumsum(result[1].epoch).values
        success = result[1]['test/success_rate'].values
        if smooth:
            # Light smoothing to suppress epoch-to-epoch noise.
            success = pu.smooth(success, radius=2)
        plt.plot(epochs, success, label=basename(run_dir))
    plt.legend()
    plt.title('Number of arms: {}'.format(n_arm))
    plt.show()
Beispiel #3
0
def plot_training(experiment_no, experiment_dir):
    """Plot smoothed episode rewards (raw rewards faded behind) for every run
    found in *experiment_dir*, then save the figure next to the logs.
    """
    results = plot_util.load_results(experiment_dir, verbose=True)
    results.sort(key=lambda run: run.dirname)
    fig, ax = plt.subplots()
    palette = ['b', 'g', 'r', 'm', 'y', 'c']
    ax.grid(color='#dddddd', linestyle='-', linewidth=1)
    # Heavily smoothed curves first, one color per run (cycling the palette).
    for idx, run in enumerate(results):
        run_color = palette[idx % len(palette)]
        plt.plot(np.cumsum(run.monitor.l),
                 plot_util.smooth(run.monitor.r, radius=200),
                 color=run_color)
    # Raw rewards drawn faintly in the matching colors.
    for idx, run in enumerate(results):
        run_color = palette[idx % len(palette)]
        plt.plot(np.cumsum(run.monitor.l), run.monitor.r,
                 alpha=0.1, color=run_color)
    ax.ticklabel_format(axis='x', style='sci', scilimits=(0, 0))
    ax.set_xlim(left=0)
    plt.tight_layout(pad=2)
    plt.xlabel('time steps')
    plt.ylabel('episode_reward')
    plt.title('experiment_{}'.format(experiment_no), fontsize=10)
    plt.legend([os.path.basename(run.dirname) for run in results])
    plt.savefig(os.path.join(experiment_dir, 'training_{}.png'.format(experiment_no)))
    plt.show()
def individual_graph():
    """Plot each run's smoothed per-episode reward against cumulative steps."""
    # Smoothing is a window convolution:
    # https://stackoverflow.com/a/20036959/7308982
    for idx, result in enumerate(results):
        plt.plot(np.cumsum(result.monitor.l),
                 pu.smooth(result.monitor.r, radius=1),
                 linewidth=1,
                 linestyle=lines[idx],
                 color=colors[idx])

    plt.xlim(right=500)
    plt.xlabel('Number of steps')
    plt.ylabel('Average reward per episode')

    plt.show()
def average_graph():
    """Average per-episode rewards across all runs (truncated to the shortest
    run, capped at 10000 episodes) and plot the smoothed curve.
    """
    # Shortest episode count across runs; 10000 acts as an upper cap.
    min_number_episodes = 10000
    for idx, result in enumerate(results):
        number_episodes = len(result.monitor.r)
        print('Results {}: number of episodes = {}'.format(idx, number_episodes))
        min_number_episodes = min(min_number_episodes, number_episodes)

    # Sum rewards episode-by-episode across all runs.
    total_reward_per_episode = [0] * min_number_episodes
    for result in results:
        rewards = result.monitor.r
        for episode in range(min_number_episodes):
            total_reward_per_episode[episode] += rewards[episode]

    average_reward_per_episode = [
        total / len(results) for total in total_reward_per_episode
    ]
    print(min_number_episodes)

    plt.plot(range(len(total_reward_per_episode)),
             pu.smooth(average_reward_per_episode, radius=1),
             linewidth=1,
             linestyle=lines[0],
             color=colors[0])
    plt.xlabel('Number of episodes')
    plt.ylabel('Average reward per episode')
    plt.show()
Beispiel #6
0
def tr_fn(r):
    """Map a result to (wall-clock time, smoothed episode reward)."""
    return r.monitor.t, smooth(r.monitor.r, radius=10)
Beispiel #7
0
def lr_fn(r):
    """Map a result to (cumulative episode lengths, smoothed episode reward)."""
    return np.cumsum(r.monitor.l), smooth(r.monitor.r, radius=10)
def xy_fn(r):
    """Map a result to (steps in thousands, smoothed mean-100-episode reward)."""
    steps_in_thousands = np.divide(r.progress.steps, 1000)
    smoothed_reward = smooth(r.progress["mean 100 episode reward"], radius=1)
    return steps_in_thousands, smoothed_reward
Beispiel #9
0
from baselines.common import plot_util as pu
# Load the HER push-task run trained in MuJoCo (250k timesteps).
mujoco_results = pu.load_results(
    '/home/developer/logs/her_pyrobot_push_mujoco/250k_v2')
#results = pu.load_results('/home/developer/logs/her_pyrobot_reach/joint_100k_v4')
import matplotlib.pyplot as plt
import numpy as np

mujoco_r = mujoco_results[0]
# Progress log as a 2-D array: one row per epoch, one column per logged metric.
mujoco_arr = np.array(mujoco_r.progress)
epoch = mujoco_arr[:, 0]
# NOTE(review): columns 7 and 9 presumably hold test/train success rates —
# confirm against the progress.csv header for this run.
mujoco_test_success_rate = mujoco_arr[:, 7]
mujoco_train_success_rate = mujoco_arr[:, 9]
plt.suptitle(
    'LocoBot Push Task Trained in MuJoCo with 250k timesteps(100 epoches)~')

plt.title('Training in Mujoco')
# Radius-10 smoothing suppresses epoch-to-epoch noise in both curves.
plt.plot(epoch,
         pu.smooth(mujoco_train_success_rate, radius=10),
         label="mujoco_train")
plt.plot(epoch,
         pu.smooth(mujoco_test_success_rate, radius=10),
         label="mujoco_test")
plt.xlabel('epoches')
plt.ylabel('success_rate')
plt.legend()
#plt.plot(np.cumsum(r.monitor.l), pu.smooth(r.monitor.r, radius=10))
plt.show()
Beispiel #10
0
# Persist the trained model and release the monitored environment.
model.save('berater-ppo-v8.pkl')
monitored_env.close()

##################################################

print("--- Plot ---")

from baselines.common import plot_util as pu
results = pu.load_results(log_dir)

import matplotlib.pyplot as plt
import numpy as np
r = results[0]
plt.ylim(0, .75)
# plt.plot(np.cumsum(r.monitor.l), r.monitor.r)
# Smoothed episode reward vs. cumulative timesteps.
plt.plot(np.cumsum(r.monitor.l), pu.smooth(r.monitor.r, radius=100))

##################################################

import numpy as np 

print("--- Enjoy ---")


BeraterEnv.BeraterEnv.showStep = True


# NOTE(review): both loops reuse the variable `c`, and `observation` is never
# advanced by an env.step call inside the inner loop, so the model is stepped
# repeatedly on the reset observation — looks like an incomplete rollout loop;
# confirm against the original example.
for c in range(1000):
    observation = env.reset()
    for c in range(1000):
        actions, _, state, _ = model.step(observation)
Beispiel #11
0
from baselines.common import plot_util as pu
import matplotlib.pyplot as plt
import numpy as np

# Waits for a line on stdin before loading; the value itself is unused.
num = input()
results = pu.load_results('logs/single_ppo_092901')

r = results[0]
print(r.monitor.r)

# Smoothed episode reward vs. cumulative episode lengths (timesteps).
plt.plot(np.cumsum(r.monitor['l']), pu.smooth(r.monitor['r'], radius=10))
plt.show()

# # plt.plot(r.progress.total_timesteps, r.progress.eprewmean)
Beispiel #12
0
import json
import numpy as np
import matplotlib.pyplot as plt
from baselines.common import plot_util as pu

# Experiment to plot; results and params live under ../policies/<exp_id>/.
exp_id = 'payload_empty_her100_demo'
results = pu.load_results('../policies/{}'.format(exp_id))[0]
epoch = results.progress["epoch"]
trainscs = results.progress["train/success_rate"]
testscs = results.progress["test/success_rate"]

# Light smoothing (radius=1) before plotting train/test success rates.
plt.plot(epoch, pu.smooth(trainscs, radius=1), label="Train")
plt.plot(epoch, pu.smooth(testscs, radius=1), label="Test")
# plt.plot(epoch,trainscs,label="Train")
# plt.plot(epoch,testscs,label="Test")
plt.xlabel('Epoch')
plt.ylabel('Success Rate')

# Fix: the params file was previously opened without ever being closed; a
# context manager releases the handle deterministically.
with open('../policies/{}/params.json'.format(exp_id)) as f:
    params = json.load(f)
plt.title(params['env_name'])
plt.legend()
#plt.show()
#print(results.progress)
plt.savefig('plots/exp_{}_results.png'.format(exp_id))
Beispiel #13
0
def plot():
    """Plot smoothed episode rewards from the PPO 50M monitor CSV."""
    results = pu.read_csv('results/baselines/ppo_50m.monitor.csv')
    cumulative_steps = np.cumsum(results.l)
    plt.plot(cumulative_steps, pu.smooth(results.r, radius=100))
    plt.show()
Beispiel #14
0
from baselines.common import plot_util as plot
import matplotlib.pyplot as plt
import numpy as np

# Load every HER run found under ~/logs/HER.
results = plot.load_results('~/logs/HER')
print(results)
r = results[0]
# x: cumulative episode lengths (timesteps); y: smoothed episode reward.
plt.plot(np.cumsum(r.monitor.l), plot.smooth(r.monitor.r,
                                             radius=10))  # larger radius -> smoother curve
plt.xlabel('Iterations')
plt.ylabel('Reward')
plt.show()