def finish(self, nb_train_steps, nb_epoch_steps, nb_warmup_steps=0):
    """Plot the recorded metrics against learning steps and save them as PNGs.

    Iterates over ``self.records`` and writes one image per metric into
    ``self.save_dir``:

    * a 3-D record produces one figure per index ``i`` of its second axis,
      saved as ``<key>_eval_<i>.png``;
    * a non-empty 1-D/2-D record whose key is ``test_reward``,
      ``mean_nlogdetcov`` or ``online_reward`` produces ``<key>.png``;
    * any other record is skipped.

    Every figure is closed after it is saved (or if plotting fails), so
    repeated calls and records with many evaluation slices do not
    accumulate open matplotlib figures.

    Args:
        nb_train_steps: Last learning step shown on the x axis.
        nb_epoch_steps: Spacing between consecutive x-axis values.
        nb_warmup_steps: If the first plotted x value is smaller than this,
            a vertical black line is drawn at ``x = nb_warmup_steps``.
    """
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib import pyplot as plt
    from util import mstd

    x_vals = range(0, nb_train_steps + 1, nb_epoch_steps)
    summary_keys = ('test_reward', 'mean_nlogdetcov', 'online_reward')

    def _draw_band(ax, xs, samples):
        # Draw the centre line and shaded band computed by mstd().
        # NOTE(review): mstd presumably returns a mean plus lower/upper
        # band bounds -- confirm against util.mstd.
        m, ids25, ids75 = mstd(samples)
        ax.plot(xs, m, color='b')
        ax.fill_between(xs, list(ids75), list(ids25),
                        facecolor='b', alpha=0.2)

    def _decorate_and_save(fig, ax, key, xs, filename):
        # Apply the shared axis styling, mark the warm-up boundary and save.
        if key == 'mean_nlogdetcov':
            ax.set_ylim(-1500, 3000)
        ax.grid()
        ax.set_ylabel(key)
        ax.set_xlabel('Learning Steps')
        if xs[0] < nb_warmup_steps:
            ax.axvline(x=nb_warmup_steps, color='k')
        fig.savefig(os.path.join(self.save_dir, filename))

    # Plot online reward and test reward.
    for k, v in self.records.items():
        v = np.array(v)
        if len(v.shape) == 3:
            # If the number of warmup steps > the number of epoch steps or
            # no episode has completed, v may be shorter than x_vals, so
            # only the trailing len(v) x values are used.
            xs = x_vals[-len(v):]
            for i in range(v.shape[1]):
                f0, ax0 = plt.subplots()
                try:
                    _draw_band(ax0, xs, v[:, i, :].T)
                    _decorate_and_save(f0, ax0, k, xs,
                                       "%s_eval_%d.png" % (k, i))
                finally:
                    # Release the figure even when plotting/saving raises.
                    plt.close(f0)
        elif len(v) > 0 and k in summary_keys:
            # Same tail alignment as above.
            xs = x_vals[-len(v):]
            f0, ax0 = plt.subplots()
            try:
                if len(v.shape) == 2:
                    _draw_band(ax0, xs, v.T)
                else:
                    ax0.plot(xs, v, color='b')
                _decorate_and_save(f0, ax0, k, xs, "%s.png" % k)
            finally:
                # Release the figure even when plotting/saving raises.
                plt.close(f0)
def batch_plot(list_records, save_dir, nb_train_steps, nb_epoch_steps, is_target_tracking=False):
    """Aggregate several record dicts and save one summary plot per metric.

    For each of ``online_reward`` and ``test_reward`` (plus
    ``mean_nlogdetcov`` when ``is_target_tracking`` is true), the entry of
    every record is converted to an array, transposed and stacked. Each
    figure shows the centre line and band returned by ``util.mstd`` in
    black, the column-wise maximum in blue and the column-wise minimum in
    red.

    A 4-D stack yields one figure per index of its third axis, saved as
    ``<key>_eval_<i>.png``; otherwise a single ``<key>.png`` is written
    (a 3-D stack is first concatenated along its first axis).

    Note:
        Each ``list_records[j][key]`` is replaced in place by its
        ``np.array`` conversion.

    Args:
        list_records: Iterable of dict-like records holding the metrics.
        save_dir: Directory the PNG files are written to.
        nb_train_steps: Last learning step shown on the x axis.
        nb_epoch_steps: Spacing between consecutive x-axis values.
        is_target_tracking: Also plot ``mean_nlogdetcov`` when true.
    """
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib import pyplot as plt
    from util import mstd

    metric_names = ['online_reward', 'test_reward']
    if is_target_tracking:
        metric_names.append('mean_nlogdetcov')
    collected = {name: [] for name in metric_names}

    for record in list_records:
        for name in metric_names:
            # The converted array is written back into the caller's record.
            record[name] = np.array(record[name])
            collected[name].append(record[name].T)

    steps = range(0, nb_train_steps + 1, nb_epoch_steps)

    def _render(name, curves, file_name):
        # Draw band, max and min of `curves` (rows = runs) and save the figure.
        fig, axis = plt.subplots()
        center, low, high = mstd(curves)
        # Align with the trailing x values; curves may be shorter than steps.
        xs = steps[-curves.shape[1]:]
        axis.plot(xs, center, color='k')
        axis.fill_between(xs, list(high), list(low), facecolor='k', alpha=0.2)
        axis.plot(xs, np.max(curves, axis=0), color='b')
        axis.plot(xs, np.min(curves, axis=0), color='r')
        axis.grid()
        if name == 'mean_nlogdetcov':
            axis.set_ylim(-1500, 3000)
        fig.savefig(os.path.join(save_dir, file_name))
        plt.close()

    for name in metric_names:
        stacked = np.array(collected[name])
        if len(stacked.shape) != 4:
            if len(stacked.shape) == 3:
                stacked = np.concatenate(stacked, axis=0)
            _render(name, stacked, name + ".png")
            continue
        # One figure per slice along the third axis.
        for idx in range(stacked.shape[2]):
            merged = np.concatenate(stacked[:, :, idx, :], axis=0)
            _render(name, merged, "%s_eval_%d.png" % (name, idx))