def dyian_test(path):
    """Train an A2C model built from a saved DIAYN model and record the results.

    Under ``path`` this creates the ``plot_pt\\`` and ``save_pt\\`` directories
    if they do not exist, loads the DIAYN model from ``save_diayn\\``, draws
    its trajectories with ``mountaincar``, then builds an A2C model with
    ``A2C.from_diayn``, calls ``train(200)`` on it, writes the reward and
    trajectory plots, and saves the model into ``save_pt\\``.

    Args:
        path: Base directory prefix. It is concatenated directly with the
            sub-directory names, so it must already end with a separator.
    """
    plot_dir = path + "plot_pt\\"
    save_dir = path + "save_pt\\"
    for directory in (plot_dir, save_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # NOTE(review): the argument 2 is presumably the number of skills -- confirm
    # against build_diayn.
    diayn_mod = build_diayn(2)
    diayn_mod.load(path + "save_diayn\\")
    mountaincar(diayn_mod, plot_dir + "pretrained_trajectoires_0")

    # NOTE(review): the second argument (0) presumably selects a skill -- confirm
    # against A2C.from_diayn.
    model = A2C.from_diayn(diayn_mod, 0)

    steps_per_round = 200
    for round_idx in range(1):
        model.train(steps_per_round)
        # Output files are tagged with the cumulative value passed to train().
        tag = str((round_idx + 1) * steps_per_round)
        model.plot_rewards(plot_dir + "pretrained_rewards_" + tag)
        mountaincar_baseline(model, plot_dir + "pretrained_trajectoires_" + tag)
        model.save(save_dir)
def plot_results(path):
    """Plot and save smoothed reward curves for the pretrained and baseline agents.

    Loads a baseline A2C model from ``path + "save_bl\\"`` and an A2C model
    built from the DIAYN model in ``path + "save_diayn\\"`` whose state is
    loaded from ``path + "save_pt\\"``. A 100-entry moving average of each
    model's ``rewards`` is drawn on one figure, which is saved to
    ``path + "results"`` and then displayed.

    Args:
        path: Base directory prefix. It is concatenated directly with the
            sub-directory names, so it must already end with a separator.
    """
    env = gym.make("MountainCar-v0")
    baseline = A2C(env, {"actor": [30, 30], "critic": [30, 30]}, gamma=0.99)
    baseline.load(path + "save_bl\\")

    diayn_mod = build_diayn(2)
    diayn_mod.load(path + "save_diayn\\")
    pretrained = A2C.from_diayn(diayn_mod, 0)
    pretrained.load(path + "save_pt\\")

    window = 100
    kernel = np.ones(window) / window

    plt.figure()
    for agent, label in ((pretrained, "pretrained"), (baseline, "baseline")):
        rewards = agent.rewards
        # "valid" convolution yields len(rewards) - window + 1 points, so the
        # x axis starts at index window - 1.
        # NOTE(review): this assumes len(rewards) >= window; with fewer entries
        # the x and y lengths no longer match and plotting fails.
        plt.plot(
            range(window - 1, len(rewards)),
            np.convolve(rewards, kernel, "valid"),
            label=label,
        )
    plt.legend()

    # Save before showing: with a blocking backend plt.show() only returns
    # once the window is closed, and a savefig() issued afterwards would
    # write an empty canvas.
    plt.savefig(path + "results")
    plt.show()
    plt.pause(1)