def __init__(self):
    """Initialise the experiment state for a DDPG run on the mountain-car task.

    Builds the environment, zeroes the step/episode counters, and opens a
    fresh result logger for a 20x10 actor network.
    """
    # Environment under optimisation.
    self.env = MountainCar()
    # Exploration-noise amplitude.
    self.noiseRange = 1.0
    # Misc. algorithm state (semantics defined by the surrounding class --
    # NOTE(review): om/alpha/beta look like noise/annealing parameters; confirm).
    self.om = 0
    self.alpha = 0.6
    self.beta = 0.4
    # Time step within the current episode, and total steps overall.
    self.t = 0
    self.totStep = 0
    # Accumulated reward and episode counter.
    self.r = 0
    self.ep = 0
    # Performance logger for this configuration.
    self.perfs = result_log(algo="DDPG", l1=20, l2=10)
    # Whether the agent is active.
    self.actif = True
def __init__(self, logger=None):
    """Initialise the experiment state, optionally reusing an external logger.

    Parameters
    ----------
    logger : result_log or None
        Performance logger to record into; when None a fresh
        ``result_log(algo="DDPG", l1=20, l2=10)`` is created.
    """
    # Environment under optimisation.
    self.env = MountainCar()
    # Exploration-noise amplitude and its ceiling.
    self.noiseRange = 1.0
    self.noiseMax = 1.0
    # Misc. algorithm state (NOTE(review): om/alpha/beta look like
    # noise/annealing parameters; confirm against the enclosing class).
    self.om = 0
    self.alpha = 0.6
    self.beta = 0.4
    # Per-episode step, total step, reward accumulator, episode counter.
    self.t = 0
    self.totStep = 0
    self.r = 0
    self.ep = 0
    # Fix: compare against None with `is`, not `==` (PEP 8); `==` would
    # invoke a custom __eq__ on the logger object if it defined one.
    if logger is None:
        self.perfs = result_log(algo="DDPG", l1=20, l2=10)
    else:
        self.perfs = logger
    # Whether the agent is active.
    self.actif = True
# NOTE(review): extraction-flattened chunk of a Python 2 CMA-ES baseline script
# (note the `print` statement).  It contains, in order:
#   1. the INTERIOR of a `term_callback(c)` function whose `def` header lies
#      outside this excerpt -- when totStep has advanced since the last call it
#      loads the CMA mean `c.mean` into the actor, runs one noise-free episode,
#      logs the accumulated return via plot_data.addData, then returns False;
#   2. module-level setup that creates the env/actor, registers the callback as
#      `termination_callback`, caps `maxiter` at 200 and runs `cma.fmin` on the
#      actor's flattened linear parameters, then saves the plot data;
#   3. the TRUNCATED start of a `draw_politic()` helper (cut off mid-body).
# Left byte-identical: reconstructing the missing `def` header and the
# truncated tail would be guesswork.
if CMA_obj.totStep != term_callback.lastCall: term_callback.lastCall = CMA_obj.totStep CMA_obj.act.load_parameters(c.mean) CMA_obj.env.reset(noise = False) ret = 0 while not CMA_obj.env.isFinished(): actn, r = CMA_obj.env.act(CMA_obj.act.action_batch(CMA_obj.env.state())) ret += r[0] term_callback.plot_data.addData(CMA_obj.totStep, CMA_obj.env.t, ret) return False CMA_obj.env = MountainCarEnv() CMA_obj.act = simple_actor_network(2, 1, l1_size = 20, l2_size = 10) CMA_obj.totStep = 0 term_callback.plot_data = result_log("CMA-ES", 20,10) term_callback.lastCall = -1 print "Going for CMA-ES" op = cma.CMAOptions() op['termination_callback'] = term_callback op['maxiter']=200 x= CMA_obj.act.linear_parameters() fx = cma.fmin(CMA_obj, x, 0.5, options = op) term_callback.plot_data.save() def draw_politic(): plt.close() act = CMA_obj.act img = np.zeros((200, 200)) pos = -1.
# NOTE(review): near-duplicate of the previous chunk (a later, re-spaced
# revision of the same CMA-ES script), flattened by extraction.  It begins
# INSIDE `term_callback` -- even the leading `if` of the duplicate-call guard
# is missing here -- evaluates the CMA mean policy on one noise-free episode,
# logs the return, and returns False; then module-level code builds the
# env/actor, configures `cma.CMAOptions` (termination_callback, maxiter=200)
# and runs `cma.fmin`; it ends with the TRUNCATED start of `draw_politic()`.
# Left byte-identical: both edges of the chunk cut definitions in half.
CMA_obj.act.load_parameters(c.mean) CMA_obj.env.reset(noise=False) ret = 0 while not CMA_obj.env.isFinished(): actn, r = CMA_obj.env.act( CMA_obj.act.action_batch(CMA_obj.env.state())) ret += r[0] term_callback.plot_data.addData(CMA_obj.totStep, CMA_obj.env.t, ret) return False CMA_obj.env = MountainCarEnv() CMA_obj.act = simple_actor_network(2, 1, l1_size=20, l2_size=10) CMA_obj.totStep = 0 term_callback.plot_data = result_log("CMA-ES", 20, 10) term_callback.lastCall = -1 print "Going for CMA-ES" op = cma.CMAOptions() op['termination_callback'] = term_callback op['maxiter'] = 200 x = CMA_obj.act.linear_parameters() fx = cma.fmin(CMA_obj, x, 0.5, options=op) term_callback.plot_data.save() def draw_politic(): plt.close() act = CMA_obj.act img = np.zeros((200, 200))
# NOTE(review): flattened chunk whose first tokens (`@author: arnaud """`) are
# the TAIL of a module docstring opened before this excerpt, so the chunk
# cannot be reformatted without inventing the missing opening quotes.  After
# the docstring close it imports the DDPG machinery, sets layer sizes
# l1=20/l2=10 and learning rate 0.001, builds a MountainCarEnv wired to a
# result_log, constructs the DDPG agent with a 2-input/1-output actor, and
# installs a no-op `extern_draw` hook (voidFunc) on the environment.
@author: arnaud """ from DDPG.core.DDPG_core import DDPG import numpy as np import DDPG.environement.instance.mountainCarEnv as mc from DDPG.core.networks.simple_actor_network import simple_actor_network from DDPG.logger.result import result_log l1 = 20 l2 = 10 rate = 0.001 env = mc.MountainCarEnv(result_log("DDPG", l1, l2)) a_c = DDPG(env, actor=simple_actor_network(2, 1, l1_size=l1, l2_size=l2, learning_rate=rate)) def voidFunc(): pass env.extern_draw = voidFunc
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 21 13:31:15 2016

@author: arnaud
"""
from DDPG.core.DDPG_core import DDPG
import numpy as np
import DDPG.environement.instance.mountainCarEnv as mc
from DDPG.core.networks.simple_actor_network import simple_actor_network
from DDPG.logger.result import result_log

# Network layer sizes and actor learning rate for this experiment.
l1 = 20
l2 = 10
rate = 0.001

# Mountain-car environment wired to a result logger, and the DDPG agent
# driving it with a 2-input / 1-output actor network.
env = mc.MountainCarEnv(result_log("DDPG", l1, l2))
a_c = DDPG(env,
           actor=simple_actor_network(2, 1, l1_size=l1, l2_size=l2,
                                      learning_rate=rate))


def voidFunc():
    """Do nothing; disables the environment's external drawing hook."""
    pass

env.extern_draw = voidFunc


def doEp(M, T=float("inf")):
    """Run M training episodes (each capped at T steps) and save the log.

    Parameters
    ----------
    M : int
        Number of episodes to run.
    T : float, optional
        Per-episode step limit; unbounded by default.
    """
    a_c.M_episodes(M, T)
    env.perfs.save()

doEp(4000)
# NOTE(review): flattened chunk that opens with the CLOSING `"""` of a module
# docstring started before this excerpt and ends TRUNCATED in the middle of
# `draw_politic()`'s nested loops, so it is left byte-identical.  In between it
# imports DDPG actor *and* critic networks plus matplotlib, builds a result_log
# named "simple_20_10", wires it into a MountainCarEnv, and constructs a DDPG
# agent with a 20x10 actor (lr 0.005) and a 20x10 critic (lr 0.01).  The
# truncated `draw_politic` appears to rasterise the policy over a 200x200
# (pos, vel) grid starting at (-1, -1) with step 0.01 -- cut off here.
""" from DDPG.core.DDPG_core import DDPG import numpy as np import DDPG.environement.instance.mountainCarEnv as mc from DDPG.core.networks.simple_actor_network import simple_actor_network from DDPG.core.networks.simple_critic_network import simple_critic_network import matplotlib.pyplot as plt from DDPG.logger.result import result_log l1 = 20 l2 = 10 logger = result_log("DDPG", l1, l2, "simple_"+str(l1)+"_"+str(l2)) env = mc.MountainCarEnv(logger) a_c = DDPG(env, actor = simple_actor_network(2, 1, l1_size = l1, l2_size = l2, learning_rate = 0.005), critic = simple_critic_network(2, 1, l1_size = 20, l2_size = 10, learning_rate = 0.01)) def draw_politic(): plt.close() ac= a_c img = np.zeros((200, 200)) pos = -1. batch = [] for i in range(200): vel = -1. pos += 0.01 for j in range(200): vel += 0.01