def launchDDPGForSpecificTargetSizeAndSpeceficBeginning(sizeOfTarget, rs, point):
    """
    Run DDPG evaluation for a specific target size and a specific starting point.

    Input:
        -sizeOfTarget: size of the target, float
        -rs: setup object holding the network and experiment parameters
        -point: (pos, (x, y)) tuple, index and coordinates of the starting point
    """
    pos = point[0]
    x = point[1][0]
    y = point[1][1]
    print("Starting the DDPG for target " + str(sizeOfTarget) + " for point " + str(pos) + " !")
    foldername = rs.OPTIpath + str(sizeOfTarget) + "/" + str(pos) + "/"
    actor = simple_actor_network(rs.inputDim, rs.outputDim,
                                 l1_size=rs.hiddenLayers[0][1],
                                 l2_size=rs.hiddenLayers[1][1],
                                 learning_rate=rs.learningRate)
    env = DDPGEnv(rs, sizeOfTarget, "Best.theta", actor=actor, saveDir=foldername)
    env.setOnePointController(x, y)
    ddpg = DDPG(env, actor=actor)
    ddpg.M_episodes(rs.maxIterDDPG, train=False)
    print("End of optimization for target " + str(sizeOfTarget) + " for point " + str(pos) + " !")
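# Hedged usage sketch (not in the source): the values below are illustrative.
# `rs` is assumed to be the setup object used throughout (OPTIpath, inputDim,
# outputDim, hiddenLayers, learningRate, maxIterDDPG); `point` packs a
# start-point index with its (x, y) coordinates, matching how the function
# above unpacks it.
def exampleLaunchForPoint(rs):
    point = (3, (0.1, 0.5))  # illustrative index and start coordinates
    launchDDPGForSpecificTargetSizeAndSpeceficBeginning(0.04, rs, point)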
def launchDDPGForSpecificTargetSize(sizeOfTarget, rs):
    """Run DDPG evaluation for a specific target size, loading rs.thetaFile."""
    actor = simple_actor_network(rs.inputDim, rs.outputDim,
                                 l1_size=rs.hiddenLayers[0][1],
                                 l2_size=rs.hiddenLayers[1][1],
                                 learning_rate=rs.learningRate)
    env = DDPGEnv(rs, sizeOfTarget, rs.thetaFile, actor=actor)
    ddpg = DDPG(env, actor=actor)
    ddpg.M_episodes(rs.maxIterDDPG, train=False)
def generateFromDDPG(repeat, rs, thetaFile, saveDir='Data'):
    for el in rs.sizeOfTarget:
        c = Chrono()
        actor = simple_actor_network(rs.inputDim, rs.outputDim,
                                     l1_size=rs.hiddenLayers[0][1],
                                     l2_size=rs.hiddenLayers[1][1],
                                     learning_rate=rs.learningRate)
        env = DDPGEnv(rs, el, rs.thetaFile, actor=actor, saveDir=saveDir)
        thetaName = rs.OPTIpath + str(el) + "/" + thetaFile + ".theta"
        saveName = rs.OPTIpath + str(el) + "/" + saveDir + "/"
        # clear any logs left over from a previous generation run
        os.system("rm " + saveName + "Log/*.log 2>/dev/null")
        parameters = np.loadtxt(thetaName)
        actor.load_parameters(parameters)
        cost, time = env.saveAllTraj(repeat)
        c.stop()
        print("Average cost: ", cost)
        print("Average time: ", time)
        print("foldername : ", saveName)
    print("DDPG:End of generation")
def generateFromDDPG(repeat, rs, thetaFile, saveDir="Data"): for el in rs.sizeOfTarget: c = Chrono() actor = simple_actor_network( rs.inputDim, rs.outputDim, l1_size=rs.hiddenLayers[0][1], l2_size=rs.hiddenLayers[1][1], learning_rate=rs.learningRate, ) env = DDPGEnv(rs, el, rs.thetaFile, actor=actor, saveDir=saveDir) thetaName = rs.OPTIpath + str(el) + "/" + thetaFile + ".theta" saveName = rs.OPTIpath + str(el) + "/" + saveDir + "/" os.system("rm " + saveName + "Log/*.log 2>/dev/null") parameters = np.loadtxt(thetaName) actor.load_parameters(parameters) cost, time = env.saveAllTraj(repeat) c.stop() print("Average cost: ", cost) print("Average time: ", time) print("foldername : ", saveName) print("DDPG:End of generation")
    return 100 - ret  # tail of CMA_obj, the objective minimized by cma.fmin below


def term_callback(c):
    # Invoked by cma once per iteration; used here purely for logging:
    # evaluate the current mean parameters on a noise-free episode.
    if CMA_obj.totStep != term_callback.lastCall:
        term_callback.lastCall = CMA_obj.totStep
        CMA_obj.act.load_parameters(c.mean)
        CMA_obj.env.reset(noise=False)
        ret = 0
        while not CMA_obj.env.isFinished():
            actn, r = CMA_obj.env.act(CMA_obj.act.action_batch(CMA_obj.env.state()))
            ret += r[0]
        term_callback.plot_data.addData(CMA_obj.totStep, CMA_obj.env.t, ret)
    return False  # never ask cma to terminate


CMA_obj.env = MountainCarEnv()
CMA_obj.act = simple_actor_network(2, 1, l1_size=20, l2_size=10)
CMA_obj.totStep = 0
term_callback.plot_data = result_log("CMA-ES", 20, 10)
term_callback.lastCall = -1

print("Going for CMA-ES")
op = cma.CMAOptions()
op['termination_callback'] = term_callback
op['maxiter'] = 200
x = CMA_obj.act.linear_parameters()
fx = cma.fmin(CMA_obj, x, 0.5, options=op)
term_callback.plot_data.save()


def draw_politic():
    plt.close()
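# Hedged sketch (not in the source) of the callback pattern used above: cma
# invokes `termination_callback` every iteration, so logging can piggyback on
# it as long as the callback always returns False, which tells cma never to
# stop early on its account. Attribute names on `es` are assumed to follow the
# cma package's CMAEvolutionStrategy interface.
def logOnlyCallback(es):
    print("iteration", es.countiter, "best f", es.best.f)
    return False  # never request termination

# op = cma.CMAOptions()
# op['termination_callback'] = logOnlyCallback
# cma.fmin(objective, x0, 0.5, options=op)   # objective, x0 are placeholders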
"""
Created on Wed Feb 24 10:00:45 2016

@author: debroissia
"""
from DDPG.core.DDPG_core import DDPG
import numpy as np
import DDPG.environement.instance.mountainCarEnv as mc
from DDPG.core.networks.simple_actor_network import simple_actor_network
import matplotlib.pyplot as plt

env = mc.MountainCarEnv()
a_c = DDPG(env, actor=simple_actor_network(2, 1, l1_size=20, l2_size=10,
                                           learning_rate=0.001))


def draw_politic():
    plt.close()
    ac = a_c
    img = np.zeros((200, 200))
    pos = -1.
    batch = []
    for i in range(200):
        vel = -1.
        pos += 0.01
        for j in range(200):
            vel += 0.01
            batch.append([pos, vel])
    pol = ac.react(batch)
    b = 0
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 21 13:31:15 2016

@author: arnaud
"""
from DDPG.core.DDPG_core import DDPG
import numpy as np
import DDPG.environement.instance.mountainCarEnv as mc
from DDPG.core.networks.simple_actor_network import simple_actor_network
from DDPG.logger.result import result_log

l1 = 20
l2 = 10
rate = 0.001
env = mc.MountainCarEnv(result_log("DDPG", l1, l2))
a_c = DDPG(env, actor=simple_actor_network(2, 1, l1_size=l1, l2_size=l2,
                                           learning_rate=rate))


def voidFunc():
    pass

env.extern_draw = voidFunc


def doEp(M, T=float("inf")):
    a_c.M_episodes(M, T)
    env.perfs.save()

doEp(4000)
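# Hedged usage sketch (not in the source): T is forwarded to M_episodes and
# presumably bounds each episode (the default above is an unbounded
# float("inf")); the counts below are illustrative.
def exampleShortRun():
    doEp(100, T=1000)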
from DDPG.core.DDPG_core import DDPG
import numpy as np
import DDPG.environement.instance.mountainCarEnv as mc
from DDPG.core.networks.simple_actor_network import simple_actor_network
from DDPG.core.networks.simple_critic_network import simple_critic_network
import matplotlib.pyplot as plt
from DDPG.logger.result import result_log

l1 = 20
l2 = 10
logger = result_log("DDPG", l1, l2, "simple_" + str(l1) + "_" + str(l2))
env = mc.MountainCarEnv(logger)
a_c = DDPG(env,
           actor=simple_actor_network(2, 1, l1_size=l1, l2_size=l2,
                                      learning_rate=0.005),
           critic=simple_critic_network(2, 1, l1_size=20, l2_size=10,
                                        learning_rate=0.01))


def draw_politic():
    plt.close()
    ac = a_c
    img = np.zeros((200, 200))
    pos = -1.
    batch = []
    for i in range(200):
        vel = -1.
        pos += 0.01
        for j in range(200):
            vel += 0.01
            batch.append([pos, vel])
    pol = ac.react(batch)
    b = 0
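# Hypothetical completion (not in the source): draw_politic above is cut off
# after `b = 0` in both copies. Assuming a_c.react returns one action per
# [pos, vel] row, the remainder presumably maps actions back onto the 200x200
# grid and renders the policy.
def draw_politic_full():
    plt.close()
    img = np.zeros((200, 200))
    batch = []
    pos = -1.
    for i in range(200):
        vel = -1.
        pos += 0.01
        for j in range(200):
            vel += 0.01
            batch.append([pos, vel])
    pol = a_c.react(batch)
    b = 0
    for i in range(200):
        for j in range(200):
            img[i][j] = pol[b][0]  # assumed: each action is a 1-element vector
            b += 1
    plt.imshow(img, origin='lower')
    plt.show()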