Code example #1
File: MainDDPG.py Project: osigaud/ArmModelPython
def launchDDPGForSpecificTargetSizeAndSpeceficBeginning(sizeOfTarget, rs, point):
    """
    Run cmaes for a specific target sizeCMAES

    Input:    -sizeOfTarget, size of the target, float
            -setuFile, file of setup, string
            -save, for saving result, bool
    """
    pos = point[0]
    x = point[1][0]
    y = point[1][1]
    print("Starting the DDPGor target " + str(sizeOfTarget) + " for point " + str(pos) + " !")
    foldername = rs.OPTIpath + str(sizeOfTarget) + "/" + str(pos) + "/"

    actor = simple_actor_network(
        rs.inputDim,
        rs.outputDim,
        l1_size=rs.hiddenLayers[0][1],
        l2_size=rs.hiddenLayers[1][1],
        learning_rate=rs.learningRate,
    )
    env = DDPGEnv(rs, sizeOfTarget, "Best.theta", actor=actor, saveDir=foldername)
    env.setOnePointController(x, y)
    ddpg = DDPG(env, actor=actor)
    ddpg.M_episodes(rs.maxIterDDPG, train=False)

    print("End of optimization for target " + str(sizeOfTarget) + " for point " + str(pos) + " !")
Code example #2
File: MainDDPG.py Project: osigaud/ArmModelPython
def launchDDPGForSpecificTargetSizeAndSpeceficBeginning(
        sizeOfTarget, rs, point):
    '''
    Run DDPG for a specific target size and a specific starting point.

    Input:    -sizeOfTarget, size of the target, float
            -rs, setup/settings object carrying the run parameters
            -point, pair (index, (x, y)) giving the starting point
    '''
    pos = point[0]
    x = point[1][0]
    y = point[1][1]
    print("Starting the DDPGor target " + str(sizeOfTarget) + " for point " +
          str(pos) + " !")
    foldername = rs.OPTIpath + str(sizeOfTarget) + "/" + str(pos) + "/"

    actor = simple_actor_network(rs.inputDim,
                                 rs.outputDim,
                                 l1_size=rs.hiddenLayers[0][1],
                                 l2_size=rs.hiddenLayers[1][1],
                                 learning_rate=rs.learningRate)
    env = DDPGEnv(rs,
                  sizeOfTarget,
                  "Best.theta",
                  actor=actor,
                  saveDir=foldername)
    env.setOnePointController(x, y)
    ddpg = DDPG(env, actor=actor)
    ddpg.M_episodes(rs.maxIterDDPG, train=False)

    print("End of optimization for target " + str(sizeOfTarget) +
          " for point " + str(pos) + " !")
Code example #3
File: MainDDPG.py Project: osigaud/ArmModelPython
def launchDDPGForSpecificTargetSize(sizeOfTarget, rs):
    actor = simple_actor_network(rs.inputDim,
                                 rs.outputDim,
                                 l1_size=rs.hiddenLayers[0][1],
                                 l2_size=rs.hiddenLayers[1][1],
                                 learning_rate=rs.learningRate)
    env = DDPGEnv(rs, sizeOfTarget, rs.thetaFile, actor=actor)
    ddpg = DDPG(env, actor=actor)
    ddpg.M_episodes(rs.maxIterDDPG, train=False)
Code example #4
File: MainDDPG.py Project: osigaud/ArmModelPython
def launchDDPGForSpecificTargetSize(sizeOfTarget, rs):
    actor = simple_actor_network(
        rs.inputDim,
        rs.outputDim,
        l1_size=rs.hiddenLayers[0][1],
        l2_size=rs.hiddenLayers[1][1],
        learning_rate=rs.learningRate,
    )
    env = DDPGEnv(rs, sizeOfTarget, rs.thetaFile, actor=actor)
    ddpg = DDPG(env, actor=actor)
    ddpg.M_episodes(rs.maxIterDDPG, train=False)
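
Examples #3 and #4 take only a target size. A plausible way to sweep every size, assuming `rs.sizeOfTarget` is the iterable of target sizes used in example #5 below:

# Sketch only; relies on rs.sizeOfTarget being the list of target sizes.
for size in rs.sizeOfTarget:
    launchDDPGForSpecificTargetSize(size, rs)
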
Code example #5
File: MainDDPG.py Project: osigaud/ArmModelPython
def generateFromDDPG(repeat, rs, thetaFile, saveDir='Data'):
    for el in rs.sizeOfTarget:
        c = Chrono()
        actor = simple_actor_network(rs.inputDim,
                                     rs.outputDim,
                                     l1_size=rs.hiddenLayers[0][1],
                                     l2_size=rs.hiddenLayers[1][1],
                                     learning_rate=rs.learningRate)
        env = DDPGEnv(rs, el, rs.thetaFile, actor=actor, saveDir=saveDir)
        thetaName = rs.OPTIpath + str(el) + "/" + thetaFile + ".theta"
        saveName = rs.OPTIpath + str(el) + "/" + saveDir + "/"
        os.system("rm " + saveName + "Log/*.log 2>/dev/null")
        parameters = np.loadtxt(thetaName)
        actor.load_parameters(parameters)
        cost, time = env.saveAllTraj(repeat)
        c.stop()
        print("Average cost: ", cost)
        print("Average time: ", time)
        print("foldername : ", saveName)
    print("DDPG:End of generation")
Code example #6
File: MainDDPG.py Project: osigaud/ArmModelPython
def generateFromDDPG(repeat, rs, thetaFile, saveDir="Data"):
    for el in rs.sizeOfTarget:
        c = Chrono()
        actor = simple_actor_network(
            rs.inputDim,
            rs.outputDim,
            l1_size=rs.hiddenLayers[0][1],
            l2_size=rs.hiddenLayers[1][1],
            learning_rate=rs.learningRate,
        )
        env = DDPGEnv(rs, el, rs.thetaFile, actor=actor, saveDir=saveDir)
        thetaName = rs.OPTIpath + str(el) + "/" + thetaFile + ".theta"
        saveName = rs.OPTIpath + str(el) + "/" + saveDir + "/"
        os.system("rm " + saveName + "Log/*.log 2>/dev/null")
        parameters = np.loadtxt(thetaName)
        actor.load_parameters(parameters)
        cost, time = env.saveAllTraj(repeat)
        c.stop()
        print("Average cost: ", cost)
        print("Average time: ", time)
        print("foldername : ", saveName)
    print("DDPG:End of generation")
Code example #7
File: cma_mc.py Project: MOCR/DDPG
    return 100-ret
    
def term_callback(c):
    if CMA_obj.totStep != term_callback.lastCall:
        term_callback.lastCall = CMA_obj.totStep
        CMA_obj.act.load_parameters(c.mean)
        CMA_obj.env.reset(noise = False)
        ret = 0
        while not CMA_obj.env.isFinished():
            actn, r = CMA_obj.env.act(CMA_obj.act.action_batch(CMA_obj.env.state()))
            ret += r[0]
        term_callback.plot_data.addData(CMA_obj.totStep, CMA_obj.env.t, ret)
    return False

CMA_obj.env = MountainCarEnv()
CMA_obj.act = simple_actor_network(2, 1, l1_size = 20, l2_size = 10)
CMA_obj.totStep = 0

term_callback.plot_data = result_log("CMA-ES", 20,10)
term_callback.lastCall = -1
print "Going for CMA-ES"
op = cma.CMAOptions()
op['termination_callback'] = term_callback
op['maxiter']=200
x=  CMA_obj.act.linear_parameters()
fx = cma.fmin(CMA_obj, x, 0.5, options = op)

term_callback.plot_data.save()

def draw_politic():
    plt.close()
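
The cma_mc.py excerpt starts mid-file: `CMA_obj` is a callable objective defined earlier in that file and not shown here. As a self-contained illustration of the `cma.fmin(objective, x0, sigma0, options=...)` call pattern only, here is a toy run on a quadratic stand-in objective; it does not use the project's classes.

# Toy illustration of the cma.fmin call pattern used above.
import cma

def sphere(x):
    # Quadratic objective standing in for CMA_obj.
    return sum(xi * xi for xi in x)

opts = cma.CMAOptions()
opts['maxiter'] = 50
result = cma.fmin(sphere, [0.5] * 4, 0.3, options=opts)
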
Code example #8
"""
Created on Wed Feb 24 10:00:45 2016

@author: debroissia
"""

from DDPG.core.DDPG_core import DDPG
import numpy as np

import DDPG.environement.instance.mountainCarEnv as mc
from DDPG.core.networks.simple_actor_network import simple_actor_network

import matplotlib.pyplot as plt


env = mc.MountainCarEnv()
a_c = DDPG(env, actor = simple_actor_network(2, 1, l1_size = 20, l2_size = 10, learning_rate = 0.001))
    
def draw_politic():
    plt.close()
    ac= a_c
    img = np.zeros((200, 200))
    pos = -1.
    batch = []
    for i in range(200):
        vel = -1.
        pos += 0.01
        for j in range(200):
            vel += 0.01
            batch.append([pos, vel])
    pol = ac.react(batch)
    b=0           
Code example #9
def term_callback(c):
    if CMA_obj.totStep != term_callback.lastCall:
        term_callback.lastCall = CMA_obj.totStep
        CMA_obj.act.load_parameters(c.mean)
        CMA_obj.env.reset(noise=False)
        ret = 0
        while not CMA_obj.env.isFinished():
            actn, r = CMA_obj.env.act(
                CMA_obj.act.action_batch(CMA_obj.env.state()))
            ret += r[0]
        term_callback.plot_data.addData(CMA_obj.totStep, CMA_obj.env.t, ret)
    return False


CMA_obj.env = MountainCarEnv()
CMA_obj.act = simple_actor_network(2, 1, l1_size=20, l2_size=10)
CMA_obj.totStep = 0

term_callback.plot_data = result_log("CMA-ES", 20, 10)
term_callback.lastCall = -1
print "Going for CMA-ES"
op = cma.CMAOptions()
op['termination_callback'] = term_callback
op['maxiter'] = 200
x = CMA_obj.act.linear_parameters()
fx = cma.fmin(CMA_obj, x, 0.5, options=op)

term_callback.plot_data.save()


def draw_politic():
Code example #10
File: schedule_calcs_DDPG.py Project: szrayic/DDPG-1
from DDPG.core.DDPG_core import DDPG
import numpy as np

import DDPG.environement.instance.mountainCarEnv as mc
from DDPG.core.networks.simple_actor_network import simple_actor_network
from DDPG.logger.result import result_log

l1 = 20
l2 = 10
rate = 0.001

env = mc.MountainCarEnv(result_log("DDPG", l1, l2))
a_c = DDPG(env,
           actor=simple_actor_network(2,
                                      1,
                                      l1_size=l1,
                                      l2_size=l2,
                                      learning_rate=rate))


def voidFunc():
    pass


env.extern_draw = voidFunc


def doEp(M, T=float("inf")):
    a_c.M_episodes(M, T)
    env.perfs.save()
Code example #11
File: schedule_calcs_DDPG.py Project: MOCR/DDPG
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 21 13:31:15 2016

@author: arnaud
"""

from DDPG.core.DDPG_core import DDPG
import numpy as np

import DDPG.environement.instance.mountainCarEnv as mc
from DDPG.core.networks.simple_actor_network import simple_actor_network
from DDPG.logger.result import result_log

l1 = 20
l2 = 10
rate = 0.001

env = mc.MountainCarEnv(result_log("DDPG", l1, l2))
a_c = DDPG(env, actor = simple_actor_network(2, 1, l1_size = l1, l2_size = l2, learning_rate = rate))

def voidFunc():
    pass

env.extern_draw = voidFunc

def doEp(M, T=float("inf")):
    a_c.M_episodes(M, T)
    env.perfs.save()
doEp(4000)
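
Since doEp saves the performance log after each batch of episodes, a long run can also be split into blocks so the log is checkpointed along the way; the block sizes below are an arbitrary choice, not a schedule taken from the repository.

# Alternative to the single doEp(4000) call above.
for _ in range(4):
    doEp(1000)
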
Code example #12
File: test_mc.py Project: MOCR/DDPG
from DDPG.core.DDPG_core import DDPG
import numpy as np

import DDPG.environement.instance.mountainCarEnv as mc
from DDPG.core.networks.simple_actor_network import simple_actor_network
from DDPG.core.networks.simple_critic_network import simple_critic_network

import matplotlib.pyplot as plt
from DDPG.logger.result import result_log

l1 = 20
l2 = 10

logger = result_log("DDPG", l1, l2, "simple_"+str(l1)+"_"+str(l2))

env = mc.MountainCarEnv(logger)
a_c = DDPG(env,
           actor=simple_actor_network(2, 1, l1_size=l1, l2_size=l2,
                                      learning_rate=0.005),
           critic=simple_critic_network(2, 1, l1_size=20, l2_size=10,
                                        learning_rate=0.01))
    
def draw_politic():
    plt.close()
    ac= a_c
    img = np.zeros((200, 200))
    pos = -1.
    batch = []
    for i in range(200):
        vel = -1.
        pos += 0.01
        for j in range(200):
            vel += 0.01
            batch.append([pos, vel])
    pol = ac.react(batch)
    b=0
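    # The excerpt stops here. A hypothetical continuation (not the repository's own
    # drawing code) could fill `img` from the flat batch of actions and render it:
    for i in range(200):
        for j in range(200):
            img[i, j] = pol[b][0]
            b += 1
    plt.imshow(img, origin='lower')
    plt.xlabel('velocity index')
    plt.ylabel('position index')
    plt.show()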