RANDOM_SEED = 999 # int((time.time()%10)*1000) print "Seed = %d" % RANDOM_SEED np .random.seed (RANDOM_SEED) random.seed (RANDOM_SEED) #env = Pendulum(2,withDisplay=True) # Continuous pendulum env = Pendulum(2,length=.5,mass=3.0,armature=.2,withDisplay=False) env.withSinCos = False # State is dim-3: (cosq,sinq,qdot) ... NX = env.nobs # ... training converges with q,qdot with 2x more neurones. NU = env.nu # Control is dim-1: joint torque env.vmax = 100. env.Kf = np.diagflat([ 0.2, 2. ]) env.modulo = False env.DT = 0.15 env.NDT = 1 #env.umax = 15. #env.umax = (15.,15.) env.umax = np.matrix([5.,10.]).T NSTEPS = 32 env.qlow[1] = -np.pi env.qup [1] = np.pi # Shortcut function to convert SE3 to 7-dof vector. M2gv = lambda M: XYZQUATToViewerConfiguration(se3ToXYZQUAT(M)) def place(objectId,M): robot.viewer.gui.applyConfiguration(objectId, M2gv(M)) robot.viewer.gui.refresh() # Refresh the window.
REPLAY_SIZE = 10000 # Size of replay buffer BATCH_SIZE = 64 # Number of points to be fed in stochastic gradient NH1 = NH2 = 250 # Hidden layer size RESTORE = ""#"netvalues/actorcritic.15.kf2" # Previously optimize net weight # (set empty string if no) RENDERRATE = 20 # Render rate (rollout and plot) during training (0 = no) #RENDERACTION = [ 'saveweights', 'draw', 'rollout' ] REGULAR = True # Render on a regular grid vs random grid ### --- Environment env = Pendulum(1) # Continuous pendulum env.withSinCos = True # State is dim-3: (cosq,sinq,qdot) ... NX = env.nobs # ... training converges with q,qdot with 2x more neurones. NU = env.nu # Control is dim-1: joint torque env.DT = .15 env.NDT = 2 env.Kf = 0.2 env.vmax = 100 RENDERACTION = [ 'draw', ] ''' env = Pendulum(2,length=.5,mass=3.0,armature=10.) env.withSinCos = True # State is dim-3: (cosq,sinq,qdot) ... NX = env.nobs # ... training converges with q,qdot with 2x more neurones. NU = env.nu # Control is dim-1: joint torque env.DT = 0.2 env.NDT = 1
### --- UNIT TEST --- ### --- UNIT TEST --- if __name__ == '__main__': from pendulum import Pendulum env = Pendulum(1,withDisplay=False) # Continuous pendulum env.withSinCos = False # State is dim-3: (cosq,sinq,qdot) ... NX = env.nobs # ... training converges with q,qdot with 2x more neurones. NU = env.nu # Control is dim-1: joint torque env.vmax = 100. env.Kf = 0.2 env.modulo = False env.DT = 0.05 env.NDT = 1 T = .5 NSTEPS = int(round(T/env.DT)) x1 = np.matrix([ .2, .1]).T x2 = np.matrix([ .2, -.1]).T acado = AcadoConnect(model=env.model) acado.setTimeInterval(T) acado.options['steps'] = NSTEPS acado.options['shift'] = 0 acado.options['iter'] = 100 acado.options['friction'] = 0.2 # acado.debug()
# --- Double-pendulum ACADO runner setup + PD-rollout helper (truncated) ---
# NOTE(review): collapsed chunk; original line breaks reconstructed.  The
# final statement is cut off mid-expression at the chunk boundary.
import signal
import matplotlib.pyplot as plt
import acado_runner

plt.ion()  # interactive plotting: figures update without blocking

env = Pendulum(2, length=.5, mass=3.0, armature=10.)
# External ACADO solver binary for the discrete double pendulum
# (hard-coded absolute path — machine-specific).
acado = acado_runner.AcadoRunner(
    "/home/nmansard/src/pinocchio/pycado/build/unittest/discrete_double_pendulum")

env.umax = 15.       # torque limit
env.vmax = 100       # joint-velocity limit
env.modulo = False   # do not wrap angles modulo 2*pi
NSTEPS = 50          # horizon length (number of control steps)
env.DT = 0.2  #25e-3
env.NDT = 1          # sub-steps per DT
env.Kf = 1.0         # friction coefficient

# Initial state: both joints at 0.3 rad, zero velocity.
x0 = np.matrix([0.3, 0.3, 0., 0.]).T


def controlPD(withPlot=True):
    '''  Compute a control trajectory by rollout a PD controller. '''
    Kp = 50.                      # proportional gain
    Kv = 2 * np.sqrt(Kp) * .5     # derivative gain: half of critical damping
    ratioK = 1.0
    #xdes = np.matrix([3.14,0,0,0]).T
    xdes = np.matrix([0., 0, 0, 0]).T  # regulate to the origin
    # NOTE(review): chunk is truncated here — the gain matrix K and the rest
    # of controlPD continue past this view; do not edit without the remainder.
    K = np.matrix(