Code Example #1
print "Seed = %d" %  RANDOM_SEED
np .random.seed     (RANDOM_SEED)
random.seed         (RANDOM_SEED)

#env                 = Pendulum(2,withDisplay=True)       # Continuous pendulum
env                 = Pendulum(2,length=.5,mass=3.0,armature=.2,withDisplay=False)
env.withSinCos      = False             # If True, each angle q is encoded as (cos q, sin q) in the observation.
NX                  = env.nobs          # Observation dimension (training also converges on raw q,qdot, with ~2x more neurons).
NU                  = env.nu            # Control dimension: one torque per joint.

env.vmax            = 100.
env.Kf              = np.diagflat([ 0.2, 2. ])
env.modulo          = False

env.DT              = 0.15
env.NDT             = 1
#env.umax            = 15.
#env.umax            = (15.,15.)
env.umax            = np.matrix([5.,10.]).T
NSTEPS              = 32

env.qlow[1] = -np.pi
env.qup [1] = np.pi
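
# Illustrative sketch (not part of the original script): env.umax above sets a
# per-joint torque bound (5 N.m and 10 N.m). The Pendulum class presumably clips
# the requested control to these limits; such a clipping looks like this:
u         = np.matrix([7., -12.]).T                           # requested torques
u_clipped = np.maximum(-env.umax, np.minimum(env.umax, u))    # -> matrix([[5.], [-10.]])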


# Shortcut to convert an SE3 placement to the viewer's 7-component (xyz + quaternion) configuration.
M2gv      = lambda M: XYZQUATToViewerConfiguration(se3ToXYZQUAT(M))
def place(objectId, M):
    robot.viewer.gui.applyConfiguration(objectId, M2gv(M))
    robot.viewer.gui.refresh()  # Refresh the window.
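
# Illustrative usage sketch (assumptions: pinocchio importable as pin, and an object
# named 'world/target' already added to the viewer; older pinocchio builds expect
# np.matrix arguments, newer ones plain numpy arrays):
import pinocchio as pin
M = pin.SE3(np.matrix(np.eye(3)), np.matrix([0., 0., 1.]).T)   # identity rotation, 1 m along z
place('world/target', M)                                       # convert to xyz+quat and refresh the GUI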
Code Example #2
import tensorflow as tf   # TF1-style API (tf.trainable_variables, etc.)
import tflearn
# Pendulum is the project's own environment class (import not shown in this excerpt).

DECAY_RATE = 0.99  # Discount factor
UPDATE_RATE = 0.01  # Homotopy rate to update the networks
REPLAY_SIZE = 10000  # Size of replay buffer
BATCH_SIZE = 64  # Number of points to be fed in stochastic gradient
NH1 = NH2 = 250  # Hidden layer size
RESTORE = "netvalues/actorcritic"  # Previously optimized net weights
# (set to an empty string if none)
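
# Illustrative sketch (not from the original script): the "homotopy rate" above is,
# in DDPG-style actor-critic code, typically the soft-update (Polyak averaging)
# coefficient with which the target networks track the learned ones:
def soft_update(target_params, learned_params, rate=UPDATE_RATE):
    """Move each target parameter a small step toward its learned counterpart."""
    return [(1. - rate) * pt + rate * p for pt, p in zip(target_params, learned_params)]
# With rate = 0.01 the targets lag the learned networks by roughly 100 updates,
# which keeps the bootstrapped Q-targets stable.
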
### --- Environment
env = Pendulum(1)  # Continuous pendulum
env.withSinCos = True  # State is dim-3: (cosq,sinq,qdot) ...
NX = env.nobs  # ... training converges with q,qdot with 2x more neurones.
NU = env.nu  # Control is dim-1: joint torque

env.vmax = 100.
env.DT = .15
env.NDT = 2
env.Kf = 0.2
NSTEPS = 30
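
# Illustrative rollout sketch (assumption: Pendulum exposes a gym-like reset()/step(u)
# interface returning the observation, with step() also returning a reward; the real
# interface is not shown in this excerpt):
import numpy as np
x = env.reset()                 # initial observation
for t in range(NSTEPS):
    u = np.zeros(NU)            # placeholder policy: zero torque
    x, reward = env.step(u)     # one control step of DT seconds (NDT internal sub-steps)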

### --- Q-value and policy networks


class QValueNetwork:
    def __init__(self):
        nvars = len(tf.trainable_variables())

        x = tflearn.input_data(shape=[None, NX])
        u = tflearn.input_data(shape=[None, NU])

        netx1 = tflearn.fully_connected(x,
                                        NH1,
Code Example #3
BATCH_SIZE              = 64            # Number of points to be fed in stochastic gradient
NH1 = NH2               = 250           # Hidden layer size
RESTORE                 = ""            #"netvalues/actorcritic.15.kf2"   # Previously optimized net weights
                                        # (set to an empty string if none)
RENDERRATE              = 20            # Render rate (rollout and plot) during training (0 = disabled)
#RENDERACTION            = [ 'saveweights',  'draw', 'rollout' ]
REGULAR                 = True          # Render on a regular grid (True) or a random grid (False)

### --- Environment
env                     = Pendulum(1)       # Continuous pendulum
env.withSinCos          = True              # State is dim-3: (cosq,sinq,qdot) ...
NX                      = env.nobs          # ... training converges with q,qdot with 2x more neurones.
NU                      = env.nu            # Control is dim-1: joint torque

env.DT                  = .15
env.NDT                 = 2
env.Kf                  = 0.2
env.vmax                = 100

RENDERACTION            = [ 'draw', ]
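
# Illustrative gating helper (not from the original script): how RENDERRATE and
# RENDERACTION are meant to be consumed during training; the actual draw/rollout/
# save code is not part of this excerpt:
def maybe_render(episode):
    if not RENDERRATE or episode % RENDERRATE:
        return                                   # render only every RENDERRATE episodes (0 = never)
    if 'draw' in RENDERACTION:
        print("episode %d: plot value/policy on a %s grid" % (episode, 'regular' if REGULAR else 'random'))
    if 'rollout' in RENDERACTION:
        print("episode %d: roll out the current policy in the viewer" % episode)
    if 'saveweights' in RENDERACTION:
        print("episode %d: save the network weights" % episode)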

# Alternative 2-dof pendulum configuration, disabled by wrapping it in a string literal:
'''

env = Pendulum(2,length=.5,mass=3.0,armature=10.)
env.withSinCos      = True              # State is dim-3: (cosq,sinq,qdot) ...
NX                  = env.nobs          # ... training converges with q,qdot with 2x more neurones.
NU                  = env.nu            # Control is dim-1: joint torque

env.DT              = 0.2
env.NDT             = 1
env.Kf              = 10.0 # 1.0
Code Example #4
import tensorflow as tf   # TF1-style API (tf.trainable_variables, etc.)
import tflearn
# Pendulum is the project's own environment class (import not shown in this excerpt).

BATCH_SIZE = 64  # Number of points to be fed in stochastic gradient
NH1 = NH2 = 250  # Hidden layer size
RESTORE = "netvalues/actorcritic.dt015.kf02.ep1300"  # Previously optimized net weights
# (set to an empty string if none)
### --- Environment
env = Pendulum(1)  # Continuous pendulum
env.withSinCos = True  # State is dim-3: (cosq,sinq,qdot) ...
NX = env.nobs  # ... training converges with q,qdot with 2x more neurones.
NU = env.nu  # Control is dim-1: joint torque

env.vmax = 100.
env.Kf = 0.2
env.modulo = False

env.DT = 0.15
env.NDT = 1
NSTEPS = 32  # Number of integration steps in horizon
NNODES = 8  # Number of shooting nodes
FNODES = NSTEPS // NNODES  # Number of integration steps per shooting interval ...
assert not NSTEPS % NNODES  # ... which must be an integer
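# Note: with these values each shooting interval covers FNODES = 32 // 8 = 4 integration
# steps of DT = 0.15 s, so the full horizon is 32 * 0.15 = 4.8 s split over 8 shooting nodes.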

### --- Q-value and policy networks


class QValueNetwork:
    def __init__(self):
        nvars = len(tf.trainable_variables())

        x = tflearn.input_data(shape=[None, NX])
        u = tflearn.input_data(shape=[None, NU])