Example #1
import tensorflow as tf

# PolicyNetwork, QValueNetwork and Robot are the project's own modules (imports omitted here).
# Build the DDPG actor/critic networks and their target copies.
policy = PolicyNetwork().setupOptim()
policyTarget = PolicyNetwork().setupTargetAssign(policy)

qvalue = QValueNetwork().setupOptim()
qvalueTarget = QValueNetwork().setupTargetAssign(qvalue)
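
# For reference, a minimal sketch of the soft (Polyak) target update that
# setupTargetAssign() is expected to build; the scope names and the tau value
# are illustrative assumptions, not taken from the project's code.
def make_target_assign_ops(source_scope, target_scope, tau=0.001):
    """Return ops moving each target variable a small step toward its source counterpart."""
    src = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope=source_scope)
    tgt = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope=target_scope)
    return [t.assign(tau * s + (1.0 - tau) * t) for s, t in zip(src, tgt)]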

SIM_NUMBER = 1.5
# Checkpoint file of the trained DDPG agent to restore.
model_save = "/home/pasquale/Desktop/thesis/thesis-code/2D_Acrobot/ddpg/trined_agents/DDPG_saved_" + str(
    SIM_NUMBER) + ".chkpt"

# Restore the trained weights into a TF1-style session.
# (Under TensorFlow 2.x, graph mode must be enabled first, e.g. with
# tf.compat.v1.disable_eager_execution().)
sess = tf.compat.v1.InteractiveSession()
tf.compat.v1.global_variables_initializer().run()
tf.compat.v1.train.Saver().restore(sess, model_save)

# Set up the simulated pendulum.
robot = Robot("single_pendulum.urdf")
robot.sim_number = SIM_NUMBER
robot.RANDSET = 0
robot.GUI_ENABLED = 0        # run headless
robot.SINCOS = 1             # state uses the (cos, sin) angle encoding
path_log = "/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/continuous/"
robot.time_step = time_step  # time_step comes from the training config (tc.time_step, cf. Example #2)
robot.setupSim()

# Evaluate policy
# env.robot.stopSim()
# env = PendulumPyB()

# Check convergence
c = 100000000
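
# A minimal sketch of one way to evaluate the restored policy: query it on a grid of
# pendulum states and inspect the resulting control map. The tensor handles
# `policy.x` (state placeholder) and `policy.policy` (action output) are assumed
# attribute names, not confirmed above; adapt them to the actual PolicyNetwork.
import numpy as np

thetas = np.linspace(-np.pi, np.pi, 41)
dthetas = np.linspace(-8.0, 8.0, 41)
policy_map = np.zeros((len(thetas), len(dthetas)))
for i, th in enumerate(thetas):
    for j, dth in enumerate(dthetas):
        x = np.array([[np.cos(th), np.sin(th), dth]])   # SINCOS-style state encoding
        u = sess.run(policy.policy, feed_dict={policy.x: x})
        policy_map[i, j] = float(np.squeeze(u))
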
Example #2
from stable_baselines import DDPG

# tc is the project's training-configuration module (its import is omitted here).
QVALUE_LEARNING_RATE = tc.QVALUE_LEARNING_RATE  # Base learning rate for the Q-value network
POLICY_LEARNING_RATE = tc.POLICY_LEARNING_RATE  # Base learning rate for the policy network
DECAY_RATE = tc.DECAY_RATE  # Discount factor
UPDATE_RATE = tc.UPDATE_RATE  # Soft (Polyak) rate for updating the target networks
REPLAY_SIZE = tc.REPLAY_SIZE  # Size of the replay buffer
BATCH_SIZE = tc.BATCH_SIZE  # Number of samples per stochastic-gradient minibatch
NH1 = NH2 = tc.NH1  # Hidden layer sizes (both layers use tc.NH1)
range_esp = tc.range_esp  # Exploration range
time_step = tc.time_step  # Simulation time step
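
# For context, a sketch of how these configuration values would map onto the DDPG
# hyperparameters of stable-baselines at training time (the agent loaded below
# already has them baked in); the env argument and the timestep budget are illustrative.
def train_from_config(train_env):
    from stable_baselines import DDPG
    from stable_baselines.ddpg.policies import MlpPolicy
    model = DDPG(MlpPolicy, train_env,
                 gamma=DECAY_RATE,                 # discount factor
                 tau=UPDATE_RATE,                  # soft target-update rate
                 buffer_size=REPLAY_SIZE,
                 batch_size=BATCH_SIZE,
                 actor_lr=POLICY_LEARNING_RATE,
                 critic_lr=QVALUE_LEARNING_RATE)
    model.learn(total_timesteps=100000)            # illustrative training budget
    return model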

SIM_NUMBER = 1.2

# Load the trained stable-baselines DDPG agent.
model = DDPG.load("ddpg_pendulum_stb_baselines_" + str(SIM_NUMBER))

# Set up the simulated pendulum (Robot is the project's own wrapper; import omitted here).
robot = Robot("single_pendulum.urdf")
robot.sim_number = 1
robot.RANDSET = 0
robot.GUI_ENABLED = 1        # show the GUI during evaluation
robot.SINCOS = 1             # state uses the (cos, sin) angle encoding
path_log = "/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/stable_baselines/"
robot.time_step = time_step
robot.setupSim()

# Evaluate policy
# env.robot.stopSim()
# env = PendulumPyB()

# Check convergence
# compare
# convergence
# training time
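
# A minimal sketch of the evaluation loop suggested by the comments above, assuming
# PendulumPyB is a gym-style wrapper (reset()/step()) around the simulated pendulum;
# the wrapper's import, the episode length and the use of deterministic actions are
# assumptions, not taken from the original script.
env = PendulumPyB()
obs = env.reset()
episode_return = 0.0
for _ in range(1000):
    action, _ = model.predict(obs, deterministic=True)   # stable-baselines predict API
    obs, reward, done, _ = env.step(action)
    episode_return += reward
    if done:
        break
print("Episode return:", episode_return)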