Example #1
# Critic (Q-value) network with its optimizer, plus a target copy kept in sync with it
qvalue = QValueNetwork().setupOptim()
qvalueTarget = QValueNetwork().setupTargetAssign(qvalue)

SIM_NUMBER = 1.5
# Checkpoint written by the training run
model_save = "/home/pasquale/Desktop/thesis/thesis-code/2D_Acrobot/ddpg/trined_agents/DDPG_saved_" + str(
    SIM_NUMBER) + ".chkpt"

# TF1-style session: initialize the variables, then overwrite them with the trained checkpoint
sess = tf.compat.v1.InteractiveSession()
tf.compat.v1.global_variables_initializer().run()
tf.compat.v1.train.Saver().restore(sess, model_save)

# Single-pendulum simulation: headless, no randomized resets, sin/cos state encoding
robot = Robot("single_pendulum.urdf")
robot.sim_number = SIM_NUMBER
robot.RANDSET = 0
robot.GUI_ENABLED = 0
robot.SINCOS = 1
path_log = "/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/continuous/"
robot.time_step = time_step  # time_step is assumed to be defined earlier in the script
robot.setupSim()

# Evaluate policy
# env.robot.stopSim()
# env = PendulumPyB()

# Check convergence: bookkeeping for the evaluation loop
c = 100000000
h_sum_last = 0
angles_list = []
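The example stops right after declaring its convergence bookkeeping. A minimal sketch of the evaluation loop these variables suggest is shown below; the actor network `policy`, the `robot.observe()` / `robot.step()` calls, and the episode length are assumptions for illustration, not part of the original code.

import numpy as np

EVAL_STEPS = 500  # assumed episode length

x = robot.observe()                # hypothetical: read the current sin/cos state
h_sum_last = 0.0
angles_list = []
for _ in range(EVAL_STEPS):
    u = policy.predict(x)          # hypothetical restored actor network
    x, reward = robot.step(u)      # hypothetical: apply torque, advance one time_step
    h_sum_last += reward
    angles_list.append(np.arctan2(x[0], x[1]))  # recover the angle from the sin/cos encoding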
Example #2
# -> goal position
length = .035 + .01 + 0.1088  # goal height: sum of the link lengths/offsets along z
goal = np.array([0, 0, length])

env = Robot("double_pendulum.urdf")
env_rend = Robot("double_pendulum.urdf", sim_number=SIM_NUMBER)  # second instance used only for rendering

# Training environment: headless, no randomized resets, sin/cos state encoding
env.GUI_ENABLED = 0
env.RANDSET = 0
env.SINCOS = 1
env.time_step = time_step  # time_step is assumed to be defined earlier in the script
env.actuated_index = [2]   # actuate a single joint (index 2)
env.max_torque = 5.0

# Rendering environment mirrors the actuation setup but opens a GUI
env_rend.SINCOS = 1
env_rend.GUI_ENABLED = 1
env_rend.actuated_index = [2]

step_expl = 0.  # per-step exploration noise amplitude, set to zero here
epi_expl = 0.   # per-episode exploration noise amplitude, set to zero here
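With both exploration amplitudes at zero the agent acts greedily. A common DDPG pattern is to add Gaussian noise to the actor output and decay it per step and per episode; the sketch below follows that pattern, with an illustrative `noisy_action` helper and decay rate that are not part of the original code.

import numpy as np

def noisy_action(u, step, episode, decay=0.995):
    # Exploration amplitude combines a per-step and a per-episode term
    sigma = step_expl * decay ** step + epi_expl * decay ** episode
    if sigma > 0:
        u = u + np.random.normal(0.0, sigma, size=np.shape(u))
    return np.clip(u, -env.max_torque, env.max_torque)

With step_expl and epi_expl left at 0., the helper simply clips and returns the greedy action.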