qvalue = QValueNetwork().setupOptim()
qvalueTarget = QValueNetwork().setupTargetAssign(qvalue)

SIM_NUMBER = 1.5
model_save = "/home/pasquale/Desktop/thesis/thesis-code/2D_Acrobot/ddpg/trined_agents/DDPG_saved_" \
             + str(SIM_NUMBER) + ".chkpt"

# Restore the trained weights into a fresh TF1-compat session.
sess = tf.compat.v1.InteractiveSession()
tf.compat.v1.global_variables_initializer().run()
tf.compat.v1.train.Saver().restore(sess, model_save)

# Simulation setup: single pendulum, headless, deterministic initial state,
# sin/cos encoding of the joint angle.
robot = Robot("single_pendulum.urdf")
robot.sim_number = SIM_NUMBER
robot.RANDSET = 0
robot.GUI_ENABLED = 0
robot.SINCOS = 1
path_log = "/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/continuous/"
robot.time_step = time_step
robot.setupSim()

# Evaluate policy
# env.robot.stopSim()
# env = PendulumPyB()

# Check convergence
c = 100000000
h_sum_last = 0
angles_list = []
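# The restored networks are typically rolled out greedily to evaluate the
# policy. Below is a minimal sketch of one evaluation episode, assuming a
# PolicyNetwork `policy` was built alongside `qvalue` and that Robot exposes
# reset()/step()/get_angle(); these names are assumptions about this codebase,
# not confirmed API.
def run_episode(robot, sess, policy, n_steps):
    x = robot.reset()                                      # assumed: returns initial state
    h_sum = 0.0                                            # accumulated episode reward
    for _ in range(n_steps):
        u = sess.run(policy.policy,
                     feed_dict={policy.x: np.array([x])})  # greedy action from the actor
        x, reward = robot.step(u)                          # assumed: one simulation step
        h_sum += reward
        angles_list.append(robot.get_angle())              # assumed joint-angle accessor
    return h_sum

# A convergence check can then compare successive episode returns, e.g.
#     h_sum = run_episode(robot, sess, policy, NSTEPS)
#     if abs(h_sum - h_sum_last) < tol: ...   # NSTEPS and tol are hypothetical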
# -> goal position
length = .035 + .01 + 0.1088
goal = np.array([0, 0, length])

# Training environment runs headless; env_rend is a GUI copy used only for rendering.
env = Robot("double_pendulum.urdf")
env_rend = Robot("double_pendulum.urdf", sim_number=SIM_NUMBER)  # for rendering
env.GUI_ENABLED = 0
env.RANDSET = 0
env.SINCOS = 1
env.time_step = time_step
env.actuated_index = [2]   # URDF index of the single actuated joint
env.max_torque = 5.0
env_rend.SINCOS = 1
env_rend.GUI_ENABLED = 1
env_rend.actuated_index = [2]

# Exploration parameters (zero here).
step_expl = 0.
epi_expl = 0.
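# The goal vector above is the pendulum tip position in the upright
# configuration (link lengths summed along z). A reward built on tip-to-goal
# distance could look like this sketch; `tip_position` is an assumed accessor,
# not a confirmed Robot method.
def distance_reward(env, goal):
    tip = np.asarray(env.tip_position())       # assumed: world-frame tip coordinates
    return -float(np.linalg.norm(tip - goal))  # negative Euclidean distance as reward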