    j += 1
    h_sum_last += -angle_normalize(robot.states[1][3])**2
    if i >= NSTEPS - 50:
        # Track the worst (largest) angle error over the final 50 steps.
        if angle_normalize(robot.states[1][3]) * 180 / np.pi > max_after_up:
            max_after_up = angle_normalize(robot.states[1][3]) * 180 / np.pi
        angles_list.append(angle_normalize(robot.states[1][3]) * 180 / np.pi)
        vel_ang_list.append(robot.states_dot[1][3])

h_mean_last = h_sum_last / j
print("up position reached at step " + str(first_step_up_1)
      + ", mean reward last steps after up reached: " + str(h_mean_last)
      + ", angle is lower than " + str(max_after_up) + " deg in the last 50 steps")
robot.stopSim()
# mean return 20 last steps: -2.2419180192174818e-07, first reached top at 30

# Evaluate the policy with (10x) random resets.
# Save the step at which the pendulum reaches the vertical position
# (also with random resets).
robot = Robot("single_pendulum.urdf")
robot.sim_number = 1
robot.RANDSET = 1
robot.GUI_ENABLED = 1
robot.SINCOS = 1
path_eval = "/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/ddpg/eval/"
robot.time_step = time_step
robot.setupSim()
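# --- Hedged sketch of the 10x random-reset evaluation announced above. ---
# Assumptions, not confirmed by this file: `policy(obs)` returns the control
# torque, `robot.step(u)` advances one simulation step and returns the new
# observation, `robot.resetRobot()` re-samples the random initial state
# (RANDSET = 1), and UP_TOL is a hypothetical upright tolerance in radians.
UP_TOL = 0.1
first_up_steps = []
for trial in range(10):
    robot.resetRobot()
    obs = robot.observe()
    first_up = -1  # -1 means the upright position was never reached
    for i in range(NSTEPS):
        obs = robot.step(policy(obs))
        # Record the first step at which the pendulum is within UP_TOL of upright.
        if first_up < 0 and abs(angle_normalize(robot.states[1][3])) < UP_TOL:
            first_up = i
    first_up_steps.append(first_up)
print("first step in up position, per random reset:", first_up_steps)
np.save(path_eval + "first_up_steps.npy", np.array(first_up_steps))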
    # Max Q-value over the last minibatch (0 if no batch was sampled yet).
    maxq = np.max(sess.run(qvalue.qvalue,
                           feed_dict={qvalue.x: x_batch,
                                      qvalue.u: u_batch})) \
        if 'x_batch' in locals() else 0
    print('Ep#{:3d}: lasted {:d} steps, reward={:3.0f}, max qvalue={:2.3f}'
          .format(episode, step, rsum, maxq))
    h_rwd.append(rsum)
    h_qva.append(maxq)
    h_ste.append(step)
    # env_rend.setupSim()
    # if not (episode+1) % 15: rendertrial(env_rend)
# \\\END_FOR episode in range(NEPISODES)

end_time = time.time()
elapsed_time = end_time - start_time
print('elapsed ' + str(elapsed_time) + 's')
env.stopSim()
print("Average reward during trials: %.3f" % (sum(h_rwd) / NEPISODES))

# Render one trial of the trained policy and record a video.
env_rend.SINCOS = 1
env_rend.GUI_ENABLED = 1
env_rend.time_step = time_step
env_rend.setupSim()
env_rend.video_path = "/home/pasquale/Desktop/thesis/thesis-code/2D_Acrobot/ddpg/Video"
env_rend.LOGDATA = 1
rendertrial(env_rend)
env_rend.stopSim()

## SAVE DATA ##
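# --- Hedged sketch: one way to persist the histories collected above. ---
# Only assumptions beyond this file: the current directory is writable and
# the file names are hypothetical. np.save stores each list as a .npy array.
np.save("h_rwd.npy", np.array(h_rwd))  # per-episode cumulative reward
np.save("h_qva.npy", np.array(h_qva))  # per-episode max Q-value estimate
np.save("h_ste.npy", np.array(h_ste))  # per-episode step count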