Example #1
0
        j += 1
        h_sum_last += -angle_normalize(robot.states[1][3])**2

    if i >= NSTEPS - 50:
        if angle_normalize(robot.states[1][3]) * 180 / np.pi > max_after_up:
            max_after_up = angle_normalize(robot.states[1][3]) * 180 / np.pi

    angles_list.append(angle_normalize(robot.states[1][3]) * 180 / np.pi)
    vel_ang_list.append(robot.states_dot[1][3])

# Mean of the rewards accumulated in h_sum_last over the j counted steps.
# Guard the division: if no step was counted (j == 0) a plain division would
# raise ZeroDivisionError and abort before the simulation is stopped, so
# report NaN instead.
h_mean_last = h_sum_last / j if j else float("nan")

# max_after_up is the largest pendulum angle (in degrees) seen during the
# last 50 steps of the run.
print("up position reached at step {}, mean reward last steps after up "
      "reached: {}, angle is lower than {} in the last 50 steps"
      .format(first_step_up_1, h_mean_last, max_after_up))

robot.stopSim()

# Result noted from an earlier run: mean return over the last 20 steps was
# -2.2419180192174818e-07, and the top was first reached at step 30.

# Evaluate the policy with (10x) random reset.
# Save the step at which the vertical position is reached (also with random
# reset).

# Build a fresh simulation wrapper and apply its settings, in a fixed order,
# before the simulation is started.
robot = Robot("single_pendulum.urdf")
for _option, _setting in (
    ("sim_number", 1),
    ("RANDSET", 1),  # presumably enables the random reset -- confirm in Robot
    ("GUI_ENABLED", 1),
    ("SINCOS", 1),
    ("time_step", time_step),
):
    setattr(robot, _option, _setting)

# Directory path kept for the evaluation step; it is not used in this part of
# the script.
path_eval = "/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/ddpg/eval/"

robot.setupSim()
Example #2
0
        maxq = np.max( sess.run(qvalue.qvalue,feed_dict={ qvalue.x : x_batch,
                                                          qvalue.u : u_batch }) ) \
                                                          if 'x_batch' in locals() else 0
        print('Ep#{:3d}: lasted {:d} steps, reward={:3.0f}, max qvalue={:2.3f}' \
            .format(episode, step,rsum, maxq))
        h_rwd.append(rsum) 
        h_qva.append(maxq)
        h_ste.append(step)
        # env_rend.setupSIm()
        # if not (episode+1) % 15:     rendertrial(env_rend)

    # \\\END_FOR episode in range(NEPISODES)
    end_time=time.time()
    elapsed_time = end_time-start_time
    print('elapsed '+str(elapsed_time)+'s')
    env.stopSim()

    print("Average reward during trials: %.3f" % (sum(h_rwd)/NEPISODES))

    env_rend.SINCOS = 1
    env_rend.GUI_ENABLED = 1
    env_rend.time_step = time_step
    env_rend.setupSim()
    env_rend.video_path = "/home/pasquale/Desktop/thesis/thesis-code/2D_Acrobot/ddpg/Video"
    env_rend.LOGDATA=1   ####@@@@@@@@@@@@@@@@############@@@@@@@@@@@@@@@@@@@@#############@@@@@@@@
    rendertrial(env_rend)
    env_rend.stopSim()



    ##   SAVE DATA  ##