Example #1
env.robot.stopSim()  # shut down the training simulation before replaying the policy


#mean_reward, std_reward = evaluate_policy(model, env_eval, n_eval_episodes=10)
#print(f'Mean reward: {mean_reward} +/- {std_reward:.2f}')

  
### save video
# model = DDPG.load("ddpg_pendulum_stb_baselines")

robot = Robot("single_pendulum.urdf")
robot.sim_number=SIM_NUMBER
RANDSET =0
robot.LOGDATA = 1
robot.SINCOS=1
robot.video_path = "/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/stable_baselines/Video"
path_log= "/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/stable_baselines/"
robot.time_step = time_step
robot.setupSim()
for i in range(NSTEPS):
        
         obs = np.array([robot.states_sincos[1][0],robot.states_sincos[1][1],robot.states_dot[1][3]])
         action, _states = model.predict(obs)
         action=action.tolist()
         robot.simulateDyn(action)
         time.sleep(0.05)
robot.stopSim()  

# Evaluate policy
#env.robot.stopSim()
#env = PendulumPyB()
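# A minimal evaluation sketch, fleshing out the commented-out lines above.
# It assumes stable-baselines' evaluate_policy is imported as in the training
# script and that PendulumPyB() gives a fresh evaluation environment:
env_eval = PendulumPyB()
mean_reward, std_reward = evaluate_policy(model, env_eval, n_eval_episodes=10)
print('Mean reward: {:.2f} +/- {:.2f}'.format(mean_reward, std_reward))
env_eval.robot.stopSim()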
Example #2
        # env_rend.setupSim()
        # if not (episode+1) % 15:     rendertrial(env_rend)

    # \\\END_FOR episode in range(NEPISODES)
    end_time = time.time()
    elapsed_time = end_time - start_time
    print('elapsed ' + str(elapsed_time) + 's')
    env.stopSim()

    print("Average reward during trials: %.3f" % (sum(h_rwd)/NEPISODES))

    # Replay one trial with the GUI enabled so it can be rendered and logged.
    env_rend.SINCOS = 1
    env_rend.GUI_ENABLED = 1
    env_rend.time_step = time_step
    env_rend.setupSim()
    env_rend.video_path = "/home/pasquale/Desktop/thesis/thesis-code/2D_Acrobot/ddpg/Video"
    env_rend.LOGDATA = 1
    rendertrial(env_rend)
    env_rend.stopSim()
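    # rendertrial is defined earlier in the original script and is not shown
    # here; the following is a hypothetical minimal version, assuming the
    # environment follows the usual reset()/step() pattern and that `policy`
    # wraps the trained actor (all of these names are assumptions):
    def rendertrial_sketch(env, policy, maxiter=100):  # horizon is arbitrary
        x = env.reset()
        rsum = 0.0
        for _ in range(maxiter):
            u = policy(x)            # greedy action from the trained actor
            x, reward = env.step(u)  # assumed (state, reward) return
            rsum += reward
        print('Trial reward: %.3f' % rsum)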



    ##   SAVE DATA  ##
    filepath = '/home/pasquale/Desktop/thesis/thesis-code/2D_Acrobot/ddpg/'

    # Dump the per-episode reward history as JSON.
    with open(filepath + 'hrwd{}.txt'.format(SIM_NUMBER), 'w') as f:
        f.write(json.dumps(h_rwd))
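    # Reading the reward history back for analysis mirrors the dump above;
    # a minimal sketch using the same file-naming scheme:
    with open(filepath + 'hrwd{}.txt'.format(SIM_NUMBER)) as f:
        h_rwd_loaded = json.loads(f.read())
    print('Loaded %d episode rewards' % len(h_rwd_loaded))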
    
    
Example #3
print('elapsed ' + str(elapsed_time) + 's')
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=21)
env.robot.stopSim()

#mean_reward, std_reward = evaluate_policy(model, env_eval, n_eval_episodes=10)
#print(f'Mean reward: {mean_reward} +/- {std_reward:.2f}')

### save video
# model = DDPG.load("ddpg_pendulum_stb_baselines")

robot = Robot("double_pendulum.urdf")
robot.sim_number = SIM_NUMBER
RANDSET = 0
robot.LOGDATA = 1
robot.SINCOS = 1
robot.video_path = "/home/pasquale/Desktop/thesis/thesis-code/2D_Acrobot/stable_baselines/Video"
path_log = "/home/pasquale/Desktop/thesis/thesis-code/2D_Acrobot/stable_baselines/"
robot.time_step = time_step
robot.setupSim()
# Replay the trained policy on the double pendulum: the observation stacks
# (sin, cos, angular velocity) for both joints.
for i in range(NSTEPS):

    obs = np.array([
        robot.states_sincos[1][0], robot.states_sincos[1][1],
        robot.states_dot[1][3], robot.states_sincos[2][0],
        robot.states_sincos[2][1], robot.states_dot[2][3]
    ])
    action, _states = model.predict(obs)
    action = action.tolist()
    robot.simulateDyn(action)
    time.sleep(0.05)
robot.stopSim()
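# A hedged helper (not in the original code) that builds the same observation
# vector as the loop above: (sin, cos) of each joint angle followed by its
# velocity, read from the robot's per-joint state arrays. Index 3 of
# states_dot is assumed to be the joint's angular velocity, as used above.
def get_obs(robot, joints=(1, 2)):
    parts = []
    for j in joints:
        parts += [robot.states_sincos[j][0],   # sin(theta_j)
                  robot.states_sincos[j][1],   # cos(theta_j)
                  robot.states_dot[j][3]]      # angular velocity of joint j
    return np.array(parts)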
Example #4
        h_ste.append(step)
        # if not (episode+1) % 15:     rendertrial()

    # \\\END_FOR episode in range(NEPISODES)
    end_time = time.time()
    elapsed_time = end_time - start_time
    print('elapsed ' + str(elapsed_time) + 's')
    env.stopSim()

    print("Average reward during trials: %.3f" % (sum(h_rwd)/NEPISODES))

    env_rend.SINCOS = 1
    env_rend.GUI_ENABLED = 1
    env_rend.time_step = time_step
    env_rend.setupSim()
    env_rend.video_path = "/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/ddpg/Video"
    env_rend.LOGDATA = 1
    rendertrial()
    env_rend.stopSim()



    ##   SAVE DATA  ##
    filepath = '/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/ddpg/'

    # Dump the per-episode reward history as JSON.
    with open(filepath + 'hrwd{}.txt'.format(SIM_NUMBER), 'w') as f:
        f.write(json.dumps(h_rwd))

    with open(filepath + 'config{}.txt'.format(SIM_NUMBER), 'w') as f: