env.robot.stopSim() #mean_reward, std_reward = evaluate_policy(model, env_eval, n_eval_episodes=10) #print(f'Mean reward: {mean_reward} +/- {std_reward:.2f}') ### save video # model = DDPG.load("ddpg_pendulum_stb_baselines") robot = Robot("single_pendulum.urdf") robot.sim_number=SIM_NUMBER RANDSET =0 robot.LOGDATA = 1 robot.SINCOS=1 robot.video_path = "/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/stable_baselines/Video" path_log= "/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/stable_baselines/" robot.time_step = time_step robot.setupSim() for i in range(NSTEPS): obs = np.array([robot.states_sincos[1][0],robot.states_sincos[1][1],robot.states_dot[1][3]]) action, _states = model.predict(obs) action=action.tolist() robot.simulateDyn(action) time.sleep(0.05) robot.stopSim() #Evaluate policy #env.robot.stopSim() #env = PendulumPyB()
# env_rend.setupSIm() # if not (episode+1) % 15: rendertrial(env_rend) # \\\END_FOR episode in range(NEPISODES) end_time=time.time() elapsed_time = end_time-start_time print('elapsed '+str(elapsed_time)+'s') env.stopSim() print("Average reward during trials: %.3f" % (sum(h_rwd)/NEPISODES)) env_rend.SINCOS = 1 env_rend.GUI_ENABLED = 1 env_rend.time_step = time_step env_rend.setupSim() env_rend.video_path = "/home/pasquale/Desktop/thesis/thesis-code/2D_Acrobot/ddpg/Video" env_rend.LOGDATA=1 ####@@@@@@@@@@@@@@@@############@@@@@@@@@@@@@@@@@@@@#############@@@@@@@@ rendertrial(env_rend) env_rend.stopSim() ## SAVE DATA ## filepath = '/home/pasquale/Desktop/thesis/thesis-code/2D_Acrobot/ddpg/' f=open(filepath + 'hrwd{}.txt'.format(SIM_NUMBER), 'w') f.write(json.dumps(h_rwd)) f.close()
# --- Evaluation / video-capture phase for the 2-DOF double pendulum
# --- (stable-baselines model): score the policy, then replay it on a fresh
# --- Robot instance with logging and video capture enabled.
print('elapsed ' + str(elapsed_time) + 's')
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=21)
env.robot.stopSim()
#mean_reward, std_reward = evaluate_policy(model, env_eval, n_eval_episodes=10)
#print(f'Mean reward: {mean_reward} +/- {std_reward:.2f}')
### save video
# model = DDPG.load("ddpg_pendulum_stb_baselines")
robot = Robot("double_pendulum.urdf")
robot.sim_number = SIM_NUMBER
# NOTE(review): RANDSET is assigned as a bare module-level name, not
# robot.RANDSET like the surrounding attributes — possibly intended as a
# robot attribute; confirm.
RANDSET = 0
robot.LOGDATA = 1   # enable on-disk logging of this rollout
robot.SINCOS = 1    # report joint state as sin/cos pairs (used in obs below)
robot.video_path = "/home/pasquale/Desktop/thesis/thesis-code/2D_Acrobot/stable_baselines/Video"
path_log = "/home/pasquale/Desktop/thesis/thesis-code/2D_Acrobot/stable_baselines/"
robot.time_step = time_step
robot.setupSim()
# Replay loop: 6-element observation — sin/cos and a velocity term for each
# of the two joints (exact state indices depend on Robot's layout; verify),
# fed to the policy, whose action drives the dynamics.
for i in range(NSTEPS):
    obs = np.array([
        robot.states_sincos[1][0], robot.states_sincos[1][1],
        robot.states_dot[1][3],
        robot.states_sincos[2][0], robot.states_sincos[2][1],
        robot.states_dot[2][3]
    ])
    action, _states = model.predict(obs)
    action = action.tolist()   # simulateDyn is fed a plain list, not an ndarray
    robot.simulateDyn(action)
    time.sleep(0.05)   # slow the replay down so the rendering/video is watchable
robot.stopSim()
h_ste.append(step) # if not (episode+1) % 15: rendertrial() # \\\END_FOR episode in range(NEPISODES) end_time=time.time() elapsed_time = end_time-start_time print('elapsed '+str(elapsed_time)+'s') env.stopSim() print("Average reward during trials: %.3f" % (sum(h_rwd)/NEPISODES)) env_rend.SINCOS = 1 env_rend.GUI_ENABLED = 1 env_rend.time_step = time_step env_rend.setupSim() env_rend.video_path = "/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/ddpg/Video" env_rend.LOGDATA=1 ####@@@@@@@@@@@@@@@@############@@@@@@@@@@@@@@@@@@@@#############@@@@@@@@ rendertrial() env_rend.stopSim() ## SAVE DATA ## filepath = '/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/ddpg/' f=open(filepath + 'hrwd{}.txt'.format(SIM_NUMBER), 'w') f.write(json.dumps(h_rwd)) f.close() f=open(filepath + 'config{}.txt'.format(SIM_NUMBER), 'w')