rsum += reward #print(rsum) print(x) if verbose: print('Lasted ',i,' timestep -- total reward:',rsum) signal.signal(signal.SIGTSTP, lambda x,y:rendertrial()) # Roll-out when CTRL-Z is pressed h_rwd = [] h_qva = [] h_ste = [] ## Envirnonment Configuration ## env.setupSim() ## Training Loop ## start_time = time.time() for episode in range(1,NEPISODES): env.resetRobot() x = np.array([[env.states_sincos[1][0],env.states_sincos[1][1],env.states_dot[1][3], env.states_sincos[2][0],env.states_sincos[2][1],env.states_dot[2][3]]]) rsum = 0.0 for step in range(NSTEPS): u = sess.run(policy.policy, feed_dict={ policy.x: x }) # Greedy policy ... #u += np.random.uniform(-range_esp,range_esp) / (1. + episode/epi_expl + step/step_expl ) #add gaussian noise # ... with noise
# NOTE(review): this span was collapsed onto a single physical line in the
# source; reformatted into valid Python below. Statement content preserved;
# all runtime strings (including the 'trined_agents' path spelling) kept
# byte-for-byte.
SIM_NUMBER = 1.5
model_save = "/home/pasquale/Desktop/thesis/thesis-code/2D_Acrobot/ddpg/trined_agents/DDPG_saved_" + str(
    SIM_NUMBER) + ".chkpt"

# Restore the trained DDPG agent into a TF1-style interactive session.
sess = tf.compat.v1.InteractiveSession()
tf.compat.v1.global_variables_initializer().run()
tf.compat.v1.train.Saver().restore(sess, model_save)

# Configure the evaluation robot: deterministic start, headless, sin/cos obs.
robot = Robot("single_pendulum.urdf")
robot.sim_number = SIM_NUMBER
robot.RANDSET = 0
robot.GUI_ENABLED = 0
robot.SINCOS = 1
path_log = "/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/continuous/"
robot.time_step = time_step
robot.setupSim()

#Evaluate policy
#env.robot.stopSim()
#env = PendulumPyB()

#Check convergence
c = 100000000
h_sum_last = 0
# Log initial joint angle (degrees) and angular velocity.
angles_list = []
angles_list.append(angle_normalize(robot.states[1][3]) * 180 / np.pi)
vel_ang_list = []
vel_ang_list.append(robot.states_dot[1][3])
class PendulumPyB(gym.Env): metadata = {'render.modes': ['human']} def __init__(self): self.max_speed = 1000. self.max_torque = 2. #self.dt = .05 #self.viewer = None self.robot = Robot("single_pendulum.urdf") self.robot.time_step = .01 self.robot.GUI_ENABLED = 0 self.robot.setupSim() self.robot.SINCOS = 1 self.reward_weights = [1., 0.0, 0.0] self.step_count = 0 self.episode_duration = tc.NSTEPS high = np.array([1., 1., self.max_speed], dtype=np.float32) self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1, ), dtype=np.float32) self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32) def step(self, u): #dt = self.dt self.step_count += 1 end_episode = False #u = np.clip(u, -self.max_torque, self.max_torque)[0] u.tolist() self.robot.simulateDyn([u[0]]) reward = -angle_normalize( self.robot.states[1][3] )**2 * self.reward_weights[0] - self.robot.states_dot[1][ 3]**2 * self.reward_weights[1] - self.reward_weights[2] * (u**2) if not self.step_count % self.episode_duration: end_episode = True return self._get_obs(), reward, end_episode, {} def reset(self): self.robot.resetRobot() return self._get_obs() def _get_obs(self): self.robot.observeState() return np.array([ self.robot.states_sincos[1][0], self.robot.states_sincos[1][1], self.robot.states_dot[1][3] ]) """ def render(self, mode='human'): if self.viewer is None: from gym.envs.classic_control import rendering self.viewer = rendering.Viewer(500, 500) self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2) rod = rendering.make_capsule(1, .2) rod.set_color(.8, .3, .3) self.pole_transform = rendering.Transform() rod.add_attr(self.pole_transform) self.viewer.add_geom(rod) axle = rendering.make_circle(.05) axle.set_color(0, 0, 0) self.viewer.add_geom(axle) fname = path.join(path.dirname(__file__), "assets/clockwise.png") self.img = rendering.Image(fname, 1., 1.) 
self.imgtrans = rendering.Transform() self.img.add_attr(self.imgtrans) self.viewer.add_onetime(self.img) self.pole_transform.set_rotation(self.state[0] + np.pi / 2) if self.last_u: self.imgtrans.scale = (-self.last_u / 2, np.abs(self.last_u) / 2) return self.viewer.render(return_rgb_array=mode == 'rgb_array') """ """ def close(self):