# Evaluation rollout: run the trained TensorFlow policy on the pendulum for
# NSTEPS and collect swing-up statistics.  Relies on module-level state set
# before this chunk: `robot` (simulator), `sess`/`policy` (TF graph),
# `vel_ang_list`, and the accumulators `c`, `h_sum_last`, `first_step_up_1`,
# `step_max` — assumed initialized earlier in the script (TODO confirm).
vel_ang_list.append(robot.states_dot[1][3])  # record joint angular velocity
action_list = []       # torque applied at each step
up_reach = False       # becomes True once the pendulum is within 1 deg of upright
j = 0                  # number of steps elapsed after first reaching upright
max_after_up = 0
for i in range(NSTEPS):
    # Observation: (1, 3) batch of [sin(theta), cos(theta), theta_dot].
    x = np.array([[
        robot.states_sincos[1][0], robot.states_sincos[1][1],
        robot.states_dot[1][3]
    ]])
    # Greedy action from the deterministic policy network.
    action = sess.run(policy.policy, feed_dict={policy.x: x})
    action = action.tolist()
    robot.simulateDyn(action[0])  # step the physics with the batch's first action
    action_list.append(action)
    # First time the normalized angle drops below 1 degree: remember the step.
    if angle_normalize(
            robot.states[1][3]) < 1 * np.pi / 180 and up_reach == False:
        first_step_up_1 = i
        up_reach = True
    # Track the best (smallest) squared angle error seen and when it occurred.
    if angle_normalize(robot.states[1][3])**2 < c:
        c = angle_normalize(robot.states[1][3])**2
        step_max = i
    # Once upright has been reached, accumulate the (negative) squared angle
    # error so the average stabilization quality can be computed later.
    if up_reach == True:
        j += 1
        h_sum_last += -angle_normalize(robot.states[1][3])**2
## Training Loop ## start_time = time.time() for episode in range(1,NEPISODES): env.resetRobot() x = np.array([[env.states_sincos[1][0],env.states_sincos[1][1],env.states_dot[1][3], env.states_sincos[2][0],env.states_sincos[2][1],env.states_dot[2][3]]]) rsum = 0.0 for step in range(NSTEPS): u = sess.run(policy.policy, feed_dict={ policy.x: x }) # Greedy policy ... #u += np.random.uniform(-range_esp,range_esp) / (1. + episode/epi_expl + step/step_expl ) #add gaussian noise # ... with noise u += np.random.normal(loc=0.0,scale=range_esp) #print(u[0][0]) env.simulateDyn([u[0][0]]) x2 = np.array([env.states_sincos[1][0],env.states_sincos[1][1],env.states_dot[1][3], env.states_sincos[2][0],env.states_sincos[2][1],env.states_dot[2][3]]) #print(x2) r = -np.linalg.norm(goal-np.array([env.states[2][0],env.states[2][1],env.states[2][2]])) done = False # pendulum scenario is endless. #print(r) replayDeque.append(ReplayItem(x,u,r,done,x2)) # Feed replay memory ... if len(replayDeque)>REPLAY_SIZE: replayDeque.popleft() # ... with FIFO forgetting. rsum += r if done or np.linalg.norm(x-x2)<1e-3: break # Break when pendulum is still. x = [x2] # Start optimizing networks when memory size > batch size. if len(replayDeque) > BATCH_SIZE:
# Evaluation rollout (stable-baselines variant): query `model.predict` for
# actions instead of a raw TF session, otherwise the same swing-up statistics
# as the TF rollout.  Relies on module-level state set before this chunk:
# `robot`, `model`, `vel_ang_list`, and the accumulators `c`, `h_sum_last`,
# `first_step_up_1`, `step_max` — assumed initialized earlier (TODO confirm).
vel_ang_list.append(robot.states_dot[1][3])  # record joint angular velocity
action_list = []       # torque applied at each step
up_reach = False       # becomes True once the pendulum is within 1 deg of upright
j = 0                  # number of steps elapsed after first reaching upright
max_after_up = 0
for i in range(NSTEPS):
    # Observation: 1-D [sin(theta), cos(theta), theta_dot] (no batch dim here).
    obs = np.array([
        robot.states_sincos[1][0], robot.states_sincos[1][1],
        robot.states_dot[1][3]
    ])
    # Deterministic-by-default prediction; recurrent state is unused.
    action, _states = model.predict(obs)
    action = action.tolist()
    robot.simulateDyn(action)  # step the physics with the predicted torque
    action_list.append(action)
    #time.sleep(0.01)
    # First time the normalized angle drops below 1 degree: remember the step.
    if angle_normalize(
            robot.states[1][3]) < 1 * np.pi / 180 and up_reach == False:
        first_step_up_1 = i
        up_reach = True
    # Track the best (smallest) squared angle error seen and when it occurred.
    if angle_normalize(robot.states[1][3])**2 < c:
        c = angle_normalize(robot.states[1][3])**2
        step_max = i
    # Once upright has been reached, accumulate the (negative) squared angle
    # error so the average stabilization quality can be computed later.
    if up_reach == True:
        j += 1
        h_sum_last += -angle_normalize(robot.states[1][3])**2
class PendulumPyB(gym.Env):
    """Gym wrapper around a PyBullet single-pendulum simulation.

    Observation: [sin(theta), cos(theta), theta_dot] for the pendulum joint.
    Action: a single torque, declared in [-max_torque, max_torque]
    (NOTE(review): clipping is commented out in step(), so the declared
    bound is not actually enforced — confirm whether that is intended).
    Reward: weighted negative squared normalized angle, squared angular
    velocity, and squared torque (only the angle term is active with the
    default weights [1., 0.0, 0.0]).
    """
    metadata = {'render.modes': ['human']}

    def __init__(self):
        # Physical / action limits used to build the gym spaces below.
        self.max_speed = 1000.
        self.max_torque = 2.
        #self.dt = .05
        #self.viewer = None
        # Headless PyBullet simulation of the single pendulum URDF, with
        # sin/cos observation encoding enabled.
        self.robot = Robot("single_pendulum.urdf")
        self.robot.time_step = .01
        self.robot.GUI_ENABLED = 0
        self.robot.setupSim()
        self.robot.SINCOS = 1
        # Reward weights: [angle^2, theta_dot^2, torque^2].
        self.reward_weights = [1., 0.0, 0.0]
        self.step_count = 0
        # Fixed episode length taken from the training-config module `tc`.
        self.episode_duration = tc.NSTEPS
        # Observation bounds: sin/cos in [-1, 1], velocity in [-max_speed, max_speed].
        high = np.array([1., 1., self.max_speed], dtype=np.float32)
        self.action_space = spaces.Box(low=-self.max_torque,
                                       high=self.max_torque,
                                       shape=(1, ),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=-high,
                                            high=high,
                                            dtype=np.float32)

    def step(self, u):
        """Apply torque `u`, advance one simulation step.

        Returns (observation, reward, done, info); `done` becomes True every
        `episode_duration` steps (time-limit termination only).
        """
        #dt = self.dt
        self.step_count += 1
        end_episode = False
        #u = np.clip(u, -self.max_torque, self.max_torque)[0]
        # NOTE(review): the result of tolist() is discarded — this line is a
        # no-op, presumably a leftover from the removed clipping; u stays an
        # array and is indexed directly below.
        u.tolist()
        self.robot.simulateDyn([u[0]])
        # Weighted quadratic cost on angle error, angular velocity and torque
        # (negated to form a reward).
        reward = -angle_normalize(
            self.robot.states[1][3]
        )**2 * self.reward_weights[0] - self.robot.states_dot[1][
            3]**2 * self.reward_weights[1] - self.reward_weights[2] * (u**2)
        # End the episode exactly on multiples of the configured duration.
        if not self.step_count % self.episode_duration:
            end_episode = True
        return self._get_obs(), reward, end_episode, {}

    def reset(self):
        """Reset the simulated robot and return the initial observation."""
        self.robot.resetRobot()
        return self._get_obs()

    def _get_obs(self):
        """Refresh simulator state and return [sin(theta), cos(theta), theta_dot]."""
        self.robot.observeState()
        return np.array([
            self.robot.states_sincos[1][0], self.robot.states_sincos[1][1],
            self.robot.states_dot[1][3]
        ])

    # Dead code kept inside string literals below (render/close were disabled).
    """
    def render(self, mode='human'):
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
            rod = rendering.make_capsule(1, .2)
            rod.set_color(.8, .3, .3)
            self.pole_transform = rendering.Transform()
            rod.add_attr(self.pole_transform)
            self.viewer.add_geom(rod)
            axle = rendering.make_circle(.05)
            axle.set_color(0, 0, 0)
            self.viewer.add_geom(axle)
            fname = path.join(path.dirname(__file__), "assets/clockwise.png")
            self.img = rendering.Image(fname, 1., 1.)
            self.imgtrans = rendering.Transform()
            self.img.add_attr(self.imgtrans)
        self.viewer.add_onetime(self.img)
        self.pole_transform.set_rotation(self.state[0] + np.pi / 2)
        if self.last_u:
            self.imgtrans.scale = (-self.last_u / 2, np.abs(self.last_u) / 2)
        return self.viewer.render(return_rgb_array=mode == 'rgb_array')
    """
    """
    def close(self):