Example #1
vel_ang_list.append(robot.states_dot[1][3])
action_list = []

up_reach = False          # becomes True once the pendulum first reaches upright
j = 0                     # steps elapsed after reaching upright
max_after_up = 0
c = np.inf                # smallest squared angle error seen so far
h_sum_last = 0.0          # accumulated negative squared angle error after upright

for i in range(NSTEPS):

    # Observation: [sin(theta), cos(theta), theta_dot] of joint 1.
    x = np.array([[
        robot.states_sincos[1][0], robot.states_sincos[1][1],
        robot.states_dot[1][3]
    ]])
    # Greedy action from the trained policy network.
    action = sess.run(policy.policy, feed_dict={policy.x: x})
    action = action.tolist()
    robot.simulateDyn(action[0])
    action_list.append(action)

    # Record the first step at which the pendulum is within 1 degree of upright.
    if abs(angle_normalize(robot.states[1][3])) < np.pi / 180 and not up_reach:
        first_step_up_1 = i
        up_reach = True

    # Track the step with the smallest squared angle error.
    if angle_normalize(robot.states[1][3])**2 < c:
        c = angle_normalize(robot.states[1][3])**2
        step_max = i

    # Accumulate the cost once the pendulum has been upright at least once.
    if up_reach:
        j += 1
        h_sum_last += -angle_normalize(robot.states[1][3])**2
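
This and the following examples call an angle_normalize helper that is not shown. A minimal sketch, assuming it wraps an angle to [-pi, pi) as in Gym's classic pendulum:

import numpy as np

def angle_normalize(x):
    # Wrap an angle in radians into [-pi, pi).
    return ((x + np.pi) % (2 * np.pi)) - np.pi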
Example #2
    ##  Training Loop ##
    start_time = time.time()
    for episode in range(1,NEPISODES):
        env.resetRobot()
        # Observation: [sin(q), cos(q), q_dot] for joints 1 and 2.
        x = np.array([[env.states_sincos[1][0],env.states_sincos[1][1],env.states_dot[1][3],
                       env.states_sincos[2][0],env.states_sincos[2][1],env.states_dot[2][3]]])
        
        rsum = 0.0 
 
        for step in range(NSTEPS):
            u       = sess.run(policy.policy, feed_dict={ policy.x: x })  # Greedy action from the actor network ...
            u      += np.random.normal(loc=0.0, scale=range_esp)          # ... with additive Gaussian exploration noise.
            # Alternative: uniform noise decayed over episodes and steps:
            # u   += np.random.uniform(-range_esp, range_esp) / (1. + episode/epi_expl + step/step_expl)

            env.simulateDyn([u[0][0]])                                    # Apply the torque for one simulation step.
            x2 = np.array([env.states_sincos[1][0],env.states_sincos[1][1],env.states_dot[1][3],
                            env.states_sincos[2][0],env.states_sincos[2][1],env.states_dot[2][3]])   
            # Reward: negative Euclidean distance from the end-effector to the goal position.
            r = -np.linalg.norm(goal - np.array([env.states[2][0], env.states[2][1], env.states[2][2]]))
            done    = False                                              # Pendulum scenario is endless.
            replayDeque.append(ReplayItem(x,u,r,done,x2))                # Feed replay memory ...
            if len(replayDeque)>REPLAY_SIZE: replayDeque.popleft()       # ... with FIFO forgetting.

            rsum   += r
            if done or np.linalg.norm(x-x2)<1e-3: break                  # Break when pendulum is still.
            x       = np.array([x2])                                     # Next state, reshaped to (1, 6) for the policy input.

            # Start optimizing the networks once the replay memory holds more than one batch.
            if len(replayDeque) > BATCH_SIZE:
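
The loop above relies on a replay buffer defined elsewhere; a minimal sketch of the assumed pieces, with ReplayItem as a plain record and replayDeque as a deque (goal, the target end-effector position, is likewise defined outside the snippet):

from collections import deque, namedtuple

# One transition: state, action, reward, done flag, next state.
ReplayItem = namedtuple('ReplayItem', ['x', 'u', 'r', 'done', 'x2'])

replayDeque = deque()   # FIFO replay memory, trimmed to REPLAY_SIZE in the loop above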
Example #3
vel_ang_list.append(robot.states_dot[1][3])
action_list = []

up_reach = False
j = 0
max_after_up = 0
c = np.inf                # smallest squared angle error seen so far
h_sum_last = 0.0          # accumulated negative squared angle error after upright

for i in range(NSTEPS):

    # Observation: [sin(theta), cos(theta), theta_dot] of joint 1.
    obs = np.array([
        robot.states_sincos[1][0], robot.states_sincos[1][1],
        robot.states_dot[1][3]
    ])
    # Action from the trained Stable Baselines model.
    action, _states = model.predict(obs)
    action = action.tolist()
    robot.simulateDyn(action)
    action_list.append(action)
    #time.sleep(0.01)

    # Record the first step at which the pendulum is within 1 degree of upright.
    if abs(angle_normalize(robot.states[1][3])) < np.pi / 180 and not up_reach:
        first_step_up_1 = i
        up_reach = True

    # Track the step with the smallest squared angle error.
    if angle_normalize(robot.states[1][3])**2 < c:
        c = angle_normalize(robot.states[1][3])**2
        step_max = i

    if up_reach:
        j += 1
        h_sum_last += -angle_normalize(robot.states[1][3])**2
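
Here model is a trained Stable Baselines agent; a minimal sketch of how it might be restored or trained (the algorithm and checkpoint name are assumptions, not shown in the example):

from stable_baselines import PPO2   # any algorithm exposing .predict() would do

model = PPO2.load("pendulum_policy")   # hypothetical checkpoint name
# or train from scratch on the PyBullet env of Example #4:
# model = PPO2("MlpPolicy", PendulumPyB(), verbose=1)
# model.learn(total_timesteps=100000)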
Example #4
import gym
import numpy as np
from gym import spaces

# Robot, angle_normalize and the config module tc are project-local imports (not shown here).


class PendulumPyB(gym.Env):

    metadata = {'render.modes': ['human']}

    def __init__(self):
        self.max_speed = 1000.
        self.max_torque = 2.
        #self.dt = .05
        #self.viewer = None

        self.robot = Robot("single_pendulum.urdf")
        self.robot.time_step = .01
        self.robot.GUI_ENABLED = 0
        self.robot.setupSim()
        self.robot.SINCOS = 1
        self.reward_weights = [1., 0.0, 0.0]  # weights for the [angle, velocity, torque] penalty terms

        self.step_count = 0
        self.episode_duration = tc.NSTEPS

        high = np.array([1., 1., self.max_speed], dtype=np.float32)  # bounds for [sin(theta), cos(theta), theta_dot]
        self.action_space = spaces.Box(low=-self.max_torque,
                                       high=self.max_torque,
                                       shape=(1, ),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=-high,
                                            high=high,
                                            dtype=np.float32)

    def step(self, u):

        self.step_count += 1
        end_episode = False
        #u = np.clip(u, -self.max_torque, self.max_torque)[0]
        self.robot.simulateDyn([u[0]])
        # Reward: weighted penalties on squared angle error, squared velocity and squared torque.
        reward = (-angle_normalize(self.robot.states[1][3])**2 * self.reward_weights[0]
                  - self.robot.states_dot[1][3]**2 * self.reward_weights[1]
                  - self.reward_weights[2] * u[0]**2)

        # End the episode after every NSTEPS simulation steps.
        if not self.step_count % self.episode_duration: end_episode = True
        return self._get_obs(), reward, end_episode, {}

    def reset(self):
        self.robot.resetRobot()
        return self._get_obs()

    def _get_obs(self):
        self.robot.observeState()
        return np.array([
            self.robot.states_sincos[1][0], self.robot.states_sincos[1][1],
            self.robot.states_dot[1][3]
        ])

    """ def render(self, mode='human'):
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
            rod = rendering.make_capsule(1, .2)
            rod.set_color(.8, .3, .3)
            self.pole_transform = rendering.Transform()
            rod.add_attr(self.pole_transform)
            self.viewer.add_geom(rod)
            axle = rendering.make_circle(.05)
            axle.set_color(0, 0, 0)
            self.viewer.add_geom(axle)
            fname = path.join(path.dirname(__file__), "assets/clockwise.png")
            self.img = rendering.Image(fname, 1., 1.)
            self.imgtrans = rendering.Transform()
            self.img.add_attr(self.imgtrans)

        self.viewer.add_onetime(self.img)
        self.pole_transform.set_rotation(self.state[0] + np.pi / 2)
        if self.last_u:
            self.imgtrans.scale = (-self.last_u / 2, np.abs(self.last_u) / 2)

        return self.viewer.render(return_rgb_array=mode == 'rgb_array') """
    """ def close(self):
        if self.viewer:
            self.viewer.close()
            self.viewer = None """