예제 #1
0
def latency(arm_angle, last_arm_angle, pole_angle, last_pole_angle, step):
    """Back-date observed angles and velocities to model real-world latency.

    Each angle is shifted by the rotation that occurred during its estimated
    delay window, and velocities are recomputed over the shortened interval,
    so the returned observation looks like what the real rig would report.

    Returns [arm_angle, arm_vel, pole_angle, pole_vel], all jittered.
    """
    set_delay = fetchSetDelay()
    get_delay = fetchGetDelay()
    # Vary the NN prediction time by +/- 20%; always draw the dither so the
    # random sequence is consumed even when latency simulation is off.
    nn_delay = cm.dither(0.00175, 0.2)
    if not S2R_LATENCY:
        nn_delay = 0

    d_pole = cm.difRads(last_pole_angle, pole_angle)
    d_arm = cm.difRads(last_arm_angle, arm_angle)

    # Effective lag differs per sensor: the pole read happens earlier in the
    # get cycle than the arm read.
    lag_pole = get_delay / 4 + nn_delay + set_delay / 2
    lag_arm = get_delay * (3 / 4) + nn_delay + set_delay / 2

    # Angle traveled during each lag window, assuming constant velocity.
    shift_pole = d_pole * (lag_pole / step)
    shift_arm = d_arm * (lag_arm / step)

    arm_out = cm.rad2Norm(arm_angle - shift_arm)
    pole_out = cm.rad2Norm(pole_angle - shift_pole)
    arm_vel_out = (d_arm - shift_arm) / (step - lag_arm)
    pole_vel_out = (d_pole - shift_pole) / (step - lag_pole)

    return [arm_out, arm_vel_out, pole_out, pole_vel_out]
예제 #2
0
    def reset(self, position=PI, velocity=0, arm_vel=0):
        """Rebuild the simulation world and return the initial observation.

        Args:
            position: initial pole joint angle in radians (PI = hanging down,
                presumably — confirm against URDF joint zero).
            velocity: initial pole joint angular velocity.
            arm_vel: initial arm joint angular velocity (arm angle starts at 0).

        Returns:
            np.array observation from compute_observation(), with the angle
            entries (indices 0 and 2) passed through cm.rad2Norm.
        """
        # Clear per-episode bookkeeping.
        self.throttle = 0
        self.action = None
        self._envStepCounter = 0

        # Fresh random arm setpoint for this episode.
        self.arm_angle_target = np.random.uniform(0, 2 * PI)
        #self.arm_vel_target = np.random.uniform(-6*PI, 6*PI) # velocity rad/s, 3Hz

        self.arm_angle_real = 0
        self.arm_vel_real = 0

        self.arm_angle_jittered = 0
        self.arm_vel_jittered = 0

        self.pole_angle_real = 0
        self.pole_vel_real = 0

        # Tear down and rebuild the PyBullet world from scratch.
        p.resetSimulation()
        p.setGravity(0, 0, -9.8)  # m/s^2
        p.setTimeStep(cm.STEP)  # sec
        planeId = p.loadURDF("plane.urdf")

        # Load the robot URDF relative to this file's directory.
        path = os.path.abspath(os.path.dirname(__file__))
        self.botId = p.loadURDF(os.path.join(path, "furuta.urdf"),
                                useFixedBase=1,
                                flags=p.URDF_USE_INERTIA_FROM_FILE)

        # Resolve joint indices by name so URDF reordering can't break us.
        numJoints = p.getNumJoints(self.botId)
        joints = {}
        for j in range(numJoints):
            joint = p.getJointInfo(self.botId, j)
            joints[joint[1]] = j
        self.armId = joints[b'pillar_to_arm']
        self.poleId = joints[b'axle_to_rod']

        # Sim-to-real: perturb inertia/damping (presumably domain
        # randomization — see s2r.ditherInertia).
        s2r.ditherInertia(self.botId, DITHER, DAMPING)

        p.resetJointState(bodyUniqueId=self.botId,
                          jointIndex=self.poleId,
                          targetValue=position,
                          targetVelocity=velocity)

        p.resetJointState(bodyUniqueId=self.botId,
                          jointIndex=self.armId,
                          targetValue=0,
                          targetVelocity=arm_vel)

        # Velocity control with force=0 is the standard PyBullet idiom for
        # disabling the default joint motor so torques can act freely.
        p.setJointMotorControl2(bodyUniqueId=self.botId,
                                jointIndex=self.armId,
                                controlMode=p.VELOCITY_CONTROL,
                                force=0)

        obs = self.compute_observation()
        # Normalize the angle entries of the observation.
        obs[0] = cm.rad2Norm(obs[0])
        obs[2] = cm.rad2Norm(obs[2])

        # Seed the latency-simulation buffers with the fresh observation.
        s2r.networkReset(obs)

        return np.array(obs)
예제 #3
0
 def compute_done(self):
     """Episode ends on step-budget exhaustion, excessive pole tilt, or overspeed."""
     too_fast = self.overspeed()
     out_of_steps = self._envStepCounter >= 1000
     tilt_limit = cm.deg2Rad(cm.ANGLE_TERMINAL_MAX_D)
     tilted = abs(cm.rad2Norm(self.pole_angle_real)) > tilt_limit
     #excessTravel = abs(self.arm_angle_real) > 0.8 * cm.PI # tmp - simple reward
     return out_of_steps or tilted or too_fast  #or excessTravel
 def compute_reward(self, action, done=False):
     """Uprightness reward minus throttle and pole-speed costs, plus a
     finish bonus when the episode ends without an overspeed fault."""
     too_fast = self.overspeed()
     bonus = 1000 - self._envStepCounter if done and not too_fast else 0
     upright = math.cos(abs(cm.rad2Norm(self.pole_angle_real)))
     effort = abs(self.decodeAction(action)) * 0.001
     speed_cost = abs(self.pole_vel_real) / cm.VEL_MAX_POLE
     return bonus + upright - effort - speed_cost
예제 #5
0
    def compute_reward_x(self, action, done=False):
        """Experimental reward: only the pole-uprightness term is active.

        The arm-position, arm-velocity, and activation penalty terms are
        all zeroed out (disabled) pending tuning; they are kept as named
        zeros so they can be re-enabled easily.
        """
        upright = math.cos(abs(cm.rad2Norm(self.pole_angle_real)))

        # target position (disabled): cos(difRads(arm, target)) * 0.1
        arm_term = 0
        # disabled: |arm_vel| / VEL_MAX_ARM * 0.00001
        arm_speed_cost = 0

        # disabled: |decodeAction(action)| * 0.0001
        throttle_cost = 0
        # disabled: |activation_buf[0] - activation_buf[1]| * 0.5 * 0.1
        throttle_jerk_cost = 0

        return upright + arm_term - arm_speed_cost - throttle_cost - throttle_jerk_cost
예제 #6
0
    def compute_reward(self, action, done=False):
        """Uprightness reward plus an early-finish bonus; cost terms disabled.

        The bonus is granted only when termination was not an overspeed
        fault, rewarding episodes that end early for a good reason.
        """
        too_fast = self.overspeed()
        bonus = 0
        if done and not too_fast:
            bonus = 1000 - self._envStepCounter
            #bonus = -self._envStepCounter

        upright = math.cos(abs(cm.rad2Norm(self.pole_angle_real)))

        # Disabled cost terms, kept as named zeros for easy re-enabling:
        pole_speed_cost = 0  # (|pole_vel| / VEL_MAX_POLE) * 1
        throttle_cost = 0  # |decodeAction(action)| * 0.0001
        jerk_cost = 0  # |activation_buf[0] - activation_buf[1]| * 0.5 * 0.1

        return bonus + upright - pole_speed_cost - throttle_cost - jerk_cost
    def compute_reward(self, action, done=False):
        """Reward balancing pole uprightness against arm targeting and effort.

        Terms:
          + cos(arm angle error to target)  -- arm near its target heading
          + cos(pole angle)                 -- pole upright
          - 2 * |arm_vel| / VEL_MAX_ARM     -- arm-speed penalty
          - |decoded throttle|              -- actuation penalty
        """
        arm_err = abs(cm.difRads(self.arm_angle_real, self.arm_angle_target))
        speed_cost = 2 * abs(self.arm_vel_real) / cm.VEL_MAX_ARM
        pole_err = abs(cm.rad2Norm(self.pole_angle_real))
        throttle_cost = abs(self.decodeAction(action))

        return math.cos(arm_err) + math.cos(pole_err) - speed_cost - throttle_cost
예제 #8
0
def updateObs(render=False):
    """Refresh the global observation from the encoders.

    Applies the calibrated pole offset, differentiates against the previous
    sample for velocities, and stores the result in the global ``obs`` as
    [arm, arm_vel, pole, pole_vel, ARM_TARGET_RAD].
    """
    global arm_last, pole_last, time_last, obs

    now = time.time()
    dt = now - time_last
    pole = getPole()
    arm = getArm()

    # Shift the pole reading by the calibration offset, staying wrapped.
    pole = cm.rad2Norm(cm.wrapRad(cm.norm2Rad(pole) + cm.norm2Rad(pole_offset)))

    arm_vel = getVel(arm, arm_last, dt)
    pole_vel = getVel(pole, pole_last, dt)

    # Remember this sample for the next differentiation.
    arm_last, pole_last, time_last = arm, pole, now

    if render:
        print("arm: %.6f %.6f \tpole: %.6f %.6f" % (arm, arm_vel, pole, pole_vel))
        print("arm: %.3f %.3f \t\tpole: %.3f %.3f" % (
            cm.rad2Deg(cm.norm2Rad(arm)),
            np.sign(arm_vel) * cm.rad2Deg(abs(arm_vel)),
            cm.rad2Deg(cm.norm2Rad(pole)),
            np.sign(pole_vel) * cm.rad2Deg(abs(pole_vel))))

    obs = [arm, arm_vel, pole, pole_vel, ARM_TARGET_RAD]
    return obs
예제 #9
0
def getObs(render=True):
    """Sample the encoders and return [arm, arm_vel, pole, pole_vel, ARM_TARGET_RAD]."""
    global arm_last, pole_last, time_last

    now = time.time()
    dt = now - time_last
    pole = getPole()
    arm = getArm()

    # Re-reference the pole: half-turn shift minus the calibration offset.
    pole = cm.rad2Norm(pole + cm.PI - pole_offset)

    arm_vel = getVel(arm, arm_last, dt)
    pole_vel = getVel(pole, pole_last, dt)

    # Remember this sample for the next differentiation.
    arm_last, pole_last, time_last = arm, pole, now

    if render:
        print("arm: %.4f %.4f \tpole: %.4f %.4f" %
              (arm, arm_vel, pole, pole_vel))

    return [arm, arm_vel, pole, pole_vel, ARM_TARGET_RAD]
예제 #10
0
def getPole():
    """Pole encoder reading, converted from encoder counts to normalized radians."""
    counts = axis1.encoder.pos_cpr
    return cm.rad2Norm(cm.cpr2Rad(counts))
예제 #11
0
def getArm():
    """Arm encoder reading, converted from encoder counts to normalized radians."""
    counts = axis0.encoder.pos_cpr
    return cm.rad2Norm(cm.cpr2Rad(counts))
예제 #12
0
def setPosition(rads):
    """Command the arm to an absolute angle (radians) and record the target.

    Side effect: updates the global ``pos_target`` with the normalized angle.
    """
    global pos_target
    axis0.controller.pos_setpoint = cm.rad2Cpr(rads)
    pos_target = cm.rad2Norm(rads)
예제 #13
0
 def compute_reward(self, action, done=False):
     """Base reward: pole uprightness minus a tiny actuation penalty.

     Implementations may override.
     """
     upright = math.cos(abs(cm.rad2Norm(self.pole_angle_real)))
     effort = abs(self.decodeAction(action)) * 0.0001
     return upright - effort
예제 #14
0
 def compute_reward(self, action, done=None):
     """Pole-uprightness reward with a small throttle penalty."""
     effort = abs(self.decodeAction(action)) * 0.0001
     upright = math.cos(abs(cm.rad2Norm(self.pole_angle_real)))
     return upright - effort
예제 #15
0
 def compute_done(self):
     """Done when the step budget is spent, the pole tilts past the terminal
     angle, or the rig overspeeds."""
     too_fast = self.overspeed()
     out_of_steps = self._envStepCounter >= 1000
     tilt_limit = cm.deg2Rad(cm.ANGLE_TERMINAL_MAX_D)
     tilted = abs(cm.rad2Norm(self.pole_angle_real)) > tilt_limit
     return out_of_steps or tilted or too_fast
예제 #16
0
        # Stop once the configured cycle period has elapsed.
        if time_last > start + CYCLE_PERIOD_S:
            break

        # Query the policy for an action and map it to a throttle level.
        action, _ = p.predict(np.array(obs))
        activation = THROTTLE_PROFILE[action]
        delta = activation * cm.MAX_RADIANS / 2  # reality factor

        print("%d\t%.6f\t%.3f\t%.3f\t%.6f" %
              (count, obs[0], cm.rad2Deg(obs[0]), activation, delta))

        # Buffer samples for the plots produced after the loop.
        buf_time.append(time_last)
        buf_pos.append(obs[0])
        buf_act.append(activation)
        buf_d_p.append(delta)

        # Integrate the commanded delta into the arm position, staying wrapped.
        pos = cm.wrapRad(cm.norm2Rad(obs[0]) + delta)
        pos = cm.rad2Norm(pos)
        obs[0] = pos

        count += 1

        now = time.time()
        dif = now - time_last

        # NOTE(review): if an iteration ever takes longer than cm.STEP,
        # cm.STEP - dif goes negative and time.sleep raises ValueError — confirm.
        time.sleep(cm.STEP - dif)

    # Plot position, activation, and commanded delta against time.
    plt.plot(buf_time, buf_pos)
    plt.plot(buf_time, buf_act)
    plt.plot(buf_time, buf_d_p)
    plt.show()