Exemple #1
0
def latency(arm_angle, last_arm_angle, pole_angle, last_pole_angle, step):
    setDelay = fetchSetDelay()
    getDelay = fetchGetDelay()
    nnDelay = cm.dither(0.00175, 0.2)  # vary prediction time by +/- 20%

    if not S2R_LATENCY:
        nnDelay = 0

    pole_angle_diff = cm.difRads(last_pole_angle, pole_angle)
    arm_angle_diff = cm.difRads(last_arm_angle, arm_angle)

    pole_delay = getDelay / 4 + nnDelay + setDelay / 2
    arm_delay = getDelay * (3 / 4) + nnDelay + setDelay / 2

    pole_delta = pole_angle_diff * (pole_delay / step)
    arm_delta = arm_angle_diff * (arm_delay / step)

    pole_angle_jittered = cm.rad2Norm(pole_angle - pole_delta)
    arm_angle_jittered = cm.rad2Norm(arm_angle - arm_delta)

    pole_vel_jittered = (pole_angle_diff - pole_delta) / (step - pole_delay)
    arm_vel_jittered = (arm_angle_diff - arm_delta) / (step - arm_delay)

    return [
        arm_angle_jittered, arm_vel_jittered, pole_angle_jittered,
        pole_vel_jittered
    ]
Exemple #2
0
def initObs():
    global time_last, arm_last, pole_last, pole_offset
    
    time_last = time.time()
    arm_last = getArm()
    pole_offset = cm.difRads(cm.norm2Rad(getPole()), cm.norm2Rad(pole_last))
    
    # sometimes twitches on connect - wait for settle
    updateObs()
    while 0 != obs[3]:
        time.sleep(0.5)
        updateObs()
    def compute_reward(self, action, done=False):
        # target position
        #arm = math.cos(abs(cm.difRads(self.arm_angle_real, self.arm_angle_target))) * (1 - abs(self.arm_vel_real) / feb.VEL_MAX_ARM)
        arm = math.cos(
            abs(cm.difRads(self.arm_angle_real, self.arm_angle_target)))
        #arm = abs(cm.difRads(self.arm_angle_real, self.arm_angle_target))

        arm_vel = 2 * abs(self.arm_vel_real) / cm.VEL_MAX_ARM

        # target velocity
        #arm = (1 - abs(self.arm_vel_real - self.arm_angle_target) / (feb.VEL_MAX_ARM + 6*feb.PI)) * 0.001

        pole = math.cos(abs(cm.rad2Norm(self.pole_angle_real)))

        #vel = (abs(self.arm_vel_real) / feb.VEL_MAX_ARM) * 2

        throttle = abs(self.decodeAction(action))  #* 0.1

        return arm + pole - arm_vel - throttle  #- vel
Exemple #4
0
def getVel(pos, pos_last, time_diff):
    return cm.difRads(cm.norm2Rad(pos_last), cm.norm2Rad(pos)) / time_diff
Exemple #5
0
def main():
    setupOdrive()
    # sanity check
    if drive is None:
        print("Failed to initialize Odrive.  Exiting...")
        exit()

    #nnBal = DQN.load(POLICY_PATH + "deepq_p_policy_bal.zip")
    #nnUp = DQN.load(POLICY_PATH + "deepq_p_policy_up.zip")
    
    nnBal = ODSP.SinePolicy(THROTTLE_PROFILE)
    nnUp = nnBal

    initObs()
    
    max_arm_vel = 0
    max_pole_vel = 0
    speedCheck = 0
    reward = 0
    i = 0
    while i < LOOP_COUNT:
        i += 1
        print("====================Step: %d=============================" % (i))
        
        prev_arm = arm_last
        
        diff = time.time() - time_last
        if 0 < diff and diff < cm.STEP:
            time.sleep(cm.STEP - diff)
        updateObs()
        
        if abs(obs[1]) > max_arm_vel:
            max_arm_vel = abs(obs[1])
        if abs(obs[3]) > max_pole_vel:
            max_pole_vel = abs(obs[3])
        
        if abs(obs[2]) > cm.deg2Rad(cm.ANGLE_TERMINAL_MIN_D):
            policyName = "spin-up"
            policy = nnUp
        else:
            policyName = "balance"
            policy = nnBal

        activation = 0 # unused?
        prev_pos_target = pos_target
        prev_target_delta = target_delta
        if checkArmVel() and checkPoleVel():
            action, _ = policy.predict(np.array(obs))
            act(action)        
        else:
            speedCheck += 1 # logging

        if prev_pos_target is not None:
            delta = cm.difRads(cm.norm2Rad(prev_arm), cm.norm2Rad(obs[0]))
            pos_err = delta - prev_target_delta
            buf_hist.append({D_POS_TARGET:prev_pos_target, D_OBS:obs, D_ERR:pos_err})
            ##print("Error: %.6f\t%.3f" % (pos_err, cm.rad2Deg(pos_err)))

        reward += math.cos(abs(obs[2]))

    deactivateMotor()

    ##print("Episode reward: %.1f\tdata len: %d" % (reward, len(buf_hist)))
    ##print("max_arm_vel-d: %.3f\tmax_pole_vel-d: %.3f\tspeedCheck: %d" % (cm.rad2Deg(max_arm_vel), cm.rad2Deg(max_pole_vel), speedCheck))
    
    if DATA_LOG:
        stamp = int(time.time() / 1)
        #fname = "data/deepq_p_data_" + str(stamp) + ".json"
        fname = "data/odsp_data_obs_" + str(stamp) + ".json"
        with open(fname, 'w') as f:
            json.dump(buf_hist, f)
        print("Wrote behavioral data to: " + fname)
Exemple #6
0
def timeLinear(start, now, pos):
    target = cm.wrapRad((now - start) * 2 * cm.PI)
    return cm.difRads(pos, target)
Exemple #7
0
def angle2Delta(angle, pos):
    target = math.sin(angle) * CYCLE_POS_MAX
    delta = cm.difRads(pos, target)
    return delta