def latency(arm_angle, last_arm_angle, pole_angle, last_pole_angle, step):
    # Sim-to-real (S2R) latency model: roll the measured angles back by the portion
    # of the step consumed by sensor reads, actuator writes, and policy inference.
    setDelay = fetchSetDelay()
    getDelay = fetchGetDelay()
    nnDelay = cm.dither(0.00175, 0.2)  # vary prediction time by +/- 20%
    if not S2R_LATENCY:
        nnDelay = 0

    pole_angle_diff = cm.difRads(last_pole_angle, pole_angle)
    arm_angle_diff = cm.difRads(last_arm_angle, arm_angle)

    # effective delay seen by each measurement
    pole_delay = getDelay / 4 + nnDelay + setDelay / 2
    arm_delay = getDelay * (3 / 4) + nnDelay + setDelay / 2

    # angle travelled during each delay, assuming constant velocity over the step
    pole_delta = pole_angle_diff * (pole_delay / step)
    arm_delta = arm_angle_diff * (arm_delay / step)

    pole_angle_jittered = cm.rad2Norm(pole_angle - pole_delta)
    arm_angle_jittered = cm.rad2Norm(arm_angle - arm_delta)

    pole_vel_jittered = (pole_angle_diff - pole_delta) / (step - pole_delay)
    arm_vel_jittered = (arm_angle_diff - arm_delta) / (step - arm_delay)

    return [
        arm_angle_jittered, arm_vel_jittered, pole_angle_jittered,
        pole_vel_jittered
    ]
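# Illustrative usage sketch (assumption, not part of the original script): latency()
# would typically be fed the current and previous radian readings plus the loop step,
# returning a delay-compensated [arm_angle, arm_vel, pole_angle, pole_vel].
# The reading variables and the cm.STEP usage below are assumed for the example.
#
#   compensated = latency(arm_rad, arm_rad_last, pole_rad, pole_rad_last, cm.STEP)
#   arm_angle, arm_vel, pole_angle, pole_vel = compensated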
def initObs():
    global time_last, arm_last, pole_last, pole_offset
    time_last = time.time()
    arm_last = getArm()
    pole_offset = cm.difRads(cm.norm2Rad(getPole()), cm.norm2Rad(pole_last))

    # sometimes twitches on connect - wait for settle
    updateObs()
    while obs[3] != 0:
        time.sleep(0.5)
        updateObs()
def compute_reward(self, action, done=False):
    # target position
    #arm = math.cos(abs(cm.difRads(self.arm_angle_real, self.arm_angle_target))) * (1 - abs(self.arm_vel_real) / feb.VEL_MAX_ARM)
    arm = math.cos(
        abs(cm.difRads(self.arm_angle_real, self.arm_angle_target)))
    #arm = abs(cm.difRads(self.arm_angle_real, self.arm_angle_target))
    arm_vel = 2 * abs(self.arm_vel_real) / cm.VEL_MAX_ARM
    # target velocity
    #arm = (1 - abs(self.arm_vel_real - self.arm_angle_target) / (feb.VEL_MAX_ARM + 6*feb.PI)) * 0.001
    pole = math.cos(abs(cm.rad2Norm(self.pole_angle_real)))
    #vel = (abs(self.arm_vel_real) / feb.VEL_MAX_ARM) * 2
    throttle = abs(self.decodeAction(action))  #* 0.1

    # reward keeping the pole upright and the arm near its target, minus penalties
    # for arm speed and throttle effort
    return arm + pole - arm_vel - throttle  #- vel
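# Worked example (from the formula above): with the pole upright (pole_angle_real == 0),
# the arm on target, zero arm velocity, and zero throttle, the reward is
# cos(0) + cos(0) - 0 - 0 = 2; any arm speed or throttle effort subtracts from that maximum.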
def getVel(pos, pos_last, time_diff):
    # angular velocity: signed change between two normalized positions over time_diff
    return cm.difRads(cm.norm2Rad(pos_last), cm.norm2Rad(pos)) / time_diff
def main():
    setupOdrive()

    # sanity check
    if drive is None:
        print("Failed to initialize Odrive. Exiting...")
        exit()

    #nnBal = DQN.load(POLICY_PATH + "deepq_p_policy_bal.zip")
    #nnUp = DQN.load(POLICY_PATH + "deepq_p_policy_up.zip")
    nnBal = ODSP.SinePolicy(THROTTLE_PROFILE)
    nnUp = nnBal

    initObs()

    max_arm_vel = 0
    max_pole_vel = 0
    speedCheck = 0
    reward = 0
    i = 0
    while i < LOOP_COUNT:
        i += 1
        print("====================Step: %d=============================" % (i))
        prev_arm = arm_last

        # hold the control loop to a fixed step period
        diff = time.time() - time_last
        if 0 < diff and diff < cm.STEP:
            time.sleep(cm.STEP - diff)
        updateObs()

        if abs(obs[1]) > max_arm_vel:
            max_arm_vel = abs(obs[1])
        if abs(obs[3]) > max_pole_vel:
            max_pole_vel = abs(obs[3])

        # choose policy: spin-up when the pole is outside the balance window
        if abs(obs[2]) > cm.deg2Rad(cm.ANGLE_TERMINAL_MIN_D):
            policyName = "spin-up"
            policy = nnUp
        else:
            policyName = "balance"
            policy = nnBal

        activation = 0  # unused?
        prev_pos_target = pos_target
        prev_target_delta = target_delta

        # only act while both velocities are within safe limits
        if checkArmVel() and checkPoleVel():
            action, _ = policy.predict(np.array(obs))
            act(action)
        else:
            speedCheck += 1

        # logging
        if prev_pos_target is not None:
            delta = cm.difRads(cm.norm2Rad(prev_arm), cm.norm2Rad(obs[0]))
            pos_err = delta - prev_target_delta
            buf_hist.append({D_POS_TARGET: prev_pos_target, D_OBS: obs, D_ERR: pos_err})
            ##print("Error: %.6f\t%.3f" % (pos_err, cm.rad2Deg(pos_err)))

        reward += math.cos(abs(obs[2]))

    deactivateMotor()

    ##print("Episode reward: %.1f\tdata len: %d" % (reward, len(buf_hist)))
    ##print("max_arm_vel-d: %.3f\tmax_pole_vel-d: %.3f\tspeedCheck: %d" % (cm.rad2Deg(max_arm_vel), cm.rad2Deg(max_pole_vel), speedCheck))

    if DATA_LOG:
        stamp = int(time.time())
        #fname = "data/deepq_p_data_" + str(stamp) + ".json"
        fname = "data/odsp_data_obs_" + str(stamp) + ".json"
        with open(fname, 'w') as f:
            json.dump(buf_hist, f)
        print("Wrote behavioral data to: " + fname)
def timeLinear(start, now, pos):
    # linearly advancing target: one full revolution (2*PI rad) per second of elapsed time
    target = cm.wrapRad((now - start) * 2 * cm.PI)
    return cm.difRads(pos, target)
def angle2Delta(angle, pos):
    # map an angle onto a sine position profile and return the offset from
    # the current position
    target = math.sin(angle) * CYCLE_POS_MAX
    delta = cm.difRads(pos, target)
    return delta
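# Illustrative usage sketch (assumption, not from the original script): both timeLinear()
# and angle2Delta() return a signed offset from the current position to a target, so a
# caller would add the result to the current position to form a command. commandPosition()
# below is a hypothetical helper, not part of this codebase.
#
#   delta = angle2Delta(action_angle, pos)       # or: timeLinear(t_start, time.time(), pos)
#   commandPosition(pos + delta)                 # hypothetical position-command call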