def latency(arm_angle, last_arm_angle, pole_angle, last_pole_angle, step):
    """Back-date the newest arm/pole readings to mimic real-hardware latency.

    Each sensor reading is shifted backwards in time by its estimated
    read/write/inference delay, assuming the angle changed linearly over
    the last control step.

    Returns [arm_angle, arm_vel, pole_angle, pole_vel] with the jitter applied.
    """
    set_delay = fetchSetDelay()
    get_delay = fetchGetDelay()
    # Dither the NN prediction time by +/- 20%; the call is made even when
    # latency injection is off so the RNG sequence stays the same.
    nn_delay = cm.dither(0.00175, 0.2)
    nn_delay = nn_delay if S2R_LATENCY else 0
    d_pole = cm.difRads(last_pole_angle, pole_angle)
    d_arm = cm.difRads(last_arm_angle, arm_angle)
    # Pole is read early in the cycle, arm late — hence the 1/4 vs 3/4 split.
    lag_pole = get_delay / 4 + nn_delay + set_delay / 2
    lag_arm = get_delay * (3 / 4) + nn_delay + set_delay / 2
    # Linear back-extrapolation of each angle over its lag.
    shift_pole = d_pole * (lag_pole / step)
    shift_arm = d_arm * (lag_arm / step)
    return [
        cm.rad2Norm(arm_angle - shift_arm),
        (d_arm - shift_arm) / (step - lag_arm),
        cm.rad2Norm(pole_angle - shift_pole),
        (d_pole - shift_pole) / (step - lag_pole),
    ]
def reset(self, position=PI, velocity=0, arm_vel=0):
    """Rebuild the PyBullet world and return the initial observation.

    position/velocity seed the pole joint; arm_vel seeds the arm joint.
    Default position=PI presumably means the pole starts hanging down —
    TODO confirm against the URDF's zero pose.
    """
    # Per-episode bookkeeping.
    self.throttle = 0
    self.action = None
    self._envStepCounter = 0
    # Randomized arm set-point for this episode.
    self.arm_angle_target = np.random.uniform(0, 2 * PI)
    #self.arm_vel_target = np.random.uniform(-6*PI, 6*PI) # velocity rad/s, 3Hz
    # Ground-truth and latency-jittered state, cleared each episode.
    self.arm_angle_real = 0
    self.arm_vel_real = 0
    self.arm_angle_jittered = 0
    self.arm_vel_jittered = 0
    self.pole_angle_real = 0
    self.pole_vel_real = 0
    # Fresh physics world.
    p.resetSimulation()
    p.setGravity(0, 0, -9.8) # m/s^2
    p.setTimeStep(cm.STEP) # sec
    planeId = p.loadURDF("plane.urdf")
    # Load the robot from alongside this file; keep the URDF's own inertia values.
    path = os.path.abspath(os.path.dirname(__file__))
    self.botId = p.loadURDF(os.path.join(path, "furuta.urdf"), useFixedBase=1, flags=p.URDF_USE_INERTIA_FROM_FILE)
    # Resolve joint indices by name so URDF joint ordering doesn't matter.
    numJoints = p.getNumJoints(self.botId)
    joints = {}
    for j in range(numJoints):
        joint = p.getJointInfo(self.botId, j)
        joints[joint[1]] = j  # joint[1] is the joint name (bytes)
    self.armId = joints[b'pillar_to_arm']
    self.poleId = joints[b'axle_to_rod']
    # Sim-to-real: randomize inertia/damping per episode.
    s2r.ditherInertia(self.botId, DITHER, DAMPING)
    p.resetJointState(bodyUniqueId=self.botId, jointIndex=self.poleId, targetValue=position, targetVelocity=velocity)
    p.resetJointState(bodyUniqueId=self.botId, jointIndex=self.armId, targetValue=0, targetVelocity=arm_vel)
    # Zero-force velocity control disengages the default motor so the arm spins freely.
    p.setJointMotorControl2(bodyUniqueId=self.botId, jointIndex=self.armId, controlMode=p.VELOCITY_CONTROL, force=0)
    obs = self.compute_observation()
    # Normalize the two angle entries of the observation.
    obs[0] = cm.rad2Norm(obs[0])
    obs[2] = cm.rad2Norm(obs[2])
    s2r.networkReset(obs)
    return np.array(obs)
def compute_done(self):
    """Episode ends at the step cap, a terminal pole angle, or overspeed."""
    too_fast = self.overspeed()
    out_of_steps = self._envStepCounter >= 1000
    tipped = abs(cm.rad2Norm(self.pole_angle_real)) > cm.deg2Rad(cm.ANGLE_TERMINAL_MAX_D)
    return out_of_steps or tipped or too_fast
def compute_reward(self, action, done=False):
    """Reward = finish bonus + pole uprightness - activation cost - pole-speed cost."""
    too_fast = self.overspeed()
    # Finishing cleanly (not by overspeed) pays out the unused steps.
    bonus = (1000 - self._envStepCounter) if (done and not too_fast) else 0
    upright = math.cos(abs(cm.rad2Norm(self.pole_angle_real)))
    effort = abs(self.decodeAction(action)) * 0.001
    spin_cost = abs(self.pole_vel_real) / cm.VEL_MAX_POLE
    return bonus + upright - effort - spin_cost
def compute_reward_x(self, action, done=False):
    """Experimental reward: only the pole-uprightness term is currently active.

    The arm-position, arm-velocity, activation and activation-delta terms
    are disabled (zeroed); their previous formulas are kept below for reference:
      arm:        math.cos(abs(cm.difRads(self.arm_angle_real, self.arm_angle_target))) * 0.1
      arm_vel:    abs(self.arm_vel_real) / cm.VEL_MAX_ARM * 0.00001
      activation: abs(self.decodeAction(action)) * 0.0001
      act_delta:  abs(self.activation_buf[0] - self.activation_buf[1]) * 0.5 * 0.1
    """
    # cos(|pole angle|): 1 when balanced upright, -1 when hanging.
    upright = math.cos(abs(cm.rad2Norm(self.pole_angle_real)))
    return upright
def compute_reward(self, action, done=False):
    """Reward = clean-finish bonus + pole uprightness.

    The pole-velocity, activation and activation-delta shaping terms are
    disabled (zeroed); prior formulas kept for reference:
      pole_vel:   (abs(self.pole_vel_real) / cm.VEL_MAX_POLE) * 1
      activation: abs(self.decodeAction(action)) * 0.0001
      act_delta:  abs(self.activation_buf[0] - self.activation_buf[1]) * 0.5 * 0.1
    """
    too_fast = self.overspeed()
    bonus = 0
    if done and not too_fast:
        # Pay out the steps not used when the run ends without overspeeding.
        bonus = 1000 - self._envStepCounter
    upright = math.cos(abs(cm.rad2Norm(self.pole_angle_real)))
    return bonus + upright
def compute_reward(self, action, done=False):
    """Reward arm-on-target and pole-upright; penalize arm speed and throttle."""
    # cos of the angular distance to the arm set-point: 1 when on target.
    on_target = math.cos(
        abs(cm.difRads(self.arm_angle_real, self.arm_angle_target)))
    # Arm speed penalty, weighted 2x relative to the other terms.
    speed_cost = 2 * abs(self.arm_vel_real) / cm.VEL_MAX_ARM
    upright = math.cos(abs(cm.rad2Norm(self.pole_angle_real)))
    effort = abs(self.decodeAction(action))
    return on_target + upright - speed_cost - effort
def updateObs(render=False):
    """Sample the encoders, update velocity tracking state, refresh global obs.

    Returns [arm, arm_vel, pole, pole_vel, ARM_TARGET_RAD].
    """
    global arm_last, pole_last, time_last, obs
    t_now = time.time()
    dt = t_now - time_last
    pole = getPole()
    arm = getArm()
    # Apply the calibration offset and re-wrap into normalized radians.
    pole = cm.rad2Norm(cm.wrapRad(cm.norm2Rad(pole) + cm.norm2Rad(pole_offset)))
    arm_vel = getVel(arm, arm_last, dt)
    pole_vel = getVel(pole, pole_last, dt)
    # Roll the state forward for the next sample.
    arm_last, pole_last, time_last = arm, pole, t_now
    if render:
        print("arm: %.6f %.6f \tpole: %.6f %.6f" %
              (arm, arm_vel, pole, pole_vel))
        # Human-readable degrees; sign re-applied after converting |vel|.
        print("arm: %.3f %.3f \t\tpole: %.3f %.3f" %
              (cm.rad2Deg(cm.norm2Rad(arm)),
               np.sign(arm_vel) * cm.rad2Deg(abs(arm_vel)),
               cm.rad2Deg(cm.norm2Rad(pole)),
               np.sign(pole_vel) * cm.rad2Deg(abs(pole_vel))))
    obs = [arm, arm_vel, pole, pole_vel, ARM_TARGET_RAD]
    return obs
def getObs(render=True):
    """Read arm/pole state and return [arm, arm_vel, pole, pole_vel, ARM_TARGET_RAD]."""
    global arm_last, pole_last, time_last
    stamp = time.time()
    elapsed = stamp - time_last
    raw_pole = getPole()
    arm = getArm()
    # Shift the pole by PI minus the calibration offset, then normalize.
    pole = cm.rad2Norm(raw_pole + (cm.PI - pole_offset))
    arm_vel = getVel(arm, arm_last, elapsed)
    pole_vel = getVel(pole, pole_last, elapsed)
    # Remember this sample for the next velocity estimate.
    arm_last, pole_last, time_last = arm, pole, stamp
    if render:
        print("arm: %.4f %.4f \tpole: %.4f %.4f" %
              (arm, arm_vel, pole, pole_vel))
    return [arm, arm_vel, pole, pole_vel, ARM_TARGET_RAD]
def getPole():
    """Read the pole encoder (axis1) and return its angle in normalized radians."""
    counts = axis1.encoder.pos_cpr
    return cm.rad2Norm(cm.cpr2Rad(counts))
def getArm():
    """Read the arm encoder (axis0) and return its angle in normalized radians."""
    counts = axis0.encoder.pos_cpr
    return cm.rad2Norm(cm.cpr2Rad(counts))
def setPosition(rads):
    """Command the arm motor (axis0) to an absolute angle and record the target."""
    global pos_target
    # The controller expects encoder counts, not radians.
    axis0.controller.pos_setpoint = cm.rad2Cpr(rads)
    pos_target = cm.rad2Norm(rads)
def compute_reward(self, action, done=False):
    """Base reward: pole uprightness minus a tiny activation penalty.

    Subclasses may override with task-specific shaping.
    """
    upright = math.cos(abs(cm.rad2Norm(self.pole_angle_real)))
    effort = abs(self.decodeAction(action)) * 0.0001
    return upright - effort
def compute_reward(self, action, done=None):
    """Pole uprightness minus a tiny activation penalty; `done` is unused."""
    penalty = abs(self.decodeAction(action)) * 0.0001
    return math.cos(abs(cm.rad2Norm(self.pole_angle_real))) - penalty
def compute_done(self):
    """True once the episode should stop: step cap, terminal pole angle, or overspeed."""
    too_fast = self.overspeed()
    limit = cm.deg2Rad(cm.ANGLE_TERMINAL_MAX_D)
    tipped = abs(cm.rad2Norm(self.pole_angle_real)) > limit
    return self._envStepCounter >= 1000 or tipped or too_fast
if time_last > start + CYCLE_PERIOD_S: break action, _ = p.predict(np.array(obs)) activation = THROTTLE_PROFILE[action] delta = activation * cm.MAX_RADIANS / 2 # reality factor print("%d\t%.6f\t%.3f\t%.3f\t%.6f" % (count, obs[0], cm.rad2Deg(obs[0]), activation, delta)) buf_time.append(time_last) buf_pos.append(obs[0]) buf_act.append(activation) buf_d_p.append(delta) pos = cm.wrapRad(cm.norm2Rad(obs[0]) + delta) pos = cm.rad2Norm(pos) obs[0] = pos count += 1 now = time.time() dif = now - time_last time.sleep(cm.STEP - dif) plt.plot(buf_time, buf_pos) plt.plot(buf_time, buf_act) plt.plot(buf_time, buf_d_p) plt.show()