def reset(self):
    """Reset to a randomized near-upright state (position 0 rad is upright).

    Pole position, pole velocity and arm velocity are each drawn
    uniformly from [0, limit) with a random sign, so episodes start
    anywhere inside the non-terminal cone.
    """
    # BUGFIX: the original guarded the block below with `if False:`
    # (a dead branch). Kept here as an explicitly commented-out debug
    # alternative: deterministic start at half the terminal angle, at rest.
    #return FurutaEnvPosTrpo.reset(
    #    self,
    #    position=cm.deg2Rad(cm.ANGLE_TERMINAL_MIN_D / 2),
    #    velocity=0,
    #    arm_vel=0)
    return FurutaEnvPosTrpo.reset(
        self,
        position=cm.sgn() * np.random.uniform(0, cm.deg2Rad(cm.ANGLE_TERMINAL_MIN_D)),
        velocity=cm.sgn() * np.random.uniform(0, cm.VEL_MAX_POLE),
        arm_vel=cm.sgn() * np.random.uniform(0, cm.VEL_MAX_ARM))
def reset(self):
    """Reset with the pole deliberately outside the terminal cone.

    Starts the episode in the "spin" region we never want to see: the
    pole angle is drawn between one and two terminal angles away from
    upright (0 rad), with randomized pole and arm velocities.
    """
    # Terminal angle in radians, computed once and reused for the range.
    terminal_rad = cm.deg2Rad(cm.ANGLE_TERMINAL_MAX_D)
    return FurutaEnvTorqueDeepq.reset(
        self,
        position=cm.sgn() * np.random.uniform(terminal_rad, 2 * terminal_rad),
        velocity=cm.sgn() * np.random.uniform(0, cm.VEL_MAX_POLE),
        arm_vel=cm.sgn() * np.random.uniform(0, cm.VEL_MAX_ARM))
def compute_done(self):
    """Return True when the episode should terminate.

    Terminates on overspeed, after 1000 steps, or once the normalized
    pole angle leaves the terminal cone. (An excess-travel check on the
    arm angle existed here once; it is disabled for the simple reward.)
    """
    if self.overspeed():
        return True
    if self._envStepCounter >= 1000:
        return True
    # Pole past the terminal cone -> episode over.
    return abs(cm.rad2Norm(self.pole_angle_real)) > cm.deg2Rad(cm.ANGLE_TERMINAL_MAX_D)
def setupOdrive():
    """Connect to the Odrive and cache its handles in module globals.

    Discovers the first available Odrive, exposes both axes, puts axis0
    into position-control mode, and zeroes the arm target angle.
    """
    global drive, axis0, axis1, ARM_TARGET_RAD
    print("Connecting to Odrive...")
    drive = odrive.find_any()
    print("Connected!")
    axis0, axis1 = drive.axis0, drive.axis1
    axis0.controller.config.control_mode = POSITION_CONTROL
    ARM_TARGET_RAD = cm.deg2Rad(0)
def main():
    """Run the hybrid DQN controller on the real rig and log current data.

    Loads the balance and spin-up policies, runs LOOP_COUNT control
    steps (choosing a policy by pole angle when velocities are safe),
    accumulates a shaped reward, then dumps the collected
    (target, obs, current) samples to a timestamped JSON file.
    """
    setupOdrive()
    # sanity check
    if drive is None:
        print("Failed to initialize Odrive. Exiting...")
        exit()
    _setCurrent(0)
    nnBal = DQN.load("deepq_policy_bal.zip.quiet")
    nnUp = DQN.load("deepq_policy_up.zip.quiet")
    initObs()
    obs = getObs()
    reward = 0
    i = 0
    while i < LOOP_COUNT:
        i += 1
        mark = time.time()
        action = 0
        # BUGFIX: `policy` was only assigned inside the speed-check branch,
        # so the print below could raise NameError on the first iteration
        # (or report a stale policy name later) when the check failed.
        policy = None
        if checkArmVel(obs) and checkPoleVel(obs):
            # More than 10 degrees from upright -> spin-up, else balance.
            if abs(obs[2]) > cm.deg2Rad(10):
                policy = "spin-up"
                action, _ = nnUp.predict(np.array(obs))
            else:
                policy = "balance"
                action, _ = nnBal.predict(np.array(obs))
        current = computeCurrent(action)
        print("%s\taction: %d\tcurrent: %.2f" % (policy, action, current))
        setCurrent(current, obs)
        diff = time.time() - mark
        render = True
        # NOTE(review): comparing elapsed *seconds* against LOOP_COUNT (an
        # iteration count) looks wrong -- presumably this should poll for one
        # control period (e.g. cm.STEP). Confirm intent before changing.
        while diff < LOOP_COUNT:
            obs = getObs(render)
            cur = getCurrent()
            render = False
            buf.append({"target": current_target, "obs": obs, "i": cur})
            diff = time.time() - mark
        # cos(angle) rewards uprightness; small penalty for current draw.
        reward += math.cos(abs(obs[2])) - abs(current / MAX_CURRENT) * 0.001
    # BUGFIX: the summary printed len(buf_current), but the buffer populated
    # above (and dumped below) is `buf`; report its length instead.
    print("Episode reward: %.1f\tdata len: %d" % (reward, len(buf)))
    _setCurrent(0)
    stamp = int(time.time())  # was int(time.time() / 1); `/ 1` is a no-op
    fname = "data/current_data_3_b_" + str(stamp) + ".json"
    with open(fname, 'w') as f:
        json.dump(buf, f)
    print("Wrote behavioral data to: " + fname)
def reset(self):
    """Reset near upright (position 0 rad is upright).

    With no state configured, starts exactly upright and at rest;
    otherwise randomizes position and velocities inside the
    non-terminal cone, each with a random sign.
    """
    #if self.state == feb.RUN:
    if self.state is None:
        # Deterministic start: exactly upright, no motion.
        return FurutaEnvTorqueDeepq.reset(self, position=0, velocity=0)
    randomized = dict(
        position=cm.sgn() * np.random.uniform(0, cm.deg2Rad(cm.ANGLE_TERMINAL_MIN_D)),
        velocity=cm.sgn() * np.random.uniform(0, cm.VEL_MAX_POLE),
        arm_vel=cm.sgn() * np.random.uniform(0, cm.VEL_MAX_ARM))
    return FurutaEnvTorqueDeepq.reset(self, **randomized)
def mainHybrid(arg):
    """Evaluate the hybrid spin-up/balance PPO1 controller.

    In test mode ("test" argument) runs TEST_COUNT_HYBRID episodes
    without rendering and prints aggregate statistics; otherwise runs
    forever with rendering.
    """
    test = arg == TEST
    env = fep.FurutaEnvPosPpo(cm.RUN, render=not test)
    #env.setRender(True)
    modelBal = PPO1.load(POLICY_PATH + "ppo1_pos_policy_bal.zip")
    modelUp = PPO1.load(POLICY_PATH + "ppo1_pos_policy_up.zip")
    buf_rew = []
    test_cutoff_count = 0
    test_count = 0
    overspeed = 0
    complete_count = 0
    while True:
        test_count += 1
        if test and test_count >= TEST_COUNT_HYBRID:
            print("\n***Average reward: %.3f\tLong runs: %d\tComplete: %d" %
                  (sum(buf_rew) / float(len(buf_rew)),
                   test_cutoff_count - overspeed, complete_count))
            break
        obs, done = env.reset(), False
        episode_rew = 0
        count = 0
        while not done:
            # Outside the balance cone -> spin-up policy, else balance.
            active = modelUp if abs(obs[2]) > cm.deg2Rad(cm.ANGLE_TERMINAL_MIN_D) else modelBal
            action, _ = active.predict(obs)
            obs, rew, done, _ = env.step(action)
            if speedCheck(obs):
                overspeed += 1
            episode_rew += rew
            count += 1
        if count > 999:
            complete_count += 1
        buf_rew.append(episode_rew)
        if test and count >= TEST_CUTOFF_MAX:
            test_cutoff_count += 1
        print("Episode reward: %.3f" % (episode_rew))
def haze(self):
    """Periodically kick the pole with a velocity perturbation ("haze").

    Does nothing once the pole is past the terminal angle. Every 500
    steps, applies a random-magnitude kick in TRAIN mode (when hazing
    is configured for training) or a fixed-magnitude, random-sign kick
    in RUN mode, via a pybullet velocity-control command.
    """
    if abs(self.pole_angle_real) > cm.deg2Rad(cm.ANGLE_TERMINAL_MAX_D):
        return
    kick = 0
    due = self._envStepCounter % 500 == 0
    if self.state == cm.TRAIN and POS_HAZE == cm.TRAIN and due:
        kick = cm.sgn() * np.random.uniform(0, cm.VEL_MAX_POLE) * 0.3
    if self.state == cm.RUN and POS_HAZE == cm.RUN and due:
        kick = cm.sgn() * cm.VEL_MAX_POLE * 0.2
    if kick != 0:
        # Superimpose the kick on the pole's current velocity.
        p.setJointMotorControl2(bodyUniqueId=self.botId,
                                jointIndex=self.poleId,
                                controlMode=p.VELOCITY_CONTROL,
                                targetVelocity=self.pole_vel_real + kick)
def mainHybrid():
    """Run the hybrid spin-up/balance DQN controller forever, rendering."""
    env = fed.FurutaEnvTorqueDeepq(cm.RUN, render=True)
    #env.setRender(True)
    modelBal = DQN.load(POLICY_PATH + "deepq_policy_bal_nn.zip", env)
    modelUp = DQN.load(POLICY_PATH + "deepq_policy_up_nn.zip", env)
    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            # Outside the balance cone -> spin-up policy, else balance.
            active = modelUp if abs(obs[2]) > cm.deg2Rad(cm.ANGLE_TERMINAL_MIN_D) else modelBal
            action, _ = active.predict(obs)
            obs, rew, done, _ = env.step(action)
            speedCheck(obs)
            episode_rew += rew
        print("Episode reward: %.3f" % (episode_rew))
def main(): setupOdrive() # sanity check if drive is None: print("Failed to initialize Odrive. Exiting...") exit() #nnBal = DQN.load(POLICY_PATH + "deepq_p_policy_bal.zip") #nnUp = DQN.load(POLICY_PATH + "deepq_p_policy_up.zip") nnBal = ODSP.SinePolicy(THROTTLE_PROFILE) nnUp = nnBal initObs() max_arm_vel = 0 max_pole_vel = 0 speedCheck = 0 reward = 0 i = 0 while i < LOOP_COUNT: i += 1 print("====================Step: %d=============================" % (i)) prev_arm = arm_last diff = time.time() - time_last if 0 < diff and diff < cm.STEP: time.sleep(cm.STEP - diff) updateObs() if abs(obs[1]) > max_arm_vel: max_arm_vel = abs(obs[1]) if abs(obs[3]) > max_pole_vel: max_pole_vel = abs(obs[3]) if abs(obs[2]) > cm.deg2Rad(cm.ANGLE_TERMINAL_MIN_D): policyName = "spin-up" policy = nnUp else: policyName = "balance" policy = nnBal activation = 0 # unused? prev_pos_target = pos_target prev_target_delta = target_delta if checkArmVel() and checkPoleVel(): action, _ = policy.predict(np.array(obs)) act(action) else: speedCheck += 1 # logging if prev_pos_target is not None: delta = cm.difRads(cm.norm2Rad(prev_arm), cm.norm2Rad(obs[0])) pos_err = delta - prev_target_delta buf_hist.append({D_POS_TARGET:prev_pos_target, D_OBS:obs, D_ERR:pos_err}) ##print("Error: %.6f\t%.3f" % (pos_err, cm.rad2Deg(pos_err))) reward += math.cos(abs(obs[2])) deactivateMotor() ##print("Episode reward: %.1f\tdata len: %d" % (reward, len(buf_hist))) ##print("max_arm_vel-d: %.3f\tmax_pole_vel-d: %.3f\tspeedCheck: %d" % (cm.rad2Deg(max_arm_vel), cm.rad2Deg(max_pole_vel), speedCheck)) if DATA_LOG: stamp = int(time.time() / 1) #fname = "data/deepq_p_data_" + str(stamp) + ".json" fname = "data/odsp_data_obs_" + str(stamp) + ".json" with open(fname, 'w') as f: json.dump(buf_hist, f) print("Wrote behavioral data to: " + fname)
from stable_baselines import PPO1
import furuta_env_p_ppo as fep
import furuta_env_base as feb
import common as cm

# Directory holding the serialized PPO1 policies.
POLICY_PATH = "policy/"
# CLI argument value that selects test (non-rendering, statistics) mode.
TEST = "test"
# Episode counts per evaluation mode.
TEST_COUNT_BAL = 500
TEST_COUNT_UP = 100
TEST_COUNT_HYBRID = 50
TEST_CUTOFF_MIN = 100  # a test that ran at least this many steps is considered a long test, for reporting purposes
TEST_CUTOFF_MAX = 50  # a test that ran less than this many steps is considered a short test, for reporting purposes
# Arm target angle in radians for the position task.
ARM_TARGET_RAD = cm.deg2Rad(90)


def speedCheck(obs):
    """Return True (and warn) if arm or pole velocity exceeds its limit.

    From usage: obs[1] is the arm velocity and obs[3] the pole
    velocity, compared against cm.VEL_MAX_ARM / cm.VEL_MAX_POLE.
    """
    if abs(obs[1]) > cm.VEL_MAX_ARM or abs(obs[3]) > cm.VEL_MAX_POLE:
        print("Overspeed!")
        return True
    return False


def mainBal(arg):
    # NOTE(review): this definition appears truncated at the chunk
    # boundary -- only its first statements are visible here.
    test = arg == TEST
    env = fep.FurutaEnvPosPpoBal(cm.RUN, render=not test)
    #env.setRender(not test)
    #model = PPO1.load(POLICY_PATH + "ppo1_pos_policy_bal.zip")
def compute_done(self):
    """Return True when the episode should terminate.

    Terminates on overspeed, after 1000 steps, or once the normalized
    pole angle exceeds the terminal cone.
    """
    # Evaluate all conditions eagerly, mirroring the original flag style.
    checks = (
        self.overspeed(),
        self._envStepCounter >= 1000,
        abs(cm.rad2Norm(self.pole_angle_real)) > cm.deg2Rad(cm.ANGLE_TERMINAL_MAX_D),
    )
    return any(checks)
def compute_done(self):
    """Return True when the episode should terminate.

    Ends on overspeed, after 1000 steps, or once the pole is
    "captured": inside the minimum terminal cone AND turning slower
    than 2*pi rad/s (i.e. the spin-up succeeded).
    """
    if self.overspeed():
        return True
    if self._envStepCounter >= 1000:
        return True
    captured = (abs(self.pole_angle_real) < cm.deg2Rad(cm.ANGLE_TERMINAL_MIN_D)
                and abs(self.pole_vel_real) < 2 * feb.PI)
    return captured