Code example #1
    def reset(self):
        # position 0rad is upright
        if False:  # disabled branch; original condition: self.state == feb.RUN
            return FurutaEnvPosTrpo.reset(self,
                                          position=cm.deg2Rad(
                                              cm.ANGLE_TERMINAL_MIN_D / 2),
                                          velocity=0,
                                          arm_vel=0)

        return FurutaEnvPosTrpo.reset(
            self,
            position=cm.sgn() *
            np.random.uniform(0, cm.deg2Rad(cm.ANGLE_TERMINAL_MIN_D)),
            velocity=cm.sgn() * np.random.uniform(0, cm.VEL_MAX_POLE),
            arm_vel=cm.sgn() * np.random.uniform(0, cm.VEL_MAX_ARM))
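All of these snippets lean on a shared common module, imported as cm, whose helpers are not reproduced on this page. Below is a minimal sketch of what the ones used above plausibly do, inferred purely from their call sites; the implementations are assumptions, not the project's actual code.

import numpy as np

PI = np.pi

def deg2Rad(deg):
    # degrees to radians
    return deg * PI / 180.0

def rad2Deg(rad):
    # radians to degrees
    return rad * 180.0 / PI

def sgn():
    # random sign; used to symmetrize the randomized reset states
    return 1.0 if np.random.uniform() < 0.5 else -1.0

def rad2Norm(rad):
    # wrap an angle into [-PI, PI); example #3 compares its output against
    # a radian threshold, so the result stays in radians
    return (rad + PI) % (2 * PI) - PI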
Code example #2
    def reset(self):
        # position 0rad is upright
        return FurutaEnvTorqueDeepq.reset(
            self,
            #position=cm.sgn()*np.random.uniform(cm.deg2Rad(feb.ANGLE_TERMINAL_MIN_D), 2*feb.PI),
            #velocity=cm.sgn()*np.random.uniform(0, feb.VEL_MAX_POLE),
            #arm_vel=cm.sgn()*np.random.uniform(0, feb.VEL_MAX_ARM)

            # start in the spin we never want to see
            position=cm.sgn() *
            np.random.uniform(cm.deg2Rad(cm.ANGLE_TERMINAL_MAX_D),
                              2 * cm.deg2Rad(cm.ANGLE_TERMINAL_MAX_D)),
            #position=cm.sgn()*feb.PI,
            velocity=cm.sgn() * np.random.uniform(0, cm.VEL_MAX_POLE),
            arm_vel=cm.sgn() * np.random.uniform(0, cm.VEL_MAX_ARM))
Code example #3
    def compute_done(self):
        overspeed = self.overspeed()
        endOfRun = self._envStepCounter >= 1000
        terminalAngle = abs(cm.rad2Norm(self.pole_angle_real)) > cm.deg2Rad(
            cm.ANGLE_TERMINAL_MAX_D)
        #excessTravel = abs(self.arm_angle_real) > 0.8 * cm.PI  # tmp - simple reward
        return endOfRun or terminalAngle or overspeed  #or excessTravel
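The overspeed() method itself is not shown in these excerpts. Judging from the standalone speedCheck(obs) helper in example #11, it plausibly compares the measured joint velocities against the limits in common; a minimal sketch under that assumption (the attribute names are guesses):

    def overspeed(self):
        # assumed: mirrors speedCheck() from ppo_p_run.py, reading the
        # environment's own state instead of an observation vector
        return (abs(self.arm_vel_real) > cm.VEL_MAX_ARM
                or abs(self.pole_vel_real) > cm.VEL_MAX_POLE)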
Code example #4
def setupOdrive():
    global drive, axis0, axis1, ARM_TARGET_RAD
    
    print("Connecting to Odrive...")
    drive = odrive.find_any()
    print("Connected!")
    axis0 = drive.axis0
    axis1 = drive.axis1
    
    axis0.controller.config.control_mode = POSITION_CONTROL  # control-mode constant, presumably imported from odrive.enums

    ARM_TARGET_RAD = cm.deg2Rad(0)
Code example #5
def main():
    setupOdrive()
    # sanity check
    if drive is None:
        print("Failed to initialize Odrive.  Exiting...")
        exit()
    _setCurrent(0)

    nnBal = DQN.load("deepq_policy_bal.zip.quiet")
    nnUp = DQN.load("deepq_policy_up.zip.quiet")

    initObs()
    obs = getObs()

    reward = 0
    i = 0
    while i < LOOP_COUNT:
        i += 1
        mark = time.time()

        action = 0
        current = 0  # default so the reward term below is defined when the guard skips actuation
        if checkArmVel(obs) and checkPoleVel(obs):
            policy = None
            if abs(obs[2]) > cm.deg2Rad(10):
                policy = "spin-up"
                action, _ = nnUp.predict(np.array(obs))
            else:
                policy = "balance"
                action, _ = nnBal.predict(np.array(obs))

            current = computeCurrent(action)
            print("%s\taction: %d\tcurrent: %.2f" % (policy, action, current))
            setCurrent(current, obs)

        diff = time.time() - mark
        render = True
        while diff < cm.STEP:  # busy-wait out the rest of the control period (cm.STEP seconds, as in example #10)
            obs = getObs(render)
            cur = getCurrent()
            render = False
            buf.append({"target": current_target, "obs": obs, "i": cur})
            diff = time.time() - mark

        reward += math.cos(abs(obs[2])) - abs(current / MAX_CURRENT) * 0.001

    print("Episode reward: %.1f\tdata len: %d" % (reward, len(buf_current)))
    _setCurrent(0)

    stamp = int(time.time())
    fname = "data/current_data_3_b_" + str(stamp) + ".json"
    with open(fname, 'w') as f:
        json.dump(buf, f)
    print("Wrote behavioral data to: " + fname)
Code example #6
    def reset(self):
        #if self.state == feb.RUN:
        if self.state is None:
            return FurutaEnvTorqueDeepq.reset(self, position=0, velocity=0)

        # position 0rad is upright
        return FurutaEnvTorqueDeepq.reset(
            self,
            position=cm.sgn() *
            np.random.uniform(0, cm.deg2Rad(cm.ANGLE_TERMINAL_MIN_D)),
            #position=cm.sgn()*cm.deg2Rad(feb.ANGLE_TERMINAL_MIN_D),
            velocity=cm.sgn() * np.random.uniform(0, cm.VEL_MAX_POLE),
            arm_vel=cm.sgn() * np.random.uniform(0, cm.VEL_MAX_ARM))
Code example #7
File: ppo_p_run.py  Project: rravenel/furuta_pendulum
def mainHybrid(arg):
    test = arg == TEST

    env = fep.FurutaEnvPosPpo(cm.RUN, render=not test)
    #env.setRender(True)
    modelBal = PPO1.load(POLICY_PATH + "ppo1_pos_policy_bal.zip")
    modelUp = PPO1.load(POLICY_PATH + "ppo1_pos_policy_up.zip")

    buf_rew = []
    test_cutoff_count = 0
    test_count = 0
    overspeed = 0
    complete_count = 0
    while True:
        test_count += 1
        if test and test_count >= TEST_COUNT_HYBRID:
            print("\n***Average reward: %.3f\tLong runs: %d\tComplete: %d" %
                  (sum(buf_rew) / float(len(buf_rew)),
                   test_cutoff_count - overspeed, complete_count))
            break

        obs, done = env.reset(), False
        episode_rew = 0
        count = 0
        while not done:
            if abs(obs[2]) > cm.deg2Rad(cm.ANGLE_TERMINAL_MIN_D):
                action, _ = modelUp.predict(obs)
            else:
                action, _ = modelBal.predict(obs)

            obs, rew, done, _ = env.step(action)

            if speedCheck(obs):
                overspeed += 1

            episode_rew += rew
            count += 1
        if count > 999:
            complete_count += 1
        buf_rew.append(episode_rew)
        if test and count >= TEST_CUTOFF_MAX:
            test_cutoff_count += 1
        print("Episode reward: %.3f" % (episode_rew))
Code example #8
    def haze(self):
        if abs(self.pole_angle_real) > cm.deg2Rad(cm.ANGLE_TERMINAL_MAX_D):
            return

        velocity = 0
        if cm.TRAIN == self.state and POS_HAZE == cm.TRAIN:
            if self._envStepCounter % 500 == 0:
                velocity = cm.sgn() * np.random.uniform(0,
                                                        cm.VEL_MAX_POLE) * 0.3

        if cm.RUN == self.state and POS_HAZE == cm.RUN:
            if self._envStepCounter % 500 == 0:
                velocity = cm.sgn() * cm.VEL_MAX_POLE * 0.2

        if 0 != velocity:
            p.setJointMotorControl2(bodyUniqueId=self.botId,
                                    jointIndex=self.poleId,
                                    controlMode=p.VELOCITY_CONTROL,
                                    targetVelocity=self.pole_vel_real +
                                    velocity)
Code example #9
def mainHybrid():
    env = fed.FurutaEnvTorqueDeepq(cm.RUN, render=True)
    #env.setRender(True)
    modelBal = DQN.load(POLICY_PATH + "deepq_policy_bal_nn.zip", env)
    modelUp = DQN.load(POLICY_PATH + "deepq_policy_up_nn.zip", env)

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            if abs(obs[2]) > cm.deg2Rad(cm.ANGLE_TERMINAL_MIN_D):
                action, _ = modelUp.predict(obs)
            else:
                action, _ = modelBal.predict(obs)

            obs, rew, done, _ = env.step(action)

            speedCheck(obs)
            episode_rew += rew
        print("Episode reward: %.3f" % (episode_rew))
Code example #10
def main():
    setupOdrive()
    # sanity check
    if drive is None:
        print("Failed to initialize Odrive.  Exiting...")
        exit()

    #nnBal = DQN.load(POLICY_PATH + "deepq_p_policy_bal.zip")
    #nnUp = DQN.load(POLICY_PATH + "deepq_p_policy_up.zip")
    
    nnBal = ODSP.SinePolicy(THROTTLE_PROFILE)
    nnUp = nnBal

    initObs()
    
    max_arm_vel = 0
    max_pole_vel = 0
    speedCheck = 0
    reward = 0
    i = 0
    while i < LOOP_COUNT:
        i += 1
        print("====================Step: %d=============================" % (i))
        
        prev_arm = arm_last
        
        diff = time.time() - time_last
        if 0 < diff and diff < cm.STEP:
            time.sleep(cm.STEP - diff)
        updateObs()
        
        if abs(obs[1]) > max_arm_vel:
            max_arm_vel = abs(obs[1])
        if abs(obs[3]) > max_pole_vel:
            max_pole_vel = abs(obs[3])
        
        if abs(obs[2]) > cm.deg2Rad(cm.ANGLE_TERMINAL_MIN_D):
            policyName = "spin-up"
            policy = nnUp
        else:
            policyName = "balance"
            policy = nnBal

        activation = 0 # unused?
        prev_pos_target = pos_target
        prev_target_delta = target_delta
        if checkArmVel() and checkPoleVel():
            action, _ = policy.predict(np.array(obs))
            act(action)        
        else:
            speedCheck += 1 # logging

        if prev_pos_target is not None:
            delta = cm.difRads(cm.norm2Rad(prev_arm), cm.norm2Rad(obs[0]))
            pos_err = delta - prev_target_delta
            buf_hist.append({D_POS_TARGET:prev_pos_target, D_OBS:obs, D_ERR:pos_err})
            ##print("Error: %.6f\t%.3f" % (pos_err, cm.rad2Deg(pos_err)))

        reward += math.cos(abs(obs[2]))

    deactivateMotor()

    ##print("Episode reward: %.1f\tdata len: %d" % (reward, len(buf_hist)))
    ##print("max_arm_vel-d: %.3f\tmax_pole_vel-d: %.3f\tspeedCheck: %d" % (cm.rad2Deg(max_arm_vel), cm.rad2Deg(max_pole_vel), speedCheck))
    
    if DATA_LOG:
        stamp = int(time.time())
        #fname = "data/deepq_p_data_" + str(stamp) + ".json"
        fname = "data/odsp_data_obs_" + str(stamp) + ".json"
        with open(fname, 'w') as f:
            json.dump(buf_hist, f)
        print("Wrote behavioral data to: " + fname)
Code example #11
File: ppo_p_run.py  Project: rravenel/furuta_pendulum
from stable_baselines import PPO1

import furuta_env_p_ppo as fep
import furuta_env_base as feb
import common as cm

POLICY_PATH = "policy/"

TEST = "test"
TEST_COUNT_BAL = 500
TEST_COUNT_UP = 100
TEST_COUNT_HYBRID = 50
TEST_CUTOFF_MIN = 100  # a test that ran at least this many steps is considered a long test, for reporting purposes
TEST_CUTOFF_MAX = 50  # a test that ran less than this many steps is considered a short test, for reporting purposes

ARM_TARGET_RAD = cm.deg2Rad(90)


def speedCheck(obs):
    if abs(obs[1]) > cm.VEL_MAX_ARM or abs(obs[3]) > cm.VEL_MAX_POLE:
        print("Overspeed!")
        return True
    return False


def mainBal(arg):
    test = arg == TEST

    env = fep.FurutaEnvPosPpoBal(cm.RUN, render=not test)
    #env.setRender(not test)
    #model = PPO1.load(POLICY_PATH + "ppo1_pos_policy_bal.zip")
Code example #12
    def compute_done(self):
        overspeed = self.overspeed()
        endOfRun = self._envStepCounter >= 1000
        terminalAngle = abs(cm.rad2Norm(self.pole_angle_real)) > cm.deg2Rad(
            cm.ANGLE_TERMINAL_MAX_D)
        return endOfRun or terminalAngle or overspeed
Code example #13
    def compute_done(self):
        overspeed = self.overspeed()
        return overspeed or self._envStepCounter >= 1000 or (
            abs(self.pole_angle_real) < cm.deg2Rad(cm.ANGLE_TERMINAL_MIN_D)
            and abs(self.pole_vel_real) < 2 * feb.PI)
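Unlike the two variants above, this compute_done also ends the episode once the pole is nearly upright (within ANGLE_TERMINAL_MIN_D of vertical) and slow (under 2*PI rad/s), i.e. it presumably treats a completed swing-up as a terminal success, which fits the spin-up training environment rather than the balance one.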