コード例 #1
0
ファイル: simple_driver.py プロジェクト: malbot/cs234
 def add_placeholders(self):
     """
     adds the placeholders that will be used by both the actor and critic
     actor_state: placeholder for states to feed to actor
     critic_state: placeholder for states to feed to critic
     action: action taken while training actor
     g: reward G, the total sum of rewards for a single episode from the given state onward (excludes previous rewards)
     r: reward R, the reward for a single (s,a,s') tuple
     :return: 
     """
     self.current_state = tf.placeholder(
         tf.float32, shape=[None, State.size() + self.kappa_length])
     self.next_state = tf.placeholder(
         tf.float32, shape=[None, State.size() + self.kappa_length])
     self.action = tf.placeholder(tf.float32, shape=[None, Action.size()])
     self.g = tf.placeholder(tf.float32, shape=[None])
     self.r = tf.placeholder(tf.float32, shape=[None])
コード例 #2
0
ファイル: car2.py プロジェクト: malbot/cs234
 def make_test_state(self, Ux, Uy, r, path=None, x=0, y=0, o=0, delta_psi=0, e=0, s=0, wf=None, wr=None):
     """
     sets the state of the vehicle
     :param Ux: velocity in the x direction
     :param Uy: velocity the y direction (lateral)
     :param r: rate of rotation
     :return:
     """
     wf = np.ones(shape=2)*Ux/self.Re if wf is None else wf
     wr = np.ones(shape=2)*Ux/self.Re if wr is None else wr
     return State(Ux=Ux, Uy=Uy, r=r, wf=wf, wr=wr, path=path, wx=x, wy=y, wo=o, delta_psi=delta_psi, e=e, s=s)
コード例 #3
0
 def start_state(self, Ux, Uy, r, path=None):
     """
     sets the state of the vehicle
     :param Ux: velocity in the x direction
     :param Uy: velocity the y direction (lateral)
     :param r: rate of rotation
     :return:
     """
     wf = np.ones(shape=2)*Ux/self.Re
     wr = np.ones(shape=2)*Ux/self.Re
     return State(Ux=Ux, Uy=Uy, r=r, wf=wf, wr=wr, path=path, wx=path.x[0], wy=path.y[0], wo=path.kappa(0), delta_psi=0, e=0, s=0)
コード例 #4
0
ファイル: car2.py プロジェクト: malbot/cs234
 def start_state(self, Ux, Uy, r, path=None):
     """
     sets the state of the vehicle
     :param Ux: velocity in the x direction
     :param Uy: velocity the y direction (lateral)
     :param r: rate of rotation
     :return:
     """
     wf = np.ones(shape=2)*Ux/self.Re
     wr = np.ones(shape=2)*Ux/self.Re
     if path is not None:
         nx, ny = path.p(path.interp_offset)
         sx, sy = path.p(0)
         o = np.arctan2(ny-sy, nx-sx)
     else:
         o = 0
     return State(Ux=Ux, Uy=Uy, r=r, wf=wf, wr=wr, path=path, wx=path.x[0], wy=path.y[0], wo=o, delta_psi=0, e=0, s=0)
コード例 #5
0
ファイル: car2.py プロジェクト: malbot/cs234
    def __call__(self, state, action, time, as_state=True):
        """
        updates the state of the vehicle
        :param delta: turn angle of the front wheels
        :param time: time step
        :param torque_f: torque on the front wheels
        :param torque_r: torque on the rear wheels
        :param as_state: returns output as dictionary
        :return: the new state
        """
        Uxr = np.array([
            state.Ux - self.d/2*state.r,
            state.Ux + self.d/2*state.r
        ])
        Uyr = np.array([
            state.Uy - self.b*state.r,
            state.Uy - self.b*state.r
        ])

        Uxf = np.array([
            state.Ux - self.d/2*state.r,
            state.Ux + self.d/2*state.r
        ])

        Uyf = np.array([
            state.Uy + self.a*state.r,
            state.Uy + self.a*state.r
        ])

        alphar = np.arctan2(Uyr, Uxr)
        alphaf = np.arctan2(Uyf, Uxf) - action.delta

        Vr = Uxr
        Vf = Uxf*np.cos(action.delta) + Uyf*np.sin(action.delta)
        sigmar = np.nan_to_num(np.divide(self.Re*state.wr - Vr, Vr))
        sigmaf = np.nan_to_num(np.divide(self.Re*state.wf - Vf, Vf))

        F_rl = self.tyre_model["rear"](Fz=self.fz_rear, alpha=alphar[0], sigma=sigmar[0], as_dict=True)
        F_rr = self.tyre_model["rear"](Fz=self.fz_rear, alpha=alphar[1], sigma=sigmar[1], as_dict=True)
        F_fl = self.tyre_model["front"](Fz=self.fz_front, alpha=alphaf[0], sigma=sigmaf[0], as_dict=True)
        F_fr = self.tyre_model["front"](Fz=self.fz_front, alpha=alphaf[1], sigma=sigmaf[1], as_dict=True)

        R_a = np.sqrt(self.a**2 + self.d**2/4)
        theta_a = np.arctan(self.d/(self.a*2))
        Ff = np.array([
            -F_fl['Fx']*np.sin(theta_a - action.delta) + F_fl['Fy']*np.cos(theta_a - action.delta),
            F_fr['Fx'] * np.sin(theta_a + action.delta) + F_fr['Fy']*np.cos(theta_a + action.delta)
        ])

        R_b = np.sqrt(self.b**2 + self.d**2/4)
        theta_b = np.arctan(self.d/(2*self.b))
        Fr = np.array([
            -F_rl['Fx']*np.sin(theta_b) - F_rl['Fy']*np.cos(theta_b),
            F_rr['Fx'] * np.sin(theta_b) - F_rr['Fy'] * np.cos(theta_b)
        ])

        drdt = (np.sum(Ff*R_a) + np.sum(Fr*R_b))/self.Iz
        dUydt = (
                    F_rl['Fy'] + F_rr['Fy']
                    + (F_fl['Fy'] + F_fr['Fy'])*np.cos(action.delta)
                    + (F_fl['Fx'] + F_fr['Fx'])*np.sin(action.delta)
                )/self.m - state.r*state.Ux
        dUxdt = (
                    F_rl['Fx'] + F_rr['Fx']
                    + (F_fl['Fx'] + F_fr['Fx'])*np.cos(action.delta)
                    - (F_fl['Fy'] + F_fr['Fy'])*np.sin(action.delta)
                )/self.m + state.r*state.Uy

        dwrdt = (action.tr - self.Re * np.array([F_rl['Fx'], F_rr['Fx']]))/self.Jw
        dwfdt = (action.tf - self.Re * np.array([F_fl['Fx'], F_fr['Fx']]))/self.Jw

        dx = state.Ux * time
        dy = state.Uy * time
        do = state.r * time

        Ux = time*dUxdt + state.Ux
        Uy = time*dUydt + state.Uy
        r = time*drdt + state.r
        wr = time*dwrdt + state.wr
        wf = time*dwfdt + state.wf

        Wo = float(state.wo + do)
        if Wo > np.pi:
            Wo -= 2*np.pi
        elif Wo < -1*np.pi:
            Wo += 2*np.pi
        Wx = float(state.wx + (dx * np.cos(state.wo) + dy * np.sin(state.wo)))
        Wy = float(state.wy + (dx * np.sin(state.wo) + dy * np.cos(state.wo)))

        e, s, road_orientation = state.path.interpolate(Wx, Wy, old_s=None)
        delta_psi = Wo - road_orientation

        state = State(
                Ux=Ux,
                Uy=Uy,
                r=r,
                wf=wf,
                wr=wr,
                path=state.path,
                wo=Wo,
                wx=Wx,
                wy=Wy,
                e=e,
                s=s,
                delta_psi=delta_psi,
                e_max=state.e_max,
                data={
                    "u_xr": Uxr,
                    "u_yr": Uyr,
                    "u_xf": Uxf,
                    "u_yf": Uyf,
                    "alphar": alphar,
                    "alphaf": alphaf,
                    "sigmaf": sigmaf,
                    "sigmar": sigmar,
                    "Vf": Vf,
                    "Vr": Vr,
                    "f_rl": F_rl,
                    "f_rr": F_rr,
                    "f_fr": F_fr,
                    "f_fl": F_fl,
                    "Ra": R_a,
                    "theta_a": theta_a,
                    "ff": Ff,
                    "Rb": R_b,
                    "theta_b": theta_b,
                    "fr": Fr,
                    "del": action.delta,
                    "tr_r": action.tr,
                    "tr_f": action.tf,
                    "w_r": wr,
                    "w_f": wf
                }
        )

        if self.record is not None:
            for points, point in zip(self.record, [[state.wx, state.wy], [state.x_front(self), state.y_front(self)], [state.x_back(self), state.y_back(self)]]):
                points[0].append(point[0])
                points[1].append(point[1])

        if as_state:
            return state, dx, dy, do

        else:
            return dx, dy, do