コード例 #1
0
    def step(self, a):
        """
        Take acceleration action *a*, adding noise as specified in ``__init__()``.

        The third state component counts the flags collected so far. While
        flags remain, the step reward is augmented with
        ``discount_factor * phi(next) - phi(previous)``, where ``phi`` is a
        Gaussian-shaped bump over position centred on the next flag. The flag
        counter advances when that flag lies between the previous and the new
        position (both ends inclusive).

        :param a: index into ``self.actions``.
        :return: ``(r, ns, terminal, possible_actions)`` with
            ``ns = [position, velocity, flag_count]``.
        """
        prev_state = self.state
        position, velocity, cflag = prev_state

        # One random draw, recentred around zero and scaled by the noise level.
        draw = self.random_state.rand()
        noise = self.accelerationFactor * self.noise * 2 * (draw - .5)
        thrust = self.actions[a] * self.accelerationFactor
        gravity = np.cos(self.hillPeakFrequency * position) * self.gravityFactor

        velocity = bound(velocity + (noise + thrust + gravity),
                         self.XDOTMIN, self.XDOTMAX)
        position = bound(position + velocity, self.XMIN, self.XMAX)
        if position <= self.XMIN and velocity < 0:
            velocity = 0  # stopped by the wall at XMIN

        r = self.STEP_REWARD
        if cflag < self.FlagNum:
            idx = int(cflag)
            flag_pos = self.FlagPos[idx]
            flag_wid = self.FlagWid[idx]
            flag_height = self.FlagHeight[idx]

            def potential(x):
                # Gaussian-shaped bump centred on the current flag.
                offset = x - flag_pos
                return flag_height * np.exp(-(0.5 * offset**2 / flag_wid))

            phi_prev = potential(prev_state[0])
            phi_next = potential(position)
            r += self.discount_factor * phi_next - phi_prev

            # The flag counts as collected once it lies within the segment
            # travelled during this step.
            lo = min(prev_state[0], position)
            hi = max(prev_state[0], position)
            if lo <= flag_pos <= hi:
                cflag += 1

        collected = int(cflag)
        ns = np.array([position, velocity, collected])
        self.collectedFlags = collected
        self.state = ns.copy()

        terminal = self.isTerminal()
        if terminal:
            r += self.GOAL_REWARD

        return r, ns, terminal, self.possibleActions()
コード例 #2
0
    def step(self, a):
        """
        Take acceleration action *a*, adding noise as specified in ``__init__()``.

        The last state component counts collected flags; in this variant each
        flag is a target in (position, velocity) space. While flags remain, a
        bonus of ``FlagHeight / dist / 100`` is granted whenever the L1
        distance to the current flag shrinks (``dist`` being the distance
        before the move). The counter advances when the flag lies inside the
        axis-aligned box spanned by the previous and the new
        (position, velocity) pair.

        :param a: index into ``self.actions``.
        :return: ``(r, ns, terminal, possible_actions)`` with
            ``ns = [position, velocity, flag_count]``.
        """
        old_state = self.state
        position, velocity, cflag = old_state

        # One random draw, recentred around zero and scaled by the noise level.
        draw = self.random_state.rand()
        noise = self.accelerationFactor * self.noise * 2 * (draw - .5)
        push = self.actions[a] * self.accelerationFactor
        slope = np.cos(self.hillPeakFrequency * position) * self.gravityFactor

        velocity = bound(velocity + (noise + push + slope),
                         self.XDOTMIN, self.XDOTMAX)
        position = bound(position + velocity, self.XMIN, self.XMAX)
        if position <= self.XMIN and velocity < 0:
            velocity = 0  # stopped by the wall at XMIN

        # Reward and flag bookkeeping (last state dimension).
        r = self.STEP_REWARD
        if cflag < self.FlagNum:
            idx = int(cflag)
            target = self.FlagPos[idx]
            old_pv = old_state[0:-1]
            new_pv = np.array([position, velocity])

            dist_before = np.sum(np.absolute(old_pv - target))
            dist_after = np.sum(np.absolute(new_pv - target))
            if dist_after < dist_before:
                r += self.FlagHeight[idx] / dist_before / 100

            box_lo = np.array([min(old_state[0], position),
                               min(old_state[1], velocity)])
            box_hi = np.array([max(old_state[0], position),
                               max(old_state[1], velocity)])
            if np.all(target >= box_lo) and np.all(target <= box_hi):
                cflag += 1

        collected = int(cflag)
        ns = np.array([position, velocity, collected])
        self.collectedFlags = collected
        self.state = ns.copy()

        terminal = self.isTerminal()
        if terminal:
            r += self.GOAL_REWARD

        return r, ns, terminal, self.possibleActions()
コード例 #3
0
 def step(self, a):
     """
     Take acceleration action *a*, adding noise as specified in ``__init__()``.

     :param a: index into ``self.actions``.
     :return: ``(r, ns, terminal, possible_actions)`` where *r* is
         ``GOAL_REWARD`` if the new state is terminal and ``STEP_REWARD``
         otherwise, and ``ns = [position, velocity]``.
     """
     position, velocity = self.state

     # One random draw, recentred around zero and scaled by the noise level.
     draw = self.random_state.rand()
     noise = self.accelerationFactor * self.noise * 2 * (draw - .5)
     thrust = self.actions[a] * self.accelerationFactor
     gravity = np.cos(self.hillPeakFrequency * position) * self.gravityFactor

     velocity = bound(velocity + (noise + thrust + gravity),
                      self.XDOTMIN, self.XDOTMAX)
     position = bound(position + velocity, self.XMIN, self.XMAX)
     if position <= self.XMIN and velocity < 0:
         velocity = 0  # stopped by the wall at XMIN

     ns = np.array([position, velocity])
     self.state = ns.copy()

     terminal = self.isTerminal()
     if terminal:
         r = self.GOAL_REWARD
     else:
         r = self.STEP_REWARD
     return r, ns, terminal, self.possibleActions()
コード例 #4
0
    def step(self, a):
        """
        Take acceleration action *a*, adding noise as specified in ``__init__()``.

        The third state component is the number of flags collected. While
        flags remain, ``discount_factor * phi(next) - phi(previous)`` is added
        to the step reward, ``phi`` being a Gaussian-shaped function of the
        distance between the car's position and the next flag. The counter is
        incremented when the move reaches or passes over that flag from either
        side.

        :param a: index into ``self.actions``.
        :return: ``(r, ns, terminal, possible_actions)`` with
            ``ns = [position, velocity, flag_count]``.
        """
        position, velocity, cflag = self.state
        old_position = position

        # One random draw, recentred around zero and scaled by the noise level.
        draw = self.random_state.rand()
        noise = self.accelerationFactor * self.noise * 2 * (draw - .5)
        push = self.actions[a] * self.accelerationFactor
        slope = np.cos(self.hillPeakFrequency * position) * self.gravityFactor

        velocity = bound(velocity + (noise + push + slope),
                         self.XDOTMIN, self.XDOTMAX)
        position = bound(position + velocity, self.XMIN, self.XMAX)
        if position <= self.XMIN and velocity < 0:
            velocity = 0  # stopped by the wall at XMIN

        # Reward and flag bookkeeping (last state dimension).
        r = self.STEP_REWARD
        if cflag < self.FlagNum:
            idx = int(cflag)
            centre = self.FlagPos[idx]
            width = self.FlagWid[idx]
            height = self.FlagHeight[idx]

            phi_prev, phi_next = (
                height * np.exp(-(0.5 * (x - centre)**2 / width))
                for x in (old_position, position))
            r += self.discount_factor * phi_next - phi_prev

            crossed_rightward = old_position <= centre <= position
            crossed_leftward = position <= centre <= old_position
            if crossed_rightward or crossed_leftward:
                cflag += 1

        collected = int(cflag)
        ns = np.array([position, velocity, collected])
        self.collectedFlags = collected
        self.state = ns.copy()

        terminal = self.isTerminal()
        if terminal:
            r += self.GOAL_REWARD

        return r, ns, terminal, self.possibleActions()
コード例 #5
0
    def step(self, a):
        """
        Take acceleration action *a*, adding noise as specified in ``__init__()``.

        The state is ``[position, velocity]``; the number of collected flags
        is tracked separately in ``self.collectedFlags``. On a terminal step
        the reward is ``GOAL_REWARD``. Otherwise it is ``STEP_REWARD``, plus
        ``discount_factor * phi(next) - phi(previous)`` while flags remain,
        where ``phi`` is a Gaussian-shaped function of the distance between
        the car's position and the next flag. The flag counter is incremented
        when a non-terminal move reaches or passes over that flag.

        :param a: index into ``self.actions``.
        :return: ``(r, ns, terminal, possible_actions)`` with
            ``ns = [position, velocity]``.
        """
        s = self.state
        position, velocity = self.state
        noise = self.accelerationFactor * self.noise * \
            2 * (self.random_state.rand() - .5)
        velocity += (
            noise + self.actions[a] * self.accelerationFactor +
            np.cos(self.hillPeakFrequency * position) * self.gravityFactor)
        velocity = bound(velocity, self.XDOTMIN, self.XDOTMAX)
        position += velocity
        position = bound(position, self.XMIN, self.XMAX)
        if position <= self.XMIN and velocity < 0:
            velocity = 0  # Bump into wall

        ns = np.array([position, velocity])
        self.state = ns.copy()
        # Check termination only after the new state has been committed, so
        # that ``terminal`` describes *ns* and not the state we just left.
        # NOTE(review): assumes isTerminal() inspects self.state -- confirm.
        terminal = self.isTerminal()

        if terminal:
            r = self.GOAL_REWARD
        else:
            r = self.STEP_REWARD
            collected = self.collectedFlags
            if collected < self.FlagNum:
                flag_pos = self.FlagPos[collected]
                flag_wid = self.FlagWid[collected]
                flag_height = self.FlagHeight[collected]

                distance_s = s[0] - flag_pos
                phi_s = flag_height * np.exp(-(0.5 * distance_s**2 / flag_wid))
                distance_ns = ns[0] - flag_pos
                phi_ns = flag_height * np.exp(-(0.5 * distance_ns**2 / flag_wid))
                r = self.STEP_REWARD + self.discount_factor * phi_ns - phi_s

                # Collected when the flag lies on the segment travelled
                # during this step, in either direction.
                if (s[0] <= flag_pos <= ns[0]) or (ns[0] <= flag_pos <= s[0]):
                    self.collectedFlags += 1

        return r, ns, terminal, self.possibleActions()