コード例 #1
0
ファイル: PST.py プロジェクト: okkhoy/rlpy
    def vecList2idHelper(self, x, actionIDs, ind, curActionList, maxValue,
                         limits):
        """
        Recursive worker behind vecList2id().

        Starting at ``x[ind]``, picks one element from each remaining list of
        ``x`` in every possible way.  For each complete pick,
        ``vec2id(assignment, limits)`` is appended to ``actionIDs``, where
        ``assignment`` is ``curActionList`` followed by the picked elements.

        :param x: list of lists, e.g. ``[[0, 2], [1, 2]]``; ``x[i]`` holds the candidate values for slot ``i``.
        :param actionIDs: output list; extended in place, one id per complete assignment.
        :param ind: index of the list in ``x`` that this call expands.
        :param curActionList: values already chosen for the slots before ``ind``; a copy is extended, the argument itself is left untouched.
        :param maxValue: not used by this method; only handed on to the recursive call.
        :param limits: forwarded unchanged to vec2id().
        :returns: None; the ids are collected in ``actionIDs``.

        See vecList2id()

        """
        for choice in x[ind]:
            # Work on a copy so sibling branches do not see this choice.
            assignment = curActionList[:]
            assignment.append(choice)
            if ind != len(x) - 1:
                # More lists to go: descend with the extended assignment.
                self.vecList2idHelper(
                    x, actionIDs, ind + 1, assignment, maxValue,
                    limits)  # TODO remove self
                continue
            # Final list reached, e.g. assignment [0,1,0,2] with limits
            # [3,3,3,3]: the assignment is complete, store its id.
            actionIDs.append(vec2id(assignment, limits))
コード例 #2
0
ファイル: BlocksWorld.py プロジェクト: zhexiaozhe/rlpy
 def possibleActions(self):
     """
     Return the ids of the moves available in the current state.

     Candidate moves are all pairs ``[a, b]`` of clear blocks, i.e. blocks
     for which ``self.clear(block, self.state)`` holds.  A pair is discarded
     when ``self.destination_is_table(a, b)`` holds and ``a`` is already on
     the table: a block sitting on the table cannot be picked up and put on
     the table again.

     :returns: numpy array holding ``vec2id([a, b], [self.blocks, self.blocks])`` for every remaining pair.
     """
     s = self.state
     # find empty blocks (nothing on top)
     # ``range`` rather than ``xrange``: same iteration on Python 2, and no
     # NameError on Python 3 where ``xrange`` does not exist.
     empty_blocks = [b for b in range(self.blocks) if self.clear(b, s)]
     actions = [
         [a, b] for a in empty_blocks for b in empty_blocks
         if not self.destination_is_table(a, b) or not self.on_table(a, s)
     ]
     return np.array(
         [vec2id(x, [self.blocks, self.blocks]) for x in actions])
コード例 #3
0
ファイル: BlocksWorld.py プロジェクト: okkhoy/rlpy
 def possibleActions(self):
     """
     Return the ids of the moves available in the current state.

     Every ordered pair ``[src, dst]`` of clear blocks (blocks accepted by
     ``self.clear`` for ``self.state``) is a candidate move.  A candidate is
     skipped when its destination is the table while ``src`` already sits on
     the table, because such a block cannot be picked up and put on the
     table again.

     :returns: numpy array with ``vec2id([src, dst], [self.blocks, self.blocks])`` for each kept pair.
     """
     state = self.state

     # Blocks with nothing on top of them.
     clear_blocks = []
     for candidate in range(self.blocks):
         if self.clear(candidate, state):
             clear_blocks.append(candidate)

     moves = []
     for src in clear_blocks:
         for dst in clear_blocks:
             # Table-to-table moves are not allowed.
             if self.destination_is_table(src, dst) and self.on_table(src, state):
                 continue
             moves.append([src, dst])

     return np.array(
         [vec2id(move, [self.blocks, self.blocks]) for move in moves])
コード例 #4
0
ファイル: Representation.py プロジェクト: epruesse/rlpy
    def hashState(self, s):
        """
        Map the state *s* to an integer id.

        The state is first passed through ``self.binState`` (which, per the
        original description, discretizes continuous dimensions into bins);
        the binned state is then turned into a single integer by ``vec2id``
        using ``self.bins_per_dim`` as the per-dimension limits.

        :param s: the state to hash.
        :returns: the result of ``vec2id`` for the binned state.
        """
        return vec2id(self.binState(s), self.bins_per_dim)
コード例 #5
0
    def hashState(self, s):
        """
        Return an integer id for the state *s*.

        Two steps: ``self.binState`` converts the state into its binned form
        (discretizing continuous dimensions where necessary, per the original
        description), and ``vec2id`` collapses that binned vector into one
        integer, with ``self.bins_per_dim`` as the limits of each dimension.

        :param s: the state to hash.
        :returns: the id computed by ``vec2id``.
        """
        binned_state = self.binState(s)
        bin_limits = self.bins_per_dim
        return vec2id(binned_state, bin_limits)
コード例 #6
0
ファイル: MDPSolver.py プロジェクト: smcgregor/rlpy
    def BellmanBackup(self, s, a, ns_samples, policy=None):
        """Apply a Bellman backup to the state-action pair (s, a).

        The backed-up value is Q(s,a) = E[r + discount_factor * V(s')], or,
        when a policy is given, Q(s,a) = E[r + discount_factor * Q(s',pi(s'))].
        It is obtained from ``representation.Q_oneStepLookAhead`` and stored
        in ``representation.weight_vec`` at index
        ``agg_states_num * a + <id of the binned state>``.

        Args:
            s (ndarray):        The current state
            a (int):            The action taken in state s
            ns_samples(int):    Number of next state samples to use.
            policy (Policy):    Policy object to use for sampling actions.

        Returns:
            None. ``representation.weight_vec`` is updated in place.
        """
        rep = self.representation
        q_value = rep.Q_oneStepLookAhead(s, a, ns_samples, policy)
        # Integer id of the binned state.
        state_index = vec2id(rep.binState(s), rep.bins_per_dim)
        # Offset the state id by the action: index = agg_states_num * a + id.
        target = int(rep.agg_states_num * a + state_index)
        rep.weight_vec[target] = q_value
コード例 #7
0
ファイル: MDPSolver.py プロジェクト: okkhoy/rlpy
    def BellmanBackup(self, s, a, ns_samples, policy=None):
        """Perform one Bellman backup for the state-action pair (s, a).

        Computes Q(s,a) = E[r + discount_factor * V(s')] -- or, if a policy is
        supplied, Q(s,a) = E[r + discount_factor * Q(s',pi(s'))] -- through
        ``representation.Q_oneStepLookAhead`` and writes the result into the
        matching entry of ``representation.weight_vec``.

        Args:
            s (ndarray):        The current state
            a (int):            The action taken in state s
            ns_samples(int):    Number of next state samples to use.
            policy (Policy):    Policy object to use for sampling actions.

        Returns:
            None. The weight vector of the representation is modified.
        """
        representation = self.representation
        backed_up_value = representation.Q_oneStepLookAhead(
            s, a, ns_samples, policy)

        binned_state = representation.binState(s)
        state_id = vec2id(binned_state, representation.bins_per_dim)

        # The weight index is the state id shifted by agg_states_num * a.
        slot = int(representation.agg_states_num * a + state_id)
        representation.weight_vec[slot] = backed_up_value
コード例 #8
0
ファイル: PST.py プロジェクト: amoliu/consumable-irl
    def vecList2idHelper(self, x, actionIDs, ind, curActionList, maxValue,
                         limits):
        """
        Helper that does the recursion for vecList2id().

        For every combination obtained by taking one element from each of
        ``x[ind]``, ``x[ind + 1]``, ..., the combination is appended to a copy
        of ``curActionList`` and the ``vec2id`` of that full assignment is
        added to ``actionIDs``.

        :param x: list of lists of candidate values, e.g. ``[[0, 2], [1, 2]]``.
        :param actionIDs: list that receives the ids; modified in place.
        :param ind: position in ``x`` handled by this call.
        :param curActionList: partial assignment built so far; copied before being extended.
        :param maxValue: unused here apart from being passed to the recursive call.
        :param limits: limits handed to vec2id() together with each complete assignment.
        :returns: None.

        See vecList2id()

        """
        onLastList = (ind + 1 == len(x))
        for option in x[ind]:
            chosen = curActionList[:]
            chosen.append(option)
            if onLastList:
                # Complete assignment, eg [0,1,0,2] with limits [3,3,3,3].
                actionIDs.append(vec2id(chosen, limits))
            else:
                # Expand the next list on top of this partial assignment.
                self.vecList2idHelper(x, actionIDs, ind + 1, chosen,
                                      maxValue, limits)  # TODO remove self
コード例 #9
0
ファイル: BlocksWorld.py プロジェクト: zhexiaozhe/rlpy
 def getActionPutAonB(self, A, B):
     """
     Return the action id that encodes the block pair ``[A, B]``.

     The id is ``vec2id`` of the pair, with both entries limited by
     ``self.blocks``.
     """
     move = np.array([A, B])
     limits = [self.blocks, self.blocks]
     return vec2id(move, limits)
コード例 #10
0
ファイル: BlocksWorld.py プロジェクト: zhexiaozhe/rlpy
 def getActionPutAonTable(self, A):
     """
     Return the action id that encodes the pair ``[A, A]``.

     Judging by the method name, pairing a block with itself is how
     "put A on the table" is represented -- confirm against the domain's
     destination_is_table().  Both entries are limited by ``self.blocks``.
     """
     move = np.array([A, A])
     limits = [self.blocks, self.blocks]
     return vec2id(move, limits)
コード例 #11
0
    def pi2(self, s, terminal, p_actions):
        """
        Return the hand-coded action for state ``s`` in the current domain.

        The branch is chosen by the class name of
        ``self.representation.domain``:

        * ``GridWorld``: follow the ``cw_circle`` / ``ccw_circle`` policy
          named by ``self.policyName``.  The last action is remembered in
          ``self.curAction`` and rotated until the domain allows it.
        * ``InfCartPoleBalance`` / ``MountainCar``: action 2 if the second
          (velocity) component of ``s`` is positive, otherwise action 0.
        * ``BlocksWorld``: with probability 0.3, or when the domain reports a
          terminal state, a random possible action; otherwise the next move
          towards the tower 0, 1, 2, ... built bottom-up.
        * ``IntruderMonitoring``: agent ``i`` moves towards the danger zone
          closest (Manhattan distance) to target ``i``; agents without a
          target hold.
        * ``SystemAdministrator``: a random computer whose state entry is 0,
          or ``domain.computers_num`` if there is none.
        * ``PST``: placeholder that prints the state and returns the id of
          the all-zero joint action.

        :param s: the state to act in.
        :param terminal: not used by this method.
        :param p_actions: not used by this method.
        :returns: an action id, or None when the domain or the GridWorld policy name is not supported.
        :raises ValueError: if ``self.curAction`` is neither allowed by the GridWorld domain nor one of the four actions of the cycle (previously this case printed a message forever).
        """
        domain = self.representation.domain
        domain_name = className(domain)
        if domain_name not in self.supportedDomains:
            print("ERROR: There is no fixed policy defined for %s" %
                  domain_name)
            return None

        if domain_name == 'GridWorld':
            # Actions are Up (0), Down (1), Left (2), Right (3)
            if self.policyName not in self.gridWorldPolicyNames:
                print("Error: There is no GridWorld policy with name %s" %
                      self.policyName)
                return None

            # The action ids are not laid out in rotation order, so the
            # successor of each action is spelled out explicitly.
            if self.policyName == 'cw_circle':
                first_action = 0
                # up -> right -> down -> left -> up
                next_action = {0: 3, 3: 1, 1: 2, 2: 0}
            elif self.policyName == 'ccw_circle':
                first_action = 1
                # right -> up -> left -> down -> right
                next_action = {3: 0, 0: 2, 2: 1, 1: 3}
            else:
                print(
                    "Error: No policy defined with name %s, but listed in gridWorldPolicyNames"
                    % self.policyName)
                print(
                    "You need to create a switch statement for the policy name above, or remove it from gridWorldPolicyNames"
                )
                return None

            if not hasattr(self, "curAction"):
                # First call: start the cycle.
                self.curAction = first_action
            # Keep the previous action while it is possible; otherwise rotate
            # to the next action of the cycle until one is possible.
            while self.curAction not in domain.possibleActions(s):
                if self.curAction not in next_action:
                    # Nothing to rotate to: fail loudly instead of looping
                    # forever on an action that can never become valid.
                    raise ValueError(
                        'Something terrible happened...got an invalid action on GridWorld Fixed Policy'
                    )
                self.curAction = next_action[self.curAction]
            return self.curAction

        if domain_name == 'InfCartPoleBalance':
            # Fixed policy rotate the pendulum in the opposite direction of the
            # thetadot
            _, thetadot = s
            return 2 if thetadot > 0 else 0

        if domain_name == 'BlocksWorld':
            # Near-optimal tower building: 30% of the time (and in terminal
            # states) act randomly, otherwise make the next constructive move.
            # TODO fix isTerminal use here
            if self.random_state.rand() < .3 or domain.isTerminal():
                return randSet(domain.possibleActions(s))

            # Find the first block that is not where the tower needs it:
            # block 0 belongs on the table, block k on top of block k - 1.
            # NOTE(review): the scan has no upper bound; it appears to rely on
            # the terminal check above to exclude a finished tower -- confirm.
            block = 0
            while (block == 0 and domain.on_table(block, s)) or domain.on(
                    block, block - 1, s):
                block += 1

            # 1. Something wrong sits on the correct part of the tower: clear
            #    it first.  (For block 0 the "tower top" is the table, which
            #    is free by definition.)
            if block != 0:
                ideal_tower_top = block - 1
                tower_top = domain.towerTop(ideal_tower_top, s)
                if tower_top != ideal_tower_top:
                    return domain.getActionPutAonTable(tower_top)

            # 2. The block to be moved is covered: uncover it.
            block_top = domain.towerTop(block, s)
            if block_top != block:
                return domain.getActionPutAonTable(block_top)

            # 3. Both are free: put the block where it belongs.
            if block == 0:
                return domain.getActionPutAonTable(block)
            return domain.getActionPutAonB(block, block - 1)

        if domain_name == 'IntruderMonitoring':
            # Each UAV assign themselves to a target
            # Each UAV finds the closest danger zone to its target and go towards there.
            # If UAVs_num > Target, the rest will hold position
            agents = np.array(s[:domain.NUMBER_OF_AGENTS * 2].reshape(-1, 2))
            targets = np.array(s[domain.NUMBER_OF_AGENTS * 2:].reshape(-1, 2))
            zones = domain.danger_zone_locations
            # Default action is hold (4).  ``int`` replaces the abstract
            # ``np.integer``, whose use as a dtype is deprecated in NumPy.
            actions = np.ones(len(agents), dtype=int) * 4
            planned_agents_num = min(len(agents), len(targets))
            for i in range(planned_agents_num):
                # Find closest zone (manhattan) to the corresponding target
                target = targets[i, :]
                distances = np.sum(
                    np.abs(np.tile(target, (len(zones), 1)) - zones), axis=1)
                z_row, z_col = zones[np.argmin(distances), :]
                a_row, a_col = agents[i, :]
                # Later checks overwrite earlier ones, so a column offset
                # takes precedence over a row offset.
                a = 4  # hold as a default action
                if a_row > z_row:
                    a = 0  # up
                if a_row < z_row:
                    a = 1  # down
                if a_col > z_col:
                    a = 2  # left
                if a_col < z_col:
                    a = 3  # right
                actions[i] = a
            # Five actions per agent.
            return vec2id(actions, np.ones(len(agents), dtype=int) * 5)

        if domain_name == 'SystemAdministrator':
            # Select a broken computer and reset it
            brokenComputers = np.where(s == 0)[0]
            if len(brokenComputers):
                return randSet(brokenComputers)
            return domain.computers_num

        if domain_name == 'MountainCar':
            # Accelerate in the direction of the valley
            # WORK IN PROGRESS
            _, xdot = s
            return 2 if xdot > 0 else 0

        if domain_name == 'PST':
            # Intended policy: one stays at comm, n-1 stay at target area.
            # Whenever fuel is lower than reaching the base they move back.
            # Currently only the all-zero joint action is returned.
            print(s)
            s = domain.state2Struct(s)
            uavs = domain.NUM_UAV
            print(s)
            return vec2id(np.zeros(uavs), np.ones(uavs) * 3)

        # Domain is listed in supportedDomains but has no branch above.
        return None
コード例 #12
0
ファイル: FixedPolicy.py プロジェクト: smcgregor/rlpy
    def pi2(self, s, terminal, p_actions):
        """
        Return the hand-coded action for state ``s`` in the current domain.

        The branch is chosen by the class name of
        ``self.representation.domain``:

        * ``GridWorld``: follow the ``cw_circle`` / ``ccw_circle`` policy
          named by ``self.policyName``.  The last action is remembered in
          ``self.curAction`` and rotated until the domain allows it.
        * ``InfCartPoleBalance`` / ``MountainCar``: action 2 if the second
          (velocity) component of ``s`` is positive, otherwise action 0.
        * ``BlocksWorld``: with probability 0.3, or when the domain reports a
          terminal state, a random possible action; otherwise the next move
          towards the tower 0, 1, 2, ... built bottom-up.
        * ``IntruderMonitoring``: agent ``i`` moves towards the danger zone
          closest (Manhattan distance) to target ``i``; agents without a
          target hold.
        * ``SystemAdministrator``: a random computer whose state entry is 0,
          or ``domain.computers_num`` if there is none.
        * ``PST``: placeholder that prints the state and returns the id of
          the all-zero joint action.

        All ``print`` calls use the single-argument parenthesised form, which
        produces the same output on Python 2 and Python 3.

        :param s: the state to act in.
        :param terminal: not used by this method.
        :param p_actions: not used by this method.
        :returns: an action id, or None when the domain or the GridWorld policy name is not supported.
        :raises ValueError: if ``self.curAction`` is neither allowed by the GridWorld domain nor one of the four actions of the cycle (previously this case printed a message forever).
        """
        domain = self.representation.domain
        domain_name = className(domain)
        if domain_name not in self.supportedDomains:
            print("ERROR: There is no fixed policy defined for %s" % domain_name)
            return None

        if domain_name == 'GridWorld':
            # Actions are Up (0), Down (1), Left (2), Right (3)
            if self.policyName not in self.gridWorldPolicyNames:
                print("Error: There is no GridWorld policy with name %s" % self.policyName)
                return None

            # The action ids are not laid out in rotation order, so the
            # successor of each action is spelled out explicitly.
            if self.policyName == 'cw_circle':
                first_action = 0
                # up -> right -> down -> left -> up
                next_action = {0: 3, 3: 1, 1: 2, 2: 0}
            elif self.policyName == 'ccw_circle':
                first_action = 1
                # right -> up -> left -> down -> right
                next_action = {3: 0, 0: 2, 2: 1, 1: 3}
            else:
                print("Error: No policy defined with name %s, but listed in gridWorldPolicyNames" % self.policyName)
                print("You need to create a switch statement for the policy name above, or remove it from gridWorldPolicyNames")
                return None

            if not hasattr(self, "curAction"):
                # First call: start the cycle.
                self.curAction = first_action
            # Keep the previous action while it is possible; otherwise rotate
            # to the next action of the cycle until one is possible.
            while self.curAction not in domain.possibleActions(s):
                if self.curAction not in next_action:
                    # Nothing to rotate to: fail loudly instead of looping
                    # forever on an action that can never become valid.
                    raise ValueError(
                        'Something terrible happened...got an invalid action on GridWorld Fixed Policy')
                self.curAction = next_action[self.curAction]
            return self.curAction

        if domain_name == 'InfCartPoleBalance':
            # Fixed policy rotate the pendulum in the opposite direction of the
            # thetadot
            _, thetadot = s
            return 2 if thetadot > 0 else 0

        if domain_name == 'BlocksWorld':
            # Near-optimal tower building: 30% of the time (and in terminal
            # states) act randomly, otherwise make the next constructive move.
            # TODO fix isTerminal use here
            if np.random.rand() < .3 or domain.isTerminal():
                return randSet(domain.possibleActions(s))

            # Find the first block that is not where the tower needs it:
            # block 0 belongs on the table, block k on top of block k - 1.
            # NOTE(review): the scan has no upper bound; it appears to rely on
            # the terminal check above to exclude a finished tower -- confirm.
            block = 0
            while (block == 0 and domain.on_table(block, s)) or domain.on(block, block - 1, s):
                block += 1

            # 1. Something wrong sits on the correct part of the tower: clear
            #    it first.  (For block 0 the "tower top" is the table, which
            #    is free by definition.)
            if block != 0:
                ideal_tower_top = block - 1
                tower_top = domain.towerTop(ideal_tower_top, s)
                if tower_top != ideal_tower_top:
                    return domain.getActionPutAonTable(tower_top)

            # 2. The block to be moved is covered: uncover it.
            block_top = domain.towerTop(block, s)
            if block_top != block:
                return domain.getActionPutAonTable(block_top)

            # 3. Both are free: put the block where it belongs.
            if block == 0:
                return domain.getActionPutAonTable(block)
            return domain.getActionPutAonB(block, block - 1)

        if domain_name == 'IntruderMonitoring':
            # Each UAV assign themselves to a target
            # Each UAV finds the closest danger zone to its target and go towards there.
            # If UAVs_num > Target, the rest will hold position
            agents = np.array(s[:domain.NUMBER_OF_AGENTS * 2].reshape(-1, 2))
            targets = np.array(s[domain.NUMBER_OF_AGENTS * 2:].reshape(-1, 2))
            zones = domain.danger_zone_locations
            # Default action is hold (4).  ``int`` replaces the abstract
            # ``np.integer``, whose use as a dtype is deprecated in NumPy.
            actions = np.ones(len(agents), dtype=int) * 4
            planned_agents_num = min(len(agents), len(targets))
            # ``range`` iterates identically to ``xrange`` here and also
            # exists on Python 3.
            for i in range(planned_agents_num):
                # Find closest zone (manhattan) to the corresponding target
                target = targets[i, :]
                distances = np.sum(
                    np.abs(np.tile(target, (len(zones), 1)) - zones), axis=1)
                z_row, z_col = zones[np.argmin(distances), :]
                a_row, a_col = agents[i, :]
                # Later checks overwrite earlier ones, so a column offset
                # takes precedence over a row offset.
                a = 4  # hold as a default action
                if a_row > z_row:
                    a = 0  # up
                if a_row < z_row:
                    a = 1  # down
                if a_col > z_col:
                    a = 2  # left
                if a_col < z_col:
                    a = 3  # right
                actions[i] = a
            # Five actions per agent.
            return vec2id(actions, np.ones(len(agents), dtype=int) * 5)

        if domain_name == 'SystemAdministrator':
            # Select a broken computer and reset it
            brokenComputers = np.where(s == 0)[0]
            if len(brokenComputers):
                return randSet(brokenComputers)
            return domain.computers_num

        if domain_name == 'MountainCar':
            # Accelerate in the direction of the valley
            # WORK IN PROGRESS
            _, xdot = s
            return 2 if xdot > 0 else 0

        if domain_name == 'PST':
            # Intended policy: one stays at comm, n-1 stay at target area.
            # Whenever fuel is lower than reaching the base they move back.
            # Currently only the all-zero joint action is returned.
            print(s)
            s = domain.state2Struct(s)
            uavs = domain.NUM_UAV
            print(s)
            return vec2id(np.zeros(uavs), np.ones(uavs) * 3)

        # Domain is listed in supportedDomains but has no branch above.
        return None
コード例 #13
0
ファイル: test_PST.py プロジェクト: okkhoy/rlpy
def test_transitions():
    """
    Drive a two-UAV PST domain through a scripted sequence of joint actions
    and check the state after every step, plus rewards and the final
    terminal flag where they matter.

    Covered, in order:
        1) Sensor and actuator failure, associated lack of reward
        2) Refuel
        3) Repair
        4) Presence of reward iff a UAV is in COMMS *and* SURVEIL
        5) UAV Crash because of lack of fuel

    """
    NUM_UAV = 2
    nPosActions = 3  # = UAVAction.SIZE
    actionLimits = nPosActions * np.ones(NUM_UAV, dtype='int')

    LOITER = UAVAction.LOITER
    ADVANCE = UAVAction.ADVANCE
    RETREAT = UAVAction.RETREAT

    domain = PST(NUM_UAV=NUM_UAV)
    domain.s0()  # the returned start state itself is not needed

    def act_jointly(first, second):
        """Encode one action per UAV as a joint action id and apply it."""
        joint = vec2id(np.array([first, second]), actionLimits)
        return domain.step(joint)

    def assert_state(observed, locations, fuel_levels, actuators, sensors):
        """Compare `observed` with the state vector built from the parts."""
        expected = domain.properties2StateVec(
            locations, fuel_levels, actuators, sensors)
        assert np.array_equiv(observed, expected)

    def both(value):
        """Fresh two-element array holding `value` for each UAV."""
        return np.array([value, value])

    # --- Sensors fail with certainty, actuators never fail ---------------
    domain.P_ACT_FAIL = 0.0
    domain.P_SENSOR_FAIL = 1.0

    locs = both(UAVLocation.COMMS)
    fuel = np.array([10, 10])
    act = both(ActuatorState.RUNNING)
    sens = both(SensorState.RUNNING)
    domain.state = domain.properties2StateVec(locs, fuel, act, sens)

    r, ns, t, _ = act_jointly(LOITER, LOITER)
    # Only changes: one unit of fuel burned and both sensors failed.
    assert_state(ns, locs, fuel - 1, act, both(0))

    # --- Location change --------------------------------------------------
    r, ns, t, _ = act_jointly(ADVANCE, ADVANCE)
    assert_state(ns, locs + 1, fuel - 2, act, both(0))

    # --- Actuators now fail with certainty as well -------------------------
    domain.FUEL_BURN_REWARD_COEFF = 0.0
    domain.MOVE_REWARD_COEFF = 0.0
    domain.P_ACT_FAIL = 1.0
    r, ns, t, _ = act_jointly(RETREAT, LOITER)
    assert_state(ns, locs + [0, 1], fuel - 3, both(0), both(0))
    # No reward was received since the sensor is broken.
    assert r == 0

    # --- Refuel -------------------------------------------------------------
    # After this step the UAVs are in REFUEL and COMMS respectively, with 4
    # fuel units consumed.  Fuel is only refilled by a LOITER, see next step.
    r, ns, t, _ = act_jointly(RETREAT, RETREAT)
    locs = np.array([UAVLocation.REFUEL, UAVLocation.COMMS])
    assert_state(ns, locs, fuel - 4, both(0), both(0))

    # Refuel occurs after loitering: only the first UAV is topped up.
    r, ns, t, _ = act_jointly(LOITER, RETREAT)
    fuel = np.array([10, 5])
    locs = both(UAVLocation.REFUEL)
    assert_state(ns, locs, fuel, both(0), both(0))

    # --- Repair [note uav2 was never refueled since never loitered] --------
    r, ns, t, _ = act_jointly(RETREAT, RETREAT)
    assert_state(ns, locs - 1, fuel - 1, both(0), both(0))

    # Repair only occurs after loiter; the expected fuel is unchanged by
    # this loiter.
    r, ns, t, _ = act_jointly(LOITER, LOITER)
    assert_state(ns, locs - 1, fuel - 1, both(1), both(1))

    # --- Comms but no surveillance (no more failures from here on) ---------
    domain.P_ACT_FAIL = 0.0
    domain.P_SENSOR_FAIL = 0.0
    r, ns, t, _ = act_jointly(ADVANCE, ADVANCE)
    assert_state(ns, locs, fuel - 2, both(1), both(1))
    r, ns, t, _ = act_jointly(ADVANCE, ADVANCE)
    assert_state(ns, locs + 1, fuel - 3, both(1), both(1))
    assert r == 0  # no reward because only have comms, no surveil

    # Same locations, but with 2 extra units of fuel for each UAV.
    domain.state = domain.properties2StateVec(
        locs + 1, fuel - 1, both(1), both(1))

    # --- Surveillance but no comms ------------------------------------------
    r, ns, t, _ = act_jointly(ADVANCE, ADVANCE)
    assert_state(ns, locs + 2, fuel - 2, both(1), both(1))
    assert r == 0  # no reward because have only surveil, no comms

    # --- Comms and surveillance ---------------------------------------------
    r, ns, t, _ = act_jointly(RETREAT, LOITER)
    locs = np.array([UAVLocation.COMMS, UAVLocation.SURVEIL])
    assert_state(ns, locs, fuel - 3, both(1), both(1))
    assert r == 0
    # Reward is based on "s", not "ns", so it is picked up one step later.
    r, ns, t, _ = act_jointly(LOITER, LOITER)
    locs = np.array([UAVLocation.COMMS, UAVLocation.SURVEIL])
    assert_state(ns, locs, fuel - 4, both(1), both(1))
    assert r == domain.SURVEIL_REWARD

    # --- Crash ----------------------------------------------------------------
    # Since reward is based on "s" not "ns", the surveillance reward of the
    # previous step is collected together with the crash reward.
    r, ns, t, _ = act_jointly(RETREAT, RETREAT)
    assert_state(ns, locs - 1, fuel - 5, both(1), both(1))
    assert t == True
    assert r == domain.CRASH_REWARD + domain.SURVEIL_REWARD
コード例 #14
0
ファイル: BlocksWorld.py プロジェクト: okkhoy/rlpy
 def getActionPutAonB(self, A, B):
     """
     Encode the block pair ``[A, B]`` as a single action id.

     ``vec2id`` performs the encoding; each of the two entries is limited
     by ``self.blocks``.
     """
     pair = np.array([A, B])
     return vec2id(pair, [self.blocks] * 2)
コード例 #15
0
ファイル: BlocksWorld.py プロジェクト: okkhoy/rlpy
 def getActionPutAonTable(self, A):
     """
     Encode the pair ``[A, A]`` as a single action id.

     Presumably a block paired with itself is the encoding of "put A on
     the table", as the method name suggests -- verify against the
     domain.  Each of the two entries is limited by ``self.blocks``.
     """
     pair = np.array([A, A])
     return vec2id(pair, [self.blocks] * 2)
コード例 #16
0
ファイル: test_PST.py プロジェクト: zhuzhenping/rlpy
def test_transitions():
    """
    Ensure that actions result in expected state transition behavior.

    The scenario is a single continuous episode: every step starts from the
    state left behind by the previous step (or from an explicit assignment to
    ``domain.state``), so the order of the sections below matters.

    Test:
        1) Actuator and sensor failure, associated lack of reward
        2) Refuel
        3) Repair
        4) Presence of reward iff a UAV is in COMMS *and* SURVEIL
        5) UAV Crash because of lack of fuel

    """
    NUM_UAV = 2
    nPosActions = 3  # = UAVAction.SIZE
    actionLimits = nPosActions * np.ones(NUM_UAV, dtype='int')

    # Per-UAV component status vectors used in the expected states below.
    working = np.array([1, 1])
    broken = np.array([0, 0])

    domain = PST(NUM_UAV=NUM_UAV)
    # Return value is not needed; the call itself is kept from the original
    # test (presumably it initializes the episode -- confirm against PST.s0).
    domain.s0()

    def step(*uavActions):
        """Encode one action per UAV into a joint action id and apply it."""
        return domain.step(vec2id(np.array(uavActions), actionLimits))

    # Test p=1 sensor failure (actuator failure disabled) when not at base
    domain.P_ACT_FAIL = 0.0
    domain.P_SENSOR_FAIL = 1.0

    locs = np.array([UAVLocation.COMMS, UAVLocation.COMMS])
    fuel = np.array([10, 10])
    act = np.array([ActuatorState.RUNNING, ActuatorState.RUNNING])
    sens = np.array([SensorState.RUNNING, SensorState.RUNNING])
    domain.state = domain.properties2StateVec(locs, fuel, act, sens)
    _, ns, _, _ = step(UAVAction.LOITER, UAVAction.LOITER)
    # Assert that only change was reduction in fuel and failure of sensor
    assert np.array_equiv(
        ns, domain.properties2StateVec(locs, fuel - 1, act, broken))

    # Test location change movement
    _, ns, _, _ = step(UAVAction.ADVANCE, UAVAction.ADVANCE)
    assert np.array_equiv(
        ns, domain.properties2StateVec(locs + 1, fuel - 2, act, broken))

    # Test p=1 actuator failure when not at base
    # (reward coefficients are zeroed so the reward checks below are exact)
    domain.FUEL_BURN_REWARD_COEFF = 0.0
    domain.MOVE_REWARD_COEFF = 0.0
    domain.P_ACT_FAIL = 1.0
    r, ns, _, _ = step(UAVAction.RETREAT, UAVAction.LOITER)
    assert np.array_equiv(
        ns,
        domain.properties2StateVec(locs + [0, 1], fuel - 3, broken, broken))

    # Test that no reward was received since the sensor is broken
    assert r == 0

    # Test Refuel
    # After the action below the UAVs are expected in REFUEL and COMMS
    # respectively, with 4 fuel units consumed.  Must LOITER to refill fuel
    # though
    _, ns, _, _ = step(UAVAction.RETREAT, UAVAction.RETREAT)
    locs = np.array([UAVLocation.REFUEL, UAVLocation.COMMS])
    assert np.array_equiv(
        ns, domain.properties2StateVec(locs, fuel - 4, broken, broken))
    # Refuel occurs after loitering
    _, ns, _, _ = step(UAVAction.LOITER, UAVAction.RETREAT)
    fuel = np.array([10, 5])
    locs = np.array([UAVLocation.REFUEL, UAVLocation.REFUEL])
    assert np.array_equiv(
        ns, domain.properties2StateVec(locs, fuel, broken, broken))

    # Test repair [note uav2 was never refueled since never loitered]
    _, ns, _, _ = step(UAVAction.RETREAT, UAVAction.RETREAT)
    assert np.array_equiv(
        ns, domain.properties2StateVec(locs - 1, fuel - 1, broken, broken))

    # Repair only occurs after loiter [no fuel burned for BASE/REFUEL loiter]
    _, ns, _, _ = step(UAVAction.LOITER, UAVAction.LOITER)
    assert np.array_equiv(
        ns, domain.properties2StateVec(locs - 1, fuel - 1, working, working))

    # Test comms but no surveillance
    domain.P_ACT_FAIL = 0.0
    domain.P_SENSOR_FAIL = 0.0
    _, ns, _, _ = step(UAVAction.ADVANCE, UAVAction.ADVANCE)
    assert np.array_equiv(
        ns, domain.properties2StateVec(locs, fuel - 2, working, working))
    r, ns, _, _ = step(UAVAction.ADVANCE, UAVAction.ADVANCE)
    assert np.array_equiv(
        ns, domain.properties2StateVec(locs + 1, fuel - 3, working, working))
    assert r == 0  # no reward because only have comms, no surveil

    # Give each UAV 2 extra units of fuel (fuel-3 -> fuel-1) at the same
    # location before moving on
    domain.state = domain.properties2StateVec(
        locs + 1, fuel - 1, working, working)

    # Test surveillance but no comms
    r, ns, _, _ = step(UAVAction.ADVANCE, UAVAction.ADVANCE)
    assert np.array_equiv(
        ns, domain.properties2StateVec(locs + 2, fuel - 2, working, working))
    assert r == 0  # no reward because have only surveil, no comms

    # Test comms and surveillance
    r, ns, _, _ = step(UAVAction.RETREAT, UAVAction.LOITER)
    locs = np.array([UAVLocation.COMMS, UAVLocation.SURVEIL])
    assert np.array_equiv(
        ns, domain.properties2StateVec(locs, fuel - 3, working, working))
    assert r == 0
    # reward based on "s", not "ns", pickup reward here
    r, ns, _, _ = step(UAVAction.LOITER, UAVAction.LOITER)
    assert np.array_equiv(
        ns, domain.properties2StateVec(locs, fuel - 4, working, working))
    assert r == domain.SURVEIL_REWARD

    # Test crash
    # Since reward based on "s" not "ns", also pickup reward from prev step
    r, ns, t, _ = step(UAVAction.RETREAT, UAVAction.RETREAT)
    assert np.array_equiv(
        ns, domain.properties2StateVec(locs - 1, fuel - 5, working, working))
    assert t  # episode must terminate on the crash
    assert r == domain.CRASH_REWARD + domain.SURVEIL_REWARD