def vecList2idHelper(self, x, actionIDs, ind, curActionList, maxValue, limits):
    """
    Recursive worker behind vecList2id().

    Walks the list of lists ``x`` starting at position ``ind`` and extends
    the partial assignment ``curActionList`` with every element of
    ``x[ind]`` in turn.  Once an element of the last list has been added,
    the completed assignment is converted with ``vec2id`` and the resulting
    id is appended to ``actionIDs``.

    :param x: List of lists, e.g. ``[[0, 2], [1, 2]]``; one element is
        picked from each inner list.
    :param actionIDs: Output list; ids are appended to it in place.
    :param ind: Index of the inner list of ``x`` handled by this call.
    :param curActionList: Elements already picked from ``x[0:ind]``.
        It is copied, never modified.
    :param maxValue: Not used here; only forwarded to the recursive call.
    :param limits: Passed unchanged to ``vec2id``.

    Nothing is returned; the result is accumulated in ``actionIDs``.
    See vecList2id().
    """
    on_last_list = ind == len(x) - 1
    # x[ind] is one of the lists, e.g. [0, 2] or [1, 2]
    for choice in x[ind]:
        extended = curActionList[:]
        extended.append(choice)
        if not on_last_list:
            # More lists to pick from: recurse on the next one.
            # TODO remove self
            self.vecList2idHelper(
                x, actionIDs, ind + 1, extended, maxValue, limits)
            continue
        # Assignment is complete, e.g. [0, 1, 0, 2] with limits [3, 3, 3, 3]
        actionIDs.append(vec2id(extended, limits))
def possibleActions(self):
    """
    Return the ids of the actions that are possible in the current state.

    An action is a pair ``[a, b]`` built from two blocks that currently have
    nothing on top of them (as reported by ``self.clear``).  A pair is
    dropped when ``self.destination_is_table(a, b)`` holds while ``a``
    already sits on the table: a block that is on the table cannot be picked
    up and put on the table again.

    :returns: ``np.array`` with one ``vec2id`` id per admissible pair, using
        ``[self.blocks, self.blocks]`` as limits.
    """
    s = self.state
    # Blocks with nothing on top.  ``range`` (not the Python 2 only
    # ``xrange``) keeps this runnable on both Python 2 and Python 3.
    empty_blocks = [b for b in range(self.blocks) if self.clear(b, s)]
    actions = [
        [a, b]
        for a in empty_blocks
        for b in empty_blocks
        if not self.destination_is_table(a, b) or not self.on_table(a, s)
    ]
    return np.array(
        [vec2id(x, [self.blocks, self.blocks]) for x in actions])
def possibleActions(self):
    """
    Return the ids of the actions that are possible in the current state.

    Candidate actions are all pairs ``[a, b]`` of blocks that have nothing
    on top of them.  A pair is rejected when its destination is the table
    while ``a`` is already on the table, since such a block cannot be picked
    up and put on the table again.

    :returns: ``np.array`` holding the ``vec2id`` id of every remaining
        pair, computed with limits ``[self.blocks, self.blocks]``.
    """
    state = self.state

    # find empty blocks (nothing on top)
    clear_blocks = []
    for blk in range(self.blocks):
        if self.clear(blk, state):
            clear_blocks.append(blk)

    moves = []
    for src in clear_blocks:
        for dst in clear_blocks:
            # Skip "table to table" moves of a block that is on the table.
            if self.destination_is_table(src, dst) and self.on_table(src, state):
                continue
            moves.append([src, dst])

    ids = []
    for move in moves:
        ids.append(vec2id(move, [self.blocks, self.blocks]))
    return np.array(ids)
def hashState(self, s):
    """
    Return a unique integer id for the state *s*.

    The state is first passed through ``self.binState`` (which discretizes
    continuous dimensions into bins as necessary); the binned state is then
    mapped to a single integer by ``vec2id`` using ``self.bins_per_dim`` as
    the per-dimension limits.

    :param s: The state to hash.
    :returns: The id ``vec2id`` assigns to the binned state.
    """
    binned_state = self.binState(s)
    limits = self.bins_per_dim
    return vec2id(binned_state, limits)
def hashState(self, s):
    """
    Map the state *s* to a unique integer id.

    Under the hood the state is discretized with ``self.binState`` and the
    resulting bin vector is turned into one integer by ``vec2id``, with
    ``self.bins_per_dim`` giving the number of bins of each dimension.

    :param s: The state to hash.
    :returns: The integer id of the binned state.
    """
    # Discretize first, then enumerate the bin vector.
    return vec2id(self.binState(s), self.bins_per_dim)
def BellmanBackup(self, s, a, ns_samples, policy=None):
    """Apply a Bellman backup to the state-action pair (s, a).

    i.e. Q(s,a) = E[r + discount_factor * V(s')].
    If a policy is given then Q(s,a) = E[r + discount_factor * Q(s',pi(s'))].

    The backed-up value is obtained from the representation's
    ``Q_oneStepLookAhead`` and written into ``weight_vec`` at index
    ``agg_states_num * a + id(s)``, where ``id(s)`` is the ``vec2id`` id of
    the binned state.

    Args:
        s (ndarray): The current state
        a (int): The action taken in state s
        ns_samples (int): Number of next state samples to use.
        policy (Policy): Policy object to use for sampling actions.

    """
    rep = self.representation
    backed_up_value = rep.Q_oneStepLookAhead(s, a, ns_samples, policy)
    state_id = vec2id(rep.binState(s), rep.bins_per_dim)
    # One slot per (action, aggregated state) pair.
    slot = int(rep.agg_states_num * a + state_id)
    rep.weight_vec[slot] = backed_up_value
def BellmanBackup(self, s, a, ns_samples, policy=None):
    """Apply a Bellman backup to the state-action pair (s, a).

    Computes Q(s,a) = E[r + discount_factor * V(s')], or, when a policy is
    given, Q(s,a) = E[r + discount_factor * Q(s',pi(s'))], and stores the
    result in the representation's weight vector.

    Args:
        s (ndarray): The current state
        a (int): The action taken in state s
        ns_samples (int): Number of next state samples to use.
        policy (Policy): Policy object to use for sampling actions.

    """
    representation = self.representation
    new_q = representation.Q_oneStepLookAhead(s, a, ns_samples, policy)

    # Locate the weight that belongs to (s, a): the binned state is turned
    # into an id and offset by the action's stride of agg_states_num.
    binned = representation.binState(s)
    offset_in_action = vec2id(binned, representation.bins_per_dim)
    target = int(representation.agg_states_num * a + offset_in_action)

    representation.weight_vec[target] = new_q
def vecList2idHelper(self, x, actionIDs, ind, curActionList, maxValue, limits):
    """
    Helper method for vecList2id().

    Enumerates every way of picking one element from each of the lists
    ``x[ind:]``, prefixed by the elements already collected in
    ``curActionList``.  Each complete pick is converted to an id with
    ``vec2id`` and appended to ``actionIDs``.

    :param x: List of lists to pick from, e.g. ``[[0, 2], [1, 2]]``.
    :param actionIDs: List that receives the ids (modified in place).
    :param ind: Position in ``x`` of the list processed by this call.
    :param curActionList: Picks made so far; a copy is extended, the
        argument itself is left untouched.
    :param maxValue: Unused by this method apart from being passed on to
        the recursive call.
    :param limits: Forwarded to ``vec2id`` unchanged.

    The method has no return value.  See vecList2id().
    """
    final_index = len(x) - 1
    # x[ind] is one of the lists, e.g. [0, 2] or [1, 2]
    for option in x[ind]:
        candidate = curActionList[:]
        candidate.append(option)
        if ind == final_index:
            # We have reached the final list, the assignment is complete,
            # e.g. [0, 1, 0, 2] with limits [3, 3, 3, 3]
            actionIDs.append(vec2id(candidate, limits))
        else:
            # TODO remove self
            self.vecList2idHelper(
                x, actionIDs, ind + 1, candidate, maxValue, limits)
def getActionPutAonB(self, A, B):
    """
    Return the action id that encodes the pair ``(A, B)``.

    The pair is converted with ``vec2id`` using ``self.blocks`` as the limit
    of both entries.  Going by the method name, the pair stands for
    "put block A on block B".

    :param A: First entry of the pair.
    :param B: Second entry of the pair.
    :returns: The id ``vec2id`` assigns to ``[A, B]``.
    """
    move = np.array([A, B])
    limits = [self.blocks] * 2
    return vec2id(move, limits)
def getActionPutAonTable(self, A):
    """
    Return the action id that encodes the pair ``(A, A)``.

    The pair is converted with ``vec2id`` using ``self.blocks`` as the limit
    of both entries.  Going by the method name, a pair whose two entries are
    the same block stands for "put block A on the table".

    :param A: The block used for both entries of the pair.
    :returns: The id ``vec2id`` assigns to ``[A, A]``.
    """
    move = np.array([A, A])
    limits = [self.blocks] * 2
    return vec2id(move, limits)
def pi2(self, s, terminal, p_actions):
    """
    Return the action picked by a hand-coded policy for the current domain.

    The policy is chosen from the class name of
    ``self.representation.domain``:

    * ``GridWorld``: cycle through the actions in the order given by
      ``self.policyName`` (``'cw_circle'`` or ``'ccw_circle'``) until one is
      possible in *s*.  The last action is remembered in ``self.curAction``.
    * ``InfCartPoleBalance`` / ``MountainCar``: return 2 when the second
      state component is positive, otherwise 0.
    * ``BlocksWorld``: with probability 0.3 (or when the domain reports a
      terminal state) a random possible action, otherwise the next move
      towards building the tower 0, 1, 2, ...
    * ``IntruderMonitoring``: each agent moves towards the danger zone
      closest to the target with the same index; surplus agents hold.
    * ``SystemAdministrator``: reset a random broken computer, or return
      ``domain.computers_num`` when none is broken.
    * ``PST``: the id of the all-zero action vector.

    :param s: The current state.
    :param terminal: Not used by this method.
    :param p_actions: Not used by this method; possible actions are queried
        from the domain where they are needed.
    :returns: The selected action id, or ``None`` when the domain or the
        GridWorld policy name is not supported, or when
        ``self.curAction`` holds a value the GridWorld cycle does not know.
    """
    domain = self.representation.domain
    domain_name = className(domain)
    if domain_name not in self.supportedDomains:
        print("ERROR: There is no fixed policy defined for %s" % domain_name)
        return None

    if domain_name == 'GridWorld':
        # Actions are Up (0), Down (1), Left (2), Right (3)
        if self.policyName not in self.gridWorldPolicyNames:
            print("Error: There is no GridWorld policy with name %s" % self.policyName)
            return None

        # The action order is not a simple increment because of the order
        # in which actions are defined, hence the explicit successor tables.
        if self.policyName == 'cw_circle':
            first_action = 0
            successor = {0: 3, 3: 1, 1: 2, 2: 0}  # up, right, down, left
        elif self.policyName == 'ccw_circle':
            first_action = 1
            successor = {3: 0, 0: 2, 2: 1, 1: 3}  # right, up, left, down
        else:
            print(
                "Error: No policy defined with name %s, but listed in gridWorldPolicyNames" %
                self.policyName)
            print(
                "You need to create a switch statement for the policy name above, or remove it from gridWorldPolicyNames"
            )
            return None

        if not hasattr(self, "curAction"):
            # it doesn't exist yet, so initialize it
            self.curAction = first_action
        while self.curAction not in domain.possibleActions(s):
            if self.curAction not in successor:
                # An unknown action can never advance through the cycle;
                # report it once and give up instead of looping forever.
                print(
                    'Something terrible happened...got an invalid action on GridWorld Fixed Policy'
                )
                return None
            self.curAction = successor[self.curAction]
        return self.curAction

    if domain_name == 'InfCartPoleBalance':
        # Fixed policy: rotate the pendulum in the opposite direction of
        # thetadot
        theta, thetadot = s
        if thetadot > 0:
            return 2
        else:
            return 0

    if domain_name == 'BlocksWorld':
        # Policy: always pick the next piece of the tower and move it onto
        # the tower, except that a random action is taken with
        # probability 0.3.
        # TODO fix isTerminal use here
        if self.random_state.rand() < .3 or domain.isTerminal():
            return randSet(domain.possibleActions(s))

        # ``block`` doubles as the length of the tower assumed to be built
        # correctly: blocks 0 .. block-1 are in place.
        block = 0
        while True:
            if (block == 0 and domain.on_table(block, s)) or domain.on(
                    block, block - 1, s):
                # This block is on the right position, check the next block
                block += 1
                continue

            # The block is in the wrong place.
            # 1. If something wrong sits on top of the tower, put it on the
            #    table.  (When block 0 is misplaced the tower top is the
            #    table, which is empty by definition.)
            if block != 0:
                ideal_tower_top = block - 1
                tower_top = domain.towerTop(ideal_tower_top, s)
                if tower_top != ideal_tower_top:
                    return domain.getActionPutAonTable(tower_top)

            # 2. If something sits on top of the block to be moved, put that
            #    on the table first.
            block_top = domain.towerTop(block, s)
            if block_top != block:
                return domain.getActionPutAonTable(block_top)

            # 3. Otherwise move the block onto the tower.
            if block == 0:
                return domain.getActionPutAonTable(block)
            else:
                return domain.getActionPutAonB(block, block - 1)

    if domain_name == 'IntruderMonitoring':
        # Each UAV assigns itself to the target with the same index, finds
        # the danger zone closest to that target and moves towards it.
        # If there are more UAVs than targets, the rest hold position.
        # The first NUMBER_OF_AGENTS * 2 entries of s are read as agent
        # (row, col) pairs, the remainder as target (row, col) pairs.
        agents = np.array(s[:domain.NUMBER_OF_AGENTS * 2].reshape(-1, 2))
        targets = np.array(s[domain.NUMBER_OF_AGENTS * 2:].reshape(-1, 2))
        zones = domain.danger_zone_locations
        # Default action is hold (4).  ``int`` replaces the abstract
        # ``np.integer``, whose use as a dtype is deprecated in NumPy.
        actions = np.ones(len(agents), dtype=int) * 4
        planned_agents_num = min(len(agents), len(targets))
        for i in range(planned_agents_num):
            # Find closest zone (manhattan) to the corresponding target
            target = targets[i, :]
            distances = np.sum(
                np.abs(np.tile(target, (len(zones), 1)) - zones), axis=1)
            z_row, z_col = zones[np.argmin(distances), :]
            a_row, a_col = agents[i, :]
            # Later checks override earlier ones, so a column offset wins
            # over a row offset.
            a = 4  # hold as a default action
            if a_row > z_row:
                a = 0  # up
            if a_row < z_row:
                a = 1  # down
            if a_col > z_col:
                a = 2  # left
            if a_col < z_col:
                a = 3  # right
            actions[i] = a
        return vec2id(actions, np.ones(len(agents), dtype=int) * 5)

    if domain_name == 'SystemAdministrator':
        # Select a broken computer and reset it
        brokenComputers = np.where(s == 0)[0]
        if len(brokenComputers):
            return randSet(brokenComputers)
        else:
            return domain.computers_num

    if domain_name == 'MountainCar':
        # Accelerate in the direction of the valley
        # WORK IN PROGRESS
        x, xdot = s
        if xdot > 0:
            return 2
        else:
            return 0

    if domain_name == 'PST':
        # One stays at comm, n-1 stay at target area. Whenever fuel is
        # lower than reaching the base the move back
        # NOTE(review): the code below only returns the id of the all-zero
        # action vector; the behaviour described above is not implemented.
        print(s)
        s = domain.state2Struct(s)
        uavs = domain.NUM_UAV
        print(s)
        return vec2id(np.zeros(uavs), np.ones(uavs) * 3)
def pi2(self, s, terminal, p_actions):
    """
    Return the action picked by a hand-coded policy for the current domain.

    The policy is chosen from the class name of
    ``self.representation.domain``:

    * ``GridWorld``: cycle through the actions in the order given by
      ``self.policyName`` (``'cw_circle'`` or ``'ccw_circle'``) until one is
      possible in *s*.  The last action is remembered in ``self.curAction``.
    * ``InfCartPoleBalance`` / ``MountainCar``: return 2 when the second
      state component is positive, otherwise 0.
    * ``BlocksWorld``: with probability 0.3 (or when the domain reports a
      terminal state) a random possible action, otherwise the next move
      towards building the tower 0, 1, 2, ...
    * ``IntruderMonitoring``: each agent moves towards the danger zone
      closest to the target with the same index; surplus agents hold.
    * ``SystemAdministrator``: reset a random broken computer, or return
      ``domain.computers_num`` when none is broken.
    * ``PST``: the id of the all-zero action vector.

    Messages are printed with single-argument ``print(...)`` calls and the
    agent loop uses ``range``, so the method runs on Python 2 and Python 3.

    :param s: The current state.
    :param terminal: Not used by this method.
    :param p_actions: Not used by this method; possible actions are queried
        from the domain where they are needed.
    :returns: The selected action id, or ``None`` when the domain or the
        GridWorld policy name is not supported, or when
        ``self.curAction`` holds a value the GridWorld cycle does not know.
    """
    domain = self.representation.domain
    domain_name = className(domain)
    if domain_name not in self.supportedDomains:
        print("ERROR: There is no fixed policy defined for %s" % domain_name)
        return None

    if domain_name == 'GridWorld':
        # Actions are Up (0), Down (1), Left (2), Right (3)
        if self.policyName not in self.gridWorldPolicyNames:
            print("Error: There is no GridWorld policy with name %s" % self.policyName)
            return None

        # The action order is not a simple increment because of the order
        # in which actions are defined, hence the explicit successor tables.
        if self.policyName == 'cw_circle':
            first_action = 0
            successor = {0: 3, 3: 1, 1: 2, 2: 0}  # up, right, down, left
        elif self.policyName == 'ccw_circle':
            first_action = 1
            successor = {3: 0, 0: 2, 2: 1, 1: 3}  # right, up, left, down
        else:
            print("Error: No policy defined with name %s, but listed in gridWorldPolicyNames" %
                  self.policyName)
            print("You need to create a switch statement for the policy name above, or remove it from gridWorldPolicyNames")
            return None

        if not hasattr(self, "curAction"):
            # it doesn't exist yet, so initialize it
            self.curAction = first_action
        while self.curAction not in domain.possibleActions(s):
            if self.curAction not in successor:
                # An unknown action can never advance through the cycle;
                # report it once and give up instead of looping forever.
                print('Something terrible happened...got an invalid action on GridWorld Fixed Policy')
                return None
            self.curAction = successor[self.curAction]
        return self.curAction

    if domain_name == 'InfCartPoleBalance':
        # Fixed policy: rotate the pendulum in the opposite direction of
        # thetadot
        theta, thetadot = s
        if thetadot > 0:
            return 2
        else:
            return 0

    if domain_name == 'BlocksWorld':
        # Policy: always pick the next piece of the tower and move it onto
        # the tower, except that a random action is taken with
        # probability 0.3.
        # TODO fix isTerminal use here
        if np.random.rand() < .3 or domain.isTerminal():
            return randSet(domain.possibleActions(s))

        # ``block`` doubles as the length of the tower assumed to be built
        # correctly: blocks 0 .. block-1 are in place.
        block = 0
        while True:
            if (block == 0 and domain.on_table(block, s)) or domain.on(block, block - 1, s):
                # This block is on the right position, check the next block
                block += 1
                continue

            # The block is in the wrong place.
            # 1. If something wrong sits on top of the tower, put it on the
            #    table.  (When block 0 is misplaced the tower top is the
            #    table, which is empty by definition.)
            if block != 0:
                ideal_tower_top = block - 1
                tower_top = domain.towerTop(ideal_tower_top, s)
                if tower_top != ideal_tower_top:
                    return domain.getActionPutAonTable(tower_top)

            # 2. If something sits on top of the block to be moved, put that
            #    on the table first.
            block_top = domain.towerTop(block, s)
            if block_top != block:
                return domain.getActionPutAonTable(block_top)

            # 3. Otherwise move the block onto the tower.
            if block == 0:
                return domain.getActionPutAonTable(block)
            else:
                return domain.getActionPutAonB(block, block - 1)

    if domain_name == 'IntruderMonitoring':
        # Each UAV assigns itself to the target with the same index, finds
        # the danger zone closest to that target and moves towards it.
        # If there are more UAVs than targets, the rest hold position.
        # The first NUMBER_OF_AGENTS * 2 entries of s are read as agent
        # (row, col) pairs, the remainder as target (row, col) pairs.
        agents = np.array(s[:domain.NUMBER_OF_AGENTS * 2].reshape(-1, 2))
        targets = np.array(s[domain.NUMBER_OF_AGENTS * 2:].reshape(-1, 2))
        zones = domain.danger_zone_locations
        # Default action is hold (4).  ``int`` replaces the abstract
        # ``np.integer``, whose use as a dtype is deprecated in NumPy.
        actions = np.ones(len(agents), dtype=int) * 4
        planned_agents_num = min(len(agents), len(targets))
        for i in range(planned_agents_num):
            # Find closest zone (manhattan) to the corresponding target
            target = targets[i, :]
            distances = np.sum(
                np.abs(np.tile(target, (len(zones), 1)) - zones), axis=1)
            z_row, z_col = zones[np.argmin(distances), :]
            a_row, a_col = agents[i, :]
            # Later checks override earlier ones, so a column offset wins
            # over a row offset.
            a = 4  # hold as a default action
            if a_row > z_row:
                a = 0  # up
            if a_row < z_row:
                a = 1  # down
            if a_col > z_col:
                a = 2  # left
            if a_col < z_col:
                a = 3  # right
            actions[i] = a
        return vec2id(actions, np.ones(len(agents), dtype=int) * 5)

    if domain_name == 'SystemAdministrator':
        # Select a broken computer and reset it
        brokenComputers = np.where(s == 0)[0]
        if len(brokenComputers):
            return randSet(brokenComputers)
        else:
            return domain.computers_num

    if domain_name == 'MountainCar':
        # Accelerate in the direction of the valley
        # WORK IN PROGRESS
        x, xdot = s
        if xdot > 0:
            return 2
        else:
            return 0

    if domain_name == 'PST':
        # One stays at comm, n-1 stay at target area. Whenever fuel is
        # lower than reaching the base the move back
        # NOTE(review): the code below only returns the id of the all-zero
        # action vector; the behaviour described above is not implemented.
        print(s)
        s = domain.state2Struct(s)
        uavs = domain.NUM_UAV
        print(s)
        return vec2id(np.zeros(uavs), np.ones(uavs) * 3)
def test_transitions():
    """
    Ensure that actions result in the expected state transitions.

    Scenarios exercised, in order:
        1) Sensor and actuator failure, associated lack of reward
        2) Refuel
        3) Repair
        4) Presence of reward iff a UAV is in COMMS *and* SURVEIL
        5) UAV crash because of lack of fuel
    """
    uav_count = 2
    actions_per_uav = 3  # = UAVAction.SIZE
    actionLimits = actions_per_uav * np.ones(uav_count, dtype='int')

    domain = PST(NUM_UAV=uav_count)
    domain.s0()  # the returned start state is not needed
    to_state = domain.properties2StateVec

    def pair(first, second):
        """Build a fresh two-element array, one entry per UAV."""
        return np.array([first, second])

    def step(first_action, second_action):
        """Apply one action per UAV and return the domain's step result."""
        joint_action = vec2id(pair(first_action, second_action), actionLimits)
        return domain.step(joint_action)

    # --- Sensors fail with p=1, actuators never fail ---------------------
    domain.P_ACT_FAIL = 0.0
    domain.P_SENSOR_FAIL = 1.0
    locs = pair(UAVLocation.COMMS, UAVLocation.COMMS)
    fuel = pair(10, 10)
    act = pair(ActuatorState.RUNNING, ActuatorState.RUNNING)
    sens = pair(SensorState.RUNNING, SensorState.RUNNING)
    domain.state = to_state(locs, fuel, act, sens)

    _, ns, _, _ = step(UAVAction.LOITER, UAVAction.LOITER)
    # The only changes are one unit less fuel and the failed sensors
    assert np.array_equiv(ns, to_state(locs, fuel - 1, act, pair(0, 0)))

    # --- Location change -------------------------------------------------
    _, ns, _, _ = step(UAVAction.ADVANCE, UAVAction.ADVANCE)
    assert np.array_equiv(ns, to_state(locs + 1, fuel - 2, act, pair(0, 0)))

    # --- Actuators now fail with p=1 as well -----------------------------
    domain.FUEL_BURN_REWARD_COEFF = 0.0
    domain.MOVE_REWARD_COEFF = 0.0
    domain.P_ACT_FAIL = 1.0
    r, ns, _, _ = step(UAVAction.RETREAT, UAVAction.LOITER)
    assert np.array_equiv(
        ns, to_state(locs + [0, 1], fuel - 3, pair(0, 0), pair(0, 0)))
    # No reward was received since the sensors are broken
    assert r == 0

    # --- Refuel ----------------------------------------------------------
    # After this step the UAVs are expected at REFUEL and COMMS with four
    # fuel units consumed; fuel is only refilled by a LOITER.
    _, ns, _, _ = step(UAVAction.RETREAT, UAVAction.RETREAT)
    locs = pair(UAVLocation.REFUEL, UAVLocation.COMMS)
    assert np.array_equiv(
        ns, to_state(locs, fuel - 4, pair(0, 0), pair(0, 0)))

    # Refuel occurs after loitering
    _, ns, _, _ = step(UAVAction.LOITER, UAVAction.RETREAT)
    fuel = pair(10, 5)
    locs = pair(UAVLocation.REFUEL, UAVLocation.REFUEL)
    assert np.array_equiv(ns, to_state(locs, fuel, pair(0, 0), pair(0, 0)))

    # --- Repair [note uav2 was never refueled since it never loitered] ---
    _, ns, _, _ = step(UAVAction.RETREAT, UAVAction.RETREAT)
    assert np.array_equiv(
        ns, to_state(locs - 1, fuel - 1, pair(0, 0), pair(0, 0)))

    # Repair only occurs after a loiter; the expected fuel is unchanged
    # by that loiter.
    _, ns, _, _ = step(UAVAction.LOITER, UAVAction.LOITER)
    assert np.array_equiv(
        ns, to_state(locs - 1, fuel - 1, pair(1, 1), pair(1, 1)))

    # --- Comms but no surveillance ---------------------------------------
    domain.P_ACT_FAIL = 0.0
    domain.P_SENSOR_FAIL = 0.0
    _, ns, _, _ = step(UAVAction.ADVANCE, UAVAction.ADVANCE)
    assert np.array_equiv(
        ns, to_state(locs, fuel - 2, pair(1, 1), pair(1, 1)))

    r, ns, _, _ = step(UAVAction.ADVANCE, UAVAction.ADVANCE)
    assert np.array_equiv(
        ns, to_state(locs + 1, fuel - 3, pair(1, 1), pair(1, 1)))
    assert r == 0  # no reward because only have comms, no surveil

    # add 2 units of extra fuel to each and move
    domain.state = to_state(locs + 1, fuel - 1, pair(1, 1), pair(1, 1))

    # --- Surveillance but no comms ---------------------------------------
    r, ns, _, _ = step(UAVAction.ADVANCE, UAVAction.ADVANCE)
    assert np.array_equiv(
        ns, to_state(locs + 2, fuel - 2, pair(1, 1), pair(1, 1)))
    assert r == 0  # no reward because have only surveil, no comms

    # --- Comms and surveillance ------------------------------------------
    r, ns, _, _ = step(UAVAction.RETREAT, UAVAction.LOITER)
    locs = pair(UAVLocation.COMMS, UAVLocation.SURVEIL)
    assert np.array_equiv(
        ns, to_state(locs, fuel - 3, pair(1, 1), pair(1, 1)))
    assert r == 0  # reward based on "s", not "ns"; picked up on next step

    r, ns, _, _ = step(UAVAction.LOITER, UAVAction.LOITER)
    assert np.array_equiv(
        ns, to_state(locs, fuel - 4, pair(1, 1), pair(1, 1)))
    assert r == domain.SURVEIL_REWARD

    # --- Crash -----------------------------------------------------------
    # Since reward is based on "s" not "ns", the surveillance reward of the
    # previous step is picked up here as well.
    r, ns, t, _ = step(UAVAction.RETREAT, UAVAction.RETREAT)
    assert np.array_equiv(
        ns, to_state(locs - 1, fuel - 5, pair(1, 1), pair(1, 1)))
    assert t == True  # noqa: E712 - equality check kept as in the original
    assert r == domain.CRASH_REWARD + domain.SURVEIL_REWARD
def test_transitions():
    """
    Ensure that actions result in the expected state transitions.

    Scenarios exercised, in order:
        1) Sensor and actuator failure, associated lack of reward
        2) Refuel
        3) Repair
        4) Presence of reward iff a UAV is in COMMS *and* SURVEIL
        5) UAV crash because of lack of fuel
    """
    n_uavs = 2
    n_uav_actions = 3  # = UAVAction.SIZE
    actionLimits = n_uav_actions * np.ones(n_uavs, dtype='int')

    domain = PST(NUM_UAV=n_uavs)
    domain.s0()  # the returned start state is not needed
    state_vec = domain.properties2StateVec

    def both(first, second):
        """Build a fresh two-element array, one entry per UAV."""
        return np.array([first, second])

    def advance_domain(action_uav1, action_uav2):
        """Apply one action per UAV and return the domain's step result."""
        action_id = vec2id(both(action_uav1, action_uav2), actionLimits)
        return domain.step(action_id)

    # Sensors fail with p=1 while actuators never fail
    domain.P_ACT_FAIL = 0.0
    domain.P_SENSOR_FAIL = 1.0
    locs = both(UAVLocation.COMMS, UAVLocation.COMMS)
    fuel = both(10, 10)
    act = both(ActuatorState.RUNNING, ActuatorState.RUNNING)
    sens = both(SensorState.RUNNING, SensorState.RUNNING)
    domain.state = state_vec(locs, fuel, act, sens)

    _, ns, _, _ = advance_domain(UAVAction.LOITER, UAVAction.LOITER)
    # Only change: reduction in fuel and failure of the sensors
    assert np.array_equiv(ns, state_vec(locs, fuel - 1, act, both(0, 0)))

    # Location change
    _, ns, _, _ = advance_domain(UAVAction.ADVANCE, UAVAction.ADVANCE)
    assert np.array_equiv(ns, state_vec(locs + 1, fuel - 2, act, both(0, 0)))

    # Actuators now fail with p=1 as well
    domain.FUEL_BURN_REWARD_COEFF = 0.0
    domain.MOVE_REWARD_COEFF = 0.0
    domain.P_ACT_FAIL = 1.0
    r, ns, _, _ = advance_domain(UAVAction.RETREAT, UAVAction.LOITER)
    assert np.array_equiv(
        ns, state_vec(locs + [0, 1], fuel - 3, both(0, 0), both(0, 0)))
    # No reward was received since the sensors are broken
    assert r == 0

    # Refuel: after this step the UAVs are expected at REFUEL and COMMS
    # with four fuel units consumed; fuel is only refilled by a LOITER.
    _, ns, _, _ = advance_domain(UAVAction.RETREAT, UAVAction.RETREAT)
    locs = both(UAVLocation.REFUEL, UAVLocation.COMMS)
    assert np.array_equiv(
        ns, state_vec(locs, fuel - 4, both(0, 0), both(0, 0)))

    # Refuel occurs after loitering
    _, ns, _, _ = advance_domain(UAVAction.LOITER, UAVAction.RETREAT)
    fuel = both(10, 5)
    locs = both(UAVLocation.REFUEL, UAVLocation.REFUEL)
    assert np.array_equiv(ns, state_vec(locs, fuel, both(0, 0), both(0, 0)))

    # Repair [note uav2 was never refueled since it never loitered]
    _, ns, _, _ = advance_domain(UAVAction.RETREAT, UAVAction.RETREAT)
    assert np.array_equiv(
        ns, state_vec(locs - 1, fuel - 1, both(0, 0), both(0, 0)))

    # Repair only occurs after a loiter; the expected fuel is unchanged
    # by that loiter.
    _, ns, _, _ = advance_domain(UAVAction.LOITER, UAVAction.LOITER)
    assert np.array_equiv(
        ns, state_vec(locs - 1, fuel - 1, both(1, 1), both(1, 1)))

    # Comms but no surveillance
    domain.P_ACT_FAIL = 0.0
    domain.P_SENSOR_FAIL = 0.0
    _, ns, _, _ = advance_domain(UAVAction.ADVANCE, UAVAction.ADVANCE)
    assert np.array_equiv(
        ns, state_vec(locs, fuel - 2, both(1, 1), both(1, 1)))

    r, ns, _, _ = advance_domain(UAVAction.ADVANCE, UAVAction.ADVANCE)
    assert np.array_equiv(
        ns, state_vec(locs + 1, fuel - 3, both(1, 1), both(1, 1)))
    assert r == 0  # no reward because only have comms, no surveil

    # add 2 units of extra fuel to each and move
    domain.state = state_vec(locs + 1, fuel - 1, both(1, 1), both(1, 1))

    # Surveillance but no comms
    r, ns, _, _ = advance_domain(UAVAction.ADVANCE, UAVAction.ADVANCE)
    assert np.array_equiv(
        ns, state_vec(locs + 2, fuel - 2, both(1, 1), both(1, 1)))
    assert r == 0  # no reward because have only surveil, no comms

    # Comms and surveillance
    r, ns, _, _ = advance_domain(UAVAction.RETREAT, UAVAction.LOITER)
    locs = both(UAVLocation.COMMS, UAVLocation.SURVEIL)
    assert np.array_equiv(
        ns, state_vec(locs, fuel - 3, both(1, 1), both(1, 1)))
    assert r == 0  # reward based on "s", not "ns"; picked up on next step

    r, ns, _, _ = advance_domain(UAVAction.LOITER, UAVAction.LOITER)
    assert np.array_equiv(
        ns, state_vec(locs, fuel - 4, both(1, 1), both(1, 1)))
    assert r == domain.SURVEIL_REWARD

    # Crash: since reward is based on "s" not "ns", the surveillance reward
    # of the previous step is picked up here as well.
    r, ns, t, _ = advance_domain(UAVAction.RETREAT, UAVAction.RETREAT)
    assert np.array_equiv(
        ns, state_vec(locs - 1, fuel - 5, both(1, 1), both(1, 1)))
    assert t == True  # noqa: E712 - equality check kept as in the original
    assert r == domain.CRASH_REWARD + domain.SURVEIL_REWARD