def test_errs():
    """ Ensure that we can call custom methods without error """
    domain = PST(NUM_UAV=2)
    domain.s0()

    flat = [1, 2, 9, 3, 1, 0, 1, 1]
    locs = np.array([1, 2])
    fuel = np.array([9, 3])
    act = np.array([1, 0])
    sens = np.array([1, 1])

    # state2Struct: flat rlpy state vector -> named StateStruct fields
    struct = domain.state2Struct(flat)
    for attr, expected in (("locations", locs), ("fuel", fuel),
                           ("actuator", act), ("sensor", sens)):
        assert np.all(getattr(struct, attr) == expected)

    # struct2State: StateStruct -> flat state vector (round trip)
    assert np.all(domain.struct2State(StateStruct(locs, fuel, act, sens)) == flat)

    # properties2StateVec: separate property arrays -> flat state vector
    assert np.all(domain.properties2StateVec(locs, fuel, act, sens) == flat)
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0., boyan_N0=3019.313, initial_learn_rate=0.965830):
    """
    Build an Experiment running Greedy-GQ with an IndependentDiscretization
    representation on the 4-UAV PST domain.

    @param exp_id: seed for the random number generators
    @param path: output directory template for logs and results
    @param lambda_: eligibility-trace decay for Greedy-GQ
    @param boyan_N0: Boyan learn-rate schedule parameter
    @param initial_learn_rate: initial step size
    """
    beta_coef = 1e-6  # GQ second-weight-vector step-size coefficient

    domain = PST(NUM_UAV=4)
    representation = IndependentDiscretization(domain)
    policy = eGreedy(representation, epsilon=0.1)
    agent = Greedy_GQ(policy, representation,
                      discount_factor=domain.discount_factor,
                      BetaCoef=beta_coef,
                      lambda_=lambda_,
                      initial_learn_rate=initial_learn_rate,
                      learn_rate_decay_mode="boyan",
                      boyan_N0=boyan_N0)

    opt = {
        "path": path,
        "exp_id": exp_id,
        "max_steps": 500000,
        "num_policy_checks": 30,
        "checks_per_policy": 10,
        "domain": domain,
        "agent": agent,
    }
    return Experiment(**opt)
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=1204., lambda_=0., boyan_N0=7353.2,
                    initial_learn_rate=.9712):
    """
    Build an Experiment running Q-Learning with an iFDD representation
    (seeded by IndependentDiscretization) on the 4-UAV PST domain.

    @param exp_id: seed for the random number generators
    @param path: output directory template for logs and results
    @param discover_threshold: iFDD feature-discovery threshold
    @param lambda_: eligibility-trace decay for Q-Learning
    @param boyan_N0: Boyan learn-rate schedule parameter
    @param initial_learn_rate: initial step size
    """
    sparsify = 1
    kappa = 1e-7  # iFDD+ relevance offset: iFDDPlus = 1 - kappa

    domain = PST(NUM_UAV=4)
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          iFDDPlus=1 - kappa)
    policy = eGreedy(representation, epsilon=0.1)
    agent = Q_Learning(policy, representation,
                       discount_factor=domain.discount_factor,
                       lambda_=lambda_,
                       initial_learn_rate=initial_learn_rate,
                       learn_rate_decay_mode="boyan",
                       boyan_N0=boyan_N0)

    opt = {
        "path": path,
        "exp_id": exp_id,
        "max_steps": 500000,
        "num_policy_checks": 30,
        "checks_per_policy": 10,
        "domain": domain,
        "agent": agent,
    }
    return Experiment(**opt)
def _make_experiment(exp_id=1, path="./Results/Tmp/test_PST"): """ Each file specifying an experimental setup should contain a make_experiment function which returns an instance of the Experiment class with everything set up. @param id: number used to seed the random number generators @param path: output directory where logs and results are stored """ ## Domain: NUM_UAV = 3 domain = PST(NUM_UAV=NUM_UAV) ## Representation # discretization only needed for continuous state spaces, discarded otherwise representation = IncrementalTabular(domain) ## Policy policy = eGreedy(representation, epsilon=0.1) ## Agent agent = SARSA(representation=representation, policy=policy, discount_factor=domain.discount_factor, learn_rate=0.1) checks_per_policy = 2 max_steps = 30 num_policy_checks = 2 experiment = Experiment(**locals()) return experiment
def test_errs():
    """ Ensure that we can call custom methods without error """
    domain = PST(NUM_UAV=2)
    domain.s0()

    flat_state = [1, 2, 9, 3, 1, 0, 1, 1]
    locations = np.array([1, 2])
    fuel_levels = np.array([9, 3])
    actuators = np.array([1, 0])
    sensors = np.array([1, 1])

    # state2Struct: flat rlpy state -> StateStruct with named fields
    intern = domain.state2Struct(flat_state)
    assert np.all(intern.locations == locations)
    assert np.all(intern.fuel == fuel_levels)
    assert np.all(intern.actuator == actuators)
    assert np.all(intern.sensor == sensors)

    # struct2State: StateStruct -> flat state vector
    packed = StateStruct(locations, fuel_levels, actuators, sensors)
    assert np.all(domain.struct2State(packed) == flat_state)

    # properties2StateVec: individual property arrays -> flat state vector
    rebuilt = domain.properties2StateVec(locations, fuel_levels,
                                         actuators, sensors)
    assert np.all(rebuilt == flat_state)
def makeComponents(self):
    """
    Build the (domain, agent) pair currently selected in the GUI widgets.

    Reads the UAV count from the spin box and the representation/policy/
    agent names from their respective list widgets, then assembles the
    components via the corresponding factories.
    """
    num_uav = int(self.spNum.value())
    domain = PST(NUM_UAV=num_uav)

    rep_name = str(self.lstRepresentation.currentItem().text())
    representation = RepresentationFactory.get(
        config=self.representationConfig, name=rep_name, domain=domain)

    policy_name = str(self.lstPolicy.currentItem().text())
    policy = PolicyFactory.get(
        config=self.policyConfig, name=policy_name,
        representation=representation)

    agent_name = str(self.lstAgent.currentItem().text())
    agent = AgentFactory.get(
        config=self.agentConfig, name=agent_name,
        representation=representation, policy=policy)

    return domain, agent
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=960.51, lambda_=0., boyan_N0=4206.,
                    initial_learn_rate=.7457):
    """
    Build an Experiment running Greedy-GQ with an iFDD representation
    (seeded by IndependentDiscretization) on the 4-UAV PST domain.

    @param exp_id: seed for the random number generators
    @param path: output directory template for logs and results
    @param discover_threshold: iFDD feature-discovery threshold
    @param lambda_: eligibility-trace decay for Greedy-GQ
    @param boyan_N0: Boyan learn-rate schedule parameter
    @param initial_learn_rate: initial step size
    """
    sparsify = 1
    ifddeps = 1e-7    # iFDD+ relevance offset: iFDDPlus = 1 - ifddeps
    beta_coef = 1e-6  # GQ second-weight-vector step-size coefficient

    domain = PST(NUM_UAV=4)
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          # discretization=discretization,
                          useCache=True,
                          iFDDPlus=1 - ifddeps)
    policy = eGreedy(representation, epsilon=0.1)
    agent = Greedy_GQ(policy, representation,
                      discount_factor=domain.discount_factor,
                      BetaCoef=beta_coef,
                      lambda_=lambda_,
                      initial_learn_rate=initial_learn_rate,
                      learn_rate_decay_mode="boyan",
                      boyan_N0=boyan_N0)

    opt = {
        "path": path,
        "exp_id": exp_id,
        "max_steps": 500000,
        "num_policy_checks": 30,
        "checks_per_policy": 10,
        "domain": domain,
        "agent": agent,
    }
    return Experiment(**opt)
def test_transitions():
    """
    Ensure that actions result in expected state transition behavior.
    Test:
        1) Actuator and sensor failure, associated lack of reward
        2) Refuel
        3) Repair
        4) Presence of reward iff a UAV is in COMMS *and* SURVEIL
        5) UAV Crash because of lack of fuel
    """
    NUM_UAV = 2
    nPosActions = 3  # = UAVAction.SIZE
    actionLimits = nPosActions * np.ones(NUM_UAV, dtype='int')

    # Test p=1 actuator failure when not at base.
    # Failure probabilities are pinned to 0.0 / 1.0 throughout so every
    # step is deterministic and exact state vectors can be asserted.
    domain = PST(NUM_UAV=NUM_UAV)
    dummyS = domain.s0()
    domain.P_ACT_FAIL = 0.0
    domain.P_SENSOR_FAIL = 1.0
    locs = np.array([UAVLocation.COMMS, UAVLocation.COMMS])
    fuel = np.array([10,10])
    act = np.array([ActuatorState.RUNNING, ActuatorState.RUNNING])
    sens = np.array([SensorState.RUNNING, SensorState.RUNNING])
    actionVec = np.array([UAVAction.LOITER, UAVAction.LOITER])
    a = vec2id(actionVec, actionLimits)  # joint action id from per-UAV actions
    domain.state = domain.properties2StateVec(locs, fuel, act, sens)
    r, ns, t, possA = domain.step(a)
    # Assert that only change was reduction in fuel and failure of sensor
    assert np.array_equiv(ns, domain.properties2StateVec(locs, fuel-1, \
        act, np.array([0,0])))

    # Test location change movement
    actionVec = np.array([UAVAction.ADVANCE, UAVAction.ADVANCE])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    assert np.array_equiv(ns, domain.properties2StateVec(locs+1, fuel-2, \
        act, np.array([0,0])))

    # Test p=1 sensor failure when not at base
    domain.FUEL_BURN_REWARD_COEFF = 0.0
    domain.MOVE_REWARD_COEFF = 0.0
    domain.P_ACT_FAIL = 1.0
    actionVec = np.array([UAVAction.RETREAT, UAVAction.LOITER])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    assert np.array_equiv(ns, domain.properties2StateVec(locs + [0,1], fuel-3, \
        np.array([0,0]), np.array([0,0])))
    # Test that no reward was received since the sensor is broken
    assert r == 0

    # Test Refuel
    # After action below will be in locs + [-1,1], or REFUEL and SURVEIL
    # respectively, with 4 fuel units consumed.
    # Must LOITER to refill fuel though
    actionVec = np.array([UAVAction.RETREAT, UAVAction.RETREAT])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    locs = np.array([UAVLocation.REFUEL, UAVLocation.COMMS])
    assert np.array_equiv(ns, domain.properties2StateVec(locs, fuel-4, \
        np.array([0,0]), np.array([0,0])))

    # Refuel occurs after loitering
    actionVec = np.array([UAVAction.LOITER, UAVAction.RETREAT])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    fuel = np.array([10,5])
    locs = np.array([UAVLocation.REFUEL, UAVLocation.REFUEL])
    assert np.array_equiv(ns, domain.properties2StateVec(locs, fuel, \
        np.array([0,0]), np.array([0,0])))

    # Test repair [note uav2 was never refueled since never loitered]
    actionVec = np.array([UAVAction.RETREAT, UAVAction.RETREAT])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    assert np.array_equiv(ns, domain.properties2StateVec(locs-1, fuel-1, \
        np.array([0,0]), np.array([0,0])))

    # Repair only occurs after loiter [no fuel burned for BASE/REFUEL loiter]
    actionVec = np.array([UAVAction.LOITER, UAVAction.LOITER])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    assert np.array_equiv(ns, domain.properties2StateVec(locs-1, fuel-1, \
        np.array([1,1]), np.array([1,1])))

    # Test comms but no surveillance
    domain.P_ACT_FAIL = 0.0
    domain.P_SENSOR_FAIL = 0.0
    actionVec = np.array([UAVAction.ADVANCE, UAVAction.ADVANCE])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    assert np.array_equiv(ns, domain.properties2StateVec(locs, fuel-2, \
        np.array([1,1]), np.array([1,1])))
    actionVec = np.array([UAVAction.ADVANCE, UAVAction.ADVANCE])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    assert np.array_equiv(ns, domain.properties2StateVec(locs+1, fuel-3, \
        np.array([1,1]), np.array([1,1])))
    assert r == 0  # no reward because only have comms, no surveil

    # add 2 units of extra fuel to each and move
    domain.state = domain.properties2StateVec(locs+1, fuel-1, \
        np.array([1,1]), np.array([1,1]))

    # Test surveillance but no comms
    actionVec = np.array([UAVAction.ADVANCE, UAVAction.ADVANCE])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    assert np.array_equiv(ns, domain.properties2StateVec(locs+2, fuel-2, \
        np.array([1,1]), np.array([1,1])))
    assert r == 0  # no reward because have only surveil, no comms

    # Test comms and surveillance
    actionVec = np.array([UAVAction.RETREAT, UAVAction.LOITER])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    locs = np.array([UAVLocation.COMMS, UAVLocation.SURVEIL])
    assert np.array_equiv(ns, domain.properties2StateVec(locs, fuel-3, \
        np.array([1,1]), np.array([1,1])))
    assert r == 0  # reward based on "s", not "ns", pickup reward here
    actionVec = np.array([UAVAction.LOITER, UAVAction.LOITER])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    locs = np.array([UAVLocation.COMMS, UAVLocation.SURVEIL])
    assert np.array_equiv(ns, domain.properties2StateVec(locs, fuel-4, \
        np.array([1,1]), np.array([1,1])))
    assert r == domain.SURVEIL_REWARD

    # Test crash
    # Since reward based on "s" not "ns", also pickup reward from prev step
    actionVec = np.array([UAVAction.RETREAT, UAVAction.RETREAT])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    assert np.array_equiv(ns, domain.properties2StateVec(locs-1, fuel-5, \
        np.array([1,1]), np.array([1,1])))
    assert t == True
    assert r == domain.CRASH_REWARD + domain.SURVEIL_REWARD
def test_transitions():
    """
    Ensure that actions result in expected state transition behavior.
    Test:
        1) Actuator and sensor failure, associated lack of reward
        2) Refuel
        3) Repair
        4) Presence of reward iff a UAV is in COMMS *and* SURVEIL
        5) UAV Crash because of lack of fuel
    """
    NUM_UAV = 2
    nPosActions = 3  # = UAVAction.SIZE
    actionLimits = nPosActions * np.ones(NUM_UAV, dtype='int')

    # Test p=1 actuator failure when not at base.
    # Failure probabilities are pinned to 0.0 / 1.0 throughout so every
    # step is deterministic and exact state vectors can be asserted.
    domain = PST(NUM_UAV=NUM_UAV)
    dummyS = domain.s0()
    domain.P_ACT_FAIL = 0.0
    domain.P_SENSOR_FAIL = 1.0
    locs = np.array([UAVLocation.COMMS, UAVLocation.COMMS])
    fuel = np.array([10, 10])
    act = np.array([ActuatorState.RUNNING, ActuatorState.RUNNING])
    sens = np.array([SensorState.RUNNING, SensorState.RUNNING])
    actionVec = np.array([UAVAction.LOITER, UAVAction.LOITER])
    a = vec2id(actionVec, actionLimits)  # joint action id from per-UAV actions
    domain.state = domain.properties2StateVec(locs, fuel, act, sens)
    r, ns, t, possA = domain.step(a)
    # Assert that only change was reduction in fuel and failure of sensor
    assert np.array_equiv(ns, domain.properties2StateVec(locs, fuel-1, \
        act, np.array([0,0])))

    # Test location change movement
    actionVec = np.array([UAVAction.ADVANCE, UAVAction.ADVANCE])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    assert np.array_equiv(ns, domain.properties2StateVec(locs+1, fuel-2, \
        act, np.array([0,0])))

    # Test p=1 sensor failure when not at base
    domain.FUEL_BURN_REWARD_COEFF = 0.0
    domain.MOVE_REWARD_COEFF = 0.0
    domain.P_ACT_FAIL = 1.0
    actionVec = np.array([UAVAction.RETREAT, UAVAction.LOITER])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    assert np.array_equiv(ns, domain.properties2StateVec(locs + [0,1], fuel-3, \
        np.array([0,0]), np.array([0,0])))
    # Test that no reward was received since the sensor is broken
    assert r == 0

    # Test Refuel
    # After action below will be in locs + [-1,1], or REFUEL and SURVEIL
    # respectively, with 4 fuel units consumed.
    # Must LOITER to refill fuel though
    actionVec = np.array([UAVAction.RETREAT, UAVAction.RETREAT])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    locs = np.array([UAVLocation.REFUEL, UAVLocation.COMMS])
    assert np.array_equiv(ns, domain.properties2StateVec(locs, fuel-4, \
        np.array([0,0]), np.array([0,0])))

    # Refuel occurs after loitering
    actionVec = np.array([UAVAction.LOITER, UAVAction.RETREAT])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    fuel = np.array([10, 5])
    locs = np.array([UAVLocation.REFUEL, UAVLocation.REFUEL])
    assert np.array_equiv(ns, domain.properties2StateVec(locs, fuel, \
        np.array([0,0]), np.array([0,0])))

    # Test repair [note uav2 was never refueled since never loitered]
    actionVec = np.array([UAVAction.RETREAT, UAVAction.RETREAT])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    assert np.array_equiv(ns, domain.properties2StateVec(locs-1, fuel-1, \
        np.array([0,0]), np.array([0,0])))

    # Repair only occurs after loiter [no fuel burned for BASE/REFUEL loiter]
    actionVec = np.array([UAVAction.LOITER, UAVAction.LOITER])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    assert np.array_equiv(ns, domain.properties2StateVec(locs-1, fuel-1, \
        np.array([1,1]), np.array([1,1])))

    # Test comms but no surveillance
    domain.P_ACT_FAIL = 0.0
    domain.P_SENSOR_FAIL = 0.0
    actionVec = np.array([UAVAction.ADVANCE, UAVAction.ADVANCE])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    assert np.array_equiv(ns, domain.properties2StateVec(locs, fuel-2, \
        np.array([1,1]), np.array([1,1])))
    actionVec = np.array([UAVAction.ADVANCE, UAVAction.ADVANCE])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    assert np.array_equiv(ns, domain.properties2StateVec(locs+1, fuel-3, \
        np.array([1,1]), np.array([1,1])))
    assert r == 0  # no reward because only have comms, no surveil

    # add 2 units of extra fuel to each and move
    domain.state = domain.properties2StateVec(locs+1, fuel-1, \
        np.array([1,1]), np.array([1,1]))

    # Test surveillance but no comms
    actionVec = np.array([UAVAction.ADVANCE, UAVAction.ADVANCE])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    assert np.array_equiv(ns, domain.properties2StateVec(locs+2, fuel-2, \
        np.array([1,1]), np.array([1,1])))
    assert r == 0  # no reward because have only surveil, no comms

    # Test comms and surveillance
    actionVec = np.array([UAVAction.RETREAT, UAVAction.LOITER])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    locs = np.array([UAVLocation.COMMS, UAVLocation.SURVEIL])
    assert np.array_equiv(ns, domain.properties2StateVec(locs, fuel-3, \
        np.array([1,1]), np.array([1,1])))
    assert r == 0  # reward based on "s", not "ns", pickup reward here
    actionVec = np.array([UAVAction.LOITER, UAVAction.LOITER])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    locs = np.array([UAVLocation.COMMS, UAVLocation.SURVEIL])
    assert np.array_equiv(ns, domain.properties2StateVec(locs, fuel-4, \
        np.array([1,1]), np.array([1,1])))
    assert r == domain.SURVEIL_REWARD

    # Test crash
    # Since reward based on "s" not "ns", also pickup reward from prev step
    actionVec = np.array([UAVAction.RETREAT, UAVAction.RETREAT])
    a = vec2id(actionVec, actionLimits)
    r, ns, t, possA = domain.step(a)
    assert np.array_equiv(ns, domain.properties2StateVec(locs-1, fuel-5, \
        np.array([1,1]), np.array([1,1])))
    assert t == True
    assert r == domain.CRASH_REWARD + domain.SURVEIL_REWARD