def test_errs():
    """Smoke test: construct the domain from the 20MachTutorial map.

    The test passes as long as building the domain raises no exception;
    nothing else is asserted.
    """
    tutorial_map = os.path.join(
        __rlpy_location__, "Domains", "SystemAdministratorMaps",
        "20MachTutorial.txt")
    # The instance itself is not inspected; only construction is exercised.
    SystemAdministrator(networkmapname=tutorial_map)
def _make_experiment(exp_id=1, path="./Results/Tmp/test_SystemAdministrator"): """ Each file specifying an experimental setup should contain a make_experiment function which returns an instance of the Experiment class with everything set up. @param id: number used to seed the random number generators @param path: output directory where logs and results are stored """ ## Domain: domain = SystemAdministrator() ## Representation # discretization only needed for continuous state spaces, discarded otherwise representation = IncrementalTabular(domain) ## Policy policy = eGreedy(representation, epsilon=0.2) ## Agent agent = SARSA(representation=representation, policy=policy, discount_factor=domain.discount_factor, learn_rate=0.1) checks_per_policy = 2 max_steps = 20 num_policy_checks = 2 experiment = Experiment(**locals()) return experiment
def makeComponents(self):
    """Assemble the domain and agent described by the current widget state.

    The map, representation, policy and agent are chosen by the selected
    entries of the corresponding list widgets; the repair probabilities and
    reboot reward are read from the spin boxes and written onto the domain.

    Returns:
        A ``(domain, agent)`` tuple.
    """
    def selected_text(list_widget):
        # Text of the currently selected entry, converted to a plain str.
        return str(list_widget.currentItem().text())

    chosen_map = selected_text(self.lstMap)
    map_file = os.path.join(
        SystemAdministrator.default_map_dir, chosen_map + '.txt')
    sysadmin = SystemAdministrator(networkmapname=map_file)

    # Override the domain parameters with the values entered by the user.
    sysadmin.P_SELF_REPAIR = self.spSelfRepairProb.value()
    sysadmin.P_REBOOT_REPAIR = self.spRobotRepairProb.value()
    sysadmin.REBOOT_REWARD = self.spRobotReward.value()

    chosen_representation = RepresentationFactory.get(
        config=self.representationConfig,
        name=selected_text(self.lstRepresentation),
        domain=sysadmin)
    chosen_policy = PolicyFactory.get(
        config=self.policyConfig,
        name=selected_text(self.lstPolicy),
        representation=chosen_representation)
    chosen_agent = AgentFactory.get(
        config=self.agentConfig,
        name=selected_text(self.lstAgent),
        representation=chosen_representation,
        policy=chosen_policy)

    return sysadmin, chosen_agent
def makeComponents(self):
    """Create the domain and agent from the selections made in the dialog.

    Reads the selected map, representation, policy and agent names from the
    list widgets, builds each component through its factory, and copies the
    spin-box values onto the domain's repair/reward attributes.

    Returns:
        tuple: ``(domain, agent)``.
    """
    # --- Domain -----------------------------------------------------------
    map_name = str(self.lstMap.currentItem().text())
    map_path = os.path.join(
        SystemAdministrator.default_map_dir, map_name + '.txt')
    env = SystemAdministrator(networkmapname=map_path)
    env.P_SELF_REPAIR = self.spSelfRepairProb.value()
    env.P_REBOOT_REPAIR = self.spRobotRepairProb.value()
    env.REBOOT_REWARD = self.spRobotReward.value()

    # --- Representation -> policy -> agent, each built on the previous one --
    rep = RepresentationFactory.get(
        config=self.representationConfig,
        name=str(self.lstRepresentation.currentItem().text()),
        domain=env,
    )
    pol = PolicyFactory.get(
        config=self.policyConfig,
        name=str(self.lstPolicy.currentItem().text()),
        representation=rep,
    )
    learner = AgentFactory.get(
        config=self.agentConfig,
        name=str(self.lstAgent.currentItem().text()),
        representation=rep,
        policy=pol,
    )
    return env, learner
def test_compact_binary():
    """Check the compact-binary representation on a domain with binary dimensions.

    Builds the domain from the 20MachTutorial map, then verifies the number
    of features and that a state with only its first entry set activates
    exactly one feature, at index 0.
    """
    map_path = os.path.join(
        __rlpy_location__, "Domains", "SystemAdministratorMaps",
        "20MachTutorial.txt")
    sysadmin = SystemAdministrator(networkmapname=map_path)
    representation = IndependentDiscretizationCompactBinary(sysadmin)

    # The test expects 20 + 1 = 21 features for this map.
    assert representation.features_num == 21

    # State vector with only the first of its 20 entries set to 1.
    state = np.zeros(20)
    state[0] = 1
    features = representation.phi(state, terminal=False)

    assert sum(features) == 1
    assert features[0] == 1
def test_transitions():
    """
    Ensure that actions result in expected state transition behavior.

    Starting from an all-running state on the 5Machines map, the test checks
    the reward of a non-reboot step and of a reboot step, keeps stepping
    until at least one machine is down, and then checks how that machine
    responds to a reboot action with the reboot-repair probability set to 0
    and then to 1.
    """
    # [[manually set state, manually turn off stochasticity ie deterministic,
    # and observe transitions, reward, etc.]]
    default_map_dir = os.path.join(
        __rlpy_location__, "Domains", "SystemAdministratorMaps")
    domain = SystemAdministrator(
        networkmapname=os.path.join(default_map_dir, "5Machines.txt"))
    domain.s0()  # return value is not needed; the state is overwritten below

    up = domain.RUNNING  # shorthand
    down = domain.BROKEN  # shorthand

    # Start with every machine running.
    domain.state = np.array([up for _ in range(domain.state_space_dims)])
    a = 5  # =n on this 5-machine map, ie no action

    # Test that no penalty is applied for a non-reboot action
    r, ns, t, pA = domain.step(a)
    numWorking = np.count_nonzero(ns == up)
    # This is a module-level function, so there is no `self`; compare against
    # the domain's RUNNING constant instead.
    if domain.IS_RING and domain.state[0] == up:
        r = r - 1  # remove the correction for rings / symmetry
    assert r == numWorking

    # Test that penalty is applied for reboot
    r, ns, t, pA = domain.step(0)  # restart computer 0
    numWorking = np.count_nonzero(ns == up)
    if domain.IS_RING and domain.state[0] == up:
        r = r - 1  # remove the correction for rings / symmetry
    assert r == numWorking + domain.REBOOT_REWARD

    # Keep taking the no-op action until some machine goes down.
    while np.all(ns == up):
        r, ns, t, pA = domain.step(a)
    # now at least 1 machine has failed

    domain.P_SELF_REPAIR = 0.0
    domain.P_REBOOT_REPAIR = 0.0

    # Test that the machine remains down when it is rebooted while both
    # repair probabilities are zero
    fMachine = np.where(ns == down)[0][0]
    r, ns, t, pA = domain.step(fMachine)
    assert ns[fMachine] == down

    # Test that machine becomes up when reboot taken
    domain.P_REBOOT_REPAIR = 1.0
    r, ns, t, pA = domain.step(fMachine)
    assert ns[fMachine] == up
def test_transitions():
    """
    Ensure that actions result in expected state transition behavior.

    From an all-running state on the 5Machines map this checks, in order:
    the reward of a non-reboot step, the reward of a reboot step, that a
    failed machine stays down when rebooted with both repair probabilities
    at 0, and that it comes back up when the reboot-repair probability is 1.
    """
    # [[manually set state, manually turn off stochasticity ie deterministic,
    # and observe transitions, reward, etc.]]
    default_map_dir = os.path.join(
        __rlpy_location__, "Domains", "SystemAdministratorMaps")
    domain = SystemAdministrator(networkmapname=os.path.join(
        default_map_dir, "5Machines.txt"))
    domain.s0()  # result unused; the state is set explicitly just below

    up = domain.RUNNING  # shorthand
    down = domain.BROKEN  # shorthand

    # Force the all-machines-running state.
    domain.state = np.array([up for _ in range(domain.state_space_dims)])
    a = 5  # =n on this 5-machine map, ie no action

    # Test that no penalty is applied for a non-reboot action
    r, ns, t, pA = domain.step(a)
    numWorking = np.count_nonzero(ns == up)
    # `self` does not exist in this free function (it raised NameError on
    # ring maps); the RUNNING constant lives on the domain.
    if domain.IS_RING and domain.state[0] == up:
        r = r - 1  # remove the correction for rings / symmetry
    assert r == numWorking

    # Test that penalty is applied for reboot
    r, ns, t, pA = domain.step(0)  # restart computer 0
    numWorking = np.count_nonzero(ns == up)
    if domain.IS_RING and domain.state[0] == up:
        r = r - 1  # remove the correction for rings / symmetry
    assert r == numWorking + domain.REBOOT_REWARD

    # Step with the no-op action until at least one machine has failed.
    while np.all(ns == up):
        r, ns, t, pA = domain.step(a)
    # now at least 1 machine has failed

    domain.P_SELF_REPAIR = 0.0
    domain.P_REBOOT_REPAIR = 0.0

    # Test that the machine remains down when the reboot action is taken
    # with both repair probabilities set to zero
    fMachine = np.where(ns == down)[0][0]
    r, ns, t, pA = domain.step(fMachine)
    assert ns[fMachine] == down

    # Test that machine becomes up when reboot taken
    domain.P_REBOOT_REPAIR = 1.0
    r, ns, t, pA = domain.step(fMachine)
    assert ns[fMachine] == up