def setUp(self):
    """
    Build the integrated actor fixture again, this time with a delay model
    attached, to determine that delay flags reach us.

    The cluster, buffer, scheduler and telescope processes are all
    registered with the environment; nothing is run here.

    Returns
    -------
    """
    self.env = simpy.Environment()
    config = Config(INTEGRATION)
    self.cluster = Cluster(self.env, config)
    self.buffer = Buffer(self.env, self.cluster, config)

    # One delay model instance is shared by the planning model and planner.
    delay = DelayModel(0.9, "normal", DelayModel.DelayDegree.HIGH)
    planning_model = SHADOWPlanning('heft', delay_model=delay)
    self.planner = Planner(
        self.env,
        PLANNING_ALGORITHM,
        self.cluster,
        planning_model,
        delay_model=delay,
    )

    scheduling_algorithm = DynamicAlgorithmFromPlan()
    self.scheduler = Scheduler(
        self.env, self.buffer, self.cluster, scheduling_algorithm
    )
    self.telescope = Telescope(
        self.env, config, self.planner, self.scheduler
    )

    # Register the actor processes in the same order as the other
    # integration fixtures; the scheduler is started before its process.
    for actor in (self.cluster, self.buffer):
        self.env.process(actor.run())
    self.scheduler.start()
    for actor in (self.scheduler, self.telescope):
        self.env.process(actor.run())
def setUp(self) -> None:
    """
    Create the environment, cluster, buffer, scheduler, planner and
    telescope used by the tests.

    Returns
    -------
    """
    self.env = simpy.Environment()
    config = Config(CONFIG)
    self.cluster = Cluster(self.env, config)
    self.buffer = Buffer(self.env, self.cluster, config)
    # Pass an *instance* of the scheduling algorithm, as every other
    # fixture does; the class object itself was being passed before.
    self.scheduler = Scheduler(
        self.env, self.buffer, self.cluster, DynamicAlgorithmFromPlan()
    )
    self.planner = Planner(
        self.env, PLANNING_ALGORITHM, self.cluster, SHADOWPlanning('heft')
    )
    # planner = None
    self.telescope = Telescope(
        self.env, config, self.planner, self.scheduler
    )
def setUp(self) -> None:
    """
    Create the environment, config, cluster, buffer, scheduler (with no
    scheduling algorithm) and a 'heft' planner.

    Returns
    -------
    """
    self.env = simpy.Environment()
    self.config = Config(CONFIG)
    self.cluster = Cluster(env=self.env, config=self.config)
    self.buffer = Buffer(
        env=self.env,
        cluster=self.cluster,
        config=self.config,
    )
    # No scheduling algorithm is supplied in this fixture.
    self.scheduler = Scheduler(
        env=self.env,
        buffer=self.buffer,
        cluster=self.cluster,
        algorithm=None,
    )
    planning_model = SHADOWPlanning('heft')
    self.planner = Planner(self.env, 'heft', self.cluster, planning_model)
def setUp(self):
    """
    Create the environment, cluster, buffer, planner and a single
    continuum observation with a data rate of 2.

    Returns
    -------
    """
    self.env = simpy.Environment()
    self.cluster = Cluster(env=self.env, spec=CLUSTER_CONFIG)
    self.buffer = Buffer(
        env=self.env,
        cluster=self.cluster,
        config=BUFFER_CONFIG,
    )
    self.planner = Planner(self.env, PLAN_ALGORITHM, self.cluster)
    self.observation = Observation(
        'scheduler_observation',
        OBS_START_TME,
        OBS_DURATION,
        OBS_DEMAND,
        OBS_WORKFLOW,
        type='continuum',
        data_rate=2,
    )
def setUp(self) -> None:
    """
    Fixture for edge-case tests such as double-allocation.

    For example, the use of curr_allocs should mean that
    _process_current_schedule does not double-allocate; the tests using
    this fixture check that.

    The telescope is built without a planner or scheduler, and the first
    observation and first machine are kept for convenience.

    Returns
    -------
    """
    self.env = simpy.Environment()
    config = Config(CONFIG)
    algorithm = DynamicAlgorithmFromPlan()

    self.cluster = Cluster(env=self.env, config=config)
    self.telescope = Telescope(
        self.env, config, planner=None, scheduler=None
    )
    self.buffer = Buffer(self.env, self.cluster, config)
    self.scheduler = Scheduler(
        self.env, self.buffer, self.cluster, algorithm
    )

    # Convenience handles on the first observation and first machine.
    self.observation = self.telescope.observations[0]
    self.machine = self.cluster.machines[0]
def setUp(self) -> None:
    """
    Create a planner and a `simpy` environment in which to run dummy
    simulations, to check the planner behaves when 'batch' is selected as
    the static scheduling method.

    The telescope is built without a planner or scheduler.

    Returns
    -------
    """
    self.env = simpy.Environment()
    config = Config(CONFIG)
    self.model = BatchPlanning('batch')
    self.cluster = Cluster(self.env, config=config)
    self.buffer = Buffer(
        env=self.env, cluster=self.cluster, config=config
    )
    self.planner = Planner(
        self.env, PLAN_ALGORITHM, self.cluster, self.model
    )
    self.telescope = Telescope(
        self.env, config, planner=None, scheduler=None
    )
def setUp(self):
    """
    Build all actors from the integration config and register their
    processes with the environment; nothing is run here.

    Returns
    -------
    """
    self.env = simpy.Environment()
    config = Config(INTEGRATION)
    self.cluster = Cluster(self.env, config)
    self.buffer = Buffer(self.env, self.cluster, config)

    planning_model = SHADOWPlanning('heft')
    self.planner = Planner(
        self.env, PLANNING_ALGORITHM, self.cluster, planning_model
    )
    scheduling_algorithm = DynamicAlgorithmFromPlan()
    self.scheduler = Scheduler(
        self.env, self.buffer, self.cluster, scheduling_algorithm
    )
    self.telescope = Telescope(
        self.env, config, self.planner, self.scheduler
    )

    # The scheduler is started before its process is registered.
    for actor in (self.cluster, self.buffer):
        self.env.process(actor.run())
    self.scheduler.start()
    for actor in (self.scheduler, self.telescope):
        self.env.process(actor.run())
def setUp(self):
    """
    Create the environment and a scheduler with no scheduling algorithm.

    The cluster, buffer and planner are only local to this fixture; just
    the environment and scheduler are kept on the test case.

    Returns
    -------
    """
    self.env = simpy.Environment()
    test_cluster = Cluster(env=self.env, spec=CLUSTER_CONFIG)
    test_buffer = Buffer(
        env=self.env, cluster=test_cluster, config=BUFFER_CONFIG
    )
    self.scheduler = Scheduler(
        env=self.env,
        buffer=test_buffer,
        cluster=test_cluster,
        algorithm=None,
    )
    # Constructed but not stored on the test case.
    test_planner = Planner(self.env, 'heft', test_cluster)
def setUp(self):
    """
    Create the environment, config, cluster, buffer, a 'heft' planner and
    one observation with a data rate of 2.

    Returns
    -------
    """
    self.env = simpy.Environment()
    self.config = Config(CONFIG)
    self.cluster = Cluster(env=self.env, config=self.config)
    self.buffer = Buffer(
        env=self.env, cluster=self.cluster, config=self.config
    )
    planning_model = SHADOWPlanning('heft')
    self.planner = Planner(
        self.env, PLAN_ALGORITHM, self.cluster, planning_model
    )
    self.observation = Observation(
        'scheduler_observation',
        OBS_START_TME,
        OBS_DURATION,
        OBS_DEMAND,
        OBS_WORKFLOW,
        data_rate=2,
    )
def testHotBufferConfig(self):
    """
    Process the Hot Buffer section of the config file.

    A freshly built buffer should report a total and current hot capacity
    of 500 and a maximum ingest data rate of 5.
    """
    buffer = Buffer(
        env=self.env, cluster=self.cluster, config=self.config
    )
    hot = buffer.hot[BUFFER_ID]
    self.assertEqual(500, hot.total_capacity)
    self.assertEqual(500, hot.current_capacity)
    self.assertEqual(5, hot.max_ingest_data_rate)
def testColdBufferConfig(self):
    """
    Process the Cold Buffer section of the config file.

    A freshly built buffer should report a total and current cold capacity
    of 250 and a maximum data rate of 2.

    :return:
    """
    buffer = Buffer(
        env=self.env, cluster=self.cluster, config=self.config
    )
    cold = buffer.cold[BUFFER_ID]
    self.assertEqual(250, cold.total_capacity)
    self.assertEqual(250, cold.current_capacity)
    self.assertEqual(2, cold.max_data_rate)
class TestSchedulerIntegration(unittest.TestCase):
    """Scheduler behaviour when all actors run in one environment."""

    def setUp(self):
        """
        Build all actors from the integration config and register their
        processes with the environment; nothing is run here.
        """
        self.env = simpy.Environment()
        config = Config(INTEGRATION)
        self.cluster = Cluster(self.env, config)
        self.buffer = Buffer(self.env, self.cluster, config)

        planning_model = SHADOWPlanning('heft')
        self.planner = Planner(
            self.env, PLANNING_ALGORITHM, self.cluster, planning_model
        )
        scheduling_algorithm = DynamicAlgorithmFromPlan()
        self.scheduler = Scheduler(
            self.env, self.buffer, self.cluster, scheduling_algorithm
        )
        self.telescope = Telescope(
            self.env, config, self.planner, self.scheduler
        )

        # The scheduler is started before its process is registered.
        for actor in (self.cluster, self.buffer):
            self.env.process(actor.run())
        self.scheduler.start()
        for actor in (self.scheduler, self.telescope):
            self.env.process(actor.run())

    def test_FIFO_with_buffer(self):
        """
        Demonstrate that the scheduler accurately schedules when we have
        other Actors working in tandem.

        Expectations (as asserted below):
            - Running until t=1, all 10 resources are still available.
            - Running until t=2, 5 of the 10 resources are available and
              the hot buffer capacity is 496.
            - Running until t=31, 5 tasks are finished and the cold buffer
              capacity is 210.
            - Running until t=32, the schedule status is ONTIME.
            - Running until t=124, the cold buffer capacity is 250.

        Returns
        -------
        """
        self.env.run(until=1)
        self.assertEqual(10, len(self.cluster._resources['available']))

        # This takes a timestep; data in the HotBuffer should be 4
        self.env.run(until=2)
        self.assertEqual(5, len(self.cluster._resources['available']))
        self.assertEqual(496, self.buffer.hot[0].current_capacity)

        self.env.run(until=31)
        self.assertEqual(5, len(self.cluster._tasks['finished']))
        # self.assertEqual(500, self.buffer.hot[0].current_capacity)
        self.assertEqual(210, self.buffer.cold[0].current_capacity)

        self.env.run(until=32)
        # Ensure the time
        self.assertEqual(
            ScheduleStatus.ONTIME, self.scheduler.schedule_status
        )

        # 30 timesteps until we finish everything + 81 timesteps to
        # complete workflow plan.
        self.env.run(until=124)
        # As we have been processing the current observation, we are also
        # ingesting the next one.
        self.assertEqual(250, self.buffer.cold[0].current_capacity)
def setUp(self):
    """
    Create the environment, cluster, planner, buffer, scheduler and
    telescope from the long config.

    Returns
    -------
    """
    self.env = simpy.Environment()
    algorithm = DynamicAlgorithmFromPlan()
    config = Config(LONG_CONFIG)

    self.cluster = Cluster(self.env, config)
    model = SHADOWPlanning(algorithm=PLANNING_ALGORITHM)
    self.planner = Planner(
        self.env, PLANNING_ALGORITHM, self.cluster, model
    )
    self.buffer = Buffer(self.env, self.cluster, config)
    self.scheduler = Scheduler(
        self.env, self.buffer, self.cluster, algorithm
    )
    self.telescope = Telescope(
        self.env, config, self.planner, self.scheduler
    )
def setUp(self):
    """
    Create the environment, planner, cluster, buffer, one continuum
    observation, the scheduler and the telescope.

    The scheduler is built before the telescope because the telescope
    constructor is handed `self.scheduler`; building them the other way
    round raises AttributeError as the attribute does not exist yet.

    Returns
    -------
    """
    self.env = simpy.Environment()
    sched_algorithm = FifoAlgorithm()
    self.planner = Planner(
        self.env, test_data.planning_algorithm, test_data.machine_config
    )
    self.cluster = Cluster(self.env, CLUSTER_CONFIG)
    self.buffer = Buffer(self.env, self.cluster, BUFFER_CONFIG)
    self.observations = [
        Observation(
            'scheduler_observation',
            OBS_START_TME,
            OBS_DURATION,
            OBS_DEMAND,
            OBS_WORKFLOW,
            type='continuum',
            data_rate=5,
        )
    ]
    # Must exist before the telescope, which receives it as an argument.
    self.scheduler = Scheduler(
        self.env, sched_algorithm, self.buffer, self.cluster
    )
    self.telescope = Telescope(
        self.env, OBSERVATION_CONFIG, self.scheduler, self.planner
    )
def setUp(self):
    """
    Set up the buffer and do config stuff.

    Creates the environment, config, cluster and buffer, plus one
    observation with a data rate of 5.

    Returns
    -------
    Nothing
    """
    self.env = simpy.Environment()
    self.config = Config(CONFIG)
    self.cluster = Cluster(env=self.env, config=self.config)
    self.buffer = Buffer(self.env, self.cluster, self.config)

    observation_kwargs = dict(
        name='test_observation',
        start=OBS_START_TME,
        duration=OBS_DURATION,
        demand=OBS_DEMAND,
        workflow=OBS_WORKFLOW,
        data_rate=5,
    )
    self.observation = Observation(**observation_kwargs)
def setUp(self):
    """
    Create the environment, a delay-aware 'heft' planning model, cluster,
    buffer, planner and one observation.

    Returns
    -------
    """
    self.env = simpy.Environment()
    # Constructed as in the original fixture; not used further here.
    sched_algorithm = DynamicAlgorithmFromPlan()
    config = Config(HEFT_CONFIG)

    # The same delay model goes to both the planning model and planner.
    delay = DelayModel(0.1, "normal")
    self.model = SHADOWPlanning('heft', delay)

    self.cluster = Cluster(self.env, config=config)
    self.buffer = Buffer(self.env, self.cluster, config)
    self.planner = Planner(
        self.env,
        PLAN_ALGORITHM,
        self.cluster,
        self.model,
        delay_model=delay,
    )
    self.observation = Observation(
        'planner_observation',
        OBS_START_TME,
        OBS_DURATION,
        OBS_DEMAND,
        OBS_WORKFLOW,
        data_rate=OBS_DATA_RATE,
    )
def setUp(self):
    """
    Create the environment, cluster, buffer, scheduler, a batch
    scheduling-algorithm instance, a batch planning model, the planner and
    the telescope.

    `self.algorithm` holds a `BatchProcessing` instance. The earlier
    assignment of the bare class was a dead store, overwritten before any
    use, and has been removed.

    Returns
    -------
    """
    self.env = simpy.Environment()
    config = Config(CONFIG)
    self.cluster = Cluster(self.env, config=config)
    self.buffer = Buffer(self.env, self.cluster, config)
    self.scheduler = Scheduler(
        self.env, self.buffer, self.cluster, DynamicAlgorithmFromPlan()
    )
    self.algorithm = BatchProcessing()
    self.model = BatchPlanning('batch')
    self.planner = Planner(
        self.env,
        'heft',
        self.cluster,
        self.model,
    )
    self.telescope = Telescope(
        self.env, config, self.planner, self.scheduler
    )
class TestTelescopeIngest(unittest.TestCase):
    """Telescope ingest, run together with cluster, scheduler and buffer."""

    def setUp(self) -> None:
        """
        Create the environment, config, cluster, buffer, scheduler (with
        no scheduling algorithm) and a 'heft' planner.
        """
        self.env = simpy.Environment()
        self.config = Config(CONFIG)
        self.cluster = Cluster(env=self.env, config=self.config)
        self.buffer = Buffer(
            env=self.env, cluster=self.cluster, config=self.config
        )
        self.scheduler = Scheduler(
            env=self.env,
            buffer=self.buffer,
            cluster=self.cluster,
            algorithm=None,
        )
        self.planner = Planner(
            self.env, 'heft', self.cluster, SHADOWPlanning('heft')
        )

    def testIngest(self):
        """
        Run the telescope alongside the other actors and check telescope
        use, available resources and buffer capacities at t=2, t=11 and
        t=12.
        """
        telescope = Telescope(
            env=self.env,
            config=self.config,
            planner=self.planner,
            scheduler=self.scheduler,
        )
        self.assertEqual(0, telescope.telescope_use)

        self.env.process(telescope.run())
        self.env.process(self.cluster.run())
        self.scheduler.start()
        self.env.process(self.scheduler.run())
        self.env.process(self.buffer.run())

        self.env.run(until=2)
        self.assertEqual(36, telescope.telescope_use)
        self.assertEqual(5, len(self.cluster._resources['available']))
        # At this point 4 units of data are in the HotBuffer (500 -> 496).
        self.assertEqual(496, self.buffer.hot[0].current_capacity)

        self.env.run(until=11)
        # The previous check wrapped the value in a list and compared its
        # length to 1, which is always true. Assert on the transfer slot
        # itself instead.
        # NOTE(review): assumes the slot is None when nothing is being
        # transferred - confirm against HotBuffer.
        self.assertIsNotNone(self.buffer.hot[0].observations["transfer"])
        self.assertEqual(462, self.buffer.hot[0].current_capacity)
        self.assertEqual(248, self.buffer.cold[0].current_capacity)

        self.env.run(until=12)
        self.assertEqual(0, telescope.telescope_use)
        self.assertEqual(10, len(self.cluster._resources['available']))
        self.assertEqual(5, len(self.cluster._tasks['finished']))
def __init__(
        self,
        env,
        telescope_config,
        cluster_config,
        buffer_config,
        planning_algorithm,
        scheduling_algorithm,
        event_file,
        visualisation=False
):
    """
    Store the environment and output options, then build the cluster,
    buffer, planner, scheduler and telescope.

    A monitor is only created when `event_file` is not None, and a
    visualiser only when `visualisation` is truthy.
    """
    self.env = env

    # Event file setup
    self.event_file = event_file
    self.visualisation = visualisation
    if event_file is not None:
        self.monitor = Monitor(self)
    # NOTE(review): treated as independent of the event-file check -
    # confirm against the original layout.
    if visualisation:
        self.visualiser = Visualiser(self)

    # Process necessary config files

    # Initialise Actor and Resource objects
    self.cluster = Cluster(env, cluster_config)
    self.buffer = Buffer(env, self.cluster, config=buffer_config)
    self.planner = Planner(env, planning_algorithm, cluster_config)
    self.scheduler = Scheduler(
        env, self.buffer, self.cluster, scheduling_algorithm
    )
    telescope_kwargs = dict(
        env=self.env,
        config=telescope_config,
        planner=self.planner,
        scheduler=self.scheduler,
    )
    self.telescope = Telescope(**telescope_kwargs)
class TestSchedulerDelayHelpers(unittest.TestCase): def setUp(self): self.env = simpy.Environment() config = Config(INTEGRATION) self.cluster = Cluster(self.env, config) self.buffer = Buffer(self.env, self.cluster, config) self.planner = Planner(self.env, PLANNING_ALGORITHM, self.cluster, SHADOWPlanning('heft'), delay_model=DelayModel(0.3, "normal")) self.scheduler = Scheduler(self.env, self.buffer, self.cluster, DynamicAlgorithmFromPlan()) self.telescope = Telescope(self.env, config, self.planner, self.scheduler) self.env.process(self.cluster.run()) self.env.process(self.buffer.run()) self.scheduler.start() self.env.process(self.scheduler.run()) self.env.process(self.telescope.run()) def test_propogate_delay_returns_updated_workflow(self): """
class TestBufferIngestDataStream(unittest.TestCase):
    """Tests for `Buffer.ingest_data_stream` and buffer capacity checks."""

    def setUp(self):
        """
        Set up the buffer and do config stuff.

        Creates the environment, config, cluster and buffer, plus one
        observation with a data rate of 5.

        Returns
        -------
        Nothing
        """
        self.env = simpy.Environment()
        self.config = Config(CONFIG)
        self.cluster = Cluster(env=self.env, config=self.config)
        self.buffer = Buffer(self.env, self.cluster, self.config)
        observation_kwargs = dict(
            name='test_observation',
            start=OBS_START_TME,
            duration=OBS_DURATION,
            demand=OBS_DEMAND,
            workflow=OBS_WORKFLOW,
            data_rate=5,
        )
        self.observation = Observation(**observation_kwargs)

    def testBasicIngest(self):
        """
        Test the 'ingest_data_stream' event, which is to be called in the
        Scheduler.

        After n timesteps the hot buffer's current capacity should have
        dropped by n * the observation ingest rate (495 at t=1, 450 at
        t=10), and the observation should then be the first stored
        observation in the hot buffer.
        """
        self.observation.status = RunStatus.RUNNING
        ingest = self.env.process(
            self.buffer.ingest_data_stream(self.observation)
        )

        self.env.run(until=1)
        self.assertEqual(495, self.buffer.hot[BUFFER_ID].current_capacity)

        self.env.run(until=10)
        self.assertEqual(self.env.now, 10)
        self.assertEqual(450, self.buffer.hot[BUFFER_ID].current_capacity)
        first_stored = self.buffer.hot[BUFFER_ID].observations["stored"][0]
        self.assertEqual(first_stored, self.observation)

    def testIngestPrerequisites(self):
        """
        In order to call ingest_data_stream, the observation must be
        marked as "RunStatus.RUNNING" - otherwise we will be processing an
        observation that hasn't started!

        Here the status is left untouched, and running the environment is
        expected to raise RuntimeError.

        -------
        """
        ingest = self.env.process(
            self.buffer.ingest_data_stream(self.observation)
        )
        with self.assertRaises(RuntimeError):
            self.env.run(until=1)

    def testIngestObservationNotRunning(self):
        """
        The buffer won't ingest if the observation is not marked as
        RunStatus.RUNNING
        """
        self.assertEqual(RunStatus.WAITING, self.observation.status)
        self.env.process(self.buffer.ingest_data_stream(self.observation))
        with self.assertRaises(RuntimeError):
            self.env.run(until=1)

    #
    def testIngestEdgeCase(self):
        """
        Buffer must accept ingest at rate up to 'max ingest data rate' but
        raises an exception if the ingest rate for an observation is
        greater (this means we have an error).

        In addition, we are coordinating this ingest between the scheduler
        and the telescope and the cluster so these actors also need to
        work together in some way, which this test will also attempt to
        do.
        """
        self.observation.status = RunStatus.RUNNING
        self.observation.ingest_data_rate = 20
        ingest = self.env.process(
            self.buffer.ingest_data_stream(self.observation)
        )
        with self.assertRaises(ValueError):
            self.env.run(until=1)

    def test_ingest_capacity_checks(self):
        """
        The buffer checks the hot and cold buffer for capacity; need to
        make sure that if either the hot buffer or the cold buffer do not
        have enough room for the observation, it is not scheduled.

        Returns
        -------
        """
        # Hot buffer too small.
        self.buffer.hot[BUFFER_ID].current_capacity = 2
        hot_too_small = self.buffer.check_buffer_capacity(self.observation)
        self.assertFalse(hot_too_small)

        # Hot buffer restored, cold buffer too small.
        self.buffer.hot[BUFFER_ID].current_capacity = 100
        self.buffer.cold[BUFFER_ID].current_capacity = 1
        cold_too_small = self.buffer.check_buffer_capacity(self.observation)
        self.assertFalse(cold_too_small)
class TestBufferRequests(unittest.TestCase):
    """Tests for moving observation data from the hot to the cold buffer."""

    def setUp(self):
        """
        Create the environment, config, cluster, buffer, a 'heft' planner
        and one observation with a data rate of 2.
        """
        self.env = simpy.Environment()
        self.config = Config(CONFIG)
        self.cluster = Cluster(env=self.env, config=self.config)
        self.buffer = Buffer(
            env=self.env, cluster=self.cluster, config=self.config
        )
        planning_model = SHADOWPlanning('heft')
        self.planner = Planner(
            self.env, PLAN_ALGORITHM, self.cluster, planning_model
        )
        self.observation = Observation(
            'scheduler_observation',
            OBS_START_TME,
            OBS_DURATION,
            OBS_DEMAND,
            OBS_WORKFLOW,
            data_rate=2,
        )

    def tearDown(self):
        """Nothing to clean up."""
        pass

    def test_buffer_hot_to_cold(self):
        """
        This tests an ingest, and then, once we have a successful ingest,
        the movement from one buffer to the other.

        The ingest runs until t=10 (hot capacity 480), after which
        `move_hot_to_cold` is started; capacities are checked at t=15,
        t=20 and t=22, by which time the observation should be stored in
        the cold buffer.

        Returns
        -------
        """
        self.observation.status = RunStatus.RUNNING
        self.env.process(self.buffer.ingest_data_stream(self.observation))
        self.env.run(until=10)
        self.assertEqual(480, self.buffer.hot[BUFFER_ID].current_capacity)

        # Moving data from one to the other
        self.assertEqual(250, self.buffer.cold[BUFFER_ID].current_capacity)
        hot_stored = self.buffer.hot[BUFFER_ID].observations["stored"]
        self.assertTrue(self.observation in hot_stored)
        self.env.process(self.buffer.move_hot_to_cold(0))

        self.env.run(until=15)
        self.assertEqual(240, self.buffer.cold[BUFFER_ID].current_capacity)
        self.assertEqual(490, self.buffer.hot[BUFFER_ID].current_capacity)

        self.env.run(until=20)
        self.assertEqual(230, self.buffer.cold[BUFFER_ID].current_capacity)

        self.env.run(until=22)
        self.assertEqual(500, self.buffer.hot[BUFFER_ID].current_capacity)
        self.assertEqual(230, self.buffer.cold[BUFFER_ID].current_capacity)
        self.assertListEqual(
            [self.observation],
            self.buffer.cold[BUFFER_ID].observations['stored'],
        )

    def test_hot_transfer_observation(self):
        """
        When passed an observation, over a period of time ensure that the
        complete data set is removed.

        Only when all data has finished being transferred do we add the
        observation to ColdBuffer.observations.

        Observation duration is 10; ingest rate is 5.
        Observation.total_data_size => 50
        ColdBuffer.max_data_rate => 2; therefore
        Time until Observation is moved => 25.

        Returns
        -------
        """
        hot = self.buffer.hot[BUFFER_ID]
        cold = self.buffer.cold[BUFFER_ID]

        hot.current_capacity = 450
        self.observation.total_data_size = 50
        remaining = self.observation.total_data_size
        hot.observations["stored"].append(self.observation)

        # One transfer step moves `max_data_rate` (2) units of data.
        remaining = hot.transfer_observation(
            self.observation, cold.max_data_rate, remaining
        )
        self.assertEqual(48, remaining)
        self.assertTrue(self.observation in hot.observations["stored"])
        self.assertEqual(452, hot.current_capacity)

        # Drain whatever is left.
        while remaining > 0:
            remaining = hot.transfer_observation(
                self.observation, cold.max_data_rate, remaining
            )
        self.assertEqual(0, remaining)
        self.assertEqual(500, hot.current_capacity)

    def test_cold_receive_data(self):
        """
        When passed an observation, over a period of time ensure that the
        complete data set is added to the Cold Buffer.

        Only when all data has finished being transferred do we add the
        observation to ColdBuffer.observations.

        Observation duration is 10; ingest rate is 5.
        Observation.total_data_size => 50
        ColdBuffer.max_data_rate => 2; therefore
        Time until Observation is moved => 25.

        Returns
        -------
        """
        cold = self.buffer.cold[BUFFER_ID]

        self.observation.total_data_size = 50
        remaining = self.observation.total_data_size
        remaining = cold.receive_observation(self.observation, remaining)
        self.assertEqual(48, remaining)
        # Not stored until the whole data set has arrived.
        self.assertFalse(self.observation in cold.observations['stored'])

        while remaining > 0:
            remaining = cold.receive_observation(
                self.observation, remaining
            )
        self.assertTrue(self.observation in cold.observations['stored'])
        self.assertEqual(None, cold.observations['transfer'])

    # @unittest.skip("Functionality has changed")
    def testWorkflowAddedToQueue(self):
        """
        We only add a workflow to the queue once an observation has
        finished (and, therefore, after we have finished generating a plan
        for it).

        :return: None
        """
        # Calling planner.run() will store the generated plan in the
        # observation object
        self.observation.ast = 0
        generated_plan = self.planner.run(
            self.observation, self.buffer, None
        )
        self.observation.plan = generated_plan
        self.assertIsNotNone(self.observation.plan)
def setUp(self):
    """
    Create the environment, a 'heft' planning model, the cluster and the
    buffer.

    Returns
    -------
    """
    self.env = simpy.Environment()
    config = Config(CONFIG)
    self.model = SHADOWPlanning('heft')
    self.cluster = Cluster(env=self.env, config=config)
    self.buffer = Buffer(
        env=self.env,
        cluster=self.cluster,
        config=config,
    )
class TestBufferIngestDataStream(unittest.TestCase): def setUp(self): """ Returns ------- """ """ setup the buffer and do config stuff :return: Nothing """ self.env = simpy.Environment() self.cluster = Cluster(self.env, CLUSTER_CONFIG) self.buffer = Buffer(self.env, self.cluster, BUFFER_CONFIG) self.observation = Observation( name='test_observation', start=OBS_START_TME, duration=OBS_DURATION, demand=OBS_DEMAND, workflow=OBS_WORKFLOW, type='continuum', data_rate=5, ) def testBasicIngest(self): """ Test the 'ingest_data_stream' event, which is to be called in the Scheduler. The changes we expect in this are simple - after n timesteps, the HotBuffer.currect_capacity will have reduced n*observation_ingest_rate. """ self.observation.status = RunStatus.RUNNING ret = self.env.process(self.buffer.ingest_data_stream( self.observation)) self.env.run(until=1) self.assertEqual(495, self.buffer.hot.current_capacity) self.env.run(until=10) self.assertEqual(450, self.buffer.hot.current_capacity) self.assertEqual(self.buffer.hot.stored_observations[0], self.observation) def testIngestObservationNotRunning(self): """ The buffer won't ingest if the observation is not marked as RunStatus.RUNNING """ self.assertEqual(RunStatus.WAITING, self.observation.status) self.env.process(self.buffer.ingest_data_stream(self.observation)) # self.assertRaises( # RuntimeError, self.env.process, self.buffer.ingest_data_stream( # self.observation # ) # ) self.assertRaises(RuntimeError, self.env.run, until=1) # self.assertEqual(500, self.buffer.hot.current_capacity) def testIngestEdgeCase(self): """
def __init__(self, env, config, instrument, planning_model,
             planning_algorithm, scheduling, delay=None, timestamp=None,
             to_file=False, hdf5_path=None, **kwargs):
    """
    Build the monitor, cluster, buffer, planner, scheduler and instrument
    for a simulation, and validate the output settings.

    Parameters
    ----------
    env : simulation environment, stored as `self.env` and handed to
        every actor.
    config : path to the configuration file; parsed with `Config`.
    instrument : callable invoked with `env`, `config`, `planner` and
        `scheduler` keyword arguments to build `self.instrument`.
    planning_model : planning model passed to the `Planner`.
    planning_algorithm : planning algorithm name passed to the `Planner`.
    scheduling : callable invoked with no arguments to build the
        scheduling algorithm passed to the `Scheduler`.
    delay : optional delay model; when falsy, a no-delay `DelayModel` is
        created.
    timestamp : optional identifier passed to the `Monitor`; when falsy,
        the current time in whole seconds is used.
    to_file : whether output is to be written to file.
    hdf5_path : path of the HDF5 store; required when `to_file` is set.
    kwargs : `delimiters` is read if present.

    Raises
    ------
    ValueError
        If `to_file` is set without `hdf5_path`, or if opening the HDF5
        store raises `ValueError`.
    """
    #: :py:obj:`simpy.Environment` object
    self.env = env

    # Fall back to the current time (whole seconds) as the identifier.
    if timestamp:
        self._timestamp = timestamp
    else:
        self._timestamp = f'{time.time()}'.split('.')[0]
    #: :py:obj:`~topsim.core.monitor.Monitor` instance
    self.monitor = Monitor(self, self._timestamp)

    # Process necessary config files
    self._cfg_path = config  #: Configuration path

    # Initialise Actor and Resource objects
    cfg = Config(config)
    #: :py:obj:`~topsim.core.cluster.Cluster` instance
    self.cluster = Cluster(env, cfg)
    #: :py:obj:`~topsim.core.buffer.Buffer` instance
    self.buffer = Buffer(env, self.cluster, cfg)

    if not delay:
        # TODO Have this approach replicated so we don't specify the
        # model outside the simulation.
        delay = DelayModel(0.0, "normal", DelayModel.DelayDegree.NONE)
    self.planner = Planner(
        env, planning_algorithm, self.cluster, planning_model, delay
    )

    scheduling_algorithm = scheduling()
    #: :py:obj:`~topsim.core.scheduler.Scheduler` instance
    self.scheduler = Scheduler(
        env, self.buffer, self.cluster, scheduling_algorithm
    )
    #: User-defined :py:obj:`~topsim.core.instrument.Instrument` instance
    self.instrument = instrument(
        env=self.env,
        config=cfg,
        planner=self.planner,
        scheduler=self.scheduler
    )

    #: :py:obj:`bool` Flag for producing simulation output in a file.
    self.to_file = to_file
    if self.to_file and hdf5_path:
        try:
            if os.path.exists(hdf5_path):
                LOGGER.warning('Output HDF5 path already exists, '
                               'simulation appended to existing file')
            # Open and close once so an invalid path fails here rather
            # than later in the simulation.
            self._hdf5_store = pd.HDFStore(hdf5_path)
            self._hdf5_store.close()
        except ValueError as exc:
            # An `except` clause must name an exception class. The
            # previous form named an instance, which itself raised
            # TypeError whenever a ValueError had to be matched.
            raise ValueError(
                'Check pandas.HDFStore documentation for valid file path'
            ) from exc
    elif self.to_file and not hdf5_path:
        raise ValueError(
            'Attempted to initialise Simulation object that outputs '
            'to file without providing file path')
    else:
        LOGGER.info(
            'Simulation output will not be stored directly to file')

    #: Used to separate different simulations in HDF5 output
    self._delimiters = kwargs.get('delimiters', '')

    self.running = False
class TestColdBufferRequests(unittest.TestCase):
    """Tests for requesting observation data into the cold buffer."""

    def setUp(self):
        """
        Create the environment, cluster, buffer, planner and a single
        continuum observation with a data rate of 2.
        """
        self.env = simpy.Environment()
        self.cluster = Cluster(env=self.env, spec=CLUSTER_CONFIG)
        self.buffer = Buffer(
            env=self.env, cluster=self.cluster, config=BUFFER_CONFIG
        )
        self.planner = Planner(self.env, PLAN_ALGORITHM, self.cluster)
        self.observation = Observation(
            'scheduler_observation',
            OBS_START_TME,
            OBS_DURATION,
            OBS_DEMAND,
            OBS_WORKFLOW,
            type='continuum',
            data_rate=2,
        )

    def tearDown(self):
        """Nothing to clean up."""
        pass

    def testHotColdInteraction(self):
        """
        Testing the results of running
        'buffer.request_data_from(observation)'.

        Returns
        -------
        """
        # TODO THIS NEEDS TO CHANGE
        # TODO Hot Cold transfer should be automatic, not instigated by
        # the scheduler. This ensures that the scheduler only needs to
        # check the cold buffer, and that movement of data from the hot
        # buffer to the cold buffer is 'automatic' (that is, once data has
        # been through the hot buffer completely and INGEST run on that
        # data, we can move it to a large buffer store).

        # Prelims
        self.observation.status = RunStatus.RUNNING
        self.env.process(self.buffer.ingest_data_stream(self.observation))
        self.env.run(until=10)
        self.assertEqual(480, self.buffer.hot.current_capacity)

        # Moving data from one to the other
        self.assertEqual(250, self.buffer.cold.current_capacity)
        self.env.process(self.buffer.request_data_from(self.observation))

        self.env.run(until=15)
        self.assertEqual(240, self.buffer.cold.current_capacity)
        self.assertEqual(490, self.buffer.hot.current_capacity)

        self.env.run(until=40)
        self.assertEqual(230, self.buffer.cold.current_capacity)
        self.assertEqual(500, self.buffer.hot.current_capacity)
        self.assertListEqual(
            [self.observation], self.buffer.cold.observations
        )

    def testHotColdErrors(self):
        """
        We haven't processed the observation yet, so there shouldn't be
        anything in the Hot Buffer to request
        """
        self.env.process(self.buffer.request_data_from(self.observation))
        with self.assertRaises(RuntimeError):
            self.env.run(until=10)

    def testWorkflowAddedToQueue(self):
        """
        We only add a workflow to the queue once an observation has
        finished (and, therefore, after we have finished generating a plan
        for it).

        :return: None
        """
        # Calling planner.run() will store the generated plan in the
        # observation object; calling next() runs the iterator immediately
        # after the generator is called
        plan_generator = self.planner.run(self.observation)
        next(plan_generator)

        # Buffer observation queue should be empty
        queue = self.buffer.observations_for_processing
        self.assertTrue(queue.empty())
        #
        self.buffer.add_observation_to_waiting_workflows(self.observation)
        self.assertTrue(
            self.buffer.observations_for_processing.size() == 1
        )
class Simulation:
    """
    The Simulation class is a wrapper for all Actors; we start the
    simulation through the simulation class, which in turn invokes the
    initial Actors and monitoring, and provides the conditions for checking
    if the simulation has finished.

    Parameters
    ----------
    env : :py:obj:`simpy.Environment` object
        The discrete-event simulation environment. This is the way TOpSim
        simulation maintains state across the different actors, and
        interfaces with the simpy processes.
    config : str
        Path to the simulation JSON configuration file.
    instrument : :py:obj:`~topsim.core.instrument.Instrument`
        User-defined implementation of the Instrument class. The class
        itself is passed in; it is instantiated here with the ``env``,
        ``config``, ``planner`` and ``scheduler`` keyword arguments.
    planning_model : :py:obj:`~topsim.algorithms.planning.Planning` object
        User-defined implementation of the planning algorithm class.
    planning_algorithm: str
        Reference to the specific algorithm implemented in
        `planning_model`.
    scheduling: :py:obj:`~topsim.algorithms.scheduling.Algorithm`
        User-defined implementation of the scheduling algorithm
        :py:obj:`abc.ABC`. The class itself is passed in and is called with
        no arguments to build the algorithm instance.
    delay: :py:obj:`~topsim.core.delay.DelayModel`, optional
        Delay model for the simulation. When omitted, a model built with
        ``DelayModel(0.0, "normal", DelayModel.DelayDegree.NONE)`` is used.
    timestamp: str, optional
        Optional Simulation start-time; this is useful for testing, to
        ensure we name the file and the tests match up. Also useful if you
        do not want to use the time of the simulation as the name.
    to_file : bool, optional
        `True` if the simulation output is to be written to the HDF5 store
        at `hdf5_path`; `False` will return pandas DataFrame objects at the
        completion of the
        :py:meth:`~topsim.core.simulation.Simulation.start` function.
    hdf5_path : str, optional
        Path of the HDF5 store; required when `to_file` is `True`.
    **kwargs
        ``delimiters`` (str): used to separate different simulations within
        the HDF5 output. Defaults to an empty string.

    Notes
    -----
    If to_file left as `False`, simulation results and output will be
    returned as Pandas DataFrames (see
    :py:meth:`~topsim.core.simulation.Simulation.start`). This is designed
    for running multiple simulations, allowing for the appending of
    individual simulation results to a 'global'
    :py:obj:`~pandas.DataFrame`.

    File output is written through :py:obj:`pandas.HDFStore`.

    Passing in the option `delimiters` provides a way of differentiating
    between multiple simulations within a single HDF5 store (for example,
    in an experiment). A typical experimental loop may involve the
    following structure:

    >>> for heuristic in list_of_scheduling_heuristics
    >>>     for algorithm in list_of_planning_algorithms
    >>>         for cfg in list_of_system_configs
    >>>             ...
    >>>             delimiter = f'{heuristic}/{algorithm}/{cfg}'

    This means when querying HDF5 output files, the results of each
    simulation can be filtered nicely:

    >>> store = pd.HDFStore('path/to/output.h5')
    >>> # Returns a dataframe of simulation results
    >>> store['heuristic_1/algorithm_3/cfg.json']

    Examples
    --------

    Standard simulation with data frame output

    >>> env = simpy.Environment()
    >>> plan = PlanningModel()
    >>> simulation = Simulation(
    >>>     env, 'path/to/config.json', CustomInstrument, plan, 'heft',
    >>>     SchedulingModel
    >>> )

    If we want delays in the model:

    >>> dm = DelayModel(prob=0.1, dist='normal',
    >>>                 dm=DelayModel.DelayDegree.LOW)
    >>> simulation = Simulation(
    >>>     env, 'path/to/config.json', CustomInstrument, plan, 'heft',
    >>>     SchedulingModel, delay=dm
    >>> )

    Running a simulation to completion:

    >>> sim_df, task_df = simulation.start()

    Running a simulation for a specific time period, then resuming:

    >>> sim_df, task_df = simulation.start(runtime=100)
    >>> ### Check current status of simulation
    >>> now = simulation.resume(until=150)

    Raises
    ------
    ValueError
        If `to_file` is `True` and no `hdf5_path` is given, or if
        :py:obj:`pandas.HDFStore` rejects the path with a ValueError.
    """

    def __init__(
            self,
            env,
            config,
            instrument,
            planning_model,
            planning_algorithm,
            scheduling,
            delay=None,
            timestamp=None,
            to_file=False,
            hdf5_path=None,
            **kwargs
    ):

        #: :py:obj:`simpy.Environment` object
        self.env = env

        if timestamp:
            self._timestamp = timestamp
        else:
            # Default to the integer part of the current wall-clock time.
            self._timestamp = f'{time.time()}'.split('.')[0]
        #: :py:obj:`~topsim.core.monitor.Monitor` instance
        self.monitor = Monitor(self, self._timestamp)

        # Process necessary config files
        self._cfg_path = config  #: Configuration path

        # Initialise Actor and Resource objects
        cfg = Config(config)
        #: :py:obj:`~topsim.core.cluster.Cluster` instance
        self.cluster = Cluster(env, cfg)
        #: :py:obj:`~topsim.core.buffer.Buffer` instance
        self.buffer = Buffer(env, self.cluster, cfg)

        if not delay:
            # TODO Have this approach replicated so we don't specify the
            #  model outside the simulation.
            delay = DelayModel(0.0, "normal", DelayModel.DelayDegree.NONE)
        #: Planner built from the supplied planning model and algorithm
        self.planner = Planner(
            env, planning_algorithm, self.cluster, planning_model, delay
        )
        scheduling_algorithm = scheduling()
        #: :py:obj:`~topsim.core.scheduler.Scheduler` instance
        self.scheduler = Scheduler(
            env, self.buffer, self.cluster, scheduling_algorithm
        )
        #: User-defined :py:obj:`~topsim.core.instrument.Instrument` instance
        self.instrument = instrument(
            env=self.env,
            config=cfg,
            planner=self.planner,
            scheduler=self.scheduler
        )

        #: :py:obj:`bool` Flag for writing simulation output to HDF5.
        self.to_file = to_file
        # Always defined so later checks never hit an AttributeError.
        self._hdf5_store = None
        if self.to_file and hdf5_path:
            if os.path.exists(hdf5_path):
                LOGGER.warning('Output HDF5 path already exists, '
                               'simulation appended to existing file')
            try:
                # Open once to validate the path, then close until start()
                # needs to write.
                self._hdf5_store = pd.HDFStore(hdf5_path)
                self._hdf5_store.close()
            except ValueError as err:
                # An ``except`` clause must name an exception class; the
                # hint is attached to a re-raised ValueError instead.
                raise ValueError(
                    'Check pandas.HDFStore documentation for valid file path'
                ) from err
        elif self.to_file:
            raise ValueError(
                'Attempted to initialise Simulation object that outputs '
                'to file without providing file path')
        else:
            LOGGER.info(
                'Simulation output will not be stored directly to file')

        #: Used to separate different simulations in HDF5 output
        self._delimiters = kwargs.get('delimiters', '')

        #: Set once start() has registered the simulation processes
        self.running = False

    def start(self, runtime=-1):
        """
        Run the simulation, either for the specified runtime, OR until the
        exit conditions are reached.

        The exit conditions are:

            * There are no more observations to process,
            * There is nothing left in the Buffer
            * The Scheduler is not waiting to allocate machines to resources
            * There are not tasks still running on the cluster.

        Parameters
        ----------
        runtime : int
            Nominated runtime of the simulation. If the simulation length
            is known, pass that as the argument. If not, passing in a
            negative value (typically, just -1) will run the simulation
            until the exit condition is reached.

        Returns
        -------
        If `to_file` is True (and an HDF5 store was opened):
            None; the global simulation data, task data and configuration
            table are written to the HDF5 store.
        Otherwise:
            Two pandas.DataFrame objects for global sim runtime and task
            data.

        Raises
        ------
        RuntimeError
            If start() has already been called on this object.
        """
        if self.running:
            raise RuntimeError(
                "start() has already been called! "
                "Use resume() to continue a simulation that is already in "
                "progress.")

        self.running = True

        self.env.process(self.monitor.run())
        self.env.process(self.instrument.run())
        self.env.process(self.cluster.run())

        self.scheduler.start()
        self.env.process(self.scheduler.run())
        self.env.process(self.buffer.run())

        if runtime > 0:
            self.env.run(until=runtime)
        else:
            # Step one timestep at a time until every actor reports idle,
            # then advance one further timestep.
            while not self.is_finished():
                self.env.run(self.env.now + 1)
            self.env.run(self.env.now + 1)

        LOGGER.info("Simulation Finished @ %s", self.env.now)

        if self.to_file and self._hdf5_store is not None:
            global_df = self.monitor.df
            task_df = self._generate_final_task_data()
            self._hdf5_store.open()
            try:
                self._compose_hdf5_output(global_df, task_df)
            finally:
                # Do not leave the store open if writing fails.
                self._hdf5_store.close()
            return None
        return self.monitor.df, self._generate_final_task_data()

    def resume(self, until):
        """
        Resume a simulation for a period of time. Useful for testing
        purposes, as we do not re-initialise the process calls as we do in
        :py:obj:`~core.topsim.simulation.Simulation.start`

        Parameters
        ----------
        until : int
            The (non-inclusive) :py:obj:`Simpy.env.now` timestep that we
            want to continue to in the simulation

        Returns
        -------
        self.env.now : float
            The current time in the simulation

        Raises
        ------
        RuntimeError
            If start() has not been called yet.
        """
        if not self.running:
            raise RuntimeError(
                "Simulation has not been started! Call start() to initialise "
                "the process stack.")
        self.env.run(until=until)
        return self.env.now

    def is_finished(self):
        """
        Check if simulation is finished based on the following finish
        conditions:

            * The Instrument is idle (i.e. has no more observations left to
              run)
            * The Cluster is idle (no tasks are running)
            * The Buffer is empty (no data sits on the buffer)
            * The Schedule is idle (there are no more workflows/tasks queued)

        It is only when all of these return True that the simulation is
        regarded as finished.

        Returns
        -------
        True if the above requirements are met; False otherwise (i.e. the
        simulation is still running).
        """
        return bool(
            self.buffer.is_empty()
            and self.cluster.is_idle()
            and self.scheduler.is_idle()
            and self.instrument.is_idle()
        )

    @staticmethod
    def _split_monolithic_config(self, json):
        """Placeholder: return `json` unchanged."""
        # NOTE(review): decorated as a staticmethod yet declares ``self``, so
        # callers must pass two positional arguments; the ``json`` parameter
        # also shadows the json module inside this function. Signature left
        # untouched - confirm the intended interface before changing it.
        return json

    def _generate_final_task_data(self):
        """
        Generate a final data frame from the cluster's task dataframe
        output.

        The transposed cluster data gains three constant columns:
        `scheduling`, `planning` and `config`.

        Returns
        -------
        df : :py:obj:`pandas.DataFrame`
        """
        df = self.cluster.finished_task_time_data()
        df = df.T
        size = len(df)
        # NOTE(review): 'scheduling' is filled from the planner and
        # 'planning' from the scheduler, which looks swapped. Kept as-is
        # because stored results may rely on it - confirm.
        df['scheduling'] = [self.planner.algorithm] * size
        df['planning'] = [repr(self.scheduler.algorithm)] * size
        df['config'] = [self._cfg_path] * size
        return df

    def _compose_hdf5_output(self, global_df, tasks_df):
        """
        Put global simulation, task specific, and configuration data into
        the (already opened) :py:obj:`pandas.HDFStore()` object.

        Parameters
        ----------
        global_df : :py:obj:pandas.DataFrame
            The global, per-timestep overview of the simulation
        tasks_df : :py:obj:pandas.DataFrame
            Information on each tasks' execution throughout the simulation.

        Returns
        -------
        The HDF5 store that was written to.
        """
        if self._timestamp:
            ts = f'd{self._timestamp}'
        else:
            ts = f'd{datetime.datetime.today().strftime("%y_%m_%d_%H_%M_%S")}'

        workflows = self._create_config_table(self._cfg_path)

        # Key layout: d<timestamp>/<delimiters>/<config file name sans .json>
        sanitised_path = self._cfg_path.replace(".json", '').split('/')[-1]
        final_key = f'{ts}/{self._delimiters}/{sanitised_path}'
        self._hdf5_store.put(key=f"{final_key}/sim", value=global_df)
        self._hdf5_store.put(key=f'{final_key}/tasks', value=tasks_df)
        self._hdf5_store.put(key=f'{final_key}/config', value=workflows)
        return self._hdf5_store

    def _stringify_json_data(self, path):
        """
        Read the JSON file at `path` and return its data re-encoded as a
        string.

        Parameters
        ----------
        path : str
            Path of the JSON file to read.

        Returns
        -------
        jstr : String representation of JSON-encoded data

        Raises
        ------
        json.JSONDecodeError
            If the file does not contain valid JSON.
        """
        with open(path) as fp:
            jdict = json.load(fp)
        return json.dumps(jdict)  # , indent=2)

    def _create_config_table(self, path):
        """
        From the simulation config files, find the paths for each
        observation workflow and produce a table of this information

        Parameters
        ----------
        path : str
            Path to the simulation configuration JSON file.

        Returns
        -------
        df : :py:obj:`pandas.DataFrame`
            One row for the simulation config followed by one row per
            pipeline, with columns `entity`, `config_path` and
            `config_json`.
        """
        cfg_str = self._stringify_json_data(path)
        jdict = json.loads(cfg_str)
        pipelines = jdict['instrument']['telescope']['pipelines']
        rows = [['simulation_config', path, cfg_str]]
        for observation, pipeline in pipelines.items():
            # Workflow paths containing 'publications' are redirected to
            # 'archived_results' before being read.
            wf_path = pipeline['workflow'].replace(
                'publications', 'archived_results'
            )
            rows.append(
                [f'{observation}', wf_path,
                 self._stringify_json_data(wf_path)]
            )

        return pd.DataFrame(
            rows, columns=['entity', 'config_path', 'config_json']
        )
class TestSchedulerIngest(unittest.TestCase):
    """Checks how the scheduler tests for, and provisions, ingest capacity."""

    def setUp(self) -> None:
        """Create the environment, actors and telescope used by each test."""
        self.env = simpy.Environment()
        cfg = Config(CONFIG)
        self.cluster = Cluster(self.env, cfg)
        self.buffer = Buffer(self.env, self.cluster, cfg)
        self.scheduler = Scheduler(
            self.env, self.buffer, self.cluster, DynamicAlgorithmFromPlan
        )
        self.planner = Planner(
            self.env, PLANNING_ALGORITHM, self.cluster, SHADOWPlanning('heft')
        )
        # planner = None
        self.telescope = Telescope(
            self.env, cfg, self.planner, self.scheduler
        )

    def testSchdulerCheckIngestReady(self):
        """
        Check the return status of check_ingest_capacity is correct
        """
        telescope = self.telescope
        pipelines = telescope.pipelines
        observation = telescope.observations[0]
        max_ingest = telescope.max_ingest

        # Nothing has run yet, so there should be capacity.
        self.assertEqual(0.0, self.env.now)
        self.assertTrue(
            self.scheduler.check_ingest_capacity(
                observation, pipelines, max_ingest
            )
        )

        # Shrink the available pool to three entries; the check should now
        # report that there is no capacity.
        all_available = self.cluster._resources['available']
        self.cluster._resources['available'] = all_available[:3]
        self.assertFalse(
            self.scheduler.check_ingest_capacity(
                observation, pipelines, max_ingest
            )
        )

        # Put the full pool back and confirm its size.
        self.cluster._resources['available'] = all_available
        self.assertEqual(10, len(self.cluster._resources['available']))

    def testSchedulerProvisionsIngest(self):
        """
        Ensure that the scheduler correctly coordinates ingest onto the
        Cluster and into the Buffer

        Returns
        -------

        """
        telescope = self.telescope
        pipelines = telescope.pipelines
        max_ingest = telescope.max_ingest
        observation = telescope.observations[0]

        # The result is not inspected here; the call itself is retained.
        self.scheduler.check_ingest_capacity(
            observation, pipelines, max_ingest
        )
        self.env.process(self.cluster.run())
        self.env.process(self.buffer.run())
        observation.status = RunStatus.WAITING
        self.env.process(
            self.scheduler.allocate_ingest(
                observation, pipelines, self.planner
            )
        )

        self.env.run(until=1)
        self.assertEqual(5, len(self.cluster._resources['available']))
        # After one timestep the hot buffer's current capacity is expected
        # to be 496.
        self.assertEqual(496, self.buffer.hot[0].current_capacity)

        self.env.run(until=30)
        self.assertEqual(10, len(self.cluster._resources['available']))
        self.assertEqual(5, len(self.cluster._tasks['finished']))
        self.assertEqual(500, self.buffer.hot[0].current_capacity)
        self.assertEqual(210, self.buffer.cold[0].current_capacity)
def setUp(self) -> None:
    """Create a fresh environment, cluster and buffer before each test."""
    self.env = simpy.Environment()
    cfg = Config(CONFIG)
    cluster = Cluster(self.env, cfg)
    self.cluster = cluster
    # The buffer is built on top of the cluster created just above.
    self.buffer = Buffer(self.env, cluster, cfg)
class TestDelaysInActors(unittest.TestCase): def setUp(self): """ Repeating above test cases but with delays to determine that delay flags reach us. Returns ------- """ self.env = simpy.Environment() config = Config(INTEGRATION) self.cluster = Cluster(self.env, config) self.buffer = Buffer(self.env, self.cluster, config) dm = DelayModel(0.9, "normal", DelayModel.DelayDegree.HIGH) self.planner = Planner( self.env, PLANNING_ALGORITHM, self.cluster, SHADOWPlanning('heft',delay_model=dm), delay_model=dm ) self.scheduler = Scheduler( self.env, self.buffer, self.cluster, DynamicAlgorithmFromPlan() ) self.telescope = Telescope( self.env, config, self.planner, self.scheduler ) self.env.process(self.cluster.run()) self.env.process(self.buffer.run()) self.scheduler.start() self.env.process(self.scheduler.run()) self.env.process(self.telescope.run()) def test_scheduler_delay_detection(self): """ Nothing should change until we reach the workflow plan, as we are testing TaskDelays Returns ------- """ self.env.run(until=1) # Remember - env starts at 0, we don't start until 1. self.assertEqual(10, len(self.cluster._resources['available'])) self.env.run(until=2) # After 1 timestep, data in the HotBuffer should be 4 self.assertEqual(496, self.buffer.hot[0].current_capacity) self.env.run(until=31) self.assertEqual(5, len(self.cluster._tasks['finished'])) self.assertEqual(500, self.buffer.hot[0].current_capacity) self.env.run(until=44) # We know that the schedule has been delayed - however, we don't # report this to the telescope until we know how long we are delayed # (that is, until the task has completely finished its duration). # In this instance. we know that the first task is going to be # delayed, and so wait until it's completed execution to trigger a # delay. 
self.assertEqual(ScheduleStatus.ONTIME, self.scheduler.schedule_status) self.env.run(until=45) self.assertTrue(ScheduleStatus.DELAYED,self.scheduler.schedule_status) self.env.run(until=124) # Assert that we still have tasks running # self.assertLess( # 0, len(self.cluster.clusters['default']['tasks']['running']) # ) self.assertNotEqual(250, self.buffer.cold[0].current_capacity) def test_telescope_delay_detection(self): """ Returns ------- """ self.env.run(until=1) # Remember - env starts at 0, we don't start until 1. self.assertEqual(10, len(self.cluster._resources['available'])) self.env.run(until=2) # After 1 timestep, data in the HotBuffer should be 4 self.assertEqual(496, self.buffer.hot[0].current_capacity) self.env.run(until=31) self.assertEqual(5, len(self.cluster._tasks['finished'])) self.assertEqual(500, self.buffer.hot[0].current_capacity) self.env.run(until=32) # Ensure the time self.assertEqual(ScheduleStatus.ONTIME, self.scheduler.schedule_status) self.env.run(until=100) self.assertEqual(ScheduleStatus.DELAYED,self.scheduler.schedule_status) self.assertTrue(self.telescope.delayed) def test_telescope_delay_greedy_decision(self): """