Esempio n. 1
0
    def setUp(self) -> None:
        """
        Build the fixtures used by the scheduler edge-case tests.

        The scenario targets edge cases such as double-allocation. For
        example, the use of curr_allocs should prevent
        _process_current_schedule from allocating a task twice, so that is
        what the tests built on this fixture check.

        Returns
        -------
        None
        """
        self.env = simpy.Environment()
        cfg = Config(CONFIG)
        algorithm = DynamicAlgorithmFromPlan()

        self.cluster = Cluster(env=self.env, config=cfg)
        # Within this fixture the telescope is only read for its
        # observations, so it is built without a planner or a scheduler.
        self.telescope = Telescope(
            self.env, cfg, planner=None, scheduler=None
        )
        self.buffer = Buffer(self.env, self.cluster, cfg)
        self.scheduler = Scheduler(
            self.env, self.buffer, self.cluster, algorithm
        )

        # Convenience handles to the first observation and first machine.
        self.observation = self.telescope.observations[0]
        self.machine = self.cluster.machines[0]
Esempio n. 2
0
class TestTelescopeIngest(unittest.TestCase):
    """Exercise telescope ingest together with the cluster and buffer."""

    def setUp(self) -> None:
        """Create the environment, cluster, buffer, scheduler and planner."""
        self.env = simpy.Environment()
        self.cluster = Cluster(env=self.env, spec=CLUSTER_CONFIG)
        self.buffer = Buffer(env=self.env,
                             cluster=self.cluster,
                             config=BUFFER_CONFIG)
        # No scheduling algorithm is supplied for this fixture.
        self.scheduler = Scheduler(env=self.env,
                                   buffer=self.buffer,
                                   cluster=self.cluster,
                                   algorithm=None)
        self.planner = Planner(self.env, 'heft', self.cluster)

    def testIngest(self):
        """
        Run the telescope and scheduler processes and check telescope use,
        available resources and buffer state at successive simulation times.
        """
        telescope = Telescope(env=self.env,
                              config=OBSERVATION_CONFIG,
                              planner=self.planner,
                              scheduler=self.scheduler)
        # Nothing is in use before the simulation has been advanced.
        self.assertEqual(0, telescope.telescope_use)
        self.env.process(telescope.run())
        self.scheduler.init()
        self.env.process(self.scheduler.run())
        self.env.run(until=1)
        self.assertEqual(36, telescope.telescope_use)
        self.assertEqual(5, len(self.cluster.available_resources))
        # After 1 timestep the hot buffer's current capacity should be 496
        self.assertEqual(496, self.buffer.hot.current_capacity)
        self.env.run(until=10)
        self.assertEqual(460, self.buffer.hot.current_capacity)
        self.env.run(until=12)
        # By t=12 the telescope is idle again and all 10 resources are free.
        self.assertEqual(0, telescope.telescope_use)
        self.assertEqual(10, len(self.cluster.available_resources))
        self.assertEqual(5, len(self.cluster.finished_tasks))
        self.assertEqual(1, len(self.buffer.waiting_observation_list))
Esempio n. 3
0
    def setUp(self):
        """
        Build the integration fixture with a delay model attached.

        Repeats the integration scenario, but with delays, in order to
        determine that delay flags reach us.

        Returns
        -------
        None
        """
        self.env = simpy.Environment()
        cfg = Config(INTEGRATION)
        self.cluster = Cluster(self.env, cfg)
        self.buffer = Buffer(self.env, self.cluster, cfg)

        # One delay model instance is shared by the planning model and the
        # planner.
        delay_model = DelayModel(0.9, "normal", DelayModel.DelayDegree.HIGH)
        planning_model = SHADOWPlanning('heft', delay_model=delay_model)
        self.planner = Planner(
            self.env,
            PLANNING_ALGORITHM,
            self.cluster,
            planning_model,
            delay_model=delay_model,
        )

        self.scheduler = Scheduler(
            self.env, self.buffer, self.cluster, DynamicAlgorithmFromPlan()
        )
        self.telescope = Telescope(
            self.env, cfg, self.planner, self.scheduler
        )

        # Register the actor processes; registration order is kept as
        # cluster, buffer, scheduler, telescope.
        for actor in (self.cluster, self.buffer):
            self.env.process(actor.run())
        self.scheduler.start()
        self.env.process(self.scheduler.run())
        self.env.process(self.telescope.run())
Esempio n. 4
0
 def setUp(self) -> None:
     """Create the environment, cluster, buffer, scheduler and planner."""
     env = simpy.Environment()
     self.env = env
     self.cluster = Cluster(env=env, spec=CLUSTER_CONFIG)
     self.buffer = Buffer(
         env=env, cluster=self.cluster, config=BUFFER_CONFIG
     )
     # No scheduling algorithm is supplied for this fixture.
     self.scheduler = Scheduler(
         env=env,
         buffer=self.buffer,
         cluster=self.cluster,
         algorithm=None,
     )
     self.planner = Planner(env, 'heft', self.cluster)
Esempio n. 5
0
 def setUp(self) -> None:
     """
     Build the cluster, buffer, scheduler, planner and telescope fixtures.

     The scheduler is given an *instance* of DynamicAlgorithmFromPlan
     rather than the class object itself.
     """
     self.env = simpy.Environment()
     config = Config(CONFIG)
     self.cluster = Cluster(self.env, config)
     self.buffer = Buffer(self.env, self.cluster, config)
     self.scheduler = Scheduler(self.env, self.buffer, self.cluster,
                                DynamicAlgorithmFromPlan())
     self.planner = Planner(self.env, PLANNING_ALGORITHM, self.cluster,
                            SHADOWPlanning('heft'))
     self.telescope = Telescope(self.env, config, self.planner,
                                self.scheduler)
Esempio n. 6
0
class TestSchedulerIntegration(unittest.TestCase):
    """Run the scheduler alongside the cluster, buffer and telescope."""

    def setUp(self):
        """Build every actor and register their processes with simpy."""
        self.env = simpy.Environment()
        config = Config(INTEGRATION)
        self.cluster = Cluster(self.env, config)
        self.buffer = Buffer(self.env, self.cluster, config)
        self.planner = Planner(self.env, PLANNING_ALGORITHM, self.cluster,
                               SHADOWPlanning('heft'))

        self.scheduler = Scheduler(self.env, self.buffer, self.cluster,
                                   DynamicAlgorithmFromPlan())
        self.telescope = Telescope(self.env, config, self.planner,
                                   self.scheduler)
        # Processes are registered in this order: cluster, buffer,
        # scheduler, telescope.
        self.env.process(self.cluster.run())
        self.env.process(self.buffer.run())
        self.scheduler.start()
        self.env.process(self.scheduler.run())
        self.env.process(self.telescope.run())

    def test_FIFO_with_buffer(self):
        """
        Demonstrate that the scheduler accurately schedules when we have
        other Actors working in tandem.

        Expectations:
            - After 1 timestep in the simulation, all 10 resources that we
            start with are still available.
            - After 2 timesteps, 5 of the 10 resources are available and
            the first hot buffer's current capacity is 496.
        Returns
        -------

        """
        self.env.run(until=1)

        self.assertEqual(10, len(self.cluster._resources['available']))
        # One timestep later, 5 resources remain available and the hot
        # buffer's current capacity should be 496
        self.env.run(until=2)
        self.assertEqual(5, len(self.cluster._resources['available']))
        self.assertEqual(496, self.buffer.hot[0].current_capacity)
        self.env.run(until=31)
        self.assertEqual(5, len(self.cluster._tasks['finished']))
        # self.assertEqual(500, self.buffer.hot[0].current_capacity)
        self.assertEqual(210, self.buffer.cold[0].current_capacity)
        self.env.run(until=32)
        # Ensure the schedule is reported as on time
        self.assertEqual(ScheduleStatus.ONTIME, self.scheduler.schedule_status)
        # 30 timesteps until we finish everything + 81 timesteps to complete
        # workflow plan.
        self.env.run(until=124)
        # As we have been processing the current observation, we are also
        # ingesting the next one.
        self.assertEqual(250, self.buffer.cold[0].current_capacity)
Esempio n. 7
0
 def setUp(self):
     """Build the actors for the LONG_CONFIG scenario without starting them."""
     self.env = simpy.Environment()
     algorithm = DynamicAlgorithmFromPlan()
     cfg = Config(LONG_CONFIG)
     self.cluster = Cluster(self.env, cfg)
     model = SHADOWPlanning(algorithm=PLANNING_ALGORITHM)
     self.planner = Planner(
         self.env, PLANNING_ALGORITHM, self.cluster, model
     )
     self.buffer = Buffer(self.env, self.cluster, cfg)
     self.scheduler = Scheduler(
         self.env, self.buffer, self.cluster, algorithm
     )
     self.telescope = Telescope(
         self.env, cfg, self.planner, self.scheduler
     )
Esempio n. 8
0
    def setUp(self) -> None:
        """Create the config, cluster, buffer, scheduler and planner."""
        env = simpy.Environment()
        cfg = Config(CONFIG)
        self.env = env
        self.config = cfg

        self.cluster = Cluster(env=env, config=cfg)
        self.buffer = Buffer(env=env, cluster=self.cluster, config=cfg)
        # No scheduling algorithm is supplied for this fixture.
        self.scheduler = Scheduler(
            env=env,
            buffer=self.buffer,
            cluster=self.cluster,
            algorithm=None,
        )
        self.planner = Planner(
            env, 'heft', self.cluster, SHADOWPlanning('heft')
        )
Esempio n. 9
0
class TestSchedulerDynamicReAllocation(unittest.TestCase):
    """Check task allocation from a plan using GreedyAlgorithmFromPlan."""

    def setUp(self) -> None:
        """Build the actors for the LONG_CONFIG scenario."""
        self.env = simpy.Environment()
        sched_algorithm = GreedyAlgorithmFromPlan()
        config = Config(LONG_CONFIG)
        self.cluster = Cluster(self.env, config)
        self.planner = Planner(self.env, PLANNING_ALGORITHM, self.cluster,
                               SHADOWPlanning('heft'))
        self.buffer = Buffer(self.env, self.cluster, config)
        self.scheduler = Scheduler(self.env, self.buffer, self.cluster,
                                   sched_algorithm)
        self.telescope = Telescope(self.env, config, self.planner,
                                   self.scheduler)

    def test_reallocation_with_plan(self):
        """
        Queue the first observation with a freshly generated plan, allocate
        its tasks, and expect the scheduler's observation queue to be empty
        once the simulation has run to t=299.
        """
        curr_obs = self.telescope.observations[0]
        self.scheduler.observation_queue.append(curr_obs)
        curr_obs.ast = self.env.now
        curr_obs.plan = self.planner.run(curr_obs, self.buffer,
                                         self.telescope.max_ingest)
        self.env.process(self.scheduler.allocate_tasks(curr_obs))
        self.env.run(1)
        # The observation is placed in the cold buffer's 'stored' list by
        # hand here rather than by running the buffer process.
        self.buffer.cold[0].observations['stored'].append(curr_obs)
        self.env.run(until=299)
        self.assertEqual(0, len(self.scheduler.observation_queue))
Esempio n. 10
0
    def setUp(self):
        """Build every actor and register their processes with simpy."""
        self.env = simpy.Environment()
        cfg = Config(INTEGRATION)
        self.cluster = Cluster(self.env, cfg)
        self.buffer = Buffer(self.env, self.cluster, cfg)
        self.planner = Planner(
            self.env, PLANNING_ALGORITHM, self.cluster, SHADOWPlanning('heft')
        )

        self.scheduler = Scheduler(
            self.env, self.buffer, self.cluster, DynamicAlgorithmFromPlan()
        )
        self.telescope = Telescope(
            self.env, cfg, self.planner, self.scheduler
        )

        # Registration order is kept as cluster, buffer, scheduler,
        # telescope.
        for actor in (self.cluster, self.buffer):
            self.env.process(actor.run())
        self.scheduler.start()
        self.env.process(self.scheduler.run())
        self.env.process(self.telescope.run())
Esempio n. 11
0
class TestTelescopeIngest(unittest.TestCase):
    """Exercise telescope ingest together with the cluster and buffer."""

    def setUp(self) -> None:
        """Create the config, cluster, buffer, scheduler and planner."""
        self.env = simpy.Environment()
        self.config = Config(CONFIG)

        self.cluster = Cluster(env=self.env, config=self.config)
        self.buffer = Buffer(env=self.env,
                             cluster=self.cluster,
                             config=self.config)
        # No scheduling algorithm is supplied for this fixture.
        self.scheduler = Scheduler(env=self.env,
                                   buffer=self.buffer,
                                   cluster=self.cluster,
                                   algorithm=None)
        self.planner = Planner(self.env, 'heft', self.cluster,
                               SHADOWPlanning('heft'))

    def testIngest(self):
        """
        Run the telescope, cluster, scheduler and buffer processes and check
        telescope use, available resources and buffer capacities at
        successive simulation times.
        """
        telescope = Telescope(env=self.env,
                              config=self.config,
                              planner=self.planner,
                              scheduler=self.scheduler)
        # Nothing is in use before the simulation has been advanced.
        self.assertEqual(0, telescope.telescope_use)
        self.env.process(telescope.run())
        self.env.process(self.cluster.run())
        self.scheduler.start()
        self.env.process(self.scheduler.run())
        self.env.process(self.buffer.run())
        self.env.run(until=2)
        self.assertEqual(36, telescope.telescope_use)
        self.assertEqual(5, len(self.cluster._resources['available']))
        # By t=2 the hot buffer's current capacity should be 496
        self.assertEqual(496, self.buffer.hot[0].current_capacity)
        self.env.run(until=11)
        # NOTE(review): the value is wrapped in a one-element list literal,
        # so len(...) is always 1 and this assertion cannot fail. Confirm
        # what observations["transfer"] holds and assert on it directly.
        self.assertEqual(len([self.buffer.hot[0].observations["transfer"]]), 1)
        self.assertEqual(462, self.buffer.hot[0].current_capacity)
        self.assertEqual(248, self.buffer.cold[0].current_capacity)
        self.env.run(until=12)
        # By t=12 the telescope is idle again and all 10 resources are free.
        self.assertEqual(0, telescope.telescope_use)
        self.assertEqual(10, len(self.cluster._resources['available']))
        self.assertEqual(5, len(self.cluster._tasks['finished']))
Esempio n. 12
0
    def setUp(self):
        """
        Build the planner, cluster, buffer, scheduler and telescope fixtures.

        The scheduler is created before the telescope because the telescope
        constructor is handed ``self.scheduler``; building them the other way
        round raises ``AttributeError`` before any test can run.
        """
        self.env = simpy.Environment()
        sched_algorithm = FifoAlgorithm()
        self.planner = Planner(self.env, test_data.planning_algorithm,
                               test_data.machine_config)
        self.cluster = Cluster(self.env, CLUSTER_CONFIG)
        self.buffer = Buffer(self.env, self.cluster, BUFFER_CONFIG)
        self.observations = [
            Observation('scheduler_observation',
                        OBS_START_TME,
                        OBS_DURATION,
                        OBS_DEMAND,
                        OBS_WORKFLOW,
                        type='continuum',
                        data_rate=5)
        ]

        # NOTE(review): positional argument order is kept exactly as
        # written; confirm it against the Scheduler and Telescope
        # constructor signatures.
        self.scheduler = Scheduler(self.env, sched_algorithm, self.buffer,
                                   self.cluster)
        self.telescope = Telescope(self.env, OBSERVATION_CONFIG,
                                   self.scheduler, self.planner)
Esempio n. 13
0
	def __init__(
			self,
			env,
			telescope_config,
			cluster_config,
			buffer_config,
			planning_algorithm,
			scheduling_algorithm,
			event_file,
			visualisation=False
	):
		"""
		Store the run options and construct every actor and resource.

		A Monitor is created only when ``event_file`` is not None, and a
		Visualiser only when ``visualisation`` is truthy. The cluster,
		buffer, planner, scheduler and telescope are then built in that
		order.
		"""
		self.env = env
		self.event_file = event_file
		self.visualisation = visualisation

		# Optional observers; both are handed this object.
		if self.event_file is not None:
			self.monitor = Monitor(self)
		if self.visualisation:
			self.visualiser = Visualiser(self)

		# Actor and resource objects. Later constructors receive the
		# objects created by earlier ones.
		self.cluster = Cluster(env, cluster_config)
		self.buffer = Buffer(env, self.cluster, config=buffer_config)
		self.planner = Planner(env, planning_algorithm, cluster_config)
		self.scheduler = Scheduler(
			env,
			self.buffer,
			self.cluster,
			scheduling_algorithm,
		)
		self.telescope = Telescope(
			env=self.env,
			config=telescope_config,
			planner=self.planner,
			scheduler=self.scheduler,
		)
Esempio n. 14
0
class TestSchedulerDelayHelpers(unittest.TestCase):
    def setUp(self):
        """Build every actor, with a delay model on the planner, and
        register their processes with simpy."""
        self.env = simpy.Environment()
        cfg = Config(INTEGRATION)
        self.cluster = Cluster(self.env, cfg)
        self.buffer = Buffer(self.env, self.cluster, cfg)
        self.planner = Planner(
            self.env,
            PLANNING_ALGORITHM,
            self.cluster,
            SHADOWPlanning('heft'),
            delay_model=DelayModel(0.3, "normal"),
        )

        self.scheduler = Scheduler(
            self.env, self.buffer, self.cluster, DynamicAlgorithmFromPlan()
        )
        self.telescope = Telescope(
            self.env, cfg, self.planner, self.scheduler
        )

        # Registration order is kept as cluster, buffer, scheduler,
        # telescope.
        for actor in (self.cluster, self.buffer):
            self.env.process(actor.run())
        self.scheduler.start()
        self.env.process(self.scheduler.run())
        self.env.process(self.telescope.run())

    def test_propogate_delay_returns_updated_workflow(self):
        """
Esempio n. 15
0
class TestSchedulerEdgeCases(unittest.TestCase):
    """Edge-case tests for the scheduler, such as double-allocation."""

    def setUp(self) -> None:
        """
        This test scenario is going to test the edge cases like
        double-allocation that may happen.

        For example - the use of curr_allocs should result in the
        _process_current_schedule not double-allocating, so we will test
        this.

        Returns
        -------
        None
        """
        self.env = simpy.Environment()
        config = Config(CONFIG)
        sched_algorithm = DynamicAlgorithmFromPlan()

        self.cluster = Cluster(env=self.env, config=config)
        # Within this class the telescope is only read for its observations,
        # so it is built without a planner or a scheduler.
        self.telescope = Telescope(self.env,
                                   config,
                                   planner=None,
                                   scheduler=None)
        self.buffer = Buffer(self.env, self.cluster, config)

        self.scheduler = Scheduler(self.env, self.buffer, self.cluster,
                                   sched_algorithm)

        # Convenience handles to the first observation and first machine.
        self.observation = self.telescope.observations[0]
        self.machine = self.cluster.machines[0]

    def test_double_allocation(self):
        """
        Given an existing schedule, add multiple allocations to ensure
        duplicates do not exist
        """
        task = Task('test_0', 0, 2, self.machine, [])
        dup_task = Task('test_2', 8, 12, self.machine, [])
        # Both tasks are mapped onto the same machine.
        existing_schedule = {task: self.machine, dup_task: self.machine}
        new_schedule, new_pairs = self.scheduler._process_current_schedule(
            existing_schedule, allocation_pairs={}, workflow_id='test_id')
        # The task is not yet running until the simulation advances a step.
        self.assertFalse(task in self.cluster._tasks['running'])
        self.env.run(until=1)
        self.assertTrue(task in self.cluster._tasks['running'])
        # Expected outcome: 'task' has left the returned schedule and its id
        # is in the returned pairs, while 'dup_task' is still in the
        # schedule.
        self.assertTrue(dup_task in new_schedule)
        self.assertFalse(task in new_schedule)
        self.assertTrue(task.id in new_pairs)
Esempio n. 16
0
    def setUp(self):
        """
        Build the batch-processing fixtures.

        ``self.algorithm`` is a BatchProcessing instance and ``self.model``
        a BatchPlanning model; the scheduler itself is constructed with
        DynamicAlgorithmFromPlan.
        """
        self.env = simpy.Environment()
        config = Config(CONFIG)
        self.cluster = Cluster(self.env, config=config)
        self.buffer = Buffer(self.env, self.cluster, config)
        self.scheduler = Scheduler(self.env, self.buffer, self.cluster,
                                   DynamicAlgorithmFromPlan())
        # Assigned once, as an instance; the earlier assignment of the bare
        # class was overwritten before it could be used.
        self.algorithm = BatchProcessing()
        self.model = BatchPlanning('batch')
        self.planner = Planner(
            self.env,
            'heft',
            self.cluster,
            self.model,
        )

        self.telescope = Telescope(self.env, config, self.planner,
                                   self.scheduler)
Esempio n. 17
0
class Simulation(object):
	"""
	Wrapper around all Actors of a TOpSim simulation.

	The simulation is started through this class, which invokes the initial
	Actors and monitoring, and provides the condition used to check whether
	the simulation has finished.

	Parameters
	----------
	env : simpy.Environment object
		This is how the TOpSim simulation maintains state across the
		different actors, and interfaces with the simpy processes.

	telescope_config: str
		Path to the telescope config that follows the TOpSim config
		specification (JSON). It is passed on to the Telescope constructor.

	cluster_config: str
		Path to the HPC cluster config that forms the computing component of
		the SDP.

	buffer_config: str
		Path to the buffer configuration.

	planning_algorithm: object
		Instance of the planning algorithm class interface as defined in
		algorithms.examples/

	scheduling_algorithm: object
		Instance of the core.algorithm interface.

	event_file: str
		Path to the output file that stores execution of simulation. When
		None, no Monitor is created.

	visualisation: bool
		If visualisation is required, True; else, False.
	"""

	def __init__(
			self,
			env,
			telescope_config,
			cluster_config,
			buffer_config,
			planning_algorithm,
			scheduling_algorithm,
			event_file,
			visualisation=False
	):
		"""Store the run options and construct every actor and resource."""
		self.env = env
		self.event_file = event_file
		self.visualisation = visualisation

		# Optional observers; both are handed this object.
		if self.event_file is not None:
			self.monitor = Monitor(self)
		if self.visualisation:
			self.visualiser = Visualiser(self)

		# Actor and resource objects. Later constructors receive the
		# objects created by earlier ones.
		self.cluster = Cluster(env, cluster_config)
		self.buffer = Buffer(env, self.cluster, config=buffer_config)
		self.planner = Planner(env, planning_algorithm, cluster_config)
		self.scheduler = Scheduler(
			env,
			self.buffer,
			self.cluster,
			scheduling_algorithm,
		)
		self.telescope = Telescope(
			env=self.env,
			config=telescope_config,
			planner=self.planner,
			scheduler=self.scheduler,
		)

	def start(self, runtime=150):
		"""
		Register the actor processes and run the simulation.

		The documented exit condition (see ``is_finished``) is:

			* There are no more observations to process,
			* There is nothing left in the Buffer
			* The Scheduler is not waiting to allocate machines to resources
			* There are no tasks still running on the cluster.

		Parameters
		----------
		runtime : int
			Nominated runtime of the simulation. A positive value runs the
			environment until that time. Any other value (typically, just
			-1) calls ``env.run()`` without a time limit, and only if
			``is_finished()`` is not already true.

		Returns
		-------
		None
		"""
		# Optional observers are registered first, when they were created.
		if self.event_file is not None:
			self.env.process(self.monitor.run())
		if self.visualisation:
			self.env.process(self.visualiser.run())

		for actor in (self.telescope, self.cluster):
			self.env.process(actor.run())

		self.scheduler.init()
		self.env.process(self.scheduler.run())

		# Calling env.run() drives the processes registered above.
		if runtime > 0:
			self.env.run(until=runtime)
		elif not self.is_finished():
			self.env.run()

		logger.info("Simulation Finished @ %s", self.env.now)

	def is_finished(self):
		"""
		Return True when no work is left anywhere, otherwise False.

		The checks are made in order and stop at the first one that fails:
		telescope observations, buffer observations awaiting processing,
		observations waiting in the scheduler, tasks running on the cluster.
		"""
		if len(self.telescope.observations) != 0:
			return False
		if not self.buffer.observations_for_processing.empty():
			return False
		if len(self.scheduler.waiting_observations) != 0:
			return False
		if len(self.cluster.running_tasks) != 0:
			return False
		return True
Esempio n. 18
0
 def setUp(self) -> None:
     """
     Build the cluster, buffer and scheduler fixtures.

     The scheduler is given an *instance* of FifoAlgorithm rather than
     the class object itself.
     """
     self.env = simpy.Environment()
     self.cluster = Cluster(self.env, CLUSTER_CONFIG)
     self.buffer = Buffer(self.env, self.cluster, BUFFER_CONFIG)
     self.scheduler = Scheduler(self.env, self.buffer, self.cluster,
                                FifoAlgorithm())
Esempio n. 19
0
class TestSchedulerIngest(unittest.TestCase):
    """Check how the scheduler gates and provisions telescope ingest."""

    def setUp(self) -> None:
        """
        Build the cluster, buffer, scheduler, planner and telescope.

        The scheduler is given an *instance* of DynamicAlgorithmFromPlan
        rather than the class object itself.
        """
        self.env = simpy.Environment()
        config = Config(CONFIG)
        self.cluster = Cluster(self.env, config)
        self.buffer = Buffer(self.env, self.cluster, config)
        self.scheduler = Scheduler(self.env, self.buffer, self.cluster,
                                   DynamicAlgorithmFromPlan())
        self.planner = Planner(self.env, PLANNING_ALGORITHM, self.cluster,
                               SHADOWPlanning('heft'))
        self.telescope = Telescope(self.env, config, self.planner,
                                   self.scheduler)

    def testSchdulerCheckIngestReady(self):
        """
        Check the return status of check_ingest_capacity is correct
        """
        pipelines = self.telescope.pipelines
        observation = self.telescope.observations[0]

        max_ingest = self.telescope.max_ingest
        # There should be capacity
        self.assertEqual(0.0, self.env.now)
        ret = self.scheduler.check_ingest_capacity(observation, pipelines,
                                                   max_ingest)
        self.assertTrue(ret)

        # Let's remove capacity to check it returns false: only the first
        # three available resources are left visible to the scheduler.
        tmp = self.cluster._resources['available']
        self.cluster._resources['available'] = self.cluster._resources[
            'available'][:3]
        ret = self.scheduler.check_ingest_capacity(observation, pipelines,
                                                   max_ingest)
        self.assertFalse(ret)
        # Restore the original list and confirm it is intact.
        self.cluster._resources['available'] = tmp
        self.assertEqual(10, len(self.cluster._resources['available']))

    def testSchedulerProvisionsIngest(self):
        """
        Ensure that the scheduler correctly coordinates ingest onto the
        Cluster and into the Buffer

        Returns
        -------
        None
        """
        pipelines = self.telescope.pipelines
        max_ingest = self.telescope.max_ingest
        observation = self.telescope.observations[0]

        # The return value is not needed here; the call is kept in case it
        # has side effects - TODO confirm and drop if it has none.
        self.scheduler.check_ingest_capacity(
            observation, pipelines, max_ingest)
        self.env.process(self.cluster.run())
        self.env.process(self.buffer.run())
        observation.status = RunStatus.WAITING
        self.env.process(
            self.scheduler.allocate_ingest(observation, pipelines,
                                           self.planner))

        self.env.run(until=1)
        self.assertEqual(5, len(self.cluster._resources['available']))
        # After 1 timestep the hot buffer's current capacity should be 496
        self.assertEqual(496, self.buffer.hot[0].current_capacity)
        self.env.run(until=30)
        # By t=30 every resource is free again and the data has moved from
        # the hot buffer to the cold buffer.
        self.assertEqual(10, len(self.cluster._resources['available']))
        self.assertEqual(5, len(self.cluster._tasks['finished']))
        self.assertEqual(500, self.buffer.hot[0].current_capacity)
        self.assertEqual(210, self.buffer.cold[0].current_capacity)
Esempio n. 20
0
    def __init__(self,
                 env,
                 config,
                 instrument,
                 planning_model,
                 planning_algorithm,
                 scheduling,
                 delay=None,
                 timestamp=None,
                 to_file=False,
                 hdf5_path=None,
                 **kwargs):
        """
        Construct the monitor, cluster, buffer, planner, scheduler and
        instrument for one simulation.

        Parameters
        ----------
        env : simpy.Environment
            Simulation environment shared by every actor.
        config : str
            Configuration path; it is stored and also passed to ``Config``.
        instrument : callable
            Called with ``env``, ``config``, ``planner`` and ``scheduler``
            keyword arguments to build the instrument.
        planning_model : object
            Planning model handed to the ``Planner``.
        planning_algorithm : object
            Planning algorithm reference handed to the ``Planner``.
        scheduling : callable
            Called with no arguments to create the scheduling algorithm
            given to the ``Scheduler``.
        delay : DelayModel, optional
            Delay model for the planner. When falsy, a
            ``DelayModel(0.0, "normal", DelayModel.DelayDegree.NONE)`` is
            used.
        timestamp : str, optional
            Identifier passed to the ``Monitor``. When falsy, the current
            wall-clock time in whole seconds is used.
        to_file : bool, optional
            Whether simulation output is to be stored to file; requires
            ``hdf5_path``.
        hdf5_path : str, optional
            Path of the HDF5 store to open when ``to_file`` is true.
        **kwargs
            ``delimiters`` is read if present (default ``''``).

        Raises
        ------
        ValueError
            If ``to_file`` is true without an ``hdf5_path``, or if
            ``pandas.HDFStore`` rejects ``hdf5_path`` with a ``ValueError``.
        """

        #: :py:obj:`simpy.Environment` object
        self.env = env

        if not timestamp:
            # Default to the simulation start time, truncated to seconds.
            timestamp = f'{time.time()}'.split('.')[0]
        self._timestamp = timestamp
        #: :py:obj:`~topsim.core.monitor.Monitor` instance
        self.monitor = Monitor(self, timestamp)

        # Process necessary config files

        self._cfg_path = config  #: Configuration path

        # Initialise Actor and Resource objects
        cfg = Config(config)
        #: :py:obj:`~topsim.core.cluster.Cluster` instance
        self.cluster = Cluster(env, cfg)
        #: :py:obj:`~topsim.core.buffer.Buffer` instance
        self.buffer = Buffer(env, self.cluster, cfg)

        if not delay:
            # TODO Have this approach replicated so we don't specify the
            #  model outside the simulation.
            delay = DelayModel(0.0, "normal", DelayModel.DelayDegree.NONE)
        self.planner = Planner(env, planning_algorithm, self.cluster,
                               planning_model, delay)
        scheduling_algorithm = scheduling()
        #: :py:obj:`~topsim.core.scheduler.Scheduler` instance
        self.scheduler = Scheduler(env, self.buffer, self.cluster,
                                   scheduling_algorithm)
        #: User-defined :py:obj:`~topsim.core.instrument.Instrument` instance
        self.instrument = instrument(env=self.env,
                                     config=cfg,
                                     planner=self.planner,
                                     scheduler=self.scheduler)

        #: :py:obj:`bool` Flag for storing simulation output to file.
        self.to_file = to_file
        if self.to_file and hdf5_path:
            if os.path.exists(hdf5_path):
                LOGGER.warning('Output HDF5 path already exists, '
                               'simulation appended to existing file')
            try:
                # Open and close straight away so that an unusable path is
                # reported now rather than later.
                self._hdf5_store = pd.HDFStore(hdf5_path)
                self._hdf5_store.close()
            except ValueError as err:
                # An ``except`` clause must name an exception class; the
                # explanatory message is attached when re-raising.
                raise ValueError(
                    'Check pandas.HDFStore documentation for valid file path'
                ) from err
        elif self.to_file:
            raise ValueError(
                'Attempted to initialise Simulation object that outputs '
                'to file without providing file path')
        else:
            LOGGER.info(
                'Simulation output will not be stored directly to file')

        #: Used to separate different simulations in HDF5 output
        self._delimiters = kwargs.get('delimiters', '')

        self.running = False
Esempio n. 21
0
class Simulation:
    """
    The Simulation class is a wrapper for all Actors; we start the simulation
    through the simulation class, which in turn invokes the initial Actors and
    monitoring, and provides the conditions for checking if the simulation has
    finished.

    Parameters
    ----------

    env : :py:obj:`simpy.Environment` object
        The discrete-event simulation environment. This is the way TOpSim
        simulation maintains state across the different actors,
        and interfaces with the simpy processes.

    config : str
        Path to the simulation JSON configuration file.

    instrument : :py:obj:`~topsim.core.instrument.Instrument` subclass
        User-defined implementation of the Instrument class. The class
        itself is passed in; the simulation instantiates it with the
        environment, parsed configuration, planner and scheduler.

    planning_model : :py:obj:`~topsim.algorithms.planning.Planning` object
        User-defined implementation of the planning algorithm class.

    planning_algorithm : str
        Reference to the specific algorithm implemented in `planning_model`.

    scheduling : :py:obj:`~topsim.algorithms.scheduling.Algorithm` subclass
        User-defined implementation of the scheduling algorithm
        :py:obj:`abc.ABC`. The class is passed in and instantiated (with no
        arguments) by the simulation.

    delay : :py:obj:`~topsim.core.delay.DelayModel`, optional
        Delay model for the simulation. When omitted, a
        ``DelayModel(0.0, "normal", DelayModel.DelayDegree.NONE)`` is used.

    timestamp : str, optional
        Optional Simulation start-time; this is useful for testing, to ensure we
        name the file and the tests match up. Also useful if you do not want to
        use the time of the simulation as the name. Defaults to the current
        wall-clock time in whole seconds.

    to_file : bool, optional
        `True` if the simulation output is to be written to the HDF5 store
        at `hdf5_path`; `False` will return pandas DataFrame objects at the
        completion of the
        :py:meth:`~topsim.core.simulation.Simulation.start` function.

    hdf5_path : str, optional
        Path of the HDF5 store that receives the output. Required when
        `to_file` is `True`.

    **kwargs
        `delimiters` (str) is the only key that is read; it is used to
        separate different simulations within a single HDF5 store.

    Notes
    -----
    If to_file is left as `False`, simulation results and output will be
    returned as Pandas DataFrames (see
    :py:meth:`~topsim.core.simulation.Simulation.start`). This is designed for
    running multiple simulations, allowing for the appending of individual
    simulation results to a 'global' :py:obj:`~pandas.DataFrame`.

    Passing in the option `delimiters` provides a way of differentiating
    between multiple simulations within a single HDF5 store (for example,
    in an experiment). A typical experimental loop may involve the following
    structure:

    >>> for heuristic in list_of_scheduling_heuristics:
    >>>     for algorithm in list_of_planning_algorithms:
    >>>         for cfg in list_of_system_configs:
    >>>             ...
    >>>             delimiter = f'{heuristic}/{algorithm}/{cfg}'

    This means when querying HDF5 output files, the results of each
    simulation can be filtered nicely:

    >>> store = pd.HDFStore('path/to/output.h5')
    >>> # Returns a dataframe of simulation results
    >>> store['heuristic_1/algorithm_3/cfg.json']

    Examples
    --------

    Standard simulation with data frame output

    >>> env = simpy.Environment()
    >>> config = 'path/to/config.json'
    >>> plan = PlanningModel()
    >>> simulation = Simulation(
    >>>    env, config, CustomInstrument, plan, 'heft', SchedulingModel
    >>> )

    If we want delays in the model:

    >>> dm = DelayModel(prob=0.1, dist='normal', dm=DelayModel.DelayDegree.LOW)
    >>> simulation = Simulation(
    >>>    env, config, CustomInstrument, plan, 'heft', SchedulingModel,
    >>>    delay=dm
    >>> )

    Running a simulation to completion:

    >>> global_df, task_df = simulation.start()

    Running a simulation for a specific time period, then resuming:

    >>> global_df, task_df = simulation.start(runtime=100)
    >>> ### Check current status of simulation
    >>> now = simulation.resume(until=150)

    Raises
    ------
    ValueError
        If `to_file` is `True` and no `hdf5_path` is given, or if
        :py:obj:`pandas.HDFStore` rejects `hdf5_path` with a `ValueError`.
    """
    def __init__(self,
                 env,
                 config,
                 instrument,
                 planning_model,
                 planning_algorithm,
                 scheduling,
                 delay=None,
                 timestamp=None,
                 to_file=False,
                 hdf5_path=None,
                 **kwargs):

        #: :py:obj:`simpy.Environment` object
        self.env = env

        # Without an explicit timestamp, fall back to the wall-clock start
        # time truncated to whole seconds.
        if not timestamp:
            timestamp = f'{time.time()}'.split('.')[0]
        self._timestamp = timestamp
        #: :py:obj:`~topsim.core.monitor.Monitor` instance
        self.monitor = Monitor(self, timestamp)

        self._cfg_path = config  #: Configuration path

        # Initialise Actor and Resource objects
        cfg = Config(config)
        #: :py:obj:`~topsim.core.cluster.Cluster` instance
        self.cluster = Cluster(env, cfg)
        #: :py:obj:`~topsim.core.buffer.Buffer` instance
        self.buffer = Buffer(env, self.cluster, cfg)

        if not delay:
            # TODO Have this approach replicated so we don't specify the
            #  model outside the simulation.
            delay = DelayModel(0.0, "normal", DelayModel.DelayDegree.NONE)
        self.planner = Planner(env, planning_algorithm, self.cluster,
                               planning_model, delay)
        scheduling_algorithm = scheduling()
        #: :py:obj:`~topsim.core.scheduler.Scheduler` instance
        self.scheduler = Scheduler(env, self.buffer, self.cluster,
                                   scheduling_algorithm)
        #: User-defined :py:obj:`~topsim.core.instrument.Instrument` instance
        self.instrument = instrument(env=self.env,
                                     config=cfg,
                                     planner=self.planner,
                                     scheduler=self.scheduler)

        #: :py:obj:`bool` Flag for writing simulation output to an HDF5 store.
        self.to_file = to_file
        # Always defined, so start() can test it whatever `to_file` is.
        self._hdf5_store = None
        if self.to_file and hdf5_path:
            if os.path.exists(hdf5_path):
                LOGGER.warning('Output HDF5 path already exists, '
                               'simulation appended to existing file')
            try:
                # Open once to validate the path, then close again; the
                # store is only re-opened when results are written.
                self._hdf5_store = pd.HDFStore(hdf5_path)
                self._hdf5_store.close()
            except ValueError as err:
                raise ValueError(
                    'Check pandas.HDFStore documentation for valid file path'
                ) from err
        elif self.to_file:
            raise ValueError(
                'Attempted to initialise Simulation object that outputs '
                'to file without providing file path')
        else:
            LOGGER.info(
                'Simulation output will not be stored directly to file')

        #: Used to separate different simulations in HDF5 output
        self._delimiters = kwargs.get('delimiters', '')

        self.running = False

    def start(self, runtime=-1):
        """
        Run the simulation, either for the specified runtime, OR until the
        exit conditions are reached.

        The exit conditions are:

            * There are no more observations to process,
            * There is nothing left in the Buffer
            * The Scheduler is not waiting to allocate machines to resources
            * There are no tasks still running on the cluster.

        Parameters
        ----------
        runtime : int
            Nominated runtime of the simulation. If the simulation length is
            known, pass that as the argument. If not, passing in a
            non-positive value (typically, just -1) will run the simulation
            until the exit condition is reached.

        Returns
        -------
        If `to_file` is True (and an HDF5 store was configured):
            None; the results are written to the HDF5 store instead.
        If `to_file` is False:
            Two pandas.DataFrame objects for global sim runtime and task data.

        Raises
        ------
        RuntimeError
            If the simulation has already been started.
        """
        if self.running:
            raise RuntimeError(
                "start() has already been called! "
                "Use resume() to continue a simulation that is already in "
                "progress.")

        self.running = True
        self.env.process(self.monitor.run())
        self.env.process(self.instrument.run())
        self.env.process(self.cluster.run())

        self.scheduler.start()
        self.env.process(self.scheduler.run())
        self.env.process(self.buffer.run())

        if runtime > 0:
            self.env.run(until=runtime)
        else:
            # Advance one timestep at a time until every actor is idle,
            # then run one final timestep.
            while not self.is_finished():
                self.env.run(self.env.now + 1)
            self.env.run(self.env.now + 1)
        LOGGER.info("Simulation Finished @ %s", self.env.now)

        if self.to_file and self._hdf5_store is not None:
            global_df = self.monitor.df
            task_df = self._generate_final_task_data()
            self._hdf5_store.open()
            try:
                self._compose_hdf5_output(global_df, task_df)
            finally:
                # Release the file handle even if writing the output fails.
                self._hdf5_store.close()
            return None

        return self.monitor.df, self._generate_final_task_data()

    def resume(self, until):
        """
        Resume a simulation for a period of time.

        Useful for testing purposes, as we do not re-initialise the process
        calls as we do in
        :py:obj:`~core.topsim.simulation.Simulation.start`

        Parameters
        ----------
        until : int
            The (non-inclusive) :py:obj:`Simpy.env.now` timestep that we want to
            continue to in the simulation

        Returns
        -------
        self.env.now : float
            The current time in the simulation

        Raises
        ------
        RuntimeError
            If :py:obj:`~core.topsim.simulation.Simulation.start` has not been
            called yet.
        """
        if not self.running:
            raise RuntimeError(
                "Simulation has not been started! Call start() to initialise "
                "the process stack.")
        self.env.run(until=until)
        return self.env.now

    def is_finished(self):
        """
        Check if simulation is finished based on the following finish
        conditions:

        * The Instrument is idle (i.e. has no more observations left to run)
        * The Cluster is idle (no tasks are running)
        * The Buffer is empty (no data sits on the buffer)
        * The Schedule is idle (there are no more workflows/tasks queued)

        It is only when all of these return True that the simulation is
        regarded as finished.

        Returns
        -------
        True if the above requirements are met; False otherwise (i.e. the
        simulation is still running).

        """
        # `and` short-circuits, so later actors are only queried once the
        # earlier ones report being done.
        return bool(self.buffer.is_empty() and self.cluster.is_idle()
                    and self.scheduler.is_idle()
                    and self.instrument.is_idle())

    # NOTE(review): declared static yet takes `self`, so callers must pass
    # two positional arguments. The signature is kept for compatibility.
    @staticmethod
    def _split_monolithic_config(self, json):
        """Return `json` unchanged (placeholder implementation)."""
        return json

    def _generate_final_task_data(self):
        """
        Generate a final data frame from the cluster's task dataframe output.

        The cluster's data frame is transposed and three columns are
        appended, each repeating one value on every row: `planning` (the
        planner's algorithm), `scheduling` (the ``repr`` of the scheduler's
        algorithm) and `config` (the configuration path).

        Returns
        -------
        df : :py:obj:`pandas.DataFrame`
            The transposed task data with the three extra columns.
        """

        df = self.cluster.finished_task_time_data().T
        size = len(df)
        df['scheduling'] = [repr(self.scheduler.algorithm)] * size
        df['planning'] = [self.planner.algorithm] * size
        df['config'] = [self._cfg_path] * size
        return df

    def _compose_hdf5_output(self, global_df, tasks_df):
        """
        Put global simulation, task specific, and configuration data into
        the (already opened) :py:obj:`pandas.HDFStore` of this simulation.

        Parameters
        ----------
        global_df : :py:obj:pandas.DataFrame
            The global, per-timestep overview of the simulation
        tasks_df : :py:obj:pandas.DataFrame
            Information on each tasks' execution throughout the simulation.

        Returns
        -------
        The :py:obj:`pandas.HDFStore` the data was written to.
        """
        if self._timestamp:
            ts = f'd{self._timestamp}'
        else:
            ts = f'd{datetime.datetime.today().strftime("%y_%m_%d_%H_%M_%S")}'

        workflows = self._create_config_table(self._cfg_path)

        # Key layout: d<timestamp>/<delimiters>/<config file name sans .json>
        sanitised_path = self._cfg_path.replace(".json", '').split('/')[-1]
        final_key = f'{ts}/{self._delimiters}/{sanitised_path}'
        self._hdf5_store.put(key=f"{final_key}/sim", value=global_df)
        self._hdf5_store.put(key=f'{final_key}/tasks', value=tasks_df)
        self._hdf5_store.put(key=f'{final_key}/config', value=workflows)

        return self._hdf5_store

    def _stringify_json_data(self, path):
        """
        Load the JSON file at `path` and return it re-serialised as a string.

        Parameters
        ----------
        path : str
            Path of the JSON file to read.

        Returns
        -------
        jstr : String representation of JSON-encoded data

        Raises
        ------
        json.JSONDecodeError
            If the file does not contain valid JSON.
        """
        with open(path) as fp:
            jdict = json.load(fp)
        return json.dumps(jdict)

    def _create_config_table(self, path):
        """
        From the simulation config files, find the paths for each observation
        workflow and produce a table of this information

        Parameters
        ----------
        path : str
            Path of the simulation JSON configuration file.

        Returns
        -------
        df : :py:obj:`pandas.DataFrame`
            One row for the simulation configuration followed by one row per
            pipeline, with columns `entity`, `config_path` and `config_json`.
        """

        cfg_str = self._stringify_json_data(path)
        pipelines = json.loads(cfg_str)['instrument']['telescope']['pipelines']
        rows = [['simulation_config', path, cfg_str]]
        for observation, pipeline in pipelines.items():
            # Workflow paths under 'publications' are read from
            # 'archived_results' instead.
            wf_path = pipeline['workflow'].replace('publications',
                                                   'archived_results')
            rows.append(
                [f'{observation}', wf_path,
                 self._stringify_json_data(wf_path)])

        return pd.DataFrame(rows,
                            columns=['entity', 'config_path', 'config_json'])
Esempio n. 22
0
class TestDelaysInActors(unittest.TestCase):

    def setUp(self):
        """
        Build every actor with a delay model (probability 0.9, "normal"
        distribution, HIGH degree) and register their processes with the
        environment, so the tests can check that delay flags reach us.

        Returns
        -------

        """
        env = simpy.Environment()
        self.env = env
        config = Config(INTEGRATION)

        cluster = Cluster(env, config)
        self.cluster = cluster
        buffer = Buffer(env, cluster, config)
        self.buffer = buffer

        # The same delay model is shared by the planning model and planner.
        delay_model = DelayModel(0.9, "normal", DelayModel.DelayDegree.HIGH)
        planning_model = SHADOWPlanning('heft', delay_model=delay_model)
        self.planner = Planner(env,
                               PLANNING_ALGORITHM,
                               cluster,
                               planning_model,
                               delay_model=delay_model)

        self.scheduler = Scheduler(env, buffer, cluster,
                                   DynamicAlgorithmFromPlan())
        self.telescope = Telescope(env, config, self.planner, self.scheduler)

        # Register the actor processes: cluster, buffer, scheduler,
        # telescope (the scheduler is started before its process is added).
        env.process(cluster.run())
        env.process(buffer.run())
        self.scheduler.start()
        env.process(self.scheduler.run())
        env.process(self.telescope.run())

    def test_scheduler_delay_detection(self):
        """
        Nothing should change until we reach the workflow plan, as we are
        testing TaskDelays.

        The scheduler is expected to report ONTIME up to t=44 and DELAYED
        from t=45 onwards.

        Returns
        -------
        """

        self.env.run(until=1)
        # Remember - env starts at 0, we don't start until 1.
        self.assertEqual(10, len(self.cluster._resources['available']))
        self.env.run(until=2)

        # After 1 timestep the hot buffer capacity reads 496.
        self.assertEqual(496, self.buffer.hot[0].current_capacity)
        self.env.run(until=31)
        self.assertEqual(5, len(self.cluster._tasks['finished']))
        self.assertEqual(500, self.buffer.hot[0].current_capacity)
        self.env.run(until=44)
        # We know that the schedule has been delayed - however, we don't
        # report this to the telescope until we know how long we are delayed
        # (that is, until the task has completely finished its duration).
        # In this instance, we know that the first task is going to be
        # delayed, and so wait until it's completed execution to trigger a
        # delay.
        self.assertEqual(ScheduleStatus.ONTIME, self.scheduler.schedule_status)
        self.env.run(until=45)
        # assertEqual, not assertTrue: assertTrue would treat the status as
        # the failure message and pass unconditionally.
        self.assertEqual(ScheduleStatus.DELAYED,
                         self.scheduler.schedule_status)
        self.env.run(until=124)
        # Assert that we still have tasks running
        # self.assertLess(
        #     0, len(self.cluster.clusters['default']['tasks']['running'])
        # )
        self.assertNotEqual(250, self.buffer.cold[0].current_capacity)

    def test_telescope_delay_detection(self):
        """
        Step the environment forward and check that the scheduler status
        moves from ONTIME (at t=32) to DELAYED (at t=100), and that the
        telescope's `delayed` flag is set by then.

        Returns
        -------

        """
        self.env.run(until=1)
        # Remember - env starts at 0, we don't start until 1.
        self.assertEqual(10, len(self.cluster._resources['available']))
        self.env.run(until=2)

        # After 1 timestep the hot buffer capacity reads 496.
        self.assertEqual(496, self.buffer.hot[0].current_capacity)
        self.env.run(until=31)
        self.assertEqual(5, len(self.cluster._tasks['finished']))
        self.assertEqual(500, self.buffer.hot[0].current_capacity)
        self.env.run(until=32)
        # The schedule is still reported as on time at this point.
        self.assertEqual(ScheduleStatus.ONTIME, self.scheduler.schedule_status)
        self.env.run(until=100)
        # By t=100 both the scheduler and the telescope report the delay.
        self.assertEqual(ScheduleStatus.DELAYED,self.scheduler.schedule_status)
        self.assertTrue(self.telescope.delayed)

    def test_telescope_delay_greedy_decision(self):
        """
Esempio n. 23
0
class TestSchedulerFIFO(unittest.TestCase):
    """Exercise the Scheduler with the FIFO algorithm on one observation."""

    def setUp(self):
        """Create the environment, actors and a single test observation."""
        self.env = simpy.Environment()
        sched_algorithm = FifoAlgorithm()
        self.planner = Planner(self.env, test_data.planning_algorithm,
                               test_data.machine_config)
        self.cluster = Cluster(self.env, CLUSTER_CONFIG)
        self.buffer = Buffer(self.env, self.cluster, BUFFER_CONFIG)
        self.observations = [
            Observation('scheduler_observation',
                        OBS_START_TME,
                        OBS_DURATION,
                        OBS_DEMAND,
                        OBS_WORKFLOW,
                        type='continuum',
                        data_rate=5)
        ]

        # The scheduler has to exist before the telescope is built, because
        # the telescope receives it as an argument.
        self.scheduler = Scheduler(self.env, sched_algorithm, self.buffer,
                                   self.cluster)
        self.telescope = Telescope(self.env, OBSERVATION_CONFIG,
                                   self.scheduler, self.planner)

    def tearDown(self):
        """Nothing to clean up."""
        pass

    def testSchedulerDecision(self):
        """
        Run the scheduler on a single planned observation and check that
        task 0 ends up on machine "cat2_m2" after one timestep.
        """
        # algorithms.make_decision() will do something interesting only when
        # we add a workflow plan to the buffer.
        next(self.planner.run(self.observations[0]))
        # Observation is what we are interested in with the algorithms,
        # because the observation stores the plan; the observation object is
        # what is stored in the buffer's 'observations_for_processing' queue.
        self.buffer.add_observation_to_waiting_workflows(self.observations[0])
        # Let's start doing algorithms things!
        # It is important to note that the algorithms is only effective
        # within the context of a simulation, as it is directly affected by
        # calls to env.now; this means we need to run a mini-simulation in
        # this test - which we can 'simulate' - haha - by using the
        # environment and clever timeouts.
        # We get an observation into the buffer, the algorithms makes a
        # decision - what then?
        # We use check_buffer to update the workflows in the algorithms
        # workflow list. This is called every time-step in the simulation,
        # and is how we add workflow plans to the scheduler's list.

        test_flag = True
        self.env.process(self.scheduler.run())
        self.env.run(until=1)
        print(self.env.now)
        # We should be able to get this working nicely.
        # For this experiment, we are running the scheduler on a single
        # observation, and getting it to allocate a task to the required
        # machine. The first task should be scheduled at T = 0, so at t = 1
        # we should check to make sure that the target has been scheduled,
        # and that it is on the appropriate machine.
        expected_machine = "cat2_m2"
        expected_task_no = 0
        self.assertTrue(self.cluster.running_tasks)
        for m in self.cluster.machines:
            if m.id == expected_machine:
                self.assertEqual(m.current_task.id, expected_task_no)
        # Need to assert that there is something in cluster.running_tasks
        # first element of running tasks should be the first task
        self.env.run(until=100)
        print(self.env.now)
        # NOTE(review): `self.algorithms` is never assigned in this class
        # and `test_flag` is never cleared, so this loop currently ends in
        # an AttributeError - presumably `self.scheduler` was meant; confirm
        # the intended exit condition before changing it.
        while test_flag:
            next(self.algorithms.run())
Esempio n. 24
0
class TestSchedulerDynamicPlanAllocation(unittest.TestCase):
    """Check task allocation by the Scheduler when following a HEFT plan."""

    def setUp(self):
        """Wire up cluster, planner, buffer, scheduler and telescope."""
        env = simpy.Environment()
        self.env = env
        sched_algorithm = DynamicAlgorithmFromPlan()
        config = Config(HEFT_CONFIG)

        cluster = Cluster(env, config)
        self.cluster = cluster
        self.planner = Planner(env, PLANNING_ALGORITHM, cluster,
                               SHADOWPlanning('heft'))
        buffer = Buffer(env, cluster, config)
        self.buffer = buffer
        self.scheduler = Scheduler(env, buffer, cluster, sched_algorithm)
        self.telescope = Telescope(env, config, self.planner, self.scheduler)

    def tearDown(self):
        """Nothing to clean up."""
        pass

    def testAllocationTasksNoObservation(self):
        """
        allocate_tasks assumes we have:

            * An observation stored in the ColdBuffer
            * A plan stored for that observation
            * Access to a scheduling algorithm (here,
              DynamicAlgorithmFromPlan).

        Need to check:
            * If there is no current observation, we can't do anything
            * If there is an observation, but no plan, we assign the
              observation plan to the current_plan.
            * Once things are running, we make sure things are being
              scheduled onto the right machines
            * They should also be running for the correct period of time.

        The allocations for the HEFT algorithm are (in sorted order):
            id - mid    - (ast,aft)
            0 - cat2_m2 - (0,11)
            3 - cat2_m2 - (11,21)
            2 - cat2_m2 - (21,30)
            4 - cat1_m1 - (22, 40)
            1 - cat0_m0 - (29,42)
            5 - cat2_m2 - (30,45)
            6 - cat2_m2 - (45, 55)
            8 - cat2_m2 - (58, 71)
            7 - cat0_m0 - (60, 61)
            9 - cat0_m0 - (84,98)

        """
        observation = self.telescope.observations[0]

        # Before the observation is queued, allocation must fail.
        allocation = self.scheduler.allocate_tasks(observation)
        with self.assertRaises(RuntimeError):
            next(allocation)

        expected_order = [0, 3, 2, 4, 1, 5, 6, 8, 7, 9]
        expected_names = [
            observation.name + '_' + str(self.env.now) + '_' + str(tid)
            for tid in expected_order
        ]

        # Queue the observation, attach a plan to it, then allocate again.
        self.scheduler.observation_queue.append(observation)
        observation.ast = self.env.now
        observation.plan = self.planner.run(observation, self.buffer,
                                            self.telescope.max_ingest)
        self.env.process(self.scheduler.allocate_tasks(observation))
        self.env.run(1)

        planned_order = [a.task.tid for a in observation.plan.exec_order]
        self.assertListEqual(expected_order, planned_order)

        self.buffer.cold[0].observations['stored'].append(observation)
        self.env.run(until=99)

        # All ten tasks are done and nothing is left running or queued.
        tasks = self.cluster._tasks
        self.assertEqual(10, len(tasks['finished']))
        self.assertEqual(0, len(tasks['running']))
        self.assertEqual(0, len(self.scheduler.observation_queue))
Esempio n. 25
0
class TestSchedulerIngest(unittest.TestCase):
    def setUp(self) -> None:
        """Create the environment, cluster, buffer and scheduler."""
        env = simpy.Environment()
        cluster = Cluster(env, CLUSTER_CONFIG)
        buffer = Buffer(env, cluster, BUFFER_CONFIG)
        # The FifoAlgorithm class itself (not an instance) is handed over.
        scheduler = Scheduler(env, buffer, cluster, FifoAlgorithm)

        self.env = env
        self.cluster = cluster
        self.buffer = buffer
        self.scheduler = scheduler

    def testSchdulerCheckIngestReady(self):
        """
        Check the return status of check_ingest_capacity is correct: truthy
        with all resources available, falsy once only three are left.
        """
        pipelines = {"continuum": {"demand": 5}}
        observation = Observation('planner_observation',
                                  OBS_START_TME,
                                  OBS_DURATION,
                                  OBS_DEMAND,
                                  OBS_WORKFLOW,
                                  type="continuum",
                                  data_rate=2)

        # At time zero there should be capacity for the ingest.
        self.assertEqual(0.0, self.env.now)
        self.assertTrue(
            self.scheduler.check_ingest_capacity(observation, pipelines))

        # Shrink the pool to three resources so the check has to fail.
        all_resources = self.cluster.available_resources
        self.cluster.available_resources = (
            self.cluster.available_resources[:3])
        self.assertFalse(
            self.scheduler.check_ingest_capacity(observation, pipelines))

        # Put the full pool back and confirm all ten resources are there.
        self.cluster.available_resources = all_resources
        self.assertEqual(10, len(self.cluster.available_resources))

    def testSchedulerProvisionsIngest(self):
        """
        Ensure that the scheduler correctly coordinates ingest onto the
        Cluster and into the Buffer.

        Returns
        -------
        """
        pipelines = {"continuum": {"demand": 5}}

        observation = Observation('planner_observation',
                                  OBS_START_TME,
                                  OBS_DURATION,
                                  OBS_DEMAND,
                                  OBS_WORKFLOW,
                                  type="continuum",
                                  data_rate=2)
        # The capacity check result is not asserted on in the lines below.
        ready_status = self.scheduler.check_ingest_capacity(
            observation, pipelines)
        observation.status = RunStatus.WAITING
        status = self.env.process(
            self.scheduler.allocate_ingest(observation, pipelines))
        self.env.run(until=1)
        # Five of the ten resources are taken by the ingest (demand is 5).
        self.assertEqual(5, len(self.cluster.available_resources))
        # After 1 timestep, data in the HotBuffer should be 2
        self.assertEqual(498, self.buffer.hot.current_capacity)
        self.env.run(until=11)
        # By t=11 all resources are free again and five tasks have finished.
        self.assertEqual(10, len(self.cluster.available_resources))
        self.assertEqual(5, len(self.cluster.finished_tasks))
        self.assertEqual(480, self.buffer.hot.current_capacity)