def test_simulation_nofile_option(self): simulation = Simulation( self.env, CONFIG, self.instrument, planning_algorithm='heft', planning_model=SHADOWPlanning('heft'), scheduling=DynamicAlgorithmFromPlan, delay=None, timestamp=None, ) simdf, taskdf = simulation.start() self.assertEqual(120, len(simdf)) self.env = simpy.Environment() simulation = Simulation( self.env, CONFIG, self.instrument, planning_algorithm='fcfs', planning_model=SHADOWPlanning('fcfs'), scheduling=DynamicAlgorithmFromPlan, delay=None, timestamp=None, # delimiters=f'test/{algorithm}' ) simdf, taskdf = simulation.start() self.assertEqual(128, len(simdf))
def test_multi_simulation_data_merge(self): # global_sim_df = pd.DataFrame() # global_task_df = pd.DataFrame() for algorithm in ['heft', 'fcfs']: env = simpy.Environment() simulation = Simulation( env, CONFIG, self.instrument, planning_algorithm=algorithm, planning_model=SHADOWPlanning(algorithm), scheduling=DynamicAlgorithmFromPlan, delay=None, timestamp='unittest', hdf5_path='test/simulation_data/test_hdf5.h5', to_file=True, delimiters=f'{algorithm}') simulation.start() self.assertTrue(os.path.exists('test/simulation_data/test_hdf5.h5')) heft_key = '/dunittest/heft/heft_single_observation_simulation/sim/' fcfs_key = '/dunittest/fcfs/heft_single_observation_simulation/sim/' heft_sim = pd.read_hdf('test/simulation_data/test_hdf5.h5', key=heft_key) self.assertEqual(120, len(heft_sim)) self.assertEqual(3, heft_sim.iloc[-1]['available_resources'])
class Episode(object):
    """
    Assemble a cluster/broker/scheduler simulation for a single episode
    and drive it to completion.
    """

    def __init__(self, machine_configs, task_configs, algorithm, event_file):
        self.env = simpy.Environment()
        # Build the cluster first, then hand all actors to the simulation.
        cluster = Cluster()
        cluster.add_machines(machine_configs)
        self.simulation = Simulation(
            self.env,
            cluster,
            Broker(self.env, task_configs),
            Scheduler(self.env, algorithm),
            event_file,
        )

    def run(self):
        """Start the simulation and let the simpy event loop drain."""
        self.simulation.start()
        self.env.run()
class TestSimulationRuntime(unittest.TestCase):
    """Check that a simulation honours an explicit runtime limit."""

    def setUp(self) -> None:
        env = simpy.Environment()
        planning_algorithm = 'heft'
        scheduling_algorithm = FifoAlgorithm()
        # NOTE: a previous version also bound `event_file = EVENT_FILE`
        # but never used it; the constant is passed directly below.
        self.simulation = Simulation(
            env,
            TELESCOPE_CONFIG,
            CLUSTER_CONFIG,
            BUFFER_CONFIG,
            planning_algorithm,
            scheduling_algorithm,
            EVENT_FILE,
            visualisation=False,
        )

    def testLimitedRuntime(self):
        # Stop the event loop after 60 simulated time units.
        self.simulation.start(runtime=60)
def test_simulation_produces_file(self): ts = f'{datetime.datetime(2021,1,1).strftime("%y_%m_%d_%H_%M_%S")}' simulation = Simulation(self.env, CONFIG, Telescope, planning_model=SHADOWPlanning('heft'), planning_algorithm='heft', scheduling=DynamicAlgorithmFromPlan, delay=None, timestamp=ts, to_file=True, hdf5_path=self.output) simulation.start(runtime=60) self.assertTrue(os.path.exists(self.output)) store = pd.HDFStore(self.output) store[f'd{ts}/standard_simulation/sim']
def test_run_batchmodel(self): simulation = Simulation(self.env, CONFIG, Telescope, planning_model=BatchPlanning('batch'), planning_algorithm='batch', scheduling=BatchProcessing, delay=None, timestamp=f'{cwd}/test/data/output/{0}') sim, task = simulation.start() self.assertGreater(len(sim), 0)
class TestSimulationLargeExperiment(unittest.TestCase):
    """Smoke-test a large, complex configuration end-to-end."""

    def setUp(self) -> None:
        env = simpy.Environment()
        self.simulation = Simulation(
            env,
            LARGE_CONFIG,
            Telescope,
            planning_algorithm='heft',
            planning_model=SHADOWPlanning('heft'),
            scheduling=DynamicAlgorithmFromPlan,
            delay=None,
            timestamp=f'test/basic-workflow-data/output/{0}',
        )

    def testSimulationRuns(self):
        """
        Quick check that a large, complex simulation with all the moving
        pieces runs to completion.

        Some allocations are deliberately ignored: when an observation
        provisions ingest resources in the same timestep as the scheduler
        makes allocations, the observation/ingest is provisioned first,
        but the resources are not 'consumed' (moved from 'available' to
        'unavailable') until after the scheduler has already seen them as
        available and assigned tasks to them. A future 'shadow'
        allocation scheme could maintain more global state while keeping
        the actors 'theoretically' blind; the current approach usefully
        prototypes simpy returns.
        """
        self.simulation.start()
def run_simulation(cfg, timestamp):
    """
    Construct a batch-planned simulation for ``cfg`` and run it.

    Returns the ``(sim, tasks)`` dataframes produced by
    ``Simulation.start()``.
    """
    simulation = Simulation(
        env=simpy.Environment(),
        config=cfg,
        instrument=Telescope,
        planning_algorithm='batch',
        planning_model=BatchPlanning('batch'),
        scheduling=BatchProcessing,
        delay=None,
        timestamp=timestamp,
        to_file=True,
    )
    return simulation.start()
'publications/2021_isc-hpc/config/single_size/40cluster')): if '.json' in config: CONFIG = f'publications/2021_isc-hpc/config/single_size/40cluster/{config}' env = simpy.Environment() instrument = Telescope timestamp = f'{time.time()}'.split('.')[0] simulation = Simulation( env=env, config=CONFIG, instrument=instrument, planning=algorithm, scheduling=DynamicAlgorithmFromPlan, delay=None, timestamp={timestamp} ) sim, tasks = simulation.start() global_sim = global_sim.append(sim) global_tasks = global_tasks.append(tasks) print(algorithm, config, len(sim)) global_tasks.to_pickle('tasks_output.pkl') global_sim.to_pickle('simulation_output.pkl') # PLOTTING SIMULATION DATA - originally produced in a Jupyter Notebook # Group by planning, delay, and config to get the simulation time for each # simulation. df = global_sim.groupby(['planning','delay', 'config']).size().astype(float).reset_index(name='time').sort_values(by=['planning']) df['config'] = df['config'].str.replace('visualisation_playground/sim_config/single_size/40cluster/mos_sw','').str.strip('.json').astype(float) basetime = pd.Series(df[df['planning'] == 'fcfs']['time']) basetime = basetime.append(basetime,ignore_index=True) df['increase'] = basetime/df['time']
# but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. """ Basic simulation, with minimal Observation Plan and dummy observation workflows """ import simpy from topsim.algorithms.scheduling import FifoAlgorithm from topsim.core.simulation import Simulation from topsim import common as test_data workflow_file = 'test/data/daliuge_pipeline_test.json' event_file = 'sim.trace' planning_algorithm = 'heft' # env = simpy.RealtimeEnvironment(factor=0.5, strict=False) env = simpy.Environment() tmax = 36 # for starters, we will define telescope configuration as simply number of arrays that exist salgorithm = FifoAlgorithm() vis = False # TODO move things like 'heft' into a 'common' file which has SchedulingAlgorithm.HEFT = 'heft' etc. simulation = Simulation(env, test_data.telescope_config, tmax, test_data.machine_config, salgorithm, 'heft', event_file, vis) simulation.start(-1)
time delay on storing and ingest. This also involves the planning and execution of a more complex workflow; namely, the original HEFT workflow from Topcuoglu 2000. """ import simpy from topsim.user.schedule.dynamic_plan import DynamicAlgorithmFromPlan from topsim.user.telescope import Telescope from topsim.core.simulation import Simulation EVENT_FILE = 'simulations/real_time/real_time.trace' CONFIG = 'simulations/real_time/real_time.json' env = simpy.Environment() planning_algorithm = 'heft' scheduling_algorithm = DynamicAlgorithmFromPlan instrument = Telescope simulation = Simulation( env=env, config=CONFIG, instrument=instrument, algorithm_map={'pheft': 'pheft', 'heft': 'heft', 'fifo': DynamicAlgorithmFromPlan}, event_file=EVENT_FILE, ) simulation.start(11) simulation.resume(300)
class TestMonitorPandasPickle(unittest.TestCase):
    """Verify simulation results are persisted to (and merged in) HDF5."""

    def setUp(self) -> None:
        """
        Basic simulation using a single observation + heft workflow +
        homogenous system configuration.
        """
        env = simpy.Environment()
        self.instrument = Telescope
        self.simulation = Simulation(
            env=env,
            config=CONFIG,
            instrument=self.instrument,
            planning_algorithm='heft',
            planning_model=SHADOWPlanning('heft'),
            scheduling=DynamicAlgorithmFromPlan,
            delay=None,
            timestamp='unittest',
            to_file=True,
            hdf5_path='test/simulation_data/test_hdf5.h5',
            delimiters='test/')

    def tearDown(self):
        # Best-effort cleanup: the store only exists if a test actually
        # ran a simulation, so a missing file must not mask the real
        # test failure. (An unused `output` local was also removed.)
        try:
            os.remove('test/simulation_data/test_hdf5.h5')
        except FileNotFoundError:
            pass

    def testHDF5GeneratedAfterSimulation(self):
        """
        Test that after a simulation, a HDF5 storage file is generated
        """
        self.simulation.start()
        self.assertTrue(os.path.exists('test/simulation_data/test_hdf5.h5'))

    def testHDF5KeysAndDataFramesExist(self):
        """
        Ensure that the generated HDF5 contains the correct results in
        the keys
        """
        # TODO(review): this test has no body — implement it or remove it.

    def test_multi_simulation_data_merge(self):
        """
        Run one simulation per planning algorithm into the same HDF5
        store and verify both runs are retrievable under their own keys.
        """
        for algorithm in ['heft', 'fcfs']:
            env = simpy.Environment()
            simulation = Simulation(
                env, CONFIG, self.instrument,
                planning_algorithm=algorithm,
                planning_model=SHADOWPlanning(algorithm),
                scheduling=DynamicAlgorithmFromPlan,
                delay=None,
                timestamp='unittest',
                hdf5_path='test/simulation_data/test_hdf5.h5',
                to_file=True,
                delimiters=f'{algorithm}')
            simulation.start()
        self.assertTrue(os.path.exists('test/simulation_data/test_hdf5.h5'))
        heft_key = '/dunittest/heft/heft_single_observation_simulation/sim/'
        fcfs_key = '/dunittest/fcfs/heft_single_observation_simulation/sim/'
        heft_sim = pd.read_hdf('test/simulation_data/test_hdf5.h5',
                               key=heft_key)
        self.assertEqual(120, len(heft_sim))
        self.assertEqual(3, heft_sim.iloc[-1]['available_resources'])
        # `fcfs_key` was previously unused; verify the second run's data
        # was merged into the same store as well.
        fcfs_sim = pd.read_hdf('test/simulation_data/test_hdf5.h5',
                               key=fcfs_key)
        self.assertGreater(len(fcfs_sim), 0)
# User defined models from user.telescope import Telescope # Instrument from user.schedule.dynamic_plan import DynamicAlgorithmFromPlan # Scheduling from user.schedule.greedy import GreedyAlgorithmFromPlan # Scheduling from user.schedule.batch_allocation import BatchProcessing from user.plan.batch_planning import BatchPlanning # Planning from user.plan.static_planning import SHADOWPlanning if __name__ == '__main__': LOGGER.info(f"Running experiment from {RUN_PATH}/{FOLDER_PATH}") env = simpy.Environment() instrument = Telescope # timestamp = f'{time.time()}'.split('.')[0] simulation = Simulation(env=env, config=cfg_path, instrument=instrument, planning_algorithm='heft', planning_model=SHADOWPlanning('heft'), scheduling=DynamicAlgorithmFromPlan, delay=None, timestamp='skaworkflows_test', to_file=True, hdf5_path=f'{RUN_PATH}/{FOLDER_PATH}/results.h5' # hdf5_path='', # delimiters=f'test/' ) simulation.start() LOGGER.info(f"Experiment finished, exiting script...")
class TestBasicIngest(unittest.TestCase):
    """Exercise ingest timing edge-cases on a minimal configuration."""

    def setUp(self) -> None:
        self.env = simpy.Environment()
        self.simulation = Simulation(
            self.env,
            BASIC_CONFIG,
            Telescope,
            planning_algorithm='heft',
            planning_model=SHADOWPlanning('heft'),
            scheduling=DynamicAlgorithmFromPlan,
            delay=None,
            timestamp=SIM_TIMESTAMP,
        )

    def testClusterIngest(self):
        """
        The basic ingest represents the edge cases for timing and
        scheduling within the simulation.

        With only 2 resources, one is taken by ingest, so an observation
        cannot start until 1 timestep AFTER an ingest has finished,
        because the telescope checks before that task is successfully
        removed from the cluster. Hence this run processes exactly 2
        observations; after that the cold buffer is at capacity and no
        further observations are possible.
        """
        self.assertEqual(0, self.env.now)
        self.simulation.start(runtime=7)
        self.assertEqual(2, self.simulation.cluster._ingest['completed'])
        self.assertEqual(
            RunStatus.FINISHED,
            self.simulation.instrument.observations[1].status,
        )

    def testBufferIngest(self):
        """Hot/cold buffer capacities follow the expected ingest timeline."""
        self.assertEqual(0, self.simulation.env.now)
        self.simulation.start(runtime=1)
        self.assertEqual(5, self.simulation.buffer.hot[0].current_capacity)
        self.simulation.resume(until=2)
        self.assertEqual(10, self.simulation.buffer.hot[0].current_capacity)
        self.assertEqual(5, self.simulation.buffer.cold[0].current_capacity)
        self.assertEqual(
            1, len(self.simulation.buffer.cold[0].observations["stored"])
        )
        self.simulation.resume(until=4)
        self.assertEqual(10, self.simulation.buffer.hot[0].current_capacity)
        self.assertEqual(0, self.simulation.buffer.cold[0].current_capacity)
        self.assertEqual(
            2, len(self.simulation.buffer.cold[0].observations["stored"])
        )

    def testSchedulerRunTime(self):
        """Stored-observation count grows as workflows finish processing."""
        self.assertEqual(0, self.simulation.env.now)
        self.simulation.start(runtime=2)
        self.assertEqual(
            1, len(self.simulation.buffer.cold[0].observations['stored'])
        )
        self.simulation.resume(until=8)
        self.simulation.resume(until=11)
        self.simulation.resume(until=12)
        # One workflow has finished processing, so a second observation
        # is now stored in the cold buffer.
        self.assertEqual(
            2, len(self.simulation.buffer.cold[0].observations['stored'])
        )