def test_application_with_no_dependencies(self):
    """An application built from fully independent containers must expose
    every one of its 10 containers as a source node."""
    env = self.env
    independent = []
    for cid in range(10):
        independent.append(
            Container(env, str(cid),
                      rnd.uniform(0, 10),
                      rnd.uniform(1024, 10240),
                      rnd.uniform(1024, 10240),
                      rnd.uniform(0, 10),
                      rnd.uniform(0, 100)))
    app = Application(env, 'test', independent)
    # No add_dependencies() calls, so each container is its own source.
    self.assertEqual(len(app.get_sources()), 10)
def generate(self):
    """Generate a random Application made of alternating sequential and
    parallel "steps".

    A random sequence of step types is drawn (True = sequential) with the
    probability of a sequential step proportional to the drawn step counts.
    A sequential step appends one container that depends on every container
    of the previous step; a parallel step appends ``parallel_level``
    containers whose dependencies on the previous step are assigned
    round-robin (indices ``i, i + parallel_level, ...`` into ``last_step``).

    :return: a new :class:`Application` with a freshly generated UUID id
    """
    # NOTE(review): if ``rnd`` is numpy.random, randint() excludes the upper
    # bound, so the configured *_max values are never drawn — TODO confirm.
    n_seq_steps = rnd.randint(self.__min_seq_steps, self.__max_seq_steps)
    n_parallel_steps = rnd.randint(self.__min_parallel_steps, self.__max_parallel_steps)
    total_steps = n_seq_steps + n_parallel_steps
    assert total_steps > 0
    # Probability that any given step is sequential.
    p_seq_step = n_seq_steps/total_steps
    n_nodes = 0  # running count of created containers; ids are 1-based
    containers, last_step = [], []  # last_step: ids of the previous step's containers
    for is_seq in rnd.choice(a=[True, False], size=total_steps, p=[p_seq_step, 1 - p_seq_step]):
        # Resource demands are drawn once per step and shared by all
        # containers created within that step.
        cpus = rnd.uniform(self.__min_cpus, self.__max_cpus)
        mem = rnd.randint(self.__min_mem, self.__max_mem)
        disk = rnd.randint(self.__min_disk, self.__max_disk)
        gpus = rnd.randint(self.__min_gpus, self.__max_gpus)
        unit_output_size = rnd.randint(self.__min_output_size, self.__max_output_size)
        if is_seq:
            cid = n_nodes + 1
            runtime = rnd.uniform(self.__min_runtime, self.__max_runtime)
            # Output grows linearly with runtime.
            output_size = unit_output_size * runtime
            c = Container(self.__env, str(cid), cpus=cpus, mem=mem, disk=disk,
                          gpus=gpus, runtime=runtime, output_size=output_size)
            # A sequential container joins on every container of the
            # previous step.
            for prev in last_step:
                c.add_dependencies(prev)
            containers += c,
            last_step = [str(cid)]
            n_nodes += 1
        else:
            # Reuse the previous fan-out width when the previous step was
            # parallel (>= 2 containers); otherwise draw a fresh width.
            parallel_level = rnd.randint(self.__min_parallel_level, self.__max_parallel_level) \
                if len(last_step) < 2 else len(last_step)
            for i, cid in enumerate(range(n_nodes + 1, n_nodes + parallel_level + 1)):
                runtime = rnd.uniform(self.__min_runtime, self.__max_runtime)
                output_size = unit_output_size * runtime
                c = Container(self.__env, str(cid), cpus=cpus, mem=mem, disk=disk,
                              gpus=gpus, runtime=runtime, output_size=output_size)
                # Round-robin dependency assignment: container i depends on
                # previous-step containers at indices i, i+level, i+2*level, ...
                cur = i % parallel_level
                while cur < len(last_step):
                    c.add_dependencies(last_step[cur])
                    cur += parallel_level
                containers += c,
            # Record this step's ids for the next iteration (the loop
            # variable here shadows the outer enumerate index on purpose).
            last_step = [str(i) for i in range(n_nodes + 1, n_nodes + parallel_level + 1)]
            n_nodes += parallel_level
    app = Application(self.__env, str(uuid.uuid4()), containers)
    # app.visualize()
    return app
def setUp(self):
    """Build a 10-container application forming a full dependency chain:
    container k depends on every container 0..k-1."""
    self.env = simpy.Environment()
    env = self.env
    chain = []
    for cid in range(10):
        chain.append(
            Container(env, str(cid),
                      rnd.uniform(0, 10),
                      rnd.uniform(1024, 10240),
                      rnd.uniform(1024, 10240),
                      rnd.uniform(0, 10),
                      rnd.uniform(0, 100)))
    for contr in chain:
        # Each container depends on all containers with smaller ids.
        contr.add_dependencies(*[str(i) for i in range(int(contr.id))])
    self.app = Application(env, 'test', chain)
def generate(self):
    """Generate an Application whose container topology mirrors a randomly
    generated DAG: one container per node, one dependency per edge.

    :return: a new Application with a freshly generated UUID id
    """
    dag = self.__dag_gen.generate()
    # One container per DAG node, with resources drawn from the configured
    # [lo, hi] ranges.
    containers = {
        node: Container(self.__env, str(node),
                        cpus=rnd.uniform(self.__cpus_lo, self.__cpus_hi),
                        mem=rnd.randint(self.__mem_lo, self.__mem_hi),
                        disk=rnd.randint(self.__disk_lo, self.__disk_hi),
                        gpus=rnd.randint(self.__gpus_lo, self.__gpus_hi),
                        runtime=rnd.uniform(self.__runtime_lo, self.__runtime_hi),
                        output_size=rnd.randint(self.__output_size_lo,
                                                self.__output_size_hi))
        for node in dag.nodes
    }
    # Mirror each DAG edge u -> v as "v depends on u".
    for src, dst in dag.edges:
        containers[dst].add_dependencies(str(src))
    return Application(self.__env, str(uuid.uuid4()), containers.values())
def _submit(self):
    """simpy process: fabricate random containers forever, push each onto the
    dispatch queue, log it, then sleep for a random interval before the next
    submission."""
    seq = 0
    rnd = np.random
    while True:
        contr = Container(self.__env, str(seq),
                          rnd.randint(1, 4),
                          rnd.randint(1024, 4096),
                          rnd.randint(1024, 10240),
                          rnd.randint(1, 4),
                          rnd.randint(10, 20),
                          placement=str(rnd.randint(0, 10)))
        yield self.__dispatch_q.put(contr)
        self.logger.info(
            '[%d] Submitted container %s: cpus: %.1f, mem: %d, disk: %d, gpus: %d, '
            'runtime: %d, placement: %s' % (
                self.__env.now, contr.id, contr.cpus, contr.mem, contr.disk,
                contr.gpus, contr.runtime, contr.placement))
        # Random inter-arrival gap before submitting the next container.
        yield self.__env.timeout(rnd.randint(1, 20))
        seq += 1
def test_one_app_w_dep(self):
    """
    Test the scheduler with an application with dependencies.

    In this test, the 16 containers form a full dependency chain (container
    k depends on every container 0..k-1) and the cluster has a single host
    with 1 cpu, so the containers can only execute one after another.

    The test examines:
    1. Whether the application finishes end-to-end
    2. Whether all the containers are run and finished
    3. Whether the simulator clock ends close to the SUM of all container
       runtimes — i.e. sequential execution — within a tolerance of
       (scheduler interval - 1) per container

    NOTE(review): the original docstring claimed resources were sufficient
    to run all containers in parallel, which contradicts the single 1-cpu
    host and the sum-of-runtimes assertion below.
    """
    from scheduler.opportunistic import OpportunisticGlobalScheduler
    env = self.env
    contrs = [
        Container(env, str(cid), 1, 1024, 1024, 1, rnd.uniform(2, 100))
        for cid in range(16)
    ]
    app = Application(env, 'test', contrs)
    for c in app.containers:
        # Container k depends on all containers with smaller ids.
        c.add_dependencies(*[str(i) for i in range(int(c.id))])
    dispatch_q, notify_q = simpy.Store(env), simpy.Store(env)
    # A single host with 1 cpu/gpu: forces one container at a time.
    hosts = [Host(env, str(i), 1, 1024, 1024, 1) for i in range(1)]
    cluster = Cluster(env, dispatch_q, notify_q, hosts)
    global_scheduler = OpportunisticGlobalScheduler(
        env, dispatch_q, notify_q, cluster)
    cluster.start()
    global_scheduler.start()
    global_scheduler.submit(app)
    local_scheduler = global_scheduler.get_scheduler(app.id)
    env.run()
    self.assertTrue(app.is_finished)
    self.assertTrue(all([c.is_finished for c in contrs]))
    # Clock should land within one schedule interval per container of the
    # total (sequential) runtime.
    self.assertAlmostEqual(env.now,
                           sum([c.runtime for c in contrs]),
                           delta=(local_scheduler.interval - 1) * 16)
def _load_data(self, trace_f):
    """Load a YAML job trace and register each job's Application at its
    submit time.

    Each job record is expected to carry ``id``, ``submit_time`` and a list
    of ``tasks``; each task carries ``id``, ``runtime``, ``n_instances``,
    ``cpus``, ``mem`` and ``dependencies``. Memory and output size are
    scaled by MEM_SCALE_FACTOR and the configured output-size factor.

    :param trace_f: path to the YAML trace file
    """
    env = self.__env
    with open(trace_f) as f:
        # safe_load: plain yaml.load() without a Loader is deprecated and
        # can construct arbitrary objects from an untrusted trace file.
        jobs = yaml.safe_load(f)
    for j in jobs:
        contrs = []
        for t in j['tasks']:
            task_id, runtime, n_inst = str(
                t['id']), t['runtime'], t['n_instances']
            cpus, mem = t['cpus'], t['mem']
            deps = [str(d) for d in t['dependencies']]
            contrs += Container(env, task_id,
                                cpus=cpus,
                                mem=mem * self.MEM_SCALE_FACTOR,
                                output_size=mem * self.__output_size_scale_factor,
                                runtime=runtime,
                                instances=n_inst,
                                dependencies=deps),
        app = Application(env, j['id'], contrs)
        # Queue the application keyed by its submission time.
        self._bin_insert(j['submit_time'], app)