def test_retry(self):
    """Fail one of ten launched tasks and check that it is offered again.

    Nine tasks finish normally, the first launched task is reported as
    failed, and the next offer must hand out task 0 again without
    changing the launched count.
    """
    scheduler = MockSchduler()
    tasks = [MockTask(index) for index in range(10)]
    job = SimpleJob(scheduler, tasks, 1, 10)

    # Register the host with purge_elapsed=0; otherwise the failure
    # reported below would forbid localhost from receiving the retry.
    job.task_host_manager.register_host('localhost', purge_elapsed=0)

    # The same offer/resource objects are reused for every taskOffer call.
    offer_kwargs = dict(
        host_offers={'localhost': (0, create_offer('localhost'))},
        cpus=[1],
        mems=[10],
        gpus=[0],
    )

    # Each taskOffer call returns a list; collect all launched entries.
    launched = []
    for _ in range(10):
        launched.extend(job.taskOffer(**offer_kwargs))

    # Everything except the first launched task finishes successfully.
    for entry in launched[1:]:
        job.statusUpdate(entry[2].id, 0, 'TASK_FINISHED')
    assert job.tasksFinished == 9

    # Fail the remaining task and expect it to be offered again.
    job.statusUpdate(launched[0][2].id, 0, 'TASK_FAILED')
    retried = job.taskOffer(**offer_kwargs)[0]
    assert retried[2].id == 0

    # Nothing else is left to launch, and the retry is not double counted.
    assert not job.taskOffer(**offer_kwargs)
    assert job.tasksLaunched == 10

    job.statusUpdate(retried[2].id, 1, 'TASK_FINISHED')
    assert job.tasksFinished == 10
def test_job(self):
    """Launch ten tasks through slaveOffer and finish all of them."""
    scheduler = MockSchduler()
    tasks = [MockTask(index) for index in range(10)]
    job = SimpleJob(scheduler, tasks, 1, 10)

    # Ask for one task per offer, ten times in a row.
    launched = []
    for _ in range(10):
        launched.append(job.slaveOffer('localhost'))
    assert len(launched) == 10
    assert job.tasksLaunched == 10

    # With every task launched, a further offer yields nothing.
    assert job.slaveOffer('localhost') is None

    for task in launched:
        job.statusUpdate(task.id, 0, 'TASK_FINISHED')
    assert job.tasksFinished == 10
def test_retry(self):
    """Fail one launched task and check that slaveOffer hands it out again."""
    scheduler = MockSchduler()
    tasks = [MockTask(index) for index in range(10)]
    job = SimpleJob(scheduler, tasks)

    launched = []
    for _ in range(10):
        launched.append(job.slaveOffer('localhost'))

    # All tasks but the first one complete successfully.
    for task in launched[1:]:
        job.statusUpdate(task.id, 0, 'TASK_FINISHED')
    assert job.tasksFinished == 9

    # Report the first task as failed, then offer a different host.
    job.statusUpdate(launched[0].id, 0, 'TASK_FAILED')
    retried = job.slaveOffer('localhost1')
    assert retried.id == 0

    # No more work remains, and the retry does not bump the launch count.
    assert job.slaveOffer('localhost') is None
    assert job.tasksLaunched == 10

    job.statusUpdate(retried.id, 1, 'TASK_FINISHED')
    assert job.tasksFinished == 10
def submitTasks(self, tasks):
    """Wrap *tasks* in a SimpleJob, register it and make sure the driver runs.

    Does nothing when *tasks* is empty.  Otherwise the new job is recorded
    in ``activeJobs``, ``activeJobsQueue`` and ``jobTasks``, the try counter
    of the owning stage is incremented, and the driver is started if it was
    not running yet.  The call blocks until ``self.isRegistered`` is true.
    If the scheduler had already been started before this call, more
    resources are requested at the end.

    :param tasks: sequence of tasks; the first task's ``rdd`` and
        ``stageId`` describe the whole batch.
    """
    if not tasks:
        return

    job = SimpleJob(self, tasks, self.cpus, tasks[0].rdd.mem or self.mem)
    self.activeJobs[job.id] = job
    self.activeJobsQueue.append(job)
    self.jobTasks[job.id] = set()

    # The call-site scope is only used to enrich the log line below, so the
    # lookup is best-effort.  Catch Exception rather than using a bare
    # ``except`` so KeyboardInterrupt/SystemExit still propagate, and leave
    # a debug trace instead of silently discarding the failure.
    stage_scope = ''
    try:
        from dpark.web.ui.views.rddopgraph import StageInfo
        stage_scope = StageInfo.idToRDDNode[
            tasks[0].rdd.id].scope.call_site
    except Exception:
        logger.debug('Could not resolve stage scope for rdd %s',
                     tasks[0].rdd, exc_info=True)

    stage = self.idToStage[tasks[0].stageId]
    stage.try_times += 1
    logger.info(
        'Got job %d with %d tasks for stage: %d(try %d times) '
        'at scope[%s] and rdd:%s',
        job.id, len(tasks), tasks[0].stageId,
        stage.try_times, stage_scope, tasks[0].rdd)

    # Remember whether the scheduler was already running *before* we
    # possibly start it; only then is an explicit revive needed.
    need_revive = self.started
    if not self.started:
        self.start_driver()

    # Poll for registration, dropping the lock while sleeping.
    # NOTE(review): presumably the caller holds self.lock and whatever sets
    # isRegistered needs it -- confirm against the caller.
    while not self.isRegistered:
        self.lock.release()
        time.sleep(0.01)
        self.lock.acquire()

    if need_revive:
        self.requestMoreResources()
def test_job(self):
    """Launch ten tasks through taskOffer and finish all of them."""
    scheduler = MockSchduler()
    tasks = [MockTask(index) for index in range(10)]
    job = SimpleJob(scheduler, tasks, 1, 10)

    host_offers = {'localhost': (0, create_offer('localhost'))}
    job.task_host_manager.register_host('localhost')
    cpus, mems, gpus = [10], [10], [0]

    # taskOffer returns a list of tuples, each holding:
    #   the index of the offer,
    #   the offer information,
    #   the task description.
    launched = []
    for _ in range(10):
        launched.extend(job.taskOffer(host_offers, cpus, mems, gpus))
    assert len(launched) == 10
    assert job.tasksLaunched == 10

    # With every task launched, a further offer yields nothing.
    assert not job.taskOffer(host_offers, cpus, mems, gpus)

    for entry in launched:
        job.statusUpdate(entry[2].id, 0, 'TASK_FINISHED')
    assert job.tasksFinished == 10
def submitTasks(self, tasks):
    """Create a SimpleJob for *tasks*, register it and ensure the driver runs.

    An empty *tasks* is ignored.  The call blocks until
    ``self.isRegistered`` becomes true, and requests more resources when
    the scheduler had already been started before this call.
    """
    if tasks:
        job = SimpleJob(self, tasks, self.cpus, tasks[0].rdd.mem or self.mem)

        # Book-keeping for the freshly created job.
        job_id = job.id
        self.activeJobs[job_id] = job
        self.activeJobsQueue.append(job)
        self.jobTasks[job_id] = set()
        logger.info("Got job %d with %d tasks: %s",
                    job.id, len(tasks), tasks[0].rdd)

        # Capture the state before a possible start: a revive is only
        # needed when the scheduler was already running.
        need_revive = self.started
        if not need_revive:
            self.start_driver()

        # Poll for registration, giving up the lock during each sleep.
        while True:
            if self.isRegistered:
                break
            self.lock.release()
            time.sleep(0.01)
            self.lock.acquire()

        if need_revive:
            self.requestMoreResources()