Example #1
0
 def test_retry(self):
     """A task reported as failed is offered again and can then finish."""
     scheduler = MockSchduler()
     job = SimpleJob(scheduler, [MockTask(n) for n in range(10)], 1, 10)
     host_offers = {'localhost': (0, create_offer('localhost'))}
     # Register the host with purge_elapsed=0; otherwise the failure
     # reported below would forbid 'localhost' from receiving the retry.
     job.task_host_manager.register_host('localhost', purge_elapsed=0)
     cpus, mems, gpus = [1], [10], [0]

     def offer_once():
         # Always hand over the very same resource lists, as one offer round.
         return job.taskOffer(
             host_offers=host_offers, cpus=cpus, mems=mems, gpus=gpus)

     launched = []
     for _ in range(10):
         launched.extend(offer_once())

     # Everything except the first launched task finishes successfully.
     for entry in launched[1:]:
         job.statusUpdate(entry[2].id, 0, 'TASK_FINISHED')
     assert job.tasksFinished == 9

     # Fail the first task; the next offer must hand task 0 out again.
     job.statusUpdate(launched[0][2].id, 0, 'TASK_FAILED')
     retried = offer_once()[0]
     assert retried[2].id == 0
     assert not offer_once()
     assert job.tasksLaunched == 10

     job.statusUpdate(retried[2].id, 1, 'TASK_FINISHED')
     assert job.tasksFinished == 10
Example #2
0
    def submitTasks(self, tasks):
        """Wrap ``tasks`` in a new SimpleJob and register it as active.

        Does nothing when ``tasks`` is empty. Otherwise the job is recorded in
        ``activeJobs``, ``activeJobsQueue`` and ``jobTasks``, the stage's
        ``try_times`` counter is bumped, and the driver is started if it was
        not running yet. If the driver was already running, more resources
        are requested instead.

        The method blocks until ``self.isRegistered`` becomes true.
        """
        if not tasks:
            return

        job = SimpleJob(self, tasks, self.cpus, tasks[0].rdd.mem or self.mem)
        self.activeJobs[job.id] = job
        self.activeJobsQueue.append(job)
        self.jobTasks[job.id] = set()

        # The call-site scope only enriches the log line below, so the lookup
        # is best-effort. Catch Exception rather than using a bare except so
        # that KeyboardInterrupt / SystemExit are not swallowed here.
        stage_scope = ''
        try:
            from dpark.web.ui.views.rddopgraph import StageInfo
            stage_scope = StageInfo.idToRDDNode[
                tasks[0].rdd.id].scope.call_site
        except Exception as e:
            logger.debug('stage scope is unavailable: %s', e)

        stage = self.idToStage[tasks[0].stageId]
        stage.try_times += 1
        logger.info(
            'Got job %d with %d tasks for stage: %d(try %d times) '
            'at scope[%s] and rdd:%s', job.id, len(tasks), tasks[0].stageId,
            stage.try_times, stage_scope, tasks[0].rdd)

        # Remember the state *before* possibly starting the driver: only an
        # already-running driver needs an explicit request for resources.
        need_revive = self.started
        if not self.started:
            self.start_driver()

        # Poll for registration, dropping the lock while sleeping.
        # NOTE(review): presumably the caller holds self.lock on entry and
        # the registration callback needs it -- confirm against the caller.
        while not self.isRegistered:
            self.lock.release()
            time.sleep(0.01)
            self.lock.acquire()

        if need_revive:
            self.requestMoreResources()
Example #3
0
 def test_job(self):
     """Ten slave offers launch all ten tasks; an extra offer yields None."""
     scheduler = MockSchduler()
     job = SimpleJob(scheduler, [MockTask(n) for n in range(10)], 1, 10)

     launched = []
     for _ in range(10):
         launched.append(job.slaveOffer('localhost'))
     assert len(launched) == 10
     assert job.tasksLaunched == 10
     # Every task is already out, so nothing is left to hand over.
     assert job.slaveOffer('localhost') is None

     for task in launched:
         job.statusUpdate(task.id, 0, 'TASK_FINISHED')
     assert job.tasksFinished == 10
Example #4
0
 def test_retry(self):
     """A failed task is handed out again on the next offer and can finish."""
     scheduler = MockSchduler()
     job = SimpleJob(scheduler, [MockTask(n) for n in range(10)])

     launched = []
     for _ in range(10):
         launched.append(job.slaveOffer('localhost'))

     # Everything except the first launched task finishes successfully.
     for task in launched[1:]:
         job.statusUpdate(task.id, 0, 'TASK_FINISHED')
     assert job.tasksFinished == 9

     # Fail the first task, then offer a different host for the retry.
     job.statusUpdate(launched[0].id, 0, 'TASK_FAILED')
     retried = job.slaveOffer('localhost1')
     assert retried.id == 0
     assert job.slaveOffer('localhost') is None
     assert job.tasksLaunched == 10

     job.statusUpdate(retried.id, 1, 'TASK_FINISHED')
     assert job.tasksFinished == 10
Example #5
0
 def test_job(self):
     """Ten offer rounds launch all ten tasks; a further round yields nothing."""
     scheduler = MockSchduler()
     job = SimpleJob(scheduler, [MockTask(n) for n in range(10)], 1, 10)
     host_offers = {'localhost': (0, create_offer('localhost'))}
     job.task_host_manager.register_host('localhost')
     cpus, mems, gpus = [10], [10], [0]

     def offer_once():
         # Always hand over the very same resource lists, as one offer round.
         return job.taskOffer(host_offers, cpus, mems, gpus)

     # taskOffer returns a list of tuples:
     # (index of the offer, the offer itself, the task description).
     launched = []
     for _ in range(10):
         launched.extend(offer_once())
     assert len(launched) == 10
     assert job.tasksLaunched == 10
     assert not offer_once()

     for entry in launched:
         job.statusUpdate(entry[2].id, 0, 'TASK_FINISHED')
     assert job.tasksFinished == 10
Example #6
0
    def submitTasks(self, tasks):
        """Register a new SimpleJob for ``tasks`` and make sure the driver runs.

        An empty ``tasks`` is ignored. The call blocks until
        ``self.isRegistered`` is true. If the driver had already been started
        before this call, more resources are requested afterwards.
        """
        if not tasks:
            return

        head = tasks[0]
        new_job = SimpleJob(self, tasks, self.cpus, head.rdd.mem or self.mem)
        job_id = new_job.id
        self.activeJobs[job_id] = new_job
        self.activeJobsQueue.append(new_job)
        self.jobTasks[job_id] = set()
        logger.info("Got job %d with %d tasks: %s", job_id, len(tasks), head.rdd)

        # Capture the state before start_driver() can change it.
        already_running = self.started
        if not already_running:
            self.start_driver()

        # Poll for registration, dropping the lock while sleeping.
        while not self.isRegistered:
            self.lock.release()
            time.sleep(0.01)
            self.lock.acquire()

        if already_running:
            self.requestMoreResources()