Beispiel #1
0
 def test_listRunnable(self):
     """Verify that Job.listRunnable returns only the DELAYED and READY jobs, delayed one first.

     Five jobs are created from ``self.opts`` with statuses READY, RUNNING, FINISHED, RUNNING
     and DELAYED. The delayed job has its submit time moved 10000 units into the past and its
     priority recomputed from that submit time. The test then requests runnable jobs with the
     arguments ``5`` and the current time in milliseconds (the ``5`` is presumably a result
     limit - confirm against Job.listRunnable) and expects exactly two jobs back, in the order
     "delayed", "ready".
     """
     fixtures = [
         ("ready", Job.READY),
         ("running", Job.RUNNING),
         ("finished", Job.FINISHED),
         ("running", Job.RUNNING),
         ("delayed", Job.DELAYED),
     ]
     for name, status in fixtures:
         job = Job.create(db.session, **self.opts)
         job.name = name
         job.status = status
         # Compare by value: identity ("is") only holds when both sides happen to be the very
         # same object, which is not guaranteed for status constants.
         if status == Job.DELAYED:
             job.submittime = job.submittime - 10000
             # Explicit floor division yields the same integer result as the former
             # "/ 1000L" (assumes submittime is an integer timestamp) without relying on
             # the Python 2 only long literal.
             job.priority = job.submittime // 1000
         db.session.commit()
     jobs = Job.listRunnable(db.session, 5, utils.currentTimeMillis())
     self.assertEqual(len(jobs), 2)
     self.assertEqual([x.name for x in jobs], ["delayed", "ready"])
     self.assertEqual([x.status for x in jobs], [Job.DELAYED, Job.READY])
Beispiel #2
0
def _refreshPool(sampler, session):
    """Update jobs whose processes have ended and drop them from the sampler pool.

    For every (uid, pid) pair in ``sampler.pool`` the process status is queried with
    ``updateProcessStatus``. A non-negative result is treated as "process is done": the job is
    looked up by uid and marked finished (or a warning is logged when it is missing), and the
    uid is removed from the pool. Any other result is logged as a still running process.

    :param sampler: sampler that owns the pool and the logger
    :param session: database session used to fetch and update jobs
    """
    # Iterate over a snapshot, because entries are removed from the pool inside the loop
    # (presumably removeFromPool mutates sampler.pool - a live view must not change size
    # while being iterated).
    for uid, pid in list(sampler.pool.items()):
        if updateProcessStatus(pid) >= 0:
            job = Job.get(session, uid)
            if not job:
                sampler.logger.warn("Job '%s' does not exist in database, updated skipped", uid)
            else:
                Job.finish(session, job)
            sampler.removeFromPool(uid)
        else:
            sampler.logger.info("Process '%s' is still running, job uid: '%s'", pid, uid)


def _launchRunnableJobs(sampler, session):
    """Launch as many runnable jobs as there are free slots, if the cluster allows it.

    Returns early (after logging the reason) when the pool already occupies all NUM_SLOTS,
    when the cluster status is DOWN, or when the cluster already runs NUM_SLOTS or more
    applications. Otherwise fetches runnable jobs for the free slots, launches each of them,
    marks it as running and registers its pid in the sampler pool.

    :param sampler: sampler that owns the pool and the logger
    :param session: database session used to fetch and update jobs
    """
    # Check how many pids are left. When the pool already takes every slot, nothing is launched
    freeSlots = NUM_SLOTS - len(sampler.pool)
    if freeSlots <= 0:
        sampler.logger.info("All %s slots are taken, cannot launch job, skipped", NUM_SLOTS)
        sampler.logger.debug("Free slots: %s, pool size: %s, numSlots: %s", freeSlots,
            len(sampler.pool), NUM_SLOTS)
        return

    # Check status of the cluster before requesting number of running applications
    sparkStatus = sparkContext.clusterStatus()
    if sparkStatus == DOWN:
        sampler.logger.info("Cluster %s[%s] is down, will try again later",
            sparkContext.getMasterAddress(), sparkContext.getUiAddress())
        return

    # Number of free slots is recomputed from applications actually running on the cluster,
    # if it is equal or more than NUM_SLOTS, launch is skipped
    apps = sparkContext.clusterRunningApps()
    freeSlots = NUM_SLOTS - len(apps)
    if freeSlots <= 0:
        sampler.logger.info("There are %s applications running already, cannot "
            "launch job, skipped", len(apps))
        sampler.logger.debug("Free slots: %s, apps: %s, numSlots: %s", freeSlots,
            len(apps), NUM_SLOTS)
        return

    # Fetch active (runnable) jobs using Job API based on number of free slots, acquired
    # earlier. Start jobs in the list, if any.
    currentTime = utils.currentTimeMillis()
    sampler.logger.debug("Fetch jobs with session %s, free slots %s, time %s",
        session, freeSlots, currentTime)
    runnableJobs = Job.listRunnable(session, freeSlots, currentTime)
    sampler.logger.info("Registering %s jobs", len(runnableJobs))
    for job in runnableJobs:
        pid = launchSparkJob(job)
        Job.run(session, job)
        sampler.addToPool(job.uid, pid)


def action(sampler):
    """Run one sampler iteration and schedule the next one.

    The iteration refreshes state of the jobs tracked in the sampler pool, launches new
    runnable jobs into free slots and commits the session. Any exception raised during the
    iteration is logged and swallowed, so that a single failure does not stop the sampler.
    Afterwards the lock is released (only if it was acquired), the session is closed, and, when
    ``sampler.enabled`` is true, a daemon Timer is started to call ``action`` again after
    ``sampler.interval``.

    :param sampler: sampler to run; when it is falsy an error is logged and nothing is done
    """
    if not sampler:
        scheduler.logger.error("Sampler is undefined, exiting")
        return
    session = SignallingSession(db)
    # Tracks whether the lock is held, so "finally" never releases a lock that was not
    # acquired (which would raise and prevent closing the session and re-arming the timer)
    lockAcquired = False
    try:
        sampler.logger.info("Start refreshing application state")
        sampler.incrementNumRuns()
        lock.acquire()
        lockAcquired = True
        _refreshPool(sampler, session)
        _launchRunnableJobs(sampler, session)
        session.commit()
    except Exception as e:
        sampler.logger.error("Sampler encountered error, execution skipped")
        # Pass exception itself instead of deprecated "e.message", which is not available on
        # every exception; logger converts it to text and appends the traceback
        sampler.logger.exception(e)
    finally:
        if lockAcquired:
            lock.release()
        session.close()
        if sampler.enabled:
            sampler.logger.debug("Prepared to be invoked in %s seconds", sampler.interval)
            timer = Timer(sampler.interval, action, [sampler])
            timer.daemon = True
            timer.start()
        else:
            sampler.logger.info("Sampler stopped")