def get_data(self, **kwargs):
    old = datetime.datetime.now() - datetime.timedelta(minutes=self.STALE_INTERVAL)
    data = []
    for pool in Pool._get_collection().find({}, {"_id": 0, "name": 1}):
        scheduler = Scheduler("discovery", pool=pool["name"])
        # Discovery jobs which have run more than once but whose last
        # success is older than STALE_INTERVAL
        for r in scheduler.get_collection().find({
            "runs": {"$gt": 1},
            "jcls": {"$regex": "_discovery$"},
            "st": {"$lte": old}
        }):
            mo = ManagedObject.get_by_id(r["key"])
            if not mo or not mo.is_managed:
                continue
            msg = ""
            tb = r.get("tb")  # .get() avoids KeyError when no traceback is stored
            if tb and "text" in tb and "code" in tb:
                if tb["text"].endswith("END OF TRACEBACK"):
                    tb["text"] = "Job crashed"
                msg = "(%s) %s" % (tb["text"], tb["code"])
            data += [[
                mo.administrative_domain.name,
                mo.name,
                mo.profile.name,
                mo.platform.name,
                mo.version.name,
                mo.address,
                mo.segment.name,
                r["jcls"],
                humanize_distance(r["st"]),
                msg,
            ]]
    return self.from_dataset(
        title=self.title,
        columns=[
            _("Admin. Domain"),
            _("Object"),
            _("Profile"),
            _("Platform"),
            _("Version"),
            _("Address"),
            _("Segment"),
            _("Job"),
            _("Last Success"),
            _("Reason"),
        ],
        data=sorted(data),
        enumerate=True,
    )
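
# Standalone sketch of the stale-job query above, runnable from a REPL
# without the report machinery. Assumptions: a pymongo Database handle,
# and the "noc.schedules.discovery.<pool>" collection naming used by
# Scheduler; the function name and default threshold are illustrative.
import datetime


def find_stale_discovery(db, pool_name, stale_minutes=30):
    old = datetime.datetime.now() - datetime.timedelta(minutes=stale_minutes)
    coll = db["noc.schedules.discovery.%s" % pool_name]  # assumed naming
    return list(coll.find({
        "runs": {"$gt": 1},                 # ran more than once
        "jcls": {"$regex": "_discovery$"},  # discovery jobs only
        "st": {"$lte": old}                 # last success beyond threshold
    }))
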
def call_later(name, delay=None, scheduler="scheduler", pool=None, max_runs=None, **kwargs):
    """
    Run callable *name* in the scheduler process

    :param name: Full callable name
    :param delay: Delay in seconds
    :param scheduler: Name of scheduler
    :param pool: Pool name
    :param max_runs: Maximum number of runs
    """
    scheduler = Scheduler(scheduler, pool=pool)
    data = kwargs or {}
    ts = datetime.datetime.now()
    if delay:
        ts += datetime.timedelta(seconds=delay)
    set_op = {
        Job.ATTR_TS: ts
    }
    iset_op = {
        Job.ATTR_STATUS: Job.S_WAIT,
        Job.ATTR_RUNS: 0,
        Job.ATTR_FAULTS: 0,
        Job.ATTR_OFFSET: 0
    }
    if max_runs:
        iset_op[Job.ATTR_MAX_RUNS] = max_runs
    if data:
        set_op[Job.ATTR_DATA] = data
    # Deduplicate on callable name *and* job data, so the same callable
    # scheduled with different kwargs yields separate jobs
    q = {
        Job.ATTR_CLASS: "noc.core.scheduler.calljob.CallJob",
        Job.ATTR_KEY: name
    }
    for k in data:
        q["%s.%s" % (Job.ATTR_DATA, k)] = data[k]
    op = {
        "$set": set_op,
        "$setOnInsert": iset_op
    }
    logger.info("Delayed call to %s(%s) in %ss", name, data, delay or "0")
    logger.debug("update_one(%s, %s, upsert=True)", q, op)
    # Collection.update() was removed in pymongo 4; update_one() is the
    # equivalent single-document upsert
    scheduler.get_collection().update_one(q, op, upsert=True)
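
# Usage sketch for the call_later() defined above. The dotted callable
# path and the report_id kwarg are hypothetical placeholders. Extra
# kwargs end up in Job.ATTR_DATA and take part in the upsert query, so
# repeated calls with identical arguments collapse into one pending job.
def example_delayed_call():
    call_later(
        "noc.custom.handlers.rebuild_report",  # hypothetical callable path
        delay=60,       # run roughly one minute from now
        pool="default",
        max_runs=3,     # stored via Job.ATTR_MAX_RUNS on first insert
        report_id=42    # becomes job data and part of the dedup key
    )
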
def run_job(self, job, mo, checks):
    if job == "segment":
        scheduler = Scheduler("scheduler", pool=None, service=ServiceStub())
    else:
        scheduler = Scheduler("discovery", pool=mo.pool.name, service=ServiceStub())
    jcls = self.jcls[job]
    # Try to dereference an existing job; fall back to a fake one
    job_args = scheduler.get_collection().find_one({
        Job.ATTR_CLASS: jcls,
        Job.ATTR_KEY: mo.id
    })
    if job_args:
        self.print("Job ID: %s" % job_args["_id"])
    else:
        job_args = {
            Job.ATTR_ID: "fakeid",
            Job.ATTR_KEY: mo.id
        }
    job_args["_checks"] = checks
    job = get_handler(jcls)(scheduler, job_args)
    if job.context_version:
        ctx_key = job.get_context_cache_key()
        self.print("Loading job context from %s" % ctx_key)
        ctx = cache.get(ctx_key, version=job.context_version)
        if not ctx:
            self.print("Job context is empty")
        job.load_context(ctx)
    sample = 1 if self.trace else 0
    with Span(sample=sample):
        job.dereference()
        job.handler()
    if sample:
        spans = get_spans()
        self.print("Spans:")
        self.print("\n".join(str(s) for s in spans))
    if scheduler.service.metrics:
        self.print("Collected CH data:")
        for t in scheduler.service.metrics:
            self.print("Table: %s" % t)
            self.print("\n".join(str(x) for x in scheduler.service.metrics[t]))
    if job.context_version and job.context:
        self.print("Saving job context to %s" % ctx_key)
        scheduler.cache_set(key=ctx_key, value=job.context, version=job.context_version)
        scheduler.apply_cache_ops()
        time.sleep(3)
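
# A minimal stand-in illustrating the interface run_job() expects from
# ServiceStub: only the .metrics container read after the job finishes
# is modeled here. The attribute layout and the register_metrics hook
# are assumptions, not the original class.
from collections import defaultdict


class MinimalServiceStub(object):
    def __init__(self):
        # table name -> list of rows collected during the job run
        self.metrics = defaultdict(list)

    def register_metrics(self, table, rows):  # assumed collector hook
        self.metrics[table] += rows
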
def fix():
    for p in Pool.objects.all():
        s = Scheduler("discovery", pool=p.name)
        c = s.get_collection()
        if not c.count_documents({}):
            continue
        # Remove unused schedule fields
        c.update_many({
            "jcls": "noc.services.discovery.jobs.periodic.job.PeriodicDiscoveryJob"
        }, {
            "$unset": {
                "ctx": "",
                "ctv": ""
            }
        })
        # Reclaim the space freed by the $unset above
        c.database.command({"compact": c.name, "force": True})
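
# Verification sketch (assumptions: direct pymongo access to the same
# database and the "noc.schedules.discovery.<pool>" collection naming).
# collstats is a standard MongoDB command; comparing storageSize before
# and after fix() confirms that unsetting "ctx"/"ctv" and compacting
# actually reclaimed space.
def storage_size(db, collection_name):
    return db.command("collstats", collection_name).get("storageSize", 0)

# e.g.:
#   before = storage_size(db, "noc.schedules.discovery.default")
#   fix()
#   after = storage_size(db, "noc.schedules.discovery.default")
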