def check_scheduled_but_unprocessed_items(self):
    """Re-queue log entries whose status is proc_schedule, quit or stop.

    Reports how many such entries exist via ``self.inform``.  When there is
    at least one, every matching entry is marked ``proc_schedule`` with a
    fresh ``proc_timeout`` (whatever ``five_mins_later()`` returns) and is
    handed to ``self.schedule_job`` together with its ``file`` value.
    """
    pending_filter = {"status": {"$in": ["proc_schedule", "quit", "stop"]}}
    collection = self.__logs_collection__

    pending_total = collection.find(pending_filter).count()
    self.inform("Found %d scheduled but Unprocessed items" % pending_total)
    if pending_total <= 0:
        return

    for entry in collection.find(pending_filter):
        entry_id = entry.get("_id")
        file_name = entry.get("file")
        # Persist the new state before queueing the job.
        collection.update(
            {"_id": entry_id},
            {"$set": {"status": "proc_schedule",
                      "proc_timeout": five_mins_later()}},
            safe=True,
        )
        self.schedule_job(entry_id, file_name)
def check_new_files_to_be_processed(self):
    """Schedule every log entry whose status is ``ready``.

    Each such entry is marked ``proc_schedule`` with a fresh
    ``proc_timeout`` (whatever ``five_mins_later()`` returns) and passed to
    ``self.schedule_job``.  A banner is reported before the scan and a
    summary with the number of entries handled (or "NO") afterwards.
    """
    banner = "=" * 10
    self.inform("{0}Checking New Files to Process{0}".format(banner))

    collection = self.__logs_collection__
    scheduled = 0
    for scheduled, entry in enumerate(collection.find({"status": "ready"}), 1):
        entry_id = entry.get("_id")
        # Persist the new state before queueing the job.
        collection.update(
            {"_id": entry_id},
            {"$set": {"status": "proc_schedule",
                      "proc_timeout": five_mins_later()}},
            safe=True,
        )
        self.schedule_job(entry_id, entry.get("file"))

    found_label = "NO" if scheduled <= 0 else str(scheduled)
    self.inform("Found %s files to process " % found_label)
def check_scheduled_but_left_behind_jobs(self):
    """Reschedule jobs that look abandoned while scheduled or working.

    Scans log entries whose status is ``proc_schedule`` or ``working`` and
    reschedules an entry when:

    * it is ``working`` and its ``hbeat`` is missing/``None`` or older than
      one minute, or
    * it is ``proc_schedule`` and its ``proc_timeout`` has been reached.  A
      missing or ``None`` ``proc_timeout`` counts as already expired (it
      falls back to one hour in the past).

    A rescheduled entry is marked ``proc_schedule`` with a fresh
    ``proc_timeout`` (whatever ``five_mins_later()`` returns) and passed to
    ``self.schedule_job``.  Progress is reported through ``self.inform``.

    NOTE(review): timestamps are compared against ``dt.utcnow()``, so stored
    ``hbeat``/``proc_timeout`` values are presumably naive UTC datetimes --
    confirm against the writers of those fields.
    """
    self.inform("{0}Checking Left Behind Tasks to Process{0}".format("=" * 10))

    collection = self.__logs_collection__
    query = {"status": {"$in": ["proc_schedule", "working"]}}
    count = collection.find(query).count()
    if count <= 0:
        self.inform("Nothing seems to be active or left behind")
        return

    self.inform("Possibly %d working. Let's check their status" % count)

    # Fallback for entries without a usable proc_timeout: already expired.
    def_timeout = dt.utcnow() - timedelta(hours=1)
    for log in collection.find(query):
        file_name = log.get("file")
        status = log.get("status")
        now = dt.utcnow()

        should_reschedule = False
        if status == "working":
            hbeat = log.get("hbeat")
            # No heartbeat at all, or none within the last minute.
            should_reschedule = (
                hbeat is None or hbeat < now - timedelta(minutes=1)
            )
        elif status == "proc_schedule":
            timeout = log.get("proc_timeout")
            if timeout is None:
                # dict.get's default only covers a missing key; an explicit
                # None would make the datetime comparison raise TypeError.
                timeout = def_timeout
            should_reschedule = timeout <= now

        if not should_reschedule:
            continue

        self.inform("Rescheduling %s - was %s" % (file_name, status))
        collection.update(
            {"_id": log.get("_id")},
            {"$set": {"status": "proc_schedule",
                      "proc_timeout": five_mins_later()}},
            safe=True,
        )
        self.schedule_job(log.get("_id"), file_name)