def maintain_lost(self, tags, container=None, limit=100):
    """Fail over Reserved/Running tasks whose heartbeat has gone silent.

    Scans up to ``limit`` tasks matching ``tags``/``container`` that are in
    a perishable state (Reserved or Running).  Any task whose last status
    update is older than twice its heartbeat frequency is moved to
    FailedOver and its report refreshed.

    Returns the number of tasks updated.
    """
    perishable_status = [mahler.core.status.Reserved(''),
                         mahler.core.status.Running('')]
    status_names = [status.name for status in perishable_status]
    updated = 0
    projection = {'id': 1, 'registry.heartbeat': 1}
    task_iterator = self.retrieve_tasks(
        tags=tags, container=container, status=perishable_status,
        limit=limit, _return_doc=True, _projection=projection)
    for task_document in task_iterator:
        task = Task(op=None, arguments=None, id=task_document['id'],
                    name=None, registrar=self)
        if task.status.name not in status_names:
            # Report is outdated, leave it to maintain_report to update it.
            continue

        heartbeat_frequency = task_document['registry']['heartbeat']

        # TODO: Why the hell would we need this???
        task._status.refresh(full=False)
        # The ObjectId of the last status event encodes its creation time.
        last_heartbeat = task._status.last_item['id'].generation_time
        now = datetime.datetime.now(datetime.timezone.utc)
        time_since_heartbeat = (now - last_heartbeat).total_seconds()
        # Tolerate one missed beat before declaring the task lost.
        if time_since_heartbeat < 2 * heartbeat_frequency:
            continue

        message = 'Lost heartbeat since {:0.02f}s ({:0.02f} x heartbeat)'.format(
            time_since_heartbeat, time_since_heartbeat / heartbeat_frequency)
        new_status = mahler.core.status.FailedOver(message)
        try:
            self.update_status(task, new_status)
        except (ValueError, RaceCondition):
            # Another worker changed the status first; skip this task.
            logger.debug('Task %s status changed concurrently',
                         task_document['id'])
            continue
        else:
            self.update_report(task.to_dict())
            updated += 1

    return updated
def maintain_broken(self, tags, container=None, limit=100):
    """Resolve tasks stuck in the Broken status.

    Broken tasks that already produced an output are force-completed.
    Tasks with no output are failed over only when their status message
    matches a known transient problem listed in ``TMP_BROKEN``.

    Returns the number of tasks updated.
    """
    onhold_status = mahler.core.status.Broken('')
    updated = 0
    # TODO: Implement dependencies and test
    projection = {'registry.status': 1}
    task_iterator = self.retrieve_tasks(
        tags=tags, container=container, status=onhold_status,
        limit=limit, _return_doc=True, _projection=projection)
    for task_document in task_iterator:
        task = Task(op=None, arguments=None, id=task_document['id'],
                    name=None, registrar=self)
        status = task.status
        if status.name != onhold_status.name:
            # Report is outdated, leave it to maintain_report to update it.
            continue
        # Skip tasks with no output whose failure message does not match a
        # known transient problem.
        if (not task.output
                and all(message_snippet not in status.message
                        for message_snippet in TMP_BROKEN)):
            continue
        try:
            if task.output:
                new_status = mahler.core.status.Completed(
                    'Failover completed trial')
                self.update_status(task, new_status, _force=True)
            else:
                new_status = mahler.core.status.FailedOver(
                    'Crashed because of broken node')
                self.update_status(task, new_status)
            self.update_report(task.to_dict())
        except (ValueError, RaceCondition):
            # Another worker changed the status first; skip this task.
            logger.debug('Task %s status changed concurrently', task.id)
            continue
        updated += 1
    return updated
def delay(self, *args, **kwargs):
    """Build a :class:`Task` for deferred execution of the wrapped function.

    Keyword arguments override the operator's default arguments
    (``self._arguments``); a warning is logged for each overridden key.
    Task arguments must all be named — positional arguments are not
    supported for argument passing.

    Returns the new Task (not yet registered).
    """
    # Fail fast if the function (or its restore hook) cannot be
    # re-imported by a worker process.
    self._verify_importability(self._fct)
    if self._restore:
        self._verify_importability(self._restore)

    # TODO: Turn arguments not supported as-is by pymongo into pickled objects.

    if self._arguments:
        overriding_args = [k for k in self._arguments if k in kwargs]
        if overriding_args:
            logger.warning('Overriding %s', overriding_args)
        # Defaults first, then caller-provided values take precedence.
        # Deep-copy so the operator's stored defaults are never mutated.
        tmp_kwargs = copy.deepcopy(self._arguments)
        tmp_kwargs.update(kwargs)
        kwargs = tmp_kwargs

    return Task(op=self, arguments=kwargs)
def retrieve_tasks(self, id=None, arguments=None, attributes=None,
                   tags=tuple(), container=None, status=None, limit=None,
                   sort=None, host=None, use_report=True, _return_doc=False,
                   _projection=None):
    """Yield tasks matching the given filters from the database.

    When ``_return_doc`` is true, raw task documents are yielded as-is;
    otherwise each document is materialized into a :class:`Task` bound to
    this registrar, with its container restored from the registry.
    """
    documents = self._db.retrieve_tasks(
        id, arguments, attributes, tags, container, status,
        limit=limit, sort=sort, host=host, use_report=use_report,
        projection=_projection)

    for document in documents:
        if _return_doc:
            yield document
        else:
            operator = Operator(**document['op'])
            task = Task(operator,
                        arguments=document['arguments'],
                        attributes=document['attributes'],
                        id=document['id'],
                        name=document['name'],
                        registrar=self)
            task._container = document['registry']['container']
            yield task
def maintain_onhold(self, tags, container=None, limit=100):
    """Queue OnHold tasks whose dependencies are met.

    NOTE(review): dependency resolution is not implemented yet (see the
    TODOs below); currently every OnHold task found is queued.

    Returns the number of tasks updated.
    """
    onhold_status = mahler.core.status.OnHold('')
    updated = 0
    # TODO: Implement dependencies and test
    projection = {'registry.status': 1}
    task_iterator = self.retrieve_tasks(
        tags=tags, container=container, status=onhold_status,
        limit=limit, _return_doc=True, _projection=projection)
    for task_document in task_iterator:
        task = Task(op=None, arguments=None, id=task_document['id'],
                    name=None, registrar=self)
        if task.status.name != onhold_status.name:
            # Report is outdated, leave it to maintain_report to update it.
            continue

        # TODO: Implement dependencies and test
        # task._dependencies = task_document['bounds.dependencies']

        try:
            self.update_status(
                task, mahler.core.status.Queued('dependencies met'))
            self.update_report(task.to_dict())
        except (ValueError, RaceCondition):
            # Another worker changed the status first; skip this task.
            logger.debug('Task %s status changed concurrently', task.id)
            continue

        updated += 1

    return updated
def maintain_unreported(self, limit=100):
    """Create missing reports for tasks that were registered without one.

    Tasks younger than ``MIN_TIME_WAITING`` seconds are skipped to give
    the normal registration path time to create the report itself.

    NOTE(review): ``limit`` is currently ignored — every task is scanned.
    Confirm whether it should be forwarded to ``retrieve_tasks``.
    """
    projection = {'arguments': 1, 'attributes': 1, 'name': 1, 'id': 1,
                  'op': 1, 'registry': 1}

    # Querying from immutable cores
    for task_document in self.retrieve_tasks(_return_doc=True,
                                             _projection=projection):
        # First make sure the task was registered long enough that it is
        # worth looking for a report.  The ObjectId encodes creation time.
        created_on = task_document['id'].generation_time
        now = datetime.datetime.now(datetime.timezone.utc)
        time_since_creation = (now - created_on).total_seconds()
        if time_since_creation < MIN_TIME_WAITING:
            continue

        # Looking for a report
        report_iterator = self.retrieve_tasks(
            id=task_document['id'], _return_doc=True, _projection={'id': 1})
        # Stop at the first hit instead of draining the whole iterator.
        if next(iter(report_iterator), None) is None:
            logger.info('Adding missing report for %s', task_document['id'])
            operator = Operator(**task_document['op'])
            task = Task(operator,
                        arguments=task_document['arguments'],
                        attributes=task_document['attributes'],
                        id=task_document['id'],
                        name=task_document['name'],
                        registrar=self)
            task._container = task_document['registry']['container']
            self.update_report(task.to_dict(), upsert=True)
def maintain_to_queue(self, tags, container=None, limit=100):
    """Re-queue tasks that are eligible to run again.

    Tasks in OnHold, Interrupted, FailedOver or SwitchedOver states are
    moved back to Queued; tasks that already have an output are
    force-completed instead.  Tasks whose report was refreshed less than
    ``MIN_TIME_WAITING`` seconds ago are skipped.

    Returns the number of tasks updated.
    """
    queueable_status = [mahler.core.status.OnHold(''),
                        mahler.core.status.Interrupted(''),
                        mahler.core.status.FailedOver(''),
                        mahler.core.status.SwitchedOver('')]
    status_names = [status.name for status in queueable_status]
    projection = {'registry.status': 1, 'registry.reported_on': 1}
    task_iterator = self.retrieve_tasks(
        tags=tags, container=container, status=queueable_status,
        limit=limit, _return_doc=True, _projection=projection)
    updated = 0
    for task_document in task_iterator:
        task = Task(op=None, arguments=None, id=task_document['id'],
                    name=None, registrar=self)

        # First make sure the task was updated since long enough that it is
        # worth trying to update it now.
        updated_on = task_document['registry']['reported_on'].generation_time
        now = datetime.datetime.now(datetime.timezone.utc)
        time_since_update = (now - updated_on).total_seconds()
        if time_since_update < MIN_TIME_WAITING:
            continue

        if task.status.name not in status_names:
            # Report is outdated, leave it to maintain_report to update it.
            continue

        try:
            if task.output:
                self.update_status(
                    task,
                    mahler.core.status.Completed(
                        'Task was completed and have output.'),
                    _force=True)
            else:
                self.update_status(
                    task,
                    mahler.core.status.Queued('re-queue {} task'.format(
                        task_document['registry']['status'])))
            self.update_report(task.to_dict())
        except (ValueError, RaceCondition):
            # Another worker changed the status first; skip this task.
            logger.debug('Task %s status changed concurrently', task.id)
            continue

        updated += 1

    return updated
def maintain_reports(self, tags, container=None, limit=100):
    # Refresh reports whose status, tags or output no longer match the
    # task's actual records.  Returns the number of reports updated.
    updated = 0
    volatile_status = [
        mahler.core.status.Queued(''),
        mahler.core.status.Reserved(''),
        mahler.core.status.Running('')
    ]
    queueable_status = [
        mahler.core.status.OnHold(''),
        mahler.core.status.Interrupted(''),
        mahler.core.status.FailedOver(''),
        mahler.core.status.SwitchedOver('')
    ]
    mutable_status = [
        mahler.core.status.Suspended(''),
        mahler.core.status.Acknowledged(''),
        mahler.core.status.Cancelled(''),
        mahler.core.status.Broken('')
    ]

    def is_outdated(task, task_document):
        # First make sure the report was updated long enough that it is worth looking
        # at the attributes
        updated_on = task_document['registry']['reported_on'].generation_time
        now = datetime.datetime.now(datetime.timezone.utc)
        time_since_update = (now - updated_on).total_seconds()
        if time_since_update < MIN_TIME_WAITING:
            return False

        # Outdated when the report's status, tags or output disagree with the
        # task's most recent records.
        return ((task.get_recent_status().name
                 != task_document['registry']['status'])
                or (set(task.tags) != set(task_document['registry']['tags']))
                or (task_document['output'] != task.output))

    projection = {
        'registry.status': 1,
        'registry.tags': 1,
        'output': 1,
        'registry.reported_on': 1
    }
    for status_family in [
            volatile_status, queueable_status, mutable_status
    ]:
        if limit:
            # Shrink the remaining budget by what earlier families consumed.
            # NOTE(review): if `updated` reaches or exceeds `limit`, this
            # passes 0 or a negative limit to retrieve_tasks — confirm the
            # backend treats that as "no more results" rather than unlimited.
            limit -= updated
        task_iterator = self.retrieve_tasks(tags=tags,
                                            container=container,
                                            status=status_family,
                                            limit=limit,
                                            _return_doc=True,
                                            _projection=projection)
        for task_document in task_iterator:
            task = Task(op=None, arguments=None, id=task_document['id'],
                        name=None, registrar=self)
            if is_outdated(task, task_document):
                self.update_report(task.to_dict(), update_output=True)
                updated += 1

    return updated