Exemple #1
0
    def maintain_lost(self, tags, container=None, limit=100):
        """Fail over Reserved/Running tasks whose heartbeat has gone silent.

        Task documents are fetched through ``retrieve_tasks`` for the given
        ``tags``/``container``, filtered on the Reserved and Running statuses
        and capped by ``limit``. A task is switched to ``FailedOver`` when the
        time elapsed since its last status item is at least twice the
        ``registry.heartbeat`` value stored in its document.

        Returns the number of tasks whose status and report were updated.
        """
        perishable_status = [
            mahler.core.status.Reserved(''),
            mahler.core.status.Running(''),
        ]
        perishable_names = [candidate.name for candidate in perishable_status]

        documents = self.retrieve_tasks(
            tags=tags,
            container=container,
            status=perishable_status,
            limit=limit,
            _return_doc=True,
            _projection={'id': 1, 'registry.heartbeat': 1})

        failed_over = 0
        for document in documents:
            task = Task(op=None,
                        arguments=None,
                        id=document['id'],
                        name=None,
                        registrar=self)

            # The report that matched the query may be stale; if the actual
            # status differs, maintain_report is expected to fix the report.
            if task.status.name not in perishable_names:
                continue

            period = document['registry']['heartbeat']

            # TODO: it is unclear why this explicit refresh is required before
            # reading the last status item.
            task._status.refresh(full=False)
            last_beat = task._status.last_item['id'].generation_time
            silence = (datetime.datetime.now(datetime.timezone.utc) -
                       last_beat).total_seconds()

            # Tolerate up to two heartbeat periods of silence.
            if silence < 2 * period:
                continue

            lost = mahler.core.status.FailedOver(
                'Lost heartbeat since {:0.02f}s ({:0.02f} x heartbeat)'.format(
                    silence, silence / period))
            try:
                self.update_status(task, lost)
            except (ValueError, RaceCondition):
                logger.debug('Task {} status changed concurrently'.format(
                    document['id']))
                continue

            # Only reached when the status update succeeded.
            self.update_report(task.to_dict())
            failed_over += 1

        return failed_over
Exemple #2
0
    def maintain_broken(self, tags, container=None, limit=100):
        """Recover tasks currently marked as Broken.

        Task documents with a Broken status are fetched through
        ``retrieve_tasks`` for the given ``tags``/``container`` (at most
        ``limit``). A Broken task is handled only if it has output, or if its
        status message contains one of the snippets listed in ``TMP_BROKEN``:

        * with output, it is forced to ``Completed``;
        * without output, it is switched to ``FailedOver``.

        Returns the number of tasks whose status and report were updated.
        """
        broken = mahler.core.status.Broken('')

        documents = self.retrieve_tasks(
            tags=tags,
            container=container,
            status=broken,
            limit=limit,
            _return_doc=True,
            _projection={'registry.status': 1})

        recovered = 0
        for document in documents:
            task = Task(op=None,
                        arguments=None,
                        id=document['id'],
                        name=None,
                        registrar=self)

            current = task.status
            if current.name != broken.name:
                # The report is stale; maintain_report is expected to fix it.
                continue

            # Without output, only breakages whose message matches a
            # TMP_BROKEN snippet are recovered.
            recoverable = task.output or any(
                snippet in current.message for snippet in TMP_BROKEN)
            if not recoverable:
                continue

            try:
                if task.output:
                    self.update_status(
                        task,
                        mahler.core.status.Completed(
                            'Failover completed trial'),
                        _force=True)
                else:
                    self.update_status(
                        task,
                        mahler.core.status.FailedOver(
                            'Crashed because of broken node'))
                self.update_report(task.to_dict())
            except (ValueError, RaceCondition):
                logger.debug('Task {} status changed concurrently'.format(
                    task.id))
                continue

            recovered += 1

        return recovered
Exemple #3
0
    def delay(self, *args, **kwargs):
        """Create a Task for this operator from keyword arguments.

        The operator function (and the restore function, when one is set) is
        first checked with ``_verify_importability``. Any arguments already
        stored on the operator in ``self._arguments`` are deep-copied and then
        overridden by ``kwargs``; a warning is logged listing the overridden
        names.

        Positional arguments are not supported: they are never forwarded to
        the task. A warning is logged when some are given so the loss is not
        silent.

        Returns a ``Task`` built with ``op=self`` and the merged arguments.
        """
        self._verify_importability(self._fct)
        if self._restore:
            self._verify_importability(self._restore)

        if args:
            # Previously these were dropped without any trace.
            logger.warning(
                'Ignoring {} positional argument(s); only keyword arguments '
                'are supported'.format(len(args)))

        # TODO: Fetch default arguments of the task (and of restore if given)
        #       and make sure every argument is given as name:value.
        # TODO: Turn arguments not supported as-is by pymongo into pickled
        #       objects.

        if self._arguments:
            overriding_args = [k for k in self._arguments if k in kwargs]
            if overriding_args:
                logger.warning('Overriding {}'.format(overriding_args))

            # Deep copy so the operator's stored arguments are never mutated
            # by a single call.
            merged = copy.deepcopy(self._arguments)
            merged.update(kwargs)
            kwargs = merged

        return Task(op=self, arguments=kwargs)
Exemple #4
0
    def retrieve_tasks(self,
                       id=None,
                       arguments=None,
                       attributes=None,
                       tags=tuple(),
                       container=None,
                       status=None,
                       limit=None,
                       sort=None,
                       host=None,
                       use_report=True,
                       _return_doc=False,
                       _projection=None):
        """Iterate over tasks matching the given filters.

        All filter arguments are forwarded unchanged to
        ``self._db.retrieve_tasks``; ``_projection`` is passed on as its
        ``projection`` keyword.

        This is a generator. When ``_return_doc`` is true the documents
        produced by the database layer are yielded as-is. Otherwise each
        document is turned into a ``Task`` (with its operator rebuilt from the
        ``'op'`` entry and ``_container`` set from ``registry.container``).
        """
        documents = self._db.retrieve_tasks(id,
                                            arguments,
                                            attributes,
                                            tags,
                                            container,
                                            status,
                                            limit=limit,
                                            sort=sort,
                                            host=host,
                                            use_report=use_report,
                                            projection=_projection)

        if _return_doc:
            # Raw mode: hand the documents back untouched.
            for document in documents:
                yield document
            return

        for document in documents:
            task = Task(Operator(**document['op']),
                        arguments=document['arguments'],
                        attributes=document['attributes'],
                        id=document['id'],
                        name=document['name'],
                        registrar=self)
            task._container = document['registry']['container']
            yield task
Exemple #5
0
    def maintain_onhold(self, tags, container=None, limit=100):
        """Queue tasks that are currently on hold.

        Task documents with an OnHold status are fetched through
        ``retrieve_tasks`` for the given ``tags``/``container`` (at most
        ``limit``). Every task whose actual status is still OnHold is switched
        to ``Queued`` and its report is refreshed.

        Returns the number of tasks whose status and report were updated.
        """
        on_hold = mahler.core.status.OnHold('')

        # TODO: Implement dependencies and test
        documents = self.retrieve_tasks(
            tags=tags,
            container=container,
            status=on_hold,
            limit=limit,
            _return_doc=True,
            _projection={'registry.status': 1})

        queued = 0
        for document in documents:
            task = Task(op=None,
                        arguments=None,
                        id=document['id'],
                        name=None,
                        registrar=self)

            if task.status.name != on_hold.name:
                # The report is stale; maintain_report is expected to fix it.
                continue

            # TODO: Implement dependencies and test
            # task._dependencies = task_document['bounds.dependencies']
            try:
                self.update_status(
                    task, mahler.core.status.Queued('dependencies met'))
                self.update_report(task.to_dict())
            except (ValueError, RaceCondition):
                logger.debug('Task {} status changed concurrently'.format(
                    task.id))
                continue

            queued += 1

        return queued
Exemple #6
0
    def maintain_unreported(self, limit=100):
        """Add a report for registered tasks that do not have one.

        Every task document returned by ``retrieve_tasks`` is considered once
        it is at least ``MIN_TIME_WAITING`` seconds old (age is taken from the
        ``generation_time`` of its ``id``). If a lookup by id through
        ``retrieve_tasks`` yields nothing, the task is rebuilt from the
        document and its report is upserted.

        ``limit`` bounds the number of reports added in a single call; a
        falsy value (``None`` or ``0``) means no bound.

        Returns the number of reports added.
        """
        projection = {
            'arguments': 1,
            'attributes': 1,
            'name': 1,
            'id': 1,
            'op': 1,
            'registry': 1
        }

        added = 0

        # NOTE(review): this loop is meant to query the immutable task cores,
        # yet both calls below rely on retrieve_tasks' default
        # use_report=True -- confirm which collection each one should read.
        for task_document in self.retrieve_tasks(_return_doc=True,
                                                 _projection=projection):
            # First make sure the task was registered long enough ago that it
            # is worth looking for a report.
            created_on = task_document['id'].generation_time
            now = datetime.datetime.now(datetime.timezone.utc)
            time_since_creation = (now - created_on).total_seconds()
            if time_since_creation < MIN_TIME_WAITING:
                continue

            # Looking for a report; a single hit is enough, so stop at the
            # first document instead of counting them all.
            report_iterator = self.retrieve_tasks(id=task_document['id'],
                                                  _return_doc=True,
                                                  _projection={'id': 1})
            if any(True for _ in report_iterator):
                continue

            logger.info('Adding missing report for {}'.format(
                task_document['id']))
            operator = Operator(**task_document['op'])
            task = Task(operator,
                        arguments=task_document['arguments'],
                        attributes=task_document['attributes'],
                        id=task_document['id'],
                        name=task_document['name'],
                        registrar=self)
            task._container = task_document['registry']['container']

            self.update_report(task.to_dict(), upsert=True)
            added += 1

            # Honour the caller's budget (previously `limit` was ignored).
            if limit and added >= limit:
                break

        return added
Exemple #7
0
    def maintain_to_queue(self, tags, container=None, limit=100):
        """Re-queue (or complete) tasks sitting in a queueable status.

        Task documents with an OnHold, Interrupted, FailedOver or SwitchedOver
        status are fetched through ``retrieve_tasks`` for the given
        ``tags``/``container`` (at most ``limit``). Documents whose
        ``registry.reported_on`` is younger than ``MIN_TIME_WAITING`` seconds
        are skipped. For the others:

        * a task that has output is forced to ``Completed``;
        * any other task is switched to ``Queued``.

        Returns the number of tasks whose status and report were updated.
        """
        queueable_status = [
            mahler.core.status.OnHold(''),
            mahler.core.status.Interrupted(''),
            mahler.core.status.FailedOver(''),
            mahler.core.status.SwitchedOver(''),
        ]
        queueable_names = [candidate.name for candidate in queueable_status]

        documents = self.retrieve_tasks(
            tags=tags,
            container=container,
            status=queueable_status,
            limit=limit,
            _return_doc=True,
            _projection={'registry.status': 1, 'registry.reported_on': 1})

        requeued = 0
        for document in documents:
            task = Task(op=None,
                        arguments=None,
                        id=document['id'],
                        name=None,
                        registrar=self)
            registry = document['registry']

            # Skip reports that were updated too recently to be worth
            # touching again.
            reported_on = registry['reported_on'].generation_time
            idle = (datetime.datetime.now(datetime.timezone.utc) -
                    reported_on).total_seconds()
            if idle < MIN_TIME_WAITING:
                continue

            # The report is stale; maintain_report is expected to fix it.
            if task.status.name not in queueable_names:
                continue

            try:
                if task.output:
                    completed = mahler.core.status.Completed(
                        'Task was completed and have output.')
                    self.update_status(task, completed, _force=True)
                else:
                    queued = mahler.core.status.Queued(
                        're-queue {} task'.format(registry['status']))
                    self.update_status(task, queued)
                self.update_report(task.to_dict())
            except (ValueError, RaceCondition):
                logger.debug('Task {} status changed concurrently'.format(
                    task.id))
                continue

            requeued += 1

        return requeued
Exemple #8
0
    def maintain_reports(self, tags, container=None, limit=100):
        """Refresh reports that no longer match their task.

        Three status families are scanned in turn (volatile, queueable,
        mutable). For each one, task documents are fetched through
        ``retrieve_tasks`` for the given ``tags``/``container``. A report is
        rewritten (including its output) when it is at least
        ``MIN_TIME_WAITING`` seconds old and its status, tags or output
        differ from the task's.

        ``limit`` is a budget shared by the three families: each query is
        capped by what is left of it after the updates already made, and
        scanning stops once it is exhausted. A falsy ``limit`` is passed to
        ``retrieve_tasks`` unchanged.

        Returns the number of reports updated.
        """
        volatile_status = [
            mahler.core.status.Queued(''),
            mahler.core.status.Reserved(''),
            mahler.core.status.Running('')
        ]
        queueable_status = [
            mahler.core.status.OnHold(''),
            mahler.core.status.Interrupted(''),
            mahler.core.status.FailedOver(''),
            mahler.core.status.SwitchedOver('')
        ]
        mutable_status = [
            mahler.core.status.Suspended(''),
            mahler.core.status.Acknowledged(''),
            mahler.core.status.Cancelled(''),
            mahler.core.status.Broken('')
        ]

        def is_outdated(task, task_document):
            """Tell whether the report differs from the task's actual state."""
            # First make sure the report was updated long enough ago that it
            # is worth looking at the attributes.
            updated_on = task_document['registry'][
                'reported_on'].generation_time
            now = datetime.datetime.now(datetime.timezone.utc)
            time_since_update = (now - updated_on).total_seconds()
            if time_since_update < MIN_TIME_WAITING:
                return False

            return ((task.get_recent_status().name !=
                     task_document['registry']['status']) or
                    (set(task.tags) != set(task_document['registry']['tags']))
                    or (task_document['output'] != task.output))

        projection = {
            'registry.status': 1,
            'registry.tags': 1,
            'output': 1,
            'registry.reported_on': 1
        }

        updated = 0
        for status_family in (volatile_status, queueable_status,
                              mutable_status):

            # Derive the remaining budget from the ORIGINAL limit. Decrementing
            # `limit` in place by the cumulative `updated` would subtract
            # earlier updates again on every pass and could hand a zero or
            # negative limit to the backend.
            remaining = limit
            if limit:
                remaining = limit - updated
                if remaining <= 0:
                    break

            task_iterator = self.retrieve_tasks(tags=tags,
                                                container=container,
                                                status=status_family,
                                                limit=remaining,
                                                _return_doc=True,
                                                _projection=projection)

            for task_document in task_iterator:

                task = Task(op=None,
                            arguments=None,
                            id=task_document['id'],
                            name=None,
                            registrar=self)

                if is_outdated(task, task_document):
                    self.update_report(task.to_dict(), update_output=True)
                    updated += 1

        return updated