Example 1
def postcommit_after_request(response, base_status_error_code=500):
    if response.status_code >= base_status_error_code:
        _local.postcommit_queue = OrderedDict()
        _local.postcommit_celery_queue = OrderedDict()
        return response
    try:
        if postcommit_queue():
            number_of_threads = 30  # one db connection per greenlet, let's share
            pool = Pool(number_of_threads)
            for func in postcommit_queue().values():
                pool.spawn(func)
            pool.join(timeout=5.0, raise_error=True)  # 5 second timeout and reraise exceptions

        if postcommit_celery_queue():
            if settings.USE_CELERY:
                for task_dict in postcommit_celery_queue().values():
                    task = Signature.from_dict(task_dict)
                    task.apply_async()
            else:
                for task in postcommit_celery_queue().values():
                    task()

    except AttributeError as ex:
        if not settings.DEBUG_MODE:
            logger.error('Post commit task queue not initialized: {}'.format(ex))
    return response
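For context, a minimal sketch (with a hypothetical task name and queue) of how an entry could end up in the celery queue consumed above: Celery signatures are dict subclasses, so they can be stored as plain dicts and later rebuilt with Signature.from_dict.

from collections import OrderedDict

from celery.canvas import Signature

# Hypothetical stand-in for _local.postcommit_celery_queue above.
celery_queue = OrderedDict()

# Store the task as a plain dict; Signature subclasses dict, so dict(sig)
# is a serializable snapshot of task name, args, kwargs and options.
sig = Signature('myapp.tasks.update_search_index', kwargs={'node_id': 42})
celery_queue[sig.task] = dict(sig)

# After the request (as in the example above): rebuild and send each task.
# Actually sending requires a configured Celery app and broker.
for task_dict in celery_queue.values():
    Signature.from_dict(task_dict).apply_async()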
Example 2
def setup_cluster(task, *args, **kwargs):
    cluster = kwargs['cluster']

    if '_id' in cluster:
        task.taskflow.logger.info(
            'We are using an existing cluster: %s' % cluster['name'])
    else:
        task.taskflow.logger.info('We are creating an EC2 cluster.')
        task.logger.info('Cluster name %s' % cluster['name'])
        kwargs['machine'] = cluster.get('machine')
        profile = kwargs.get('profile')
        cluster = create_ec2_cluster(
            task, cluster, profile, kwargs['image_spec'])
        task.logger.info('Cluster started.')

    # Call any follow on task
    if 'next' in kwargs:
        kwargs['cluster'] = cluster
        next = Signature.from_dict(kwargs['next'])

        if next.task == 'celery.chain':
            # If we are dealing with a chain we want to update the arg and
            # kwargs passed into the chain.
            first_task = next.kwargs['tasks'][0]
            if first_task:
                if args:
                    first_task.args = tuple(args) + tuple(first_task.args)

                if kwargs:
                    first_task.kwargs = dict(first_task.kwargs, **kwargs)

        next.delay(*args, **kwargs)
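A sketch, with assumed task names, of how the serialized 'next' step could be produced on the dispatching side. A celery.chain serializes to a dict whose task is 'celery.chain' and whose kwargs carry the child signatures, which is exactly what the branch above inspects.

from celery import chain
from celery.canvas import Signature

# Hypothetical follow-on tasks; the names are illustrative only.
provision = Signature('cumulus.tasks.provision_cluster')
submit = Signature('cumulus.tasks.submit_job')

# Serialize the chain so it can travel inside kwargs['next'].
next_dict = dict(chain(provision, submit))

restored = Signature.from_dict(next_dict)
assert restored.task == 'celery.chain'
assert restored.kwargs['tasks'][0].task == 'cumulus.tasks.provision_cluster'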
Example 3
def postcommit_after_request(response, base_status_error_code=500):
    if response.status_code >= base_status_error_code:
        _local.postcommit_queue = OrderedDict()
        _local.postcommit_celery_queue = OrderedDict()
        return response
    try:
        if postcommit_queue():
            number_of_threads = 30  # one db connection per greenlet, let's share
            pool = Pool(number_of_threads)
            for func in postcommit_queue().values():
                pool.spawn(func)
            pool.join(
                timeout=5.0,
                raise_error=True)  # 5 second timeout and reraise exceptions

        if postcommit_celery_queue():
            if settings.USE_CELERY:
                for task_dict in postcommit_celery_queue().values():
                    task = Signature.from_dict(task_dict)
                    task.apply_async()
            else:
                for task in postcommit_celery_queue().values():
                    task()

    except AttributeError as ex:
        if not settings.DEBUG_MODE:
            logger.error(
                'Post commit task queue not initialized: {}'.format(ex))
    return response
Example 4
    def execute_callback(self):
        """
        Execute serialized callback. Called via on_chord_part_return.

        There are no parameters, as everything we need is already serialized
        in the model somewhere.
        """
        callback_signature = Signature.from_dict(json.loads(self.serialized_callback))

        if any(result.status == FAILURE for result in self.completed_results.all()):
            # we either remove the failures and only return results from successful subtasks, or fail the entire chord
            if callback_signature.get('options', {}).get('propagate', current_app.conf.CELERY_CHORD_PROPAGATES):
                try:
                    raise ChordError(
                        "Error in subtasks! Ids: {}".format([
                            result.task_id
                            for result in self.completed_results.all()
                            if result.status == FAILURE
                        ])
                    )
                except ChordError as error:
                    self.mark_error(error, is_subtask=True)
                    return
            else:
                """
                Dev note: this doesn't *quite* match the behavior of the default backend.

                According to http://www.pythondoc.com/celery-3.1.11/configuration.html#celery-chord-propagates,
                the 2 options are to either propagate through (as done above), or to forward the Exception result
                into callback (versus this approach of dropping error results). It seems silly to ask callbacks to
                expect exception results as input though, so we drop them.
                """  # pylint: disable=pointless-string-statement
                for result in self.completed_results.all():
                    if result.status == FAILURE:
                        self.completed_results.remove(result)  # pylint: disable=no-member
                        result.delete()

        if callback_signature.get('options', {}).get('use_iterator', True):
            # If we're using an iterator, it's assumed to be because there are size concerns with the results
            # Thus, the callback_result TaskMeta object will have a null 'result' in the database, because you
            # stored those results someplace else as part of the callback function, right?
            try:
                callback_signature(self.completed_results.values_list('result', flat=True).iterator)
            except Exception as error:  # pylint: disable=broad-except
                self.mark_error(error, is_subtask=False)
                return
            else:
                self.callback_result.status = SUCCESS
                self.callback_result.date_done = datetime.now()
                self.callback_result.save()
        else:
            results_list = [subtask.result for subtask in self.completed_results.all()]
            callback_signature.id = self.callback_result.task_id
            callback_signature.apply_async((results_list, ), {})
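For illustration, a hedged sketch of how the serialized_callback consumed above could be produced: dump the signature's dict form to JSON and attach options such as propagate or use_iterator, which execute_callback later reads back. The task name is hypothetical.

import json

from celery.canvas import Signature

# Hypothetical chord callback; the task name is illustrative only.
callback = Signature('myapp.tasks.aggregate_results')
callback.set(propagate=False, use_iterator=True)  # stored under sig['options']

serialized_callback = json.dumps(dict(callback))

# Later, as in execute_callback above:
restored = Signature.from_dict(json.loads(serialized_callback))
assert restored.get('options', {}).get('use_iterator') is True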
Example 5
def setup_cluster(task, *args, **kwargs):
    cluster = kwargs['cluster']

    if '_id' in cluster:
        task.taskflow.logger.info('We are using an existing cluster: %s' % cluster['name'])
    else:
        task.taskflow.logger.info('We are creating an EC2 cluster.')
        task.logger.info('Cluster name %s' % cluster['name'])
        kwargs['machine'] = cluster.get('machine')
        ami = kwargs.get('ami')
        profile = kwargs.get('profile')
        cluster = create_ec2_cluster(task, cluster, profile, ami)
        task.logger.info('Cluster started.')

    # Call any follow on task
    if 'next' in kwargs:
        kwargs['cluster'] = cluster
        next = Signature.from_dict(kwargs['next'])
        next.delay(*args, **kwargs)
Example 6
def inner(*args, **kwargs):
    taskset = kwargs.pop("_taskset", None)
    rv = f(*args, **kwargs)
    if taskset is not None:
        done = False
        with transaction.atomic():
            taskset_id = taskset["taskset_id"]
            sync_row = TaskSetMeta.objects.select_for_update().filter(
                id=taskset_id).all()
            if sync_row:
                assert len(sync_row) == 1
                sync_row = sync_row[0]
                sync_row.count -= 1
                sync_row.save()
                if sync_row.count == 0:
                    logger.info("Finished taskset id %i" % taskset_id)
                    done = True
                else:
                    logger.info("Taskset %i has %i tasks remaining" % (taskset_id, sync_row.count))
        if done:
            callback = Signature.from_dict(taskset["callback"])
            callback.apply_async()
    return rv
Example 7
def inner(*args, **kwargs):
    taskset = kwargs.pop("_taskset", None)
    rv = f(*args, **kwargs)
    if taskset is not None:
        done = False
        with transaction.atomic():
            taskset_id = taskset["taskset_id"]
            sync_row = TaskSetMeta.objects.select_for_update().filter(
                id=taskset_id).all()
            if sync_row:
                assert len(sync_row) == 1
                sync_row = sync_row[0]
                sync_row.count -= 1
                sync_row.save()
                if sync_row.count == 0:
                    logger.info("Finished taskset id %i" % taskset_id)
                    done = True
                else:
                    logger.info("Taskset %i has %i tasks remaining" %
                                (taskset_id, sync_row.count))
        if done:
            callback = Signature.from_dict(taskset["callback"])
            callback.apply_async()
    return rv
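For context, a hedged sketch of the _taskset payload the decorated tasks above expect: a primary key for the TaskSetMeta counter row plus the callback signature serialized as a plain dict. The task name and id value are hypothetical.

from celery.canvas import Signature

# Hypothetical callback; the task name is illustrative only.
callback = Signature('myapp.tasks.finalize_import')

taskset_payload = {
    'taskset_id': 123,            # assumed pk of an existing TaskSetMeta row
    'callback': dict(callback),   # rebuilt later via Signature.from_dict
}

# The wrapper pops this kwarg, decrements TaskSetMeta.count under a row lock,
# and the task that brings the counter to zero fires the callback:
Signature.from_dict(taskset_payload['callback']).apply_async()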
Example 8
def postcommit_celery_task_wrapper(queue):
    # chain.apply calls the tasks synchronously without re-enqueuing each one
    # http://stackoverflow.com/questions/34177131/how-to-solve-python-celery-error-when-using-chain-encodeerrorruntimeerrormaxi?answertab=votes#tab-top
    # celery serializes signatures into dictionaries, so we need to deserialize them here
    # https://sentry.cos.io/sentry/osf-iy/issues/289209/
    chain([Signature.from_dict(task_dict) for task_dict in queue.values()]).apply()
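A small, hypothetical usage of the wrapper above: fill an ordered mapping with serialized signatures, then run them in-process, in insertion order, as a chain. The task names are placeholders; any tasks registered with the current app would do.

from collections import OrderedDict

from celery.canvas import Signature

queue = OrderedDict()
queue['send_email'] = dict(Signature('myapp.tasks.send_email', kwargs={'user_id': 7}))
queue['ping_webhook'] = dict(Signature('myapp.tasks.ping_webhook'))

# chain(...).apply() executes the tasks synchronously without re-enqueuing them.
postcommit_celery_task_wrapper(queue)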
Example 9
def setup_cluster(task, *args, **kwargs):
    cluster = kwargs['cluster']
    profile = kwargs.get('profile')
    volume = kwargs.get('volume')
    new = False

    if '_id' in cluster:
        task.taskflow.logger.info('We are using an existing cluster: %s' %
                                  cluster['name'])
    else:
        new = True
        task.taskflow.logger.info('We are creating an EC2 cluster.')
        task.logger.info('Cluster name %s' % cluster['name'])
        kwargs['machine'] = cluster.get('machine')

        if volume:
            config = cluster.setdefault('config', {})
            config['jobOutputDir'] = '/data'

        # Create the model in Girder
        cluster = create_ec2_cluster(task, cluster, profile,
                                     kwargs['image_spec'])

        # Now launch the cluster
        cluster = launch_ec2_cluster(task, cluster, profile)

        task.logger.info('Cluster started.')

    if volume and '_id' in volume:
        task.taskflow.logger.info('We are using an existing volume: %s' %
                                  volume['name'])
    elif volume:
        task.taskflow.logger.info('We are creating a new volume: "%s"' %
                                  volume['name'])
        volume = create_volume(task, volume, profile)

    # Now provision
    if new:
        provision_params = {}

        girder_token = task.taskflow.girder_token
        check_girder_cluster_status(cluster, girder_token, 'provisioning')

        # attach volume
        if volume:
            volume = _attach_volume(task, profile, volume, cluster)
            path = volume.get('path')
            if path:
                provision_params['master_nfs_exports_extra'] = [path]

        cluster = provision_ec2_cluster(task, cluster, profile,
                                        provision_params)

    # Call any follow on task
    if 'next' in kwargs:
        kwargs['cluster'] = cluster
        next = Signature.from_dict(kwargs['next'])

        if next.task == 'celery.chain':
            # If we are dealing with a chain we want to update the arg and
            # kwargs passed into the chain.
            first_task = next.kwargs['tasks'][0]
            if first_task:
                if args:
                    first_task.args = tuple(args) + tuple(first_task.args)

                if kwargs:
                    first_task.kwargs = dict(first_task.kwargs, **kwargs)

        next.delay(*args, **kwargs)
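And a brief sketch, with hypothetical tasks, of what the chain-argument merging above accomplishes: positional args intended for the next step are prepended to the chain's first task, and the accumulated kwargs are merged into it.

from celery import chain
from celery.canvas import Signature

first = Signature('myapp.tasks.step_one', args=('existing',))
second = Signature('myapp.tasks.step_two')

next_sig = Signature.from_dict(dict(chain(first, second)))
args, extra_kwargs = ('cluster-ready',), {'cluster': {'name': 'demo'}}

if next_sig.task == 'celery.chain':
    first_task = next_sig.kwargs['tasks'][0]
    first_task.args = tuple(args) + tuple(first_task.args)
    first_task.kwargs = dict(first_task.kwargs, **extra_kwargs)

assert first_task.args == ('cluster-ready', 'existing')
assert first_task.kwargs == {'cluster': {'name': 'demo'}}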