Example #1
    def tick(self):
        """
        The superclass runs a tick, that is, one iteration of the scheduler, and executes all due tasks.

        This method updates the last heartbeat time of the scheduler.

        :return:    number of seconds before the next tick should run
        :rtype:     float
        """
        worker_watcher.handle_worker_heartbeat(CELERYBEAT_NAME)

        if celery_version.startswith('4') and self.schedule_changed:
            # Setting _heap = None is a workaround for this bug in Celery4
            # https://github.com/celery/celery/pull/3958
            # Once 3958 is released and updated in Fedora this can be removed
            self._heap = None

        now = ensure_tz(datetime.utcnow())
        old_timestamp = now - timedelta(seconds=constants.PULP_PROCESS_TIMEOUT_INTERVAL)

        # Update the current lock timestamp if the lock is held by this instance of celerybeat
        result = CeleryBeatLock.objects(name=CELERYBEAT_NAME).\
            update(set__timestamp=datetime.utcnow())

        # If the current instance holds the lock and has updated the lock timestamp, call super
        if result == 1:
            _logger.debug(_('Lock updated by %(celerybeat_name)s')
                          % {'celerybeat_name': CELERYBEAT_NAME})
            ret = self.call_tick(CELERYBEAT_NAME)
        else:
            # check for an old enough timestamp and remove the lock if such a stale lock is present
            CeleryBeatLock.objects(timestamp__lte=old_timestamp).delete()
            try:
                lock_timestamp = datetime.utcnow()

                # Insert new lock entry
                new_lock = CeleryBeatLock(name=CELERYBEAT_NAME, timestamp=lock_timestamp)
                new_lock.save()
                _logger.debug(_("New lock acquired by %(celerybeat_name)s") %
                              {'celerybeat_name': CELERYBEAT_NAME})

                if not self._first_lock_acq_check:
                    msg = _("Failover occurred: '%s' is now the primary celerybeat "
                            "instance") % CELERYBEAT_NAME
                    _logger.warning(msg)

                # After acquiring new lock call super to dispatch tasks
                ret = self.call_tick(CELERYBEAT_NAME)

            except mongoengine.NotUniqueError:
                # Setting a default wait time for celerybeat instances with no lock
                ret = constants.PULP_PROCESS_HEARTBEAT_INTERVAL

                if self._first_lock_acq_check:
                    _logger.info(_("Hot spare celerybeat instance '%(celerybeat_name)s' detected.")
                                 % {'celerybeat_name': CELERYBEAT_NAME})

        self._first_lock_acq_check = False
        return ret
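The mutual exclusion in this tick rests on a unique index in MongoDB: inserting a second lock document raises mongoengine.NotUniqueError, so exactly one beat instance wins. A minimal, self-contained sketch of the same pattern (class, field, and database names here are hypothetical, not Pulp's actual model):

import datetime

import mongoengine

mongoengine.connect('beat_lock_demo')  # hypothetical database


class BeatLock(mongoengine.Document):
    # a constant-valued unique field guarantees at most one lock document
    lock = mongoengine.StringField(default='block', unique=True, required=True)
    owner = mongoengine.StringField(required=True)
    timestamp = mongoengine.DateTimeField(required=True)


def try_acquire(owner_name):
    """Return True if this process inserted the lock document."""
    try:
        BeatLock(owner=owner_name, timestamp=datetime.datetime.utcnow()).save()
        return True
    except mongoengine.NotUniqueError:
        # another beat instance already holds the lock
        return False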
Example #2
    def apply_async(self, *args, **kwargs):
        """
        A wrapper around the PulpTask apply_async method. It allows us to accept a few more
        parameters than Celery does for our own purposes, listed below. It also allows us
        to create and update a task status which can be used to track the status of this
        task during its lifetime.

        :param queue:       The queue that the task has been placed into (optional, defaults to
                            the general Celery queue.)
        :type  queue:       basestring
        :param tags:        A list of tags (strings) to place onto the task, used for searching for
                            tasks by tag
        :type  tags:        list
        :param group_id:    The id that identifies which group of tasks a task belongs to
        :type group_id:     uuid.UUID
        :return:            An AsyncResult instance as returned by Celery's apply_async
        :rtype:             celery.result.AsyncResult
        """
        if celery_version.startswith('4'):
            routing_key = kwargs.get(
                'routing_key',
                defaults.NAMESPACES['task']['default_routing_key'].default)
        else:
            routing_key = kwargs.get(
                'routing_key',
                defaults.NAMESPACES['CELERY']['DEFAULT_ROUTING_KEY'].default)
        tag_list = kwargs.pop('tags', [])
        group_id = kwargs.pop('group_id', None)

        try:
            async_result = super(Task, self).apply_async(*args, **kwargs)
        except Exception:
            if 'task_id' in kwargs:
                TaskStatus.objects(task_id=kwargs['task_id']).update(
                    state=constants.CALL_ERROR_STATE)
            raise

        async_result.tags = tag_list

        # Create a new task status with the task id and tags.
        task_status = TaskStatus(task_id=async_result.id,
                                 task_type=self.name,
                                 state=constants.CALL_WAITING_STATE,
                                 worker_name=routing_key,
                                 tags=tag_list,
                                 group_id=group_id)
        # We're now racing with __call__, on_failure and on_success, any of which may
        # have completed by now. To avoid overwriting TaskStatus updates from those callbacks,
        # we'll do an upsert and only touch the fields listed below if we've inserted the object.
        task_status.save_with_set_on_insert(fields_to_set_on_insert=[
            'state', 'start_time', 'finish_time', 'result', 'error',
            'spawned_tasks', 'traceback'
        ])
        return async_result
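A hypothetical call site (the task name, tags, and routing key below are invented for illustration): tags and group_id are consumed by this wrapper and stored on the TaskStatus rather than being passed through to Celery.

# Hypothetical usage; 'sync_repo' stands in for any task bound to this Task class.
from uuid import uuid4

result = sync_repo.apply_async(
    args=['my-repo'],
    tags=['pulp:repository:my-repo', 'pulp:action:sync'],  # searchable on TaskStatus
    group_id=uuid4(),                                      # ties related tasks together
    routing_key='reserved_resource_worker-0@example.com',  # recorded as worker_name
)
print(result.id, result.tags)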
Example #3
File: tasks.py Project: kdelee/pulp
    def apply_async(self, *args, **kwargs):
        """
        A wrapper around the PulpTask apply_async method. It allows us to accept a few more
        parameters than Celery does for our own purposes, listed below. It also allows us
        to create and update a task status which can be used to track the status of this
        task during its lifetime.

        :param queue:       The queue that the task has been placed into (optional, defaults to
                            the general Celery queue.)
        :type  queue:       basestring
        :param tags:        A list of tags (strings) to place onto the task, used for searching for
                            tasks by tag
        :type  tags:        list
        :param group_id:    The id that identifies which group of tasks a task belongs to
        :type group_id:     uuid.UUID
        :return:            An AsyncResult instance as returned by Celery's apply_async
        :rtype:             celery.result.AsyncResult
        """
        if celery_version.startswith('4'):
            routing_key = kwargs.get(
                'routing_key',
                defaults.NAMESPACES['task']['default_routing_key'].default)
        else:
            routing_key = kwargs.get(
                'routing_key',
                defaults.NAMESPACES['CELERY']['DEFAULT_ROUTING_KEY'].default)
        tag_list = kwargs.pop('tags', [])
        group_id = kwargs.pop('group_id', None)

        try:
            async_result = super(Task, self).apply_async(*args, **kwargs)
        except Exception:
            if 'task_id' in kwargs:
                TaskStatus.objects(task_id=kwargs['task_id']).update(
                    state=constants.CALL_ERROR_STATE)
            raise

        async_result.tags = tag_list

        # Create a new task status with the task id and tags.
        task_status = TaskStatus(task_id=async_result.id,
                                 task_type=self.name,
                                 state=constants.CALL_WAITING_STATE,
                                 worker_name=routing_key,
                                 tags=tag_list,
                                 group_id=group_id)
        # To avoid the race condition where the __call__ method below runs before
        # this change has propagated to all db nodes, use an 'upsert' here and set
        # the task state to 'waiting' only on an insert.
        task_status.save_with_set_on_insert(
            fields_to_set_on_insert=['state', 'start_time'])
        return async_result
Example #4
    def queue_name(self):
        """
        This property is a convenience for getting the queue_name that Celery assigns to this
        Worker.

        :return: The name of the queue that this Worker is uniquely subscribed to.
        :rtype:  basestring
        """
        if not self.name:
            return ""

        queue_name = "%(name)s.dq2" if celery_version.startswith('4') else "%(name)s.dq"
        return queue_name % {'name': self.name}
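For illustration (worker name invented), under Celery 4 this yields the '.dq2' dedicated queue and under Celery 3 the older '.dq' one:

# Hypothetical worker; queue_name is the property defined above.
worker = Worker(name='reserved_resource_worker-1@example.com')
assert worker.queue_name == 'reserved_resource_worker-1@example.com.dq2'  # under Celery 4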
Example #5
File: tasks.py Project: alexxa/pulp
    def apply_async(self, *args, **kwargs):
        """
        A wrapper around the PulpTask apply_async method. It allows us to accept a few more
        parameters than Celery does for our own purposes, listed below. It also allows us
        to create and update a task status which can be used to track the status of this
        task during its lifetime.

        :param queue:       The queue that the task has been placed into (optional, defaults to
                            the general Celery queue.)
        :type  queue:       basestring
        :param tags:        A list of tags (strings) to place onto the task, used for searching for
                            tasks by tag
        :type  tags:        list
        :param group_id:    The id that identifies which group of tasks a task belongs to
        :type group_id:     uuid.UUID
        :return:            An AsyncResult instance as returned by Celery's apply_async
        :rtype:             celery.result.AsyncResult
        """
        if celery_version.startswith('4'):
            routing_key = kwargs.get('routing_key',
                                     defaults.NAMESPACES['task']['default_routing_key'].default)
        else:
            routing_key = kwargs.get('routing_key',
                                     defaults.NAMESPACES['CELERY']['DEFAULT_ROUTING_KEY'].default)
        tag_list = kwargs.pop('tags', [])
        group_id = kwargs.pop('group_id', None)

        try:
            async_result = super(Task, self).apply_async(*args, **kwargs)
        except Exception:
            if 'task_id' in kwargs:
                TaskStatus.objects(task_id=kwargs['task_id']).update(
                    state=constants.CALL_ERROR_STATE
                )
            raise

        async_result.tags = tag_list

        # Create a new task status with the task id and tags.
        task_status = TaskStatus(
            task_id=async_result.id, task_type=self.name,
            state=constants.CALL_WAITING_STATE, worker_name=routing_key, tags=tag_list,
            group_id=group_id)
        # We're now racing with __call__, on_failure and on_success, any of which may
        # have completed by now. To avoid overwriting TaskStatus updates from those callbacks,
        # we'll do an upsert and only touch the fields listed below if we've inserted the object.
        task_status.save_with_set_on_insert(fields_to_set_on_insert=[
            'state', 'start_time', 'finish_time', 'result', 'error',
            'spawned_tasks', 'traceback'])
        return async_result
Example #6
    def queue_name(self):
        """
        This property is a convenience for getting the queue_name that Celery assigns to this
        Worker.

        :return: The name of the queue that this Worker is uniquely subscribed to.
        :rtype:  basestring
        """
        if not self.name:
            return ""

        queue_name = "%(name)s.dq2" if celery_version.startswith(
            '4') else "%(name)s.dq"
        return queue_name % {'name': self.name}
Example #7
def init(celery_app,
         pg_db_uri,
         username=None,
         password=None,
         cleaning_thresholds=None,
         db_echo=False):
    # set up the db engine and session factory
    db_engine = create_engine(pg_db_uri,
                              client_encoding='utf8',
                              convert_unicode=True,
                              echo=db_echo)
    SessionMaker.configure(bind=db_engine)
    prepared = prepare_models(db_engine)
    if not prepared:
        return

    # imported for their side effects: the handlers connect to Celery signals on import
    from .signals import task_sent_handler, task_started_handler, task_retry_handler, \
        task_success_handler, task_failure_handler, task_revoked_handler
    register_after_fork(db_engine, lambda engine: engine.dispose())

    # cleaning
    if not cleaning_thresholds:
        cleaning_thresholds = {}
    # if a task hasn't finished after 3600s, assume the worker was killed and couldn't send a signal
    if "STARTED" not in cleaning_thresholds:
        cleaning_thresholds["STARTED"] = 3600
    if "SUCCESS" not in cleaning_thresholds:
        cleaning_thresholds["SUCCESS"] = 3600 * 4
    celery_app.conf.dashboard_pg_uri = pg_db_uri
    celery_app.conf.dashboard_username = username
    celery_app.conf.dashboard_password = password
    celery_app.task(name="dashboard_cleaning")(dashboard_cleaning)

    from celery import __version__ as celery_version
    for status, threshold in cleaning_thresholds.items():
        if celery_version.startswith('4'):
            beat_schedule_name = "beat_schedule"
        else:
            beat_schedule_name = "CELERYBEAT_SCHEDULE"
        getattr(celery_app.conf,
                beat_schedule_name)['clean-%s-tasks' % status.lower()] = {
                    'task': 'dashboard_cleaning',
                    'schedule': threshold,
                    'args': (status, threshold),
                    'options': {
                        'queue': 'celery_dashboard',
                        'expires': 10 * 60
                    }
                }
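A sketch of how init would be wired into an application (the broker, connection string, credentials, and thresholds are all invented values):

# Hypothetical wiring for the init() helper above.
from celery import Celery

app = Celery('myapp', broker='redis://localhost:6379/0')
init(
    app,
    pg_db_uri='postgresql://dashboard:secret@localhost:5432/celery_dashboard',
    username='admin',
    password='secret',
    cleaning_thresholds={'FAILURE': 3600 * 24},  # merged with the STARTED/SUCCESS defaults
)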
Example #8
def make_celery(settings):
    """
    Creates a new celery object which can be used to define tasks.
    """
    celery_settings = settings.get("worker", {})
    celery_config = celery_settings.get("config", {}).copy()
    celery_obj = Celery("algonaut", broker=celery_config["broker_url"])

    # we convert the queues
    queues = []
    for queue in celery_config.get("task_queues", []):
        queues.append(Queue(**queue))
    celery_config["task_queues"] = queues

    # we parse the celerybeat task schedule
    celerybeat_schedule = celery_settings.get("schedule", {})

    new_schedule = {}
    for task, params in celerybeat_schedule.items():
        params = params.copy()
        if not "schedule" in params:
            logger.warning("No schedule for task {}, skipping...".format(task))
            continue
        schedule = params.get("schedule", {})
        if "timedelta" in schedule:
            params["schedule"] = datetime.timedelta(**schedule["timedelta"])
        elif "crontab" in schedule:
            params["schedule"] = crontab(**schedule["crontab"])
        else:
            logger.error(
                "Unknown schedule format for task {}, skipping...".format(
                    task))
            continue
        new_schedule[task] = params

    celery_config["beat_schedule"] = new_schedule

    # if we use Celery 3, we map the config parameter names to the old format
    if celery_version.startswith("3."):
        # iterate over a copy of the items since keys are deleted below
        for key, value in list(celery_config.items()):
            if key in config_mapping_3_4:
                del celery_config[key]
                celery_config[config_mapping_3_4[key]] = value

    celery_obj.conf.update(**celery_config)

    return celery_obj
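Reconstructed from the lookups above, the settings structure this function expects would look roughly like this (all values invented):

# Illustrative settings dict; keys mirror the reads in make_celery above.
settings = {
    "worker": {
        "config": {
            "broker_url": "amqp://guest@localhost//",
            "task_queues": [{"name": "default"}],  # each dict becomes Queue(**queue)
        },
        "schedule": {
            "cleanup": {  # hypothetical task name
                "task": "algonaut.tasks.cleanup",
                "schedule": {"timedelta": {"minutes": 30}},
            },
        },
    },
}
celery_obj = make_celery(settings)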
Example #9
def make_celery():
    """
    Creates a new celery object which can be used to define tasks.
    """
    celery_settings = settings.get('backend.celery', {})
    celery_config = celery_settings.get('config', {})
    celery_obj = Celery("quantifiedcode", broker=celery_config['broker_url'])

    # we convert the queues
    queues = []
    for queue in celery_config.get('task_queues', []):
        queues.append(Queue(**queue))
    celery_config['task_queues'] = queues

    # we parse the celerybeat task schedule
    celerybeat_schedule = celery_settings.get('celerybeat-schedule', {})
    new_schedule = {}
    for task, params in celerybeat_schedule.items():
        params = params.copy()
        if 'schedule' not in params:
            logger.warning("No schedule for task {}, skipping...".format(task))
            continue
        schedule = params.get('schedule', {})
        if 'timedelta' in schedule:
            params['schedule'] = datetime.timedelta(**schedule['timedelta'])
        elif 'crontab' in schedule:
            params['schedule'] = crontab(**schedule['crontab'])
        else:
            logger.error(
                "Unknown schedule format for task {}, skipping...".format(
                    task))
            continue
        new_schedule[task] = params

    celery_config['beat_schedule'] = new_schedule

    # if we use Celery 3, we map the config parameter names to the old format
    if celery_version.startswith('3.'):
        # iterate over a copy of the items since keys are deleted below
        for key, value in list(celery_config.items()):
            if key in config_mapping_3_4:
                del celery_config[key]
                celery_config[config_mapping_3_4[key]] = value

    celery_obj.conf.update(**celery_config)

    return celery_obj
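Neither make_celery snippet shows config_mapping_3_4; presumably it maps the lowercase Celery 4 setting names back to their uppercase Celery 3 equivalents, roughly:

# Assumed shape of config_mapping_3_4 (not shown in the snippets above).
config_mapping_3_4 = {
    'broker_url': 'BROKER_URL',
    'task_queues': 'CELERY_QUEUES',
    'beat_schedule': 'CELERYBEAT_SCHEDULE',
    'result_backend': 'CELERY_RESULT_BACKEND',
}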
Example #10
    def setup_schedule(self):
        """
        This loads enabled schedules from the database and adds them to the
        "_schedule" dictionary as instances of celery.beat.ScheduleEntry
        """
        if not Scheduler._mongo_initialized:
            _logger.debug(
                _('Initializing Mongo client connection to read celerybeat schedule'))
            db_connection.initialize()
            Scheduler._mongo_initialized = True
        _logger.debug(_('loading schedules from app'))
        self._schedule = {}

        if celery_version.startswith('4'):
            items = self.app.conf.beat_schedule.iteritems()
        else:
            items = self.app.conf.CELERYBEAT_SCHEDULE.iteritems()

        for key, value in items:
            self._schedule[key] = beat.ScheduleEntry(**dict(value, name=key))

        # include a "0" as the default in case there are no schedules to load
        update_timestamps = [0]

        _logger.debug(_('loading schedules from DB'))
        ignored_db_count = 0
        self._loaded_from_db_count = 0
        for call in itertools.imap(ScheduledCall.from_db, utils.get_enabled()):
            if call.remaining_runs == 0:
                _logger.debug(
                    _('ignoring schedule with 0 remaining runs: %(id)s') %
                    {'id': call.id})
                ignored_db_count += 1
            else:
                self._schedule[call.id] = call.as_schedule_entry()
                update_timestamps.append(call.last_updated)
                self._loaded_from_db_count += 1

        _logger.debug(
            _('loaded %(count)d schedules') %
            {'count': self._loaded_from_db_count})

        self._most_recent_timestamp = max(update_timestamps)
Example #11
    def setup_schedule(self):
        """
        This loads enabled schedules from the database and adds them to the
        "_schedule" dictionary as instances of celery.beat.ScheduleEntry
        """
        if not Scheduler._mongo_initialized:
            _logger.debug(_('Initializing Mongo client connection to read celerybeat schedule'))
            db_connection.initialize()
            Scheduler._mongo_initialized = True
        _logger.debug(_('loading schedules from app'))
        self._schedule = {}

        if celery_version.startswith('4'):
            items = self.app.conf.beat_schedule.iteritems()
        else:
            items = self.app.conf.CELERYBEAT_SCHEDULE.iteritems()

        for key, value in items:
            self._schedule[key] = beat.ScheduleEntry(**dict(value, name=key))

        # include a "0" as the default in case there are no schedules to load
        update_timestamps = [0]

        _logger.debug(_('loading schedules from DB'))
        ignored_db_count = 0
        self._loaded_from_db_count = 0
        for call in itertools.imap(ScheduledCall.from_db, utils.get_enabled()):
            if call.remaining_runs == 0:
                _logger.debug(
                    _('ignoring schedule with 0 remaining runs: %(id)s') % {'id': call.id})
                ignored_db_count += 1
            else:
                self._schedule[call.id] = call.as_schedule_entry()
                update_timestamps.append(call.last_updated)
                self._loaded_from_db_count += 1

        _logger.debug(_('loaded %(count)d schedules') % {'count': self._loaded_from_db_count})

        self._most_recent_timestamp = max(update_timestamps)
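Each value read from the app config is a plain dict that gets splatted into celery.beat.ScheduleEntry together with its key as the name; Examples #13 and #15 below show the static CELERYBEAT_SCHEDULE these entries come from, e.g.:

# One static entry of the kind consumed above (mirroring Examples #13/#15);
# it becomes:
#   beat.ScheduleEntry(name='reap_expired_documents', task=..., schedule=..., args=())
from datetime import timedelta

CELERYBEAT_SCHEDULE = {
    'reap_expired_documents': {
        'task': 'pulp.server.db.reaper.queue_reap_expired_documents',
        'schedule': timedelta(days=1),  # interval invented; the real one is configurable
        'args': tuple(),
    },
}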
Example #12
# encoding: utf-8
'''
Celery background tasks management.

This module is DEPRECATED, use ``ckan.lib.jobs`` instead.
'''

import ConfigParser
import logging
import os

from ckan.common import config as ckan_config
from pkg_resources import iter_entry_points, VersionConflict

from celery import __version__ as celery_version, Celery
if not celery_version.startswith(u'3.'):
    raise ImportError(u'Only Celery version 3.x is supported.')

log = logging.getLogger(__name__)

log.warning('ckan.lib.celery_app is deprecated, use ckan.lib.jobs instead.')

LIST_PARAMS = """CELERY_IMPORTS ADMINS ROUTES""".split()

celery = Celery()

config = ConfigParser.ConfigParser()

config_file = os.environ.get('CKAN_CONFIG')

if not config_file:
Example #13
"""
from datetime import timedelta
import os
import ssl

from celery import Celery, __version__ as celery_version

from pulp.server.config import config
from pulp.server.constants import PULP_DJANGO_SETTINGS_MODULE

os.environ.setdefault("DJANGO_SETTINGS_MODULE", PULP_DJANGO_SETTINGS_MODULE)

broker_url = config.get('tasks', 'broker_url')
celery = Celery('tasks', broker=broker_url)

DEDICATED_QUEUE_EXCHANGE = 'C.dq2' if celery_version.startswith('4') else 'C.dq'
RESOURCE_MANAGER_QUEUE = 'resource_manager'
CELERYBEAT_SCHEDULE = {
    'reap_expired_documents': {
        'task': 'pulp.server.db.reaper.queue_reap_expired_documents',
        'schedule': timedelta(days=config.getfloat('data_reaping', 'reaper_interval')),
        'args': tuple(),
    },
    'monthly_maintenance': {
        'task': 'pulp.server.maintenance.monthly.queue_monthly_maintenance',
        'schedule': timedelta(days=30),
        'args': tuple(),
    },
    'download_deferred_content': {
        'task': 'pulp.server.controllers.repository.queue_download_deferred',
        'schedule': timedelta(minutes=config.getint('lazy', 'download_interval')),
Example #14
'''
Celery background tasks management.

This module is DEPRECATED, use ``ckan.lib.jobs`` instead.
'''

import ConfigParser
import logging
import os

from ckan.common import config as ckan_config
from pkg_resources import iter_entry_points, VersionConflict

from celery import __version__ as celery_version, Celery
if not celery_version.startswith(u'3.'):
    raise ImportError(u'Only Celery version 3.x is supported.')


log = logging.getLogger(__name__)

log.warning('ckan.lib.celery_app is deprecated, use ckan.lib.jobs instead.')

LIST_PARAMS = """CELERY_IMPORTS ADMINS ROUTES""".split()

celery = Celery()

config = ConfigParser.ConfigParser()

config_file = os.environ.get('CKAN_CONFIG')
Example #15
"""
from datetime import timedelta
import os
import ssl

from celery import Celery, __version__ as celery_version

from pulp.server.config import config
from pulp.server.constants import PULP_DJANGO_SETTINGS_MODULE

os.environ.setdefault("DJANGO_SETTINGS_MODULE", PULP_DJANGO_SETTINGS_MODULE)

broker_url = config.get('tasks', 'broker_url')
celery = Celery('tasks', broker=broker_url)

DEDICATED_QUEUE_EXCHANGE = 'C.dq2' if celery_version.startswith('4') else 'C.dq'
RESOURCE_MANAGER_QUEUE = 'resource_manager'
CELERYBEAT_SCHEDULE = {
    'reap_expired_documents': {
        'task': 'pulp.server.db.reaper.queue_reap_expired_documents',
        'schedule': timedelta(days=config.getfloat('data_reaping', 'reaper_interval')),
        'args': tuple(),
    },
    'monthly_maintenance': {
        'task': 'pulp.server.maintenance.monthly.queue_monthly_maintenance',
        'schedule': timedelta(days=30),
        'args': tuple(),
    },
Example #16
    def tick(self):
        """
        The superclass runs a tick, that is, one iteration of the scheduler, and executes all due tasks.

        This method updates the last heartbeat time of the scheduler.

        :return:    number of seconds before the next tick should run
        :rtype:     float
        """
        worker_watcher.handle_worker_heartbeat(CELERYBEAT_NAME)

        if celery_version.startswith('4') and self.schedule_changed:
            # Setting _heap = None is a workaround for this bug in Celery4
            # https://github.com/celery/celery/pull/3958
            # Once 3958 is released and updated in Fedora this can be removed
            self._heap = None

        now = ensure_tz(datetime.utcnow())
        old_timestamp = now - timedelta(
            seconds=constants.PULP_PROCESS_TIMEOUT_INTERVAL)

        # Update the current lock timestamp if the lock is held by this instance of celerybeat
        result = CeleryBeatLock.objects(name=CELERYBEAT_NAME).\
            update(set__timestamp=datetime.utcnow())

        # If the current instance holds the lock and has updated the lock timestamp, call super
        if result == 1:
            _logger.debug(
                _('Lock updated by %(celerybeat_name)s') %
                {'celerybeat_name': CELERYBEAT_NAME})
            ret = self.call_tick(CELERYBEAT_NAME)
        else:
            # check for an old enough timestamp and remove the lock if such a stale lock is present
            CeleryBeatLock.objects(timestamp__lte=old_timestamp).delete()
            try:
                lock_timestamp = datetime.utcnow()

                # Insert new lock entry
                new_lock = CeleryBeatLock(name=CELERYBEAT_NAME,
                                          timestamp=lock_timestamp)
                new_lock.save()
                _logger.debug(
                    _("New lock acquired by %(celerybeat_name)s") %
                    {'celerybeat_name': CELERYBEAT_NAME})

                if not self._first_lock_acq_check:
                    msg = _(
                        "Failover occurred: '%s' is now the primary celerybeat "
                        "instance") % CELERYBEAT_NAME
                    _logger.warning(msg)

                # After acquiring new lock call super to dispatch tasks
                ret = self.call_tick(CELERYBEAT_NAME)

            except mongoengine.NotUniqueError:
                # Setting a default wait time for celerybeat instances with no lock
                ret = constants.PULP_PROCESS_HEARTBEAT_INTERVAL

                if self._first_lock_acq_check:
                    _logger.info(
                        _("Hot spare celerybeat instance '%(celerybeat_name)s' detected.")
                        % {'celerybeat_name': CELERYBEAT_NAME})

        self._first_lock_acq_check = False
        return ret