Example #1
def write(self, preferred_queue, body):
    if 'guid' in body:
        GuidMiddleware.set_guid(body['guid'])
    try:
        # when the cluster heartbeat occurs, clean up internally
        if isinstance(body, dict) and 'cluster_node_heartbeat' in body.get('task', ''):
            self.cleanup()
        if self.should_grow:
            self.up()
        # we don't care about "preferred queue" round-robin distribution, just
        # find the first non-busy worker and claim it
        workers = self.workers[:]
        random.shuffle(workers)
        for w in workers:
            if not w.busy:
                w.put(body)
                break
        else:
            return super(AutoscalePool, self).write(preferred_queue, body)
    except Exception:
        for conn in connections.all():
            # If the database connection has a hiccup, re-establish a new
            # connection
            conn.close_if_unusable_or_obsolete()
        logger.exception('failed to write inbound message')
Example #2
def delete_guid(sender: Optional[dict], **kwargs: dict) -> None:
    """
    Receiver function for when a request finishes.

    When a request is finished, delete the request's _guid reference to prevent memory leaks.

    :param sender: The sender of the signal. As the signal documentation requires, receivers must accept this parameter.
    :param kwargs: The request_finished signal does not actually send any kwargs, but Django will throw an error
        if we don't accept them. This is because at any point arguments could get added to the signal, and the receiver
        must be able to handle those new arguments.
    :return: None
    """
    logger.debug('Received signal `request_finished`')
    GuidMiddleware.delete_guid()
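For context, a hedged sketch of how a receiver like this is typically registered, using Django's standard signal API (the dispatch_uid value is illustrative):

from django.core.signals import request_finished

# fire delete_guid at the end of every request; dispatch_uid prevents
# duplicate registration if this module is imported more than once
request_finished.connect(delete_guid, dispatch_uid='delete_guid')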
Example #3
def apply_async(cls, args=None, kwargs=None, queue=None, uuid=None, **kw):
    task_id = uuid or str(uuid4())
    args = args or []
    kwargs = kwargs or {}
    queue = (queue or getattr(cls.queue, 'im_func', cls.queue))
    if not queue:
        msg = f'{cls.name}: Queue value required and may not be None'
        logger.error(msg)
        raise ValueError(msg)
    obj = {
        'uuid': task_id,
        'args': args,
        'kwargs': kwargs,
        'task': cls.name
    }
    guid = GuidMiddleware.get_guid()
    if guid:
        obj['guid'] = guid
    obj.update(**kw)
    if callable(queue):
        queue = queue()
    if not settings.IS_TESTING(sys.argv):
        with pg_bus_conn() as conn:
            conn.notify(queue, json.dumps(obj))
    return (obj, queue)
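Roughly how a call would look, assuming a task class that defines the name and queue attributes this method reads (SomeTask is hypothetical):

obj, queue = SomeTask.apply_async(args=[42], queue='dispatcher')
# obj is the published payload, e.g.:
# {'uuid': '...', 'args': [42], 'kwargs': {}, 'task': SomeTask.name,
#  'guid': '...'}  # 'guid' appears only when GuidMiddleware holds one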
Example #4
def run_callable(self, body):
    '''
    Given some AMQP message, import the correct Python code and run it.
    '''
    task = body['task']
    uuid = body.get('uuid', '<unknown>')
    args = body.get('args', [])
    kwargs = body.get('kwargs', {})
    if 'guid' in body:
        GuidMiddleware.set_guid(body.pop('guid'))
    _call = TaskWorker.resolve_callable(task)
    if inspect.isclass(_call):
        # the callable is a class, e.g., RunJob; instantiate and
        # return its `run()` method
        _call = _call().run
    # don't print kwargs, they often contain launch-time secrets
    logger.debug('task {} starting {}(*{})'.format(uuid, task, args))
    return _call(*args, **kwargs)
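For illustration, a hypothetical message body carrying the keys this method reads (the dotted task path is made up):

body = {
    'task': 'awx.main.tasks.example_task',  # hypothetical dotted path
    'uuid': '07742cab-407e-4e80-89eb-fd191acbb752',
    'args': [],
    'kwargs': {},
    'guid': '07742cab407e4e8089ebfd191acbb752',  # propagated correlation id
}
result = task_worker.run_callable(body)  # task_worker: a TaskWorker instance (assumed)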
Example #5
def _enqueue_with_reservation(
    func, resources, args=None, kwargs=None, options=None, task_group=None
):
    if not args:
        args = tuple()
    if not kwargs:
        kwargs = dict()
    if not options:
        options = dict()

    def as_url(r):
        if isinstance(r, str):
            return r
        if isinstance(r, Model):
            return util.get_url(r)
        raise ValueError(_("Must be (str|Model)"))

    resources = {as_url(r) for r in resources}
    inner_task_id = str(uuid.uuid4())
    resource_task_id = str(uuid.uuid4())
    redis_conn = connection.get_redis_connection()
    current_job = get_current_job(connection=redis_conn)
    parent_kwarg = {}
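    # the return values are discarded; with NonJSONWarningEncoder these dumps
    # only serve to surface warnings for non-JSON-serializable arguments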
    json.dumps(args, cls=NonJSONWarningEncoder)
    json.dumps(kwargs, cls=NonJSONWarningEncoder)
    if current_job:
        # set the parent task of the spawned task to the current task ID (same as rq Job ID)
        parent_kwarg["parent_task"] = Task.objects.get(pk=current_job.id)

    with transaction.atomic():
        task = Task.objects.create(
            pk=inner_task_id,
            _resource_job_id=resource_task_id,
            state=TASK_STATES.WAITING,
            logging_cid=(GuidMiddleware.get_guid() or ""),
            task_group=task_group,
            name=f"{func.__module__}.{func.__name__}",
            **parent_kwarg,
        )
        for resource in resources:
            reservation_record = ReservedResourceRecord.objects.get_or_create(resource=resource)[0]
            TaskReservedResourceRecord.objects.create(resource=reservation_record, task=task)

        task_args = (func, inner_task_id, list(resources), args, kwargs, options)
        try:
            q = Queue("resource-manager", connection=redis_conn)
            q.enqueue(
                _queue_reserved_task,
                job_id=resource_task_id,
                args=task_args,
                job_timeout=TASK_TIMEOUT,
            )
        except RedisConnectionError as e:
            task.set_failed(e, None)

    return Job(id=inner_task_id, connection=redis_conn)
Example #6
def __init__(self, model=None):
    self.parent_workflow_job_id = None
    self.host_map = {}
    self.guid = GuidMiddleware.get_guid()
    self.job_created = None
    self.recent_event_timings = deque(maxlen=settings.MAX_WEBSOCKET_EVENT_RATE)
    self.dispatcher = CallbackQueueDispatcher()
    self.safe_env = {}
    self.event_ct = 0
    self.model = model
Example #7
def run(self):
    ppid = os.getppid()
    logger.warning('periodic beat started')
    while True:
        if os.getppid() != ppid:
            # if the parent PID changes, this process has been orphaned
            # via e.g., segfault or sigkill; we should exit too
            pid = os.getpid()
            logger.warning(f'periodic beat exiting gracefully pid:{pid}')
            raise SystemExit()
        try:
            for conn in connections.all():
                # If the database connection has a hiccup, re-establish a new
                # connection
                conn.close_if_unusable_or_obsolete()
            GuidMiddleware.set_guid(GuidMiddleware._generate_guid())
            self.run_pending()
        except Exception:
            logger.exception('encountered an error while scheduling periodic tasks')
        time.sleep(idle_seconds)
Example #8
    def filter(self, record: LogRecord) -> bool:
        """
        Determine whether the specified record is to be logged.

        From the logging docs:
            Is the specified record to be logged? Returns 0 for no, nonzero for
            yes. If deemed appropriate, the record may be modified in-place.

        :param record: Log record
        :return: True
        """
        record.correlation_id = GuidMiddleware.get_guid()
        return True
Example #9
    def perform_job(self, job, queue):
        """
        Set the :class:`pulpcore.app.models.Task` to running and init logging.

        This method is called by the worker's work horse (the forked child process) just before
        the task begins executing.

        Args:
            job (rq.job.Job): The job to perform
            queue (rq.queue.Queue): The Queue associated with the job
        """
        try:
            task = Task.objects.get(pk=job.get_id())
        except Task.DoesNotExist:
            pass
        else:
            task.set_running()
            user = get_users_with_perms(task).first()
            _set_current_user(user)
            GuidMiddleware.set_guid(task.logging_cid)

        with TaskWorkingDirectory(job):
            return super().perform_job(job, queue)
Example #10
    def perform_work(self, body):
        try:
            flush = body.get('event') == 'FLUSH'
            if flush:
                self.last_event = ''
            if not flush:
                event_map = {
                    'job_id': JobEvent,
                    'ad_hoc_command_id': AdHocCommandEvent,
                    'project_update_id': ProjectUpdateEvent,
                    'inventory_update_id': InventoryUpdateEvent,
                    'system_job_id': SystemJobEvent,
                }

                job_identifier = 'unknown job'
                for key, cls in event_map.items():
                    if key in body:
                        job_identifier = body[key]
                        break

                self.last_event = f'\n\t- {cls.__name__} for #{job_identifier} ({body.get("event", "")} {body.get("uuid", "")})'  # noqa

                if body.get('event') == 'EOF':
                    try:
                        if 'guid' in body:
                            GuidMiddleware.set_guid(body['guid'])
                        final_counter = body.get('final_counter', 0)
                        logger.info(
                            'Event processing is finished for Job {}, sending notifications'
                            .format(job_identifier))
                        # EOF events are sent when stdout for the running task is
                        # closed. don't actually persist them to the database; we
                        # just use them to report `summary` websocket events as an
                        # approximation for when a job is "done"
                        emit_channel_notification(
                            'jobs-summary',
                            dict(group_name='jobs',
                                 unified_job_id=job_identifier,
                                 final_counter=final_counter))
                        # Additionally, when we've processed all events, we should
                        # have all the data we need to send out success/failure
                        # notification templates
                        uj = UnifiedJob.objects.get(pk=job_identifier)

                        if isinstance(uj, Job):
                            # *actual playbooks* send their success/failure
                            # notifications in response to the playbook_on_stats
                            # event handling code in main.models.events
                            pass
                        elif hasattr(uj, 'send_notification_templates'):
                            handle_success_and_failure_notifications.apply_async(
                                [uj.id])
                    except Exception:
                        logger.exception(
                            'Worker failed to emit notifications: Job {}'.
                            format(job_identifier))
                    finally:
                        self.subsystem_metrics.inc(
                            'callback_receiver_events_in_memory', -1)
                        GuidMiddleware.set_guid('')
                    return

                skip_websocket_message = body.pop('skip_websocket_message', False)

                event = cls.create_from_data(**body)

                if skip_websocket_message:
                    event._skip_websocket_message = True

                self.buff.setdefault(cls, []).append(event)

            retries = 0
            while retries <= self.MAX_RETRIES:
                try:
                    self.flush(force=flush)
                    break
                except (OperationalError, InterfaceError, InternalError):
                    if retries >= self.MAX_RETRIES:
                        logger.exception(
                            'Worker could not re-establish database connectivity, giving up on one or more events.'
                        )
                        return
                    delay = 60 * retries
                    logger.exception(
                        'Database Error Saving Job Event, retry #{i} in {delay} seconds:'
                        .format(i=retries + 1, delay=delay))
                    django_connection.close()
                    time.sleep(delay)
                    retries += 1
                except DatabaseError:
                    logger.exception('Database Error Saving Job Event')
                    break
        except Exception as exc:
            tb = traceback.format_exc()
            logger.error('Callback Task Processor Raised Exception: %r', exc)
            logger.error('Detail: {}'.format(tb))
Example #11
def filter(self, record):
    guid = GuidMiddleware.get_guid() or '-'
    if MODE == 'development':
        guid = guid[:8]
    record.guid = guid
    return True
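The same idea attached through the stdlib logging API directly; GuidFilter stands in for whatever class this filter method belongs to:

import logging

handler = logging.StreamHandler()
handler.addFilter(GuidFilter())  # GuidFilter: hypothetical name for the class above
handler.setFormatter(logging.Formatter('%(asctime)s %(guid)s %(name)s %(message)s'))
logging.getLogger().addHandler(handler)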
Example #12
def _queue_reserved_task(func, inner_task_id, resources, inner_args, inner_kwargs, options):
    """
    A task that encapsulates another task to be dispatched later.

    The task being encapsulated is called the "inner" task. This function accepts the inner
    task's function, UUID, and a list of positional args and keyword args for the inner task.
    These arguments are named inner_args and inner_kwargs; inner_args is a list and inner_kwargs
    is a dictionary, and they are passed to the inner task as positional and keyword arguments
    using the * and ** operators.

    The inner task is dispatched into a dedicated queue for a worker that is decided at dispatch
    time. The logic deciding which queue receives a task is controlled through the
    find_worker function.

    Args:
        func (callable): The function to be called.
        inner_task_id (str): The task_id to be set on the task being called. By providing
            the UUID, the caller can have an asynchronous reference to the inner task
            that will be dispatched.
        resources (list): The urls of the resources you wish to reserve for your task.
            The system will ensure that no other tasks that want that same reservation will run
            concurrently with yours.
        inner_args (tuple): The positional arguments to pass on to the task.
        inner_kwargs (dict): The keyword arguments to pass on to the task.
        options (dict): Options passed through to enqueue; see the RQ docs for the accepted keys.
    """
    redis_conn = connection.get_redis_connection()
    task_status = Task.objects.get(pk=inner_task_id)
    GuidMiddleware.set_guid(task_status.logging_cid)
    task_name = func.__module__ + "." + func.__name__

    while True:
        if task_name == "pulpcore.app.tasks.orphan.orphan_cleanup":
            if ReservedResource.objects.exists():
                # wait until there are no reservations
                time.sleep(0.25)
                continue
            else:
                rq_worker = util.get_current_worker()
                worker = Worker.objects.get(name=rq_worker.name)
                task_status.worker = worker
                task_status.set_running()
                q = Queue("resource-manager", connection=redis_conn, is_async=False)
                try:
                    q.enqueue(
                        func,
                        args=inner_args,
                        kwargs=inner_kwargs,
                        job_id=inner_task_id,
                        job_timeout=TASK_TIMEOUT,
                        **options,
                    )
                    task_status.set_completed()
                except RedisConnectionError as e:
                    task_status.set_failed(e, None)
                return

        try:
            with transaction.atomic():
                # lock the worker - there is a similar lock in mark_worker_offline()
                worker = _acquire_worker(resources)

                # Attempt to lock all resources by their urls. Must be atomic to prevent deadlocks.
                for resource in resources:
                    if worker.reservations.filter(resource=resource).exists():
                        reservation = worker.reservations.get(resource=resource)
                    else:
                        reservation = ReservedResource.objects.create(
                            worker=worker, resource=resource
                        )
                    TaskReservedResource.objects.create(resource=reservation, task=task_status)
        except (Worker.DoesNotExist, IntegrityError):
            # if no worker is ready, or we have a worker but we can't create the reservations, wait
            time.sleep(0.25)
        else:
            # we have a worker with the locks
            break

    task_status.worker = worker
    task_status.save()

    try:
        q = Queue(worker.name, connection=redis_conn)
        q.enqueue(
            func,
            args=inner_args,
            kwargs=inner_kwargs,
            job_id=inner_task_id,
            job_timeout=TASK_TIMEOUT,
            **options,
        )
    except RedisConnectionError as e:
        task_status.set_failed(e, None)
Example #13
def enqueue_with_reservation(
    func, resources, args=None, kwargs=None, options=None, task_group=None
):
    """
    Enqueue a message to Pulp workers with a reservation.

    This method provides normal enqueue functionality, while also requesting necessary locks for
    serialized urls. No two tasks that claim the same resource can execute concurrently. It
    accepts resources which it transforms into a list of urls (one for each resource).

    This does not dispatch the task directly, but instead promises to dispatch it later by
    encapsulating the desired task through a call to a :func:`_queue_reserved_task` task. See
    the docblock on :func:`_queue_reserved_task` for more information on this.

    This method creates a :class:`pulpcore.app.models.Task` object. Pulp expects to poll on a
    task just after calling this method, so a Task entry needs to exist for it
    before it returns.

    Args:
        func (callable): The function to be run by RQ when the necessary locks are acquired.
        resources (list): A list of resources to reserve guaranteeing that only one task reserves
                          these resources. Each resource can be either a (str) resource URL or a
                          (django.models.Model) resource instance.
        args (tuple): The positional arguments to pass on to the task.
        kwargs (dict): The keyword arguments to pass on to the task.
        options (dict): The options to be passed on to the task.
        task_group (pulpcore.app.models.TaskGroup): A TaskGroup to add the created Task to.

    Returns (rq.job.Job): An RQ Job instance that tracks the inner task (its id is the inner
        task's UUID).

    Raises:
        ValueError: When `resources` is an unsupported type.
    """
    if not args:
        args = tuple()
    if not kwargs:
        kwargs = dict()
    if not options:
        options = dict()

    def as_url(r):
        if isinstance(r, str):
            return r
        if isinstance(r, Model):
            return util.get_url(r)
        raise ValueError(_("Must be (str|Model)"))

    resources = {as_url(r) for r in resources}
    inner_task_id = str(uuid.uuid4())
    resource_task_id = str(uuid.uuid4())
    redis_conn = connection.get_redis_connection()
    current_job = get_current_job(connection=redis_conn)
    parent_kwarg = {}
    if current_job:
        # set the parent task of the spawned task to the current task ID (same as rq Job ID)
        parent_kwarg["parent_task"] = Task.objects.get(pk=current_job.id)

    with transaction.atomic():
        task = Task.objects.create(
            pk=inner_task_id,
            _resource_job_id=resource_task_id,
            state=TASK_STATES.WAITING,
            logging_cid=(GuidMiddleware.get_guid() or ""),
            task_group=task_group,
            name=f"{func.__module__}.{func.__name__}",
            **parent_kwarg,
        )
        for resource in resources:
            reservation_record = ReservedResourceRecord.objects.get_or_create(resource=resource)[0]
            TaskReservedResourceRecord.objects.create(resource=reservation_record, task=task)

        task_args = (func, inner_task_id, list(resources), args, kwargs, options)
        try:
            q = Queue("resource-manager", connection=redis_conn)
            q.enqueue(
                _queue_reserved_task,
                job_id=resource_task_id,
                args=task_args,
                job_timeout=TASK_TIMEOUT,
            )
        except RedisConnectionError as e:
            task.set_failed(e, None)

    return Job(id=inner_task_id, connection=redis_conn)
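A hedged usage sketch, assuming a Pulp-style task function sync and a repository model instance (both names are illustrative):

job = enqueue_with_reservation(
    sync,                      # hypothetical task function
    resources=[repository],    # a Model instance; converted to a URL via as_url()
    kwargs={'mirror': True},
)
# job.id is the inner task's UUID; the Task row created above can be
# polled with Task.objects.get(pk=job.id)
print(job.id)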
def test_is_valid_dashed_guid():
    assert GuidMiddleware._validate_guid('07742cab-407e-4e80-89eb-fd191acbb752') is True


def test_valid_guid():
    assert GuidMiddleware._validate_guid('07742cab407e4e8089ebfd191acbb752') is True
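A negative case in the same style, assuming _validate_guid returns False for malformed input:

def test_invalid_guid():
    # not a valid hex UUID, so validation is expected to fail
    assert GuidMiddleware._validate_guid('not-a-guid') is False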