예제 #1
0
    def run_callable(self, body):
        """
        Given some AMQP message, import the correct Python code and run it.
        """
        task = body['task']
        uuid = body.get('uuid', '<unknown>')
        args = body.get('args', [])
        kwargs = body.get('kwargs', {})
        if 'guid' in body:
            set_guid(body.pop('guid'))
        _call = TaskWorker.resolve_callable(task)
        if inspect.isclass(_call):
            # the callable is a class, e.g., RunJob; instantiate and
            # return its `run()` method
            _call = _call().run

        log_extra = ''
        logger_method = logger.debug
        if ('time_ack' in body) and ('time_pub' in body):
            time_publish = body['time_ack'] - body['time_pub']
            time_waiting = time.time() - body['time_ack']
            if time_waiting > 5.0 or time_publish > 5.0:
                # If task too a very long time to process, add this information to the log
                log_extra = f' took {time_publish:.4f} to ack, {time_waiting:.4f} in local dispatcher'
                logger_method = logger.info
        # don't print kwargs, they often contain launch-time secrets
        logger_method(f'task {uuid} starting {task}(*{args}){log_extra}')

        return _call(*args, **kwargs)
예제 #2
0
파일: pool.py 프로젝트: timkids/awx
 def write(self, preferred_queue, body):
     if 'guid' in body:
         set_guid(body['guid'])
     try:
         # when the cluster heartbeat occurs, clean up internally
         if isinstance(body,
                       dict) and 'cluster_node_heartbeat' in body['task']:
             self.cleanup()
         if self.should_grow:
             self.up()
         # we don't care about "preferred queue" round robin distribution, just
         # find the first non-busy worker and claim it
         workers = self.workers[:]
         random.shuffle(workers)
         for w in workers:
             if not w.busy:
                 w.put(body)
                 break
         else:
             return super(AutoscalePool, self).write(preferred_queue, body)
     except Exception:
         for conn in connections.all():
             # If the database connection has a hiccup, re-establish a new
             # connection
             conn.close_if_unusable_or_obsolete()
         logger.exception('failed to write inbound message')
예제 #3
0
 def write(self, preferred_queue, body):
     if 'guid' in body:
         set_guid(body['guid'])
     try:
         if isinstance(body, dict) and body.get('bind_kwargs'):
             self.add_bind_kwargs(body)
         # when the cluster heartbeat occurs, clean up internally
         if isinstance(body, dict) and 'cluster_node_heartbeat' in body['task']:
             self.cleanup()
         if self.should_grow:
             self.up()
         # we don't care about "preferred queue" round robin distribution, just
         # find the first non-busy worker and claim it
         workers = self.workers[:]
         random.shuffle(workers)
         for w in workers:
             if not w.busy:
                 w.put(body)
                 break
         else:
             task_name = 'unknown'
             if isinstance(body, dict):
                 task_name = body.get('task')
             logger.warn(f'Workers maxed, queuing {task_name}, load: {sum(len(w.managed_tasks) for w in self.workers)} / {len(self.workers)}')
             return super(AutoscalePool, self).write(preferred_queue, body)
     except Exception:
         for conn in connections.all():
             # If the database connection has a hiccup, re-establish a new
             # connection
             conn.close_if_unusable_or_obsolete()
         logger.exception('failed to write inbound message')
예제 #4
0
파일: worker.py 프로젝트: gerrod3/pulpcore
    def perform_job(self, job, queue):
        """
        Set the :class:`pulpcore.app.models.Task` to running and init logging.

        This method is called by the worker's work horse thread (the forked child) just before the
        task begins executing.

        Args:
            job (rq.job.Job): The job to perform
            queue (rq.queue.Queue): The Queue associated with the job
        """
        try:
            task = Task.objects.get(pk=job.get_id())
        except Task.DoesNotExist:
            pass
        else:
            if task.state != TASK_STATES.WAITING:
                return
            task.set_running()
            user = get_users_with_perms(task).first()
            _set_current_user(user)
            set_guid(task.logging_cid)

        with TaskWorkingDirectory(job):
            return super().perform_job(job, queue)
예제 #5
0
def worker_prerun(task: Task, **kwargs) -> None:
    """
    Called before a worker starts executing a task.
    Here we make sure to set the appropriate correlation ID for all logs logged
    during the tasks, and on the thread in general. In that regard, this does
    the Celery equivalent to what the django-guid middleware does for a request.
    """
    guid = task.request.get(settings.guid_header_name)
    if guid:
        logger.info('Setting GUID %s', guid)
        set_guid(guid)
    else:
        generated_guid = generate_guid(
            uuid_length=settings.integration_settings.celery.uuid_length)
        logger.info('Generated GUID %s', generated_guid)
        set_guid(generated_guid)

    if settings.integration_settings.celery.log_parent:
        origin = task.request.get(parent_header)
        if origin:
            logger.info('Setting parent ID %s', origin)
            celery_parent.set(origin)
        generated_current_guid = generate_guid(
            uuid_length=settings.integration_settings.celery.uuid_length)
        logger.info('Generated current ID %s', generated_current_guid)
        celery_current.set(generated_current_guid)
예제 #6
0
async def django_guid_api_usage(request: HttpRequest) -> JsonResponse:
    """
    Uses each API function
    """
    logger.info('Current GUID: %s', get_guid())
    set_guid('another guid')
    logger.info('Current GUID: %s', get_guid())
    clear_guid()
    logger.info('Current GUID: %s', get_guid())
    return JsonResponse({'detail': ':)'})
예제 #7
0
def test_cleanup(monkeypatch, mocker: MockerFixture):
    """
    Test that cleanup works as expected
    """
    set_guid('123')
    celery_current.set('123')
    celery_parent.set('123')

    mocked_settings = deepcopy(django_settings.DJANGO_GUID)
    mocked_settings['INTEGRATIONS'] = [CeleryIntegration(log_parent=True)]
    with override_settings(DJANGO_GUID=mocked_settings):
        settings = Settings()
        monkeypatch.setattr('django_guid.integrations.celery.signals.settings',
                            settings)
        clean_up(task=mocker.Mock())

    assert [get_guid(), celery_current.get(),
            celery_parent.get()] == [None, None, None]
예제 #8
0
파일: task.py 프로젝트: yckwon75/awx
 def run_callable(self, body):
     """
     Given some AMQP message, import the correct Python code and run it.
     """
     task = body['task']
     uuid = body.get('uuid', '<unknown>')
     args = body.get('args', [])
     kwargs = body.get('kwargs', {})
     if 'guid' in body:
         set_guid(body.pop('guid'))
     _call = TaskWorker.resolve_callable(task)
     if inspect.isclass(_call):
         # the callable is a class, e.g., RunJob; instantiate and
         # return its `run()` method
         _call = _call().run
     # don't print kwargs, they often contain launch-time secrets
     logger.debug('task {} starting {}(*{})'.format(uuid, task, args))
     return _call(*args, **kwargs)
예제 #9
0
def _perform_task(task_pk, task_working_dir_rel_path):
    """Setup the environment to handle a task and execute it.
    This must be called as a subprocess, while the parent holds the advisory lock."""
    signal.signal(signal.SIGINT, child_signal_handler)
    signal.signal(signal.SIGTERM, child_signal_handler)
    signal.signal(signal.SIGUSR1, child_signal_handler)
    # All processes need to create their own postgres connection
    connection.connection = None
    task = Task.objects.get(pk=task_pk)
    task.set_running()
    # Store the task id in the environment for `Task.current()`.
    os.environ["PULP_TASK_ID"] = str(task.pk)
    user = get_users_with_perms(task).first()
    _set_current_user(user)
    set_guid(task.logging_cid)
    try:
        _logger.info("Starting task {}".format(task.pk))

        # Execute task
        module_name, function_name = task.name.rsplit(".", 1)
        module = importlib.import_module(module_name)
        func = getattr(module, function_name)
        args = json.loads(task.args) or ()
        kwargs = json.loads(task.kwargs) or {}
        os.chdir(task_working_dir_rel_path)
        result = func(*args, **kwargs)
        if asyncio.iscoroutine(result):
            _logger.debug("Task is coroutine {}".format(task.pk))
            loop = asyncio.get_event_loop()
            loop.run_until_complete(result)

    except Exception:
        exc_type, exc, tb = sys.exc_info()
        task.set_failed(exc, tb)
        _logger.info("Task {} failed ({})".format(task.pk, exc))
        _logger.info("\n".join(traceback.format_list(
            traceback.extract_tb(tb))))
    else:
        task.set_completed()
        _logger.info("Task completed {}".format(task.pk))
    os.environ.pop("PULP_TASK_ID")
예제 #10
0
파일: periodic.py 프로젝트: yckwon75/awx
 def run():
     ppid = os.getppid()
     logger.warning('periodic beat started')
     while True:
         if os.getppid() != ppid:
             # if the parent PID changes, this process has been orphaned
             # via e.g., segfault or sigkill, we should exit too
             pid = os.getpid()
             logger.warning(
                 f'periodic beat exiting gracefully pid:{pid}')
             raise SystemExit()
         try:
             for conn in connections.all():
                 # If the database connection has a hiccup, re-establish a new
                 # connection
                 conn.close_if_unusable_or_obsolete()
             set_guid(generate_guid())
             self.run_pending()
         except Exception:
             logger.exception(
                 'encountered an error while scheduling periodic tasks')
         time.sleep(idle_seconds)
예제 #11
0
def test_task_publish_includes_correct_headers(monkeypatch):
    """
    It's important that we include the correct headers when publishing a task
    to the celery worker pool, otherwise there's no transfer of state.
    """
    # Mocking overhead
    mocked_settings = deepcopy(django_settings.DJANGO_GUID)
    mocked_settings['INTEGRATIONS'] = [CeleryIntegration(log_parent=False)]
    with override_settings(DJANGO_GUID=mocked_settings):
        settings = Settings()
        monkeypatch.setattr('django_guid.integrations.celery.signals.settings',
                            settings)

        # Actual testing
        for correlation_id in [None, 'test', 123, -1]:
            # Set the id in our context var
            set_guid(correlation_id)

            # Run signal with empty headers
            headers = {}
            publish_task_from_worker_or_request(headers=headers)

            # Makes sure the returned headers contain the correct result
            assert headers[settings.guid_header_name] == correlation_id
예제 #12
0
파일: callback.py 프로젝트: timkids/awx
    def perform_work(self, body):
        try:
            flush = body.get('event') == 'FLUSH'
            if flush:
                self.last_event = ''
            if not flush:
                job_identifier = 'unknown job'
                for cls in (JobEvent, AdHocCommandEvent, ProjectUpdateEvent,
                            InventoryUpdateEvent, SystemJobEvent):
                    if cls.JOB_REFERENCE in body:
                        job_identifier = body[cls.JOB_REFERENCE]
                        break

                self.last_event = f'\n\t- {cls.__name__} for #{job_identifier} ({body.get("event", "")} {body.get("uuid", "")})'  # noqa

                notification_trigger_event = bool(
                    body.get('event') == cls.WRAPUP_EVENT)

                if body.get('event') == 'EOF':
                    try:
                        if 'guid' in body:
                            set_guid(body['guid'])
                        final_counter = body.get('final_counter', 0)
                        logger.info(
                            'Starting EOF event processing for Job {}'.format(
                                job_identifier))
                        # EOF events are sent when stdout for the running task is
                        # closed. don't actually persist them to the database; we
                        # just use them to report `summary` websocket events as an
                        # approximation for when a job is "done"
                        emit_channel_notification(
                            'jobs-summary',
                            dict(group_name='jobs',
                                 unified_job_id=job_identifier,
                                 final_counter=final_counter))

                        if notification_trigger_event:
                            job_stats_wrapup(job_identifier)
                    except Exception:
                        logger.exception(
                            'Worker failed to perform EOF tasks: Job {}'.
                            format(job_identifier))
                    finally:
                        self.subsystem_metrics.inc(
                            'callback_receiver_events_in_memory', -1)
                        set_guid('')
                    return

                skip_websocket_message = body.pop('skip_websocket_message',
                                                  False)

                event = cls.create_from_data(**body)

                if skip_websocket_message:  # if this event sends websocket messages, fire them off on flush
                    event._skip_websocket_message = True

                if notification_trigger_event:  # if this is an Ansible stats event, ensure notifications on flush
                    event._notification_trigger_event = True

                self.buff.setdefault(cls, []).append(event)

            retries = 0
            while retries <= self.MAX_RETRIES:
                try:
                    self.flush(force=flush)
                    break
                except (OperationalError, InterfaceError, InternalError):
                    if retries >= self.MAX_RETRIES:
                        logger.exception(
                            'Worker could not re-establish database connectivity, giving up on one or more events.'
                        )
                        return
                    delay = 60 * retries
                    logger.exception(
                        'Database Error Saving Job Event, retry #{i} in {delay} seconds:'
                        .format(i=retries + 1, delay=delay))
                    django_connection.close()
                    time.sleep(delay)
                    retries += 1
                except DatabaseError:
                    logger.exception('Database Error Saving Job Event')
                    break
        except Exception as exc:
            tb = traceback.format_exc()
            logger.error('Callback Task Processor Raised Exception: %r', exc)
            logger.error('Detail: {}'.format(tb))
예제 #13
0
파일: callback.py 프로젝트: yckwon75/awx
    def perform_work(self, body):
        try:
            flush = body.get('event') == 'FLUSH'
            if flush:
                self.last_event = ''
            if not flush:
                event_map = {
                    'job_id': JobEvent,
                    'ad_hoc_command_id': AdHocCommandEvent,
                    'project_update_id': ProjectUpdateEvent,
                    'inventory_update_id': InventoryUpdateEvent,
                    'system_job_id': SystemJobEvent,
                }

                job_identifier = 'unknown job'
                for key, cls in event_map.items():
                    if key in body:
                        job_identifier = body[key]
                        break

                self.last_event = f'\n\t- {cls.__name__} for #{job_identifier} ({body.get("event", "")} {body.get("uuid", "")})'  # noqa

                if body.get('event') == 'EOF':
                    try:
                        if 'guid' in body:
                            set_guid(body['guid'])
                        final_counter = body.get('final_counter', 0)
                        logger.info('Event processing is finished for Job {}, sending notifications'.format(job_identifier))
                        # EOF events are sent when stdout for the running task is
                        # closed. don't actually persist them to the database; we
                        # just use them to report `summary` websocket events as an
                        # approximation for when a job is "done"
                        emit_channel_notification('jobs-summary', dict(group_name='jobs', unified_job_id=job_identifier, final_counter=final_counter))
                        # Additionally, when we've processed all events, we should
                        # have all the data we need to send out success/failure
                        # notification templates
                        uj = UnifiedJob.objects.get(pk=job_identifier)

                        if isinstance(uj, Job):
                            # *actual playbooks* send their success/failure
                            # notifications in response to the playbook_on_stats
                            # event handling code in main.models.events
                            pass
                        elif hasattr(uj, 'send_notification_templates'):
                            handle_success_and_failure_notifications.apply_async([uj.id])
                    except Exception:
                        logger.exception('Worker failed to emit notifications: Job {}'.format(job_identifier))
                    finally:
                        self.subsystem_metrics.inc('callback_receiver_events_in_memory', -1)
                        set_guid('')
                    return

                skip_websocket_message = body.pop('skip_websocket_message', False)

                event = cls.create_from_data(**body)

                if skip_websocket_message:
                    event._skip_websocket_message = True

                self.buff.setdefault(cls, []).append(event)

            retries = 0
            while retries <= self.MAX_RETRIES:
                try:
                    self.flush(force=flush)
                    break
                except (OperationalError, InterfaceError, InternalError):
                    if retries >= self.MAX_RETRIES:
                        logger.exception('Worker could not re-establish database connectivity, giving up on one or more events.')
                        return
                    delay = 60 * retries
                    logger.exception('Database Error Saving Job Event, retry #{i} in {delay} seconds:'.format(i=retries + 1, delay=delay))
                    django_connection.close()
                    time.sleep(delay)
                    retries += 1
                except DatabaseError:
                    logger.exception('Database Error Saving Job Event')
                    break
        except Exception as exc:
            tb = traceback.format_exc()
            logger.error('Callback Task Processor Raised Exception: %r', exc)
            logger.error('Detail: {}'.format(tb))
예제 #14
0
def _queue_reserved_task(func, inner_task_id, resources, inner_args,
                         inner_kwargs, options):
    """
    A task that encapsulates another task to be dispatched later.

    This task being encapsulated is called the "inner" task, and a task name, UUID, and accepts a
    list of positional args and keyword args for the inner task. These arguments are named
    inner_args and inner_kwargs. inner_args is a list, and inner_kwargs is a dictionary passed to
    the inner task as positional and keyword arguments using the * and ** operators.

    The inner task is dispatched into a dedicated queue for a worker that is decided at dispatch
    time. The logic deciding which queue receives a task is controlled through the
    find_worker function.

    Args:
        func (basestring): The function to be called
        inner_task_id (basestring): The task_id to be set on the task being called. By providing
            the UUID, the caller can have an asynchronous reference to the inner task
            that will be dispatched.
        resources (basestring): The urls of the resource you wish to reserve for your task.
            The system will ensure that no other tasks that want that same reservation will run
            concurrently with yours.
        inner_args (tuple): The positional arguments to pass on to the task.
        inner_kwargs (dict): The keyword arguments to pass on to the task.
        options (dict): For all options accepted by enqueue see the RQ docs
    """
    redis_conn = connection.get_redis_connection()
    task_status = Task.objects.get(pk=inner_task_id)
    set_guid(task_status.logging_cid)
    task_name = func.__module__ + "." + func.__name__

    while True:
        if task_name == "pulpcore.app.tasks.orphan.orphan_cleanup":
            if ReservedResource.objects.exists():
                # wait until there are no reservations
                time.sleep(0.25)
                continue
            else:
                rq_worker = util.get_current_worker()
                worker = Worker.objects.get(name=rq_worker.name)
                task_status.worker = worker
                task_status.set_running()
                q = Queue("resource-manager",
                          connection=redis_conn,
                          is_async=False)
                try:
                    q.enqueue(
                        func,
                        args=inner_args,
                        kwargs=inner_kwargs,
                        job_id=inner_task_id,
                        job_timeout=TASK_TIMEOUT,
                        **options,
                    )
                    task_status.set_completed()
                except RedisConnectionError as e:
                    task_status.set_failed(e, None)
                return

        try:
            with transaction.atomic():
                # lock the worker - there is a similar lock in mark_worker_offline()
                worker = _acquire_worker(resources)

                # Attempt to lock all resources by their urls. Must be atomic to prevent deadlocks.
                for resource in resources:
                    if worker.reservations.select_for_update().filter(
                            resource=resource).exists():
                        reservation = worker.reservations.get(
                            resource=resource)
                    else:
                        reservation = ReservedResource.objects.create(
                            worker=worker, resource=resource)
                    TaskReservedResource.objects.create(resource=reservation,
                                                        task=task_status)
                worker.cleaned_up = False
                worker.save(update_fields=["cleaned_up"])
        except (Worker.DoesNotExist, IntegrityError):
            # if worker is ready, or we have a worker but we can't create the reservations, wait
            time.sleep(0.25)
        else:
            # we have a worker with the locks
            break

    task_status.worker = worker
    task_status.save()

    try:
        q = Queue(worker.name, connection=redis_conn)
        q.enqueue(
            func,
            args=inner_args,
            kwargs=inner_kwargs,
            job_id=inner_task_id,
            job_timeout=TASK_TIMEOUT,
            **options,
        )
    except RedisConnectionError as e:
        task_status.set_failed(e, None)