def apply_async(cls, args=None, kwargs=None, queue=None, uuid=None, **kw):
    task_id = uuid or str(uuid4())
    args = args or []
    kwargs = kwargs or {}
    queue = (queue or getattr(cls.queue, 'im_func', cls.queue))
    if not queue:
        msg = f'{cls.name}: Queue value required and may not be None'
        logger.error(msg)
        raise ValueError(msg)
    obj = {
        'uuid': task_id, 'args': args, 'kwargs': kwargs, 'task': cls.name
    }
    obj.update(**kw)
    if callable(queue):
        queue = queue()
    if not settings.IS_TESTING(sys.argv):
        with pg_bus_conn() as conn:
            conn.notify(queue, json.dumps(obj))
    return (obj, queue)
def apply_async(cls, args=None, kwargs=None, queue=None, uuid=None, exchange_type=None, **kw):
    task_id = uuid or str(uuid4())
    args = args or []
    kwargs = kwargs or {}
    queue = (
        queue or
        getattr(cls.queue, 'im_func', cls.queue) or
        settings.CELERY_DEFAULT_QUEUE
    )
    obj = {
        'uuid': task_id, 'args': args, 'kwargs': kwargs, 'task': cls.name
    }
    obj.update(**kw)
    if callable(queue):
        queue = queue()
    if not settings.IS_TESTING(sys.argv):
        with Connection(settings.BROKER_URL) as conn:
            # optional exchange type; falls back to a direct exchange
            exchange = Exchange(queue, type=exchange_type or 'direct')
            producer = Producer(conn)
            logger.debug('publish {}({}, queue={})'.format(
                cls.name, task_id, queue))
            producer.publish(obj,
                             serializer='json',
                             compression='bzip2',
                             exchange=exchange,
                             declare=[exchange],
                             delivery_mode="persistent",
                             routing_key=queue)
    return (obj, queue)
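A minimal usage sketch for the publish path above, assuming the task is registered through a decorator (written here as @task) that attaches apply_async to the wrapped callable; the function name, queue name, and decorator arguments are illustrative assumptions, not taken from the code above:

# Hypothetical task registration; '@task' and 'example_queue' are assumptions.
@task(queue='example_queue')
def add(a, b):
    return a + b

# apply_async does not run the function locally; it builds the message body,
# resolves the queue, publishes, and returns (message, queue).
message, queue = add.apply_async(args=[2, 2])
# message is the dict built above:
# {'uuid': '...', 'args': [2, 2], 'kwargs': {}, 'task': '<registered task name>'}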
def me(self):
    """Return the currently active instance."""
    # If we are running unit tests, return a stub record.
    if settings.IS_TESTING(sys.argv) or hasattr(sys, '_called_from_test'):
        return self.model(id=1, hostname='localhost', uuid='00000000-0000-0000-0000-000000000000')
    node = self.filter(hostname=settings.CLUSTER_HOST_ID)
    if node.exists():
        return node[0]
    raise RuntimeError("No instance found with the current cluster host id")
def record_aggregate_metrics(self, *args):
    if not settings.IS_TESTING():
        # increment task_manager__schedule_calls regardless of whether the
        # other metrics are recorded
        s_metrics.Metrics(auto_pipe_execute=True).inc("task_manager__schedule_calls", 1)
        # Only record metrics if the last time recording was more than
        # SUBSYSTEM_METRICS_TASK_MANAGER_RECORD_INTERVAL ago. Prevents a
        # short-duration task manager that runs directly after a long task
        # manager from overriding useful metrics.
        current_time = time.time()
        time_last_recorded = current_time - self.subsystem_metrics.decode("task_manager_recorded_timestamp")
        if time_last_recorded > settings.SUBSYSTEM_METRICS_TASK_MANAGER_RECORD_INTERVAL:
            logger.debug(f"recording metrics, last recorded {time_last_recorded} seconds ago")
            self.subsystem_metrics.set("task_manager_recorded_timestamp", current_time)
            self.subsystem_metrics.pipe_execute()
        else:
            logger.debug(f"skipping recording metrics, last recorded {time_last_recorded} seconds ago")
def filter(self, record):
    from django.conf import settings
    return settings.DEBUG or settings.IS_TESTING()
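A hedged sketch of how a logging filter like the one above is typically wired into Django's LOGGING setting; the filter class name, its dotted path, and the handler layout are assumptions for illustration only:

# Illustrative Django LOGGING fragment; 'RequireDebugTrueOrTest' and its
# import path are assumed names for the class that owns filter() above.
LOGGING = {
    'version': 1,
    'disable_existing_loggers': False,
    'filters': {
        'require_debug_true_or_test': {
            '()': 'awx.main.utils.filters.RequireDebugTrueOrTest',
        },
    },
    'handlers': {
        'console': {
            'class': 'logging.StreamHandler',
            # records only pass through when DEBUG or IS_TESTING() is true
            'filters': ['require_debug_true_or_test'],
        },
    },
}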
def __init__(self, auto_pipe_execute=False, instance_name=None):
    self.pipe = redis.Redis.from_url(settings.BROKER_URL).pipeline()
    self.conn = redis.Redis.from_url(settings.BROKER_URL)
    self.last_pipe_execute = time.time()
    # track if metrics have been modified since last saved to redis
    # start with True so that we get an initial save to redis
    self.metrics_have_changed = True
    self.pipe_execute_interval = settings.SUBSYSTEM_METRICS_INTERVAL_SAVE_TO_REDIS
    self.send_metrics_interval = settings.SUBSYSTEM_METRICS_INTERVAL_SEND_METRICS
    # auto pipe execute will commit transaction of metric data to redis
    # at a regular interval (pipe_execute_interval). If set to False,
    # the calling function should call .pipe_execute() explicitly
    self.auto_pipe_execute = auto_pipe_execute
    Instance = apps.get_model('main', 'Instance')
    if instance_name:
        self.instance_name = instance_name
    elif settings.IS_TESTING():
        self.instance_name = "awx_testing"
    else:
        try:
            self.instance_name = Instance.objects.me().hostname
        except Exception as e:
            self.instance_name = settings.CLUSTER_HOST_ID
            logger.info(f'Instance {self.instance_name} seems to be unregistered, error: {e}')
    # metric name, help_text
    METRICSLIST = [
        SetIntM('callback_receiver_events_queue_size_redis', 'Current number of events in redis queue'),
        IntM('callback_receiver_events_popped_redis', 'Number of events popped from redis'),
        IntM('callback_receiver_events_in_memory', 'Current number of events in memory (in transfer from redis to db)'),
        IntM('callback_receiver_batch_events_errors', 'Number of times batch insertion failed'),
        FloatM('callback_receiver_events_insert_db_seconds', 'Total time spent saving events to database'),
        IntM('callback_receiver_events_insert_db', 'Number of events batch inserted into database'),
        IntM('callback_receiver_events_broadcast', 'Number of events broadcast to other control plane nodes'),
        HistogramM('callback_receiver_batch_events_insert_db', 'Number of events batch inserted into database', settings.SUBSYSTEM_METRICS_BATCH_INSERT_BUCKETS),
        SetFloatM('callback_receiver_event_processing_avg_seconds', 'Average processing time per event per callback receiver batch'),
        FloatM('subsystem_metrics_pipe_execute_seconds', 'Time spent saving metrics to redis'),
        IntM('subsystem_metrics_pipe_execute_calls', 'Number of calls to pipe_execute'),
        FloatM('subsystem_metrics_send_metrics_seconds', 'Time spent sending metrics to other nodes'),
        SetFloatM('task_manager_get_tasks_seconds', 'Time spent in loading tasks from db'),
        SetFloatM('task_manager_start_task_seconds', 'Time spent starting task'),
        SetFloatM('task_manager_process_running_tasks_seconds', 'Time spent processing running tasks'),
        SetFloatM('task_manager_process_pending_tasks_seconds', 'Time spent processing pending tasks'),
        SetFloatM('task_manager__schedule_seconds', 'Time spent in running the entire _schedule'),
        IntM('task_manager__schedule_calls', 'Number of calls to _schedule, after lock is acquired'),
        SetFloatM('task_manager_recorded_timestamp', 'Unix timestamp when metrics were last recorded'),
        SetIntM('task_manager_tasks_started', 'Number of tasks started'),
        SetIntM('task_manager_running_processed', 'Number of running tasks processed'),
        SetIntM('task_manager_pending_processed', 'Number of pending tasks processed'),
        SetIntM('task_manager_tasks_blocked', 'Number of tasks blocked from running'),
        SetFloatM('task_manager_commit_seconds', 'Time spent in db transaction, including on_commit calls'),
        SetFloatM('dependency_manager_get_tasks_seconds', 'Time spent loading pending tasks from db'),
        SetFloatM('dependency_manager_generate_dependencies_seconds', 'Time spent generating dependencies for pending tasks'),
        SetFloatM('dependency_manager__schedule_seconds', 'Time spent in running the entire _schedule'),
        IntM('dependency_manager__schedule_calls', 'Number of calls to _schedule, after lock is acquired'),
        SetFloatM('dependency_manager_recorded_timestamp', 'Unix timestamp when metrics were last recorded'),
        SetIntM('dependency_manager_pending_processed', 'Number of pending tasks processed'),
        SetFloatM('workflow_manager__schedule_seconds', 'Time spent in running the entire _schedule'),
        IntM('workflow_manager__schedule_calls', 'Number of calls to _schedule, after lock is acquired'),
        SetFloatM('workflow_manager_recorded_timestamp', 'Unix timestamp when metrics were last recorded'),
        SetFloatM('workflow_manager_spawn_workflow_graph_jobs_seconds', 'Time spent spawning workflow tasks'),
        SetFloatM('workflow_manager_get_tasks_seconds', 'Time spent loading workflow tasks from db'),
    ]
    # turn metric list into dictionary with the metric name as a key
    self.METRICS = {}
    for m in METRICSLIST:
        self.METRICS[m.field] = m
    # track last time metrics were sent to other nodes
    self.previous_send_metrics = SetFloatM('send_metrics_time', 'Timestamp of previous send_metrics call')
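A brief usage sketch for the Metrics class initialized above, limited to methods already used elsewhere in this section (inc, set, pipe_execute); the surrounding setup and values are illustrative, not prescribed by the code above:

# Illustrative only: record a couple of the metrics registered in METRICSLIST.
m = Metrics(auto_pipe_execute=True)                    # commits to redis on its own interval
m.inc('callback_receiver_events_popped_redis', 10)     # counter-style increment
m.set('task_manager_recorded_timestamp', time.time())  # absolute value

# With auto_pipe_execute=False (the default) the caller flushes explicitly.
m2 = Metrics()
m2.inc('callback_receiver_events_insert_db', 50)
m2.pipe_execute()                                      # commit buffered values to redis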