async def test_monitoring_retry_exceptions(QueueMock, NotifyMock, mock_taskcluster):
    """Check how Monitoring reacts to tasks in exception, depending on the task.

    Two messages are queued: one for "Task-exception-retry:2" (Hook1) and one
    for "Task-exception-retry:0" (Hook2). After the first check exactly one
    new task has been created and the queue still holds two messages; after
    the second check no additional task has been created and the queue holds
    a single message.
    """
    queue_name = "testqueue"
    retryable_id = "Task-exception-retry:2"
    exhausted_id = "Task-exception-retry:0"

    message_bus = MessageBus()
    monitor = Monitoring(mock_taskcluster, queue_name, ["pinco@pallino"], 1)
    monitor.register(message_bus)

    # Queue both messages, in the order they are expected to be checked
    for hook_id, task_id in (("Hook1", retryable_id), ("Hook2", exhausted_id)):
        await message_bus.send(queue_name, ("Group1", hook_id, task_id))
    assert message_bus.queues[queue_name].qsize() == 2

    # Swap in the mocked Taskcluster services
    monitor.queue = QueueMock
    assert len(monitor.queue.created_tasks) == 0
    monitor.notify = NotifyMock

    # First check: the exception is recorded and one new task is created,
    # so the queue size does not shrink
    await monitor.check_task()
    assert monitor.stats["Hook1"]["exception"] == [retryable_id]
    assert len(monitor.queue.created_tasks) == 1
    assert message_bus.queues[queue_name].qsize() == 2

    # The created task is a distinct task sharing the original's
    # taskGroupId and payload, with a different creation date
    original_task = await monitor.queue.task(retryable_id)
    retried_id, retried_task = monitor.queue.created_tasks[0]
    assert retried_id != retryable_id
    assert retried_task != original_task
    assert retried_task["taskGroupId"] == original_task["taskGroupId"]
    assert retried_task["payload"] == original_task["payload"]
    assert retried_task["created"] != original_task["created"]

    # Second check: the exception is recorded but nothing new is created,
    # so the queue shrinks by one
    await monitor.check_task()
    assert monitor.stats["Hook2"]["exception"] == [exhausted_id]
    assert len(monitor.queue.created_tasks) == 1
    assert message_bus.queues[queue_name].qsize() == 1
class Events:
    """
    Wire the code coverage workflow, its task monitoring and a pulse
    listener onto a single shared message bus, then run them together
    """

    def __init__(self):
        # Message bus shared by every component built below
        self.bus = MessageBus()

        secrets = taskcluster_config.secrets

        # Code coverage workflow
        hook_id = secrets["hook_id"]
        hook_group_id = secrets["hook_group_id"]
        self.workflow = CodeCoverage(hook_id, hook_group_id, self.bus)

        # Monitoring for newly created tasks
        monitoring_period = 7 * 3600
        self.monitoring = Monitoring(
            taskcluster_config, QUEUE_MONITORING, secrets["admins"], monitoring_period
        )
        self.monitoring.register(self.bus)

        # Pulse listener bound to the task-group-resolved exchange
        task_group_resolved = (
            "exchange/taskcluster-queue/v1/task-group-resolved",
            ["#"],
        )
        self.pulse = PulseListener(
            {QUEUE_PULSE: [task_group_resolved]},
            secrets["pulse_user"],
            secrets["pulse_password"],
        )
        self.pulse.register(self.bus)

    def run(self):
        """Run the workflow, monitoring and pulse consumers concurrently."""
        run_tasks([self.workflow.run(), self.monitoring.run(), self.pulse.run()])
async def test_report_all_completed(QueueMock, NotifyMock, mock_taskcluster):
    """Check that no email is sent when every monitored task completed.

    Two completed tasks are queued and checked; sending the report must
    leave the notification mock untouched and the stats empty.
    """
    queue_name = "testqueue"
    completed_ids = ("Task1-completed", "Task2-completed")

    message_bus = MessageBus()
    monitor = Monitoring(mock_taskcluster, queue_name, ["pinco@pallino"], 1)
    monitor.register(message_bus)

    for task_id in completed_ids:
        await message_bus.send(queue_name, ("Group1", "Hook1", task_id))
    assert message_bus.queues[queue_name].qsize() == 2

    # Swap in the mocked Taskcluster services
    monitor.queue = QueueMock
    monitor.notify = NotifyMock

    # One check per queued message
    for _ in completed_ids:
        await monitor.check_task()

    # All tasks were successful: nothing to report
    await monitor.send_report()
    assert NotifyMock.email_obj == {}
    assert monitor.stats == {}
class Events(object):
    """
    Listen to HTTP notifications from phabricator and trigger new try jobs

    Which components are built depends on where the process runs:
    the web server and pulse listeners are created on a web dyno, the
    workflow, mercurial worker, monitoring and bugbug consumers on a worker
    dyno, and all of them when not running on Heroku. Components that are
    skipped are set to None so that `run` can test for them.
    """

    def __init__(self, cache_root):
        """Build the message bus and the components enabled for this process.

        `cache_root` is forwarded to the workflow's `get_repositories`.
        """
        # Create message bus shared amongst processes
        self.bus = MessageBus()

        publish = taskcluster_config.secrets["PHABRICATOR"].get("publish", False)

        # Check the redis support is enabled on Heroku
        if heroku.in_dyno():
            assert self.bus.redis_enabled is True, "Need Redis on Heroku"

        community_config = taskcluster_config.secrets.get("taskcluster_community")
        test_selection_enabled = taskcluster_config.secrets.get(
            "test_selection_enabled", False
        )

        # Run webserver & pulse on web dyno or single instance
        if not heroku.in_dyno() or heroku.in_web_dyno():

            # Create web server
            self.webserver = WebServer(QUEUE_WEB_BUILDS)
            self.webserver.register(self.bus)

            # Create pulse listener
            exchanges = {}
            if taskcluster_config.secrets["autoland_enabled"]:
                logger.info("Autoland ingestion is enabled")
                # autoland ingestion
                exchanges[QUEUE_PULSE_AUTOLAND] = [
                    (PULSE_TASK_GROUP_RESOLVED, ["#.gecko-level-3.#"])
                ]

            # Create pulse listeners for bugbug test selection task and unit test failures.
            if community_config is not None and test_selection_enabled:
                exchanges[QUEUE_PULSE_TRY_TASK_END] = [
                    (PULSE_TASK_COMPLETED, ["#.gecko-level-1.#"]),
                    (PULSE_TASK_FAILED, ["#.gecko-level-1.#"]),
                    # https://bugzilla.mozilla.org/show_bug.cgi?id=1599863
                    # (
                    #    "exchange/taskcluster-queue/v1/task-exception",
                    #    ["#.gecko-level-1.#"],
                    # ),
                ]

                self.community_pulse = PulseListener(
                    {
                        QUEUE_PULSE_BUGBUG_TEST_SELECT: [
                            (
                                "exchange/taskcluster-queue/v1/task-completed",
                                ["route.project.relman.bugbug.test_select"],
                            )
                        ]
                    },
                    taskcluster_config.secrets["communitytc_pulse_user"],
                    taskcluster_config.secrets["communitytc_pulse_password"],
                    "communitytc",
                )
                # Manually register to set queue as redis
                self.community_pulse.bus = self.bus
                self.bus.add_queue(QUEUE_PULSE_BUGBUG_TEST_SELECT, redis=True)
                self.bus.add_queue(QUEUE_PULSE_TRY_TASK_END, redis=True)
            else:
                self.community_pulse = None

            if exchanges:
                self.pulse = PulseListener(
                    exchanges,
                    taskcluster_config.secrets["pulse_user"],
                    taskcluster_config.secrets["pulse_password"],
                )
                # Manually register to set queue as redis
                self.pulse.bus = self.bus
                self.bus.add_queue(QUEUE_PULSE_AUTOLAND, redis=True)
            else:
                self.pulse = None

        else:
            self.bugbug_utils = None
            self.webserver = None
            self.pulse = None
            self.community_pulse = None
            logger.info("Skipping webserver, bugbug and pulse consumers")

            # Register queues for workers
            self.bus.add_queue(QUEUE_PULSE_AUTOLAND, redis=True)
            self.bus.add_queue(QUEUE_PULSE_BUGBUG_TEST_SELECT, redis=True)
            self.bus.add_queue(QUEUE_PULSE_TRY_TASK_END, redis=True)
            self.bus.add_queue(QUEUE_WEB_BUILDS, redis=True)

        # Run work processes on worker dyno or single instance
        if not heroku.in_dyno() or heroku.in_worker_dyno():
            self.workflow = CodeReview(
                api_key=taskcluster_config.secrets["PHABRICATOR"]["api_key"],
                url=taskcluster_config.secrets["PHABRICATOR"]["url"],
                publish=publish,
                user_blacklist=taskcluster_config.secrets["user_blacklist"],
            )
            self.workflow.register(self.bus)

            # Build mercurial worker and queue
            self.mercurial = MercurialWorker(
                QUEUE_MERCURIAL,
                QUEUE_MERCURIAL_APPLIED,
                repositories=self.workflow.get_repositories(
                    taskcluster_config.secrets["repositories"],
                    cache_root,
                    default_ssh_key=taskcluster_config.secrets["ssh_key"],
                ),
            )
            self.mercurial.register(self.bus)

            # Setup monitoring for newly created tasks
            self.monitoring = Monitoring(
                taskcluster_config,
                QUEUE_MONITORING,
                taskcluster_config.secrets["admins"],
                MONITORING_PERIOD,
            )
            self.monitoring.register(self.bus)

            # Setup monitoring for newly created community tasks
            if community_config is not None:
                self.community_monitoring = Monitoring(
                    community_taskcluster_config,
                    QUEUE_MONITORING_COMMUNITY,
                    taskcluster_config.secrets["admins"],
                    MONITORING_PERIOD,
                )
                self.community_monitoring.register(self.bus)
            else:
                self.community_monitoring = None

            self.bugbug_utils = BugbugUtils(self.workflow.api)
            self.bugbug_utils.register(self.bus)
        else:
            self.workflow = None
            self.mercurial = None
            self.monitoring = None
            self.community_monitoring = None
            self.bugbug_utils = None
            logger.info("Skipping workers consumers")

    @staticmethod
    def _pending_tasks():
        """Return the tasks of the current event loop that still need to run.

        `asyncio.Task.all_tasks` was deprecated in Python 3.7 and removed in
        3.9; `asyncio.all_tasks` is its replacement and only returns tasks
        that are not finished yet. The loop is passed explicitly because
        `asyncio.all_tasks()` without argument requires a running loop.
        """
        loop = asyncio.get_event_loop()
        if hasattr(asyncio, "all_tasks"):
            return asyncio.all_tasks(loop)
        # Python < 3.7 only
        return asyncio.Task.all_tasks(loop)

    def run(self):
        """Run every enabled consumer, with the web server alongside them.

        The web server, when present, is started before the consumers and is
        always stopped afterwards, including when a consumer raises.
        """
        consumers = []

        # Code review main workflow
        if self.workflow:
            consumers += [
                # Process Phabricator build received from webserver
                self.bus.run(
                    self.workflow.process_build, QUEUE_WEB_BUILDS, sequential=False
                ),
                # Publish results on Phabricator
                self.bus.run(
                    self.workflow.publish_results,
                    QUEUE_PHABRICATOR_RESULTS,
                    sequential=False,
                ),
                # Trigger autoland tasks
                self.bus.run(
                    self.workflow.trigger_autoland,
                    QUEUE_PULSE_AUTOLAND,
                    sequential=False,
                ),
                # Send to phabricator results publication for normal processing and to bugbug for further analysis
                self.bus.dispatch(
                    QUEUE_MERCURIAL_APPLIED,
                    [QUEUE_PHABRICATOR_RESULTS, QUEUE_BUGBUG_TRY_PUSH],
                ),
            ]

        if self.bugbug_utils:
            consumers += [
                self.bus.run(
                    self.bugbug_utils.process_build, QUEUE_BUGBUG, sequential=False
                ),
                self.bus.run(
                    self.bugbug_utils.process_push,
                    QUEUE_BUGBUG_TRY_PUSH,
                    sequential=False,
                ),
                self.bus.run(
                    self.bugbug_utils.got_try_task_end,
                    QUEUE_PULSE_TRY_TASK_END,
                    sequential=False,
                ),
                self.bus.run(
                    self.bugbug_utils.got_bugbug_test_select_end,
                    QUEUE_PULSE_BUGBUG_TEST_SELECT,
                    sequential=False,
                ),
            ]

        # Add mercurial task
        if self.mercurial:
            consumers.append(self.mercurial.run())

        # Add monitoring task
        if self.monitoring:
            consumers.append(self.monitoring.run())

        # Add community monitoring task
        if self.community_monitoring:
            consumers.append(self.community_monitoring.run())

        # Add pulse listener for task results.
        if self.pulse:
            consumers.append(self.pulse.run())

        # Add communitytc pulse listener for test selection results.
        if self.community_pulse:
            consumers.append(self.community_pulse.run())

        # Start the web server in its own process
        if self.webserver:
            self.webserver.start()

        try:
            if consumers:
                # Run all tasks concurrently
                run_tasks(consumers)
            else:
                # Keep the web server process running
                asyncio.get_event_loop().run_forever()

            # Make sure any pending task is run.
            run_tasks(self._pending_tasks())
        finally:
            # Stop the webserver when other async processes are stopped,
            # even if one of them failed, so it is never left behind
            if self.webserver:
                self.webserver.stop()
class Events(object):
    """
    Listen to HTTP notifications from phabricator and trigger new try jobs

    Builds the code review workflow, a mercurial worker, a web server and a
    task monitoring, all registered on one shared message bus.
    """

    def __init__(self, cache_root):
        """Build and register every component on the shared message bus.

        `cache_root` is forwarded to the workflow's `get_repositories`.
        """
        # Create message bus shared amongst processes
        self.bus = MessageBus()

        self.workflow = CodeReview(
            api_key=taskcluster_config.secrets["PHABRICATOR"]["api_key"],
            url=taskcluster_config.secrets["PHABRICATOR"]["url"],
            publish=taskcluster_config.secrets["PHABRICATOR"].get("publish", False),
            risk_analysis_reviewers=taskcluster_config.secrets.get(
                "risk_analysis_reviewers", []
            ),
            community_config=taskcluster_config.secrets.get("taskcluster_community"),
        )
        self.workflow.register(self.bus)

        # Build mercurial worker and queue
        self.mercurial = MercurialWorker(
            QUEUE_MERCURIAL,
            QUEUE_PHABRICATOR_RESULTS,
            repositories=self.workflow.get_repositories(
                taskcluster_config.secrets["repositories"], cache_root
            ),
        )
        self.mercurial.register(self.bus)

        # Create web server
        self.webserver = WebServer(QUEUE_WEB_BUILDS)
        self.webserver.register(self.bus)

        # Setup monitoring for newly created tasks
        # NOTE(review): the other Monitoring call sites visible in this file
        # pass a Taskcluster configuration as first argument; confirm which
        # Monitoring signature applies here.
        self.monitoring = Monitoring(
            QUEUE_MONITORING, taskcluster_config.secrets["admins"], MONITORING_PERIOD
        )
        self.monitoring.register(self.bus)

    def run(self):
        """Run all consumers concurrently, with the web server alongside them.

        The web server is started before the consumers and is always stopped
        afterwards, including when a consumer raises.
        """
        consumers = [
            # Code review main workflow
            self.workflow.run(),
            # Add mercurial task
            self.mercurial.run(),
            # Add monitoring task
            self.monitoring.run(),
        ]

        # Publish results on Phabricator
        if self.workflow.publish:
            consumers.append(
                self.bus.run(self.workflow.publish_results, QUEUE_PHABRICATOR_RESULTS)
            )

        # Start the web server in its own process
        self.webserver.start()

        try:
            # Run all tasks concurrently
            run_tasks(consumers)
        finally:
            # Stop the webserver when other async processes are stopped,
            # even if one of them failed, so it is never left behind
            self.webserver.stop()
async def test_monitoring(QueueMock, NotifyMock, mock_taskcluster):
    """Walk Monitoring through one message per task outcome, then check the report.

    Six messages are queued (invalid, pending, two completed, exception,
    failed). Each `check_task` call is followed by assertions on the queue
    size and on the collected stats; the final `send_report` must email the
    expected content and reset the stats.
    """
    bus = MessageBus()
    monitoring = Monitoring(mock_taskcluster, "testqueue", ["pinco@pallino"], 1)
    monitoring.register(bus)
    await bus.send("testqueue", ("Group1", "Hook1", "Task-invalid"))
    await bus.send("testqueue", ("Group1", "Hook1", "Task-pending"))
    await bus.send("testqueue", ("Group1", "Hook1", "Task1-completed"))
    await bus.send("testqueue", ("Group1", "Hook1", "Task2-completed"))
    await bus.send("testqueue", ("Group1", "Hook2", "Task-exception"))
    await bus.send("testqueue", ("Group2", "Hook1", "Task-failed"))
    assert bus.queues["testqueue"].qsize() == 6

    # Swap in the mocked Taskcluster services.
    monitoring.queue = QueueMock
    monitoring.notify = NotifyMock

    # No report sent, since we haven't collected any stats yet.
    await monitoring.send_report()
    assert NotifyMock.email_obj == {}

    # Queue throws exception, remove task from queue.
    await monitoring.check_task()
    assert bus.queues["testqueue"].qsize() == 5

    # Task is pending, put it back in the queue.
    await monitoring.check_task()
    assert bus.queues["testqueue"].qsize() == 5

    # No report sent, since we haven't collected any stats yet.
    await monitoring.send_report()
    assert NotifyMock.email_obj == {}

    # Task is completed.
    await monitoring.check_task()
    assert monitoring.stats["Hook1"]["completed"] == ["Task1-completed"]
    assert bus.queues["testqueue"].qsize() == 4

    # Another task is completed.
    await monitoring.check_task()
    assert monitoring.stats["Hook1"]["completed"] == [
        "Task1-completed",
        "Task2-completed",
    ]
    assert bus.queues["testqueue"].qsize() == 3

    # Task exception: recorded for Hook2 only, Hook1 is left untouched.
    assert len(monitoring.queue.created_tasks) == 0
    await monitoring.check_task()
    assert monitoring.stats["Hook1"]["exception"] == []
    assert monitoring.stats["Hook2"]["exception"] == ["Task-exception"]

    # A new task has been retried, replacing the exception,
    # so the queue size is unchanged.
    assert len(monitoring.queue.created_tasks) == 1
    assert bus.queues["testqueue"].qsize() == 3

    # Task failed.
    await monitoring.check_task()
    assert monitoring.stats["Hook1"]["failed"] == ["Task-failed"]
    assert bus.queues["testqueue"].qsize() == 2

    # Task is pending, put it back in the queue.
    await monitoring.check_task()
    assert bus.queues["testqueue"].qsize() == 2

    # Expected report: one section per hook, one sub-section per state.
    # NOTE(review): this literal is kept exactly as it appears in this view,
    # on a single line; the report presumably uses line breaks between
    # entries - confirm against the output of Monitoring.send_report.
    content = """# Hook1 tasks for the last period ## completed 66.67% of all tasks (2/3) * [Task1-completed](http://taskcluster.test/tasks/Task1-completed) * [Task2-completed](http://taskcluster.test/tasks/Task2-completed) ## exception 0.00% of all tasks (0/3) ## failed 33.33% of all tasks (1/3) * [Task-failed](http://taskcluster.test/tasks/Task-failed) # Hook2 tasks for the last period ## completed 0.00% of all tasks (0/1) ## exception 100.00% of all tasks (1/1) * [Task-exception](http://taskcluster.test/tasks/Task-exception) ## failed 0.00% of all tasks (0/1) """

    # The report is emailed to the configured address and stats are reset.
    await monitoring.send_report()
    assert NotifyMock.email_obj["address"] == "pinco@pallino"
    assert NotifyMock.email_obj["subject"] == "Pulse listener tasks"
    assert NotifyMock.email_obj["content"] == content
    assert NotifyMock.email_obj["template"] == "fullscreen"
    assert monitoring.stats == {}
async def test_monitoring_whiteline_between_failed_and_hook(
        QueueMock, NotifyMock, mock_taskcluster):
    """Check the report produced when two hooks each have one failed task.

    The same failed task id is queued once for Hook1 and once for Hook2;
    the emailed report must contain one section per hook, the first one
    ending with its failed task list right before the second hook's header.
    """
    bus = MessageBus()
    monitoring = Monitoring(mock_taskcluster, "testqueue", ["pinco@pallino"], 1)
    monitoring.register(bus)
    await bus.send("testqueue", ("Group1", "Hook1", "Task-failed"))
    await bus.send("testqueue", ("Group1", "Hook2", "Task-failed"))
    assert bus.queues["testqueue"].qsize() == 2

    # Swap in the mocked Taskcluster services.
    monitoring.queue = QueueMock
    monitoring.notify = NotifyMock

    # Task failed for the first hook.
    await monitoring.check_task()
    assert monitoring.stats["Hook1"]["failed"] == ["Task-failed"]
    assert bus.queues["testqueue"].qsize() == 1

    # Task failed for the second hook.
    await monitoring.check_task()
    assert monitoring.stats["Hook2"]["failed"] == ["Task-failed"]
    assert bus.queues["testqueue"].qsize() == 0

    # Expected report: one section per hook, one sub-section per state.
    # NOTE(review): this literal is kept exactly as it appears in this view,
    # on a single line; given the test name, the report presumably separates
    # the failed list from the next hook header with line breaks - confirm
    # against the output of Monitoring.send_report.
    content = """# Hook1 tasks for the last period ## completed 0.00% of all tasks (0/1) ## exception 0.00% of all tasks (0/1) ## failed 100.00% of all tasks (1/1) * [Task-failed](http://taskcluster.test/tasks/Task-failed) # Hook2 tasks for the last period ## completed 0.00% of all tasks (0/1) ## exception 0.00% of all tasks (0/1) ## failed 100.00% of all tasks (1/1) * [Task-failed](http://taskcluster.test/tasks/Task-failed)"""

    # The report is emailed to the configured address and stats are reset.
    await monitoring.send_report()
    assert NotifyMock.email_obj["address"] == "pinco@pallino"
    assert NotifyMock.email_obj["subject"] == "Pulse listener tasks"
    assert NotifyMock.email_obj["content"] == content
    assert NotifyMock.email_obj["template"] == "fullscreen"
    assert monitoring.stats == {}