def setUp(self):
  self.logger = alert_handler.AlertHandler(
      project_id=None,
      write_to_logging=True,
      write_to_error_reporting=False,
      write_to_email=False)
  self.handler = job_status_handler.JobStatusHandler(
      "unused", "unused", "unused", self.logger)
def setUp(self):
  self.handler = alert_handler.AlertHandler(
      project_id='my-project-id',
      write_to_logging=True,
      write_to_error_reporting=False,
      write_to_email=False  # Skip the init of the real email client.
  )
  self.handler.write_to_email = True  # Enable writing to draft email.
  # The step below normally would have happened while initializing the
  # real email client.
  self.handler.messages_to_email = collections.defaultdict(list)
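# A minimal sketch (not from the source) of why the fixture above seeds
# `messages_to_email` with collections.defaultdict(list): appending under a
# key that has never been seen works without a KeyError, so messages can be
# buffered per key without pre-registering anything. The 'team@example.com'
# key and message text are purely illustrative.
import collections

messages_to_email = collections.defaultdict(list)
messages_to_email['team@example.com'].append('Job my-job failed.')
assert messages_to_email['team@example.com'] == ['Job my-job failed.']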
def setUp(self):
  self.logger = alert_handler.AlertHandler(
      project_id=None,
      write_to_logging=True,
      write_to_error_reporting=False,
      write_to_email=False)
  self.temp_dir = self.create_tempdir().full_path
  self.summary_writer = tf.summary.create_file_writer(self.temp_dir)
  with self.summary_writer.as_default():
    tf.summary.scalar("foo", 1, 0)
    tf.summary.scalar("bar", tf.convert_to_tensor(1), 0)
    tf.summary.scalar("foo", 2, 100)
    tf.summary.scalar("bar", tf.convert_to_tensor(2), 100)
  self.summary_writer.close()
  self.job_status_dict = {
      'job_status': 'SUCCESS',
      'start_time': 1000,
      'stop_time': 2000,
  }
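# A minimal sketch of reading back the scalars the fixture above writes,
# using the standard TF2 event-file layout. The helper name `read_scalars`
# is an illustrative assumption, not part of the code under test.
import glob
import os

import tensorflow as tf

def read_scalars(logdir):
  """Yields (tag, step, value) for every scalar summary under `logdir`."""
  for event_file in glob.glob(os.path.join(logdir, 'events.out.tfevents.*')):
    for event in tf.compat.v1.train.summary_iterator(event_file):
      for value in event.summary.value:
        yield value.tag, event.step, float(tf.make_ndarray(value.tensor))

# For the fixture above, this would yield ('foo', 0, 1.0), ('bar', 0, 1.0),
# ('foo', 100, 2.0), and ('bar', 100, 2.0), in file order.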
def run_main(event, context):
  project_id = google.auth.default()[1]
  logger = alert_handler.AlertHandler(project_id)

  # Retrieve pubsub messages for all the tests that have been kicked off by
  # the test runner.
  subscriber = pubsub_v1.SubscriberClient()
  project = subscriber.project_path(project_id)
  subscription = None
  for s in subscriber.list_subscriptions(project):
    if s.topic.split('/')[-1] == METRICS_WRITTEN_TOPIC:
      subscription = s.name
      break
  if not subscription:
    subscription_id = subscriber.subscription_path(
        project_id, 'metrics-handler-subscription')
    topic = subscriber.topic_path(project_id, METRICS_WRITTEN_TOPIC)
    subscription = subscriber.create_subscription(
        subscription_id, topic, ack_deadline_seconds=300).name
  try:
    all_msgs = subscriber.pull(subscription, 100).received_messages
  except google.api_core.exceptions.DeadlineExceeded:
    logger.info(
        'No messages found for subscription: {}'.format(subscription))
    return

  # Group messages by test. Each test might have made multiple attempts and
  # therefore could have multiple messages.
  test_name_to_msgs = collections.defaultdict(list)
  ids_to_ack = []
  for msg in all_msgs:
    data_str = msg.message.data
    try:
      message_id = msg.message.message_id
      logger.info('Found message_id: {}'.format(message_id))
      data = json.loads(data_str)
      data['publish_time'] = msg.message.publish_time.seconds
      data['ack_id'] = msg.ack_id
      data['message_id'] = message_id
      test_name_to_msgs[data['test_name']].append(data)
    except Exception as e:
      logger.error(
          'Metrics handler encountered an invalid message in pubsub queue '
          'for topic `{}` which led to Exception: {}. This message will '
          'be acknowledged and ignored. The message was: {}'.format(
              METRICS_WRITTEN_TOPIC, e, msg))
      ids_to_ack.append(msg.ack_id)

  # Grab the latest message for each test. We will process only that message
  # and all other messages for that test will be ack'ed without being
  # processed.
  msgs_to_process = []
  for test_name, msgs in test_name_to_msgs.items():
    sorted_msgs = sorted(msgs, key=lambda x: x['publish_time'])
    ids_to_ack.extend([msg['ack_id'] for msg in sorted_msgs[:-1]])
    msgs_to_process.append(sorted_msgs[-1])
  logger.info('Finished deduplicating messages from test runs.')

  # Note: it's good to ack early and often since pubsub will resend messages
  # that are not ack'ed within the queue's deadline.
  if ids_to_ack:
    logger.info('Will ack these ids: {}'.format(ids_to_ack))
    subscriber.acknowledge(subscription, ids_to_ack)
    logger.info('Successful ack for ids: {}'.format(ids_to_ack))

  if not msgs_to_process:
    logger.info('No messages to process. Stopping early.')
    return

  # TODO: Add support for multi-zone and/or multi-cluster setups.
  zone = msgs_to_process[0].get('zone')
  cluster = msgs_to_process[0].get('cluster_name')
  status_handler = job_status_handler.JobStatusHandler(
      project_id, zone, cluster, logger)

  # Handle the metrics for each test. Ack if the process was successful or
  # if the message is permanently invalid. Do not ack if the test is still
  # running so that we will retry again later once that test has finished
  # running.
  for msg in msgs_to_process:
    try:
      logger.info('Pubsub message to process: {}'.format(msg))
      should_ack = _process_pubsub_message(msg, status_handler, logger)
    except Exception:
      logger.error(
          'Encountered exception while attempting to process message {}. '
          'The message will be acknowledged to prevent more crashes. '
          'Exception: {}'.format(msg, traceback.format_exc()))
      should_ack = True
    if should_ack:
      logger.info('Finished processing message. Will ack')
      subscriber.acknowledge(subscription, [msg['ack_id']])
      logger.info('Acknowledged ack_id: {}'.format(msg['ack_id']))
    else:
      logger.info('Finished processing message. Will not ack')

  logger.info('Processed a message for each of the following tests: '
              '{}'.format([x['test_name'] for x in msgs_to_process]))
  logger.send_email()
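# A minimal sketch (not from the source) of publishing a message that
# run_main() above can consume. The payload fields mirror what the handler
# parses: 'test_name' is required for grouping, while 'zone' and
# 'cluster_name' are read from the first deduplicated message. The project
# ID and field values are illustrative; 'publish_time' and 'ack_id' do not
# belong in the payload, since the handler fills them in from the Pub/Sub
# envelope.
import json

from google.cloud import pubsub_v1

publisher = pubsub_v1.PublisherClient()
topic = publisher.topic_path('my-project-id', METRICS_WRITTEN_TOPIC)
payload = {
    'test_name': 'example-test',        # hypothetical test name
    'zone': 'us-central1-b',            # hypothetical zone
    'cluster_name': 'example-cluster',  # hypothetical cluster
}
publisher.publish(topic, json.dumps(payload).encode('utf-8'))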