def _get_chrome_infra_event(timestamp_kind, service_name=None):
    """Compute a basic event.

    Validates the inputs and returns a pre-filled ChromeInfraEvent or None if
    any check failed.

    The proto is filled using values provided in setup_monitoring() at
    initialization time, and args.

    Args:
      timestamp_kind (string): any of ('POINT', 'BEGIN', 'END'). Must be a
        member of TIMESTAMP_KINDS.
      service_name (string or None): when truthy, written to
        event_source.service_name of the returned event. Both byte strings and
        unicode strings are accepted; any other non-None type is rejected.

    Returns:
      event (chrome_infra_log_pb2.ChromeInfraEvent): a copy of
        config.cache['default_event'] with the requested fields set, or None
        if one of the arguments failed validation (an error is logged).
    """
    if timestamp_kind not in TIMESTAMP_KINDS:
        # Let logging do the formatting: calling str() on an arbitrary
        # (possibly non-ASCII unicode) value here could itself raise and
        # defeat the purpose of returning None on invalid input.
        logging.error('Invalid value for timestamp_kind: %s', timestamp_kind)
        return None

    # We must accept unicode here.
    if service_name is not None and not isinstance(service_name, basestring):
        logging.error('Invalid type for service_name: %s', type(service_name))
        return None

    event = ChromeInfraEvent()
    event.CopyFrom(config.cache['default_event'])

    # A falsy kind that passed validation leaves the field untouched.
    # NOTE(review): presumably TIMESTAMP_KINDS may contain None -- confirm.
    if timestamp_kind:
        event.timestamp_kind = getattr(ChromeInfraEvent, timestamp_kind)
    if service_name:
        event.event_source.service_name = service_name

    return event
def test_get_build_event_test_result_mapping(self):
    """Check the buildbot-result to proto-result translation.

    Tests the hacky mapping between buildbot results and the proto values:
    'WARNINGS' (with an S) is expected to come out as BuildEvent.WARNING
    (no S), and 'EXCEPTION' as BuildEvent.INFRA_FAILURE.
    """
    hostname = 'bot.host.name'
    build_name = 'build_name'
    build_number = 314159265
    build_scheduling_time = 123456789

    # (result string passed in, enum value expected in the proto)
    result_mapping = (
        ('WARNINGS', BuildEvent.WARNING),
        ('EXCEPTION', BuildEvent.INFRA_FAILURE),
    )

    for buildbot_result, expected_result in result_mapping:
        log_event = monitoring.get_build_event(
            'BUILD',
            hostname,
            build_name,
            build_number=build_number,
            build_scheduling_time=build_scheduling_time,
            result=buildbot_result)
        self.assertIsInstance(log_event, LogRequestLite.LogEventLite)

        # Check that source_extension deserializes to the right thing.
        event = ChromeInfraEvent.FromString(log_event.source_extension)
        self.assertTrue(event.HasField('build_event'))
        self.assertEqual(event.build_event.type, BuildEvent.BUILD)
        self.assertEqual(event.build_event.host_name, hostname)
        self.assertEqual(event.build_event.build_name, build_name)
        self.assertEqual(event.build_event.build_number, build_number)
        self.assertEqual(event.build_event.build_scheduling_time_ms,
                         build_scheduling_time)
        self.assertEqual(event.build_event.result, expected_result)
def setup_monitoring(run_type='dry',
                     hostname=None,
                     service_name=None,
                     appengine_name=None,
                     service_account_creds=None,
                     service_accounts_creds_root=None):
    """Initializes event monitoring.

    Fills the module-level cache with the default event and credential
    settings, then creates the module-level router. Nothing is changed when a
    router already exists. Command-line tools should use
    process_argparse_options instead of calling this directly.

    Args:
      run_type (str): One of 'dry', 'test', or 'prod'. Do respectively
        nothing, hit the testing endpoint and the production endpoint. An
        unknown value is reported with logging.error and results in whatever
        ENDPOINTS.get() returns for a missing key.
      hostname (str): hostname as it should appear in the event. Falls back
        to socket.getfqdn() when not provided.
      service_name (str): logical name of the service that emits events.
        e.g. "commit_queue".
      appengine_name (str): name of the appengine app, if running on
        appengine.
      service_account_creds (str): path to a json file containing a service
        account's credentials obtained from a Google Cloud project. **Path is
        relative to service_accounts_creds_root**, which is not the current
        path by default. See infra_libs.authentication for details.
      service_accounts_creds_root (str): path containing credentials files.
    """
    global _router
    logging.debug('event_mon: setting up monitoring.')

    if not _router:  # pragma: no cover
        event_template = ChromeInfraEvent()

        # hostname might be empty string or None on some systems, who knows.
        resolved_host = hostname or socket.getfqdn()
        if not resolved_host:  # pragma: no branch
            logging.warning('event_mon: unable to determine hostname.')
        else:
            event_template.event_source.host_name = resolved_host

        if service_name:
            event_template.event_source.service_name = service_name
        if appengine_name:
            event_template.event_source.appengine_name = appengine_name

        cache['default_event'] = event_template
        cache['service_account_creds'] = service_account_creds
        cache['service_accounts_creds_root'] = service_accounts_creds_root

        if run_type not in ENDPOINTS:
            logging.error('Unknown run_type (%s). Setting to "dry"', run_type)
        _router = _Router(cache, endpoint=ENDPOINTS.get(run_type))
def test_get_build_event_with_step_info_wrong_type(self):
    """A 'BUILD' event given step info still carries the step fields.

    BUILD event with step info is invalid, yet the assertions below expect
    the event to be produced with step_name and step_number populated.
    """
    hostname = 'bot.host.name'
    build_name = 'build_name'
    build_number = 314159265
    build_scheduling_time = 123456789
    step_name = 'step_name'
    step_number = 0  # valid step number

    log_event = monitoring.get_build_event(
        'BUILD',
        hostname,
        build_name,
        build_number=build_number,
        build_scheduling_time=build_scheduling_time,
        step_name=step_name,
        step_number=step_number)
    self.assertIsInstance(log_event, LogRequestLite.LogEventLite)

    # Check that source_extension deserializes to the right thing.
    event = ChromeInfraEvent.FromString(log_event.source_extension)
    self.assertTrue(event.HasField('build_event'))
    self.assertEqual(event.build_event.type, BuildEvent.BUILD)
    self.assertEqual(event.build_event.host_name, hostname)
    self.assertEqual(event.build_event.build_name, build_name)
    self.assertEqual(event.build_event.build_number, build_number)
    self.assertEqual(event.build_event.build_scheduling_time_ms,
                     build_scheduling_time)
    self.assertEqual(event.build_event.step_name, step_name)
    self.assertEqual(event.build_event.step_number, step_number)
def test_get_build_event_step_info_missing_build_info(self):
    """A 'STEP' event without build number/scheduling time is still emitted.

    The step fields are expected to be set while build_number and
    build_scheduling_time_ms are expected to stay unset.
    """
    hostname = 'bot.host.name'
    build_name = 'build_name'
    step_name = 'step_name'
    step_number = 0  # valid step number

    log_event = monitoring.get_build_event(
        'STEP',
        hostname,
        build_name,
        step_name=step_name,
        step_number=step_number)
    self.assertIsInstance(log_event, LogRequestLite.LogEventLite)

    # Check that source_extension deserializes to the right thing.
    event = ChromeInfraEvent.FromString(log_event.source_extension)
    self.assertTrue(event.HasField('build_event'))
    self.assertEqual(event.build_event.type, BuildEvent.STEP)
    self.assertEqual(event.build_event.host_name, hostname)
    self.assertEqual(event.build_event.build_name, build_name)
    self.assertEqual(event.build_event.step_name, step_name)
    self.assertEqual(event.build_event.step_number, step_number)
    self.assertFalse(event.build_event.HasField('build_number'))
    self.assertFalse(event.build_event.HasField('build_scheduling_time_ms'))
def test_get_service_event_crash_simple(self):
    """A bare 'CRASH' service event deserializes with type CRASH."""
    # Preconditions: monitoring must already be configured.
    self.assertIsInstance(config._router, router._Router)
    self.assertIsInstance(config.cache.get('default_event'), ChromeInfraEvent)

    serialized = monitoring._get_service_event('CRASH').source_extension
    decoded = ChromeInfraEvent.FromString(serialized)

    self.assertEqual(decoded.service_event.type, ServiceEvent.CRASH)
def test_get_build_event_with_invalid_result(self):
    """An unrecognized result string leaves the 'result' field unset.

    The rest of the build event is still expected to be filled in.
    """
    hostname = 'bot.host.name'
    build_name = 'build_name'
    build_number = 314159265
    build_scheduling_time = 123456789
    result = '---INVALID---'

    log_event = monitoring.get_build_event(
        'BUILD',
        hostname,
        build_name,
        build_number=build_number,
        build_scheduling_time=build_scheduling_time,
        result=result)
    self.assertIsInstance(log_event, LogRequestLite.LogEventLite)

    # Check that source_extension deserializes to the right thing.
    event = ChromeInfraEvent.FromString(log_event.source_extension)
    self.assertTrue(event.HasField('build_event'))
    self.assertEqual(event.build_event.type, BuildEvent.BUILD)
    self.assertEqual(event.build_event.host_name, hostname)
    self.assertEqual(event.build_event.build_name, build_name)
    self.assertEqual(event.build_event.build_number, build_number)
    self.assertEqual(event.build_event.build_scheduling_time_ms,
                     build_scheduling_time)
    self.assertFalse(event.build_event.HasField('result'))
def test_get_build_event_valid_result_wrong_type(self):
    """A valid result on a 'SCHEDULER' event is still recorded.

    SCHEDULER can't have a result, yet the assertions below expect the event
    to be produced with result set to BuildEvent.SUCCESS.
    """
    hostname = 'bot.host.name'
    build_name = 'build_name'
    build_number = 314159265
    build_scheduling_time = 123456789
    result = 'SUCCESS'

    log_event = monitoring.get_build_event(
        'SCHEDULER',
        hostname,
        build_name,
        build_number=build_number,
        build_scheduling_time=build_scheduling_time,
        result=result)
    self.assertIsInstance(log_event, LogRequestLite.LogEventLite)

    # Check that source_extension deserializes to the right thing.
    event = ChromeInfraEvent.FromString(log_event.source_extension)
    self.assertTrue(event.HasField('build_event'))
    self.assertEqual(event.build_event.type, BuildEvent.SCHEDULER)
    self.assertEqual(event.build_event.host_name, hostname)
    self.assertEqual(event.build_event.build_name, build_name)
    self.assertEqual(event.build_event.build_number, build_number)
    self.assertEqual(event.build_event.build_scheduling_time_ms,
                     build_scheduling_time)
    self.assertEqual(event.build_event.result, BuildEvent.SUCCESS)
def test_get_service_event_crash_with_unicode_trace(self):
    """A non-ASCII unicode stack trace round-trips through a CRASH event."""
    # Preconditions: monitoring must already be configured.
    self.assertIsInstance(config._router, router._Router)
    self.assertIsInstance(config.cache.get('default_event'), ChromeInfraEvent)

    unicode_trace = u"Soyez prêt à un étrange goût de Noël."
    serialized = monitoring._get_service_event(
        'CRASH', stack_trace=unicode_trace).source_extension
    decoded = ChromeInfraEvent.FromString(serialized)

    self.assertEqual(decoded.service_event.type, ServiceEvent.CRASH)
    self.assertEqual(decoded.service_event.stack_trace, unicode_trace)
def test_get_service_event_crash_with_ascii_trace(self):
    """A plain ASCII stack trace round-trips through a CRASH event."""
    # Preconditions: monitoring must already be configured.
    self.assertIsInstance(config._router, router._Router)
    self.assertIsInstance(config.cache.get('default_event'), ChromeInfraEvent)

    ascii_trace = 'A nice ascii string'
    serialized = monitoring._get_service_event(
        'CRASH', stack_trace=ascii_trace).source_extension
    decoded = ChromeInfraEvent.FromString(serialized)

    self.assertEqual(decoded.service_event.type, ServiceEvent.CRASH)
    self.assertEqual(decoded.service_event.stack_trace, ascii_trace)
def test_get_service_event_crash_with_big_trace(self):
    """An oversized stack trace is cut down to STACK_TRACE_MAX_SIZE.

    The stored trace is expected to be exactly
    monitoring.STACK_TRACE_MAX_SIZE characters long.
    """
    self.assertIsInstance(config._router, router._Router)
    self.assertIsInstance(config.cache.get('default_event'), ChromeInfraEvent)

    stack_trace = "this is way too long" * 55
    # Sanity check on the fixture itself; assertGreater reports both values
    # if STACK_TRACE_MAX_SIZE ever grows past the fixture length.
    self.assertGreater(len(stack_trace), monitoring.STACK_TRACE_MAX_SIZE)

    log_event = monitoring._get_service_event('CRASH',
                                              stack_trace=stack_trace)
    event = ChromeInfraEvent.FromString(log_event.source_extension)

    self.assertEqual(event.service_event.type, ServiceEvent.CRASH)
    self.assertEqual(len(event.service_event.stack_trace),
                     monitoring.STACK_TRACE_MAX_SIZE)
def test_get_service_event_trace_without_crash(self):
    """A stack trace attached to a 'START' event is kept as given."""
    # Preconditions: monitoring must already be configured.
    self.assertIsInstance(config._router, router._Router)
    self.assertIsInstance(config.cache.get('default_event'), ChromeInfraEvent)

    trace = 'A nice ascii string'
    serialized = monitoring._get_service_event(
        'START', stack_trace=trace).source_extension
    decoded = ChromeInfraEvent.FromString(serialized)

    # Make sure we send even invalid data.
    self.assertEqual(decoded.service_event.type, ServiceEvent.START)
    self.assertEqual(decoded.service_event.stack_trace, trace)
def test_send_service_event_bad_versions(self):
    """Invalid code_version entries raise nothing and are not recorded."""
    # Check that an invalid version does not cause any exception.
    self.assertIsInstance(config._router, router._Router)
    self.assertIsInstance(config.cache.get('default_event'), ChromeInfraEvent)

    bad_versions = [{}, {'revision': 'https://fake.url'}]
    serialized = monitoring._get_service_event(
        'START', code_version=bad_versions).source_extension
    decoded = ChromeInfraEvent.FromString(serialized)

    self.assertTrue(decoded.HasField('service_event'))
    self.assertTrue(decoded.service_event.HasField('type'))
    self.assertEqual(len(decoded.service_event.code_version), 0)
def test_send_service_event_bad_type(self):
    """Wrongly-typed code_version values raise nothing and are dropped."""
    # Check that an invalid type for code_version does not raise
    # any exception.
    wrongly_typed = (
        None,
        123,
        'string',
        [None],
        [123],
        ['string'],
        [['list']],
    )
    for candidate in wrongly_typed:
        serialized = monitoring._get_service_event(
            'START', code_version=candidate).source_extension
        decoded = ChromeInfraEvent.FromString(serialized)

        self.assertTrue(decoded.HasField('service_event'))
        self.assertTrue(decoded.service_event.HasField('type'))
        self.assertEqual(len(decoded.service_event.code_version), 0)
def test_get_build_event_invalid_hostname(self):
    """A None hostname still yields an event, with host_name left unset."""
    # an invalid hostname is not a critical error.
    builder_name = 'builder_name'

    log_event = monitoring.get_build_event('BUILD', None, builder_name)
    self.assertIsInstance(log_event, LogRequestLite.LogEventLite)

    # Check that source_extension deserializes to the right thing.
    event = ChromeInfraEvent.FromString(log_event.source_extension)
    self.assertTrue(event.HasField('build_event'))
    self.assertEqual(event.build_event.type, BuildEvent.BUILD)
    self.assertEqual(event.build_event.build_name, builder_name)
    self.assertFalse(event.build_event.HasField('host_name'))
def test_get_service_event_default(self):
    """A 'START' service event with no extra arguments is well-formed.

    The log event is expected to carry event_time_ms and source_extension,
    and the embedded ChromeInfraEvent a service_event of type START.
    """
    self.assertIsInstance(config._router, router._Router)
    self.assertIsInstance(config.cache.get('default_event'), ChromeInfraEvent)

    log_event = monitoring._get_service_event('START')
    self.assertIsInstance(log_event, LogRequestLite.LogEventLite)
    self.assertTrue(log_event.HasField('event_time_ms'))
    self.assertTrue(log_event.HasField('source_extension'))

    # Check that source_extension deserializes to the right thing.
    event = ChromeInfraEvent.FromString(log_event.source_extension)
    self.assertTrue(event.HasField('service_event'))
    self.assertTrue(event.service_event.HasField('type'))
    self.assertEqual(event.service_event.type, ServiceEvent.START)
def test_get_service_event_crash_invalid_trace(self):
    """A non-string stack trace raises nothing and is left out."""
    # Preconditions: monitoring must already be configured.
    self.assertIsInstance(config._router, router._Router)
    self.assertIsInstance(config.cache.get('default_event'), ChromeInfraEvent)

    # This is not a stacktrace
    not_a_trace = 123456

    # Should not crash
    serialized = monitoring._get_service_event(
        'CRASH', stack_trace=not_a_trace).source_extension
    decoded = ChromeInfraEvent.FromString(serialized)

    # Send only valid data this time
    self.assertEqual(decoded.service_event.type, ServiceEvent.CRASH)
    self.assertFalse(decoded.service_event.HasField('stack_trace'))
def test_get_build_event_default(self):
    """A 'BUILD' event with only hostname and build name is well-formed.

    The log event is expected to carry event_time_ms and source_extension,
    and the embedded build_event the given type, host name and build name.
    """
    hostname = 'bot.host.name'
    build_name = 'build_name'

    log_event = monitoring.get_build_event('BUILD', hostname, build_name)
    self.assertIsInstance(log_event, LogRequestLite.LogEventLite)
    self.assertTrue(log_event.HasField('event_time_ms'))
    self.assertTrue(log_event.HasField('source_extension'))

    # Check that source_extension deserializes to the right thing.
    event = ChromeInfraEvent.FromString(log_event.source_extension)
    self.assertTrue(event.HasField('build_event'))
    self.assertEqual(event.build_event.type, BuildEvent.BUILD)
    self.assertEqual(event.build_event.host_name, hostname)
    self.assertEqual(event.build_event.build_name, build_name)
def _post_to_endpoint(self, events, try_num=3, retry_backoff=2.):
    """Post protobuf to endpoint.

    When self.endpoint is falsy nothing is sent: the events are decoded and
    logged instead, and the call is reported as successful.

    Args:
      events(LogRequestLite): the protobuf to post. Its request_time_ms field
        is overwritten with the current time before sending.

    Keyword Args:
      try_num(int): max number of http requests send to the endpoint.
      retry_backoff(float): time in seconds before retrying posting to the
        endpoint. Randomized exponential backoff is applied on subsequent
        retries.

    Returns:
      success(bool): whether pushing to the endpoint succeeded or not. False
        means every one of the try_num requests got a non-200 status.
    """
    # Set this time at the very last moment
    events.request_time_ms = time_ms()

    if self.endpoint:  # pragma: no cover
        logging.info('event_mon: POSTing events to %s', self.endpoint)

        # Exactly try_num requests at most, as documented (the loop used to
        # stop one short, so try_num=1 never sent anything).
        for attempt in xrange(try_num):
            response, _ = self.http.request(
                uri=self.endpoint,
                method='POST',
                headers={'Content-Type': 'application/octet-stream'},
                body=events.SerializeToString()
            )

            if response.status == 200:
                return True

            logging.error('failed to POST data to %s (attempt %d)',
                          self.endpoint, attempt)
            logging.error('data: %s', str(events)[:200])

            # Only wait when another request is going to follow.
            if attempt < try_num - 1:
                time.sleep(backoff_time(attempt, retry_backoff=retry_backoff))
        return False

    else:
        infra_events = [str(ChromeInfraEvent.FromString(ev.source_extension))
                        for ev in events.log_event]
        logging.info('Fake post request. Sending:\n%s',
                     '\n'.join(infra_events))
        return True
def test_get_service_event_correct_versions(self):
    """Valid code_version entries are copied into the service event.

    Covers four entries: a 40-character hex revision (expected in git_hash),
    a numeric revision (expected in svn_revision as an int), a 'version'
    string, and a 'dirty' flag.
    """
    self.assertIsInstance(config._router, router._Router)
    self.assertIsInstance(config.cache.get('default_event'), ChromeInfraEvent)

    code_version = [
        {'source_url': 'https://fake.url/thing',
         'revision': '708329c2aeece8aac33af6a5a772ffb14b55903f'},
        {'source_url': 'svn://fake_svn.url/other_thing',
         'revision': '123456'},
        {'source_url': 'https://other_fake.url/yet_another_thing',
         'version': 'v2.0'},
        {'source_url': 'https://other_fake2.url/yet_another_thing2',
         'dirty': True},
    ]

    log_event = monitoring._get_service_event('START',
                                              code_version=code_version)
    event = ChromeInfraEvent.FromString(log_event.source_extension)
    code_version_p = event.service_event.code_version

    self.assertEqual(len(code_version_p), len(code_version))

    # Entry 0: hex revision -> git_hash only.
    self.assertEqual(code_version_p[0].source_url,
                     code_version[0]['source_url'])
    self.assertEqual(code_version_p[0].git_hash, code_version[0]['revision'])
    self.assertFalse(code_version_p[0].HasField('svn_revision'))

    # Entry 1: numeric revision -> svn_revision only.
    self.assertEqual(code_version_p[1].source_url,
                     code_version[1]['source_url'])
    self.assertEqual(code_version_p[1].svn_revision,
                     int(code_version[1]['revision']))
    self.assertFalse(code_version_p[1].HasField('git_hash'))

    # Entry 2: explicit version string, no revision.
    self.assertEqual(code_version_p[2].source_url,
                     code_version[2]['source_url'])
    self.assertFalse(code_version_p[2].HasField('svn_revision'))
    self.assertEqual(code_version_p[2].version, code_version[2]['version'])

    # Entry 3: dirty flag.
    self.assertEqual(code_version_p[3].source_url,
                     code_version[3]['source_url'])
    self.assertEqual(code_version_p[3].dirty, True)
def test_get_build_event_missing_build_number(self):
    """A 'BUILD' event without a build number is still emitted.

    The scheduling time is expected to be set while build_number stays unset.
    """
    hostname = 'bot.host.name'
    build_name = 'build_name'
    build_scheduling_time = 123456789

    log_event = monitoring.get_build_event(
        'BUILD',
        hostname,
        build_name,
        build_scheduling_time=build_scheduling_time)
    self.assertIsInstance(log_event, LogRequestLite.LogEventLite)

    # Check that source_extension deserializes to the right thing.
    event = ChromeInfraEvent.FromString(log_event.source_extension)
    self.assertTrue(event.HasField('build_event'))
    self.assertEqual(event.build_event.type, BuildEvent.BUILD)
    self.assertEqual(event.build_event.host_name, hostname)
    self.assertEqual(event.build_event.build_name, build_name)
    self.assertEqual(event.build_event.build_scheduling_time_ms,
                     build_scheduling_time)
    self.assertFalse(event.build_event.HasField('build_number'))
def test_get_build_event_invalid_scheduler(self):
    """A 'SCHEDULER' event given a build number still records it.

    Providing a build number on a scheduler event is invalid, yet the
    assertions below expect the event to be produced with build_number set
    and build_scheduling_time_ms unset.
    """
    hostname = 'bot.host.name'
    build_name = 'build_name'
    build_number = 314159265  # int32

    log_event = monitoring.get_build_event(
        'SCHEDULER',
        hostname,
        build_name,
        build_number=build_number)
    self.assertIsInstance(log_event, LogRequestLite.LogEventLite)

    # Check that source_extension deserializes to the right thing.
    event = ChromeInfraEvent.FromString(log_event.source_extension)
    self.assertTrue(event.HasField('build_event'))
    self.assertEqual(event.build_event.type, BuildEvent.SCHEDULER)
    self.assertEqual(event.build_event.host_name, hostname)
    self.assertEqual(event.build_event.build_name, build_name)
    self.assertEqual(event.build_event.build_number, build_number)
    self.assertFalse(event.build_event.HasField('build_scheduling_time_ms'))
def test_get_build_event_with_build_zero(self):
    """A build number of 0 is recorded like any other build number."""
    # testing 0 is important because bool(0) == False
    hostname = 'bot.host.name'
    build_name = 'build_name'
    build_number = 0
    build_scheduling_time = 123456789

    log_event = monitoring.get_build_event(
        'BUILD',
        hostname,
        build_name,
        build_number=build_number,
        build_scheduling_time=build_scheduling_time)
    self.assertIsInstance(log_event, LogRequestLite.LogEventLite)

    # Check that source_extension deserializes to the right thing.
    event = ChromeInfraEvent.FromString(log_event.source_extension)
    self.assertTrue(event.HasField('build_event'))
    self.assertEqual(event.build_event.type, BuildEvent.BUILD)
    self.assertEqual(event.build_event.host_name, hostname)
    self.assertEqual(event.build_event.build_name, build_name)
    self.assertEqual(event.build_event.build_number, build_number)
    self.assertEqual(event.build_event.build_scheduling_time_ms,
                     build_scheduling_time)