def check_delay(server, user, password):
    """Check the delay of a given slave database server.

    @param server: Hostname or IP address of the MySQL server.
    @param user: User name to log in the MySQL server.
    @param password: Password to log in the MySQL server.
    """
    try:
        status_output = utils.run_sql_cmd(server, user, password,
                                          SLAVE_STATUS_CMD)
    except error.CmdError:
        logging.exception('Failed to get slave status of server %s.', server)
        return

    match = re.search(DELAY_TIME_REGEX, status_output, re.MULTILINE)
    if match:
        seconds_behind = int(match.group(1))
        metrics.SecondsDistribution(DELAY_METRICS).add(
            seconds_behind, fields={'server': server})
        logging.debug('Seconds_Behind_Master of server %s is %d.',
                      server, seconds_behind)
    else:
        # The value of Seconds_Behind_Master could be NULL, report a large
        # number to indicate database error.
        metrics.SecondsDistribution(DELAY_METRICS).add(
            LARGE_DELAY, fields={'server': server})
        logging.error('Failed to get Seconds_Behind_Master of server %s '
                      'from slave status:\n %s', server, status_output)
def RecordSubmissionMetrics(action_history, submitted_change_strategies):
    """Record submission metrics in monarch.

    Args:
        action_history: A CLActionHistory instance for all cl actions for all
            changes in submitted_change_strategies.
        submitted_change_strategies: A dictionary from changes to submission
            strategies. These changes will have their handling times recorded
            in monarch.
    """
    # Create each metric object once and reuse it for every change.
    handling_time_metric = metrics.SecondsDistribution(
        constants.MON_CL_HANDLE_TIME)
    precq_time_metric = metrics.SecondsDistribution(
        constants.MON_CL_PRECQ_TIME)
    wait_time_metric = metrics.SecondsDistribution(constants.MON_CL_WAIT_TIME)
    cq_run_time_metric = metrics.SecondsDistribution(
        constants.MON_CL_CQRUN_TIME)
    false_rejection_metric = metrics.CumulativeSmallIntegerDistribution(
        constants.MON_CL_FALSE_REJ)
    false_rejection_count_metric = metrics.Counter(
        constants.MON_CL_FALSE_REJ_COUNT)

    precq_false_rejections = action_history.GetFalseRejections(
        bot_type=constants.PRE_CQ)
    cq_false_rejections = action_history.GetFalseRejections(
        bot_type=constants.CQ)

    for change, strategy in submitted_change_strategies.iteritems():
        fields = {'submission_strategy': strategy or ''}

        # Each timing metric gets the per-change duration computed from the
        # same action history.
        timings = (
            (handling_time_metric, GetCLHandlingTime(change, action_history)),
            (precq_time_metric, GetPreCQTime(change, action_history)),
            (wait_time_metric, GetCQWaitTime(change, action_history)),
            (cq_run_time_metric, GetCQRunTime(change, action_history)),
        )
        for metric, seconds in timings:
            metric.add(seconds, fields=fields)

        # Report false rejections separately for the pre-CQ and the CQ.
        for bot_type, rejections_by_change in (
                (constants.PRE_CQ, precq_false_rejections),
                (constants.CQ, cq_false_rejections)):
            num_rejections = len(rejections_by_change.get(change, []))
            rej_fields = dict(fields, rejected_by=bot_type)
            false_rejection_metric.add(num_rejections, fields=rej_fields)
            false_rejection_count_metric.increment_by(num_rejections,
                                                      fields=rej_fields)
def _FinishBuildStageInCIDBAndMonarch(self, status, elapsed_time_seconds=0):
    """Mark the stage as finished in cidb.

    Args:
        status: The finish status of the build. Enum type
            constants.BUILDER_COMPLETED_STATUSES
        elapsed_time_seconds: (optional) Elapsed time in stage, in seconds.
    """
    _, db = self._run.GetCIDBHandle()
    # Only record completion in cidb when both a db handle and a stage row
    # exist for this run.
    if db is not None and self._build_stage_id is not None:
        db.FinishBuildStage(self._build_stage_id, status)

    fields = dict(
        status=status,
        name=self.name,
        build_config=self._run.config.name,
        important=self._run.config.important)
    metrics.SecondsDistribution(constants.MON_STAGE_DURATION).add(
        elapsed_time_seconds, fields=fields)
    metrics.Counter(constants.MON_STAGE_COMP_COUNT).increment(fields=fields)
def EmitRequestMetrics(m):
    """Emits metrics for each line in the access log.

    @param m: A regex match object
    """
    # TODO(phobbs) use a memory-efficient structure to detect non-unique paths.
    # We can't just include the endpoint because it will cause a cardinality
    # explosion.
    endpoint = SanitizeEndpoint(m.group('endpoint'))
    fields = {
        'request_method': m.groupdict().get('request_method', ''),
        'endpoint': endpoint,
        'response_code': int(m.group('response_code')),
    }

    # Request seconds and bytes sent are both extremely high cardinality, so
    # they must be the VAL of a metric, not a metric field.
    if m.group('response_seconds'):
        metrics.SecondsDistribution(ACCESS_TIME_METRIC).add(
            int(m.group('response_seconds')), fields=fields)

    metrics.CumulativeDistribution(ACCESS_BYTES_METRIC).add(
        int(m.group('bytes_sent')), fields=fields)
def EmitRequestMetrics(m):
    """Emits metrics for each line in the access log.

    @param m: A regex match object
    """
    raw_endpoint = m.group('endpoint')
    fields = {
        'request_method': m.groupdict().get('request_method', ''),
        'endpoint': SanitizeEndpoint(raw_endpoint),
        'response_code': int(m.group('response_code')),
    }

    # RPC-looking requests (query string plus an /rpc path segment) also get
    # an RPC-specific metric with the method name included.
    if '?' in raw_endpoint and '/rpc' in raw_endpoint:
        EmitRPCMetrics(m)

    # Request seconds and bytes sent are both extremely high cardinality, so
    # they must be the VAL of a metric, not a metric field.
    if m.group('response_seconds'):
        metrics.SecondsDistribution(ACCESS_TIME_METRIC).add(
            int(m.group('response_seconds')), fields=fields)

    metrics.CumulativeDistribution(ACCESS_BYTES_METRIC).add(
        int(m.group('bytes_sent')), fields=fields)
def EmitRPCMetrics(m):
    """Emit a special metric including the method when the request was an RPC."""
    fields = {
        'request_method': m.groupdict().get('request_method', ''),
        'rpc_method': ParseRPCMethod(m.group('endpoint')),
        'response_code': int(m.group('response_code')),
    }

    # Response time is only recorded when the log line actually carried it.
    if m.group('response_seconds'):
        metrics.SecondsDistribution(RPC_ACCESS_TIME_METRIC).add(
            int(m.group('response_seconds')), fields=fields)

    metrics.CumulativeDistribution(RPC_ACCESS_BYTES_METRIC).add(
        int(m.group('bytes_sent')), fields=fields)
def main():
    """Main script.

    Reads the global DB connection settings (each falling back to the local
    AUTOTEST_WEB value), runs the TKO cleanup SQL command, and reports the
    cleanup duration to monarch whether or not the cleanup succeeded.
    """
    options = parse_options()
    log_config = logging_config.LoggingConfig()
    if options.logfile:
        log_config.add_file_handler(
            file_path=os.path.abspath(options.logfile),
            level=logging.DEBUG)

    def get_config(name, fallback):
        # Each global_db_* setting defaults to the corresponding local
        # AUTOTEST_WEB setting when not configured.
        return CONFIG.get_config_value(
            'AUTOTEST_WEB', name,
            default=CONFIG.get_config_value('AUTOTEST_WEB', fallback))

    server = get_config('global_db_host', 'host')
    user = get_config('global_db_user', 'user')
    password = get_config('global_db_password', 'password')
    database = get_config('global_db_database', 'database')

    logging.info('Start cleaning up old records in TKO database %s on server '
                 '%s.', database, server)

    start_time = time.time()
    try:
        utils.run_sql_cmd(server, user, password, CLEANUP_TKO_CMD, database)
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit; catch Exception so those still terminate the script.
        logging.exception('Cleanup failed with exception.')
    finally:
        duration = time.time() - start_time
        metrics.SecondsDistribution(CLEANUP_METRICS).add(
            duration, fields={'server': server})
        logging.info('Cleanup finished in %s seconds.', duration)
def _emit_special_task_status_metric(self):
    """Increments an accumulator associated with this special task."""
    fields = {'type': self.TASK_TYPE,
              'success': bool(self.success),
              'board': str(self.host.board),
              'milestone': self._milestone}
    metrics.Counter(self._COUNT_METRIC).increment(fields=fields)

    # Duration is only reported when both timestamps are recorded.
    if self.task.time_finished and self.task.time_started:
        elapsed = self.task.time_finished - self.task.time_started
        metrics.SecondsDistribution(self._DURATION_METRIC).add(
            elapsed.total_seconds(), fields=fields)

    # Per-DUT counter uses the hostname instead of the milestone field.
    dut_fields = {'type': self.TASK_TYPE,
                  'success': bool(self.success),
                  'board': str(self.host.board),
                  'dut_host_name': self.host.hostname}
    metrics.Counter(self._DUT_METRIC).increment(fields=dut_fields)
def PerformStage(self):
    """Perform the actual work for this stage.

    This includes final metadata archival, and updating CIDB with our final
    status as well as producing a logged build result summary.
    """
    build_id, db = self._run.GetCIDBHandle()
    # Overall pass/fail is derived from the accumulated stage results.
    if results_lib.Results.BuildSucceededSoFar(db, build_id, self.name):
        final_status = constants.FINAL_STATUS_PASSED
    else:
        final_status = constants.FINAL_STATUS_FAILED

    if not hasattr(self._run.attrs, 'release_tag'):
        # If, for some reason, sync stage was not completed and
        # release_tag was not set. Set it to None here because
        # ArchiveResults() depends the existence of this attr.
        self._run.attrs.release_tag = None

    # Set up our report metadata.
    self._run.attrs.metadata.UpdateWithDict(
        self.GetReportMetadata(
            final_status=final_status,
            completion_instance=self._completion_instance))

    # Some operations can only be performed if a valid version is available.
    try:
        self._run.GetVersionInfo()
        self.ArchiveResults(final_status, build_id, db)
        metadata_url = os.path.join(self.upload_url, constants.METADATA_JSON)
    except cbuildbot_run.VersionNotSetError:
        logging.error('A valid version was never set for this run. '
                      'Can not archive results.')
        metadata_url = ''

    # Log a human-readable summary of the build results to stdout.
    results_lib.Results.Report(
        sys.stdout,
        current_version=(self._run.attrs.release_tag or ''))

    if db:
        # TODO(akeshet): Eliminate this status string translate once
        # these differing status strings are merged, crbug.com/318930
        translateStatus = lambda s: (constants.BUILDER_STATUS_PASSED
                                     if s == constants.FINAL_STATUS_PASSED
                                     else constants.BUILDER_STATUS_FAILED)
        status_for_db = translateStatus(final_status)

        child_metadatas = self._run.attrs.metadata.GetDict().get(
            'child-configs', [])
        for child_metadata in child_metadatas:
            db.FinishChildConfig(build_id, child_metadata['name'],
                                 translateStatus(child_metadata['status']))

        # TODO(pprabhu): After BuildData and CBuildbotMetdata are merged,
        # remove this extra temporary object creation.
        # XXX:HACK We're creating a BuildData with an empty URL. Don't try to
        # MarkGathered this object.
        build_data = metadata_lib.BuildData(
            "", self._run.attrs.metadata.GetDict())

        # TODO(akeshet): Find a clearer way to get the "primary upload url"
        # for the metadata.json file. One alternative is _GetUploadUrls(...)[0].
        # Today it seems that element 0 of its return list is the primary
        # upload url, but there is no guarantee or unit test coverage of that.
        db.FinishBuild(build_id, status=status_for_db,
                       summary=build_data.failure_message,
                       metadata_url=metadata_url)

        duration = self._GetBuildDuration()

        # Emit build completion count and duration to monarch, keyed by the
        # (translated) final status.
        mon_fields = {'status': status_for_db,
                      'build_config': self._run.config.name,
                      'important': self._run.config.important}
        metrics.Counter(
            constants.MON_BUILD_COMP_COUNT).increment(fields=mon_fields)
        metrics.SecondsDistribution(constants.MON_BUILD_DURATION).add(
            duration, fields=mon_fields)

    # From this point forward, treat all exceptions as warnings.
    self._post_completion = True

    # Dump report about things we retry.
    retry_stats.ReportStats(sys.stdout)

    # Dump performance stats for this build versus recent builds.
    if db:
        output = StringIO.StringIO()
        self.CollectComparativeBuildTimings(output, build_id, db)

        # Bunch up our output, so it doesn't interleave with CIDB logs.
        sys.stdout.write(output.getvalue())