class ReprocessingOneRabbitMQCrashStore(ReprocessingRabbitMQCrashStore):
    required_config = Namespace()
    required_config.rabbitmq_class = change_default(
        RabbitMQCrashStorage,
        'rabbitmq_class',
        ConnectionContext,
    )
    required_config.routing_key = change_default(
        RabbitMQCrashStorage,
        'routing_key',
        'socorro.reprocessing'
    )

    def reprocess(self, crash_ids):
        if not isinstance(crash_ids, (list, tuple)):
            crash_ids = [crash_ids]

        success = bool(crash_ids)

        for crash_id in crash_ids:
            if not self.save_raw_crash(
                DotDict({'legacy_processing': 0}),
                [],
                crash_id
            ):
                success = False
        return success

class CorrelationInterestingAddonsVersionsRule(CorrelationInterestingModulesRule):
    required_config = Namespace()
    required_config.addons = change_default(
        CorrelationInterestingModulesRule,
        'addons',
        True
    )
    required_config.show_versions = change_default(
        CorrelationInterestingModulesRule,
        'show_versions',
        True
    )

class ReprocessingRabbitMQCrashStore(RabbitMQCrashStorage):
    required_config = Namespace()
    required_config.routing_key = change_default(
        RabbitMQCrashStorage,
        'routing_key',
        'socorro.reprocessing'
    )
    required_config.filter_on_legacy_processing = change_default(
        RabbitMQCrashStorage,
        'filter_on_legacy_processing',
        False
    )

class DumpLookupExternalRule(ExternalProcessRule):
    required_config = Namespace()
    required_config.add_option(
        'dump_field',
        doc='the default name of a dump',
        default='upload_file_minidump',
    )
    required_config.add_option(
        'processor_symbols_pathname_list',
        doc='comma or space separated list of symbol files just as for '
            'minidump_stackwalk (quote paths with embedded spaces)',
        default='/mnt/socorro/symbols/symbols_ffx,'
                '/mnt/socorro/symbols/symbols_sea,'
                '/mnt/socorro/symbols/symbols_tbrd,'
                '/mnt/socorro/symbols/symbols_sbrd,'
                '/mnt/socorro/symbols/symbols_os',
        from_string_converter=_create_symbol_path_str
    )
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        '/data/socorro/stackwalk/bin/dump-lookup'
    )
    required_config.command_line = change_default(
        ExternalProcessRule,
        'command_line',
        'timeout -s KILL 30 {command_pathname} '
        '{dumpfile_pathname} '
        '{processor_symbols_pathname_list} '
        '2>/dev/null'
    )
    required_config.result_key = change_default(
        ExternalProcessRule,
        'result_key',
        'dump_lookup'
    )
    required_config.return_code_key = change_default(
        ExternalProcessRule,
        'return_code_key',
        'dump_lookup_return_code'
    )

    #--------------------------------------------------------------------------
    def _predicate(self, raw_crash, raw_dumps, processed_crash,
                   processor_meta):
        return 'create_dump_lookup' in raw_crash

class BotoS3CrashStorage(BotoCrashStorage):
    required_config = Namespace()
    required_config.resource_class = change_default(
        BotoCrashStorage,
        'resource_class',
        'socorro.external.boto.connection_context.RegionalS3ConnectionContext'
    )

class RegionalS3ConnectionContext(S3ConnectionContext):
    """This derived class forces you to connect to a specific region, which
    means we can use the OrdinaryCallingFormat as a calling format and then
    we'll be able to connect to S3 buckets with periods in their names.
    """
    required_config = Namespace()
    required_config.add_option(
        'region',
        doc="Name of the S3 region (e.g. us-west-2)",
        default='us-west-2',
        reference_value_from='resource.boto',
    )
    required_config.calling_format = change_default(
        S3ConnectionContext,
        'calling_format',
        'boto.s3.connection.OrdinaryCallingFormat'
    )

    #--------------------------------------------------------------------------
    def __init__(self, config, quit_check_callback=None):
        super(RegionalS3ConnectionContext, self).__init__(config)
        self._region = config.region
        self._connect_to_endpoint = boto.s3.connect_to_region

    #--------------------------------------------------------------------------
    def _connect(self):
        try:
            return self.connection
        except AttributeError:
            self.connection = self._connect_to_endpoint(
                self._region,
                **self._get_credentials()
            )
            return self.connection

class SocorroLiteProcessorAlgorithm2015(Processor2015):
    """this is the class that the processor uses to transform crashes"""
    required_config = Namespace()
    required_config.rule_sets = change_default(
        Processor2015,
        'rule_sets',
        ujson.dumps(socorrolite_processor_rule_sets)
    )

class PGPVNewCrashSource(PGQueryNewCrashSource):
    required_config = Namespace()
    required_config.crash_id_query = change_default(
        PGQueryNewCrashSource,
        'crash_id_query',
        "select uuid "
        "from reports_clean rc join product_versions pv "
        " on rc.product_version_id = pv.product_version_id "
        "where "
        "%s <= date_processed and date_processed < %s "
        "and %s between pv.build_date and pv.sunset_date"
    )
    required_config.add_option(
        'date',
        doc="a date in the form YYYY-MM-DD",
        default=(utc_now() - timedelta(1)).date(),
        from_string_converter=string_to_datetime
    )

    def __init__(self, config, name, quit_check_callback=None):
        super(PGPVNewCrashSource, self).__init__(
            config,
            name,
            quit_check_callback
        )
        self.data = (
            config.date,
            config.date + timedelta(1),  # add a day
            config.date
        )

class PriorityjobRabbitMQCrashStore(RabbitMQCrashStorage):
    required_config = Namespace()
    required_config.rabbitmq_class = change_default(
        RabbitMQCrashStorage,
        'rabbitmq_class',
        ConnectionContext,
    )
    required_config.add_option(
        'routing_key',
        default='socorro.priority',
        doc='the name of the queue to receive crashes',
    )

    def process(self, crash_ids):
        if not isinstance(crash_ids, (list, tuple)):
            crash_ids = [crash_ids]

        success = bool(crash_ids)

        for crash_id in crash_ids:
            if not self.save_raw_crash(
                DotDict({'legacy_processing': 0}),
                [],
                crash_id
            ):
                success = False
        return success

def test_change_default(self):
    class Alpha(RequiredConfig):
        required_config = Namespace()
        required_config.add_option(
            'an_option',
            default=19,
            doc='this is an an_option',
            from_string_converter=str,
        )
    a_new_option_with_a_new_default = change_default(
        Alpha, 'an_option', '29300'
    )
    ok_(
        a_new_option_with_a_new_default is not Alpha.required_config.an_option
    )
    eq_(
        a_new_option_with_a_new_default.default,
        '29300'
    )
    eq_(
        Alpha.required_config.an_option.default,
        19
    )

class ESCrashStorageNoStackwalkerOutput(ESCrashStorage):
    required_config = Namespace()
    required_config.namespace('es_redactor')
    required_config.es_redactor.add_option(
        name="redactor_class",
        doc="the name of the class that implements a 'redact' method",
        default='socorro.external.crashstorage_base.Redactor',
        from_string_converter=class_converter,
    )
    required_config.es_redactor.forbidden_keys = change_default(
        Redactor,
        "forbidden_keys",
        "json_dump, "
        "upload_file_minidump_flash1.json_dump, "
        "upload_file_minidump_flash2.json_dump, "
        "upload_file_minidump_browser.json_dump"
    )

    #--------------------------------------------------------------------------
    def __init__(self, config, quit_check_callback=None):
        """Init, you know. """
        super(ESCrashStorageNoStackwalkerOutput, self).__init__(
            config,
            quit_check_callback
        )
        self.redactor = config.es_redactor.redactor_class(config.es_redactor)
        self.config.logger.warning(
            "beware, this crashstorage class is destructive to the "
            "processed crash - if you're using a polycrashstore you may "
            "find the modified processed crash saved to the other crashstores"
        )

    #--------------------------------------------------------------------------
    @staticmethod
    def reconstitute_datetimes(processed_crash):
        datetime_fields = [
            'submitted_timestamp',
            'date_processed',
            'client_crash_date',
            'started_datetime',
            'startedDateTime',
            'completed_datetime',
            'completeddatetime',
        ]
        for a_key in datetime_fields:
            try:
                processed_crash[a_key] = string_to_datetime(
                    processed_crash[a_key]
                )
            except KeyError:
                # not there? we don't care
                pass

    #--------------------------------------------------------------------------
    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        """This is the only write mechanism that is actually employed in
        normal usage.
        """
        self.reconstitute_datetimes(processed_crash)
        self.redactor.redact(processed_crash)

        super(ESCrashStorageNoStackwalkerOutput, self).save_raw_and_processed(
            raw_crash,
            dumps,
            processed_crash,
            crash_id
        )

class ESCrashStorageRedactedSave(ESCrashStorage):
    required_config = Namespace()
    required_config.namespace('es_redactor')
    required_config.es_redactor.add_option(
        name="redactor_class",
        doc="the name of the class that implements a 'redact' method",
        default='socorro.external.crashstorage_base.Redactor',
        from_string_converter=class_converter,
    )
    required_config.es_redactor.forbidden_keys = change_default(
        Redactor,
        "forbidden_keys",
        "json_dump, "
        "upload_file_minidump_flash1.json_dump, "
        "upload_file_minidump_flash2.json_dump, "
        "upload_file_minidump_browser.json_dump"
    )
    required_config.namespace('raw_crash_es_redactor')
    required_config.raw_crash_es_redactor.add_option(
        name="redactor_class",
        doc="the redactor class to use on the raw_crash",
        default='socorro.external.es.crashstorage.RawCrashRedactor',
        from_string_converter=class_converter,
    )

    def __init__(self, config, quit_check_callback=None):
        super(ESCrashStorageRedactedSave, self).__init__(
            config,
            quit_check_callback
        )
        self.redactor = config.es_redactor.redactor_class(config.es_redactor)
        self.raw_crash_redactor = config.raw_crash_es_redactor.redactor_class(
            config.raw_crash_es_redactor
        )
        self.config.logger.warning(
            "Beware, this crashstorage class is destructive to the "
            "processed crash - if you're using a polycrashstore you may "
            "find the modified processed crash saved to the other crashstores."
        )

    def is_mutator(self):
        # This crash storage mutates the crash, so we mark it as such.
        return True

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        """This is the only write mechanism that is actually employed in
        normal usage.
        """
        self.redactor.redact(processed_crash)
        self.raw_crash_redactor.redact(raw_crash)

        super(ESCrashStorageRedactedSave, self).save_raw_and_processed(
            raw_crash,
            dumps,
            processed_crash,
            crash_id
        )

class ESCrashStorageRedactedJsonDump(ESCrashStorageRedactedSave):
    """This class stores redacted crash reports into Elasticsearch, but
    instead of removing the entire `json_dump`, it keeps only a subset of
    its keys.
    """
    required_config = Namespace()
    required_config.add_option(
        name="json_dump_whitelist_keys",
        doc="keys of the json_dump field to keep in the processed crash",
        default=[
            "largest_free_vm_block",
            "tiny_block_size",
            "write_combine_size",
            "system_info",
        ],
        from_string_converter=list_converter,
    )
    required_config.namespace('es_redactor')
    required_config.es_redactor.add_option(
        name="redactor_class",
        doc="the name of the class that implements a 'redact' method",
        default='socorro.external.crashstorage_base.Redactor',
        from_string_converter=class_converter,
    )
    required_config.es_redactor.forbidden_keys = change_default(
        Redactor,
        "forbidden_keys",
        (
            "memory_report, "
            "upload_file_minidump_flash1.json_dump, "
            "upload_file_minidump_flash2.json_dump, "
            "upload_file_minidump_browser.json_dump"
        )
    )

    def is_mutator(self):
        # This crash storage mutates the crash, so we mark it as such.
        return True

    #--------------------------------------------------------------------------
    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        """This is the only write mechanism that is actually employed in
        normal usage.
        """
        # Replace the `json_dump` with a subset.
        json_dump = processed_crash.get('json_dump', {})
        redacted_json_dump = {
            k: json_dump.get(k)
            for k in self.config.json_dump_whitelist_keys
        }
        processed_crash['json_dump'] = redacted_json_dump

        super(ESCrashStorageRedactedJsonDump, self).save_raw_and_processed(
            raw_crash,
            dumps,
            processed_crash,
            crash_id
        )

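As a point of reference, the whitelist reduction performed in save_raw_and_processed above can be reproduced in isolation; the json_dump contents in this sketch are made up purely for illustration.

# a minimal, self-contained sketch of the json_dump reduction above,
# using the default whitelist keys; the input dict is invented
json_dump_whitelist_keys = [
    "largest_free_vm_block",
    "tiny_block_size",
    "write_combine_size",
    "system_info",
]
json_dump = {
    "system_info": {"os": "Windows NT"},   # kept
    "largest_free_vm_block": "0x1000000",  # kept
    "threads": [],                         # dropped by the reduction
    "modules": [],                         # dropped by the reduction
}
redacted_json_dump = {
    k: json_dump.get(k)
    for k in json_dump_whitelist_keys
}
# keys not in the whitelist ("threads", "modules") are gone; whitelisted keys
# that were absent ("tiny_block_size", "write_combine_size") come back as None
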
class JitCrashCategorizeRule(ExternalProcessRule):
    required_config = Namespace()
    required_config.command_line = change_default(
        ExternalProcessRule,
        'command_line',
        'timeout -s KILL 30 {command_pathname} '
        '{dump_file_pathname} '
        '2>/dev/null'
    )
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        '/data/socorro/stackwalk/bin/jit-crash-categorize',
    )
    required_config.result_key = change_default(
        ExternalProcessRule,
        'result_key',
        'classifications.jit.category',
    )
    required_config.return_code_key = change_default(
        ExternalProcessRule,
        'return_code_key',
        'classifications.jit.category_return_code',
    )
    required_config.add_option(
        'threshold',
        doc="max number of frames until encountering target frame",
        default=8
    )

    #--------------------------------------------------------------------------
    def __init__(self, config):
        super(JitCrashCategorizeRule, self).__init__(config)

    #--------------------------------------------------------------------------
    def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
        if (processed_crash.product != 'Firefox' or
                not processed_crash.os_name.startswith('Windows') or
                processed_crash.cpu_name != 'x86'):
            # we don't want any of these
            return False

        if processed_crash.json_dump['crashing_thread']['frames'][0].get(
            'module', False
        ):
            # there is a module at the top of the stack, we don't want this
            return False

        return (processed_crash.signature.endswith('EnterBaseline') or
                processed_crash.signature.endswith('EnterIon'))

class CountStackWalkerTimeoutKills(CountAnythingRuleBase):
    required_config = Namespace()
    required_config.rule_name = change_default(
        CountAnythingRuleBase,
        'rule_name',
        'stackwalker_timeout_kills'
    )

    def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
        # Only run the action if "SIGKILL" is in the processor notes
        return any(['SIGKILL' in note for note in proc_meta.processor_notes])

class CountStackWalkerFailures(CountAnythingRuleBase):
    required_config = Namespace()
    required_config.rule_name = change_default(
        CountAnythingRuleBase,
        'rule_name',
        'stackwalker_failures'
    )

    def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
        # Only run the action if "MDSW failed" is in the processor notes
        return any(['MDSW failed' in note for note in proc_meta.processor_notes])

class JsonFileOutputForCoreCounts(FileOutputForCoreCounts):
    required_config = Namespace()
    required_config.path_template = change_default(
        FileOutputForCoreCounts,
        'path_template',
        '{path}/{prefix}/{prefix}_{key}-{name}.json',
    )

    #--------------------------------------------------------------------------
    def output_correlations_to_stream(self, counts_summary_structure, stream):
        json.dump(counts_summary_structure, stream, indent=4, sort_keys=True)

class SingleCrashMQCrashStorage(RabbitMQCrashStorage):
    required_config = Namespace()
    required_config.routing_key = change_default(
        RabbitMQCrashStorage,
        'routing_key',
        'socorro.normal'
    )
    required_config.rabbitmq_class = change_default(
        RabbitMQCrashStorage,
        'rabbitmq_class',
        ConnectionContext,
    )
    required_config.transaction_executor_class = change_default(
        RabbitMQCrashStorage,
        'transaction_executor_class',
        TransactionExecutor
    )

    def submit(self, crash_ids):
        if not isinstance(crash_ids, (list, tuple)):
            crash_ids = [crash_ids]

        success = bool(crash_ids)

        for crash_id in crash_ids:
            if not self.save_raw_crash(
                DotDict({'legacy_processing': 0}),
                [],
                crash_id
            ):
                success = False
        return success

class ESCrashStorageRedactedSave(ESCrashStorage):
    required_config = Namespace()
    required_config.namespace('es_redactor')
    required_config.es_redactor.add_option(
        name="redactor_class",
        doc="the name of the class that implements a 'redact' method",
        default='socorro.external.crashstorage_base.Redactor',
        from_string_converter=class_converter,
    )
    required_config.es_redactor.forbidden_keys = change_default(
        Redactor,
        "forbidden_keys",
        "json_dump, "
        "upload_file_minidump_flash1.json_dump, "
        "upload_file_minidump_flash2.json_dump, "
        "upload_file_minidump_browser.json_dump"
    )
    required_config.namespace('raw_crash_es_redactor')
    required_config.raw_crash_es_redactor.add_option(
        name="redactor_class",
        doc="the redactor class to use on the raw_crash",
        default='socorro.external.es.crashstorage.RawCrashRedactor',
        from_string_converter=class_converter,
    )

    def __init__(self, config, *args, **kwargs):
        super(ESCrashStorageRedactedSave, self).__init__(config, *args, **kwargs)
        self.redactor = config.es_redactor.redactor_class(config.es_redactor)
        self.raw_crash_redactor = config.raw_crash_es_redactor.redactor_class(
            config.raw_crash_es_redactor
        )

    def is_mutator(self):
        # This crash storage mutates the crash, so we mark it as such.
        return True

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        """This is the only write mechanism that is actually employed in
        normal usage"""
        self.redactor.redact(processed_crash)
        self.raw_crash_redactor.redact(raw_crash)

        super(ESCrashStorageRedactedSave, self).save_raw_and_processed(
            raw_crash, dumps, processed_crash, crash_id
        )

def test_change_default(self):
    class Alpha(RequiredConfig):
        required_config = Namespace()
        required_config.add_option(
            'an_option',
            default=19,
            doc='this is an an_option',
            from_string_converter=str,
        )
    a_new_option_with_a_new_default = change_default(
        Alpha, 'an_option', '29300'
    )

    assert a_new_option_with_a_new_default is not Alpha.required_config.an_option
    assert a_new_option_with_a_new_default.default == '29300'
    assert Alpha.required_config.an_option.default == 19

class CountStackWalkerTimeoutKills(CountAnythingRuleBase):
    required_config = Namespace()
    required_config.rule_name = change_default(
        CountAnythingRuleBase,
        'rule_name',
        'stackwalker_timeout_kills'
    )

    #--------------------------------------------------------------------------
    def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
        # override me to check any condition within a raw, processed crash
        # or even the state of the processor itself from the proc_meta
        return reduce(
            lambda x, y: x or "SIGKILL" in y,
            proc_meta.processor_notes,
            False
        )

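Assuming proc_meta.processor_notes is a list of strings, the reduce() above is just an accumulated boolean OR, equivalent to the any() form used by the later variant of this rule shown earlier.

def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
    # equivalent to the reduce() above: True as soon as any note mentions SIGKILL
    return any("SIGKILL" in note for note in proc_meta.processor_notes)
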
class RegionalS3ConnectionContext(S3ConnectionContext):
    """Connection context for a specific region

    This lets you use the OrdinaryCallingFormat as a calling format and then
    you can use S3 buckets with periods in the names.

    """
    required_config = Namespace()
    required_config.add_option(
        'region',
        doc="Name of the S3 region (e.g. us-west-2)",
        default='us-west-2',
        reference_value_from='resource.boto',
    )
    required_config.calling_format = change_default(
        S3ConnectionContext,
        'calling_format',
        'boto.s3.connection.OrdinaryCallingFormat'
    )

    def __init__(self, config, quit_check_callback=None):
        super(RegionalS3ConnectionContext, self).__init__(config)
        self._region = config.region
        self._connect_to_endpoint = boto.s3.connect_to_region

    def _connect(self):
        try:
            return self.connection
        except AttributeError:
            self.connection = self._connect_to_endpoint(
                self._region,
                **self._get_credentials()
            )
            return self.connection

    def _get_or_create_bucket(self, conn, bucket_name):
        try:
            return self._get_bucket(conn, bucket_name)
        except self.ResponseError:
            self._bucket_cache[bucket_name] = conn.create_bucket(
                bucket_name,
                location=self._region,
            )
            return self._bucket_cache[bucket_name]

class JitCrashCategorizeRule(ExternalProcessRule):
    # FIXME(willkg): command_line and command_pathname are referenced in the
    # uplifted versions in Processor2015. The rest of these config values have
    # no effect on anything and are just here.
    required_config = Namespace()
    required_config.command_line = change_default(
        ExternalProcessRule,
        'command_line',
        'timeout -s KILL 30 {command_pathname} '
        '{dump_file_pathname} '
        '2>/dev/null'
    )
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        '/data/socorro/stackwalk/bin/jit-crash-categorize',
    )
    required_config.result_key = change_default(
        ExternalProcessRule,
        'result_key',
        'classifications.jit.category',
    )
    required_config.return_code_key = change_default(
        ExternalProcessRule,
        'return_code_key',
        'classifications.jit.category_return_code',
    )

    def predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
        if (
            processed_crash.product != 'Firefox' or
            not processed_crash.os_name.startswith('Windows') or
            processed_crash.cpu_name != 'x86'
        ):
            # we don't want any of these
            return False

        frames = processed_crash.get('json_dump', {}).get('crashing_thread', {}).get('frames', [])
        if frames and frames[0].get('module', False):
            # there is a module at the top of the stack, we don't want this
            return False

        return (
            processed_crash.signature.endswith('EnterBaseline') or
            processed_crash.signature.endswith('EnterIon') or
            processed_crash.signature.endswith('js::jit::FastInvoke') or
            processed_crash.signature.endswith('js::jit::IonCannon') or
            processed_crash.signature.endswith('js::irregexp::ExecuteCode<T>')
        )

    def _interpret_external_command_output(self, fp, processor_meta):
        try:
            result = fp.read()
        except IOError as x:
            processor_meta.processor_notes.append(
                "%s unable to read external command output: %s" % (
                    self.config.command_pathname,
                    x
                )
            )
            return ''

        try:
            return result.strip()
        except AttributeError:
            # there's no strip method
            return result

class BreakpadStackwalkerRule2015(ExternalProcessRule):
    """Executes the minidump stackwalker external process and puts output in
    processed crash"""

    # FIXME(willkg): command_line and command_pathname are referenced in the
    # uplifted versions in Processor2015. The rest of these config values have
    # no effect on anything and are just here.
    required_config = Namespace()
    required_config.add_option(
        name='symbols_urls',
        doc='comma delimited ordered list of urls for symbol lookup',
        default='https://localhost',
        from_string_converter=str_to_list,
        likely_to_be_changed=True
    )
    required_config.command_line = change_default(
        ExternalProcessRule,
        'command_line',
        'timeout -s KILL {kill_timeout} {command_pathname} '
        '--raw-json {raw_crash_pathname} '
        '{symbols_urls} '
        '--symbols-cache {symbol_cache_path} '
        '--symbols-tmp {symbol_tmp_path} '
        '{dump_file_pathname} '
        '2> /dev/null'
    )
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        # NOTE(willkg): This is the path for the RPM-based Socorro deploy. When
        # we switch to Docker, we should change this.
        '/data/socorro/stackwalk/bin/stackwalker',
    )
    required_config.add_option(
        'kill_timeout',
        doc='amount of time to let mdsw run before declaring it hung',
        default=600
    )
    required_config.add_option(
        'symbol_tmp_path',
        doc=(
            'directory to use as temp space for downloading symbols--must be on '
            'the same filesystem as symbols-cache'
        ),
        default=os.path.join(tempfile.gettempdir(), 'symbols-tmp'),
    )
    required_config.add_option(
        'symbol_cache_path',
        doc=(
            'the path where the symbol cache is found, this location must be '
            'readable and writeable (quote path with embedded spaces)'
        ),
        default=os.path.join(tempfile.gettempdir(), 'symbols'),
    )
    required_config.add_option(
        'temporary_file_system_storage_path',
        doc='a path where temporary files may be written',
        default=tempfile.gettempdir(),
    )

    def __init__(self, *args, **kwargs):
        super(BreakpadStackwalkerRule2015, self).__init__(*args, **kwargs)
        self.metrics = markus.get_metrics('processor.breakpadstackwalkerrule')

    @contextmanager
    def _temp_raw_crash_json_file(self, raw_crash, crash_id):
        file_pathname = os.path.join(
            self.config.temporary_file_system_storage_path,
            '%s.%s.TEMPORARY.json' % (
                crash_id,
                threading.currentThread().getName()
            )
        )
        with open(file_pathname, "w") as f:
            json.dump(dotdict_to_dict(raw_crash), f)
        try:
            yield file_pathname
        finally:
            os.unlink(file_pathname)

    def _execute_external_process(self, command_line, processor_meta):
        stackwalker_output, return_code = super(
            BreakpadStackwalkerRule2015, self
        )._execute_external_process(command_line, processor_meta)

        if not isinstance(stackwalker_output, Mapping):
            processor_meta.processor_notes.append(
                'MDSW produced unexpected output: %s...'
                % str(stackwalker_output)[:10]
            )
            stackwalker_output = {}

        stackwalker_data = DotDict()
        stackwalker_data.json_dump = stackwalker_output
        stackwalker_data.mdsw_return_code = return_code

        stackwalker_data.mdsw_status_string = stackwalker_output.get('status', 'unknown error')
        stackwalker_data.success = stackwalker_data.mdsw_status_string == 'OK'

        self.metrics.incr(
            'run',
            tags=[
                'outcome:%s' % ('success' if stackwalker_data.success else 'fail'),
                'exitcode:%s' % return_code,
            ]
        )

        if return_code == 124:
            msg = 'MDSW terminated with SIGKILL due to timeout'
            processor_meta.processor_notes.append(msg)
            self.config.logger.warning(msg)

        elif return_code != 0 or not stackwalker_data.success:
            msg = 'MDSW failed with %s: %s' % (
                return_code,
                stackwalker_data.mdsw_status_string
            )
            processor_meta.processor_notes.append(msg)
            self.config.logger.warning(msg)

        return stackwalker_data, return_code

    def expand_commandline(self, dump_file_pathname, raw_crash_pathname):
        """Expands the command line parameters and returns the final command line"""
        # NOTE(willkg): If we ever add new configuration variables, we'll need
        # to add them here, too, otherwise they won't get expanded in the
        # command line.
        symbols_urls = ' '.join([
            '--symbols-url "%s"' % url.strip()
            for url in self.config.symbols_urls
        ])

        params = {
            # These come from config
            'kill_timeout': self.config.kill_timeout,
            'command_pathname': self.config.command_pathname,
            'symbol_cache_path': self.config.symbol_cache_path,
            'symbol_tmp_path': self.config.symbol_tmp_path,
            'symbols_urls': symbols_urls,

            # These are calculated
            'dump_file_pathname': dump_file_pathname,
            'raw_crash_pathname': raw_crash_pathname
        }
        return self.config.command_line.format(**params)

    def action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        if 'additional_minidumps' not in processed_crash:
            processed_crash.additional_minidumps = []

        with self._temp_raw_crash_json_file(raw_crash, raw_crash.uuid) as raw_crash_pathname:
            for dump_name in raw_dumps.keys():
                if processor_meta.quit_check:
                    processor_meta.quit_check()

                # this rule is only interested in dumps targeted for the
                # minidump stackwalker external program. As of the writing
                # of this code, there is one other dump type. The only way
                # to differentiate these dump types is by the name of the
                # dump. All minidumps targeted for the stackwalker will have
                # a name with a prefix specified in configuration:
                if not dump_name.startswith(self.config.dump_field):
                    # dumps not intended for the stackwalker are ignored
                    continue

                dump_file_pathname = raw_dumps[dump_name]

                command_line = self.expand_commandline(
                    dump_file_pathname=dump_file_pathname,
                    raw_crash_pathname=raw_crash_pathname
                )

                stackwalker_data, return_code = self._execute_external_process(
                    command_line, processor_meta
                )

                if dump_name == self.config.dump_field:
                    processed_crash.update(stackwalker_data)
                else:
                    processed_crash.additional_minidumps.append(dump_name)
                    processed_crash[dump_name] = stackwalker_data

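Given the defaults above (kill_timeout of 600, a single symbols URL of https://localhost, and cache/tmp directories under the system temp dir), expand_commandline() fills the command_line template roughly as follows; the dump and raw-crash paths in this sketch are hypothetical.

# a minimal sketch of the expansion performed by expand_commandline(),
# using the template and defaults defined above; paths are invented
command_line_template = (
    'timeout -s KILL {kill_timeout} {command_pathname} '
    '--raw-json {raw_crash_pathname} '
    '{symbols_urls} '
    '--symbols-cache {symbol_cache_path} '
    '--symbols-tmp {symbol_tmp_path} '
    '{dump_file_pathname} '
    '2> /dev/null'
)
print(command_line_template.format(
    kill_timeout=600,
    command_pathname='/data/socorro/stackwalk/bin/stackwalker',
    raw_crash_pathname='/tmp/CRASH_ID.MainThread.TEMPORARY.json',  # hypothetical
    symbols_urls='--symbols-url "https://localhost"',
    symbol_cache_path='/tmp/symbols',
    symbol_tmp_path='/tmp/symbols-tmp',
    dump_file_pathname='/tmp/upload_file_minidump',                # hypothetical
))
# -> timeout -s KILL 600 /data/socorro/stackwalk/bin/stackwalker
#    --raw-json /tmp/CRASH_ID.MainThread.TEMPORARY.json
#    --symbols-url "https://localhost" --symbols-cache /tmp/symbols
#    --symbols-tmp /tmp/symbols-tmp /tmp/upload_file_minidump 2> /dev/null
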
class JitCrashCategorizeRule(ExternalProcessRule):
    required_config = Namespace()
    required_config.command_line = change_default(
        ExternalProcessRule,
        'command_line',
        'timeout -s KILL 30 {command_pathname} '
        '{dump_file_pathname} '
        '2>/dev/null'
    )
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        '/data/socorro/stackwalk/bin/jit-crash-categorize',
    )
    required_config.result_key = change_default(
        ExternalProcessRule,
        'result_key',
        'classifications.jit.category',
    )
    required_config.return_code_key = change_default(
        ExternalProcessRule,
        'return_code_key',
        'classifications.jit.category_return_code',
    )
    required_config.add_option(
        'threshold',
        doc="max number of frames until encountering target frame",
        default=8
    )

    def __init__(self, config):
        super(JitCrashCategorizeRule, self).__init__(config)

    def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
        if (processed_crash.product != 'Firefox' or
                not processed_crash.os_name.startswith('Windows') or
                processed_crash.cpu_name != 'x86'):
            # we don't want any of these
            return False

        frames = processed_crash.get('json_dump', {}).get('crashing_thread', {}).get('frames', [])
        if frames and frames[0].get('module', False):
            # there is a module at the top of the stack, we don't want this
            return False

        return (
            processed_crash.signature.endswith('EnterBaseline') or
            processed_crash.signature.endswith('EnterIon') or
            processed_crash.signature.endswith('js::jit::FastInvoke') or
            processed_crash.signature.endswith('js::jit::IonCannon') or
            processed_crash.signature.endswith('js::irregexp::ExecuteCode<T>')
        )

    def _interpret_external_command_output(self, fp, processor_meta):
        try:
            result = fp.read()
        except IOError as x:
            processor_meta.processor_notes.append(
                "%s unable to read external command output: %s" % (
                    self.config.command_pathname,
                    x
                )
            )
            return ''

        try:
            return result.strip()
        except AttributeError:
            # there's no strip method
            return result

class BreakpadStackwalkerRule2015(ExternalProcessRule):
    required_config = Namespace()
    required_config.add_option(
        name='public_symbols_url',
        doc='url of the public symbol server',
        default="https://localhost",
        likely_to_be_changed=True
    )
    required_config.add_option(
        name='private_symbols_url',
        doc='url of the private symbol server',
        default="https://localhost",
        likely_to_be_changed=True
    )
    required_config.command_line = change_default(
        ExternalProcessRule,
        'command_line',
        'timeout -s KILL 30 {command_pathname} '
        '--raw-json {raw_crash_pathname} '
        '--symbols-url {public_symbols_url} '
        '--symbols-url {private_symbols_url} '
        '--symbols-cache {symbol_cache_path} '
        '{dump_file_pathname} '
        '2>/dev/null'
    )
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        '/data/socorro/stackwalk/bin/stackwalker',
    )
    required_config.add_option(
        'symbol_cache_path',
        doc='the path where the symbol cache is found, this location must be '
            'readable and writeable (quote path with embedded spaces)',
        default=os.path.join(tempfile.gettempdir(), 'symbols'),
    )
    required_config.add_option(
        'temporary_file_system_storage_path',
        doc='a path where temporary files may be written',
        default=tempfile.gettempdir(),
    )

    def version(self):
        return '1.0'

    @contextmanager
    def _temp_raw_crash_json_file(self, raw_crash, crash_id):
        file_pathname = os.path.join(
            self.config.temporary_file_system_storage_path,
            "%s.%s.TEMPORARY.json" % (
                crash_id,
                threading.currentThread().getName()
            )
        )
        with open(file_pathname, "w") as f:
            ujson.dump(raw_crash, f)
        try:
            yield file_pathname
        finally:
            os.unlink(file_pathname)

    def _execute_external_process(self, command_line, processor_meta):
        stackwalker_output, return_code = super(
            BreakpadStackwalkerRule2015,
            self
        )._execute_external_process(command_line, processor_meta)

        if not isinstance(stackwalker_output, Mapping):
            processor_meta.processor_notes.append(
                "MDSW produced unexpected output: %s..."
                % str(stackwalker_output)[:10]
            )
            stackwalker_output = {}

        stackwalker_data = DotDict()
        stackwalker_data.json_dump = stackwalker_output
        stackwalker_data.mdsw_return_code = return_code

        stackwalker_data.mdsw_status_string = stackwalker_output.get(
            'status',
            'unknown error'
        )
        stackwalker_data.success = stackwalker_data.mdsw_status_string == 'OK'

        if return_code == 124:
            processor_meta.processor_notes.append(
                "MDSW terminated with SIGKILL due to timeout"
            )
        elif return_code != 0 or not stackwalker_data.success:
            processor_meta.processor_notes.append(
                "MDSW failed on '%s': %s" % (
                    command_line,
                    stackwalker_data.mdsw_status_string
                )
            )

        return stackwalker_data, return_code

    def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        if 'additional_minidumps' not in processed_crash:
            processed_crash.additional_minidumps = []

        with self._temp_raw_crash_json_file(
            raw_crash,
            raw_crash.uuid
        ) as raw_crash_pathname:
            for dump_name in raw_dumps.iterkeys():
                if processor_meta.quit_check:
                    processor_meta.quit_check()

                # this rule is only interested in dumps targeted for the
                # minidump stackwalker external program. As of the writing
                # of this code, there is one other dump type. The only way
                # to differentiate these dump types is by the name of the
                # dump. All minidumps targeted for the stackwalker will have
                # a name with a prefix specified in configuration:
                if not dump_name.startswith(self.config.dump_field):
                    # dumps not intended for the stackwalker are ignored
                    continue

                dump_pathname = raw_dumps[dump_name]

                if self.config.chatty:
                    self.config.logger.debug(
                        "BreakpadStackwalkerRule: %s, %s",
                        dump_name,
                        dump_pathname
                    )

                command_line = self.config.command_line.format(
                    **dict(
                        self.config,
                        dump_file_pathname=dump_pathname,
                        raw_crash_pathname=raw_crash_pathname
                    )
                )

                stackwalker_data, return_code = self._execute_external_process(
                    command_line,
                    processor_meta
                )

                if dump_name == self.config.dump_field:
                    processed_crash.update(stackwalker_data)
                else:
                    processed_crash.additional_minidumps.append(dump_name)
                    processed_crash[dump_name] = stackwalker_data

        return True

class Processor2015(RequiredConfig):
    """this class is a generalization of the Processor into a rule processing
    framework. This class is suitable for use in the 'processor_app'
    introduced in 2012."""

    required_config = Namespace('transform_rules')
    required_config.add_option(
        'database_class',
        doc="the class of the database",
        default='socorro.external.postgresql.connection_context.'
                'ConnectionContext',
        from_string_converter=str_to_python_object,
        reference_value_from='resource.postgresql',
    )
    required_config.add_option(
        'transaction_executor_class',
        default="socorro.database.transaction_executor."
                "TransactionExecutorWithInfiniteBackoff",
        doc='a class that will manage transactions',
        from_string_converter=str_to_python_object,
        reference_value_from='resource.postgresql',
    )
    required_config.add_option(
        'dump_field',
        doc='the default name of a dump',
        default='upload_file_minidump',
    )
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        # NOTE(willkg): This is the path for the RPM-based Socorro deploy. When
        # we switch to Docker, we should change this.
        '/data/socorro/stackwalk/bin/stackwalker',
    )
    required_config.add_option(
        'result_key',
        doc=('the key where the external process result should be stored '
             'in the processed crash'),
        default='stackwalker_result',
    )
    required_config.add_option(
        'return_code_key',
        doc=('the key where the external process return code should be stored '
             'in the processed crash'),
        default='stackwalker_return_code',
    )
    required_config.add_option(
        name='symbols_urls',
        doc='comma-delimited ordered list of urls for symbol lookup',
        default='https://localhost',
        from_string_converter=str_to_list,
        likely_to_be_changed=True
    )
    required_config.command_line = change_default(
        ExternalProcessRule,
        'command_line',
        ('timeout -s KILL {kill_timeout} {command_pathname} '
         '--raw-json {raw_crash_pathname} '
         '{symbols_urls} '
         '--symbols-cache {symbol_cache_path} '
         '--symbols-tmp {symbol_tmp_path} '
         '{dump_file_pathname} '
         '2> /dev/null')
    )
    required_config.add_option(
        'kill_timeout',
        doc='amount of time to let mdsw run before declaring it hung',
        default=600
    )
    required_config.add_option(
        'symbol_tmp_path',
        doc=('directory to use as temp space for downloading symbols--must be '
             'on the same filesystem as symbols-cache'),
        default=os.path.join(tempfile.gettempdir(), 'symbols-tmp'),
    )
    required_config.add_option(
        'symbol_cache_path',
        doc=('the path where the symbol cache is found, this location must be '
             'readable and writeable (quote path with embedded spaces)'),
        default=os.path.join(tempfile.gettempdir(), 'symbols'),
    )
    required_config.add_option(
        'temporary_file_system_storage_path',
        doc='a path where temporary files may be written',
        default=tempfile.gettempdir(),
    )

    def __init__(self, config, rules=None, quit_check_callback=None):
        super(Processor2015, self).__init__()
        self.config = config
        # the quit checks are components of a system of callbacks used
        # primarily by the TaskManager system. This is the system that
        # controls the execution model. If the ThreadedTaskManager is in use,
        # these callbacks just check the ThreadedTaskManager task manager's
        # quit flag. If they detect a quit condition, they raise an exception
        # that causes the thread to shut down. For the GreenletTaskManager,
        # using cooperative multitasking, the callbacks do the 'yield' to
        # allow another green thread to take over.
        # It is perfectly acceptable to hook into this callback system to
        # accomplish any task that needs to be done periodically.
        if quit_check_callback:
            self.quit_check = quit_check_callback
        else:
            self.quit_check = lambda: False

        rule_set = rules or list(DEFAULT_RULES)
        self.rules = []
        for a_rule_class in rule_set:
            self.rules.append(a_rule_class(config))

    def process_crash(self, raw_crash, raw_dumps, processed_crash):
        """Take a raw_crash and its associated raw_dumps and return a
        processed_crash.
        """
        # processor_meta_data will be used to ferry "inside information" to
        # transformation rules. Sometimes rules need a bit more extra
        # information about the transformation process itself.
        processor_meta_data = DotDict()
        processor_meta_data.processor_notes = [
            self.config.processor_name,
            self.__class__.__name__
        ]
        processor_meta_data.quit_check = self.quit_check
        processor_meta_data.processor = self
        processor_meta_data.config = self.config

        if "processor_notes" in processed_crash:
            original_processor_notes = [
                x.strip() for x in processed_crash.processor_notes.split(";")
            ]
            processor_meta_data.processor_notes.append(
                "earlier processing: %s" % processed_crash.get(
                    "started_datetime", 'Unknown Date'
                )
            )
        else:
            original_processor_notes = []

        processed_crash.success = False
        processed_crash.started_datetime = utc_now()
        # for backwards compatibility:
        processed_crash.startedDateTime = processed_crash.started_datetime
        processed_crash.signature = 'EMPTY: crash failed to process'

        crash_id = raw_crash['uuid']
        try:
            # quit_check calls ought to be scattered around the code to allow
            # the processor to be responsive to requests to shut down.
            self.quit_check()

            start_time = utc_now()
            self.config.logger.info(
                "starting transform for crash: %s", crash_id
            )
            processor_meta_data.started_timestamp = start_time

            # apply_all_rules
            for rule in self.rules:
                rule.act(
                    raw_crash,
                    raw_dumps,
                    processed_crash,
                    processor_meta_data
                )
                self.quit_check()

            # the crash made it through the processor rules with no exceptions
            # raised, call it a success.
            processed_crash.success = True

        except Exception as exception:
            self.config.logger.warning(
                'Error while processing %s: %s',
                crash_id,
                str(exception),
                exc_info=True
            )
            processor_meta_data.processor_notes.append(
                'unrecoverable processor error: %s' % exception
            )

        # the processor notes are in the form of a list. Join them all
        # together to make a single string
        processor_meta_data.processor_notes.extend(original_processor_notes)
        processed_crash.processor_notes = '; '.join(
            processor_meta_data.processor_notes
        )
        completed_datetime = utc_now()
        processed_crash.completed_datetime = completed_datetime
        # for backwards compatibility:
        processed_crash.completeddatetime = completed_datetime

        self.config.logger.info(
            "finishing %s transform for crash: %s",
            'successful' if processed_crash.success else 'failed',
            crash_id
        )
        return processed_crash

    def reject_raw_crash(self, crash_id, reason):
        self.config.logger.warning('%s rejected: %s', crash_id, reason)

    def close(self):
        self.config.logger.debug('closing rules')
        for rule in self.rules:
            rule.close()

class TelemetryBotoS3CrashStorage(BotoS3CrashStorage):
    """Sends a subset of the processed crash to an S3 bucket

    The subset of the processed crash is based on the JSON Schema which is
    derived from "socorro/external/es/super_search_fields.py".

    This uses a boto connection context with one twist: if you set
    "resource.boto.telemetry_bucket_name", then that will override the value.

    """

    required_config = Namespace()
    required_config.resource_class = change_default(
        BotoCrashStorage,
        'resource_class',
        'socorro.external.boto.connection_context.RegionalS3ConnectionContext'
    )
    required_config.add_option(
        'telemetry_bucket_name',
        default='',
        reference_value_from='resource.boto',
        doc='if set, overrides resource_class bucket name'
    )
    required_config.elasticsearch = Namespace()
    required_config.elasticsearch.add_option(
        'elasticsearch_class',
        default='socorro.external.es.connection_context.ConnectionContext',
        from_string_converter=class_converter,
        reference_value_from='resource.elasticsearch',
    )

    def __init__(self, config, *args, **kwargs):
        # This class requires that we use
        # SimpleDatePrefixKeyBuilder, so we stomp on the configuration
        # to make absolutely sure it gets set that way.
        config.keybuilder_class = SimpleDatePrefixKeyBuilder

        super(TelemetryBotoS3CrashStorage, self).__init__(
            config, *args, **kwargs
        )

        if config.telemetry_bucket_name:
            # If we have a telemetry.bucket_name set, then stomp on it with
            # config.telemetry_bucket_name.
            # FIXME(willkg): It'd be better if we could detect whether the
            # connection context bucket_name was set at all (it's a default
            # value, or the value of resource.boto.bucket_name).
            config.logger.info(
                'Using %s for TelemetryBotoS3CrashStorage bucket',
                config.telemetry_bucket_name
            )
            self.connection_source.config.bucket_name = config.telemetry_bucket_name

    def _get_all_fields(self):
        if (
            hasattr(self, '_all_fields') and
            hasattr(self, '_all_fields_timestamp')
        ):
            # we might have it cached
            age = time.time() - self._all_fields_timestamp
            if age < 60 * 60:
                # fresh enough
                return self._all_fields

        self._all_fields = SuperSearchFields(config=self.config).get()
        self._all_fields_timestamp = time.time()
        return self._all_fields

    def save_raw_and_processed(
        self, raw_crash, dumps, processed_crash, crash_id
    ):
        all_fields = self._get_all_fields()
        crash_report = {}

        # TODO Opportunity of optimization;
        # We could inspect CRASH_REPORT_JSON_SCHEMA and get a list
        # of all (recursive) keys that are in there and use that
        # to limit the two following loops to not bother
        # filling up `crash_report` with keys that will never be
        # needed.

        # Rename fields in raw_crash.
        raw_fields_map = dict(
            (x['in_database_name'], x['name'])
            for x in all_fields.values()
            if x['namespace'] == 'raw_crash'
        )
        for key, val in raw_crash.items():
            crash_report[raw_fields_map.get(key, key)] = val

        # Rename fields in processed_crash.
        processed_fields_map = dict(
            (x['in_database_name'], x['name'])
            for x in all_fields.values()
            if x['namespace'] == 'processed_crash'
        )
        for key, val in processed_crash.items():
            crash_report[processed_fields_map.get(key, key)] = val

        # Validate crash_report.
        crash_report = json_schema_reducer.make_reduced_dict(
            CRASH_REPORT_JSON_SCHEMA, crash_report
        )

        self.save_processed(crash_report)

    @staticmethod
    def _do_save_processed(boto_connection, processed_crash):
        """Overriding this method so we can control the "name of thing"
        prefix used to upload to S3."""
        crash_id = processed_crash['uuid']
        processed_crash_as_string = boto_connection._convert_mapping_to_string(
            processed_crash
        )
        boto_connection.submit(
            crash_id,
            "crash_report",
            processed_crash_as_string
        )

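The field-renaming step in save_raw_and_processed above can be sketched in isolation; the two super-search field definitions below are invented for illustration and are not the real schema.

# a minimal sketch of the raw_crash renaming loop above; the field
# definitions and raw_crash values are hypothetical
all_fields = {
    'product': {
        'name': 'product',
        'in_database_name': 'ProductName',
        'namespace': 'raw_crash',
    },
    'version': {
        'name': 'version',
        'in_database_name': 'Version',
        'namespace': 'raw_crash',
    },
}
raw_crash = {'ProductName': 'Firefox', 'Version': '57.0', 'Unknown': 'kept as-is'}

raw_fields_map = dict(
    (x['in_database_name'], x['name'])
    for x in all_fields.values()
    if x['namespace'] == 'raw_crash'
)
crash_report = {}
for key, val in raw_crash.items():
    crash_report[raw_fields_map.get(key, key)] = val

# crash_report == {'product': 'Firefox', 'version': '57.0', 'Unknown': 'kept as-is'}
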
class TelemetryBotoS3CrashStorage(BotoS3CrashStorage):
    """S3 crash storage class for sending a subset of the processed crash,
    reduced to only include the fields in the processed crash JSON Schema."""

    required_config = Namespace()
    required_config.resource_class = change_default(
        BotoCrashStorage,
        'resource_class',
        'socorro.external.boto.connection_context.RegionalS3ConnectionContext'
    )
    required_config.elasticsearch = Namespace()
    required_config.elasticsearch.add_option(
        'elasticsearch_class',
        default='socorro.external.es.connection_context.ConnectionContext',
        from_string_converter=class_converter,
        reference_value_from='resource.elasticsearch',
    )

    def __init__(self, config, *args, **kwargs):
        # This class requires that we use
        # SimpleDatePrefixKeyBuilder, so we stomp on the configuration
        # to make absolutely sure it gets set that way.
        config.keybuilder_class = SimpleDatePrefixKeyBuilder
        super(TelemetryBotoS3CrashStorage, self).__init__(
            config, *args, **kwargs
        )

    def _get_all_fields(self):
        if (hasattr(self, '_all_fields') and
                hasattr(self, '_all_fields_timestamp')):
            # we might have it cached
            age = time.time() - self._all_fields_timestamp
            if age < 60 * 60:
                # fresh enough
                return self._all_fields

        self._all_fields = SuperSearchFields(config=self.config).get()
        self._all_fields_timestamp = time.time()
        return self._all_fields

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        all_fields = self._get_all_fields()
        crash_report = {}

        # TODO Opportunity of optimization;
        # We could inspect CRASH_REPORT_JSON_SCHEMA and get a list
        # of all (recursive) keys that are in there and use that
        # to limit the two following loops to not bother
        # filling up `crash_report` with keys that will never be
        # needed.

        # Rename fields in raw_crash.
        raw_fields_map = dict(
            (x['in_database_name'], x['name'])
            for x in all_fields.values()
            if x['namespace'] == 'raw_crash'
        )
        for key, val in raw_crash.items():
            crash_report[raw_fields_map.get(key, key)] = val

        # Rename fields in processed_crash.
        processed_fields_map = dict(
            (x['in_database_name'], x['name'])
            for x in all_fields.values()
            if x['namespace'] == 'processed_crash'
        )
        for key, val in processed_crash.items():
            crash_report[processed_fields_map.get(key, key)] = val

        # Validate crash_report.
        crash_report = json_schema_reducer.make_reduced_dict(
            CRASH_REPORT_JSON_SCHEMA, crash_report
        )

        self.save_processed(crash_report)

    @staticmethod
    def _do_save_processed(boto_connection, processed_crash):
        """Overriding this method so we can control the "name of thing"
        prefix used to upload to S3."""
        crash_id = processed_crash['uuid']
        processed_crash_as_string = boto_connection._convert_mapping_to_string(
            processed_crash
        )
        boto_connection.submit(
            crash_id,
            "crash_report",
            processed_crash_as_string
        )

class CorrelationInterestingModulesRule(CorrelationRule):
    """this class attempts to be a faithful reproduction of the function of
    the original dbaron "per-crash-interesting-modules.py" application
    embodied as a Socorro TransformRule.

    Individual crashes will be offered to this rule by a Fetch Transform Save
    app through the "_action_" method. This class will examine the crash and
    build counters on an instance of a ProductVersionMapping. The counter
    structure it builds looks like this:

        pv_counters[os_name*]
            .count
            .signatures[a_signature*]
                .count
                .modules[a_module*]
                    .count
                    .versions[a_version*] int
            .modules[a_module*]
                .count
                .versions[a_version*] int
    """
    required_config = Namespace()
    required_config.add_option(
        "show_versions",
        doc="Show data on module versions",
        default=False
    )
    required_config.add_option(
        "addons",
        doc="Tabulate addons (rather than modules)",
        default=False
    )
    required_config.add_option(
        "min_baseline_diff",
        doc="a floating point number",
        default=0.05
    )
    required_config.namespace('output')
    required_config.output.output_class = change_default(
        CorrelationRule,
        'output.output_class',
        'socorro.analysis.correlations.interesting_rule'
        '.FileOutputForInterestingModules',
        new_reference_value='global.correlations.interesting'
    )

    #--------------------------------------------------------------------------
    def version(self):
        return '1.0'

    #--------------------------------------------------------------------------
    def __init__(self, config=None, quit_check_callback=None):
        super(CorrelationInterestingModulesRule, self).__init__(
            config,
            quit_check_callback
        )
        for an_accumulator in self.counters_for_all_producs_and_versions.values():
            an_accumulator.osyses = {}
        self.date_suffix = defaultdict(int)
        self.summary_names = {
            # (show_versions, addons)
            (False, False): 'interesting-modules',
            (True, False): 'interesting-modules-with-versions',
            (False, True): 'interesting-addons',
            (True, True): 'interesting-addons-with-versions',
        }

    #--------------------------------------------------------------------------
    def summary_name(self):
        return self.summary_names[(
            self.config.show_versions,
            self.config.addons,
        )]

    #--------------------------------------------------------------------------
    @staticmethod
    def contains_bare_address(a_signature):
        return re.search(r"\S+@0x[0-9a-fA-F]+$", a_signature) is not None

    #--------------------------------------------------------------------------
    @staticmethod
    def remove_bare_address_from_signature(a_signature):
        return re.sub(r"@0x[0-9a-fA-F]+$", "", a_signature)

    #--------------------------------------------------------------------------
    def _action(self, raw, dumps, crash, processor_meta):
        self.date_suffix[crash['crash_id'][-6:]] += 1
        if "os_name" not in crash:
            # We have some bad crash reports.
            return False

        # give the names of the old algorithm's critical variables to their
        # variables in the new system
        try:
            osyses = self.counters_for_all_producs_and_versions[
                (crash["product"], crash["version"])
            ].osyses
            self.counters_for_all_producs_and_versions[
                (crash["product"], crash["version"])
            ].counter += 1
        except (AttributeError, KeyError):
            # why both types? crashes can be represented by either the Socorro
            # or configman DotDict types which raise different exceptions on
            # not finding a key.
            osyses = {}
            self.counters_for_all_producs_and_versions[
                (crash["product"], crash["version"])
            ].osyses = osyses
            self.counters_for_all_producs_and_versions[
                (crash["product"], crash["version"])
            ].counter = 1

        options = self.config

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # begin - refactored code section
        #    unlike the "core count correlation report", this code from the
        #    original was refactored to help understand the structure of the
        #    counters so that a generic summary structure could be made. This
        #    allows for output of the summary information to somewhere other
        #    than stdout.
        #
        #    the structure has been broken down into levels of regular dicts
        #    and SocorroDotDicts. The DotDicts have keys that are constant
        #    and no more are added when new crashes come in. The regular dicts
        #    are keyed with variable things that come in with crashes. In the
        #    structure below, keys of DotDicts are shown as constants like
        #    ".count" and ".modules". The keys of the dicts are shown as the
        #    name of a field with a * (to designate zero or more) inside
        #    square brackets.
        #
        #    the counters structure looks like this:
        #        pv_counters[os_name*]
        #            .count
        #            .signatures[a_signature*]
        #                .count
        #                .modules[a_module*]
        #                    .count
        #                    .versions[a_version*] int
        #            .modules[a_module*]
        #                .count
        #                .versions[a_version*] int
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        os_name = crash["os_name"]
        # The os_version field is way too specific on Linux, and we don't
        # have much Linux data anyway.
        if options.by_os_version and os_name != "Linux":
            os_name = os_name + " " + crash["os_version"]
        counters_for_an_os = osyses.setdefault(
            os_name,
            SocorroDotDict({
                "count": 0,
                "signatures": {},
                "modules": {},
            })
        )

        a_signature = crash["signature"]
        if self.contains_bare_address(a_signature):
            if options.condense:
                # Condense all signatures in a given DLL.
                a_signature = self.remove_bare_address_from_signature(
                    a_signature
                )
        if "reason" in crash and crash["reason"] is not None:
            a_signature = a_signature + "|" + crash["reason"]

        counters_for_a_signature = counters_for_an_os.signatures.setdefault(
            a_signature,
            SocorroDotDict({
                "count": 0,
                "modules": {}
            }),
        )
        list_of_counters = [counters_for_an_os, counters_for_a_signature]
        # increment both the os & signature counters
        for a_counter in list_of_counters:
            a_counter.count += 1

        for libname, version in self.generate_modules_or_addons(crash):
            # Increment the global count on osys and the per-signature count.
            for a_counter in list_of_counters:
                counters_for_modules = a_counter.modules.setdefault(
                    libname,
                    SocorroDotDict({
                        "count": 0,
                        "versions": defaultdict(int),
                    })
                )
                counters_for_modules.count += 1
                # Count versions of each module as well.
                counters_for_modules.versions[version] += 1
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # end - refactored code section
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        return True

    #--------------------------------------------------------------------------
    def _summary_for_a_product_version_pair(self, a_pv_accumulator):
        """in the original code, the counter structures were walked and
        manipulated to form the statistics. Once a stat was determined, it
        was printed to stdout. Since we want to have various means of
        outputting the data, instead of printing to stdout, this method saves
        the statistic in a "summary_structure". This structure will later be
        walked for printing or output to some future storage scheme.

        The summary structure looks like this:
            pv_summary
                .date_key  # a list of the last six UUID characters present
                .notes  # any notes added by the algorithm to tell of problems
                .os_counters[os_name*]
                    .count
                    .signatures[a_signature*]
                        .count
                        .in_sig_ratio
                        .in_os_ratio
                        .in_os_count
                        .osys_count
                        .modules[a_module*]  # may be addons
                            .in_sig_ratio
                            .in_os_ratio
                            .in_os_count
                            .osys_count
                            .versions[a_version*]  # may be addon versions
                                .sig_ver_ratio
                                .sig_ver_count
                                .sig_count
                                .os_ver_ratio
                                .os_ver_count
                                .osys_count
                                .version
        """
        options = self.config
        pv_summary = SocorroDotDict({
            'notes': [],
        })
        if (len(self.date_suffix) > 1):
            message = (
                "crashes from more than one day %s"
                % str(tuple(self.date_suffix.keys()))
            )
            ## self.config.logger.debug(message)
            pv_summary.notes.append(message)
        pv_summary.date_key = self.date_suffix.keys()[0]
        pv_summary.os_counters = {}

        MIN_CRASHES = self.config.min_crashes
        counters_for_multiple_os = a_pv_accumulator.osyses

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # begin - refactored code section
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        infostr_re = re.compile("^(.*) with (\d+) cores$")  # unused?

        for os_name in counters_for_multiple_os.keys():
            counters_for_an_os = counters_for_multiple_os[os_name]
            pv_summary.os_counters[os_name] = SocorroDotDict()
            pv_summary.os_counters[os_name].count = counters_for_multiple_os[os_name].count
            pv_summary.os_counters[os_name].signatures = {}
            filtered_signatures = [
                (signature, signature_counter)
                for (signature, signature_counter)
                in counters_for_an_os["signatures"].items()
                if signature_counter.count >= MIN_CRASHES
            ]
            for a_signature, a_signtaure_counter in filtered_signatures:
                pv_summary.os_counters[os_name].signatures[a_signature] = SocorroDotDict()
                pv_summary.os_counters[os_name].signatures[a_signature].count = a_signtaure_counter.count
                pv_summary.os_counters[os_name].signatures[a_signature].modules = {}
                modules_list = [
                    SocorroDotDict({
                        "libname": module_name,
                        "in_sig_count": a_module_counter.count,
                        "in_sig_ratio": float(a_module_counter.count) / a_signtaure_counter.count,
                        "in_sig_versions": a_module_counter.versions,
                        "in_os_count": counters_for_an_os.modules[module_name].count,
                        "in_os_ratio": (
                            float(counters_for_an_os.modules[module_name].count)
                            / counters_for_an_os.count
                        ),
                        "in_os_versions": counters_for_an_os.modules[module_name].versions
                    })
                    for module_name, a_module_counter
                    in a_signtaure_counter.modules.iteritems()
                ]
                modules_list = [
                    module for module in modules_list
                    if module.in_sig_ratio - module.in_os_ratio >= self.config.min_baseline_diff
                ]
                modules_list.sort(
                    key=lambda module: module.in_sig_ratio - module.in_os_ratio,
                    reverse=True
                )
                for module in modules_list:
                    module_name = module.libname
                    if options.addons:
                        info = addonids.info_for_id(module_name)
                        if info is not None:
                            module_name = (
                                module_name + u" ({0}, {1})".format(
                                    info.name,
                                    info.url
                                )
                            )
                    if options.show_versions and len(module["in_os_versions"]) == 1:
                        onlyver = module.in_os_versions.keys()[0]
                        if os_name.startswith("Mac OS X"):
                            info = macdebugids.info_for_id(module_name, onlyver)
                            if info is not None:
                                onlyver = onlyver + "; " + info
                        if (onlyver != ""):
                            module_name = module_name + " (" + onlyver + ")"
                    pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name] = SocorroDotDict()
                    pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].in_sig_count = (
                        module.in_sig_count
                    )
                    pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].in_sig_ratio = (
                        int(round(module["in_sig_ratio"] * 100))
                    )
                    pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].in_os_ratio = (
                        int(round(module.in_os_ratio * 100))
                    )
                    pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].in_os_count = (
                        module.in_os_count
                    )
                    pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].osys_count = (
                        counters_for_an_os.count
                    )
                    if options.show_versions and len(module.in_os_versions) != 1:
                        versions = module.in_os_versions.keys()
                        versions.sort()
                        pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].versions = {}
                        for version in versions:
                            sig_ver_count = module.in_sig_versions.get(version, 0)
                            os_ver_count = module.in_os_versions[version]
                            if os_name.startswith("Mac OS X"):
                                info = macdebugids.info_for_id(module_name, version)
                                if info is not None:
                                    version = version + " (" + info + ")"
                            pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].versions[version] = SocorroDotDict()
                            pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].versions[version].sig_ver_ratio = (
                                int(round(float(sig_ver_count) / a_signtaure_counter.count * 100))
                            )
                            pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].versions[version].sig_ver_count = sig_ver_count
                            pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].versions[version].sig_count = a_signtaure_counter.count
                            pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].versions[version].os_ver_ratio = (
                                int(round(float(os_ver_count) / counters_for_an_os.count * 100))
                            )
                            pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].versions[version].os_ver_count = os_ver_count
                            pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].versions[version].osys_count = counters_for_an_os.count
                            pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].versions[version].version = version
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # end - refactored code section
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        return pv_summary

    #--------------------------------------------------------------------------
    def generate_modules_or_addons(self, crash):
        options = self.config
        if (options.addons):
            for addon in crash["addons"]:
                yield addon[0], addon[1]
        else:
            if "json_dump" in crash and "modules" in crash["json_dump"]:
                for module in crash["json_dump"]["modules"]:
                    libname = module["filename"]
                    version = module["version"]
                    pdb = module["debug_file"]  # never used?
                    checksum = module["debug_id"]
                    addrstart = module["base_addr"]  # never used?
                    addrend = module["end_addr"]  # never used?
                    if crash["os_name"].startswith("Win"):
                        # We only have good version data on Windows.
                        yield libname, version
                    else:
                        yield libname, checksum

    #--------------------------------------------------------------------------
    def summarize(self):
        # for each product version pair in the accumulators
        summary = {}
        for pv, an_accumulator in self.counters_for_all_producs_and_versions.iteritems():
            summary['_'.join(pv)] = self._summary_for_a_product_version_pair(
                an_accumulator
            )
        return summary

class TelemetryBotoS3CrashStorage(BotoS3CrashStorage):
    """Sends a subset of the processed crash to an S3 bucket

    The subset of the processed crash is based on the JSON Schema which is
    derived from "socorro/external/es/super_search_fields.py".

    """

    required_config = Namespace()
    required_config.resource_class = change_default(
        BotoCrashStorage,
        'resource_class',
        'socorro.external.boto.connection_context.RegionalS3ConnectionContext'
    )
    required_config.elasticsearch = Namespace()
    required_config.elasticsearch.add_option(
        'elasticsearch_class',
        default='socorro.external.es.connection_context.ConnectionContext',
        from_string_converter=class_converter,
        reference_value_from='resource.elasticsearch',
    )

    def __init__(self, config, *args, **kwargs):
        super(TelemetryBotoS3CrashStorage, self).__init__(
            config, *args, **kwargs
        )
        self._all_fields = SuperSearchFields(config=self.config).get()

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        crash_report = {}

        # TODO Opportunity of optimization;
        # We could inspect CRASH_REPORT_JSON_SCHEMA and get a list
        # of all (recursive) keys that are in there and use that
        # to limit the two following loops to not bother
        # filling up `crash_report` with keys that will never be
        # needed.

        # Rename fields in raw_crash.
        raw_fields_map = dict(
            (x['in_database_name'], x['name'])
            for x in self._all_fields.values()
            if x['namespace'] == 'raw_crash'
        )
        for key, val in raw_crash.items():
            crash_report[raw_fields_map.get(key, key)] = val

        # Rename fields in processed_crash.
        processed_fields_map = dict(
            (x['in_database_name'], x['name'])
            for x in self._all_fields.values()
            if x['namespace'] == 'processed_crash'
        )
        for key, val in processed_crash.items():
            crash_report[processed_fields_map.get(key, key)] = val

        # Validate crash_report.
        crash_report = json_schema_reducer.make_reduced_dict(
            CRASH_REPORT_JSON_SCHEMA, crash_report
        )

        self.save_processed(crash_report)

    @staticmethod
    def _do_save_processed(boto_connection, processed_crash):
        """Overriding this to change "name of thing" to crash_report"""
        crash_id = processed_crash['uuid']
        processed_crash_as_string = boto_connection._convert_mapping_to_string(
            processed_crash
        )
        boto_connection.submit(
            crash_id,
            "crash_report",
            processed_crash_as_string
        )

    @staticmethod
    def _do_get_unredacted_processed(boto_connection, crash_id,
                                     json_object_hook):
        """Overriding this to change "name of thing" to crash_report"""
        try:
            processed_crash_as_string = boto_connection.fetch(
                crash_id,
                'crash_report'
            )
            return json.loads(
                processed_crash_as_string,
                object_hook=json_object_hook,
            )
        except boto_connection.ResponseError as x:
            raise CrashIDNotFound('%s not found: %s' % (crash_id, x))