def test_save_raw_and_processed(self):
    """save_raw_and_processed must delegate verbatim to the wrapped
    implementation's method of the same name."""
    config = self.get_standard_config()
    wrapper = DBCrashStorageWrapperNewCrashSource(config)
    crash_id = '86b58ff2-9708-487d-bfc4-9dac32121214'
    raw_crash = SocorroDotDict({
        "name": "Gabi",
        "submitted_timestamp": "2012-12-14T00:00:00"
    })
    dumps_as_files = FileDumpsMapping({
        'upload_file_minidump':
            '86b58ff2-9708-487d-bfc4-9dac32121214'
            '.upload_file_minidump.TEMPORARY.dump'
    })
    processed_crash = SocorroDotDict({
        "name": "Gabi",
        "submitted_timestamp": "2012-12-14T00:00:00"
    })
    # the call to be tested
    wrapper.save_raw_and_processed(
        raw_crash,
        dumps_as_files,
        processed_crash,
        crash_id
    )
    # the implementation must have been handed exactly the same objects
    delegated = wrapper._implementation.save_raw_and_processed
    delegated.assert_called_once_with(
        raw_crash,
        dumps_as_files,
        processed_crash,
        crash_id
    )
def save_raw_and_processed(self, raw_crash, dump, processed_crash, crash_id): storage_exception = PolyStorageError() # Later we're going to need to clone this per every crash storage # in the loop. But, to save time, before we do that, convert the # processed crash which is a SocorroDotDict into a pure python # dict which we can more easily copy.deepcopy() operate on. processed_crash_as_dict = socorrodotdict_to_dict(processed_crash) raw_crash_as_dict = socorrodotdict_to_dict(raw_crash) for a_store in self.stores.itervalues(): self.quit_check() try: actual_store = getattr(a_store, 'wrapped_object', a_store) if hasattr(actual_store, 'is_mutator') and actual_store.is_mutator(): # We do this because `a_store.save_raw_and_processed` # expects the processed crash to be a DotDict but # you can't deepcopy those, so we deepcopy the # pure dict version and then dress it back up as a # DotDict. my_processed_crash = SocorroDotDict( copy.deepcopy(processed_crash_as_dict) ) my_raw_crash = SocorroDotDict( copy.deepcopy(raw_crash_as_dict) ) else: my_processed_crash = processed_crash my_raw_crash = raw_crash a_store.save_raw_and_processed( my_raw_crash, dump, my_processed_crash, crash_id ) except Exception: store_class = getattr( a_store, 'wrapped_object', a_store.__class__ ) self.logger.error( '%r failed (crash id: %s)', store_class, crash_id, exc_info=True ) storage_exception.gather_current_exception() if storage_exception.has_exceptions(): raise storage_exception
def test_poly_crash_storage_immutability_deeper(self):
    """a mutating member store must not be able to alter nested dotdicts
    inside the caller's processed crash."""
    namespace = Namespace()
    namespace.add_option(
        'storage',
        default=PolyCrashStorage,
    )
    namespace.add_option(
        'logger',
        default=mock.Mock(),
    )
    overrides = {
        'storage_classes': (
            'socorro.unittest.external.test_crashstorage_base'
            '.MutatingProcessedCrashCrashStorage'
        ),
    }
    config_manager = ConfigurationManager(
        namespace, values_source_list=[overrides]
    )
    with config_manager.context() as config:
        raw_crash = {'ooid': '12345'}
        dump = '12345'
        processed_crash = {
            'foo': DotDict({'other': 'thing'}),
            'bar': SocorroDotDict({'something': 'else'}),
        }
        poly_store = config.storage(config)
        poly_store.save_raw_and_processed(
            raw_crash, dump, processed_crash, 'n'
        )
        # the nested values must be untouched after the save
        assert processed_crash['foo']['other'] == 'thing'
        assert processed_crash['bar']['something'] == 'else'
def test_complex(self):
    """socorrodotdict_to_dict must recursively convert dotdicts, however
    deeply nested, into pure dicts that survive copy.deepcopy."""
    def check(data, expected):
        # convert and compare, then prove the result deepcopies cleanly
        converted = socorrodotdict_to_dict(data)
        assert converted == expected
        copy.deepcopy(converted)

    cases = [
        # dict -> dict
        ({'a': 1}, {'a': 1}),
        # outer socorrodotdict -> dict
        (SocorroDotDict({'a': 1}), {'a': 1}),
        # nested socorrodotdict -> dict
        (
            SocorroDotDict({'a': 1, 'b': SocorroDotDict({'a': 2})}),
            {'a': 1, 'b': {'a': 2}},
        ),
        # inner socorrodotdict
        (
            {'a': 1, 'b': SocorroDotDict({'a': 2})},
            {'a': 1, 'b': {'a': 2}},
        ),
        # in a list
        (
            {'a': 1, 'b': [SocorroDotDict({'a': 2}), 3, 4]},
            {'a': 1, 'b': [{'a': 2}, 3, 4]},
        ),
        # mixed dotdicts
        (
            DotDict({'a': 1, 'b': SocorroDotDict({'a': 2})}),
            {'a': 1, 'b': {'a': 2}},
        ),
    ]
    for data, expected in cases:
        check(data, expected)
def test_save_processed(self):
    """save_processed must delegate verbatim to the wrapped
    implementation."""
    config = self.get_standard_config()
    wrapper = DBCrashStorageWrapperNewCrashSource(config)
    processed_crash = SocorroDotDict({
        "name": "Gabi",
        "submitted_timestamp": "2012-12-14T00:00:00"
    })
    # the call to be tested
    wrapper.save_processed(processed_crash)
    # the same object must have been handed to the implementation
    wrapper._implementation.save_processed.assert_called_once_with(
        processed_crash
    )
def test_get_processed(self):
    """get_processed must delegate to the implementation and return its
    result object unchanged."""
    config = self.get_standard_config()
    wrapper = DBCrashStorageWrapperNewCrashSource(config)
    crash_id = '86b58ff2-9708-487d-bfc4-9dac32121214'
    expected_processed = SocorroDotDict({
        "name": "Gabi",
        "submitted_timestamp": "2012-12-14T00:00:00"
    })
    wrapper._implementation.get_processed = Mock(
        return_value=expected_processed
    )
    # the call to be tested
    result = wrapper.get_processed(crash_id)
    # identity, not equality: delegation must not copy the crash
    ok_(expected_processed is result)
    wrapper._implementation.get_processed.assert_called_with(crash_id)
def get_unredacted_processed(self, crash_id):
    """the default implementation of fetching a processed_crash

    parameters:
        crash_id - the id of a processed_crash to fetch
    """
    # base-class stub: no backing store here, so hand back an empty mapping
    empty_processed_crash = SocorroDotDict()
    return empty_processed_crash
def get_raw_dumps_as_files(self, crash_id):
    """the default implementation of fetching all the dumps

    parameters:
        crash_id - the id of a dump to fetch
    """
    # base-class stub: no backing store here, so hand back an empty mapping
    empty_dumps = SocorroDotDict()
    return empty_dumps
def get_raw_crash(self, crash_id):
    """the default implementation of fetching a raw_crash

    parameters:
        crash_id - the id of a raw crash to fetch
    """
    # base-class stub: no backing store here, so hand back an empty mapping
    empty_raw_crash = SocorroDotDict()
    return empty_raw_crash
def _summary_for_a_product_version_pair(self, a_pv_accumulator):
    """in the original code, the counter structures were walked and
    manipulated to form the statistics.  Once a stat was determined,
    it was printed to stdout.  Since we want to have various means
    of outputting the data, instead of printing to stdout, this
    method saves the statistic in a "summary_structure".  This
    structure will later be walked for printing or output to some
    future storage scheme

    parameters:
        a_pv_accumulator - the accumulator for one (product, version)
                           pair whose ``osyses`` counters are summarized

    returns a pv_summary structure that looks like this:
        pv_summary
            .date_key  # a list of the last six UUID characters present
            .notes  # any notes added by the algorithm to tell of problems
            .os_counters[os_name*]
                .count
                .signatures[a_signature*]
                    .count
                    .in_sig_ratio
                    .in_os_ratio
                    .in_os_count
                    .osys_count
                    .modules[a_module*]  # may be addons
                        .in_sig_ratio
                        .in_os_ratio
                        .in_os_count
                        .osys_count
                        .versions[a_version*]  # may be addon versions
                            .sig_ver_ratio
                            .sig_ver_count
                            .sig_count
                            .os_ver_ratio
                            .os_ver_count
                            .osys_count
                            .version
    """
    options = self.config
    pv_summary = SocorroDotDict({
        'notes': [],
    })
    if len(self.date_suffix) > 1:
        # the accumulator unexpectedly saw more than one day's crashes;
        # record that oddity for whoever reads the summary
        message = (
            "crashes from more than one day %s"
            % str(tuple(self.date_suffix.keys()))
        )
        pv_summary.notes.append(message)
    # Python 2: dict.keys() returns a list, so [0] is valid here
    pv_summary.date_key = self.date_suffix.keys()[0]
    pv_summary.os_counters = {}
    MIN_CRASHES = self.config.min_crashes
    counters_for_multiple_os = a_pv_accumulator.osyses
    for os_name in counters_for_multiple_os.keys():
        counters_for_an_os = counters_for_multiple_os[os_name]
        # bind each nested summary dotdict to a local alias; attribute
        # writes on the alias land in pv_summary because it is the
        # same object
        os_summary = SocorroDotDict()
        pv_summary.os_counters[os_name] = os_summary
        os_summary.count = counters_for_an_os.count
        os_summary.signatures = {}
        # only signatures with enough crashes are statistically useful
        filtered_signatures = [
            (signature, signature_counter)
            for (signature, signature_counter)
            in counters_for_an_os["signatures"].items()
            if signature_counter.count >= MIN_CRASHES
        ]
        for a_signature, a_signature_counter in filtered_signatures:
            sig_summary = SocorroDotDict()
            os_summary.signatures[a_signature] = sig_summary
            sig_summary.count = a_signature_counter.count
            sig_summary.modules = {}
            # per-module correlation stats: how often the module appears
            # with this signature vs. how often it appears in the OS
            # population overall
            modules_list = [
                SocorroDotDict({
                    "libname": module_name,
                    "in_sig_count": a_module_counter.count,
                    "in_sig_ratio": (
                        float(a_module_counter.count)
                        / a_signature_counter.count
                    ),
                    "in_sig_versions": a_module_counter.versions,
                    "in_os_count": (
                        counters_for_an_os.modules[module_name].count
                    ),
                    "in_os_ratio": (
                        float(counters_for_an_os.modules[module_name].count)
                        / counters_for_an_os.count
                    ),
                    "in_os_versions": (
                        counters_for_an_os.modules[module_name].versions
                    ),
                })
                for module_name, a_module_counter
                in a_signature_counter.modules.iteritems()
            ]
            # drop modules whose presence with the signature is not
            # meaningfully above their baseline presence in the OS
            modules_list = [
                module for module in modules_list
                if (module.in_sig_ratio - module.in_os_ratio
                    >= self.config.min_baseline_diff)
            ]
            # most distinctive modules first
            modules_list.sort(
                key=lambda module: module.in_sig_ratio - module.in_os_ratio,
                reverse=True
            )
            for module in modules_list:
                module_name = module.libname
                if options.addons:
                    # decorate addon ids with their human-readable name/url
                    info = addonids.info_for_id(module_name)
                    if info is not None:
                        module_name = (
                            module_name
                            + u" ({0}, {1})".format(info.name, info.url)
                        )
                if options.show_versions and len(module.in_os_versions) == 1:
                    # exactly one version seen: fold it into the module name
                    onlyver = module.in_os_versions.keys()[0]
                    if os_name.startswith("Mac OS X"):
                        info = macdebugids.info_for_id(module_name, onlyver)
                        if info is not None:
                            onlyver = onlyver + "; " + info
                    if onlyver != "":
                        module_name = module_name + " (" + onlyver + ")"
                module_summary = SocorroDotDict()
                sig_summary.modules[module_name] = module_summary
                module_summary.in_sig_count = module.in_sig_count
                # ratios are stored as whole percentages
                module_summary.in_sig_ratio = int(
                    round(module.in_sig_ratio * 100)
                )
                module_summary.in_os_ratio = int(
                    round(module.in_os_ratio * 100)
                )
                module_summary.in_os_count = module.in_os_count
                module_summary.osys_count = counters_for_an_os.count
                if options.show_versions and len(module.in_os_versions) != 1:
                    # several versions seen: break the stats out per version
                    versions = module.in_os_versions.keys()
                    versions.sort()
                    module_summary.versions = {}
                    for version in versions:
                        sig_ver_count = module.in_sig_versions.get(version, 0)
                        os_ver_count = module.in_os_versions[version]
                        if os_name.startswith("Mac OS X"):
                            info = macdebugids.info_for_id(
                                module_name, version
                            )
                            if info is not None:
                                version = version + " (" + info + ")"
                        version_summary = SocorroDotDict()
                        module_summary.versions[version] = version_summary
                        version_summary.sig_ver_ratio = int(round(
                            float(sig_ver_count)
                            / a_signature_counter.count * 100
                        ))
                        version_summary.sig_ver_count = sig_ver_count
                        version_summary.sig_count = a_signature_counter.count
                        version_summary.os_ver_ratio = int(round(
                            float(os_ver_count)
                            / counters_for_an_os.count * 100
                        ))
                        version_summary.os_ver_count = os_ver_count
                        version_summary.osys_count = counters_for_an_os.count
                        version_summary.version = version
    return pv_summary
def _action(self, raw, dumps, crash, processor_meta):
    """Accumulate one processed crash into the correlation counters for
    its (product, version) pair.

    parameters:
        raw - the raw crash (unused here)
        dumps - the dumps (unused here)
        crash - the processed crash mapping; must contain at least
                'crash_id', and is skipped unless it has 'os_name'
        processor_meta - processor metadata (unused here)

    returns:
        True if the crash was counted, False if it was skipped for
        lacking an "os_name" field
    """
    self.date_suffix[crash['crash_id'][-6:]] += 1
    if "os_name" not in crash:
        # We have some bad crash reports.
        return False
    # give the names of the old algorithm's critical variables to their
    # variables in the new system
    # NOTE(review): the attribute name "producs" is misspelled, but it is
    # shared state read by sibling methods, so it must stay as-is here
    try:
        osyses = self.counters_for_all_producs_and_versions[
            (crash["product"], crash["version"])
        ].osyses
        self.counters_for_all_producs_and_versions[
            (crash["product"], crash["version"])
        ].counter += 1
    except (AttributeError, KeyError):
        # why both types? crashes can be represented by either the Socorro
        # or configman DotDict types which raise different exceptions on
        # not finding a key.
        osyses = {}
        self.counters_for_all_producs_and_versions[
            (crash["product"], crash["version"])
        ].osyses = osyses
        self.counters_for_all_producs_and_versions[
            (crash["product"], crash["version"])
        ].counter = 1
    options = self.config
    # unlike the "core count correlation report", this code was refactored
    # to help understand the structure of the counters so that a generic
    # summary structure could be made.  This allows for output of the
    # summary information to somewhere other than stdout.
    #
    # the structure has been broken down into levels of regular dicts
    # and SocorroDotDicts.  The DotDicts have keys that are constant
    # and no more are added when new crashes come in.  The regular dicts
    # are keyed with variable things that come in with crashes.  In the
    # structure below, keys of DotDicts are shown as constants like
    # ".count" and ".modules".  The keys of the dicts are shown as the
    # name of a field with a * (to designate zero or more) inside square
    # brackets.
    #
    # the counters structure looks like this:
    #     pv_counters[os_name*]
    #         .count
    #         .signatures[a_signature*]
    #             .count
    #             .modules[a_module*]
    #                 .count
    #                 .versions[a_version*] int
    #         .modules[a_module*]
    #             .count
    #             .versions[a_version*] int
    os_name = crash["os_name"]
    # The os_version field is way too specific on Linux, and we don't
    # have much Linux data anyway.
    if options.by_os_version and os_name != "Linux":
        os_name = os_name + " " + crash["os_version"]
    counters_for_an_os = osyses.setdefault(
        os_name,
        SocorroDotDict({
            "count": 0,
            "signatures": {},
            "modules": {},
        })
    )
    a_signature = crash["signature"]
    if self.contains_bare_address(a_signature):
        if options.condense:
            # Condense all signatures in a given DLL.
            a_signature = self.remove_bare_address_from_signature(
                a_signature
            )
    if "reason" in crash and crash["reason"] is not None:
        a_signature = a_signature + "|" + crash["reason"]
    counters_for_a_signature = counters_for_an_os.signatures.setdefault(
        a_signature,
        SocorroDotDict({
            "count": 0,
            "modules": {}
        }),
    )
    list_of_counters = [counters_for_an_os, counters_for_a_signature]
    # increment both the os & signature counters
    for a_counter in list_of_counters:
        a_counter.count += 1
    for libname, version in self.generate_modules_or_addons(crash):
        # Increment the global count on osys and the per-signature count.
        for a_counter in list_of_counters:
            counters_for_modules = a_counter.modules.setdefault(
                libname,
                SocorroDotDict({
                    "count": 0,
                    "versions": defaultdict(int),
                })
            )
            counters_for_modules.count += 1
            # Count versions of each module as well.
            counters_for_modules.versions[version] += 1
    return True
def _summary_for_a_product_version_pair(self, an_accumulator):
    """in the original code, the counter structures were walked and
    manipulated to form the statistics. Once a stat was determined,
    it was printed to stdout. Since we want to have various means
    of outputting the data, instead of printing to stdout, this
    method saves the statistic in a "summary_structure".  This
    structure will later be walked for printing or output to some
    future storage scheme

    parameters:
        an_accumulator - the accumulator for one (product, version)
                         pair whose ``osyses`` counters are summarized

    The summary structure looks like this:
        summary[product_version*]
            .note - a list of comments by the algorithm
            [os_name]
                .count
                .signatures[signame*]
                    .name
                    .count
                    .cores[number_of_cores]
                        .in_sig_count
                        .in_sig_ratio
                        .rounded_in_sig_ratio
                        .in_os_count
                        .in_os_ratio
                        .rounded_in_os_ratio
    """
    pv_summary = {
        'notes': [],
    }
    if (len(self.date_suffix) > 1):
        # more than one day's crashes reached this accumulator; note it
        message = ("crashes from more than one day %s" %
                   str(tuple(self.date_suffix.keys())))
        pv_summary['notes'].append(message)
    # Python 2: dict.keys() returns a list, so [0] is valid here
    pv_summary['date_key'] = self.date_suffix.keys()[0]
    MIN_CRASHES = self.config.min_crashes
    osyses = an_accumulator.osyses
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # begin - minimally altered section from original code
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # parses core-count keys of the form "<cpu family> with <n> cores"
    infostr_re = re.compile("^(.*) with (\d+) cores$")
    #----------------------------------------------------------------------
    def cmp_infostr(x, y):
        # order by cpu family first, then numerically by core count
        # (so "x with 10 cores" sorts after "x with 2 cores");
        # Python 2 only: uses the builtin cmp() and list.sort(cmp=...)
        (familyx, coresx) = infostr_re.match(x).groups()
        (familyy, coresy) = infostr_re.match(y).groups()
        if familyx != familyy:
            return cmp(familyx, familyy)
        return cmp(int(coresx), int(coresy))
    #----------------------------------------------------------------------
    sorted_osyses = osyses.keys()
    sorted_osyses.sort()
    for osname in sorted_osyses:
        osys = osyses[osname]
        pv_summary[osname] = SocorroDotDict()
        pv_summary[osname].count = osys['count']
        pv_summary[osname].signatures = {}
        # keep only signatures with enough crashes to be meaningful,
        # most frequent first
        sorted_signatures = [
            sig for sig in osys["signatures"].items()
            if sig[1]["count"] >= MIN_CRASHES
        ]
        # NOTE(review): the lambda parameter shadows the builtin `tuple`
        sorted_signatures.sort(key=lambda
                               tuple: tuple[1]["count"], reverse=True)
        sorted_cores = osys["core_counts"].keys()
        # strongly suspect that sorting is useless here
        sorted_cores.sort(cmp=cmp_infostr)
        for signame, sig in sorted_signatures:
            pv_summary[osname].signatures[signame] = SocorroDotDict({
                'name': signame,
                'count': sig['count'],
                'cores': {},
            })
            by_number_of_cores = \
                pv_summary[osname].signatures[signame].cores
            for cores in sorted_cores:
                by_number_of_cores[cores] = SocorroDotDict()
                # how often this core count appears with this signature
                # vs. in this OS population overall
                in_sig_count = sig["core_counts"].get(cores, 0)
                in_sig_ratio = float(in_sig_count) / sig["count"]
                in_os_count = osys["core_counts"][cores]
                in_os_ratio = float(in_os_count) / osys["count"]
                # whole-percentage forms for display
                rounded_in_sig_ratio = int(round(in_sig_ratio * 100))
                rounded_in_os_ratio = int(round(in_os_ratio * 100))
                by_number_of_cores[cores].in_sig_count = in_sig_count
                by_number_of_cores[cores].in_sig_ratio = in_sig_ratio
                by_number_of_cores[cores].rounded_in_sig_ratio = \
                    rounded_in_sig_ratio
                by_number_of_cores[cores].in_os_count = in_os_count
                by_number_of_cores[cores].in_os_ratio = in_os_ratio
                by_number_of_cores[cores].rounded_in_os_ratio = \
                    rounded_in_os_ratio
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # end - minimally altered code section
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    return pv_summary