def test_save_raw_and_processed(self):
        """save_raw_and_processed must hand all four arguments, unchanged,
        to the wrapped implementation exactly once."""
        crash_id = '86b58ff2-9708-487d-bfc4-9dac32121214'
        wrapper = DBCrashStorageWrapperNewCrashSource(
            self.get_standard_config()
        )

        raw_crash = SocorroDotDict({
            "name": "Gabi",
            "submitted_timestamp": "2012-12-14T00:00:00"
        })
        dumps = FileDumpsMapping({
            'upload_file_minidump':
                '86b58ff2-9708-487d-bfc4-9dac32121214'
                '.upload_file_minidump.TEMPORARY.dump'
        })
        processed_crash = SocorroDotDict({
            "name": "Gabi",
            "submitted_timestamp": "2012-12-14T00:00:00"
        })
        call_args = (raw_crash, dumps, processed_crash, crash_id)

        # exercise the wrapper
        wrapper.save_raw_and_processed(*call_args)

        # the wrapped implementation must have seen the very same arguments
        forwarded = wrapper._implementation.save_raw_and_processed
        forwarded.assert_called_once_with(*call_args)
    def save_raw_and_processed(self, raw_crash, dump, processed_crash,
                               crash_id):
        """Save a raw crash, its dump and its processed crash to every
        store in `self.stores`.

        A failure in one store is logged and remembered, and the remaining
        stores are still tried.

        parameters:
           raw_crash - the raw crash mapping
           dump - the dump(s), passed through to each store untouched
           processed_crash - the processed crash mapping
           crash_id - the id of the crash being saved

        raises:
           PolyStorageError - after all stores were tried, if at least one
                              of them raised"""
        failures = PolyStorageError()

        # Mutating stores each need a private copy.  The crashes are
        # flattened to plain dicts once, up front, so that every later copy
        # is a copy.deepcopy() of a pure python structure.
        plain_processed = socorrodotdict_to_dict(processed_crash)
        plain_raw = socorrodotdict_to_dict(raw_crash)

        for store in self.stores.itervalues():
            self.quit_check()
            try:
                # a store may be a wrapper around the object that really
                # does the saving; ask that inner object whether it mutates
                target = getattr(store, 'wrapped_object', store)
                mutates = (
                    hasattr(target, 'is_mutator') and target.is_mutator()
                )

                if not mutates:
                    processed_for_store = processed_crash
                    raw_for_store = raw_crash
                else:
                    # the store expects dot-dicts, so the deep-copied plain
                    # dicts are dressed back up before being handed over
                    processed_for_store = SocorroDotDict(
                        copy.deepcopy(plain_processed)
                    )
                    raw_for_store = SocorroDotDict(
                        copy.deepcopy(plain_raw)
                    )

                store.save_raw_and_processed(
                    raw_for_store,
                    dump,
                    processed_for_store,
                    crash_id
                )
            except Exception:
                culprit = getattr(store, 'wrapped_object', store.__class__)
                self.logger.error(
                    '%r failed (crash id: %s)',
                    culprit,
                    crash_id,
                    exc_info=True
                )
                failures.gather_current_exception()

        if failures.has_exceptions():
            raise failures
    def test_poly_crash_storage_immutability_deeper(self):
        """After saving through a PolyCrashStorage configured with
        MutatingProcessedCrashCrashStorage, the nested values of the
        caller's processed crash must be unchanged."""
        required_config = Namespace()
        required_config.add_option('storage', default=PolyCrashStorage)
        required_config.add_option('logger', default=mock.Mock())

        overrides = {
            'storage_classes': (
                'socorro.unittest.external.test_crashstorage_base'
                '.MutatingProcessedCrashCrashStorage'
            ),
        }
        manager = ConfigurationManager(
            required_config,
            values_source_list=[overrides]
        )
        with manager.context() as config:
            # nested dot-dicts of both flavours inside a plain dict
            processed_crash = {
                'foo': DotDict({'other': 'thing'}),
                'bar': SocorroDotDict({'something': 'else'}),
            }
            poly_store = config.storage(config)

            poly_store.save_raw_and_processed(
                {'ooid': '12345'},
                '12345',
                processed_crash,
                'n'
            )

            assert processed_crash['foo']['other'] == 'thing'
            assert processed_crash['bar']['something'] == 'else'
    def test_complex(self):
        """socorrodotdict_to_dict must turn dot-dicts, at any nesting
        level, into a value equal to the expected plain structure, and
        that value must be deep-copyable."""
        cases = [
            # a plain dict comes back as an equal plain dict
            ({'a': 1}, {'a': 1}),
            # SocorroDotDict at the top level
            (SocorroDotDict({'a': 1}), {'a': 1}),
            # SocorroDotDict nested inside a SocorroDotDict
            (
                SocorroDotDict({'a': 1, 'b': SocorroDotDict({'a': 2})}),
                {'a': 1, 'b': {'a': 2}},
            ),
            # SocorroDotDict nested inside a plain dict
            (
                {'a': 1, 'b': SocorroDotDict({'a': 2})},
                {'a': 1, 'b': {'a': 2}},
            ),
            # SocorroDotDict as a list element
            (
                {'a': 1, 'b': [SocorroDotDict({'a': 2}), 3, 4]},
                {'a': 1, 'b': [{'a': 2}, 3, 4]},
            ),
            # SocorroDotDict nested inside a DotDict
            (
                DotDict({'a': 1, 'b': SocorroDotDict({'a': 2})}),
                {'a': 1, 'b': {'a': 2}},
            ),
        ]

        for data, expected in cases:
            converted = socorrodotdict_to_dict(data)
            assert converted == expected
            # the converted value must survive a deepcopy without raising
            copy.deepcopy(converted)
    def test_save_processed(self):
        """save_processed must hand the processed crash, unchanged, to the
        wrapped implementation exactly once."""
        wrapper = DBCrashStorageWrapperNewCrashSource(
            self.get_standard_config()
        )
        processed_crash = SocorroDotDict({
            "name": "Gabi",
            "submitted_timestamp": "2012-12-14T00:00:00"
        })

        # exercise the wrapper
        wrapper.save_processed(processed_crash)

        # the wrapped implementation must have seen the same object
        forwarded = wrapper._implementation.save_processed
        forwarded.assert_called_once_with(processed_crash)
    def test_get_processed(self):
        """get_processed must return the very object produced by the
        wrapped implementation and pass the crash id through to it."""
        crash_id = '86b58ff2-9708-487d-bfc4-9dac32121214'
        expected = SocorroDotDict({
            "name": "Gabi",
            "submitted_timestamp": "2012-12-14T00:00:00"
        })

        wrapper = DBCrashStorageWrapperNewCrashSource(
            self.get_standard_config()
        )
        # swap in an implementation method with a known return value
        wrapper._implementation.get_processed = Mock(return_value=expected)

        # exercise the wrapper
        fetched = wrapper.get_processed(crash_id)

        # same object back, and the implementation saw the crash id
        ok_(expected is fetched)
        wrapper._implementation.get_processed.assert_called_with(crash_id)
    def get_unredacted_processed(self, crash_id):
        """default behavior for fetching an unredacted processed_crash:
        nothing is looked up and an empty mapping is handed back

        parameters:
           crash_id - the id of the processed_crash to fetch (not used by
                      this default implementation)

        returns:
           a new, empty SocorroDotDict"""
        nothing_found = SocorroDotDict()
        return nothing_found
    def get_raw_dumps_as_files(self, crash_id):
        """default behavior for fetching all the dumps of a crash as files:
        nothing is looked up and an empty mapping is handed back

        parameters:
           crash_id - the id of the crash whose dumps are wanted (not used
                      by this default implementation)

        returns:
           a new, empty SocorroDotDict"""
        nothing_found = SocorroDotDict()
        return nothing_found
    def get_raw_crash(self, crash_id):
        """default behavior for fetching a raw_crash: nothing is looked up
        and an empty mapping is handed back

        parameters:
           crash_id - the id of the raw crash to fetch (not used by this
                      default implementation)

        returns:
           a new, empty SocorroDotDict"""
        nothing_found = SocorroDotDict()
        return nothing_found
Exemple #10
0
    def _summary_for_a_product_version_pair(self, a_pv_accumulator):
        """Turn the counters gathered for one product/version pair into a
        summary structure.

        In the original code, the counter structures were walked and
        manipulated to form the statistics.  Once a stat was determined,
        it was printed to stdout.  Since we want to have various means of
        outputting the data, instead of printing to stdout, this method
        saves the statistics in a "summary structure".  This structure will
        later be walked for printing or output to some future storage
        scheme.

        parameters:
            a_pv_accumulator - an object whose `osyses` attribute maps an
                               os_name to the counters gathered for that OS

        configuration read from self.config:
            min_crashes - signatures with fewer crashes are left out
            min_baseline_diff - modules whose (in_sig_ratio - in_os_ratio)
                                is below this are left out
            addons - when true, module names are decorated with the name
                     and url reported by addonids.info_for_id
            show_versions - when true, version information is added

        returns:
            the summary structure, which looks like this (names followed by
            a * are variable keys of plain dicts, names starting with a dot
            are fixed keys of SocorroDotDicts):

            pv_summary
                .notes  # list of notes added by the algorithm
                .date_key  # the first key of self.date_suffix
                .os_counters[os_name*]
                    .count
                    .signatures[a_signature*]
                        .count
                        .modules[a_module*]  # name may carry addon info
                                             # and/or a single version
                            .in_sig_count
                            .in_sig_ratio  # whole percent
                            .in_os_ratio  # whole percent
                            .in_os_count
                            .osys_count
                            .versions[a_version*]  # only with
                                # show_versions and when the module was not
                                # seen with exactly one version
                                .sig_ver_ratio  # whole percent
                                .sig_ver_count
                                .sig_count
                                .os_ver_ratio  # whole percent
                                .os_ver_count
                                .osys_count
                                .version

        raises:
            IndexError - if self.date_suffix is empty
        """
        options = self.config
        pv_summary = SocorroDotDict({
            'notes': [],
        })
        if len(self.date_suffix) > 1:
            message = (
                "crashes from more than one day %s" %
                str(tuple(self.date_suffix.keys()))
            )
            pv_summary.notes.append(message)
        # list() keeps this working whether keys() returns a list or a view
        pv_summary.date_key = list(self.date_suffix.keys())[0]
        pv_summary.os_counters = {}

        min_crashes = options.min_crashes
        counters_for_multiple_os = a_pv_accumulator.osyses

        for os_name, counters_for_an_os in counters_for_multiple_os.items():
            os_summary = SocorroDotDict()
            pv_summary.os_counters[os_name] = os_summary
            os_summary.count = counters_for_an_os.count
            os_summary.signatures = {}

            for a_signature, a_signature_counter in (
                counters_for_an_os["signatures"].items()
            ):
                if not a_signature_counter.count >= min_crashes:
                    continue

                signature_summary = SocorroDotDict()
                os_summary.signatures[a_signature] = signature_summary
                signature_summary.count = a_signature_counter.count
                signature_summary.modules = {}

                # per module: how often it shows up with this signature
                # compared to how often it shows up on this OS overall
                modules_list = [
                    SocorroDotDict({
                        "libname": module_name,
                        "in_sig_count": a_module_counter.count,
                        "in_sig_ratio": (
                            float(a_module_counter.count) /
                            a_signature_counter.count
                        ),
                        "in_sig_versions": a_module_counter.versions,
                        "in_os_count":
                            counters_for_an_os.modules[module_name].count,
                        "in_os_ratio": (
                            float(
                                counters_for_an_os.modules[module_name].count
                            ) / counters_for_an_os.count
                        ),
                        "in_os_versions":
                            counters_for_an_os.modules[module_name].versions
                    })
                    for module_name, a_module_counter
                    in a_signature_counter.modules.items()
                ]

                # keep only the modules that are over-represented enough,
                # most over-represented first
                modules_list = [
                    module for module in modules_list
                    if module.in_sig_ratio - module.in_os_ratio
                    >= options.min_baseline_diff
                ]
                modules_list.sort(
                    key=lambda module:
                        module.in_sig_ratio - module.in_os_ratio,
                    reverse=True
                )

                for module in modules_list:
                    module_name = module.libname
                    in_os_versions = module.in_os_versions
                    has_single_version = len(in_os_versions) == 1

                    if options.addons:
                        info = addonids.info_for_id(module_name)
                        if info is not None:
                            module_name = (
                                module_name + u" ({0}, {1})".format(
                                    info.name,
                                    info.url
                                )
                            )
                    if options.show_versions and has_single_version:
                        # a lone version is folded into the module's name
                        onlyver = list(in_os_versions.keys())[0]
                        if os_name.startswith("Mac OS X"):
                            info = macdebugids.info_for_id(
                                module_name,
                                onlyver
                            )
                            if info is not None:
                                onlyver = onlyver + "; " + info
                        if onlyver != "":
                            module_name = module_name + " (" + onlyver + ")"

                    module_summary = SocorroDotDict({
                        "in_sig_count": module.in_sig_count,
                        "in_sig_ratio":
                            int(round(module.in_sig_ratio * 100)),
                        "in_os_ratio": int(round(module.in_os_ratio * 100)),
                        "in_os_count": module.in_os_count,
                        "osys_count": counters_for_an_os.count,
                    })
                    signature_summary.modules[module_name] = module_summary

                    if options.show_versions and not has_single_version:
                        version_summaries = {}
                        module_summary.versions = version_summaries
                        for version in sorted(in_os_versions.keys()):
                            # both counts are looked up under the bare
                            # version, before it gets decorated below
                            sig_ver_count = module.in_sig_versions.get(
                                version,
                                0
                            )
                            os_ver_count = in_os_versions[version]
                            if os_name.startswith("Mac OS X"):
                                info = macdebugids.info_for_id(
                                    module_name,
                                    version
                                )
                                if info is not None:
                                    version = version + " (" + info + ")"
                            version_summaries[version] = SocorroDotDict({
                                "sig_ver_ratio": int(round(
                                    float(sig_ver_count) /
                                    a_signature_counter.count * 100
                                )),
                                "sig_ver_count": sig_ver_count,
                                "sig_count": a_signature_counter.count,
                                "os_ver_ratio": int(round(
                                    float(os_ver_count) /
                                    counters_for_an_os.count * 100
                                )),
                                "os_ver_count": os_ver_count,
                                "osys_count": counters_for_an_os.count,
                                "version": version,
                            })

        return pv_summary
Exemple #11
0
    def _action(self, raw, dumps, crash, processor_meta):
        """Fold one processed crash into the per product/version counters.

        parameters:
            raw - not used by this method
            dumps - not used by this method
            crash - the processed crash; read by key ("crash_id",
                    "os_name", "product", "version", "signature", and,
                    depending on the data and options, "os_version" and
                    "reason")
            processor_meta - not used by this method

        returns:
            False if the crash has no "os_name" (only the date suffix
            counter is touched in that case), True otherwise

        The counters kept for one product/version pair are a mix of regular
        dicts and SocorroDotDicts.  The dot-dicts hold a fixed set of keys
        (shown below with a leading dot); the regular dicts are keyed by
        values that arrive with the crashes (shown below in square brackets
        with a * for "zero or more"):

            osyses[os_name*]
                .count
                .signatures[a_signature*]
                    .count
                    .modules[a_module*]
                        .count
                        .versions[a_version*] int
                .modules[a_module*]
                    .count
                    .versions[a_version*] int
        """
        self.date_suffix[crash['crash_id'][-6:]] += 1
        if "os_name" not in crash:
            # some crash reports are bad; leave them out of the statistics
            return False

        pv_key = (crash["product"], crash["version"])
        pv_table = self.counters_for_all_producs_and_versions
        try:
            osyses = pv_table[pv_key].osyses
            pv_table[pv_key].counter += 1
        except (AttributeError, KeyError):
            # two exception types because the entry may be either a Socorro
            # or a configman DotDict, and those raise different exceptions
            # for a missing key.  Either way this pair has not been set up
            # yet, so start it off.
            osyses = {}
            pv_table[pv_key].osyses = osyses
            pv_table[pv_key].counter = 1

        options = self.config

        # --- which OS bucket does this crash belong to? -------------------
        os_name = crash["os_name"]
        # os_version is far too specific on Linux, and there is not much
        # Linux data anyway, so Linux is never split by version
        if options.by_os_version and os_name != "Linux":
            os_name = os_name + " " + crash["os_version"]
        counters_for_an_os = osyses.setdefault(
            os_name,
            SocorroDotDict({
                "count": 0,
                "signatures": {},
                "modules": {},
            })
        )

        # --- which signature bucket within that OS? -----------------------
        a_signature = crash["signature"]
        if self.contains_bare_address(a_signature) and options.condense:
            # condense all signatures in a given DLL
            a_signature = self.remove_bare_address_from_signature(
                a_signature
            )
        if "reason" in crash and crash["reason"] is not None:
            a_signature = a_signature + "|" + crash["reason"]
        counters_for_a_signature = counters_for_an_os.signatures.setdefault(
            a_signature,
            SocorroDotDict({
                "count": 0,
                "modules": {}
            }),
        )

        # --- count the crash, then its modules, at both levels ------------
        both_levels = (counters_for_an_os, counters_for_a_signature)
        for a_counter in both_levels:
            a_counter.count += 1

        for libname, version in self.generate_modules_or_addons(crash):
            for a_counter in both_levels:
                module_counter = a_counter.modules.setdefault(
                    libname,
                    SocorroDotDict({
                        "count": 0,
                        "versions": defaultdict(int),
                    })
                )
                module_counter.count += 1
                # the versions of each module are counted as well
                module_counter.versions[version] += 1

        return True
Exemple #12
0
    def _summary_for_a_product_version_pair(self, an_accumulator):
        """Turn the counters gathered for one product/version pair into a
        summary structure.

        In the original code, the counter structures were walked and
        manipulated to form the statistics.  Once a stat was determined,
        it was printed to stdout.  Since we want to have various means of
        outputting the data, instead of printing to stdout, this method
        saves the statistics in a "summary structure".  This structure will
        later be walked for printing or output to some future storage
        scheme.

        parameters:
            an_accumulator - an object whose `osyses` attribute maps an OS
                             name to a mapping with "count", "signatures"
                             and "core_counts" entries

        configuration read from self.config:
            min_crashes - signatures with fewer crashes are left out

        returns:
            a plain dict that looks like this (names followed by a * are
            variable keys of plain dicts, names starting with a dot are
            fixed keys of SocorroDotDicts):

            pv_summary
                ['notes']  # list of comments by the algorithm
                ['date_key']  # the first key of self.date_suffix
                [os_name*]
                    .count
                    .signatures[signame*]
                        .name
                        .count
                        .cores[core_info*]  # keys of the OS "core_counts"
                            .in_sig_count
                            .in_sig_ratio
                            .rounded_in_sig_ratio  # whole percent
                            .in_os_count
                            .in_os_ratio
                            .rounded_in_os_ratio  # whole percent

        raises:
            IndexError - if self.date_suffix is empty
        """
        pv_summary = {
            'notes': [],
        }
        if len(self.date_suffix) > 1:
            message = ("crashes from more than one day %s" %
                       str(tuple(self.date_suffix.keys())))
            pv_summary['notes'].append(message)
        # list() keeps this working whether keys() returns a list or a view
        pv_summary['date_key'] = list(self.date_suffix.keys())[0]

        min_crashes = self.config.min_crashes
        osyses = an_accumulator.osyses

        infostr_re = re.compile(r"^(.*) with (\d+) cores$")

        def infostr_sort_key(infostr):
            """Order "<family> with <n> cores" by family, then by n as a
            number."""
            matched = infostr_re.match(infostr)
            if matched is None:
                # The old cmp-based sort only parsed keys when it had to
                # compare two of them, so a lone unparsable key never
                # raised.  Sort unparsable keys last instead of failing.
                return (1, infostr, 0)
            family, cores = matched.groups()
            return (0, family, int(cores))

        for osname in sorted(osyses.keys()):
            osys = osyses[osname]

            os_summary = SocorroDotDict()
            pv_summary[osname] = os_summary
            os_summary.count = osys['count']
            os_summary.signatures = {}

            # signatures with enough crashes, most frequent first
            sorted_signatures = sorted(
                (
                    (signame, sig)
                    for signame, sig in osys["signatures"].items()
                    if sig["count"] >= min_crashes
                ),
                key=lambda name_and_sig: name_and_sig[1]["count"],
                reverse=True
            )
            # the order only shows in the output when the mapping that
            # receives these keys preserves insertion order
            sorted_cores = sorted(
                osys["core_counts"].keys(),
                key=infostr_sort_key
            )

            for signame, sig in sorted_signatures:
                signature_summary = SocorroDotDict({
                    'name': signame,
                    'count': sig['count'],
                    'cores': {},
                })
                os_summary.signatures[signame] = signature_summary
                by_number_of_cores = signature_summary.cores

                for cores in sorted_cores:
                    # a core count never seen with this signature counts
                    # as zero
                    in_sig_count = sig["core_counts"].get(cores, 0)
                    in_sig_ratio = float(in_sig_count) / sig["count"]
                    in_os_count = osys["core_counts"][cores]
                    in_os_ratio = float(in_os_count) / osys["count"]

                    by_number_of_cores[cores] = SocorroDotDict({
                        'in_sig_count': in_sig_count,
                        'in_sig_ratio': in_sig_ratio,
                        'rounded_in_sig_ratio':
                            int(round(in_sig_ratio * 100)),
                        'in_os_count': in_os_count,
                        'in_os_ratio': in_os_ratio,
                        'rounded_in_os_ratio':
                            int(round(in_os_ratio * 100)),
                    })

        return pv_summary