Example #1
class ReprocessingOneRabbitMQCrashStore(ReprocessingRabbitMQCrashStore):
    required_config = Namespace()
    required_config.rabbitmq_class = change_default(
        RabbitMQCrashStorage,
        'rabbitmq_class',
        ConnectionContext,
    )
    required_config.routing_key = change_default(
        RabbitMQCrashStorage,
        'routing_key',
        'socorro.reprocessing'
    )

    def reprocess(self, crash_ids):
        if not isinstance(crash_ids, (list, tuple)):
            crash_ids = [crash_ids]
        success = bool(crash_ids)
        for crash_id in crash_ids:
            if not self.save_raw_crash(
                DotDict({'legacy_processing': 0}),
                [],
                crash_id
            ):
                success = False
        return success
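
A usage sketch of the return-value contract of reprocess; the store instance and its configman config are assumed, and the crash IDs below are hypothetical placeholders:

# a usage sketch; `config` is assumed to come from configman, and the
# crash IDs are hypothetical placeholders, not real Socorro UUIDs
store = ReprocessingOneRabbitMQCrashStore(config)
# a single crash ID is wrapped into a list automatically
store.reprocess('00000000-0000-0000-0000-000000000000')
# a batch returns True only if every save_raw_crash call succeeds;
# an empty batch returns False
store.reprocess(['crash-id-1', 'crash-id-2'])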
Example #2
class JitCrashCategorizeRule(ExternalProcessRule):

    required_config = Namespace()
    required_config.command_line = change_default(
        ExternalProcessRule, 'command_line',
        'timeout -s KILL 30 {command_pathname} '
        '{dump_file_pathname} '
        '2>/dev/null')
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        '/data/socorro/stackwalk/bin/jit-crash-categorize',
    )
    required_config.result_key = change_default(
        ExternalProcessRule,
        'result_key',
        'classifications.jit.category',
    )
    required_config.return_code_key = change_default(
        ExternalProcessRule,
        'return_code_key',
        'classifications.jit.category_return_code',
    )
    required_config.add_option(
        'threshold',
        doc="max number of frames until encountering target frame",
        default=8)

    #--------------------------------------------------------------------------
    def __init__(self, config):
        super(JitCrashCategorizeRule, self).__init__(config)

    #--------------------------------------------------------------------------
    def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
        if (processed_crash.product != 'Firefox'
                or not processed_crash.os_name.startswith('Windows')
                or processed_crash.cpu_name != 'x86'):
            # we don't want any of these
            return False
        if processed_crash.json_dump['crashing_thread']['frames'][0].get(
                'module', False
        ):  # there is a module at the top of the stack, we don't want this
            return False
        return (processed_crash.signature.endswith('EnterBaseline')
                or processed_crash.signature.endswith('EnterIon'))

    #--------------------------------------------------------------------------
    def _interpret_external_command_output(self, fp, processor_meta):
        try:
            result = fp.read()
        except IOError as x:
            processor_meta.processor_notes.append(
                "%s unable to read external command output: %s" %
                (self.config.command_pathname, x))
            return ''
        try:
            return result.strip()
        except AttributeError:
            # result has no strip method (it is not a string); return as-is
            return result
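
For illustration, here is the shape of a processed crash that would satisfy _predicate; every value below is invented, and a plain class stands in for Socorro's attribute-access DotDict:

# illustrative only: a processed crash shaped so _predicate returns True
class _Crash(object):
    pass

processed_crash = _Crash()
processed_crash.product = 'Firefox'
processed_crash.os_name = 'Windows NT'
processed_crash.cpu_name = 'x86'
processed_crash.signature = 'js::jit::EnterBaseline'
# no 'module' key on the top frame of the crashing thread
processed_crash.json_dump = {'crashing_thread': {'frames': [{}]}}
# the predicate requires: Firefox + Windows + x86, no module at the top
# of the stack, and a signature ending in EnterBaseline or EnterIon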
Example #3
class CorrelationInterestingAddonsVersionsRule(
        CorrelationInterestingModulesRule):
    required_config = Namespace()
    required_config.addons = change_default(CorrelationInterestingModulesRule,
                                            'addons', True)
    required_config.show_versions = change_default(
        CorrelationInterestingModulesRule, 'show_versions', True)
Example #4
class ReprocessingRabbitMQCrashStore(RabbitMQCrashStorage):
    required_config = Namespace()
    required_config.routing_key = change_default(
        RabbitMQCrashStorage,
        'routing_key',
        'socorro.reprocessing'
    )
    required_config.filter_on_legacy_processing = change_default(
        RabbitMQCrashStorage,
        'filter_on_legacy_processing',
        False
    )
Example #5
class RegionalS3ConnectionContext(S3ConnectionContext):
    """This derviced class forces you to connect to a specific region
    which means we can use the OrdinaryCallingFormat as a calling format
    and then we'll be able to connect to S3 buckets with names in them.
    """
    required_config = Namespace()
    required_config.add_option(
        'region',
        doc="Name of the S3 region (e.g. us-west-2)",
        default='us-west-2',
        reference_value_from='resource.boto',
    )
    required_config.calling_format = change_default(
        S3ConnectionContext, 'calling_format',
        'boto.s3.connection.OrdinaryCallingFormat')

    #--------------------------------------------------------------------------
    def __init__(self, config, quit_check_callback=None):
        super(RegionalS3ConnectionContext, self).__init__(config)
        self._region = config.region
        self._connect_to_endpoint = boto.s3.connect_to_region

    #--------------------------------------------------------------------------
    def _connect(self):
        try:
            return self.connection
        except AttributeError:
            self.connection = self._connect_to_endpoint(
                self._region, **self._get_credentials())
            return self.connection
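
The _connect method caches the connection on the instance and treats AttributeError as "not connected yet". A stripped-down sketch of that pattern:

# a stripped-down sketch of the connect-once pattern used by _connect above:
# the first access raises AttributeError, which triggers the real connect
class LazyConnection(object):
    def _connect(self):
        try:
            return self.connection        # already connected
        except AttributeError:
            self.connection = object()    # stands in for the boto connection
            return self.connection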
Example #6
class SocorroLiteProcessorAlgorithm2015(Processor2015):
    """this is the class that the processor uses to transform raw crashes
    into processed crashes"""

    required_config = Namespace()
    required_config.rule_sets = change_default(
        Processor2015, 'rule_sets',
        ujson.dumps(socorrolite_processor_rule_sets))
Example #7
    def test_change_default(self):
        class Alpha(RequiredConfig):
            required_config = Namespace()
            required_config.add_option(
                'an_option',
                default=19,
                doc='this is an an_option',
                from_string_converter=str,
            )
        a_new_option_with_a_new_default = change_default(
            Alpha,
            'an_option',
            '29300'
        )

        ok_(
            a_new_option_with_a_new_default
            is not Alpha.required_config.an_option
        )
        eq_(
            a_new_option_with_a_new_default.default,
            '29300'
        )
        eq_(
            Alpha.required_config.an_option.default,
            19
        )
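
The test pins down the semantics of change_default: it returns a copy of the named option carrying the new default, and leaves the option on the class untouched. A minimal sketch consistent with that contract (an illustration, not configman's actual implementation):

# a minimal sketch of the semantics the test above pins down; this is an
# illustration, not configman's code
import copy

def change_default_sketch(kls, option_name, new_default):
    # attribute access on required_config matches the test's usage
    option = getattr(kls.required_config, option_name)
    new_option = copy.copy(option)      # the class's option is not mutated
    new_option.default = new_default
    return new_option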
Example #8
class PriorityjobRabbitMQCrashStore(RabbitMQCrashStorage):
    required_config = Namespace()
    required_config.rabbitmq_class = change_default(
        RabbitMQCrashStorage,
        'rabbitmq_class',
        ConnectionContext,
    )
    required_config.add_option(
        'routing_key',
        default='socorro.priority',
        doc='the name of the queue to receive crashes',
    )

    def process(self, crash_ids):
        if not isinstance(crash_ids, (list, tuple)):
            crash_ids = [crash_ids]
        success = bool(crash_ids)
        for crash_id in crash_ids:
            if not self.save_raw_crash(
                DotDict({'legacy_processing': 0}),
                [],
                crash_id
            ):
                success = False
        return success
Example #9
class JsonFileOutputForCoreCounts(FileOutputForCoreCounts):
    required_config = Namespace()
    required_config.path_template = change_default(
        FileOutputForCoreCounts,
        'path_template',
        '{path}/{prefix}/{prefix}_{key}-{name}.json',
    )

    #--------------------------------------------------------------------------
    def output_correlations_to_stream(self, counts_summary_structure, stream):
        json.dump(counts_summary_structure, stream, indent=4, sort_keys=True)
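
A quick usage sketch: any writable file-like object can serve as the stream, and the summary data below is invented:

# a small usage sketch of the same json.dump call with invented data
import json
import sys

counts_summary_structure = {'Windows NT': {'count': 3}}
json.dump(counts_summary_structure, sys.stdout, indent=4, sort_keys=True)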
Example #10
class DumpLookupExternalRule(ExternalProcessRule):

    required_config = Namespace()
    required_config.add_option(
        'dump_field',
        doc='the default name of a dump',
        default='upload_file_minidump',
    )
    required_config.add_option(
        'processor_symbols_pathname_list',
        doc='comma or space separated list of symbol files just as for '
        'minidump_stackwalk (quote paths with embedded spaces)',
        default='/mnt/socorro/symbols/symbols_ffx,'
        '/mnt/socorro/symbols/symbols_sea,'
        '/mnt/socorro/symbols/symbols_tbrd,'
        '/mnt/socorro/symbols/symbols_sbrd,'
        '/mnt/socorro/symbols/symbols_os',
        from_string_converter=_create_symbol_path_str)
    required_config.command_pathname = change_default(
        ExternalProcessRule, 'command_pathname',
        '/data/socorro/stackwalk/bin/dump-lookup')
    required_config.command_line = change_default(
        ExternalProcessRule, 'command_line',
        'timeout -s KILL 30 {command_pathname} '
        '{dumpfile_pathname} '
        '{processor_symbols_pathname_list} '
        '2>/dev/null')
    required_config.result_key = change_default(ExternalProcessRule,
                                                'result_key', 'dump_lookup')
    required_config.return_code_key = change_default(
        ExternalProcessRule, 'return_code_key', 'dump_lookup_return_code')

    #--------------------------------------------------------------------------
    def _predicate(self, raw_crash, raw_dumps, processed_crash,
                   processor_meta):
        return 'create_dump_lookup' in raw_crash
Example #11
    def test_change_default(self):
        class Alpha(RequiredConfig):
            required_config = Namespace()
            required_config.add_option(
                'an_option',
                default=19,
                doc='this is an an_option',
                from_string_converter=str,
            )

        a_new_option_with_a_new_default = change_default(
            Alpha, 'an_option', '29300')

        ok_(a_new_option_with_a_new_default
            is not Alpha.required_config.an_option)
        eq_(a_new_option_with_a_new_default.default, '29300')
        eq_(Alpha.required_config.an_option.default, 19)
Example #12
class CountStackWalkerTimeoutKills(CountAnythingRuleBase):
    required_config = Namespace()
    required_config.rule_name = change_default(
        CountAnythingRuleBase,
        'rule_name',
        'stackwalker_timeout_kills'
    )

    #--------------------------------------------------------------------------
    def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
        # override me to check any condition within a raw, processed crash
        # or even the state of the processor itself from the proc_meta
        return reduce(
            lambda x, y: x or "SIGKILL" in y,
            proc_meta.processor_notes,
            False
        )
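
The reduce call folds over the processor notes looking for "SIGKILL" (note that bare reduce is a Python 2 builtin; Python 3 would need functools.reduce). An equivalent, arguably clearer formulation:

# an equivalent, clearer formulation of the predicate above; assumes
# processor_notes is an iterable of strings
def any_note_mentions_sigkill(processor_notes):
    return any("SIGKILL" in note for note in processor_notes)

# any_note_mentions_sigkill(["MDSW terminated with SIGKILL due to timeout"])
# -> True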
Example #13
class ESCrashStorageRedactedJsonDump(ESCrashStorageRedactedSave):
    """This class stores redacted crash reports into Elasticsearch, but instead
    of removing the entire `json_dump`, it keeps only a subset of its keys.
    """
    required_config = Namespace()
    required_config.add_option(
        name="json_dump_whitelist_keys",
        doc="keys of the json_dump field to keep in the processed crash",
        default=[
            "largest_free_vm_block",
            "tiny_block_size",
            "write_combine_size",
        ],
        from_string_converter=list_converter,
    )

    required_config.namespace('es_redactor')
    required_config.es_redactor.add_option(
        name="redactor_class",
        doc="the name of the class that implements a 'redact' method",
        default='socorro.external.crashstorage_base.Redactor',
        from_string_converter=class_converter,
    )
    required_config.es_redactor.forbidden_keys = change_default(
        Redactor, "forbidden_keys", "upload_file_minidump_flash1.json_dump, "
        "upload_file_minidump_flash2.json_dump, "
        "upload_file_minidump_browser.json_dump")

    #--------------------------------------------------------------------------
    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        """This is the only write mechanism that is actually employed in normal
        usage.
        """
        # Replace the `json_dump` with a subset.
        json_dump = processed_crash.get('json_dump', {})
        redacted_json_dump = {
            k: json_dump.get(k)
            for k in self.config.json_dump_whitelist_keys
        }
        processed_crash['json_dump'] = redacted_json_dump

        super(ESCrashStorageRedactedJsonDump,
              self).save_raw_and_processed(raw_crash, dumps, processed_crash,
                                           crash_id)
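
The dict comprehension keeps only the whitelisted keys; with invented data, the reduction behaves like this (absent keys come through as None):

# invented data showing the json_dump reduction performed above
json_dump = {'largest_free_vm_block': '0x77ff0000', 'frames': ['...']}
keep = ['largest_free_vm_block', 'tiny_block_size', 'write_combine_size']
redacted = {k: json_dump.get(k) for k in keep}
# {'largest_free_vm_block': '0x77ff0000', 'tiny_block_size': None,
#  'write_combine_size': None}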
Example #14
class PGPVNewCrashSource(PGQueryNewCrashSource):
    required_config = Namespace()
    required_config.crash_id_query = change_default(
        PGQueryNewCrashSource, 'crash_id_query', "select uuid "
        "from reports_clean rc join product_versions pv "
        "    on rc.product_version_id = pv.product_version_id "
        "where "
        "%s <= date_processed and date_processed < %s "
        "and %s between pv.build_date and pv.sunset_date")
    required_config.add_option('date',
                               doc="a date in the form YYYY-MM-DD",
                               default=(utc_now() - timedelta(1)).date(),
                               from_string_converter=string_to_datetime)

    #--------------------------------------------------------------------------
    def __init__(self, config, name, quit_check_callback=None):
        super(PGPVNewCrashSource, self).__init__(config, name,
                                                 quit_check_callback)
        self.data = (
            config.date,
            config.date + timedelta(1),  # add a day
            config.date)
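
The three query parameters form a one-day window, [date, date + 1 day), plus the build-date probe from the SQL above. A sketch with a fixed date:

# a sketch of the (start, end, build-date) parameter triple with a fixed
# date standing in for config.date
from datetime import date, timedelta

a_date = date(2016, 1, 1)
data = (a_date, a_date + timedelta(1), a_date)
# reports are selected where a_date <= date_processed < a_date + 1 day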
Example #15
class ESCrashStorageRedactedSave(ESCrashStorage):
    required_config = Namespace()
    required_config.namespace('es_redactor')
    required_config.es_redactor.add_option(
        name="redactor_class",
        doc="the name of the class that implements a 'redact' method",
        default='socorro.external.crashstorage_base.Redactor',
        from_string_converter=class_converter,
    )
    required_config.es_redactor.forbidden_keys = change_default(
        Redactor, "forbidden_keys", "json_dump, "
        "upload_file_minidump_flash1.json_dump, "
        "upload_file_minidump_flash2.json_dump, "
        "upload_file_minidump_browser.json_dump")

    #--------------------------------------------------------------------------
    def __init__(self, config, quit_check_callback=None):
        super(ESCrashStorageRedactedSave,
              self).__init__(config, quit_check_callback)
        self.redactor = config.es_redactor.redactor_class(config.es_redactor)
        self.config.logger.warning(
            "Beware, this crashstorage class is destructive to the "
            "processed crash - if you're using a polycrashstore you may "
            "find the modified processed crash saved to the other crashstores."
        )

    #--------------------------------------------------------------------------
    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        """This is the only write mechanism that is actually employed in normal
        usage.
        """
        self.redactor.redact(processed_crash)

        super(ESCrashStorageRedactedSave,
              self).save_raw_and_processed(raw_crash, dumps, processed_crash,
                                           crash_id)
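
The forbidden_keys default is a comma-separated list of dotted paths into the processed crash. A minimal sketch of that kind of redaction over nested mappings (an illustration, not the actual socorro.external.crashstorage_base.Redactor):

# a minimal sketch of dotted-path redaction; not socorro's Redactor itself
def redact_sketch(processed_crash, forbidden_keys):
    for dotted_key in [k.strip() for k in forbidden_keys.split(',')]:
        parts = dotted_key.split('.')
        node = processed_crash
        try:
            for part in parts[:-1]:
                node = node[part]
            del node[parts[-1]]
        except KeyError:
            pass  # path absent in this crash; nothing to redact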
Example #16
class CorrelationInterestingModulesRule(CorrelationRule):
    """this class attempts to be a faithful reproduction of the function of
    the original dbaron the "per-crash-interesting-modules.py" application
    embodied as a Socorro TransformRule.

    Individual crashes will be offered to this rule by a Fetch Transform Save
    app through the "_action_" method.  This class will examine the crash and
    to counters build on an instance of a ProductVersionMapping.  The counter
    add structure it builds looks like this:

    pv_counters[os_name*]
        .count
        .signatures[a_signature*]
           .count
           .modules[a_module*]
               .count
               .versions[a_version*] int
        .modules[a_module*]
            .count
            .versions[a_version*] int


    """
    required_config = Namespace()
    required_config.add_option("show_versions",
                               doc="Show data on module versions",
                               default=False)
    required_config.add_option("addons",
                               doc="Tabulate addons (rather than modules)",
                               default=False)
    required_config.add_option("min_baseline_diff",
                               doc="a floating point number",
                               default=0.05)
    required_config.namespace('output')
    required_config.output.output_class = change_default(
        CorrelationRule,
        'output.output_class', 'socorro.analysis.correlations.interesting_rule'
        '.FileOutputForInterestingModules',
        new_reference_value='global.correlations.interesting')

    #--------------------------------------------------------------------------
    def version(self):
        return '1.0'

    #--------------------------------------------------------------------------
    def __init__(self, config=None, quit_check_callback=None):
        super(CorrelationInterestingModulesRule,
              self).__init__(config, quit_check_callback)
        counters = self.counters_for_all_producs_and_versions
        for an_accumulator in counters.values():
            an_accumulator.osyses = {}
        self.date_suffix = defaultdict(int)
        self.summary_names = {
            #(show_versions, addons)
            (False, False): 'interesting-modules',
            (True, False): 'interesting-modules-with-versions',
            (False, True): 'interesting-addons',
            (True, True): 'interesting-addons-with-versions',
        }

    #--------------------------------------------------------------------------
    def summary_name(self):
        return self.summary_names[(
            self.config.show_versions,
            self.config.addons,
        )]

    #--------------------------------------------------------------------------
    @staticmethod
    def contains_bare_address(a_signature):
        return re.search(r"\S+@0x[0-9a-fA-F]+$", a_signature) is not None

    #--------------------------------------------------------------------------
    @staticmethod
    def remove_bare_address_from_signature(a_signature):
        return re.sub(r"@0x[0-9a-fA-F]+$", "", a_signature)

    #--------------------------------------------------------------------------
    def _action(self, raw, dumps, crash, processor_meta):
        self.date_suffix[crash['crash_id'][-6:]] += 1
        if not "os_name" in crash:
            # We have some bad crash reports.
            return False

        # give the names of the old algorithm's critical variables to their
        # variables in the new system
        pv_key = (crash["product"], crash["version"])
        try:
            osyses = self.counters_for_all_producs_and_versions[pv_key].osyses
            self.counters_for_all_producs_and_versions[pv_key].counter += 1
        except (AttributeError, KeyError):
            # why both types? crashes can be represented by either the Socorro
            # or configman DotDict types, which raise different exceptions on
            # not finding a key.
            osyses = {}
            self.counters_for_all_producs_and_versions[pv_key].osyses = osyses
            self.counters_for_all_producs_and_versions[pv_key].counter = 1

        options = self.config

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # begin - refactored code section
        # unlike the "core count correlation report", this code from the
        # original was refactored to help understand the structure of the
        # counters so that a generic summary structure could be made.  This
        # allows the summary information to be output somewhere other than
        # stdout.
        #
        # the structure has been broken down into levels of regular dicts
        # and SocorroDotDicts.  The DotDicts have keys that are constant
        # and no more are added when new crashes come in.  The regular dicts
        # are keyed with variable things that come in with crashes.  In the
        # structure below, keys of DotDicts are shown as constants like
        # ".count" and ".modules".  The keys of the dicts are shown as the
        # name of a field with a * (to designate zero or more) inside square
        # brackets.
        #
        # the counters structure looks like this:
        #     pv_counters[os_name*]
        #         .count
        #         .signatures[a_signature*]
        #             .count
        #             .modules[a_module*]
        #                 .count
        #                 .versions[a_version*] int
        #         .modules[a_module*]
        #              .count
        #              .versions[a_version*] int

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        os_name = crash["os_name"]
        # The os_version field is way too specific on Linux, and we don't
        # have much Linux data anyway.
        if options.by_os_version and os_name != "Linux":
            os_name = os_name + " " + crash["os_version"]
        counters_for_an_os = osyses.setdefault(
            os_name,
            SocorroDotDict({
                "count": 0,
                "signatures": {},
                "modules": {},
            }))
        a_signature = crash["signature"]
        if self.contains_bare_address(a_signature):
            if options.condense:
                # Condense all signatures in a given DLL.
                a_signature = self.remove_bare_address_from_signature(
                    a_signature)
        if "reason" in crash and crash["reason"] is not None:
            a_signature = a_signature + "|" + crash["reason"]
        counters_for_a_signature = counters_for_an_os.signatures.setdefault(
            a_signature,
            SocorroDotDict({
                "count": 0,
                "modules": {}
            }),
        )
        list_of_counters = [counters_for_an_os, counters_for_a_signature]
        # increment both the os & signature counters
        for a_counter in list_of_counters:
            a_counter.count += 1

        for libname, version in self.generate_modules_or_addons(crash):
            # Increment the global count on osys and the per-signature count.
            for a_counter in list_of_counters:
                counters_for_modules = a_counter.modules.setdefault(
                    libname,
                    SocorroDotDict({
                        "count": 0,
                        "versions": defaultdict(int),
                    }))
                counters_for_modules.count += 1
                # Count versions of each module as well.
                counters_for_modules.versions[version] += 1
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # end - refactored code section
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        return True

    #--------------------------------------------------------------------------
    def _summary_for_a_product_version_pair(self, a_pv_accumulator):
        """in the original code, the counter structures were walked and
        manipulated to form the statistics.  Once a stat was determined,
        it was printed to stdout.  Since we want to have various means of
        outputting the data, instead of printing to stdout, this method
        save the statistic in a "summary_structure"  This structure will
        later be walked for printing or output to some future storage scheme

        The summary structure looks like this:
        pv_summary
            .date_key  # a list of the last six UUID characters present
            .notes  # any notes added by the algorithm to tell of problems
            .os_counters[os_name*]
                 .count
                 .signatures[a_signature*]
                     .count
                     .in_sig_ratio
                     .in_os_ratio
                     .in_os_count
                     .osys_count
                     .modules[a_module*]  # may be addons
                         .in_sig_ratio
                         .in_os_ratio
                         .in_os_count
                         .osys_count
                         .versions[a_version*]  # may be addon versions
                             .sig_ver_ratio
                             .sig_ver_count
                             .sig_count
                             .os_ver_ratio
                             .os_ver_count
                             .osys_count
                             .version
        """

        options = self.config
        pv_summary = SocorroDotDict({
            'notes': [],
        })
        if len(self.date_suffix) > 1:
            message = ("crashes from more than one day %s" %
                       str(tuple(self.date_suffix.keys())))
            pv_summary.notes.append(message)
        pv_summary.date_key = self.date_suffix.keys()[0]
        pv_summary.os_counters = {}

        MIN_CRASHES = self.config.min_crashes
        counters_for_multiple_os = a_pv_accumulator.osyses

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # begin - refactored code section
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        infostr_re = re.compile(r"^(.*) with (\d+) cores$")  # unused here

        for os_name in counters_for_multiple_os.keys():
            counters_for_an_os = counters_for_multiple_os[os_name]

            pv_summary.os_counters[os_name] = SocorroDotDict()
            pv_summary.os_counters[os_name].count = counters_for_an_os.count
            pv_summary.os_counters[os_name].signatures = {}
            filtered_signatures = [
                (signature, signature_counter)
                for signature, signature_counter
                in counters_for_an_os["signatures"].items()
                if signature_counter.count >= MIN_CRASHES
            ]
            for a_signature, a_signature_counter in filtered_signatures:
                pv_summary.os_counters[os_name].signatures[
                    a_signature] = SocorroDotDict()
                pv_summary.os_counters[os_name].signatures[
                    a_signature].count = a_signature_counter.count
                pv_summary.os_counters[os_name].signatures[
                    a_signature].modules = {}
                modules_list = [
                    SocorroDotDict({
                        "libname": module_name,
                        "in_sig_count": a_module_counter.count,
                        "in_sig_ratio": (
                            float(a_module_counter.count) /
                            a_signature_counter.count
                        ),
                        "in_sig_versions": a_module_counter.versions,
                        "in_os_count":
                            counters_for_an_os.modules[module_name].count,
                        "in_os_ratio": (
                            float(
                                counters_for_an_os.modules[module_name].count
                            ) / counters_for_an_os.count
                        ),
                        "in_os_versions":
                            counters_for_an_os.modules[module_name].versions,
                    })
                    for module_name, a_module_counter
                    in a_signature_counter.modules.iteritems()
                ]

                modules_list = [
                    module for module in modules_list
                    if (module.in_sig_ratio - module.in_os_ratio
                        >= self.config.min_baseline_diff)
                ]

                modules_list.sort(
                    key=lambda module: (module.in_sig_ratio -
                                        module.in_os_ratio),
                    reverse=True
                )

                for module in modules_list:
                    module_name = module.libname
                    if options.addons:
                        info = addonids.info_for_id(module_name)
                        if info is not None:
                            module_name = (
                                module_name +
                                u" ({0}, {1})".format(info.name, info.url))
                    if (options.show_versions
                            and len(module.in_os_versions) == 1):
                        onlyver = module.in_os_versions.keys()[0]
                        if os_name.startswith("Mac OS X"):
                            info = macdebugids.info_for_id(
                                module_name, onlyver)
                            if info is not None:
                                onlyver = onlyver + "; " + info
                        if onlyver != "":
                            module_name = module_name + " (" + onlyver + ")"
                    module_summary = SocorroDotDict()
                    pv_summary.os_counters[os_name].signatures[
                        a_signature].modules[module_name] = module_summary
                    module_summary.in_sig_count = module.in_sig_count
                    module_summary.in_sig_ratio = int(
                        round(module.in_sig_ratio * 100))
                    module_summary.in_os_ratio = int(
                        round(module.in_os_ratio * 100))
                    module_summary.in_os_count = module.in_os_count
                    module_summary.osys_count = counters_for_an_os.count

                    if (options.show_versions
                            and len(module.in_os_versions) != 1):
                        versions = module.in_os_versions.keys()
                        versions.sort()
                        module_summary.versions = {}
                        for version in versions:
                            sig_ver_count = module.in_sig_versions.get(
                                version, 0)
                            os_ver_count = module.in_os_versions[version]
                            if os_name.startswith("Mac OS X"):
                                info = macdebugids.info_for_id(
                                    module_name, version)
                                if info is not None:
                                    version = version + " (" + info + ")"
                            version_summary = SocorroDotDict()
                            module_summary.versions[version] = version_summary
                            version_summary.sig_ver_ratio = int(round(
                                float(sig_ver_count) /
                                a_signature_counter.count * 100))
                            version_summary.sig_ver_count = sig_ver_count
                            version_summary.sig_count = (
                                a_signature_counter.count)
                            version_summary.os_ver_ratio = int(round(
                                float(os_ver_count) /
                                counters_for_an_os.count * 100))
                            version_summary.os_ver_count = os_ver_count
                            version_summary.osys_count = (
                                counters_for_an_os.count)
                            version_summary.version = version
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # end - refactored code section
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        return pv_summary

    #--------------------------------------------------------------------------
    def generate_modules_or_addons(self, crash):
        options = self.config
        if options.addons:
            for addon in crash["addons"]:
                yield addon[0], addon[1]
        else:
            if "json_dump" in crash and "modules" in crash["json_dump"]:
                for module in crash["json_dump"]["modules"]:
                    libname = module["filename"]
                    version = module["version"]
                    pdb = module["debug_file"]  # never used?
                    checksum = module["debug_id"]
                    addrstart = module["base_addr"]  # vener used?
                    addrend = module["end_addr"]  # never used?
                    if crash["os_name"].startswith("Win"):
                        # We only have good version data on Windows.
                        yield libname, version
                    else:
                        yield libname, checksum

    #--------------------------------------------------------------------------
    def summarize(self):
        # for each product version pair in the accumulators
        summary = {}
        counters = self.counters_for_all_producs_and_versions
        for pv, an_accumulator in counters.iteritems():
            summary['_'.join(pv)] = self._summary_for_a_product_version_pair(
                an_accumulator)
        return summary
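
As a concrete picture of the pv_counters shape described in the class docstring, a fully populated entry for one OS might look like this (all values invented; plain dicts shown for readability, where the real structure mixes SocorroDotDicts and dicts):

# an invented, fully populated slice of the counter structure that
# _action builds for a single OS
pv_counters_entry = {
    'count': 120,                       # crashes seen on this OS
    'signatures': {
        'js::jit::EnterIon': {
            'count': 12,                # crashes with this signature
            'modules': {
                'xul.dll': {'count': 12, 'versions': {'45.0': 12}},
            },
        },
    },
    'modules': {
        'xul.dll': {'count': 118, 'versions': {'45.0': 118}},
    },
}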
Example #17
class CorrelationCoreCountRule(CorrelationRule):
    """this class attempts to be a faithful reproduction of the function of
    the original dbaron the "per-crash-core-count.py" application embodied as
    a Socorro TransformRule.

    Individual crashes will be offered to this rule by a Fetch Transform Save
    app through the "_action_" method.  This class will examine the crash and
    to counters build on an instance of a ProductVersionMapping.  The counter
    add structure it builds looks like this:

    a_product_version_mapping[product_version*]
        .osyses[operating_system_name*]
            .count
            .signatures[a_signature*]
                .count
                .core_counts[number_of_cores*]
            .core_counts[number_of_cores*]


    """
    required_config = Namespace()
    required_config.namespace('output')
    required_config.output.output_class = change_default(
        CorrelationRule,
        'output.output_class', 'socorro.analysis.correlations.core_count_rule'
        '.FileOutputForCoreCounts',
        new_reference_value='global.correlations.core')

    #--------------------------------------------------------------------------
    def version(self):
        return '1.0'

    #--------------------------------------------------------------------------
    def __init__(self, config=None, quit_check_callback=None):
        super(CorrelationCoreCountRule, self).__init__(config,
                                                       quit_check_callback)
        counters = self.counters_for_all_producs_and_versions
        for an_accumulator in counters.values():
            an_accumulator.osyses = {}
        self.date_suffix = defaultdict(int)

    #--------------------------------------------------------------------------
    def summary_name(self):
        return 'core-counts'

    #--------------------------------------------------------------------------
    def _action(self, raw, dumps, crash, processor_meta):
        self.date_suffix[crash['crash_id'][-6:]] += 1
        if not "os_name" in crash:
            # We have some bad crash reports.
            return False

        # give the names of the old algorithm's critical variables to their
        # variables in the new system
        # what does "osyses" mean?  this is the original variable name from
        # the dbaron correlation scripts for a mapping of each os name to the
        # counters for the signatures & crashes for that os.
        pv_key = (crash["product"], crash["version"])
        try:
            osyses = self.counters_for_all_producs_and_versions[pv_key].osyses
            self.counters_for_all_producs_and_versions[pv_key].counter += 1
        except (AttributeError, KeyError):
            osyses = {}
            self.counters_for_all_producs_and_versions[pv_key].osyses = osyses
            self.counters_for_all_producs_and_versions[pv_key].counter = 1
        options = self.config

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # begin - original unaltered code section
        # to not introduce errors, this code was not refactored to produce more
        # comprehensible variable names or adopt current style guides.
        # glossary of names:
        #     osyses - a mapping keyed by the name of an OS
        #     osys - the counter structure for an individual OS
        #     signame - a signature
        #     signature - the counter structure for a signature
        #     accumulate_objs - a list of counter structures
        #     obj - a counter as a loop variable
        #     crash - a socorro processed crash
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        osname = crash["os_name"]
        # The os_version field is way too specific on Linux, and we don't
        # have much Linux data anyway.
        if options.by_os_version and osname != "Linux":
            osname = osname + " " + crash["os_version"]
        osys = osyses.setdefault(osname, {
            "count": 0,
            "signatures": {},
            "core_counts": {}
        })
        signame = crash["signature"]
        if re.search(r"\S+@0x[0-9a-fA-F]+$", signame) is not None:
            if options.condense:
                # Condense all signatures in a given DLL.
                signame = re.sub(r"@0x[0-9a-fA-F]+$", "", signame)
        if "reason" in crash and crash["reason"] is not None:
            signame = signame + "|" + crash["reason"]
        signature = osys["signatures"].setdefault(signame, {
            "count": 0,
            "core_counts": {}
        })
        accumulate_objs = [osys, signature]

        for obj in accumulate_objs:
            obj["count"] = obj["count"] + 1

        if "json_dump" in crash and "system_info" in crash["json_dump"]:
            family = crash["json_dump"]["system_info"]["cpu_arch"]
            details = crash["json_dump"]["system_info"]["cpu_info"]  # unused?
            cores = crash["json_dump"]["system_info"]["cpu_count"]
            infostr = family + " with " + str(cores) + " cores"
            # Increment the global count on osys and the per-signature count.
            for obj in accumulate_objs:
                obj["core_counts"][infostr] = \
                    obj["core_counts"].get(infostr, 0) + 1
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # end - original unaltered code section
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        return True

    #--------------------------------------------------------------------------
    def _summary_for_a_product_version_pair(self, an_accumulator):
        """in the original code, the counter structures were walked and
        manipulated to form the statistics.  Once a stat was determined,
        it was printed to stdout.  Since we want to have various means of
        outputting the data, instead of printing to stdout, this method
        save the statistic in a "summary_structure"  This structure will
        later be walked for printing or output to some future storage scheme

        The summary structure looks like this:

        summary[product_version*]
            .note - a list of comments by the algorithm
            [os_name]
                .count
                .signatures[signame*]
                    .name
                    .count
                    .cores[number_of_cores]
                        .in_sig_count
                        .in_sig_ratio
                        .rounded_in_sig_ratio
                        .in_os_count
                        .in_os_ratio
                        .rounded_in_os_ratio

        """
        pv_summary = {
            'notes': [],
        }
        if len(self.date_suffix) > 1:
            message = ("crashes from more than one day %s" %
                       str(tuple(self.date_suffix.keys())))
            pv_summary['notes'].append(message)
        pv_summary['date_key'] = self.date_suffix.keys()[0]

        MIN_CRASHES = self.config.min_crashes
        osyses = an_accumulator.osyses

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # begin - minimally altered section from original code
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        infostr_re = re.compile(r"^(.*) with (\d+) cores$")

        #----------------------------------------------------------------------
        def cmp_infostr(x, y):
            (familyx, coresx) = infostr_re.match(x).groups()
            (familyy, coresy) = infostr_re.match(y).groups()
            if familyx != familyy:
                return cmp(familyx, familyy)
            return cmp(int(coresx), int(coresy))

        #----------------------------------------------------------------------
        sorted_osyses = osyses.keys()
        sorted_osyses.sort()

        for osname in sorted_osyses:
            osys = osyses[osname]

            pv_summary[osname] = SocorroDotDict()
            pv_summary[osname].count = osys['count']
            pv_summary[osname].signatures = {}

            sorted_signatures = [
                sig for sig in osys["signatures"].items()
                if sig[1]["count"] >= MIN_CRASHES
            ]
            sorted_signatures.sort(key=lambda sig_pair: sig_pair[1]["count"],
                                   reverse=True)
            sorted_cores = osys["core_counts"].keys()
            # strongly suspect that sorting is useless here
            sorted_cores.sort(cmp=cmp_infostr)
            for signame, sig in sorted_signatures:
                pv_summary[osname].signatures[signame] = SocorroDotDict({
                    'name': signame,
                    'count': sig['count'],
                    'cores': {},
                })
                by_number_of_cores = \
                    pv_summary[osname].signatures[signame].cores
                for cores in sorted_cores:
                    by_number_of_cores[cores] = SocorroDotDict()
                    in_sig_count = sig["core_counts"].get(cores, 0)
                    in_sig_ratio = float(in_sig_count) / sig["count"]
                    in_os_count = osys["core_counts"][cores]
                    in_os_ratio = float(in_os_count) / osys["count"]

                    rounded_in_sig_ratio = int(round(in_sig_ratio * 100))
                    rounded_in_os_ratio = int(round(in_os_ratio * 100))
                    by_number_of_cores[cores].in_sig_count = in_sig_count
                    by_number_of_cores[cores].in_sig_ratio = in_sig_ratio
                    by_number_of_cores[cores].rounded_in_sig_ratio = \
                        rounded_in_sig_ratio
                    by_number_of_cores[cores].in_os_count = in_os_count
                    by_number_of_cores[cores].in_os_ratio = in_os_ratio
                    by_number_of_cores[cores].rounded_in_os_ratio = \
                        rounded_in_os_ratio
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # end - minimally altered code section
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        return pv_summary

    #--------------------------------------------------------------------------
    def summarize(self):
        # for each product version pair in the accumulators
        summary = {}
        counters = self.counters_for_all_producs_and_versions
        for pv, counters_for_pv in counters.iteritems():
            summary['_'.join(pv)] = self._summary_for_a_product_version_pair(
                counters_for_pv)
        return summary
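
The sort(cmp=cmp_infostr) call above is Python-2-only; the same ordering (by CPU family, then numerically by core count) can be expressed with a key function, which also survives Python 3:

# a key-based equivalent of the cmp_infostr sort, assuming strings shaped
# like "x86 with 4 cores"
import re

infostr_re = re.compile(r"^(.*) with (\d+) cores$")

def infostr_key(infostr):
    family, cores = infostr_re.match(infostr).groups()
    return (family, int(cores))

sorted_cores = sorted(["x86 with 16 cores", "x86 with 2 cores"],
                      key=infostr_key)
# ['x86 with 2 cores', 'x86 with 16 cores'] -- a plain string sort would
# put "16" before "2"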
Example #18
class BotoS3CrashStorage(BotoCrashStorage):
    required_config = Namespace()
    required_config.resource_class = change_default(
        BotoCrashStorage, 'resource_class',
        'socorro.external.boto.connection_context.RegionalS3ConnectionContext')
Example #19
class TelemetryBotoS3CrashStorage(BotoS3CrashStorage):
    """S3 crash storage class for sending a subset of the processed crash
    but reduced to only include the files in the processed crash
    JSON Schema."""

    required_config = Namespace()
    required_config.resource_class = change_default(
        BotoCrashStorage, 'resource_class',
        'socorro.external.boto.connection_context.RegionalS3ConnectionContext')

    required_config.elasticsearch = Namespace()
    required_config.elasticsearch.add_option(
        'elasticsearch_class',
        default='socorro.external.es.connection_context.ConnectionContext',
        from_string_converter=class_converter,
        reference_value_from='resource.elasticsearch',
    )

    def __init__(self, config, *args, **kwargs):
        # This class requires that we use
        # SimpleDatePrefixKeyBuilder, so we stomp on the configuration
        # to make absolutely sure it gets set that way.
        config.keybuilder_class = SimpleDatePrefixKeyBuilder
        super(TelemetryBotoS3CrashStorage,
              self).__init__(config, *args, **kwargs)

    def _get_all_fields(self):
        if (hasattr(self, '_all_fields')
                and hasattr(self, '_all_fields_timestamp')):
            # we might have it cached
            age = time.time() - self._all_fields_timestamp
            if age < 60 * 60:
                # fresh enough
                return self._all_fields

        self._all_fields = SuperSearchFields(config=self.config).get()
        self._all_fields_timestamp = time.time()
        return self._all_fields

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        all_fields = self._get_all_fields()
        crash_report = {}

        # TODO: opportunity for optimization;
        # we could inspect CRASH_REPORT_JSON_SCHEMA, collect all of its
        # (recursive) keys, and use that list to keep the two loops below
        # from filling `crash_report` with keys that will never be needed.

        # Rename fields in raw_crash.
        raw_fields_map = dict((x['in_database_name'], x['name'])
                              for x in all_fields.values()
                              if x['namespace'] == 'raw_crash')
        for key, val in raw_crash.items():
            crash_report[raw_fields_map.get(key, key)] = val

        # Rename fields in processed_crash.
        processed_fields_map = dict((x['in_database_name'], x['name'])
                                    for x in all_fields.values()
                                    if x['namespace'] == 'processed_crash')
        for key, val in processed_crash.items():
            crash_report[processed_fields_map.get(key, key)] = val

        # Validate crash_report.
        crash_report = json_schema_reducer.make_reduced_dict(
            CRASH_REPORT_JSON_SCHEMA, crash_report)
        self.save_processed(crash_report)

    @staticmethod
    def _do_save_processed(boto_connection, processed_crash):
        """Overriding this method so we can control the "name of thing"
        prefix used to upload to S3."""
        crash_id = processed_crash['uuid']
        processed_crash_as_string = boto_connection._convert_mapping_to_string(
            processed_crash)
        boto_connection.submit(crash_id, "crash_report",
                               processed_crash_as_string)
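
The renaming loops translate each field's in_database_name to its public name. A small sketch with invented SuperSearchFields-style metadata:

# a sketch of the renaming pass with invented field metadata shaped like
# SuperSearchFields entries
all_fields = {
    'platform': {'in_database_name': 'os_name', 'name': 'platform',
                 'namespace': 'processed_crash'},
}
processed_fields_map = dict((x['in_database_name'], x['name'])
                            for x in all_fields.values()
                            if x['namespace'] == 'processed_crash')
processed_crash = {'os_name': 'Windows NT', 'uuid': 'hypothetical-id'}
crash_report = {}
for key, val in processed_crash.items():
    crash_report[processed_fields_map.get(key, key)] = val
# crash_report == {'platform': 'Windows NT', 'uuid': 'hypothetical-id'}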
Example #20
class BreakpadStackwalkerRule2015(ExternalProcessRule):

    required_config = Namespace()
    required_config.add_option(name='public_symbols_url',
                               doc='url of the public symbol server',
                               default="https://localhost",
                               likely_to_be_changed=True)
    required_config.add_option(name='private_symbols_url',
                               doc='url of the private symbol server',
                               default="https://localhost",
                               likely_to_be_changed=True)
    required_config.command_line = change_default(
        ExternalProcessRule, 'command_line',
        'timeout -s KILL 30 {command_pathname} '
        '--raw-json {raw_crash_pathname} '
        '--symbols-url {public_symbols_url} '
        '--symbols-url {private_symbols_url} '
        '--symbols-cache {symbol_cache_path} '
        '{dump_file_pathname} '
        '2>/dev/null')
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        '/data/socorro/stackwalk/bin/stackwalker',
    )
    required_config.add_option(
        'symbol_cache_path',
        doc='the path where the symbol cache is found, this location must be '
        'readable and writeable (quote path with embedded spaces)',
        default=os.path.join(tempfile.gettempdir(), 'symbols'),
    )
    required_config.add_option(
        'temporary_file_system_storage_path',
        doc='a path where temporary files may be written',
        default=tempfile.gettempdir(),
    )

    #--------------------------------------------------------------------------
    def version(self):
        return '1.0'

    #--------------------------------------------------------------------------
    @contextmanager
    def _temp_raw_crash_json_file(self, raw_crash, crash_id):
        file_pathname = os.path.join(
            self.config.temporary_file_system_storage_path,
            "%s.%s.TEMPORARY.json" %
            (crash_id, threading.currentThread().getName()))
        with open(file_pathname, "w") as f:
            ujson.dump(raw_crash, f)
        try:
            yield file_pathname
        finally:
            os.unlink(file_pathname)

    #--------------------------------------------------------------------------
    def _execute_external_process(self, command_line, processor_meta):
        stackwalker_output, return_code = super(
            BreakpadStackwalkerRule2015,
            self)._execute_external_process(command_line, processor_meta)

        if not isinstance(stackwalker_output, Mapping):
            processor_meta.processor_notes.append(
                "MDSW produced unexpected output: %s..." %
                str(stackwalker_output)[:10])
            stackwalker_output = {}

        stackwalker_data = DotDict()
        stackwalker_data.json_dump = stackwalker_output
        stackwalker_data.mdsw_return_code = return_code

        stackwalker_data.mdsw_status_string = stackwalker_output.get(
            'status', 'unknown error')
        stackwalker_data.success = stackwalker_data.mdsw_status_string == 'OK'

        if return_code == 124:
            processor_meta.processor_notes.append(
                "MDSW terminated with SIGKILL due to timeout")
        elif return_code != 0 or not stackwalker_data.success:
            processor_meta.processor_notes.append(
                "MDSW failed on '%s': %s" %
                (command_line, stackwalker_data.mdsw_status_string))

        return stackwalker_data, return_code

    #--------------------------------------------------------------------------
    def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        if 'additional_minidumps' not in processed_crash:
            processed_crash.additional_minidumps = []
        with self._temp_raw_crash_json_file(
                raw_crash, raw_crash.uuid) as raw_crash_pathname:
            for dump_name in raw_dumps.iterkeys():

                if processor_meta.quit_check:
                    processor_meta.quit_check()

                # this rule is only interested in dumps targeted for the
                # minidump stackwalker external program.  As of the writing
                # of this code, there is one other dump type.  The only way
                # to differentiate these dump types is by the name of the
                # dump.  All minidumps targeted for the stackwalker will have
                # a name with a prefix specified in configuration:
                if not dump_name.startswith(self.config.dump_field):
                    # dumps not intended for the stackwalker are ignored
                    continue

                dump_pathname = raw_dumps[dump_name]

                if self.config.chatty:
                    self.config.logger.debug("BreakpadStackwalkerRule: %s, %s",
                                             dump_name, dump_pathname)

                command_line = self.config.command_line.format(
                    **dict(self.config,
                           dump_file_pathname=dump_pathname,
                           raw_crash_pathname=raw_crash_pathname))

                stackwalker_data, return_code = self._execute_external_process(
                    command_line, processor_meta)

                if dump_name == self.config.dump_field:
                    processed_crash.update(stackwalker_data)
                else:
                    processed_crash.additional_minidumps.append(dump_name)
                    processed_crash[dump_name] = stackwalker_data

        return True
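
A usage sketch of the _temp_raw_crash_json_file context manager, assuming a configured rule instance; the temporary JSON file is always unlinked when the block exits:

# a usage sketch, assuming `rule` is a configured BreakpadStackwalkerRule2015;
# the raw crash below is a hypothetical placeholder
raw_crash = {'uuid': '00000000-0000-0000-0000-000000000000'}
with rule._temp_raw_crash_json_file(raw_crash, raw_crash['uuid']) as pathname:
    # pathname names a JSON serialization of raw_crash, ready to be
    # interpolated into the command line as {raw_crash_pathname}
    pass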