Example #1
class ReprocessingOneRabbitMQCrashStore(ReprocessingRabbitMQCrashStore):
    required_config = Namespace()
    required_config.rabbitmq_class = change_default(
        RabbitMQCrashStorage,
        'rabbitmq_class',
        ConnectionContext,
    )
    required_config.routing_key = change_default(
        RabbitMQCrashStorage,
        'routing_key',
        'socorro.reprocessing'
    )

    def reprocess(self, crash_ids):
        if not isinstance(crash_ids, (list, tuple)):
            crash_ids = [crash_ids]
        success = bool(crash_ids)
        for crash_id in crash_ids:
            if not self.save_raw_crash(
                DotDict({'legacy_processing': 0}),
                [],
                crash_id
            ):
                success = False
        return success
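A brief usage sketch for the reprocess() method above; the config object and crash IDs are hypothetical placeholders (in a real deployment the config comes from configman).

# Hypothetical usage sketch -- config and the crash IDs are placeholders.
store = ReprocessingOneRabbitMQCrashStore(config)

# A single crash ID is wrapped into a list automatically.
store.reprocess('00000000-0000-0000-0000-000000000000')

# Several IDs result in one reprocessing request each; the return value is
# False if the list is empty or if any save_raw_crash call fails.
ok = store.reprocess(['crash-id-1', 'crash-id-2'])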
Example #2
class CorrelationInterestingAddonsVersionsRule(CorrelationInterestingModulesRule):
    required_config = Namespace()
    required_config.addons = change_default(
        CorrelationInterestingModulesRule,
        'addons',
        True
    )
    required_config.show_versions = change_default(
        CorrelationInterestingModulesRule,
        'show_versions',
        True
    )
Example #3
class ReprocessingRabbitMQCrashStore(RabbitMQCrashStorage):
    required_config = Namespace()
    required_config.routing_key = change_default(
        RabbitMQCrashStorage,
        'routing_key',
        'socorro.reprocessing'
    )
    required_config.filter_on_legacy_processing = change_default(
        RabbitMQCrashStorage,
        'filter_on_legacy_processing',
        False
    )
Example #4
class DumpLookupExternalRule(ExternalProcessRule):

    required_config = Namespace()
    required_config.add_option(
        'dump_field',
        doc='the default name of a dump',
        default='upload_file_minidump',
    )
    required_config.add_option(
        'processor_symbols_pathname_list',
        doc='comma or space separated list of symbol files just as for '
        'minidump_stackwalk (quote paths with embedded spaces)',
        default='/mnt/socorro/symbols/symbols_ffx,'
        '/mnt/socorro/symbols/symbols_sea,'
        '/mnt/socorro/symbols/symbols_tbrd,'
        '/mnt/socorro/symbols/symbols_sbrd,'
        '/mnt/socorro/symbols/symbols_os',
        from_string_converter=_create_symbol_path_str
    )
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        '/data/socorro/stackwalk/bin/dump-lookup'
    )
    required_config.command_line = change_default(
        ExternalProcessRule,
        'command_line',
        'timeout -s KILL 30 {command_pathname} '
        '{dumpfile_pathname} '
        '{processor_symbols_pathname_list} '
        '2>/dev/null'
    )
    required_config.result_key = change_default(
        ExternalProcessRule,
        'result_key',
        'dump_lookup'
    )
    required_config.return_code_key = change_default(
        ExternalProcessRule,
        'return_code_key',
        'dump_lookup_return_code'
    )

    #--------------------------------------------------------------------------
    def _predicate(
        self,
        raw_crash,
        raw_dumps,
        processed_crash,
        processor_meta
    ):
        return 'create_dump_lookup' in raw_crash
Example #5
class BotoS3CrashStorage(BotoCrashStorage):
    required_config = Namespace()
    required_config.resource_class = change_default(
        BotoCrashStorage,
        'resource_class',
        'socorro.external.boto.connection_context.RegionalS3ConnectionContext'
    )
Example #6
class RegionalS3ConnectionContext(S3ConnectionContext):
    """This derived class forces you to connect to a specific region,
    which means we can use the OrdinaryCallingFormat as a calling format
    and then we'll be able to connect to S3 buckets with periods in their
    names.
    """
    required_config = Namespace()
    required_config.add_option(
        'region',
        doc="Name of the S3 region (e.g. us-west-2)",
        default='us-west-2',
        reference_value_from='resource.boto',
    )
    required_config.calling_format = change_default(
        S3ConnectionContext, 'calling_format',
        'boto.s3.connection.OrdinaryCallingFormat')

    #--------------------------------------------------------------------------
    def __init__(self, config, quit_check_callback=None):
        super(RegionalS3ConnectionContext, self).__init__(config)
        self._region = config.region
        self._connect_to_endpoint = boto.s3.connect_to_region

    #--------------------------------------------------------------------------
    def _connect(self):
        try:
            return self.connection
        except AttributeError:
            self.connection = self._connect_to_endpoint(
                self._region, **self._get_credentials())
            return self.connection
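The _connect() method above uses a lazy-connection idiom: the first call raises AttributeError because self.connection does not exist yet, so the boto connection is created and cached on the instance; later calls return the cached object. A self-contained sketch of just that idiom:

# Conceptual sketch of the lazy-connection caching used in _connect() above.
class LazyConnect(object):
    def _connect(self):
        try:
            return self.connection
        except AttributeError:
            # stands in for boto.s3.connect_to_region(...) in the real class
            self.connection = object()
            return self.connection

ctx = LazyConnect()
assert ctx._connect() is ctx._connect()  # the same cached connection both times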
Example #7
class SocorroLiteProcessorAlgorithm2015(Processor2015):
    """This is the class the processor uses to transform crashes with the
    SocorroLite rule sets."""

    required_config = Namespace()
    required_config.rule_sets = change_default(
        Processor2015, 'rule_sets',
        ujson.dumps(socorrolite_processor_rule_sets))
Example #8
class PGPVNewCrashSource(PGQueryNewCrashSource):
    required_config = Namespace()
    required_config.crash_id_query = change_default(
        PGQueryNewCrashSource,
        'crash_id_query',
        "select uuid "
        "from reports_clean rc join product_versions pv "
        "    on rc.product_version_id = pv.product_version_id "
        "where "
        "%s <= date_processed and date_processed < %s "
        "and %s between pv.build_date and pv.sunset_date"
    )
    required_config.add_option(
        'date',
        doc="a date in the form YYYY-MM-DD",
        default=(utc_now() - timedelta(1)).date(),
        from_string_converter=string_to_datetime
    )

    def __init__(self, config, name, quit_check_callback=None):
        super(PGPVNewCrashSource, self).__init__(
            config,
            name,
            quit_check_callback
        )
        self.data = (
            config.date,
            config.date + timedelta(1),  # add a day
            config.date
        )
Example #9
class PriorityjobRabbitMQCrashStore(RabbitMQCrashStorage):
    required_config = Namespace()
    required_config.rabbitmq_class = change_default(
        RabbitMQCrashStorage,
        'rabbitmq_class',
        ConnectionContext,
    )
    required_config.add_option(
        'routing_key',
        default='socorro.priority',
        doc='the name of the queue to receive crashes',
    )

    def process(self, crash_ids):
        if not isinstance(crash_ids, (list, tuple)):
            crash_ids = [crash_ids]
        success = bool(crash_ids)
        for crash_id in crash_ids:
            if not self.save_raw_crash(
                DotDict({'legacy_processing': 0}),
                [],
                crash_id
            ):
                success = False
        return success
Example #10
    def test_change_default(self):
        class Alpha(RequiredConfig):
            required_config = Namespace()
            required_config.add_option(
                'an_option',
                default=19,
                doc='this is an an_option',
                from_string_converter=str,
            )
        a_new_option_with_a_new_default = change_default(
            Alpha,
            'an_option',
            '29300'
        )

        ok_(
            a_new_option_with_a_new_default
            is not Alpha.required_config.an_option
        )
        eq_(
            a_new_option_with_a_new_default.default,
            '29300'
        )
        eq_(
            Alpha.required_config.an_option.default,
            19
        )
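The test above pins down the contract of change_default(): it returns a new Option carrying the replacement default, while the donor class's own option keeps its original default. A minimal illustrative sketch of a helper with that behaviour (not the real Socorro implementation) might look like the following; the new_reference_value keyword mirrors the usage in Example #30 below.

import copy

def change_default(cls, option_name, new_default, new_reference_value=None):
    # Illustrative sketch only.  Copy the Option from the donor class's
    # required_config so the donor's default stays untouched, as the
    # assertions above require.
    new_option = copy.copy(cls.required_config[option_name])
    new_option.default = new_default
    if new_reference_value is not None:
        new_option.reference_value_from = new_reference_value
    return new_option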
Example #11
class ESCrashStorageNoStackwalkerOutput(ESCrashStorage):
    required_config = Namespace()
    required_config.namespace('es_redactor')
    required_config.es_redactor.add_option(
        name="redactor_class",
        doc="the name of the class that implements a 'redact' method",
        default='socorro.external.crashstorage_base.Redactor',
        from_string_converter=class_converter,
    )
    required_config.es_redactor.forbidden_keys = change_default(
        Redactor, "forbidden_keys", "json_dump, "
        "upload_file_minidump_flash1.json_dump, "
        "upload_file_minidump_flash2.json_dump, "
        "upload_file_minidump_browser.json_dump")

    #--------------------------------------------------------------------------
    def __init__(self, config, quit_check_callback=None):
        """Init, you know.
        """
        super(ESCrashStorageNoStackwalkerOutput,
              self).__init__(config, quit_check_callback)
        self.redactor = config.es_redactor.redactor_class(config.es_redactor)
        self.config.logger.warning(
            "beware, this crashstorage class is destructive to the "
            "processed crash - if you're using a polycrashstore you may "
            "find the modified processed crash saved to the other crashstores")

    #--------------------------------------------------------------------------
    @staticmethod
    def reconstitute_datetimes(processed_crash):
        datetime_fields = [
            'submitted_timestamp',
            'date_processed',
            'client_crash_date',
            'started_datetime',
            'startedDateTime',
            'completed_datetime',
            'completeddatetime',
        ]
        for a_key in datetime_fields:
            try:
                processed_crash[a_key] = string_to_datetime(
                    processed_crash[a_key])
            except KeyError:
                # not there? we don't care
                pass

    #--------------------------------------------------------------------------
    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        """This is the only write mechanism that is actually employed in normal
        usage.
        """
        self.reconstitute_datetimes(processed_crash)
        self.redactor.redact(processed_crash)

        super(ESCrashStorageNoStackwalkerOutput,
              self).save_raw_and_processed(raw_crash, dumps, processed_crash,
                                           crash_id)
Example #12
class ESCrashStorageRedactedSave(ESCrashStorage):
    required_config = Namespace()
    required_config.namespace('es_redactor')
    required_config.es_redactor.add_option(
        name="redactor_class",
        doc="the name of the class that implements a 'redact' method",
        default='socorro.external.crashstorage_base.Redactor',
        from_string_converter=class_converter,
    )
    required_config.es_redactor.forbidden_keys = change_default(
        Redactor,
        "forbidden_keys",
        "json_dump, "
        "upload_file_minidump_flash1.json_dump, "
        "upload_file_minidump_flash2.json_dump, "
        "upload_file_minidump_browser.json_dump"
    )

    required_config.namespace('raw_crash_es_redactor')
    required_config.raw_crash_es_redactor.add_option(
        name="redactor_class",
        doc="the redactor class to use on the raw_crash",
        default='socorro.external.es.crashstorage.RawCrashRedactor',
        from_string_converter=class_converter,
    )

    def __init__(self, config, quit_check_callback=None):
        super(ESCrashStorageRedactedSave, self).__init__(
            config,
            quit_check_callback
        )
        self.redactor = config.es_redactor.redactor_class(config.es_redactor)
        self.raw_crash_redactor = config.raw_crash_es_redactor.redactor_class(
            config.raw_crash_es_redactor
        )
        self.config.logger.warning(
            "Beware, this crashstorage class is destructive to the "
            "processed crash - if you're using a polycrashstore you may "
            "find the modified processed crash saved to the other crashstores."
        )

    def is_mutator(self):
        # This crash storage mutates the crash, so we mark it as such.
        return True

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        """This is the only write mechanism that is actually employed in normal
        usage.
        """
        self.redactor.redact(processed_crash)
        self.raw_crash_redactor.redact(raw_crash)

        super(ESCrashStorageRedactedSave, self).save_raw_and_processed(
            raw_crash,
            dumps,
            processed_crash,
            crash_id
        )
Example #13
class ESCrashStorageRedactedJsonDump(ESCrashStorageRedactedSave):
    """This class stores redacted crash reports into Elasticsearch, but instead
    of removing the entire `json_dump`, it keeps only a subset of its keys.
    """
    required_config = Namespace()
    required_config.add_option(
        name="json_dump_whitelist_keys",
        doc="keys of the json_dump field to keep in the processed crash",
        default=[
            "largest_free_vm_block",
            "tiny_block_size",
            "write_combine_size",
            "system_info",
        ],
        from_string_converter=list_converter,
    )

    required_config.namespace('es_redactor')
    required_config.es_redactor.add_option(
        name="redactor_class",
        doc="the name of the class that implements a 'redact' method",
        default='socorro.external.crashstorage_base.Redactor',
        from_string_converter=class_converter,
    )
    required_config.es_redactor.forbidden_keys = change_default(
        Redactor,
        "forbidden_keys",
        (
            "memory_report, "
            "upload_file_minidump_flash1.json_dump, "
            "upload_file_minidump_flash2.json_dump, "
            "upload_file_minidump_browser.json_dump"
        )
    )

    def is_mutator(self):
        # This crash storage mutates the crash, so we mark it as such.
        return True

    #--------------------------------------------------------------------------
    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        """This is the only write mechanism that is actually employed in normal
        usage.
        """
        # Replace the `json_dump` with a subset.
        json_dump = processed_crash.get('json_dump', {})
        redacted_json_dump = {
            k: json_dump.get(k)
            for k in self.config.json_dump_whitelist_keys
        }
        processed_crash['json_dump'] = redacted_json_dump

        super(ESCrashStorageRedactedJsonDump, self).save_raw_and_processed(
            raw_crash,
            dumps,
            processed_crash,
            crash_id
        )
Example #14
class JitCrashCategorizeRule(ExternalProcessRule):

    required_config = Namespace()
    required_config.command_line = change_default(
        ExternalProcessRule, 'command_line',
        'timeout -s KILL 30 {command_pathname} '
        '{dump_file_pathname} '
        '2>/dev/null')
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        '/data/socorro/stackwalk/bin/jit-crash-categorize',
    )
    required_config.result_key = change_default(
        ExternalProcessRule,
        'result_key',
        'classifications.jit.category',
    )
    required_config.return_code_key = change_default(
        ExternalProcessRule,
        'return_code_key',
        'classifications.jit.category_return_code',
    )
    required_config.add_option(
        'threshold',
        doc="max number of frames until encountering target frame",
        default=8)

    #--------------------------------------------------------------------------
    def __init__(self, config):
        super(JitCrashCategorizeRule, self).__init__(config)

    #--------------------------------------------------------------------------
    def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
        if (processed_crash.product != 'Firefox'
                or not processed_crash.os_name.startswith('Windows')
                or processed_crash.cpu_name != 'x86'):
            # we don't want any of these
            return False
        if processed_crash.json_dump['crashing_thread']['frames'][0].get(
                'module', False
        ):  # there is a module at the top of the stack, we don't want this
            return False
        return (processed_crash.signature.endswith('EnterBaseline')
                or processed_crash.signature.endswith('EnterIon'))
Example #15
class CountStackWalkerTimeoutKills(CountAnythingRuleBase):
    required_config = Namespace()
    required_config.rule_name = change_default(
        CountAnythingRuleBase,
        'rule_name',
        'stackwalker_timeout_kills'
    )

    def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
        # Only run the action if "SIGKILL" is in the processor notes
        return any(['SIGKILL' in note for note in proc_meta.processor_notes])
Example #16
class CountStackWalkerFailures(CountAnythingRuleBase):
    required_config = Namespace()
    required_config.rule_name = change_default(
        CountAnythingRuleBase,
        'rule_name',
        'stackwalker_failures'
    )

    def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
        # Only run the action if "MDSW failed" is in the processor notes
        return any(['MDSW failed' in note for note in proc_meta.processor_notes])
Example #17
class JsonFileOutputForCoreCounts(FileOutputForCoreCounts):
    required_config = Namespace()
    required_config.path_template = change_default(
        FileOutputForCoreCounts,
        'path_template',
        '{path}/{prefix}/{prefix}_{key}-{name}.json',
    )

    #--------------------------------------------------------------------------
    def output_correlations_to_stream(self, counts_summary_structure, stream):
        json.dump(counts_summary_structure, stream, indent=4, sort_keys=True)
Example #18
class SingleCrashMQCrashStorage(RabbitMQCrashStorage):
    required_config = Namespace()
    required_config.routing_key = change_default(RabbitMQCrashStorage,
                                                 'routing_key',
                                                 'socorro.normal')
    required_config.rabbitmq_class = change_default(
        RabbitMQCrashStorage,
        'rabbitmq_class',
        ConnectionContext,
    )
    required_config.transaction_executor_class = change_default(
        RabbitMQCrashStorage, 'transaction_executor_class',
        TransactionExecutor)

    def submit(self, crash_ids):
        if not isinstance(crash_ids, (list, tuple)):
            crash_ids = [crash_ids]
        success = bool(crash_ids)
        for crash_id in crash_ids:
            if not self.save_raw_crash(DotDict({'legacy_processing': 0}), [],
                                       crash_id):
                success = False
        return success
Example #19
class ESCrashStorageRedactedSave(ESCrashStorage):
    required_config = Namespace()
    required_config.namespace('es_redactor')
    required_config.es_redactor.add_option(
        name="redactor_class",
        doc="the name of the class that implements a 'redact' method",
        default='socorro.external.crashstorage_base.Redactor',
        from_string_converter=class_converter,
    )
    required_config.es_redactor.forbidden_keys = change_default(
        Redactor,
        "forbidden_keys",
        "json_dump, "
        "upload_file_minidump_flash1.json_dump, "
        "upload_file_minidump_flash2.json_dump, "
        "upload_file_minidump_browser.json_dump"
    )

    required_config.namespace('raw_crash_es_redactor')
    required_config.raw_crash_es_redactor.add_option(
        name="redactor_class",
        doc="the redactor class to use on the raw_crash",
        default='socorro.external.es.crashstorage.RawCrashRedactor',
        from_string_converter=class_converter,
    )

    def __init__(self, config, *args, **kwargs):
        super(ESCrashStorageRedactedSave, self).__init__(config, *args, **kwargs)
        self.redactor = config.es_redactor.redactor_class(config.es_redactor)
        self.raw_crash_redactor = config.raw_crash_es_redactor.redactor_class(
            config.raw_crash_es_redactor
        )

    def is_mutator(self):
        # This crash storage mutates the crash, so we mark it as such.
        return True

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash, crash_id):
        """This is the only write mechanism that is actually employed in normal usage"""
        self.redactor.redact(processed_crash)
        self.raw_crash_redactor.redact(raw_crash)

        super(ESCrashStorageRedactedSave, self).save_raw_and_processed(
            raw_crash,
            dumps,
            processed_crash,
            crash_id
        )
Example #20
    def test_change_default(self):
        class Alpha(RequiredConfig):
            required_config = Namespace()
            required_config.add_option(
                'an_option',
                default=19,
                doc='this is an an_option',
                from_string_converter=str,
            )

        a_new_option_with_a_new_default = change_default(
            Alpha, 'an_option', '29300')

        assert a_new_option_with_a_new_default is not Alpha.required_config.an_option
        assert a_new_option_with_a_new_default.default == '29300'
        assert Alpha.required_config.an_option.default == 19
Example #21
class CountStackWalkerTimeoutKills(CountAnythingRuleBase):
    required_config = Namespace()
    required_config.rule_name = change_default(
        CountAnythingRuleBase,
        'rule_name',
        'stackwalker_timeout_kills'
    )

    #--------------------------------------------------------------------------
    def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
        # override me to check any condition within a raw, processed crash
        # or even the state of the processor itself from the proc_meta
        return reduce(
            lambda x, y: x or "SIGKILL" in y,
            proc_meta.processor_notes,
            False
        )
Example #22
class RegionalS3ConnectionContext(S3ConnectionContext):
    """Connection context for a specific region

    This lets you use the OrdinaryCallingFormat as a calling format and then
    you can use S3 buckets with periods in the names.

    """
    required_config = Namespace()
    required_config.add_option(
        'region',
        doc="Name of the S3 region (e.g. us-west-2)",
        default='us-west-2',
        reference_value_from='resource.boto',
    )
    required_config.calling_format = change_default(
        S3ConnectionContext,
        'calling_format',
        'boto.s3.connection.OrdinaryCallingFormat'
    )

    def __init__(self, config, quit_check_callback=None):
        super(RegionalS3ConnectionContext, self).__init__(config)
        self._region = config.region
        self._connect_to_endpoint = boto.s3.connect_to_region

    def _connect(self):
        try:
            return self.connection
        except AttributeError:
            self.connection = self._connect_to_endpoint(
                self._region,
                **self._get_credentials()
            )
            return self.connection

    def _get_or_create_bucket(self, conn, bucket_name):
        try:
            return self._get_bucket(conn, bucket_name)
        except self.ResponseError:
            self._bucket_cache[bucket_name] = conn.create_bucket(
                bucket_name,
                location=self._region,
            )
            return self._bucket_cache[bucket_name]
Example #23
class JitCrashCategorizeRule(ExternalProcessRule):
    # FIXME(willkg): command_line and command_pathname are referenced in the
    # uplifted versions in Processor2015. The rest of these config values have
    # no effect on anything and are just here.
    required_config = Namespace()
    required_config.command_line = change_default(
        ExternalProcessRule,
        'command_line',
        'timeout -s KILL 30 {command_pathname} '
        '{dump_file_pathname} '
        '2>/dev/null'
    )
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        '/data/socorro/stackwalk/bin/jit-crash-categorize',
    )
    required_config.result_key = change_default(
        ExternalProcessRule,
        'result_key',
        'classifications.jit.category',
    )
    required_config.return_code_key = change_default(
        ExternalProcessRule,
        'return_code_key',
        'classifications.jit.category_return_code',
    )

    def predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
        if (
            processed_crash.product != 'Firefox' or
            not processed_crash.os_name.startswith('Windows') or
            processed_crash.cpu_name != 'x86'
        ):
            # we don't want any of these
            return False

        frames = processed_crash.get('json_dump', {}).get('crashing_thread', {}).get('frames', [])
        if frames and frames[0].get('module', False):
            # there is a module at the top of the stack, we don't want this
            return False

        return (
            processed_crash.signature.endswith('EnterBaseline') or
            processed_crash.signature.endswith('EnterIon') or
            processed_crash.signature.endswith('js::jit::FastInvoke') or
            processed_crash.signature.endswith('js::jit::IonCannon') or
            processed_crash.signature.endswith('js::irregexp::ExecuteCode<T>')
        )

    def _interpret_external_command_output(self, fp, processor_meta):
        try:
            result = fp.read()
        except IOError as x:
            processor_meta.processor_notes.append(
                "%s unable to read external command output: %s" % (
                    self.config.command_pathname,
                    x
                )
            )
            return ''
        try:
            return result.strip()
        except AttributeError:
            # there's no strip method
            return result
Example #24
class BreakpadStackwalkerRule2015(ExternalProcessRule):
    """Executes the minidump stackwalker external process and puts output in processed crash"""
    # FIXME(willkg): command_line and command_pathname are referenced in the
    # uplifted versions in Processor2015. The rest of these config values have
    # no effect on anything and are just here.
    required_config = Namespace()
    required_config.add_option(
        name='symbols_urls',
        doc='comma delimited ordered list of urls for symbol lookup',
        default='https://localhost',
        from_string_converter=str_to_list,
        likely_to_be_changed=True
    )
    required_config.command_line = change_default(
        ExternalProcessRule,
        'command_line',
        'timeout -s KILL {kill_timeout} {command_pathname} '
        '--raw-json {raw_crash_pathname} '
        '{symbols_urls} '
        '--symbols-cache {symbol_cache_path} '
        '--symbols-tmp {symbol_tmp_path} '
        '{dump_file_pathname} '
        '2> /dev/null'
    )
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        # NOTE(willkg): This is the path for the RPM-based Socorro deploy. When
        # we switch to Docker, we should change this.
        '/data/socorro/stackwalk/bin/stackwalker',
    )
    required_config.add_option(
        'kill_timeout',
        doc='amount of time to let mdsw run before declaring it hung',
        default=600
    )
    required_config.add_option(
        'symbol_tmp_path',
        doc=(
            'directory to use as temp space for downloading symbols--must be on '
            'the same filesystem as symbols-cache'
        ),
        default=os.path.join(tempfile.gettempdir(), 'symbols-tmp'),
    )
    required_config.add_option(
        'symbol_cache_path',
        doc=(
            'the path where the symbol cache is found, this location must be '
            'readable and writeable (quote path with embedded spaces)'
        ),
        default=os.path.join(tempfile.gettempdir(), 'symbols'),
    )
    required_config.add_option(
        'temporary_file_system_storage_path',
        doc='a path where temporary files may be written',
        default=tempfile.gettempdir(),
    )

    def __init__(self, *args, **kwargs):
        super(BreakpadStackwalkerRule2015, self).__init__(*args, **kwargs)
        self.metrics = markus.get_metrics('processor.breakpadstackwalkerrule')

    @contextmanager
    def _temp_raw_crash_json_file(self, raw_crash, crash_id):
        file_pathname = os.path.join(
            self.config.temporary_file_system_storage_path,
            '%s.%s.TEMPORARY.json' % (crash_id, threading.currentThread().getName())
        )
        with open(file_pathname, "w") as f:
            json.dump(dotdict_to_dict(raw_crash), f)
        try:
            yield file_pathname
        finally:
            os.unlink(file_pathname)

    def _execute_external_process(self, command_line, processor_meta):
        stackwalker_output, return_code = super(
            BreakpadStackwalkerRule2015,
            self
        )._execute_external_process(command_line, processor_meta)

        if not isinstance(stackwalker_output, Mapping):
            processor_meta.processor_notes.append(
                'MDSW produced unexpected output: %s...' % str(stackwalker_output)[:10]
            )
            stackwalker_output = {}

        stackwalker_data = DotDict()
        stackwalker_data.json_dump = stackwalker_output
        stackwalker_data.mdsw_return_code = return_code

        stackwalker_data.mdsw_status_string = stackwalker_output.get('status', 'unknown error')
        stackwalker_data.success = stackwalker_data.mdsw_status_string == 'OK'

        self.metrics.incr(
            'run',
            tags=[
                'outcome:%s' % ('success' if stackwalker_data.success else 'fail'),
                'exitcode:%s' % return_code,
            ]
        )

        if return_code == 124:
            msg = 'MDSW terminated with SIGKILL due to timeout'
            processor_meta.processor_notes.append(msg)
            self.config.logger.warning(msg)

        elif return_code != 0 or not stackwalker_data.success:
            msg = 'MDSW failed with %s: %s' % (return_code, stackwalker_data.mdsw_status_string)
            processor_meta.processor_notes.append(msg)
            self.config.logger.warning(msg)

        return stackwalker_data, return_code

    def expand_commandline(self, dump_file_pathname, raw_crash_pathname):
        """Expands the command line parameters and returns the final command line"""
        # NOTE(willkg): If we ever add new configuration variables, we'll need
        # to add them here, too, otherwise they won't get expanded in the
        # command line.

        symbols_urls = ' '.join([
            '--symbols-url "%s"' % url.strip()
            for url in self.config.symbols_urls
        ])

        params = {
            # These come from config
            'kill_timeout': self.config.kill_timeout,
            'command_pathname': self.config.command_pathname,
            'symbol_cache_path': self.config.symbol_cache_path,
            'symbol_tmp_path': self.config.symbol_tmp_path,
            'symbols_urls': symbols_urls,

            # These are calculated
            'dump_file_pathname': dump_file_pathname,
            'raw_crash_pathname': raw_crash_pathname
        }
        return self.config.command_line.format(**params)

    def action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        if 'additional_minidumps' not in processed_crash:
            processed_crash.additional_minidumps = []

        with self._temp_raw_crash_json_file(raw_crash, raw_crash.uuid) as raw_crash_pathname:
            for dump_name in raw_dumps.keys():
                if processor_meta.quit_check:
                    processor_meta.quit_check()

                # this rule is only interested in dumps targeted for the
                # minidump stackwalker external program.  As of the writing
                # of this code, there is one other dump type.  The only way
                # to differentiate these dump types is by the name of the
                # dump.  All minidumps targeted for the stackwalker will have
                # a name with a prefix specified in configuration:
                if not dump_name.startswith(self.config.dump_field):
                    # dumps not intended for the stackwalker are ignored
                    continue

                dump_file_pathname = raw_dumps[dump_name]

                command_line = self.expand_commandline(
                    dump_file_pathname=dump_file_pathname,
                    raw_crash_pathname=raw_crash_pathname
                )

                stackwalker_data, return_code = self._execute_external_process(
                    command_line,
                    processor_meta
                )

                if dump_name == self.config.dump_field:
                    processed_crash.update(stackwalker_data)
                else:
                    processed_crash.additional_minidumps.append(dump_name)
                    processed_crash[dump_name] = stackwalker_data
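To make expand_commandline() in the rule above concrete, this sketch renders the default command_line template by hand with hypothetical values, mimicking what the method does (the symbols URL and file paths are placeholders):

# Illustrative sketch of how the default command_line template expands.
template = (
    'timeout -s KILL {kill_timeout} {command_pathname} '
    '--raw-json {raw_crash_pathname} '
    '{symbols_urls} '
    '--symbols-cache {symbol_cache_path} '
    '--symbols-tmp {symbol_tmp_path} '
    '{dump_file_pathname} '
    '2> /dev/null'
)
print(template.format(
    kill_timeout=600,
    command_pathname='/data/socorro/stackwalk/bin/stackwalker',
    # expand_commandline() joins each configured URL as --symbols-url "..."
    symbols_urls='--symbols-url "https://symbols.example.com/"',
    symbol_cache_path='/tmp/symbols',
    symbol_tmp_path='/tmp/symbols-tmp',
    raw_crash_pathname='/tmp/crash.TEMPORARY.json',
    dump_file_pathname='/tmp/upload_file_minidump.dmp',
))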
Example #25
class JitCrashCategorizeRule(ExternalProcessRule):

    required_config = Namespace()
    required_config.command_line = change_default(
        ExternalProcessRule, 'command_line',
        'timeout -s KILL 30 {command_pathname} '
        '{dump_file_pathname} '
        '2>/dev/null')
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        '/data/socorro/stackwalk/bin/jit-crash-categorize',
    )
    required_config.result_key = change_default(
        ExternalProcessRule,
        'result_key',
        'classifications.jit.category',
    )
    required_config.return_code_key = change_default(
        ExternalProcessRule,
        'return_code_key',
        'classifications.jit.category_return_code',
    )
    required_config.add_option(
        'threshold',
        doc="max number of frames until encountering target frame",
        default=8)

    def __init__(self, config):
        super(JitCrashCategorizeRule, self).__init__(config)

    def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
        if (processed_crash.product != 'Firefox'
                or not processed_crash.os_name.startswith('Windows')
                or processed_crash.cpu_name != 'x86'):
            # we don't want any of these
            return False

        frames = processed_crash.get('json_dump',
                                     {}).get('crashing_thread',
                                             {}).get('frames', [])
        if frames and frames[0].get('module', False):
            # there is a module at the top of the stack, we don't want this
            return False

        return (
            processed_crash.signature.endswith('EnterBaseline')
            or processed_crash.signature.endswith('EnterIon')
            or processed_crash.signature.endswith('js::jit::FastInvoke')
            or processed_crash.signature.endswith('js::jit::IonCannon') or
            processed_crash.signature.endswith('js::irregexp::ExecuteCode<T>'))

    def _interpret_external_command_output(self, fp, processor_meta):
        try:
            result = fp.read()
        except IOError as x:
            processor_meta.processor_notes.append(
                "%s unable to read external command output: %s" %
                (self.config.command_pathname, x))
            return ''
        try:
            return result.strip()
        except AttributeError:
            # there's no strip method
            return result
Example #26
class BreakpadStackwalkerRule2015(ExternalProcessRule):

    required_config = Namespace()
    required_config.add_option(name='public_symbols_url',
                               doc='url of the public symbol server',
                               default="https://localhost",
                               likely_to_be_changed=True)
    required_config.add_option(name='private_symbols_url',
                               doc='url of the private symbol server',
                               default="https://localhost",
                               likely_to_be_changed=True)
    required_config.command_line = change_default(
        ExternalProcessRule, 'command_line',
        'timeout -s KILL 30 {command_pathname} '
        '--raw-json {raw_crash_pathname} '
        '--symbols-url {public_symbols_url} '
        '--symbols-url {private_symbols_url} '
        '--symbols-cache {symbol_cache_path} '
        '{dump_file_pathname} '
        '2>/dev/null')
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        '/data/socorro/stackwalk/bin/stackwalker',
    )
    required_config.add_option(
        'symbol_cache_path',
        doc='the path where the symbol cache is found, this location must be '
        'readable and writeable (quote path with embedded spaces)',
        default=os.path.join(tempfile.gettempdir(), 'symbols'),
    )
    required_config.add_option(
        'temporary_file_system_storage_path',
        doc='a path where temporary files may be written',
        default=tempfile.gettempdir(),
    )

    def version(self):
        return '1.0'

    @contextmanager
    def _temp_raw_crash_json_file(self, raw_crash, crash_id):
        file_pathname = os.path.join(
            self.config.temporary_file_system_storage_path,
            "%s.%s.TEMPORARY.json" %
            (crash_id, threading.currentThread().getName()))
        with open(file_pathname, "w") as f:
            ujson.dump(raw_crash, f)
        try:
            yield file_pathname
        finally:
            os.unlink(file_pathname)

    def _execute_external_process(self, command_line, processor_meta):
        stackwalker_output, return_code = super(
            BreakpadStackwalkerRule2015,
            self)._execute_external_process(command_line, processor_meta)

        if not isinstance(stackwalker_output, Mapping):
            processor_meta.processor_notes.append(
                "MDSW produced unexpected output: %s..." %
                str(stackwalker_output)[:10])
            stackwalker_output = {}

        stackwalker_data = DotDict()
        stackwalker_data.json_dump = stackwalker_output
        stackwalker_data.mdsw_return_code = return_code

        stackwalker_data.mdsw_status_string = stackwalker_output.get(
            'status', 'unknown error')
        stackwalker_data.success = stackwalker_data.mdsw_status_string == 'OK'

        if return_code == 124:
            processor_meta.processor_notes.append(
                "MDSW terminated with SIGKILL due to timeout")
        elif return_code != 0 or not stackwalker_data.success:
            processor_meta.processor_notes.append(
                "MDSW failed on '%s': %s" %
                (command_line, stackwalker_data.mdsw_status_string))

        return stackwalker_data, return_code

    def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        if 'additional_minidumps' not in processed_crash:
            processed_crash.additional_minidumps = []
        with self._temp_raw_crash_json_file(
                raw_crash, raw_crash.uuid) as raw_crash_pathname:
            for dump_name in raw_dumps.iterkeys():

                if processor_meta.quit_check:
                    processor_meta.quit_check()

                # this rule is only interested in dumps targeted for the
                # minidump stackwalker external program.  As of the writing
                # of this code, there is one other dump type.  The only way
                # to differentiate these dump types is by the name of the
                # dump.  All minidumps targeted for the stackwalker will have
                # a name with a prefix specified in configuration:
                if not dump_name.startswith(self.config.dump_field):
                    # dumps not intended for the stackwalker are ignored
                    continue

                dump_pathname = raw_dumps[dump_name]

                if self.config.chatty:
                    self.config.logger.debug("BreakpadStackwalkerRule: %s, %s",
                                             dump_name, dump_pathname)

                command_line = self.config.command_line.format(
                    **dict(self.config,
                           dump_file_pathname=dump_pathname,
                           raw_crash_pathname=raw_crash_pathname))

                stackwalker_data, return_code = self._execute_external_process(
                    command_line, processor_meta)

                if dump_name == self.config.dump_field:
                    processed_crash.update(stackwalker_data)
                else:
                    processed_crash.additional_minidumps.append(dump_name)
                    processed_crash[dump_name] = stackwalker_data

        return True
Example #27
class Processor2015(RequiredConfig):
    """this class is a generalization of the Processor into a rule processing
    framework. This class is suitable for use in the 'processor_app'
    introduced in 2012."""

    required_config = Namespace('transform_rules')
    required_config.add_option(
        'database_class',
        doc="the class of the database",
        default='socorro.external.postgresql.connection_context.'
        'ConnectionContext',
        from_string_converter=str_to_python_object,
        reference_value_from='resource.postgresql',
    )
    required_config.add_option(
        'transaction_executor_class',
        default="socorro.database.transaction_executor."
        "TransactionExecutorWithInfiniteBackoff",
        doc='a class that will manage transactions',
        from_string_converter=str_to_python_object,
        reference_value_from='resource.postgresql',
    )
    required_config.add_option(
        'dump_field',
        doc='the default name of a dump',
        default='upload_file_minidump',
    )
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        # NOTE(willkg): This is the path for the RPM-based Socorro deploy. When
        # we switch to Docker, we should change this.
        '/data/socorro/stackwalk/bin/stackwalker',
    )
    required_config.add_option(
        'result_key',
        doc=('the key where the external process result should be stored '
             'in the processed crash'),
        default='stackwalker_result',
    )
    required_config.add_option(
        'return_code_key',
        doc=('the key where the external process return code should be stored '
             'in the processed crash'),
        default='stackwalker_return_code',
    )
    required_config.add_option(
        name='symbols_urls',
        doc='comma-delimited ordered list of urls for symbol lookup',
        default='https://localhost',
        from_string_converter=str_to_list,
        likely_to_be_changed=True)
    required_config.command_line = change_default(
        ExternalProcessRule, 'command_line',
        ('timeout -s KILL {kill_timeout} {command_pathname} '
         '--raw-json {raw_crash_pathname} '
         '{symbols_urls} '
         '--symbols-cache {symbol_cache_path} '
         '--symbols-tmp {symbol_tmp_path} '
         '{dump_file_pathname} '
         '2> /dev/null'))
    required_config.add_option(
        'kill_timeout',
        doc='amount of time to let mdsw run before declaring it hung',
        default=600)
    required_config.add_option(
        'symbol_tmp_path',
        doc=('directory to use as temp space for downloading symbols--must be '
             'on the same filesystem as symbols-cache'),
        default=os.path.join(tempfile.gettempdir(), 'symbols-tmp'),
    )
    required_config.add_option(
        'symbol_cache_path',
        doc=('the path where the symbol cache is found, this location must be '
             'readable and writeable (quote path with embedded spaces)'),
        default=os.path.join(tempfile.gettempdir(), 'symbols'),
    )
    required_config.add_option(
        'temporary_file_system_storage_path',
        doc='a path where temporary files may be written',
        default=tempfile.gettempdir(),
    )

    def __init__(self, config, rules=None, quit_check_callback=None):
        super(Processor2015, self).__init__()
        self.config = config
        # the quit checks are components of a system of callbacks used
        # primarily by the TaskManager system.  This is the system that
        # controls the execution model.  If the ThreadedTaskManager is in use,
        # these callbacks just check the ThreadedTaskManager task manager's
        # quit flag.  If they detect a quit condition, they raise an exception
        # that causes the thread to shut down.  For the GreenletTaskManager,
        # using cooperative multitasking, the callbacks do the 'yield' to
        # allow another green thread to take over.
        # It is perfectly acceptable to hook into this callback system to
        # accomplish any task that needs be done periodically.
        if quit_check_callback:
            self.quit_check = quit_check_callback
        else:
            self.quit_check = lambda: False

        rule_set = rules or list(DEFAULT_RULES)

        self.rules = []
        for a_rule_class in rule_set:
            self.rules.append(a_rule_class(config))

    def process_crash(self, raw_crash, raw_dumps, processed_crash):
        """Take a raw_crash and its associated raw_dumps and return a
        processed_crash.
        """
        # processor_meta_data will be used to ferry "inside information" to
        # transformation rules.  Sometimes rules need a bit more extra
        # information about the transformation process itself.
        processor_meta_data = DotDict()
        processor_meta_data.processor_notes = [
            self.config.processor_name, self.__class__.__name__
        ]
        processor_meta_data.quit_check = self.quit_check
        processor_meta_data.processor = self
        processor_meta_data.config = self.config

        if "processor_notes" in processed_crash:
            original_processor_notes = [
                x.strip() for x in processed_crash.processor_notes.split(";")
            ]
            processor_meta_data.processor_notes.append(
                "earlier processing: %s" %
                processed_crash.get("started_datetime", 'Unknown Date'))
        else:
            original_processor_notes = []

        processed_crash.success = False
        processed_crash.started_datetime = utc_now()
        # for backwards compatibility:
        processed_crash.startedDateTime = processed_crash.started_datetime
        processed_crash.signature = 'EMPTY: crash failed to process'

        crash_id = raw_crash['uuid']
        try:
            # quit_check calls ought to be scattered around the code to allow
            # the processor to be responsive to requests to shut down.
            self.quit_check()

            start_time = utc_now()
            self.config.logger.info(
                "starting transform for crash: %s", crash_id)
            processor_meta_data.started_timestamp = start_time

            # apply_all_rules
            for rule in self.rules:
                rule.act(raw_crash, raw_dumps, processed_crash,
                         processor_meta_data)
                self.quit_check()

            # the crash made it through the processor rules with no exceptions
            # raised, call it a success.
            processed_crash.success = True

        except Exception as exception:
            self.config.logger.warning('Error while processing %s: %s',
                                       crash_id,
                                       str(exception),
                                       exc_info=True)
            processor_meta_data.processor_notes.append(
                'unrecoverable processor error: %s' % exception)

        # the processor notes are in the form of a list.  Join them all
        # together to make a single string
        processor_meta_data.processor_notes.extend(original_processor_notes)
        processed_crash.processor_notes = '; '.join(
            processor_meta_data.processor_notes)
        completed_datetime = utc_now()
        processed_crash.completed_datetime = completed_datetime
        # for backwards compatibility:
        processed_crash.completeddatetime = completed_datetime

        self.config.logger.info(
            "finishing %s transform for crash: %s",
            'successful' if processed_crash.success else 'failed', crash_id)
        return processed_crash

    def reject_raw_crash(self, crash_id, reason):
        self.config.logger.warning('%s rejected: %s', crash_id, reason)

    def close(self):
        self.config.logger.debug('closing rules')
        for rule in self.rules:
            rule.close()
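A minimal usage sketch of the rule-application loop implemented by Processor2015.process_crash(); the config object and crash contents are hypothetical placeholders (config must supply at least processor_name and a logger):

from configman.dotdict import DotDict  # Socorro also ships its own DotDict variant

processor = Processor2015(config)      # rules default to DEFAULT_RULES
raw_crash = DotDict({'uuid': '00000000-0000-0000-0000-000000000000'})
processed = processor.process_crash(raw_crash, raw_dumps={}, processed_crash=DotDict())

# success is True only if every rule's act() returned without raising;
# all processor notes are joined into a single '; '-separated string.
print(processed.success, processed.processor_notes)
processor.close()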
Example #28
class TelemetryBotoS3CrashStorage(BotoS3CrashStorage):
    """Sends a subset of the processed crash to an S3 bucket

    The subset of the processed crash is based on the JSON Schema which is
    derived from "socorro/external/es/super_search_fields.py".

    This uses a boto connection context with one twist: if you set
    "resource.boto.telemetry_bucket_name", then that will override the value.

    """

    required_config = Namespace()
    required_config.resource_class = change_default(
        BotoCrashStorage,
        'resource_class',
        'socorro.external.boto.connection_context.RegionalS3ConnectionContext'
    )
    required_config.add_option(
        'telemetry_bucket_name',
        default='',
        reference_value_from='resource.boto',
        doc='if set, overrides resource_class bucket name'
    )

    required_config.elasticsearch = Namespace()
    required_config.elasticsearch.add_option(
        'elasticsearch_class',
        default='socorro.external.es.connection_context.ConnectionContext',
        from_string_converter=class_converter,
        reference_value_from='resource.elasticsearch',
    )

    def __init__(self, config, *args, **kwargs):
        # This class requires that we use
        # SimpleDatePrefixKeyBuilder, so we stomp on the configuration
        # to make absolutely sure it gets set that way.
        config.keybuilder_class = SimpleDatePrefixKeyBuilder
        super(TelemetryBotoS3CrashStorage, self).__init__(
            config, *args, **kwargs
        )

        if config.telemetry_bucket_name:
            # If we have a telemetry.bucket_name set, then stomp on it with
            # config.telemetry_bucket_name.

            # FIXME(willkg): It'd be better if we could detect whether the
            # connection context bucket_name was set at all (it's a default
            # value, or the value of resource.boto.bucket_name).
            config.logger.info(
                'Using %s for TelemetryBotoS3CrashStorage bucket', config.telemetry_bucket_name
            )
            self.connection_source.config.bucket_name = config.telemetry_bucket_name

    def _get_all_fields(self):
        if (
            hasattr(self, '_all_fields') and
            hasattr(self, '_all_fields_timestamp')
        ):
            # we might have it cached
            age = time.time() - self._all_fields_timestamp
            if age < 60 * 60:
                # fresh enough
                return self._all_fields

        self._all_fields = SuperSearchFields(config=self.config).get()
        self._all_fields_timestamp = time.time()
        return self._all_fields

    def save_raw_and_processed(
        self,
        raw_crash,
        dumps,
        processed_crash,
        crash_id
    ):
        all_fields = self._get_all_fields()
        crash_report = {}

        # TODO Opportunity of optimization;
        # We could inspect CRASH_REPORT_JSON_SCHEMA and get a list
        # of all (recursive) keys that are in there and use that
        # to limit the two following loops to not bother
        # filling up `crash_report` with keys that will never be
        # needed.

        # Rename fields in raw_crash.
        raw_fields_map = dict(
            (x['in_database_name'], x['name'])
            for x in all_fields.values()
            if x['namespace'] == 'raw_crash'
        )
        for key, val in raw_crash.items():
            crash_report[raw_fields_map.get(key, key)] = val

        # Rename fields in processed_crash.
        processed_fields_map = dict(
            (x['in_database_name'], x['name'])
            for x in all_fields.values()
            if x['namespace'] == 'processed_crash'
        )
        for key, val in processed_crash.items():
            crash_report[processed_fields_map.get(key, key)] = val

        # Validate crash_report.
        crash_report = json_schema_reducer.make_reduced_dict(
            CRASH_REPORT_JSON_SCHEMA, crash_report
        )
        self.save_processed(crash_report)

    @staticmethod
    def _do_save_processed(boto_connection, processed_crash):
        """Overriding this method so we can control the "name of thing"
        prefix used to upload to S3."""
        crash_id = processed_crash['uuid']
        processed_crash_as_string = boto_connection._convert_mapping_to_string(
            processed_crash
        )
        boto_connection.submit(
            crash_id,
            "crash_report",
            processed_crash_as_string
        )
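The renaming step in save_raw_and_processed() above is easiest to see with a tiny stand-in for the SuperSearchFields data; the field definitions here are hypothetical but shaped like the real ones (in_database_name, name, namespace):

# Hypothetical field definitions shaped like SuperSearchFields().get() output.
all_fields = {
    'product': {'in_database_name': 'product', 'name': 'product',
                'namespace': 'raw_crash'},
    'platform': {'in_database_name': 'os_name', 'name': 'platform',
                 'namespace': 'processed_crash'},
}
processed_fields_map = dict(
    (f['in_database_name'], f['name'])
    for f in all_fields.values()
    if f['namespace'] == 'processed_crash'
)
# A processed_crash key 'os_name' is written to the telemetry crash_report
# under its public name 'platform'; keys with no mapping pass through as-is.
assert processed_fields_map.get('os_name', 'os_name') == 'platform'
assert processed_fields_map.get('some_other_key', 'some_other_key') == 'some_other_key'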
Example #29
class TelemetryBotoS3CrashStorage(BotoS3CrashStorage):
    """S3 crash storage class for sending a subset of the processed crash
    but reduced to only include the files in the processed crash
    JSON Schema."""

    required_config = Namespace()
    required_config.resource_class = change_default(
        BotoCrashStorage, 'resource_class',
        'socorro.external.boto.connection_context.RegionalS3ConnectionContext')

    required_config.elasticsearch = Namespace()
    required_config.elasticsearch.add_option(
        'elasticsearch_class',
        default='socorro.external.es.connection_context.ConnectionContext',
        from_string_converter=class_converter,
        reference_value_from='resource.elasticsearch',
    )

    def __init__(self, config, *args, **kwargs):
        # This class requires that we use
        # SimpleDatePrefixKeyBuilder, so we stomp on the configuration
        # to make absolutely sure it gets set that way.
        config.keybuilder_class = SimpleDatePrefixKeyBuilder
        super(TelemetryBotoS3CrashStorage,
              self).__init__(config, *args, **kwargs)

    def _get_all_fields(self):
        if (hasattr(self, '_all_fields')
                and hasattr(self, '_all_fields_timestamp')):
            # we might have it cached
            age = time.time() - self._all_fields_timestamp
            if age < 60 * 60:
                # fresh enough
                return self._all_fields

        self._all_fields = SuperSearchFields(config=self.config).get()
        self._all_fields_timestamp = time.time()
        return self._all_fields

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        all_fields = self._get_all_fields()
        crash_report = {}

        # TODO Opportunity of optimization;
        # We could inspect CRASH_REPORT_JSON_SCHEMA and get a list
        # of all (recursive) keys that are in there and use that
        # to limit the two following loops to not bother
        # filling up `crash_report` with keys that will never be
        # needed.

        # Rename fields in raw_crash.
        raw_fields_map = dict((x['in_database_name'], x['name'])
                              for x in all_fields.values()
                              if x['namespace'] == 'raw_crash')
        for key, val in raw_crash.items():
            crash_report[raw_fields_map.get(key, key)] = val

        # Rename fields in processed_crash.
        processed_fields_map = dict((x['in_database_name'], x['name'])
                                    for x in all_fields.values()
                                    if x['namespace'] == 'processed_crash')
        for key, val in processed_crash.items():
            crash_report[processed_fields_map.get(key, key)] = val

        # Validate crash_report.
        crash_report = json_schema_reducer.make_reduced_dict(
            CRASH_REPORT_JSON_SCHEMA, crash_report)
        self.save_processed(crash_report)

    @staticmethod
    def _do_save_processed(boto_connection, processed_crash):
        """Overriding this method so we can control the "name of thing"
        prefix used to upload to S3."""
        crash_id = processed_crash['uuid']
        processed_crash_as_string = boto_connection._convert_mapping_to_string(
            processed_crash)
        boto_connection.submit(crash_id, "crash_report",
                               processed_crash_as_string)
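The renaming step in save_raw_and_processed can be shown in isolation. A minimal sketch, assuming a hypothetical two-field mapping in place of the real SuperSearchFields data, illustrates how in_database_name keys become name keys before the schema reduction:

# hypothetical field definitions; the real ones come from SuperSearchFields
all_fields = {
    'release_channel': {
        'name': 'release_channel',
        'in_database_name': 'ReleaseChannel',
        'namespace': 'raw_crash',
    },
    'signature': {
        'name': 'signature',
        'in_database_name': 'signature',
        'namespace': 'processed_crash',
    },
}

raw_crash = {'ReleaseChannel': 'nightly', 'UnknownField': '1'}
processed_crash = {'signature': 'shutdownhang | example'}

crash_report = {}
raw_fields_map = dict(
    (x['in_database_name'], x['name'])
    for x in all_fields.values()
    if x['namespace'] == 'raw_crash'
)
for key, val in raw_crash.items():
    # known keys are renamed; unknown keys pass through unchanged and are
    # dropped later by the JSON Schema reduction
    crash_report[raw_fields_map.get(key, key)] = val

processed_fields_map = dict(
    (x['in_database_name'], x['name'])
    for x in all_fields.values()
    if x['namespace'] == 'processed_crash'
)
for key, val in processed_crash.items():
    crash_report[processed_fields_map.get(key, key)] = val

# crash_report == {'release_channel': 'nightly', 'UnknownField': '1',
#                  'signature': 'shutdownhang | example'}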
Example #30
class CorrelationInterestingModulesRule(CorrelationRule):
    """this class attempts to be a faithful reproduction of the function of
    the original dbaron the "per-crash-interesting-modules.py" application
    embodied as a Socorro TransformRule.

    Individual crashes will be offered to this rule by a Fetch Transform Save
    app through the "_action_" method.  This class will examine the crash and
    to counters build on an instance of a ProductVersionMapping.  The counter
    add structure it builds looks like this:

    pv_counters[os_name*]
        .count
        .signatures[a_signature*]
           .count
           .modules[a_module*]
               .count
               .versions[a_version*] int
        .modules[a_module*]
            .count
            .versions[a_version*] int


    """
    required_config = Namespace()
    required_config.add_option(
        "show_versions",
        doc="Show data on module versions",
        default=False
    )
    required_config.add_option(
        "addons",
        doc="Tabulate addons (rather than modules)",
        default=False
    )
    required_config.add_option(
        "min_baseline_diff",
        doc="a floating point number",
        default=0.05
    )
    required_config.namespace('output')
    required_config.output.output_class = change_default(
        CorrelationRule,
        'output.output_class',
        'socorro.analysis.correlations.interesting_rule'
        '.FileOutputForInterestingModules',
        new_reference_value='global.correlations.interesting'
    )

    #--------------------------------------------------------------------------
    def version(self):
        return '1.0'

    #--------------------------------------------------------------------------
    def __init__(self, config=None, quit_check_callback=None):
        super(CorrelationInterestingModulesRule, self).__init__(
            config,
            quit_check_callback
        )
        for an_accumulator in self.counters_for_all_producs_and_versions.values():
            an_accumulator.osyses = {}
        self.date_suffix = defaultdict(int)
        self.summary_names = {
            #(show_versions, addons)
            (False, False): 'interesting-modules',
            (True, False): 'interesting-modules-with-versions',
            (False, True): 'interesting-addons',
            (True, True): 'interesting-addons-with-versions',
        }

    #--------------------------------------------------------------------------
    def summary_name(self):
        return self.summary_names[(
            self.config.show_versions,
            self.config.addons,
        )]

    #--------------------------------------------------------------------------
    @staticmethod
    def contains_bare_address(a_signature):
        return re.search(r"\S+@0x[0-9a-fA-F]+$", a_signature) is not None

    #--------------------------------------------------------------------------
    @staticmethod
    def remove_bare_address_from_signature(a_signature):
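        # e.g. a hypothetical signature "js::RunScript@0x1a2b" would be
        # condensed to "js::RunScript"; signatures without a trailing bare
        # address are returned unchanged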
        return re.sub(r"@0x[0-9a-fA-F]+$", "", a_signature)

    #--------------------------------------------------------------------------
    def _action(self, raw, dumps, crash, processor_meta):
        self.date_suffix[crash['crash_id'][-6:]] += 1
        if not "os_name" in crash:
            # We have some bad crash reports.
            return False

        # map the old algorithm's critical variable names onto their
        # counterparts in the new system
        try:
            osyses = self.counters_for_all_producs_and_versions[
                (crash["product"], crash["version"])
            ].osyses
            self.counters_for_all_producs_and_versions[
                (crash["product"], crash["version"])
            ].counter += 1
        except (AttributeError, KeyError):
            # why both types? crashes can be represented by either the Socorro
            # or configman DotDict types, which raise different exceptions
            # when a key is not found.
            osyses = {}
            self.counters_for_all_producs_and_versions[
                (crash["product"], crash["version"])
            ].osyses = osyses
            self.counters_for_all_producs_and_versions[
                (crash["product"], crash["version"])
            ].counter = 1

        options = self.config

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # begin - refactored code section
        # unlike the "core count correlation report", this code from the
        # was refactored to help understand the structure of the counters
        # so that a generic summary structure could be made.  This allows
        # for output of the summary information to somewhere other than
        # stdout.
        #
        # the structure has been broken down into levels of regular dicts
        # and SocorroDotDicts.  The DotDicts have keys that are constant
        # and no more are added when new crashes come in.  The regular dicts
        # are key with variable things that come in with crashes.  In the
        # structure below, keys of DotDicts are shown as constants like
        # ".count" and ".modules". The keys of the dicts are shown as the
        # name of a field with a * (to designate zero or more) inside square
        # brackets.
        #
        # the counters structure looks like this:
        #     pv_counters[os_name*]
        #         .count
        #         .signatures[a_signature*]
        #             .count
        #             .modules[a_module*]
        #                 .count
        #                 .versions[a_version*] int
        #         .modules[a_module*]
        #              .count
        #              .versions[a_version*] int

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        os_name = crash["os_name"]
        # The os_version field is way too specific on Linux, and we don't
        # have much Linux data anyway.
        if options.by_os_version and os_name != "Linux":
            os_name = os_name + " " + crash["os_version"]
        counters_for_an_os = osyses.setdefault(
            os_name,
            SocorroDotDict({
                "count": 0,
                "signatures": {},
                "modules": {},
            })
        )
        a_signature = crash["signature"]
        if self.contains_bare_address(a_signature):
            if options.condense:
                # Condense all signatures in a given DLL.
                a_signature = self.remove_bare_address_from_signature(
                    a_signature
                )
        if "reason" in crash and crash["reason"] is not None:
            a_signature = a_signature + "|" + crash["reason"]
        counters_for_a_signature = counters_for_an_os.signatures.setdefault(
            a_signature,
            SocorroDotDict({
                "count": 0,
                "modules": {}
            }),
        )
        list_of_counters = [counters_for_an_os, counters_for_a_signature]
        # increment both the os & signature counters
        for a_counter in list_of_counters:
            a_counter.count += 1

        for libname, version in self.generate_modules_or_addons(crash):
            # Increment the global count on osys and the per-signature count.
            for a_counter in list_of_counters:
                counters_for_modules = a_counter.modules.setdefault(
                    libname,
                    SocorroDotDict({
                        "count": 0,
                        "versions": defaultdict(int),
                    })
                )
                counters_for_modules.count += 1
                # Count versions of each module as well.
                counters_for_modules.versions[version] += 1
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # end - refactored code section
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        return True

    #--------------------------------------------------------------------------
    def _summary_for_a_product_version_pair(self, a_pv_accumulator):
        """in the original code, the counter structures were walked and
        manipulated to form the statistics.  Once a stat was determined,
        it was printed to stdout.  Since we want to have various means of
        outputting the data, instead of printing to stdout, this method
        saves the statistic in a "summary_structure".  This structure will
        later be walked for printing or output to some future storage scheme.

        The summary structure looks like this:
        pv_summary
            .date_key  # a list of the last six UUID characters present
            .notes  # any notes added by the algorithm to tell of problems
            .os_counters[os_name*]
                 .count
                 .signatures[a_signature*]
                     .count
                     .in_sig_ratio
                     .in_os_ratio
                     .in_os_count
                     .osys_count
                     .modules[a_module*]  # may be addons
                         .in_sig_ratio
                         .in_os_ratio
                         .in_os_count
                         .osys_count
                         .versions[a_version*]  # may be addon versions
                             .sig_ver_ratio
                             .sig_ver_count
                             .sig_count
                             .os_ver_ratio
                             .os_ver_count
                             .osys_count
                             .version
        """

        options = self.config
        pv_summary = SocorroDotDict({
            'notes': [],
        })
        if len(self.date_suffix) > 1:
            message = (
                "crashes from more than one day %s" %
                str(tuple(self.date_suffix.keys()))
            )
##            self.config.logger.debug(message)
            pv_summary.notes.append(message)
        pv_summary.date_key = self.date_suffix.keys()[0]
        pv_summary.os_counters = {}

        MIN_CRASHES = self.config.min_crashes
        counters_for_multiple_os = a_pv_accumulator.osyses

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # begin - refactored code section
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        infostr_re = re.compile(r"^(.*) with (\d+) cores$")  # unused?

        for os_name in counters_for_multiple_os.keys():
            counters_for_an_os = counters_for_multiple_os[os_name]

            pv_summary.os_counters[os_name] = SocorroDotDict()
            pv_summary.os_counters[os_name].count = counters_for_multiple_os[os_name].count
            pv_summary.os_counters[os_name].signatures = {}
            filtered_signatures = [
                (signature, signature_counter)
                for (signature, signature_counter)
                    in counters_for_an_os["signatures"].items()
                if signature_counter.count >= MIN_CRASHES
            ]
            for a_signature, a_signtaure_counter in filtered_signatures:
                pv_summary.os_counters[os_name].signatures[a_signature] = SocorroDotDict()
                pv_summary.os_counters[os_name].signatures[a_signature].count = a_signtaure_counter.count
                pv_summary.os_counters[os_name].signatures[a_signature].modules = {}
                modules_list = [
                    SocorroDotDict({
                        "libname": module_name,
                        "in_sig_count": a_module_counter.count,
                        "in_sig_ratio": float(a_module_counter.count) / a_signtaure_counter.count,
                        "in_sig_versions": a_module_counter.versions,
                        "in_os_count": counters_for_an_os.modules[module_name].count,
                        "in_os_ratio": (
                            float(counters_for_an_os.modules[module_name].count) /
                            counters_for_an_os.count
                        ),
                        "in_os_versions":
                            counters_for_an_os.modules[module_name].versions
                    })
                    for module_name, a_module_counter in a_signtaure_counter.modules.iteritems()
                ]

                modules_list = [
                    module for module in modules_list
                    if module.in_sig_ratio - module.in_os_ratio >= self.config.min_baseline_diff
                ]

                modules_list.sort(
                    key=lambda module: module.in_sig_ratio - module.in_os_ratio,
                    reverse=True
                )

                for module in modules_list:
                    module_name = module.libname
                    if options.addons:
                        info = addonids.info_for_id(module_name)
                        if info is not None:
                            module_name = (
                                module_name + u" ({0}, {1})".format(
                                    info.name,
                                    info.url
                                )
                            )
                    if options.show_versions and len(module["in_os_versions"]) == 1:
                        onlyver = module.in_os_versions.keys()[0]
                        if os_name.startswith("Mac OS X"):
                            info = macdebugids.info_for_id(module_name, onlyver)
                            if info is not None:
                                onlyver = onlyver + "; " + info
                        if (onlyver != ""):
                            module_name = module_name + " (" + onlyver + ")"
                    pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name] = SocorroDotDict()
                    pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].in_sig_count = (
                        module.in_sig_count
                    )
                    pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].in_sig_ratio = (
                        int(round(module["in_sig_ratio"] * 100))
                    )
                    pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].in_os_ratio = (
                        int(round(module.in_os_ratio * 100))
                    )
                    pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].in_os_count = (
                        module.in_os_count
                    )
                    pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].osys_count = (
                        counters_for_an_os.count
                    )

                    if options.show_versions and len(module.in_os_versions) != 1:
                        versions = module.in_os_versions.keys()
                        versions.sort()
                        pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].versions = {}
                        for version in versions:
                            sig_ver_count = module.in_sig_versions.get(version, 0)
                            os_ver_count = module.in_os_versions[version]
                            if os_name.startswith("Mac OS X"):
                                info = macdebugids.info_for_id(module_name, version)
                                if info is not None:
                                    version = version + " (" + info + ")"
                            pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].versions[version] = SocorroDotDict()
                            pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].versions[version].sig_ver_ratio = (
                                int(round(float(sig_ver_count) / a_signtaure_counter.count * 100))
                            )
                            pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].versions[version].sig_ver_count = sig_ver_count
                            pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].versions[version].sig_count = a_signtaure_counter.count
                            pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].versions[version].os_ver_ratio = (
                                int(round(float(os_ver_count) / counters_for_an_os.count * 100))
                            )
                            pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].versions[version].os_ver_count = os_ver_count
                            pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].versions[version].osys_count = counters_for_an_os.count
                            pv_summary.os_counters[os_name].signatures[a_signature].modules[module_name].versions[version].version = version
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # end - refactored code section
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        return pv_summary

    #--------------------------------------------------------------------------
    def generate_modules_or_addons(self, crash):
        options = self.config
        if options.addons:
            for addon in crash["addons"]:
                yield addon[0], addon[1]
        else:
            if "json_dump" in crash and "modules" in crash["json_dump"]:
                for module in crash["json_dump"]["modules"]:
                    libname = module["filename"]
                    version = module["version"]
                    pdb = module["debug_file"]  # never used?
                    checksum = module["debug_id"]
                    addrstart = module["base_addr"]  # never used?
                    addrend = module["end_addr"]  # never used?
                    if crash["os_name"].startswith("Win"):
                        # We only have good version data on Windows.
                        yield libname, version
                    else:
                        yield libname, checksum

    #--------------------------------------------------------------------------
    def summarize(self):
        # for each product version pair in the accumulators
        summary = {}
        for pv, an_accumulator in self.counters_for_all_producs_and_versions.iteritems():
            summary['_'.join(pv)] = self._summary_for_a_product_version_pair(
                an_accumulator
            )
        return summary
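The counter nesting that _action builds (and that _summary_for_a_product_version_pair later walks) is easier to see with concrete data. A minimal sketch, using plain dicts in place of SocorroDotDict and a made-up Windows crash, shows the shape after one crash whose signature loaded one module:

from collections import defaultdict

# per-OS counters after a single hypothetical crash on "Windows NT" with
# signature "foo::bar" that had loaded module "xul.dll" version "55.0"
osyses = {
    'Windows NT': {
        'count': 1,
        'signatures': {
            'foo::bar': {
                'count': 1,
                'modules': {
                    'xul.dll': {
                        'count': 1,
                        'versions': defaultdict(int, {'55.0': 1}),
                    },
                },
            },
        },
        'modules': {
            'xul.dll': {
                'count': 1,
                'versions': defaultdict(int, {'55.0': 1}),
            },
        },
    },
}

# the summary step then keeps, per signature, only modules whose
# in-signature ratio exceeds their overall in-OS ratio by at least
# min_baseline_diff, and only signatures with at least min_crashes crashes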
Example #31
class TelemetryBotoS3CrashStorage(BotoS3CrashStorage):
    """Sends a subset of the processed crash to an S3 bucket

    The subset of the processed crash is based on the JSON Schema which is
    derived from "socorro/external/es/super_search_fields.py".

    """

    required_config = Namespace()
    required_config.resource_class = change_default(
        BotoCrashStorage, 'resource_class',
        'socorro.external.boto.connection_context.RegionalS3ConnectionContext')
    required_config.elasticsearch = Namespace()
    required_config.elasticsearch.add_option(
        'elasticsearch_class',
        default='socorro.external.es.connection_context.ConnectionContext',
        from_string_converter=class_converter,
        reference_value_from='resource.elasticsearch',
    )

    def __init__(self, config, *args, **kwargs):
        super(TelemetryBotoS3CrashStorage,
              self).__init__(config, *args, **kwargs)
        self._all_fields = SuperSearchFields(config=self.config).get()

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        crash_report = {}

        # TODO: Opportunity for optimization:
        # We could inspect CRASH_REPORT_JSON_SCHEMA and get a list
        # of all (recursive) keys that are in there and use that
        # to limit the two following loops to not bother
        # filling up `crash_report` with keys that will never be
        # needed.

        # Rename fields in raw_crash.
        raw_fields_map = dict((x['in_database_name'], x['name'])
                              for x in self._all_fields.values()
                              if x['namespace'] == 'raw_crash')
        for key, val in raw_crash.items():
            crash_report[raw_fields_map.get(key, key)] = val

        # Rename fields in processed_crash.
        processed_fields_map = dict((x['in_database_name'], x['name'])
                                    for x in self._all_fields.values()
                                    if x['namespace'] == 'processed_crash')
        for key, val in processed_crash.items():
            crash_report[processed_fields_map.get(key, key)] = val

        # Validate crash_report.
        crash_report = json_schema_reducer.make_reduced_dict(
            CRASH_REPORT_JSON_SCHEMA, crash_report)
        self.save_processed(crash_report)

    @staticmethod
    def _do_save_processed(boto_connection, processed_crash):
        """Overriding this to change "name of thing" to crash_report"""
        crash_id = processed_crash['uuid']
        processed_crash_as_string = boto_connection._convert_mapping_to_string(
            processed_crash)
        boto_connection.submit(crash_id, "crash_report",
                               processed_crash_as_string)

    @staticmethod
    def _do_get_unredacted_processed(boto_connection, crash_id,
                                     json_object_hook):
        """Overriding this to change "name of thing" to crash_report"""
        try:
            processed_crash_as_string = boto_connection.fetch(
                crash_id, 'crash_report')
            return json.loads(
                processed_crash_as_string,
                object_hook=json_object_hook,
            )
        except boto_connection.ResponseError as x:
            raise CrashIDNotFound('%s not found: %s' % (crash_id, x))
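_do_get_unredacted_processed translates the connection's ResponseError into CrashIDNotFound. A minimal sketch, with a hypothetical stub connection that raises its own ResponseError on a missing key (the crash IDs are made up), exercises both the hit and the miss path:

import json


class StubFetchError(Exception):
    """Hypothetical stand-in for the connection's ResponseError."""


class StubFetchConnection(object):
    ResponseError = StubFetchError

    def __init__(self, store):
        self._store = store

    def fetch(self, crash_id, name_of_thing):
        try:
            return self._store[(crash_id, name_of_thing)]
        except KeyError:
            raise self.ResponseError('no such key')


conn = StubFetchConnection({
    ('deadbeef-dead-beef-dead-beef00170101', 'crash_report'):
        json.dumps({'uuid': 'deadbeef-dead-beef-dead-beef00170101'}),
})

# hit: the stored crash_report is decoded and returned
TelemetryBotoS3CrashStorage._do_get_unredacted_processed(
    conn, 'deadbeef-dead-beef-dead-beef00170101', None)

# miss: the stub's ResponseError is re-raised as CrashIDNotFound
try:
    TelemetryBotoS3CrashStorage._do_get_unredacted_processed(
        conn, '00000000-0000-0000-0000-000000000000', None)
except CrashIDNotFound:
    pass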