Example #1
class StandAloneServer(WebServerBase):
    required_config = Namespace()
    required_config.add_option('port',
                               doc='the port to listen to for submissions',
                               default=8882)
Example #2
class Foo(RequiredConfig):
    required_config = Namespace()
    required_config.add_option('x', default=17)
    required_config.add_option('y', default=23)
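
The listing above only declares the options. As a minimal sketch (not part of the original example, and assuming configman's documented ConfigurationManager/RequiredConfig API), this is roughly how Foo's required_config could be turned into a live configuration:

from configman import ConfigurationManager

config_manager = ConfigurationManager(
    [Foo.get_required_config()],   # RequiredConfig aggregates the Namespace
    app_name='demo',
    app_version='1.0',
    app_description='demonstrates option defaults',
    argv_source=[],                # ignore sys.argv in this sketch
)
with config_manager.context() as config:
    print(config.x)   # 17
    print(config.y)   # 23
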
Example #3
class BreakpadStackwalkerRule2015(ExternalProcessRule):
    """Executes the minidump stackwalker external process and puts output in processed crash"""
    required_config = Namespace()
    required_config.add_option(
        name='public_symbols_url',
        doc='url of the public symbol server',
        default="https://localhost",
        likely_to_be_changed=True
    )
    required_config.add_option(
        name='private_symbols_url',
        doc='url of the private symbol server',
        default="https://localhost",
        likely_to_be_changed=True
    )
    required_config.command_line = change_default(
        ExternalProcessRule,
        'command_line',
        'timeout -s KILL {kill_timeout} {command_pathname} '
        '--raw-json {raw_crash_pathname} '
        '--symbols-url {public_symbols_url} '
        '--symbols-url {private_symbols_url} '
        '--symbols-cache {symbol_cache_path} '
        '--symbols-tmp {symbol_tmp_path} '
        '{dump_file_pathname} '
        '2> /dev/null'
    )
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        # NOTE(willkg): This is the path for the RPM-based Socorro deploy. When
        # we switch to Docker, we should change this.
        '/data/socorro/stackwalk/bin/stackwalker',
    )
    required_config.add_option(
        'kill_timeout',
        doc='amount of time to let mdsw run before declaring it hung',
        default=600
    )
    required_config.add_option(
        'symbol_tmp_path',
        doc=(
            'directory to use as temp space for downloading symbols--must be on '
            'the same filesystem as symbols-cache'
        ),
        default=os.path.join(tempfile.gettempdir(), 'symbols-tmp'),
    )
    required_config.add_option(
        'symbol_cache_path',
        doc=(
            'the path where the symbol cache is found, this location must be '
            'readable and writeable (quote path with embedded spaces)'
        ),
        default=os.path.join(tempfile.gettempdir(), 'symbols'),
    )
    required_config.add_option(
        'temporary_file_system_storage_path',
        doc='a path where temporary files may be written',
        default=tempfile.gettempdir(),
    )

    def version(self):
        return '1.0'

    @contextmanager
    def _temp_raw_crash_json_file(self, raw_crash, crash_id):
        file_pathname = os.path.join(
            self.config.temporary_file_system_storage_path,
            "%s.%s.TEMPORARY.json" % (
                crash_id,
                threading.currentThread().getName()
            )
        )
        with open(file_pathname, "w") as f:
            ujson.dump(raw_crash, f)
        try:
            yield file_pathname
        finally:
            os.unlink(file_pathname)

    def _execute_external_process(self, command_line, processor_meta):
        stackwalker_output, return_code = super(
            BreakpadStackwalkerRule2015,
            self
        )._execute_external_process(command_line, processor_meta)

        if not isinstance(stackwalker_output, Mapping):
            processor_meta.processor_notes.append(
                "MDSW produced unexpected output: %s..." %
                str(stackwalker_output)[:10]
            )
            stackwalker_output = {}

        stackwalker_data = DotDict()
        stackwalker_data.json_dump = stackwalker_output
        stackwalker_data.mdsw_return_code = return_code

        stackwalker_data.mdsw_status_string = stackwalker_output.get(
            'status',
            'unknown error'
        )
        stackwalker_data.success = stackwalker_data.mdsw_status_string == 'OK'

        if return_code == 124:
            processor_meta.processor_notes.append(
                "MDSW terminated with SIGKILL due to timeout"
            )
        elif return_code != 0 or not stackwalker_data.success:
            processor_meta.processor_notes.append(
                "MDSW failed on '%s': %s" % (
                    command_line,
                    stackwalker_data.mdsw_status_string
                )
            )

        return stackwalker_data, return_code

    def expand_commandline(self, dump_file_pathname, raw_crash_pathname):
        """Expands the command line parameters and returns the final command line"""

        # NOTE(willkg): If we ever add new configuration variables, we'll need
        # to add them here, too, otherwise they won't get expanded in the
        # command line.

        params = {
            # These come from config
            'kill_timeout': self.config.kill_timeout,
            'command_pathname': self.config.command_pathname,
            'public_symbols_url': self.config.public_symbols_url,
            'private_symbols_url': self.config.private_symbols_url,
            'symbol_cache_path': self.config.symbol_cache_path,
            'symbol_tmp_path': self.config.symbol_tmp_path,

            # These are calculated
            'dump_file_pathname': dump_file_pathname,
            'raw_crash_pathname': raw_crash_pathname
        }

        return self.config.command_line.format(**params)

    def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        if 'additional_minidumps' not in processed_crash:
            processed_crash.additional_minidumps = []
        with self._temp_raw_crash_json_file(
            raw_crash,
            raw_crash.uuid
        ) as raw_crash_pathname:
            for dump_name in raw_dumps.iterkeys():

                if processor_meta.quit_check:
                    processor_meta.quit_check()

                # this rule is only interested in dumps targeted for the
                # minidump stackwalker external program.  As of the writing
                # of this code, there is one other dump type.  The only way
                # to differentiate these dump types is by the name of the
                # dump.  All minidumps targeted for the stackwalker will have
                # a name with a prefix specified in configuration:
                if not dump_name.startswith(self.config.dump_field):
                    # dumps not intended for the stackwalker are ignored
                    continue

                dump_file_pathname = raw_dumps[dump_name]

                if self.config.chatty:
                    self.config.logger.debug(
                        "BreakpadStackwalkerRule2015: %s, %s",
                        dump_name,
                        dump_file_pathname
                    )

                command_line = self.expand_commandline(
                    dump_file_pathname=dump_file_pathname,
                    raw_crash_pathname=raw_crash_pathname
                )

                stackwalker_data, return_code = self._execute_external_process(
                    command_line,
                    processor_meta
                )

                if dump_name == self.config.dump_field:
                    processed_crash.update(stackwalker_data)
                else:
                    processed_crash.additional_minidumps.append(dump_name)
                    processed_crash[dump_name] = stackwalker_data

        return True
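
For reference, expand_commandline() above is plain str.format() substitution over the configured command_line template. A standalone sketch with made-up values (the paths and URL below are illustrative only) shows the kind of command line it produces:

template = (
    'timeout -s KILL {kill_timeout} {command_pathname} '
    '--raw-json {raw_crash_pathname} '
    '--symbols-url {public_symbols_url} '
    '{dump_file_pathname} 2> /dev/null'
)
print(template.format(
    kill_timeout=600,
    command_pathname='/data/socorro/stackwalk/bin/stackwalker',
    raw_crash_pathname='/tmp/crash.TEMPORARY.json',
    public_symbols_url='https://localhost',
    dump_file_pathname='/tmp/crash.dump',
))
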
Example #4
class PostgreSQLCrashStorage(CrashStorageBase):
    """this implementation of crashstorage saves processed crashes to
    an instance of Postgresql.  It only saves certain key values to the
    partitioned reports table, therefore it is not a source for fetching
    complete processed reports and does not implement any of the 'get'
    methods."""

    required_config = Namespace()

    required_config.add_option(
        'transaction_executor_class',
        default="socorro.database.transaction_executor."
        "TransactionExecutorWithInfiniteBackoff",
        doc='a class that will manage transactions',
        from_string_converter=class_converter,
        reference_value_from='resource.postgresql',
    )
    required_config.add_option(
        'database_class',
        default=ConnectionContext,
        doc='the class responsible for connecting to Postgres',
        reference_value_from='resource.postgresql',
    )

    _reports_table_mappings = (
        # processed name, reports table name
        ("addons_checked", "addons_checked"),
        ("address", "address"),
        ("app_notes", "app_notes"),
        ("build", "build"),
        ("client_crash_date", "client_crash_date"),
        ("completeddatetime", "completed_datetime"),
        ("cpu_info", "cpu_info"),
        ("cpu_name", "cpu_name"),
        ("date_processed", "date_processed"),
        ("distributor", "distributor"),
        ("distributor_version", "distributor_version"),
        ("email", "email"),
        ("exploitability", "exploitability"),
        #("flash_process_dump", "flash_process_dump"),  # future
        ("flash_version", "flash_version"),
        ("hangid", "hangid"),
        ("install_age", "install_age"),
        ("last_crash", "last_crash"),
        ("os_name", "os_name"),
        ("os_version", "os_version"),
        ("processor_notes", "processor_notes"),
        ("process_type", "process_type"),
        ("product", "product"),
        ("productid", "productid"),
        ("reason", "reason"),
        ("release_channel", "release_channel"),
        ("signature", "signature"),
        ("startedDateTime", "started_datetime"),
        ("success", "success"),
        ("topmost_filenames", "topmost_filenames"),
        ("truncated", "truncated"),
        ("uptime", "uptime"),
        ("user_comments", "user_comments"),
        ("user_id", "user_id"),
        ("url", "url"),
        ("uuid", "uuid"),
        ("version", "version"),
    )

    #--------------------------------------------------------------------------
    def __init__(self, config, quit_check_callback=None):
        super(PostgreSQLCrashStorage,
              self).__init__(config, quit_check_callback=quit_check_callback)
        self.database = config.database_class(config)
        self.transaction = config.transaction_executor_class(
            config, self.database, quit_check_callback=quit_check_callback)

    #--------------------------------------------------------------------------
    def save_raw_crash(self, raw_crash, dumps, crash_id):
        """nota bene: this function does not save the dumps in PG, only
        the raw crash json is saved."""
        self.transaction(self._save_raw_crash_transaction, raw_crash, crash_id)

    #--------------------------------------------------------------------------
    def _save_raw_crash_transaction(self, connection, raw_crash, crash_id):
        raw_crash_table_name = ('raw_crashes_%s' %
                                self._table_suffix_for_crash_id(crash_id))
        insert_sql = """insert into %s (uuid, raw_crash, date_processed) values
                        (%%s, %%s, %%s)""" % raw_crash_table_name
        savepoint_name = threading.currentThread().getName().replace('-', '')
        value_list = (crash_id, json.dumps(raw_crash),
                      raw_crash["submitted_timestamp"])
        execute_no_results(connection, "savepoint %s" % savepoint_name)
        try:
            execute_no_results(connection, insert_sql, value_list)
            execute_no_results(connection,
                               "release savepoint %s" % savepoint_name)
        except self.config.database_class.IntegrityError:
            # report already exists
            execute_no_results(connection,
                               "rollback to savepoint %s" % savepoint_name)
            execute_no_results(connection,
                               "release savepoint %s" % savepoint_name)
            execute_no_results(
                connection,
                "delete from %s where uuid = %%s" % raw_crash_table_name,
                (crash_id, ))
            execute_no_results(connection, insert_sql, value_list)

    #--------------------------------------------------------------------------
    def get_raw_crash(self, crash_id):
        """the default implementation of fetching a raw_crash

        parameters:
           crash_id - the id of a raw crash to fetch"""
        return self.transaction(self._get_raw_crash_transaction, crash_id)

    #--------------------------------------------------------------------------
    def _get_raw_crash_transaction(self, connection, crash_id):
        raw_crash_table_name = ('raw_crash_%s' %
                                self._table_suffix_for_crash_id(crash_id))
        fetch_sql = 'select raw_crash from %s where uuid = %%s' % \
                    raw_crash_table_name
        try:
            return single_value_sql(connection, fetch_sql, (crash_id, ))
        except SQLDidNotReturnSingleValue:
            raise CrashIDNotFound(crash_id)

    #--------------------------------------------------------------------------
    def save_processed(self, processed_crash):
        self.transaction(self._save_processed_transaction, processed_crash)

    #--------------------------------------------------------------------------
    def _save_processed_transaction(self, connection, processed_crash):
        report_id = self._save_processed_report(connection, processed_crash)
        self._save_plugins(connection, processed_crash, report_id)
        self._save_extensions(connection, processed_crash, report_id)
        self._save_processed_crash(connection, processed_crash)

    def _save_processed_crash(self, connection, processed_crash):
        crash_id = processed_crash['uuid']
        processed_crashes_table_name = (
            'processed_crashes_%s' % self._table_suffix_for_crash_id(crash_id))
        upsert_sql = """
        WITH
        update_processed_crash AS (
            UPDATE %(table)s SET
                processed_crash = %%(processed_json)s,
                date_processed = %%(date_processed)s
            WHERE uuid = %%(uuid)s
            RETURNING 1
        ),
        insert_processed_crash AS (
            INSERT INTO %(table)s (uuid, processed_crash, date_processed)
            ( SELECT
                %%(uuid)s as uuid,
                %%(processed_json)s as processed_crash,
                %%(date_processed)s as date_processed
                WHERE NOT EXISTS (
                    SELECT uuid from %(table)s
                    WHERE
                        uuid = %%(uuid)s
                    LIMIT 1
                )
            )
            RETURNING 2
        )
        SELECT * from update_processed_crash
        UNION ALL
        SELECT * from insert_processed_crash
        """ % {
            'table': processed_crashes_table_name,
            'uuid': crash_id
        }

        values = {
            'processed_json': json.dumps(processed_crash, cls=JsonDTEncoder),
            'date_processed': processed_crash["date_processed"],
            'uuid': crash_id
        }
        execute_no_results(connection, upsert_sql, values)

    #--------------------------------------------------------------------------
    def _save_processed_report(self, connection, processed_crash):
        column_list = []
        placeholder_list = []
        value_list = []
        for pro_crash_name, report_name in self._reports_table_mappings:
            column_list.append(report_name)
            placeholder_list.append('%s')
            value_list.append(processed_crash[pro_crash_name])
        crash_id = processed_crash['uuid']
        reports_table_name = ('reports_%s' %
                              self._table_suffix_for_crash_id(crash_id))
        insert_sql = "insert into %s (%s) values (%s) returning id" % (
            reports_table_name, ', '.join(column_list),
            ', '.join(placeholder_list))
        # we want to insert directly into the report table.  There is a
        # chance however that the record already exists.  If it does, then
        # the insert would fail and the connection would fall into a "broken" state.
        # To avoid this, we set a savepoint to which we can roll back if the
        # record already exists - essentially a nested transaction.
        # We use the name of the executing thread as the savepoint name.
        # alternatively we could get a uuid.
        savepoint_name = threading.currentThread().getName().replace('-', '')
        execute_no_results(connection, "savepoint %s" % savepoint_name)
        try:
            report_id = single_value_sql(connection, insert_sql, value_list)
            execute_no_results(connection,
                               "release savepoint %s" % savepoint_name)
        except self.config.database_class.IntegrityError:
            # report already exists
            execute_no_results(connection,
                               "rollback to savepoint %s" % savepoint_name)
            execute_no_results(connection,
                               "release savepoint %s" % savepoint_name)
            execute_no_results(
                connection,
                "delete from %s where uuid = %%s" % reports_table_name,
                (processed_crash.uuid, ))
            report_id = single_value_sql(connection, insert_sql, value_list)
        return report_id

    #--------------------------------------------------------------------------
    def _save_plugins(self, connection, processed_crash, report_id):
        """ Electrolysis Support - Optional - processed_crash may contain a
        ProcessType of plugin. In the future this value would be default,
        content, maybe even Jetpack... This indicates which process was the
        crashing process.
            plugin - When set to plugin, the jsonDocument MUST also contain
                     PluginFilename, PluginName, and PluginVersion
        """
        process_type = processed_crash['process_type']
        if not process_type:
            return

        if process_type == "plugin":

            # Bug#543776 We are actually relaxing the non-null policy...
            # a null filename, name, and version is OK. We'll use empty strings
            try:
                plugin_filename = processed_crash['PluginFilename']
                plugin_name = processed_crash['PluginName']
                plugin_version = processed_crash['PluginVersion']
            except KeyError as x:
                self.config.logger.error(
                    'the crash is missing a required field: %s', str(x))
                return
            find_plugin_sql = ('select id from plugins '
                               'where filename = %s '
                               'and name = %s')
            try:
                plugin_id = single_value_sql(connection, find_plugin_sql,
                                             (plugin_filename, plugin_name))
            except SQLDidNotReturnSingleValue:
                insert_plugins_sql = ("insert into plugins (filename, name) "
                                      "values (%s, %s) returning id")
                plugin_id = single_value_sql(connection, insert_plugins_sql,
                                             (plugin_filename, plugin_name))
            crash_id = processed_crash['uuid']
            table_suffix = self._table_suffix_for_crash_id(crash_id)
            plugin_reports_table_name = 'plugins_reports_%s' % table_suffix
            plugins_reports_insert_sql = (
                'insert into %s '
                '    (report_id, plugin_id, date_processed, version) '
                'values '
                '    (%%s, %%s, %%s, %%s)' % plugin_reports_table_name)
            values_tuple = (report_id, plugin_id,
                            processed_crash['date_processed'], plugin_version)
            execute_no_results(connection, plugins_reports_insert_sql,
                               values_tuple)
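
_save_raw_crash_transaction() and _save_processed_report() both use the savepoint-as-nested-transaction trick described in the comments: an IntegrityError rolls back only to the savepoint, so the outer transaction stays usable and the conflicting row can be replaced. A minimal sketch of that pattern against plain psycopg2 (an assumption; the example wraps the same statements behind execute_no_results and single_value_sql):

import psycopg2

def insert_or_replace(connection, insert_sql, delete_sql, values, key):
    """Insert a row; on conflict, delete the old row and insert again."""
    cursor = connection.cursor()
    cursor.execute("SAVEPOINT sp_upsert")
    try:
        cursor.execute(insert_sql, values)
        cursor.execute("RELEASE SAVEPOINT sp_upsert")
    except psycopg2.IntegrityError:
        # the row already exists; undo only the failed insert so the
        # surrounding transaction remains valid, then replace the row
        cursor.execute("ROLLBACK TO SAVEPOINT sp_upsert")
        cursor.execute("RELEASE SAVEPOINT sp_upsert")
        cursor.execute(delete_sql, (key,))
        cursor.execute(insert_sql, values)
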
Example #5
class TelemetryBotoS3CrashStorage(BotoS3CrashStorage):
    """Sends a subset of the processed crash to an S3 bucket

    The subset of the processed crash is based on the JSON Schema which is
    derived from "socorro/external/es/super_search_fields.py".

    """

    required_config = Namespace()
    required_config.resource_class = change_default(
        BotoCrashStorage, 'resource_class',
        'socorro.external.boto.connection_context.RegionalS3ConnectionContext')
    required_config.elasticsearch = Namespace()
    required_config.elasticsearch.add_option(
        'elasticsearch_class',
        default='socorro.external.es.connection_context.ConnectionContext',
        from_string_converter=class_converter,
        reference_value_from='resource.elasticsearch',
    )

    def __init__(self, config, *args, **kwargs):
        # This class requires that we use
        # SimpleDatePrefixKeyBuilder, so we stomp on the configuration
        # to make absolutely sure it gets set that way.
        config.keybuilder_class = SimpleDatePrefixKeyBuilder
        super(TelemetryBotoS3CrashStorage,
              self).__init__(config, *args, **kwargs)

    def _get_all_fields(self):
        if (hasattr(self, '_all_fields')
                and hasattr(self, '_all_fields_timestamp')):
            # we might have it cached
            age = time.time() - self._all_fields_timestamp
            if age < 60 * 60:
                # fresh enough
                return self._all_fields

        self._all_fields = SuperSearchFields(config=self.config).get()
        self._all_fields_timestamp = time.time()
        return self._all_fields

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        all_fields = self._get_all_fields()
        crash_report = {}

        # TODO: Opportunity for optimization:
        # We could inspect CRASH_REPORT_JSON_SCHEMA and get a list
        # of all (recursive) keys that are in there and use that
        # to limit the two following loops to not bother
        # filling up `crash_report` with keys that will never be
        # needed.

        # Rename fields in raw_crash.
        raw_fields_map = dict((x['in_database_name'], x['name'])
                              for x in all_fields.values()
                              if x['namespace'] == 'raw_crash')
        for key, val in raw_crash.items():
            crash_report[raw_fields_map.get(key, key)] = val

        # Rename fields in processed_crash.
        processed_fields_map = dict((x['in_database_name'], x['name'])
                                    for x in all_fields.values()
                                    if x['namespace'] == 'processed_crash')
        for key, val in processed_crash.items():
            crash_report[processed_fields_map.get(key, key)] = val

        # Validate crash_report.
        crash_report = json_schema_reducer.make_reduced_dict(
            CRASH_REPORT_JSON_SCHEMA, crash_report)
        self.save_processed(crash_report)

    @staticmethod
    def _do_save_processed(boto_connection, processed_crash):
        """Overriding this to change "name of thing" to crash_report"""
        crash_id = processed_crash['uuid']
        processed_crash_as_string = boto_connection._convert_mapping_to_string(
            processed_crash)
        boto_connection.submit(crash_id, "crash_report",
                               processed_crash_as_string)

    @staticmethod
    def _do_get_unredacted_processed(boto_connection, crash_id,
                                     json_object_hook):
        """Overriding this to change "name of thing" to crash_report"""
        try:
            processed_crash_as_string = boto_connection.fetch(
                crash_id, 'crash_report')
            return json.loads(
                processed_crash_as_string,
                object_hook=json_object_hook,
            )
        except boto_connection.ResponseError as x:
            raise CrashIDNotFound('%s not found: %s' % (crash_id, x))
Example #6
    def test_basic_crashstorage(self):

        required_config = Namespace()

        mock_logging = Mock()
        required_config.add_option('logger', default=mock_logging)
        required_config.update(CrashStorageBase.required_config)

        config_manager = ConfigurationManager(
            [required_config],
            app_name='testapp',
            app_version='1.0',
            app_description='app description',
            values_source_list=[{
                'logger': mock_logging,
            }],
            argv_source=[]
        )

        with config_manager.context() as config:
            crashstorage = CrashStorageBase(
                config,
                quit_check_callback=fake_quit_check
            )
            crashstorage.save_raw_crash({}, 'payload', 'ooid')
            crashstorage.save_processed({})
            assert_raises(
                NotImplementedError,
                crashstorage.get_raw_crash, 'ooid'
            )
            assert_raises(
                NotImplementedError,
                crashstorage.get_raw_dump, 'ooid'
            )
            assert_raises(
                NotImplementedError,
                crashstorage.get_unredacted_processed, 'ooid'
            )
            assert_raises(
                NotImplementedError,
                crashstorage.remove, 'ooid'
            )
            eq_(crashstorage.new_crashes(), [])
            crashstorage.close()

        with config_manager.context() as config:
            class MyCrashStorageTest(CrashStorageBase):
                def save_raw_crash(self, raw_crash, dumps, crash_id):
                    eq_(crash_id, "fake_id")
                    eq_(raw_crash, "fake raw crash")
                    eq_(
                        sorted(dumps.keys()),
                        sorted(['one', 'two', 'three'])
                    )
                    eq_(
                        sorted(dumps.values()),
                        sorted(['eins', 'zwei', 'drei'])
                    )

            values = ['eins', 'zwei', 'drei']

            def open_function(*args, **kwargs):
                return values.pop(0)

            crashstorage = MyCrashStorageTest(
                config,
                quit_check_callback=fake_quit_check
            )

            with mock.patch("__builtin__.open") as open_mock:
                open_mock.return_value = mock.MagicMock()
                (
                    open_mock.return_value.__enter__
                    .return_value.read.side_effect
                ) = open_function
                crashstorage.save_raw_crash_with_file_dumps(
                    "fake raw crash",
                    FileDumpsMapping({
                        'one': 'eins',
                        'two': 'zwei',
                        'three': 'drei'
                    }),
                    'fake_id'
                )
Example #7
    def test_benchmarking_crashstore(self):
        required_config = Namespace()

        mock_logging = Mock()
        required_config.add_option('logger', default=mock_logging)
        required_config.update(BenchmarkingCrashStorage.get_required_config())
        fake_crash_store = Mock()

        config_manager = ConfigurationManager(
            [required_config],
            app_name='testapp',
            app_version='1.0',
            app_description='app description',
            values_source_list=[{
                'logger': mock_logging,
                'wrapped_crashstore': fake_crash_store,
                'benchmark_tag': 'test'
            }],
            argv_source=[]
        )

        with config_manager.context() as config:
            crashstorage = BenchmarkingCrashStorage(
                config,
                quit_check_callback=fake_quit_check
            )
            crashstorage.start_timer = lambda: 0
            crashstorage.end_timer = lambda: 1
            fake_crash_store.assert_called_with(config, fake_quit_check)

            crashstorage.save_raw_crash({}, 'payload', 'ooid')
            crashstorage.wrapped_crashstore.save_raw_crash.assert_called_with(
                {},
                'payload',
                'ooid'
            )
            mock_logging.debug.assert_called_with(
                '%s save_raw_crash %s',
                'test',
                1
            )
            mock_logging.debug.reset_mock()

            crashstorage.save_processed({})
            crashstorage.wrapped_crashstore.save_processed.assert_called_with(
                {}
            )
            mock_logging.debug.assert_called_with(
                '%s save_processed %s',
                'test',
                1
            )
            mock_logging.debug.reset_mock()

            crashstorage.save_raw_and_processed({}, 'payload', {}, 'ooid')
            crashstorage.wrapped_crashstore.save_raw_and_processed \
                .assert_called_with(
                    {},
                    'payload',
                    {},
                    'ooid'
                )
            mock_logging.debug.assert_called_with(
                '%s save_raw_and_processed %s',
                'test',
                1
            )
            mock_logging.debug.reset_mock()

            crashstorage.get_raw_crash('uuid')
            crashstorage.wrapped_crashstore.get_raw_crash.assert_called_with(
                'uuid'
            )
            mock_logging.debug.assert_called_with(
                '%s get_raw_crash %s',
                'test',
                1
            )
            mock_logging.debug.reset_mock()

            crashstorage.get_raw_dump('uuid')
            crashstorage.wrapped_crashstore.get_raw_dump.assert_called_with(
                'uuid'
            )
            mock_logging.debug.assert_called_with(
                '%s get_raw_dump %s',
                'test',
                1
            )
            mock_logging.debug.reset_mock()

            crashstorage.get_raw_dumps('uuid')
            crashstorage.wrapped_crashstore.get_raw_dumps.assert_called_with(
                'uuid'
            )
            mock_logging.debug.assert_called_with(
                '%s get_raw_dumps %s',
                'test',
                1
            )
            mock_logging.debug.reset_mock()

            crashstorage.get_raw_dumps_as_files('uuid')
            crashstorage.wrapped_crashstore.get_raw_dumps_as_files \
                .assert_called_with(
                    'uuid'
                )
            mock_logging.debug.assert_called_with(
                '%s get_raw_dumps_as_files %s',
                'test',
                1
            )
            mock_logging.debug.reset_mock()

            crashstorage.get_unredacted_processed('uuid')
            crashstorage.wrapped_crashstore.get_unredacted_processed \
                .assert_called_with(
                    'uuid'
                )
            mock_logging.debug.assert_called_with(
                '%s get_unredacted_processed %s',
                'test',
                1
            )
            mock_logging.debug.reset_mock()
Example #8
class ConnectionContextBase(RequiredConfig):
    required_config = Namespace()
    required_config.add_option(
        "access_key",
        doc="access key",
        default=None,
        reference_value_from="resource.boto",
    )
    required_config.add_option(
        "secret_access_key",
        doc="secret access key",
        default=None,
        secret=True,
        reference_value_from="secrets.boto",
        likely_to_be_changed=True,
    )
    required_config.add_option(
        "bucket_name",
        doc="The name of the bucket.",
        default="crashstats",
        reference_value_from="resource.boto",
        likely_to_be_changed=True,
    )
    required_config.add_option(
        "prefix",
        doc="a prefix to use inside the bucket",
        default="",
        reference_value_from="resource.boto",
        likely_to_be_changed=True,
    )
    required_config.add_option(
        "boto_metrics_prefix",
        doc="a prefix to use for boto metrics",
        default="",
        reference_value_from="resource.boto",
    )

    RETRYABLE_EXCEPTIONS = (
        socket.timeout,
        boto.exception.PleaseRetryException,
        boto.exception.ResumableTransferDisposition,
        boto.exception.ResumableUploadException,
    )

    def __init__(self, config, quit_check_callback=None):
        self.config = config
        self._CreateError = boto.exception.StorageCreateError
        self.ResponseError = (boto.exception.StorageResponseError, KeyNotFound)
        self._bucket_cache = {}
        self.metrics = markus.get_metrics(config.boto_metrics_prefix)

    def _connect(self):
        try:
            return self.connection
        except AttributeError:
            self.connection = self._connect_to_endpoint(
                **self._get_credentials())
            return self.connection

    def _get_credentials(self):
        """Returns credentials for creating the connection"""
        raise NotImplementedError

    def _get_datestamp(self, crashid):
        """Retrieves datestamp from a crashid or raises an exception"""
        datestamp = date_from_ooid(crashid)
        if datestamp is None:
            # We should never hit this situation unless the crashid is not valid
            raise CrashidMissingDatestamp("%s is missing datestamp" % crashid)
        return datestamp

    def build_keys(self, prefix, name_of_thing, crashid):
        """Builds a list of s3 pseudo-filenames

        When using keys for saving a crash, always use the first one given.

        When using keys for loading a crash, try each key in order. This lets
        us change our key scheme and continue to access things saved using the
        old key.

        :arg prefix: the prefix to use
        :arg name_of_thing: the kind of thing we're building a filename for; e.g.
            "raw_crash"
        :arg crashid: the crash id for the thing being stored

        :returns: list of keys to try in order

        """
        if name_of_thing == "raw_crash":
            # Insert the first 3 chars of the crashid providing some entropy
            # earlier in the key so that consecutive s3 requests get
            # distributed across multiple s3 partitions
            entropy = crashid[:3]
            date = self._get_datestamp(crashid).strftime("%Y%m%d")
            return [
                "%(prefix)s/v2/%(nameofthing)s/%(entropy)s/%(date)s/%(crashid)s"
                % {
                    "prefix": prefix,
                    "nameofthing": name_of_thing,
                    "entropy": entropy,
                    "date": date,
                    "crashid": crashid,
                }
            ]

        elif name_of_thing == "crash_report":
            # Crash data from the TelemetryBotoS3CrashStorage
            date = self._get_datestamp(crashid).strftime("%Y%m%d")
            return [
                "%(prefix)s/v1/%(nameofthing)s/%(date)s/%(crashid)s" % {
                    "prefix": prefix,
                    "nameofthing": name_of_thing,
                    "date": date,
                    "crashid": crashid,
                }
            ]

        return [
            "%(prefix)s/v1/%(nameofthing)s/%(crashid)s" % {
                "prefix": prefix,
                "nameofthing": name_of_thing,
                "crashid": crashid
            }
        ]

    def _get_bucket(self, conn, bucket_name):
        try:
            return self._bucket_cache[bucket_name]
        except KeyError:
            self._bucket_cache[bucket_name] = conn.get_bucket(bucket_name)
            return self._bucket_cache[bucket_name]

    def _get_or_create_bucket(self, conn, bucket_name):
        try:
            return self._get_bucket(conn, bucket_name)
        except self.ResponseError:
            self._bucket_cache[bucket_name] = conn.create_bucket(bucket_name)
            return self._bucket_cache[bucket_name]

    def submit(self, id, name_of_thing, thing):
        """submit something to boto"""
        # can only submit binary to boto
        assert isinstance(thing, bytes), type(thing)
        try:
            start_time = time.time()

            conn = self._connect()
            bucket = self._get_or_create_bucket(conn, self.config.bucket_name)

            all_keys = self.build_keys(self.config.prefix, name_of_thing, id)
            # Always submit using the first key
            key = all_keys[0]
            key_object = bucket.new_key(key)
            key_object.set_contents_from_string(thing)
            index_outcome = "successful"
        except Exception:
            index_outcome = "failed"
            raise
        finally:
            elapsed_time = time.time() - start_time
            self.metrics.histogram(
                "submit",
                value=elapsed_time * 1000.0,
                tags=["kind:" + name_of_thing, "outcome:" + index_outcome],
            )

    def fetch(self, id, name_of_thing):
        """Retrieve something from boto"""
        conn = self._connect()
        bucket = self._get_bucket(conn, self.config.bucket_name)

        all_keys = self.build_keys(self.config.prefix, name_of_thing, id)
        for key in all_keys:
            key_object = bucket.get_key(key)
            if key_object is not None:
                # NOTE(willkg): this says "as string", but in Python 3 this
                # will be bytes.
                return key_object.get_contents_as_string()

        # None of the keys worked, so raise an error
        raise KeyNotFound(
            "%s (bucket=%r keys=%r) not found, no value returned" %
            (id, self.config.bucket_name, all_keys))

    def _convert_mapping_to_string(self, a_mapping):
        return json.dumps(a_mapping, cls=JSONISOEncoder)

    def _convert_list_to_string(self, a_list):
        return json.dumps(list(a_list))

    def _convert_string_to_list(self, a_string):
        return json.loads(a_string)

    @contextlib.contextmanager
    def __call__(self):
        yield self

    def force_reconnect(self):
        pass

    def is_retryable_exception(self, exc):
        return isinstance(exc, self.RETRYABLE_EXCEPTIONS)
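
To make build_keys() above concrete, here is an illustrative sketch (hypothetical prefix and thing names) of the key shapes it yields for a crash id whose date suffix is 20071025:

crashid = "0bba929f-8721-460c-dead-a43c20071025"
# raw_crash keys put 3 characters of entropy ahead of the date
print("prefix/v2/raw_crash/%s/20071025/%s" % (crashid[:3], crashid))
# crash_report keys (used by TelemetryBotoS3CrashStorage) are date-prefixed
print("prefix/v1/crash_report/20071025/%s" % crashid)
# everything else falls through to the flat v1 scheme
print("prefix/v1/dump/%s" % crashid)
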
Example #9

#------------------------------------------------------------------------------
def query1(conn):
    """a transaction to be executed by the database"""
    conn.query('select * from life')


#------------------------------------------------------------------------------
def query2(conn):
    """another transaction to be executed by the database"""
    raise Exception("not a database related error")

#==============================================================================
if __name__ == "__main__":
    definition_source = Namespace()
    definition_source.add_option('transaction_executor_class',
                                 default=TransactionExecutorWithBackoff,
                                 doc='a class that will execute transactions')

    c = ConfigurationManager(definition_source,
                             app_name='advanced_demo_3',
                             app_description=__doc__)

    with c.context() as config:
        # the configuration has a class that can execute transactions
        # we instantiate it here.
        executor = config.transaction_executor_class(config)

        # this first query has a 50% probability of failing due to a database
        # connectivity problem.  If the transaction_executor_class is a class
Example #10
class TarFileCrashStore(CrashStorageBase):
    required_config = Namespace()
    required_config.add_option(name='tarball_name',
                               doc='pathname to the target tarfile',
                               default='fred.tar')
    required_config.add_option(name='temp_directory',
                               doc='the pathname of a temporary directory',
                               default='/tmp')
    required_config.add_option(
        name='tarfile_module',
        doc='a module that supplies the tarfile interface',
        default='tarfile',
        from_string_converter=class_converter)
    required_config.add_option(name='gzip_module',
                               doc='a module that supplies the gzip interface',
                               default='gzip',
                               from_string_converter=class_converter)
    required_config.add_option(name='os_module',
                               doc='a module that supplies the os interface',
                               default='os',
                               from_string_converter=class_converter)

    @staticmethod
    def stringify_datetimes(obj):
        if isinstance(obj, datetime.datetime):
            return obj.strftime("%Y-%m-%d %H:%M:%S.%f")
        raise TypeError

    def __init__(self, config, quit_check_callback=None):
        super(TarFileCrashStore, self).__init__(config, quit_check_callback)
        self.tarfile_module = config.tarfile_module
        self.gzip_module = config.gzip_module
        self.os_module = config.os_module

    def _save_to_tarfile(self, actual_pathname, target_pathname):
        try:
            self.tar_file.add(actual_pathname, target_pathname)
        except AttributeError:
            # the tar_file is lazily instantiated.  It isn't created until
            # a process tries to save something to it
            self.tar_file = self.tarfile_module.open(self.config.tarball_name,
                                                     'w')
            self.tar_file.add(actual_pathname, target_pathname)

    def close(self):
        try:
            self.tar_file.close()
        except AttributeError:
            # the tar_file was never actually created because the save_*
            # methods were never called.  We can silently ignore this
            pass

    def save_processed(self, processed_crash):
        processed_crash_as_string = json.dumps(
            processed_crash, default=self.stringify_datetimes)
        crash_id = processed_crash['crash_id']
        file_name = os.path.join(self.config.temp_directory,
                                 crash_id + '.jsonz')
        file_handle = self.gzip_module.open(file_name, 'w', 9)
        try:
            file_handle.write(processed_crash_as_string)
        finally:
            file_handle.close()
        self._save_to_tarfile(
            file_name,
            os.path.join(crash_id[:2], crash_id[2:4], crash_id + '.jsonz'))
        self.os_module.unlink(file_name)
        self.config.logger.debug('saved - %s', file_name)
Example #11
class ThreadedTaskManager(TaskManager):
    """Given an iterator over a sequence of job parameters and a function,
    this class will execute the function in a set of threads."""
    required_config = Namespace()
    required_config.add_option('idle_delay',
                               default=7,
                               doc='the delay in seconds if no job is found')
    # how does one choose how many threads to use?  Keep the number low if your
    # application is compute bound.  You can raise it if your app is i/o
    # bound.  The best thing to do is to test the throughput of your app with
    # several values.  For Socorro, we've found that setting this value to the
    # number of processor cores in the system gives the best throughput.
    required_config.add_option('number_of_threads',
                               default=4,
                               doc='the number of threads')
    # there is wisdom in setting the maximum queue size to be no more than
    # twice the number of threads.  By keeping the threads starved, the
    # queuing thread will be blocked more frequently.  Once an item
    # is in the queue, there may be no way to fetch it again if disaster
    # strikes and this app quits or fails.  Potentially anything left in
    # the queue could be lost.  Limiting the queue size ensures minimal
    # damage in a worst case scenario.
    required_config.add_option('maximum_queue_size',
                               default=8,
                               doc='the maximum size of the internal queue')

    def __init__(self,
                 config,
                 job_source_iterator=default_iterator,
                 task_func=default_task_func):
        """the constructor accepts the function that will serve as the data
        source iterator and the function that the threads will execute on
        consuming the data.

        parameters:
            job_source_iterator - an iterator to serve as the source of data.
                                  it can be of the form of a generator or
                                  iterator; a function that returns an
                                  iterator; an instance of an iterable object;
                                  or a class that when instantiated with a
                                  config object can be iterated.  The iterator
                                  must yield a tuple consisting of a
                                  function's tuple of args and, optionally, a
                                  mapping of kwargs.
                                  Ex:  (('a', 17), {'x': 23})
            task_func - a function that will accept the args and kwargs yielded
                        by the job_source_iterator"""
        super(ThreadedTaskManager, self).__init__(config, job_source_iterator,
                                                  task_func)
        self.thread_list = []  # the thread object storage
        self.number_of_threads = config.number_of_threads
        self.task_queue = queue.Queue(config.maximum_queue_size)

    def start(self):
        """this function will start the queing thread that executes the
        iterator and feeds jobs into the queue.  It also starts the worker
        threads that just sit and wait for items to appear on the queue. This
        is a non blocking call, so the executing thread is free to do other
        things while the other threads work."""
        self.logger.debug('start')
        # start each of the task threads.
        for x in range(self.number_of_threads):
            # each thread is given the config object as well as a reference to
            # this manager class.  The manager class is where the queue lives
            # and the task threads will refer to it to get their next jobs.
            new_thread = TaskThread(self.config, self.task_queue)
            self.thread_list.append(new_thread)
            new_thread.start()
        self.queuing_thread = threading.Thread(
            name="QueuingThread", target=self._queuing_thread_func)
        self.queuing_thread.start()

    def wait_for_completion(self, waiting_func=None):
        """This is a blocking function call that will wait for the queuing
        thread to complete.

        parameters:
            waiting_func - this function will be called every one second while
                           waiting for the queuing thread to quit.  This allows
                           for logging timers, status indicators, etc."""
        self.logger.debug("waiting to join queuingThread")
        self._responsive_join(self.queuing_thread, waiting_func)

    def stop(self):
        """This function will tell all threads to quit.  All threads
        periodically look at the value of quit.  If they detect quit is True,
        then they commit ritual suicide.  After setting the quit flag, this
        function will wait for the queuing thread to quit."""
        self.quit = True
        self.wait_for_completion()

    def blocking_start(self, waiting_func=None):
        """this function is just a wrapper around the start and
        wait_for_completion methods.  It starts the queuing thread and then
        waits for it to complete.  If run by the main thread, it will detect
        the KeyboardInterrupt exception (which is what SIGTERM and SIGHUP
        have been translated to) and will order the threads to die."""
        try:
            self.start()
            self.wait_for_completion(waiting_func)
            # it only ends if someone hits  ^C or sends SIGHUP or SIGTERM -
            # any of which will get translated into a KeyboardInterrupt
        except KeyboardInterrupt:
            while True:
                try:
                    self.stop()
                    break
                except KeyboardInterrupt:
                    self.logger.warning(
                        'We heard you the first time.  There '
                        'is no need for further keyboard or signal '
                        'interrupts.  We are waiting for the '
                        'worker threads to stop.  If this app '
                        'does not halt soon, you may have to send '
                        'SIGKILL (kill -9)')

    def wait_for_empty_queue(self, wait_log_interval=0, wait_reason=''):
        """Sit around and wait for the queue to become empty

        parameters:
            wait_log_interval - while sleeping, it is helpful if the thread
                                periodically announces itself so that we
                                know that it is still alive.  This number is
                                the time in seconds between log entries.
            wait_reason - this is the explanation of why the thread is
                          sleeping.  This is likely to be a message like:
                          'there is no work to do'."""
        seconds = 0
        while True:
            if self.task_queue.empty():
                break
            self.quit_check()
            if wait_log_interval and not seconds % wait_log_interval:
                self.logger.info('%s: %dsec so far', wait_reason, seconds)
                self.quit_check()
            seconds += 1
            time.sleep(1.0)

    def _responsive_join(self, thread, waiting_func=None):
        """similar to the responsive sleep, a join function blocks a thread
        until some other thread dies.  If that takes a long time, we'd like to
        have some indication as to what the waiting thread is doing.  This
        method will wait for another thread while calling the waiting_func
        once every second.

        parameters:
            thread - an instance of the TaskThread class representing the
                     thread to wait for
            waiting_func - a function to call every second while waiting for
                           the thread to die"""
        while True:
            try:
                thread.join(1.0)
                if not thread.isAlive():
                    break
                if waiting_func:
                    waiting_func()
            except KeyboardInterrupt:
                self.logger.debug('quit detected by _responsive_join')
                self.quit = True

    def _kill_worker_threads(self):
        """This function coerces the consumer/worker threads to kill
        themselves.  When called by the queuing thread, one death token will
        be placed on the queue for each thread.  Each worker thread is always
        looking for the death token.  When it encounters it, it immediately
        runs to completion without drawing anything more off the queue.

        This is a blocking call.  The thread using this function will wait for
        all the worker threads to die."""
        for x in range(self.number_of_threads):
            self.task_queue.put((None, None))
        self.logger.debug("waiting for standard worker threads to stop")
        for t in self.thread_list:
            t.join()

    def _queuing_thread_func(self):
        """This is the function responsible for reading the iterator and
        putting contents into the queue.  It loops as long as there are items
        in the iterator.  Should something go wrong with this thread, or it
        detects the quit flag, it will calmly kill its workers and then
        quit itself."""
        self.logger.debug('_queuing_thread_func start')
        try:
            # May never raise StopIteration
            for job_params in self._get_iterator():
                self.config.logger.debug('received %r', job_params)
                if job_params is None:
                    if self.config.quit_on_empty_queue:
                        self.wait_for_empty_queue(
                            wait_log_interval=10,
                            wait_reason='waiting for queue to drain')
                        raise KeyboardInterrupt
                    self.logger.info("there is nothing to do.  Sleeping "
                                     "for %d seconds" % self.config.idle_delay)
                    self._responsive_sleep(self.config.idle_delay)
                    continue
                self.quit_check()
                # self.logger.debug("queuing job %s", job_params)
                self.task_queue.put((self.task_func, job_params))
        except Exception:
            self.logger.error('queuing jobs has failed', exc_info=True)
        except KeyboardInterrupt:
            self.logger.debug('queuingThread gets quit request')
        finally:
            self.logger.debug("we're quitting queuingThread")
            self._kill_worker_threads()
            self.logger.debug("all worker threads stopped")
            # now that we've killed all the workers, we can set the quit flag
            # to True.  This will cause any other threads to die and shut down
            # the application.  Originally, the setting of this flag was at the
            # start of this "finally" block.  However, that meant that the
            # workers would abort their currently running jobs.  In the case of
            # the natural ending of an application where an iterator ran to
            # exhaustion, the workers would die before completing their tasks.
            # Moving the setting of the flag to this location allows the
            # workers to finish and then the app shuts down.
            self.quit = True

    def executor_identity(self):
        """this function is likely to be called via the configuration parameter
        'executor_identity' at the root of the self.config attribute of the
        application.  It is most frequently used in the Pooled
        ConnectionContext classes to ensure that connections aren't shared
        between threads, greenlets, or whatever the unit of execution is.
        This is useful for maintaining transactional integrity on a resource
        connection."""
        return threading.currentThread().getName()
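
A minimal usage sketch (not from the original example) of driving ThreadedTaskManager, assuming a configman-built config object like the ones in the earlier examples; the iterator yields (args, kwargs) pairs exactly as the constructor docstring describes:

def job_source_iterator(config):
    # each job is (args_tuple, kwargs_mapping), e.g. (('a', 17), {'x': 23})
    for i in range(10):
        yield ((i,), {})

def task_func(i):
    print("working on %d" % i)

manager = ThreadedTaskManager(
    config,                     # assumed: built via ConfigurationManager
    job_source_iterator=job_source_iterator,
    task_func=task_func,
)
manager.blocking_start()        # starts workers, blocks until done or ^C
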
Example #12
class FSRadixTreeStorage(CrashStorageBase):
    """
    This class implements basic radix tree storage. It stores crashes using the
    crash_id radix scheme under ``fs_root``.

    Files are stored in the following scheme::

        root/yyyymmdd/name_branch_base/radix.../crash_id/<files>

    The date is determined using the date suffix of the crash_id, and the
    name_branch_base is given in the configuration options. The radix is
    computed from the crash_id by substringing the UUID in octets to the depth
    given in the crash_id, for instance:

    0bba929f-8721-460c-dead-a43c20071025 is stored in::

        root/20071025/name/0b/ba/92/9f/0bba929f-8721-460c-dead-a43c20071025

    This storage does not implement ``new_crashes``, but is able to store
    processed crashes. Used alone, it is intended to store only processed
    crashes.
    """

    required_config = Namespace()
    required_config.add_option(
        'fs_root',
        doc='a path to a file system',
        default='./crashes',

        # We strip / from the right so we can consistently use os.sep.join
        # instead of os.path.join (which is faster).
        from_string_converter=lambda x: x.rstrip('/'),
        reference_value_from='resource.fs',
    )
    required_config.add_option(
        'umask',
        doc='umask to use for new files',
        default=0o022,
        reference_value_from='resource.fs',
    )
    required_config.add_option(
        'json_file_suffix',
        doc='the suffix used to identify a json file',
        default='.json',
        reference_value_from='resource.fs',
    )
    required_config.add_option(
        'jsonz_file_suffix',
        doc='the suffix used to identify a gzipped json file',
        default='.jsonz',
        reference_value_from='resource.fs',
    )
    required_config.add_option(
        'dump_file_suffix',
        doc='the suffix used to identify a dump file',
        default='.dump',
        reference_value_from='resource.fs',
    )
    required_config.add_option(
        'dump_field',
        doc='the default dump field',
        default='upload_file_minidump',
        reference_value_from='resource.fs',
    )
    required_config.add_option(
        'name_branch_base',
        doc='the directory base name to use for the named radix tree storage',
        default='name',
        reference_value_from='resource.fs',
    )

    def __init__(self, *args, **kwargs):
        super(FSRadixTreeStorage, self).__init__(*args, **kwargs)
        try:
            with using_umask(self.config.umask):
                os.makedirs(self.config.fs_root)
        except OSError:
            self.logger.info("didn't make directory: %s " %
                             self.config.fs_root)

    @staticmethod
    def _cleanup_empty_dirs(base, leaf):
        parts = leaf.split(os.sep)

        while parts:
            cur = os.sep.join([base] + parts)
            parts.pop()

            try:
                os.rmdir(cur)
            except OSError:
                # this directory isn't empty, so we can stop cleanup
                break

    def _get_dump_file_name(self, crash_id, dump_name):
        if dump_name == self.config.dump_field or not dump_name:
            return crash_id + self.config.dump_file_suffix
        else:
            return "%s.%s%s" % (crash_id, dump_name,
                                self.config.dump_file_suffix)

    @staticmethod
    def _get_radix(crash_id):
        return [
            crash_id[i * 2:(i + 1) * 2] for i in range(depthFromOoid(crash_id))
        ]

    def _get_base(self, crash_id):
        date = dateFromOoid(crash_id)
        if not date:
            date = utc_now()
        date_formatted = "%4d%02d%02d" % (date.year, date.month, date.day)
        return [self.config.fs_root, date_formatted]

    def _get_radixed_parent_directory(self, crash_id):
        return os.sep.join(
            self._get_base(crash_id) + [self.config.name_branch_base] +
            self._get_radix(crash_id) + [crash_id])

    def _dump_names_from_paths(self, pathnames):
        dump_names = []
        for a_pathname in pathnames:
            base_name = os.path.basename(a_pathname)
            dump_name = base_name[37:-len(self.config.dump_file_suffix)]
            if not dump_name:
                dump_name = self.config.dump_field
            dump_names.append(dump_name)
        return dump_names

    def _save_files(self, crash_id, files):
        parent_dir = self._get_radixed_parent_directory(crash_id)

        with using_umask(self.config.umask):
            try:
                os.makedirs(parent_dir)
            except OSError:
                # probably already created, ignore
                pass
                #self.logger.debug("could not make directory: %s" %
                #self.config.fs_root)

            for fn, contents in files.iteritems():
                with open(os.sep.join([parent_dir, fn]), 'wb') as f:
                    f.write(contents)

    def save_processed(self, processed_crash):
        crash_id = processed_crash['uuid']
        processed_crash = processed_crash.copy()
        f = StringIO()
        with closing(gzip.GzipFile(mode='wb', fileobj=f)) as fz:
            json.dump(processed_crash, fz, default=self.json_default)
        self._save_files(
            crash_id, {crash_id + self.config.jsonz_file_suffix: f.getvalue()})

    def save_raw_crash(self, raw_crash, dumps, crash_id):
        files = {
            crash_id + self.config.json_file_suffix: json.dumps(raw_crash)
        }
        files.update(
            dict((self._get_dump_file_name(crash_id, fn), dump)
                 for fn, dump in dumps.iteritems()))
        self._save_files(crash_id, files)

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        """ bug 866973 - do not try to save dumps=None into the Filesystem
            We are doing this in lieu of a queuing solution that could allow
            us to operate an independent crashmover. When the queuing system
            is implemented, we could remove this, and have the raw crash
            saved by a crashmover that's consuming crash_ids the same way
            that the processor consumes them.

            Even though it is ok to resave the raw_crash in this case to the
            filesystem, the fs does not know what to do with a dumps=None
            when passed to save_raw, so we are going to avoid that.
        """
        self.save_processed(processed_crash)

    def get_raw_crash(self, crash_id):
        parent_dir = self._get_radixed_parent_directory(crash_id)
        if not os.path.exists(parent_dir):
            raise CrashIDNotFound
        with open(
                os.sep.join(
                    [parent_dir, crash_id + self.config.json_file_suffix]),
                'r') as f:
            return json.load(f, object_hook=DotDict)

    def get_raw_dump(self, crash_id, name=None):
        parent_dir = self._get_radixed_parent_directory(crash_id)
        if not os.path.exists(parent_dir):
            raise CrashIDNotFound
        with open(
                os.sep.join(
                    [parent_dir,
                     self._get_dump_file_name(crash_id, name)]), 'rb') as f:
            return f.read()

    def get_raw_dumps_as_files(self, crash_id):
        parent_dir = self._get_radixed_parent_directory(crash_id)
        if not os.path.exists(parent_dir):
            raise CrashIDNotFound
        dump_paths = [
            os.sep.join([parent_dir, dump_file_name])
            for dump_file_name in os.listdir(parent_dir)
            if dump_file_name.startswith(crash_id)
            and dump_file_name.endswith(self.config.dump_file_suffix)
        ]
        return DotDict(zip(self._dump_names_from_paths(dump_paths),
                           dump_paths))

    def get_raw_dumps(self, crash_id):
        def read_with(fn):
            with open(fn) as f:
                return f.read()

        return DotDict(
            (k, read_with(v))
            for k, v in self.get_raw_dumps_as_files(crash_id).iteritems())

    def get_unredacted_processed(self, crash_id):
        """this method returns an unredacted processed crash"""
        parent_dir = self._get_radixed_parent_directory(crash_id)
        pathname = os.sep.join(
            [parent_dir, crash_id + self.config.jsonz_file_suffix])
        if not os.path.exists(pathname):
            raise CrashIDNotFound
        with closing(gzip.GzipFile(pathname, 'rb')) as f:
            return json.load(f, object_hook=DotDict)

    def remove(self, crash_id):
        parent_dir = self._get_radixed_parent_directory(crash_id)
        if not os.path.exists(parent_dir):
            raise CrashIDNotFound
        shutil.rmtree(parent_dir)

    @staticmethod
    def json_default(obj):
        if isinstance(obj, datetime.datetime):
            return obj.strftime("%Y-%m-%d %H:%M:%S.%f")
        raise TypeError
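As a quick illustration of the layout described in the FSRadixTreeStorage docstring, this standalone sketch rebuilds the storage path for one crash_id by hand. It assumes a fixed radix depth of 4 and reads the date from the last six characters of the crash_id; the real class delegates those decisions to depthFromOoid and dateFromOoid.

import os

fs_root = './crashes'
name_branch_base = 'name'
crash_id = '0bba929f-8721-460c-dead-a43c20071025'

# assumed for this sketch: depth 4, date encoded as yymmdd at the end
date_part = '20' + crash_id[-6:]
radix = [crash_id[i * 2:(i + 1) * 2] for i in range(4)]
parent_dir = os.sep.join([fs_root, date_part, name_branch_base] + radix + [crash_id])
print(parent_dir)
# on a POSIX filesystem:
# ./crashes/20071025/name/0b/ba/92/9f/0bba929f-8721-460c-dead-a43c20071025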
Example #13
0
class FSDatedRadixTreeStorage(FSRadixTreeStorage):
    """
    This class implements dated radix tree storage -- it enables traversing
    a radix tree using an hour/minute prefix. It allows searching for new
    crashes, but doesn't store processed crashes.

    It supplements the basic radix tree storage with indexing by date. It takes
    the current hour, minute and second and stores items in the following
    scheme::

        root/yyyymmdd/date_branch_base/hour/minute_(minute_slice)/crash_id

        minute_slice is computed by taking the second of the current timestamp
        and floor dividing by minute_slice_interval, e.g. a minute slice of 4
        provides slots from 0..14.

    Each crash_id entry in the date branch is a symlink to the item stored in
    the base radix tree storage. Additionally, a symlink is created in the base
    radix tree directory called ``date_root`` which links to the
    ``minute_(minute_slice)`` folder.

    This storage class is suitable for use as raw crash storage, as it supports
    the ``new_crashes`` method.
    """

    required_config = Namespace()
    required_config.add_option(
        'date_branch_base',
        doc='the directory base name to use for the dated radix tree storage',
        default='date',
        reference_value_from='resource.fs',
    )
    required_config.add_option(
        'minute_slice_interval',
        doc='how finely to slice minutes into slots, e.g. 4 means every 4 '
        'seconds a new slot will be allocated',
        default=4,
        reference_value_from='resource.fs',
    )

    # This is just a constant for len(self._current_slot()).
    SLOT_DEPTH = 2
    DIR_DEPTH = 2

    def _get_current_date(self):
        date = utc_now()
        return "%02d%02d%02d" % (date.year, date.month, date.day)

    def _get_date_root_name(self, crash_id):
        return 'date_root'

    def _get_dump_file_name(self, crash_id, dump_name):
        if dump_name == self.config.dump_field or dump_name is None:
            return crash_id + self.config.dump_file_suffix
        else:
            return "%s.%s%s" % (crash_id, dump_name,
                                self.config.dump_file_suffix)

    def _get_dated_parent_directory(self, crash_id, slot):
        return os.sep.join(
            self._get_base(crash_id) + [self.config.date_branch_base] + slot)

    def _current_slot(self):
        now = utc_now()
        return [
            "%02d" % now.hour,
            "%02d_%02d" %
            (now.minute, now.second // self.config.minute_slice_interval)
        ]

    def _create_name_to_date_symlink(self, crash_id, slot):
        """we traverse the path back up from date/slot... to make a link:
           src:  "name"/radix.../crash_id (or "name"/radix... for legacy mode)
           dest: "date"/slot.../crash_id"""
        self._get_radixed_parent_directory(crash_id)

        root = os.sep.join([os.path.pardir] * (self.SLOT_DEPTH + 1))
        os.symlink(
            os.sep.join([root, self.config.name_branch_base] +
                        self._get_radix(crash_id) + [crash_id]),
            os.sep.join(
                [self._get_dated_parent_directory(crash_id, slot), crash_id]))

    def _create_date_to_name_symlink(self, crash_id, slot):
        """the path is something like name/radix.../crash_id, so what we do is
           add 2 to the directories to go up _dir_depth + len(radix).
           we make a link:
           src:  "date"/slot...
           dest: "name"/radix.../crash_id/date_root_name"""
        radixed_parent_dir = self._get_radixed_parent_directory(crash_id)

        root = os.sep.join([os.path.pardir] *
                           (len(self._get_radix(crash_id)) + self.DIR_DEPTH))
        os.symlink(
            os.sep.join([root, self.config.date_branch_base] + slot),
            os.sep.join(
                [radixed_parent_dir,
                 self._get_date_root_name(crash_id)]))

    def save_raw_crash(self, raw_crash, dumps, crash_id):
        super(FSDatedRadixTreeStorage,
              self).save_raw_crash(raw_crash, dumps, crash_id)

        slot = self._current_slot()
        parent_dir = self._get_dated_parent_directory(crash_id, slot)

        try:
            os.makedirs(parent_dir)
        except OSError:
            # probably already created, ignore
            pass
            #self.logger.debug("could not make directory: %s" %
            #parent_dir)

        with using_umask(self.config.umask):
            # Bug 971496 reversed the order of these calls so that the one that
            # can fail will fail first and not leave an orphan symlink behind.
            self._create_date_to_name_symlink(crash_id, slot)
            self._create_name_to_date_symlink(crash_id, slot)

    def remove(self, crash_id):
        dated_path = os.path.realpath(
            os.sep.join([
                self._get_radixed_parent_directory(crash_id),
                self._get_date_root_name(crash_id)
            ]))

        try:
            # We can just unlink the symlink and later new_crashes will clean
            # up for us.
            os.unlink(os.sep.join([dated_path, crash_id]))
        except OSError:
            pass  # we might be trying to remove a visited crash and that's
            # okay

        # Now we actually remove the crash.
        super(FSDatedRadixTreeStorage, self).remove(crash_id)

    def _visit_minute_slot(self, minute_slot_base):
        for crash_id in os.listdir(minute_slot_base):
            namedir = os.sep.join([minute_slot_base, crash_id])
            st_result = os.lstat(namedir)

            if stat.S_ISLNK(st_result.st_mode):
                # This is a link, so we can dereference it to find
                # crashes.
                if os.path.isfile(
                        os.sep.join(
                            [namedir,
                             crash_id + self.config.json_file_suffix])):
                    date_root_path = os.sep.join(
                        [namedir, self._get_date_root_name(crash_id)])
                    yield crash_id

                    try:
                        os.unlink(date_root_path)
                    except OSError:
                        self.logger.error(
                            "could not find a date root in "
                            "%s; is crash corrupt?",
                            namedir,
                            exc_info=True)

                    os.unlink(namedir)

    def new_crashes(self):
        """
        The ``new_crashes`` method returns a generator that visits all new
        crashes like so:

        * Traverse the date root to find all crashes.

        * If we find a symlink in a slot, then we dereference the link and
          check if the directory has crash data.

        * if the directory does, then we remove the symlink in the slot,
          clean up the parent directories if they're empty and then yield
          the crash_id.
        """
        current_slot = self._current_slot()
        current_date = self._get_current_date()

        dates = os.listdir(self.config.fs_root)
        for date in dates:
            dated_base = os.sep.join(
                [self.config.fs_root, date, self.config.date_branch_base])

            try:
                hour_slots = os.listdir(dated_base)
            except OSError:
                # it is okay that the date root doesn't exist - skip on to
                # the next date
                #self.logger.info("date root for %s doesn't exist" % date)
                continue

            for hour_slot in hour_slots:
                skip_dir = False
                hour_slot_base = os.sep.join([dated_base, hour_slot])
                for minute_slot in os.listdir(hour_slot_base):
                    minute_slot_base = os.sep.join(
                        [hour_slot_base, minute_slot])
                    slot = [hour_slot, minute_slot]

                    if slot >= current_slot and date >= current_date:
                        # the slot is currently being used, we want to skip it
                        # for now
                        self.logger.info("not processing slot: %s/%s" %
                                         tuple(slot))
                        skip_dir = True
                        continue

                    for x in self._visit_minute_slot(minute_slot_base):
                        yield x

                    try:
                        # We've finished processing the slot, so we can remove
                        # it.
                        os.rmdir(minute_slot_base)
                    except OSError:
                        self.logger.error(
                            "could not fully remove directory: "
                            "%s; are there more crashes in it?",
                            minute_slot_base,
                            exc_info=True)

                if not skip_dir and hour_slot < current_slot[0]:
                    try:
                        # If the current slot is greater than the hour slot
                        # we're processing, then we can conclude the directory
                        # is safe to remove.
                        os.rmdir(hour_slot_base)
                    except OSError:
                        self.logger.error(
                            "could not fully remove directory: "
                            "%s; are there more crashes in it?",
                            hour_slot_base,
                            exc_info=True)
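The dated layout above hinges on _current_slot; this standalone sketch (not the class method itself) shows how a timestamp maps to an hour/minute_(slice) slot with the default minute_slice_interval of 4.

import datetime

minute_slice_interval = 4  # the default from the configuration above


def slot_for(ts):
    # mirrors _current_slot: an hour directory, then minute_(second // interval)
    return ["%02d" % ts.hour,
            "%02d_%02d" % (ts.minute, ts.second // minute_slice_interval)]


print(slot_for(datetime.datetime(2015, 3, 14, 9, 26, 53)))
# ['09', '26_13']  because 53 // 4 == 13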
Example #14
0
    def test_write_with_imported_module_with_internal_mappings(self):
        import os
        from configman.tests.values_for_module_tests_1 import Alpha, foo

        d = {
            'a': 18,
            'b': 'hello',
            'c': [1, 2, 3],
            'd': {
                'host': 'localhost',
                'port': 5432,
            }
        }

        definitions = {
            'os_module': os,
            'a': 17,
            'imported_class': Alpha,
            'imported_function': foo,
            'xxx': {
                'yyy': Option('yyy', default=d)
            },
            'e': None,
        }
        required_config = Namespace()
        required_config.add_option(
            'minimal_version_for_understanding_refusal',
            doc='ignore the Throttleable protocol',
            default={'Firefox': '3.5.4'},
        )

        cm = ConfigurationManager(
            [definitions, required_config],
            values_source_list=[],
        )

        cm.get_config()

        s = StringIO()

        @contextlib.contextmanager
        def s_opener():
            yield s

        cm.write_conf('py', s_opener)
        generated_python_module_text = s.getvalue()

        expected = """# generated Python configman file

from configman.dotdict import DotDict
from configman.tests.values_for_module_tests_1 import (
    Alpha,
    foo,
)

import os

# the following symbols will be ignored by configman when
# this module is used as a value source.  This will
# suppress the mismatch warning since these symbols are
# values for options, not option names themselves.
ignore_symbol_list = [
    "Alpha",
    "DotDict",
    "foo",
    "os",
]


# a
a = 17

# e
e = None

# imported_class
imported_class = Alpha

# imported_function
imported_function = foo

# ignore the Throttleable protocol
minimal_version_for_understanding_refusal = {
    "Firefox": "3.5.4"
}

# os_module
os_module = os

# Namespace: xxx
xxx = DotDict()

xxx.yyy = {
    "a": 18,
    "b": "hello",
    "c": [
        1,
        2,
        3
    ],
    "d": {
        "host": "localhost",
        "port": 5432
    }
}
"""
        self.assertEqual(generated_python_module_text, expected)
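The write_conf pattern exercised by the test also works outside a test. Below is a minimal hedged sketch with a made-up 'greeting' option, writing the generated Python value-source module to an in-memory buffer; it only reuses calls already shown above (Namespace, add_option, ConfigurationManager, write_conf).

import contextlib

from configman import ConfigurationManager, Namespace

try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO        # Python 3

ns = Namespace()
ns.add_option('greeting', default='hello', doc='what to say')  # made-up option

cm = ConfigurationManager([ns], values_source_list=[], argv_source=[])
cm.get_config()

buf = StringIO()


@contextlib.contextmanager
def opener():
    yield buf


cm.write_conf('py', opener)
print(buf.getvalue())  # starts with "# generated Python configman file"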
Example #15
0
class B(A):
    foo = 'b'
    required_config = Namespace()
    required_config.add_option('z', default=2)
Example #16
0
class Postgres(RequiredConfig):
    """a configman compliant class for setup of Postgres transactions"""
    #--------------------------------------------------------------------------
    # configman parameter definition section
    # here we're setting up the minimal parameters required for connecting
    # to a database.
    required_config = Namespace()
    required_config.add_option(
        name='database_host',
        default='localhost',
        doc='the hostname of the database',
    )
    required_config.add_option(
        name='database_name',
        default='breakpad',
        doc='the name of the database',
    )
    required_config.add_option(
        name='database_port',
        default=5432,
        doc='the port for the database',
    )
    required_config.add_option(
        name='database_user',
        default='breakpad_rw',
        doc='the name of the user within the database',
    )
    required_config.add_option(
        name='database_password',
        default='secrets',
        doc="the user's database password",
    )

    #--------------------------------------------------------------------------
    def __init__(self, config, local_config):
        """Initialize the parts needed to start making database connections

        parameters:
            config - the complete config for the app.  If a real app, this
                     would be where a logger or other resources could be
                     found.
            local_config - this is the namespace within the complete config
                           where the actual database parameters are found"""
        super(Postgres, self).__init__()
        self.dsn = ("host=%(database_host)s "
                    "dbname=%(database_name)s "
                    "port=%(database_port)s "
                    "user=%(database_user)s "
                    "password=%(database_password)s") % local_config
        self.operational_exceptions = (FakeDBOperationalError,
                                       socket.timeout)

    #--------------------------------------------------------------------------
    def connection(self, name_unused=None):
        """return a new database connection

        parameters:
            name_unused - optional named connections.  Used by the
                          derived class
        """
        return FakeDatabaseConnection(self.dsn)

    #--------------------------------------------------------------------------
    @contextlib.contextmanager
    def __call__(self, name=None):
        """returns a database connection wrapped in a contextmanager.

        This function allows database connections to be used in a with
        statement.  Connection/transaction objects will automatically be
        rolled back if they weren't explicitly committed within the context of
        the 'with' statement.  Additionally, it is equipped with the ability to
        automatically close the connection when leaving the 'with' block.

        parameters:
            name - an optional name for the database connection"""
        exception_raised = False
        conn = self.connection(name)
        try:
            yield conn
        except self.operational_exceptions:
            # we need to close the connection
            print "Postgres - operational exception caught"
            exception_raised = True
        except Exception:
            print "Postgres - non operational exception caught"
            exception_raised = True
        finally:
            if not exception_raised:
                try:
                    if conn.in_transaction:
                        conn.rollback()
                    self.close_connection(conn)
                except self.operational_exceptions:
                    exception_raised = True
            if exception_raised:
                try:
                    self.close_connection(conn, force=True)
                except self.operational_exceptions:
                    pass
                raise

    #--------------------------------------------------------------------------
    def close_connection(self, connection, force=False):
        """close the connection passed in.

        This function exists to allow derived classes to override the closing
        behavior.

        parameters:
            connection - the database connection object
            force - unused boolean to force closure; used in derived classes
        """
        print "Postgres - requestng connection to close"
        connection.close()

    #--------------------------------------------------------------------------
    def close(self):
        """close any pooled or cached connections.  Since this base class
        object does no caching, there is no implementation required.  Derived
        classes may implement it."""
        pass
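A brief usage sketch of the context manager defined by __call__ above. The function is illustrative only; it assumes the Postgres instance was built from config/local_config as described, and that the connection object follows the usual DB-API cursor/commit style.

def store_row(postgres, sql, params):
    # `postgres` is an instance of the Postgres class above; calling it in a
    # with statement yields a connection.  If the block raises, the connection
    # is force-closed and the exception re-raised; otherwise any uncommitted
    # transaction is rolled back and the connection closed normally.
    with postgres() as conn:
        cursor = conn.cursor()
        cursor.execute(sql, params)
        conn.commit()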
Example #17
0
    def test_migration_crash_storage(self):
        n = Namespace()
        n.add_option(
            'storage',
            default=MigrationCrashStorage,
        )
        n.add_option(
            'logger',
            default=mock.Mock(),
        )
        value = {
            'primary.storage_class': (
                'socorro.unittest.external.test_crashstorage_base.A'
            ),
            'fallback.storage_class': (
                'socorro.unittest.external.test_crashstorage_base.B'
            ),
            'date_threshold': '150315'
        }
        cm = ConfigurationManager(
            n,
            values_source_list=[value],
            argv_source=[]
        )
        with cm.context() as config:
            raw_crash = {'ooid': ''}
            before_crash_id = '1498dee9-9a45-45cc-8ec8-71bb62150314'
            after_crash_id = '1498dee9-9a45-45cc-8ec8-71bb62150315'
            dump = '12345'
            processed_crash = {'ooid': '', 'product': 17}
            migration_store = config.storage(config)

            # save_raw tests
            # save to primary
            migration_store.primary_store.save_raw_crash = Mock()
            migration_store.fallback_store.save_raw_crash = Mock()
            migration_store.save_raw_crash(raw_crash, dump, after_crash_id)
            migration_store.primary_store.save_raw_crash.assert_called_with(
                raw_crash,
                dump,
                after_crash_id
            )
            eq_(migration_store.fallback_store.save_raw_crash.call_count, 0)

            # save to fallback
            migration_store.primary_store.save_raw_crash = Mock()
            migration_store.fallback_store.save_raw_crash = Mock()
            migration_store.save_raw_crash(raw_crash, dump, before_crash_id)
            eq_(migration_store.primary_store.save_raw_crash.call_count, 0)
            migration_store.fallback_store.save_raw_crash.assert_called_with(
                raw_crash,
                dump,
                before_crash_id
            )

            # save_processed tests
            # save to primary
            processed_crash['crash_id'] = after_crash_id
            migration_store.primary_store.save_processed = Mock()
            migration_store.fallback_store.save_processed = Mock()
            migration_store.save_processed(processed_crash)
            migration_store.primary_store.save_processed.assert_called_with(
                processed_crash
            )
            eq_(migration_store.fallback_store.save_processed.call_count, 0)

            # save to fallback
            processed_crash['crash_id'] = before_crash_id
            migration_store.primary_store.save_processed = Mock()
            migration_store.fallback_store.save_processed = Mock()
            migration_store.save_processed(processed_crash)
            eq_(migration_store.primary_store.save_processed.call_count, 0)
            migration_store.fallback_store.save_processed.assert_called_with(
                processed_crash
            )

            # close tests
            migration_store.primary_store.close = Mock()
            migration_store.fallback_store.close = Mock()
            migration_store.close()
            migration_store.primary_store.close.assert_called_with()
            migration_store.fallback_store.close.assert_called_with()

            migration_store.primary_store.close = Mock()
            migration_store.fallback_store.close = Mock()
            migration_store.fallback_store.close.side_effect = (
                NotImplementedError()
            )
            migration_store.close()
            migration_store.primary_store.close.assert_called_with()
            migration_store.fallback_store.close.assert_called_with()

            migration_store.primary_store.close = Mock()
            migration_store.primary_store.close.side_effect = Exception('!')
            migration_store.close()
            migration_store.primary_store.close.assert_called_with()
            migration_store.fallback_store.close.assert_called_with()

            migration_store.fallback_store.close = Mock()
            migration_store.fallback_store.close.side_effect = Exception('!')
            assert_raises(PolyStorageError, migration_store.close)
            migration_store.primary_store.close.assert_called_with()
            migration_store.fallback_store.close.assert_called_with()
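The routing the test asserts can be summarised in a few lines: crashes whose crash_id date suffix is on or after date_threshold go to the primary store, older ones to the fallback. The helper below is only a sketch of that decision, not the real MigrationCrashStorage code.

def pick_store(crash_id, date_threshold='150315'):
    # crash_ids end in yymmdd, so a lexicographic comparison works here
    return 'primary' if crash_id[-6:] >= date_threshold else 'fallback'


print(pick_store('1498dee9-9a45-45cc-8ec8-71bb62150315'))  # primary
print(pick_store('1498dee9-9a45-45cc-8ec8-71bb62150314'))  # fallback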
class ConnectionContextBase(RequiredConfig):
    required_config = Namespace()
    required_config.add_option(
        'access_key',
        doc="access key",
        default="",
        reference_value_from='resource.boto',
    )
    required_config.add_option(
        'secret_access_key',
        doc="secret access key",
        default="",
        secret=True,
        reference_value_from='secrets.boto',
        likely_to_be_changed=True,
    )
    required_config.add_option(
        'bucket_name',
        doc="The name of the bucket.",
        default='crashstats',
        reference_value_from='resource.boto',
        likely_to_be_changed=True,
    )
    required_config.add_option(
        'prefix',
        doc="a prefix to use inside the bucket",
        default='',
        reference_value_from='resource.boto',
        likely_to_be_changed=True,
    )
    required_config.add_option(
        'keybuilder_class',
        default='collector.external.boto.connection_context.KeyBuilderBase',
        doc=('fully qualified dotted Python classname to handle building s3 '
             'pseudo-filenames'),
        from_string_converter=class_converter,
        reference_value_from='resource.boto',
        likely_to_be_changed=True,
    )

    operational_exceptions = (
        socket.timeout,
        # wild guesses at retriable exceptions
        boto.exception.PleaseRetryException,
        boto.exception.ResumableTransferDisposition,
        boto.exception.ResumableUploadException,
    )

    conditional_exceptions = (boto.exception.StorageResponseError,)

    #--------------------------------------------------------------------------
    def is_operational_exception(self, x):
        if "not found, no value returned" in str(x):
            # the not found error needs to be re-tryable to compensate for
            # eventual consistency.  However, a method capable of raising this
            # exception should never be used with a transaction executor that
            # has infinite back off.
            return True
        return False

    #--------------------------------------------------------------------------
    def __init__(self, config, quit_check_callback=None):
        self.config = config

        self._CreateError = boto.exception.StorageCreateError
        self.ResponseError = (boto.exception.StorageResponseError, KeyNotFound)
        self.keybuilder = config.keybuilder_class()

        self._bucket_cache = {}

    #--------------------------------------------------------------------------
    def _connect(self):
        try:
            if self.connection:
                return self.connection
        except AttributeError:
            pass

        self.connection = self._connect_to_endpoint(**self._get_credentials())
        return self.connection

    #--------------------------------------------------------------------------
    def _get_credentials(self):
        """each subclass must implement this method to provide the type
        of credentials required for the type of connection"""
        raise NotImplementedError

    #--------------------------------------------------------------------------
    def build_keys(self, prefix, name_of_thing, id):
        """Builds an s3 pseudo-filename using the specified keybuilder class.

        """
        return self.keybuilder.build_keys(prefix, name_of_thing, id)

    #--------------------------------------------------------------------------
    def _get_bucket(self, conn, bucket_name):
        try:
            return self._bucket_cache[bucket_name]
        except KeyError:
            self._bucket_cache[bucket_name] = conn.get_bucket(bucket_name)
            return self._bucket_cache[bucket_name]

    #--------------------------------------------------------------------------
    def _get_or_create_bucket(self, conn, bucket_name):
        try:
            return self._get_bucket(conn, bucket_name)
        except self.ResponseError:
            self._bucket_cache[bucket_name] = conn.create_bucket(bucket_name)
            return self._bucket_cache[bucket_name]

    #--------------------------------------------------------------------------
    def submit(self, id, name_of_thing, thing):
        """submit something to boto.
        """
        # can only submit strings to boto
        assert isinstance(thing, basestring), type(thing)

        conn = self._connect()
        bucket = self._get_or_create_bucket(conn, self.config.bucket_name)

        all_keys = self.build_keys(self.config.prefix, name_of_thing, id)
        # Always submit using the first key
        key = all_keys[0]
        key_object = bucket.new_key(key)
        key_object.set_contents_from_string(thing)

    #--------------------------------------------------------------------------
    def fetch(self, id, name_of_thing):
        """retrieve something from boto.
        """
        conn = self._connect()
        bucket = self._get_bucket(conn, self.config.bucket_name)

        all_keys = self.build_keys(self.config.prefix, name_of_thing, id)
        for key in all_keys:
            key_object = bucket.get_key(key)
            if key_object is not None:
                return key_object.get_contents_as_string()

        # None of the keys worked, so raise an error
        raise KeyNotFound(
            '%s (bucket=%r keys=%r) not found, no value returned' % (
                id,
                self.config.bucket_name,
                all_keys,
            ))

    #--------------------------------------------------------------------------
    def _convert_mapping_to_string(self, a_mapping):
        return json.dumps(a_mapping, cls=JSONISOEncoder)

    #--------------------------------------------------------------------------
    def _convert_list_to_string(self, a_list):
        return json.dumps(a_list)

    #--------------------------------------------------------------------------
    def _convert_string_to_list(self, a_string):
        return json.loads(a_string)

    #--------------------------------------------------------------------------
    def commit(self):
        """boto doesn't support transactions so this silently
        does nothing"""

    #--------------------------------------------------------------------------
    def rollback(self):
        """boto doesn't support transactions so this silently
        does nothing"""

    #--------------------------------------------------------------------------
    @contextlib.contextmanager
    def __call__(self):
        yield self

    #--------------------------------------------------------------------------
    def in_transaction(self, dummy):
        """boto doesn't support transactions, so it is never in
        a transaction."""
        return False

    #--------------------------------------------------------------------------
    def force_reconnect(self):
        try:
            del self.connection
        except AttributeError:
            # already deleted, ignorable
            pass
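is_operational_exception above marks the "not found, no value returned" case as retriable to cope with S3 eventual consistency. Here is a hedged sketch of how a caller with a bounded retry loop might use it; the helper name and backoff are made up and not part of the class above.

import time


def fetch_with_retry(context, crash_id, name_of_thing, attempts=3, delay=1):
    # `context` is a connection context like the class above
    for attempt in range(attempts):
        try:
            return context.fetch(crash_id, name_of_thing)
        except Exception as exc:
            # retry only what the context says is operational, and never
            # forever -- see the warning in is_operational_exception
            if context.is_operational_exception(exc) and attempt < attempts - 1:
                time.sleep(delay)
                continue
            raise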
Example #19
0
    def test_processed_crash_storage(self):
        n = Namespace()
        n.add_option(
            'storage',
            default=PrimaryDeferredProcessedStorage,
        )
        n.add_option(
            'logger',
            default=mock.Mock(),
        )
        value = {
            'primary.storage_class': (
                'socorro.unittest.external.test_crashstorage_base.A'
            ),
            'deferred.storage_class': (
                'socorro.unittest.external.test_crashstorage_base.B'
            ),
            'processed.storage_class': (
                'socorro.unittest.external.test_crashstorage_base.B'
            ),
            'deferral_criteria': lambda x: x.get('foo') == 'foo'
        }
        cm = ConfigurationManager(
            n,
            values_source_list=[value],
            argv_source=[]
        )
        with cm.context() as config:
            eq_(config.primary.storage_class.foo, 'a')
            eq_(config.deferred.storage_class.foo, 'b')
            eq_(config.processed.storage_class.foo, 'b')

            raw_crash = {'ooid': ''}
            crash_id = '1498dee9-9a45-45cc-8ec8-71bb62121203'
            dump = '12345'
            deferred_crash = {'ooid': '', 'foo': 'foo'}
            processed_crash = {'ooid': '', 'product': 17}
            pd_store = config.storage(config)

            # save_raw tests
            pd_store.primary_store.save_raw_crash = Mock()
            pd_store.deferred_store.save_raw_crash = Mock()
            pd_store.processed_store.save_raw_crash = Mock()
            pd_store.save_raw_crash(raw_crash, dump, crash_id)
            pd_store.primary_store.save_raw_crash.assert_called_with(
                raw_crash,
                dump,
                crash_id
            )
            eq_(pd_store.deferred_store.save_raw_crash.call_count, 0)

            pd_store.save_raw_crash(deferred_crash, dump, crash_id)
            pd_store.deferred_store.save_raw_crash.assert_called_with(
                deferred_crash,
                dump,
                crash_id
            )

            # save_processed tests
            pd_store.primary_store.save_processed = Mock()
            pd_store.deferred_store.save_processed = Mock()
            pd_store.processed_store.save_processed = Mock()
            pd_store.save_processed(processed_crash)
            pd_store.processed_store.save_processed.assert_called_with(
                processed_crash
            )
            eq_(pd_store.primary_store.save_processed.call_count, 0)

            pd_store.save_processed(deferred_crash)
            pd_store.processed_store.save_processed.assert_called_with(
                deferred_crash
            )

            # close tests
            pd_store.primary_store.close = Mock()
            pd_store.deferred_store.close = Mock()
            pd_store.close()
            pd_store.primary_store.close.assert_called_with()
            pd_store.deferred_store.close.assert_called_with()

            pd_store.primary_store.close = Mock()
            pd_store.deferred_store.close = Mock()
            pd_store.deferred_store.close.side_effect = NotImplementedError()
            pd_store.close()
            pd_store.primary_store.close.assert_called_with()
            pd_store.deferred_store.close.assert_called_with()

            pd_store.primary_store.close = Mock()
            pd_store.primary_store.close.side_effect = Exception('!')
            pd_store.close()
            pd_store.primary_store.close.assert_called_with()
            pd_store.deferred_store.close.assert_called_with()

            pd_store.deferred_store.close = Mock()
            pd_store.deferred_store.close.side_effect = Exception('!')
            assert_raises(PolyStorageError, pd_store.close)
            pd_store.primary_store.close.assert_called_with()
            pd_store.deferred_store.close.assert_called_with()
Example #20
0
class IndexCleaner(RequiredConfig):
    """Delete elasticsearch indices from our databases."""

    required_config = Namespace()
    required_config.add_option(
        'retention_policy',
        default=26,
        doc='Number of weeks to keep an index alive. ',
    )
    required_config.namespace('elasticsearch')
    required_config.elasticsearch.add_option(
        'elasticsearch_class',
        default='socorro.external.es.connection_context.ConnectionContext',
        from_string_converter=class_converter,
        reference_value_from='resource.elasticsearch',
    )
    required_config.elasticsearch.add_option(
        'elasticsearch_index_regex',
        default='^socorro[0-9]{6}$',
        reference_value_from='resource.elasticsearch',
    )

    def __init__(self, config):
        super(IndexCleaner, self).__init__()
        self.config = config

    def delete_indices(self, predicate=None):
        """Delete crash indices that match the given predicate.

        :arg callable predicate: A callable of the form
            ``predicate(index)``, where ``index`` is a string containing
            the name of the index. If the callable returns true, the
            index will be deleted.

            The default is None, which deletes all crash indices.
        :returns: List of indexes that were deleted

        """
        es_class = self.config.elasticsearch.elasticsearch_class(
            self.config.elasticsearch)
        index_client = es_class.indices_client()

        status = index_client.status()
        indices = status['indices'].keys()

        aliases = index_client.get_aliases()

        deleted_indices = []
        for index in indices:
            # Some indices look like 'socorro%Y%W_%Y%m%d', but they are
            # aliased to the expected format of 'socorro%Y%W'. In such cases,
            # replace the index with the alias.
            if index in aliases and 'aliases' in aliases[index]:
                index_aliases = aliases[index]['aliases'].keys()
                if index_aliases:
                    index = index_aliases[0]

            if not re.match(
                    self.config.elasticsearch.elasticsearch_index_regex,
                    index):
                # This index doesn't look like a crash index, let's skip it.
                continue

            if predicate is None or predicate(index):
                index_client.delete(index)
                deleted_indices.append(index)

        return deleted_indices

    def delete_old_indices(self):
        self.delete_indices(self.is_index_old)

    def is_index_old(self, index):
        now = utc_now()
        policy_delay = datetime.timedelta(weeks=self.config.retention_policy)
        time_limit = (now - policy_delay).replace(tzinfo=None)

        # strptime ignores week numbers if a day isn't specified, so we append
        # '-1' and '-%w' to specify Monday as the day.
        index_date = datetime.datetime.strptime(
            index + '-1',
            self.config.elasticsearch.elasticsearch_index + '-%w')

        return index_date < time_limit
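is_index_old relies on a strptime quirk: week numbers (%W) are ignored unless a weekday is also supplied, hence the '-1' / '-%w' suffix pinning the parse to Monday. A standalone check of that trick, using the index-name pattern and format from this example:

import datetime

index = 'socorro201512'       # a weekly index name matching '^socorro[0-9]{6}$'
index_format = 'socorro%Y%W'  # the elasticsearch_index format

index_date = datetime.datetime.strptime(index + '-1', index_format + '-%w')
print(index_date)  # the Monday of week 12 of 2015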
Example #21
0
class MiddlewareApp(App):
    app_name = 'middleware'
    app_version = '3.1'
    app_description = __doc__

    services_list = []

    #--------------------------------------------------------------------------
    # in this section, define any configuration requirements
    required_config = Namespace()

    #--------------------------------------------------------------------------
    # implementations namespace
    #     the namespace is for external implementations of the services
    #-------------------------------------------------------------------------
    required_config.namespace('implementations')
    required_config.implementations.add_option(
        'implementation_list',
        doc='list of packages for service implementations',
        default='psql:socorro.external.postgresql, '
        'hbase:socorro.external.hbase, '
        'es:socorro.external.elasticsearch, '
        'fs:socorro.external.filesystem, '
        'http:socorro.external.http',
        from_string_converter=items_list_converter)

    required_config.implementations.add_option(
        'service_overrides',
        doc='comma separated list of class overrides, e.g `Crashes: hbase`',
        default='CrashData: fs, '
        'Correlations: http, '
        'CorrelationsSignatures: http, '
        'SuperSearch: es',
        from_string_converter=items_list_converter)

    #--------------------------------------------------------------------------
    # database namespace
    #     the namespace is for external implementations of the services
    #-------------------------------------------------------------------------
    required_config.namespace('database')
    required_config.database.add_option('database_class',
                                        default=ConnectionContext,
                                        from_string_converter=class_converter)

    #--------------------------------------------------------------------------
    # hbase namespace
    #     the namespace is for external implementations of the services
    #-------------------------------------------------------------------------
    required_config.namespace('hbase')
    required_config.hbase.add_option('hbase_class',
                                     default=HBaseCrashStorage,
                                     from_string_converter=class_converter)

    #--------------------------------------------------------------------------
    # filesystem namespace
    #     the namespace is for external implementations of the services
    #-------------------------------------------------------------------------
    required_config.namespace('filesystem')
    required_config.filesystem.add_option(
        'filesystem_class',
        default=FileSystemCrashStorage,
        from_string_converter=class_converter)

    #--------------------------------------------------------------------------
    # webapi namespace
    #     this is all config options that used to belong to webapiconfig.py
    #-------------------------------------------------------------------------
    required_config.namespace('webapi')
    required_config.webapi.add_option(
        'elasticSearchHostname',
        default='localhost',
        doc='String containing the URI of the Elastic Search instance.')
    required_config.webapi.add_option(
        'elasticSearchPort',
        default='9200',
        doc='String containing the port on which to call the Elastic '
        'Search instance.')
    required_config.webapi.add_option(
        'elasticsearch_urls',
        default=['http://localhost:9200'],
        doc='the urls to the elasticsearch instances',
        from_string_converter=string_to_list)
    required_config.webapi.add_option(
        'elasticsearch_index',
        default='socorro%Y%W',
        doc='an index format to pull crashes from elasticsearch '
        "(use datetime's strftime format to have "
        'daily, weekly or monthly indexes)')
    required_config.webapi.add_option(
        'elasticsearch_doctype',
        default='crash_reports',
        doc='the default doctype to use in elasticsearch')
    required_config.webapi.add_option(
        'elasticsearch_timeout',
        default=30,
        doc='the time in seconds before a query to elasticsearch fails')
    required_config.webapi.add_option(
        'facets_max_number',
        default=50,
        doc='the maximum number of results a facet will return in search')
    required_config.webapi.add_option(
        'searchMaxNumberOfDistinctSignatures',
        default=1000,
        doc='Integer containing the maximum allowed number of distinct '
        'signatures the system should retrieve. Used mainly for '
        'performance in ElasticSearch')
    required_config.webapi.add_option(
        'search_default_date_range',
        default=7,  # in days
        doc='the default date range for searches, in days')
    required_config.webapi.add_option(
        'platforms',
        default=[
            {
                "id": "windows",
                "name": "Windows NT"
            },
            {
                "id": "mac",
                "name": "Mac OS X"
            },
            {
                "id": "linux",
                "name": "Linux"
            },
        ],
        doc='Array associating OS ids to full names.',
        from_string_converter=lambda x: json.loads(x))
    required_config.webapi.add_option(
        'non_release_channels',
        default=['beta', 'aurora', 'nightly'],
        doc='List of channels, excluding the `release` one.',
        from_string_converter=string_to_list)
    required_config.webapi.add_option(
        'restricted_channels',
        default=['beta'],
        doc='List of channels to restrict based on build ids.',
        from_string_converter=string_to_list)

    #--------------------------------------------------------------------------
    # web_server namespace
    #     the namespace is for config parameters for the web server
    #--------------------------------------------------------------------------
    required_config.namespace('web_server')
    required_config.web_server.add_option(
        'wsgi_server_class',
        doc='a class implementing a wsgi web server',
        default='socorro.webapi.servers.CherryPy',
        from_string_converter=class_converter)

    #--------------------------------------------------------------------------
    # http namespace
    #     the namespace is for config parameters for the http modules
    #--------------------------------------------------------------------------
    required_config.namespace('http')
    required_config.http.namespace('correlations')
    required_config.http.correlations.add_option(
        'base_url',
        doc='Base URL where correlations text files are',
        default='https://crash-analysis.mozilla.com/crash_analysis/',
    )
    required_config.http.correlations.add_option(
        'save_download',
        doc='Whether files downloaded for correlations should be '
        'temporarily stored on disk',
        default=True,
    )
    required_config.http.correlations.add_option(
        'save_seconds',
        doc='Number of seconds that the downloaded .txt file is stored '
        'in a temporary place',
        default=60 * 10,
    )
    required_config.http.correlations.add_option(
        'save_root',
        doc='Directory where the temporary downloads are stored '
        '(if left empty, the system tmp directory is used)',
        default='',
    )

    #--------------------------------------------------------------------------
    # sentry namespace
    #     the namespace is for Sentry error capturing with Raven
    #--------------------------------------------------------------------------
    required_config.namespace('sentry')
    required_config.sentry.add_option('dsn',
                                      doc='DSN for Sentry via raven',
                                      default='')

    # because the socorro.webapi.servers classes bring up their own default
    # configurations like port number, the only way to override the default
    # is like this:
    from socorro.webapi.servers import StandAloneServer
    StandAloneServer.required_config.port.set_default(8883, force=True)

    #--------------------------------------------------------------------------
    def main(self):
        # Apache modwsgi requires a module-level name 'application'
        global application

        ## 1 turn these names of classes into real references to classes
        def lookup(file_and_class):
            file_name, class_name = file_and_class.rsplit('.', 1)
            overrides = dict(self.config.implementations.service_overrides)
            _list = self.config.implementations.implementation_list
            for prefix, base_module_path in _list:
                if class_name in overrides:
                    if prefix != overrides[class_name]:
                        continue
                try:
                    module = __import__(
                        '%s.%s' % (base_module_path, file_name), globals(),
                        locals(), [class_name])
                except ImportError:
                    raise ImportError(
                        "Unable to import %s.%s.%s" %
                        (base_module_path, file_name, class_name))
                return getattr(module, class_name)
            raise ImplementationConfigurationError(file_and_class)

        ## 2 wrap each class with the ImplementationWrapper class
        def wrap(cls, file_and_class):
            return type(cls.__name__, (ImplementationWrapper, ), {
                'cls': cls,
                'file_and_class': file_and_class,
            })

        services_list = []
        for url, impl_class in SERVICES_LIST:
            impl_instance = lookup(impl_class)
            wrapped_impl = wrap(impl_instance, impl_class)
            services_list.append((url, wrapped_impl))

        self.web_server = self.config.web_server.wsgi_server_class(
            self.config,  # needs the whole config not the local namespace
            services_list)

        # for modwsgi the 'run' method returns the wsgi function that Apache
        # will use.  For other webservers, the 'run' method actually starts
        # the standalone web server.
        application = self.web_server.run()
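The override resolution inside lookup() can be hard to follow with the imports mixed in. This sketch replays just the selection logic on data shaped like the defaults above (after items_list_converter has produced key/value pairs), without actually importing anything; it deliberately omits the __import__/ImportError handling.

implementation_list = [
    ('psql', 'socorro.external.postgresql'),
    ('es', 'socorro.external.elasticsearch'),
    ('fs', 'socorro.external.filesystem'),
    ('http', 'socorro.external.http'),
]
service_overrides = {'SuperSearch': 'es', 'CrashData': 'fs'}


def module_path_for(file_and_class):
    # mirrors lookup(): an override pins a class to one implementation prefix,
    # otherwise the first entry in implementation_list wins
    file_name, class_name = file_and_class.rsplit('.', 1)
    for prefix, base_module_path in implementation_list:
        if class_name in service_overrides and prefix != service_overrides[class_name]:
            continue
        return '%s.%s' % (base_module_path, file_name)
    raise LookupError(file_and_class)


print(module_path_for('supersearch.SuperSearch'))
# socorro.external.elasticsearch.supersearch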
Example #22
0
class HBaseConnectionContext(RequiredConfig):
    """This class implements a connection to HBase for every transaction to be
    executed.
    """
    required_config = Namespace()
    required_config.add_option(
        'hbase_host',
        doc='Host to HBase server',
        default='localhost',
        reference_value_from='resource.hb',
    )
    required_config.add_option(
        'hbase_port',
        doc='Port to HBase server',
        default=9090,
        reference_value_from='resource.hb',
    )
    required_config.add_option(
        'hbase_timeout',
        doc='timeout in milliseconds for an HBase connection',
        default=5000,
        reference_value_from='resource.hb',
    )
    required_config.add_option(
        'temporary_file_system_storage_path',
        doc='a local filesystem path where dumps are stored temporarily '
        'during processing',
        default='/tmp',
        reference_value_from='resource.hb',
    )
    required_config.add_option(
        'dump_file_suffix',
        doc='the suffix used to identify a dump file (for use in temp files)',
        default='.dump',
        reference_value_from='resource.hb',
    )

    operational_exceptions = (
        hbase.ttypes.IOError,
        Thrift.TException,
        socket.timeout,
        socket.error,
    )

    conditional_exceptions = ()

    #--------------------------------------------------------------------------
    def __init__(self, config):
        super(HBaseConnectionContext, self).__init__()
        self.config = config

    #--------------------------------------------------------------------------
    def connection(self, name=None):
        return HBaseConnection(self.config)

    #--------------------------------------------------------------------------
    @contextlib.contextmanager
    def __call__(self, name=None):
        conn = self.connection(name)
        try:
            yield conn
        finally:
            self.close_connection(conn)

    #--------------------------------------------------------------------------
    def force_reconnect(self):
        pass

    #--------------------------------------------------------------------------
    def close(self):
        pass

    #--------------------------------------------------------------------------
    def close_connection(self, connection, force=False):
        connection.close()

    #--------------------------------------------------------------------------
    def is_operational_exception(self, msg):
        return False
Example #23
0
class BotoS3CrashStorage(BotoCrashStorage):
    required_config = Namespace()
    required_config.resource_class = change_default(
        BotoCrashStorage, 'resource_class',
        'socorro.external.boto.connection_context.RegionalS3ConnectionContext')
Example #24
0
File: ftpscraper.py Project: snorp/socorro
class FTPScraperCronApp(BaseCronApp, ScrapersMixin):
    app_name = 'ftpscraper'
    app_description = 'FTP Scraper'
    app_version = '0.1'

    required_config = Namespace()
    required_config.add_option(
        'products',
        default='firefox,mobile,thunderbird,seamonkey,b2g',
        from_string_converter=lambda line: tuple(
            [x.strip() for x in line.split(',') if x.strip()]),
        doc='a comma-delimited list of products to scrape')

    required_config.add_option('base_url',
                               default='https://archive.mozilla.org/pub/',
                               doc='The base url to use for fetching builds')

    required_config.add_option('dry_run',
                               default=False,
                               doc='Print instead of storing builds')

    def run(self, date):
        # record_associations
        for product_name in self.config.products:
            self.config.logger.debug('scraping %s releases for date %s',
                                     product_name, date)
            if product_name == 'b2g':
                self.database_transaction_executor(self.scrape_b2g,
                                                   product_name, date)
            elif product_name == 'firefox':
                self.database_transaction_executor(
                    self._scrape_json_releases_and_nightlies, product_name,
                    date)
            else:
                self.database_transaction_executor(
                    self._scrape_releases_and_nightlies, product_name, date)

    def _scrape_releases_and_nightlies(self, connection, product_name, date):
        self.scrape_releases(connection, product_name)
        self.scrape_nightlies(connection, product_name, date)

    def _scrape_json_releases_and_nightlies(self, connection, product_name,
                                            date):
        self.scrape_json_releases(connection, product_name)
        self.scrape_json_nightlies(connection, product_name, date)

    def _insert_build(self, cursor, *args, **kwargs):
        if self.config.dry_run:
            print "INSERT BUILD"
            print args
            print kwargs
        else:
            buildutil.insert_build(cursor, *args, **kwargs)

    def _is_final_beta(self, version):
        # Return True if this is an XX.0 version in the release channel;
        # otherwise return False.
        # Make a special exception for the out-of-cycle 38.0.5.
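        # e.g. '45.0' -> True, '45.0.1' -> False, '38.0.5' -> True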
        return version.endswith('.0') or version == '38.0.5'

    def scrape_json_releases(self, connection, product_name):
        prod_url = urlparse.urljoin(self.config.base_url, product_name + '/')
        logger = self.config.logger
        cursor = connection.cursor()

        for directory in ('nightly', 'candidates'):
            try:
                url, = self.get_links(prod_url, starts_with=directory)
            except (IndexError, ValueError):
                logger.debug('Dir %s not found for %s', directory,
                             product_name)
                continue

            releases = self.get_links(url, ends_with='-candidates/')
            for release in releases:
                dirname = release.replace(url, '')
                if dirname.endswith('/'):
                    dirname = dirname[:-1]
                for info in self.get_json_release(release, dirname):
                    platform, version, kvpairs = info
                    build_type = 'release'
                    beta_number = None
                    repository = kvpairs['repository']
                    if 'b' in version:
                        build_type = 'beta'
                        version, beta_number = version.split('b')
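                        # e.g. '45.0b3' yields version '45.0' and beta_number '3'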

                    if kvpairs.get('buildID'):
                        build_id = kvpairs['buildID']
                        version_build = kvpairs['version_build']
                        self._insert_build(cursor,
                                           product_name,
                                           version,
                                           platform,
                                           build_id,
                                           build_type,
                                           beta_number,
                                           repository,
                                           version_build,
                                           ignore_duplicates=True)

                    if (self._is_final_beta(version)
                            and build_type == 'release' and version > '26.0'
                            and kvpairs.get('buildID')):
                        logger.debug('is final beta version %s', version)
                        repository = 'mozilla-beta'
                        build_id = kvpairs['buildID']
                        build_type = 'beta'
                        version_build = kvpairs['version_build']
                        # just force this to 99 until
                        # we deal with version_build properly
                        beta_number = 99
                        self._insert_build(cursor,
                                           product_name,
                                           version,
                                           platform,
                                           build_id,
                                           build_type,
                                           beta_number,
                                           repository,
                                           version_build,
                                           ignore_duplicates=True)

    def scrape_json_nightlies(self, connection, product_name, date):
        directories = (
            product_name,
            'nightly',
            date.strftime('%Y'),
            date.strftime('%m'),
        )
        nightly_url = self.config.base_url
        for part in directories:
            nightly_url = urlparse.urljoin(nightly_url, part + '/')
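        # nightly_url is now e.g. base_url + 'firefox/nightly/2016/05/'
        # (the product name and date here are illustrative)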
        cursor = connection.cursor()
        dir_prefix = date.strftime('%Y-%m-%d')
        nightlies = self.get_links(nightly_url, starts_with=dir_prefix)
        for nightly in nightlies:
            dirname = nightly.replace(nightly_url, '')
            if dirname.endswith('/'):
                dirname = dirname[:-1]
            for info in self.get_json_nightly(nightly, dirname):
                platform, repository, version, kvpairs = info

                build_type = 'nightly'
                if version.endswith('a2'):
                    build_type = 'aurora'

                if kvpairs.get('buildID'):
                    build_id = kvpairs['buildID']
                    self._insert_build(cursor,
                                       product_name,
                                       version,
                                       platform,
                                       build_id,
                                       build_type,
                                       kvpairs.get('beta_number', None),
                                       repository,
                                       ignore_duplicates=True)

    def scrape_releases(self, connection, product_name):
        prod_url = urlparse.urljoin(self.config.base_url, product_name + '/')
        # releases are sometimes in nightly, sometimes in candidates dir.
        # look in both.
        logger = self.config.logger
        cursor = connection.cursor()
        for directory in ('nightly', 'candidates'):
            # expect only one directory link for each
            try:
                url, = self.get_links(prod_url, starts_with=directory)
            except (IndexError, ValueError):
                logger.debug('Dir %s not found for %s', directory,
                             product_name)
                continue

            releases = self.get_links(url, ends_with='-candidates/')
            if not releases:
                self.config.logger.debug('No releases for %s', url)
            for release in releases:
                for info in self.get_release(release):
                    platform, version, kvpairs, bad_lines = info
                    if kvpairs.get('buildID') is None:
                        self.config.logger.warning(
                            "BuildID not found for %s on %s", release, url)
                        continue
                    build_type = 'Release'
                    beta_number = None
                    repository = 'mozilla-release'
                    if 'b' in version:
                        build_type = 'Beta'
                        version, beta_number = version.split('b')
                        repository = 'mozilla-beta'
                    for bad_line in bad_lines:
                        self.config.logger.warning(
                            "Bad line for %s on %s (%r)", release, url,
                            bad_line)

                    # Put a build into the database
                    build_id = kvpairs['buildID']
                    self._insert_build(cursor,
                                       product_name,
                                       version,
                                       platform,
                                       build_id,
                                       build_type,
                                       beta_number,
                                       repository,
                                       ignore_duplicates=True)

                    # If we've got a final beta, add a second record
                    if self._is_final_beta(version):
                        repository = 'mozilla-beta'
                        self._insert_build(cursor,
                                           product_name,
                                           version,
                                           platform,
                                           build_id,
                                           build_type,
                                           beta_number,
                                           repository,
                                           ignore_duplicates=True)

    def scrape_nightlies(self, connection, product_name, date):
        directories = (
            product_name,
            'nightly',
            date.strftime('%Y'),
            date.strftime('%m'),
        )
        nightly_url = self.config.base_url
        for part in directories:
            nightly_url = urlparse.urljoin(nightly_url, part + '/')
        cursor = connection.cursor()
        dir_prefix = date.strftime('%Y-%m-%d')
        nightlies = self.get_links(nightly_url, starts_with=dir_prefix)
        for nightly in nightlies:
            dirname = nightly.replace(nightly_url, '')
            if dirname.endswith('/'):
                dirname = dirname[:-1]
            for info in self.get_nightly(nightly, dirname):
                platform, repository, version, kvpairs, bad_lines = info
                for bad_line in bad_lines:
                    self.config.logger.warning("Bad line for %s (%r)", nightly,
                                               bad_line)
                build_type = 'Nightly'
                if version.endswith('a2'):
                    build_type = 'Aurora'
                if kvpairs.get('buildID'):
                    build_id = kvpairs['buildID']
                    self._insert_build(cursor,
                                       product_name,
                                       version,
                                       platform,
                                       build_id,
                                       build_type,
                                       kvpairs.get('beta_number', None),
                                       repository,
                                       ignore_duplicates=True)

    def scrape_b2g(self, connection, product_name, date):
        if product_name != 'b2g':
            return

        directories = (
            product_name,
            'manifests',
            'nightly',
        )
        b2g_manifests = self.config.base_url
        for part in directories:
            b2g_manifests = urlparse.urljoin(b2g_manifests, part + '/')
        dir_prefix = date.strftime('%Y-%m-%d')
        cursor = connection.cursor()
        version_dirs = self.get_links(b2g_manifests, ends_with='/')
        for version_dir in version_dirs:
            prod_url = urlparse.urljoin(version_dir, date.strftime('%Y/%m/'))
            nightlies = self.get_links(prod_url, starts_with=dir_prefix)
            for nightly in nightlies:
                b2gs = self.get_b2g(
                    nightly,
                    backfill_date=None,
                )
                for info in b2gs:
                    platform, repository, version, kvpairs = info
                    build_id = kvpairs['buildid']
                    build_type = kvpairs['build_type']
                    self._insert_build(cursor,
                                       product_name,
                                       version,
                                       platform,
                                       build_id,
                                       build_type,
                                       kvpairs.get('beta_number', None),
                                       repository,
                                       ignore_duplicates=True)
Example #25
class BotoCrashStorage(CrashStorageBase):
    """This class sends processed crash reports to an end point reachable
    by the boto S3 library.
    """
    required_config = Namespace()
    required_config.add_option(
        "resource_class",
        default=(
            'socorro.external.boto.connection_context.ConnectionContextBase'),
        doc=('fully qualified dotted Python classname to handle Boto '
             'connections'),
        from_string_converter=class_converter,
        reference_value_from='resource.boto')
    required_config.add_option(
        'transaction_executor_class_for_get',
        default="socorro.database.transaction_executor."
        "TransactionExecutorWithLimitedBackoff",
        doc='a class that will manage transactions',
        from_string_converter=class_converter,
        reference_value_from='resource.boto',
    )
    required_config.add_option(
        'transaction_executor_class',
        default="socorro.database.transaction_executor."
        "TransactionExecutorWithLimitedBackoff",
        doc='a class that will manage transactions',
        from_string_converter=class_converter,
        reference_value_from='resource.boto',
    )
    required_config.add_option(
        'temporary_file_system_storage_path',
        doc='a local filesystem path where dumps are stored temporarily '
        'during processing',
        default='/home/socorro/temp',
        reference_value_from='resource.boto',
    )
    required_config.add_option(
        'dump_file_suffix',
        doc='the suffix used to identify a dump file (for use in temp files)',
        default='.dump',
        reference_value_from='resource.boto',
    )
    required_config.add_option(
        'json_object_hook',
        default='socorro.lib.util.DotDict',
        from_string_converter=class_converter,
    )

    def is_operational_exception(self, x):
        if "not found, no value returned" in str(x):
            # the not found error needs to be re-tryable to compensate for
            # eventual consistency.  However, a method capable of raising this
            # exception should never be used with a transaction executor that
            # has infinite back off.
            return True
        #elif   # for further cases...
        return False

    def __init__(self, config, quit_check_callback=None):
        super(BotoCrashStorage, self).__init__(config, quit_check_callback)

        self.connection_source = config.resource_class(config)
        self.transaction = config.transaction_executor_class(
            config, self.connection_source, quit_check_callback)
        if config.transaction_executor_class_for_get.is_infinite:
            self.config.logger.error(
                'the class %s identifies itself as an infinite iterator. '
                'As a TransactionExecutor for reads from Boto, this may '
                'result in infinite loops that will consume threads forever.' %
                py_obj_to_str(config.transaction_executor_class_for_get))

        self.transaction_for_get = config.transaction_executor_class_for_get(
            config, self.connection_source, quit_check_callback)

    @staticmethod
    def do_save_raw_crash(boto_connection, raw_crash, dumps, crash_id):
        if dumps is None:
            dumps = MemoryDumpsMapping()
        raw_crash_as_string = boto_connection._convert_mapping_to_string(
            raw_crash)
        boto_connection.submit(crash_id, "raw_crash", raw_crash_as_string)
        dump_names_as_string = boto_connection._convert_list_to_string(
            dumps.keys())
        boto_connection.submit(crash_id, "dump_names", dump_names_as_string)

        # we don't know what type of dumps mapping we have.  We do know,
        # however, that by calling the as_memory_dumps_mapping method, we
        # will get a MemoryDumpsMapping, which is exactly what we need.
        dumps = dumps.as_memory_dumps_mapping()
        for dump_name, dump in dumps.iteritems():
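            # the unnamed/default minidump is stored under the name 'dump'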
            if dump_name in (None, '', 'upload_file_minidump'):
                dump_name = 'dump'
            boto_connection.submit(crash_id, dump_name, dump)

    def save_raw_crash(self, raw_crash, dumps, crash_id):
        self.transaction(self.do_save_raw_crash, raw_crash, dumps, crash_id)

    @staticmethod
    def _do_save_processed(boto_connection, processed_crash):
        crash_id = processed_crash['uuid']
        processed_crash_as_string = boto_connection._convert_mapping_to_string(
            processed_crash)
        boto_connection.submit(crash_id, "processed_crash",
                               processed_crash_as_string)

    def save_processed(self, processed_crash):
        self.transaction(self._do_save_processed, processed_crash)

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        """ bug 866973 - do not put raw_crash back into permanent storage again
            We are doing this in lieu of a queuing solution that could allow
            us to operate an independent crashmover. When the queuing system
            is implemented, we could remove this, and have the raw crash
            saved by a crashmover that's consuming crash_ids the same way
            that the processor consumes them.

            See further comments in the ProcessorApp class.
        """
        self.save_processed(processed_crash)

    @staticmethod
    def do_get_raw_crash(boto_connection, crash_id, json_object_hook):
        try:
            raw_crash_as_string = boto_connection.fetch(crash_id, "raw_crash")
            return json.loads(raw_crash_as_string,
                              object_hook=json_object_hook)
        except boto_connection.ResponseError as x:
            raise CrashIDNotFound('%s not found: %s' % (crash_id, x))

    def get_raw_crash(self, crash_id):
        return self.transaction_for_get(self.do_get_raw_crash, crash_id,
                                        self.config.json_object_hook)

    @staticmethod
    def do_get_raw_dump(boto_connection, crash_id, name=None):
        try:
            if name in (None, '', 'upload_file_minidump'):
                name = 'dump'
            a_dump = boto_connection.fetch(crash_id, name)
            return a_dump
        except boto_connection.ResponseError as x:
            raise CrashIDNotFound('%s not found: %s' % (crash_id, x))

    def get_raw_dump(self, crash_id, name=None):
        return self.transaction_for_get(self.do_get_raw_dump, crash_id, name)

    @staticmethod
    def do_get_raw_dumps(boto_connection, crash_id):
        try:
            dump_names_as_string = boto_connection.fetch(
                crash_id, "dump_names")
            dump_names = boto_connection._convert_string_to_list(
                dump_names_as_string)
            # when we fetch the dumps, they are by default in memory, so we'll
            # put them into a MemoryDumpMapping.
            dumps = MemoryDumpsMapping()
            for dump_name in dump_names:
                if dump_name in (None, '', 'upload_file_minidump'):
                    dump_name = 'dump'
                dumps[dump_name] = boto_connection.fetch(crash_id, dump_name)
            return dumps
        except boto_connection.ResponseError as x:
            raise CrashIDNotFound('%s not found: %s' % (crash_id, x))

    def get_raw_dumps(self, crash_id):
        """this returns a MemoryDumpsMapping"""
        return self.transaction_for_get(self.do_get_raw_dumps, crash_id)

    def get_raw_dumps_as_files(self, crash_id):
        in_memory_dumps = self.get_raw_dumps(crash_id)
        # convert our native memory dump mapping into a file dump mapping.
        return in_memory_dumps.as_file_dumps_mapping(
            crash_id, self.config.temporary_file_system_storage_path,
            self.config.dump_file_suffix)

    @staticmethod
    def _do_get_unredacted_processed(
        boto_connection,
        crash_id,
        json_object_hook,
    ):
        try:
            processed_crash_as_string = boto_connection.fetch(
                crash_id, "processed_crash")
            return json.loads(
                processed_crash_as_string,
                object_hook=json_object_hook,
            )
        except boto_connection.ResponseError as x:
            raise CrashIDNotFound('%s not found: %s' % (crash_id, x))

    def get_unredacted_processed(self, crash_id):
        return self.transaction_for_get(
            self._do_get_unredacted_processed,
            crash_id,
            self.config.json_object_hook,
        )
Example #26
    def test_poly_crash_storage(self):
        n = Namespace()
        n.add_option(
            'storage',
            default=PolyCrashStorage,
        )
        n.add_option(
            'logger',
            default=mock.Mock(),
        )
        value = {
            'storage_classes': (
                'socorro.unittest.external.test_crashstorage_base.A,'
                'socorro.unittest.external.test_crashstorage_base.A,'
                'socorro.unittest.external.test_crashstorage_base.B'
            ),
            'storage1.y': 37,
        }
        cm = ConfigurationManager(n, values_source_list=[value])
        with cm.context() as config:
            eq_(config.storage0.crashstorage_class.foo, 'a')
            eq_(config.storage1.crashstorage_class.foo, 'a')
            eq_(config.storage1.y, 37)
            eq_(config.storage2.crashstorage_class.foo, 'b')

            poly_store = config.storage(config)
            l = len(poly_store.storage_namespaces)
            eq_(
                l, 3,
                'expected poly_store to have length of 3, '
                'but %d was found instead' % l
            )
            eq_(poly_store.storage_namespaces[0], 'storage0')
            eq_(poly_store.storage_namespaces[1], 'storage1')
            eq_(poly_store.storage_namespaces[2], 'storage2')
            l = len(poly_store.stores)
            eq_(
                l, 3,
                'expected poly_store.stores to have length of 3, '
                'but %d was found instead' % l
            )
            eq_(poly_store.stores.storage0.foo, 'a')
            eq_(poly_store.stores.storage1.foo, 'a')
            eq_(poly_store.stores.storage2.foo, 'b')

            raw_crash = {'ooid': ''}
            dump = '12345'
            processed_crash = {'ooid': '', 'product': 17}
            for v in poly_store.stores.itervalues():
                v.save_raw_crash = Mock()
                v.save_processed = Mock()
                v.close = Mock()

            poly_store.save_raw_crash(raw_crash, dump, '')
            for v in poly_store.stores.itervalues():
                v.save_raw_crash.assert_called_once_with(raw_crash, dump, '')

            poly_store.save_processed(processed_crash)
            for v in poly_store.stores.itervalues():
                v.save_processed.assert_called_once_with(processed_crash)

            poly_store.save_raw_and_processed(
                raw_crash,
                dump,
                processed_crash,
                'n'
            )
            for v in poly_store.stores.itervalues():
                v.save_raw_crash.assert_called_with(raw_crash, dump, 'n')
                v.save_processed.assert_called_with(processed_crash)

            raw_crash = {'ooid': 'oaeu'}
            dump = '5432'
            processed_crash = {'ooid': 'aoeu', 'product': 33}

            poly_store.stores['storage1'].save_raw_crash = Mock()
            poly_store.stores['storage1'].save_raw_crash.side_effect = \
                Exception('this is messed up')
            poly_store.stores['storage2'].save_processed = Mock()
            poly_store.stores['storage2'].save_processed.side_effect = \
                Exception('this is messed up')

            assert_raises(
                PolyStorageError,
                poly_store.save_raw_crash,
                raw_crash,
                dump,
                ''
            )
            for v in poly_store.stores.itervalues():
                v.save_raw_crash.assert_called_with(raw_crash, dump, '')

            assert_raises(
                PolyStorageError,
                poly_store.save_processed,
                processed_crash
            )
            for v in poly_store.stores.itervalues():
                v.save_processed.assert_called_with(processed_crash)

            assert_raises(
                PolyStorageError,
                poly_store.save_raw_and_processed,
                raw_crash,
                dump,
                processed_crash,
                'n'
            )
            for v in poly_store.stores.itervalues():
                v.save_raw_crash.assert_called_with(raw_crash, dump, 'n')
                v.save_processed.assert_called_with(processed_crash)

            poly_store.stores['storage2'].close.side_effect = Exception
            assert_raises(PolyStorageError, poly_store.close)
            for v in poly_store.stores.itervalues():
                v.close.assert_called_with()
Example #27
class Bar(RequiredConfig):
    required_config = Namespace()
    required_config.add_option('x', default=227)
    required_config.add_option('a', default=11)
Example #28
    def test_fallback_crash_storage(self):
        n = Namespace()
        n.add_option(
            'storage',
            default=FallbackCrashStorage,
        )
        n.add_option(
            'logger',
            default=mock.Mock(),
        )
        value = {
            'primary.storage_class': (
                'socorro.unittest.external.test_crashstorage_base.A'
            ),
            'fallback.storage_class': (
                'socorro.unittest.external.test_crashstorage_base.B'
            ),
        }
        cm = ConfigurationManager(
            n,
            values_source_list=[value],
            argv_source=[]
        )
        with cm.context() as config:
            eq_(config.primary.storage_class.foo, 'a')
            eq_(config.fallback.storage_class.foo, 'b')

            raw_crash = {'ooid': ''}
            crash_id = '1498dee9-9a45-45cc-8ec8-71bb62121203'
            dump = '12345'
            processed_crash = {'ooid': '', 'product': 17}
            fb_store = config.storage(config)

            # save_raw tests
            fb_store.primary_store.save_raw_crash = Mock()
            fb_store.fallback_store.save_raw_crash = Mock()
            fb_store.save_raw_crash(raw_crash, dump, crash_id)
            fb_store.primary_store.save_raw_crash.assert_called_with(
                raw_crash,
                dump,
                crash_id
            )
            eq_(fb_store.fallback_store.save_raw_crash.call_count, 0)

            fb_store.primary_store.save_raw_crash = Mock()
            fb_store.primary_store.save_raw_crash.side_effect = Exception('!')
            fb_store.save_raw_crash(raw_crash, dump, crash_id)
            fb_store.primary_store.save_raw_crash.assert_called_with(
                raw_crash,
                dump,
                crash_id
            )
            fb_store.fallback_store.save_raw_crash.assert_called_with(
                raw_crash,
                dump,
                crash_id
            )

            fb_store.fallback_store.save_raw_crash = Mock()
            fb_store.fallback_store.save_raw_crash.side_effect = Exception('!')
            assert_raises(
                PolyStorageError,
                fb_store.save_raw_crash,
                raw_crash,
                dump,
                crash_id
            )
            fb_store.primary_store.save_raw_crash.assert_called_with(
                raw_crash,
                dump,
                crash_id
            )
            fb_store.fallback_store.save_raw_crash.assert_called_with(
                raw_crash,
                dump,
                crash_id
            )

            # save_processed tests
            fb_store.primary_store.save_processed = Mock()
            fb_store.fallback_store.save_processed = Mock()
            fb_store.save_processed(processed_crash)
            fb_store.primary_store.save_processed.assert_called_with(
                processed_crash
            )
            eq_(fb_store.fallback_store.save_processed.call_count, 0)

            fb_store.primary_store.save_processed = Mock()
            fb_store.primary_store.save_processed.side_effect = Exception('!')
            fb_store.save_processed(processed_crash)
            fb_store.primary_store.save_processed.assert_called_with(
                processed_crash
            )
            fb_store.fallback_store.save_processed.assert_called_with(
                processed_crash
            )

            fb_store.fallback_store.save_processed = Mock()
            fb_store.fallback_store.save_processed.side_effect = Exception('!')
            assert_raises(
                PolyStorageError,
                fb_store.save_processed,
                processed_crash
            )
            fb_store.primary_store.save_processed.assert_called_with(
                processed_crash
            )
            fb_store.fallback_store.save_processed.assert_called_with(
                processed_crash
            )

            # close tests
            fb_store.primary_store.close = Mock()
            fb_store.fallback_store.close = Mock()
            fb_store.close()
            fb_store.primary_store.close.assert_called_with()
            fb_store.fallback_store.close.assert_called_with()

            fb_store.primary_store.close = Mock()
            fb_store.fallback_store.close = Mock()
            fb_store.fallback_store.close.side_effect = NotImplementedError()
            fb_store.close()
            fb_store.primary_store.close.assert_called_with()
            fb_store.fallback_store.close.assert_called_with()

            fb_store.primary_store.close = Mock()
            fb_store.primary_store.close.side_effect = Exception('!')
            fb_store.close()
            fb_store.primary_store.close.assert_called_with()
            fb_store.fallback_store.close.assert_called_with()

            fb_store.fallback_store.close = Mock()
            fb_store.fallback_store.close.side_effect = Exception('!')
            assert_raises(PolyStorageError, fb_store.close)
            fb_store.primary_store.close.assert_called_with()
            fb_store.fallback_store.close.assert_called_with()
Example #29
class JitCrashCategorizeRule(ExternalProcessRule):

    required_config = Namespace()
    required_config.command_line = change_default(
        ExternalProcessRule,
        'command_line',
        'timeout -s KILL 30 {command_pathname} '
        '{dump_file_pathname} '
        '2>/dev/null'
    )
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        '/data/socorro/stackwalk/bin/jit-crash-categorize',
    )
    required_config.result_key = change_default(
        ExternalProcessRule,
        'result_key',
        'classifications.jit.category',
    )
    required_config.return_code_key = change_default(
        ExternalProcessRule,
        'return_code_key',
        'classifications.jit.category_return_code',
    )
    required_config.add_option(
        'threshold',
        doc="max number of frames until encountering target frame",
        default=8
    )

    def __init__(self, config):
        super(JitCrashCategorizeRule, self).__init__(config)

    def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
        if (
            processed_crash.product != 'Firefox' or
            not processed_crash.os_name.startswith('Windows') or
            processed_crash.cpu_name != 'x86'
        ):
            # we don't want any of these
            return False

        frames = (
            processed_crash.get('json_dump', {})
            .get('crashing_thread', {})
            .get('frames', [])
        )
        if frames and frames[0].get('module', False):
            # there is a module at the top of the stack, we don't want this
            return False

        return (
            processed_crash.signature.endswith('EnterBaseline') or
            processed_crash.signature.endswith('EnterIon') or
            processed_crash.signature.endswith('js::jit::FastInvoke') or
            processed_crash.signature.endswith('js::jit::IonCannon') or
            processed_crash.signature.endswith('js::irregexp::ExecuteCode<T>')
        )

    def _interpret_external_command_output(self, fp, processor_meta):
        try:
            result = fp.read()
        except IOError as x:
            processor_meta.processor_notes.append(
                "%s unable to read external command output: %s" % (
                    self.config.command_pathname,
                    x
                )
            )
            return ''
        try:
            return result.strip()
        except AttributeError as x:
            # there's no strip method
            return result

class FlashVersionRule(Rule):
    required_config = Namespace()
    required_config.add_option(
        'known_flash_identifiers',
        doc='A subset of the known "debug identifiers" for flash versions, '
        'associated to the version',
        default={
            '7224164B5918E29AF52365AF3EAF7A500': '10.1.51.66',
            'C6CDEFCDB58EFE5C6ECEF0C463C979F80': '10.1.51.66',
            '4EDBBD7016E8871A461CCABB7F1B16120': '10.1',
            'D1AAAB5D417861E6A5B835B01D3039550': '10.0.45.2',
            'EBD27FDBA9D9B3880550B2446902EC4A0': '10.0.45.2',
            '266780DB53C4AAC830AFF69306C5C0300': '10.0.42.34',
            'C4D637F2C8494896FBD4B3EF0319EBAC0': '10.0.42.34',
            'B19EE2363941C9582E040B99BB5E237A0': '10.0.32.18',
            '025105C956638D665850591768FB743D0': '10.0.32.18',
            '986682965B43DFA62E0A0DFFD7B7417F0': '10.0.23',
            '937DDCC422411E58EF6AD13710B0EF190': '10.0.23',
            '860692A215F054B7B9474B410ABEB5300': '10.0.22.87',
            '77CB5AC61C456B965D0B41361B3F6CEA0': '10.0.22.87',
            '38AEB67F6A0B43C6A341D7936603E84A0': '10.0.12.36',
            '776944FD51654CA2B59AB26A33D8F9B30': '10.0.12.36',
            '974873A0A6AD482F8F17A7C55F0A33390': '9.0.262.0',
            'B482D3DFD57C23B5754966F42D4CBCB60': '9.0.262.0',
            '0B03252A5C303973E320CAA6127441F80': '9.0.260.0',
            'AE71D92D2812430FA05238C52F7E20310': '9.0.246.0',
            '6761F4FA49B5F55833D66CAC0BBF8CB80': '9.0.246.0',
            '27CC04C9588E482A948FB5A87E22687B0': '9.0.159.0',
            '1C8715E734B31A2EACE3B0CFC1CF21EB0': '9.0.159.0',
            'F43004FFC4944F26AF228334F2CDA80B0': '9.0.151.0',
            '890664D4EF567481ACFD2A21E9D2A2420': '9.0.151.0',
            '8355DCF076564B6784C517FD0ECCB2F20': '9.0.124.0',
            '51C00B72112812428EFA8F4A37F683A80': '9.0.124.0',
            '9FA57B6DC7FF4CFE9A518442325E91CB0': '9.0.115.0',
            '03D99C42D7475B46D77E64D4D5386D6D0': '9.0.115.0',
            '0CFAF1611A3C4AA382D26424D609F00B0': '9.0.47.0',
            '0F3262B5501A34B963E5DF3F0386C9910': '9.0.47.0',
            'C5B5651B46B7612E118339D19A6E66360': '9.0.45.0',
            'BF6B3B51ACB255B38FCD8AA5AEB9F1030': '9.0.28.0',
            '83CF4DC03621B778E931FC713889E8F10': '9.0.16.0',
        },
        from_string_converter=ujson.loads)
    required_config.add_option(
        'flash_re',
        doc='a regular expression to match Flash file names',
        default=(r'NPSWF32_?(.*)\.dll|'
                 r'FlashPlayerPlugin_?(.*)\.exe|'
                 r'libflashplayer(.*)\.(.*)|'
                 r'Flash ?Player-?(.*)'),
        from_string_converter=re.compile)

    #--------------------------------------------------------------------------
    def version(self):
        return '1.0'

    #--------------------------------------------------------------------------
    def _get_flash_version(self, **kwargs):
        """If (we recognize this module as Flash and figure out a version):
        Returns version; else (None or '')"""
        filename = kwargs.get('filename', None)
        version = kwargs.get('version', None)
        debug_id = kwargs.get('debug_id', None)
        m = self.config.flash_re.match(filename)
        if m:
            if version:
                return version
            # we didn't get a version passed in to us;
            # try to deduce it
            groups = m.groups()
            if groups[0]:
                return groups[0].replace('_', '.')
            if groups[1]:
                return groups[1].replace('_', '.')
            if groups[2]:
                return groups[2]
            if groups[4]:
                return groups[4]
            return self.config.known_flash_identifiers.get(debug_id, None)
        return None
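    # Illustration with a hypothetical filename: 'NPSWF32_11_2_202_235.dll'
    # matches the first alternative of flash_re, so if no version was passed
    # in, group 1 ('11_2_202_235') is rewritten to '11.2.202.235'.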

    #--------------------------------------------------------------------------
    def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        processed_crash.flash_version = ''
        flash_version = None
        for a_module in processed_crash['json_dump']['modules']:
            flash_version = self._get_flash_version(**a_module)
            if flash_version:
                break
        if flash_version:
            processed_crash.flash_version = flash_version
        else:
            processed_crash.flash_version = '[blank]'
        return True