class StandAloneServer(WebServerBase):
    required_config = Namespace()
    required_config.add_option(
        'port',
        doc='the port to listen to for submissions',
        default=8882
    )
class Foo(RequiredConfig):
    required_config = Namespace()
    required_config.add_option('x', default=17)
    required_config.add_option('y', default=23)
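# A minimal usage sketch (modelled on the ConfigurationManager calls that
# appear in the test snippets further down; the override value 42 is
# illustrative, not from the original source): Foo's required_config options
# surface as attributes on the config object that ConfigurationManager
# produces.
from configman import ConfigurationManager

config_manager = ConfigurationManager(
    [Foo.get_required_config()],
    values_source_list=[{'x': 42}],
    argv_source=[]
)
with config_manager.context() as config:
    assert config.x == 42
    assert config.y == 23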
class BreakpadStackwalkerRule2015(ExternalProcessRule):
    """Executes the minidump stackwalker external process and puts output
    in the processed crash"""

    required_config = Namespace()
    required_config.add_option(
        name='public_symbols_url',
        doc='url of the public symbol server',
        default="https://localhost",
        likely_to_be_changed=True
    )
    required_config.add_option(
        name='private_symbols_url',
        doc='url of the private symbol server',
        default="https://localhost",
        likely_to_be_changed=True
    )
    required_config.command_line = change_default(
        ExternalProcessRule,
        'command_line',
        'timeout -s KILL {kill_timeout} {command_pathname} '
        '--raw-json {raw_crash_pathname} '
        '--symbols-url {public_symbols_url} '
        '--symbols-url {private_symbols_url} '
        '--symbols-cache {symbol_cache_path} '
        '--symbols-tmp {symbol_tmp_path} '
        '{dump_file_pathname} '
        '2> /dev/null'
    )
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        # NOTE(willkg): This is the path for the RPM-based Socorro deploy.
        # When we switch to Docker, we should change this.
        '/data/socorro/stackwalk/bin/stackwalker',
    )
    required_config.add_option(
        'kill_timeout',
        doc='amount of time to let mdsw run before declaring it hung',
        default=600
    )
    required_config.add_option(
        'symbol_tmp_path',
        doc=(
            'directory to use as temp space for downloading symbols--must '
            'be on the same filesystem as symbols-cache'
        ),
        default=os.path.join(tempfile.gettempdir(), 'symbols-tmp'),
    )
    required_config.add_option(
        'symbol_cache_path',
        doc=(
            'the path where the symbol cache is found; this location must '
            'be readable and writeable (quote path with embedded spaces)'
        ),
        default=os.path.join(tempfile.gettempdir(), 'symbols'),
    )
    required_config.add_option(
        'temporary_file_system_storage_path',
        doc='a path where temporary files may be written',
        default=tempfile.gettempdir(),
    )

    def version(self):
        return '1.0'

    @contextmanager
    def _temp_raw_crash_json_file(self, raw_crash, crash_id):
        file_pathname = os.path.join(
            self.config.temporary_file_system_storage_path,
            "%s.%s.TEMPORARY.json" % (
                crash_id,
                threading.currentThread().getName()
            )
        )
        with open(file_pathname, "w") as f:
            ujson.dump(raw_crash, f)
        try:
            yield file_pathname
        finally:
            os.unlink(file_pathname)

    def _execute_external_process(self, command_line, processor_meta):
        stackwalker_output, return_code = super(
            BreakpadStackwalkerRule2015, self
        )._execute_external_process(command_line, processor_meta)

        if not isinstance(stackwalker_output, Mapping):
            processor_meta.processor_notes.append(
                "MDSW produced unexpected output: %s..." %
                str(stackwalker_output)[:10]
            )
            stackwalker_output = {}

        stackwalker_data = DotDict()
        stackwalker_data.json_dump = stackwalker_output
        stackwalker_data.mdsw_return_code = return_code
        stackwalker_data.mdsw_status_string = stackwalker_output.get(
            'status', 'unknown error'
        )
        stackwalker_data.success = stackwalker_data.mdsw_status_string == 'OK'

        if return_code == 124:
            processor_meta.processor_notes.append(
                "MDSW terminated with SIGKILL due to timeout"
            )
        elif return_code != 0 or not stackwalker_data.success:
            processor_meta.processor_notes.append(
                "MDSW failed on '%s': %s" % (
                    command_line,
                    stackwalker_data.mdsw_status_string
                )
            )

        return stackwalker_data, return_code

    def expand_commandline(self, dump_file_pathname, raw_crash_pathname):
        """Expands the command line parameters and returns the final
        command line"""
        # NOTE(willkg): If we ever add new configuration variables, we'll
        # need to add them here, too, otherwise they won't get expanded in
        # the command line.
        params = {
            # These come from config
            'kill_timeout': self.config.kill_timeout,
            'command_pathname': self.config.command_pathname,
            'public_symbols_url': self.config.public_symbols_url,
            'private_symbols_url': self.config.private_symbols_url,
            'symbol_cache_path': self.config.symbol_cache_path,
            'symbol_tmp_path': self.config.symbol_tmp_path,

            # These are calculated
            'dump_file_pathname': dump_file_pathname,
            'raw_crash_pathname': raw_crash_pathname
        }
        return self.config.command_line.format(**params)

    def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        if 'additional_minidumps' not in processed_crash:
            processed_crash.additional_minidumps = []

        with self._temp_raw_crash_json_file(
            raw_crash, raw_crash.uuid
        ) as raw_crash_pathname:
            for dump_name in raw_dumps.iterkeys():
                if processor_meta.quit_check:
                    processor_meta.quit_check()

                # this rule is only interested in dumps targeted for the
                # minidump stackwalker external program. As of the writing
                # of this code, there is one other dump type. The only way
                # to differentiate these dump types is by the name of the
                # dump. All minidumps targeted for the stackwalker will
                # have a name with a prefix specified in configuration:
                if not dump_name.startswith(self.config.dump_field):
                    # dumps not intended for the stackwalker are ignored
                    continue

                dump_file_pathname = raw_dumps[dump_name]

                if self.config.chatty:
                    self.config.logger.debug(
                        "BreakpadStackwalkerRule2015: %s, %s",
                        dump_name, dump_file_pathname
                    )

                command_line = self.expand_commandline(
                    dump_file_pathname=dump_file_pathname,
                    raw_crash_pathname=raw_crash_pathname
                )

                stackwalker_data, return_code = self._execute_external_process(
                    command_line, processor_meta
                )

                if dump_name == self.config.dump_field:
                    processed_crash.update(stackwalker_data)
                else:
                    processed_crash.additional_minidumps.append(dump_name)
                    processed_crash[dump_name] = stackwalker_data

        return True
class PostgreSQLCrashStorage(CrashStorageBase):
    """this implementation of crashstorage saves processed crashes to an
    instance of PostgreSQL. It only saves certain key values to the
    partitioned reports table, therefore it is not a source for fetching
    complete processed reports and does not implement any of the 'get'
    methods."""

    required_config = Namespace()
    required_config.add_option(
        'transaction_executor_class',
        default="socorro.database.transaction_executor."
                "TransactionExecutorWithInfiniteBackoff",
        doc='a class that will manage transactions',
        from_string_converter=class_converter,
        reference_value_from='resource.postgresql',
    )
    required_config.add_option(
        'database_class',
        default=ConnectionContext,
        doc='the class responsible for connecting to Postgres',
        reference_value_from='resource.postgresql',
    )

    _reports_table_mappings = (
        # processed name, reports table name
        ("addons_checked", "addons_checked"),
        ("address", "address"),
        ("app_notes", "app_notes"),
        ("build", "build"),
        ("client_crash_date", "client_crash_date"),
        ("completeddatetime", "completed_datetime"),
        ("cpu_info", "cpu_info"),
        ("cpu_name", "cpu_name"),
        ("date_processed", "date_processed"),
        ("distributor", "distributor"),
        ("distributor_version", "distributor_version"),
        ("email", "email"),
        ("exploitability", "exploitability"),
        # ("flash_process_dump", "flash_process_dump"),  # future
        ("flash_version", "flash_version"),
        ("hangid", "hangid"),
        ("install_age", "install_age"),
        ("last_crash", "last_crash"),
        ("os_name", "os_name"),
        ("os_version", "os_version"),
        ("processor_notes", "processor_notes"),
        ("process_type", "process_type"),
        ("product", "product"),
        ("productid", "productid"),
        ("reason", "reason"),
        ("release_channel", "release_channel"),
        ("signature", "signature"),
        ("startedDateTime", "started_datetime"),
        ("success", "success"),
        ("topmost_filenames", "topmost_filenames"),
        ("truncated", "truncated"),
        ("uptime", "uptime"),
        ("user_comments", "user_comments"),
        ("user_id", "user_id"),
        ("url", "url"),
        ("uuid", "uuid"),
        ("version", "version"),
    )

    #--------------------------------------------------------------------------
    def __init__(self, config, quit_check_callback=None):
        super(PostgreSQLCrashStorage, self).__init__(
            config, quit_check_callback=quit_check_callback)
        self.database = config.database_class(config)
        self.transaction = config.transaction_executor_class(
            config, self.database, quit_check_callback=quit_check_callback)

    #--------------------------------------------------------------------------
    def save_raw_crash(self, raw_crash, dumps, crash_id):
        """nota bene: this function does not save the dumps in PG, only the
        raw crash json is saved."""
        self.transaction(self._save_raw_crash_transaction, raw_crash, crash_id)

    #--------------------------------------------------------------------------
    def _save_raw_crash_transaction(self, connection, raw_crash, crash_id):
        raw_crash_table_name = (
            'raw_crashes_%s' % self._table_suffix_for_crash_id(crash_id))

        insert_sql = """insert into %s (uuid, raw_crash, date_processed)
                        values (%%s, %%s, %%s)""" % raw_crash_table_name

        savepoint_name = threading.currentThread().getName().replace('-', '')
        value_list = (
            crash_id,
            json.dumps(raw_crash),
            raw_crash["submitted_timestamp"]
        )
        execute_no_results(connection, "savepoint %s" % savepoint_name)
        try:
            execute_no_results(connection, insert_sql, value_list)
            execute_no_results(
                connection, "release savepoint %s" % savepoint_name)
        except self.config.database_class.IntegrityError:
            # report already exists
            execute_no_results(
                connection, "rollback to savepoint %s" % savepoint_name)
            execute_no_results(
                connection, "release savepoint %s" % savepoint_name)
            execute_no_results(
                connection,
                "delete from %s where uuid = %%s" % raw_crash_table_name,
                (crash_id,))
            execute_no_results(connection, insert_sql, value_list)

    #--------------------------------------------------------------------------
    def get_raw_crash(self, crash_id):
        """the default implementation of fetching a raw_crash

        parameters:
           crash_id - the id of a raw crash to fetch"""
        return self.transaction(self._get_raw_crash_transaction, crash_id)

    #--------------------------------------------------------------------------
    def _get_raw_crash_transaction(self, connection, crash_id):
        raw_crash_table_name = (
            'raw_crash_%s' % self._table_suffix_for_crash_id(crash_id))
        fetch_sql = 'select raw_crash from %s where uuid = %%s' % \
                    raw_crash_table_name
        try:
            return single_value_sql(connection, fetch_sql, (crash_id,))
        except SQLDidNotReturnSingleValue:
            raise CrashIDNotFound(crash_id)

    #--------------------------------------------------------------------------
    def save_processed(self, processed_crash):
        self.transaction(self._save_processed_transaction, processed_crash)

    #--------------------------------------------------------------------------
    def _save_processed_transaction(self, connection, processed_crash):
        report_id = self._save_processed_report(connection, processed_crash)
        self._save_plugins(connection, processed_crash, report_id)
        self._save_extensions(connection, processed_crash, report_id)
        self._save_processed_crash(connection, processed_crash)

    def _save_processed_crash(self, connection, processed_crash):
        crash_id = processed_crash['uuid']
        processed_crashes_table_name = (
            'processed_crashes_%s' % self._table_suffix_for_crash_id(crash_id))
        upsert_sql = """
        WITH
        update_processed_crash AS (
            UPDATE %(table)s SET
                processed_crash = %%(processed_json)s,
                date_processed = %%(date_processed)s
            WHERE uuid = %%(uuid)s
            RETURNING 1
        ),
        insert_processed_crash AS (
            INSERT INTO %(table)s (uuid, processed_crash, date_processed)
            ( SELECT
                %%(uuid)s as uuid,
                %%(processed_json)s as processed_crash,
                %%(date_processed)s as date_processed
                WHERE NOT EXISTS (
                    SELECT uuid from %(table)s
                    WHERE
                        uuid = %%(uuid)s
                    LIMIT 1
                )
            )
            RETURNING 2
        )
        SELECT * from update_processed_crash
        UNION ALL
        SELECT * from insert_processed_crash
        """ % {
            'table': processed_crashes_table_name,
            'uuid': crash_id
        }

        values = {
            'processed_json': json.dumps(processed_crash, cls=JsonDTEncoder),
            'date_processed': processed_crash["date_processed"],
            'uuid': crash_id
        }
        execute_no_results(connection, upsert_sql, values)

    #--------------------------------------------------------------------------
    def _save_processed_report(self, connection, processed_crash):
        column_list = []
        placeholder_list = []
        value_list = []
        for pro_crash_name, report_name in self._reports_table_mappings:
            column_list.append(report_name)
            placeholder_list.append('%s')
            value_list.append(processed_crash[pro_crash_name])
        crash_id = processed_crash['uuid']
        reports_table_name = (
            'reports_%s' % self._table_suffix_for_crash_id(crash_id))
        insert_sql = "insert into %s (%s) values (%s) returning id" % (
            reports_table_name,
            ', '.join(column_list),
            ', '.join(placeholder_list))

        # we want to insert directly into the reports table. There is a
        # chance, however, that the record already exists. If it does, the
        # insert would fail and the connection would fall into a "broken"
        # state. To avoid this, we set a savepoint to which we can roll
        # back if the record already exists - essentially a nested
        # transaction.
        # We use the name of the executing thread as the savepoint name.
        # Alternatively we could use a uuid.
        savepoint_name = threading.currentThread().getName().replace('-', '')
        execute_no_results(connection, "savepoint %s" % savepoint_name)
        try:
            report_id = single_value_sql(connection, insert_sql, value_list)
            execute_no_results(
                connection, "release savepoint %s" % savepoint_name)
        except self.config.database_class.IntegrityError:
            # report already exists
            execute_no_results(
                connection, "rollback to savepoint %s" % savepoint_name)
            execute_no_results(
                connection, "release savepoint %s" % savepoint_name)
            execute_no_results(
                connection,
                "delete from %s where uuid = %%s" % reports_table_name,
                (processed_crash.uuid,))
            report_id = single_value_sql(connection, insert_sql, value_list)
        return report_id

    #--------------------------------------------------------------------------
    def _save_plugins(self, connection, processed_crash, report_id):
        """ Electrolysis Support - Optional - processed_crash may contain a
        ProcessType of plugin. In the future this value would be default,
        content, maybe even Jetpack... This indicates which process was the
        crashing process.
            plugin - When set to plugin, the jsonDocument MUST also contain
                     PluginFilename, PluginName, and PluginVersion
        """
        process_type = processed_crash['process_type']
        if not process_type:
            return

        if process_type == "plugin":
            # Bug#543776 We are relaxing the non-null policy... a null
            # filename, name, and version is OK. We'll use empty strings
            try:
                plugin_filename = processed_crash['PluginFilename']
                plugin_name = processed_crash['PluginName']
                plugin_version = processed_crash['PluginVersion']
            except KeyError, x:
                self.config.logger.error(
                    'the crash is missing a required field: %s', str(x))
                return
            find_plugin_sql = ('select id from plugins '
                               'where filename = %s '
                               'and name = %s')
            try:
                plugin_id = single_value_sql(connection,
                                             find_plugin_sql,
                                             (plugin_filename, plugin_name))
            except SQLDidNotReturnSingleValue:
                insert_plugins_sql = ("insert into plugins (filename, name) "
                                      "values (%s, %s) returning id")
                plugin_id = single_value_sql(connection,
                                             insert_plugins_sql,
                                             (plugin_filename, plugin_name))
            crash_id = processed_crash['uuid']
            table_suffix = self._table_suffix_for_crash_id(crash_id)
            plugin_reports_table_name = 'plugins_reports_%s' % table_suffix
            plugins_reports_insert_sql = (
                'insert into %s '
                '    (report_id, plugin_id, date_processed, version) '
                'values '
                '    (%%s, %%s, %%s, %%s)' % plugin_reports_table_name)
            values_tuple = (report_id,
                            plugin_id,
                            processed_crash['date_processed'],
                            plugin_version)
            execute_no_results(connection,
                               plugins_reports_insert_sql,
                               values_tuple)
class TelemetryBotoS3CrashStorage(BotoS3CrashStorage):
    """Sends a subset of the processed crash to an S3 bucket

    The subset of the processed crash is based on the JSON Schema which is
    derived from "socorro/external/es/super_search_fields.py".

    """

    required_config = Namespace()
    required_config.resource_class = change_default(
        BotoCrashStorage,
        'resource_class',
        'socorro.external.boto.connection_context.RegionalS3ConnectionContext'
    )
    required_config.elasticsearch = Namespace()
    required_config.elasticsearch.add_option(
        'elasticsearch_class',
        default='socorro.external.es.connection_context.ConnectionContext',
        from_string_converter=class_converter,
        reference_value_from='resource.elasticsearch',
    )

    def __init__(self, config, *args, **kwargs):
        # This class requires that we use SimpleDatePrefixKeyBuilder, so we
        # stomp on the configuration to make absolutely sure it gets set
        # that way.
        config.keybuilder_class = SimpleDatePrefixKeyBuilder
        super(TelemetryBotoS3CrashStorage, self).__init__(
            config, *args, **kwargs)

    def _get_all_fields(self):
        if (hasattr(self, '_all_fields') and
                hasattr(self, '_all_fields_timestamp')):
            # we might have it cached
            age = time.time() - self._all_fields_timestamp
            if age < 60 * 60:
                # fresh enough
                return self._all_fields

        self._all_fields = SuperSearchFields(config=self.config).get()
        self._all_fields_timestamp = time.time()
        return self._all_fields

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        all_fields = self._get_all_fields()
        crash_report = {}

        # TODO: opportunity for optimization. We could inspect
        # CRASH_REPORT_JSON_SCHEMA, get a list of all (recursive) keys that
        # are in there, and use that to limit the two following loops so we
        # don't bother filling up `crash_report` with keys that will never
        # be needed.

        # Rename fields in raw_crash.
        raw_fields_map = dict(
            (x['in_database_name'], x['name'])
            for x in all_fields.values()
            if x['namespace'] == 'raw_crash'
        )
        for key, val in raw_crash.items():
            crash_report[raw_fields_map.get(key, key)] = val

        # Rename fields in processed_crash.
        processed_fields_map = dict(
            (x['in_database_name'], x['name'])
            for x in all_fields.values()
            if x['namespace'] == 'processed_crash'
        )
        for key, val in processed_crash.items():
            crash_report[processed_fields_map.get(key, key)] = val

        # Validate crash_report.
        crash_report = json_schema_reducer.make_reduced_dict(
            CRASH_REPORT_JSON_SCHEMA, crash_report)

        self.save_processed(crash_report)

    @staticmethod
    def _do_save_processed(boto_connection, processed_crash):
        """Overriding this to change "name of thing" to crash_report"""
        crash_id = processed_crash['uuid']
        processed_crash_as_string = boto_connection._convert_mapping_to_string(
            processed_crash)
        boto_connection.submit(
            crash_id, "crash_report", processed_crash_as_string)

    @staticmethod
    def _do_get_unredacted_processed(boto_connection, crash_id,
                                     json_object_hook):
        """Overriding this to change "name of thing" to crash_report"""
        try:
            processed_crash_as_string = boto_connection.fetch(
                crash_id, 'crash_report')
            return json.loads(
                processed_crash_as_string,
                object_hook=json_object_hook,
            )
        except boto_connection.ResponseError as x:
            raise CrashIDNotFound('%s not found: %s' % (crash_id, x))
def test_basic_crashstorage(self):
    required_config = Namespace()

    mock_logging = Mock()
    required_config.add_option('logger', default=mock_logging)
    required_config.update(CrashStorageBase.required_config)

    config_manager = ConfigurationManager(
        [required_config],
        app_name='testapp',
        app_version='1.0',
        app_description='app description',
        values_source_list=[{
            'logger': mock_logging,
        }],
        argv_source=[]
    )

    with config_manager.context() as config:
        crashstorage = CrashStorageBase(
            config,
            quit_check_callback=fake_quit_check
        )
        crashstorage.save_raw_crash({}, 'payload', 'ooid')
        crashstorage.save_processed({})
        assert_raises(
            NotImplementedError,
            crashstorage.get_raw_crash,
            'ooid'
        )
        assert_raises(
            NotImplementedError,
            crashstorage.get_raw_dump,
            'ooid'
        )
        assert_raises(
            NotImplementedError,
            crashstorage.get_unredacted_processed,
            'ooid'
        )
        assert_raises(
            NotImplementedError,
            crashstorage.remove,
            'ooid'
        )
        eq_(crashstorage.new_crashes(), [])
        crashstorage.close()

    with config_manager.context() as config:
        class MyCrashStorageTest(CrashStorageBase):
            def save_raw_crash(self, raw_crash, dumps, crash_id):
                eq_(crash_id, "fake_id")
                eq_(raw_crash, "fake raw crash")
                eq_(
                    sorted(dumps.keys()),
                    sorted(['one', 'two', 'three'])
                )
                eq_(
                    sorted(dumps.values()),
                    sorted(['eins', 'zwei', 'drei'])
                )

        values = ['eins', 'zwei', 'drei']

        def open_function(*args, **kwargs):
            return values.pop(0)

        crashstorage = MyCrashStorageTest(
            config,
            quit_check_callback=fake_quit_check
        )

        with mock.patch("__builtin__.open") as open_mock:
            open_mock.return_value = mock.MagicMock()
            (
                open_mock.return_value.__enter__
                .return_value.read.side_effect
            ) = open_function
            crashstorage.save_raw_crash_with_file_dumps(
                "fake raw crash",
                FileDumpsMapping({
                    'one': 'eins',
                    'two': 'zwei',
                    'three': 'drei'
                }),
                'fake_id'
            )
def test_benchmarking_crashstore(self):
    required_config = Namespace()

    mock_logging = Mock()
    required_config.add_option('logger', default=mock_logging)
    required_config.update(BenchmarkingCrashStorage.get_required_config())
    fake_crash_store = Mock()

    config_manager = ConfigurationManager(
        [required_config],
        app_name='testapp',
        app_version='1.0',
        app_description='app description',
        values_source_list=[{
            'logger': mock_logging,
            'wrapped_crashstore': fake_crash_store,
            'benchmark_tag': 'test'
        }],
        argv_source=[]
    )

    with config_manager.context() as config:
        crashstorage = BenchmarkingCrashStorage(
            config,
            quit_check_callback=fake_quit_check
        )
        crashstorage.start_timer = lambda: 0
        crashstorage.end_timer = lambda: 1
        fake_crash_store.assert_called_with(config, fake_quit_check)

        crashstorage.save_raw_crash({}, 'payload', 'ooid')
        crashstorage.wrapped_crashstore.save_raw_crash.assert_called_with(
            {}, 'payload', 'ooid'
        )
        mock_logging.debug.assert_called_with(
            '%s save_raw_crash %s', 'test', 1
        )
        mock_logging.debug.reset_mock()

        crashstorage.save_processed({})
        crashstorage.wrapped_crashstore.save_processed.assert_called_with(
            {}
        )
        mock_logging.debug.assert_called_with(
            '%s save_processed %s', 'test', 1
        )
        mock_logging.debug.reset_mock()

        crashstorage.save_raw_and_processed({}, 'payload', {}, 'ooid')
        crashstorage.wrapped_crashstore.save_raw_and_processed \
            .assert_called_with(
                {}, 'payload', {}, 'ooid'
            )
        mock_logging.debug.assert_called_with(
            '%s save_raw_and_processed %s', 'test', 1
        )
        mock_logging.debug.reset_mock()

        crashstorage.get_raw_crash('uuid')
        crashstorage.wrapped_crashstore.get_raw_crash.assert_called_with(
            'uuid'
        )
        mock_logging.debug.assert_called_with(
            '%s get_raw_crash %s', 'test', 1
        )
        mock_logging.debug.reset_mock()

        crashstorage.get_raw_dump('uuid')
        crashstorage.wrapped_crashstore.get_raw_dump.assert_called_with(
            'uuid'
        )
        mock_logging.debug.assert_called_with(
            '%s get_raw_dump %s', 'test', 1
        )
        mock_logging.debug.reset_mock()

        crashstorage.get_raw_dumps('uuid')
        crashstorage.wrapped_crashstore.get_raw_dumps.assert_called_with(
            'uuid'
        )
        mock_logging.debug.assert_called_with(
            '%s get_raw_dumps %s', 'test', 1
        )
        mock_logging.debug.reset_mock()

        crashstorage.get_raw_dumps_as_files('uuid')
        crashstorage.wrapped_crashstore.get_raw_dumps_as_files \
            .assert_called_with(
                'uuid'
            )
        mock_logging.debug.assert_called_with(
            '%s get_raw_dumps_as_files %s', 'test', 1
        )
        mock_logging.debug.reset_mock()

        crashstorage.get_unredacted_processed('uuid')
        crashstorage.wrapped_crashstore.get_unredacted_processed \
            .assert_called_with(
                'uuid'
            )
        mock_logging.debug.assert_called_with(
            '%s get_unredacted_processed %s', 'test', 1
        )
        mock_logging.debug.reset_mock()
class ConnectionContextBase(RequiredConfig):
    required_config = Namespace()
    required_config.add_option(
        "access_key",
        doc="access key",
        default=None,
        reference_value_from="resource.boto",
    )
    required_config.add_option(
        "secret_access_key",
        doc="secret access key",
        default=None,
        secret=True,
        reference_value_from="secrets.boto",
        likely_to_be_changed=True,
    )
    required_config.add_option(
        "bucket_name",
        doc="The name of the bucket.",
        default="crashstats",
        reference_value_from="resource.boto",
        likely_to_be_changed=True,
    )
    required_config.add_option(
        "prefix",
        doc="a prefix to use inside the bucket",
        default="",
        reference_value_from="resource.boto",
        likely_to_be_changed=True,
    )
    required_config.add_option(
        "boto_metrics_prefix",
        doc="a prefix to use for boto metrics",
        default="",
        reference_value_from="resource.boto",
    )

    RETRYABLE_EXCEPTIONS = (
        socket.timeout,
        boto.exception.PleaseRetryException,
        boto.exception.ResumableTransferDisposition,
        boto.exception.ResumableUploadException,
    )

    def __init__(self, config, quit_check_callback=None):
        self.config = config
        self._CreateError = boto.exception.StorageCreateError
        self.ResponseError = (boto.exception.StorageResponseError, KeyNotFound)
        self._bucket_cache = {}
        self.metrics = markus.get_metrics(config.boto_metrics_prefix)

    def _connect(self):
        try:
            return self.connection
        except AttributeError:
            self.connection = self._connect_to_endpoint(
                **self._get_credentials())
            return self.connection

    def _get_credentials(self):
        """Returns credentials for creating the connection"""
        raise NotImplementedError

    def _get_datestamp(self, crashid):
        """Retrieves datestamp from a crashid or raises an exception"""
        datestamp = date_from_ooid(crashid)
        if datestamp is None:
            # We should never hit this situation unless the crashid is not
            # valid
            raise CrashidMissingDatestamp("%s is missing datestamp" % crashid)
        return datestamp

    def build_keys(self, prefix, name_of_thing, crashid):
        """Builds a list of s3 pseudo-filenames

        When using keys for saving a crash, always use the first one given.

        When using keys for loading a crash, try each key in order. This
        lets us change our key scheme and continue to access things saved
        using the old key.

        :arg prefix: the prefix to use
        :arg name_of_thing: the kind of thing we're building a filename
            for; e.g. "raw_crash"
        :arg crashid: the crash id for the thing being stored

        :returns: list of keys to try in order

        """
        if name_of_thing == "raw_crash":
            # Insert the first 3 chars of the crashid providing some
            # entropy earlier in the key so that consecutive s3 requests
            # get distributed across multiple s3 partitions
            entropy = crashid[:3]
            date = self._get_datestamp(crashid).strftime("%Y%m%d")
            return [
                "%(prefix)s/v2/%(nameofthing)s/%(entropy)s/%(date)s/%(crashid)s" % {
                    "prefix": prefix,
                    "nameofthing": name_of_thing,
                    "entropy": entropy,
                    "date": date,
                    "crashid": crashid,
                }
            ]

        elif name_of_thing == "crash_report":
            # Crash data from the TelemetryBotoS3CrashStorage
            date = self._get_datestamp(crashid).strftime("%Y%m%d")
            return [
                "%(prefix)s/v1/%(nameofthing)s/%(date)s/%(crashid)s" % {
                    "prefix": prefix,
                    "nameofthing": name_of_thing,
                    "date": date,
                    "crashid": crashid,
                }
            ]

        return [
            "%(prefix)s/v1/%(nameofthing)s/%(crashid)s" % {
                "prefix": prefix,
                "nameofthing": name_of_thing,
                "crashid": crashid
            }
        ]

    def _get_bucket(self, conn, bucket_name):
        try:
            return self._bucket_cache[bucket_name]
        except KeyError:
            self._bucket_cache[bucket_name] = conn.get_bucket(bucket_name)
            return self._bucket_cache[bucket_name]

    def _get_or_create_bucket(self, conn, bucket_name):
        try:
            return self._get_bucket(conn, bucket_name)
        except self.ResponseError:
            self._bucket_cache[bucket_name] = conn.create_bucket(bucket_name)
            return self._bucket_cache[bucket_name]

    def submit(self, id, name_of_thing, thing):
        """submit something to boto"""
        # can only submit binary to boto
        assert isinstance(thing, bytes), type(thing)
        try:
            start_time = time.time()

            conn = self._connect()
            bucket = self._get_or_create_bucket(conn, self.config.bucket_name)

            all_keys = self.build_keys(self.config.prefix, name_of_thing, id)
            # Always submit using the first key
            key = all_keys[0]
            key_object = bucket.new_key(key)
            key_object.set_contents_from_string(thing)
            index_outcome = "successful"
        except Exception:
            index_outcome = "failed"
            raise
        finally:
            elapsed_time = time.time() - start_time
            self.metrics.histogram(
                "submit",
                value=elapsed_time * 1000.0,
                tags=["kind:" + name_of_thing, "outcome:" + index_outcome],
            )

    def fetch(self, id, name_of_thing):
        """Retrieve something from boto"""
        conn = self._connect()
        bucket = self._get_bucket(conn, self.config.bucket_name)

        all_keys = self.build_keys(self.config.prefix, name_of_thing, id)
        for key in all_keys:
            key_object = bucket.get_key(key)
            if key_object is not None:
                # NOTE(willkg): this says "as string", but in Python 3 this
                # will be bytes.
                return key_object.get_contents_as_string()

        # None of the keys worked, so raise an error
        raise KeyNotFound(
            "%s (bucket=%r keys=%r) not found, no value returned" % (
                id, self.config.bucket_name, all_keys))

    def _convert_mapping_to_string(self, a_mapping):
        return json.dumps(a_mapping, cls=JSONISOEncoder)

    def _convert_list_to_string(self, a_list):
        return json.dumps(list(a_list))

    def _convert_string_to_list(self, a_string):
        return json.loads(a_string)

    @contextlib.contextmanager
    def __call__(self):
        yield self

    def force_reconnect(self):
        pass

    def is_retryable_exception(self, exc):
        return isinstance(exc, self.RETRYABLE_EXCEPTIONS)
#------------------------------------------------------------------------------
def query1(conn):
    """a transaction to be executed by the database"""
    conn.query('select * from life')


#------------------------------------------------------------------------------
def query2(conn):
    """another transaction to be executed by the database"""
    raise Exception("not a database related error")


#==============================================================================
if __name__ == "__main__":
    definition_source = Namespace()
    definition_source.add_option('transaction_executor_class',
                                 default=TransactionExecutorWithBackoff,
                                 doc='a class that will execute transactions')

    c = ConfigurationManager(definition_source,
                             app_name='advanced_demo_3',
                             app_description=__doc__)

    with c.context() as config:
        # the configuration has a class that can execute transactions
        # we instantiate it here.
        executor = config.transaction_executor_class(config)

        # this first query has a 50% probability of failing due to a
        # database connectivity problem. If the transaction_executor_class
        # is a class
class TarFileCrashStore(CrashStorageBase):
    required_config = Namespace()
    required_config.add_option(name='tarball_name',
                               doc='pathname of the target tarfile',
                               default='fred.tar')
    required_config.add_option(name='temp_directory',
                               doc='the pathname of a temporary directory',
                               default='/tmp')
    required_config.add_option(
        name='tarfile_module',
        doc='a module that supplies the tarfile interface',
        default='tarfile',
        from_string_converter=class_converter)
    required_config.add_option(name='gzip_module',
                               doc='a module that supplies the gzip interface',
                               default='gzip',
                               from_string_converter=class_converter)
    required_config.add_option(name='os_module',
                               doc='a module that supplies the os interface',
                               default='os',
                               from_string_converter=class_converter)

    @staticmethod
    def stringify_datetimes(obj):
        if isinstance(obj, datetime.datetime):
            return obj.strftime("%Y-%m-%d %H:%M:%S.%f")
        raise TypeError

    def __init__(self, config, quit_check_callback=None):
        super(TarFileCrashStore, self).__init__(config, quit_check_callback)
        self.tarfile_module = config.tarfile_module
        self.gzip_module = config.gzip_module
        self.os_module = config.os_module

    def _save_to_tarfile(self, actual_pathname, target_pathname):
        try:
            self.tar_file.add(actual_pathname, target_pathname)
        except AttributeError:
            # the tar_file is lazily instantiated. It isn't created until
            # a process tries to save something to it
            self.tar_file = self.tarfile_module.open(
                self.config.tarball_name, 'w')
            self.tar_file.add(actual_pathname, target_pathname)

    def close(self):
        try:
            self.tar_file.close()
        except AttributeError:
            # the tar_file was never actually created because the save_*
            # methods were never called. we can silently ignore this
            pass

    def save_processed(self, processed_crash):
        processed_crash_as_string = json.dumps(
            processed_crash, default=self.stringify_datetimes)
        crash_id = processed_crash['crash_id']
        file_name = os.path.join(self.config.temp_directory,
                                 crash_id + '.jsonz')
        file_handle = self.gzip_module.open(file_name, 'w', 9)
        try:
            file_handle.write(processed_crash_as_string)
        finally:
            file_handle.close()
        self._save_to_tarfile(
            file_name,
            os.path.join(crash_id[:2], crash_id[2:4], crash_id + '.jsonz'))
        self.os_module.unlink(file_name)
        self.config.logger.debug('saved - %s', file_name)
class ThreadedTaskManager(TaskManager):
    """Given an iterator over a sequence of job parameters and a function,
    this class will execute the function in a set of threads."""

    required_config = Namespace()
    required_config.add_option('idle_delay',
                               default=7,
                               doc='the delay in seconds if no job is found')
    # how does one choose how many threads to use? Keep the number low if
    # your application is compute bound. You can raise it if your app is
    # i/o bound. The best thing to do is to test the throughput of your app
    # with several values. For Socorro, we've found that setting this value
    # to the number of processor cores in the system gives the best
    # throughput.
    required_config.add_option('number_of_threads',
                               default=4,
                               doc='the number of threads')
    # there is wisdom in setting the maximum queue size to be no more than
    # twice the number of threads. By keeping the threads starved, the
    # queuing thread will be blocked more frequently. Once an item is in
    # the queue, there may be no way to fetch it again if disaster strikes
    # and this app quits or fails. Potentially anything left in the queue
    # could be lost. Limiting the queue size ensures minimal damage in a
    # worst case scenario.
    required_config.add_option('maximum_queue_size',
                               default=8,
                               doc='the maximum size of the internal queue')

    def __init__(self, config,
                 job_source_iterator=default_iterator,
                 task_func=default_task_func):
        """the constructor accepts the function that will serve as the data
        source iterator and the function that the threads will execute on
        consuming the data.

        parameters:
            job_source_iterator - an iterator to serve as the source of
                                  data. it can be of the form of a
                                  generator or iterator; a function that
                                  returns an iterator; an instance of an
                                  iterable object; or a class that when
                                  instantiated with a config object can be
                                  iterated. The iterator must yield a tuple
                                  consisting of a function's tuple of args
                                  and, optionally, a mapping of kwargs.
                                  Ex:  (('a', 17), {'x': 23})
            task_func - a function that will accept the args and kwargs
                        yielded by the job_source_iterator"""
        super(ThreadedTaskManager, self).__init__(config,
                                                  job_source_iterator,
                                                  task_func)
        self.thread_list = []  # the thread object storage
        self.number_of_threads = config.number_of_threads
        self.task_queue = queue.Queue(config.maximum_queue_size)

    def start(self):
        """this function will start the queuing thread that executes the
        iterator and feeds jobs into the queue. It also starts the worker
        threads that just sit and wait for items to appear on the queue.
        This is a non-blocking call, so the executing thread is free to do
        other things while the other threads work."""
        self.logger.debug('start')
        # start each of the task threads.
        for x in range(self.number_of_threads):
            # each thread is given the config object as well as a reference
            # to this manager class. The manager class is where the queue
            # lives and the task threads will refer to it to get their next
            # jobs.
            new_thread = TaskThread(self.config, self.task_queue)
            self.thread_list.append(new_thread)
            new_thread.start()
        self.queuing_thread = threading.Thread(
            name="QueuingThread",
            target=self._queuing_thread_func)
        self.queuing_thread.start()

    def wait_for_completion(self, waiting_func=None):
        """This is a blocking function call that will wait for the queuing
        thread to complete.

        parameters:
            waiting_func - this function will be called every one second
                           while waiting for the queuing thread to quit.
                           This allows for logging timers, status
                           indicators, etc."""
        self.logger.debug("waiting to join queuingThread")
        self._responsive_join(self.queuing_thread, waiting_func)

    def stop(self):
        """This function will tell all threads to quit. All threads
        periodically look at the value of quit. If they detect quit is
        True, then they commit ritual suicide. After setting the quit
        flag, this function will wait for the queuing thread to quit."""
        self.quit = True
        self.wait_for_completion()

    def blocking_start(self, waiting_func=None):
        """this function is just a wrapper around the start and
        wait_for_completion methods. It starts the queuing thread and then
        waits for it to complete. If run by the main thread, it will detect
        the KeyboardInterrupt exception (which is what SIGTERM and SIGHUP
        have been translated to) and will order the threads to die."""
        try:
            self.start()
            self.wait_for_completion(waiting_func)
            # it only ends if someone hits ^C or sends SIGHUP or SIGTERM -
            # any of which will get translated into a KeyboardInterrupt
        except KeyboardInterrupt:
            while True:
                try:
                    self.stop()
                    break
                except KeyboardInterrupt:
                    self.logger.warning(
                        'We heard you the first time. There '
                        'is no need for further keyboard or signal '
                        'interrupts. We are waiting for the '
                        'worker threads to stop. If this app '
                        'does not halt soon, you may have to send '
                        'SIGKILL (kill -9)')

    def wait_for_empty_queue(self, wait_log_interval=0, wait_reason=''):
        """Sit around and wait for the queue to become empty

        parameters:
            wait_log_interval - while sleeping, it is helpful if the thread
                                periodically announces itself so that we
                                know that it is still alive. This number is
                                the time in seconds between log entries.
            wait_reason - the explanation of why the thread is sleeping.
                          This is likely to be a message like: 'there is no
                          work to do'."""
        seconds = 0
        while True:
            if self.task_queue.empty():
                break
            self.quit_check()
            if wait_log_interval and not seconds % wait_log_interval:
                self.logger.info('%s: %dsec so far', wait_reason, seconds)
                self.quit_check()
            seconds += 1
            time.sleep(1.0)

    def _responsive_join(self, thread, waiting_func=None):
        """similar to the responsive sleep, a join function blocks a thread
        until some other thread dies. If that takes a long time, we'd like
        to have some indication as to what the waiting thread is doing.
        This method will wait for another thread while calling the
        waiting_func once every second.

        parameters:
            thread - an instance of the TaskThread class representing the
                     thread to wait for
            waiting_func - a function to call every second while waiting
                           for the thread to die"""
        while True:
            try:
                thread.join(1.0)
                if not thread.isAlive():
                    break
                if waiting_func:
                    waiting_func()
            except KeyboardInterrupt:
                self.logger.debug('quit detected by _responsive_join')
                self.quit = True

    def _kill_worker_threads(self):
        """This function coerces the consumer/worker threads to kill
        themselves. When called by the queuing thread, one death token
        will be placed on the queue for each thread. Each worker thread is
        always looking for the death token. When it encounters it, it
        immediately runs to completion without drawing anything more off
        the queue.

        This is a blocking call. The thread using this function will wait
        for all the worker threads to die."""
        for x in range(self.number_of_threads):
            self.task_queue.put((None, None))
        self.logger.debug("waiting for standard worker threads to stop")
        for t in self.thread_list:
            t.join()

    def _queuing_thread_func(self):
        """This is the function responsible for reading the iterator and
        putting its contents into the queue. It loops as long as there are
        items in the iterator. Should something go wrong with this thread,
        or it detects the quit flag, it will calmly kill its workers and
        then quit itself."""
        self.logger.debug('_queuing_thread_func start')
        try:
            # May never raise StopIteration
            for job_params in self._get_iterator():
                self.config.logger.debug('received %r', job_params)
                if job_params is None:
                    if self.config.quit_on_empty_queue:
                        self.wait_for_empty_queue(
                            wait_log_interval=10,
                            wait_reason='waiting for queue to drain')
                        raise KeyboardInterrupt
                    self.logger.info("there is nothing to do. Sleeping "
                                     "for %d seconds" %
                                     self.config.idle_delay)
                    self._responsive_sleep(self.config.idle_delay)
                    continue
                self.quit_check()
                # self.logger.debug("queuing job %s", job_params)
                self.task_queue.put((self.task_func, job_params))
        except Exception:
            self.logger.error('queuing jobs has failed', exc_info=True)
        except KeyboardInterrupt:
            self.logger.debug('queuingThread gets quit request')
        finally:
            self.logger.debug("we're quitting queuingThread")
            self._kill_worker_threads()
            self.logger.debug("all worker threads stopped")
            # now that we've killed all the workers, we can set the quit
            # flag to True. This will cause any other threads to die and
            # shut down the application. Originally, the setting of this
            # flag was at the start of this "finally" block. However, that
            # meant that the workers would abort their currently running
            # jobs. In the case of the natural ending of an application
            # where an iterator ran to exhaustion, the workers would die
            # before completing their tasks. Moving the setting of the flag
            # to this location allows the workers to finish and then the
            # app shuts down.
            self.quit = True

    def executor_identity(self):
        """this function is likely to be called via the configuration
        parameter 'executor_identity' at the root of the self.config
        attribute of the application. It is most frequently used in the
        Pooled ConnectionContext classes to ensure that connections aren't
        shared between threads, greenlets, or whatever the unit of
        execution is. This is useful for maintaining transactional
        integrity on a resource connection."""
        return threading.currentThread().getName()
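# A small sketch (hypothetical names, not from the original source) of the
# shapes ThreadedTaskManager expects, matching the constructor docstring
# above: the job source yields (args_tuple, kwargs_dict) pairs and the task
# function consumes them inside a worker thread.
def example_job_source_iterator(config):
    for crash_id in ('id-0001', 'id-0002', 'id-0003'):
        # each yielded item is a tuple of positional args plus a kwargs dict
        yield ((crash_id,), {})


def example_task_func(crash_id):
    # placeholder for the per-job work done inside a worker thread
    pass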
class FSRadixTreeStorage(CrashStorageBase):
    """
    This class implements basic radix tree storage. It stores crashes
    using the crash_id radix scheme under ``fs_root``.

    Files are stored in the following scheme::

        root/yyyymmdd/name_branch_base/radix.../crash_id/<files>

    The date is determined using the date suffix of the crash_id, and the
    name_branch_base is given in the configuration options. The radix is
    computed from the crash_id by substringing the UUID in octets to the
    depth given in the crash_id, for instance:

    0bba929f-8721-460c-dead-a43c20071025 is stored in::

        root/20071025/name/0b/ba/92/9f/0bba929f-8721-460c-dead-a43c20071025

    This storage does not implement ``new_crashes``, but is able to store
    processed crashes. Used alone, it is intended to store only processed
    crashes.
    """

    required_config = Namespace()
    required_config.add_option(
        'fs_root',
        doc='a path to a file system',
        default='./crashes',
        # We strip / from the right so we can consistently use os.sep.join
        # instead of os.path.join (which is faster).
        from_string_converter=lambda x: x.rstrip('/'),
        reference_value_from='resource.fs',
    )
    required_config.add_option(
        'umask',
        doc='umask to use for new files',
        default=0o022,
        reference_value_from='resource.fs',
    )
    required_config.add_option(
        'json_file_suffix',
        doc='the suffix used to identify a json file',
        default='.json',
        reference_value_from='resource.fs',
    )
    required_config.add_option(
        'jsonz_file_suffix',
        doc='the suffix used to identify a gzipped json file',
        default='.jsonz',
        reference_value_from='resource.fs',
    )
    required_config.add_option(
        'dump_file_suffix',
        doc='the suffix used to identify a dump file',
        default='.dump',
        reference_value_from='resource.fs',
    )
    required_config.add_option(
        'dump_field',
        doc='the default dump field',
        default='upload_file_minidump',
        reference_value_from='resource.fs',
    )
    required_config.add_option(
        'name_branch_base',
        doc='the directory base name to use for the named radix tree '
            'storage',
        default='name',
        reference_value_from='resource.fs',
    )

    def __init__(self, *args, **kwargs):
        super(FSRadixTreeStorage, self).__init__(*args, **kwargs)
        try:
            with using_umask(self.config.umask):
                os.makedirs(self.config.fs_root)
        except OSError:
            self.logger.info("didn't make directory: %s " %
                             self.config.fs_root)

    @staticmethod
    def _cleanup_empty_dirs(base, leaf):
        parts = leaf.split(os.sep)
        while parts:
            cur = os.sep.join([base] + parts)
            parts.pop()
            try:
                os.rmdir(cur)
            except OSError:
                # this directory isn't empty, so we can stop cleanup
                break

    def _get_dump_file_name(self, crash_id, dump_name):
        if dump_name == self.config.dump_field or not dump_name:
            return crash_id + self.config.dump_file_suffix
        else:
            return "%s.%s%s" % (crash_id,
                                dump_name,
                                self.config.dump_file_suffix)

    @staticmethod
    def _get_radix(crash_id):
        return [crash_id[i * 2:(i + 1) * 2]
                for i in range(depthFromOoid(crash_id))]

    def _get_base(self, crash_id):
        date = dateFromOoid(crash_id)
        if not date:
            date = utc_now()
        date_formatted = "%4d%02d%02d" % (date.year, date.month, date.day)
        return [self.config.fs_root, date_formatted]

    def _get_radixed_parent_directory(self, crash_id):
        return os.sep.join(
            self._get_base(crash_id) +
            [self.config.name_branch_base] +
            self._get_radix(crash_id) +
            [crash_id])

    def _dump_names_from_paths(self, pathnames):
        dump_names = []
        for a_pathname in pathnames:
            base_name = os.path.basename(a_pathname)
            dump_name = base_name[37:-len(self.config.dump_file_suffix)]
            if not dump_name:
                dump_name = self.config.dump_field
            dump_names.append(dump_name)
        return dump_names

    def _save_files(self, crash_id, files):
        parent_dir = self._get_radixed_parent_directory(crash_id)

        with using_umask(self.config.umask):
            try:
                os.makedirs(parent_dir)
            except OSError:
                # probably already created, ignore
                pass
                # self.logger.debug("could not make directory: %s" %
                #                   self.config.fs_root)

            for fn, contents in files.iteritems():
                with open(os.sep.join([parent_dir, fn]), 'wb') as f:
                    f.write(contents)

    def save_processed(self, processed_crash):
        crash_id = processed_crash['uuid']
        processed_crash = processed_crash.copy()
        f = StringIO()
        with closing(gzip.GzipFile(mode='wb', fileobj=f)) as fz:
            json.dump(processed_crash, fz, default=self.json_default)
        self._save_files(
            crash_id,
            {crash_id + self.config.jsonz_file_suffix: f.getvalue()})

    def save_raw_crash(self, raw_crash, dumps, crash_id):
        files = {
            crash_id + self.config.json_file_suffix: json.dumps(raw_crash)
        }
        files.update(
            dict((self._get_dump_file_name(crash_id, fn), dump)
                 for fn, dump in dumps.iteritems()))
        self._save_files(crash_id, files)

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        """ bug 866973 - do not try to save dumps=None into the Filesystem

        We are doing this in lieu of a queuing solution that could allow us
        to operate an independent crashmover. When the queuing system is
        implemented, we could remove this, and have the raw crash saved by
        a crashmover that's consuming crash_ids the same way that the
        processor consumes them.

        Even though it is ok to resave the raw_crash in this case to the
        filesystem, the fs does not know what to do with a dumps=None when
        passed to save_raw, so we are going to avoid that.
        """
        self.save_processed(processed_crash)

    def get_raw_crash(self, crash_id):
        parent_dir = self._get_radixed_parent_directory(crash_id)
        if not os.path.exists(parent_dir):
            raise CrashIDNotFound
        with open(os.sep.join([parent_dir,
                               crash_id + self.config.json_file_suffix]),
                  'r') as f:
            return json.load(f, object_hook=DotDict)

    def get_raw_dump(self, crash_id, name=None):
        parent_dir = self._get_radixed_parent_directory(crash_id)
        if not os.path.exists(parent_dir):
            raise CrashIDNotFound
        with open(os.sep.join([parent_dir,
                               self._get_dump_file_name(crash_id, name)]),
                  'rb') as f:
            return f.read()

    def get_raw_dumps_as_files(self, crash_id):
        parent_dir = self._get_radixed_parent_directory(crash_id)
        if not os.path.exists(parent_dir):
            raise CrashIDNotFound
        dump_paths = [
            os.sep.join([parent_dir, dump_file_name])
            for dump_file_name in os.listdir(parent_dir)
            if dump_file_name.startswith(crash_id) and
            dump_file_name.endswith(self.config.dump_file_suffix)
        ]
        return DotDict(zip(self._dump_names_from_paths(dump_paths),
                           dump_paths))

    def get_raw_dumps(self, crash_id):
        def read_with(fn):
            with open(fn) as f:
                return f.read()
        return DotDict(
            (k, read_with(v))
            for k, v in self.get_raw_dumps_as_files(crash_id).iteritems())

    def get_unredacted_processed(self, crash_id):
        """this method returns an unredacted processed crash"""
        parent_dir = self._get_radixed_parent_directory(crash_id)
        pathname = os.sep.join(
            [parent_dir, crash_id + self.config.jsonz_file_suffix])
        if not os.path.exists(pathname):
            raise CrashIDNotFound
        with closing(gzip.GzipFile(pathname, 'rb')) as f:
            return json.load(f, object_hook=DotDict)

    def remove(self, crash_id):
        parent_dir = self._get_radixed_parent_directory(crash_id)
        if not os.path.exists(parent_dir):
            raise CrashIDNotFound
        shutil.rmtree(parent_dir)

    @staticmethod
    def json_default(obj):
        if isinstance(obj, datetime.datetime):
            return obj.strftime("%Y-%m-%d %H:%M:%S.%f")
        raise TypeError
class FSDatedRadixTreeStorage(FSRadixTreeStorage):
    """
    This class implements dated radix tree storage -- it enables traversing
    a radix tree using an hour/minute prefix. It allows searching for new
    crashes, but doesn't store processed crashes.

    It supplements the basic radix tree storage with indexing by date. It
    takes the current hour, minute and second and stores items in the
    following scheme::

        root/yyyymmdd/date_branch_base/hour/minute_(minute_slice)/crash_id

    minute_slice is computed by taking the second of the current timestamp
    and floor dividing by minute_slice_interval, e.g. a minute slice of 4
    provides slots from 0..14.

    This is a symlink to the items stored in the base radix tree storage.
    Additionally, a symlink is created in the base radix tree directory
    called ``date_root`` which links to the ``minute_(minute_slice)``
    folder.

    This storage class is suitable for use as raw crash storage, as it
    supports the ``new_crashes`` method.
    """

    required_config = Namespace()
    required_config.add_option(
        'date_branch_base',
        doc='the directory base name to use for the dated radix tree '
            'storage',
        default='date',
        reference_value_from='resource.fs',
    )
    required_config.add_option(
        'minute_slice_interval',
        doc='how finely to slice minutes into slots, e.g. 4 means every 4 '
            'seconds a new slot will be allocated',
        default=4,
        reference_value_from='resource.fs',
    )

    # This is just a constant for len(self._current_slot()).
    SLOT_DEPTH = 2
    DIR_DEPTH = 2

    def _get_current_date(self):
        date = utc_now()
        return "%02d%02d%02d" % (date.year, date.month, date.day)

    def _get_date_root_name(self, crash_id):
        return 'date_root'

    def _get_dump_file_name(self, crash_id, dump_name):
        if dump_name == self.config.dump_field or dump_name is None:
            return crash_id + self.config.dump_file_suffix
        else:
            return "%s.%s%s" % (crash_id,
                                dump_name,
                                self.config.dump_file_suffix)

    def _get_dated_parent_directory(self, crash_id, slot):
        return os.sep.join(
            self._get_base(crash_id) +
            [self.config.date_branch_base] +
            slot)

    def _current_slot(self):
        now = utc_now()
        return ["%02d" % now.hour,
                "%02d_%02d" % (now.minute,
                               now.second //
                               self.config.minute_slice_interval)]

    def _create_name_to_date_symlink(self, crash_id, slot):
        """we traverse the path back up from date/slot... to make a link:

        src:  "name"/radix.../crash_id
              (or "name"/radix... for legacy mode)
        dest: "date"/slot.../crash_id"""
        self._get_radixed_parent_directory(crash_id)

        root = os.sep.join([os.path.pardir] * (self.SLOT_DEPTH + 1))
        os.symlink(
            os.sep.join([root, self.config.name_branch_base] +
                        self._get_radix(crash_id) +
                        [crash_id]),
            os.sep.join([self._get_dated_parent_directory(crash_id, slot),
                         crash_id]))

    def _create_date_to_name_symlink(self, crash_id, slot):
        """the path is something like name/radix.../crash_id, so what we do
        is add 2 to the directories to go up _dir_depth + len(radix).

        we make a link:

        src:  "date"/slot...
        dest: "name"/radix.../crash_id/date_root_name"""
        radixed_parent_dir = self._get_radixed_parent_directory(crash_id)

        root = os.sep.join(
            [os.path.pardir] * (len(self._get_radix(crash_id)) +
                                self.DIR_DEPTH))
        os.symlink(
            os.sep.join([root, self.config.date_branch_base] + slot),
            os.sep.join([radixed_parent_dir,
                         self._get_date_root_name(crash_id)]))

    def save_raw_crash(self, raw_crash, dumps, crash_id):
        super(FSDatedRadixTreeStorage, self).save_raw_crash(
            raw_crash, dumps, crash_id)

        slot = self._current_slot()
        parent_dir = self._get_dated_parent_directory(crash_id, slot)

        try:
            os.makedirs(parent_dir)
        except OSError:
            # probably already created, ignore
            pass
            # self.logger.debug("could not make directory: %s" %
            #                   parent_dir)

        with using_umask(self.config.umask):
            # Bug 971496 reversed the order of these calls so that the one
            # that can fail will fail first and not leave an orphan symlink
            # behind.
            self._create_date_to_name_symlink(crash_id, slot)
            self._create_name_to_date_symlink(crash_id, slot)

    def remove(self, crash_id):
        dated_path = os.path.realpath(
            os.sep.join([self._get_radixed_parent_directory(crash_id),
                         self._get_date_root_name(crash_id)]))
        try:
            # We can just unlink the symlink and later new_crashes will
            # clean up for us.
            os.unlink(os.sep.join([dated_path, crash_id]))
        except OSError:
            # we might be trying to remove a visited crash and that's okay
            pass

        # Now we actually remove the crash.
        super(FSDatedRadixTreeStorage, self).remove(crash_id)

    def _visit_minute_slot(self, minute_slot_base):
        for crash_id in os.listdir(minute_slot_base):
            namedir = os.sep.join([minute_slot_base, crash_id])
            st_result = os.lstat(namedir)

            if stat.S_ISLNK(st_result.st_mode):
                # This is a link, so we can dereference it to find crashes.
                if os.path.isfile(
                    os.sep.join([namedir,
                                 crash_id + self.config.json_file_suffix])):
                    date_root_path = os.sep.join(
                        [namedir, self._get_date_root_name(crash_id)])
                    yield crash_id

                    try:
                        os.unlink(date_root_path)
                    except OSError:
                        self.logger.error(
                            "could not find a date root in %s; is crash "
                            "corrupt?",
                            namedir,
                            exc_info=True)

                    os.unlink(namedir)

    def new_crashes(self):
        """
        The ``new_crashes`` method returns a generator that visits all new
        crashes like so:

        * Traverse the date root to find all crashes.

        * If we find a symlink in a slot, then we dereference the link and
          check if the directory has crash data.

        * If the directory does, then we remove the symlink in the slot,
          clean up the parent directories if they're empty and then yield
          the crash_id.
        """
        current_slot = self._current_slot()
        current_date = self._get_current_date()

        dates = os.listdir(self.config.fs_root)
        for date in dates:
            dated_base = os.sep.join([self.config.fs_root, date,
                                      self.config.date_branch_base])

            try:
                hour_slots = os.listdir(dated_base)
            except OSError:
                # it is okay that the date root doesn't exist - skip on to
                # the next date
                # self.logger.info("date root for %s doesn't exist" % date)
                continue

            for hour_slot in hour_slots:
                skip_dir = False
                hour_slot_base = os.sep.join([dated_base, hour_slot])
                for minute_slot in os.listdir(hour_slot_base):
                    minute_slot_base = os.sep.join([hour_slot_base,
                                                    minute_slot])
                    slot = [hour_slot, minute_slot]

                    if slot >= current_slot and date >= current_date:
                        # the slot is currently being used, we want to
                        # skip it for now
                        self.logger.info("not processing slot: %s/%s" %
                                         tuple(slot))
                        skip_dir = True
                        continue

                    for x in self._visit_minute_slot(minute_slot_base):
                        yield x

                    try:
                        # We've finished processing the slot, so we can
                        # remove it.
                        os.rmdir(minute_slot_base)
                    except OSError:
                        self.logger.error(
                            "could not fully remove directory: %s; are "
                            "there more crashes in it?",
                            minute_slot_base,
                            exc_info=True)

                if not skip_dir and hour_slot < current_slot[0]:
                    try:
                        # If the current slot is greater than the hour slot
                        # we're processing, then we can conclude the
                        # directory is safe to remove.
                        os.rmdir(hour_slot_base)
                    except OSError:
                        self.logger.error(
                            "could not fully remove directory: %s; are "
                            "there more crashes in it?",
                            hour_slot_base,
                            exc_info=True)
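# Illustrative sketch (the timestamp is hypothetical) of how _current_slot()
# above maps a moment in time onto a slot with the default
# minute_slice_interval of 4: at 14:37:09 UTC, second // 4 == 2, so the slot
# is ["14", "37_02"] and a raw crash saved at that moment gets a symlink
# under
#
#   root/yyyymmdd/date/14/37_02/<crash_id>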
def test_write_with_imported_module_with_internal_mappings(self):
    import os
    from configman.tests.values_for_module_tests_1 import Alpha, foo

    d = {
        'a': 18,
        'b': 'hello',
        'c': [1, 2, 3],
        'd': {
            'host': 'localhost',
            'port': 5432,
        }
    }
    definitions = {
        'os_module': os,
        'a': 17,
        'imported_class': Alpha,
        'imported_function': foo,
        'xxx': {
            'yyy': Option('yyy', default=d)
        },
        'e': None,
    }
    required_config = Namespace()
    required_config.add_option(
        'minimal_version_for_understanding_refusal',
        doc='ignore the Thottleable protocol',
        default={'Firefox': '3.5.4'},
    )

    cm = ConfigurationManager(
        [definitions, required_config],
        values_source_list=[],
    )
    cm.get_config()

    s = StringIO()

    @contextlib.contextmanager
    def s_opener():
        yield s

    cm.write_conf('py', s_opener)
    generated_python_module_text = s.getvalue()

    expected = """# generated Python configman file

from configman.dotdict import DotDict
from configman.tests.values_for_module_tests_1 import (
    Alpha,
    foo,
)

import os

# the following symbols will be ignored by configman when
# this module is used as a value source.  This will
# suppress the mismatch warning since these symbols are
# values for options, not option names themselves.
ignore_symbol_list = [
    "Alpha",
    "DotDict",
    "foo",
    "os",
]

# a
a = 17

# e
e = None

# imported_class
imported_class = Alpha

# imported_function
imported_function = foo

# ignore the Thottleable protocol
minimal_version_for_understanding_refusal = {
    "Firefox": "3.5.4"
}

# os_module
os_module = os

# Namespace: xxx
xxx = DotDict()

xxx.yyy = {
    "a": 18,
    "b": "hello",
    "c": [
        1,
        2,
        3
    ],
    "d": {
        "host": "localhost",
        "port": 5432
    }
}
"""
    self.assertEqual(generated_python_module_text, expected)
class B(A):
    foo = 'b'
    required_config = Namespace()
    required_config.add_option('z', default=2)
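# A minimal sketch of what the inheritance above buys you, assuming class A
# (not shown in this excerpt) defines its own required_config: configman's
# get_required_config() aggregates option definitions up the class
# hierarchy, so B's aggregated definition contains A's options as well as
# B's own 'z'.
combined = B.get_required_config()
assert 'z' in combined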
class Postgres(RequiredConfig): """a configman compliant class for setup of Postgres transactions""" #-------------------------------------------------------------------------- # configman parameter definition section # here we're setting up the minimal parameters required for connecting # to a database. required_config = Namespace() required_config.add_option( name='database_host', default='localhost', doc='the hostname of the database', ) required_config.add_option( name='database_name', default='breakpad', doc='the name of the database', ) required_config.add_option( name='database_port', default=5432, doc='the port for the database', ) required_config.add_option( name='database_user', default='breakpad_rw', doc='the name of the user within the database', ) required_config.add_option( name='database_password', default='secrets', doc="the user's database password", ) #-------------------------------------------------------------------------- def __init__(self, config, local_config): """Initialize the parts needed to start making database connections parameters: config - the complete config for the app. If a real app, this would be where a logger or other resources could be found. local_config - this is the namespace within the complete config where the actual database parameters are found""" super(Postgres, self).__init__() self.dsn = ("host=%(database_host)s " "dbname=%(database_name)s " "port=%(database_port)s " "user=%(database_user)s " "password=%(database_password)s") % local_config self.operational_exceptions = (FakeDBOperationalError, socket.timeout) #-------------------------------------------------------------------------- def connection(self, name_unused=None): """return a new database connection parameters: name_unused - optional named connections. Used by the derived class """ return FakeDatabaseConnection(self.dsn) #-------------------------------------------------------------------------- @contextlib.contextmanager def __call__(self, name=None): """returns a database connection wrapped in a contextmanager. This function allows database connections to be used in a with statement. Connection/transaction objects will automatically be rolled back if they weren't explicitly committed within the context of the 'with' statement. Additionally, it is equipped with the ability to automatically close the connection when leaving the 'with' block. parameters: name - an optional name for the database connection""" exception_raised = False conn = self.connection(name) try: yield conn except self.operational_exceptions: # we need to close the connection print "Postgres - operational exception caught" exception_raised = True except Exception: print "Postgres - non operational exception caught" exception_raised = True finally: if not exception_raised: try: if conn.in_transaction: conn.rollback() self.close_connection(conn) except self.operational_exceptions: exception_raised = True if exception_raised: try: self.close_connection(conn, force=True) except self.operational_exceptions: pass raise #-------------------------------------------------------------------------- def close_connection(self, connection, force=False): """close the connection passed in. This function exists to allow derived classes to override the closing behavior. 
parameters: connection - the database connection object force - unused boolean to force closure; used in derived classes """ print "Postgres - requesting connection to close" connection.close() #-------------------------------------------------------------------------- def close(self): """close any pooled or cached connections. Since this base class object does no caching, there is no implementation required. Derived classes may implement it.""" pass
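# A minimal usage sketch, not part of the original source: driving the
# Postgres transaction context above directly.  The option values mirror the
# defaults declared in required_config; FakeDatabaseConnection is the test
# double that connection() returns.
local_config = {
    'database_host': 'localhost',
    'database_name': 'breakpad',
    'database_port': 5432,
    'database_user': 'breakpad_rw',
    'database_password': 'secrets',
}
pg = Postgres(config=local_config, local_config=local_config)
with pg() as connection:
    # work with the connection here; anything left uncommitted is rolled
    # back when the 'with' block exits
    pass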
def test_migration_crash_storage(self): n = Namespace() n.add_option( 'storage', default=MigrationCrashStorage, ) n.add_option( 'logger', default=mock.Mock(), ) value = { 'primary.storage_class': ( 'socorro.unittest.external.test_crashstorage_base.A' ), 'fallback.storage_class': ( 'socorro.unittest.external.test_crashstorage_base.B' ), 'date_threshold': '150315' } cm = ConfigurationManager( n, values_source_list=[value], argv_source=[] ) with cm.context() as config: raw_crash = {'ooid': ''} before_crash_id = '1498dee9-9a45-45cc-8ec8-71bb62150314' after_crash_id = '1498dee9-9a45-45cc-8ec8-71bb62150315' dump = '12345' processed_crash = {'ooid': '', 'product': 17} migration_store = config.storage(config) # save_raw tests # save to primary migration_store.primary_store.save_raw_crash = Mock() migration_store.fallback_store.save_raw_crash = Mock() migration_store.save_raw_crash(raw_crash, dump, after_crash_id) migration_store.primary_store.save_raw_crash.assert_called_with( raw_crash, dump, after_crash_id ) eq_(migration_store.fallback_store.save_raw_crash.call_count, 0) # save to fallback migration_store.primary_store.save_raw_crash = Mock() migration_store.fallback_store.save_raw_crash = Mock() migration_store.save_raw_crash(raw_crash, dump, before_crash_id) eq_(migration_store.primary_store.save_raw_crash.call_count, 0) migration_store.fallback_store.save_raw_crash.assert_called_with( raw_crash, dump, before_crash_id ) # save_processed tests # save to primary processed_crash['crash_id'] = after_crash_id migration_store.primary_store.save_processed = Mock() migration_store.fallback_store.save_processed = Mock() migration_store.save_processed(processed_crash) migration_store.primary_store.save_processed.assert_called_with( processed_crash ) eq_(migration_store.fallback_store.save_processed.call_count, 0) # save to fallback processed_crash['crash_id'] = before_crash_id migration_store.primary_store.save_processed = Mock() migration_store.fallback_store.save_processed = Mock() migration_store.save_processed(processed_crash) eq_(migration_store.primary_store.save_processed.call_count, 0) migration_store.fallback_store.save_processed.assert_called_with( processed_crash ) # close tests migration_store.primary_store.close = Mock() migration_store.fallback_store.close = Mock() migration_store.close() migration_store.primary_store.close.assert_called_with() migration_store.fallback_store.close.assert_called_with() migration_store.primary_store.close = Mock() migration_store.fallback_store.close = Mock() migration_store.fallback_store.close.side_effect = ( NotImplementedError() ) migration_store.close() migration_store.primary_store.close.assert_called_with() migration_store.fallback_store.close.assert_called_with() migration_store.primary_store.close = Mock() migration_store.primary_store.close.side_effect = Exception('!') migration_store.close() migration_store.primary_store.close.assert_called_with() migration_store.fallback_store.close.assert_called_with() migration_store.fallback_store.close = Mock() migration_store.fallback_store.close.side_effect = Exception('!') assert_raises(PolyStorageError, migration_store.close) migration_store.primary_store.close.assert_called_with() migration_store.fallback_store.close.assert_called_with()
class ConnectionContextBase(RequiredConfig): required_config = Namespace() required_config.add_option( 'access_key', doc="access key", default="", reference_value_from='resource.boto', ) required_config.add_option( 'secret_access_key', doc="secret access key", default="", secret=True, reference_value_from='secrets.boto', likely_to_be_changed=True, ) required_config.add_option( 'bucket_name', doc="The name of the bucket.", default='crashstats', reference_value_from='resource.boto', likely_to_be_changed=True, ) required_config.add_option( 'prefix', doc="a prefix to use inside the bucket", default='', reference_value_from='resource.boto', likely_to_be_changed=True, ) required_config.add_option( 'keybuilder_class', default='collector.external.boto.connection_context.KeyBuilderBase', doc=('fully qualified dotted Python classname to handle building s3 ' 'pseudo-filenames'), from_string_converter=class_converter, reference_value_from='resource.boto', likely_to_be_changed=True, ) operational_exceptions = ( socket.timeout, # wild guesses at retriable exceptions boto.exception.PleaseRetryException, boto.exception.ResumableTransferDisposition, boto.exception.ResumableUploadException, ) conditional_exceptions = (boto.exception.StorageResponseError) #-------------------------------------------------------------------------- def is_operational_exception(self, x): if "not found, no value returned" in str(x): # the not found error needs to be re-tryable to compensate for # eventual consistency. However, a method capable of raising this # exception should never be used with a transaction executor that # has infinite back off. return True return False #-------------------------------------------------------------------------- def __init__(self, config, quit_check_callback=None): self.config = config self._CreateError = boto.exception.StorageCreateError self.ResponseError = (boto.exception.StorageResponseError, KeyNotFound) self.keybuilder = config.keybuilder_class() self._bucket_cache = {} #-------------------------------------------------------------------------- def _connect(self): try: if self.connection: return self.connection except AttributeError: pass self.connection = self._connect_to_endpoint(**self._get_credentials()) return self.connection #-------------------------------------------------------------------------- def _get_credentials(self): """each subclass must implement this method to provide the type of credentials required for the type of connection""" raise NotImplementedError #-------------------------------------------------------------------------- def build_keys(self, prefix, name_of_thing, id): """Builds an s3 pseudo-filename using the specified keybuilder class. """ return self.keybuilder.build_keys(prefix, name_of_thing, id) #-------------------------------------------------------------------------- def _get_bucket(self, conn, bucket_name): try: return self._bucket_cache[bucket_name] except KeyError: self._bucket_cache[bucket_name] = conn.get_bucket(bucket_name) return self._bucket_cache[bucket_name] #-------------------------------------------------------------------------- def _get_or_create_bucket(self, conn, bucket_name): try: return self._get_bucket(conn, bucket_name) except self.ResponseError: self._bucket_cache[bucket_name] = conn.create_bucket(bucket_name) return self._bucket_cache[bucket_name] #-------------------------------------------------------------------------- def submit(self, id, name_of_thing, thing): """submit something to boto. 
""" # can only submit strings to boto assert isinstance(thing, basestring), type(thing) conn = self._connect() bucket = self._get_or_create_bucket(conn, self.config.bucket_name) all_keys = self.build_keys(self.config.prefix, name_of_thing, id) # Always submit using the first key key = all_keys[0] key_object = bucket.new_key(key) key_object.set_contents_from_string(thing) #-------------------------------------------------------------------------- def fetch(self, id, name_of_thing): """retrieve something from boto. """ conn = self._connect() bucket = self._get_bucket(conn, self.config.bucket_name) all_keys = self.build_keys(self.config.prefix, name_of_thing, id) for key in all_keys: key_object = bucket.get_key(key) if key_object is not None: return key_object.get_contents_as_string() # None of the keys worked, so raise an error raise KeyNotFound( '%s (bucket=%r keys=%r) not found, no value returned' % ( id, self.config.bucket_name, all_keys, )) #-------------------------------------------------------------------------- def _convert_mapping_to_string(self, a_mapping): return json.dumps(a_mapping, cls=JSONISOEncoder) #-------------------------------------------------------------------------- def _convert_list_to_string(self, a_list): return json.dumps(a_list) #-------------------------------------------------------------------------- def _convert_string_to_list(self, a_string): return json.loads(a_string) #-------------------------------------------------------------------------- def commit(self): """boto doesn't support transactions so this silently does nothing""" #-------------------------------------------------------------------------- def rollback(self): """boto doesn't support transactions so this silently does nothing""" #-------------------------------------------------------------------------- @contextlib.contextmanager def __call__(self): yield self #-------------------------------------------------------------------------- def in_transaction(self, dummy): """boto doesn't support transactions, so it is never in a transaction.""" return False #-------------------------------------------------------------------------- def force_reconnect(self): try: del self.connection except AttributeError: # already deleted, ignorable pass
def test_processed_crash_storage(self): n = Namespace() n.add_option( 'storage', default=PrimaryDeferredProcessedStorage, ) n.add_option( 'logger', default=mock.Mock(), ) value = { 'primary.storage_class': ( 'socorro.unittest.external.test_crashstorage_base.A' ), 'deferred.storage_class': ( 'socorro.unittest.external.test_crashstorage_base.B' ), 'processed.storage_class': ( 'socorro.unittest.external.test_crashstorage_base.B' ), 'deferral_criteria': lambda x: x.get('foo') == 'foo' } cm = ConfigurationManager( n, values_source_list=[value], argv_source=[] ) with cm.context() as config: eq_(config.primary.storage_class.foo, 'a') eq_(config.deferred.storage_class.foo, 'b') eq_(config.processed.storage_class.foo, 'b') raw_crash = {'ooid': ''} crash_id = '1498dee9-9a45-45cc-8ec8-71bb62121203' dump = '12345' deferred_crash = {'ooid': '', 'foo': 'foo'} processed_crash = {'ooid': '', 'product': 17} pd_store = config.storage(config) # save_raw tests pd_store.primary_store.save_raw_crash = Mock() pd_store.deferred_store.save_raw_crash = Mock() pd_store.processed_store.save_raw_crash = Mock() pd_store.save_raw_crash(raw_crash, dump, crash_id) pd_store.primary_store.save_raw_crash.assert_called_with( raw_crash, dump, crash_id ) eq_(pd_store.deferred_store.save_raw_crash.call_count, 0) pd_store.save_raw_crash(deferred_crash, dump, crash_id) pd_store.deferred_store.save_raw_crash.assert_called_with( deferred_crash, dump, crash_id ) # save_processed tests pd_store.primary_store.save_processed = Mock() pd_store.deferred_store.save_processed = Mock() pd_store.processed_store.save_processed = Mock() pd_store.save_processed(processed_crash) pd_store.processed_store.save_processed.assert_called_with( processed_crash ) eq_(pd_store.primary_store.save_processed.call_count, 0) pd_store.save_processed(deferred_crash) pd_store.processed_store.save_processed.assert_called_with( deferred_crash ) # close tests pd_store.primary_store.close = Mock() pd_store.deferred_store.close = Mock() pd_store.close() pd_store.primary_store.close.assert_called_with() pd_store.deferred_store.close.assert_called_with() pd_store.primary_store.close = Mock() pd_store.deferred_store.close = Mock() pd_store.deferred_store.close.side_effect = NotImplementedError() pd_store.close() pd_store.primary_store.close.assert_called_with() pd_store.deferred_store.close.assert_called_with() pd_store.primary_store.close = Mock() pd_store.primary_store.close.side_effect = Exception('!') pd_store.close() pd_store.primary_store.close.assert_called_with() pd_store.deferred_store.close.assert_called_with() pd_store.deferred_store.close = Mock() pd_store.deferred_store.close.side_effect = Exception('!') assert_raises(PolyStorageError, pd_store.close) pd_store.primary_store.close.assert_called_with() pd_store.deferred_store.close.assert_called_with()
class IndexCleaner(RequiredConfig): """Delete elasticsearch indices from our databases.""" required_config = Namespace() required_config.add_option( 'retention_policy', default=26, doc='Number of weeks to keep an index alive. ', ) required_config.namespace('elasticsearch') required_config.elasticsearch.add_option( 'elasticsearch_class', default='socorro.external.es.connection_context.ConnectionContext', from_string_converter=class_converter, reference_value_from='resource.elasticsearch', ) required_config.elasticsearch.add_option( 'elasticsearch_index_regex', default='^socorro[0-9]{6}$', reference_value_from='resource.elasticsearch', ) def __init__(self, config): super(IndexCleaner, self).__init__() self.config = config def delete_indices(self, predicate=None): """Delete crash indices that match the given predicate. :arg callable predicate: A callable of the form ``predicate(index)``, where ``index`` is a string containing the name of the index. If the callable returns true, the index will be deleted. The default is None, which deletes all crash indices. :returns: List of indexes that were deleted """ es_class = self.config.elasticsearch.elasticsearch_class( self.config.elasticsearch) index_client = es_class.indices_client() status = index_client.status() indices = status['indices'].keys() aliases = index_client.get_aliases() deleted_indices = [] for index in indices: # Some indices look like 'socorro%Y%W_%Y%M%d', but they are # aliased to the expected format of 'socorro%Y%W'. In such cases, # replace the index with the alias. if index in aliases and 'aliases' in aliases[index]: index_aliases = aliases[index]['aliases'].keys() if index_aliases: index = index_aliases[0] if not re.match( self.config.elasticsearch.elasticsearch_index_regex, index): # This index doesn't look like a crash index, let's skip it. continue if predicate is None or predicate(index): index_client.delete(index) deleted_indices.append(index) return deleted_indices def delete_old_indices(self): self.delete_indices(self.is_index_old) def is_index_old(self, index): now = utc_now() policy_delay = datetime.timedelta(weeks=self.config.retention_policy) time_limit = (now - policy_delay).replace(tzinfo=None) # strptime ignores week numbers if a day isn't specified, so we append # '-1' and '-%w' to specify Monday as the day. index_date = datetime.datetime.strptime( index + '-1', self.config.elasticsearch.elasticsearch_index + '-%w') return index_date < time_limit
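# A short usage sketch, not in the original source: delete_indices() accepts
# an optional predicate, so a subset of crash indexes can be dropped without
# touching the rest.  'config' stands in for a fully resolved configman
# context containing the options declared above.
cleaner = IndexCleaner(config)

# drop only indexes from 2015; the predicate receives the index name
deleted = cleaner.delete_indices(lambda index: index.startswith('socorro2015'))

# or simply enforce the retention policy
cleaner.delete_old_indices()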
class MiddlewareApp(App): app_name = 'middleware' app_version = '3.1' app_description = __doc__ services_list = [] #-------------------------------------------------------------------------- # in this section, define any configuration requirements required_config = Namespace() #-------------------------------------------------------------------------- # implementations namespace # the namespace is for external implementations of the services #------------------------------------------------------------------------- required_config.namespace('implementations') required_config.implementations.add_option( 'implementation_list', doc='list of packages for service implementations', default='psql:socorro.external.postgresql, ' 'hbase:socorro.external.hbase, ' 'es:socorro.external.elasticsearch, ' 'fs:socorro.external.filesystem, ' 'http:socorro.external.http', from_string_converter=items_list_converter) required_config.implementations.add_option( 'service_overrides', doc='comma separated list of class overrides, e.g `Crashes: hbase`', default='CrashData: fs, ' 'Correlations: http, ' 'CorrelationsSignatures: http, ' 'SuperSearch: es', from_string_converter=items_list_converter) #-------------------------------------------------------------------------- # database namespace # the namespace is for external implementations of the services #------------------------------------------------------------------------- required_config.namespace('database') required_config.database.add_option('database_class', default=ConnectionContext, from_string_converter=class_converter) #-------------------------------------------------------------------------- # hbase namespace # the namespace is for external implementations of the services #------------------------------------------------------------------------- required_config.namespace('hbase') required_config.hbase.add_option('hbase_class', default=HBaseCrashStorage, from_string_converter=class_converter) #-------------------------------------------------------------------------- # filesystem namespace # the namespace is for external implementations of the services #------------------------------------------------------------------------- required_config.namespace('filesystem') required_config.filesystem.add_option( 'filesystem_class', default=FileSystemCrashStorage, from_string_converter=class_converter) #-------------------------------------------------------------------------- # webapi namespace # this is all config options that used to belong to webapiconfig.py #------------------------------------------------------------------------- required_config.namespace('webapi') required_config.webapi.add_option( 'elasticSearchHostname', default='localhost', doc='String containing the URI of the Elastic Search instance.') required_config.webapi.add_option( 'elasticSearchPort', default='9200', doc='String containing the port on which calling the Elastic ' 'Search instance.') required_config.webapi.add_option( 'elasticsearch_urls', default=['http://localhost:9200'], doc='the urls to the elasticsearch instances', from_string_converter=string_to_list) required_config.webapi.add_option( 'elasticsearch_index', default='socorro%Y%W', doc='an index format to pull crashes from elasticsearch ' "(use datetime's strftime format to have " 'daily, weekly or monthly indexes)') required_config.webapi.add_option( 'elasticsearch_doctype', default='crash_reports', doc='the default doctype to use in elasticsearch') required_config.webapi.add_option( 'elasticsearch_timeout', default=30, 
doc='the time in seconds before a query to elasticsearch fails') required_config.webapi.add_option( 'facets_max_number', default=50, doc='the maximum number of results a facet will return in search') required_config.webapi.add_option( 'searchMaxNumberOfDistinctSignatures', default=1000, doc='Integer containing the maximum allowed number of distinct ' 'signatures the system should retrieve. Used mainly for ' 'performances in ElasticSearch') required_config.webapi.add_option( 'search_default_date_range', default=7, # in days doc='the default date range for searches, in days') required_config.webapi.add_option( 'platforms', default=[ { "id": "windows", "name": "Windows NT" }, { "id": "mac", "name": "Mac OS X" }, { "id": "linux", "name": "Linux" }, ], doc='Array associating OS ids to full names.', from_string_converter=lambda x: json.loads(x)) required_config.webapi.add_option( 'non_release_channels', default=['beta', 'aurora', 'nightly'], doc='List of channels, excluding the `release` one.', from_string_converter=string_to_list) required_config.webapi.add_option( 'restricted_channels', default=['beta'], doc='List of channels to restrict based on build ids.', from_string_converter=string_to_list) #-------------------------------------------------------------------------- # web_server namespace # the namespace is for config parameters the web server #-------------------------------------------------------------------------- required_config.namespace('web_server') required_config.web_server.add_option( 'wsgi_server_class', doc='a class implementing a wsgi web server', default='socorro.webapi.servers.CherryPy', from_string_converter=class_converter) #-------------------------------------------------------------------------- # http namespace # the namespace is for config parameters the http modules #-------------------------------------------------------------------------- required_config.namespace('http') required_config.http.namespace('correlations') required_config.http.correlations.add_option( 'base_url', doc='Base URL where correlations text files are', default='https://crash-analysis.mozilla.com/crash_analysis/', ) required_config.http.correlations.add_option( 'save_download', doc='Whether files downloaded for correlations should be ' 'temporary stored on disk', default=True, ) required_config.http.correlations.add_option( 'save_seconds', doc='Number of seconds that the downloaded .txt file is stored ' 'in a temporary place', default=60 * 10, ) required_config.http.correlations.add_option( 'save_root', doc='Directory where the temporary downloads are stored ' '(if left empty will become the systems tmp directory)', default='', ) #-------------------------------------------------------------------------- # sentry namespace # the namespace is for Sentry error capturing with Raven #-------------------------------------------------------------------------- required_config.namespace('sentry') required_config.sentry.add_option('dsn', doc='DSN for Sentry via raven', default='') # because the socorro.webapi.servers classes bring up their own default # configurations like port number, the only way to override the default # is like this: from socorro.webapi.servers import StandAloneServer StandAloneServer.required_config.port.set_default(8883, force=True) #-------------------------------------------------------------------------- def main(self): # Apache modwsgi requireds a module level name 'application' global application ## 1 turn these names of classes into real references to classes def 
lookup(file_and_class): file_name, class_name = file_and_class.rsplit('.', 1) overrides = dict(self.config.implementations.service_overrides) _list = self.config.implementations.implementation_list for prefix, base_module_path in _list: if class_name in overrides: if prefix != overrides[class_name]: continue try: module = __import__( '%s.%s' % (base_module_path, file_name), globals(), locals(), [class_name]) except ImportError: raise ImportError( "Unable to import %s.%s.%s" % (base_module_path, file_name, class_name)) return getattr(module, class_name) raise ImplementationConfigurationError(file_and_class) ## 2 wrap each class with the ImplementationWrapper class def wrap(cls, file_and_class): return type(cls.__name__, (ImplementationWrapper, ), { 'cls': cls, 'file_and_class': file_and_class, }) services_list = [] for url, impl_class in SERVICES_LIST: impl_instance = lookup(impl_class) wrapped_impl = wrap(impl_instance, impl_class) services_list.append((url, wrapped_impl)) self.web_server = self.config.web_server.wsgi_server_class( self.config, # needs the whole config not the local namespace services_list) # for modwsgi the 'run' method returns the wsgi function that Apache # will use. For other webservers, the 'run' method actually starts # the standalone web server. application = self.web_server.run()
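# A hedged walk-through, not part of the original source, of how lookup()
# above resolves a service implementation.  With the default option values
# (as converted by items_list_converter into (prefix, package) pairs):
#
#     implementation_list -> [('psql', 'socorro.external.postgresql'),
#                             ('hbase', 'socorro.external.hbase'),
#                             ('es', 'socorro.external.elasticsearch'), ...]
#     service_overrides   -> [('SuperSearch', 'es'), ...]
#
# lookup('supersearch.SuperSearch') splits the string into a file name and a
# class name, finds the 'SuperSearch' override, skips every prefix except
# 'es', and imports socorro.external.elasticsearch.supersearch.SuperSearch.
# Without an override, the first package in implementation_list whose import
# succeeds wins.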
class HBaseConnectionContext(RequiredConfig): """This class implements a connection to HBase for every transaction to be executed. """ required_config = Namespace() required_config.add_option( 'hbase_host', doc='Host to HBase server', default='localhost', reference_value_from='resource.hb', ) required_config.add_option( 'hbase_port', doc='Port to HBase server', default=9090, reference_value_from='resource.hb', ) required_config.add_option( 'hbase_timeout', doc='timeout in milliseconds for an HBase connection', default=5000, reference_value_from='resource.hb', ) required_config.add_option( 'temporary_file_system_storage_path', doc='a local filesystem path where dumps temporarily ' 'during processing', default='/tmp', reference_value_from='resource.hb', ) required_config.add_option( 'dump_file_suffix', doc='the suffix used to identify a dump file (for use in temp files)', default='.dump', reference_value_from='resource.hb', ) operational_exceptions = ( hbase.ttypes.IOError, Thrift.TException, socket.timeout, socket.error, ) conditional_exceptions = () #-------------------------------------------------------------------------- def __init__(self, config): super(HBaseConnectionContext, self).__init__() self.config = config #-------------------------------------------------------------------------- def connection(self, name=None): return HBaseConnection(self.config) #-------------------------------------------------------------------------- @contextlib.contextmanager def __call__(self, name=None): conn = self.connection(name) try: yield conn finally: self.close_connection(conn) #-------------------------------------------------------------------------- def force_reconnect(self): pass #-------------------------------------------------------------------------- def close(self): pass #-------------------------------------------------------------------------- def close_connection(self, connection, force=False): connection.close() #-------------------------------------------------------------------------- def is_operational_exception(self, msg): return False
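# A brief usage sketch, not from the original source: the context manager
# above opens one HBaseConnection per transaction and always closes it on
# exit.  'config' stands in for a resolved configman context supplying the
# hbase_* options declared above; HBaseConnection itself is defined
# elsewhere in this module.
hbase_context = HBaseConnectionContext(config)
with hbase_context() as connection:
    # talk to the Thrift client wrapped by HBaseConnection here
    pass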
class BotoS3CrashStorage(BotoCrashStorage):
    required_config = Namespace()
    required_config.resource_class = change_default(
        BotoCrashStorage,
        'resource_class',
        'socorro.external.boto.connection_context.RegionalS3ConnectionContext')
class FTPScraperCronApp(BaseCronApp, ScrapersMixin): app_name = 'ftpscraper' app_description = 'FTP Scraper' app_version = '0.1' required_config = Namespace() required_config.add_option( 'products', default='firefox,mobile,thunderbird,seamonkey,b2g', from_string_converter=lambda line: tuple( [x.strip() for x in line.split(',') if x.strip()]), doc='a comma-delimited list of URIs for each product') required_config.add_option('base_url', default='https://archive.mozilla.org/pub/', doc='The base url to use for fetching builds') required_config.add_option('dry_run', default=False, doc='Print instead of storing builds') def run(self, date): # record_associations for product_name in self.config.products: self.config.logger.debug('scraping %s releases for date %s', product_name, date) if product_name == 'b2g': self.database_transaction_executor(self.scrape_b2g, product_name, date) elif product_name == 'firefox': self.database_transaction_executor( self._scrape_json_releases_and_nightlies, product_name, date) else: self.database_transaction_executor( self._scrape_releases_and_nightlies, product_name, date) def _scrape_releases_and_nightlies(self, connection, product_name, date): self.scrape_releases(connection, product_name) self.scrape_nightlies(connection, product_name, date) def _scrape_json_releases_and_nightlies(self, connection, product_name, date): self.scrape_json_releases(connection, product_name) self.scrape_json_nightlies(connection, product_name, date) def _insert_build(self, cursor, *args, **kwargs): if self.config.dry_run: print "INSERT BUILD" print args print kwargs else: buildutil.insert_build(cursor, *args, **kwargs) def _is_final_beta(self, version): # If this is a XX.0 version in the release channel, # return True otherwise, False # Make a special exception for the out-of-cycle 38.0.5 return version.endswith('.0') or version == '38.0.5' def scrape_json_releases(self, connection, product_name): prod_url = urlparse.urljoin(self.config.base_url, product_name + '/') logger = self.config.logger cursor = connection.cursor() for directory in ('nightly', 'candidates'): try: url, = self.get_links(prod_url, starts_with=directory) except IndexError: logger.debug('Dir %s not found for %s', directory, product_name) continue releases = self.get_links(url, ends_with='-candidates/') for release in releases: dirname = release.replace(url, '') if dirname.endswith('/'): dirname = dirname[:-1] for info in self.get_json_release(release, dirname): platform, version, kvpairs = info build_type = 'release' beta_number = None repository = kvpairs['repository'] if 'b' in version: build_type = 'beta' version, beta_number = version.split('b') if kvpairs.get('buildID'): build_id = kvpairs['buildID'] version_build = kvpairs['version_build'] self._insert_build(cursor, product_name, version, platform, build_id, build_type, beta_number, repository, version_build, ignore_duplicates=True) if (self._is_final_beta(version) and build_type == 'release' and version > '26.0' and kvpairs.get('buildID')): logger.debug('is final beta version %s', version) repository = 'mozilla-beta' build_id = kvpairs['buildID'] build_type = 'beta' version_build = kvpairs['version_build'] # just force this to 99 until # we deal with version_build properly beta_number = 99 self._insert_build(cursor, product_name, version, platform, build_id, build_type, beta_number, repository, version_build, ignore_duplicates=True) def scrape_json_nightlies(self, connection, product_name, date): directories = ( product_name, 'nightly', date.strftime('%Y'), 
date.strftime('%m'), ) nightly_url = self.config.base_url for part in directories: nightly_url = urlparse.urljoin(nightly_url, part + '/') cursor = connection.cursor() dir_prefix = date.strftime('%Y-%m-%d') nightlies = self.get_links(nightly_url, starts_with=dir_prefix) for nightly in nightlies: dirname = nightly.replace(nightly_url, '') if dirname.endswith('/'): dirname = dirname[:-1] for info in self.get_json_nightly(nightly, dirname): platform, repository, version, kvpairs = info build_type = 'nightly' if version.endswith('a2'): build_type = 'aurora' if kvpairs.get('buildID'): build_id = kvpairs['buildID'] self._insert_build(cursor, product_name, version, platform, build_id, build_type, kvpairs.get('beta_number', None), repository, ignore_duplicates=True) def scrape_releases(self, connection, product_name): prod_url = urlparse.urljoin(self.config.base_url, product_name + '/') # releases are sometimes in nightly, sometimes in candidates dir. # look in both. logger = self.config.logger cursor = connection.cursor() for directory in ('nightly', 'candidates'): # expect only one directory link for each try: url, = self.get_links(prod_url, starts_with=directory) except IndexError: logger.debug('Dir %s not found for %s', directory, product_name) continue releases = self.get_links(url, ends_with='-candidates/') if not releases: self.config.logger.debug('No releases for %s', url) for release in releases: for info in self.get_release(release): platform, version, kvpairs, bad_lines = info if kvpairs.get('buildID') is None: self.config.logger.warning( "BuildID not found for %s on %s", release, url) continue build_type = 'Release' beta_number = None repository = 'mozilla-release' if 'b' in version: build_type = 'Beta' version, beta_number = version.split('b') repository = 'mozilla-beta' for bad_line in bad_lines: self.config.logger.warning( "Bad line for %s on %s (%r)", release, url, bad_line) # Put a build into the database build_id = kvpairs['buildID'] self._insert_build(cursor, product_name, version, platform, build_id, build_type, beta_number, repository, ignore_duplicates=True) # If we've got a final beta, add a second record if self._is_final_beta(version): repository = 'mozilla-beta' self._insert_build(cursor, product_name, version, platform, build_id, build_type, beta_number, repository, ignore_duplicates=True) def scrape_nightlies(self, connection, product_name, date): directories = ( product_name, 'nightly', date.strftime('%Y'), date.strftime('%m'), ) nightly_url = self.config.base_url for part in directories: nightly_url = urlparse.urljoin(nightly_url, part + '/') cursor = connection.cursor() dir_prefix = date.strftime('%Y-%m-%d') nightlies = self.get_links(nightly_url, starts_with=dir_prefix) for nightly in nightlies: dirname = nightly.replace(nightly_url, '') if dirname.endswith('/'): dirname = dirname[:-1] for info in self.get_nightly(nightly, dirname): platform, repository, version, kvpairs, bad_lines = info for bad_line in bad_lines: self.config.logger.warning("Bad line for %s (%r)", nightly, bad_line) build_type = 'Nightly' if version.endswith('a2'): build_type = 'Aurora' if kvpairs.get('buildID'): build_id = kvpairs['buildID'] self._insert_build(cursor, product_name, version, platform, build_id, build_type, kvpairs.get('beta_number', None), repository, ignore_duplicates=True) def scrape_b2g(self, connection, product_name, date): if product_name != 'b2g': return directories = ( product_name, 'manifests', 'nightly', ) b2g_manifests = self.config.base_url for part in directories: 
b2g_manifests = urlparse.urljoin(b2g_manifests, part + '/') dir_prefix = date.strftime('%Y-%m-%d') cursor = connection.cursor() version_dirs = self.get_links(b2g_manifests, ends_with='/') for version_dir in version_dirs: prod_url = urlparse.urljoin(version_dir, date.strftime('%Y/%m/')) nightlies = self.get_links(prod_url, starts_with=dir_prefix) for nightly in nightlies: b2gs = self.get_b2g( nightly, backfill_date=None, ) for info in b2gs: platform, repository, version, kvpairs = info build_id = kvpairs['buildid'] build_type = kvpairs['build_type'] self._insert_build(cursor, product_name, version, platform, build_id, build_type, kvpairs.get('beta_number', None), repository, ignore_duplicates=True)
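# Worked examples, not in the original source, for _is_final_beta() above:
# a release-channel version is treated as a "final beta" when it is an X.0
# version, with 38.0.5 kept as an explicit out-of-cycle exception.
#
#     self._is_final_beta('45.0')      # True  -- ends with '.0'
#     self._is_final_beta('38.0.5')    # True  -- hard-coded exception
#     self._is_final_beta('45.0.2')    # False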
class BotoCrashStorage(CrashStorageBase): """This class sends processed crash reports to an end point reachable by the boto S3 library. """ required_config = Namespace() required_config.add_option( "resource_class", default=( 'socorro.external.boto.connection_context.ConnectionContextBase'), doc=('fully qualified dotted Python classname to handle Boto ' 'connections'), from_string_converter=class_converter, reference_value_from='resource.boto') required_config.add_option( 'transaction_executor_class_for_get', default="socorro.database.transaction_executor." "TransactionExecutorWithLimitedBackoff", doc='a class that will manage transactions', from_string_converter=class_converter, reference_value_from='resource.boto', ) required_config.add_option( 'transaction_executor_class', default="socorro.database.transaction_executor." "TransactionExecutorWithLimitedBackoff", doc='a class that will manage transactions', from_string_converter=class_converter, reference_value_from='resource.boto', ) required_config.add_option( 'temporary_file_system_storage_path', doc='a local filesystem path where dumps temporarily ' 'during processing', default='/home/socorro/temp', reference_value_from='resource.boto', ) required_config.add_option( 'dump_file_suffix', doc='the suffix used to identify a dump file (for use in temp files)', default='.dump', reference_value_from='resource.boto', ) required_config.add_option( 'json_object_hook', default='socorro.lib.util.DotDict', from_string_converter=class_converter, ) def is_operational_exception(self, x): if "not found, no value returned" in str(x): # the not found error needs to be re-tryable to compensate for # eventual consistency. However, a method capable of raising this # exception should never be used with a transaction executor that # has infinite back off. return True #elif # for further cases... return False def __init__(self, config, quit_check_callback=None): super(BotoCrashStorage, self).__init__(config, quit_check_callback) self.connection_source = config.resource_class(config) self.transaction = config.transaction_executor_class( config, self.connection_source, quit_check_callback) if config.transaction_executor_class_for_get.is_infinite: self.config.logger.error( 'the class %s identifies itself as an infinite iterator. ' 'As a TransactionExecutor for reads from Boto, this may ' 'result in infinite loops that will consume threads forever.' % py_obj_to_str(config.transaction_executor_class_for_get)) self.transaction_for_get = config.transaction_executor_class_for_get( config, self.connection_source, quit_check_callback) @staticmethod def do_save_raw_crash(boto_connection, raw_crash, dumps, crash_id): if dumps is None: dumps = MemoryDumpsMapping() raw_crash_as_string = boto_connection._convert_mapping_to_string( raw_crash) boto_connection.submit(crash_id, "raw_crash", raw_crash_as_string) dump_names_as_string = boto_connection._convert_list_to_string( dumps.keys()) boto_connection.submit(crash_id, "dump_names", dump_names_as_string) # we don't know what type of dumps mapping we have. We do know, # however, that by calling the memory_dump_mapping method, we will # get a MemoryDumpMapping which is exactly what we need. 
dumps = dumps.as_memory_dumps_mapping() for dump_name, dump in dumps.iteritems(): if dump_name in (None, '', 'upload_file_minidump'): dump_name = 'dump' boto_connection.submit(crash_id, dump_name, dump) def save_raw_crash(self, raw_crash, dumps, crash_id): self.transaction(self.do_save_raw_crash, raw_crash, dumps, crash_id) @staticmethod def _do_save_processed(boto_connection, processed_crash): crash_id = processed_crash['uuid'] processed_crash_as_string = boto_connection._convert_mapping_to_string( processed_crash) boto_connection.submit(crash_id, "processed_crash", processed_crash_as_string) def save_processed(self, processed_crash): self.transaction(self._do_save_processed, processed_crash) def save_raw_and_processed(self, raw_crash, dumps, processed_crash, crash_id): """ bug 866973 - do not put raw_crash back into permanent storage again We are doing this in lieu of a queuing solution that could allow us to operate an independent crashmover. When the queuing system is implemented, we could remove this, and have the raw crash saved by a crashmover that's consuming crash_ids the same way that the processor consumes them. See further comments in the ProcesorApp class. """ self.save_processed(processed_crash) @staticmethod def do_get_raw_crash(boto_connection, crash_id, json_object_hook): try: raw_crash_as_string = boto_connection.fetch(crash_id, "raw_crash") return json.loads(raw_crash_as_string, object_hook=json_object_hook) except boto_connection.ResponseError as x: raise CrashIDNotFound('%s not found: %s' % (crash_id, x)) def get_raw_crash(self, crash_id): return self.transaction_for_get(self.do_get_raw_crash, crash_id, self.config.json_object_hook) @staticmethod def do_get_raw_dump(boto_connection, crash_id, name=None): try: if name in (None, '', 'upload_file_minidump'): name = 'dump' a_dump = boto_connection.fetch(crash_id, name) return a_dump except boto_connection.ResponseError as x: raise CrashIDNotFound('%s not found: %s' % (crash_id, x)) def get_raw_dump(self, crash_id, name=None): return self.transaction_for_get(self.do_get_raw_dump, crash_id, name) @staticmethod def do_get_raw_dumps(boto_connection, crash_id): try: dump_names_as_string = boto_connection.fetch( crash_id, "dump_names") dump_names = boto_connection._convert_string_to_list( dump_names_as_string) # when we fetch the dumps, they are by default in memory, so we'll # put them into a MemoryDumpMapping. dumps = MemoryDumpsMapping() for dump_name in dump_names: if dump_name in (None, '', 'upload_file_minidump'): dump_name = 'dump' dumps[dump_name] = boto_connection.fetch(crash_id, dump_name) return dumps except boto_connection.ResponseError as x: raise CrashIDNotFound('%s not found: %s' % (crash_id, x)) def get_raw_dumps(self, crash_id): """this returns a MemoryDumpsMapping""" return self.transaction_for_get(self.do_get_raw_dumps, crash_id) def get_raw_dumps_as_files(self, crash_id): in_memory_dumps = self.get_raw_dumps(crash_id) # convert our native memory dump mapping into a file dump mapping. 
return in_memory_dumps.as_file_dumps_mapping( crash_id, self.config.temporary_file_system_storage_path, self.config.dump_file_suffix) @staticmethod def _do_get_unredacted_processed( boto_connection, crash_id, json_object_hook, ): try: processed_crash_as_string = boto_connection.fetch( crash_id, "processed_crash") return json.loads( processed_crash_as_string, object_hook=json_object_hook, ) except boto_connection.ResponseError as x: raise CrashIDNotFound('%s not found: %s' % (crash_id, x)) def get_unredacted_processed(self, crash_id): return self.transaction_for_get( self._do_get_unredacted_processed, crash_id, self.config.json_object_hook, )
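# A hedged usage sketch, not part of the original source: saving and reading
# a crash through the BotoCrashStorage class above.  'config' stands in for
# a resolved configman context providing resource_class, the transaction
# executor classes and the boto credentials; the crash_id is illustrative.
store = BotoCrashStorage(config)

crash_id = '1498dee9-9a45-45cc-8ec8-71bb62120314'

# passing None for dumps is allowed; do_save_raw_crash substitutes an empty
# MemoryDumpsMapping
store.save_raw_crash({'uuid': crash_id}, None, crash_id)

raw_crash = store.get_raw_crash(crash_id)    # decoded with json_object_hook
dumps = store.get_raw_dumps(crash_id)        # a MemoryDumpsMapping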
def test_poly_crash_storage(self): n = Namespace() n.add_option( 'storage', default=PolyCrashStorage, ) n.add_option( 'logger', default=mock.Mock(), ) value = { 'storage_classes': ( 'socorro.unittest.external.test_crashstorage_base.A,' 'socorro.unittest.external.test_crashstorage_base.A,' 'socorro.unittest.external.test_crashstorage_base.B' ), 'storage1.y': 37, } cm = ConfigurationManager(n, values_source_list=[value]) with cm.context() as config: eq_(config.storage0.crashstorage_class.foo, 'a') eq_(config.storage1.crashstorage_class.foo, 'a') eq_(config.storage1.y, 37) eq_(config.storage2.crashstorage_class.foo, 'b') poly_store = config.storage(config) l = len(poly_store.storage_namespaces) eq_( l, 3, 'expected poly_store to have lenth of 3, ' 'but %d was found instead' % l ) eq_(poly_store.storage_namespaces[0], 'storage0') eq_(poly_store.storage_namespaces[1], 'storage1') eq_(poly_store.storage_namespaces[2], 'storage2') l = len(poly_store.stores) eq_( l, 3, 'expected poly_store.store to have lenth of 3, ' 'but %d was found instead' % l ) eq_(poly_store.stores.storage0.foo, 'a') eq_(poly_store.stores.storage1.foo, 'a') eq_(poly_store.stores.storage2.foo, 'b') raw_crash = {'ooid': ''} dump = '12345' processed_crash = {'ooid': '', 'product': 17} for v in poly_store.stores.itervalues(): v.save_raw_crash = Mock() v.save_processed = Mock() v.close = Mock() poly_store.save_raw_crash(raw_crash, dump, '') for v in poly_store.stores.itervalues(): v.save_raw_crash.assert_called_once_with(raw_crash, dump, '') poly_store.save_processed(processed_crash) for v in poly_store.stores.itervalues(): v.save_processed.assert_called_once_with(processed_crash) poly_store.save_raw_and_processed( raw_crash, dump, processed_crash, 'n' ) for v in poly_store.stores.itervalues(): v.save_raw_crash.assert_called_with(raw_crash, dump, 'n') v.save_processed.assert_called_with(processed_crash) raw_crash = {'ooid': 'oaeu'} dump = '5432' processed_crash = {'ooid': 'aoeu', 'product': 33} poly_store.stores['storage1'].save_raw_crash = Mock() poly_store.stores['storage1'].save_raw_crash.side_effect = \ Exception('this is messed up') poly_store.stores['storage2'].save_processed = Mock() poly_store.stores['storage2'].save_processed.side_effect = \ Exception('this is messed up') assert_raises( PolyStorageError, poly_store.save_raw_crash, raw_crash, dump, '' ) for v in poly_store.stores.itervalues(): v.save_raw_crash.assert_called_with(raw_crash, dump, '') assert_raises( PolyStorageError, poly_store.save_processed, processed_crash ) for v in poly_store.stores.itervalues(): v.save_processed.assert_called_with(processed_crash) assert_raises( PolyStorageError, poly_store.save_raw_and_processed, raw_crash, dump, processed_crash, 'n' ) for v in poly_store.stores.itervalues(): v.save_raw_crash.assert_called_with(raw_crash, dump, 'n') v.save_processed.assert_called_with(processed_crash) poly_store.stores['storage2'].close.side_effect = Exception assert_raises(PolyStorageError, poly_store.close) for v in poly_store.stores.itervalues(): v.close.assert_called_with()
class Bar(RequiredConfig):
    required_config = Namespace()
    required_config.add_option('x', default=227)
    required_config.add_option('a', default=11)
def test_fallback_crash_storage(self): n = Namespace() n.add_option( 'storage', default=FallbackCrashStorage, ) n.add_option( 'logger', default=mock.Mock(), ) value = { 'primary.storage_class': ( 'socorro.unittest.external.test_crashstorage_base.A' ), 'fallback.storage_class': ( 'socorro.unittest.external.test_crashstorage_base.B' ), } cm = ConfigurationManager( n, values_source_list=[value], argv_source=[] ) with cm.context() as config: eq_(config.primary.storage_class.foo, 'a') eq_(config.fallback.storage_class.foo, 'b') raw_crash = {'ooid': ''} crash_id = '1498dee9-9a45-45cc-8ec8-71bb62121203' dump = '12345' processed_crash = {'ooid': '', 'product': 17} fb_store = config.storage(config) # save_raw tests fb_store.primary_store.save_raw_crash = Mock() fb_store.fallback_store.save_raw_crash = Mock() fb_store.save_raw_crash(raw_crash, dump, crash_id) fb_store.primary_store.save_raw_crash.assert_called_with( raw_crash, dump, crash_id ) eq_(fb_store.fallback_store.save_raw_crash.call_count, 0) fb_store.primary_store.save_raw_crash = Mock() fb_store.primary_store.save_raw_crash.side_effect = Exception('!') fb_store.save_raw_crash(raw_crash, dump, crash_id) fb_store.primary_store.save_raw_crash.assert_called_with( raw_crash, dump, crash_id ) fb_store.fallback_store.save_raw_crash.assert_called_with( raw_crash, dump, crash_id ) fb_store.fallback_store.save_raw_crash = Mock() fb_store.fallback_store.save_raw_crash.side_effect = Exception('!') assert_raises( PolyStorageError, fb_store.save_raw_crash, raw_crash, dump, crash_id ) fb_store.primary_store.save_raw_crash.assert_called_with( raw_crash, dump, crash_id ) fb_store.fallback_store.save_raw_crash.assert_called_with( raw_crash, dump, crash_id ) # save_processed tests fb_store.primary_store.save_processed = Mock() fb_store.fallback_store.save_processed = Mock() fb_store.save_processed(processed_crash) fb_store.primary_store.save_processed.assert_called_with( processed_crash ) eq_(fb_store.fallback_store.save_processed.call_count, 0) fb_store.primary_store.save_processed = Mock() fb_store.primary_store.save_processed.side_effect = Exception('!') fb_store.save_processed(processed_crash) fb_store.primary_store.save_processed.assert_called_with( processed_crash ) fb_store.fallback_store.save_processed.assert_called_with( processed_crash ) fb_store.fallback_store.save_processed = Mock() fb_store.fallback_store.save_processed.side_effect = Exception('!') assert_raises( PolyStorageError, fb_store.save_processed, processed_crash ) fb_store.primary_store.save_processed.assert_called_with( processed_crash ) fb_store.fallback_store.save_processed.assert_called_with( processed_crash ) # close tests fb_store.primary_store.close = Mock() fb_store.fallback_store.close = Mock() fb_store.close() fb_store.primary_store.close.assert_called_with() fb_store.fallback_store.close.assert_called_with() fb_store.primary_store.close = Mock() fb_store.fallback_store.close = Mock() fb_store.fallback_store.close.side_effect = NotImplementedError() fb_store.close() fb_store.primary_store.close.assert_called_with() fb_store.fallback_store.close.assert_called_with() fb_store.primary_store.close = Mock() fb_store.primary_store.close.side_effect = Exception('!') fb_store.close() fb_store.primary_store.close.assert_called_with() fb_store.fallback_store.close.assert_called_with() fb_store.fallback_store.close = Mock() fb_store.fallback_store.close.side_effect = Exception('!') assert_raises(PolyStorageError, fb_store.close) fb_store.primary_store.close.assert_called_with() 
fb_store.fallback_store.close.assert_called_with()
class JitCrashCategorizeRule(ExternalProcessRule): required_config = Namespace() required_config.command_line = change_default( ExternalProcessRule, 'command_line', 'timeout -s KILL 30 {command_pathname} ' '{dump_file_pathname} ' '2>/dev/null' ) required_config.command_pathname = change_default( ExternalProcessRule, 'command_pathname', '/data/socorro/stackwalk/bin/jit-crash-categorize', ) required_config.result_key = change_default( ExternalProcessRule, 'result_key', 'classifications.jit.category', ) required_config.return_code_key = change_default( ExternalProcessRule, 'return_code_key', 'classifications.jit.category_return_code', ) required_config.add_option( 'threshold', doc="max number of frames until encountering target frame", default=8 ) def __init__(self, config): super(JitCrashCategorizeRule, self).__init__(config) def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta): if ( processed_crash.product != 'Firefox' or not processed_crash.os_name.startswith('Windows') or processed_crash.cpu_name != 'x86' ): # we don't want any of these return False frames = processed_crash.get('json_dump', {}).get('crashing_thread', {}).get('frames', []) if frames and frames[0].get('module', False): # there is a module at the top of the stack, we don't want this return False return ( processed_crash.signature.endswith('EnterBaseline') or processed_crash.signature.endswith('EnterIon') or processed_crash.signature.endswith('js::jit::FastInvoke') or processed_crash.signature.endswith('js::jit::IonCannon') or processed_crash.signature.endswith('js::irregexp::ExecuteCode<T>') ) def _interpret_external_command_output(self, fp, processor_meta): try: result = fp.read() except IOError as x: processor_meta.processor_notes.append( "%s unable to read external command output: %s" % ( self.config.command_pathname, x ) ) return '' try: return result.strip() except AttributeError as x: # there's no strip method return result
class FlashVersionRule(Rule): required_config = Namespace() required_config.add_option( 'known_flash_identifiers', doc='A subset of the known "debug identifiers" for flash versions, ' 'associated to the version', default={ '7224164B5918E29AF52365AF3EAF7A500': '10.1.51.66', 'C6CDEFCDB58EFE5C6ECEF0C463C979F80': '10.1.51.66', '4EDBBD7016E8871A461CCABB7F1B16120': '10.1', 'D1AAAB5D417861E6A5B835B01D3039550': '10.0.45.2', 'EBD27FDBA9D9B3880550B2446902EC4A0': '10.0.45.2', '266780DB53C4AAC830AFF69306C5C0300': '10.0.42.34', 'C4D637F2C8494896FBD4B3EF0319EBAC0': '10.0.42.34', 'B19EE2363941C9582E040B99BB5E237A0': '10.0.32.18', '025105C956638D665850591768FB743D0': '10.0.32.18', '986682965B43DFA62E0A0DFFD7B7417F0': '10.0.23', '937DDCC422411E58EF6AD13710B0EF190': '10.0.23', '860692A215F054B7B9474B410ABEB5300': '10.0.22.87', '77CB5AC61C456B965D0B41361B3F6CEA0': '10.0.22.87', '38AEB67F6A0B43C6A341D7936603E84A0': '10.0.12.36', '776944FD51654CA2B59AB26A33D8F9B30': '10.0.12.36', '974873A0A6AD482F8F17A7C55F0A33390': '9.0.262.0', 'B482D3DFD57C23B5754966F42D4CBCB60': '9.0.262.0', '0B03252A5C303973E320CAA6127441F80': '9.0.260.0', 'AE71D92D2812430FA05238C52F7E20310': '9.0.246.0', '6761F4FA49B5F55833D66CAC0BBF8CB80': '9.0.246.0', '27CC04C9588E482A948FB5A87E22687B0': '9.0.159.0', '1C8715E734B31A2EACE3B0CFC1CF21EB0': '9.0.159.0', 'F43004FFC4944F26AF228334F2CDA80B0': '9.0.151.0', '890664D4EF567481ACFD2A21E9D2A2420': '9.0.151.0', '8355DCF076564B6784C517FD0ECCB2F20': '9.0.124.0', '51C00B72112812428EFA8F4A37F683A80': '9.0.124.0', '9FA57B6DC7FF4CFE9A518442325E91CB0': '9.0.115.0', '03D99C42D7475B46D77E64D4D5386D6D0': '9.0.115.0', '0CFAF1611A3C4AA382D26424D609F00B0': '9.0.47.0', '0F3262B5501A34B963E5DF3F0386C9910': '9.0.47.0', 'C5B5651B46B7612E118339D19A6E66360': '9.0.45.0', 'BF6B3B51ACB255B38FCD8AA5AEB9F1030': '9.0.28.0', '83CF4DC03621B778E931FC713889E8F10': '9.0.16.0', }, from_string_converter=ujson.loads) required_config.add_option( 'flash_re', doc='a regular expression to match Flash file names', default=(r'NPSWF32_?(.*)\.dll|' 'FlashPlayerPlugin_?(.*)\.exe|' 'libflashplayer(.*)\.(.*)|' 'Flash ?Player-?(.*)'), from_string_converter=re.compile) #-------------------------------------------------------------------------- def version(self): return '1.0' #-------------------------------------------------------------------------- def _get_flash_version(self, **kwargs): """If (we recognize this module as Flash and figure out a version): Returns version; else (None or '')""" filename = kwargs.get('filename', None) version = kwargs.get('version', None) debug_id = kwargs.get('debug_id', None) m = self.config.flash_re.match(filename) if m: if version: return version # we didn't get a version passed into us # try do deduce it groups = m.groups() if groups[0]: return groups[0].replace('_', '.') if groups[1]: return groups[1].replace('_', '.') if groups[2]: return groups[2] if groups[4]: return groups[4] return self.config.known_flash_identifiers.get(debug_id, None) return None #-------------------------------------------------------------------------- def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta): processed_crash.flash_version = '' flash_version = None for index, a_module in enumerate( processed_crash['json_dump']['modules']): flash_version = self._get_flash_version(**a_module) if flash_version: break if flash_version: processed_crash.flash_version = flash_version else: processed_crash.flash_version = '[blank]' return True
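# Worked examples, not from the original source, for _get_flash_version()
# above.  The keyword arguments mirror the module records found in
# processed_crash['json_dump']['modules']; 'config' is an assumed resolved
# configman context carrying the two options declared in this rule.
rule = FlashVersionRule(config)

# the module name matches flash_re and already carries a version
rule._get_flash_version(
    filename='NPSWF32_11_2_202_235.dll',
    version='11.2.202.235',
    debug_id=None,
)  # -> '11.2.202.235'

# no version supplied: the first regex group is de-underscored instead
rule._get_flash_version(
    filename='NPSWF32_10_3_183_7.dll',
    version=None,
    debug_id=None,
)  # -> '10.3.183.7'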