def test_get_iterator(self): config = DotDict() config.logger = self.logger config.quit_on_empty_queue = False tm = TaskManager( config, job_source_iterator=range(1), ) assert tm._get_iterator() == [0] def an_iter(self): for i in range(5): yield i tm = TaskManager( config, job_source_iterator=an_iter, ) assert list(tm._get_iterator()) == [0, 1, 2, 3, 4] class X(object): def __init__(self, config): self.config = config def __iter__(self): for key in self.config: yield key tm = TaskManager( config, job_source_iterator=X(config) ) assert list(tm._get_iterator()) == list(config.keys())
def test_wrong_signature(self, mocked_subprocess_module): config = self.get_basic_config() raw_crash = copy.copy(canonical_standard_raw_crash) raw_dumps = {config.dump_field: 'a_fake_dump.dump'} processed_crash = DotDict() processed_crash.product = 'Firefox' processed_crash.os_name = 'Windows NT' processed_crash.cpu_name = 'x86' processed_crash.signature = 'this-is-not-a-JIT-signature' processed_crash['json_dump.crashing_thread.frames'] = [ DotDict({'not_module': 'not-a-module',}), DotDict({'module': 'a-module',}) ] processor_meta = self.get_basic_processor_meta() mocked_subprocess_handle = ( mocked_subprocess_module.Popen.return_value ) mocked_subprocess_handle.stdout.read.return_value = ( 'EXTRA-SPECIAL' ) mocked_subprocess_handle.wait.return_value = 0 rule = JitCrashCategorizeRule(config) # the call to be tested rule.act(raw_crash, raw_dumps, processed_crash, processor_meta) ok_('classifications.jit.category' not in processed_crash) ok_('classifications.jit.category_return_code' not in processed_crash)
def test_save_raw_crash(self):
    config = self._setup_config()
    crash_store = RabbitMQCrashStorage(config)

    # "legacy_processing" missing from the raw crash
    crash_store.save_raw_crash(
        raw_crash=DotDict(), dumps=DotDict(), crash_id='crash_id'
    )
    config.logger.reset_mock()

    # normal save
    raw_crash = DotDict()
    raw_crash.legacy_processing = 0
    crash_store.save_raw_crash(
        raw_crash=raw_crash, dumps=DotDict(), crash_id='crash_id'
    )
    crash_store.transaction.assert_called_with(
        crash_store._save_raw_crash_transaction, 'crash_id'
    )
    crash_store.transaction.reset_mock()

    # rejected because of "legacy_processing"
    raw_crash = DotDict()
    raw_crash.legacy_processing = 5
    crash_store.save_raw_crash(
        raw_crash=raw_crash, dumps=DotDict(), crash_id='crash_id'
    )
def test_statistics_all_missing_prefix_and_missing_name(self): d = DotDict() d.statsd_host = "localhost" d.statsd_port = 666 d.prefix = None d.active_counters_list = ["x", "y", "z"] with patch("socorro.lib.statistics.StatsClient") as StatsClientMocked: s = StatisticsForStatsd(d, None) StatsClientMocked.assert_called_with("localhost", 666, "") s.incr("x") StatsClientMocked.assert_has_calls(StatsClientMocked.mock_calls, [call.incr("x")]) s.incr("y") StatsClientMocked.assert_has_calls(StatsClientMocked.mock_calls, [call.incr("y")]) s.incr("z") StatsClientMocked.assert_has_calls(StatsClientMocked.mock_calls, [call.incr("z")]) s.incr("w") StatsClientMocked.assert_has_calls( StatsClientMocked.mock_calls, [call.incr("y"), call.incr("x"), call.incr("y")] ) s.incr(None) StatsClientMocked.assert_has_calls( StatsClientMocked.mock_calls, [call.incr("y"), call.incr("x"), call.incr("y"), call.incr("unknown")] )
def test_doing_work_with_two_workers_and_generator(self): config = DotDict() config.logger = self.logger config.number_of_threads = 2 config.maximum_queue_size = 2 my_list = [] def insert_into_list(anItem): my_list.append(anItem) ttm = ThreadedTaskManager(config, task_func=insert_into_list, job_source_iterator=(((x,), {}) for x in xrange(10)) ) try: ttm.start() time.sleep(0.2) assert len(ttm.thread_list) == 2 assert len(my_list) == 10 assert sorted(my_list) == list(range(10)) except Exception: # we got threads to join ttm.wait_for_completion() raise
def _add_process_type_to_processed_crash(self, raw_crash):
    """ Electrolysis Support - Optional - raw_crash may contain a
    ProcessType of plugin. In the future this value would be default,
    content, maybe even Jetpack... This indicates which process was the
    crashing process.
    """
    process_type_additions_dict = DotDict()
    process_type = self._get_truncate_or_none(raw_crash, 'ProcessType', 10)
    if not process_type:
        return process_type_additions_dict
    process_type_additions_dict.process_type = process_type
    #logger.debug('processType %s', processType)
    if process_type == 'plugin':
        # Bug#543776 - we are relaxing the non-null policy: a null
        # filename, name, or version is OK; we'll use empty strings.
        process_type_additions_dict.PluginFilename = (
            raw_crash.get('PluginFilename', '')
        )
        process_type_additions_dict.PluginName = (
            raw_crash.get('PluginName', '')
        )
        process_type_additions_dict.PluginVersion = (
            raw_crash.get('PluginVersion', '')
        )
    return process_type_additions_dict
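
# Hedged usage sketch (not part of the original module): it illustrates the
# contract of _add_process_type_to_processed_crash above, assuming a
# `processor` instance of the owning class and the same DotDict used
# throughout this code base.
raw_crash = DotDict()
raw_crash.ProcessType = 'plugin'
raw_crash.PluginName = 'Shockwave Flash'
# PluginFilename and PluginVersion are deliberately omitted to show the
# empty-string fallback described in the comment above.
additions = processor._add_process_type_to_processed_crash(raw_crash)
assert additions.process_type == 'plugin'
assert additions.PluginName == 'Shockwave Flash'
assert additions.PluginFilename == ''
assert additions.PluginVersion == ''
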
def test_add_classification_to_processed_crash(self): rc = DotDict() pc = DotDict() pc.classifications = DotDict() processor = None skunk_rule = SkunkClassificationRule() skunk_rule._add_classification( pc, 'stupid', 'extra stuff' ) self.assertTrue('classifications' in pc) self.assertTrue('skunk_works' in pc.classifications) self.assertEqual( 'stupid', pc.classifications.skunk_works.classification ) self.assertEqual( 'extra stuff', pc.classifications.skunk_works.classification_data ) self.assertEqual( '0.0', pc.classifications.skunk_works.classification_version )
def test_new_crash_duplicate_discovered(self):
    """ Tests that a crash already present in the ack-token cache is
    suppressed when it comes off the standard queue """
    config = self._setup_config()
    config.transaction_executor_class = TransactionExecutor
    crash_store = RabbitMQCrashStorage(config)
    crash_store.rabbitmq.config.standard_queue_name = "socorro.normal"
    crash_store.rabbitmq.config.reprocessing_queue_name = "socorro.reprocessing"
    crash_store.rabbitmq.config.priority_queue_name = "socorro.priority"

    faked_methodframe = DotDict()
    faked_methodframe.delivery_tag = "delivery_tag"
    test_queue = [
        (None, None, None),
        (faked_methodframe, "1", "normal_crash_id"),
        (None, None, None),
    ]

    def basic_get(queue="socorro.priority"):
        if len(test_queue) == 0:
            raise StopIteration
        return test_queue.pop()

    crash_store.rabbitmq.return_value.__enter__.return_value \
        .channel.basic_get = MagicMock(side_effect=basic_get)

    transaction_connection = crash_store.transaction \
        .db_conn_context_source.return_value.__enter__.return_value

    # load the cache as if this crash had already been seen
    crash_store.acknowledgement_token_cache["normal_crash_id"] = \
        faked_methodframe

    for result in crash_store.new_crashes():
        # the duplicate crash should be suppressed
        eq_(None, result)

    # we should ack the duplicate crash even though we did not use it
    # for processing
    transaction_connection.channel.basic_ack.assert_called_with(
        delivery_tag=faked_methodframe.delivery_tag
    )
def test_no_crashing_thread(self, mocked_subprocess_module): config = self.get_basic_config() raw_crash = copy.copy(canonical_standard_raw_crash) raw_dumps = {config.dump_field: 'a_fake_dump.dump'} processed_crash = DotDict() processed_crash.product = 'Firefox' processed_crash.os_name = 'Windows NT' processed_crash.cpu_name = 'x86' processed_crash.signature = 'EnterBaseline' processed_crash['json_dump'] = {} # note the empty json_dump processor_meta = self.get_basic_processor_meta() mocked_subprocess_handle = ( mocked_subprocess_module.Popen.return_value ) mocked_subprocess_handle.stdout.read.return_value = ( 'EXTRA-SPECIAL' ) mocked_subprocess_handle.wait.return_value = 0 rule = JitCrashCategorizeRule(config) # the call to be tested res = rule._predicate( raw_crash, raw_dumps, processed_crash, processor_meta ) # Simply verify that no exception is raised. ok_(res)
def test_action_case_1(self):
    """sentinel exists in stack, but no secondaries"""
    pc = DotDict()
    pc.process_type = 'plugin'
    pijd = copy.deepcopy(cannonical_json_dump)
    pc.json_dump = pijd
    pc.json_dump['crashing_thread']['frames'][2]['function'] = \
        'NtUserSetWindowPos'
    f2jd = copy.deepcopy(cannonical_json_dump)
    pc.upload_file_minidump_flash2 = DotDict()
    pc.upload_file_minidump_flash2.json_dump = f2jd

    fake_processor = create_basic_fake_processor()
    rc = DotDict()

    rule = SetWindowPos()
    action_result = rule.action(rc, pc, fake_processor)

    ok_(action_result)
    ok_('classifications' in pc)
    ok_('skunk_works' in pc.classifications)
    eq_(
        pc.classifications.skunk_works.classification,
        'NtUserSetWindowPos | other'
    )
def test_doing_work_with_two_workers_and_generator(self): config = DotDict() config.logger = self.logger config.number_of_threads = 2 config.maximum_queue_size = 2 my_list = [] def insert_into_list(anItem): my_list.append(anItem) ttm = ThreadedTaskManager(config, task_func=insert_into_list, job_source_iterator=(((x,), {}) for x in xrange(10)) ) try: ttm.start() time.sleep(0.2) ok_(len(ttm.thread_list) == 2, "expected 2 threads, but found %d" % len(ttm.thread_list)) ok_(len(my_list) == 10, 'expected to do 10 inserts, ' 'but %d were done instead' % len(my_list)) ok_(sorted(my_list) == range(10), 'expected %s, but got %s' % (range(10), sorted(my_list))) except Exception: # we got threads to join ttm.wait_for_completion() raise
def test_doing_work_with_one_worker(self): config = DotDict() config.logger = self.logger config.number_of_threads = 1 config.maximum_queue_size = 1 my_list = [] def insert_into_list(anItem): my_list.append(anItem) ttm = ThreadedTaskManager(config, task_func=insert_into_list ) try: ttm.start() time.sleep(0.2) ok_(len(my_list) == 10, 'expected to do 10 inserts, ' 'but %d were done instead' % len(my_list)) ok_(my_list == range(10), 'expected %s, but got %s' % (range(10), my_list)) ttm.stop() except Exception: # we got threads to join ttm.wait_for_completion() raise
def test_stuff_missing(self): config = self.get_basic_config() raw_crash = copy.copy(canonical_standard_raw_crash) raw_dumps = {} system_info = copy.copy( canonical_processed_crash['json_dump']['system_info'] ) del system_info['cpu_count'] processed_crash = DotDict() processed_crash.json_dump = { 'system_info': system_info } processor_meta = self.get_basic_processor_meta() rule = CPUInfoRule(config) # the call to be tested rule.act(raw_crash, raw_dumps, processed_crash, processor_meta) eq_( processed_crash.cpu_info, "GenuineIntel family 6 model 42 stepping 7" ) eq_(processed_crash.cpu_name, 'x86') # raw crash should be unchanged eq_(raw_crash, canonical_standard_raw_crash)
def test_save_raw_crash_normal(self): config = self._setup_config() crash_store = RabbitMQCrashStorage(config) # test for "legacy_processing" missing from crash crash_store.save_raw_crash( raw_crash=DotDict(), dumps=DotDict(), crash_id='crash_id') ok_(not crash_store.transaction.called) config.logger.reset_mock() # test for normal save raw_crash = DotDict() raw_crash.legacy_processing = 0 crash_store.save_raw_crash( raw_crash=raw_crash, dumps=DotDict, crash_id='crash_id') crash_store.transaction.assert_called_with( crash_store._save_raw_crash_transaction, 'crash_id') crash_store.transaction.reset_mock() # test for save rejection because of "legacy_processing" raw_crash = DotDict() raw_crash.legacy_processing = 5 crash_store.save_raw_crash( raw_crash=raw_crash, dumps=DotDict, crash_id='crash_id') ok_(not crash_store.transaction.called)
def test_save_raw_crash_no_legacy(self): config = self._setup_config() config.filter_on_legacy_processing = False crash_store = RabbitMQCrashStorage(config) # test for "legacy_processing" missing from crash crash_store.save_raw_crash( raw_crash=DotDict(), dumps=DotDict(), crash_id='crash_id') crash_store.transaction.assert_called_with( crash_store._save_raw_crash_transaction, 'crash_id') config.logger.reset_mock() # test for normal save raw_crash = DotDict() raw_crash.legacy_processing = 0 crash_store.save_raw_crash( raw_crash=raw_crash, dumps=DotDict, crash_id='crash_id') crash_store.transaction.assert_called_with( crash_store._save_raw_crash_transaction, 'crash_id') crash_store.transaction.reset_mock() # test for save without regard to "legacy_processing" value raw_crash = DotDict() raw_crash.legacy_processing = 5 crash_store.save_raw_crash( raw_crash=raw_crash, dumps=DotDict, crash_id='crash_id') crash_store.transaction.assert_called_with( crash_store._save_raw_crash_transaction, 'crash_id')
def test_action_case_3(self): """nothing in 1st dump, sentinel and secondary in upload_file_minidump_flash2 dump""" pc = DotDict() pc.dump = DotDict() pijd = copy.deepcopy(cannonical_json_dump) pc.dump.json_dump = pijd f2jd = copy.deepcopy(cannonical_json_dump) pc.upload_file_minidump_flash2 = DotDict() pc.upload_file_minidump_flash2.json_dump = f2jd pc.upload_file_minidump_flash2.json_dump['threads'][0]['frames'][2] \ ['function'] = 'NtUserSetWindowPos' pc.upload_file_minidump_flash2.json_dump['threads'][0]['frames'][4] \ ['function'] = 'F455544145' fake_processor = create_basic_fake_processor() rc = DotDict() rule = SetWindowPos() action_result = rule.action(rc, pc, fake_processor) self.assertTrue(action_result) self.assertTrue('classifications' in pc) self.assertTrue('skunk_works' in pc.classifications) self.assertEqual( pc.classifications.skunk_works.classification, 'NtUserSetWindowPos | F455544145' )
def _execute_external_process(self, command_line, processor_meta): stackwalker_output, return_code = super(BreakpadStackwalkerRule2015, self)._execute_external_process( command_line, processor_meta ) if not isinstance(stackwalker_output, Mapping): processor_meta.processor_notes.append( "MDSW produced unexpected output: %s..." % str(stackwalker_output)[:10] ) stackwalker_output = {} stackwalker_data = DotDict() stackwalker_data.json_dump = stackwalker_output stackwalker_data.mdsw_return_code = return_code stackwalker_data.mdsw_status_string = stackwalker_output.get("status", "unknown error") stackwalker_data.success = stackwalker_data.mdsw_status_string == "OK" if return_code == 124: processor_meta.processor_notes.append("MDSW terminated with SIGKILL due to timeout") elif return_code != 0 or not stackwalker_data.success: processor_meta.processor_notes.append( "MDSW failed on '%s': %s" % (command_line, stackwalker_data.mdsw_status_string) ) return stackwalker_data, return_code
def test_action_case_4(self): """nothing in 1st dump, sentinel but no secondary in upload_file_minidump_flash2 dump""" pc = DotDict() pc.dump = DotDict() pijd = copy.deepcopy(cannonical_json_dump) pc.dump.json_dump = pijd f2jd = copy.deepcopy(cannonical_json_dump) pc.upload_file_minidump_flash2 = DotDict() pc.upload_file_minidump_flash2.json_dump = f2jd pc.upload_file_minidump_flash2.json_dump['crashing_thread']['frames'][2] \ ['function'] = 'NtUserSetWindowPos' fake_processor = create_basic_fake_processor() rc = DotDict() rule = SetWindowPos() action_result = rule.action(rc, pc, fake_processor) ok_(action_result) ok_('classifications' in pc) ok_('skunk_works' in pc.classifications) eq_( pc.classifications.skunk_works.classification, 'NtUserSetWindowPos | other' )
def setup_mocked_s3_storage( self, executor=TransactionExecutor, executor_for_gets=TransactionExecutor, storage_class='BotoS3CrashStorage', host='', port=0, resource_class=S3ConnectionContext, **extra ): config = DotDict({ 'resource_class': resource_class, 'logger': mock.Mock(), 'host': host, 'port': port, 'access_key': 'this is the access key', 'secret_access_key': 'secrets', 'bucket_name': 'silliness', 'prefix': 'dev', 'calling_format': mock.Mock() }) config.update(extra) s3_conn = resource_class(config) s3_conn._connect_to_endpoint = mock.Mock() s3_conn._mocked_connection = s3_conn._connect_to_endpoint.return_value s3_conn._calling_format.return_value = mock.Mock() s3_conn._CreateError = mock.Mock() s3_conn.ResponseError = mock.Mock() s3_conn._open = mock.MagicMock() return s3_conn
def test_get_iterator(self): config = DotDict() config.logger = self.logger config.quit_on_empty_queue = False tm = TaskManager(config, job_source_iterator=range(1)) eq_(tm._get_iterator(), [0]) def an_iter(self): for i in range(5): yield i tm = TaskManager(config, job_source_iterator=an_iter) eq_([x for x in tm._get_iterator()], [0, 1, 2, 3, 4]) class X(object): def __init__(self, config): self.config = config def __iter__(self): for key in self.config: yield key tm = TaskManager(config, job_source_iterator=X(config)) eq_([x for x in tm._get_iterator()], [y for y in config.keys()])
def test_blocking_start(self): config = DotDict() config.logger = self.logger config.idle_delay = 1 config.quit_on_empty_queue = False class MyTaskManager(TaskManager): def _responsive_sleep( self, seconds, wait_log_interval=0, wait_reason='' ): try: if self.count >= 2: self.quit = True self.count += 1 except AttributeError: self.count = 0 tm = MyTaskManager( config, task_func=Mock() ) waiting_func = Mock() tm.blocking_start(waiting_func=waiting_func) eq_( tm.task_func.call_count, 10 ) eq_(waiting_func.call_count, 0)
def test_action_case_1(self): """success - both targets found in top 5 frames of stack""" pc = DotDict() f2jd = copy.deepcopy(cannonical_json_dump) pc.upload_file_minidump_flash2 = DotDict() pc.upload_file_minidump_flash2.json_dump = f2jd pc.upload_file_minidump_flash2.json_dump['crashing_thread']['frames'][1]['function'] = ( 'NtUserPeekMessage' ) pc.upload_file_minidump_flash2.json_dump['crashing_thread']['frames'][2]['function'] = ( 'F849276792______________________________' ) fake_processor = create_basic_fake_processor() rc = DotDict() rd = {} rule = Bug812318() action_result = rule.action(rc, rd, pc, fake_processor) ok_(action_result) ok_('classifications' in pc) eq_( pc.classifications.skunk_works.classification, 'bug812318-PeekMessage' )
def test_action_wrong_order(self): jd = copy.deepcopy(cannonical_json_dump) jd['crashing_thread']['frames'][4]['function'] = ( "F_1152915508___________________________________" ) jd['crashing_thread']['frames'][3]['function'] = ( "mozilla::plugins::PluginInstanceChild::UpdateWindowAttributes" "(bool)" ) jd['crashing_thread']['frames'][5]['function'] = ( "mozilla::ipc::RPCChannel::Call(IPC::Message*, IPC::Message*)" ) pc = DotDict() pc.dump = DotDict() pc.dump.json_dump = jd fake_processor = create_basic_fake_processor() rc = DotDict() rd = {} rule = UpdateWindowAttributes() action_result = rule.action(rc, rd, pc, fake_processor) ok_(not action_result) ok_('classifications' not in pc)
def test_OOMAllocationSize_predicate_signature_fragment_2(self): pc = DotDict() pc.signature = 'mozalloc_handle_oom | this | is | bad' rc = DotDict() fake_processor = create_basic_fake_processor() rule = OOMSignature() predicate_result = rule.predicate(rc, pc, fake_processor) ok_(predicate_result)
def test_SigTrunc_predicate(self): pc = DotDict() pc.signature = '9' * 256 rc = DotDict() fake_processor = create_basic_fake_processor() rule = SigTrunc() predicate_result = rule.predicate(rc, pc, fake_processor) ok_(predicate_result)
def test_OOMAllocationSize_predicate_no_match(self): pc = DotDict() pc.signature = 'hello' rc = DotDict() fake_processor = create_basic_fake_processor() rule = OOMSignature() predicate_result = rule.predicate(rc, pc, fake_processor) ok_(not predicate_result)
def test_OOMAllocationSize_predicate_signature_fragment_3(self): pc = DotDict() pc.signature = 'CrashAtUnhandlableOOM' rc = DotDict() fake_processor = create_basic_fake_processor() rule = OOMSignature() predicate_result = rule.predicate(rc, pc, fake_processor) ok_(predicate_result)
def create_basic_fake_processor(): fake_processor = DotDict() fake_processor.c_signature_tool = c_signature_tool fake_processor.config = DotDict() # need help figuring out failures? switch to FakeLogger and read stdout fake_processor.config.logger = SilentFakeLogger() #fake_processor.config.logger = FakeLogger() return fake_processor
def test_OOMAllocationSize_predicate_signature_fragment_1(self): pc = DotDict() pc.signature = 'this | is | a | NS_ABORT_OOM | signature' rc = DotDict() fake_processor = create_basic_fake_processor() rule = OOMSignature() predicate_result = rule.predicate(rc, pc, fake_processor) ok_(predicate_result)
def test_SigTrunc_predicate_no_match(self): pc = DotDict() pc.signature = '0' * 100 rc = DotDict() fake_processor = create_basic_fake_processor() rule = SigTrunc() predicate_result = rule.predicate(rc, pc, fake_processor) ok_(not predicate_result)
def test_hbase_usage_with_transaction(self, mocked_hbcl): local_config = DotDict({ 'hbase_host': 'host', 'database_name': 'name', 'hbase_port': 9090, 'hbase_timeout': 9000, 'number_of_retries': 2, 'logger': SilentFakeLogger(), 'executor_identity': lambda: 'dwight' # bogus thread id }) a_fake_hbase_connection = FakeHB_Connection() mocked_hbcl.HBaseConnectionForCrashReports = \ mock.Mock(return_value=a_fake_hbase_connection) hb_context = HBaseConnectionContextPooled( local_config, local_config ) def all_ok(connection, dummy): eq_(dummy, 'hello') return True transaction = TransactionExecutor(local_config, hb_context) result = transaction(all_ok, 'hello') ok_(result) eq_( mocked_hbcl.HBaseConnectionForCrashReports.call_count, 2 ) eq_( a_fake_hbase_connection.close_counter, 1 ) eq_( a_fake_hbase_connection.rollback_counter, 0 ) eq_( a_fake_hbase_connection.commit_counter, 1 ) def bad_deal(connection, dummy): raise KeyError('fred') assert_raises(KeyError, transaction, bad_deal, 'hello') eq_( mocked_hbcl.HBaseConnectionForCrashReports.call_count, 2 ) eq_( a_fake_hbase_connection.close_counter, 1 ) eq_( a_fake_hbase_connection.rollback_counter, 1 ) eq_( a_fake_hbase_connection.commit_counter, 1 ) hb_context.close() eq_( a_fake_hbase_connection.close_counter, 2 )
def test_basic_hbase_usage(self, mocked_hbcl): local_config = DotDict({ 'hbase_host': 'host', 'database_name': 'name', 'hbase_port': 9090, 'hbase_timeout': 9000, 'number_of_retries': 2, 'logger': SilentFakeLogger(), 'executor_identity': lambda: 'dwight' # bogus thread id }) a_fake_hbase_connection = FakeHB_Connection() mocked_hbcl.HBaseConnectionForCrashReports = \ mock.Mock(return_value=a_fake_hbase_connection) hb_context = HBaseConnectionContextPooled( local_config, local_config ) eq_( mocked_hbcl.HBaseConnectionForCrashReports.call_count, 1 ) eq_( a_fake_hbase_connection.close_counter, 1 ) # open a connection with hb_context() as conn: eq_( mocked_hbcl.HBaseConnectionForCrashReports.call_count, 2 ) eq_( a_fake_hbase_connection.close_counter, 1 ) # get that same connection again with hb_context() as conn: eq_( mocked_hbcl.HBaseConnectionForCrashReports.call_count, 2 ) eq_( a_fake_hbase_connection.close_counter, 1 ) # get a named connection with hb_context('fred') as conn: eq_( mocked_hbcl.HBaseConnectionForCrashReports.call_count, 3 ) eq_( a_fake_hbase_connection.close_counter, 1 ) eq_( len(hb_context.pool), 2 ) # get that original same connection again with hb_context() as conn: eq_( mocked_hbcl.HBaseConnectionForCrashReports.call_count, 3 ) eq_( a_fake_hbase_connection.close_counter, 1 ) # close all connections hb_context.close() eq_( a_fake_hbase_connection.close_counter, 3 )
class ProcessorApp(FetchTransformSaveWithSeparateNewCrashSourceApp): """the Socorro processor converts raw_crashes into processed_crashes""" app_name = 'processor' app_version = '3.0' app_description = __doc__ required_config = Namespace() # configuration is broken into three namespaces: processor, # new_crash_source, and companion_process #-------------------------------------------------------------------------- # processor namespace # this namespace is for config parameter having to do with the # implementation of the algorithm of converting raw crashes into # processed crashes. This algorithm can be swapped out for alternate # algorithms. #-------------------------------------------------------------------------- required_config.namespace('processor') required_config.processor.add_option( 'processor_class', doc='the class that transforms raw crashes into processed crashes', default='socorro.processor.socorrolite_processor_2015' '.SocorroLiteProcessorAlgorithm2015', from_string_converter=class_converter) #-------------------------------------------------------------------------- # companion_process namespace # this namespace is for config parameters having to do with registering # a companion process that runs alongside processor #-------------------------------------------------------------------------- required_config.namespace('companion_process') required_config.companion_process.add_option( 'companion_class', doc='a classname that runs a process in parallel with the processor', default='', # default='socorro.processor.symbol_cache_manager.SymbolLRUCacheManager', from_string_converter=class_converter) ########################################################################### # TODO: implement an __init__ and a waiting func. The waiting func # will take registrations of periodic things to do over some time # interval. the first periodic thing is the rereading of the # signature generation stuff from the database. ########################################################################### required_config.namespace('sentry') required_config.sentry.add_option( 'dsn', doc='DSN for Sentry via raven', default='', reference_value_from='secrets.sentry', ) #-------------------------------------------------------------------------- @staticmethod def get_application_defaults(): return { "source.crashstorage_class": FSDatedPermanentStorage, "destination.crashstorage_class": FSDatedPermanentStorage, } #-------------------------------------------------------------------------- def _transform(self, crash_id): """this implementation is the framework on how a raw crash is converted into a processed crash. 
The 'crash_id' passed in is used as a key to fetch the raw crash from the 'source', the conversion funtion implemented by the 'processor_class' is applied, the processed crash is saved to the 'destination'""" try: raw_crash = self.source.get_raw_crash(crash_id) dumps = self.source.get_raw_dumps_as_files(crash_id) except CrashIDNotFound: self.processor.reject_raw_crash( crash_id, 'this crash cannot be found in raw crash storage') return except Exception, x: self.config.logger.warning('error loading crash %s', crash_id, exc_info=True) self.processor.reject_raw_crash(crash_id, 'error in loading: %s' % x) return try: processed_crash = self.source.get_unredacted_processed(crash_id) except CrashIDNotFound: processed_crash = DotDict() try: if 'uuid' not in raw_crash: raw_crash.uuid = crash_id processed_crash = (self.processor.process_crash( raw_crash, dumps, processed_crash, )) """ bug 866973 - save_raw_and_processed() instead of just save_processed(). The raw crash may have been modified by the processor rules. The individual crash storage implementations may choose to honor re-saving the raw_crash or not. """ self.destination.save_raw_and_processed(raw_crash, None, processed_crash, crash_id) self.config.logger.info('saved - %s', crash_id) except Exception as exception: # Immediately capture this as local variables. # During this error handling we're going to be using other # try:except: constructs (e.g. swallowing raven send errors) # so we can't reference `sys.exc_info()` later. exc_type, exc_value, exc_tb = sys.exc_info() if self.config.sentry and self.config.sentry.dsn: try: if isinstance(exception, collections.Sequence): # Then it's already an iterable! exceptions = exception else: exceptions = [exception] client = raven.Client(dsn=self.config.sentry.dsn) client.context.activate() client.context.merge({'extra': { 'crash_id': crash_id, }}) try: for exception in exceptions: identifier = client.captureException(exception) self.config.logger.info( 'Error captured in Sentry! ' 'Reference: {}'.format(identifier)) finally: client.context.clear() except Exception: self.config.logger.error( 'Unable to report error with Raven', exc_info=True, ) else: self.config.logger.warning( 'Raven DSN is not configured and an exception happened') # Why not just do `raise exception`? # Because if we don't do it this way, the eventual traceback # is going to point to *this* line (right after this comment) # rather than the actual error where it originally happened. raise exc_type, exc_value, exc_tb finally: # earlier, we created the dumps as files on the file system, # we need to clean up after ourselves. for a_dump_pathname in dumps.itervalues(): try: if "TEMPORARY" in a_dump_pathname: os.unlink(a_dump_pathname) except OSError, x: # the file does not actually exist self.config.logger.info( 'deletion of dump failed: %s', x, )
def test_bogus_source_and_destination(self): class NonInfiniteFTSAppClass(FetchTransformSaveApp): def source_iterator(self): for x in self.source.new_crashes(): yield ((x, ), {}) class FakeStorageSource(object): def __init__(self, config, quit_check_callback): self.store = DotDict({ '1234': DotDict({ 'ooid': '1234', 'Product': 'FireFloozy', 'Version': '1.0' }), '1235': DotDict({ 'ooid': '1235', 'Product': 'ThunderRat', 'Version': '1.0' }), '1236': DotDict({ 'ooid': '1236', 'Product': 'Caminimal', 'Version': '1.0' }), '1237': DotDict({ 'ooid': '1237', 'Product': 'Fennicky', 'Version': '1.0' }), }) def get_raw_crash(self, ooid): return self.store[ooid] def get_dump(self, ooid): return 'this is a fake dump' def new_ooids(self): for k in self.store.keys(): yield k class FakeStorageDestination(object): def __init__(self, config, quit_check_callback): self.store = DotDict() self.dumps = DotDict() def save_raw_crash(self, raw_crash, dump): self.store[raw_crash.ooid] = raw_crash self.dumps[raw_crash.ooid] = dump logger = SilentFakeLogger() config = DotDict({ 'logger': logger, 'number_of_threads': 2, 'maximum_queue_size': 2, 'source': DotDict({'crashstorage': FakeStorageSource}), 'destination': DotDict({'crashstorage': FakeStorageDestination}) }) fts_app = NonInfiniteFTSAppClass(config) fts_app.main() source = fts_app.source destination = fts_app.destination self.assertEqual(source.store, destination.store) self.assertEqual(len(destination.dumps), 4) self.assertEqual(destination.dumps['1237'], source.get_dump('1237'))
def parse_arguments(filters, arguments, modern=False): """ Return a dict of parameters. Take a list of filters and for each try to get the corresponding value in arguments or a default value. Then check that value's type. The @modern parameter indicates how the arguments should be interpreted. The old way is that you always specify a list and in the list you write the names of types as strings. I.e. instad of `str` you write `'str'`. The modern way allows you to specify arguments by real Python types and entering it as a list means you accept and expect it to be a list. For example, using the modern way: filters = [ ("param1", "default", [str]), ("param2", None, int), ("param3", ["list", "of", 4, "values"], [str]) ] arguments = { "param1": "value1", "unknown": 12345 } => { "param1": ["value1"], "param2": 0, "param3": ["list", "of", "4", "values"] } And an example for the old way: filters = [ ("param1", "default", ["list", "str"]), ("param2", None, "int"), ("param3", ["list", "of", 4, "values"], ["list", "str"]) ] arguments = { "param1": "value1", "unknown": 12345 } => { "param1": ["value1"], "param2": 0, "param3": ["list", "of", "4", "values"] } The reason for having the modern and the non-modern way is transition of legacy code. One day it will all be the modern way. """ params = DotDict() for i in filters: count = len(i) param = None if count <= 1: param = arguments.get(i[0]) else: param = arguments.get(i[0], i[1]) # proceed and do the type checking if count >= 3: types = i[2] if modern: if isinstance(types, list) and param is not None: assert len(types) == 1 if not isinstance(param, list): param = [param] param = [check_type(x, types[0]) for x in param] else: param = check_type(param, types) else: if not isinstance(types, list): types = [types] for t in reversed(types): if t == "list" and not isinstance(param, list): if param is None or param == '': param = [] else: param = [param] elif t == "list" and isinstance(param, list): continue elif isinstance(param, list) and "list" not in types: param = " ".join(param) param = check_type(param, t) elif isinstance(param, list): param = [check_type(x, t) for x in param] else: param = check_type(param, t) params[i[0]] = param return params
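
# Hedged usage sketch for parse_arguments as defined above; it simply replays
# the "modern" example from the docstring so the wrapping and coercion rules
# are visible as executable code.
filters = [
    ("param1", "default", [str]),
    ("param2", None, int),
    ("param3", ["list", "of", 4, "values"], [str]),
]
arguments = {"param1": "value1", "unknown": 12345}
params = parse_arguments(filters, arguments, modern=True)
# scalars are wrapped into lists when a list type is requested, a missing int
# falls back to 0 via check_type, and default list values are coerced to str
assert params["param1"] == ["value1"]
assert params["param2"] == 0
assert params["param3"] == ["list", "of", "4", "values"]
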
def get_basic_processor_meta(self): processor_meta = DotDict() processor_meta.processor_notes = [] return processor_meta
def test_extract_OS_info_fail(self): info = ['OS', ] d = DotDict() bpj._extract_OS_info(info, d) ok_('system_info' in d) eq_(d.system_info, {})
def test_no_destination(self): class FakeStorageSource(object): def __init__(self, config, quit_check_callback): self.store = DotDict({ '1234': DotDict({ 'ooid': '1234', 'Product': 'FireSquid', 'Version': '1.0' }), '1235': DotDict({ 'ooid': '1235', 'Product': 'ThunderRat', 'Version': '1.0' }), '1236': DotDict({ 'ooid': '1236', 'Product': 'Caminimal', 'Version': '1.0' }), '1237': DotDict({ 'ooid': '1237', 'Product': 'Fennicky', 'Version': '1.0' }), }) def get_raw_crash(self, ooid): return self.store[ooid] def get_raw_dumps(self, ooid): return { 'upload_file_minidump': 'this is a fake dump', 'flash1': 'broken flash dump' } def new_ooids(self): for k in self.store.keys(): yield k logger = SilentFakeLogger() config = DotDict({ 'logger': logger, 'number_of_threads': 2, 'maximum_queue_size': 2, 'source': DotDict({'crashstorage_class': FakeStorageSource}), 'destination': DotDict({'crashstorage_class': None}), 'producer_consumer': DotDict({ 'producer_consumer_class': ThreadedTaskManager, 'logger': logger, 'number_of_threads': 1, 'maximum_queue_size': 1 }) }) fts_app = CrashMoverApp(config) assert_raises(TypeError, fts_app.main)
def test_bogus_source_and_destination(self): class NonInfiniteFTSAppClass(CrashMoverApp): def source_iterator(self): for x in self.source.new_crashes(): yield ((x, ), {}) class FakeStorageSource(object): def __init__(self, config, quit_check_callback): self.store = DotDict({ '1234': DotDict({ 'ooid': '1234', 'Product': 'FireSquid', 'Version': '1.0' }), '1235': DotDict({ 'ooid': '1235', 'Product': 'ThunderRat', 'Version': '1.0' }), '1236': DotDict({ 'ooid': '1236', 'Product': 'Caminimal', 'Version': '1.0' }), '1237': DotDict({ 'ooid': '1237', 'Product': 'Fennicky', 'Version': '1.0' }), }) def get_raw_crash(self, ooid): return self.store[ooid] def get_raw_dumps(self, ooid): return { 'upload_file_minidump': 'this is a fake dump', 'flash1': 'broken flash dump' } def new_crashes(self): for k in self.store.keys(): yield k class FakeStorageDestination(object): def __init__(self, config, quit_check_callback): self.store = DotDict() self.dumps = DotDict() def save_raw_crash(self, raw_crash, dumps, crash_id): self.store[crash_id] = raw_crash self.dumps[crash_id] = dumps logger = SilentFakeLogger() config = DotDict({ 'logger': logger, 'number_of_threads': 2, 'maximum_queue_size': 2, 'source': DotDict({'crashstorage_class': FakeStorageSource}), 'destination': DotDict({'crashstorage_class': FakeStorageDestination}), 'producer_consumer': DotDict({ 'producer_consumer_class': ThreadedTaskManager, 'logger': logger, 'number_of_threads': 1, 'maximum_queue_size': 1 }) }) fts_app = NonInfiniteFTSAppClass(config) fts_app.main() source = fts_app.source destination = fts_app.destination eq_(source.store, destination.store) eq_(len(destination.dumps), 4) eq_(destination.dumps['1237'], source.get_raw_dumps('1237'))
def setUp(self): super(TestIntegrationFTPScraper, self).setUp() cursor = self.conn.cursor() # Insert data now = utc_now() build_date = now - datetime.timedelta(days=30) sunset_date = now + datetime.timedelta(days=30) cursor.execute(""" TRUNCATE products CASCADE; INSERT INTO products (product_name, sort, release_name) VALUES ( 'Firefox', 1, 'firefox' ), ( 'Fennec', 1, 'mobile' ); """) cursor.execute(""" TRUNCATE product_versions CASCADE; INSERT INTO product_versions (product_version_id, product_name, major_version, release_version, version_string, version_sort, build_date, sunset_date, featured_version, build_type) VALUES ( 1, 'Firefox', '15.0', '15.0', '15.0a1', '000000150a1', '%(build_date)s', '%(sunset_date)s', 't', 'nightly' ) ,( 2, 'Firefox', '24.5', '24.5.0esr', '24.5.0esr', '024005000x000', '%(build_date)s', '%(sunset_date)s', 't', 'esr' ) ; """ % { "build_date": build_date, "sunset_date": sunset_date }) cursor.execute(""" TRUNCATE release_channels CASCADE; INSERT INTO release_channels (release_channel, sort) VALUES ('nightly', 1), ('aurora', 2), ('beta', 3), ('release', 4); """) cursor.execute(""" TRUNCATE product_release_channels CASCADE; INSERT INTO product_release_channels (product_name, release_channel, throttle) VALUES ('Firefox', 'nightly', 1), ('Firefox', 'aurora', 1), ('Firefox', 'beta', 1), ('Firefox', 'release', 1), ('Fennec', 'release', 1), ('Fennec', 'beta', 1); """) self.conn.commit() self.mocked_session = requests.Session() def download(url): return self.mocked_session.get(url).content def skip_json_file(url): return False self.scrapers = ftpscraper.ScrapersMixin() self.scrapers.download = download self.scrapers.skip_json_file = skip_json_file self.scrapers.config = DotDict({'logger': mock.Mock()})
import logging from socorro.lib.util import DotDict from socorro.signature.rules import CSignatureTool csig_config = DotDict() csig_config.irrelevant_signature_re = '' csig_config.prefix_signature_re = '' csig_config.signatures_with_line_numbers_re = '' csig_config.signature_sentinels = [] csig_config.collapse_arguments = True c_signature_tool = CSignatureTool(csig_config) def create_basic_fake_processor(): """Creates fake processor configuration""" fake_processor = DotDict() fake_processor.c_signature_tool = c_signature_tool fake_processor.config = DotDict() fake_processor.config.logger = logging.getLogger(__name__) fake_processor.processor_notes = [] return fake_processor
def process_crash(self, raw_crash, raw_dumps, processed_crash): """Take a raw_crash and its associated raw_dumps and return a processed_crash. """ # processor_meta_data will be used to ferry "inside information" to # transformation rules. Sometimes rules need a bit more extra # information about the transformation process itself. processor_meta_data = DotDict() processor_meta_data.processor_notes = [ self.config.processor_name, self.__class__.__name__ ] processor_meta_data.quit_check = self.quit_check processor_meta_data.processor = self processor_meta_data.config = self.config if "processor_notes" in processed_crash: original_processor_notes = [ x.strip() for x in processed_crash.processor_notes.split(";") ] processor_meta_data.processor_notes.append( "earlier processing: %s" % processed_crash.get("started_datetime", 'Unknown Date')) else: original_processor_notes = [] processed_crash.success = False processed_crash.started_datetime = utc_now() # for backwards compatibility: processed_crash.startedDateTime = processed_crash.started_datetime processed_crash.signature = 'EMPTY: crash failed to process' crash_id = raw_crash['uuid'] try: # quit_check calls ought to be scattered around the code to allow # the processor to be responsive to requests to shut down. self.quit_check() start_time = self.config.logger.info( "starting transform for crash: %s", crash_id) processor_meta_data.started_timestamp = start_time # apply_all_rules for rule in self.rules: rule.act(raw_crash, raw_dumps, processed_crash, processor_meta_data) self.quit_check() # the crash made it through the processor rules with no exceptions # raised, call it a success. processed_crash.success = True except Exception as exception: self.config.logger.warning('Error while processing %s: %s', crash_id, str(exception), exc_info=True) processor_meta_data.processor_notes.append( 'unrecoverable processor error: %s' % exception) # the processor notes are in the form of a list. Join them all # together to make a single string processor_meta_data.processor_notes.extend(original_processor_notes) processed_crash.processor_notes = '; '.join( processor_meta_data.processor_notes) completed_datetime = utc_now() processed_crash.completed_datetime = completed_datetime # for backwards compatibility: processed_crash.completeddatetime = completed_datetime self.config.logger.info( "finishing %s transform for crash: %s", 'successful' if processed_crash.success else 'failed', crash_id) return processed_crash
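
# Hedged sketch of the rule interface that process_crash relies on above: each
# entry in self.rules only needs an act(raw_crash, raw_dumps, processed_crash,
# processor_meta) method. The rule name and body below are invented for
# illustration; real rules in this code base (e.g. BreakpadStackwalkerRule)
# derive from a Rule base class.
class ProductNameRule(object):
    """copy ProductName from the raw crash into the processed crash"""

    def act(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        processed_crash.product = raw_crash.get('ProductName', '')
        if not processed_crash.product:
            processor_meta.processor_notes.append('ProductName is missing')
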
canonical_standard_raw_crash = DotDict({ "uuid": '00000000-0000-0000-0000-000002140504', "InstallTime": "1335439892", "AdapterVendorID": "0x1002", "TotalVirtualMemory": "4294836224", "Comments": "why did my browser crash? #fail", "Theme": "classic/1.0", "Version": "12.0", "Email": "*****@*****.**", "Vendor": "Mozilla", "EMCheckCompatibility": "true", "Throttleable": "1", "id": "{ec8030f7-c20a-464f-9b0e-13a3a9e97384}", "buildid": "20120420145725", "AvailablePageFile": "10641510400", "version": "12.0", "AdapterDeviceID": "0x7280", "ReleaseChannel": "release", "submitted_timestamp": "2012-05-08T23:26:33.454482+00:00", "URL": "http://www.mozilla.com", "timestamp": 1336519593.454627, "Notes": "AdapterVendorID: 0x1002, AdapterDeviceID: 0x7280, " "AdapterSubsysID: 01821043, " "AdapterDriverVersion: 8.593.100.0\nD3D10 Layers? D3D10 " "Layers- D3D9 Layers? D3D9 Layers- ", "CrashTime": "1336519554", "Winsock_LSP": "MSAFD Tcpip [TCP/IPv6] : 2 : 1 : \n " "MSAFD Tcpip [UDP/IPv6] : 2 : 2 : " "%SystemRoot%\\system32\\mswsock.dll \n " "MSAFD Tcpip [RAW/IPv6] : 2 : 3 : \n " "MSAFD Tcpip [TCP/IP] : 2 : 1 : " "%SystemRoot%\\system32\\mswsock.dll \n " "MSAFD Tcpip [UDP/IP] : 2 : 2 : \n " "MSAFD Tcpip [RAW/IP] : 2 : 3 : " "%SystemRoot%\\system32\\mswsock.dll \n " "\u041f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a " "\u0443\u0441\u043b\u0443\u0433 RSVP TCPv6 : 2 : 1 : \n " "\u041f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a " "\u0443\u0441\u043b\u0443\u0433 RSVP TCP : 2 : 1 : " "%SystemRoot%\\system32\\mswsock.dll \n " "\u041f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a " "\u0443\u0441\u043b\u0443\u0433 RSVP UDPv6 : 2 : 2 : \n " "\u041f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a " "\u0443\u0441\u043b\u0443\u0433 RSVP UDP : 2 : 2 : " "%SystemRoot%\\system32\\mswsock.dll", "FramePoisonBase": "00000000f0de0000", "AvailablePhysicalMemory": "2227773440", "FramePoisonSize": "65536", "StartupTime": "1336499438", "Add-ons": "[email protected]:0.3," "dmpluginff%40westbyte.com:1%2C4.8," "[email protected]:1.9.1," "[email protected]:2.4," "[email protected]:1.0," "[email protected]:2.1," "{a0d7ccb3-214d-498b-b4aa-0e8fda9a7bf7}:20111107," "{d10d0bf8-f5b5-c8b4-a8b2-2b9879e08c5d}:2.0.3," "[email protected]:2.4.6.4," "{972ce4c6-7e08-4474-a285-3208198ce6fd}:12.0," "[email protected]:1.2.1", "BuildID": "20120420145725", "SecondsSinceLastCrash": "86985", "ProductName": "Firefox", "legacy_processing": 0, "AvailableVirtualMemory": "3812708352", "SystemMemoryUsePercentage": "48", "ProductID": "{ec8030f7-c20a-464f-9b0e-13a3a9e97384}", "Distributor": "Mozilla", "Distributor_version": "12.0", })
def test_save_raw_crash_normal_throttle(self, randint_mock): random_ints = [100, 49, 50, 51, 1, 100] def side_effect(*args, **kwargs): return random_ints.pop(0) randint_mock.side_effect = side_effect config = self._setup_config() config.throttle = 50 crash_store = RabbitMQCrashStorage(config) # test for "legacy_processing" missing from crash #0: 100 crash_store.save_raw_crash( raw_crash=DotDict(), dumps=DotDict(), crash_id='crash_id' ) assert not crash_store.transaction.called config.logger.reset_mock() # test for normal save #1: 49 raw_crash = DotDict() raw_crash.legacy_processing = 0 crash_store.save_raw_crash( raw_crash=raw_crash, dumps=DotDict, crash_id='crash_id' ) crash_store.transaction.assert_called_with( crash_store._save_raw_crash_transaction, 'crash_id' ) crash_store.transaction.reset_mock() # test for normal save #2: 50 raw_crash = DotDict() raw_crash.legacy_processing = 0 crash_store.save_raw_crash( raw_crash=raw_crash, dumps=DotDict, crash_id='crash_id' ) crash_store.transaction.assert_called_with( crash_store._save_raw_crash_transaction, 'crash_id' ) crash_store.transaction.reset_mock() # test for normal save #3: 51 raw_crash = DotDict() raw_crash.legacy_processing = 0 crash_store.save_raw_crash( raw_crash=raw_crash, dumps=DotDict, crash_id='crash_id' ) assert not crash_store.transaction.called crash_store.transaction.reset_mock() # test for save rejection because of "legacy_processing" #4: 1 raw_crash = DotDict() raw_crash.legacy_processing = 5 crash_store.save_raw_crash( raw_crash=raw_crash, dumps=DotDict, crash_id='crash_id' ) assert not crash_store.transaction.called # test for save rejection because of "legacy_processing" #5: 100 raw_crash = DotDict() raw_crash.legacy_processing = 5 crash_store.save_raw_crash( raw_crash=raw_crash, dumps=DotDict, crash_id='crash_id' ) assert not crash_store.transaction.called
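
# Hedged sketch (assumed helper name, not from the original module) of the
# throttle decision the test above exercises: with config.throttle = 50,
# draws of 49 and 50 are accepted while 51 and 100 are rejected, i.e. a crash
# is kept when the random percentile is at or below the throttle value.
# The 1-100 draw range is an assumption.
import random

def crash_is_accepted(throttle_percent):
    return random.randint(1, 100) <= throttle_percent
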
def _setup_config(self): config = DotDict() config.transaction_executor_class = Mock() config.backoff_delays = (0, 0, 0) config.logger = Mock() config.rabbitmq_class = MagicMock() config.routing_key = 'socorro.normal' config.filter_on_legacy_processing = True config.redactor_class = Redactor config.forbidden_keys = Redactor.required_config.forbidden_keys.default config.throttle = 100 return config
def test_extract_OS_info_fail(self): info = ['OS', ] d = DotDict() bpj._extract_OS_info(info, d) assert 'system_info' in d assert d.system_info == {}
def test_extract_OS_info_fail(self): info = ['OS',] d = DotDict() bpj._extract_OS_info(info, d) self.assertTrue('system_info' in d) self.assertEqual(d.system_info, {})
def _analyze_frames(self, hang_type, java_stack_trace, make_modules_lower_case, dump_analysis_line_iterator, submitted_timestamp, crashed_thread, processor_notes): """ After the header information, the dump file consists of just frame information. This function cycles through the frame information looking for frames associated with the crashed thread (determined in analyzeHeader). Each frame from that thread is written to the database until it has found a maximum of ten frames. returns: a dictionary will various values to be used to update report in the database, including: truncated - boolean: True - due to excessive length the frames of the crashing thread may have been truncated. signature - string: an overall signature calculated for this crash processor_notes - string: any errors or warnings that happened during the processing input parameters: hang_type - 0: if this is not a hang -1: if "HangID" present in json, but "Hang" was not present "Hang" value: if "Hang" present - probably 1 java_stack_trace - a source for java lang signature information make_modules_lower_case - boolean, should modules be forced to lower case for signature generation? dump_analysis_line_iterator - an iterator that cycles through lines from the crash dump submitted_timestamp crashed_thread - the number of the thread that crashed - we want frames only from the crashed thread processor_notes """ #logger.info("analyzeFrames") frame_counter = 0 is_truncated = False frame_lines_were_found = False signature_generation_frames = [] topmost_sourcefiles = [] if hang_type == 1: thread_for_signature = 0 else: thread_for_signature = crashed_thread max_topmost_sourcefiles = 1 # Bug 519703 calls for just one. # Lets build in some flex for line in dump_analysis_line_iterator: frame_lines_were_found = True #logger.debug(" %s", line) line = line.strip() if line == '': processor_notes.append("An unexpected blank line in " "this dump was ignored") continue # ignore unexpected blank lines (thread_num, frame_num, module_name, function, source, source_line, instruction) = [emptyFilter(x) for x in line.split("|")] if len(topmost_sourcefiles) < max_topmost_sourcefiles and source: topmost_sourcefiles.append(source) if thread_for_signature == int(thread_num): if frame_counter < 30: if make_modules_lower_case: try: module_name = module_name.lower() except AttributeError: pass this_frame_signature = \ self.c_signature_tool.normalize_signature( module_name, function, source, source_line, instruction ) signature_generation_frames.append(this_frame_signature) if (frame_counter == self.config.crashing_thread_frame_threshold): processor_notes.append( "This dump is too long and has triggered the automatic" " truncation routine") dump_analysis_line_iterator.useSecondaryCache() is_truncated = True frame_counter += 1 elif frame_counter: break dump_analysis_line_iterator.stopUsingSecondaryCache() signature = self._generate_signature(signature_generation_frames, java_stack_trace, hang_type, crashed_thread, processor_notes) if not frame_lines_were_found: message = "No frame data available" processor_notes.append(message) self.config.logger.info("%s", message) return DotDict({ "signature": signature, "truncated": is_truncated, "topmost_filenames": topmost_sourcefiles, })
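
# Hedged illustration of the pipe-delimited frame lines consumed above (the
# sample values are invented): each line is
#   thread_num|frame_num|module|function|source_file|source_line|instruction
sample_line = "0|1|xul.dll|NS_InvokeByIndex|xptcinvoke.cpp|69|0x17"
(thread_num, frame_num, module_name, function,
 source, source_line, instruction) = sample_line.split("|")
assert thread_num == "0"
assert module_name == "xul.dll"
assert source_line == "69"
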
def __init__(self, config, quit_check_callback): self.store = DotDict() self.dumps = DotDict()
from socorro.processor.skunk_classifiers import ( SkunkClassificationRule, DontConsiderTheseFilter, UpdateWindowAttributes, SetWindowPos, SendWaitReceivePort, Bug811804, Bug812318, ) from socorro.processor.signature_utilities import CSignatureTool from socorro.unittest.processor.test_breakpad_pipe_to_json import ( cannonical_json_dump ) from socorro.unittest.testbase import TestCase csig_config = DotDict() csig_config.irrelevant_signature_re = '' csig_config.prefix_signature_re = '' csig_config.signatures_with_line_numbers_re = '' csig_config.signature_sentinels = [] c_signature_tool = CSignatureTool(csig_config) def create_basic_fake_processor(): fake_processor = DotDict() fake_processor.c_signature_tool = c_signature_tool fake_processor.config = DotDict() # need help figuring out failures? switch to FakeLogger and read stdout fake_processor.config.logger = SilentFakeLogger() fake_processor.processor_notes = [] #fake_processor.config.logger = FakeLogger() return fake_processor
def test_source_iterator(self): class FakeStorageSource(object): def __init__(self): self.first = True def new_crashes(self): if self.first: self.first = False else: for k in range(999): # ensure that both forms work if k % 4: yield k else: yield ((k, ), { 'some_crap': True, }) for k in range(2): yield None class FakeStorageDestination(object): def __init__(self, config, quit_check_callback): self.store = DotDict() self.dumps = DotDict() def save_raw_crash(self, raw_crash, dump, crash_id): self.store[crash_id] = raw_crash self.dumps[crash_id] = dump logger = SilentFakeLogger() config = DotDict({ 'logger': logger, 'number_of_threads': 2, 'maximum_queue_size': 2, 'source': DotDict({'crashstorage_class': FakeStorageSource}), 'destination': DotDict({'crashstorage_class': FakeStorageDestination}), 'producer_consumer': DotDict({ 'producer_consumer_class': ThreadedTaskManager, 'logger': logger, 'number_of_threads': 1, 'maximum_queue_size': 1 }) }) fts_app = CrashMoverApp(config) fts_app.source = FakeStorageSource() fts_app.destination = FakeStorageDestination error_detected = False for x, y in zip(xrange(1002), (a for a in fts_app.source_iterator())): if x == 0: ok_(y is None) elif x < 1000: if x - 1 != y[0][0] and not error_detected: error_detected = True eq_(x, y, 'iterator fails on iteration %d' % x) else: if y is not None and not error_detected: error_detected = True ok_(x is None, 'iterator fails on iteration %d' % x)
class BreakpadStackwalkerRule(Rule): required_config = Namespace() required_config.add_option( 'dump_field', doc='the default name of a dump', default='upload_file_minidump', ) required_config.add_option( 'stackwalk_command_line', doc='the template for the command to invoke stackwalker', default=( 'timeout -s KILL 30 $minidump_stackwalk_pathname ' '--raw-json $rawfilePathname $dumpfilePathname ' '$processor_symbols_pathname_list 2>/dev/null' ), ) required_config.add_option( 'minidump_stackwalk_pathname', doc='the full pathname to the external program stackwalker ' '(quote path with embedded spaces)', default='/data/socorro/stackwalk/bin/stackwalker', ) required_config.add_option( 'symbol_cache_path', doc='the path where the symbol cache is found (quote path with ' 'embedded spaces)', default='/mnt/socorro/symbols', ) required_config.add_option( 'processor_symbols_pathname_list', doc='comma or space separated list of symbol files for ' 'minidump_stackwalk (quote paths with embedded spaces)', default='/home/socorro/symbols', from_string_converter=_create_symbol_path_str ) required_config.add_option( 'temporary_file_system_storage_path', doc='a path where temporary files may be written', default=tempfile.gettempdir(), ) def __init__(self, config): super(BreakpadStackwalkerRule, self).__init__(config) # the code in this section originally hales from 2008 ExternalProcessor # class. It defines the template subsitution syntax used to spcecify # the shell command used to invoke the minidump stackwalker program. # The syntax was was requested to be of a Perl/shell style rather than # the original Pythonic syntax. This code takes that foreign syntax # and converts it to a Pythonic syntax for later use. strip_parens_re = re.compile(r'\$(\()(\w+)(\))') convert_to_python_substitution_format_re = re.compile(r'\$(\w+)') # Canonical form of $(param) is $param. Convert any that are needed tmp = strip_parens_re.sub( r'$\2', config.stackwalk_command_line ) # Convert canonical $dumpfilePathname and $rawfilePathname tmp = tmp.replace('$dumpfilePathname', 'DUMPFILEPATHNAME') tmp = tmp.replace('$rawfilePathname', 'RAWFILEPATHNAME') # finally, convert any remaining $param to pythonic %(param)s tmp = convert_to_python_substitution_format_re.sub(r'%(\1)s', tmp) self.mdsw_command_line = tmp % config def version(self): return '1.0' @contextmanager def _temp_raw_crash_json_file(self, raw_crash, crash_id): file_pathname = os.path.join( self.config.temporary_file_system_storage_path, "%s.%s.TEMPORARY.json" % ( crash_id, threading.currentThread().getName() ) ) with open(file_pathname, "w") as f: ujson.dump(dict(raw_crash), f) try: yield file_pathname finally: os.unlink(file_pathname) @contextmanager def _temp_file_context(self, raw_dump_path): """this contextmanager implements conditionally deleting a pathname at the end of a context if the pathname indicates that it is a temp file by having the word 'TEMPORARY' embedded in it.""" try: yield raw_dump_path finally: if 'TEMPORARY' in raw_dump_path: try: os.unlink(raw_dump_path) except OSError: self.config.logger.warning( 'unable to delete %s. manual deletion is required.', raw_dump_path, ) def _invoke_minidump_stackwalk( self, dump_name, dump_pathname, raw_crash_pathname, processor_notes ): """ This function invokes breakpad_stackdump as an external process capturing and returning the text output of stdout. This version represses the stderr output. 
input parameters: dump_pathname: the complete pathname of the dumpfile to be analyzed """ command_line = self.mdsw_command_line.replace( "DUMPFILEPATHNAME", dump_pathname ).replace( "RAWFILEPATHNAME", raw_crash_pathname ) if self.config.chatty: self.config.logger.debug( "BreakpadStackwalkerRule: %s", command_line ) subprocess_handle = subprocess.Popen( command_line, shell=True, stdout=subprocess.PIPE ) with closing(subprocess_handle.stdout): try: stackwalker_output = ujson.load(subprocess_handle.stdout) except Exception, x: processor_notes.append( "MDSW output failed in json: %s" % x ) stackwalker_output = {} return_code = subprocess_handle.wait() if not isinstance(stackwalker_output, Mapping): processor_notes.append( "MDSW produced unexpected output: %s..." % str(stackwalker_output)[:10] ) stackwalker_output = {} stackwalker_data = DotDict() stackwalker_data.json_dump = stackwalker_output stackwalker_data.mdsw_return_code = return_code stackwalker_data.mdsw_status_string = stackwalker_output.get( 'status', 'unknown error' ) stackwalker_data.success = stackwalker_data.mdsw_status_string == 'OK' if return_code == 124: processor_notes.append( "MDSW terminated with SIGKILL due to timeout" ) elif return_code != 0 or not stackwalker_data.success: processor_notes.append( "MDSW failed on '%s': %s" % ( dump_name, stackwalker_data.mdsw_status_string ) ) return stackwalker_data
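
import re

# Hedged sketch of the template conversion performed in
# BreakpadStackwalkerRule.__init__ above: shell-style $param placeholders in
# stackwalk_command_line are rewritten into Python %(param)s substitutions.
# The command line below is the rule's documented default.
strip_parens_re = re.compile(r'\$(\()(\w+)(\))')
to_python_format_re = re.compile(r'\$(\w+)')

command_template = (
    'timeout -s KILL 30 $minidump_stackwalk_pathname '
    '--raw-json $rawfilePathname $dumpfilePathname '
    '$processor_symbols_pathname_list 2>/dev/null'
)
# canonical form of $(param) is $param
tmp = strip_parens_re.sub(r'$\2', command_template)
# the two file placeholders are swapped in per dump at invocation time
tmp = tmp.replace('$dumpfilePathname', 'DUMPFILEPATHNAME')
tmp = tmp.replace('$rawfilePathname', 'RAWFILEPATHNAME')
# any remaining $param becomes %(param)s
tmp = to_python_format_re.sub(r'%(\1)s', tmp)
# tmp is now:
#   'timeout -s KILL 30 %(minidump_stackwalk_pathname)s --raw-json
#    RAWFILEPATHNAME DUMPFILEPATHNAME %(processor_symbols_pathname_list)s
#    2>/dev/null'
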
    def _create_basic_processed_crash(self, uuid, raw_crash,
                                      submitted_timestamp,
                                      started_timestamp,
                                      processor_notes):
        """
        This function is run only by a worker thread.
        Create the record for the current job in the 'reports' table.

        input parameters:
            uuid: the unique id identifying the job - corresponds with the
                  uuid column in the 'jobs' and the 'reports' tables
            raw_crash: an object with a dictionary interface for fetching
                       the components of the json document
            submitted_timestamp: when the job came in (a key used in
                                 partitioning)
            started_timestamp: when the processor began working on the job
            processor_notes: list of strings of error messages
        """
        #logger.debug("starting insertReportIntoDatabase")
        processed_crash = DotDict()
        processed_crash.success = False
        processed_crash.uuid = uuid
        processed_crash.startedDateTime = started_timestamp
        processed_crash.product = self._get_truncate_or_warn(
            raw_crash, 'ProductName', processor_notes, None, 30)
        processed_crash.version = self._get_truncate_or_warn(
            raw_crash, 'Version', processor_notes, None, 16)
        processed_crash.build = self._get_truncate_or_warn(
            raw_crash, 'BuildID', processor_notes, None, 16)
        processed_crash.url = self._get_truncate_or_none(
            raw_crash, 'URL', 255)
        processed_crash.user_comments = self._get_truncate_or_none(
            raw_crash, 'Comments', 500)
        processed_crash.app_notes = self._get_truncate_or_none(
            raw_crash, 'Notes', 1000)
        processed_crash.distributor = self._get_truncate_or_none(
            raw_crash, 'Distributor', 20)
        processed_crash.distributor_version = self._get_truncate_or_none(
            raw_crash, 'Distributor_version', 20)
        processed_crash.email = self._get_truncate_or_none(
            raw_crash, 'Email', 100)
        processed_crash.process_type = self._get_truncate_or_none(
            raw_crash, 'ProcessType', 10)
        processed_crash.release_channel = raw_crash.get(
            'ReleaseChannel', 'unknown')
        # userId is now deprecated and replaced with an empty string
        processed_crash.user_id = ""

        # ++++++++++++++++++++
        # date transformations
        processed_crash.date_processed = submitted_timestamp

        # defaultCrashTime: must have crashed before date processed
        submitted_timestamp_as_epoch = int(
            time.mktime(submitted_timestamp.timetuple()))
        # 'timestamp' is the old name for crash time
        timestampTime = int(
            raw_crash.get('timestamp', submitted_timestamp_as_epoch))
        crash_time = int(
            self._get_truncate_or_warn(raw_crash, 'CrashTime',
                                       processor_notes, timestampTime, 10))
        processed_crash.crash_time = crash_time
        if crash_time == submitted_timestamp_as_epoch:
            processor_notes.append("WARNING: No 'client_crash_date' "
                                   "could be determined from the raw_crash")
        # StartupTime: must have started up some time before crash
        startupTime = int(raw_crash.get('StartupTime', crash_time))
        # InstallTime: must have installed some time before startup
        installTime = int(raw_crash.get('InstallTime', startupTime))
        processed_crash.client_crash_date = datetime.datetime.fromtimestamp(
            crash_time, UTC)
        processed_crash.install_age = crash_time - installTime
        processed_crash.uptime = max(0, crash_time - startupTime)
        try:
            last_crash = int(raw_crash.SecondsSinceLastCrash)
        except (KeyError, AttributeError, TypeError, ValueError):
            last_crash = None
        processed_crash.last_crash = last_crash

        # TODO: not sure how to reimplement this
        #if crash_id in self.priority_job_set:
            #processor_notes.append('Priority Job')
            #self.priority_job_set.remove(crash_id)

        # can't get report id because we don't have the database here
        #reportId = processed_crash["id"]
        processed_crash.dump = ''

        try:
            processed_crash.ReleaseChannel = raw_crash.ReleaseChannel
        except KeyError:
            processed_crash.ReleaseChannel = 'unknown'

        if self.config.collect_addon:
            #logger.debug("collecting Addons")
            # formerly 'insertAdddonsIntoDatabase'
            addons_as_a_list_of_tuples = self._process_list_of_addons(
                raw_crash, processor_notes)
            processed_crash.addons = addons_as_a_list_of_tuples

        if self.config.collect_crash_process:
            #logger.debug("collecting Crash Process")
            # formerly 'insertCrashProcess'
            processed_crash.update(
                self._add_process_type_to_processed_crash(raw_crash))

        processed_crash.addons_checked = None
        try:
            addons_checked_txt = raw_crash.EMCheckCompatibility.lower()
            processed_crash.addons_checked = False
            if addons_checked_txt == 'true':
                processed_crash.addons_checked = True
        except KeyError:
            pass  # leaving it as None if not in the document

        processed_crash.hangid = raw_crash.get('HangID', None)
        if 'Hang' in raw_crash:
            processed_crash.hang_type = raw_crash.Hang
        elif processed_crash.hangid:
            processed_crash.hang_type = -1
        else:
            processed_crash.hang_type = 0

        processed_crash.java_stack_trace = raw_crash.setdefault(
            'JavaStackTrace', None)

        return processed_crash
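# A small standalone illustration (with made-up epoch values) of the date
# arithmetic above: CrashTime falls back toward the submission time,
# install_age is measured from InstallTime to the crash, and uptime is
# clamped at zero so a StartupTime later than the crash cannot go negative.
submitted_epoch = 1420070400            # hypothetical submission time
crash_time = 1420070100                 # CrashTime from the raw crash
startup_time = 1420069800               # StartupTime
install_time = 1419984000               # InstallTime

install_age = crash_time - install_time
uptime = max(0, crash_time - startup_time)

assert install_age == 86100
assert uptime == 300
# had StartupTime somehow been later than the crash, uptime would clamp to 0
assert max(0, crash_time - (crash_time + 60)) == 0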
    def _setup_config(self):
        config = DotDict()
        config.crashstorage_class = FakeCrashStore
        return config
    def convert_raw_crash_to_processed_crash(self, raw_crash, raw_dump):
        """
        This function is run only by a worker thread.
        Given a job, fetch a thread local database connection and the json
        document.  Use these to create the record in the 'reports' table,
        then start the analysis of the dump file.

        input parameters:
            raw_crash: a mapping containing the crash annotations; must
                       include the crash 'uuid'
            raw_dump: the minidump data for the crash, written to a temporary
                      file for stack dump analysis
        """
        try:
            self.quit_check()
            crash_id = raw_crash.uuid
            processor_notes = []
            processed_crash = DotDict()
            processed_crash.uuid = raw_crash.uuid
            processed_crash.success = False

            started_timestamp = self._log_job_start(crash_id)

            #self.config.logger.debug('about to apply rules')
            self.raw_crash_transform_rule_system.apply_all_rules(
                raw_crash, self)
            #self.config.logger.debug('done applying transform rules')

            try:
                submitted_timestamp = datetimeFromISOdateString(
                    raw_crash.submitted_timestamp)
            except KeyError:
                submitted_timestamp = dateFromOoid(crash_id)

            # formerly the call to 'insertReportIntoDatabase'
            processed_crash_update = self._create_basic_processed_crash(
                crash_id,
                raw_crash,
                submitted_timestamp,
                started_timestamp,
                processor_notes)
            processed_crash.update(processed_crash_update)

            temp_dump_pathname = self._get_temp_dump_pathname(
                crash_id, raw_dump)
            try:
                #logger.debug('about to doBreakpadStackDumpAnalysis')
                processed_crash_update_dict = \
                    self._do_breakpad_stack_dump_analysis(
                        crash_id,
                        temp_dump_pathname,
                        processed_crash.hang_type,
                        processed_crash.java_stack_trace,
                        submitted_timestamp,
                        processor_notes
                    )
                processed_crash.update(processed_crash_update_dict)
            finally:
                self._cleanup_temp_file(temp_dump_pathname)

            processed_crash.topmost_filenames = "|".join(
                processed_crash.get('topmost_filenames', []))
            try:
                processed_crash.Winsock_LSP = raw_crash.Winsock_LSP
            except KeyError:
                pass  # if it's not in the original raw_crash,
                      # it doesn't get into the jsonz
        #except (KeyboardInterrupt, SystemExit):
            #self.config.logger.info("quit request detected")
            #raise
        except Exception as x:
            self.config.logger.warning(
                'Error while processing %s: %s',
                crash_id,
                str(x),
                exc_info=True)
            processor_notes.append(str(x))
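# A standalone sketch of the temp-dump handling pattern used above: write the
# raw dump bytes to a scratch file, run the analysis step, and remove the
# file in a finally clause so it is cleaned up even when analysis raises.
# The analyze() callable is a stand-in for the real stack dump analysis.
import os
import tempfile


def with_temp_dump(raw_dump_bytes, analyze):
    handle, pathname = tempfile.mkstemp(suffix=".dump")
    try:
        with os.fdopen(handle, "wb") as f:
            f.write(raw_dump_bytes)
        return analyze(pathname)
    finally:
        os.unlink(pathname)


# example: the callback sees the file, and the file is gone afterwards
result = with_temp_dump(b"MDMP...", lambda p: os.path.exists(p))
assert result is True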
    def _analyze_header(self, crash_id, dump_analysis_line_iterator,
                        submitted_timestamp, processor_notes):
        """
        Scan through the lines of the dump header:
            - extract data to update the record for this crash in 'reports',
              including the id of the crashing thread

        Returns:
            a dictionary of the various values that were updated in the
            database

        Input parameters:
            crash_id - the unique id of the crash being analyzed
            dump_analysis_line_iterator - an iterator object that feeds lines
                                          from crash dump data
            submitted_timestamp
            processor_notes
        """
        crashed_thread = None
        processed_crash_update = DotDict()
        # minimal update requirements
        processed_crash_update.success = True
        processed_crash_update.os_name = None
        processed_crash_update.os_version = None
        processed_crash_update.cpu_name = None
        processed_crash_update.cpu_info = None
        processed_crash_update.reason = None
        processed_crash_update.address = None

        header_lines_were_found = False
        flash_version = None
        for line in dump_analysis_line_iterator:
            line = line.strip()
            # empty line separates header data from thread data
            if line == '':
                break
            header_lines_were_found = True
            values = [x.strip() for x in line.split('|')]
            if len(values) < 3:
                processor_notes.append(
                    'Cannot parse header line "%s"' % line)
                continue
            values = [emptyFilter(x) for x in values]
            if values[0] == 'OS':
                name = self._truncate_or_none(values[1], 100)
                version = self._truncate_or_none(values[2], 100)
                processed_crash_update.os_name = name
                processed_crash_update.os_version = version
            elif values[0] == 'CPU':
                processed_crash_update.cpu_name = \
                    self._truncate_or_none(values[1], 100)
                processed_crash_update.cpu_info = \
                    self._truncate_or_none(values[2], 100)
                try:
                    # the CPU line may carry an optional fourth field
                    processed_crash_update.cpu_info = (
                        '%s | %s' % (
                            processed_crash_update.cpu_info,
                            self._truncate_or_none(values[3], 100)
                        )
                    )
                except IndexError:
                    pass
            elif values[0] == 'Crash':
                processed_crash_update.reason = \
                    self._truncate_or_none(values[1], 255)
                try:
                    processed_crash_update.address = \
                        self._truncate_or_none(values[2], 20)
                except IndexError:
                    processed_crash_update.address = None
                try:
                    crashed_thread = int(values[3])
                except Exception:
                    crashed_thread = None
            elif values[0] == 'Module':
                # grab only the flash version, which is not quite as easy
                # as it looks
                if not flash_version:
                    flash_version = self._get_flash_version(values)

        if not header_lines_were_found:
            message = "%s returned no header lines for crash_id: %s" % (
                self.config.minidump_stackwalk_pathname, crash_id)
            processor_notes.append(message)
            #self.config.logger.warning("%s", message)

        if crashed_thread is None:
            message = "No thread was identified as the cause of the crash"
            processor_notes.append(message)
            self.config.logger.info("%s", message)
        processed_crash_update.crashedThread = crashed_thread

        if not flash_version:
            flash_version = '[blank]'
        processed_crash_update.flash_version = flash_version
        #self.config.logger.debug(
        #    "  updated values %s",
        #    processed_crash_update
        #)
        return processed_crash_update
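# A standalone sketch of the header parsing above, run against a few
# illustrative pipe-delimited lines of the kind minidump_stackwalk emits.
# The field values here are made up; only the OS/CPU/Crash line shapes
# matter, and truncation and processor notes are omitted for brevity.
def parse_header(lines):
    info = {"os_name": None, "os_version": None, "cpu_name": None,
            "cpu_info": None, "reason": None, "address": None,
            "crashed_thread": None}
    for line in lines:
        values = [v.strip() for v in line.strip().split("|")]
        if len(values) < 3:
            continue  # the real code records a processor note here
        if values[0] == "OS":
            info["os_name"], info["os_version"] = values[1], values[2]
        elif values[0] == "CPU":
            info["cpu_name"], info["cpu_info"] = values[1], values[2]
            if len(values) > 3:          # optional fourth field
                info["cpu_info"] = "%s | %s" % (info["cpu_info"], values[3])
        elif values[0] == "Crash":
            info["reason"] = values[1]
            info["address"] = values[2]
            try:
                info["crashed_thread"] = int(values[3])
            except (IndexError, ValueError, TypeError):
                info["crashed_thread"] = None
    return info


sample = [
    "OS|Windows NT|6.1.7601 Service Pack 1",
    "CPU|x86|GenuineIntel family 6 model 42 stepping 7|2",
    "Crash|EXCEPTION_ACCESS_VIOLATION_READ|0x676c|0",
]
parsed = parse_header(sample)
assert parsed["os_name"] == "Windows NT"
assert parsed["cpu_info"].endswith("| 2")
assert parsed["crashed_thread"] == 0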