def test_fatal_operational_exception(self, espy_mock):
    """Index attempt hits an operational error it cannot recover from."""
    # Mock out the Elasticsearch client entirely.
    es_mock = mock.MagicMock()
    espy_mock.Elasticsearch.return_value = es_mock

    # ESCrashStorage uses the "limited backoff" transaction executor.
    # Zero out its delays so the test fails fast instead of sleeping
    # between retries.
    self.config['backoff_delays'] = [0, 0, 0]
    self.config['wait_log_interval'] = 0

    storage = ESCrashStorage(config=self.config)
    crash_uuid = a_processed_crash['uuid']

    # Every index call raises; the storage should give up and re-raise.
    es_mock.index.side_effect = elasticsearch.exceptions.ConnectionError(500, '')

    # Submit a crash and ensure that it failed.
    with pytest.raises(elasticsearch.exceptions.ConnectionError):
        storage.save_raw_and_processed(
            a_raw_crash, None, a_processed_crash, crash_uuid
        )
def test_success_operational_exception(self, espy_mock):
    """Index attempt hits operational errors but recovers on retry."""
    es_mock = mock.MagicMock()
    espy_mock.Elasticsearch.return_value = es_mock

    # ESCrashStorage uses the "limited backoff" transaction executor.
    # Zero out its delays so retries happen immediately.
    self.config['backoff_delays'] = [0, 0, 0]
    self.config['wait_log_interval'] = 0

    storage = ESCrashStorage(config=self.config)
    crash_uuid = a_processed_crash['uuid']

    # The executor tries three times; fail the first two attempts.
    pending_failures = [
        elasticsearch.exceptions.ConnectionError(500, ''),
        elasticsearch.exceptions.ConnectionError(500, ''),
    ]

    def flaky_index(*args, **kwargs):
        # Raise while scripted failures remain, then act like a normal
        # index call.
        if pending_failures:
            raise pending_failures.pop(0)
        return es_mock.index

    es_mock.index.side_effect = flaky_index

    # Two raises, then success.
    storage.save_raw_and_processed(
        raw_crash=a_raw_crash,
        dumps=None,
        processed_crash=a_processed_crash,
        crash_id=crash_uuid,
    )

    # Verify the document that finally got indexed.
    document = {
        'crash_id': crash_uuid,
        'processed_crash': a_processed_crash,
        'raw_crash': a_raw_crash
    }
    additional = {
        'doc_type': 'crash_reports',
        'id': crash_uuid,
        'index': 'socorro_integration_test_reports'
    }
    es_mock.index.assert_called_with(body=document, **additional)
def test_index_crash_with_bad_keys(self):
    """Invalid raw-crash keys are dropped before the document is indexed."""
    bad_key_raw_crash = {
        'foo': 'alpha',
        '': 'bad key 1',
        '.': 'bad key 2',
        'na\xefve': 'bad key 3',
    }

    storage = ESCrashStorage(config=self.config)
    storage.save_raw_and_processed(
        raw_crash=deepcopy(bad_key_raw_crash),
        dumps=None,
        processed_crash=deepcopy(a_processed_crash),
        crash_id=a_processed_crash['uuid']
    )

    # Fetch the document back to prove it was indexed.
    doc = self.es_client.get(
        index=self.config.elasticsearch.elasticsearch_index,
        id=a_processed_crash['uuid']
    )

    # Only the valid key should have survived.
    indexed_raw_crash = doc['_source']['raw_crash']
    assert indexed_raw_crash == {'foo': 'alpha'}

    storage.close()
def test_get_index_for_crash_dynamic_name(self):
    """A '%'-pattern index name is expanded with the crash date."""
    # The base test config uses a static name, so switch to a config
    # whose index name contains strftime-style date patterns.
    tuned = self.get_tuned_config(
        ESCrashStorage,
        {
            "resource.elasticsearch.elasticsearch_index": "socorro_integration_test_reports%Y%m%d"
        },
    )
    storage = ESCrashStorage(config=tuned)

    # get_index_for_crash needs a real datetime to fill in the pattern.
    crash_date = string_to_datetime(a_processed_crash["client_crash_date"])
    index = storage.get_index_for_crash(crash_date)

    # The pattern above plus the crash date yields this concrete name.
    assert type(index) is str
    assert index == "socorro_integration_test_reports20120408"
def test_get_index_for_crash_dynamic_name(self):
    """A '%'-pattern index name is expanded with the crash date."""
    # The base test config uses a static name, so switch to a config
    # whose index name contains strftime-style date patterns.
    tuned = self.get_tuned_config(
        ESCrashStorage,
        {'resource.elasticsearch.elasticsearch_index': 'socorro_integration_test_reports%Y%m%d'}
    )
    storage = ESCrashStorage(config=tuned)

    # get_index_for_crash needs a real datetime to fill in the pattern.
    index = storage.get_index_for_crash(
        string_to_datetime(a_processed_crash['client_crash_date'])
    )

    # The pattern above plus the crash date yields this concrete name.
    ok_(type(index) is str)
    eq_(index, 'socorro_integration_test_reports20120408')
def test_index_crash_indexable_keys(self):
    """Keys not listed in super_search_fields.py are stripped at index time."""
    # Check super_search_fields.py for valid keys to update this.
    raw_crash = {
        "InvalidKey": "alpha",
        "BuildID": "20200506000000",
    }
    processed_crash = {
        "AnotherInvalidKey": "alpha",
        "date_processed": "2012-04-08 10:56:41.558922",
        "uuid": "936ce666-ff3b-4c7a-9674-367fe2120408",
    }

    storage = ESCrashStorage(config=self.config)
    storage.save_processed_crash(
        raw_crash=raw_crash,
        processed_crash=processed_crash,
    )

    # Fetch the document back to prove it was indexed.
    doc = self.es_client.get(
        index=self.config.elasticsearch.elasticsearch_index,
        id=processed_crash["uuid"],
    )

    # The invalid keys must have been stripped from both halves.
    assert doc["_source"]["raw_crash"] == {"BuildID": "20200506000000"}
    assert doc["_source"]["processed_crash"] == {
        "uuid": "936ce666-ff3b-4c7a-9674-367fe2120408",
        "date_processed": "2012-04-08T10:56:41.558922+00:00",
    }
def test_index_data_capture(self):
    """Verify we capture index data in ES crashstorage"""
    with MetricsMock() as mm:
        storage = ESCrashStorage(config=self.config, namespace='processor.es')
        fake_conn = mock.Mock()

        # A successful index call...
        storage._index_crash(
            connection=fake_conn,
            es_index=None,
            es_doctype=None,
            crash_document=None,
            crash_id=None
        )

        # ...followed by a failing one.
        fake_conn.index.side_effect = Exception
        with pytest.raises(Exception):
            storage._index_crash(
                connection=fake_conn,
                es_index=None,
                es_doctype=None,
                crash_document=None,
                crash_id=None
            )

        # Exactly one metric record per outcome.
        successes = mm.filter_records(
            stat='processor.es.index', tags=['outcome:successful']
        )
        failures = mm.filter_records(
            stat='processor.es.index', tags=['outcome:failed']
        )
        assert len(successes) == 1
        assert len(failures) == 1
def test_index_crash_indexable_keys(self):
    """Keys not listed in super_search_fields.py are stripped at index time."""
    # Check super_search_fields.py for valid keys to update this.
    raw_crash = {
        "InvalidKey": "alpha",
        "BuildID": "20200506000000",
    }
    processed_crash = {
        "AnotherInvalidKey": "alpha",
        "date_processed": date_to_string(utc_now()),
        "uuid": "936ce666-ff3b-4c7a-9674-367fe2120408",
    }

    storage = ESCrashStorage(config=self.config)
    storage.save_processed_crash(
        raw_crash=raw_crash,
        processed_crash=processed_crash,
    )

    # Fetch the document back to prove it was indexed.
    doc = self.conn.get(
        index=self.es_context.get_index_for_date(utc_now()),
        id=processed_crash["uuid"],
    )

    # The invalid keys must have been stripped from both halves.
    assert sorted(doc["_source"]["raw_crash"].keys()) == ["BuildID"]
    assert sorted(doc["_source"]["processed_crash"].keys()) == [
        "date_processed",
        "uuid",
    ]
def test_indexing_bogus_number_field(self, es_class_mock, es_client_mock):
    """A field ES rejects as a bad number is removed and the crash re-indexed."""
    storage = ESCrashStorage(config=self.config)
    crash_uuid = a_processed_crash['uuid']

    raw_crash = {}
    processed_crash = {
        'date_processed': '2012-04-08 10:56:41.558922',
        'bogus-field': 1234567890,
        'foo': 'bar',
    }

    def mock_index(*args, **kwargs):
        # Reject the document until the bogus field has been removed.
        if 'bogus-field' in kwargs['body']['processed_crash']:
            raise elasticsearch.exceptions.TransportError(
                400,
                'RemoteTransportException[[i-f94dae31][inet[/172.31.1.54:'
                '9300]][indices:data/write/index]]; nested: '
                'MapperParsingException[failed to parse '
                '[processed_crash.bogus-field]]; nested: '
                'NumberFormatException[For input string: '
                '"18446744073709480735"]; '
            )
        return True

    es_class_mock().index.side_effect = mock_index

    # The save must succeed on the retry without the bogus field.
    storage.save_raw_and_processed(
        raw_crash=deepcopy(raw_crash),
        dumps=None,
        processed_crash=deepcopy(processed_crash),
        crash_id=crash_uuid
    )

    # The final index call records the removed field and omits it from
    # the processed crash.
    expected_doc = {
        'crash_id': crash_uuid,
        'removed_fields': 'processed_crash.bogus-field',
        'processed_crash': {
            'date_processed': string_to_datetime(
                '2012-04-08 10:56:41.558922'
            ),
            'foo': 'bar',
        },
        'raw_crash': {},
    }
    es_class_mock().index.assert_called_with(
        index=self.config.elasticsearch.elasticsearch_index,
        doc_type=self.config.elasticsearch.elasticsearch_doctype,
        body=expected_doc,
        id=crash_uuid
    )
def test_indexing_bogus_number_field(self, es_class_mock, es_client_mock):
    """A field ES rejects as a bad number is removed and the crash re-indexed."""
    storage = ESCrashStorage(config=self.config)
    crash_uuid = SAMPLE_PROCESSED_CRASH["uuid"]

    raw_crash = {}
    processed_crash = {
        "date_processed": date_to_string(utc_now()),
        # NOTE(willkg): This needs to be a key that's in super_search_fields,
        # but is rejected by our mock_index call--this is wildly contrived.
        "version": 1234567890,
        "uuid": crash_uuid,
    }

    def mock_index(*args, **kwargs):
        # Reject the document until the "version" field has been removed.
        if "version" in kwargs["body"]["processed_crash"]:
            raise elasticsearch.exceptions.TransportError(
                400,
                (
                    "RemoteTransportException[[i-f94dae31][inet[/172.31.1.54:"
                    "9300]][indices:data/write/index]]; nested: "
                    "MapperParsingException[failed to parse "
                    "[processed_crash.version]]; nested: "
                    "NumberFormatException[For input string: "
                    '"18446744073709480735"]; '
                ),
            )
        return True

    es_class_mock().index.side_effect = mock_index

    # The save must succeed on the retry without the rejected field.
    storage.save_processed_crash(
        raw_crash=deepcopy(raw_crash),
        processed_crash=deepcopy(processed_crash),
    )

    # The final index call records the removed field and omits it from
    # the processed crash.
    expected_doc = {
        "crash_id": crash_uuid,
        "removed_fields": "processed_crash.version",
        "processed_crash": {
            "date_processed": string_to_datetime(processed_crash["date_processed"]),
            "uuid": crash_uuid,
        },
        "raw_crash": {},
    }
    es_class_mock().index.assert_called_with(
        index=self.es_context.get_index_for_date(utc_now()),
        doc_type=self.config.elasticsearch.elasticsearch_doctype,
        body=expected_doc,
        id=crash_uuid,
    )
def test_indexing_bogus_number_field(self, es_class_mock, es_client_mock):
    """A field ES rejects as a bad number is removed and the crash re-indexed."""
    storage = ESCrashStorage(config=self.config)
    crash_uuid = a_processed_crash["uuid"]

    raw_crash = {}
    processed_crash = {
        "date_processed": "2012-04-08 10:56:41.558922",
        "bogus-field": 1234567890,
        "foo": "bar",
    }

    def mock_index(*args, **kwargs):
        # Reject the document until the bogus field has been removed.
        if "bogus-field" in kwargs["body"]["processed_crash"]:
            raise elasticsearch.exceptions.TransportError(
                400,
                (
                    "RemoteTransportException[[i-f94dae31][inet[/172.31.1.54:"
                    "9300]][indices:data/write/index]]; nested: "
                    "MapperParsingException[failed to parse "
                    "[processed_crash.bogus-field]]; nested: "
                    "NumberFormatException[For input string: "
                    '"18446744073709480735"]; '
                ),
            )
        return True

    es_class_mock().index.side_effect = mock_index

    # The save must succeed on the retry without the bogus field.
    storage.save_raw_and_processed(
        raw_crash=deepcopy(raw_crash),
        dumps=None,
        processed_crash=deepcopy(processed_crash),
        crash_id=crash_uuid,
    )

    # The final index call records the removed field and omits it from
    # the processed crash.
    expected_doc = {
        "crash_id": crash_uuid,
        "removed_fields": "processed_crash.bogus-field",
        "processed_crash": {
            "date_processed": string_to_datetime("2012-04-08 10:56:41.558922"),
            "foo": "bar",
        },
        "raw_crash": {},
    }
    es_class_mock().index.assert_called_with(
        index=self.config.elasticsearch.elasticsearch_index,
        doc_type=self.config.elasticsearch.elasticsearch_doctype,
        body=expected_doc,
        id=crash_uuid,
    )
def test_get_index_for_crash_static_name(self):
    """A static index name ignores the date argument entirely."""
    storage = ESCrashStorage(config=self.config)

    # Any value works here because the name contains no date pattern.
    index = storage.get_index_for_crash("some_date")

    # The name comes straight from the test base class config.
    assert type(index) is str
    assert index == "socorro_integration_test_reports"
def test_get_index_for_crash_static_name(self):
    """A static index name ignores the date argument entirely."""
    storage = ESCrashStorage(config=self.config)

    # Any value works here because the name contains no date pattern.
    index = storage.get_index_for_crash('some_date')

    # The name comes straight from the test base class config.
    assert type(index) is str
    assert index == 'socorro_integration_test_reports'
def test_indexing_bogus_number_field(self, es_class_mock, es_client_mock):
    """A field ES rejects as a bad number is removed and the crash re-indexed."""
    # ESCrashStorage uses the "limited backoff" transaction executor.
    # Zero out its delays so the test runs fast instead of sleeping
    # between retries.
    self.config['backoff_delays'] = [0, 0, 0]
    self.config['wait_log_interval'] = 0

    storage = ESCrashStorage(config=self.config)
    crash_uuid = a_processed_crash['uuid']

    raw_crash = {}
    processed_crash = {
        'date_processed': '2012-04-08 10:56:41.558922',
        'bogus-field': 1234567890,
        'foo': 'bar',
    }

    def mock_index(*args, **kwargs):
        # Reject the document until the bogus field has been removed.
        if 'bogus-field' in kwargs['body']['processed_crash']:
            raise elasticsearch.exceptions.TransportError(
                400,
                'RemoteTransportException[[i-f94dae31][inet[/172.31.1.54:'
                '9300]][indices:data/write/index]]; nested: '
                'MapperParsingException[failed to parse '
                '[processed_crash.bogus-field]]; nested: '
                'NumberFormatException[For input string: '
                '"18446744073709480735"]; ')
        return True

    es_class_mock().index.side_effect = mock_index

    # The save must succeed on the retry without the bogus field.
    storage.save_raw_and_processed(raw_crash, None, processed_crash, crash_uuid)

    # The final index call records the removed field and omits it from
    # the processed crash.
    expected_doc = {
        'crash_id': crash_uuid,
        'removed_fields': 'processed_crash.bogus-field',
        'processed_crash': {
            'date_processed': string_to_datetime('2012-04-08 10:56:41.558922'),
            'foo': 'bar',
        },
        'raw_crash': {},
    }
    es_class_mock().index.assert_called_with(
        index=self.config.elasticsearch.elasticsearch_index,
        doc_type=self.config.elasticsearch.elasticsearch_doctype,
        body=expected_doc,
        id=crash_uuid)
def test_indexing_unhandled_errors(self, es_class_mock, es_client_mock):
    """Test an index attempt that fails because of unhandled errors.

    Expected behavior is to fail indexing and raise the error.
    """
    es_storage = ESCrashStorage(config=self.config)
    crash_id = a_processed_crash['uuid']

    raw_crash = {}
    processed_crash = {
        'date_processed': '2012-04-08 10:56:41.558922',
    }

    # Test with an error from which a field name cannot be extracted.
    # NOTE: the original had an unreachable ``return True`` after the
    # raise; it has been removed as dead code.
    def mock_index_unparsable_error(*args, **kwargs):
        raise elasticsearch.exceptions.TransportError(
            400,
            'RemoteTransportException[[i-f94dae31][inet[/172.31.1.54:'
            '9300]][indices:data/write/index]]; nested: '
            'MapperParsingException[BROKEN PART]; NumberFormatException'
        )

    es_class_mock().index.side_effect = mock_index_unparsable_error
    with pytest.raises(elasticsearch.exceptions.TransportError):
        es_storage.save_raw_and_processed(
            raw_crash=deepcopy(raw_crash),
            dumps=None,
            processed_crash=deepcopy(processed_crash),
            crash_id=crash_id
        )

    # Test with an error that we do not handle.
    def mock_index_unhandled_error(*args, **kwargs):
        raise elasticsearch.exceptions.TransportError(
            400,
            'Something went wrong'
        )

    es_class_mock().index.side_effect = mock_index_unhandled_error
    with pytest.raises(elasticsearch.exceptions.TransportError):
        es_storage.save_raw_and_processed(
            raw_crash=deepcopy(raw_crash),
            dumps=None,
            processed_crash=deepcopy(processed_crash),
            crash_id=crash_id
        )
def test_success(self, espy_mock):
    """Test a successful index of a crash report"""
    raw_crash = {
        "BuildID": "20200605000",
        "ProductName": "Firefox",
        "ReleaseChannel": "nightly",
    }
    processed_crash = {
        "uuid": "936ce666-ff3b-4c7a-9674-367fe2120408",
        "json_dump": {},
        "date_processed": "2012-04-08 10:56:41.558922",
    }

    es_mock = mock.MagicMock()
    espy_mock.Elasticsearch.return_value = es_mock
    crash_uuid = processed_crash["uuid"]

    # The back-end ES object is mocked via the decorator, so this save
    # exercises the full crashstorage path without touching ES.
    storage = ESCrashStorage(config=self.config)
    storage.save_processed_crash(
        raw_crash=raw_crash,
        processed_crash=processed_crash,
    )

    # ConnectionContext should have instantiated the ES client...
    assert espy_mock.Elasticsearch.called
    # ...and IndexCreator the IndicesClient (part of the crashstorage
    # workflow).
    assert espy_mock.client.IndicesClient.called

    expected_processed_crash = deepcopy(processed_crash)
    reconstitute_datetimes(expected_processed_crash)

    # Verify the exact document handed to index().
    document = {
        "crash_id": crash_uuid,
        "processed_crash": expected_processed_crash,
        "raw_crash": raw_crash,
    }
    additional = {
        "doc_type": "crash_reports",
        "id": crash_uuid,
        "index": "socorro_integration_test_reports",
    }
    es_mock.index.assert_called_with(body=document, **additional)
def test_index_crash(self):
    """Mock test the entire crash submission mechanism"""
    storage = ESCrashStorage(config=self.config)

    # Mock out the function that would actually connect to ES so that
    # nothing real gets touched.
    storage._submit_crash_to_elasticsearch = mock.Mock()
    storage.save_processed_crash(
        raw_crash=deepcopy(a_raw_crash),
        processed_crash=deepcopy(a_processed_crash),
    )

    # The indexing function must have been called exactly once.
    assert storage._submit_crash_to_elasticsearch.call_count == 1
def test_indexing_unhandled_errors(self, es_class_mock, es_client_mock):
    """Test an index attempt that fails because of unhandled errors.

    Expected behavior is to fail indexing and raise the error.
    """
    # ESCrashStorage uses the "limited backoff" transaction executor.
    # Zero out its delays so the test fails fast instead of sleeping
    # between retries.
    self.config['backoff_delays'] = [0, 0, 0]
    self.config['wait_log_interval'] = 0

    es_storage = ESCrashStorage(config=self.config)
    crash_id = a_processed_crash['uuid']

    raw_crash = {}
    processed_crash = {
        'date_processed': '2012-04-08 10:56:41.558922',
    }

    # Test with an error from which a field name cannot be extracted.
    # NOTE: the original had an unreachable ``return True`` after the
    # raise; it has been removed as dead code.
    def mock_index_unparsable_error(*args, **kwargs):
        raise elasticsearch.exceptions.TransportError(
            400,
            'RemoteTransportException[[i-f94dae31][inet[/172.31.1.54:'
            '9300]][indices:data/write/index]]; nested: '
            'MapperParsingException[BROKEN PART]; NumberFormatException'
        )

    es_class_mock().index.side_effect = mock_index_unparsable_error
    assert_raises(
        elasticsearch.exceptions.TransportError,
        es_storage.save_raw_and_processed,
        raw_crash,
        None,
        processed_crash,
        crash_id
    )

    # Test with an error that we do not handle.
    def mock_index_unhandled_error(*args, **kwargs):
        raise elasticsearch.exceptions.TransportError(
            400,
            'Something went wrong'
        )

    es_class_mock().index.side_effect = mock_index_unhandled_error
    assert_raises(
        elasticsearch.exceptions.TransportError,
        es_storage.save_raw_and_processed,
        raw_crash,
        None,
        processed_crash,
        crash_id
    )
def test_fatal_failure(self, espy_mock):
    """Test an index attempt that fails catastrophically."""
    es_mock = mock.MagicMock()
    espy_mock.Elasticsearch.return_value = es_mock

    storage = ESCrashStorage(config=self.config)
    crash_uuid = a_processed_crash['uuid']

    # Every index attempt blows up with a generic exception.
    es_mock.index.side_effect = Exception('horrors')

    # The save must surface the failure rather than swallow it.
    assert_raises(
        Exception,
        storage.save_raw_and_processed,
        a_raw_crash,
        None,
        a_processed_crash,
        crash_uuid
    )
def test_index_crash(self):
    """Test indexing a crash document."""
    storage = ESCrashStorage(config=self.config)
    storage.save_raw_and_processed(
        raw_crash=a_raw_crash,
        dumps=None,
        processed_crash=a_processed_crash,
        crash_id=a_processed_crash['uuid']
    )

    # Fetch the document back to prove it was indexed.
    ok_(
        self.es_client.get(
            index=self.config.elasticsearch.elasticsearch_index,
            id=a_processed_crash['uuid']
        )
    )
def test_crash_size_capture(self):
    """Verify we capture raw/processed crash sizes in ES crashstorage"""
    storage = ESCrashStorage(config=self.config)
    storage._submit_crash_to_elasticsearch = mock.Mock()
    storage.save_raw_and_processed(
        raw_crash=a_raw_crash,
        dumps=None,
        processed_crash=a_processed_crash,
        crash_id=a_processed_crash['uuid']
    )

    recorded_calls = [str(call) for call in self.config.metrics.mock_calls]

    # NOTE(willkg): The sizes of these json documents depend on what's in
    # them. If we changed a_processed_crash and a_raw_crash, then these
    # numbers will change.
    assert "call.histogram('processor.es.raw_crash_size', 27)" in recorded_calls
    assert "call.histogram('processor.es.processed_crash_size', 1785)" in recorded_calls
def test_index_crash(self):
    """Mock test the entire crash submission mechanism"""
    storage = ESCrashStorage(config=self.config)

    # Mock out the function that would actually connect to ES so that
    # nothing real gets touched.
    storage._submit_crash_to_elasticsearch = mock.Mock()
    storage.save_raw_and_processed(
        raw_crash=deepcopy(a_raw_crash),
        dumps=None,
        processed_crash=deepcopy(a_processed_crash),
        crash_id=a_processed_crash['uuid']
    )

    # The indexing function must have been called exactly once.
    assert storage._submit_crash_to_elasticsearch.call_count == 1
def test_index_crash(self):
    """Mock test the entire crash submission mechanism."""
    storage = ESCrashStorage(config=self.config)

    # Mock out the function that would actually connect to ES so that
    # nothing real gets touched.
    storage._submit_crash_to_elasticsearch = mock.Mock()
    storage.save_raw_and_processed(
        raw_crash=a_raw_crash,
        dumps=None,
        processed_crash=a_processed_crash,
        crash_id=a_processed_crash['uuid']
    )

    # The indexing function must have been called exactly once.
    eq_(storage._submit_crash_to_elasticsearch.call_count, 1)
def test_success(self, espy_mock):
    """Test a successful index of a crash report."""
    es_mock = mock.MagicMock()
    espy_mock.Elasticsearch.return_value = es_mock

    storage = ESCrashStorage(config=self.config)
    crash_uuid = a_processed_crash['uuid']

    # The back-end ES object is mocked via the decorator, so this save
    # exercises the full crashstorage path without touching ES.
    storage.save_raw_and_processed(
        raw_crash=a_raw_crash,
        dumps=None,
        processed_crash=a_processed_crash,
        crash_id=crash_uuid,
    )

    # ConnectionContext should have instantiated the ES client...
    ok_(espy_mock.Elasticsearch.called)
    # ...and IndexCreator the IndicesClient (part of the crashstorage
    # workflow).
    ok_(espy_mock.client.IndicesClient.called)

    # Verify the exact document handed to index().
    document = {
        'crash_id': crash_uuid,
        'processed_crash': a_processed_crash,
        'raw_crash': a_raw_crash
    }
    additional = {
        'doc_type': 'crash_reports',
        'id': crash_uuid,
        'index': 'socorro_integration_test_reports'
    }
    es_mock.index.assert_called_with(body=document, **additional)
def test_crash_size_capture(self):
    """Verify we capture raw/processed crash sizes in ES crashstorage"""
    storage = ESCrashStorage(config=self.config)
    storage._submit_crash_to_elasticsearch = mock.Mock()
    storage.save_raw_and_processed(
        raw_crash=a_raw_crash,
        dumps=None,
        processed_crash=a_processed_crash,
        crash_id=a_processed_crash['uuid']
    )

    recorded = [str(call) for call in self.config.metrics.mock_calls]

    # NOTE(willkg): The sizes of these json documents depend on what's in
    # them. If we changed a_processed_crash and a_raw_crash, then these
    # numbers will change.
    assert "call.histogram('processor.es.raw_crash_size', 27)" in recorded
    assert "call.histogram('processor.es.processed_crash_size', 1785)" in recorded
def test_crash_size_capture(self):
    """Verify we capture raw/processed crash sizes in ES crashstorage"""
    with MetricsMock() as mm:
        storage = ESCrashStorage(config=self.config, namespace='processor.es')
        storage._submit_crash_to_elasticsearch = mock.Mock()
        storage.save_raw_and_processed(
            raw_crash=deepcopy(a_raw_crash),
            dumps=None,
            processed_crash=deepcopy(a_processed_crash),
            crash_id=a_processed_crash['uuid']
        )

        # NOTE(willkg): The sizes of these json documents depend on what's
        # in them. If we changed a_processed_crash and a_raw_crash, then
        # these numbers will change.
        assert mm.has_record('histogram', stat='processor.es.raw_crash_size', value=27)
        assert mm.has_record(
            'histogram', stat='processor.es.processed_crash_size', value=1738
        )
def test_index_crash(self):
    """Test indexing a crash document."""
    storage = ESCrashStorage(config=self.config)
    storage.save_raw_and_processed(
        raw_crash=deepcopy(a_raw_crash),
        dumps=None,
        processed_crash=deepcopy(a_processed_crash),
        crash_id=a_processed_crash['uuid']
    )

    # Fetch the document back to prove it was indexed.
    fetched = self.es_client.get(
        index=storage.es_context.get_index_template(),
        id=a_processed_crash['uuid']
    )
    assert fetched

    storage.close()
def test_index_crash(self):
    """Test indexing a crash document."""
    storage = ESCrashStorage(config=self.config)
    storage.save_raw_and_processed(
        raw_crash=a_raw_crash,
        dumps=None,
        processed_crash=a_processed_crash,
        crash_id=a_processed_crash['uuid']
    )

    # Fetch the document back to prove it was indexed.
    ok_(
        self.es_client.get(
            index=self.config.elasticsearch.elasticsearch_index,
            id=a_processed_crash['uuid']
        )
    )
def test_crash_size_capture(self):
    """Verify we capture raw/processed crash sizes in ES crashstorage"""
    with MetricsMock() as mm:
        es_storage = ESCrashStorage(config=self.config, namespace="processor.es")
        es_storage._submit_crash_to_elasticsearch = mock.Mock()
        es_storage.save_processed_crash(
            raw_crash=deepcopy(a_raw_crash),
            processed_crash=deepcopy(a_processed_crash),
        )

        # NOTE(willkg): The sizes of these json documents depend on what's
        # in them. If we changed a_processed_crash and a_raw_crash, then
        # these numbers will change.
        # NOTE: removed a leftover debugging call (mm.print_records())
        # that was spamming test output.
        mm.assert_histogram("processor.es.raw_crash_size", value=27)
        mm.assert_histogram("processor.es.processed_crash_size", value=1721)
def test_fatal_failure(self, espy_mock):
    """Test an index attempt that fails catastrophically"""
    sub_mock = mock.MagicMock()
    espy_mock.Elasticsearch.return_value = sub_mock

    es_storage = ESCrashStorage(config=self.config)
    crash_id = a_processed_crash['uuid']

    # Every index attempt blows up with a generic exception.
    failure_exception = Exception('horrors')
    sub_mock.index.side_effect = failure_exception

    # Submit a crash and ensure that it failed.
    # FIX: the keyword was previously misspelled ("processed_crsah"), so
    # the call raised TypeError and pytest.raises(Exception) passed
    # without ever exercising the indexing-failure path.
    with pytest.raises(Exception):
        es_storage.save_raw_and_processed(
            raw_crash=deepcopy(a_raw_crash),
            dumps=None,
            processed_crash=deepcopy(a_processed_crash),
            crash_id=crash_id)
def test_success(self, espy_mock):
    """Test a successful index of a crash report"""
    es_mock = mock.MagicMock()
    espy_mock.Elasticsearch.return_value = es_mock

    crash_uuid = a_processed_crash["uuid"]

    # The back-end ES object is mocked via the decorator, so this save
    # exercises the full crashstorage path without touching ES.
    storage = ESCrashStorage(config=self.config)
    storage.save_raw_and_processed(
        raw_crash=deepcopy(a_raw_crash),
        dumps=None,
        processed_crash=deepcopy(a_processed_crash),
        crash_id=crash_uuid,
    )

    # ConnectionContext should have instantiated the ES client...
    assert espy_mock.Elasticsearch.called
    # ...and IndexCreator the IndicesClient (part of the crashstorage
    # workflow).
    assert espy_mock.client.IndicesClient.called

    expected_processed_crash = deepcopy(a_processed_crash)
    reconstitute_datetimes(expected_processed_crash)

    # Verify the exact document handed to index().
    document = {
        "crash_id": crash_uuid,
        "processed_crash": expected_processed_crash,
        "raw_crash": a_raw_crash,
    }
    additional = {
        "doc_type": "crash_reports",
        "id": crash_uuid,
        "index": "socorro_integration_test_reports",
    }
    es_mock.index.assert_called_with(body=document, **additional)
def test_crash_size_capture(self):
    """Verify we capture raw/processed crash sizes in ES crashstorage"""
    raw_crash = {"ProductName": "Firefox", "ReleaseChannel": "nightly"}
    processed_crash = {
        "date_processed": "2012-04-08 10:56:41.558922",
        "uuid": "936ce666-ff3b-4c7a-9674-367fe2120408",
    }

    with MetricsMock() as mm:
        storage = ESCrashStorage(config=self.config, namespace="processor.es")
        storage._submit_crash_to_elasticsearch = mock.Mock()
        storage.save_processed_crash(
            raw_crash=raw_crash,
            processed_crash=processed_crash,
        )

        # The expected values are the serialized sizes of the two crash
        # documents above.
        mm.assert_histogram("processor.es.raw_crash_size", value=55)
        mm.assert_histogram("processor.es.processed_crash_size", value=96)
def test_fatal_failure(self, espy_mock):
    """Test an index attempt that fails catastrophically"""
    sub_mock = mock.MagicMock()
    espy_mock.Elasticsearch.return_value = sub_mock

    es_storage = ESCrashStorage(config=self.config)
    crash_id = a_processed_crash['uuid']

    # Every index attempt blows up with a generic exception.
    failure_exception = Exception('horrors')
    sub_mock.index.side_effect = failure_exception

    # Submit a crash and ensure that it failed.
    # FIX: the keyword was previously misspelled ("processed_crsah"), so
    # the call raised TypeError and pytest.raises(Exception) passed
    # without ever exercising the indexing-failure path.
    with pytest.raises(Exception):
        es_storage.save_raw_and_processed(
            raw_crash=deepcopy(a_raw_crash),
            dumps=None,
            processed_crash=deepcopy(a_processed_crash),
            crash_id=crash_id
        )
def test_index_data_capture(self):
    """Verify we capture index data in ES crashstorage"""
    with MetricsMock() as mm:
        storage = ESCrashStorage(config=self.config, namespace="processor.es")
        fake_conn = mock.Mock()

        # A successful index call...
        storage._index_crash(
            connection=fake_conn,
            es_index=None,
            es_doctype=None,
            crash_document=None,
            crash_id=None,
        )

        # ...followed by a failing one.
        fake_conn.index.side_effect = Exception
        with pytest.raises(Exception):
            storage._index_crash(
                connection=fake_conn,
                es_index=None,
                es_doctype=None,
                crash_document=None,
                crash_id=None,
            )

        # Exactly one timing record per outcome.
        mm.assert_histogram_once("processor.es.index", tags=["outcome:successful"])
        mm.assert_histogram_once("processor.es.index", tags=["outcome:failed"])
def test_index_crash_with_bad_keys(self):
    """Invalid raw-crash keys are dropped before the document is indexed."""
    bad_key_raw_crash = {
        "foo": "alpha",
        "": "bad key 1",
        ".": "bad key 2",
        "na\xefve": "bad key 3",
    }

    storage = ESCrashStorage(config=self.config)
    storage.save_raw_and_processed(
        raw_crash=deepcopy(bad_key_raw_crash),
        dumps=None,
        processed_crash=deepcopy(a_processed_crash),
        crash_id=a_processed_crash["uuid"],
    )

    # Fetch the document back to prove it was indexed.
    doc = self.es_client.get(
        index=self.config.elasticsearch.elasticsearch_index,
        id=a_processed_crash["uuid"],
    )

    # Only the valid key should have survived.
    indexed_raw_crash = doc["_source"]["raw_crash"]
    assert indexed_raw_crash == {"foo": "alpha"}

    storage.close()
def test_index_data_capture(self):
    """Verify we capture index data in ES crashstorage"""
    with MetricsMock() as mm:
        storage = ESCrashStorage(config=self.config, namespace='processor.es')
        fake_conn = mock.Mock()

        # A successful index call...
        storage._index_crash(
            connection=fake_conn,
            es_index=None,
            es_doctype=None,
            crash_document=None,
            crash_id=None
        )

        # ...followed by a failing one.
        fake_conn.index.side_effect = Exception
        with pytest.raises(Exception):
            storage._index_crash(
                connection=fake_conn,
                es_index=None,
                es_doctype=None,
                crash_document=None,
                crash_id=None
            )

        # Exactly one metric record per outcome.
        assert len(
            mm.filter_records(stat='processor.es.index', tags=['outcome:successful'])
        ) == 1
        assert len(
            mm.filter_records(stat='processor.es.index', tags=['outcome:failed'])
        ) == 1
def setup_method(self):
    """Reset ES state: drop every test index, then create a fresh pair."""
    super().setup_method()
    self.config = self.get_base_config()
    self.es_context = ConnectionContext(self.config)
    self.crashstorage = ESCrashStorage(
        config=self.get_tuned_config(ESCrashStorage)
    )
    self.index_client = self.es_context.indices_client()
    self.conn = self.es_context.connection()

    # Start from a clean slate: remove whatever indices already exist.
    for index_name in self.es_context.get_indices():
        print(f"setup: delete test index: {index_name}")
        self.es_context.delete_index(index_name)

    # Create one index for this week and one for last week.
    for index_name in (
        self.es_context.get_index_for_date(utc_now()),
        self.es_context.get_index_for_date(utc_now() - timedelta(days=7)),
    ):
        print(f"setup: creating index: {index_name}")
        self.es_context.create_index(index_name)
def test_index_crash(self):
    """Saving a processed crash makes it retrievable from Elasticsearch."""
    storage = ESCrashStorage(config=self.config)
    storage.save_processed_crash(
        raw_crash=deepcopy(a_raw_crash),
        processed_crash=deepcopy(a_processed_crash),
    )

    # A successful get() proves the document landed in the index.
    assert self.es_client.get(
        index=storage.es_context.get_index_template(),
        id=a_processed_crash["uuid"],
    )
    storage.close()
def test_success_operational_exception(self, espy_mock):
    """Test an index attempt that experiences an operational exception that
    it managed to recover from.
    """
    # It's mocks all the way down.
    sub_mock = mock.MagicMock()
    espy_mock.Elasticsearch.return_value = sub_mock

    # ESCrashStorage uses the "limited backoff" transaction executor.
    # In real life this will retry operational exceptions over time, but
    # in unit tests, we just want it to hurry up and fail.
    backoff_config = self.config
    backoff_config['backoff_delays'] = [0, 0, 0]
    backoff_config['wait_log_interval'] = 0

    es_storage = ESCrashStorage(config=self.config)

    crash_id = a_processed_crash['uuid']

    # The transaction executor will try three times, so we will fail
    # twice for the purposes of this test.  Raise exception *instances*
    # (matching test_fatal_operational_exception) rather than bare classes.
    bad_results = [
        elasticsearch.exceptions.ConnectionError(500, ''),
        elasticsearch.exceptions.ConnectionError(500, ''),
    ]

    # Replace the underlying index method with this function: it raises
    # the next queued exception, then behaves normally once the queue
    # is exhausted.
    def esindex_fn(*args, **kwargs):
        try:
            result = bad_results.pop(0)
            raise result
        except IndexError:
            return sub_mock.index

    sub_mock.index.side_effect = esindex_fn

    # Submit a crash like normal, except that the index method will
    # raise twice, then pass as normal.
    es_storage.save_raw_and_processed(
        raw_crash=a_raw_crash,
        dumps=None,
        processed_crash=a_processed_crash,
        crash_id=crash_id,
    )

    # The actual call to index the document (crash).
    document = {
        'crash_id': crash_id,
        'processed_crash': a_processed_crash,
        'raw_crash': a_raw_crash,
    }
    additional = {
        'doc_type': 'crash_reports',
        'id': crash_id,
        'index': 'socorro_integration_test_reports',
    }

    sub_mock.index.assert_called_with(body=document, **additional)
def test_indexing_bogus_number_field(self, es_class_mock, es_client_mock):
    """Test an index attempt that fails because of a bogus number field.
    Expected behavior is to remove that field and retry indexing.
    """
    # ESCrashStorage uses the "limited backoff" transaction executor.
    # In real life this will retry operational exceptions over time, but
    # in unit tests, we just want it to hurry up and fail.
    backoff_config = self.config
    backoff_config['backoff_delays'] = [0, 0, 0]
    backoff_config['wait_log_interval'] = 0

    es_storage = ESCrashStorage(config=self.config)

    crash_id = a_processed_crash['uuid']
    raw_crash = {}
    processed_crash = {
        'date_processed': '2012-04-08 10:56:41.558922',
        # This field triggers the mapper error below; the storage class
        # is expected to strip it and retry.
        'bogus-field': 1234567890,
        'foo': 'bar',
    }

    def mock_index(*args, **kwargs):
        # Reject the document for as long as the bogus field is present,
        # mimicking the mapper error Elasticsearch returns.
        if 'bogus-field' in kwargs['body']['processed_crash']:
            raise elasticsearch.exceptions.TransportError(
                400,
                'RemoteTransportException[[i-f94dae31][inet[/172.31.1.54:'
                '9300]][indices:data/write/index]]; nested: '
                'MapperParsingException[failed to parse '
                '[processed_crash.bogus-field]]; nested: '
                'NumberFormatException[For input string: '
                '"18446744073709480735"]; '
            )

        return True

    es_class_mock().index.side_effect = mock_index

    # Submit a crash and ensure that it succeeds.  Use keyword arguments
    # and deepcopies for consistency with test_indexing_bogus_string_field
    # (and so the module-level fixtures can't be mutated).
    es_storage.save_raw_and_processed(
        raw_crash=deepcopy(raw_crash),
        dumps=None,
        processed_crash=deepcopy(processed_crash),
        crash_id=crash_id,
    )

    # The indexed document must have the bogus field removed and recorded
    # in 'removed_fields'.
    expected_doc = {
        'crash_id': crash_id,
        'removed_fields': 'processed_crash.bogus-field',
        'processed_crash': {
            'date_processed': string_to_datetime(
                '2012-04-08 10:56:41.558922'
            ),
            'foo': 'bar',
        },
        'raw_crash': {},
    }
    es_class_mock().index.assert_called_with(
        index=self.config.elasticsearch.elasticsearch_index,
        doc_type=self.config.elasticsearch.elasticsearch_doctype,
        body=expected_doc,
        id=crash_id,
    )
def test_indexing_bogus_string_field(self, es_class_mock, es_client_mock):
    """Test an index attempt that fails because of a bogus string field.
    Expected behavior is to remove that field and retry indexing.
    """
    es_storage = ESCrashStorage(config=self.config)

    crash_id = a_processed_crash['uuid']
    raw_crash = {}
    processed_crash = {
        'date_processed': '2012-04-08 10:56:41.558922',
        # Stands in for a string too large for an ES keyword field; the
        # storage class should strip it and retry.
        'bogus-field': 'some bogus value',
        'foo': 'bar',
    }

    def mock_index(*args, **kwargs):
        # Reject the document for as long as the bogus field is present,
        # mimicking the "immense term" error Elasticsearch returns.
        if 'bogus-field' in kwargs['body']['processed_crash']:
            raise elasticsearch.exceptions.TransportError(
                400,
                'RemoteTransportException[[i-5exxx97][inet[/172.3.9.12:'
                '9300]][indices:data/write/index]]; nested: '
                'IllegalArgumentException[Document contains at least one '
                'immense term in field="processed_crash.bogus-field.full" '
                '(whose UTF8 encoding is longer than the max length 32766)'
                ', all of which were skipped. Please correct the analyzer'
                ' to not produce such terms. The prefix of the first '
                'immense term is: \'[124, 91, 48, 93, 91, 71, 70, 88, 49, '
                '45, 93, 58, 32, 65, 116, 116, 101, 109, 112, 116, 32, '
                '116, 111, 32, 99, 114, 101, 97, 116, 101]...\', original '
                'message: bytes can be at most 32766 in length; got 98489]'
                '; nested: MaxBytesLengthExceededException'
                '[bytes can be at most 32766 in length; got 98489]; '
            )

        return True

    es_class_mock().index.side_effect = mock_index

    # Submit a crash and ensure that it succeeds.
    es_storage.save_raw_and_processed(
        raw_crash=deepcopy(raw_crash),
        dumps=None,
        processed_crash=deepcopy(processed_crash),
        crash_id=crash_id
    )

    # The indexed document must have the bogus field removed and recorded
    # in 'removed_fields'.
    expected_doc = {
        'crash_id': crash_id,
        'removed_fields': 'processed_crash.bogus-field',
        'processed_crash': {
            'date_processed': string_to_datetime(
                '2012-04-08 10:56:41.558922'
            ),
            'foo': 'bar',
        },
        'raw_crash': {},
    }
    es_class_mock().index.assert_called_with(
        index=self.config.elasticsearch.elasticsearch_index,
        doc_type=self.config.elasticsearch.elasticsearch_doctype,
        body=expected_doc,
        id=crash_id
    )