def test_index_crash_indexable_keys(self):
    """Indexing drops crash keys that aren't valid super search fields."""
    # Check super_search_fields.py for valid keys to update this
    raw_crash = {
        "InvalidKey": "alpha",
        "BuildID": "20200506000000",
    }
    processed_crash = {
        "AnotherInvalidKey": "alpha",
        "date_processed": "2012-04-08 10:56:41.558922",
        "uuid": "936ce666-ff3b-4c7a-9674-367fe2120408",
    }
    es_storage = ESCrashStorage(config=self.config)
    es_storage.save_processed_crash(
        raw_crash=raw_crash,
        processed_crash=processed_crash,
    )
    # Ensure that the document was indexed by attempting to retrieve it.
    doc = self.es_client.get(
        index=self.config.elasticsearch.elasticsearch_index,
        id=processed_crash["uuid"],
    )
    # Verify keys that aren't in super_search_fields aren't in the raw or processed
    # crash parts
    raw_crash = doc["_source"]["raw_crash"]
    assert raw_crash == {"BuildID": "20200506000000"}
    processed_crash = doc["_source"]["processed_crash"]
    # NOTE(review): date_processed comes back ISO-8601 with a UTC offset,
    # so indexing presumably normalizes the datetime format.
    assert processed_crash == {
        "uuid": "936ce666-ff3b-4c7a-9674-367fe2120408",
        "date_processed": "2012-04-08T10:56:41.558922+00:00",
    }
def test_index_crash_indexable_keys(self):
    """Indexing drops crash keys that aren't valid super search fields."""
    # Check super_search_fields.py for valid keys to update this
    raw_crash = {
        "InvalidKey": "alpha",
        "BuildID": "20200506000000",
    }
    processed_crash = {
        "AnotherInvalidKey": "alpha",
        "date_processed": date_to_string(utc_now()),
        "uuid": "936ce666-ff3b-4c7a-9674-367fe2120408",
    }
    es_storage = ESCrashStorage(config=self.config)
    es_storage.save_processed_crash(
        raw_crash=raw_crash,
        processed_crash=processed_crash,
    )
    # Ensure that the document was indexed by attempting to retrieve it.
    doc = self.conn.get(
        index=self.es_context.get_index_for_date(utc_now()),
        id=processed_crash["uuid"],
    )
    # Verify keys that aren't in super_search_fields aren't in the raw or
    # processed crash parts.
    # NOTE: sorted() already returns a list, so the previous
    # list(sorted(...)) wrapper was redundant.
    raw_crash = doc["_source"]["raw_crash"]
    assert sorted(raw_crash.keys()) == ["BuildID"]
    processed_crash = doc["_source"]["processed_crash"]
    assert sorted(processed_crash.keys()) == ["date_processed", "uuid"]
def test_indexing_bogus_number_field(self, es_class_mock, es_client_mock):
    """Test an index attempt that fails because of a bogus number field.

    Expected behavior is to remove that field and retry indexing.
    """
    es_storage = ESCrashStorage(config=self.config)
    crash_id = SAMPLE_PROCESSED_CRASH["uuid"]
    raw_crash = {}
    processed_crash = {
        "date_processed": date_to_string(utc_now()),
        # NOTE(willkg): This needs to be a key that's in super_search_fields, but is
        # rejected by our mock_index call--this is wildly contrived.
        "version": 1234567890,
        "uuid": crash_id,
    }

    def mock_index(*args, **kwargs):
        # Simulate ES rejecting the document while "version" is still in it;
        # once the storage layer strips the field, indexing succeeds.
        if "version" in kwargs["body"]["processed_crash"]:
            raise elasticsearch.exceptions.TransportError(
                400,
                (
                    "RemoteTransportException[[i-f94dae31][inet[/172.31.1.54:"
                    "9300]][indices:data/write/index]]; nested: "
                    "MapperParsingException[failed to parse "
                    "[processed_crash.version]]; nested: "
                    "NumberFormatException[For input string: "
                    '"18446744073709480735"]; '
                ),
            )
        return True

    es_class_mock().index.side_effect = mock_index
    # Submit a crash and ensure that it succeeds.
    es_storage.save_processed_crash(
        raw_crash=deepcopy(raw_crash),
        processed_crash=deepcopy(processed_crash),
    )
    # The retried document must record the removed field and omit it from
    # the processed crash.
    expected_doc = {
        "crash_id": crash_id,
        "removed_fields": "processed_crash.version",
        "processed_crash": {
            "date_processed": string_to_datetime(processed_crash["date_processed"]),
            "uuid": crash_id,
        },
        "raw_crash": {},
    }
    es_class_mock().index.assert_called_with(
        index=self.es_context.get_index_for_date(utc_now()),
        doc_type=self.config.elasticsearch.elasticsearch_doctype,
        body=expected_doc,
        id=crash_id,
    )
def test_indexing_bogus_number_field(self, es_class_mock, es_client_mock):
    """Test an index attempt that fails because of a bogus number field.

    Expected behavior is to remove that field and retry indexing.
    """
    es_storage = ESCrashStorage(config=self.config)
    crash_id = a_processed_crash["uuid"]
    raw_crash = {}
    processed_crash = {
        "date_processed": "2012-04-08 10:56:41.558922",
        "bogus-field": 1234567890,
        "foo": "bar",
        "uuid": crash_id,
    }

    def mock_index(*args, **kwargs):
        # Simulate ES rejecting the document while "bogus-field" is present;
        # once the storage layer strips the field, indexing succeeds.
        if "bogus-field" in kwargs["body"]["processed_crash"]:
            raise elasticsearch.exceptions.TransportError(
                400,
                (
                    "RemoteTransportException[[i-f94dae31][inet[/172.31.1.54:"
                    "9300]][indices:data/write/index]]; nested: "
                    "MapperParsingException[failed to parse "
                    "[processed_crash.bogus-field]]; nested: "
                    "NumberFormatException[For input string: "
                    '"18446744073709480735"]; '
                ),
            )
        return True

    es_class_mock().index.side_effect = mock_index
    # Submit a crash and ensure that it succeeds.
    es_storage.save_processed_crash(
        raw_crash=deepcopy(raw_crash),
        processed_crash=deepcopy(processed_crash),
    )
    # The retried document must record the removed field and keep the rest.
    expected_doc = {
        "crash_id": crash_id,
        "removed_fields": "processed_crash.bogus-field",
        "processed_crash": {
            "date_processed": string_to_datetime("2012-04-08 10:56:41.558922"),
            "foo": "bar",
            "uuid": crash_id,
        },
        "raw_crash": {},
    }
    es_class_mock().index.assert_called_with(
        index=self.config.elasticsearch.elasticsearch_index,
        doc_type=self.config.elasticsearch.elasticsearch_doctype,
        body=expected_doc,
        id=crash_id,
    )
def test_success(self, espy_mock):
    """Test a successful index of a crash report"""
    raw_crash = {
        "BuildID": "20200605000",
        "ProductName": "Firefox",
        "ReleaseChannel": "nightly",
    }
    processed_crash = {
        "uuid": "936ce666-ff3b-4c7a-9674-367fe2120408",
        "json_dump": {},
        "date_processed": "2012-04-08 10:56:41.558922",
    }
    sub_mock = mock.MagicMock()
    espy_mock.Elasticsearch.return_value = sub_mock
    crash_id = processed_crash["uuid"]
    # Submit a crash like normal, except that the back-end ES object is
    # mocked (see the decorator above).
    es_storage = ESCrashStorage(config=self.config)
    es_storage.save_processed_crash(
        raw_crash=raw_crash,
        processed_crash=processed_crash,
    )
    # Ensure that the ES objects were instantiated by ConnectionContext.
    assert espy_mock.Elasticsearch.called
    # Ensure that the IndicesClient was also instantiated (this happens in
    # IndexCreator but is part of the crashstorage workflow).
    assert espy_mock.client.IndicesClient.called
    # Expected document carries the processed crash with its date strings
    # converted back to datetime objects.
    expected_processed_crash = deepcopy(processed_crash)
    reconstitute_datetimes(expected_processed_crash)
    # The actual call to index the document (crash).
    document = {
        "crash_id": crash_id,
        "processed_crash": expected_processed_crash,
        "raw_crash": raw_crash,
    }
    additional = {
        "doc_type": "crash_reports",
        "id": crash_id,
        "index": "socorro_integration_test_reports",
    }
    sub_mock.index.assert_called_with(body=document, **additional)
def test_index_crash(self):
    """Mock test the entire crash submission mechanism"""
    storage = ESCrashStorage(config=self.config)
    # Replace the method that would actually connect to ES so that no real
    # cluster is ever touched by this test.
    submit_mock = mock.Mock()
    storage._submit_crash_to_elasticsearch = submit_mock
    storage.save_processed_crash(
        raw_crash=deepcopy(a_raw_crash),
        processed_crash=deepcopy(a_processed_crash),
    )
    # The indexing helper must have been invoked exactly once.
    assert submit_mock.call_count == 1
def test_index_crash(self):
    """Test indexing a crash document."""
    es_storage = ESCrashStorage(config=self.config)
    es_storage.save_processed_crash(
        raw_crash=deepcopy(a_raw_crash),
        processed_crash=deepcopy(a_processed_crash),
    )
    # Ensure that the document was indexed by attempting to retrieve it;
    # es_client.get raises if the document is missing.
    assert self.es_client.get(
        index=es_storage.es_context.get_index_template(),
        id=a_processed_crash["uuid"],
    )
    es_storage.close()
def test_indexing_unhandled_errors(self, es_class_mock, es_client_mock):
    """Test an index attempt that fails because of unhandled errors.

    Expected behavior is to fail indexing and raise the error.
    """
    es_storage = ESCrashStorage(config=self.config)
    raw_crash = {}
    processed_crash = {
        "uuid": "9d8e7127-9d98-4d92-8ab1-065982200317",
        "date_processed": "2012-04-08 10:56:41.558922",
    }

    # Test with an error from which a field name cannot be extracted.
    # NOTE: the unreachable "return True" statements that followed the
    # raises in both mocks were dead code and have been removed.
    def mock_index_unparsable_error(*args, **kwargs):
        raise elasticsearch.exceptions.TransportError(
            400,
            "RemoteTransportException[[i-f94dae31][inet[/172.31.1.54:"
            "9300]][indices:data/write/index]]; nested: "
            "MapperParsingException[BROKEN PART]; NumberFormatException",
        )

    es_class_mock().index.side_effect = mock_index_unparsable_error
    with pytest.raises(elasticsearch.exceptions.TransportError):
        es_storage.save_processed_crash(
            raw_crash=deepcopy(raw_crash),
            processed_crash=deepcopy(processed_crash),
        )

    # Test with an error that we do not handle.
    def mock_index_unhandled_error(*args, **kwargs):
        raise elasticsearch.exceptions.TransportError(400, "Something went wrong")

    es_class_mock().index.side_effect = mock_index_unhandled_error
    with pytest.raises(elasticsearch.exceptions.TransportError):
        es_storage.save_processed_crash(
            raw_crash=deepcopy(raw_crash),
            processed_crash=deepcopy(processed_crash),
        )
def test_index_crash(self):
    """Test indexing a crash document."""
    es_storage = ESCrashStorage(config=self.config)
    raw_crash = deepcopy(SAMPLE_RAW_CRASH)
    processed_crash = deepcopy(SAMPLE_PROCESSED_CRASH)
    # Use a current date so the crash lands in the index the test harness
    # created for this week.
    processed_crash["date_processed"] = date_to_string(utc_now())
    es_storage.save_processed_crash(
        raw_crash=raw_crash,
        processed_crash=processed_crash,
    )
    # Ensure that the document was indexed by attempting to retrieve it;
    # conn.get raises if the document is missing.
    assert self.conn.get(
        index=self.es_context.get_index_for_date(utc_now()),
        id=SAMPLE_PROCESSED_CRASH["uuid"],
    )
    es_storage.close()
def test_crash_size_capture(self):
    """Verify we capture raw/processed crash sizes in ES crashstorage"""
    with MetricsMock() as mm:
        es_storage = ESCrashStorage(config=self.config, namespace="processor.es")
        # Mock out the actual submission--only the metrics emission is
        # under test here.
        es_storage._submit_crash_to_elasticsearch = mock.Mock()
        es_storage.save_processed_crash(
            raw_crash=deepcopy(a_raw_crash),
            processed_crash=deepcopy(a_processed_crash),
        )
        # NOTE(willkg): The sizes of these json documents depend on what's
        # in them. If we changed a_processed_crash and a_raw_crash, then
        # these numbers will change.
        # NOTE: removed a leftover mm.print_records() debugging call.
        mm.assert_histogram("processor.es.raw_crash_size", value=27)
        mm.assert_histogram("processor.es.processed_crash_size", value=1721)
def test_crash_size_capture(self):
    """Verify we capture raw/processed crash sizes in ES crashstorage"""
    raw_crash = {"ProductName": "Firefox", "ReleaseChannel": "nightly"}
    processed_crash = {
        "date_processed": "2012-04-08 10:56:41.558922",
        "uuid": "936ce666-ff3b-4c7a-9674-367fe2120408",
    }
    with MetricsMock() as mm:
        es_storage = ESCrashStorage(config=self.config, namespace="processor.es")
        # Mock out the actual submission--only the metrics emission is
        # under test here.
        es_storage._submit_crash_to_elasticsearch = mock.Mock()
        es_storage.save_processed_crash(
            raw_crash=raw_crash,
            processed_crash=processed_crash,
        )
        # The expected sizes depend on the fixture dicts above --
        # presumably the serialized document lengths; if the fixtures
        # change, these numbers change too.
        mm.assert_histogram("processor.es.raw_crash_size", value=55)
        mm.assert_histogram("processor.es.processed_crash_size", value=96)
def test_fatal_failure(self, espy_mock):
    """Test an index attempt that fails catastrophically"""
    sub_mock = mock.MagicMock()
    espy_mock.Elasticsearch.return_value = sub_mock
    es_storage = ESCrashStorage(config=self.config)
    # Oh the humanity!
    failure_exception = Exception("horrors")
    sub_mock.index.side_effect = failure_exception
    # Submit a crash and ensure that it failed.
    # NOTE: the original call misspelled the keyword as "processed_crsah"
    # (and passed stale dumps/crash_id kwargs), so pytest.raises was
    # satisfied by a TypeError rather than the mocked index failure. The
    # call now matches the signature used by every sibling test.
    with pytest.raises(Exception):
        es_storage.save_processed_crash(
            raw_crash=deepcopy(a_raw_crash),
            processed_crash=deepcopy(a_processed_crash),
        )
def test_success(self, espy_mock):
    """Test a successful index of a crash report"""
    sub_mock = mock.MagicMock()
    espy_mock.Elasticsearch.return_value = sub_mock
    crash_id = a_processed_crash["uuid"]
    # Submit a crash like normal, except that the back-end ES object is
    # mocked (see the decorator above).
    es_storage = ESCrashStorage(config=self.config)
    es_storage.save_processed_crash(
        raw_crash=deepcopy(a_raw_crash),
        processed_crash=deepcopy(a_processed_crash),
    )
    # Ensure that the ES objects were instantiated by ConnectionContext.
    assert espy_mock.Elasticsearch.called
    # Ensure that the IndicesClient was also instantiated (this happens in
    # IndexCreator but is part of the crashstorage workflow).
    assert espy_mock.client.IndicesClient.called
    # Expected document carries the processed crash with its date strings
    # converted back to datetime objects.
    expected_processed_crash = deepcopy(a_processed_crash)
    reconstitute_datetimes(expected_processed_crash)
    # The actual call to index the document (crash).
    document = {
        "crash_id": crash_id,
        "processed_crash": expected_processed_crash,
        "raw_crash": a_raw_crash,
    }
    additional = {
        "doc_type": "crash_reports",
        "id": crash_id,
        "index": "socorro_integration_test_reports",
    }
    sub_mock.index.assert_called_with(body=document, **additional)
def test_index_crash_with_bad_keys(self):
    """Indexing a raw crash with malformed keys drops those keys."""
    a_raw_crash_with_bad_keys = {
        "foo": "alpha",
        # Empty key, dot-only key, and a key with a non-ASCII byte
        # ("na\xefve") should all be stripped before indexing.
        "": "bad key 1",
        ".": "bad key 2",
        "na\xefve": "bad key 3",
    }
    es_storage = ESCrashStorage(config=self.config)
    es_storage.save_processed_crash(
        raw_crash=deepcopy(a_raw_crash_with_bad_keys),
        processed_crash=deepcopy(a_processed_crash),
    )
    # Ensure that the document was indexed by attempting to retrieve it.
    doc = self.es_client.get(
        index=self.config.elasticsearch.elasticsearch_index,
        id=a_processed_crash["uuid"],
    )
    # Make sure the invalid keys aren't in the crash.
    raw_crash = doc["_source"]["raw_crash"]
    assert raw_crash == {"foo": "alpha"}
    es_storage.close()
class ElasticsearchTestCase(TestCaseWithConfig):
    """Base class for Elastic Search related unit tests"""

    def setup_method(self):
        """Build config/context/storage and start from a clean set of indices."""
        super().setup_method()
        self.config = self.get_base_config()
        self.es_context = ConnectionContext(self.config)
        self.crashstorage = ESCrashStorage(config=self.get_tuned_config(ESCrashStorage))
        self.index_client = self.es_context.indices_client()
        self.conn = self.es_context.connection()

        # Delete everything there first
        for index_name in self.es_context.get_indices():
            print(f"setup: delete test index: {index_name}")
            self.es_context.delete_index(index_name)

        # Create indices for this week and last week so date-based tests
        # have somewhere to land.
        to_create = [
            self.es_context.get_index_for_date(utc_now()),
            self.es_context.get_index_for_date(utc_now() - timedelta(days=7)),
        ]
        for index_name in to_create:
            print(f"setup: creating index: {index_name}")
            self.es_context.create_index(index_name)

    def teardown_method(self):
        """Remove all indices created by the test."""
        for index_name in self.es_context.get_indices():
            print(f"teardown: delete test index: {index_name}")
            self.es_context.delete_index(index_name)
        super().teardown_method()

    def health_check(self):
        # Wait (up to 5s) for the cluster to be at least yellow.
        self.conn.cluster.health(wait_for_status="yellow", request_timeout=5)

    def get_url(self):
        """Returns the first url in the elasticsearch_urls list"""
        return self.config.elasticsearch_urls[0]

    def get_tuned_config(self, sources, extra_values=None):
        """Return a config built from DEFAULT_VALUES overlaid with extra_values."""
        values_source = DEFAULT_VALUES.copy()
        if extra_values:
            values_source.update(extra_values)
        return super().get_tuned_config(sources, values_source)

    def get_base_config(self, cls=ConnectionContext, es_index=None):
        """Return a base config, optionally overriding the ES index name."""
        extra_values = None
        if es_index:
            extra_values = {"resource.elasticsearch.elasticsearch_index": es_index}
        return self.get_tuned_config(cls, extra_values=extra_values)

    def index_crash(self, processed_crash=None, raw_crash=None, crash_id=None, refresh=True):
        """Index a single crash and refresh"""
        if crash_id is None:
            # Random but well-formed UUID string.
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))
        raw_crash = raw_crash or {}
        processed_crash = processed_crash or {}
        # NOTE(review): mutates caller-supplied dicts in place by stamping
        # ids onto them -- callers passing shared fixtures should copy first.
        raw_crash["uuid"] = crash_id
        processed_crash["crash_id"] = crash_id
        processed_crash["uuid"] = crash_id
        self.crashstorage.save_processed_crash(raw_crash, processed_crash)
        if refresh:
            self.es_context.refresh()
        return crash_id

    def index_many_crashes(self, number, processed_crash=None, raw_crash=None, loop_field=None):
        """Index multiple crashes and refresh at the end"""
        processed_crash = processed_crash or {}
        raw_crash = raw_crash or {}
        crash_ids = []
        for i in range(number):
            if loop_field is not None:
                # loop_field holds a %-format template; substitute the loop
                # index so each crash gets a distinct value.
                processed_copy = processed_crash.copy()
                processed_copy[loop_field] = processed_crash[loop_field] % i
            else:
                processed_copy = processed_crash
            # Refresh only once after the loop for speed.
            crash_ids.append(
                self.index_crash(
                    raw_crash=raw_crash, processed_crash=processed_copy, refresh=False
                )
            )
        self.es_context.refresh()
        return crash_ids
def test_indexing_unknown_property_field(self, es_class_mock, es_client_mock):
    """Test an index attempt that fails because of an unknown property.

    Expected behavior is to remove that field and retry indexing.
    """
    es_storage = ESCrashStorage(config=self.config)
    crash_id = create_new_ooid()
    raw_crash = {
        "ProductName": "Firefox",
    }
    processed_crash = {
        "date_processed": "2019-12-11 10:56:41.558922",
        # NOTE(willkg): This needs to be a key that's in super_search_fields, but is
        # rejected by our mock_index call--this is wildly contrived.
        "version": {"key": {"nested_key": "val"}},
        "uuid": crash_id,
    }

    def mock_index(*args, **kwargs):
        # Simulate ES rejecting the nested "version" value as an unknown
        # property; after the field is stripped, indexing succeeds.
        if "version" in kwargs["body"]["processed_crash"]:
            raise elasticsearch.exceptions.TransportError(
                400,
                (
                    "RemoteTransportException[[Madam Slay]"
                    "[inet[/172.31.22.181:9300]][indices:data/write/index]]; "
                    "nested: MapperParsingException"
                    "[failed to parse [processed_crash.version]]; "
                    "nested: "
                    "ElasticsearchIllegalArgumentException[unknown property [key]]"
                ),
            )
        return True

    es_class_mock().index.side_effect = mock_index
    # Submit crash and verify.
    es_storage.save_processed_crash(
        raw_crash=raw_crash,
        processed_crash=processed_crash,
    )
    # The retried document must record the removed field and omit it from
    # the processed crash.
    expected_doc = {
        "crash_id": crash_id,
        "removed_fields": "processed_crash.version",
        "processed_crash": {
            "date_processed": string_to_datetime("2019-12-11 10:56:41.558922"),
            "uuid": crash_id,
        },
        "raw_crash": raw_crash,
    }
    es_class_mock().index.assert_called_with(
        index=self.config.elasticsearch.elasticsearch_index,
        doc_type=self.config.elasticsearch.elasticsearch_doctype,
        body=expected_doc,
        id=crash_id,
    )
def test_indexing_bogus_string_field(self, es_class_mock, es_client_mock):
    """Test an index attempt that fails because of a bogus string field.

    Expected behavior is to remove that field and retry indexing.
    """
    es_storage = ESCrashStorage(config=self.config)
    crash_id = a_processed_crash["uuid"]
    raw_crash = {}
    processed_crash = {
        "date_processed": "2012-04-08 10:56:41.558922",
        # NOTE(willkg): This needs to be a key that's in super_search_fields, but is
        # rejected by our mock_index call--this is wildly contrived.
        "version": "some bogus value",
        "uuid": crash_id,
    }

    def mock_index(*args, **kwargs):
        # Simulate ES rejecting the document with an "immense term" error
        # while "version" is present; after the field is stripped, indexing
        # succeeds.
        if "version" in kwargs["body"]["processed_crash"]:
            raise elasticsearch.exceptions.TransportError(
                400,
                "RemoteTransportException[[i-5exxx97][inet[/172.3.9.12:"
                "9300]][indices:data/write/index]]; nested: "
                "IllegalArgumentException[Document contains at least one "
                'immense term in field="processed_crash.version.full" '
                "(whose UTF8 encoding is longer than the max length 32766)"
                ", all of which were skipped. Please correct the analyzer"
                " to not produce such terms. The prefix of the first "
                "immense term is: '[124, 91, 48, 93, 91, 71, 70, 88, 49, "
                "45, 93, 58, 32, 65, 116, 116, 101, 109, 112, 116, 32, "
                "116, 111, 32, 99, 114, 101, 97, 116, 101]...', original "
                "message: bytes can be at most 32766 in length; got 98489]"
                "; nested: MaxBytesLengthExceededException"
                "[bytes can be at most 32766 in length; got 98489]; ",
            )
        return True

    es_class_mock().index.side_effect = mock_index
    # Submit a crash and ensure that it succeeds.
    es_storage.save_processed_crash(
        raw_crash=deepcopy(raw_crash),
        processed_crash=deepcopy(processed_crash),
    )
    # The retried document must record the removed field and omit it from
    # the processed crash.
    expected_doc = {
        "crash_id": crash_id,
        "removed_fields": "processed_crash.version",
        "processed_crash": {
            "date_processed": string_to_datetime("2012-04-08 10:56:41.558922"),
            "uuid": crash_id,
        },
        "raw_crash": {},
    }
    es_class_mock().index.assert_called_with(
        index=self.config.elasticsearch.elasticsearch_index,
        doc_type=self.config.elasticsearch.elasticsearch_doctype,
        body=expected_doc,
        id=crash_id,
    )
def test_index_crash_mapping_keys(self):
    """Test indexing a crash that has keys not in the mapping

    Indexing a crash that has keys that aren't in the mapping for the index
    should cause those keys to be removed from the crash.
    """
    # The test harness creates an index for this week and last week. So let's create
    # one for 4 weeks ago.
    now = utc_now()
    four_weeks_ago = now - timedelta(days=28)
    field = "user_comments"
    # We're going to use a mapping that's what SuperSearchFields gives us, but
    # remove the user_comments field.
    mappings = SuperSearchFields(context=self.es_context).get_mapping()
    doctype = self.es_context.get_doctype()
    del mappings[doctype]["properties"]["processed_crash"]["properties"][field]
    # Create the index for 4 weeks ago
    self.es_context.create_index(
        index_name=self.es_context.get_index_for_date(four_weeks_ago),
        mappings=mappings,
    )
    es_storage = ESCrashStorage(config=self.config)
    # Create a crash for this week and save it
    now_uuid = "00000000-0000-0000-0000-000000120408"
    raw_crash = {
        "BuildID": "20200506000000",
    }
    processed_crash = {
        field: "this week",
        "date_processed": date_to_string(now),
        "uuid": now_uuid,
    }
    es_storage.save_processed_crash(
        raw_crash=raw_crash,
        processed_crash=processed_crash,
    )
    # Create a crash for four weeks ago with the bum mapping and save it
    old_uuid = "11111111-1111-1111-1111-111111120408"
    raw_crash = {
        "BuildID": "20200506000000",
    }
    processed_crash = {
        field: "this week",
        "date_processed": date_to_string(now - timedelta(days=28)),
        "uuid": old_uuid,
    }
    es_storage.save_processed_crash(
        raw_crash=raw_crash,
        processed_crash=processed_crash,
    )
    # Make both documents visible to search/get.
    self.es_context.refresh()
    # Retrieve the document from this week and verify it has the user_comments
    # field
    doc = self.conn.get(
        index=self.es_context.get_index_for_date(now),
        id=now_uuid,
    )
    assert field in doc["_source"]["processed_crash"]
    # Retrieve the document from four weeks ago and verify it doesn't have the
    # user_comments field
    doc = self.conn.get(
        index=self.es_context.get_index_for_date(four_weeks_ago),
        id=old_uuid,
    )
    assert field not in doc["_source"]["processed_crash"]