def _submit_crash_to_elasticsearch(self, queue, crash_document):
    """Queue a crash document as an Elasticsearch bulk-index action.

    Normalizes the date fields of the processed crash so that
    ``date_processed`` matches our established mapping, ensures the
    target (date-based) index exists, then puts a bulk action dict on
    ``queue``.
    """
    # Massage the crash so date_processed matches the mapping format.
    self.reconstitute_datetimes(crash_document['processed_crash'])

    # Route the document to the index for its processing date.
    target_index = self.get_index_for_crash(
        crash_document['processed_crash']['date_processed'])
    doc_type = self.config.elasticsearch.elasticsearch_doctype
    document_id = crash_document['crash_id']

    # Lazily create the index; creating an existing index is OK.
    if target_index not in self.indices_cache:
        IndexCreator(config=self.config).create_socorro_index(target_index)

    queue.put({
        '_index': target_index,
        '_type': doc_type,
        '_id': document_id,
        '_source': crash_document,
    })
def _submit_crash_to_elasticsearch(self, queue, crash_document):
    """Build a bulk-index action for one crash and enqueue it."""
    # First make date_processed conform to our established mapping by
    # reconstituting datetime objects inside the processed crash.
    self.reconstitute_datetimes(crash_document['processed_crash'])

    # Identifiers for the bulk action.
    es_index = self.get_index_for_crash(
        crash_document['processed_crash']['date_processed']
    )
    es_doctype = self.config.elasticsearch.elasticsearch_doctype
    crash_id = crash_document['crash_id']

    # Attempt to create the index; it's OK if it already exists.
    if es_index not in self.indices_cache:
        index_creator = IndexCreator(config=self.config)
        index_creator.create_socorro_index(es_index)

    action = {
        '_index': es_index,
        '_type': es_doctype,
        '_id': crash_id,
        '_source': crash_document,
    }
    queue.put(action)
def _submit_crash_to_elasticsearch(self, connection, crash_document):
    """Index one crash report into Elasticsearch over ``connection``."""
    # Normalize date fields so date_processed matches the mapping.
    self.reconstitute_datetimes(crash_document['processed_crash'])

    # Work out where the document goes.
    index_name = self.get_index_for_crash(
        crash_document['processed_crash']['date_processed'])
    doc_type = self.config.elasticsearch.elasticsearch_doctype
    crash_id = crash_document['crash_id']

    # Create the index on first use; creating an existing index is OK.
    if index_name not in self.indices_cache:
        IndexCreator(config=self.config).create_socorro_index(index_name)

    # Submit the crash for indexing, logging loudly before re-raising.
    try:
        connection.index(
            index=index_name,
            doc_type=doc_type,
            body=crash_document,
            id=crash_id,
        )
    except elasticsearch.exceptions.ElasticsearchException as e:
        self.config.logger.critical(
            'Submission to Elasticsearch failed for %s (%s)',
            crash_id,
            e,
            exc_info=True,
        )
        raise
def test_create_socorro_index(self):
    """The configured index must exist after create_socorro_index()."""
    target = self.config.elasticsearch.elasticsearch_index
    IndexCreator(config=self.config).create_socorro_index(target)
    assert self.index_client.exists(target)
def test_create_socorro_index(self):
    """create_socorro_index() should actually create the index."""
    creator = IndexCreator(config=self.config)
    index_name = self.config.elasticsearch.elasticsearch_index
    creator.create_socorro_index(index_name)
    assert self.index_client.exists(index_name)
def test_create_socorro_index(self):
    """Verify the index exists via the creator's own index client."""
    index_name = self.config.elasticsearch.elasticsearch_index
    creator = IndexCreator(config=self.config)
    creator.create_socorro_index(index_name)
    ok_(creator.get_index_client().exists(index_name))
def _submit_crash_to_elasticsearch(self, connection, crash_document):
    """Submit a crash report to elasticsearch.

    Round-trips ``date_processed`` through a datetime object so its
    string form matches the formatting of our established mapping,
    then indexes the document into the date-based index.
    """
    # Parse the string date into a datetime, then re-serialize it with
    # the expected formatting and put it back in the document.
    crash_date = datetimeutil.string_to_datetime(
        crash_document['processed_crash']['date_processed']
    )
    crash_document['processed_crash']['date_processed'] = (
        datetimeutil.date_to_string(crash_date)
    )

    # Index name and identifiers for this document.
    es_index = self.get_index_for_crash(crash_date)
    es_doctype = self.config.elasticsearch.elasticsearch_doctype
    crash_id = crash_document['crash_id']

    # Create the index if we have not seen it; it's OK if it exists.
    if es_index not in self.indices_cache:
        IndexCreator(config=self.config).create_socorro_index(es_index)

    # Submit the crash for indexing.
    try:
        connection.index(
            index=es_index,
            doc_type=es_doctype,
            body=crash_document,
            id=crash_id
        )
    except elasticsearch.exceptions.ElasticsearchException as e:
        self.config.logger.critical(
            'Submission to Elasticsearch failed for %s (%s)',
            crash_id,
            e,
            exc_info=True
        )
        raise
def _submit_crash_to_elasticsearch(self, connection, crash_document):
    """Submit a crash report to elasticsearch."""
    # Make date_processed conform to our established mapping.
    self.reconstitute_datetimes(crash_document['processed_crash'])

    # Derive index name, doctype and id for the submission.
    date_processed = crash_document['processed_crash']['date_processed']
    es_index = self.get_index_for_crash(date_processed)
    es_doctype = self.config.elasticsearch.elasticsearch_doctype
    crash_id = crash_document['crash_id']

    # Attempt to create the index; it's OK if it already exists.
    if es_index not in self.indices_cache:
        index_creator = IndexCreator(config=self.config)
        index_creator.create_socorro_index(es_index)

    # Submit the crash for indexing; on failure, log critically with a
    # traceback and propagate the exception to the caller.
    try:
        connection.index(
            index=es_index,
            doc_type=es_doctype,
            body=crash_document,
            id=crash_id,
        )
    except elasticsearch.exceptions.ElasticsearchException as e:
        self.config.logger.critical(
            'Submission to Elasticsearch failed for %s (%s)',
            crash_id,
            e,
            exc_info=True,
        )
        raise
class ElasticsearchTestCase(TestCase):
    """Base class for Elastic Search related unit tests.

    Builds a fully tuned configuration, an index client, and a live
    Elasticsearch connection at construction time, and provides helpers
    for indexing single crashes, many crashes, and supersearch fields.
    """

    def __init__(self, *args, **kwargs):
        super(ElasticsearchTestCase, self).__init__(*args, **kwargs)
        self.config = self.get_base_config()
        # elasticsearch_class is a context-manager factory configured from
        # the elasticsearch sub-config.
        es_context = self.config.elasticsearch.elasticsearch_class(
            config=self.config.elasticsearch
        )
        creator_config = self.get_tuned_config(IndexCreator)
        self.index_creator = IndexCreator(creator_config)
        self.index_client = self.index_creator.get_index_client()
        # NOTE(review): the connection is captured and kept after the
        # context manager exits; presumably the context does not close it.
        with es_context() as conn:
            self.connection = conn

    def setUp(self):
        """Create the supersearch fields and the test crash index."""
        # Create the supersearch fields.
        self.index_super_search_fields()
        self.index_creator.create_socorro_index(
            self.config.elasticsearch.elasticsearch_index
        )
        super(ElasticsearchTestCase, self).setUp()

    def tearDown(self):
        """Delete both test indices created by setUp()."""
        # Clear the test indices.
        self.index_client.delete(
            self.config.elasticsearch.elasticsearch_default_index
        )
        self.index_client.delete(
            self.config.elasticsearch.elasticsearch_index
        )
        super(ElasticsearchTestCase, self).tearDown()

    def get_tuned_config(self, sources, extra_values=None):
        """Build a config from one or more option sources.

        `sources` may be a single source or a list/tuple of sources;
        each gets a mock `logger` option. `extra_values` overrides the
        DEFAULT_VALUES used as the value source.
        """
        if not isinstance(sources, (list, tuple)):
            sources = [sources]
        mock_logging = mock.Mock()
        config_definitions = []
        for source in sources:
            conf = source.get_required_config()
            conf.add_option('logger', default=mock_logging)
            config_definitions.append(conf)
        values_source = DEFAULT_VALUES.copy()
        values_source.update({'logger': mock_logging})
        if extra_values:
            values_source.update(extra_values)
        config_manager = ConfigurationManager(
            config_definitions,
            app_name='testapp',
            app_version='1.0',
            app_description='Elasticsearch integration tests',
            # `environment` takes precedence so CI can override values.
            values_source_list=[environment, values_source],
            argv_source=[],
        )
        return config_manager.get_config()

    def get_base_config(self, es_index=None):
        """Return the tuned ElasticsearchConfig, optionally overriding
        the index name with `es_index`."""
        extra_values = None
        if es_index:
            extra_values = {
                'resource.elasticsearch.elasticsearch_index': es_index
            }
        return self.get_tuned_config(
            ElasticsearchConfig,
            extra_values=extra_values
        )

    def index_super_search_fields(self, fields=None):
        """Bulk-index supersearch field definitions into the default
        index (defaults to SUPERSEARCH_FIELDS), then refresh it."""
        if fields is None:
            fields = SUPERSEARCH_FIELDS
        es_index = self.config.elasticsearch.elasticsearch_default_index
        actions = []
        for name, field in fields.iteritems():
            action = {
                '_index': es_index,
                '_type': 'supersearch_fields',
                '_id': name,
                '_source': field,
            }
            actions.append(action)
        bulk(
            client=self.connection,
            actions=actions,
        )
        # Refresh so the fields are immediately searchable.
        self.index_client.refresh(index=[es_index])

    def index_crash(self, processed_crash, raw_crash=None, crash_id=None):
        """Index a single crash document; returns the document id.

        A random UUID-based crash_id is generated when none is given.
        """
        if crash_id is None:
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))
        if raw_crash is None:
            raw_crash = {}
        doc = {
            'crash_id': crash_id,
            'processed_crash': processed_crash,
            'raw_crash': raw_crash,
        }
        res = self.connection.index(
            index=self.config.elasticsearch.elasticsearch_index,
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
            id=crash_id,
            body=doc,
        )
        return res['_id']

    def index_many_crashes(
        self, number, processed_crash=None, raw_crash=None, loop_field=None
    ):
        """Bulk-index `number` crashes, then refresh the index.

        When `loop_field` is given, that field of the processed crash is
        treated as a %-format template instantiated with the loop index,
        so each document gets a distinct value.
        """
        if processed_crash is None:
            processed_crash = {}
        if raw_crash is None:
            raw_crash = {}
        actions = []
        for i in range(number):
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))
            if loop_field is not None:
                processed_copy = processed_crash.copy()
                processed_copy[loop_field] = processed_crash[loop_field] % i
            else:
                processed_copy = processed_crash
            doc = {
                'crash_id': crash_id,
                'processed_crash': processed_copy,
                'raw_crash': raw_crash,
            }
            action = {
                '_index': self.config.elasticsearch.elasticsearch_index,
                '_type': self.config.elasticsearch.elasticsearch_doctype,
                '_id': crash_id,
                '_source': doc,
            }
            actions.append(action)
        bulk(
            client=self.connection,
            actions=actions,
        )
        self.refresh_index()

    def refresh_index(self):
        """Refresh the test index so indexed docs become searchable."""
        self.index_client.refresh(
            index=self.config.elasticsearch.elasticsearch_index
        )
class ElasticsearchTestCase(TestCase):
    """Base class for Elastic Search related unit tests.

    Provides a tuned configuration, index client and live connection,
    plus helpers to index crashes (optionally merged into a root doc)
    and supersearch fields.
    """

    def __init__(self, *args, **kwargs):
        super(ElasticsearchTestCase, self).__init__(*args, **kwargs)
        self.config = self.get_base_config()
        es_context = self.config.elasticsearch.elasticsearch_class(
            config=self.config.elasticsearch)
        creator_config = self.get_tuned_config(IndexCreator)
        self.index_creator = IndexCreator(creator_config)
        self.index_client = self.index_creator.get_index_client()
        # NOTE(review): the connection outlives the context manager;
        # presumably the context does not close it on exit.
        with es_context() as conn:
            self.connection = conn

    def setUp(self):
        """Create the supersearch fields and the test crash index."""
        # Create the supersearch fields.
        self.index_super_search_fields()
        self.index_creator.create_socorro_index(
            self.config.elasticsearch.elasticsearch_index)
        super(ElasticsearchTestCase, self).setUp()

    def tearDown(self):
        """Delete both test indices created by setUp()."""
        # Clear the test indices.
        self.index_client.delete(
            self.config.elasticsearch.elasticsearch_default_index)
        self.index_client.delete(self.config.elasticsearch.elasticsearch_index)
        super(ElasticsearchTestCase, self).tearDown()

    def health_check(self):
        """Wait (max 1s) for the cluster to reach yellow status."""
        self.connection.cluster.health(wait_for_status='yellow',
                                       request_timeout=1)

    def get_tuned_config(self, sources, extra_values=None):
        """Build a config from one or more option sources.

        Each source gets a mock `logger` option; `extra_values`
        overrides the DEFAULT_VALUES used as the value source.
        """
        if not isinstance(sources, (list, tuple)):
            sources = [sources]
        mock_logging = mock.Mock()
        config_definitions = []
        for source in sources:
            conf = source.get_required_config()
            conf.add_option('logger', default=mock_logging)
            config_definitions.append(conf)
        values_source = DEFAULT_VALUES.copy()
        values_source.update({'logger': mock_logging})
        if extra_values:
            values_source.update(extra_values)
        config_manager = ConfigurationManager(
            config_definitions,
            app_name='testapp',
            app_version='1.0',
            app_description='Elasticsearch integration tests',
            # `environment` first so CI can override test values.
            values_source_list=[environment, values_source],
            argv_source=[],
        )
        return config_manager.get_config()

    def get_base_config(self, es_index=None):
        """Return the tuned ElasticsearchConfig, optionally overriding
        the index name with `es_index`."""
        extra_values = None
        if es_index:
            extra_values = {
                'resource.elasticsearch.elasticsearch_index': es_index
            }
        return self.get_tuned_config(ElasticsearchConfig,
                                     extra_values=extra_values)

    def index_super_search_fields(self, fields=None):
        """Bulk-index supersearch field definitions (defaults to
        SUPERSEARCH_FIELDS) into the default index, then refresh it."""
        if fields is None:
            fields = SUPERSEARCH_FIELDS
        es_index = self.config.elasticsearch.elasticsearch_default_index
        actions = []
        for name, field in fields.iteritems():
            action = {
                '_index': es_index,
                '_type': 'supersearch_fields',
                '_id': name,
                '_source': field,
            }
            actions.append(action)
        bulk(
            client=self.connection,
            actions=actions,
        )
        self.index_client.refresh(index=[es_index])

    def index_crash(self, processed_crash, raw_crash=None, crash_id=None,
                    root_doc=None):
        """Index a single crash document; returns the document id.

        `root_doc`, when given, seeds the document with extra top-level
        keys before the crash fields are merged in.
        """
        if crash_id is None:
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))
        if raw_crash is None:
            raw_crash = {}
        doc = {}
        if root_doc:
            doc = dict(root_doc)
        doc.update({
            'crash_id': crash_id,
            'processed_crash': processed_crash,
            'raw_crash': raw_crash,
        })
        res = self.connection.index(
            index=self.config.elasticsearch.elasticsearch_index,
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
            id=crash_id,
            body=doc,
        )
        return res['_id']

    def index_many_crashes(self, number, processed_crash=None, raw_crash=None,
                           loop_field=None):
        """Bulk-index `number` crashes, then refresh the index.

        When `loop_field` is given, that field is a %-format template
        instantiated with the loop index so each doc gets a distinct
        value.
        """
        if processed_crash is None:
            processed_crash = {}
        if raw_crash is None:
            raw_crash = {}
        actions = []
        for i in range(number):
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))
            if loop_field is not None:
                processed_copy = processed_crash.copy()
                processed_copy[loop_field] = processed_crash[loop_field] % i
            else:
                processed_copy = processed_crash
            doc = {
                'crash_id': crash_id,
                'processed_crash': processed_copy,
                'raw_crash': raw_crash,
            }
            action = {
                '_index': self.config.elasticsearch.elasticsearch_index,
                '_type': self.config.elasticsearch.elasticsearch_doctype,
                '_id': crash_id,
                '_source': doc,
            }
            actions.append(action)
        bulk(
            client=self.connection,
            actions=actions,
        )
        self.refresh_index()

    def refresh_index(self, es_index=None):
        """Refresh `es_index` (default: the test crash index)."""
        self.index_client.refresh(
            index=es_index or self.config.elasticsearch.elasticsearch_index)
class ElasticsearchTestCase(TestCaseWithConfig):
    """Base class for Elastic Search related unit tests.

    Builds a tuned configuration, index client and live connection at
    construction time; creates the test index in setUp() and deletes it
    in tearDown().
    """

    def __init__(self, *args, **kwargs):
        super(ElasticsearchTestCase, self).__init__(*args, **kwargs)
        self.config = self.get_base_config()
        es_context = self.config.elasticsearch.elasticsearch_class(
            config=self.config.elasticsearch
        )
        creator_config = self.get_tuned_config(IndexCreator)
        self.index_creator = IndexCreator(creator_config)
        self.index_client = self.index_creator.get_index_client()
        # NOTE(review): the connection is kept after the context exits;
        # presumably the context does not close it.
        with es_context() as conn:
            self.connection = conn

    def setUp(self):
        """Create the test crash index."""
        super(ElasticsearchTestCase, self).setUp()
        self.index_creator.create_socorro_index(
            self.config.elasticsearch.elasticsearch_index)

    def tearDown(self):
        """Delete the test crash index."""
        # Clear the test indices.
        self.index_client.delete(
            self.config.elasticsearch.elasticsearch_index
        )
        super(ElasticsearchTestCase, self).tearDown()

    def health_check(self):
        """Wait (max 5s) for the cluster to reach yellow status."""
        self.connection.cluster.health(
            wait_for_status='yellow',
            request_timeout=5
        )

    def get_url(self):
        """Returns the first url in the elasticsearch_urls list"""
        return self.config.elasticsearch.elasticsearch_urls[0]

    def get_tuned_config(self, sources, extra_values=None):
        """Build a config from `sources`, layering `extra_values` over
        the module DEFAULT_VALUES."""
        values_source = DEFAULT_VALUES.copy()
        if extra_values:
            values_source.update(extra_values)
        return super(ElasticsearchTestCase, self).get_tuned_config(
            sources, values_source
        )

    def get_base_config(self, es_index=None):
        """Return the tuned ElasticsearchConfig, optionally overriding
        the index name with `es_index`."""
        extra_values = None
        if es_index:
            extra_values = {
                'resource.elasticsearch.elasticsearch_index': es_index
            }
        return self.get_tuned_config(
            ElasticsearchConfig,
            extra_values=extra_values
        )

    def index_crash(self, processed_crash=None, raw_crash=None,
                    crash_id=None):
        """Index a single crash document; returns the document id.

        A random UUID-based crash_id is generated when none is given.
        """
        if crash_id is None:
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))
        raw_crash = raw_crash or {}
        processed_crash = processed_crash or {}
        doc = {
            'crash_id': crash_id,
            'processed_crash': processed_crash,
            'raw_crash': raw_crash,
        }
        res = self.connection.index(
            index=self.config.elasticsearch.elasticsearch_index,
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
            id=crash_id,
            body=doc,
        )
        return res['_id']

    def index_many_crashes(
        self, number, processed_crash=None, raw_crash=None, loop_field=None
    ):
        """Bulk-index `number` crashes, then refresh the index.

        When `loop_field` is given, that field is a %-format template
        instantiated with the loop index so each doc gets a distinct
        value.
        """
        processed_crash = processed_crash or {}
        raw_crash = raw_crash or {}
        actions = []
        for i in range(number):
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))
            if loop_field is not None:
                processed_copy = processed_crash.copy()
                processed_copy[loop_field] = processed_crash[loop_field] % i
            else:
                processed_copy = processed_crash
            doc = {
                'crash_id': crash_id,
                'processed_crash': processed_copy,
                'raw_crash': raw_crash,
            }
            action = {
                '_index': self.config.elasticsearch.elasticsearch_index,
                '_type': self.config.elasticsearch.elasticsearch_doctype,
                '_id': crash_id,
                '_source': doc,
            }
            actions.append(action)
        bulk(
            client=self.connection,
            actions=actions,
        )
        self.refresh_index()

    def refresh_index(self, es_index=None):
        """Refresh `es_index` (default: the test crash index)."""
        self.index_client.refresh(
            index=es_index or self.config.elasticsearch.elasticsearch_index
        )
class ElasticsearchTestCase(TestCaseWithConfig):
    """Base class for Elastic Search related unit tests.

    Sets up a tuned configuration, an index client and a live
    connection; the test index is created in setUp() and removed in
    tearDown().
    """

    def __init__(self, *args, **kwargs):
        super(ElasticsearchTestCase, self).__init__(*args, **kwargs)
        self.config = self.get_base_config()
        es_context = self.config.elasticsearch.elasticsearch_class(
            config=self.config.elasticsearch)
        creator_config = self.get_tuned_config(IndexCreator)
        self.index_creator = IndexCreator(creator_config)
        self.index_client = self.index_creator.get_index_client()
        # NOTE(review): the connection is kept after the context exits;
        # presumably the context does not close it.
        with es_context() as conn:
            self.connection = conn

    def setUp(self):
        """Create the test crash index."""
        super(ElasticsearchTestCase, self).setUp()
        self.index_creator.create_socorro_index(
            self.config.elasticsearch.elasticsearch_index)

    def tearDown(self):
        """Delete the test crash index."""
        # Clear the test indices.
        self.index_client.delete(self.config.elasticsearch.elasticsearch_index)
        super(ElasticsearchTestCase, self).tearDown()

    def health_check(self):
        """Wait (max 5s) for the cluster to reach yellow status."""
        self.connection.cluster.health(wait_for_status='yellow',
                                       request_timeout=5)

    def get_url(self):
        """Returns the first url in the elasticsearch_urls list"""
        return self.config.elasticsearch.elasticsearch_urls[0]

    def get_tuned_config(self, sources, extra_values=None):
        """Build a config from `sources`, layering `extra_values` over
        the module DEFAULT_VALUES."""
        values_source = DEFAULT_VALUES.copy()
        if extra_values:
            values_source.update(extra_values)
        return super(ElasticsearchTestCase, self).get_tuned_config(
            sources, values_source)

    def get_base_config(self, es_index=None):
        """Return the tuned ElasticsearchConfig, optionally overriding
        the index name with `es_index`."""
        extra_values = None
        if es_index:
            extra_values = {
                'resource.elasticsearch.elasticsearch_index': es_index
            }
        return self.get_tuned_config(ElasticsearchConfig,
                                     extra_values=extra_values)

    def index_crash(self, processed_crash=None, raw_crash=None,
                    crash_id=None):
        """Index a single crash document; returns the document id.

        A random UUID-based crash_id is generated when none is given.
        """
        if crash_id is None:
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))
        raw_crash = raw_crash or {}
        processed_crash = processed_crash or {}
        doc = {
            'crash_id': crash_id,
            'processed_crash': processed_crash,
            'raw_crash': raw_crash,
        }
        res = self.connection.index(
            index=self.config.elasticsearch.elasticsearch_index,
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
            id=crash_id,
            body=doc,
        )
        return res['_id']

    def index_many_crashes(self, number, processed_crash=None, raw_crash=None,
                           loop_field=None):
        """Bulk-index `number` crashes, then refresh the index.

        When `loop_field` is given, that field is a %-format template
        instantiated with the loop index so each doc gets a distinct
        value.
        """
        processed_crash = processed_crash or {}
        raw_crash = raw_crash or {}
        actions = []
        for i in range(number):
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))
            if loop_field is not None:
                processed_copy = processed_crash.copy()
                processed_copy[loop_field] = processed_crash[loop_field] % i
            else:
                processed_copy = processed_crash
            doc = {
                'crash_id': crash_id,
                'processed_crash': processed_copy,
                'raw_crash': raw_crash,
            }
            action = {
                '_index': self.config.elasticsearch.elasticsearch_index,
                '_type': self.config.elasticsearch.elasticsearch_doctype,
                '_id': crash_id,
                '_source': doc,
            }
            actions.append(action)
        bulk(
            client=self.connection,
            actions=actions,
        )
        self.refresh_index()

    def refresh_index(self, es_index=None):
        """Refresh `es_index` (default: the test crash index)."""
        self.index_client.refresh(
            index=es_index or self.config.elasticsearch.elasticsearch_index)
def _submit_crash_to_elasticsearch(self, connection, crash_document):
    """Submit a crash report to elasticsearch.

    If Elasticsearch rejects the document because one field is malformed
    (a string too long for the mapping, or a non-numeric value in a
    numeric field), that field is removed from the document and recorded
    in ``removed_fields``, and the submission is retried — at most 5
    attempts in total. Unparseable or unrelated failures are logged
    critically and re-raised.
    """
    # Massage the crash such that the date_processed field is formatted
    # in the fashion of our established mapping.
    self.reconstitute_datetimes(crash_document['processed_crash'])

    # Obtain the index name.
    es_index = self.get_index_for_crash(
        crash_document['processed_crash']['date_processed']
    )
    es_doctype = self.config.elasticsearch.elasticsearch_doctype
    crash_id = crash_document['crash_id']

    # Attempt to create the index; it's OK if it already exists.
    if es_index not in self.indices_cache:
        index_creator = IndexCreator(config=self.config)
        index_creator.create_socorro_index(es_index)

    # Submit the crash for indexing.
    # Don't retry more than 5 times. That is to avoid infinite loops in
    # case of an unhandled exception.
    # BUGFIX: the previous `while range(5).pop(-1)` loop only allowed 4
    # attempts (the pop of 0 ended the loop); an explicit for-loop now
    # matches the documented limit.
    for _attempt in range(5):
        try:
            connection.index(
                index=es_index,
                doc_type=es_doctype,
                body=crash_document,
                id=crash_id
            )
            break
        except elasticsearch.exceptions.TransportError as e:
            field_name = None

            if 'MaxBytesLengthExceededException' in e.error:
                # This is caused by a string that is way too long for
                # Elasticsearch.
                matches = self.field_name_string_error_re.findall(e.error)
                if matches:
                    field_name = matches[0]
            elif 'NumberFormatException' in e.error:
                # This is caused by a number that is either too big for
                # Elasticsearch or just not a number.
                matches = self.field_name_number_error_re.findall(e.error)
                if matches:
                    field_name = matches[0]

            if not field_name:
                # We are unable to parse which field to remove, we cannot
                # try to fix the document. Let it raise.
                self.config.logger.critical(
                    'Submission to Elasticsearch failed for %s (%s)',
                    crash_id,
                    e,
                    exc_info=True
                )
                raise

            if field_name.endswith('.full'):
                # Remove the `.full` suffix; that is a special mapping
                # construct that is not part of the real field name.
                # BUGFIX: rstrip('.full') strips any trailing '.', 'f',
                # 'u', 'l' characters and would mangle field names ending
                # in those letters; slice the suffix off instead.
                field_name = field_name[:-len('.full')]

            # Now remove that field from the document before trying again.
            field_path = field_name.split('.')
            parent = crash_document
            for i, field in enumerate(field_path):
                if i == len(field_path) - 1:
                    # This is the last level, so `field` contains the name
                    # of the field that we want to remove from `parent`.
                    del parent[field]
                else:
                    parent = parent[field]

            # Add a note in the document that a field has been removed.
            if crash_document.get('removed_fields'):
                crash_document['removed_fields'] = '{} {}'.format(
                    crash_document['removed_fields'],
                    field_name
                )
            else:
                crash_document['removed_fields'] = field_name
        except elasticsearch.exceptions.ElasticsearchException as e:
            self.config.logger.critical(
                'Submission to Elasticsearch failed for %s (%s)',
                crash_id,
                e,
                exc_info=True
            )
            raise
class ElasticsearchTestCase(TestCase):
    """Base class for Elastic Search related unit tests.

    Builds a fully tuned configuration, an index client and a live
    connection at construction time; setUp() indexes the supersearch
    fields and creates the test index, tearDown() deletes both indices.
    """

    def __init__(self, *args, **kwargs):
        super(ElasticsearchTestCase, self).__init__(*args, **kwargs)
        self.config = self.get_base_config()
        es_context = self.config.elasticsearch.elasticsearch_class(
            config=self.config.elasticsearch)
        creator_config = self.get_tuned_config(IndexCreator)
        self.index_creator = IndexCreator(creator_config)
        self.index_client = self.index_creator.get_index_client()
        # NOTE(review): the connection is kept after the context exits;
        # presumably the context does not close it.
        with es_context() as conn:
            self.connection = conn

    def setUp(self):
        """Create the supersearch fields and the test crash index."""
        # Create the supersearch fields.
        self.index_super_search_fields()
        self.index_creator.create_socorro_index(
            self.config.elasticsearch.elasticsearch_index)
        super(ElasticsearchTestCase, self).setUp()

    def tearDown(self):
        """Delete both test indices created by setUp()."""
        # Clear the test indices.
        self.index_client.delete(
            self.config.elasticsearch.elasticsearch_default_index)
        self.index_client.delete(self.config.elasticsearch.elasticsearch_index)
        super(ElasticsearchTestCase, self).tearDown()

    def health_check(self):
        """Wait (max 1s) for the cluster to reach yellow status."""
        self.connection.cluster.health(wait_for_status="yellow",
                                       request_timeout=1)

    def get_tuned_config(self, sources, extra_values=None):
        """Build a config from one or more option sources.

        Each source gets a mock `logger` option; `extra_values`
        overrides the DEFAULT_VALUES used as the value source.
        """
        if not isinstance(sources, (list, tuple)):
            sources = [sources]
        mock_logging = mock.Mock()
        config_definitions = []
        for source in sources:
            conf = source.get_required_config()
            conf.add_option("logger", default=mock_logging)
            config_definitions.append(conf)
        values_source = DEFAULT_VALUES.copy()
        values_source.update({"logger": mock_logging})
        if extra_values:
            values_source.update(extra_values)
        config_manager = ConfigurationManager(
            config_definitions,
            app_name="testapp",
            app_version="1.0",
            app_description="Elasticsearch integration tests",
            # `environment` first so CI can override test values.
            values_source_list=[environment, values_source],
            argv_source=[],
        )
        return config_manager.get_config()

    def get_base_config(self, es_index=None):
        """Return the tuned ElasticsearchConfig, optionally overriding
        the index name with `es_index`."""
        extra_values = None
        if es_index:
            extra_values = {"resource.elasticsearch.elasticsearch_index": es_index}
        return self.get_tuned_config(ElasticsearchConfig,
                                     extra_values=extra_values)

    def index_super_search_fields(self, fields=None):
        """Bulk-index supersearch field definitions (defaults to
        SUPERSEARCH_FIELDS) into the default index, then refresh it."""
        if fields is None:
            fields = SUPERSEARCH_FIELDS
        es_index = self.config.elasticsearch.elasticsearch_default_index
        actions = []
        for name, field in fields.iteritems():
            action = {"_index": es_index,
                      "_type": "supersearch_fields",
                      "_id": name,
                      "_source": field}
            actions.append(action)
        bulk(client=self.connection, actions=actions)
        self.index_client.refresh(index=[es_index])

    def index_crash(self, processed_crash, raw_crash=None, crash_id=None,
                    root_doc=None):
        """Index a single crash document; returns the document id.

        `root_doc`, when given, seeds the document with extra top-level
        keys before the crash fields are merged in.
        """
        if crash_id is None:
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))
        if raw_crash is None:
            raw_crash = {}
        doc = {}
        if root_doc:
            doc = dict(root_doc)
        doc.update({"crash_id": crash_id,
                    "processed_crash": processed_crash,
                    "raw_crash": raw_crash})
        res = self.connection.index(
            index=self.config.elasticsearch.elasticsearch_index,
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
            id=crash_id,
            body=doc,
        )
        return res["_id"]

    def index_many_crashes(self, number, processed_crash=None, raw_crash=None,
                           loop_field=None):
        """Bulk-index `number` crashes, then refresh the index.

        When `loop_field` is given, that field is a %-format template
        instantiated with the loop index so each doc gets a distinct
        value.
        """
        if processed_crash is None:
            processed_crash = {}
        if raw_crash is None:
            raw_crash = {}
        actions = []
        for i in range(number):
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))
            if loop_field is not None:
                processed_copy = processed_crash.copy()
                processed_copy[loop_field] = processed_crash[loop_field] % i
            else:
                processed_copy = processed_crash
            doc = {"crash_id": crash_id,
                   "processed_crash": processed_copy,
                   "raw_crash": raw_crash}
            action = {
                "_index": self.config.elasticsearch.elasticsearch_index,
                "_type": self.config.elasticsearch.elasticsearch_doctype,
                "_id": crash_id,
                "_source": doc,
            }
            actions.append(action)
        bulk(client=self.connection, actions=actions)
        self.refresh_index()

    def refresh_index(self, es_index=None):
        """Refresh `es_index` (default: the test crash index)."""
        self.index_client.refresh(index=es_index or self.config.elasticsearch.elasticsearch_index)