def test_mapping(self, mapping):
    """Verify that a mapping is correct.

    This function does so by first creating a new, temporary index in
    elasticsearch using the mapping. It then takes some recent crash
    reports that are in elasticsearch and tries to insert them in the
    temporary index. Any failure in any of those steps will raise an
    exception. If any is raised, that means the mapping is incorrect in
    some way (either it doesn't validate against elasticsearch's rules,
    or is not compatible with the data we currently store).

    If no exception is raised, the mapping is likely correct.

    This function is to be used in any place that can change the
    `storage_mapping` field in any Super Search Field. Methods
    `create_field` and `update_field` use it, see above.

    :arg mapping: the candidate elasticsearch mapping to validate
    :raises BadArgumentError: if indexing existing data with the new
        mapping fails for any elasticsearch-related reason
    """
    temp_index = 'socorro_mapping_test'

    es_connection = self.get_connection()

    # Import at runtime to avoid dependency circle.
    from socorro.external.es.index_creator import IndexCreator
    index_creator = IndexCreator(self.config)

    try:
        # Step 1: create a throwaway index using the candidate mapping.
        # This alone catches mappings that elasticsearch rejects outright.
        index_creator.create_index(
            temp_index,
            mapping,
        )

        # Step 2: sample recent real crashes (last 7 days of indexes) so
        # we validate the mapping against data we actually store.
        now = datetimeutil.utc_now()
        last_week = now - datetime.timedelta(days=7)
        current_indices = self.generate_list_of_indexes(last_week, now)

        crashes_sample = es_connection.search(
            index=current_indices,
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
            size=self.config.elasticsearch.mapping_test_crash_number,
        )
        crashes = [x['_source'] for x in crashes_sample['hits']['hits']]

        # Step 3: try to re-index those crashes under the new mapping;
        # incompatibilities surface as elasticsearch exceptions.
        for crash in crashes:
            es_connection.index(
                index=temp_index,
                doc_type=self.config.elasticsearch.elasticsearch_doctype,
                body=crash,
            )

    except elasticsearch.exceptions.ElasticsearchException as e:
        # Surface any ES failure to the caller as a field-level error.
        raise BadArgumentError(
            'storage_mapping',
            msg='Indexing existing data in Elasticsearch failed with the '
                'new mapping. Error is: %s' % str(e),
        )
    finally:
        # Always clean up the temporary index, whether validation passed
        # or failed.
        try:
            index_creator.get_index_client().delete(temp_index)
        except elasticsearch.exceptions.NotFoundError:
            # If the index does not exist (if the index creation failed
            # for example), we don't need to do anything.
            pass
def _submit_crash_to_elasticsearch(self, connection, crash_document):
    """Submit a crash report to elasticsearch. """
    processed_crash = crash_document['processed_crash']

    # Massage the crash such that the date_processed field is formatted
    # in the fashion of our established mapping.
    self.reconstitute_datetimes(processed_crash)

    # Pick the time-based index matching this crash's processing date.
    es_index = self.get_index_for_crash(processed_crash['date_processed'])
    es_doctype = self.config.elasticsearch.elasticsearch_doctype
    crash_id = crash_document['crash_id']

    # Attempt to create the index; it's OK if it already exists. The
    # cache just spares us the extra round-trip on every submission.
    if es_index not in self.indices_cache:
        IndexCreator(config=self.config).create_socorro_index(es_index)

    # Submit the crash for indexing; log and propagate any failure.
    try:
        connection.index(
            index=es_index,
            doc_type=es_doctype,
            body=crash_document,
            id=crash_id,
        )
    except elasticsearch.exceptions.ElasticsearchException as e:
        self.config.logger.critical(
            'Submission to Elasticsearch failed for %s (%s)',
            crash_id,
            e,
            exc_info=True
        )
        raise
def _submit_crash_to_elasticsearch(self, queue, crash_document):
    """Queue a crash report as a bulk-indexing action for elasticsearch."""
    processed_crash = crash_document['processed_crash']

    # Massage the crash such that the date_processed field is formatted
    # in the fashion of our established mapping (parse the stored string
    # into a datetime, then serialize it back).
    self.reconstitute_datetimes(processed_crash)

    # Pick the time-based index for this crash's processing date.
    es_index = self.get_index_for_crash(processed_crash['date_processed'])
    es_doctype = self.config.elasticsearch.elasticsearch_doctype
    crash_id = crash_document['crash_id']

    # Attempt to create the index; it's OK if it already exists.
    if es_index not in self.indices_cache:
        IndexCreator(config=self.config).create_socorro_index(es_index)

    # Hand the bulk-indexing action over to the queue consumer.
    queue.put({
        '_index': es_index,
        '_type': es_doctype,
        '_id': crash_id,
        '_source': crash_document,
    })
def test_create_index(self):
    # Creating an index with an arbitrary mapping should make it exist.
    index_name = self.config.elasticsearch.elasticsearch_index
    creator = IndexCreator(config=self.config)
    creator.create_index(index_name, {'foo': 'bar'})
    assert self.index_client.exists(index_name)
def test_create_socorro_index(self):
    # Creating the standard socorro index should make it exist.
    index_name = self.config.elasticsearch.elasticsearch_index
    creator = IndexCreator(config=self.config)
    creator.create_socorro_index(index_name)
    assert self.index_client.exists(index_name)
def test_create_emails_index(self):
    """Creating the emails index makes it exist in elasticsearch."""
    index_creator = IndexCreator(config=self.config)
    index_creator.create_emails_index()
    # Plain `assert` for consistency with the sibling tests
    # (test_create_index, test_create_socorro_index); `ok_` is the
    # deprecated nose helper and is equivalent to a bare assert.
    assert self.index_client.exists(
        self.config.elasticsearch.elasticsearch_emails_index)
def __init__(self, *args, **kwargs):
    """Set up config, index helpers and an elasticsearch connection."""
    super(ElasticsearchTestCase, self).__init__(*args, **kwargs)
    self.config = self.get_mware_config()

    # Build the context factory used to open an ES connection.
    context_factory = self.config.elasticsearch.elasticsearch_class(
        config=self.config.elasticsearch
    )

    self.index_creator = IndexCreator(self.config)
    self.index_client = self.index_creator.get_index_client()

    with context_factory() as connection:
        self.connection = connection
def setup_method(self, method):
    """Prepare config, index helpers, a connection and the test index."""
    super(ElasticsearchTestCase, self).setup_method(method)
    self.config = self.get_base_config()

    # Build the context factory used to open an ES connection.
    context_factory = self.config.elasticsearch.elasticsearch_class(
        config=self.config.elasticsearch
    )

    self.index_creator = IndexCreator(self.get_tuned_config(IndexCreator))
    self.index_client = self.index_creator.get_index_client()

    with context_factory() as connection:
        self.connection = connection

    # Make sure the standard index exists before each test runs.
    self.index_creator.create_socorro_index(
        self.config.elasticsearch.elasticsearch_index
    )
def _submit_crash_to_elasticsearch(self, connection, crash_document):
    """Submit a crash report to elasticsearch. """
    processed_crash = crash_document['processed_crash']

    # Massage the crash such that the date_processed field is formatted
    # in the fashion of our established mapping: parse the stored string
    # into a datetime object...
    crash_date = datetimeutil.string_to_datetime(
        processed_crash['date_processed']
    )
    # ...then serialize it back with the expected formatting and
    # re-insert it into the report for indexing.
    processed_crash['date_processed'] = datetimeutil.date_to_string(
        crash_date
    )

    # Pick the time-based index for this crash's processing date.
    es_index = self.get_index_for_crash(crash_date)
    es_doctype = self.config.elasticsearch.elasticsearch_doctype
    crash_id = crash_document['crash_id']

    # Attempt to create the index; it's OK if it already exists.
    if es_index not in self.indices_cache:
        IndexCreator(config=self.config).create_socorro_index(es_index)

    # Submit the crash for indexing; log and propagate any failure.
    try:
        connection.index(
            index=es_index,
            doc_type=es_doctype,
            body=crash_document,
            id=crash_id
        )
    except elasticsearch.exceptions.ElasticsearchException as e:
        self.config.logger.critical(
            'Submission to Elasticsearch failed for %s (%s)',
            crash_id,
            e,
            exc_info=True
        )
        raise
def _submit_crash_to_elasticsearch(self, connection, crash_document):
    """Submit a crash report to elasticsearch.

    If elasticsearch rejects the document because one field's value
    cannot be indexed (a string too long for the mapping, or a value
    that is not a valid number), that field is removed from the
    document and the submission is retried — up to 5 attempts in
    total, to avoid infinite loops. Every removed field is recorded
    in the document's `removed_fields` entry.

    Raises any elasticsearch error that cannot be worked around this
    way, after logging it.
    """
    # Massage the crash such that the date_processed field is formatted
    # in the fashion of our established mapping.
    self.reconstitute_datetimes(crash_document['processed_crash'])

    # Obtain the index name.
    es_index = self.get_index_for_crash(
        crash_document['processed_crash']['date_processed']
    )
    es_doctype = self.config.elasticsearch.elasticsearch_doctype
    crash_id = crash_document['crash_id']

    # Attempt to create the index; it's OK if it already exists.
    if es_index not in self.indices_cache:
        index_creator = IndexCreator(config=self.config)
        index_creator.create_socorro_index(es_index)

    # Submit the crash for indexing. Don't retry more than 5 times.
    # BUG FIX: the previous `times = range(5); while times.pop(-1):`
    # pattern only attempted 4 submissions and, when all retries were
    # used up, exited *silently*, dropping the crash without raising.
    # It also breaks on Python 3, where `range` has no `pop`. We now
    # raise after the final attempt so no failure goes unnoticed.
    for attempt in range(5):
        try:
            connection.index(
                index=es_index,
                doc_type=es_doctype,
                body=crash_document,
                id=crash_id
            )
            return
        except elasticsearch.exceptions.TransportError as e:
            field_name = None

            if 'MaxBytesLengthExceededException' in e.error:
                # This is caused by a string that is way too long for
                # Elasticsearch.
                matches = self.field_name_string_error_re.findall(e.error)
                if matches:
                    field_name = matches[0]
            elif 'NumberFormatException' in e.error:
                # This is caused by a number that is either too big for
                # Elasticsearch or just not a number.
                matches = self.field_name_number_error_re.findall(e.error)
                if matches:
                    field_name = matches[0]

            if not field_name or attempt == 4:
                # Either we are unable to parse which field to remove, so
                # we cannot fix the document, or we have exhausted the
                # retries. Log and let the error propagate.
                self.config.logger.critical(
                    'Submission to Elasticsearch failed for %s (%s)',
                    crash_id,
                    e,
                    exc_info=True
                )
                raise

            if field_name.endswith('.full'):
                # Remove the `.full` suffix, a special mapping construct
                # that is not part of the real field name.
                # BUG FIX: the previous `rstrip('.full')` stripped any
                # trailing '.', 'f', 'u' or 'l' *characters*, mangling
                # names such as 'url.full' into 'ur'.
                field_name = field_name[:-len('.full')]

            # Remove the offending field before trying again.
            self._remove_field(crash_document, field_name)
        except elasticsearch.exceptions.ElasticsearchException as e:
            self.config.logger.critical(
                'Submission to Elasticsearch failed for %s (%s)',
                crash_id,
                e,
                exc_info=True
            )
            raise

def _remove_field(self, crash_document, field_name):
    """Delete the field at dotted path `field_name` from the document
    and record its name in the document's `removed_fields` entry.
    """
    field_path = field_name.split('.')
    # Walk down to the direct parent of the field, then delete it.
    parent = crash_document
    for field in field_path[:-1]:
        parent = parent[field]
    del parent[field_path[-1]]

    # Add a note in the document that a field has been removed.
    if crash_document.get('removed_fields'):
        crash_document['removed_fields'] = '{} {}'.format(
            crash_document['removed_fields'],
            field_name
        )
    else:
        crash_document['removed_fields'] = field_name