예제 #1
0
    def test_create_socorro_index(self):
        """Creating the Socorro index makes it visible to the index client."""
        creator = IndexCreator(config=self.config)
        index_name = self.config.elasticsearch.elasticsearch_index
        creator.create_socorro_index(index_name)

        index_exists = self.index_client.exists(index_name)
        assert index_exists
예제 #2
0
        def _submit_crash_to_elasticsearch(self, queue, crash_document):
            """Put a bulk-index action for ``crash_document`` on ``queue``.

            The processed crash is passed through
            ``self.reconstitute_datetimes``, the destination index is derived
            from its ``date_processed`` value, and that index is created when
            its name is not found in ``self.indices_cache``.
            """
            processed = crash_document['processed_crash']

            # Massage the crash so that date_processed is formatted in the
            # fashion of our established mapping.
            self.reconstitute_datetimes(processed)

            target_index = self.get_index_for_crash(
                processed['date_processed'])
            bulk_action = {
                '_index': target_index,
                '_type': self.config.elasticsearch.elasticsearch_doctype,
                '_id': crash_document['crash_id'],
                '_source': crash_document,
            }

            # Creating an index that already exists is fine (per the
            # original note on this step).
            # NOTE(review): nothing here adds target_index to
            # indices_cache; presumably that happens elsewhere -- confirm.
            if target_index not in self.indices_cache:
                IndexCreator(config=self.config).create_socorro_index(
                    target_index)

            queue.put(bulk_action)
예제 #3
0
    def test_create_emails_index(self):
        """The emails index exists after ``create_emails_index`` has run."""
        IndexCreator(config=self.config).create_emails_index()

        emails_index = self.config.elasticsearch.elasticsearch_emails_index
        ok_(self.index_client.exists(emails_index))
예제 #4
0
    def _submit_crash_to_elasticsearch(self, connection, crash_document):
        """Index a single crash document through ``connection``.

        The target index is derived from the processed crash's
        ``date_processed`` value and is created first when its name is not
        in ``self.indices_cache``. An ``ElasticsearchException`` raised while
        indexing is logged at critical level and then re-raised.
        """
        processed = crash_document['processed_crash']

        # Massage the crash so that date_processed is formatted in the
        # fashion of our established mapping.
        self.reconstitute_datetimes(processed)

        target_index = self.get_index_for_crash(processed['date_processed'])
        index_kwargs = {
            'index': target_index,
            'doc_type': self.config.elasticsearch.elasticsearch_doctype,
            'body': crash_document,
            'id': crash_document['crash_id'],
        }

        # Creating an index that already exists is fine (per the original
        # note on this step).
        # NOTE(review): nothing here adds target_index to indices_cache;
        # presumably that happens elsewhere -- confirm.
        if target_index not in self.indices_cache:
            IndexCreator(config=self.config).create_socorro_index(
                target_index)

        try:
            connection.index(**index_kwargs)
        except elasticsearch.exceptions.ElasticsearchException as e:
            self.config.logger.critical(
                'Submission to Elasticsearch failed for %s (%s)',
                index_kwargs['id'],
                e,
                exc_info=True)
            raise
예제 #5
0
        def _submit_crash_to_elasticsearch(self, queue, crash_document):
            """Enqueue the bulk action that indexes ``crash_document``.

            Steps: pass the processed crash through
            ``self.reconstitute_datetimes``, look up the index for its
            ``date_processed`` value, create that index when its name is
            missing from ``self.indices_cache``, then ``put`` the action
            dict on ``queue``.
            """
            processed = crash_document['processed_crash']

            # Massage the crash so that date_processed is formatted in the
            # fashion of our established mapping.
            self.reconstitute_datetimes(processed)

            index_name = self.get_index_for_crash(processed['date_processed'])
            doctype = self.config.elasticsearch.elasticsearch_doctype
            action = dict(
                _index=index_name,
                _type=doctype,
                _id=crash_document['crash_id'],
                _source=crash_document,
            )

            # Creating an index that already exists is fine (per the
            # original note on this step).
            # NOTE(review): index_name is never recorded in indices_cache
            # here; presumably done elsewhere -- confirm.
            index_is_known = index_name in self.indices_cache
            if not index_is_known:
                creator = IndexCreator(config=self.config)
                creator.create_socorro_index(index_name)

            queue.put(action)
예제 #6
0
    def test_mapping(self, mapping):
        """Check a candidate mapping against recently stored crash reports.

        A throwaway index is created in elasticsearch with ``mapping``, a
        sample of crash reports is fetched from the indices covering the
        last seven days, and each sampled report is indexed into the
        throwaway index. If elasticsearch raises during any of those steps
        the mapping is considered incorrect (it either does not validate
        against elasticsearch's rules or does not fit the data currently
        stored) and ``BadArgumentError`` is raised for ``storage_mapping``.
        The throwaway index is deleted in every case.

        Completing without an exception means the mapping is likely correct.

        Intended for any place that can change the `storage_mapping` field
        of a Super Search Field.
        """
        scratch_index = 'socorro_mapping_test'

        connection = self.get_connection()

        # Imported here rather than at module level to avoid a circular
        # dependency.
        from socorro.external.es.index_creator import IndexCreator
        creator = IndexCreator(self.config)
        try:
            creator.create_index(scratch_index, mapping)

            window_end = datetimeutil.utc_now()
            window_start = window_end - datetime.timedelta(days=7)
            recent_indices = self.generate_list_of_indexes(
                window_start, window_end)

            search_result = connection.search(
                index=recent_indices,
                doc_type=self.config.elasticsearch.elasticsearch_doctype,
                size=self.config.elasticsearch.mapping_test_crash_number,
            )
            sampled = [
                hit['_source'] for hit in search_result['hits']['hits']
            ]

            # Re-index every sampled report under the candidate mapping.
            for document in sampled:
                connection.index(
                    index=scratch_index,
                    doc_type=self.config.elasticsearch.elasticsearch_doctype,
                    body=document,
                )
        except elasticsearch.exceptions.ElasticsearchException as e:
            details = str(e)
            raise BadArgumentError(
                'storage_mapping',
                msg='Indexing existing data in Elasticsearch failed with the '
                    'new mapping. Error is: %s' % details,
            )
        finally:
            try:
                creator.get_index_client().delete(scratch_index)
            except elasticsearch.exceptions.NotFoundError:
                # Nothing to clean up when the index does not exist, e.g.
                # because creating it failed.
                pass
예제 #7
0
    def test_create_index(self):
        """An index created through ``create_index`` exists afterwards."""
        creator = IndexCreator(config=self.config)
        index_name = self.config.elasticsearch.elasticsearch_index
        payload = {'foo': 'bar'}
        creator.create_index(index_name, payload)

        index_exists = self.index_client.exists(index_name)
        assert index_exists
예제 #8
0
    def test_create_socorro_index(self):
        """``create_socorro_index`` leaves the configured index in place."""
        creator = IndexCreator(config=self.config)
        es_settings = self.config.elasticsearch
        creator.create_socorro_index(es_settings.elasticsearch_index)

        assert self.index_client.exists(es_settings.elasticsearch_index)
    def test_create_emails_index(self):
        """``create_emails_index`` leaves the emails index in place."""
        creator = IndexCreator(config=self.config)
        creator.create_emails_index()

        emails_index = self.config.elasticsearch.elasticsearch_emails_index
        emails_index_exists = self.index_client.exists(emails_index)
        ok_(emails_index_exists)
예제 #10
0
    def test_create_index(self):
        """The index named in the config exists after ``create_index``."""
        creator = IndexCreator(config=self.config)
        es_settings = self.config.elasticsearch
        creator.create_index(es_settings.elasticsearch_index, {'foo': 'bar'})

        assert self.index_client.exists(es_settings.elasticsearch_index)
예제 #11
0
    def test_create_socorro_index(self):
        """The creator's own index client sees the index it just created."""
        creator = IndexCreator(config=self.config)
        index_name = self.config.elasticsearch.elasticsearch_index
        creator.create_socorro_index(index_name)

        client = creator.get_index_client()
        ok_(client.exists(index_name))
예제 #12
0
    def __init__(self, *args, **kwargs):
        """Build the config, index creator, index client and connection."""
        super(ElasticsearchTestCase, self).__init__(*args, **kwargs)

        self.config = self.get_mware_config()
        es_settings = self.config.elasticsearch
        context_factory = es_settings.elasticsearch_class(config=es_settings)

        creator = IndexCreator(self.config)
        self.index_creator = creator
        self.index_client = creator.get_index_client()

        # NOTE(review): the connection is kept after the ``with`` block has
        # exited; presumably the context manager leaves it usable -- confirm.
        with context_factory() as connection:
            self.connection = connection
예제 #13
0
    def setup_method(self, method):
        """Prepare config, creator, client and connection; create the index.

        Runs the parent ``setup_method`` first, then builds the fixtures and
        finally creates the index named by ``elasticsearch_index``.
        """
        super(ElasticsearchTestCase, self).setup_method(method)

        self.config = self.get_base_config()
        es_settings = self.config.elasticsearch
        context_factory = es_settings.elasticsearch_class(config=es_settings)

        # The creator gets a config tuned for IndexCreator, not self.config.
        creator = IndexCreator(self.get_tuned_config(IndexCreator))
        self.index_creator = creator
        self.index_client = creator.get_index_client()

        # NOTE(review): the connection is kept after the ``with`` block has
        # exited; presumably the context manager leaves it usable -- confirm.
        with context_factory() as connection:
            self.connection = connection

        index_name = self.config.elasticsearch.elasticsearch_index
        self.index_creator.create_socorro_index(index_name)
예제 #14
0
    def tearDown(self):
        """Delete the crash and emails indices, ignoring missing ones."""
        client = IndexCreator(config=self.config).get_index_client()

        # Option names are resolved one at a time so each index name is read
        # from the config right before its own delete call.
        for option in ('elasticsearch_index', 'elasticsearch_emails_index'):
            try:
                client.delete(getattr(self.config.elasticsearch, option))
            except elasticsearch.exceptions.NotFoundError:
                # The index was never created; nothing to remove.
                pass

        super(IntegrationTestIndexCreator, self).tearDown()
예제 #15
0
    def _submit_crash_to_elasticsearch(self, connection, crash_document):
        """Index a single crash document through ``connection``.

        ``date_processed`` is parsed into a datetime and written back as a
        string produced by ``datetimeutil.date_to_string``; the parsed
        datetime selects the target index, which is created when its name is
        not in ``self.indices_cache``. An ``ElasticsearchException`` raised
        while indexing is logged at critical level and then re-raised.
        """
        processed = crash_document['processed_crash']

        # Round-trip date_processed (string -> datetime -> string) so the
        # stored value is formatted in the fashion of our established
        # mapping.
        crash_date = datetimeutil.string_to_datetime(
            processed['date_processed'])
        processed['date_processed'] = datetimeutil.date_to_string(crash_date)

        target_index = self.get_index_for_crash(crash_date)
        index_kwargs = {
            'index': target_index,
            'doc_type': self.config.elasticsearch.elasticsearch_doctype,
            'body': crash_document,
            'id': crash_document['crash_id'],
        }

        # Creating an index that already exists is fine (per the original
        # note on this step).
        # NOTE(review): nothing here adds target_index to indices_cache;
        # presumably that happens elsewhere -- confirm.
        if target_index not in self.indices_cache:
            IndexCreator(config=self.config).create_socorro_index(
                target_index)

        try:
            connection.index(**index_kwargs)
        except elasticsearch.exceptions.ElasticsearchException as e:
            self.config.logger.critical(
                'Submission to Elasticsearch failed for %s (%s)',
                index_kwargs['id'],
                e,
                exc_info=True
            )
            raise
예제 #16
0
    def test_mapping(self, mapping):
        """Validate ``mapping`` by indexing recent crashes under it.

        The method creates a temporary index in elasticsearch that uses
        ``mapping``, searches the indices of the past seven days for a
        sample of crash reports, and inserts each of them into the temporary
        index. An elasticsearch error in any of these steps means the
        mapping is wrong in some way (it fails elasticsearch's validation or
        is incompatible with the stored data) and is reported as a
        ``BadArgumentError`` on ``storage_mapping``. Whatever happens, the
        temporary index is deleted at the end.

        If nothing is raised, the mapping is likely correct.

        Meant to be used wherever the `storage_mapping` field of a Super
        Search Field can change.
        """
        temp_index = 'socorro_mapping_test'

        es = self.get_connection()

        # A module-level import would create a dependency circle.
        from socorro.external.es.index_creator import IndexCreator
        creator = IndexCreator(self.config)
        try:
            creator.create_index(temp_index, mapping)

            until = datetimeutil.utc_now()
            since = until - datetime.timedelta(days=7)
            indices = self.generate_list_of_indexes(since, until)

            found = es.search(
                index=indices,
                doc_type=self.config.elasticsearch.elasticsearch_doctype,
                size=self.config.elasticsearch.mapping_test_crash_number,
            )
            documents = [hit['_source'] for hit in found['hits']['hits']]

            for document in documents:
                es.index(
                    index=temp_index,
                    doc_type=self.config.elasticsearch.elasticsearch_doctype,
                    body=document,
                )
        except elasticsearch.exceptions.ElasticsearchException as e:
            reason = str(e)
            raise BadArgumentError(
                'storage_mapping',
                msg='Indexing existing data in Elasticsearch failed with the '
                    'new mapping. Error is: %s' % reason,
            )
        finally:
            try:
                creator.get_index_client().delete(temp_index)
            except elasticsearch.exceptions.NotFoundError:
                # The index is absent (for instance because creating it
                # failed), so there is nothing left to delete.
                pass
예제 #17
0
    def __init__(self, *args, **kwargs):
        """Build the config, index creator, index client and connection."""
        super(ElasticsearchTestCase, self).__init__(*args, **kwargs)

        self.config = self.get_base_config()
        es_settings = self.config.elasticsearch
        context_factory = es_settings.elasticsearch_class(config=es_settings)

        # The creator gets a config tuned for IndexCreator, not self.config.
        creator = IndexCreator(self.get_tuned_config(IndexCreator))
        self.index_creator = creator
        self.index_client = creator.get_index_client()

        # NOTE(review): the connection is kept after the ``with`` block has
        # exited; presumably the context manager leaves it usable -- confirm.
        with context_factory() as connection:
            self.connection = connection
예제 #18
0
    def _submit_crash_to_elasticsearch(self, connection, crash_document):
        """Send one crash document to elasticsearch for indexing.

        The index is chosen from the processed crash's ``date_processed``
        value and created beforehand when ``self.indices_cache`` does not
        contain its name. A failing ``connection.index`` call that raises an
        ``ElasticsearchException`` is logged as critical and re-raised.
        """
        processed = crash_document['processed_crash']

        # Massage the crash so that date_processed is formatted in the
        # fashion of our established mapping.
        self.reconstitute_datetimes(processed)

        index_name = self.get_index_for_crash(processed['date_processed'])
        doctype = self.config.elasticsearch.elasticsearch_doctype
        document_id = crash_document['crash_id']

        # Creating an index that already exists is fine (per the original
        # note on this step).
        # NOTE(review): index_name is never recorded in indices_cache here;
        # presumably done elsewhere -- confirm.
        index_is_known = index_name in self.indices_cache
        if not index_is_known:
            creator = IndexCreator(config=self.config)
            creator.create_socorro_index(index_name)

        try:
            connection.index(
                index=index_name,
                doc_type=doctype,
                body=crash_document,
                id=document_id
            )
        except elasticsearch.exceptions.ElasticsearchException as e:
            log_critical = self.config.logger.critical
            log_critical(
                'Submission to Elasticsearch failed for %s (%s)',
                document_id,
                e,
                exc_info=True
            )
            raise
예제 #19
0
파일: base.py 프로젝트: Tchanders/socorro
class ElasticsearchTestCase(TestCase):
    """Shared fixture for tests that talk to Elasticsearch.

    Construction builds the configuration, an ``IndexCreator`` together with
    its index client, and a connection. ``setUp`` indexes the Super Search
    fields and creates the crash index; ``tearDown`` deletes both indices.
    """

    def __init__(self, *args, **kwargs):
        super(ElasticsearchTestCase, self).__init__(*args, **kwargs)

        self.config = self.get_base_config()
        es_settings = self.config.elasticsearch
        context_factory = es_settings.elasticsearch_class(config=es_settings)

        # The creator gets a config tuned for IndexCreator, not self.config.
        creator = IndexCreator(self.get_tuned_config(IndexCreator))
        self.index_creator = creator
        self.index_client = creator.get_index_client()

        # NOTE(review): the connection is kept after the ``with`` block has
        # exited; presumably the context manager leaves it usable -- confirm.
        with context_factory() as connection:
            self.connection = connection

    def setUp(self):
        """Index the Super Search fields, then create the crash index."""
        self.index_super_search_fields()

        crash_index = self.config.elasticsearch.elasticsearch_index
        self.index_creator.create_socorro_index(crash_index)

        super(ElasticsearchTestCase, self).setUp()

    def tearDown(self):
        """Delete the default index and the crash index."""
        # Option names are resolved one at a time so each index name is read
        # from the config right before its own delete call.
        for option in ('elasticsearch_default_index', 'elasticsearch_index'):
            self.index_client.delete(
                getattr(self.config.elasticsearch, option))

        super(ElasticsearchTestCase, self).tearDown()

    def get_tuned_config(self, sources, extra_values=None):
        """Return a config built from the required config of ``sources``.

        ``sources`` may be a single object or a list/tuple of objects, each
        exposing ``get_required_config()``. Every definition gets a
        ``logger`` option defaulting to one shared ``mock.Mock``. Values
        come from the environment and from ``DEFAULT_VALUES`` updated with
        the mock logger and then with ``extra_values`` when given.
        """
        if isinstance(sources, (list, tuple)):
            source_list = sources
        else:
            source_list = [sources]

        logger_stub = mock.Mock()

        def definition_for(source):
            required = source.get_required_config()
            required.add_option('logger', default=logger_stub)
            return required

        definitions = [definition_for(source) for source in source_list]

        overrides = DEFAULT_VALUES.copy()
        overrides.update(dict(logger=logger_stub))
        if extra_values:
            overrides.update(extra_values)

        manager = ConfigurationManager(
            definitions,
            app_name='testapp',
            app_version='1.0',
            app_description='Elasticsearch integration tests',
            values_source_list=[environment, overrides],
            argv_source=[],
        )
        return manager.get_config()

    def get_base_config(self, es_index=None):
        """Return the ``ElasticsearchConfig``-based config.

        A truthy ``es_index`` is passed on as the value of
        ``resource.elasticsearch.elasticsearch_index``.
        """
        if es_index:
            extra_values = {
                'resource.elasticsearch.elasticsearch_index': es_index
            }
        else:
            extra_values = None

        return self.get_tuned_config(
            ElasticsearchConfig, extra_values=extra_values)

    def index_super_search_fields(self, fields=None):
        """Bulk-index ``fields`` (default ``SUPERSEARCH_FIELDS``).

        Each item becomes a ``supersearch_fields`` document in the default
        index, keyed by the field name; the index is refreshed afterwards.
        """
        if fields is None:
            fields = SUPERSEARCH_FIELDS

        target_index = self.config.elasticsearch.elasticsearch_default_index

        actions = [
            {
                '_index': target_index,
                '_type': 'supersearch_fields',
                '_id': name,
                '_source': definition,
            }
            for name, definition in fields.iteritems()
        ]

        bulk(client=self.connection, actions=actions)
        self.index_client.refresh(index=[target_index])

    def index_crash(self, processed_crash, raw_crash=None, crash_id=None):
        """Index one crash document and return the ``_id`` in the response.

        A random UUID string is used when ``crash_id`` is None and an empty
        dict when ``raw_crash`` is None.
        """
        if crash_id is None:
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))

        if raw_crash is None:
            raw_crash = {}

        es_settings = self.config.elasticsearch
        response = self.connection.index(
            index=es_settings.elasticsearch_index,
            doc_type=es_settings.elasticsearch_doctype,
            id=crash_id,
            body={
                'crash_id': crash_id,
                'processed_crash': processed_crash,
                'raw_crash': raw_crash,
            },
        )
        return response['_id']

    def index_many_crashes(
        self, number, processed_crash=None, raw_crash=None, loop_field=None
    ):
        """Bulk-index ``number`` crash documents with random crash ids.

        When ``loop_field`` is given, each document gets its own copy of
        ``processed_crash`` in which that field is replaced by the original
        value combined with the loop counter through the ``%`` operator.
        Otherwise every document shares the same ``processed_crash`` object.
        The crash index is refreshed once the bulk request has been sent.
        """
        if processed_crash is None:
            processed_crash = {}

        if raw_crash is None:
            raw_crash = {}

        def build_action(position):
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))

            if loop_field is None:
                processed = processed_crash
            else:
                processed = processed_crash.copy()
                processed[loop_field] = processed_crash[loop_field] % position

            return {
                '_index': self.config.elasticsearch.elasticsearch_index,
                '_type': self.config.elasticsearch.elasticsearch_doctype,
                '_id': crash_id,
                '_source': {
                    'crash_id': crash_id,
                    'processed_crash': processed,
                    'raw_crash': raw_crash,
                },
            }

        actions = [build_action(position) for position in range(number)]

        bulk(client=self.connection, actions=actions)
        self.refresh_index()

    def refresh_index(self):
        """Refresh the crash index."""
        crash_index = self.config.elasticsearch.elasticsearch_index
        self.index_client.refresh(index=crash_index)
예제 #20
0
class ElasticsearchTestCase(TestCase):
    """Common base for unit tests that need a live Elasticsearch.

    The constructor prepares the configuration, an ``IndexCreator`` with
    its index client, and a connection. ``setUp`` indexes the Super Search
    fields and creates the crash index; ``tearDown`` deletes both indices.
    """
    def __init__(self, *args, **kwargs):
        super(ElasticsearchTestCase, self).__init__(*args, **kwargs)

        self.config = self.get_base_config()
        es_settings = self.config.elasticsearch
        context_factory = es_settings.elasticsearch_class(config=es_settings)

        # The creator gets a config tuned for IndexCreator, not self.config.
        creator = IndexCreator(self.get_tuned_config(IndexCreator))
        self.index_creator = creator
        self.index_client = creator.get_index_client()

        # NOTE(review): the connection is kept after the ``with`` block has
        # exited; presumably the context manager leaves it usable -- confirm.
        with context_factory() as connection:
            self.connection = connection

    def setUp(self):
        """Index the Super Search fields, then create the crash index."""
        self.index_super_search_fields()

        crash_index = self.config.elasticsearch.elasticsearch_index
        self.index_creator.create_socorro_index(crash_index)

        super(ElasticsearchTestCase, self).setUp()

    def tearDown(self):
        """Delete the default index and the crash index."""
        # Option names are resolved one at a time so each index name is read
        # from the config right before its own delete call.
        for option in ('elasticsearch_default_index', 'elasticsearch_index'):
            self.index_client.delete(
                getattr(self.config.elasticsearch, option))

        super(ElasticsearchTestCase, self).tearDown()

    def health_check(self):
        """Request cluster health, waiting for 'yellow' status."""
        health_options = {'wait_for_status': 'yellow', 'request_timeout': 1}
        self.connection.cluster.health(**health_options)

    def get_tuned_config(self, sources, extra_values=None):
        """Return a config built from the required config of ``sources``.

        ``sources`` may be a single object or a list/tuple of objects, each
        exposing ``get_required_config()``. Every definition gets a
        ``logger`` option defaulting to one shared ``mock.Mock``. Values
        come from the environment and from ``DEFAULT_VALUES`` updated with
        the mock logger and then with ``extra_values`` when given.
        """
        if isinstance(sources, (list, tuple)):
            source_list = sources
        else:
            source_list = [sources]

        logger_stub = mock.Mock()

        definitions = []
        for source in source_list:
            required = source.get_required_config()
            required.add_option('logger', default=logger_stub)
            definitions.append(required)

        overrides = DEFAULT_VALUES.copy()
        overrides.update(dict(logger=logger_stub))
        if extra_values:
            overrides.update(extra_values)

        manager = ConfigurationManager(
            definitions,
            app_name='testapp',
            app_version='1.0',
            app_description='Elasticsearch integration tests',
            values_source_list=[environment, overrides],
            argv_source=[],
        )
        return manager.get_config()

    def get_base_config(self, es_index=None):
        """Return the ``ElasticsearchConfig``-based config.

        A truthy ``es_index`` is passed on as the value of
        ``resource.elasticsearch.elasticsearch_index``.
        """
        if es_index:
            extra_values = {
                'resource.elasticsearch.elasticsearch_index': es_index
            }
        else:
            extra_values = None

        return self.get_tuned_config(
            ElasticsearchConfig, extra_values=extra_values)

    def index_super_search_fields(self, fields=None):
        """Bulk-index ``fields`` (default ``SUPERSEARCH_FIELDS``).

        Each item becomes a ``supersearch_fields`` document in the default
        index, keyed by the field name; the index is refreshed afterwards.
        """
        if fields is None:
            fields = SUPERSEARCH_FIELDS

        target_index = self.config.elasticsearch.elasticsearch_default_index

        actions = [
            {
                '_index': target_index,
                '_type': 'supersearch_fields',
                '_id': name,
                '_source': definition,
            }
            for name, definition in fields.iteritems()
        ]

        bulk(client=self.connection, actions=actions)
        self.index_client.refresh(index=[target_index])

    def index_crash(self,
                    processed_crash,
                    raw_crash=None,
                    crash_id=None,
                    root_doc=None):
        """Index one crash document and return the ``_id`` in the response.

        A random UUID string is used when ``crash_id`` is None and an empty
        dict when ``raw_crash`` is None. A truthy ``root_doc`` is copied and
        used as the base of the document; the ``crash_id``,
        ``processed_crash`` and ``raw_crash`` keys are then set on top of it.
        """
        if crash_id is None:
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))

        if raw_crash is None:
            raw_crash = {}

        document = dict(root_doc) if root_doc else {}
        document.update({
            'crash_id': crash_id,
            'processed_crash': processed_crash,
            'raw_crash': raw_crash,
        })

        es_settings = self.config.elasticsearch
        response = self.connection.index(
            index=es_settings.elasticsearch_index,
            doc_type=es_settings.elasticsearch_doctype,
            id=crash_id,
            body=document,
        )
        return response['_id']

    def index_many_crashes(self,
                           number,
                           processed_crash=None,
                           raw_crash=None,
                           loop_field=None):
        """Bulk-index ``number`` crash documents with random crash ids.

        When ``loop_field`` is given, each document gets its own copy of
        ``processed_crash`` in which that field is replaced by the original
        value combined with the loop counter through the ``%`` operator.
        Otherwise every document shares the same ``processed_crash`` object.
        The crash index is refreshed once the bulk request has been sent.
        """
        if processed_crash is None:
            processed_crash = {}

        if raw_crash is None:
            raw_crash = {}

        def build_action(position):
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))

            if loop_field is None:
                processed = processed_crash
            else:
                processed = processed_crash.copy()
                processed[loop_field] = processed_crash[loop_field] % position

            return {
                '_index': self.config.elasticsearch.elasticsearch_index,
                '_type': self.config.elasticsearch.elasticsearch_doctype,
                '_id': crash_id,
                '_source': {
                    'crash_id': crash_id,
                    'processed_crash': processed,
                    'raw_crash': raw_crash,
                },
            }

        actions = [build_action(position) for position in range(number)]

        bulk(client=self.connection, actions=actions)
        self.refresh_index()

    def refresh_index(self, es_index=None):
        """Refresh ``es_index``, or the crash index when it is falsy."""
        target = es_index or self.config.elasticsearch.elasticsearch_index
        self.index_client.refresh(index=target)
예제 #21
0
class ElasticsearchTestCase(TestCaseWithConfig):
    """Shared fixture for unit tests that talk to Elasticsearch.

    Construction builds the configuration, an ``IndexCreator`` together with
    its index client, and a connection. ``setUp`` creates the crash index
    and ``tearDown`` deletes it.
    """

    def __init__(self, *args, **kwargs):
        super(ElasticsearchTestCase, self).__init__(*args, **kwargs)

        self.config = self.get_base_config()
        es_settings = self.config.elasticsearch
        context_factory = es_settings.elasticsearch_class(config=es_settings)

        # The creator gets a config tuned for IndexCreator, not self.config.
        creator = IndexCreator(self.get_tuned_config(IndexCreator))
        self.index_creator = creator
        self.index_client = creator.get_index_client()

        # NOTE(review): the connection is kept after the ``with`` block has
        # exited; presumably the context manager leaves it usable -- confirm.
        with context_factory() as connection:
            self.connection = connection

    def setUp(self):
        """Run the parent ``setUp``, then create the crash index."""
        super(ElasticsearchTestCase, self).setUp()
        crash_index = self.config.elasticsearch.elasticsearch_index
        self.index_creator.create_socorro_index(crash_index)

    def tearDown(self):
        """Delete the crash index, then run the parent ``tearDown``."""
        crash_index = self.config.elasticsearch.elasticsearch_index
        self.index_client.delete(crash_index)

        super(ElasticsearchTestCase, self).tearDown()

    def health_check(self):
        """Request cluster health, waiting for 'yellow' status."""
        health_options = {'wait_for_status': 'yellow', 'request_timeout': 5}
        self.connection.cluster.health(**health_options)

    def get_url(self):
        """Returns the first url in the elasticsearch_urls list"""
        urls = self.config.elasticsearch.elasticsearch_urls
        return urls[0]

    def get_tuned_config(self, sources, extra_values=None):
        """Delegate to the parent with ``DEFAULT_VALUES`` as the base values.

        A copy of ``DEFAULT_VALUES`` is updated with ``extra_values`` (when
        truthy) and handed to the parent's ``get_tuned_config``.
        """
        merged_values = DEFAULT_VALUES.copy()
        if extra_values:
            merged_values.update(extra_values)

        parent = super(ElasticsearchTestCase, self)
        return parent.get_tuned_config(sources, merged_values)

    def get_base_config(self, es_index=None):
        """Return the ``ElasticsearchConfig``-based config.

        A truthy ``es_index`` is passed on as the value of
        ``resource.elasticsearch.elasticsearch_index``.
        """
        if es_index:
            extra_values = {
                'resource.elasticsearch.elasticsearch_index': es_index
            }
        else:
            extra_values = None

        return self.get_tuned_config(
            ElasticsearchConfig, extra_values=extra_values)

    def index_crash(self, processed_crash=None, raw_crash=None, crash_id=None):
        """Index one crash document and return the ``_id`` in the response.

        A random UUID string is used when ``crash_id`` is None; falsy
        ``processed_crash`` / ``raw_crash`` values are replaced by empty
        dicts.
        """
        if crash_id is None:
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))

        es_settings = self.config.elasticsearch
        response = self.connection.index(
            index=es_settings.elasticsearch_index,
            doc_type=es_settings.elasticsearch_doctype,
            id=crash_id,
            body={
                'crash_id': crash_id,
                'processed_crash': processed_crash or {},
                'raw_crash': raw_crash or {},
            },
        )
        return response['_id']

    def index_many_crashes(
        self, number, processed_crash=None, raw_crash=None, loop_field=None
    ):
        """Bulk-index ``number`` crash documents with random crash ids.

        When ``loop_field`` is given, each document gets its own copy of
        ``processed_crash`` in which that field is replaced by the original
        value combined with the loop counter through the ``%`` operator.
        Otherwise every document shares the same ``processed_crash`` object.
        The crash index is refreshed once the bulk request has been sent.
        """
        processed_crash = processed_crash or {}
        raw_crash = raw_crash or {}

        def build_action(position):
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))

            if loop_field is None:
                processed = processed_crash
            else:
                processed = processed_crash.copy()
                processed[loop_field] = processed_crash[loop_field] % position

            return {
                '_index': self.config.elasticsearch.elasticsearch_index,
                '_type': self.config.elasticsearch.elasticsearch_doctype,
                '_id': crash_id,
                '_source': {
                    'crash_id': crash_id,
                    'processed_crash': processed,
                    'raw_crash': raw_crash,
                },
            }

        actions = [build_action(position) for position in range(number)]

        bulk(client=self.connection, actions=actions)
        self.refresh_index()

    def refresh_index(self, es_index=None):
        """Refresh ``es_index``, or the crash index when it is falsy."""
        target = es_index or self.config.elasticsearch.elasticsearch_index
        self.index_client.refresh(index=target)
예제 #22
0
파일: base.py 프로젝트: uglide/socorro
class ElasticsearchTestCase(TestCaseWithConfig):
    """Shared fixture for unit tests that talk to Elasticsearch.

    Constructing an instance resolves the test configuration, builds an
    ``IndexCreator`` with its index client, and stores a connection taken
    from the configured connection context. ``setUp`` creates the configured
    index and ``tearDown`` deletes it again.
    """

    def __init__(self, *args, **kwargs):
        super(ElasticsearchTestCase, self).__init__(*args, **kwargs)

        self.config = self.get_base_config()
        es_config = self.config.elasticsearch
        connection_context = es_config.elasticsearch_class(config=es_config)

        self.index_creator = IndexCreator(self.get_tuned_config(IndexCreator))
        self.index_client = self.index_creator.get_index_client()

        # Keep the connection on the instance so the indexing helpers below
        # can use it.
        with connection_context() as conn:
            self.connection = conn

    def setUp(self):
        """Run the parent set-up, then create the configured index."""
        super(ElasticsearchTestCase, self).setUp()
        index_name = self.config.elasticsearch.elasticsearch_index
        self.index_creator.create_socorro_index(index_name)

    def tearDown(self):
        """Delete the configured index, then run the parent tear-down."""
        index_name = self.config.elasticsearch.elasticsearch_index
        self.index_client.delete(index_name)

        super(ElasticsearchTestCase, self).tearDown()

    def health_check(self):
        """Query cluster health, waiting for 'yellow' with a 5s timeout."""
        cluster = self.connection.cluster
        cluster.health(wait_for_status='yellow', request_timeout=5)

    def get_url(self):
        """Return the first entry of the configured elasticsearch_urls."""
        urls = self.config.elasticsearch.elasticsearch_urls
        return urls[0]

    def get_tuned_config(self, sources, extra_values=None):
        """Delegate to the parent with DEFAULT_VALUES plus ``extra_values``.

        A copy of ``DEFAULT_VALUES`` is updated with ``extra_values`` (when
        truthy) and handed to the parent implementation as the values source.
        """
        parent = super(ElasticsearchTestCase, self)

        merged_values = DEFAULT_VALUES.copy()
        if extra_values:
            merged_values.update(extra_values)

        return parent.get_tuned_config(sources, merged_values)

    def get_base_config(self, es_index=None):
        """Return the tuned ``ElasticsearchConfig`` configuration.

        A truthy ``es_index`` is passed along as an override for
        ``resource.elasticsearch.elasticsearch_index``.
        """
        if es_index:
            overrides = {
                'resource.elasticsearch.elasticsearch_index': es_index
            }
        else:
            overrides = None

        return self.get_tuned_config(ElasticsearchConfig,
                                     extra_values=overrides)

    def index_crash(self, processed_crash=None, raw_crash=None, crash_id=None):
        """Index one crash document and return the id reported by the client.

        Falsy ``processed_crash`` / ``raw_crash`` values are replaced by
        empty dicts; a ``None`` ``crash_id`` is replaced by a UUID-formatted
        string built from 128 random bits.
        """
        if crash_id is None:
            random_bits = random.getrandbits(128)
            crash_id = str(uuid.UUID(int=random_bits))

        if not raw_crash:
            raw_crash = {}
        if not processed_crash:
            processed_crash = {}

        document = dict(
            crash_id=crash_id,
            processed_crash=processed_crash,
            raw_crash=raw_crash,
        )

        es_config = self.config.elasticsearch
        response = self.connection.index(
            index=es_config.elasticsearch_index,
            doc_type=es_config.elasticsearch_doctype,
            id=crash_id,
            body=document,
        )
        return response['_id']

    def index_many_crashes(self,
                           number,
                           processed_crash=None,
                           raw_crash=None,
                           loop_field=None):
        """Bulk-index ``number`` crash documents, then refresh the index.

        Each document gets a random UUID-formatted id. When ``loop_field``
        is not ``None``, every document receives a shallow copy of
        ``processed_crash`` whose ``loop_field`` entry is replaced by
        ``processed_crash[loop_field] % i`` (``i`` is the loop counter).
        """
        processed_crash = processed_crash or {}
        raw_crash = raw_crash or {}

        actions = []
        for i in range(number):
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))

            if loop_field is None:
                # Without a loop field all documents share the same dict.
                processed_copy = processed_crash
            else:
                processed_copy = processed_crash.copy()
                processed_copy[loop_field] = processed_crash[loop_field] % i

            es_config = self.config.elasticsearch
            actions.append({
                '_index': es_config.elasticsearch_index,
                '_type': es_config.elasticsearch_doctype,
                '_id': crash_id,
                '_source': {
                    'crash_id': crash_id,
                    'processed_crash': processed_copy,
                    'raw_crash': raw_crash,
                },
            })

        bulk(client=self.connection, actions=actions)
        self.refresh_index()

    def refresh_index(self, es_index=None):
        """Refresh ``es_index``, or the configured index when it is falsy."""
        if not es_index:
            es_index = self.config.elasticsearch.elasticsearch_index
        self.index_client.refresh(index=es_index)
예제 #23
0
class ElasticsearchTestCase(TestCase):
    """Base class for Elastic Search related unit tests.

    Constructing an instance resolves the test configuration, builds an
    ``IndexCreator`` with its index client, and stores a connection taken
    from the configured connection context. ``setUp`` indexes the super
    search fields and creates the configured index; ``tearDown`` deletes
    both the default index and the configured index.
    """

    def __init__(self, *args, **kwargs):
        super(ElasticsearchTestCase, self).__init__(*args, **kwargs)

        self.config = self.get_base_config()
        es_context = self.config.elasticsearch.elasticsearch_class(config=self.config.elasticsearch)

        creator_config = self.get_tuned_config(IndexCreator)

        self.index_creator = IndexCreator(creator_config)
        self.index_client = self.index_creator.get_index_client()

        # Keep the connection on the instance so the indexing helpers below
        # can use it.
        with es_context() as conn:
            self.connection = conn

    def setUp(self):
        """Index the super search fields, create the index, then run the parent set-up."""
        # Create the supersearch fields.
        self.index_super_search_fields()

        self.index_creator.create_socorro_index(self.config.elasticsearch.elasticsearch_index)

        super(ElasticsearchTestCase, self).setUp()

    def tearDown(self):
        """Delete the default and configured indices, then run the parent tear-down."""
        # Clear the test indices.
        self.index_client.delete(self.config.elasticsearch.elasticsearch_default_index)
        self.index_client.delete(self.config.elasticsearch.elasticsearch_index)

        super(ElasticsearchTestCase, self).tearDown()

    def health_check(self):
        """Query cluster health, waiting for 'yellow' with a 1s request timeout."""
        self.connection.cluster.health(wait_for_status="yellow", request_timeout=1)

    def get_tuned_config(self, sources, extra_values=None):
        """Build a configuration for ``sources`` with test defaults applied.

        :arg sources: one object, or a list/tuple of objects, each exposing
            ``get_required_config()``.
        :arg extra_values: optional mapping applied on top of a copy of
            ``DEFAULT_VALUES``.
        :returns: the result of ``ConfigurationManager.get_config()``.
        """
        if not isinstance(sources, (list, tuple)):
            sources = [sources]

        # A single mock logger is shared by every source and by the values.
        mock_logging = mock.Mock()

        config_definitions = []
        for source in sources:
            conf = source.get_required_config()
            conf.add_option("logger", default=mock_logging)
            config_definitions.append(conf)

        values_source = DEFAULT_VALUES.copy()
        values_source.update({"logger": mock_logging})
        if extra_values:
            values_source.update(extra_values)

        config_manager = ConfigurationManager(
            config_definitions,
            app_name="testapp",
            app_version="1.0",
            app_description="Elasticsearch integration tests",
            values_source_list=[environment, values_source],
            argv_source=[],
        )

        return config_manager.get_config()

    def get_base_config(self, es_index=None):
        """Return the tuned ``ElasticsearchConfig`` configuration.

        A truthy ``es_index`` is passed along as an override for
        ``resource.elasticsearch.elasticsearch_index``.
        """
        extra_values = None
        if es_index:
            extra_values = {"resource.elasticsearch.elasticsearch_index": es_index}

        return self.get_tuned_config(ElasticsearchConfig, extra_values=extra_values)

    def index_super_search_fields(self, fields=None):
        """Bulk-index super search field definitions into the default index.

        :arg fields: mapping of field name to field definition; defaults to
            ``SUPERSEARCH_FIELDS`` when ``None``.
        """
        if fields is None:
            fields = SUPERSEARCH_FIELDS

        es_index = self.config.elasticsearch.elasticsearch_default_index

        # ``items()`` exists on both Python 2 and Python 3 dicts, whereas
        # ``iteritems()`` is Python 2 only.
        actions = [
            {"_index": es_index, "_type": "supersearch_fields", "_id": name, "_source": field}
            for name, field in fields.items()
        ]

        bulk(client=self.connection, actions=actions)
        self.index_client.refresh(index=[es_index])

    def index_crash(self, processed_crash, raw_crash=None, crash_id=None, root_doc=None):
        """Index one crash document and return the id reported by the client.

        :arg processed_crash: stored under ``processed_crash`` as given.
        :arg raw_crash: stored under ``raw_crash``; ``None`` becomes ``{}``.
        :arg crash_id: document id; when ``None`` a UUID-formatted string is
            built from 128 random bits.
        :arg root_doc: optional mapping whose entries are copied into the
            top level of the document before the crash keys are set (the
            crash keys win on conflict). The mapping itself is not modified.
        :returns: the ``_id`` entry of the value returned by
            ``self.connection.index``.
        """
        if crash_id is None:
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))

        if raw_crash is None:
            raw_crash = {}

        doc = {}
        if root_doc:
            doc = dict(root_doc)

        doc.update({"crash_id": crash_id, "processed_crash": processed_crash, "raw_crash": raw_crash})
        res = self.connection.index(
            index=self.config.elasticsearch.elasticsearch_index,
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
            id=crash_id,
            body=doc,
        )
        return res["_id"]

    def index_many_crashes(self, number, processed_crash=None, raw_crash=None, loop_field=None):
        """Bulk-index ``number`` crash documents, then refresh the index.

        Each document gets a random UUID-formatted id. When ``loop_field``
        is not ``None``, every document receives a shallow copy of
        ``processed_crash`` whose ``loop_field`` entry is replaced by
        ``processed_crash[loop_field] % i`` (``i`` is the loop counter).
        """
        if processed_crash is None:
            processed_crash = {}

        if raw_crash is None:
            raw_crash = {}

        actions = []
        for i in range(number):
            crash_id = str(uuid.UUID(int=random.getrandbits(128)))

            if loop_field is not None:
                processed_copy = processed_crash.copy()
                processed_copy[loop_field] = processed_crash[loop_field] % i
            else:
                # Without a loop field all documents share the same dict.
                processed_copy = processed_crash

            doc = {"crash_id": crash_id, "processed_crash": processed_copy, "raw_crash": raw_crash}
            action = {
                "_index": self.config.elasticsearch.elasticsearch_index,
                "_type": self.config.elasticsearch.elasticsearch_doctype,
                "_id": crash_id,
                "_source": doc,
            }
            actions.append(action)

        bulk(client=self.connection, actions=actions)
        self.refresh_index()

    def refresh_index(self, es_index=None):
        """Refresh ``es_index``, or the configured index when it is falsy."""
        self.index_client.refresh(index=es_index or self.config.elasticsearch.elasticsearch_index)
예제 #24
0
    def _submit_crash_to_elasticsearch(self, connection, crash_document):
        """Submit a crash report to elasticsearch.

        The document is sent with ``connection.index``. When that raises a
        ``TransportError`` whose message names an offending field (a string
        that is too long, or a value that is not a valid number), the field
        is deleted from ``crash_document``, its name is appended to the
        document's ``removed_fields`` entry, and the submission is tried
        again. At most 5 attempts are made.

        :arg connection: object exposing ``index(index=, doc_type=, body=,
            id=)``.
        :arg crash_document: dict holding at least ``crash_id`` and
            ``processed_crash`` (with a ``date_processed`` entry). It is
            modified in place when fields have to be removed.
        :raises elasticsearch.exceptions.TransportError: when the failing
            field cannot be determined from the error message.
        :raises elasticsearch.exceptions.ElasticsearchException: on any other
            Elasticsearch error.
        """
        # Massage the crash such that the date_processed field is formatted
        # in the fashion of our established mapping.
        self.reconstitute_datetimes(crash_document['processed_crash'])

        # Obtain the index name.
        es_index = self.get_index_for_crash(
            crash_document['processed_crash']['date_processed']
        )
        es_doctype = self.config.elasticsearch.elasticsearch_doctype
        crash_id = crash_document['crash_id']

        # Attempt to create the index; it's OK if it already exists.
        # NOTE(review): nothing in this method adds `es_index` to
        # `self.indices_cache`; confirm the cache is populated elsewhere.
        if es_index not in self.indices_cache:
            index_creator = IndexCreator(config=self.config)
            index_creator.create_socorro_index(es_index)

        # Submit the crash for indexing.
        # Don't try more than 5 times. That is to avoid infinite loops in
        # case of an unhandled exception.
        max_attempts = 5
        full_suffix = '.full'
        for _attempt in range(max_attempts):
            try:
                connection.index(
                    index=es_index,
                    doc_type=es_doctype,
                    body=crash_document,
                    id=crash_id
                )
                break
            except elasticsearch.exceptions.TransportError as e:
                field_name = None

                if 'MaxBytesLengthExceededException' in e.error:
                    # This is caused by a string that is way too long for
                    # Elasticsearch.
                    matches = self.field_name_string_error_re.findall(e.error)
                    if matches:
                        field_name = matches[0]
                elif 'NumberFormatException' in e.error:
                    # This is caused by a number that is either too big for
                    # Elasticsearch or just not a number.
                    matches = self.field_name_number_error_re.findall(e.error)
                    if matches:
                        field_name = matches[0]

                if not field_name:
                    # We are unable to parse which field to remove, we cannot
                    # try to fix the document. Let it raise.
                    self.config.logger.critical(
                        'Submission to Elasticsearch failed for %s (%s)',
                        crash_id,
                        e,
                        exc_info=True
                    )
                    raise

                if field_name.endswith(full_suffix):
                    # Remove the `.full` at the end, that is a special mapping
                    # construct that is not part of the real field name.
                    # Slice instead of `rstrip`: `rstrip('.full')` strips any
                    # trailing '.', 'f', 'u' or 'l' characters and would also
                    # eat into the real name (e.g. `email.full` -> `emai`).
                    field_name = field_name[:-len(full_suffix)]

                # Now remove that field from the document before trying again.
                field_path = field_name.split('.')
                parent = crash_document
                for field in field_path[:-1]:
                    parent = parent[field]
                # The last path element is the key to drop from its parent.
                del parent[field_path[-1]]

                # Add a note in the document that a field has been removed.
                if crash_document.get('removed_fields'):
                    crash_document['removed_fields'] = '{} {}'.format(
                        crash_document['removed_fields'],
                        field_name
                    )
                else:
                    crash_document['removed_fields'] = field_name
            except elasticsearch.exceptions.ElasticsearchException as e:
                self.config.logger.critical(
                    'Submission to Elasticsearch failed for %s (%s)',
                    crash_id,
                    e,
                    exc_info=True
                )
                raise
        else:
            # Every attempt failed with a removable field; the document was
            # not indexed. Leave a trace instead of returning silently.
            self.config.logger.critical(
                'Submission to Elasticsearch failed for %s after %s attempts',
                crash_id,
                max_attempts
            )
예제 #25
0
    def test_create_index(self):
        """An index created via ``IndexCreator.create_index`` must exist afterwards."""
        creator = IndexCreator(config=self.config)
        index_name = self.config.elasticsearch.elasticsearch_index
        creator.create_index(index_name, {"foo": "bar"})

        index_exists = self.index_client.exists(index_name)
        ok_(index_exists)