コード例 #1
0
    def create_socorro_index(self, es_index, mappings=None):
        """Create an index that will receive crash reports.

        :arg es_index: name of the index, handed to ``create_index``
        :arg mappings: mapping used to build the index settings; when
            ``None``, the mapping returned by ``SuperSearchFields`` is used

        """
        if mappings is None:
            fields_api = SuperSearchFields(config=self.config)
            mappings = fields_api.get_mapping()

        index_settings = self.get_socorro_index_settings(mappings)
        self.create_index(es_index, index_settings)
コード例 #2
0
    def create_index(self, index_name, mappings=None):
        """Create an index that will receive crash reports.

        :arg index_name: the name of the index to create
        :arg mappings: dict of doctype->ES mapping; when ``None``, the
            mapping returned by ``SuperSearchFields`` is used

        :returns: True if the index was created, False if it already
            existed

        :raises elasticsearch.exceptions.RequestError: for any request
            error other than the "index already exists" one

        """
        if mappings is None:
            fields_api = SuperSearchFields(context=self)
            mappings = fields_api.get_mapping()

        index_settings = self.get_socorro_index_settings(mappings)

        try:
            indices = self.indices_client()
            indices.create(index=index_name, body=index_settings)
        except elasticsearch.exceptions.RequestError as exc:
            # An already-existing index is not treated as a failure.
            # NOTE! This is NOT what the error looks like in ES 2.x
            if 'IndexAlreadyExistsException' in str(exc):
                return False
            raise
        else:
            return True
コード例 #3
0
    def create_socorro_index(self, es_index, mappings=None):
        """Create an index that will receive crash reports.

        When ``self.config.elasticsearch.dry_run`` is truthy, the computed
        index settings are printed as indented JSON and no index is created.

        :arg es_index: name of the index, handed to ``create_index``
        :arg mappings: mapping used to build the index settings; when
            ``None``, the mapping returned by ``SuperSearchFields`` is used

        """
        if mappings is None:
            fields_api = SuperSearchFields(config=self.config)
            mappings = fields_api.get_mapping()

        index_settings = self.get_socorro_index_settings(mappings)

        if not self.config.elasticsearch.dry_run:
            self.create_index(es_index, index_settings)
            return

        # Dry run: only show what would have been sent.
        print(json.dumps(index_settings, indent=2))
コード例 #4
0
    def create_socorro_index(self, es_index, mappings=None):
        """Create an index that will receive crash reports.

        :arg es_index: name of the index, handed to ``create_index``
        :arg mappings: mapping used to build the index settings; when
            ``None``, the mapping returned by ``SuperSearchFields`` is used

        """
        if mappings is None:
            # Deferred to call time: importing this at module level would
            # create a circular dependency.
            from socorro.external.es.super_search_fields import (
                SuperSearchFields)
            fields_api = SuperSearchFields(config=self.config)
            mappings = fields_api.get_mapping()

        index_settings = self.get_socorro_index_settings(mappings)
        self.create_index(es_index, index_settings)
コード例 #5
0
    def _get_all_fields(self):
        """Return the super search fields, cached on the instance.

        The value fetched from ``SuperSearchFields`` is stored on ``self``
        together with the time it was fetched, and is reused for as long as
        it is less than one hour old.

        """
        max_age_seconds = 60 * 60
        cache_attrs = ('_all_fields', '_all_fields_timestamp')

        if all(hasattr(self, name) for name in cache_attrs):
            # A cached copy exists; use it only if it is still fresh.
            cache_age = time.time() - self._all_fields_timestamp
            if cache_age < max_age_seconds:
                return self._all_fields

        fields_api = SuperSearchFields(config=self.config)
        self._all_fields = fields_api.get()
        self._all_fields_timestamp = time.time()
        return self._all_fields
コード例 #6
0
    def __init__(self, *args, **kwargs):
        """Set up the ES context and field metadata, then defer to the parent.

        Reads ``config`` from the keyword arguments, builds the
        Elasticsearch context from it, loads all fields from
        ``SuperSearchFields`` and forwards them to the parent initializer
        as the ``fields`` keyword argument.

        """
        self.config = kwargs.get('config')

        es_config = self.config.elasticsearch
        self.es_context = es_config.elasticsearch_class(es_config)

        self.all_fields = SuperSearchFields(config=self.config).get_fields()

        # Associates a field's name in the database with its exposed name
        # (in the results and facets).
        self.database_name_to_field_name_map = {
            field['in_database_name']: field['name']
            for field in self.all_fields.values()
        }

        kwargs['fields'] = self.all_fields
        super(SuperSearch, self).__init__(*args, **kwargs)
コード例 #7
0
    def create_socorro_index(self,
                             index_name,
                             mappings=None,
                             log_result=False):
        """Create an index that will receive crash reports.

        Note: This function is called in two contexts: by the processor
        when it saves crash reports, and by the local dev environment
        scripts. The former wants index-already-exists errors ignored
        quietly, while the latter wants the result logged. That is why
        ``log_result`` exists.

        :arg index_name: name of the index, handed to ``create_index``
        :arg mappings: mapping used to build the index settings; when
            ``None``, the mapping returned by ``SuperSearchFields`` is used
        :arg log_result: passed through unchanged to ``create_index``

        """
        if mappings is None:
            fields_api = SuperSearchFields(context=self)
            mappings = fields_api.get_mapping()

        index_settings = self.get_socorro_index_settings(mappings)
        self.create_index(index_name, index_settings, log_result)
コード例 #8
0
 def get(self, **kwargs):
     """Run the parent ``get`` with ``_fields`` filled in.

     The ``_fields`` keyword argument is always overwritten with the
     fields returned by ``SuperSearchFields``.
     """
     fields_api = SuperSearchFields(config=self.config)
     kwargs['_fields'] = fields_api.get_fields()
     parent = super(SuperSearchWithFields, self)
     return parent.get(**kwargs)
コード例 #9
0
 def create_socorro_index(self, es_index):
     """Create an index that will receive crash reports.

     The mapping returned by ``SuperSearchFields`` is passed directly to
     ``create_index`` as the index settings.
     """
     fields_api = SuperSearchFields(config=self.config)
     index_settings = fields_api.get_mapping()
     self.create_index(es_index, index_settings)
コード例 #10
0
    def setUp(self):
        """Run the parent setup, then create the API object under test."""
        parent = super(IntegrationTestSuperSearchFields, self)
        parent.setUp()

        self.api = SuperSearchFields(config=self.config)
コード例 #11
0
 def delete_field(self, **kwargs):
     """Delegate to ``SuperSearchFields.delete_field`` and return its result."""
     fields_api = SuperSearchFields(config=self.config)
     return fields_api.delete_field(**kwargs)
コード例 #12
0
    def run(self, end_datetime):
        """Collect first-seen data per signature and save it.

        Queries Super Search page by page for the signature, build id and
        date of every crash in the ``self.config.period`` minutes before
        ``end_datetime`` (truncated to the hour). For each signature, the
        smallest build id and smallest date seen are kept. Each result is
        then passed to ``update_crashstats_signature``, or only logged when
        ``self.config.dry_run`` is truthy.

        :arg end_datetime: end of the time window; truncated to the hour
            before use

        """
        # Truncate to the hour
        end_datetime = end_datetime.replace(minute=0, second=0, microsecond=0)

        # Do a super search and get the signature, buildid, and date processed
        # for every crash in the range
        all_fields = SuperSearchFields(config=self.config).get()
        api = SuperSearch(config=self.config)
        window = datetime.timedelta(minutes=self.config.period)
        start_datetime = end_datetime - window
        self.config.logger.info('Looking at %s to %s', start_datetime,
                                end_datetime)

        date_filter = [
            '>={}'.format(start_datetime.isoformat()),
            '<{}'.format(end_datetime.isoformat()),
        ]
        params = {
            'date': date_filter,
            '_columns': ['signature', 'build_id', 'date'],
            '_facets_size': 0,
            '_fields': all_fields,
            # Set up first page
            '_results_offset': 0,
            '_results_number': MAX_PAGE,
        }

        by_signature = {}
        seen_count = 0

        while True:
            resp = api.get(**params)
            hits = resp['hits']

            for hit in hits:
                seen_count += 1

                build_id = hit['build_id']
                if not build_id:
                    # Not all crashes have a build id, so skip the ones that
                    # don't.
                    continue

                signature = hit['signature']
                known = by_signature.get(signature)
                if known is None:
                    by_signature[signature] = {
                        'signature': signature,
                        'build_id': build_id,
                        'date': hit['date'],
                    }
                else:
                    # Keep the smallest build id and date seen so far.
                    known['build_id'] = min(known['build_id'], build_id)
                    known['date'] = min(known['date'], hit['date'])

            # Stop once there are no more crash ids to get
            total = resp['total']
            if not hits or seen_count >= total:
                break

            # Get the next page, but only as many results as we need:
            # MAX_PAGE is the most we can request, and ``remaining`` is the
            # number of results Super Search has not returned so far.
            remaining = total - seen_count
            params['_results_offset'] += MAX_PAGE
            params['_results_number'] = min(MAX_PAGE, remaining)

        # Save signature data to the db
        for entry in by_signature.values():
            if self.config.dry_run:
                self.config.logger.info(
                    'Inserting/updating signature (%s, %s, %s)',
                    entry['signature'], entry['date'], entry['build_id'])
                continue

            self.update_crashstats_signature(
                signature=entry['signature'],
                report_date=entry['date'],
                report_build=entry['build_id'],
            )

        self.config.logger.info('Inserted/updated %d signatures.',
                                len(by_signature))
コード例 #13
0
 def __init__(self, config, *args, **kwargs):
     """Initialize the parent storage, then load the super search fields.

     The fields returned by ``SuperSearchFields(...).get()`` are stored on
     ``self._all_fields``.
     """
     parent = super(TelemetryBotoS3CrashStorage, self)
     parent.__init__(config, *args, **kwargs)
     # NOTE(review): reads self.config rather than the ``config`` argument;
     # presumably the parent initializer sets it -- confirm.
     fields_api = SuperSearchFields(config=self.config)
     self._all_fields = fields_api.get()
コード例 #14
0
    def test_index_crash_mapping_keys(self):
        """Test indexing a crash that has keys not in the mapping

        Indexing a crash that has keys that aren't in the mapping for the index
        should cause those keys to be removed from the crash.

        """
        field = "user_comments"

        # The test harness creates an index for this week and last week, so
        # this test adds one for 4 weeks ago.
        now = utc_now()
        four_weeks_ago = now - timedelta(days=28)

        # Start from the mapping SuperSearchFields gives us and drop the
        # user_comments field from it.
        mappings = SuperSearchFields(context=self.es_context).get_mapping()
        doctype = self.es_context.get_doctype()
        crash_properties = (
            mappings[doctype]["properties"]["processed_crash"]["properties"]
        )
        del crash_properties[field]

        # Create the index for 4 weeks ago with the reduced mapping
        old_index = self.es_context.get_index_for_date(four_weeks_ago)
        self.es_context.create_index(index_name=old_index, mappings=mappings)

        es_storage = ESCrashStorage(config=self.config)

        # Save one crash dated this week ...
        now_uuid = "00000000-0000-0000-0000-000000120408"
        es_storage.save_processed_crash(
            raw_crash={"BuildID": "20200506000000"},
            processed_crash={
                field: "this week",
                "date_processed": date_to_string(now),
                "uuid": now_uuid,
            },
        )

        # ... and one dated four weeks ago, which lands in the index with the
        # bum mapping
        old_uuid = "11111111-1111-1111-1111-111111120408"
        es_storage.save_processed_crash(
            raw_crash={"BuildID": "20200506000000"},
            processed_crash={
                field: "this week",
                "date_processed": date_to_string(four_weeks_ago),
                "uuid": old_uuid,
            },
        )

        self.es_context.refresh()

        # The document from this week must still have the user_comments field
        current_doc = self.conn.get(
            index=self.es_context.get_index_for_date(now),
            id=now_uuid,
        )
        assert field in current_doc["_source"]["processed_crash"]

        # The document from four weeks ago must not have it
        old_doc = self.conn.get(
            index=self.es_context.get_index_for_date(four_weeks_ago),
            id=old_uuid,
        )
        assert field not in old_doc["_source"]["processed_crash"]
コード例 #15
0
 def get_fields(self, **kwargs):
     """Delegate to ``SuperSearchFields.get_fields`` and return its result."""
     fields_api = SuperSearchFields(config=self.config)
     return fields_api.get_fields(**kwargs)
コード例 #16
0
 def update_field(self, **kwargs):
     """Delegate to ``SuperSearchFields.update_field`` and return its result."""
     fields_api = SuperSearchFields(config=self.config)
     return fields_api.update_field(**kwargs)
コード例 #17
0
    def setUp(self):
        """Run the parent setup and create the API with stubbed fields.

        ``get_fields`` on the API instance is replaced so that every call
        returns a fresh deep copy of ``SUPERSEARCH_FIELDS``.

        """
        super(IntegrationTestSuperSearchFields, self).setUp()

        def fresh_fields():
            return copy.deepcopy(SUPERSEARCH_FIELDS)

        api = SuperSearchFields(config=self.config)
        api.get_fields = fresh_fields
        self.api = api
コード例 #18
0
 def get_missing_fields(self):
     """Delegate to ``SuperSearchFields.get_missing_fields``."""
     fields_api = SuperSearchFields(config=self.config)
     return fields_api.get_missing_fields()
コード例 #19
0
    def test_get_missing_fields(self):
        """Check the fields reported by ``get_missing_fields``.

        Two indices are created with fake mappings that contain unknown
        fields (some nested) plus a few known ``processed_crash`` fields.
        The test asserts the exact list of dotted field names and the total
        returned, and deletes the indices it created afterwards.

        """
        config = self.get_base_config(es_index='socorro_integration_test_%W')
        doctype = config.elasticsearch.elasticsearch_doctype

        def string_field():
            return {'type': 'string'}

        def long_field():
            return {'type': 'long'}

        def namespace(**properties):
            return {'type': 'object', 'properties': properties}

        first_index_properties = {
            # Add a bunch of unknown fields.
            'field_z': string_field(),
            'namespace1': namespace(
                field_a=string_field(),
                field_b=long_field(),
            ),
            'namespace2': namespace(
                subspace1=namespace(field_b=long_field()),
            ),
            # Add a few known fields that should not appear.
            'processed_crash': namespace(
                signature=string_field(),
                product=string_field(),
            ),
        }
        second_index_properties = {
            'namespace1': namespace(
                subspace1=namespace(field_d=long_field()),
            ),
        }
        fake_mappings = [
            {'mappings': {doctype: {'properties': properties}}}
            for properties in (first_index_properties, second_index_properties)
        ]

        now = datetimeutil.utc_now()
        indices = []

        try:
            # Only two weekly indices are created (one per fake mapping),
            # which means that an index will be missing, hence testing that
            # the subsequent error is swallowed.
            for weeks_back, mapping in enumerate(fake_mappings):
                date = now - datetime.timedelta(weeks=weeks_back)
                index = date.strftime(config.elasticsearch.elasticsearch_index)

                self.index_creator.create_index(index, mapping)
                indices.append(index)

            api = SuperSearchFields(config=config)
            missing_fields = api.get_missing_fields()
            expected = [
                'field_z',
                'namespace1.field_a',
                'namespace1.field_b',
                'namespace1.subspace1.field_d',
                'namespace2.subspace1.field_b',
            ]

            assert missing_fields['hits'] == expected
            assert missing_fields['total'] == 5

        finally:
            # Clean up every index this test managed to create.
            for index in indices:
                self.index_client.delete(index=index)