コード例 #1
0
ファイル: test_supersearch.py プロジェクト: Tchanders/socorro
    def test_get_against_nonexistent_index(self):
        """Search with a weekly index pattern and expect an empty result.

        As the test name says, the queried index is not expected to exist;
        the API must then return the empty result structure.
        """
        weekly_config = self.get_base_config(
            es_index='socorro_test_reports_%W'
        )
        searcher = SuperSearch(config=weekly_config)

        found = searcher.get(
            date=['>2000-01-01T00:00:00', '<2000-01-10T00:00:00']
        )

        nothing_found = {'total': 0, 'hits': [], 'facets': {}}
        eq_(found, nothing_found)
コード例 #2
0
ファイル: test_supersearch.py プロジェクト: Tchanders/socorro
    def test_get_indices(self):
        """Check the index names `get_indices` returns for date ranges.

        The fixture API is queried first, then an API configured with the
        'socorro_%Y%W' index pattern over a one-week and a four-week window.
        """
        upper = datetime.datetime(2001, 1, 2, 0, 0)
        week_ago = upper - datetime.timedelta(weeks=1)
        four_weeks_ago = upper - datetime.timedelta(weeks=4)

        def date_range(lower):
            # Two date params: '<' the upper bound and '>' the lower bound.
            return [
                search_common.SearchParam('date', upper, '<'),
                search_common.SearchParam('date', lower, '>'),
            ]

        # The fixture API is expected to report one fixed index name.
        eq_(
            self.api.get_indices(date_range(week_ago)),
            ['socorro_integration_test_reports']
        )

        weekly_api = SuperSearch(
            config=self.get_base_config(es_index='socorro_%Y%W')
        )

        # One week back spans two weekly indices.
        eq_(
            weekly_api.get_indices(date_range(week_ago)),
            ['socorro_200052', 'socorro_200101']
        )

        # Four weeks back spans five weekly indices.
        eq_(
            weekly_api.get_indices(date_range(four_weeks_ago)),
            [
                'socorro_200049', 'socorro_200050', 'socorro_200051',
                'socorro_200052', 'socorro_200101'
            ]
        )
コード例 #3
0
ファイル: test_supersearch.py プロジェクト: 1Smert1/socorro
    def test_get_against_nonexistent_index(self):
        """Expect the empty result structure when searching an index
        pattern that (per the test name) does not exist.
        """
        date_filter = ['>2000-01-01T00:00:00', '<2000-01-10T00:00:00']
        expected = {'total': 0, 'hits': [], 'facets': {}}

        search_config = self.get_base_config(
            es_index='socorro_test_reports_%W'
        )
        search_api = SuperSearch(config=search_config)

        eq_(search_api.get(date=date_filter), expected)
コード例 #4
0
ファイル: test_analyzers.py プロジェクト: johnmcwade/socorro
class TestIntegrationAnalyzers(ElasticsearchTestCase):
    """Exercise the custom analyzers we create in our indices."""

    def setup_method(self):
        """Create the SuperSearch API and a reference timestamp."""
        super().setup_method()

        api_config = self.get_base_config(cls=SuperSearch)
        self.api = SuperSearch(config=api_config)
        self.now = datetimeutil.utc_now()

    def test_semicolon_keywords(self):
        """Check the analyzer called `semicolon_keywords`.

        That analyzer creates tokens (terms) by splitting the input on
        semicolons (;) only.
        """
        dll_values = (
            "/path/to/dll;;foo;C:\\bar\\boo",
            "/path/to/dll;D:\\bar\\boo",
        )
        for app_init_dlls in dll_values:
            self.index_crash(
                processed_crash={"date_processed": self.now},
                raw_crash={"AppInitDLLs": app_init_dlls},
            )
        self.es_context.refresh()

        found = self.api.get(
            app_init_dlls="/path/to/dll",
            _facets=["app_init_dlls"],
            _fields=FIELDS,
        )
        # Both crashes contain the "/path/to/dll" token.
        assert found["total"] == 2
        assert "app_init_dlls" in found["facets"]

        terms = [bucket["term"] for bucket in found["facets"]["app_init_dlls"]]
        # The expected facet term for the Windows path is lower-case.
        for expected_term in ("/path/to/dll", "c:\\bar\\boo", "foo"):
            assert expected_term in terms
コード例 #5
0
ファイル: test_supersearch.py プロジェクト: 1Smert1/socorro
    def test_get_indices(self):
        """Verify `get_indices` for the fixture API and a weekly pattern."""
        end = datetime.datetime(2001, 1, 2, 0, 0)
        one_week_before = end - datetime.timedelta(weeks=1)
        four_weeks_before = end - datetime.timedelta(weeks=4)

        def params_since(start):
            # Date params bounding the search to the (start, end) window.
            return [
                search_common.SearchParam('date', end, '<'),
                search_common.SearchParam('date', start, '>'),
            ]

        # Fixture API: a single, fixed index name is expected.
        indices = self.api.get_indices(params_since(one_week_before))
        eq_(indices, ['socorro_integration_test_reports'])

        # API using a year+week index name pattern.
        weekly_config = self.get_base_config(es_index='socorro_%Y%W')
        weekly_api = SuperSearch(config=weekly_config)

        indices = weekly_api.get_indices(params_since(one_week_before))
        eq_(indices, ['socorro_200052', 'socorro_200101'])

        indices = weekly_api.get_indices(params_since(four_weeks_before))
        expected_indices = [
            'socorro_200049', 'socorro_200050', 'socorro_200051',
            'socorro_200052', 'socorro_200101'
        ]
        eq_(indices, expected_indices)
コード例 #6
0
ファイル: test_supersearch.py プロジェクト: Tchanders/socorro
    def setUp(self):
        """Run the parent set-up, then build the API under test and
        record the current UTC time for the tests to use.
        """
        super(IntegrationTestSuperSearch, self).setUp()

        api_config = self.config
        self.api = SuperSearch(config=api_config)
        self.now = datetimeutil.utc_now()
コード例 #7
0
ファイル: test_supersearch.py プロジェクト: Tchanders/socorro
class IntegrationTestSuperSearch(ElasticsearchTestCase):
    """Test SuperSearch with an elasticsearch database containing fake
    data. """
    def setUp(self):
        """Run the parent set-up, then build the API under test and
        record the current UTC time for the tests to use.
        """
        super(IntegrationTestSuperSearch, self).setUp()

        api_config = self.config
        self.api = SuperSearch(config=api_config)
        self.now = datetimeutil.utc_now()

    def test_get_indices(self):
        """Check which index names `get_indices` derives from date params.

        Covers the fixture API and an API whose index name follows the
        'socorro_%Y%W' pattern, for a one-week and a four-week window.
        """
        range_end = datetime.datetime(2001, 1, 2, 0, 0)
        range_starts = {
            'week': range_end - datetime.timedelta(weeks=1),
            'month': range_end - datetime.timedelta(weeks=4),
        }

        def build_params(span):
            # '<' the end of the range and '>' the chosen start.
            return [
                search_common.SearchParam('date', range_end, '<'),
                search_common.SearchParam('date', range_starts[span], '>'),
            ]

        found = self.api.get_indices(build_params('week'))
        eq_(found, ['socorro_integration_test_reports'])

        pattern_api = SuperSearch(
            config=self.get_base_config(es_index='socorro_%Y%W')
        )

        found = pattern_api.get_indices(build_params('week'))
        eq_(found, ['socorro_200052', 'socorro_200101'])

        found = pattern_api.get_indices(build_params('month'))
        eq_(found, [
            'socorro_200049', 'socorro_200050', 'socorro_200051',
            'socorro_200052', 'socorro_200101'
        ])

    @minimum_es_version('1.0')
    def test_get(self):
        """Run a very basic test, just to see if things work.

        Indexes a single crash, fetches it with an explicit list of columns
        and checks the overall result shape, the `signature` facet, the
        renamed fields and the flattened `json_dump` namespace.
        """
        self.index_crash({
            'signature': 'js::break_your_browser',
            'date_processed': self.now,
            'build': 20000000,
            'os_name': 'Linux',
            'json_dump': {
                'write_combine_size': 9823012
            }
        })
        self.refresh_index()

        res = self.api.get(_columns=[
            'date', 'build_id', 'platform', 'signature', 'write_combine_size'
        ])

        ok_('hits' in res)
        ok_('total' in res)
        ok_('facets' in res)

        eq_(res['total'], 1)
        eq_(len(res['hits']), 1)
        eq_(res['hits'][0]['signature'], 'js::break_your_browser')

        # Build a real list of the facet names before comparing: on Python 3
        # `dict.keys()` returns a view object that never compares equal to a
        # list, so comparing it directly could not pass there. Building the
        # list gives the same result on Python 2.
        eq_(list(res['facets']), ['signature'])
        eq_(res['facets']['signature'][0], {
            'term': 'js::break_your_browser',
            'count': 1
        })

        # Test fields are being renamed.
        first_hit = res['hits'][0]
        ok_('date' in first_hit)  # date_processed -> date
        ok_('build_id' in first_hit)  # build -> build_id
        ok_('platform' in first_hit)  # os_name -> platform

        # Test namespaces are correctly removed.
        # processed_crash.json_dump.write_combine_size > write_combine_size
        ok_('write_combine_size' in first_hit)

    @minimum_es_version('1.0')
    def test_get_with_enum_operators(self):
        """Search with plain terms, a negated term (`!`) and a phrase."""
        crashes = (
            ('WaterWolf', 'somebody that I used to know'),
            ('NightTrain', None),
            ('NightTrain', 'processor that I used to run'),
        )
        for product, app_notes in crashes:
            self.index_crash({
                'product': product,
                'app_notes': app_notes,
                'date_processed': self.now,
            })
        self.refresh_index()

        # A term that exists ("has terms").
        found = self.api.get(product='WaterWolf')
        eq_(found['total'], 1)
        eq_(len(found['hits']), 1)
        eq_(found['hits'][0]['product'], 'WaterWolf')

        # Everything but an existing term ("does not have terms").
        found = self.api.get(product='!WaterWolf')
        eq_(found['total'], 2)
        eq_(len(found['hits']), 2)
        eq_(found['hits'][0]['product'], 'NightTrain')

        # A term that no indexed crash carries.
        found = self.api.get(product='EarthRacoon')
        eq_(found['total'], 0)

        # A phrase instead of a single term.
        phrase = 'that I used'
        found = self.api.get(app_notes=phrase, _columns=['app_notes'])
        eq_(found['total'], 2)
        eq_(len(found['hits']), 2)
        for hit in found['hits']:
            ok_(phrase in hit['app_notes'])

    @minimum_es_version('1.0')
    def test_get_with_string_operators(self):
        """Exercise the string operators on the `signature` field.

        Covers "contains" (~), "starts with" ($) and "ends with" (^), each
        also in its negated form (leading `!`).
        """
        signatures = (
            'js::break_your_browser',
            'mozilla::js::function',
            'json_Is_Kewl',
            'OhILoveMyBrowser',
            'foo(bar)',
        )
        for signature in signatures:
            self.index_crash({
                'signature': signature,
                'date_processed': self.now,
            })
        self.refresh_index()

        def check(expression, expected_count, matches):
            # Run one signature search and verify that hits and facet terms
            # both number `expected_count` and all satisfy `matches`.
            res = self.api.get(signature=expression)

            eq_(res['total'], expected_count)
            eq_(len(res['hits']), expected_count)
            for hit in res['hits']:
                ok_(matches(hit['signature']))

            ok_('signature' in res['facets'])
            eq_(len(res['facets']['signature']), expected_count)
            for facet in res['facets']['signature']:
                ok_(matches(facet['term']))
                eq_(facet['count'], 1)

        # "contains" and "does not contain".
        check('~js', 3, lambda value: 'js' in value)
        check('!~js', 2, lambda value: 'js' not in value)

        # "starts with" and "does not start with".
        check('$js', 2, lambda value: value.startswith('js'))
        check('!$js', 3, lambda value: not value.startswith('js'))

        # "ends with". Those operators are case-sensitive, so here we
        # expect only 1 result ('OhILoveMyBrowser' must not match).
        res = self.api.get(signature='^browser')

        eq_(res['total'], 1)
        eq_(len(res['hits']), 1)
        eq_(res['hits'][0]['signature'], 'js::break_your_browser')

        ok_('signature' in res['facets'])
        eq_(len(res['facets']['signature']), 1)
        eq_(
            res['facets']['signature'][0],
            {'term': 'js::break_your_browser', 'count': 1}
        )

        # "ends with" and "does not end with" using a suffix that matches
        # both spellings.
        check('^rowser', 2, lambda value: value.endswith('rowser'))
        check('!^rowser', 3, lambda value: not value.endswith('rowser'))

    @minimum_es_version('1.0')
    def test_get_with_range_operators(self):
        """Exercise equality, negation and the four range operators on
        the numeric `build_id` field.
        """
        for build in (2000, 2001, 1999):
            self.index_crash({
                'build': build,
                'date_processed': self.now,
            })
        self.refresh_index()

        def hits_for(expression, expected_count):
            # Search on build_id, verify the counts and hand back the hits.
            res = self.api.get(build_id=expression, _columns=['build_id'])
            eq_(res['total'], expected_count)
            eq_(len(res['hits']), expected_count)
            return res['hits']

        # "has terms"
        hits = hits_for('2000', 1)
        eq_(hits[0]['build_id'], 2000)

        # "does not have terms"
        for hit in hits_for('!2000', 2):
            ok_(hit['build_id'] != 2000)

        # "greater than"
        hits = hits_for('>2000', 1)
        eq_(hits[0]['build_id'], 2001)

        # "greater than or equal"
        for hit in hits_for('>=2000', 2):
            ok_(hit['build_id'] >= 2000)

        # "lower than"
        hits = hits_for('<2000', 1)
        eq_(hits[0]['build_id'], 1999)

        # "lower than or equal"
        for hit in hits_for('<=2000', 2):
            ok_(hit['build_id'] <= 2000)

    @minimum_es_version('1.0')
    def test_get_with_bool_operators(self):
        """Filter on the boolean `accessibility` field for true and false."""
        for accessibility in (True, False, True):
            self.index_crash(
                processed_crash={
                    'date_processed': self.now,
                },
                raw_crash={
                    'Accessibility': accessibility,
                },
            )
        self.refresh_index()

        # "is true": two of the three crashes have the flag set.
        found = self.api.get(
            accessibility='true',
            _columns=['accessibility'],
        )
        eq_(found['total'], 2)
        eq_(len(found['hits']), 2)
        for hit in found['hits']:
            ok_(hit['accessibility'])

        # "is false": the query value here is the short form 'f'.
        found = self.api.get(
            accessibility='f',
            _columns=['accessibility'],
        )
        eq_(found['total'], 1)
        eq_(len(found['hits']), 1)
        only_hit = found['hits'][0]
        ok_(not only_hit['accessibility'])

    @minimum_es_version('1.0')
    def test_get_with_pagination(self):
        """Page through 21 crashes with `_results_number`/`_results_offset`."""
        number_of_crashes = 21
        self.index_many_crashes(
            number_of_crashes,
            {
                'signature': 'something',
                'date_processed': self.now,
            },
        )

        # (query arguments, number of hits expected on that page)
        pages = (
            ({'_results_number': '10'}, 10),
            ({'_results_number': '10', '_results_offset': '10'}, 10),
            ({'_results_number': '10', '_results_offset': '15'}, 6),
            ({'_results_number': '10', '_results_offset': '30'}, 0),
        )
        for query, expected_hits in pages:
            res = self.api.get(**query)
            # The total always counts every crash, whatever the page.
            eq_(res['total'], number_of_crashes)
            eq_(len(res['hits']), expected_hits)

    @minimum_es_version('1.0')
    def test_get_with_sorting(self):
        """Check that `_sort` orders hits ascending, descending and by
        several fields, and rejects an unknown field.
        """
        crashes = (
            ('WaterWolf', 'Windows NT'),
            ('WaterWolf', 'Linux'),
            ('NightTrain', 'Linux'),
        )
        for product, os_name in crashes:
            self.index_crash({
                'product': product,
                'os_name': os_name,
                'date_processed': self.now,
            })
        self.refresh_index()

        # Ascending order.
        res = self.api.get(_sort='product')
        ok_(res['total'] > 0)

        previous = ''
        for hit in res['hits']:
            current = hit['product']
            ok_(previous <= current, (previous, current))
            previous = current

        # Descending order.
        res = self.api.get(_sort='-product')
        ok_(res['total'] > 0)

        previous = 'zzzzz'
        for hit in res['hits']:
            current = hit['product']
            ok_(previous >= current, (previous, current))
            previous = current

        # Several fields.
        sort_fields = ['product', 'platform']
        res = self.api.get(
            _sort=sort_fields,
            _columns=['product', 'platform'],
        )
        ok_(res['total'] > 0)

        seen_product = ''
        seen_platform = ''
        for hit in res['hits']:
            product = hit['product']
            # Platform ordering restarts whenever the product changes.
            if product != seen_product:
                seen_platform = ''

            ok_(seen_product <= product, (seen_product, product))
            seen_product = product

            platform = hit['platform']
            ok_(seen_platform <= platform, (seen_platform, platform))
            seen_platform = platform

        # Invalid field: `something` is invalid.
        assert_raises(BadArgumentError, self.api.get, _sort='something')

    @minimum_es_version('1.0')
    def test_get_with_facets(self):
        """Check `_facets` results, the `_facets_size` limit and the error
        raised for an unknown facet field.
        """
        def bucket(term, count):
            # One facet entry in the shape the results use.
            return {'term': term, 'count': count}

        crashes = (
            ('js::break_your_browser', 'WaterWolf', 'Windows NT'),
            ('js::break_your_browser', 'WaterWolf', 'Linux'),
            ('js::break_your_browser', 'NightTrain', 'Linux'),
            ('foo(bar)', 'EarthRacoon', 'Linux'),
        )
        for signature, product, os_name in crashes:
            self.index_crash({
                'signature': signature,
                'product': product,
                'os_name': os_name,
                'date_processed': self.now,
            })

        # Index a lot of distinct values to test the results limit.
        # Note: index_many_crashes does the index refreshing.
        number_of_crashes = 51
        self.index_many_crashes(
            number_of_crashes,
            {
                'version': '10.%s',
                'date_processed': self.now,
            },
            loop_field='version',
        )

        # Several facets in one query.
        res = self.api.get(_facets=['signature', 'platform'])

        ok_('facets' in res)
        ok_('signature' in res['facets'])
        eq_(res['facets']['signature'], [
            bucket('js::break_your_browser', 3),
            bucket('foo(bar)', 1),
        ])

        ok_('platform' in res['facets'])
        eq_(res['facets']['platform'], [
            bucket('Linux', 3),
            bucket('Windows NT', 1),
        ])

        # One facet, filtered on the same field.
        res = self.api.get(_facets=['product'], product='WaterWolf')

        ok_('product' in res['facets'])
        eq_(res['facets']['product'], [bucket('WaterWolf', 2)])

        # One facet, filtered on a different field.
        res = self.api.get(_facets=['product'], platform='linux')

        ok_('product' in res['facets'])
        eq_(res['facets']['product'], [
            bucket('EarthRacoon', 1),
            bucket('NightTrain', 1),
            bucket('WaterWolf', 1),
        ])

        # Number of facet entries without an explicit size:
        # 50 is the default value.
        res = self.api.get(_facets=['version'])

        ok_('version' in res['facets'])
        eq_(len(res['facets']['version']), 50)

        # A smaller explicit size truncates the facet.
        res = self.api.get(_facets=['version'], _facets_size=20)

        ok_('version' in res['facets'])
        eq_(len(res['facets']['version']), 20)

        # A size above the number of distinct values returns them all.
        res = self.api.get(_facets=['version'], _facets_size=100)

        ok_('version' in res['facets'])
        eq_(len(res['facets']['version']), number_of_crashes)

        # An unknown facet field is rejected.
        assert_raises(BadArgumentError, self.api.get, _facets=['unkownfield'])

    @minimum_es_version('1.0')
    def test_get_with_signature_aggregations(self):
        """Check `_aggs.signature` sub-facets, their size limit and the
        error raised for an unknown field.
        """
        def bucket(term, count, **sub_facets):
            # One facet entry; nested facets are attached only when given.
            entry = {'term': term, 'count': count}
            if sub_facets:
                entry['facets'] = sub_facets
            return entry

        crashes = (
            ('js::break_your_browser', 'WaterWolf', 'Windows NT'),
            ('js::break_your_browser', 'WaterWolf', 'Linux'),
            ('js::break_your_browser', 'NightTrain', 'Linux'),
            ('foo(bar)', 'EarthRacoon', 'Linux'),
        )
        for signature, product, os_name in crashes:
            self.index_crash({
                'signature': signature,
                'product': product,
                'os_name': os_name,
                'date_processed': self.now,
            })

        # Index a lot of distinct values to test the results limit.
        # Note: index_many_crashes does the index refreshing.
        number_of_crashes = 51
        self.index_many_crashes(
            number_of_crashes,
            {
                'version': '10.%s',
                'signature': 'crash_me_I_m_famous',
                'date_processed': self.now,
            },
            loop_field='version',
        )

        # Several sub-facets, leaving out the bulk-indexed signature.
        res = self.api.get(**{
            '_aggs.signature': ['product', 'platform'],
            'signature': '!=crash_me_I_m_famous',
        })

        ok_('facets' in res)
        ok_('signature' in res['facets'])
        eq_(res['facets']['signature'], [
            bucket(
                'js::break_your_browser', 3,
                product=[bucket('WaterWolf', 2), bucket('NightTrain', 1)],
                platform=[bucket('Linux', 2), bucket('Windows NT', 1)],
            ),
            bucket(
                'foo(bar)', 1,
                product=[bucket('EarthRacoon', 1)],
                platform=[bucket('Linux', 1)],
            ),
        ])

        # One sub-facet, filtered on the same field.
        res = self.api.get(**{
            '_aggs.signature': ['product'],
            'product': 'WaterWolf',
        })

        ok_('signature' in res['facets'])
        eq_(res['facets']['signature'], [
            bucket(
                'js::break_your_browser', 2,
                product=[bucket('WaterWolf', 2)],
            ),
        ])

        # One sub-facet, filtered on a different field.
        res = self.api.get(**{
            '_aggs.signature': ['product'],
            'platform': 'linux',
        })

        ok_('signature' in res['facets'])
        eq_(res['facets']['signature'], [
            bucket(
                'js::break_your_browser', 2,
                product=[bucket('NightTrain', 1), bucket('WaterWolf', 1)],
            ),
            bucket(
                'foo(bar)', 1,
                product=[bucket('EarthRacoon', 1)],
            ),
        ])

        # Number of sub-facet entries without an explicit size.
        res = self.api.get(**{
            '_aggs.signature': ['version'],
            'signature': '=crash_me_I_m_famous',
        })

        ok_('signature' in res['facets'])
        ok_('version' in res['facets']['signature'][0]['facets'])

        versions = res['facets']['signature'][0]['facets']['version']
        eq_(len(versions), 50)  # 50 is the default

        # A smaller explicit size truncates the sub-facet.
        res = self.api.get(**{
            '_aggs.signature': ['version'],
            '_facets_size': 20,
            'signature': '=crash_me_I_m_famous',
        })

        ok_('signature' in res['facets'])
        ok_('version' in res['facets']['signature'][0]['facets'])

        versions = res['facets']['signature'][0]['facets']['version']
        eq_(len(versions), 20)

        # A size above the number of distinct values returns them all.
        res = self.api.get(**{
            '_aggs.signature': ['version'],
            '_facets_size': 100,
            'signature': '=crash_me_I_m_famous',
        })

        versions = res['facets']['signature'][0]['facets']['version']
        eq_(len(versions), number_of_crashes)

        # An unknown aggregation field is rejected.
        bad_query = {'_aggs.signature': ['unkownfield']}
        assert_raises(BadArgumentError, self.api.get, **bad_query)

    @minimum_es_version('1.0')
    def test_get_with_date_histogram(self):
        yesterday = self.now - datetime.timedelta(days=1)
        the_day_before = self.now - datetime.timedelta(days=2)

        self.index_crash({
            'signature': 'js::break_your_browser',
            'product': 'WaterWolf',
            'os_name': 'Windows NT',
            'date_processed': self.now,
        })
        self.index_crash({
            'signature': 'js::break_your_browser',
            'product': 'WaterWolf',
            'os_name': 'Linux',
            'date_processed': yesterday,
        })
        self.index_crash({
            'signature': 'js::break_your_browser',
            'product': 'NightTrain',
            'os_name': 'Linux',
            'date_processed': the_day_before,
        })
        self.index_crash({
            'signature': 'foo(bar)',
            'product': 'EarthRacoon',
            'os_name': 'Linux',
            'date_processed': self.now,
        })

        # Index a lot of distinct values to test the results limit.
        number_of_crashes = 51
        processed_crash = {
            'version': '10.%s',
            'signature': 'crash_me_I_m_famous',
            'date_processed': self.now,
        }
        self.index_many_crashes(
            number_of_crashes,
            processed_crash,
            loop_field='version',
        )
        # Note: index_many_crashes does the index refreshing.

        # Test several facets
        kwargs = {
            '_histogram.date': ['product', 'platform'],
            'signature': '!=crash_me_I_m_famous',
        }
        res = self.api.get(**kwargs)

        ok_('facets' in res)
        ok_('histogram_date' in res['facets'])

        def dt_to_midnight(date):
            return date.replace(hour=0, minute=0, second=0, microsecond=0)

        today_str = dt_to_midnight(self.now).isoformat()
        yesterday_str = dt_to_midnight(yesterday).isoformat()
        day_before_str = dt_to_midnight(the_day_before).isoformat()

        expected_terms = [{
            'term': day_before_str,
            'count': 1,
            'facets': {
                'product': [
                    {
                        'term': 'NightTrain',
                        'count': 1
                    },
                ],
                'platform': [{
                    'term': 'Linux',
                    'count': 1
                }],
            }
        }, {
            'term': yesterday_str,
            'count': 1,
            'facets': {
                'product': [{
                    'term': 'WaterWolf',
                    'count': 1
                }],
                'platform': [{
                    'term': 'Linux',
                    'count': 1
                }],
            }
        }, {
            'term': today_str,
            'count': 2,
            'facets': {
                'product': [
                    {
                        'term': 'EarthRacoon',
                        'count': 1
                    },
                    {
                        'term': 'WaterWolf',
                        'count': 1
                    },
                ],
                'platform': [{
                    'term': 'Linux',
                    'count': 1
                }, {
                    'term': 'Windows NT',
                    'count': 1
                }],
            }
        }]
        eq_(res['facets']['histogram_date'], expected_terms)

        # Test one facet with filters
        kwargs = {
            '_histogram.date': ['product'],
            'product': 'WaterWolf',
        }
        res = self.api.get(**kwargs)

        ok_('histogram_date' in res['facets'])
        expected_terms = [
            {
                'term': yesterday_str,
                'count': 1,
                'facets': {
                    'product': [
                        {
                            'term': 'WaterWolf',
                            'count': 1
                        },
                    ]
                }
            },
            {
                'term': today_str,
                'count': 1,
                'facets': {
                    'product': [
                        {
                            'term': 'WaterWolf',
                            'count': 1
                        },
                    ]
                }
            },
        ]
        eq_(res['facets']['histogram_date'], expected_terms)

        # Test one facet with a different filter
        kwargs = {
            '_histogram.date': ['product'],
            'platform': 'linux',
        }
        res = self.api.get(**kwargs)

        ok_('histogram_date' in res['facets'])

        expected_terms = [{
            'term': day_before_str,
            'count': 1,
            'facets': {
                'product': [{
                    'term': 'NightTrain',
                    'count': 1
                }],
            }
        }, {
            'term': yesterday_str,
            'count': 1,
            'facets': {
                'product': [{
                    'term': 'WaterWolf',
                    'count': 1
                }],
            }
        }, {
            'term': today_str,
            'count': 1,
            'facets': {
                'product': [{
                    'term': 'EarthRacoon',
                    'count': 1
                }],
            }
        }]
        eq_(res['facets']['histogram_date'], expected_terms)

        # Test the number of results.
        kwargs = {
            '_histogram.date': ['version'],
            'signature': '=crash_me_I_m_famous',
        }
        res = self.api.get(**kwargs)

        ok_('histogram_date' in res['facets'])
        ok_('version' in res['facets']['histogram_date'][0]['facets'])

        version_facet = res['facets']['histogram_date'][0]['facets']['version']
        eq_(len(version_facet), 50)  # 50 is the default

        # Test with a different number of facets results.
        kwargs = {
            '_histogram.date': ['version'],
            '_facets_size': 20,
            'signature': '=crash_me_I_m_famous',
        }
        res = self.api.get(**kwargs)

        ok_('histogram_date' in res['facets'])
        ok_('version' in res['facets']['histogram_date'][0]['facets'])

        version_facet = res['facets']['histogram_date'][0]['facets']['version']
        eq_(len(version_facet), 20)

        kwargs = {
            '_histogram.date': ['version'],
            '_facets_size': 100,
            'signature': '=crash_me_I_m_famous',
        }
        res = self.api.get(**kwargs)

        version_facet = res['facets']['histogram_date'][0]['facets']['version']
        eq_(len(version_facet), number_of_crashes)

        # Test errors
        args = {}
        args['_histogram.date'] = ['unkownfield']
        assert_raises(BadArgumentError, self.api.get, **args)

    @minimum_es_version('1.0')
    def test_get_with_columns(self):
        """Check that `_columns` limits which fields appear in each hit."""
        crash = {
            'signature': 'js::break_your_browser',
            'product': 'WaterWolf',
            'os_name': 'Windows NT',
            'date_processed': self.now,
        }
        self.index_crash(crash)
        self.refresh_index()

        # Request two columns only; `date` is not requested and must be absent.
        res = self.api.get(_columns=['signature', 'platform'])

        for column in ('signature', 'platform'):
            ok_(column in res['hits'][0])
        ok_('date' not in res['hits'][0])

        # Column names that are not known fields are rejected.
        for bad_column in ('unkownfield', 'fake_field'):
            assert_raises(
                BadArgumentError,
                self.api.get,
                _columns=[bad_column]
            )

    def test_get_against_nonexistent_index(self):
        """Searching a date range under another index pattern yields nothing.

        The API is built with the `socorro_test_reports_%W` index pattern
        (presumably no such index exists in the test cluster) and the
        search is expected to return an empty result.
        """
        api = SuperSearch(
            config=self.get_base_config(es_index='socorro_test_reports_%W')
        )
        date_range = ['>2000-01-01T00:00:00', '<2000-01-10T00:00:00']

        res = api.get(date=date_range)

        empty_result = {'total': 0, 'hits': [], 'facets': {}}
        eq_(res, empty_result)

    def test_get_return_query_mode(self):
        """With `_return_query=True`, `get` returns the query and indices."""
        res = self.api.get(signature='js', _return_query=True)
        for key in ('query', 'indices'):
            ok_(key in res)

        # The returned query itself carries these three top-level sections.
        query = res['query']
        for section in ('query', 'aggs', 'size'):
            ok_(section in query)
コード例 #8
0
ファイル: test_analyzers.py プロジェクト: johnmcwade/socorro
    def setup_method(self):
        """Build the SuperSearch API under test and record a reference time."""
        super().setup_method()

        self.api = SuperSearch(config=self.get_base_config(cls=SuperSearch))
        self.now = datetimeutil.utc_now()
コード例 #9
0
    def run(self, end_datetime):
        """Record the earliest build id and date seen for each signature.

        Pages through Super Search for every crash in the window of
        ``self.config.period`` minutes ending at ``end_datetime`` (truncated
        to the hour). For each signature it keeps the smallest build id and
        the smallest date among crashes that have a build id. Each signature
        is then logged when ``self.config.dry_run`` is set, or passed to
        ``update_crashstats_signature`` otherwise.

        :arg end_datetime: end of the window (exclusive); minutes, seconds
            and microseconds are discarded
        """
        # Work on whole hours only.
        end_datetime = end_datetime.replace(minute=0, second=0, microsecond=0)

        all_fields = SuperSearchFieldsData().get()
        api = SuperSearch(self.config)
        period = datetime.timedelta(minutes=self.config.period)
        start_datetime = end_datetime - period
        self.logger.info('Looking at %s to %s', start_datetime, end_datetime)

        # Only signature, build id and date are needed; no facets.
        params = {
            'date': [
                '>={}'.format(start_datetime.isoformat()),
                '<{}'.format(end_datetime.isoformat()),
            ],
            '_columns': ['signature', 'build_id', 'date'],
            '_facets_size': 0,
            '_fields': all_fields,

            # Start with the first page
            '_results_offset': 0,
            '_results_number': MAX_PAGE,
        }

        results = {}
        crashids_count = 0

        while True:
            resp = api.get(**params)
            hits = resp['hits']
            for hit in hits:
                crashids_count += 1

                build_id = hit['build_id']
                if not build_id:
                    # Crashes without a build id carry nothing to record.
                    continue

                signature = hit['signature']
                if signature not in results:
                    results[signature] = {
                        'signature': signature,
                        'build_id': build_id,
                        'date': hit['date'],
                    }
                    continue

                # Signature already seen: keep the smallest of each value.
                entry = results[signature]
                entry['build_id'] = min(entry['build_id'], build_id)
                entry['date'] = min(entry['date'], hit['date'])

            # Stop once a page comes back empty or everything has been seen.
            total = resp['total']
            if not hits or crashids_count >= total:
                break

            # Move to the next page, asking only for what is still missing.
            # MAX_PAGE is the most that can be requested at once; the other
            # bound is the number of results Super Search has not returned
            # to us so far.
            remaining = total - crashids_count
            params['_results_offset'] += MAX_PAGE
            params['_results_number'] = min(MAX_PAGE, remaining)

        # Save signature data to the db (or just log it on a dry run)
        for item in results.values():
            if self.config.dry_run:
                self.logger.info(
                    'Inserting/updating signature (%s, %s, %s)',
                    item['signature'],
                    item['date'],
                    item['build_id'],
                )
            else:
                self.update_crashstats_signature(
                    signature=item['signature'],
                    report_date=item['date'],
                    report_build=item['build_id'],
                )

        self.logger.info('Inserted/updated %d signatures.', len(results))
コード例 #10
0
ファイル: test_supersearch.py プロジェクト: 1Smert1/socorro
    def setUp(self):
        """Prepare each test: build the API under test and a reference time.

        After the parent class setup, stores a `SuperSearch` instance built
        from `self.config` as `self.api`, and the value of
        `datetimeutil.utc_now()` as `self.now`.
        """
        super(IntegrationTestSuperSearch, self).setUp()

        self.api = SuperSearch(config=self.config)
        self.now = datetimeutil.utc_now()
コード例 #11
0
ファイル: test_supersearch.py プロジェクト: 1Smert1/socorro
class IntegrationTestSuperSearch(ElasticsearchTestCase):
    """Test SuperSearch with an elasticsearch database containing fake
    data. """

    def setUp(self):
        """Prepare each test: build the API under test and a reference time.

        After the parent class setup, stores a `SuperSearch` instance built
        from `self.config` as `self.api`, and the value of
        `datetimeutil.utc_now()` as `self.now`.
        """
        super(IntegrationTestSuperSearch, self).setUp()

        self.api = SuperSearch(config=self.config)
        self.now = datetimeutil.utc_now()

    def test_get_indices(self):
        """Check the index names `get_indices` returns for a date range."""
        now = datetime.datetime(2001, 1, 2, 0, 0)
        lastweek = now - datetime.timedelta(weeks=1)
        lastmonth = now - datetime.timedelta(weeks=4)

        def dates_since(start):
            # A fresh "start < date < now" pair of parameters per call.
            return [
                search_common.SearchParam('date', now, '<'),
                search_common.SearchParam('date', start, '>'),
            ]

        # With the suite's default configuration a single index is expected.
        res = self.api.get_indices(dates_since(lastweek))
        eq_(res, ['socorro_integration_test_reports'])

        # With a year+week pattern, one index per week in the range.
        api = SuperSearch(
            config=self.get_base_config(es_index='socorro_%Y%W')
        )

        res = api.get_indices(dates_since(lastweek))
        eq_(res, ['socorro_200052', 'socorro_200101'])

        res = api.get_indices(dates_since(lastmonth))
        expected = [
            'socorro_200049',
            'socorro_200050',
            'socorro_200051',
            'socorro_200052',
            'socorro_200101',
        ]
        eq_(res, expected)

    @minimum_es_version('1.0')
    def test_get(self):
        """Run a very basic test, just to see if things work.

        Indexes one crash, searches with an explicit list of columns and
        checks the overall result shape, the single hit, the default
        `signature` facet, and that stored field names are exposed under
        their public names.
        """
        self.index_crash({
            'signature': 'js::break_your_browser',
            'date_processed': self.now,
            'build': 20000000,
            'os_name': 'Linux',
            'json_dump': {
                'write_combine_size': 9823012
            }
        })
        self.refresh_index()

        res = self.api.get(_columns=[
            'date', 'build_id', 'platform', 'signature', 'write_combine_size'
        ])

        ok_('hits' in res)
        ok_('total' in res)
        ok_('facets' in res)

        eq_(res['total'], 1)
        eq_(len(res['hits']), 1)
        eq_(res['hits'][0]['signature'], 'js::break_your_browser')

        # Materialize the keys as a list: on Python 3 `dict.keys()` is a
        # view object that never compares equal to a list, so comparing it
        # directly would always fail there. `list(mapping)` behaves the
        # same on Python 2 and 3.
        eq_(list(res['facets']), ['signature'])
        eq_(
            res['facets']['signature'][0],
            {'term': 'js::break_your_browser', 'count': 1}
        )

        # Test fields are being renamed.
        ok_('date' in res['hits'][0])  # date_processed -> date
        ok_('build_id' in res['hits'][0])  # build -> build_id
        ok_('platform' in res['hits'][0])  # os_name -> platform

        # Test namespaces are correctly removed.
        # processed_crash.json_dump.write_combine_size > write_combine_size
        ok_('write_combine_size' in res['hits'][0])

    @minimum_es_version('1.0')
    def test_get_with_enum_operators(self):
        """Check "has terms" / "does not have terms" matching on fields."""
        fixtures = (
            ('WaterWolf', 'somebody that I used to know'),
            ('NightTrain', None),
            ('NightTrain', 'processor that I used to run'),
        )
        for product, app_notes in fixtures:
            self.index_crash({
                'product': product,
                'app_notes': app_notes,
                'date_processed': self.now,
            })
        self.refresh_index()

        # An existing term matches exactly one crash.
        res = self.api.get(product='WaterWolf')
        eq_(res['total'], 1)
        eq_(len(res['hits']), 1)
        eq_(res['hits'][0]['product'], 'WaterWolf')

        # Negating that term matches the two other crashes.
        res = self.api.get(product='!WaterWolf')
        eq_(res['total'], 2)
        eq_(len(res['hits']), 2)
        eq_(res['hits'][0]['product'], 'NightTrain')

        # A term no crash has matches nothing.
        res = self.api.get(product='EarthRacoon')
        eq_(res['total'], 0)

        # A multi-word phrase rather than a single term.
        res = self.api.get(
            app_notes='that I used',
            _columns=['app_notes'],
        )
        eq_(res['total'], 2)
        eq_(len(res['hits']), 2)
        for hit in res['hits']:
            ok_('that I used' in hit['app_notes'])

    @minimum_es_version('1.0')
    def test_get_with_string_operators(self):
        """Check the contains / starts with / ends with signature operators.

        Each operator is tried in plain and negated (`!`) form; both the
        hits and the `signature` facet must agree with the operator.
        """
        signatures = (
            'js::break_your_browser',
            'mozilla::js::function',
            'json_Is_Kewl',
            'OhILoveMyBrowser',
            'foo(bar)',
        )
        for signature in signatures:
            self.index_crash({
                'signature': signature,
                'date_processed': self.now,
            })
        self.refresh_index()

        def check(expression, expected_count, accepts):
            # Run one search and verify that every hit and every facet term
            # satisfies `accepts`, with `expected_count` of each.
            res = self.api.get(signature=expression)

            eq_(res['total'], expected_count)
            eq_(len(res['hits']), expected_count)
            for hit in res['hits']:
                ok_(accepts(hit['signature']))

            ok_('signature' in res['facets'])
            eq_(len(res['facets']['signature']), expected_count)
            for facet in res['facets']['signature']:
                ok_(accepts(facet['term']))
                eq_(facet['count'], 1)

        # "contains" and its negation
        check('~js', 3, lambda sig: 'js' in sig)
        check('!~js', 2, lambda sig: 'js' not in sig)

        # "starts with" and its negation
        check('$js', 2, lambda sig: sig.startswith('js'))
        check('!$js', 3, lambda sig: not sig.startswith('js'))

        # "ends with": matching is case-sensitive, so 'OhILoveMyBrowser'
        # is not matched by 'browser' and a single result is expected.
        res = self.api.get(signature='^browser')

        eq_(res['total'], 1)
        eq_(len(res['hits']), 1)
        eq_(res['hits'][0]['signature'], 'js::break_your_browser')

        ok_('signature' in res['facets'])
        eq_(len(res['facets']['signature']), 1)
        eq_(
            res['facets']['signature'][0],
            {'term': 'js::break_your_browser', 'count': 1}
        )

        # "ends with" on a suffix shared by two signatures, then negated
        check('^rowser', 2, lambda sig: sig.endswith('rowser'))
        check('!^rowser', 3, lambda sig: not sig.endswith('rowser'))

    @minimum_es_version('1.0')
    def test_get_with_range_operators(self):
        """Check equality and comparison operators on the `build_id` field."""
        for build in (2000, 2001, 1999):
            self.index_crash({
                'build': build,
                'date_processed': self.now,
            })
        self.refresh_index()

        def search(expression, expected_count):
            # Run one build_id search, check the result size, return hits.
            res = self.api.get(
                build_id=expression,
                _columns=['build_id'],
            )
            eq_(res['total'], expected_count)
            eq_(len(res['hits']), expected_count)
            return res['hits']

        # "has terms"
        hits = search('2000', 1)
        eq_(hits[0]['build_id'], 2000)

        # "does not have terms"
        for hit in search('!2000', 2):
            ok_(hit['build_id'] != 2000)

        # "greater than"
        hits = search('>2000', 1)
        eq_(hits[0]['build_id'], 2001)

        # "greater than or equal"
        for hit in search('>=2000', 2):
            ok_(hit['build_id'] >= 2000)

        # "lower than"
        hits = search('<2000', 1)
        eq_(hits[0]['build_id'], 1999)

        # "lower than or equal"
        for hit in search('<=2000', 2):
            ok_(hit['build_id'] <= 2000)

    @minimum_es_version('1.0')
    def test_get_with_bool_operators(self):
        """Check filtering on the boolean `accessibility` field."""
        # Two crashes with the raw `Accessibility` flag on, one with it off.
        for flag in (True, False, True):
            self.index_crash(
                processed_crash={
                    'date_processed': self.now,
                },
                raw_crash={
                    'Accessibility': flag,
                },
            )
        self.refresh_index()

        # 'true' selects the two crashes with the flag set.
        res = self.api.get(
            accessibility='true',
            _columns=['accessibility'],
        )
        eq_(res['total'], 2)
        eq_(len(res['hits']), 2)
        for hit in res['hits']:
            ok_(hit['accessibility'])

        # 'f' selects the single crash with the flag unset.
        res = self.api.get(
            accessibility='f',
            _columns=['accessibility'],
        )
        eq_(res['total'], 1)
        eq_(len(res['hits']), 1)
        ok_(not res['hits'][0]['accessibility'])

    @minimum_es_version('1.0')
    def test_get_with_combined_operators(self):
        """Check searches mixing several values and several fields."""
        sigs = (
            'js::break_your_browser',
            'mozilla::js::function',
            'js<isKewl>',
            'foo(bar)',
        )
        fixtures = (
            (sigs[0], 'foo bar mozilla', 'WaterWolf'),
            (sigs[1], 'foo bar', 'WaterWolf'),
            (sigs[2], 'foo mozilla', 'EarthRacoon'),
            (sigs[3], 'mozilla bar', 'EarthRacoon'),
        )
        for signature, app_notes, product in fixtures:
            self.index_crash({
                'signature': signature,
                'app_notes': app_notes,
                'product': product,
                'date_processed': self.now,
            })
        self.refresh_index()

        def found_signatures(result):
            # Signatures of the hits, sorted so order does not matter.
            return sorted([hit['signature'] for hit in result['hits']])

        # Two values on one field: three of the four crashes are expected.
        res = self.api.get(signature=['js', '~::'])
        eq_(res['total'], 3)
        eq_(found_signatures(res), sorted([sigs[0], sigs[1], sigs[2]]))

        # Adding a product nobody has leaves nothing.
        res = self.api.get(
            signature=['js', '~::'],
            product=['Unknown'],
        )
        eq_(res['total'], 0)
        eq_(len(res['hits']), 0)

        # Adding both existing products changes nothing.
        res = self.api.get(
            signature=['js', '~::'],
            product=['WaterWolf', 'EarthRacoon'],
        )
        eq_(res['total'], 3)
        eq_(found_signatures(res), sorted([sigs[0], sigs[1], sigs[2]]))

        # Adding a phrase on app_notes narrows the result to two crashes.
        res = self.api.get(
            signature=['js', '~::'],
            app_notes=['foo bar'],
        )
        eq_(res['total'], 2)
        eq_(found_signatures(res), sorted([sigs[0], sigs[1]]))

    @minimum_es_version('1.0')
    def test_get_with_pagination(self):
        """Check `_results_number` / `_results_offset` paging over 21 crashes."""
        number_of_crashes = 21
        self.index_many_crashes(
            number_of_crashes,
            {
                'signature': 'something',
                'date_processed': self.now,
            },
        )

        # (search arguments, number of hits expected on that page)
        pages = (
            ({'_results_number': '10'}, 10),
            ({'_results_number': '10', '_results_offset': '10'}, 10),
            ({'_results_number': '10', '_results_offset': '15'}, 6),
            ({'_results_number': '10', '_results_offset': '30'}, 0),
        )
        for kwargs, expected_hits in pages:
            res = self.api.get(**kwargs)
            # The total always counts every match, whatever the page.
            eq_(res['total'], number_of_crashes)
            eq_(len(res['hits']), expected_hits)

    @minimum_es_version('1.0')
    def test_get_with_sorting(self):
        """Test a search with sort returns expected results. """
        fixtures = (
            ('WaterWolf', 'Windows NT'),
            ('WaterWolf', 'Linux'),
            ('NightTrain', 'Linux'),
        )
        for product, os_name in fixtures:
            self.index_crash({
                'product': product,
                'os_name': os_name,
                'date_processed': self.now,
            })
        self.refresh_index()

        # Ascending order on one field.
        res = self.api.get(_sort='product')
        ok_(res['total'] > 0)

        previous = ''
        for hit in res['hits']:
            current = hit['product']
            ok_(previous <= current, (previous, current))
            previous = current

        # Descending order, requested with a leading dash.
        res = self.api.get(_sort='-product')
        ok_(res['total'] > 0)

        previous = 'zzzzz'
        for hit in res['hits']:
            current = hit['product']
            ok_(previous >= current, (previous, current))
            previous = current

        # Two fields: platforms must be ordered within each product.
        res = self.api.get(
            _sort=['product', 'platform'],
            _columns=['product', 'platform'],
        )
        ok_(res['total'] > 0)

        previous_product = ''
        previous_platform = ''
        for hit in res['hits']:
            product = hit['product']
            if product != previous_product:
                # New product group: platform ordering starts over.
                previous_platform = ''

            ok_(previous_product <= product, (previous_product, product))
            previous_product = product

            platform = hit['platform']
            ok_(previous_platform <= platform, (previous_platform, platform))
            previous_platform = platform

        # Sorting on `something`, which is not a valid field, is rejected.
        assert_raises(
            BadArgumentError,
            self.api.get,
            _sort='something',
        )

    @minimum_es_version('1.0')
    def test_get_with_facets(self):
        """Check `_facets` term counts, filtering and `_facets_size` limits."""
        fixtures = (
            ('js::break_your_browser', 'WaterWolf', 'Windows NT'),
            ('js::break_your_browser', 'WaterWolf', 'Linux'),
            ('js::break_your_browser', 'NightTrain', 'Linux'),
            ('foo(bar)', 'EarthRacoon', 'Linux'),
        )
        for signature, product, os_name in fixtures:
            self.index_crash({
                'signature': signature,
                'product': product,
                'os_name': os_name,
                'date_processed': self.now,
            })

        # Index a lot of distinct versions to test the results limit.
        number_of_crashes = 51
        self.index_many_crashes(
            number_of_crashes,
            {
                'version': '10.%s',
                'date_processed': self.now,
            },
            loop_field='version',
        )
        # Note: index_many_crashes does the index refreshing.

        # Several facets at once.
        res = self.api.get(_facets=['signature', 'platform'])
        ok_('facets' in res)
        facets = res['facets']

        ok_('signature' in facets)
        eq_(facets['signature'], [
            {'term': 'js::break_your_browser', 'count': 3},
            {'term': 'foo(bar)', 'count': 1},
        ])

        ok_('platform' in facets)
        eq_(facets['platform'], [
            {'term': 'Linux', 'count': 3},
            {'term': 'Windows NT', 'count': 1},
        ])

        # One facet, filtered on the faceted field itself.
        res = self.api.get(_facets=['product'], product='WaterWolf')
        ok_('product' in res['facets'])
        eq_(res['facets']['product'], [
            {'term': 'WaterWolf', 'count': 2},
        ])

        # One facet, filtered on another field.
        res = self.api.get(_facets=['product'], platform='linux')
        ok_('product' in res['facets'])
        eq_(res['facets']['product'], [
            {'term': 'EarthRacoon', 'count': 1},
            {'term': 'NightTrain', 'count': 1},
            {'term': 'WaterWolf', 'count': 1},
        ])

        # Number of facet terms returned: 50 when no size is given, the
        # requested size otherwise, and never more than exist.
        size_cases = (
            ({}, 50),
            ({'_facets_size': 20}, 20),
            ({'_facets_size': 100}, number_of_crashes),
        )
        for size_kwargs, expected_length in size_cases:
            res = self.api.get(_facets=['version'], **size_kwargs)
            ok_('version' in res['facets'])
            eq_(len(res['facets']['version']), expected_length)

        # Faceting on an unknown field is an error.
        assert_raises(
            BadArgumentError,
            self.api.get,
            _facets=['unknownfield']
        )

    @minimum_es_version('1.0')
    def test_get_with_signature_aggregations(self):
        """Check the `_aggs.signature` sub-aggregations returned by `get`.

        Covers the nested facet contents, their interaction with filters,
        the `_facets_size` limit applied to sub-facets, and the rejection of
        unknown fields.
        """
        def bucket(term, count, **sub_facets):
            # One expected facet entry; nested facets are only attached
            # when some are given.
            entry = {'term': term, 'count': count}
            if sub_facets:
                entry['facets'] = sub_facets
            return entry

        fixtures = (
            ('js::break_your_browser', 'WaterWolf', 'Windows NT'),
            ('js::break_your_browser', 'WaterWolf', 'Linux'),
            ('js::break_your_browser', 'NightTrain', 'Linux'),
            ('foo(bar)', 'EarthRacoon', 'Linux'),
        )
        for signature, product, os_name in fixtures:
            self.index_crash({
                'signature': signature,
                'product': product,
                'os_name': os_name,
                'date_processed': self.now,
            })

        # Index a lot of distinct versions under a single signature, one
        # more than the default facet size asserted further down.
        number_of_crashes = 51
        self.index_many_crashes(
            number_of_crashes,
            {
                'version': '10.%s',
                'signature': 'crash_me_I_m_famous',
                'date_processed': self.now,
            },
            loop_field='version',
        )
        # Note: index_many_crashes does the index refreshing.

        # Several sub-facets at once, leaving the bulk-indexed crashes out.
        res = self.api.get(**{
            '_aggs.signature': ['product', 'platform'],
            'signature': '!=crash_me_I_m_famous',
        })

        ok_('facets' in res)
        ok_('signature' in res['facets'])
        eq_(res['facets']['signature'], [
            bucket(
                'js::break_your_browser', 3,
                product=[bucket('WaterWolf', 2), bucket('NightTrain', 1)],
                platform=[bucket('Linux', 2), bucket('Windows NT', 1)]
            ),
            bucket(
                'foo(bar)', 1,
                product=[bucket('EarthRacoon', 1)],
                platform=[bucket('Linux', 1)]
            ),
        ])

        # A single sub-facet combined with a product filter.
        res = self.api.get(**{
            '_aggs.signature': ['product'],
            'product': 'WaterWolf',
        })

        ok_('signature' in res['facets'])
        eq_(res['facets']['signature'], [
            bucket(
                'js::break_your_browser', 2,
                product=[bucket('WaterWolf', 2)]
            ),
        ])

        # A single sub-facet combined with a platform filter.
        res = self.api.get(**{
            '_aggs.signature': ['product'],
            'platform': 'linux',
        })

        ok_('signature' in res['facets'])
        eq_(res['facets']['signature'], [
            bucket(
                'js::break_your_browser', 2,
                product=[bucket('NightTrain', 1), bucket('WaterWolf', 1)]
            ),
            bucket(
                'foo(bar)', 1,
                product=[bucket('EarthRacoon', 1)]
            ),
        ])

        # Number of sub-facet results with the default size.
        res = self.api.get(**{
            '_aggs.signature': ['version'],
            'signature': '=crash_me_I_m_famous',
        })

        ok_('signature' in res['facets'])
        ok_('version' in res['facets']['signature'][0]['facets'])

        versions = res['facets']['signature'][0]['facets']['version']
        eq_(len(versions), 50)  # 50 is the default

        # Number of sub-facet results with a smaller explicit size.
        res = self.api.get(**{
            '_aggs.signature': ['version'],
            '_facets_size': 20,
            'signature': '=crash_me_I_m_famous',
        })

        ok_('signature' in res['facets'])
        ok_('version' in res['facets']['signature'][0]['facets'])

        versions = res['facets']['signature'][0]['facets']['version']
        eq_(len(versions), 20)

        # A size larger than the data returns every distinct version.
        res = self.api.get(**{
            '_aggs.signature': ['version'],
            '_facets_size': 100,
            'signature': '=crash_me_I_m_famous',
        })

        versions = res['facets']['signature'][0]['facets']['version']
        eq_(len(versions), number_of_crashes)

        # An unknown sub-aggregation field is rejected.
        assert_raises(
            BadArgumentError,
            self.api.get,
            **{'_aggs.signature': ['unknownfield']}
        )

    @minimum_es_version('1.0')
    def test_get_with_date_histogram(self):
        """Check the `_histogram.date` results returned by `get`.

        Crashes are indexed on three consecutive days. The test verifies the
        per-day buckets and their nested facets, the effect of filters, the
        `_facets_size` limit, and the rejection of unknown fields.
        """
        def bucket(term, count, **sub_facets):
            # One expected facet entry; nested facets are only attached
            # when some are given.
            entry = {'term': term, 'count': count}
            if sub_facets:
                entry['facets'] = sub_facets
            return entry

        def midnight_iso(date):
            # Expected histogram terms are the ISO form of each day's start.
            return date.replace(
                hour=0, minute=0, second=0, microsecond=0
            ).isoformat()

        yesterday = self.now - datetime.timedelta(days=1)
        the_day_before = self.now - datetime.timedelta(days=2)

        fixtures = (
            ('js::break_your_browser', 'WaterWolf', 'Windows NT', self.now),
            ('js::break_your_browser', 'WaterWolf', 'Linux', yesterday),
            ('js::break_your_browser', 'NightTrain', 'Linux', the_day_before),
            ('foo(bar)', 'EarthRacoon', 'Linux', self.now),
        )
        for signature, product, os_name, processed in fixtures:
            self.index_crash({
                'signature': signature,
                'product': product,
                'os_name': os_name,
                'date_processed': processed,
            })

        # Index a lot of distinct versions under a single signature, one
        # more than the default facet size asserted further down.
        number_of_crashes = 51
        self.index_many_crashes(
            number_of_crashes,
            {
                'version': '10.%s',
                'signature': 'crash_me_I_m_famous',
                'date_processed': self.now,
            },
            loop_field='version',
        )
        # Note: index_many_crashes does the index refreshing.

        # Several sub-facets at once, leaving the bulk-indexed crashes out.
        res = self.api.get(**{
            '_histogram.date': ['product', 'platform'],
            'signature': '!=crash_me_I_m_famous',
        })

        ok_('facets' in res)
        ok_('histogram_date' in res['facets'])

        today_str = midnight_iso(self.now)
        yesterday_str = midnight_iso(yesterday)
        day_before_str = midnight_iso(the_day_before)

        eq_(res['facets']['histogram_date'], [
            bucket(
                day_before_str, 1,
                product=[bucket('NightTrain', 1)],
                platform=[bucket('Linux', 1)]
            ),
            bucket(
                yesterday_str, 1,
                product=[bucket('WaterWolf', 1)],
                platform=[bucket('Linux', 1)]
            ),
            bucket(
                today_str, 2,
                product=[bucket('EarthRacoon', 1), bucket('WaterWolf', 1)],
                platform=[bucket('Linux', 1), bucket('Windows NT', 1)]
            ),
        ])

        # A single sub-facet combined with a product filter.
        res = self.api.get(**{
            '_histogram.date': ['product'],
            'product': 'WaterWolf',
        })

        ok_('histogram_date' in res['facets'])
        eq_(res['facets']['histogram_date'], [
            bucket(yesterday_str, 1, product=[bucket('WaterWolf', 1)]),
            bucket(today_str, 1, product=[bucket('WaterWolf', 1)]),
        ])

        # A single sub-facet combined with a platform filter.
        res = self.api.get(**{
            '_histogram.date': ['product'],
            'platform': 'linux',
        })

        ok_('histogram_date' in res['facets'])
        eq_(res['facets']['histogram_date'], [
            bucket(day_before_str, 1, product=[bucket('NightTrain', 1)]),
            bucket(yesterday_str, 1, product=[bucket('WaterWolf', 1)]),
            bucket(today_str, 1, product=[bucket('EarthRacoon', 1)]),
        ])

        # Number of sub-facet results with the default size.
        res = self.api.get(**{
            '_histogram.date': ['version'],
            'signature': '=crash_me_I_m_famous',
        })

        ok_('histogram_date' in res['facets'])
        ok_('version' in res['facets']['histogram_date'][0]['facets'])

        versions = res['facets']['histogram_date'][0]['facets']['version']
        eq_(len(versions), 50)  # 50 is the default

        # Number of sub-facet results with a smaller explicit size.
        res = self.api.get(**{
            '_histogram.date': ['version'],
            '_facets_size': 20,
            'signature': '=crash_me_I_m_famous',
        })

        ok_('histogram_date' in res['facets'])
        ok_('version' in res['facets']['histogram_date'][0]['facets'])

        versions = res['facets']['histogram_date'][0]['facets']['version']
        eq_(len(versions), 20)

        # A size larger than the data returns every distinct version.
        res = self.api.get(**{
            '_histogram.date': ['version'],
            '_facets_size': 100,
            'signature': '=crash_me_I_m_famous',
        })

        versions = res['facets']['histogram_date'][0]['facets']['version']
        eq_(len(versions), number_of_crashes)

        # An unknown histogram sub-field is rejected.
        assert_raises(
            BadArgumentError,
            self.api.get,
            **{'_histogram.date': ['unknownfield']}
        )

    @minimum_es_version('1.0')
    def test_get_with_number_histogram(self):
        """Check the `_histogram.build_id` results returned by `get`.

        Crashes are indexed with three distinct integer build ids. The test
        verifies the per-build buckets and their nested facets, the effect
        of filters, the `_facets_size` limit, and the rejection of unknown
        fields.
        """
        def bucket(term, count, **sub_facets):
            # One expected facet entry; nested facets are only attached
            # when some are given.
            entry = {'term': term, 'count': count}
            if sub_facets:
                entry['facets'] = sub_facets
            return entry

        def as_build(date):
            # Build ids used here are the timestamp digits read as an int.
            return int(date.strftime('%Y%m%d%H%M%S'))

        yesterday = self.now - datetime.timedelta(days=1)
        the_day_before = self.now - datetime.timedelta(days=2)

        today_int = as_build(self.now)
        yesterday_int = as_build(yesterday)
        day_before_int = as_build(the_day_before)

        fixtures = (
            ('js::break_your_browser', 'WaterWolf', 'Windows NT',
             today_int, self.now),
            ('js::break_your_browser', 'WaterWolf', 'Linux',
             yesterday_int, yesterday),
            ('js::break_your_browser', 'NightTrain', 'Linux',
             day_before_int, the_day_before),
            ('foo(bar)', 'EarthRacoon', 'Linux',
             today_int, self.now),
        )
        for signature, product, os_name, build, processed in fixtures:
            self.index_crash({
                'signature': signature,
                'product': product,
                'os_name': os_name,
                'build': build,
                'date_processed': processed,
            })

        # Index a lot of distinct versions under a single signature, one
        # more than the default facet size asserted further down.
        number_of_crashes = 51
        self.index_many_crashes(
            number_of_crashes,
            {
                'version': '10.%s',
                'signature': 'crash_me_I_m_famous',
                'build': today_int,
                'date_processed': self.now,
            },
            loop_field='version',
        )
        # Note: index_many_crashes does the index refreshing.

        # Several sub-facets at once, leaving the bulk-indexed crashes out.
        res = self.api.get(**{
            '_histogram.build_id': ['product', 'platform'],
            'signature': '!=crash_me_I_m_famous',
        })

        ok_('facets' in res)
        eq_(res['facets']['histogram_build_id'], [
            bucket(
                day_before_int, 1,
                product=[bucket('NightTrain', 1)],
                platform=[bucket('Linux', 1)]
            ),
            bucket(
                yesterday_int, 1,
                product=[bucket('WaterWolf', 1)],
                platform=[bucket('Linux', 1)]
            ),
            bucket(
                today_int, 2,
                product=[bucket('EarthRacoon', 1), bucket('WaterWolf', 1)],
                platform=[bucket('Linux', 1), bucket('Windows NT', 1)]
            ),
        ])

        # A single sub-facet combined with a product filter.
        res = self.api.get(**{
            '_histogram.build_id': ['product'],
            'product': 'WaterWolf',
        })

        eq_(res['facets']['histogram_build_id'], [
            bucket(yesterday_int, 1, product=[bucket('WaterWolf', 1)]),
            bucket(today_int, 1, product=[bucket('WaterWolf', 1)]),
        ])

        # A single sub-facet combined with a platform filter.
        res = self.api.get(**{
            '_histogram.build_id': ['product'],
            'platform': 'linux',
        })

        eq_(res['facets']['histogram_build_id'], [
            bucket(day_before_int, 1, product=[bucket('NightTrain', 1)]),
            bucket(yesterday_int, 1, product=[bucket('WaterWolf', 1)]),
            bucket(today_int, 1, product=[bucket('EarthRacoon', 1)]),
        ])

        # Number of sub-facet results with the default size.
        res = self.api.get(**{
            '_histogram.build_id': ['version'],
            'signature': '=crash_me_I_m_famous',
        })

        ok_('version' in res['facets']['histogram_build_id'][0]['facets'])

        versions = res['facets']['histogram_build_id'][0]['facets']['version']
        eq_(len(versions), 50)  # 50 is the default

        # Number of sub-facet results with a smaller explicit size.
        res = self.api.get(**{
            '_histogram.build_id': ['version'],
            '_facets_size': 20,
            'signature': '=crash_me_I_m_famous',
        })

        ok_('version' in res['facets']['histogram_build_id'][0]['facets'])

        versions = res['facets']['histogram_build_id'][0]['facets']['version']
        eq_(len(versions), 20)

        # A size larger than the data returns every distinct version.
        res = self.api.get(**{
            '_histogram.build_id': ['version'],
            '_facets_size': 100,
            'signature': '=crash_me_I_m_famous',
        })

        versions = res['facets']['histogram_build_id'][0]['facets']['version']
        eq_(len(versions), number_of_crashes)

        # An unknown histogram sub-field is rejected.
        assert_raises(
            BadArgumentError,
            self.api.get,
            **{'_histogram.build_id': ['unknownfield']}
        )

    @minimum_es_version('1.0')
    def test_get_with_columns(self):
        """Check that `_columns` restricts the fields present in each hit."""
        self.index_crash({
            'signature': 'js::break_your_browser',
            'product': 'WaterWolf',
            'os_name': 'Windows NT',
            'date_processed': self.now,
        })
        self.refresh_index()

        # Request two columns only.
        res = self.api.get(**{'_columns': ['signature', 'platform']})

        first_hit_fields = (
            ('signature', True),
            ('platform', True),
            ('date', False),
        )
        for column, expected in first_hit_fields:
            if expected:
                ok_(column in res['hits'][0])
            else:
                ok_(column not in res['hits'][0])

        # Columns that are not accepted raise a BadArgumentError.
        for rejected in ('unknownfield', 'fake_field'):
            assert_raises(
                BadArgumentError,
                self.api.get,
                _columns=[rejected]
            )

    def test_get_against_nonexistent_index(self):
        """Expect an empty result when the index pattern matches nothing."""
        api = SuperSearch(
            config=self.get_base_config(es_index='socorro_test_reports_%W')
        )

        res = api.get(date=['>2000-01-01T00:00:00', '<2000-01-10T00:00:00'])

        empty_result = {'total': 0, 'hits': [], 'facets': {}}
        eq_(res, empty_result)

    def test_get_too_large_date_range(self):
        """Expect a BadArgumentError for a date range of over a year."""
        # The two bounds are a whole year (plus a few days) apart.
        too_wide = ['>2000-01-01T00:00:00', '<2001-01-10T00:00:00']
        assert_raises(BadArgumentError, self.api.get, date=too_wide)

    def test_get_return_query_mode(self):
        """Check the keys returned when `_return_query` is set."""
        res = self.api.get(signature='js', _return_query=True)

        # Top level of the response.
        for key in ('query', 'indices'):
            ok_(key in res)

        # The returned query itself.
        query = res['query']
        for key in ('query', 'aggs', 'size'):
            ok_(key in query)

    @minimum_es_version('1.0')
    def test_get_with_zero(self):
        """Check that asking for zero results returns no hits."""
        hits = self.api.get(_results_number=0)['hits']
        eq_(len(hits), 0)
コード例 #12
0
    def run(self, end_datetime):
        """Record the earliest date and build id seen for each signature.

        Pages through Super Search results for the `self.config.period`
        minutes ending at `end_datetime` (truncated to the hour), keeps the
        minimum build id and date per signature, then either logs them (dry
        run) or posts them through `SignatureFirstDate`.

        :arg end_datetime: upper bound of the window; its minutes, seconds
            and microseconds are discarded.
        """
        # Truncate to the hour
        end_datetime = end_datetime.replace(minute=0, second=0, microsecond=0)

        # Query signature, build id and processed date for every crash in
        # the window.
        all_fields = SuperSearchFields(config=self.config).get()
        api = SuperSearch(config=self.config)
        window = datetime.timedelta(minutes=self.config.period)
        start_datetime = end_datetime - window
        self.config.logger.info('Looking at %s to %s', start_datetime, end_datetime)

        date_filters = [
            '>={}'.format(start_datetime.isoformat()),
            '<{}'.format(end_datetime.isoformat()),
        ]
        params = {
            'date': date_filters,
            '_columns': ['signature', 'build_id', 'date'],
            '_facets_size': 0,
            '_fields': all_fields,

            # Start with the first page
            '_results_offset': 0,
            '_results_number': MAX_PAGE,
        }

        first_seen = {}
        seen_count = 0

        while True:
            resp = api.get(**params)
            hits = resp['hits']
            for hit in hits:
                # Every hit counts towards paging, even the skipped ones.
                seen_count += 1

                if not hit['build_id']:
                    # Not all crashes have a build id; ignore those.
                    continue

                signature = hit['signature']
                known = first_seen.get(signature)
                if known is None:
                    first_seen[signature] = {
                        'signature': signature,
                        'build_id': hit['build_id'],
                        'date': hit['date']
                    }
                else:
                    # Keep the smallest build id and date seen so far.
                    known['build_id'] = min(known['build_id'], hit['build_id'])
                    known['date'] = min(known['date'], hit['date'])

            # Stop once a page comes back empty or everything was consumed.
            total = resp['total']
            if not hits or seen_count >= total:
                break

            # Move to the next page, asking for no more than what is left
            # and never more than MAX_PAGE.
            params['_results_offset'] += MAX_PAGE
            params['_results_number'] = min(MAX_PAGE, total - seen_count)

        # Save signature data to the db (or only log it on a dry run).
        signature_first_date_api = SignatureFirstDate(config=self.config)
        for item in first_seen.values():
            if not self.config.dry_run:
                signature_first_date_api.post(
                    signature=item['signature'],
                    first_report=item['date'],
                    first_build=item['build_id']
                )
                continue

            self.config.logger.info(
                'Inserting/updating signature (%s, %s, %s)',
                item['signature'],
                item['date'],
                item['build_id']
            )

        self.config.logger.info('Inserted/updated %d signatures.', len(first_seen))
コード例 #13
0
class IntegrationTestSuperSearch(ElasticsearchTestCase):
    """Test SuperSearch with an elasticsearch database containing fake
    data. """

    def setUp(self):
        """Create the SuperSearch API under test and a reference UTC time."""
        super(IntegrationTestSuperSearch, self).setUp()

        config = self.config
        self.api = SuperSearch(config=config)
        self.now = datetimeutil.utc_now()

    def test_get_indices(self):
        """Check the index names computed for a date range.

        Uses the default test configuration first, then a week-based index
        pattern (`socorro_%Y%W`) over a one-week and a four-week range.
        """
        now = datetime.datetime(2001, 1, 2, 0, 0)
        lastweek = now - datetime.timedelta(weeks=1)
        lastmonth = now - datetime.timedelta(weeks=4)

        def between(start):
            # Date parameters for the open range start < date < now.
            return [
                search_common.SearchParam('date', now, '<'),
                search_common.SearchParam('date', start, '>'),
            ]

        # Default configuration.
        res = self.api.get_indices(between(lastweek))
        eq_(res, ['socorro_integration_test_reports'])

        # Week-based index pattern.
        weekly_api = SuperSearch(
            config=self.get_mware_config(es_index='socorro_%Y%W')
        )

        res = weekly_api.get_indices(between(lastweek))
        eq_(res, ['socorro_200052', 'socorro_200101'])

        res = weekly_api.get_indices(between(lastmonth))
        expected = [
            'socorro_200049', 'socorro_200050', 'socorro_200051',
            'socorro_200052', 'socorro_200101'
        ]
        eq_(res, expected)

    @minimum_es_version('1.0')
    def test_get(self):
        """Run a very basic test, just to see if things work.

        Indexes a single crash and checks the overall shape of the result,
        the signature facet returned by default, the renaming of fields and
        the removal of namespaces.
        """
        self.index_crash({
            'signature': 'js::break_your_browser',
            'date_processed': self.now,
            'build': 20000000,
            'os_name': 'Linux',
            'json_dump': {
                'write_combine_size': 9823012
            }
        })
        self.refresh_index()

        res = self.api.get()

        ok_('hits' in res)
        ok_('total' in res)
        ok_('facets' in res)

        eq_(res['total'], 1)
        eq_(len(res['hits']), 1)
        eq_(res['hits'][0]['signature'], 'js::break_your_browser')

        # Compare as a list: on Python 3 `dict.keys()` returns a view object
        # that never equals a list, so the bare comparison would always
        # fail there. On Python 2 this is equivalent to the plain call.
        eq_(list(res['facets'].keys()), ['signature'])
        eq_(
            res['facets']['signature'][0],
            {'term': 'js::break_your_browser', 'count': 1}
        )

        # Test fields are being renamed.
        ok_('date' in res['hits'][0])  # date_processed -> date
        ok_('build_id' in res['hits'][0])  # build -> build_id
        ok_('platform' in res['hits'][0])  # os_name -> platform

        # Test namespaces are correctly removed.
        # processed_crash.json_dump.write_combine_size > write_combine_size
        ok_('write_combine_size' in res['hits'][0])

    @minimum_es_version('1.0')
    def test_get_with_enum_operators(self):
        """Check the default "has terms" operator and its negation (`!`)."""
        fixtures = (
            ('WaterWolf', 'somebody that I used to know'),
            ('NightTrain', None),
            ('NightTrain', 'processor that I used to run'),
        )
        for product, app_notes in fixtures:
            self.index_crash({
                'product': product,
                'app_notes': app_notes,
                'date_processed': self.now,
            })
        self.refresh_index()

        # A term that exists.
        res = self.api.get(product='WaterWolf')  # has terms

        eq_(res['total'], 1)
        eq_(len(res['hits']), 1)
        eq_(res['hits'][0]['product'], 'WaterWolf')

        # Not a term that exists.
        res = self.api.get(product='!WaterWolf')  # does not have terms

        eq_(res['total'], 2)
        eq_(len(res['hits']), 2)
        eq_(res['hits'][0]['product'], 'NightTrain')

        # A term that does not exist.
        res = self.api.get(product='EarthRacoon')  # has terms

        eq_(res['total'], 0)

        # A phrase instead of a term.
        phrase = 'that I used'
        res = self.api.get(app_notes=phrase)  # has terms

        eq_(res['total'], 2)
        eq_(len(res['hits']), 2)
        for crash in res['hits']:
            ok_(phrase in crash['app_notes'])

    @minimum_es_version('1.0')
    def test_get_with_string_operators(self):
        """Check the string operators on the `signature` field.

        Exercises contains (`~`), starts with (`$`), ends with (`^`) and
        their negated (`!`) forms, on both the hits and the signature facet.
        """
        signatures = (
            'js::break_your_browser',
            'mozilla::js::function',
            'json_Is_Kewl',
            'OhILoveMyBrowser',
            'foo(bar)',
        )
        for signature in signatures:
            self.index_crash({
                'signature': signature,
                'date_processed': self.now,
            })
        self.refresh_index()

        # Contains.
        res = self.api.get(signature='~js')

        eq_(res['total'], 3)
        eq_(len(res['hits']), 3)
        for crash in res['hits']:
            ok_('js' in crash['signature'])

        ok_('signature' in res['facets'])
        sig_facets = res['facets']['signature']
        eq_(len(sig_facets), 3)
        for entry in sig_facets:
            ok_('js' in entry['term'])
            eq_(entry['count'], 1)

        # Does not contain.
        res = self.api.get(signature='!~js')

        eq_(res['total'], 2)
        eq_(len(res['hits']), 2)
        for crash in res['hits']:
            ok_('js' not in crash['signature'])

        ok_('signature' in res['facets'])
        sig_facets = res['facets']['signature']
        eq_(len(sig_facets), 2)
        for entry in sig_facets:
            ok_('js' not in entry['term'])
            eq_(entry['count'], 1)

        # Starts with.
        res = self.api.get(signature='$js')

        eq_(res['total'], 2)
        eq_(len(res['hits']), 2)
        for crash in res['hits']:
            ok_(crash['signature'].startswith('js'))

        ok_('signature' in res['facets'])
        sig_facets = res['facets']['signature']
        eq_(len(sig_facets), 2)
        for entry in sig_facets:
            ok_(entry['term'].startswith('js'))
            eq_(entry['count'], 1)

        # Does not start with.
        res = self.api.get(signature='!$js')

        eq_(res['total'], 3)
        eq_(len(res['hits']), 3)
        for crash in res['hits']:
            ok_(not crash['signature'].startswith('js'))

        ok_('signature' in res['facets'])
        sig_facets = res['facets']['signature']
        eq_(len(sig_facets), 3)
        for entry in sig_facets:
            ok_(not entry['term'].startswith('js'))
            eq_(entry['count'], 1)

        # Ends with. Those operators are case-sensitive, so only one
        # signature is expected to match 'browser'.
        res = self.api.get(signature='^browser')

        eq_(res['total'], 1)
        eq_(len(res['hits']), 1)
        eq_(res['hits'][0]['signature'], 'js::break_your_browser')

        ok_('signature' in res['facets'])
        sig_facets = res['facets']['signature']
        eq_(len(sig_facets), 1)
        eq_(sig_facets[0], {'term': 'js::break_your_browser', 'count': 1})

        # Ends with, using a suffix shared by two signatures.
        res = self.api.get(signature='^rowser')

        eq_(res['total'], 2)
        eq_(len(res['hits']), 2)
        for crash in res['hits']:
            ok_(crash['signature'].endswith('rowser'))

        ok_('signature' in res['facets'])
        sig_facets = res['facets']['signature']
        eq_(len(sig_facets), 2)
        for entry in sig_facets:
            ok_(entry['term'].endswith('rowser'))
            eq_(entry['count'], 1)

        # Does not end with.
        res = self.api.get(signature='!^rowser')

        eq_(res['total'], 3)
        eq_(len(res['hits']), 3)
        for crash in res['hits']:
            ok_(not crash['signature'].endswith('rowser'))

        ok_('signature' in res['facets'])
        sig_facets = res['facets']['signature']
        eq_(len(sig_facets), 3)
        for entry in sig_facets:
            ok_(not entry['term'].endswith('rowser'))
            eq_(entry['count'], 1)

    @minimum_es_version('1.0')
    def test_get_with_range_operators(self):
        """Exercise exact-match, negation and comparison operators.

        Three crashes are indexed with ``build`` values 2000, 2001 and 1999,
        then searched through the ``build_id`` parameter with each operator
        prefix in turn.
        """
        for build in (2000, 2001, 1999):
            self.index_crash({
                'build': build,
                'date_processed': self.now,
            })
        self.refresh_index()

        def search(expression, expected_count):
            # Run one query and verify both the reported total and the number
            # of returned hits before handing the hits back to the caller.
            response = self.api.get(build_id=expression)
            eq_(response['total'], expected_count)
            eq_(len(response['hits']), expected_count)
            return response['hits']

        # "has terms": a bare value only matches the identical build.
        hits = search('2000', 1)
        eq_(hits[0]['build_id'], 2000)

        # "does not have terms": everything except that build.
        for hit in search('!2000', 2):
            ok_(hit['build_id'] != 2000)

        # "greater than" is strict, so only 2001 qualifies.
        hits = search('>2000', 1)
        eq_(hits[0]['build_id'], 2001)

        # "greater than or equal" also keeps the boundary value.
        for hit in search('>=2000', 2):
            ok_(hit['build_id'] >= 2000)

        # "lower than" is strict, so only 1999 qualifies.
        hits = search('<2000', 1)
        eq_(hits[0]['build_id'], 1999)

        # "lower than or equal" also keeps the boundary value.
        for hit in search('<=2000', 2):
            ok_(hit['build_id'] <= 2000)

    @minimum_es_version('1.0')
    def test_get_with_bool_operators(self):
        """Check filtering on the boolean ``accessibility`` parameter.

        Three crashes are indexed whose raw crash carries ``Accessibility``
        set to True, False and True; the search is then run once for the
        true value and once for the false value.
        """
        for flag in (True, False, True):
            self.index_crash(
                processed_crash={
                    'date_processed': self.now,
                },
                raw_crash={
                    'Accessibility': flag,
                },
            )
        self.refresh_index()

        # 'true' selects the two crashes flagged as accessible.
        enabled = self.api.get(accessibility='true')
        eq_(enabled['total'], 2)
        eq_(len(enabled['hits']), 2)
        for hit in enabled['hits']:
            ok_(hit['accessibility'])

        # The short form 'f' selects the single crash flagged as False.
        disabled = self.api.get(accessibility='f')
        eq_(disabled['total'], 1)
        eq_(len(disabled['hits']), 1)
        ok_(not disabled['hits'][0]['accessibility'])

    @minimum_es_version('1.0')
    def test_get_with_pagination(self):
        """Check ``_results_number`` / ``_results_offset`` paging.

        21 identical crashes are indexed and fetched in pages of 10 from
        several offsets. The reported total must always be 21, while the
        number of hits depends on how many documents remain past the offset.
        """
        total_indexed = 21
        self.index_many_crashes(
            total_indexed,
            {
                'signature': 'something',
                'date_processed': self.now,
            },
        )

        # (offset, expected number of hits); None means no offset is passed.
        pages = (
            (None, 10),
            ('10', 10),
            ('15', 6),   # only 6 documents remain after skipping 15 of 21
            ('30', 0),   # offset beyond the end yields an empty page
        )
        for offset, expected_hits in pages:
            query = {'_results_number': '10'}
            if offset is not None:
                query['_results_offset'] = offset

            page = self.api.get(**query)
            eq_(page['total'], total_indexed)
            eq_(len(page['hits']), expected_hits)

    @minimum_es_version('1.0')
    def test_get_with_facets(self):
        """Check the ``_facets`` parameter of the search API.

        Covers several facets at once, a facet combined with a filter, the
        cap on the number of facet terms returned, and the error raised for
        a facet on an unknown field.
        """
        # (signature, product, os_name) of the hand-picked crashes.
        fixtures = (
            ('js::break_your_browser', 'WaterWolf', 'Windows NT'),
            ('js::break_your_browser', 'WaterWolf', 'Linux'),
            ('js::break_your_browser', 'NightTrain', 'Linux'),
            ('foo(bar)', 'EarthRacoon', 'Linux'),
        )
        for signature, product, os_name in fixtures:
            self.index_crash({
                'signature': signature,
                'product': product,
                'os_name': os_name,
                'date_processed': self.now,
            })

        # Add many distinct versions so the facet size limit can be observed.
        # No explicit refresh is needed: index_many_crashes refreshes the
        # index itself.
        self.index_many_crashes(
            51,
            {
                'version': '10.%s',
                'date_processed': self.now,
            },
            loop_field='version',
        )

        # Several facets requested in a single query.
        res = self.api.get(_facets=['signature', 'platform'])

        ok_('facets' in res)
        facets = res['facets']

        ok_('signature' in facets)
        eq_(facets['signature'], [
            {'term': 'js::break_your_browser', 'count': 3},
            {'term': 'foo(bar)', 'count': 1},
        ])

        ok_('platform' in facets)
        eq_(facets['platform'], [
            {'term': 'Linux', 'count': 3},
            {'term': 'Windows NT', 'count': 1},
        ])

        # One facet, filtered on the faceted field itself.
        res = self.api.get(_facets=['product'], product='WaterWolf')

        ok_('product' in res['facets'])
        eq_(res['facets']['product'], [
            {'term': 'WaterWolf', 'count': 2},
        ])

        # One facet, filtered on a different field.
        res = self.api.get(_facets=['product'], platform='linux')

        ok_('product' in res['facets'])
        eq_(res['facets']['product'], [
            {'term': 'EarthRacoon', 'count': 1},
            {'term': 'NightTrain', 'count': 1},
            {'term': 'WaterWolf', 'count': 1},
        ])

        # The number of returned terms equals the configured maximum.
        res = self.api.get(_facets=['version'])

        ok_('version' in res['facets'])
        max_terms = self.api.config.facets_max_number
        eq_(len(res['facets']['version']), max_terms)

        # Faceting on a field that is not known must be rejected.
        assert_raises(
            BadArgumentError,
            self.api.get,
            _facets=['unkownfield']
        )

    def test_get_against_nonexistent_index(self):
        """Expect an empty result when the targeted index is missing.

        A dedicated ``SuperSearch`` instance is built with the index pattern
        ``socorro_test_reports_%W`` and queried over early January 2000;
        per the test name, no such index is expected to exist.
        """
        api = SuperSearch(
            config=self.get_mware_config(es_index='socorro_test_reports_%W')
        )
        date_range = ['>2000-01-01T00:00:00', '<2000-01-10T00:00:00']

        # The search must return an empty payload instead of failing.
        eq_(api.get(date=date_range), {'total': 0, 'hits': [], 'facets': {}})

    def test_get_return_query_mode(self):
        res = self.api.get(
            signature='js',
            _return_query=True
        )
        ok_('query' in res)
        ok_('indices' in res)

        query = res['query']
        ok_('query' in query)
        ok_('aggs' in query)
        ok_('size' in query)