Esempio n. 1
0
    def test_get_parameters_date_defaults(self):
        """Check the two `date` bounds that `get_parameters` produces.

        Four inputs are covered: no date at all, only an upper bound, only
        a lower bound, and both bounds given explicitly.
        """
        with _get_config_manager().context() as config:
            search = SearchBaseWithFields(config=config)

        now = datetimeutil.utc_now()
        ten_days_ago = now - datetime.timedelta(days=10)

        # Nothing passed: the field is still present with two bounds.
        parsed = search.get_parameters()
        ok_('date' in parsed)
        eq_(len(parsed['date']), 2)

        # Only the upper bound: the lower one lands seven days earlier.
        parsed = search.get_parameters(
            date='<%s' % datetimeutil.date_to_string(now)
        )
        ok_('date' in parsed)
        bounds = parsed['date']
        eq_(len(bounds), 2)
        upper, lower = bounds[0], bounds[1]
        eq_(upper.operator, '<')
        eq_(lower.operator, '>=')
        eq_(upper.value.date(), now.date())
        eq_(lower.value.date(), now.date() - datetime.timedelta(days=7))

        # Only the lower bound: the upper one is today, inclusive.
        parsed = search.get_parameters(
            date='>=%s' % datetimeutil.date_to_string(ten_days_ago)
        )
        ok_('date' in parsed)
        bounds = parsed['date']
        eq_(len(bounds), 2)
        upper, lower = bounds[0], bounds[1]
        eq_(upper.operator, '<=')
        eq_(lower.operator, '>=')
        eq_(upper.value.date(), now.date())
        eq_(lower.value.date(), ten_days_ago.date())

        # Both bounds: operators and values are kept as given.
        parsed = search.get_parameters(
            date=[
                '<%s' % datetimeutil.date_to_string(now),
                '>%s' % datetimeutil.date_to_string(ten_days_ago),
            ]
        )
        ok_('date' in parsed)
        bounds = parsed['date']
        eq_(len(bounds), 2)
        upper, lower = bounds[0], bounds[1]
        eq_(upper.operator, '<')
        eq_(lower.operator, '>')
        eq_(upper.value.date(), now.date())
        eq_(lower.value.date(), ten_days_ago.date())
Esempio n. 2
0
def twoPeriodTopCrasherComparison(
            databaseConnection, context,
            closestEntryFunction=latestEntryBeforeOrEqualTo,
            listOfTopCrashersFunction=getListOfTopCrashersBySignature):
    """Return the top crashers of the latest period with their rank changes.

    `context` is a mapping that must contain 'to_date', 'duration',
    'product' and 'version'. It is modified in place: 'logger' is set to a
    silent logger when absent, 'limit' defaults to 100 when absent or
    falsy, 'numberOfComparisonPoints' is set to 2, 'to_date' is replaced
    by the value `closestEntryFunction` returns, and 'startDate' is set to
    two durations before that date.

    `closestEntryFunction` is called with the connection and the
    'to_date', 'product' and 'version' values. `listOfTopCrashersFunction`
    is handed to `rangeOfQueriesGenerator` unchanged.

    Returns a dict with the keys 'crashes', 'start_date', 'end_date',
    'totalNumberOfCrashes' and 'totalPercentage'.

    Raises AssertionError when a required key is missing from `context`.
    """
    # Only the lookup is guarded, so a KeyError raised by the logger
    # itself is not mistaken for a missing logger.
    try:
        logger = context['logger']
    except KeyError:
        logger = context['logger'] = util.SilentFakeLogger()
    else:
        logger.debug('entered twoPeriodTopCrasherComparison')

    # Raised explicitly so the validation survives `python -O`, which
    # strips `assert` statements.
    for param in ('to_date', 'duration', 'product', 'version'):
        if param not in context:
            raise AssertionError(
                "%s is missing from the configuration" % param)

    context['numberOfComparisonPoints'] = 2
    # 'limit' is optional: a missing key is treated like a falsy value.
    if 'limit' not in context or not context['limit']:
        context['limit'] = 100

    context['to_date'] = closestEntryFunction(databaseConnection,
                                              context['to_date'],
                                              context['product'],
                                              context['version'])
    logger.debug('New to_date: %s' % context['to_date'])
    context['startDate'] = context['to_date'] - (
        context['duration'] * context['numberOfComparisonPoints'])

    # Only the first list of the comparison is reported.
    listOfTopCrashers = listOfListsWithChangeInRank(
        rangeOfQueriesGenerator(
            databaseConnection,
            context,
            listOfTopCrashersFunction))[0]

    totalNumberOfCrashes = totalPercentOfTotal = 0
    for crasher in listOfTopCrashers:
        # 'total_crashes' is moved out of each entry into the top-level
        # result; the value of the last entry carrying it wins.
        if 'total_crashes' in crasher:
            totalNumberOfCrashes = crasher['total_crashes']
            del crasher['total_crashes']
        totalPercentOfTotal += crasher.get('percentOfTotal', 0)

    return {
        'crashes': listOfTopCrashers,
        'start_date': datetimeutil.date_to_string(
            context['to_date'] - context['duration']
        ),
        'end_date': datetimeutil.date_to_string(context['to_date']),
        'totalNumberOfCrashes': totalNumberOfCrashes,
        'totalPercentage': totalPercentOfTotal,
    }
Esempio n. 3
0
    def post(self, **kwargs):
        """Return the first report date and first build of signatures.

        Raises MissingArgumentError when the parsed `signatures` argument
        is empty. The result holds the matching rows under 'hits', each
        with 'first_date' converted by `datetimeutil.date_to_string`, and
        their number under 'total'.
        """
        params = external_common.parse_arguments(self.filters, kwargs)
        wanted = params['signatures']
        if not wanted:
            raise MissingArgumentError('signatures')

        sql = """
            SELECT
                signature,
                first_report AS first_date,
                first_build
            FROM signatures
            WHERE signature IN %s
        """
        # The whole tuple is bound to the single `IN %s` placeholder.
        results = self.query(
            sql,
            [tuple(wanted)],
            error_message='Failed to retrieve signatures from PostgreSQL'
        )

        hits = []
        for row in results.zipped():
            row['first_date'] = datetimeutil.date_to_string(row['first_date'])
            hits.append(row)

        return {'hits': hits, 'total': len(hits)}
Esempio n. 4
0
    def test_get_exploitibility_by_report_date(self):
        """`get_exploitability` for yesterday only returns both signatures."""
        crashes = Crashes(config=self.config)
        one_day = datetime.timedelta(days=1)
        yesterday = datetimeutil.date_to_string((self.now - one_day).date())

        expected_hits = [
            dict(
                signature="canIhaveYourSignature()",
                null_count=2,
                none_count=2,
                low_count=2,
                medium_count=2,
                high_count=2,
            ),
            dict(
                signature="ofCourseYouCan()",
                null_count=4,
                none_count=3,
                low_count=2,
                medium_count=1,
                high_count=0,
            ),
        ]

        # The same day is used for both ends of the range.
        eq_(
            crashes.get_exploitability(
                start_date=yesterday,
                end_date=yesterday
            ),
            {"hits": expected_hits, "total": 2}
        )
Esempio n. 5
0
def test_date_to_string():
    """`date_to_string` renders aware and naive datetimes and plain dates."""
    cases = (
        # Datetime with timezone
        (
            datetime.datetime(2012, 1, 3, 12, 23, 34, tzinfo=UTC),
            '2012-01-03T12:23:34+00:00',
        ),
        # Datetime without timezone
        (
            datetime.datetime(2012, 1, 3, 12, 23, 34),
            '2012-01-03T12:23:34',
        ),
        # Date (no time, no timezone)
        (
            datetime.date(2012, 1, 3),
            '2012-01-03',
        ),
    )
    for value, expected in cases:
        eq_(datetimeutil.date_to_string(value), expected)
Esempio n. 6
0
    def test_twoPeriodTopCrasherComparisonLimited(self):
        """With `limit` set to 1 only the top-ranked signature is listed.

        'totalNumberOfCrashes' is still expected to be 24, while
        'totalPercentage' only covers the single entry that is returned.
        """
        lastweek = self.now - datetime.timedelta(days=7)
        lastweek_str = datetimeutil.date_to_string(lastweek.date())
        two_weeks = datetimeutil.date_to_string(self.now.date() -
                                                datetime.timedelta(days=14))

        self.params.limit = 1
        res = tcbs.twoPeriodTopCrasherComparison(
            self.connection,
            self.params
        )

        # Counts are plain ints: they compare equal to Python 2 longs, and
        # the `L` literal suffix is a syntax error on Python 3.
        res_expected = {
            'totalPercentage': 0.58333333333333304,
            'end_date': lastweek_str,
            'start_date': two_weeks,
            'crashes': [{
                'count': 14,
                'mac_count': 1,
                'content_count': 0,
                'first_report': lastweek_str,
                'previousRank': 'null',
                'currentRank': 0,
                'startup_percent': None,
                'versions': 'plugin1, plugin2',
                'first_report_exact': lastweek_str + ' 00:00:00',
                'percentOfTotal': 0.58333333333333304,
                'changeInRank': 'new',
                'is_gc_count': 1,
                'win_count': 12,
                'changeInPercentOfTotal': 'new',
                'linux_count': 1,
                'hang_count': 0,
                'signature': 'Fake Signature #1',
                'versions_count': 2,
                'previousPercentOfTotal': 'null',
                'plugin_count': 0
            }],
            'totalNumberOfCrashes': 24
        }

        eq_(res, res_expected)
Esempio n. 7
0
    def test_get_adu_by_signature(self):
        """Check `get_adu_by_signature` output and its date-range rejection.

        The first call asks for a single day and compares the hits; the
        second asks for a range starting 366 days earlier and must raise
        BadArgumentError.
        """
        crashes = Crashes(config=self.config)

        signature = "canIhaveYourSignature()"
        channel = "release"
        yesterday_date = (self.now - datetime.timedelta(days=1)).date()
        yesterday = datetimeutil.date_to_string(yesterday_date)

        res_expected = {
            "hits": [
                {
                    "product_name": "WaterWolf",
                    "signature": signature,
                    "adu_date": yesterday,
                    "build_date": "2014-03-01",
                    "buildid": '201403010101',
                    "crash_count": 3,
                    "adu_count": 1023,
                    "os_name": "Mac OS X",
                    "channel": channel,
                },
                {
                    "product_name": "WaterWolf",
                    "signature": signature,
                    "adu_date": yesterday,
                    "build_date": "2014-04-01",
                    "buildid": '201404010101',
                    "crash_count": 4,
                    "adu_count": 1024,
                    "os_name": "Windows NT",
                    "channel": channel,
                },
            ],
            "total": 2,
        }

        res = crashes.get_adu_by_signature(
            product_name="WaterWolf",
            start_date=yesterday,
            end_date=yesterday,
            signature=signature,
            channel=channel,
        )
        eq_(res, res_expected)

        # Every argument of the successful call is passed again, so only
        # the over-long date range can account for the error.
        # NOTE(review): presumes the service rejects ranges this long on
        # their own - confirm against get_adu_by_signature.
        assert_raises(
            BadArgumentError,
            crashes.get_adu_by_signature,
            product_name="WaterWolf",
            start_date=(yesterday_date - datetime.timedelta(days=366)),
            end_date=yesterday,
            signature=signature,
            channel=channel
        )
Esempio n. 8
0
    def test_get_list_with_sort(self):
        """`Report.get_list` honours `sort` and `reverse`, rejects bad keys."""
        now = self.now
        from_date = datetimeutil.date_to_string(
            now - datetime.timedelta(days=1)
        )
        report = Report(config=self.config)

        base_params = dict(signature='sig1', from_date=from_date, to_date=now)

        def hits(**extra):
            """Return the hits of `get_list` for the base params plus extras."""
            return report.get_list(**dict(base_params, **extra))['hits']

        # by default it's sorted by date_processed
        found = hits()
        eq_(found, sorted(found, key=lambda hit: hit['date_processed']))

        # now sort by 'reason'
        found = hits(sort='reason')
        eq_(found, sorted(found, key=lambda hit: hit['reason']))

        # and by 'reason' again, this time in reverse order
        found = hits(sort='reason', reverse=True)
        eq_(
            found,
            sorted(found, key=lambda hit: hit['reason'], reverse=True)
        )

        # an unknown sort key is rejected
        assert_raises(
            BadArgumentError,
            report.get_list,
            sort='neverheardof',
            **base_params
        )
Esempio n. 9
0
    def get(self, **kwargs):
        """Return the current state of all Crontabber jobs.

        The result is `{"state": {app_name: row}}` where every row keeps
        the remaining selected columns and its non-null date columns are
        converted with `datetimeutil.date_to_string`.
        """
        sql = """
        /* socorro.external.postgresql.crontabber_state.CrontabberState.get */
            SELECT
                app_name,
                next_run,
                first_run,
                last_run,
                last_success,
                error_count,
                depends_on,
                last_error,
                ongoing
            FROM crontabber
            ORDER BY app_name
        """
        results = self.query(
            sql,
            error_message=(
                "Failed to retrieve crontabber state data from PostgreSQL"
            )
        )

        # Columns that hold a datetime or NULL.
        datetime_columns = (
            'next_run',
            'first_run',
            'last_run',
            'last_success',
            'ongoing'
        )

        state = {}
        for job in results.zipped():
            name = job.pop('app_name')
            for column in datetime_columns:
                if job[column] is not None:
                    job[column] = datetimeutil.date_to_string(job[column])
            state[name] = job

        return {"state": state}
Esempio n. 10
0
    def setup_data(self):
        """Build `self.test_source_data`, the table of report scenarios.

        Each entry maps a scenario name to a dict with the query 'params'
        and the 'res_expected' list. Every scenario uses the signature
        'Fake Signature #1' and a range from one week before the date of
        `self.now` up to that date.
        """
        today = self.now.date()
        now_str = datetimeutil.date_to_string(today)
        yesterday_str = datetimeutil.date_to_string(
            today - datetime.timedelta(days=1)
        )
        lastweek_str = datetimeutil.date_to_string(
            today - datetime.timedelta(days=7)
        )

        def scenario(report_type, res_expected, versions="Firefox:8.0"):
            """Return one scenario; `versions=None` leaves that param out."""
            params = {}
            if versions is not None:
                params["versions"] = versions
            params["report_type"] = report_type
            params["signature"] = "Fake Signature #1"
            params["start_date"] = lastweek_str
            params["end_date"] = now_str
            return {'params': params, 'res_expected': res_expected}

        def firefox_versions():
            """Return a fresh list with one hit each for Firefox 8.0 and 9.0."""
            return [
                {
                    "product_name": 'Firefox',
                    "version_string": version,
                    "report_count": 1,
                    "percentage": '50.000',
                }
                for version in ("8.0", "9.0")
            ]

        def single_category(category, report_count):
            """Return a one-hit list where `category` accounts for 100%."""
            return [{
                "category": category,
                "report_count": report_count,
                "percentage": '100.000',
            }]

        def samsung_device(report_count):
            """Return a one-hit list for the samsung GT-P5100 device."""
            return [{
                'cpu_abi': 'armeabi-v7a',
                'manufacturer': 'samsung',
                'model': 'GT-P5100',
                'version': '16 (REL)',
                'report_count': report_count,
                'percentage': '100.000',
            }]

        self.test_source_data = {
            # Test 1: find exact match for one product version and signature
            'products': scenario("products", firefox_versions()),
            # Test 2: find ALL matches for all product versions and signature
            'products_no_version': scenario(
                "products", firefox_versions(), versions=None
            ),
            # Test 3: find architectures reported for a given version and a
            # signature
            'architecture': scenario(
                "architecture", single_category('amd64', 1.0)
            ),
            # Test 4: find architectures reported for a signature with no
            # specific version.
            'architecture_no_version': scenario(
                "architecture", single_category('amd64', 2), versions=None
            ),
            # Test 5: find flash_versions reported for specific version and
            # a signature
            'flash_versions': scenario(
                "flash_version", single_category('1.0', 1.0)
            ),
            # Test 6: find flash_versions reported with a signature and without
            # a specific version
            'flash_versions_no_version': scenario(
                "flash_version", single_category('1.0', 2.0), versions=None
            ),
            # Test 7: find installations reported for a signature
            'distinct_install': scenario(
                "distinct_install",
                [{
                    "product_name": 'Firefox',
                    "version_string": '8.0',
                    "crashes": 10,
                    "installations": 8,
                }]
            ),
            # Test 8: find os_version_strings reported for a signature
            'os': scenario("os", single_category('Windows NT 6.4', 1)),
            # Test 9: find process_type reported for a signature
            'process_type': scenario(
                "process_type", single_category('plugin', 1)
            ),
            # Test 10: find uptime reported for signature
            'uptime': scenario("uptime", single_category('15-30 minutes', 1)),
            # Test 11: find exploitability reported for signature
            'exploitability': scenario(
                "exploitability",
                [{
                    'low_count': 3,
                    'high_count': 5,
                    'null_count': 1,
                    'none_count': 2,
                    'report_date': yesterday_str,
                    'medium_count': 4,
                }]
            ),
            # Test 12: find mobile devices reported for signature with a
            # specific version
            'devices': scenario('devices', samsung_device(123)),
            # Test 13: find mobile devices reported for signature
            'devices_no_version': scenario(
                'devices', samsung_device(246), versions=None
            ),
            # Test 14: find graphics adapters reported for signature
            'graphics': scenario(
                'graphics',
                [{
                    'vendor_hex': '0x1234',
                    'adapter_hex': '0x5678',
                    'vendor_name': 'Test Vendor',
                    'adapter_name': 'Test Adapter',
                    'report_count': 123,
                    'percentage': '100.000',
                }]
            ),
        }
Esempio n. 11
0
    def get_signature_history(self, **kwargs):
        """Return the history of a signature.

        `product`, `version` and `signature` are required and
        MissingArgumentError is raised for the first one that is empty.
        `end_date` defaults to now and `start_date` to seven days earlier.
        The signature '##null##' matches rows whose signature IS NULL and
        '##empty##' is replaced by the empty string before querying.

        See http://socorro.readthedocs.org/en/latest/middleware.html
        """
        default_end = datetimeutil.utc_now()
        default_start = default_end - datetime.timedelta(days=7)

        params = external_common.parse_arguments(
            [
                ('product', None, 'str'),
                ('version', None, 'str'),
                ('signature', None, 'str'),
                ('end_date', default_end, 'datetime'),
                ('start_date', default_start, 'datetime'),
            ],
            kwargs
        )

        for required in ('product', 'version', 'signature'):
            if not params[required]:
                raise MissingArgumentError(required)

        requested = params.signature
        # The NULL case cannot be expressed with a bound parameter.
        signature_where = (
            'AND signature IS NULL' if requested == '##null##'
            else 'AND signature = %(signature)s'
        )
        if requested == '##empty##':
            params.signature = ''

        # `%%(name)s` survives the interpolation below as a `%(name)s`
        # placeholder; only the signature clause is substituted here.
        sql = """
            /* external.postgresql.crashes.Crashes.get_signature_history */
            WITH hist AS (
                SELECT
                    report_date,
                    report_count
                FROM
                    tcbs JOIN signatures using (signature_id)
                         JOIN product_versions using (product_version_id)
                WHERE
                    report_date BETWEEN %%(start_date)s AND %%(end_date)s
                    AND product_name = %%(product)s
                    AND version_string = %%(version)s
                    %s
                GROUP BY
                    report_date, report_count
                ORDER BY 1
            ),
            scaling_window AS (
                SELECT
                    hist.*,
                    SUM(report_count) over () AS total_crashes
                FROM hist
            )
            SELECT
                report_date AS date,
                report_count AS count,
                report_count / total_crashes::float * 100 AS percent_of_total
            FROM scaling_window
            ORDER BY report_date DESC
        """ % signature_where

        results = self.query(
            sql,
            params,
            error_message='Failed to retrieve signature history from PostgreSQL'
        )

        # Dates are turned into strings; the other columns stay untouched.
        hits = []
        for point in results.zipped():
            point['date'] = datetimeutil.date_to_string(point['date'])
            hits.append(point)

        return {'hits': hits, 'total': len(hits)}
Esempio n. 12
0
    def test_twoPeriodTopCrasherComparison(self):
        """Both fake signatures are returned, ranked 0 and 1 and both new.

        The expected period runs from two weeks before `self.now` to one
        week before it.
        """
        lastweek = self.now - datetime.timedelta(days=7)
        lastweek_str = datetimeutil.date_to_string(lastweek.date())
        two_weeks = datetimeutil.date_to_string(self.now.date() -
                                                datetime.timedelta(days=14))

        res = tcbs.twoPeriodTopCrasherComparison(
            self.connection,
            self.params
        )

        # Counts are plain ints: they compare equal to Python 2 longs, and
        # the `L` literal suffix is a syntax error on Python 3.
        res_expected = {
            'totalPercentage': 1.0,
            'end_date': lastweek_str,
            'start_date': two_weeks,
            'crashes': [{
                'count': 14,
                'mac_count': 1,
                'content_count': 0,
                'first_report': lastweek_str,
                'previousRank': 'null',
                'currentRank': 0,
                'startup_percent': None,
                'versions': 'plugin1, plugin2',
                'first_report_exact': lastweek_str + ' 00:00:00',
                'percentOfTotal': 0.58333333333333304,
                'changeInRank': 'new',
                'is_gc_count': 1,
                'win_count': 12,
                'changeInPercentOfTotal': 'new',
                'linux_count': 1,
                'hang_count': 0,
                'signature': 'Fake Signature #1',
                'versions_count': 2,
                'previousPercentOfTotal': 'null',
                'plugin_count': 0
            }, {
                'count': 10,
                'mac_count': 2,
                'content_count': 0,
                'first_report': lastweek_str,
                'previousRank': 'null',
                'currentRank': 1,
                'startup_percent': None,
                'versions': 'plugin1, plugin2, plugin3, '
                            'plugin4, plugin5, plugin6',
                'first_report_exact': lastweek_str + ' 00:00:00',
                'percentOfTotal': 0.41666666666666702,
                'changeInRank': 'new',
                'is_gc_count': 3,
                'win_count': 7,
                'changeInPercentOfTotal': 'new',
                'linux_count': 1,
                'hang_count': 0,
                'signature': 'Fake Signature #2',
                'versions_count': 6,
                'previousPercentOfTotal': 'null',
                'plugin_count': 0
            }],
            'totalNumberOfCrashes': 24
        }

        eq_(res, res_expected)
Esempio n. 13
0
    def test_listOfListsWithChangeInRank(self):
        """Rank changes are computed for a range starting two weeks back.

        The query range is built with `getListOfTopCrashersBySignature`
        and `startDate` set to 14 days before the date of `self.now`.
        """
        lastweek = self.now - datetime.timedelta(days=7)
        lastweek_str = datetimeutil.date_to_string(lastweek.date())

        params = self.params
        params.startDate = self.now.date() - datetime.timedelta(days=14)

        query_list = tcbs.getListOfTopCrashersBySignature
        query_range = tcbs.rangeOfQueriesGenerator(
            self.connection,
            self.params,
            query_list
        )
        res = tcbs.listOfListsWithChangeInRank(query_range)

        # Counts are plain ints: they compare equal to Python 2 longs, and
        # the `L` literal suffix is a syntax error on Python 3.
        res_expected = [[{
            'count': 5,
            'mac_count': 0,
            'content_count': 0,
            'first_report': lastweek_str,
            'previousRank': 0,
            'currentRank': 0,
            'startup_percent': None,
            'versions': 'plugin1, plugin2',
            'first_report_exact': lastweek_str + ' 00:00:00',
            'percentOfTotal': 0.625,
            'changeInRank': 0,
            'is_gc_count': 10,
            'win_count': 0,
            'changeInPercentOfTotal': 0.041666666666666963,
            'linux_count': 5,
            'hang_count': 5,
            'signature': 'Fake Signature #1',
            'versions_count': 2,
            'previousPercentOfTotal': 0.58333333333333304,
            'plugin_count': 0,
            'total_crashes': 8
        }, {
            'count': 3,
            'mac_count': 1,
            'content_count': 0,
            'first_report': lastweek_str,
            'previousRank': 1,
            'currentRank': 1,
            'startup_percent': None,
            'versions': 'plugin1, plugin2, plugin3, plugin4, plugin5, plugin6',
            'first_report_exact': lastweek_str + ' 00:00:00',
            'percentOfTotal': 0.375,
            'changeInRank': 0,
            'is_gc_count': 1,
            'win_count': 1,
            'changeInPercentOfTotal': -0.041666666666667018,
            'linux_count': 1,
            'hang_count': 0,
            'signature': 'Fake Signature #2',
            'versions_count': 6,
            'previousPercentOfTotal': 0.41666666666666702,
            'plugin_count': 0,
            'total_crashes': 8
        }]]

        # Without this comparison the test would only prove that the calls
        # above do not raise.
        assert res == res_expected
Esempio n. 14
0
    def test_get(self):
        """Exercise `SignatureURLs.get` with matching, empty and bad input."""
        signature_urls = SignatureURLs(config=self.config)
        today = self.now
        midnight = datetime.datetime(today.year, today.month, today.day)
        now_str = datetimeutil.date_to_string(midnight)

        empty_signature = "EMPTY: no crashing thread identified; corrupt dump"
        praxis_url = "http://deusex.wikia.com/wiki/Praxis_kit"
        code_rush_url = "http://wikipedia.org/Code_Rush"
        mobile_url = "http://arewemobileyet.org/"

        def query(products, versions, signature=empty_signature,
                  start_date=now_str):
            """Return the keyword arguments for one `get` call."""
            return {
                "signature": signature,
                "start_date": start_date,
                "end_date": now_str,
                "products": products,
                "versions": versions,
            }

        def expected(*urls):
            """Return the result with one single-crash hit per URL."""
            hits = [{"url": url, "crash_count": 1} for url in urls]
            return {"hits": hits, "total": len(hits)}

        #......................................................................
        # Test 1: find one exact match for products and versions passed
        eq_(
            signature_urls.get(
                **query(['Firefox'], ["Firefox:10.0", "Firefox:11.0"])
            ),
            expected(praxis_url)
        )

        #......................................................................
        # Test 2: Raise error if parameter is not passed
        assert_raises(
            MissingArgumentError,
            signature_urls.get,
            **query(
                ['Firefox'],
                ["Firefox:10.0", "Firefox:11.0"],
                signature="",
                start_date=""
            )
        )

        #......................................................................
        # Test 3: Query returning no results
        eq_(
            signature_urls.get(
                **query(['Fennec'], ["Fennec:10.0", "Fennec:11.0"])
            ),
            expected()
        )

        # Test 4: Return results for all version of Firefox
        eq_(
            signature_urls.get(**query(['Firefox'], ["ALL"])),
            expected(praxis_url, code_rush_url)
        )

        # Test 5: Return results for all products and versions
        eq_(
            signature_urls.get(**query(['ALL'], ["ALL"])),
            expected(praxis_url, code_rush_url, mobile_url)
        )

        # Test when we send incorrectly formatted 'versions' parameter
        assert_raises(
            BadArgumentError,
            signature_urls.get,
            **query(['Firefox'], ['27.0a1'], signature='Does not exist')
        )
Esempio n. 15
0
    def test_get_signature_history(self):
        """Check Crashes.get_signature_history: matching rows, an empty
        result, default date arguments and missing required arguments.
        """
        crashes = Crashes(config=self.config)
        today = self.now
        week_ago = today - datetime.timedelta(days=7)

        # A signature with two history rows in the requested window
        history = crashes.get_signature_history(
            product='Firefox',
            version='8.0',
            signature='signature1',
            start_date=week_ago,
            end_date=today,
        )
        hits = history['hits']

        eq_(len(hits), 2)
        eq_(len(hits), history['total'])

        expected_date = datetimeutil.date_to_string(today.date())
        eq_(hits[0]['date'], expected_date)
        eq_(hits[1]['date'], expected_date)

        eq_(hits[0]['count'], 5)
        eq_(hits[1]['count'], 14)

        # Shares of the 5 + 14 = 19 crashes, compared to two decimals
        first_share = round(hits[0]['percent_of_total'], 2)
        eq_(first_share, round(5.0 / 19.0 * 100, 2))
        second_share = round(hits[1]['percent_of_total'], 2)
        eq_(second_share, round(14.0 / 19.0 * 100, 2))

        # Test no results
        empty = crashes.get_signature_history(
            product='Firefox',
            version='9.0',
            signature='signature1',
            start_date=week_ago,
            end_date=today,
        )
        eq_(empty, {'hits': [], 'total': 0})

        # Test default date parameters
        defaulted = crashes.get_signature_history(
            product='Fennec',
            version='11.0.1',
            signature='signature3',
        )
        eq_(defaulted, {
            'hits': [{
                'date': today.date().isoformat(),
                'count': 14,
                'percent_of_total': 100,
            }],
            'total': 1,
        })

        # Test missing parameters: every incomplete combination must fail
        incomplete_calls = (
            {},
            {'product': 'Firefox'},
            {'product': 'Firefox', 'version': '8.0'},
            {'signature': 'signature1', 'version': '8.0'},
        )
        for incomplete in incomplete_calls:
            assert_raises(
                MissingArgumentError,
                crashes.get_signature_history,
                **incomplete
            )
Esempio n. 16
0
    def get_list(self, **kwargs):
        """
        List all crashes with a given signature and return them.

        Both `from_date` and `to_date` (and their aliases `from` and `to`)
        are required and can not be greater than 30 days apart.

        Arguments handled directly by this method:
          * `sort`: column to order by; it must be one of the recognized
            sort keys. With `sort`, the order is ascending unless `reverse`
            is (case-insensitively) 'true'. Without `sort`, results are
            ordered by `date_processed` descending.
          * `include_raw_crash`: when truthy, the query is wrapped so that
            each hit also carries `raw_crash`.

        Optional arguments: see SearchCommon.get_parameters()

        Returns a dict ``{"hits": [...], "total": N}`` where `total` comes
        from a count query that has no limit clause.

        Raises MissingArgumentError when `from_date`, `to_date` or
        `signature` is missing, and BadArgumentError when the date span
        exceeds 30 days or `sort` is not a recognized key.
        """
        # aliases
        for alias, canonical in (("from", "from_date"), ("to", "to_date")):
            if alias in kwargs and canonical not in kwargs:
                kwargs[canonical] = kwargs.get(alias)

        for required in ('from_date', 'to_date'):
            if not kwargs.get(required):
                raise MissingArgumentError(required)

        from_date = datetimeutil.datetimeFromISOdateString(kwargs['from_date'])
        to_date = datetimeutil.datetimeFromISOdateString(kwargs['to_date'])
        # timedelta.days drops any partial day, so only whole days count
        if (to_date - from_date).days > 30:
            raise BadArgumentError(
                'Span between from_date and to_date can not be more than 30'
            )

        # start with the default
        sort_order = {
            'key': 'date_processed',
            'direction': 'DESC'
        }
        if 'sort' in kwargs:
            sort_order['key'] = kwargs.pop('sort')
            # The key is interpolated straight into ORDER BY below, so it
            # must be checked against this fixed list first.
            # NOTE(review): 'duplicate_of' is accepted here, but with
            # include_raw_crash the inner query neither selects nor joins
            # reports_duplicates -- confirm that combination works.
            _recognized_sort_orders = (
                'date_processed',
                'uptime',
                'user_comments',
                'uuid',
                'uuid_text',
                'product',
                'version',
                'build',
                'signature',
                'url',
                'os_name',
                'os_version',
                'cpu_name',
                'cpu_info',
                'address',
                'reason',
                'last_crash',
                'install_age',
                'hangid',
                'process_type',
                'release_channel',
                'install_time',
                'duplicate_of',
            )
            if sort_order['key'] not in _recognized_sort_orders:
                raise BadArgumentError(
                    '%s is not a recognized sort order key' % sort_order['key']
                )
            sort_order['direction'] = 'ASC'
            if str(kwargs.get('reverse', '')).lower() == 'true':
                # `reverse` is only consumed when it actually applies
                kwargs.pop('reverse')
                sort_order['direction'] = 'DESC'

        include_raw_crash = kwargs.get('include_raw_crash') or False
        params = search_common.get_parameters(kwargs)

        if not params["signature"]:
            raise MissingArgumentError('signature')

        params["terms"] = params["signature"]
        params["search_mode"] = "is_exactly"

        # Default mode falls back to starts_with for postgres
        if params["plugin_search_mode"] == "default":
            params["plugin_search_mode"] = "starts_with"

        # Limiting to a signature
        if params["terms"]:
            params["terms"] = self.prepare_terms(params["terms"],
                                                 params["search_mode"])

        # Searching for terms in plugins
        if params["report_process"] == "plugin" and params["plugin_terms"]:
            params["plugin_terms"] = " ".join(params["plugin_terms"])
            params["plugin_terms"] = self.prepare_terms(
                params["plugin_terms"],
                params["plugin_search_mode"]
            )

        # Get information about the versions
        util_service = Util(config=self.context)
        params["versions_info"] = util_service.versions_info(**params)

        # Parsing the versions
        params["versions_string"] = params["versions"]
        (params["versions"], params["products"]) = self.parse_versions(
            params["versions"],
            params["products"]
        )

        if hasattr(self.context, 'webapi'):
            context = self.context.webapi
        else:
            # old middleware
            context = self.context
        # Changing the OS ids to OS names (matched on the first 3 chars)
        for i, elem in enumerate(params["os"]):
            for platform in context.platforms:
                if platform["id"][:3] == elem[:3]:
                    params["os"][i] = platform["name"]

        # Creating the parameters for the sql query
        sql_params = {}

        # Preparing the different parts of the sql query
        sql_select = """
            SELECT
                r.date_processed,
                r.uptime,
                r.user_comments,
                r.uuid::uuid,
                r.uuid as uuid_text,
                r.product,
                r.version,
                r.build,
                r.signature,
                r.url,
                r.os_name,
                r.os_version,
                r.cpu_name,
                r.cpu_info,
                r.address,
                r.reason,
                r.last_crash,
                r.install_age,
                r.hangid,
                r.process_type,
                r.release_channel,
                (r.client_crash_date - (r.install_age * INTERVAL '1 second'))
                  AS install_time
        """
        if not include_raw_crash:
            # Without the wrapper query, duplicate_of comes from a direct
            # join on reports_duplicates (added to sql_from below).
            sql_select += """
                , rd.duplicate_of
            """

        # Only used when include_raw_crash is set: the plain query becomes
        # the `report_slice` CTE, and duplicates and raw crashes are joined
        # onto that slice.
        wrapped_select = """
            WITH report_slice AS (
              %s
            ), dupes AS (
                SELECT
                    report_slice.uuid,
                    rd.duplicate_of
                FROM reports_duplicates rd
                JOIN report_slice ON report_slice.uuid_text = rd.uuid
                WHERE
                    rd.date_processed BETWEEN %%(from_date)s AND %%(to_date)s
            )

            SELECT
                rs.*,
                dupes.duplicate_of,
                rc.raw_crash
            FROM report_slice rs
            LEFT OUTER JOIN dupes USING (uuid)
            LEFT OUTER JOIN raw_crashes rc ON
                rs.uuid = rc.uuid
                AND
                rc.date_processed BETWEEN %%(from_date)s AND %%(to_date)s
        """

        sql_from = self.build_reports_sql_from(params)

        if not include_raw_crash:
            sql_from = """%s
                LEFT OUTER JOIN reports_duplicates rd ON r.uuid = rd.uuid
            """ % sql_from

        sql_where, sql_params = self.build_reports_sql_where(
            params,
            sql_params,
            self.context
        )

        sql_order = """
            ORDER BY %(key)s %(direction)s
        """ % sort_order

        sql_limit, sql_params = self.build_reports_sql_limit(
            params,
            sql_params
        )

        # Assembling the query (identical whether or not raw crashes are
        # requested; the wrapping happens later, only if there are results)
        sql_query = "\n".join((
            "/* socorro.external.postgresql.report.Report.list */",
            sql_select, sql_from, sql_where, sql_order, sql_limit)
        )

        # Query for counting the results
        sql_count_query = "\n".join((
            "/* socorro.external.postgresql.report.Report.list */",
            "SELECT count(*)", sql_from, sql_where)
        )

        # Querying the DB
        with self.get_connection() as connection:

            total = self.count(
                sql_count_query,
                sql_params,
                error_message="Failed to count crashes from reports.",
                connection=connection
            )

            # No need to call Postgres if we know there will be no results
            if total:

                if include_raw_crash:
                    sql_query = wrapped_select % sql_query

                results = self.query(
                    sql_query,
                    sql_params,
                    error_message="Failed to retrieve crashes from reports",
                    connection=connection
                ).zipped()
            else:
                results = []

        crashes = []
        for crash in results:
            # uuid_text only exists for the join/sanity check; drop it
            assert crash['uuid'] == crash['uuid_text']
            crash.pop('uuid_text')
            if not include_raw_crash and 'raw_crash' in crash:
                crash.pop('raw_crash')
            for field in crash:
                try:
                    crash[field] = datetimeutil.date_to_string(crash[field])
                except TypeError:
                    # Best effort: values for which date_to_string raises
                    # TypeError are kept unchanged.
                    pass
            crashes.append(crash)

        return {
            "hits": crashes,
            "total": total
        }
Esempio n. 17
0
    def test_get_list(self):
        """Exercise Report.get_list with several filter combinations and
        compare the returned hits against the expected rows.
        """
        yesterday = datetimeutil.date_to_string(
            self.now - datetime.timedelta(days=1)
        )
        report = Report(config=self.config)

        base_params = {
            'signature': 'sig1',
            'from_date': yesterday,
            'to_date': self.now,
        }

        def expected_hit(**overrides):
            """Return a full expected row; `overrides` replace defaults."""
            hit = {
                'hangid': None,
                'product': 'WaterWolf',
                'os_name': 'Windows NT',
                'cpu_info': None,
                'url': None,
                'last_crash': None,
                'date_processed': yesterday,
                'cpu_name': None,
                'uptime': None,
                'release_channel': 'Release',
                'process_type': 'browser',
                'os_version': None,
                'reason': 'STACK_OVERFLOW',
                'version': '1.0',
                'build': '20001212010203',
                'install_age': None,
                'signature': 'sig1',
                'install_time': None,
                'duplicate_of': None,
                'address': None,
                'user_comments': None,
            }
            hit.update(overrides)
            return hit

        # Basic test
        res = report.get_list(**base_params)
        eq_(res['total'], 5)
        eq_(len(res['hits']), 5)

        # Only hits flagged as duplicates end up in this lookup
        duplicates_map = {}
        for hit in res['hits']:
            if hit['duplicate_of']:
                duplicates_map[hit['uuid']] = hit['duplicate_of']
        eq_(
            duplicates_map['60597bdc-5dbe-4409-6b38-4309c0130828'],
            '60597bdc-5dbe-4409-6b38-4309c0130833'
        )

        # Test with products and versions
        params = dict(base_params)
        params.update(
            products='WaterWolf',
            versions='WaterWolf:2.0',
        )
        res = report.get_list(**params)
        eq_(res['total'], 1)

        # Test with os, build_ids and reasons
        params = dict(base_params)
        params.update(
            products='WaterWolf',
            versions=['WaterWolf:1.0', 'WaterWolf:3.0'],
            os='win',
            build_ids='20001212010203',
            reasons='STACK_OVERFLOW',
        )
        res = report.get_list(**params)
        eq_(res['total'], 2)

        res_expected = {
            'hits': [
                expected_hit(
                    uuid='60597bdc-5dbe-4409-6b38-4309c0130831',
                ),
                expected_hit(
                    uuid='60597bdc-5dbe-4409-6b38-4309c0130834',
                    process_type='plugin',
                    version='3.0',
                ),
            ],
            'total': 2
        }
        eq_(res, res_expected)

        # Test with a signature with strange characters
        params = dict(base_params)
        params.update(signature='this/is+a=C|signature')
        res = report.get_list(**params)
        eq_(res['total'], 1)

        res_expected = {
            'hits': [
                expected_hit(
                    product='WindBear',
                    os_name='Linux',
                    uuid='60597bdc-5dbe-4409-6b38-4309c0130837',
                    signature='this/is+a=C|signature',
                ),
            ],
            'total': 1
        }
        eq_(res, res_expected)

        # Test plugins
        params = dict(base_params)
        params.update(
            report_process='plugin',
            plugin_in='filename',
            plugin_terms='NPSWF',
            plugin_search_mode='contains',
        )
        res = report.get_list(**params)
        eq_(res['total'], 1)

        # Test plugins with 'starts_with' search mode
        params = dict(base_params)
        params.update(
            report_process='plugin',
            plugin_in='name',
            plugin_terms='Flash',
            plugin_search_mode='starts_with',
        )
        res = report.get_list(**params)
        eq_(res['total'], 1)
Esempio n. 18
0
    def get_comments(self, **kwargs):
        """Return the user comments left on crash reports that match the
        given filters.

        See socorro.middleware.search_common.get_parameters() for all filters.

        The result is a dict ``{"hits": [...], "total": count}``: `total`
        comes from a count query without the limit clause, and every hit
        has its `date_processed` converted with datetimeutil.date_to_string.
        """
        search_params = self.prepare_search_params(**kwargs)
        query_label = "/* external.postgresql.crashes.Crashes.get_comments */"

        # WARNING: sensitive data is returned here (email). When there is
        # an authentication mechanism, a verification should be done here.
        select_clause = """
            SELECT
                r.date_processed,
                r.user_comments,
                r.uuid,
                CASE
                    WHEN r.email = '' THEN null
                    WHEN r.email IS NULL THEN null
                    ELSE r.email
                END
        """

        count_clause = """
            SELECT
                COUNT(r.uuid)
        """

        from_clause = self.build_reports_sql_from(search_params)
        # The where builder starts from an empty parameter dict and returns
        # the one to bind to the query.
        where_clause, sql_params = self.build_reports_sql_where(
            search_params,
            {},
            self.context
        )
        where_clause = "%s AND r.user_comments IS NOT NULL" % where_clause

        order_clause = "ORDER BY email ASC, r.date_processed ASC"

        limit_clause, sql_params = self.build_reports_sql_limit(
            search_params,
            sql_params
        )

        count_query = " ".join(
            (query_label, count_clause, from_clause, where_clause)
        )
        total = self.count(count_query, sql_params)

        hits = []
        # Only run the full query when the count says there is something
        if total:
            comments_query = " ".join((
                query_label,
                select_clause,
                from_clause,
                where_clause,
                order_clause,
                limit_clause,
            ))
            rows = self.query(
                comments_query,
                sql_params,
                error_message="Failed to retrieve comments from PostgreSQL"
            )

            # Transforming the results into what we want
            for row in rows.zipped():
                row['date_processed'] = datetimeutil.date_to_string(
                    row['date_processed']
                )
                hits.append(row)

        return {"hits": hits, "total": total}
Esempio n. 19
0
    def test_listOfListsWithChangeInRank(self):
        """Run tcbs.listOfListsWithChangeInRank over a range of queries
        starting 14 days before `self.now` and build the expected rows.

        NOTE(review): within this block `res` is never compared against
        `res_expected` -- the assertion looks missing or cut off; confirm.
        """

        lastweek = self.now - datetime.timedelta(days=7)
        lastweek_str = datetimeutil.date_to_string(lastweek.date())

        # `params` is an alias of self.params, not a copy: setting startDate
        # here also changes the self.params object passed to
        # rangeOfQueriesGenerator below.
        params = self.params
        params.startDate = self.now.date() - datetime.timedelta(days=14)

        query_list = tcbs.getListOfTopCrashersBySignature
        query_range = tcbs.rangeOfQueriesGenerator(
            self.connection,
            self.params,
            query_list
        )
        res = tcbs.listOfListsWithChangeInRank(query_range)

        # Expected output: one inner list holding two signature rows.
        # Several counts are written as Python 2 long literals (``5L``).
        res_expected = [[{
            'count': 5L,
            'mac_count': 0L,
            'content_count': 0,
            'first_report': lastweek_str,
            'previousRank': 0,
            'currentRank': 0,
            'startup_percent': None,
            'versions': 'plugin1, plugin2',
            'first_report_exact': lastweek_str + ' 00:00:00',
            'percentOfTotal': 0.625,
            'changeInRank': 0,
            'is_gc_count': 10L,
            'win_count': 0L,
            'changeInPercentOfTotal': 0.041666666666666963,
            'linux_count': 5L,
            'hang_count': 5L,
            'signature': 'Fake Signature #1',
            'versions_count': 2,
            'previousPercentOfTotal': 0.58333333333333304,
            'plugin_count': 0,
            'total_crashes': 8
        }, {
            'count': 3L,
            'mac_count': 1L,
            'content_count': 0,
            'first_report': lastweek_str,
            'previousRank': 1,
            'currentRank': 1,
            'startup_percent': None,
            'versions': 'plugin1, plugin2, plugin3, plugin4, plugin5, plugin6',
            'first_report_exact': lastweek_str + ' 00:00:00',
            'percentOfTotal': 0.375,
            'changeInRank': 0,
            'is_gc_count': 1L,
            'win_count': 1L,
            'changeInPercentOfTotal': -0.041666666666667018,
            'linux_count': 1L,
            'hang_count': 0L,
            'signature': 'Fake Signature #2',
            'versions_count': 6,
            'previousPercentOfTotal': 0.41666666666666702,
            'plugin_count': 0,
            'total_crashes': 8
        }]]
Esempio n. 20
0
    def get_signature_history(self, **kwargs):
        """Return the history of a signature.

        See https://socorro.readthedocs.io/en/latest/middleware.html

        `product`, `version` and `signature` are required; a missing one
        raises MissingArgumentError. `start_date` and `end_date` default to
        one week ago and now (UTC). The signature value '##null##' matches
        rows whose signature IS NULL, and '##empty##' stands for the empty
        string.

        Returns ``{'hits': [...], 'total': len(hits)}`` where each hit has
        `date`, `count` and `percent_of_total`, newest date first.
        """
        default_end = datetimeutil.utc_now()
        default_start = default_end - datetime.timedelta(days=7)

        params = external_common.parse_arguments(
            [
                ('product', None, 'str'),
                ('version', None, 'str'),
                ('signature', None, 'str'),
                ('end_date', default_end, 'datetime'),
                ('start_date', default_start, 'datetime'),
            ],
            kwargs
        )

        for required in ('product', 'version', 'signature'):
            if not params[required]:
                raise MissingArgumentError(required)

        # NULL cannot be matched with `=`, so it gets its own clause
        signature_where = (
            'AND signature IS NULL'
            if params.signature == '##null##'
            else 'AND signature = %(signature)s'
        )

        if params.signature == '##empty##':
            params.signature = ''

        # The doubled %% placeholders survive this first substitution and
        # are bound by the query call; only the signature clause goes in now.
        sql = """
            /* external.postgresql.crashes.Crashes.get_signature_history */
            WITH hist AS (
                SELECT
                    report_date,
                    report_count
                FROM
                    tcbs JOIN signatures using (signature_id)
                         JOIN product_versions using (product_version_id)
                WHERE
                    report_date BETWEEN %%(start_date)s AND %%(end_date)s
                    AND product_name = %%(product)s
                    AND version_string = %%(version)s
                    %s
                GROUP BY
                    report_date, report_count
                ORDER BY 1
            ),
            scaling_window AS (
                SELECT
                    hist.*,
                    SUM(report_count) over () AS total_crashes
                FROM hist
            )
            SELECT
                report_date AS date,
                report_count AS count,
                report_count / total_crashes::float * 100 AS percent_of_total
            FROM scaling_window
            ORDER BY report_date DESC
        """ % signature_where

        rows = self.query(
            sql,
            params,
            error_message='Failed to retrieve signature history from PostgreSQL'
        )

        # Transforming the results into what we want
        history = []
        for point in rows.zipped():
            point['date'] = datetimeutil.date_to_string(point['date'])
            history.append(point)

        return {'hits': history, 'total': len(history)}
Esempio n. 21
0
    def get_daily(self, **kwargs):
        """Return crashes by active daily users.

        Keyword arguments (parsed with external_common.parse_arguments):
          * `product` -- required.
          * `versions` -- required list of strings.
          * `from_date` / `to_date` (aliases `from` / `to`) -- date range;
            defaults to the last week up to today (UTC).
          * `os` -- optional list; only the first three characters of each
            entry, lowercased, are used for matching.
          * `report_type` -- optional list.
          * `date_range_type` -- "build" selects the build-date views and
            the `build_date` column; any other value (default "date") uses
            the report-date views and `report_date`.

        When neither `os` nor `report_type` is given, the home page graph
        views are queried directly; otherwise the crashes-by-user views are
        aggregated, and each row also carries `throttle`.

        Returns ``{"hits": {"<product>:<version>": {"<date>": row}}}``.

        Raises MissingArgumentError when `product` or `versions` is missing.
        """
        now = datetimeutil.utc_now().date()
        lastweek = now - datetime.timedelta(weeks=1)

        filters = [
            ("product", None, "str"),
            ("versions", None, ["list", "str"]),
            ("from_date", lastweek, "date"),
            ("to_date", now, "date"),
            ("os", None, ["list", "str"]),
            ("report_type", None, ["list", "str"]),
            ("date_range_type", "date", "str"),
        ]

        # aliases
        for alias, canonical in (("from", "from_date"), ("to", "to_date")):
            if alias in kwargs and canonical not in kwargs:
                kwargs[canonical] = kwargs.get(alias)

        params = external_common.parse_arguments(filters, kwargs)

        if not params.product:
            raise MissingArgumentError('product')

        if not params.versions or not params.versions[0]:
            raise MissingArgumentError('versions')

        # Bound to the `IN %(versions)s` placeholder below.
        # NOTE(review): presumably the DB driver needs a tuple here -- confirm.
        params.versions = tuple(params.versions)

        by_build = params.date_range_type == "build"
        date_range_field = "build_date" if by_build else "report_date"
        has_os = bool(params.os and params.os[0])
        has_report_type = bool(params.report_type and params.report_type[0])

        # simple version, for home page graphs mainly
        if not has_os and not has_report_type:
            if by_build:
                table_to_use = "home_page_graph_build_view"
            else:
                table_to_use = "home_page_graph_view"

            db_fields = ("product_name", "version_string", date_range_field,
                         "report_count", "adu", "crash_hadu")

            out_fields = ("product", "version", "date", "report_count", "adu",
                          "crash_hadu")

            sql = """
                /* socorro.external.postgresql.crashes.Crashes.get_daily */
                SELECT %(db_fields)s
                FROM %(table_to_use)s
                WHERE product_name=%%(product)s
                AND version_string IN %%(versions)s
                AND %(date_range_field)s BETWEEN %%(from_date)s
                    AND %%(to_date)s
            """ % {
                "db_fields": ", ".join(db_fields),
                "date_range_field": date_range_field,
                "table_to_use": table_to_use
            }

        # complex version, for daily crashes page mainly
        else:
            if by_build:
                table_to_use = "crashes_by_user_build_view"
            else:
                table_to_use = "crashes_by_user_view"

            db_fields = [
                "product_name", "version_string", date_range_field,
                "sum(adjusted_report_count)::bigint as report_count",
                "sum(adu)::bigint as adu",
                """crash_hadu(sum(report_count)::bigint, sum(adu)::bigint,
                              avg(throttle)) as crash_hadu""",
                "avg(throttle) as throttle"
            ]

            out_fields = [
                "product", "version", "date", "report_count", "adu",
                "crash_hadu", "throttle"
            ]

            db_group = ["product_name", "version_string", date_range_field]

            # Extra filters; only literal SQL fragments are interpolated,
            # the values themselves stay bound parameters.
            sql_where = []
            if has_os:
                sql_where.append("os_short_name IN %(os)s")
                params.os = tuple(x[0:3].lower() for x in params.os)

            if has_report_type:
                sql_where.append("crash_type_short IN %(report_type)s")
                params.report_type = tuple(params.report_type)

            if sql_where:
                sql_where = "AND %s" % " AND ".join(sql_where)
            else:
                sql_where = ''

            sql = """
                /* socorro.external.postgresql.crashes.Crashes.get_daily */
                SELECT %(db_fields)s
                FROM (
                    SELECT
                        product_name,
                        version_string,
                        %(date_range_field)s,
                        os_name,
                        os_short_name,
                        SUM(report_count)::int as report_count,
                        SUM(adjusted_report_count)::int
                            as adjusted_report_count,
                        MAX(adu) as adu,
                        AVG(throttle) as throttle
                    FROM %(table_to_use)s
                    WHERE product_name=%%(product)s
                    AND version_string IN %%(versions)s
                    AND %(date_range_field)s BETWEEN %%(from_date)s
                        AND %%(to_date)s
                    %(sql_where)s
                    GROUP BY product_name, version_string,
                             %(date_range_field)s, os_name, os_short_name
                ) as aggregated_crashes_by_user
            """ % {
                "db_fields": ", ".join(db_fields),
                "date_range_field": date_range_field,
                "table_to_use": table_to_use,
                "sql_where": sql_where
            }

            # The outer select aggregates, so it always needs its GROUP BY
            sql = "%s GROUP BY %s" % (sql, ", ".join(db_group))

        error_message = "Failed to retrieve daily crashes data from PostgreSQL"
        results = self.query(sql, params, error_message=error_message)

        # Group the rows by "product:version", then by date string
        hits = {}
        for row in results:
            daily_data = dict(zip(out_fields, row))
            if "throttle" in daily_data:
                daily_data["throttle"] = float(daily_data["throttle"])
            daily_data["crash_hadu"] = float(daily_data["crash_hadu"])
            daily_data["date"] = datetimeutil.date_to_string(
                daily_data["date"])

            key = "%s:%s" % (daily_data["product"], daily_data["version"])
            hits.setdefault(key, {})[daily_data["date"]] = daily_data

        return {"hits": hits}
Esempio n. 22
0
    def get_comments(self, **kwargs):
        """Return a list of comments on crash reports, filtered by
        signatures and other fields.

        See socorro.middleware.search_common.get_parameters() for all filters.

        Returns ``{"hits": [...], "total": count}``. The full query is only
        run when the count query reports at least one matching row.
        """
        params = self.prepare_search_params(**kwargs)
        tag = "/* external.postgresql.crashes.Crashes.get_comments */"

        # Preparing the different parts of the sql query

        # WARNING: sensitive data is returned here (email). When there is
        # an authentication mechanism, a verification should be done here.
        select_sql = """
            SELECT
                r.date_processed,
                r.user_comments,
                r.uuid,
                CASE
                    WHEN r.email = '' THEN null
                    WHEN r.email IS NULL THEN null
                    ELSE r.email
                END
        """

        count_sql = """
            SELECT
                COUNT(r.uuid)
        """

        from_sql = self.build_reports_sql_from(params)
        where_sql, bound = self.build_reports_sql_where(
            params, {}, self.context
        )
        where_sql = "%s AND r.user_comments IS NOT NULL" % where_sql

        order_sql = "ORDER BY email ASC, r.date_processed ASC"

        limit_sql, bound = self.build_reports_sql_limit(params, bound)

        # The count deliberately leaves out the ordering and limit clauses
        count = self.count(
            " ".join((tag, count_sql, from_sql, where_sql)),
            bound
        )

        if not count:
            # Nothing matched: skip the second round trip to the database
            return {
                "hits": [],
                "total": count
            }

        # Assembling the query
        full_query = " ".join(
            (tag, select_sql, from_sql, where_sql, order_sql, limit_sql)
        )
        results = self.query(
            full_query,
            bound,
            error_message="Failed to retrieve comments from PostgreSQL"
        )

        # Transforming the results into what we want
        comments = []
        for entry in results.zipped():
            entry['date_processed'] = datetimeutil.date_to_string(
                entry['date_processed']
            )
            comments.append(entry)

        return {
            "hits": comments,
            "total": count
        }
Esempio n. 23
0
    def test_get_signature_history(self):
        """Check Crashes.get_signature_history.

        Covers a signature with two history rows, a version with no
        results, the default date range, and the MissingArgumentError
        raised for incomplete argument sets. The expected counts depend on
        fixture data loaded outside this method.
        """
        api = Crashes(config=self.config)
        now = self.now
        lastweek = now - datetime.timedelta(days=7)
        date_range = {'start_date': lastweek, 'end_date': now}

        res = api.get_signature_history(
            product='Firefox',
            version='8.0',
            signature='signature1',
            **date_range
        )
        hits = res['hits']

        eq_(len(hits), 2)
        eq_(len(hits), res['total'])

        # Both rows are dated today.
        today = datetimeutil.date_to_string(now.date())
        for hit in hits:
            eq_(hit['date'], today)

        expected_counts = (5, 14)
        for hit, count in zip(hits, expected_counts):
            eq_(hit['count'], count)

        # Each row's share of the combined count, compared to two decimals.
        combined = float(sum(expected_counts))
        for hit, count in zip(hits, expected_counts):
            eq_(round(hit['percent_of_total'], 2),
                round(count / combined * 100, 2))

        # Test no results
        res = api.get_signature_history(
            product='Firefox',
            version='9.0',
            signature='signature1',
            **date_range
        )
        eq_(res, {'hits': [], 'total': 0})

        # Test default date parameters
        res = api.get_signature_history(
            product='Fennec',
            version='11.0.1',
            signature='signature3',
        )
        eq_(res, {
            'hits': [{
                'date': now.date().isoformat(),
                'count': 14,
                'percent_of_total': 100
            }],
            'total': 1
        })

        # Test missing parameters
        incomplete_cases = (
            {},
            {'product': 'Firefox'},
            {'product': 'Firefox', 'version': '8.0'},
            {'signature': 'signature1', 'version': '8.0'},
        )
        for incomplete in incomplete_cases:
            assert_raises(MissingArgumentError,
                          api.get_signature_history,
                          **incomplete)
Esempio n. 24
0
    def setup_data(self):
        """Record the current time and build the backfill test cases.

        Sets ``self.now`` from ``datetimeutil.utc_now()`` and
        ``self.test_source_data``, a dict keyed by case name. Every value
        holds the 'params' for the case and the 'res_expected' rows, which
        are ``[(True,)]`` for all of them. Each key ``<name>`` is the case
        for ``backfill_<name>``.
        """
        self.now = datetimeutil.utc_now()
        today = self.now.date()
        now_str = datetimeutil.date_to_string(today)
        yesterday_str = datetimeutil.date_to_string(
            today - datetime.timedelta(days=1)
        )
        lastweek_str = datetimeutil.date_to_string(
            today - datetime.timedelta(days=7)
        )

        def case(**params):
            # Every call builds fresh 'params' and 'res_expected' objects,
            # so no two cases share mutable state.
            return {'params': params, 'res_expected': [(True,)]}

        def daily():
            # Case that takes only an update_day, set to yesterday.
            return case(update_day=yesterday_str)

        def ranged():
            # Case that takes a yesterday-to-today date range.
            return case(start_date=yesterday_str, end_date=now_str)

        self.test_source_data = {
            'adu': daily(),
            'all_dups': ranged(),
            'build_adu': daily(),
            'correlations': daily(),
            'crashes_by_user_build': daily(),
            'crashes_by_user': daily(),

            # TODO: backfill_daily_crashes tries to insert into a table
            # that does not exist. It can be fixed by creating a temporary
            # one. Disabled case: 'daily_crashes' with update_day=now_str.

            'exploitability': daily(),
            'home_page_graph_build': daily(),
            'home_page_graph': daily(),
            'matviews': case(
                start_date=yesterday_str,
                reports_clean='false',
            ),
            'rank_compare': daily(),
            'reports_clean': ranged(),

            # TODO: backfill_reports_duplicates tries to insert into a
            # table that does not exist. It can be fixed by using the
            # update function inside of the backfill. Disabled case:
            # 'reports_duplicates' with start_date=yesterday_str and
            # end_date=now_str.

            # TODO: backfill_signature_counts tries to insert into tables
            # and to update functions that do not exist. Disabled case:
            # 'signature_counts' with start_date=yesterday_str and
            # end_date=now_str.

            'tcbs_build': daily(),
            'tcbs': daily(),
            'weekly_report_partitions': case(
                start_date=lastweek_str,
                end_date=now_str,
                table_name='raw_crashes',
            ),

            # TODO: Update Backfill to support signature_summary backfill
            # through the API. Disabled cases, each with
            # update_day=yesterday_str: 'signature_summary_products',
            # 'signature_summary_installations',
            # 'signature_summary_uptime', 'signature_summary_os',
            # 'signature_summary_process_type',
            # 'signature_summary_architecture',
            # 'signature_summary_flash_version',
            # 'signature_summary_device', 'signature_summary_graphics'.
        }
Esempio n. 25
0
    def test_get_list(self):
        """Check Report.get_list against the rows loaded for this test case.

        Covers the plain signature search, product/version filters,
        os/build id/reason filters, a signature containing special
        characters, and the 'contains' and 'starts_with' plugin search
        modes. Expected totals and field values depend on fixture data
        defined outside this method.
        """
        yesterday = datetimeutil.date_to_string(
            self.now - datetime.timedelta(days=1)
        )
        report = Report(config=self.config)

        base_params = {
            'signature': 'sig1',
            'from_date': yesterday,
            'to_date': self.now,
        }

        def search(**extra):
            # Base parameters, with any of them overridden by ``extra``.
            return report.get_list(**dict(base_params, **extra))

        def expected_hit(uuid, **overrides):
            # Template shared by every expected row; only the fields that
            # differ between rows are passed as overrides.
            hit = {
                'hangid': None,
                'product': 'WaterWolf',
                'os_name': 'Windows NT',
                'uuid': uuid,
                'cpu_info': None,
                'url': None,
                'last_crash': None,
                'date_processed': yesterday,
                'cpu_name': None,
                'uptime': None,
                'release_channel': 'Release',
                'process_type': 'browser',
                'os_version': None,
                'reason': 'STACK_OVERFLOW',
                'version': '1.0',
                'build': '20001212010203',
                'install_age': None,
                'signature': 'sig1',
                'install_time': None,
                'duplicate_of': None,
                'address': None,
                'user_comments': None,
            }
            hit.update(overrides)
            return hit

        # Basic test
        res = search()
        eq_(res['total'], 5)
        eq_(len(res['hits']), 5)

        duplicates_map = {
            hit['uuid']: hit['duplicate_of']
            for hit in res['hits']
            if hit['duplicate_of']
        }
        eq_(duplicates_map['60597bdc-5dbe-4409-6b38-4309c0130828'],
            '60597bdc-5dbe-4409-6b38-4309c0130833')

        # Test with products and versions
        res = search(
            products='WaterWolf',
            versions='WaterWolf:2.0',
        )
        eq_(res['total'], 1)

        # Test with os, build_ids and reasons
        res = search(
            products='WaterWolf',
            versions=['WaterWolf:1.0', 'WaterWolf:3.0'],
            os='win',
            build_ids='20001212010203',
            reasons='STACK_OVERFLOW',
        )
        eq_(res['total'], 2)
        eq_(res, {
            'hits': [
                expected_hit('60597bdc-5dbe-4409-6b38-4309c0130831'),
                expected_hit(
                    '60597bdc-5dbe-4409-6b38-4309c0130834',
                    process_type='plugin',
                    version='3.0',
                ),
            ],
            'total': 2
        })

        # Test with a signature with strange characters
        res = search(signature='this/is+a=C|signature')
        eq_(res['total'], 1)
        eq_(res, {
            'hits': [
                expected_hit(
                    '60597bdc-5dbe-4409-6b38-4309c0130837',
                    product='WindBear',
                    os_name='Linux',
                    signature='this/is+a=C|signature',
                ),
            ],
            'total': 1
        })

        # Test plugins
        res = search(
            report_process='plugin',
            plugin_in='filename',
            plugin_terms='NPSWF',
            plugin_search_mode='contains',
        )
        eq_(res['total'], 1)

        # Test plugins with 'starts_with' search mode
        res = search(
            report_process='plugin',
            plugin_in='name',
            plugin_terms='Flash',
            plugin_search_mode='starts_with',
        )
        eq_(res['total'], 1)
Esempio n. 26
0
def fix(configContext, logger, query, fixer):
  """Run an external fixer program over the dumps selected by ``query``.

  For every (ooid, date_processed) row returned by ``fetchOoids`` the dump
  is fetched from HBase and written to a scratch file under /dev/shm.
  ``fixer`` is then invoked as ``fixer <file>``. The file is read back
  afterwards, so the fixer is expected to rewrite it in place. The result
  is stored with ``put_fixed_dump`` and re-queued for processing.

  A failure on one row is reported through ``reportExceptionAndContinue``
  and processing moves on to the next row. The scratch file is removed
  whether or not the row succeeded.

  Args:
    configContext: object providing ``hbaseHost``, ``hbasePort`` and
      ``hbaseTimeout``; also passed on to ``fetchOoids``.
    logger: logger used for progress and error reporting.
    query: passed on to ``fetchOoids`` to select the rows to fix.
    fixer: path of the executable to run on each dump file.

  Returns:
    The date_processed value of the last row that was unpacked, or the
    value returned by ``fetchOoids`` when no row was unpacked.
  """
  rows, last_date_processed = fetchOoids(configContext, logger, query)
  hbc = hbaseClient.HBaseConnectionForCrashReports(
    configContext.hbaseHost,
    configContext.hbasePort,
    configContext.hbaseTimeout,
    logger=logger
  )
  for row in rows:
    try:
      ooid, last_date_processed = row
      logger.info('fixing ooid: %s' % ooid)
      dump = hbc.get_dump(ooid)
      fname = '/dev/shm/%s.dump' % ooid
      try:
        with open(fname, 'wb') as orig_dump_file:
          orig_dump_file.write(dump)
        logger.debug('wrote dump file: %s' % fname)
        logger.debug('fixer: %s' % fixer)
        # check_call raises CalledProcessError on a non-zero exit status,
        # so the "fixed" message is only logged after a successful run.
        subprocess.check_call([fixer, fname])
        logger.debug('fixed dump file: %s' % fname)
        with open(fname, 'rb') as fixed_dump_file:
          fixed_dump = fixed_dump_file.read()
        # The file is closed before talking to HBase; only the bytes
        # read from it are needed from here on.
        hbc.put_fixed_dump(
          ooid,
          fixed_dump,
          add_to_unprocessed_queue=True,
          submitted_timestamp=date_to_string(utc_now())
        )
        logger.debug('put fixed dump file into hbase: %s' % fname)
      finally:
        # /dev/shm is memory backed: never leave a dump behind, even when
        # the fixer or the HBase write failed. The existence check covers
        # the case where the file could not be created at all.
        if os.path.exists(fname):
          os.unlink(fname)
          logger.debug('removed dump file: %s' % fname)
    except Exception:
      # Best effort per row. KeyboardInterrupt and SystemExit are not
      # caught here, so the run can still be interrupted.
      socorrolib.lib.util.reportExceptionAndContinue(logger)

  return last_date_processed
Esempio n. 27
0
    def test_get(self):
        """Check ServerStatus.get() with defaults and with duration=1.

        The expected rows are dated at 15 minute steps going back from
        today at 12:00 in the timezone of ``self.now``. Their values depend
        on fixture data loaded outside this method.
        """
        status = server_status.ServerStatus(config=self.config)

        noon = datetime.datetime(
            self.now.year, self.now.month, self.now.day, 12, 0, 0,
            tzinfo=self.now.tzinfo
        )
        quarter_hour = datetime.timedelta(minutes=15)
        date1, date2, date3, date4 = [
            datetimeutil.date_to_string(noon - quarter_hour * step)
            for step in range(4)
        ]

        def hit(row_id, created, job_date, process_sec, wait_sec,
                waiting, processors):
            # ``job_date`` fills both job related date fields of a row.
            return {
                "id": row_id,
                "date_recently_completed": job_date,
                "date_oldest_job_queued": job_date,
                "avg_process_sec": process_sec,
                "avg_wait_sec": wait_sec,
                "waiting_job_count": waiting,
                "processors_count": processors,
                "date_created": created
            }

        def expected(hits):
            # Full response: the rows, the fixed revisions and the total.
            return {
                "hits": hits,
                "socorro_revision": "42",
                "breakpad_revision": "43",
                "schema_revision": "aaaaaaaaaaaa",
                "total": len(hits)
            }

        all_hits = [
            hit(1, date1, date1, 2, 5, 3, 2),
            hit(2, date2, date2, 3, 3.12, 2, 2),
            hit(3, date3, date3, 1, 2, 4, 1),
            # The oldest row has no job dates at all.
            hit(4, date4, None, 1, 2, 4, 1),
        ]

        #......................................................................
        # Test 1: default behavior
        res = status.get()
        eq_(res, expected(all_hits))

        #......................................................................
        # Test 2: with duration
        res = status.get(duration=1)
        eq_(res, expected(all_hits[:1]))
Esempio n. 28
0
    def test_get(self):
        """Check SignatureURLs.get() for matches, no matches and bad input.

        Every query covers the single day of ``self.now``, truncated to
        midnight. The expected URLs depend on fixture data loaded outside
        this method.
        """
        signature_urls = SignatureURLs(config=self.config)
        today = datetime.datetime(
            self.now.year, self.now.month, self.now.day
        )
        now_str = datetimeutil.date_to_string(today)

        empty_dump = "EMPTY: no crashing thread identified; corrupt dump"

        def query_params(products, versions, signature=empty_dump,
                         start_date=now_str):
            # Parameters of one request; end_date is always today.
            return {
                "signature": signature,
                "start_date": start_date,
                "end_date": now_str,
                "products": products,
                "versions": versions
            }

        def url_hits(*urls):
            # Expected response: one hit per URL, each seen exactly once.
            return {
                "hits": [{"url": url, "crash_count": 1} for url in urls],
                "total": len(urls)
            }

        praxis_kit = "http://deusex.wikia.com/wiki/Praxis_kit"
        code_rush = "http://wikipedia.org/Code_Rush"
        mobile_yet = "http://arewemobileyet.org/"

        #......................................................................
        # Test 1: find one exact match for products and versions passed
        res = signature_urls.get(**query_params(
            ['Firefox'], ["Firefox:10.0", "Firefox:11.0"]
        ))
        eq_(res, url_hits(praxis_kit))

        #......................................................................
        # Test 2: Raise error if parameter is not passed
        assert_raises(
            MissingArgumentError,
            signature_urls.get,
            **query_params(
                ['Firefox'], ["Firefox:10.0", "Firefox:11.0"],
                signature="", start_date=""
            )
        )

        #......................................................................
        # Test 3: Query returning no results
        res = signature_urls.get(**query_params(
            ['Fennec'], ["Fennec:10.0", "Fennec:11.0"]
        ))
        eq_(res, url_hits())

        # Test 4: Return results for all version of Firefox
        res = signature_urls.get(**query_params(['Firefox'], ["ALL"]))
        eq_(res, url_hits(praxis_kit, code_rush))

        # Test 5: Return results for all products and versions
        res = signature_urls.get(**query_params(['ALL'], ["ALL"]))
        eq_(res, url_hits(praxis_kit, code_rush, mobile_yet))

        # Test when we send incorrectly formatted 'versions' parameter
        assert_raises(
            BadArgumentError,
            signature_urls.get,
            **query_params(
                ['Firefox'], ['27.0a1'], signature='Does not exist'
            )
        )
Esempio n. 29
0
    def get(self, **kwargs):
        """Return a list of results and aggregations based on parameters.

        The list of accepted parameters (with types and default values) is in
        the database and can be accessed with the super_search_fields service.
        """
        # Require that the list of fields be passed.
        if not kwargs.get('_fields'):
            raise MissingArgumentError('_fields')
        self.all_fields = kwargs['_fields']

        # Filter parameters and raise potential errors.
        params = self.get_parameters(**kwargs)

        # Find the indices to use to optimize the elasticsearch query.
        indices = self.get_indices(params['date'])

        # Create and configure the search object.
        search = Search(
            using=self.get_connection(),
            index=indices,
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
        )

        # Create filters.
        filters = []
        histogram_intervals = {}

        for field, sub_params in params.items():
            sub_filters = None
            for param in sub_params:
                if param.name.startswith('_'):
                    # By default, all param values are turned into lists,
                    # even when they have and can have only one value.
                    # For those we know there can only be one value,
                    # so we just extract it from the made-up list.
                    if param.name == '_results_offset':
                        results_from = param.value[0]
                    elif param.name == '_results_number':
                        results_number = param.value[0]
                        if results_number > 1000:
                            raise BadArgumentError(
                                '_results_number',
                                msg=('_results_number cannot be greater '
                                     'than 1,000'))
                        if results_number < 0:
                            raise BadArgumentError(
                                '_results_number',
                                msg='_results_number cannot be negative')
                    elif param.name == '_facets_size':
                        facets_size = param.value[0]

                    for f in self.histogram_fields:
                        if param.name == '_histogram_interval.%s' % f:
                            histogram_intervals[f] = param.value[0]

                    # Don't use meta parameters in the query.
                    continue

                field_data = self.all_fields[param.name]

                name = '%s.%s' % (field_data['namespace'],
                                  field_data['in_database_name'])

                if param.data_type in ('date', 'datetime'):
                    param.value = datetimeutil.date_to_string(param.value)
                elif param.data_type == 'enum':
                    param.value = [x.lower() for x in param.value]
                elif param.data_type == 'str' and not param.operator:
                    param.value = [x.lower() for x in param.value]

                # Operators needing wildcards, and the associated value
                # transformation with said wildcards.
                operator_wildcards = {
                    '~': '*%s*',  # contains
                    '^': '%s*',  # starts with
                    '$': '*%s'  # ends with
                }
                # Operators needing ranges, and the associated Elasticsearch
                # comparison operator.
                operator_range = {
                    '>': 'gt',
                    '<': 'lt',
                    '>=': 'gte',
                    '<=': 'lte',
                }

                args = {}
                filter_type = 'term'
                filter_value = None

                if not param.operator:
                    # contains one of the terms
                    if len(param.value) == 1:
                        val = param.value[0]

                        if not isinstance(val, basestring) or ' ' not in val:
                            # There's only one term and no white space, this
                            # is a simple term filter.
                            filter_value = val
                        else:
                            # If the term contains white spaces, we want to
                            # perform a phrase query.
                            filter_type = 'query'
                            args = Q(
                                'simple_query_string',
                                query=param.value[0],
                                fields=[name],
                                default_operator='and',
                            ).to_dict()
                    else:
                        # There are several terms, this is a terms filter.
                        filter_type = 'terms'
                        filter_value = param.value
                elif param.operator == '=':
                    # is exactly
                    if field_data['has_full_version']:
                        name = '%s.full' % name
                    filter_value = param.value
                elif param.operator in operator_range:
                    filter_type = 'range'
                    filter_value = {
                        operator_range[param.operator]: param.value
                    }
                elif param.operator == '__null__':
                    filter_type = 'missing'
                    args['field'] = name
                elif param.operator == '__true__':
                    filter_type = 'term'
                    filter_value = True
                elif param.operator == '@':
                    filter_type = 'regexp'
                    if field_data['has_full_version']:
                        name = '%s.full' % name
                    filter_value = param.value
                elif param.operator in operator_wildcards:
                    filter_type = 'query'

                    # Wildcard operations are better applied to a non-analyzed
                    # field (called "full") if there is one.
                    if field_data['has_full_version']:
                        name = '%s.full' % name

                    q_args = {}
                    q_args[name] = (operator_wildcards[param.operator] %
                                    param.value)
                    query = Q('wildcard', **q_args)
                    args = query.to_dict()

                if filter_value is not None:
                    args[name] = filter_value

                if args:
                    new_filter = F(filter_type, **args)
                    if param.operator_not:
                        new_filter = ~new_filter

                    if sub_filters is None:
                        sub_filters = new_filter
                    elif filter_type == 'range':
                        sub_filters &= new_filter
                    else:
                        sub_filters |= new_filter

                    continue

            if sub_filters is not None:
                filters.append(sub_filters)

        search = search.filter(F('bool', must=filters))

        # Restricting returned fields.
        fields = []

        # We keep track of the requested columns in order to make sure we
        # return those column names and not aliases for example.
        self.request_columns = []
        for param in params['_columns']:
            for value in param.value:
                if not value:
                    continue

                self.request_columns.append(value)
                field_name = self.get_field_name(value, full=False)
                fields.append(field_name)

        search = search.fields(fields)

        # Sorting.
        sort_fields = []
        for param in params['_sort']:
            for value in param.value:
                if not value:
                    continue

                # Values starting with a '-' are sorted in descending order.
                # In order to retrieve the database name of the field, we
                # must first remove the '-' part and add it back later.
                # Example: given ['product', '-version'], the results will be
                # sorted by ascending product then descending version.
                desc = False
                if value.startswith('-'):
                    desc = True
                    value = value[1:]

                field_name = self.get_field_name(value)

                if desc:
                    # The underlying library understands that '-' means
                    # sorting in descending order.
                    field_name = '-' + field_name

                sort_fields.append(field_name)

        search = search.sort(*sort_fields)

        # Pagination.
        results_to = results_from + results_number
        search = search[results_from:results_to]

        # Create facets.
        if facets_size:
            self._create_aggregations(params, search, facets_size,
                                      histogram_intervals)

        # Query and compute results.
        hits = []

        if params['_return_query'][0].value[0]:
            # Return only the JSON query that would be sent to elasticsearch.
            return {
                'query': search.to_dict(),
                'indices': indices,
            }

        errors = []

        # We call elasticsearch with a computed list of indices, based on
        # the date range. However, if that list contains indices that do not
        # exist in elasticsearch, an error will be raised. We thus want to
        # remove all failing indices until we either have a valid list, or
        # an empty list in which case we return no result.
        while True:
            try:
                results = search.execute()
                for hit in results:
                    hits.append(self.format_fields(hit.to_dict()))

                total = search.count()

                aggregations = getattr(results, 'aggregations', {})
                if aggregations:
                    aggregations = self.format_aggregations(aggregations)

                shards = getattr(results, '_shards', {})

                break  # Yay! Results!
            except NotFoundError, e:
                missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0]
                if missing_index in indices:
                    del indices[indices.index(missing_index)]
                else:
                    # Wait what? An error caused by an index that was not
                    # in the request? That should never happen, but in case
                    # it does, better know it.
                    raise

                errors.append({
                    'type': 'missing_index',
                    'index': missing_index,
                })

                if indices:
                    # Update the list of indices and try again.
                    # Note: we need to first empty the list of indices before
                    # updating it, otherwise the removed indices never get
                    # actually removed.
                    search = search.index().index(*indices)
                else:
                    # There is no index left in the list, return an empty
                    # result.
                    hits = []
                    total = 0
                    aggregations = {}
                    shards = None
                    break
Esempio n. 30
0
    def setup_data(self):
        """Populate ``self.test_source_data`` with the report test cases.

        The attribute is set to a dict keyed by case name. Each value holds
        the ``params`` dict for one report request and the ``res_expected``
        list for it. All cases use the signature "Fake Signature #1" with a
        ``start_date`` seven days before ``self.now`` and an ``end_date`` of
        ``self.now``; the ``*_no_version`` cases leave out the ``versions``
        parameter.
        """
        today = self.now.date()
        now_str = datetimeutil.date_to_string(today)
        yesterday_str = datetimeutil.date_to_string(
            today - datetime.timedelta(days=1)
        )
        lastweek_str = datetimeutil.date_to_string(
            today - datetime.timedelta(days=7)
        )

        def build_params(report_type, with_versions=True):
            # Only the report type and the presence of the version filter
            # differ between cases; everything else is shared.
            params = {
                "report_type": report_type,
                "signature": "Fake Signature #1",
                "start_date": lastweek_str,
                "end_date": now_str,
            }
            if with_versions:
                params["versions"] = "Firefox:8.0"
            return params

        def single_category(category, report_count):
            # A lone category row accounting for 100% of the reports.
            return [
                {
                    "category": category,
                    "report_count": report_count,
                    "percentage": "100.000",
                }
            ]

        def firefox_halves():
            # Firefox 8.0 and 9.0, one report (50%) each.
            return [
                {
                    "product_name": "Firefox",
                    "version_string": version_string,
                    "report_count": 1,
                    "percentage": "50.000",
                }
                for version_string in ("8.0", "9.0")
            ]

        def samsung_device(report_count):
            # The single device row; only the report count varies.
            return [
                {
                    "cpu_abi": "armeabi-v7a",
                    "manufacturer": "samsung",
                    "model": "GT-P5100",
                    "version": "16 (REL)",
                    "report_count": report_count,
                    "percentage": "100.000",
                }
            ]

        self.test_source_data = {
            # Test 1: products report for one product version and signature
            "products": {
                "params": build_params("products"),
                "res_expected": firefox_halves(),
            },
            # Test 2: products report for the signature, no version filter
            "products_no_version": {
                "params": build_params("products", with_versions=False),
                "res_expected": firefox_halves(),
            },
            # Test 3: architectures for a given version and signature
            "architecture": {
                "params": build_params("architecture"),
                "res_expected": single_category("amd64", 1.0),
            },
            # Test 4: architectures for a signature, no version filter
            "architecture_no_version": {
                "params": build_params("architecture", with_versions=False),
                "res_expected": single_category("amd64", 2),
            },
            # Test 5: flash versions for a given version and signature
            "flash_versions": {
                "params": build_params("flash_version"),
                "res_expected": single_category("1.0", 1.0),
            },
            # Test 6: flash versions for a signature, no version filter
            "flash_versions_no_version": {
                "params": build_params("flash_version", with_versions=False),
                "res_expected": single_category("1.0", 2.0),
            },
            # Test 7: installations reported for a signature
            "distinct_install": {
                "params": build_params("distinct_install"),
                "res_expected": [
                    {
                        "product_name": "Firefox",
                        "version_string": "8.0",
                        "crashes": 10,
                        "installations": 8,
                    }
                ],
            },
            # Test 8: OS version strings reported for a signature
            "os": {
                "params": build_params("os"),
                "res_expected": single_category("Windows NT 6.4", 1),
            },
            # Test 9: process types reported for a signature
            "process_type": {
                "params": build_params("process_type"),
                "res_expected": single_category("plugin", 1),
            },
            # Test 10: uptime ranges reported for a signature
            "uptime": {
                "params": build_params("uptime"),
                "res_expected": single_category("15-30 minutes", 1),
            },
            # Test 11: exploitability counts reported for a signature
            "exploitability": {
                "params": build_params("exploitability"),
                "res_expected": [
                    {
                        "low_count": 3,
                        "high_count": 5,
                        "null_count": 1,
                        "none_count": 2,
                        "report_date": yesterday_str,
                        "medium_count": 4,
                    }
                ],
            },
            # Test 12: mobile devices for a signature and a specific version
            "devices": {
                "params": build_params("devices"),
                "res_expected": samsung_device(123),
            },
            # Test 13: mobile devices for a signature, no version filter
            "devices_no_version": {
                "params": build_params("devices", with_versions=False),
                "res_expected": samsung_device(246),
            },
            # Test 14: graphics adapters reported for a signature
            "graphics": {
                "params": build_params("graphics"),
                "res_expected": [
                    {
                        "vendor_hex": "0x1234",
                        "adapter_hex": "0x5678",
                        "vendor_name": "Test Vendor",
                        "adapter_name": "Test Adapter",
                        "report_count": 123,
                        "percentage": "100.000",
                    }
                ],
            },
        }
Esempio n. 31
0
    def get(self, **kwargs):
        """Return recent server status rows together with revision info.

        The ``duration`` keyword (declared as ("duration", 12, "int"),
        presumably name / default / type) is used as the SQL ``LIMIT`` on
        the ``server_status`` rows, newest first.

        Returns a dict with:
          * ``hits``: the status rows, with their date columns converted to
            strings where the conversion does not raise ``TypeError``;
          * ``total``: the number of rows in ``hits``;
          * ``socorro_revision`` / ``breakpad_revision``: contents of the
            packaged revision files;
          * ``schema_revision``: the ``version_num`` from
            ``alembic_version``, or "Unknown" if the query returned nothing.
        """
        params = external_common.parse_arguments(
            [("duration", 12, "int")],
            kwargs
        )

        # Find the recent server status
        status_sql = """
            /* socorro.external.postgresql.server_status.ServerStatus.get */
            SELECT
                id,
                date_recently_completed,
                date_oldest_job_queued,
                avg_process_sec,
                avg_wait_sec,
                waiting_job_count,
                processors_count,
                date_created
            FROM server_status
            ORDER BY date_created DESC
            LIMIT %(duration)s
        """
        status_error = "Failed to retrieve server status data from PostgreSQL"
        status_rows = self.query(
            status_sql, params, error_message=status_error
        )

        # Columns holding dates, which get stringified for later JSON
        # encoding.
        date_columns = (
            "date_recently_completed",
            "date_oldest_job_queued",
            "date_created",
        )

        stats = []
        for row in status_rows.zipped():
            for column in date_columns:
                try:
                    row[column] = datetimeutil.date_to_string(row[column])
                except TypeError:
                    # Best effort: leave the value as it is when it cannot
                    # be converted.
                    pass
            stats.append(row)

        # Find the current database version
        version_sql = """
            /* socorro.external.postgresql.server_status.ServerStatus.get */
            SELECT
                version_num
            FROM alembic_version
        """
        version_error = "Failed to retrieve database version from PostgreSQL"
        version_rows = self.query(version_sql, error_message=version_error)
        if not version_rows:
            logger.warning("No version_num was found in table alembic_version")
            schema_revision = "Unknown"
        else:
            (schema_revision,) = version_rows[0]

        # Find the current breakpad and socorro revisions
        socorro_revision = resource_string('socorro', 'socorro_revision.txt')
        breakpad_revision = resource_string('socorro', 'breakpad_revision.txt')

        response = {
            "hits": stats,
            "total": len(stats),
            "socorro_revision": socorro_revision,
            "breakpad_revision": breakpad_revision,
            "schema_revision": schema_revision,
        }
        return response
Esempio n. 32
0
    def setup_data(self):
        """Fill ``self.test_source_data`` with one entry per report case.

        Every entry is a dict with the request ``params`` and the
        ``res_expected`` list. The signature ("Fake Signature #1") and the
        date window (seven days before ``self.now`` up to ``self.now``) are
        common to all cases; cases named ``*_no_version`` carry no
        ``versions`` parameter.
        """
        today = self.now.date()
        yesterday = today - datetime.timedelta(days=1)
        lastweek = today - datetime.timedelta(days=7)
        now_str, yesterday_str, lastweek_str = [
            datetimeutil.date_to_string(day)
            for day in (today, yesterday, lastweek)
        ]

        # Parameters shared by every case.
        shared = {
            'signature': 'Fake Signature #1',
            'start_date': lastweek_str,
            'end_date': now_str,
        }

        def case(report_type, res_expected, versioned=True):
            # Assemble one entry; the version filter is optional.
            params = dict(shared, report_type=report_type)
            if versioned:
                params['versions'] = 'Firefox:8.0'
            return {'params': params, 'res_expected': res_expected}

        def lone_category(category, report_count):
            # A single category row covering all reports.
            return [{
                'category': category,
                'report_count': report_count,
                'percentage': '100.000',
            }]

        def split_by_version():
            # One report (50%) for each of Firefox 8.0 and 9.0.
            rows = []
            for version_string in ('8.0', '9.0'):
                rows.append({
                    'product_name': 'Firefox',
                    'version_string': version_string,
                    'report_count': 1,
                    'percentage': '50.000',
                })
            return rows

        def device_rows(report_count):
            # The one known device, with a varying report count.
            return [{
                'cpu_abi': 'armeabi-v7a',
                'manufacturer': 'samsung',
                'model': 'GT-P5100',
                'version': '16 (REL)',
                'report_count': report_count,
                'percentage': '100.000',
            }]

        cases = {}

        # Test 1: products report for one product version and signature
        cases['products'] = case('products', split_by_version())

        # Test 2: products report for the signature, without version filter
        cases['products_no_version'] = case(
            'products', split_by_version(), versioned=False
        )

        # Test 3: architectures for a given version and a signature
        cases['architecture'] = case(
            'architecture', lone_category('amd64', 1.0)
        )

        # Test 4: architectures for a signature, without version filter
        cases['architecture_no_version'] = case(
            'architecture', lone_category('amd64', 2), versioned=False
        )

        # Test 5: flash versions for a given version and a signature
        cases['flash_versions'] = case(
            'flash_version', lone_category('1.0', 1.0)
        )

        # Test 6: flash versions for a signature, without version filter
        cases['flash_versions_no_version'] = case(
            'flash_version', lone_category('1.0', 2.0), versioned=False
        )

        # Test 7: installations reported for a signature
        cases['distinct_install'] = case('distinct_install', [{
            'product_name': 'Firefox',
            'version_string': '8.0',
            'crashes': 10,
            'installations': 8,
        }])

        # Test 8: OS version strings reported for a signature
        cases['os'] = case('os', lone_category('Windows NT 6.4', 1))

        # Test 9: process types reported for a signature
        cases['process_type'] = case(
            'process_type', lone_category('plugin', 1)
        )

        # Test 10: uptime ranges reported for a signature
        cases['uptime'] = case('uptime', lone_category('15-30 minutes', 1))

        # Test 11: exploitability counts reported for a signature
        cases['exploitability'] = case('exploitability', [{
            'low_count': 3,
            'high_count': 5,
            'null_count': 1,
            'none_count': 2,
            'report_date': yesterday_str,
            'medium_count': 4,
        }])

        # Test 12: mobile devices for a signature with a specific version
        cases['devices'] = case('devices', device_rows(123))

        # Test 13: mobile devices for a signature, without version filter
        cases['devices_no_version'] = case(
            'devices', device_rows(246), versioned=False
        )

        # Test 14: graphics adapters reported for a signature
        cases['graphics'] = case('graphics', [{
            'vendor_hex': '0x1234',
            'adapter_hex': '0x5678',
            'vendor_name': 'Test Vendor',
            'adapter_name': 'Test Adapter',
            'report_count': 123,
            'percentage': '100.000',
        }])

        self.test_source_data = cases
Esempio n. 33
0
    def test_get(self):
        """Check ``Products.get`` with and without a ``versions`` argument."""
        api = Products(config=self.config)
        today = self.now.date()
        one_week = datetime.timedelta(days=7)
        now_str = datetimeutil.date_to_string(today)
        lastweek_str = datetimeutil.date_to_string(today - one_week)
        nextweek_str = datetimeutil.date_to_string(today + one_week)

        def filtered_hit(product, version, has_builds):
            # Shape of a hit returned when a "versions" filter is given.
            return {
                "product": product,
                "version": version,
                "start_date": now_str,
                "end_date": now_str,
                "is_featured": False,
                "build_type": "Release",
                "throttle": 10.0,
                "has_builds": has_builds,
            }

        def listed_version(product, version, start_date, end_date, throttle,
                           release, has_builds, featured=False):
            # Shape of a hit returned when no filter is given.
            return {
                "product": product,
                "version": version,
                "start_date": start_date,
                "end_date": end_date,
                "throttle": throttle,
                "featured": featured,
                "release": release,
                "has_builds": has_builds,
            }

        # ---------------------------------------------------------------
        # Test 1: one exact match for one product and one version.
        res = api.get(versions="Firefox:8.0")
        res_expected = {
            "hits": [filtered_hit("Firefox", "8.0", False)],
            "total": 1
        }
        # The throttle must come back as a floating point number.
        ok_(isinstance(res['hits'][0]['throttle'], float))
        # Only the key names of the first hit are compared here.
        eq_(sorted(res['hits'][0]), sorted(res_expected['hits'][0]))

        # ---------------------------------------------------------------
        # Test 2: two different products with their correct versions.
        res = api.get(versions=["Firefox:8.0", "Thunderbird:10.0.2b"])
        res_expected = {
            "hits": [
                filtered_hit("Firefox", "8.0", True),
                filtered_hit("Thunderbird", "10.0.2b", False),
            ],
            "total": 2
        }
        eq_(sorted(res['hits'][0]), sorted(res_expected['hits'][0]))

        # ---------------------------------------------------------------
        # Test 3: empty result, no product:version found.
        res = api.get(versions="Firefox:14.0")
        eq_(res, {"hits": [], "total": 0})

        # ---------------------------------------------------------------
        # Test 4: the full products list is returned with no parameters.
        res = api.get()
        res_expected = {
            "products": ["Firefox", "Thunderbird", "Fennec"],
            "hits": {
                "Firefox": [
                    listed_version(
                        "Firefox", "9.0", now_str, nextweek_str,
                        100.00, "Nightly", True, featured=True
                    ),
                    listed_version(
                        "Firefox", "8.0", lastweek_str, lastweek_str,
                        10.00, "Release", False
                    ),
                ],
                "Thunderbird": [
                    listed_version(
                        "Thunderbird", "10.0.2b", now_str, nextweek_str,
                        10.00, "Release", False
                    ),
                ],
                "Fennec": [
                    listed_version(
                        "Fennec", "12.0b1", now_str, nextweek_str,
                        100.00, "Beta", True
                    ),
                    listed_version(
                        "Fennec", "11.0.1", now_str, now_str,
                        10.00, "Release", False
                    ),
                ],
            },
            "total": 5
        }

        eq_(res['total'], res_expected['total'])
        eq_(sorted(res['products']), sorted(res_expected['products']))
        eq_(sorted(res['hits']), sorted(res_expected['hits']))
        for product in sorted(res['hits'].keys()):
            actual_versions = res['hits'][product]
            expected_versions = res_expected['hits'][product]
            # Key names first, then the complete content.
            eq_(sorted(actual_versions[0]), sorted(expected_versions[0]))
            eq_(actual_versions, expected_versions)

        # Versions must come back in the expected order.
        assert len(res['hits']['Fennec']) == 2
        eq_(res['hits']['Fennec'][0]['version'], '12.0b1')
        eq_(res['hits']['Fennec'][1]['version'], '11.0.1')

        # ---------------------------------------------------------------
        # Test 5: an invalid versions list is passed, all versions are
        # returned.
        res = api.get(versions=[1])
        eq_(res['total'], 5)
Esempio n. 34
0
    def get(self, **kwargs):
        """Return product information, or version information for one or
        more product:version combinations.

        When a non-empty ``versions`` argument is passed, the call is handed
        over to ``_get_versions``. Otherwise every row of ``product_info`` is
        returned, grouped by product.

        The returned dict has three keys: ``products`` (product names, in
        the order they first appear in the results), ``hits`` (product name
        mapped to its list of version dicts) and ``total`` (``len()`` of the
        query results).
        """
        warnings.warn(
            'This class is deprecated. Use ProductVersions instead.',
            DeprecationWarning
        )
        # "versions" is only accepted for legacy callers, to be removed.
        params = external_common.parse_arguments(
            [("versions", None, ["list", "str"])],
            kwargs
        )
        if params.versions and params.versions[0]:
            return self._get_versions(params)

        sql = """
            /* socorro.external.postgresql.products.Products.get */
            SELECT
                product_name AS product,
                version_string AS version,
                start_date,
                end_date,
                throttle,
                is_featured AS featured,
                build_type AS release,
                has_builds
            FROM product_info
            ORDER BY product_sort, version_sort DESC, channel_sort
        """

        results = self.query(
            sql,
            error_message=(
                "Failed to retrieve products/versions from PostgreSQL"
            )
        )

        product_names = []
        grouped = {}

        for row in results.zipped():
            # Values that date_to_string rejects with a TypeError are kept
            # exactly as the database returned them.
            for date_key in ('end_date', 'start_date'):
                try:
                    row[date_key] = datetimeutil.date_to_string(
                        row[date_key]
                    )
                except TypeError:
                    pass

            row['throttle'] = float(row['throttle'])

            name = row['product']
            if name not in product_names:
                product_names.append(name)

            grouped.setdefault(name, []).append(row)

        return {
            'products': product_names,
            'hits': grouped,
            'total': len(results)
        }
Esempio n. 35
0
    def test_get_comments(self):
        """Exercise Crashes.get_comments: matching and non-matching
        signatures, the missing-argument error, version filters and
        pagination."""
        crashes = Crashes(config=self.config)
        today = datetimeutil.date_to_string(self.now)

        def expected_hit(uuid, comment):
            # Every expected hit has the same shape; only the uuid and the
            # comment text vary between cases.
            return {
                "email": None,
                "date_processed": today,
                "uuid": uuid,
                "user_comments": comment,
            }

        # Test 1: results
        res = crashes.get_comments(signature="js")
        eq_(res, {
            "hits": [
                expected_hit("def", "hello"),
                expected_hit("hij", "hah"),
            ],
            "total": 2,
        })

        # Test 2: no results
        res = crashes.get_comments(signature="blah")
        eq_(res, {"hits": [], "total": 0})

        # Test 3: missing parameter
        assert_raises(MissingArgumentError, crashes.get_comments)

        # Test a valid rapid beta version
        res = crashes.get_comments(
            signature="cool_sig",
            products="Firefox",
            versions="Firefox:14.0b",
        )
        eq_(res, {
            "hits": [expected_hit("nop", "hi!")],
            "total": 1,
        })

        # Test an invalid rapid beta version
        res = crashes.get_comments(
            signature="cool_sig",
            versions="WaterWolf:2.0b",
        )
        eq_(res, {
            "hits": [expected_hit("qrs", "meow")],
            "total": 1,
        })

        # Use pagination: one hit per page, but the total still counts all
        # matching comments.
        res = crashes.get_comments(
            signature="cool_sig",
            result_number=1,
            result_offset=0,
        )
        eq_(len(res['hits']), 1)
        eq_(res['total'], 2)
Esempio n. 36
0
    def _get_versions(self, params):
        """Return product information for one or more product:version
        combinations.

        ``params["versions"]`` is run through ``self.parse_versions``; the
        first element of its result is stored in
        ``params["products_versions"]`` and read as a flat list that
        alternates product and version.

        The returned dict has ``hits`` (the matching rows as dicts, with
        ``start_date`` and ``end_date`` passed through
        ``datetimeutil.date_to_string``) and ``total`` (the number of hits).
        """
        # The second element returned by parse_versions is not used here.
        params["products_versions"], _ = self.parse_versions(
            params["versions"], []
        )
        flat_pairs = params["products_versions"]

        sql_select = """
            SELECT product_name as product,
                   version_string as version,
                   start_date,
                   end_date,
                   is_featured,
                   build_type,
                   throttle::float,
                   has_builds
            FROM product_info
        """

        # Split [product, version, product, version, ...] into two lists.
        products_list = []
        versions_list = []
        for offset in range(0, len(flat_pairs), 2):
            products_list.append(flat_pairs[offset])
            versions_list.append(flat_pairs[offset + 1])

        # One "(product AND version)" clause per pair, with numbered
        # placeholders matching the keys built by add_param_to_dict below.
        sql_where = [
            "(product_name = %%(product%d)s"
            " AND version_string = %%(version%d)s)" % (position, position)
            for position in range(len(products_list))
        ]

        sql_params = add_param_to_dict({}, "product", products_list)
        sql_params = add_param_to_dict(sql_params, "version", versions_list)

        if sql_where:
            sql_body = sql_select + " WHERE " + " OR ".join(sql_where)
        else:
            sql_body = sql_select

        sql_query = """
            /* socorro.external.postgresql.Products.get_versions */
            %s
        """ % sql_body

        results = self.query(
            sql_query,
            sql_params,
            error_message=(
                "Failed to retrieve products versions from PostgreSQL"
            )
        )

        hits = []
        for row in results.zipped():
            for date_key in ('start_date', 'end_date'):
                row[date_key] = datetimeutil.date_to_string(row[date_key])
            hits.append(row)

        return {
            "hits": hits,
            "total": len(hits)
        }
Esempio n. 37
0
    def get_list(self, **kwargs):
        """
        List all crashes with a given signature and return them.

        Both `from_date` and `to_date` (and their aliases `from` and `to`)
        are required and can not be greater than 30 days apart.

        Optional arguments: see SearchCommon.get_parameters()

        Arguments handled here, before the generic parameter parsing:

        * `sort`: name of the column to order by; it must be one of the
          recognized sort keys listed in the body. Without it the results
          are ordered by `date_processed`, descending.
        * `reverse`: only looked at when `sort` is given. An explicit sort
          is ascending unless `reverse` is the string "true" (any case).
        * `include_raw_crash`: when truthy, the query is wrapped so that
          each hit also carries the matching `raw_crash`.

        Returns a dict with `hits` (a list of crash dicts) and `total` (the
        result of the count query, which does not apply the limit).

        Raises MissingArgumentError when `from_date`, `to_date` or
        `signature` is missing, and BadArgumentError when the date span
        exceeds 30 days or the sort key is not recognized.

        """
        # aliases
        if "from" in kwargs and "from_date" not in kwargs:
            kwargs["from_date"] = kwargs.get("from")
        if "to" in kwargs and "to_date" not in kwargs:
            kwargs["to_date"] = kwargs.get("to")

        if not kwargs.get('from_date'):
            raise MissingArgumentError('from_date')
        if not kwargs.get('to_date'):
            raise MissingArgumentError('to_date')

        # The parsed dates are only used to validate the span; the raw
        # kwargs values are what get passed on to get_parameters() below.
        from_date = datetimeutil.datetimeFromISOdateString(kwargs['from_date'])
        to_date = datetimeutil.datetimeFromISOdateString(kwargs['to_date'])
        span_days = (to_date - from_date).days
        if span_days > 30:
            raise BadArgumentError(
                'Span between from_date and to_date can not be more than 30'
            )

        # start with the default
        sort_order = {
            'key': 'date_processed',
            'direction': 'DESC'
        }
        if 'sort' in kwargs:
            # 'sort' is popped so it does not reach get_parameters().
            sort_order['key'] = kwargs.pop('sort')
            # The key is interpolated straight into ORDER BY further down,
            # so it has to be checked against this fixed list first.
            _recognized_sort_orders = (
                'date_processed',
                'uptime',
                'user_comments',
                'uuid',
                'uuid_text',
                'product',
                'version',
                'build',
                'signature',
                'url',
                'os_name',
                'os_version',
                'cpu_name',
                'cpu_info',
                'address',
                'reason',
                'last_crash',
                'install_age',
                'hangid',
                'process_type',
                'release_channel',
                'install_time',
                'duplicate_of',
            )
            if sort_order['key'] not in _recognized_sort_orders:
                raise BadArgumentError(
                    '%s is not a recognized sort order key' % sort_order['key']
                )
            # An explicit sort is ascending unless reverse is "true".
            sort_order['direction'] = 'ASC'
            if str(kwargs.get('reverse', '')).lower() == 'true':
                # 'reverse' is only popped from kwargs on this path; any
                # other value stays in kwargs.
                if kwargs.pop('reverse'):
                    sort_order['direction'] = 'DESC'

        # Read, not popped: include_raw_crash stays in kwargs.
        include_raw_crash = kwargs.get('include_raw_crash') or False
        params = search_common.get_parameters(kwargs)

        if not params["signature"]:
            raise MissingArgumentError('signature')

        # The signature is always matched exactly.
        params["terms"] = params["signature"]
        params["search_mode"] = "is_exactly"

        # Default mode falls back to starts_with for postgres
        if params["plugin_search_mode"] == "default":
            params["plugin_search_mode"] = "starts_with"

        # Limiting to a signature
        if params["terms"]:
            params["terms"] = self.prepare_terms(params["terms"],
                                                 params["search_mode"])

        # Searching for terms in plugins
        if params["report_process"] == "plugin" and params["plugin_terms"]:
            params["plugin_terms"] = " ".join(params["plugin_terms"])
            params["plugin_terms"] = self.prepare_terms(
                params["plugin_terms"],
                params["plugin_search_mode"]
            )

        # Get information about the versions
        util_service = Util(config=self.context)
        params["versions_info"] = util_service.versions_info(**params)

        # Parsing the versions
        # The unparsed value is kept under "versions_string" before
        # "versions" is overwritten with the parsed result.
        params["versions_string"] = params["versions"]
        (params["versions"], params["products"]) = self.parse_versions(
            params["versions"],
            params["products"]
        )

        if hasattr(self.context, 'webapi'):
            context = self.context.webapi
        else:
            # old middleware
            context = self.context
        # Changing the OS ids to OS names
        # Only the first three characters of the id are compared.
        for i, elem in enumerate(params["os"]):
            for platform in context.platforms:
                if platform["id"][:3] == elem[:3]:
                    params["os"][i] = platform["name"]

        # Creating the parameters for the sql query
        # Starts empty; the where and limit builders below fill it in.
        sql_params = {
        }

        # Preparing the different parts of the sql query
        sql_select = """
            SELECT
                r.date_processed,
                r.uptime,
                r.user_comments,
                r.uuid::uuid,
                r.uuid as uuid_text,
                r.product,
                r.version,
                r.build,
                r.signature,
                r.url,
                r.os_name,
                r.os_version,
                r.cpu_name,
                r.cpu_info,
                r.address,
                r.reason,
                r.last_crash,
                r.install_age,
                r.hangid,
                r.process_type,
                r.release_channel,
                (r.client_crash_date - (r.install_age * INTERVAL '1 second'))
                  AS install_time
        """
        if include_raw_crash:
            # duplicate_of is supplied by wrapped_select in this case.
            pass
        else:
            sql_select += """
                , rd.duplicate_of
            """

        # Template used only when include_raw_crash is set. The single %s
        # receives the assembled report query; the doubled %% placeholders
        # come out of that formatting step as %(from_date)s / %(to_date)s.
        # NOTE(review): presumably from_date/to_date are put into sql_params
        # by build_reports_sql_where - confirm.
        wrapped_select = """
            WITH report_slice AS (
              %s
            ), dupes AS (
                SELECT
                    report_slice.uuid,
                    rd.duplicate_of
                FROM reports_duplicates rd
                JOIN report_slice ON report_slice.uuid_text = rd.uuid
                WHERE
                    rd.date_processed BETWEEN %%(from_date)s AND %%(to_date)s
            )

            SELECT
                rs.*,
                dupes.duplicate_of,
                rc.raw_crash
            FROM report_slice rs
            LEFT OUTER JOIN dupes USING (uuid)
            LEFT OUTER JOIN raw_crashes rc ON
                rs.uuid = rc.uuid
                AND
                rc.date_processed BETWEEN %%(from_date)s AND %%(to_date)s
        """

        sql_from = self.build_reports_sql_from(params)

        # Without the wrapping query, duplicates are joined in directly.
        if not include_raw_crash:
            sql_from = """%s
                LEFT OUTER JOIN reports_duplicates rd ON r.uuid = rd.uuid
            """ % sql_from

        sql_where, sql_params = self.build_reports_sql_where(
            params,
            sql_params,
            self.context
        )

        # Both values come from sort_order, which only holds the validated
        # key and a literal ASC/DESC.
        sql_order = """
            ORDER BY %(key)s %(direction)s
        """ % sort_order

        sql_limit, sql_params = self.build_reports_sql_limit(
            params,
            sql_params
        )

        # Assembling the query
        # NOTE(review): both branches build the same query; the raw-crash
        # wrapping is applied later, once the count is known to be non-zero.
        if include_raw_crash:
            sql_query = "\n".join((
                "/* socorro.external.postgresql.report.Report.list */",
                sql_select, sql_from, sql_where, sql_order, sql_limit)
            )
        else:
            sql_query = "\n".join((
                "/* socorro.external.postgresql.report.Report.list */",
                sql_select, sql_from, sql_where, sql_order, sql_limit)
            )

        # Query for counting the results
        # Same FROM and WHERE as the main query, without ORDER BY and LIMIT.
        sql_count_query = "\n".join((
            "/* socorro.external.postgresql.report.Report.list */",
            "SELECT count(*)", sql_from, sql_where)
        )

        # Querying the DB
        # The count and the main query share one connection.
        with self.get_connection() as connection:

            total = self.count(
                sql_count_query,
                sql_params,
                error_message="Failed to count crashes from reports.",
                connection=connection
            )

            # No need to call Postgres if we know there will be no results
            if total:

                if include_raw_crash:
                    sql_query = wrapped_select % sql_query

                results = self.query(
                    sql_query,
                    sql_params,
                    error_message="Failed to retrieve crashes from reports",
                    connection=connection
                ).zipped()
            else:
                results = []

        crashes = []
        for crash in results:
            # uuid_text is checked against uuid and then dropped, so it never
            # appears in the returned hits.
            assert crash['uuid'] == crash['uuid_text']
            crash.pop('uuid_text')
            if not include_raw_crash and 'raw_crash' in crash:
                crash.pop('raw_crash')
            # Every value is offered to date_to_string; those it rejects with
            # a TypeError are left unchanged.
            for i in crash:
                try:
                    crash[i] = datetimeutil.date_to_string(crash[i])
                except TypeError:
                    pass
            crashes.append(crash)

        return {
            "hits": crashes,
            "total": total
        }
Esempio n. 38
0
    def get_daily(self, **kwargs):
        """Return crashes by active daily users.

        ``product`` and ``versions`` are required. ``from_date`` and
        ``to_date`` (aliases ``from`` and ``to``) default to one week ago
        and today. ``date_range_type`` chooses between the report-date
        views and, when it equals "build", the build-date views.

        Without an ``os`` or ``report_type`` filter the home page graph
        views are queried directly. With either filter the crashes-by-user
        views are aggregated instead, and each row also carries a
        ``throttle`` value.

        Returns ``{"hits": {"<product>:<version>": {<date>: <row>}}}``.

        Raises MissingArgumentError when ``product`` or ``versions`` is
        missing.
        """
        today = datetimeutil.utc_now().date()
        week_ago = today - datetime.timedelta(weeks=1)

        # aliases
        if "from" in kwargs and "from_date" not in kwargs:
            kwargs["from_date"] = kwargs["from"]
        if "to" in kwargs and "to_date" not in kwargs:
            kwargs["to_date"] = kwargs["to"]

        params = external_common.parse_arguments(
            [
                ("product", None, "str"),
                ("versions", None, ["list", "str"]),
                ("from_date", week_ago, "date"),
                ("to_date", today, "date"),
                ("os", None, ["list", "str"]),
                ("report_type", None, ["list", "str"]),
                ("date_range_type", "date", "str"),
            ],
            kwargs
        )

        if not params.product:
            raise MissingArgumentError('product')

        if not (params.versions and params.versions[0]):
            raise MissingArgumentError('versions')

        # The versions are used with an SQL "IN" clause, hence the tuple.
        params.versions = tuple(params.versions)

        by_build = params.date_range_type == "build"
        date_range_field = "build_date" if by_build else "report_date"

        filter_on_os = bool(params.os and params.os[0])
        filter_on_report_type = bool(
            params.report_type and params.report_type[0]
        )

        if not filter_on_os and not filter_on_report_type:
            # simple version, for home page graphs mainly
            if by_build:
                table_to_use = "home_page_graph_build_view"
            else:
                table_to_use = "home_page_graph_view"

            out_fields = ("product", "version", "date", "report_count", "adu",
                          "crash_hadu")

            sql = """
                /* socorro.external.postgresql.crashes.Crashes.get_daily */
                SELECT %(db_fields)s
                FROM %(table_to_use)s
                WHERE product_name=%%(product)s
                AND version_string IN %%(versions)s
                AND %(date_range_field)s BETWEEN %%(from_date)s
                    AND %%(to_date)s
            """ % {
                "db_fields": ", ".join((
                    "product_name",
                    "version_string",
                    date_range_field,
                    "report_count",
                    "adu",
                    "crash_hadu",
                )),
                "date_range_field": date_range_field,
                "table_to_use": table_to_use,
            }
        else:
            # complex version, for daily crashes page mainly
            if by_build:
                table_to_use = "crashes_by_user_build_view"
            else:
                table_to_use = "crashes_by_user_view"

            db_fields = [
                "product_name",
                "version_string",
                date_range_field,
                "sum(adjusted_report_count)::bigint as report_count",
                "sum(adu)::bigint as adu",
                """crash_hadu(sum(report_count)::bigint, sum(adu)::bigint,
                              avg(throttle)) as crash_hadu""",
                "avg(throttle) as throttle"
            ]

            out_fields = ["product", "version", "date", "report_count", "adu",
                          "crash_hadu", "throttle"]

            # Extra conditions for the inner query; the matching params are
            # turned into tuples for their "IN" clauses.
            conditions = []
            if filter_on_os:
                conditions.append("os_short_name IN %(os)s")
                params.os = tuple(x[0:3].lower() for x in params.os)

            if filter_on_report_type:
                conditions.append("crash_type_short IN %(report_type)s")
                params.report_type = tuple(params.report_type)

            sql_where = (
                "AND %s" % " AND ".join(conditions) if conditions else ''
            )

            sql = """
                /* socorro.external.postgresql.crashes.Crashes.get_daily */
                SELECT %(db_fields)s
                FROM (
                    SELECT
                        product_name,
                        version_string,
                        %(date_range_field)s,
                        os_name,
                        os_short_name,
                        SUM(report_count)::int as report_count,
                        SUM(adjusted_report_count)::int
                            as adjusted_report_count,
                        MAX(adu) as adu,
                        AVG(throttle) as throttle
                    FROM %(table_to_use)s
                    WHERE product_name=%%(product)s
                    AND version_string IN %%(versions)s
                    AND %(date_range_field)s BETWEEN %%(from_date)s
                        AND %%(to_date)s
                    %(sql_where)s
                    GROUP BY product_name, version_string,
                             %(date_range_field)s, os_name, os_short_name
                ) as aggregated_crashes_by_user
            """ % {
                "db_fields": ", ".join(db_fields),
                "date_range_field": date_range_field,
                "table_to_use": table_to_use,
                "sql_where": sql_where,
            }

            # The outer query is grouped by product, version and date.
            sql = "%s GROUP BY %s" % (
                sql,
                ", ".join(("product_name", "version_string",
                           date_range_field))
            )

        results = self.query(
            sql,
            params,
            error_message=(
                "Failed to retrieve daily crashes data from PostgreSQL"
            )
        )

        hits = {}
        for row in results:
            daily_data = dict(zip(out_fields, row))
            if "throttle" in daily_data:
                daily_data["throttle"] = float(daily_data["throttle"])
            daily_data["crash_hadu"] = float(daily_data["crash_hadu"])
            daily_data["date"] = datetimeutil.date_to_string(
                daily_data["date"]
            )
            daily_data.pop("os_short", None)

            key = "%s:%s" % (daily_data["product"], daily_data["version"])
            hits.setdefault(key, {})[daily_data["date"]] = daily_data

        return {"hits": hits}
Esempio n. 39
0
def test_date_to_string_fail():
    """Check that date_to_string raises TypeError when given a str.

    The test fails explicitly when no exception is raised, instead of
    passing silently.
    """
    # NOTE(review): TypeError is the exception that callers of
    # date_to_string guard against - confirm it is the intended one here.
    try:
        datetimeutil.date_to_string('2012-01-03')
    except TypeError:
        # Expected outcome.
        return
    raise AssertionError(
        'date_to_string did not raise TypeError for a str argument'
    )
Esempio n. 40
0
    def get(self, **kwargs):
        """Return product information, or version information for one or
        more product:version combinations.

        A non-empty ``versions`` argument delegates to ``_get_versions``.
        Without it, all rows of ``product_info`` are fetched and grouped by
        product name.

        The result is a dict with ``products`` (names in first-seen order),
        ``hits`` (name mapped to a list of version dicts) and ``total``
        (``len()`` of the query results).
        """
        warnings.warn('This class is deprecated. Use ProductVersions instead.',
                      DeprecationWarning)

        # The "versions" filter is for legacy, to be removed.
        filters = [("versions", None, ["list", "str"])]
        params = external_common.parse_arguments(filters, kwargs)
        wants_versions = params.versions and params.versions[0]
        if wants_versions:
            return self._get_versions(params)

        sql = """
            /* socorro.external.postgresql.products.Products.get */
            SELECT
                product_name AS product,
                version_string AS version,
                start_date,
                end_date,
                throttle,
                is_featured AS featured,
                build_type AS release,
                has_builds
            FROM product_info
            ORDER BY product_sort, version_sort DESC, channel_sort
        """

        results = self.query(
            sql,
            error_message="Failed to retrieve products/versions from "
                          "PostgreSQL"
        )

        products = []
        hits = {}

        for entry in results.zipped():
            # A TypeError from date_to_string leaves the value as it is.
            for field in ('end_date', 'start_date'):
                try:
                    entry[field] = datetimeutil.date_to_string(entry[field])
                except TypeError:
                    pass

            entry['throttle'] = float(entry['throttle'])

            product = entry['product']
            # The list and the dict are always filled together, so a single
            # membership test covers both.
            if product not in hits:
                products.append(product)
                hits[product] = []
            hits[product].append(entry)

        return {
            'products': products,
            'hits': hits,
            'total': len(results)
        }
Esempio n. 41
0
    def get(self, **kwargs):
        """Return a list of results and aggregations based on parameters.

        The list of accepted parameters (with types and default values) is in
        the database and can be accessed with the super_search_fields service.
        """
        # Require that the list of fields be passed.
        if not kwargs.get('_fields'):
            raise MissingArgumentError('_fields')
        self.all_fields = kwargs['_fields']

        # Filter parameters and raise potential errors.
        params = self.get_parameters(**kwargs)

        # Find the indices to use to optimize the elasticsearch query.
        indices = self.get_indices(params['date'])

        # Create and configure the search object.
        search = Search(
            using=self.get_connection(),
            index=indices,
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
        )

        # Create filters.
        filters = []
        histogram_intervals = {}

        for field, sub_params in params.items():
            sub_filters = None
            for param in sub_params:
                if param.name.startswith('_'):
                    # By default, all param values are turned into lists,
                    # even when they have and can have only one value.
                    # For those we know there can only be one value,
                    # so we just extract it from the made-up list.
                    if param.name == '_results_offset':
                        results_from = param.value[0]
                    elif param.name == '_results_number':
                        results_number = param.value[0]
                        if results_number > 1000:
                            raise BadArgumentError(
                                '_results_number',
                                msg=(
                                    '_results_number cannot be greater '
                                    'than 1,000'
                                )
                            )
                        if results_number < 0:
                            raise BadArgumentError(
                                '_results_number',
                                msg='_results_number cannot be negative'
                            )
                    elif param.name == '_facets_size':
                        facets_size = param.value[0]

                    for f in self.histogram_fields:
                        if param.name == '_histogram_interval.%s' % f:
                            histogram_intervals[f] = param.value[0]

                    # Don't use meta parameters in the query.
                    continue

                field_data = self.all_fields[param.name]

                name = '%s.%s' % (
                    field_data['namespace'],
                    field_data['in_database_name']
                )

                if param.data_type in ('date', 'datetime'):
                    param.value = datetimeutil.date_to_string(param.value)
                elif param.data_type == 'enum':
                    param.value = [x.lower() for x in param.value]
                elif param.data_type == 'str' and not param.operator:
                    param.value = [x.lower() for x in param.value]

                # Operators needing wildcards, and the associated value
                # transformation with said wildcards.
                operator_wildcards = {
                    '~': '*%s*',  # contains
                    '^': '%s*',  # starts with
                    '$': '*%s'  # ends with
                }
                # Operators needing ranges, and the associated Elasticsearch
                # comparison operator.
                operator_range = {
                    '>': 'gt',
                    '<': 'lt',
                    '>=': 'gte',
                    '<=': 'lte',
                }

                args = {}
                filter_type = 'term'
                filter_value = None

                if not param.operator:
                    # contains one of the terms
                    if len(param.value) == 1:
                        val = param.value[0]

                        if not isinstance(val, basestring) or ' ' not in val:
                            # There's only one term and no white space, this
                            # is a simple term filter.
                            filter_value = val
                        else:
                            # If the term contains white spaces, we want to
                            # perform a phrase query.
                            filter_type = 'query'
                            args = Q(
                                'simple_query_string',
                                query=param.value[0],
                                fields=[name],
                                default_operator='and',
                            ).to_dict()
                    else:
                        # There are several terms, this is a terms filter.
                        filter_type = 'terms'
                        filter_value = param.value
                elif param.operator == '=':
                    # is exactly
                    if field_data['has_full_version']:
                        name = '%s.full' % name
                    filter_value = param.value
                elif param.operator in operator_range:
                    filter_type = 'range'
                    filter_value = {
                        operator_range[param.operator]: param.value
                    }
                elif param.operator == '__null__':
                    filter_type = 'missing'
                    args['field'] = name
                elif param.operator == '__true__':
                    filter_type = 'term'
                    filter_value = True
                elif param.operator == '@':
                    filter_type = 'regexp'
                    if field_data['has_full_version']:
                        name = '%s.full' % name
                    filter_value = param.value
                elif param.operator in operator_wildcards:
                    filter_type = 'query'

                    # Wildcard operations are better applied to a non-analyzed
                    # field (called "full") if there is one.
                    if field_data['has_full_version']:
                        name = '%s.full' % name

                    q_args = {}
                    q_args[name] = (
                        operator_wildcards[param.operator] % param.value
                    )
                    query = Q('wildcard', **q_args)
                    args = query.to_dict()

                if filter_value is not None:
                    args[name] = filter_value

                if args:
                    new_filter = F(filter_type, **args)
                    if param.operator_not:
                        new_filter = ~new_filter

                    if sub_filters is None:
                        sub_filters = new_filter
                    elif filter_type == 'range':
                        sub_filters &= new_filter
                    else:
                        sub_filters |= new_filter

                    continue

            if sub_filters is not None:
                filters.append(sub_filters)

        search = search.filter(F('bool', must=filters))

        # Restricting returned fields.
        fields = []

        # We keep track of the requested columns in order to make sure we
        # return those column names and not aliases for example.
        self.request_columns = []
        for param in params['_columns']:
            for value in param.value:
                if not value:
                    continue

                self.request_columns.append(value)
                field_name = self.get_field_name(value, full=False)
                fields.append(field_name)

        search = search.fields(fields)

        # Sorting.
        sort_fields = []
        for param in params['_sort']:
            for value in param.value:
                if not value:
                    continue

                # Values starting with a '-' are sorted in descending order.
                # In order to retrieve the database name of the field, we
                # must first remove the '-' part and add it back later.
                # Example: given ['product', '-version'], the results will be
                # sorted by ascending product then descending version.
                desc = False
                if value.startswith('-'):
                    desc = True
                    value = value[1:]

                field_name = self.get_field_name(value)

                if desc:
                    # The underlying library understands that '-' means
                    # sorting in descending order.
                    field_name = '-' + field_name

                sort_fields.append(field_name)

        search = search.sort(*sort_fields)

        # Pagination.
        results_to = results_from + results_number
        search = search[results_from:results_to]

        # Create facets.
        for param in params['_facets']:
            self._add_second_level_aggs(
                param,
                search.aggs,
                facets_size,
                histogram_intervals,
            )

        # Create sub-aggregations.
        for key in params:
            if not key.startswith('_aggs.'):
                continue

            fields = key.split('.')[1:]

            if fields[0] not in self.all_fields:
                continue

            base_bucket = self._get_fields_agg(fields[0], facets_size)
            sub_bucket = base_bucket

            for field in fields[1:]:
                # For each field, make a bucket, then include that bucket in
                # the latest one, and then make that new bucket the latest.
                if field in self.all_fields:
                    tmp_bucket = self._get_fields_agg(field, facets_size)
                    sub_bucket.bucket(field, tmp_bucket)
                    sub_bucket = tmp_bucket

            for value in params[key]:
                self._add_second_level_aggs(
                    value,
                    sub_bucket,
                    facets_size,
                    histogram_intervals,
                )

            search.aggs.bucket(fields[0], base_bucket)

        # Create histograms.
        for f in self.histogram_fields:
            key = '_histogram.%s' % f
            if params.get(key):
                histogram_bucket = self._get_histogram_agg(
                    f, histogram_intervals
                )

                for param in params[key]:
                    self._add_second_level_aggs(
                        param,
                        histogram_bucket,
                        facets_size,
                        histogram_intervals,
                    )

                search.aggs.bucket('histogram_%s' % f, histogram_bucket)

        # Query and compute results.
        hits = []

        if params['_return_query'][0].value[0]:
            # Return only the JSON query that would be sent to elasticsearch.
            return {
                'query': search.to_dict(),
                'indices': indices,
            }

        # We call elasticsearch with a computed list of indices, based on
        # the date range. However, if that list contains indices that do not
        # exist in elasticsearch, an error will be raised. We thus want to
        # remove all failing indices until we either have a valid list, or
        # an empty list in which case we return no result.
        while True:
            try:
                results = search.execute()
                for hit in results:
                    hits.append(self.format_fields(hit.to_dict()))

                total = search.count()
                aggregations = self.format_aggregations(results.aggregations)
                break  # Yay! Results!
            except NotFoundError, e:
                missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0]
                if missing_index in indices:
                    del indices[indices.index(missing_index)]
                else:
                    # Wait what? An error caused by an index that was not
                    # in the request? That should never happen, but in case
                    # it does, better know it.
                    raise

                if indices:
                    # Update the list of indices and try again.
                    # Note: we need to first empty the list of indices before
                    # updating it, otherwise the removed indices never get
                    # actually removed.
                    search = search.index().index(*indices)
                else:
                    # There is no index left in the list, return an empty
                    # result.
                    hits = []
                    total = 0
                    aggregations = {}
                    break
Esempio n. 42
0
    def setup_data(self):
        """Prepare the fixtures used by the backfill tests.

        Stores the result of ``datetimeutil.utc_now()`` on ``self.now`` and
        builds ``self.test_source_data``, a dict keyed by backfill kind
        (for example 'adu' is the fixture for backfill_adu). Every value is
        a dict with two keys:

        * 'params': the parameters for that backfill, with dates already
          converted to strings by ``datetimeutil.date_to_string``;
        * 'res_expected': the expected result, always ``[(True,)]``.
        """
        self.now = datetimeutil.utc_now()

        today = self.now.date()
        day_before = today - datetime.timedelta(days=1)
        week_before = today - datetime.timedelta(days=7)
        now_str, yesterday_str, lastweek_str = [
            datetimeutil.date_to_string(day)
            for day in (today, day_before, week_before)
        ]

        # The factories below return a brand new dict on each call so that
        # no two fixtures ever share a mutable 'params' object.
        def daily():
            # Backfills that work on one single day.
            return {"update_day": yesterday_str}

        def ranged():
            # Backfills that work on the span from yesterday to today.
            return {"start_date": yesterday_str, "end_date": now_str}

        fixtures = [
            ('adu', daily()),
            ('all_dups', ranged()),
            ('build_adu', daily()),
            ('correlations', daily()),
            ('crashes_by_user_build', daily()),
            ('crashes_by_user', daily()),

            # TODO: 'daily_crashes' (params: "update_day" = now_str) is
            # disabled because backfill_daily_crashes tries to insert into
            # a table that does not exist. It can be fixed by creating a
            # temporary one.

            ('exploitability', daily()),
            ('home_page_graph_build', daily()),
            ('home_page_graph', daily()),
            ('matviews', {
                "start_date": yesterday_str,
                "reports_clean": 'false',
            }),
            ('rank_compare', daily()),
            ('reports_clean', ranged()),

            # TODO: 'reports_duplicates' (params: "start_date" =
            # yesterday_str, "end_date" = now_str) is disabled because
            # backfill_reports_duplicates tries to insert into a table that
            # does not exist. It can be fixed by using the update function
            # inside of the backfill.

            # TODO: 'signature_counts' (params: "start_date" =
            # yesterday_str, "end_date" = now_str) is disabled because
            # backfill_signature_counts tries to insert into tables and to
            # update functions that do not exist.

            ('tcbs_build', daily()),
            ('tcbs', daily()),
            ('weekly_report_partitions', {
                "start_date": lastweek_str,
                "end_date": now_str,
                "table_name": 'raw_crashes',
            }),

            # TODO: Update Backfill to support signature_summary backfill
            # through the API. Once done, add one fixture (params:
            # "update_day" = yesterday_str) for each of:
            #   'signature_summary_products',
            #   'signature_summary_installations',
            #   'signature_summary_uptime',
            #   'signature_summary_os',
            #   'signature_summary_process_type',
            #   'signature_summary_architecture',
            #   'signature_summary_flash_version',
            #   'signature_summary_device',
            #   'signature_summary_graphics'.
        ]

        # Each fixture gets its own 'res_expected' list, built here.
        self.test_source_data = dict(
            (kind, {'params': params, 'res_expected': [(True,)]})
            for kind, params in fixtures
        )