Example #1
0
    def test_get_exploitibility(self):
        """Compare ``Crashes.get_exploitability()`` with a fixed payload.

        The expected payload holds two hits: one dated today and one dated
        yesterday, both derived from ``self.now``.
        """
        crashes = Crashes(config=self.config)

        one_day = datetime.timedelta(days=1)
        today = datetimeutil.date_to_string(self.now.date())
        yesterday = datetimeutil.date_to_string((self.now - one_day).date())

        # Counts rise from null to high for today's signature ...
        todays_hit = {
            "signature": "canIhaveYourSignature()",
            "report_date": today,
            "null_count": 0,
            "none_count": 1,
            "low_count": 2,
            "medium_count": 3,
            "high_count": 4,
        }
        # ... and fall for yesterday's.
        yesterdays_hit = {
            "signature": "ofCourseYouCan()",
            "report_date": yesterday,
            "null_count": 4,
            "none_count": 3,
            "low_count": 2,
            "medium_count": 1,
            "high_count": 0,
        }
        res_expected = {
            "hits": [todays_hit, yesterdays_hit],
            "total": 2,
        }

        self.assertEqual(crashes.get_exploitability(), res_expected)
Example #2
0
    def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        crash_id = raw_crash.uuid
        old_processed_crash = self.crashstore.get_unredacted_processed(crash_id)

        for key, value in old_processed_crash.iteritems():
            if 'date_processed' in key:
                processed_crash[key] = date_to_string(
                    string_to_datetime(value) - self.config.time_delta
                )
                print processed_crash.uuid, value, processed_crash[key]
            else:
                if key != 'uptime' and key != 'crash_time' and (
                   'time' in key or "date" in key or 'Date' in key
                ):
                    value = date_to_string(string_to_datetime(value))
                processed_crash[key] = value
        processor_meta.processor_notes.append(
            'DateProcessedTimeMachine has pushed date_processed into the past'
            ' by "%s" (D HH:MM:SS)' %  to_str(self.config.time_delta)
        )
        processor_meta.processor_notes.append(
            'Original processor_notes: %s'
            % old_processed_crash['processor_notes']
        )
        return True
Example #3
0
    def test_get_parameters_date_defaults(self):
        """Check how ``get_parameters`` fills in the ``date`` bounds.

        Four cases are covered: no date, only an upper bound, only a lower
        bound, and both bounds.  Two ``date`` entries are expected each time.
        """
        with _get_config_manager().context() as config:
            search = SearchBase(
                config=config,
                fields=SUPERSEARCH_FIELDS_MOCKED_RESULTS,
            )

        now = datetimeutil.utc_now()

        # Case 1: nothing is passed.
        params = search.get_parameters()
        ok_('date' in params)
        eq_(len(params['date']), 2)

        # Case 2: only the upper bound is passed.
        upper_only = '<%s' % datetimeutil.date_to_string(now)
        params = search.get_parameters(date=upper_only)
        ok_('date' in params)
        bounds = params['date']
        eq_(len(bounds), 2)
        eq_(bounds[0].operator, '<')
        eq_(bounds[1].operator, '>=')
        eq_(bounds[0].value.date(), now.date())
        eq_(bounds[1].value.date(), now.date() - datetime.timedelta(days=7))

        # Case 3: only the lower bound is passed.
        pasttime = now - datetime.timedelta(days=10)
        lower_only = '>=%s' % datetimeutil.date_to_string(pasttime)
        params = search.get_parameters(date=lower_only)
        ok_('date' in params)
        bounds = params['date']
        eq_(len(bounds), 2)
        eq_(bounds[0].operator, '<=')
        eq_(bounds[1].operator, '>=')
        eq_(bounds[0].value.date(), now.date())
        eq_(bounds[1].value.date(), pasttime.date())

        # Case 4: both bounds are passed.
        pasttime = now - datetime.timedelta(days=10)
        both_bounds = [
            '<%s' % datetimeutil.date_to_string(now),
            '>%s' % datetimeutil.date_to_string(pasttime),
        ]
        params = search.get_parameters(date=both_bounds)
        ok_('date' in params)
        bounds = params['date']
        eq_(len(bounds), 2)
        eq_(bounds[0].operator, '<')
        eq_(bounds[1].operator, '>')
        eq_(bounds[0].value.date(), now.date())
        eq_(bounds[1].value.date(), pasttime.date())
Example #4
0
    def get(self, **kwargs):
        """Return nightly-build crash trends for one product version.

        Keyword arguments are parsed by ``external_common.parse_arguments``
        into ``start_date``, ``end_date``, ``product`` and ``version``, which
        are bound to the query as named parameters.

        Returns a dict ``{'crashtrends': [...]}``.  Each entry maps the
        selected column names to their values, with ``report_date`` and
        ``build_date`` converted by ``datetimeutil.date_to_string``.  When a
        ``psycopg2.Error`` is raised it is logged and the list stays empty.
        """
        filters = [
            ("start_date", None, "datetime"),
            ("end_date", None, "datetime"),
            ("product", None, "str"),
            ("version", None, "str"),
        ]

        params = external_common.parse_arguments(filters, kwargs)
        results = []  # So we have something to return.

        query_string = """SELECT product_name,
                    version_string,
                    product_version_id,
                    report_date,
                    nightly_builds.build_date,
                    days_out,
                    sum(report_count) as report_count
                FROM nightly_builds
                JOIN product_versions USING ( product_version_id )
                WHERE report_date <= %(end_date)s
                AND report_date >= %(start_date)s
                AND product_name = %(product)s
                AND version_string = %(version)s
                GROUP BY product_name,
                         version_string,
                         product_version_id,
                         report_date,
                         nightly_builds.build_date,
                         days_out"""

        columns = (
            "product_name",
            "version_string",
            "product_version_id",
            "report_date",
            "build_date",
            "days_out",
            "report_count",
        )

        # Bound before the try block so the finally clause can always test
        # it, even when opening the connection is what failed.
        connection = None
        try:
            connection = self.database.connection()
            cursor = connection.cursor()
            sql_results = db.execute(cursor, query_string, params)
        except psycopg2.Error:
            logger.error("Failed retrieving crashtrends data from PostgreSQL",
                         exc_info=True)
        else:
            for trend in sql_results:
                row = dict(zip(columns, trend))
                row['report_date'] = datetimeutil.date_to_string(
                    row['report_date'])
                row['build_date'] = datetimeutil.date_to_string(
                    row['build_date'])
                results.append(row)
        finally:
            # Only close what was actually opened; otherwise the original
            # error would be replaced by an UnboundLocalError.
            if connection is not None:
                connection.close()

        return {'crashtrends': results}
Example #5
0
    def get(self, **kwargs):
        """Return nightly-build crash trends for one product version.

        ``start_date``, ``end_date``, ``product`` and ``version`` are parsed
        from the keyword arguments and bound to the query, which is run
        through ``self.query``.

        Returns ``{'crashtrends': [...]}`` where each entry is a dict keyed
        by column name, with ``report_date`` and ``build_date`` converted by
        ``datetimeutil.date_to_string``.
        """
        params = external_common.parse_arguments(
            [
                ("start_date", None, "datetime"),
                ("end_date", None, "datetime"),
                ("product", None, "str"),
                ("version", None, "str"),
            ],
            kwargs,
        )

        sql = """
        /* socorro.external.postgresql.crash_trends.CrashTrends.get */
        SELECT product_name,
               version_string,
               product_version_id,
               report_date,
               nightly_builds.build_date,
               days_out,
               sum(report_count) as report_count
        FROM nightly_builds
            JOIN product_versions USING ( product_version_id )
        WHERE report_date <= %(end_date)s
        AND report_date >= %(start_date)s
        AND product_name = %(product)s
        AND version_string = %(version)s
        GROUP BY product_name,
                 version_string,
                 product_version_id,
                 report_date,
                 nightly_builds.build_date,
                 days_out
        """

        sql_results = self.query(
            sql,
            params,
            error_message="Failed to retrieve crash trends data from PostgreSQL",
        )

        # Column names in the order they appear in the SELECT list.
        columns = (
            "product_name",
            "version_string",
            "product_version_id",
            "report_date",
            "build_date",
            "days_out",
            "report_count",
        )

        crashtrends = []
        for record in sql_results:
            entry = dict(zip(columns, record))
            for date_column in ('report_date', 'build_date'):
                entry[date_column] = datetimeutil.date_to_string(
                    entry[date_column])
            crashtrends.append(entry)

        return {'crashtrends': crashtrends}
Example #6
0
    def test_get_parameters_date_defaults(self):
        """Check how ``get_parameters`` fills in the ``date`` bounds.

        Covers no date, upper bound only, lower bound only, and both bounds.
        Two ``date`` entries are expected in every case.
        """
        with _get_config_manager().context() as config:
            search = SearchBaseWithFields(
                config=config,
            )

        now = datetimeutil.utc_now()
        one_week = datetime.timedelta(days=7)

        # No date at all.
        params = search.get_parameters()
        assert 'date' in params
        assert len(params['date']) == 2

        # Upper bound only.
        params = search.get_parameters(
            date='<%s' % datetimeutil.date_to_string(now)
        )
        assert 'date' in params
        bounds = params['date']
        assert len(bounds) == 2
        assert bounds[0].operator == '<'
        assert bounds[1].operator == '>='
        assert bounds[0].value.date() == now.date()
        assert bounds[1].value.date() == now.date() - one_week

        # Lower bound only.
        pasttime = now - datetime.timedelta(days=10)
        params = search.get_parameters(
            date='>=%s' % datetimeutil.date_to_string(pasttime)
        )
        assert 'date' in params
        bounds = params['date']
        assert len(bounds) == 2
        assert bounds[0].operator == '<='
        assert bounds[1].operator == '>='
        assert bounds[0].value.date() == now.date()
        assert bounds[1].value.date() == pasttime.date()

        # Both bounds.
        pasttime = now - datetime.timedelta(days=10)
        params = search.get_parameters(
            date=[
                '<%s' % datetimeutil.date_to_string(now),
                '>%s' % datetimeutil.date_to_string(pasttime),
            ]
        )
        assert 'date' in params
        bounds = params['date']
        assert len(bounds) == 2
        assert bounds[0].operator == '<'
        assert bounds[1].operator == '>'
        assert bounds[0].value.date() == now.date()
        assert bounds[1].value.date() == pasttime.date()
Example #7
0
def twoPeriodTopCrasherComparison(
            databaseConnection, context,
            closestEntryFunction=latestEntryBeforeOrEqualTo,
            listOfTopCrashersFunction=getListOfTopCrashersBySignature):
    """Build the top-crasher report for the period ending at ``to_date``.

    ``context`` must contain ``to_date``, ``duration``, ``product`` and
    ``version``; it is modified in place (``logger`` fallback,
    ``numberOfComparisonPoints``, ``limit`` default of 100, adjusted
    ``to_date`` and ``startDate``).

    Returns a dict with ``crashes``, ``start_date``, ``end_date``,
    ``totalNumberOfCrashes`` and ``totalPercentage``.
    """
    try:
        context['logger'].debug('entered twoPeriodTopCrasherComparison')
    except KeyError:
        # No logger configured: fall back to one that discards messages.
        context['logger'] = util.SilentFakeLogger()

    for required_key in ('to_date', 'duration', 'product', 'version'):
        assert required_key in context, (
            "%s is missing from the configuration" % required_key)

    context['numberOfComparisonPoints'] = 2
    if not context['limit']:
        context['limit'] = 100

    closest_to_date = closestEntryFunction(
        databaseConnection,
        context['to_date'],
        context['product'],
        context['version'])
    context['to_date'] = closest_to_date
    context['logger'].debug('New to_date: %s' % context['to_date'])

    comparison_span = context.duration * context.numberOfComparisonPoints
    context['startDate'] = context.to_date - comparison_span

    query_range = rangeOfQueriesGenerator(
        databaseConnection, context, listOfTopCrashersFunction)
    ranked_lists = listOfListsWithChangeInRank(query_range)
    listOfTopCrashers = ranked_lists[0]

    totalNumberOfCrashes = 0
    totalPercentOfTotal = 0
    for crasher in listOfTopCrashers:
        # The total is carried on the rows themselves; strip it from the
        # row once it has been read.
        if 'total_crashes' in crasher:
            totalNumberOfCrashes = crasher.pop('total_crashes')
        totalPercentOfTotal += crasher.get('percentOfTotal', 0)

    period_start = context.to_date - context.duration
    return {
        'crashes': listOfTopCrashers,
        'start_date': datetimeutil.date_to_string(period_start),
        'end_date': datetimeutil.date_to_string(context.to_date),
        'totalNumberOfCrashes': totalNumberOfCrashes,
        'totalPercentage': totalPercentOfTotal,
    }
Example #8
0
    def test_search_combined_filters(self, mock_psql_util):
        """Search with many filters at once, then with a date window only.

        Both searches are expected to return exactly one Linux hit.
        """
        with self.get_config_manager().context() as config:
            api = Search(config=config)

            # First search: every filter matches the default crash report.
            res = api.get(
                terms='js::break_your_browser',
                search_mode='is_exactly',
                products='WaterWolf',
                versions='WaterWolf:1.0',
                release_channels='release',
                os='Linux',
                build_ids='1234567890',
                reasons='MOZALLOC_WENT_WRONG',
                report_type='crash',
                report_process='browser',
            )

            self.assertEqual(res['total'], 1)
            first_hit = res['hits'][0]
            self.assertEqual(first_hit['signature'], 'js::break_your_browser')
            self.assertEqual(res['hits'][0]['is_linux'], 1)
            self.assertEqual(res['hits'][0]['is_windows'], 0)
            self.assertEqual(res['hits'][0]['is_mac'], 0)

            # Second search: a window from five to three weeks ago.
            now = datetimeutil.utc_now()
            window_end = datetimeutil.date_to_string(
                now - datetime.timedelta(weeks=3)
            )
            window_start = datetimeutil.date_to_string(
                now - datetime.timedelta(weeks=5)
            )

            res = api.get(from_date=window_start, to_date=window_end)

            self.assertEqual(res['total'], 1)
            first_hit = res['hits'][0]
            self.assertEqual(first_hit['signature'], 'my_little_signature')
            self.assertEqual(res['hits'][0]['is_linux'], 1)
            self.assertEqual(res['hits'][0]['is_windows'], 0)
            self.assertEqual(res['hits'][0]['is_mac'], 0)
Example #9
0
    def post(self, **kwargs):
        """Return first-report information for the given signatures.

        Arguments are parsed with ``self.filters``.  Raises
        ``MissingArgumentError`` when ``signatures`` is empty.

        Returns ``{'hits': [...], 'total': n}``; each hit comes from
        ``results.zipped()`` with ``first_date`` converted by
        ``datetimeutil.date_to_string``.
        """
        params = external_common.parse_arguments(self.filters, kwargs)

        requested = params['signatures']
        if not requested:
            raise MissingArgumentError('signatures')

        sql = """
            SELECT
                signature,
                first_report AS first_date,
                first_build
            FROM signatures
            WHERE signature IN %s
        """

        # The whole tuple is bound to the single "IN %s" placeholder.
        results = self.query(
            sql,
            [tuple(params['signatures'])],
            error_message='Failed to retrieve signatures from PostgreSQL',
        )

        hits = []
        for record in results.zipped():
            record.first_date = datetimeutil.date_to_string(record.first_date)
            hits.append(record)

        return {
            'hits': hits,
            'total': len(hits)
        }
Example #10
0
    def get_comments(self, **kwargs):
        """Return the user comments attached to matching crash reports.

        See socorro.lib.search_common.get_parameters() for all filters.

        Returns ``{"hits": [...], "total": n}``; each hit has
        ``date_processed`` (converted by ``datetimeutil.date_to_string``),
        ``user_comments``, ``uuid`` and ``email``.
        """
        params = self.prepare_search_params(**kwargs)

        # WARNING: sensitive data is returned here (email). When there is
        # an authentication mechanism, a verification should be done here.
        sql_select = """
            SELECT
                r.date_processed,
                r.user_comments,
                r.uuid,
                CASE
                    WHEN r.email = '' THEN null
                    WHEN r.email IS NULL THEN null
                    ELSE r.email
                END
        """

        sql_from = self.build_reports_sql_from(params)

        # The where-builder gets an empty parameter dict and returns the
        # clause together with the parameters to bind.
        sql_where, sql_params = self.build_reports_sql_where(
            params, {}, self.context)
        sql_where = "%s AND r.user_comments IS NOT NULL" % sql_where

        sql_order = "ORDER BY email ASC, r.date_processed ASC"

        query_parts = (
            "/* external.postgresql.crashes.Crashes.get_comments */",
            sql_select,
            sql_from,
            sql_where,
            sql_order,
        )
        sql_query = " ".join(query_parts)

        results = self.query(
            sql_query,
            sql_params,
            error_message="Failed to retrieve comments from PostgreSQL",
        )

        # Column names in the order of the SELECT list.
        columns = ("date_processed", "user_comments", "uuid", "email")
        comments = []
        for record in results:
            entry = dict(zip(columns, record))
            entry["date_processed"] = datetimeutil.date_to_string(
                entry["date_processed"])
            comments.append(entry)

        return {
            "hits": comments,
            "total": len(comments)
        }
Example #11
0
def twoPeriodTopCrasherComparison(
    databaseConnection,
    context,
    closestEntryFunction=latestEntryBeforeOrEqualTo,
    listOfTopCrashersFunction=getListOfTopCrashersBySignature,
):
    """Return the ranked top crashers for the period ending at ``to_date``.

    ``context`` has to provide ``to_date``, ``duration``, ``product`` and
    ``version``.  It is updated in place: a silent logger is installed when
    none is present, ``numberOfComparisonPoints`` is set to 2, a falsy
    ``limit`` becomes 100, and ``to_date`` / ``startDate`` are recomputed.

    The returned dict has the keys ``crashes``, ``start_date``,
    ``end_date``, ``totalNumberOfCrashes`` and ``totalPercentage``.
    """
    try:
        context["logger"].debug("entered twoPeriodTopCrasherComparison")
    except KeyError:
        context["logger"] = util.SilentFakeLogger()

    mandatory = ("to_date", "duration", "product", "version")
    for name in mandatory:
        assert name in context, "%s is missing from the configuration" % name

    context["numberOfComparisonPoints"] = 2
    if not context["limit"]:
        context["limit"] = 100

    # Snap to_date to the entry chosen by the closest-entry function.
    snapped = closestEntryFunction(
        databaseConnection,
        context["to_date"],
        context["product"],
        context["version"],
    )
    context["to_date"] = snapped
    context["logger"].debug("New to_date: %s" % context["to_date"])

    lookback = context.duration * context.numberOfComparisonPoints
    context["startDate"] = context.to_date - lookback

    queries = rangeOfQueriesGenerator(
        databaseConnection, context, listOfTopCrashersFunction
    )
    listOfTopCrashers = listOfListsWithChangeInRank(queries)[0]

    totalNumberOfCrashes = 0
    totalPercentOfTotal = 0
    for entry in listOfTopCrashers:
        if "total_crashes" in entry:
            # Read the total and remove it from the row in one step.
            totalNumberOfCrashes = entry.pop("total_crashes")
        totalPercentOfTotal += entry.get("percentOfTotal", 0)

    start_date = datetimeutil.date_to_string(context.to_date - context.duration)
    end_date = datetimeutil.date_to_string(context.to_date)
    return {
        "crashes": listOfTopCrashers,
        "start_date": start_date,
        "end_date": end_date,
        "totalNumberOfCrashes": totalNumberOfCrashes,
        "totalPercentage": totalPercentOfTotal,
    }
Example #12
0
    def test_listOfListsWithChangeInRank(self):

        lastweek = self.now - datetime.timedelta(days=7)
        lastweek_str = datetimeutil.date_to_string(lastweek.date())

        params = self.params
        params.startDate = self.now.date() - datetime.timedelta(days=14)

        query_list = tcbs.getListOfTopCrashersBySignature
        query_range = tcbs.rangeOfQueriesGenerator(
            self.connection,
            self.params,
            query_list
        )
        res = tcbs.listOfListsWithChangeInRank(query_range)

        res_expected = [[{
            'count': 5L,
            'mac_count': 0L,
            'content_count': 0,
            'first_report': lastweek_str,
            'previousRank': 0,
            'currentRank': 0,
            'startup_percent': None,
            'versions': 'plugin1, plugin2',
            'first_report_exact': lastweek_str + ' 00:00:00',
            'percentOfTotal': 0.625,
            'changeInRank': 0,
            'is_gc_count': 10L,
            'win_count': 0L,
            'changeInPercentOfTotal': 0.041666666666666963,
            'linux_count': 5L,
            'hang_count': 5L,
            'signature': 'Fake Signature #1',
            'versions_count': 2,
            'previousPercentOfTotal': 0.58333333333333304,
            'plugin_count': 0
        }, {
            'count': 3L,
            'mac_count': 1L,
            'content_count': 0,
            'first_report': lastweek_str,
            'previousRank': 1,
            'currentRank': 1,
            'startup_percent': None,
            'versions': 'plugin1, plugin2, plugin3, plugin4, plugin5, plugin6',
            'first_report_exact': lastweek_str + ' 00:00:00',
            'percentOfTotal': 0.375,
            'changeInRank': 0,
            'is_gc_count': 1L,
            'win_count': 1L,
            'changeInPercentOfTotal': -0.041666666666667018,
            'linux_count': 1L,
            'hang_count': 0L,
            'signature': 'Fake Signature #2',
            'versions_count': 6,
            'previousPercentOfTotal': 0.41666666666666702,
            'plugin_count': 0
        }]]
Example #13
0
    def get(self, **kwargs):
        """Return the job-queue rows matching a uuid.

        Raises ``MissingOrBadArgumentError`` when ``uuid`` is missing or
        empty.  Returns ``{"total": n, "hits": [...]}`` where each hit maps
        column names to values and ``datetime`` values are converted to
        strings.  A ``psycopg2.Error`` is logged and yields an empty result.
        """
        params = external_common.parse_arguments(
            [("uuid", None, "str")],
            kwargs,
        )

        if not params.uuid:
            raise MissingOrBadArgumentError(
                "Mandatory parameter 'uuid' is missing or empty")

        fields = [
            "id",
            "pathname",
            "uuid",
            "owner",
            "priority",
            "queueddatetime",
            "starteddatetime",
            "completeddatetime",
            "success",
            "message"
        ]
        sql = """
            /* socorro.external.postgresql.job.Job.get */
            SELECT %s FROM jobs WHERE uuid=%%(uuid)s
        """ % ", ".join(fields)

        hits = []
        json_result = {
            "total": 0,
            "hits": hits
        }

        connection = None
        try:
            # Open the database connection and run the query.
            connection = self.database.connection()
            results = db.execute(connection.cursor(), sql, params)
        except psycopg2.Error:
            logger.error("Failed retrieving jobs data from PostgreSQL",
                         exc_info=True)
        else:
            for record in results:
                row = dict(zip(fields, record))

                # Dates are returned as strings, not datetime objects.
                for column, value in list(row.items()):
                    if isinstance(value, datetime.datetime):
                        row[column] = datetimeutil.date_to_string(value)

                hits.append(row)
            json_result["total"] = len(hits)
        finally:
            if connection:
                connection.close()

        return json_result
Example #14
0
    def test_get(self):
        """Exercise ``SignatureURLs.get`` for a match, bad input and no match."""
        signature_urls = SignatureURLs(config=self.config)

        # Midnight of the current UTC day, as a naive datetime.
        utc_now = datetimeutil.utc_now()
        now = datetime.datetime(utc_now.year, utc_now.month, utc_now.day)
        now_str = datetimeutil.date_to_string(now)

        # Test 1: find one exact match for products and versions passed
        res = signature_urls.get(
            signature="EMPTY: no crashing thread identified; corrupt dump",
            start_date=now_str,
            end_date=now_str,
            products=['Firefox'],
            versions=["Firefox:10.0", "Firefox:11.0"],
        )
        one_hit = {
            "url": "http://deusex.wikia.com/wiki/Praxis_kit",
            "crash_count": 1
        }
        self.assertEqual(res, {"hits": [one_hit], "total": 1})

        # Test 2: Raise error if parameter is not passed
        self.assertRaises(
            MissingOrBadArgumentException,
            signature_urls.get,
            signature="",
            start_date="",
            end_date=now_str,
            products=['Firefox'],
            versions=["Firefox:10.0", "Firefox:11.0"],
        )

        # Test 3: Query returning no results
        res = signature_urls.get(
            signature="EMPTY: no crashing thread identified; corrupt dump",
            start_date=now_str,
            end_date=now_str,
            products=['Fennec'],
            versions=["Fennec:10.0", "Fennec:11.0"],
        )
        self.assertEqual(res, {"hits": [], "total": 0})
Example #15
0
def test_date_to_string():
    """Check ``date_to_string`` on aware and naive datetimes and on a date."""
    cases = (
        # Datetime with timezone
        (
            datetime.datetime(2012, 1, 3, 12, 23, 34, tzinfo=UTC),
            '2012-01-03T12:23:34+00:00',
        ),
        # Datetime without timezone
        (
            datetime.datetime(2012, 1, 3, 12, 23, 34),
            '2012-01-03T12:23:34',
        ),
        # Date (no time, no timezone)
        (
            datetime.date(2012, 1, 3),
            '2012-01-03',
        ),
    )

    for value, res_exp in cases:
        assert datetimeutil.date_to_string(value) == res_exp
Example #16
0
    def test_get(self):
        """Exercise ``Extensions.get`` with and without stored extensions."""
        extensions = Extensions(config=self.config)

        # Midnight of the current UTC day, keeping the timezone.
        utc_now = datetimeutil.utc_now()
        now = datetime.datetime(
            utc_now.year, utc_now.month, utc_now.day, tzinfo=utc_now.tzinfo
        )
        # Template with one remaining "%s" slot for the uuid prefix.
        uuid = "%%s-%s" % now.strftime("%y%m%d")
        now_str = datetimeutil.date_to_string(now)

        # Test 1: a valid crash with duplicates
        res = extensions.get(uuid=uuid % "a1", date=now_str)

        expected_hits = []
        for key, suffix in ((10, '1'), (11, '2'), (12, '3')):
            expected_hits.append({
                "report_id": 1,
                "date_processed": now_str,
                "extension_key": key,
                "extension_id": 'id' + suffix,
                "extension_version": 'version' + suffix,
            })
        res_expected = {
            "hits": expected_hits,
            "total": 3
        }
        self.assertEqual(res, res_expected)

        # Test 2: a crash without extensions
        res = extensions.get(uuid=uuid % "a2", date=now_str)
        self.assertEqual(res, {"hits": [], "total": 0})
Example #17
0
    def setUp(self):
        """Create the storage and API objects and index four crash reports.

        The supersearch fields are bulk-indexed first.  A default crash
        report dated yesterday is then saved, followed by three variants
        that each change one field.
        """
        super(IntegrationTestQuery, self).setUp()

        config = self.get_config_context()
        self.storage = crashstorage.ElasticSearchCrashStorage(config)
        self.api = Query(config=config)

        # Reset the indices cache so every test creates its index again.
        self.storage.indices_cache = set()

        # Create the supersearch fields.
        self.storage.es.bulk_index(
            index=config.webapi.elasticsearch_default_index,
            doc_type='supersearch_fields',
            docs=SUPERSEARCH_FIELDS.values(),
            id_field='name',
            refresh=True,
        )

        self.now = datetimeutil.utc_now()
        yesterday = datetimeutil.date_to_string(
            self.now - datetime.timedelta(days=1)
        )

        # Insert data into elasticsearch.
        default_crash_report = {
            'uuid': 100,
            'signature': 'js::break_your_browser',
            'date_processed': yesterday,
            'product': 'WaterWolf',
            'version': '1.0',
            'release_channel': 'release',
            'os_name': 'Linux',
            'build': '1234567890',
            'reason': 'MOZALLOC_WENT_WRONG',
            'hangid': None,
            'process_type': None,
        }
        self.storage.save_processed(default_crash_report)

        # Each variant is a copy of the default with a few fields replaced.
        variants = (
            {'uuid': 1, 'product': 'EarthRaccoon'},
            {'uuid': 2, 'version': '2.0'},
            {'uuid': 3, 'release_channel': 'aurora'},
        )
        for overrides in variants:
            self.storage.save_processed(
                dict(default_crash_report, **overrides)
            )

        # As indexing is asynchronous, we need to force elasticsearch to
        # make the newly created content searchable before we run the tests
        self.storage.es.refresh()
Example #18
0
    def test_twoPeriodTopCrasherComparisonLimited(self):

        lastweek = self.now - datetime.timedelta(days=7)
        lastweek_str = datetimeutil.date_to_string(lastweek.date())
        two_weeks = datetimeutil.date_to_string(self.now.date() -
                                                datetime.timedelta(days=14))

        self.params.limit = 1
        res = tcbs.twoPeriodTopCrasherComparison(
            self.connection,
            self.params
        )

        res_expected = {
            'totalPercentage': 0.58333333333333304,
            'end_date': lastweek_str,
            'start_date': two_weeks,
            'crashes': [{
                'count': 14L,
                'mac_count': 1L,
                'content_count': 0,
                'first_report': lastweek_str,
                'previousRank': 'null',
                'currentRank': 0,
                'startup_percent': None,
                'versions': 'plugin1, plugin2',
                'first_report_exact': lastweek_str + ' 00:00:00',
                'percentOfTotal': 0.58333333333333304,
                'changeInRank': 'new',
                'is_gc_count': 1L,
                'win_count': 12L,
                'changeInPercentOfTotal': 'new',
                'linux_count': 1L,
                'hang_count': 0L,
                'signature': 'Fake Signature #1',
                'versions_count': 2,
                'previousPercentOfTotal': 'null',
                'plugin_count': 0
            }],
            'totalNumberOfCrashes': 24L
        }

        eq_(res, res_expected)
Example #19
0
    def test_get_parameters_date_defaults(self):
        """Check how ``get_parameters`` fills in the ``date`` bounds.

        Covers no date, upper bound only, lower bound only, and both bounds.
        Two ``date`` entries are expected in every case.
        """
        with _get_config_manager().context() as config:
            search = SearchBase(config=config)

        now = datetimeutil.utc_now()

        # Defaults when nothing is passed.
        params = search.get_parameters()
        ok_("date" in params)
        eq_(len(params["date"]), 2)

        # Upper bound only.
        high = "<%s" % datetimeutil.date_to_string(now)
        params = search.get_parameters(date=high)
        ok_("date" in params)
        bounds = params["date"]
        eq_(len(bounds), 2)
        eq_(bounds[0].operator, "<")
        eq_(bounds[1].operator, ">=")
        eq_(bounds[0].value.date(), now.date())
        week_earlier = now.date() - datetime.timedelta(days=7)
        eq_(bounds[1].value.date(), week_earlier)

        # Lower bound only.
        pasttime = now - datetime.timedelta(days=10)
        low = ">=%s" % datetimeutil.date_to_string(pasttime)
        params = search.get_parameters(date=low)
        ok_("date" in params)
        bounds = params["date"]
        eq_(len(bounds), 2)
        eq_(bounds[0].operator, "<=")
        eq_(bounds[1].operator, ">=")
        eq_(bounds[0].value.date(), now.date())
        eq_(bounds[1].value.date(), pasttime.date())

        # Both bounds.
        pasttime = now - datetime.timedelta(days=10)
        high = "<%s" % datetimeutil.date_to_string(now)
        low = ">%s" % datetimeutil.date_to_string(pasttime)
        params = search.get_parameters(date=[high, low])
        ok_("date" in params)
        bounds = params["date"]
        eq_(len(bounds), 2)
        eq_(bounds[0].operator, "<")
        eq_(bounds[1].operator, ">")
        eq_(bounds[0].value.date(), now.date())
        eq_(bounds[1].value.date(), pasttime.date())
    def test_get_signature_history(self):
        """Exercise ``Crashes.get_signature_history``.

        Covers a query with results, a query without results, the
        default date range, and the errors raised for missing arguments.
        """
        api = Crashes(config=self.config)
        now = self.now
        lastweek = now - datetime.timedelta(days=7)
        fetch = api.get_signature_history

        # A query that matches two rows.
        res = fetch(
            product="Firefox",
            version="8.0",
            signature="signature1",
            start_date=lastweek,
            end_date=now,
        )
        hits = res["hits"]

        self.assertEqual(len(hits), 2)
        self.assertEqual(len(hits), res["total"])

        today_str = datetimeutil.date_to_string(now.date())
        self.assertEqual(hits[0]["date"], today_str)
        self.assertEqual(hits[1]["date"], today_str)

        self.assertEqual(hits[0]["count"], 5)
        self.assertEqual(hits[1]["count"], 14)

        self.assertEqual(
            round(hits[0]["percent_of_total"], 2),
            round(5.0 / 19.0 * 100, 2)
        )
        self.assertEqual(
            round(hits[1]["percent_of_total"], 2),
            round(14.0 / 19.0 * 100, 2)
        )

        # A version with no data gives an empty result.
        res = fetch(
            product="Firefox",
            version="9.0",
            signature="signature1",
            start_date=lastweek,
            end_date=now,
        )
        self.assertEqual(res, {"hits": [], "total": 0})

        # Leaving out the dates falls back on the default date range.
        res = fetch(
            product="Fennec", version="11.0.1", signature="signature3"
        )
        only_hit = {
            "date": now.date().isoformat(),
            "count": 14,
            "percent_of_total": 100,
        }
        self.assertEqual(res, {"hits": [only_hit], "total": 1})

        # Incomplete argument sets must be rejected.
        self.assertRaises(MissingOrBadArgumentError, fetch)
        self.assertRaises(
            MissingOrBadArgumentError, fetch, product="Firefox"
        )
        self.assertRaises(
            MissingOrBadArgumentError,
            fetch,
            product="Firefox",
            version="8.0"
        )
        self.assertRaises(
            MissingOrBadArgumentError,
            fetch,
            signature="signature1",
            version="8.0"
        )
Example #21
0
    def test_twoPeriodTopCrasherComparisonLimited(self):

        lastweek = self.now - datetime.timedelta(days=7)
        lastweek_str = datetimeutil.date_to_string(lastweek.date())
        two_weeks = datetimeutil.date_to_string(self.now.date() - datetime.timedelta(days=14))

        self.params.limit = 1
        res = tcbs.twoPeriodTopCrasherComparison(self.connection, self.params)

        res_expected = {
            "totalPercentage": 0.58333333333333304,
            "end_date": lastweek_str,
            "start_date": two_weeks,
            "crashes": [
                {
                    "count": 14L,
                    "mac_count": 1L,
                    "content_count": 0,
                    "first_report": lastweek_str,
                    "previousRank": "null",
                    "currentRank": 0,
                    "startup_percent": None,
                    "versions": "plugin1, plugin2",
                    "first_report_exact": lastweek_str + " 00:00:00",
                    "percentOfTotal": 0.58333333333333304,
                    "changeInRank": "new",
                    "is_gc_count": 1L,
                    "win_count": 12L,
                    "changeInPercentOfTotal": "new",
                    "linux_count": 1L,
                    "hang_count": 0L,
                    "signature": "Fake Signature #1",
                    "versions_count": 2,
                    "previousPercentOfTotal": "null",
                    "plugin_count": 0,
                }
            ],
            "totalNumberOfCrashes": 24L,
        }

        eq_(res, res_expected)
Example #22
0
    def get(self, **kwargs):
        """Return the current state of all Crontabber jobs.

        The result is ``{"state": {...}}``, keyed by ``app_name``. Each
        entry holds the remaining selected columns, with non-null date
        columns converted to strings and ``last_error`` decoded from JSON.
        """
        # Column names for every selected value after ``app_name``.
        columns = (
            'next_run',
            'first_run',
            'last_run',
            'last_success',
            'error_count',
            'depends_on',
            'last_error',
            'ongoing'
        )
        # Columns whose values are converted to strings unless None.
        datetime_columns = (
            'next_run',
            'first_run',
            'last_run',
            'last_success',
            'ongoing'
        )

        sql = """
        /* socorro.external.postgresql.crontabber_state.CrontabberState.get */
            SELECT
                app_name,
                next_run,
                first_run,
                last_run,
                last_success,
                error_count,
                depends_on,
                last_error,
                ongoing
            FROM crontabber
            ORDER BY app_name
        """

        results = self.query(
            sql,
            error_message=(
                "Failed to retrieve crontabber state data from PostgreSQL"
            )
        )

        state = {}
        for row in results:
            job = dict(zip(columns, row[1:]))
            for column in datetime_columns:
                if job[column] is not None:
                    job[column] = datetimeutil.date_to_string(job[column])
            job['last_error'] = json.loads(job['last_error'])
            state[row[0]] = job

        return {"state": state}
Example #23
0
    def test_get_adu_by_signature(self):
        """Exercise ``Crashes.get_adu_by_signature``.

        Checks the hits returned for yesterday, then checks that a start
        date 366 days before yesterday raises ``BadArgumentError``.
        """
        crashes = Crashes(config=self.config)

        signature = "canIhaveYourSignature()"
        channel = "release"
        yesterday_date = (self.now - datetime.timedelta(days=1)).date()
        yesterday = datetimeutil.date_to_string(yesterday_date)

        # Fields shared by both expected hits.
        shared = {
            "product_name": "WaterWolf",
            "signature": signature,
            "adu_date": yesterday,
            "channel": channel,
        }
        mac_hit = dict(
            shared,
            build_date="2014-03-01",
            buildid='201403010101',
            crash_count=3,
            adu_count=1023,
            os_name="Mac OS X",
        )
        windows_hit = dict(
            shared,
            build_date="2014-04-01",
            buildid='201404010101',
            crash_count=4,
            adu_count=1024,
            os_name="Windows NT",
        )
        expected = {"hits": [mac_hit, windows_hit], "total": 2}

        actual = crashes.get_adu_by_signature(
            product_name="WaterWolf",
            start_date=yesterday,
            end_date=yesterday,
            signature=signature,
            channel=channel,
        )
        eq_(actual, expected)

        # A start date 366 days before yesterday must be refused.
        too_early = yesterday_date - datetime.timedelta(days=366)
        assert_raises(
            BadArgumentError,
            crashes.get_adu_by_signature,
            start_date=too_early,
            end_date=yesterday,
            signature=signature,
            channel=channel
        )
Example #24
0
    def get_exploitability(self, **kwargs):
        """Return a list of exploitable crash reports.

        ``start_date`` defaults to one week before today (UTC) and
        ``end_date`` to today. Returns ``{"hits": [...], "total": n}``
        where each hit's ``report_date`` has been converted to a string.

        See socorro.lib.external_common.parse_arguments() for all filters.
        """
        today = datetimeutil.utc_now().date()
        filters = [
            ("start_date", today - datetime.timedelta(weeks=1), "date"),
            ("end_date", today, "date"),
        ]
        params = external_common.parse_arguments(filters, kwargs)

        sql_query = """
            /* external.postgresql.crashes.Crashes.get_exploitability */
            SELECT
                signature,
                report_date,
                null_count,
                none_count,
                low_count,
                medium_count,
                high_count
            FROM exploitability_reports
            WHERE
                report_date BETWEEN %(start_date)s AND %(end_date)s
            ORDER BY
                report_date DESC;
        """
        results = self.query(
            sql_query,
            params,
            error_message=(
                "Failed to retrieve exploitable crashes from PostgreSQL"
            )
        )

        # Names of the selected columns, in SELECT order.
        columns = (
            "signature",
            "report_date",
            "null_count",
            "none_count",
            "low_count",
            "medium_count",
            "high_count",
        )
        hits = []
        for row in results:
            hit = dict(zip(columns, row))
            hit["report_date"] = datetimeutil.date_to_string(
                hit["report_date"]
            )
            hits.append(hit)

        return {"hits": hits, "total": len(hits)}
Example #25
0
    def get(self, **kwargs):
        """Return a list of extensions associated with a crash's UUID.

        Keyword arguments are parsed by external_common.parse_arguments()
        with the filters ``uuid`` (str) and ``date`` (datetime); both are
        passed to the SQL query.

        Returns a dict ``{"total": n, "hits": [...]}`` where each hit has
        the keys ``report_id``, ``date_processed`` (as a string),
        ``extension_key``, ``extension_id`` and ``extension_version``.

        A psycopg2.Error raised while connecting or executing the query
        is logged and the empty result is returned.
        """
        filters = [
            ("uuid", None, "str"),
            ("date", None, "datetime"),
        ]
        params = external_common.parse_arguments(filters, kwargs)

        sql = """/* socorro.external.postgresql.extensions.Extensions.get */
            SELECT extensions.*
            FROM extensions
            INNER JOIN reports ON extensions.report_id = reports.id
            WHERE reports.uuid = %(uuid)s
            AND reports.date_processed = %(crash_date)s
            AND extensions.date_processed = %(crash_date)s
        """
        sql_params = {
            "uuid": params.uuid,
            "crash_date": params.date
        }

        result = {
            "total": 0,
            "hits": []
        }

        # Bound before the try block so the finally clause can tell
        # whether a connection was ever opened. Without this, a failure
        # in self.database.connection() would make the cleanup raise
        # UnboundLocalError instead of returning the empty result.
        connection = None
        try:
            connection = self.database.connection()
            cur = connection.cursor()
            results = db.execute(cur, sql, sql_params)
        except psycopg2.Error:
            logger.error("Failed retrieving extensions data from PostgreSQL",
                         exc_info=True)
        else:
            for crash in results:
                row = dict(zip((
                           "report_id",
                           "date_processed",
                           "extension_key",
                           "extension_id",
                           "extension_version"), crash))
                row["date_processed"] = datetimeutil.date_to_string(
                    row["date_processed"]
                )
                result["hits"].append(row)
            result["total"] = len(result["hits"])
        finally:
            if connection is not None:
                connection.close()

        return result
    def format_dates_in_crash(self, processed_crash):
        """Rewrite every date-like value of ``processed_crash`` in place.

        HBase returns dates in a format that elasticsearch does not
        understand. To keep the elasticsearch mapping simple, every value
        that parses as a date is replaced by its ``date_to_string`` form.
        Values that do not parse are left untouched. The same mapping
        that was passed in is returned.
        """
        for key in processed_crash:
            try:
                parsed = datetimeutil.string_to_datetime(
                    processed_crash[key]
                )
                processed_crash[key] = datetimeutil.date_to_string(parsed)
            except (ValueError, TypeError, ISO8601Error):
                # Not a date: keep the original value.
                continue

        return processed_crash
Example #27
0
    def get(self, **kwargs):
        """Return the job queue entries matching a crash UUID.

        Raises MissingOrBadArgumentError when ``uuid`` is missing or
        empty. Returns ``{"hits": [...], "total": n}``; any
        ``datetime.datetime`` value in a hit is converted to a string.
        """
        params = external_common.parse_arguments(
            [("uuid", None, "str")],
            kwargs
        )
        if not params.uuid:
            raise MissingOrBadArgumentError(
                "Mandatory parameter 'uuid' is missing or empty"
            )

        # Selected columns, also used as the keys of each returned job.
        fields = [
            "id",
            "pathname",
            "uuid",
            "owner",
            "priority",
            "queueddatetime",
            "starteddatetime",
            "completeddatetime",
            "success",
            "message"
        ]
        sql = """
            /* socorro.external.postgresql.job.Job.get */
            SELECT %s FROM jobs WHERE uuid=%%(uuid)s
        """ % ", ".join(fields)

        results = self.query(
            sql,
            params,
            error_message="Failed to retrieve jobs data from PostgreSQL"
        )

        hits = []
        for row in results:
            job = {}
            for name, value in zip(fields, row):
                # Make sure all dates are turned into strings.
                if isinstance(value, datetime.datetime):
                    value = datetimeutil.date_to_string(value)
                job[name] = value
            hits.append(job)

        return {"hits": hits, "total": len(hits)}
Example #28
0
    def get(self, **kwargs):
        """Return the current state of all Crontabber jobs.

        The query result must hold exactly one ``(state, last_updated)``
        row; any other shape makes the unpacking raise ValueError. The
        state is decoded from JSON and the timestamp converted to a string.
        """
        sql = (
            '/* socorro.external.postgresql.crontabber_state.'
            'CrontabberState.get */\n'
            'SELECT state, last_updated FROM crontabber_state;'
        )
        rows = self.query(
            sql,
            error_message=(
                "Failed to retrieve crontabber state data from PostgreSQL"
            )
        )

        # Exactly one row with exactly two columns is expected.
        [(state, last_updated)] = rows

        return {
            "state": json.loads(state),
            "last_updated": datetimeutil.date_to_string(last_updated)
        }
    def main(self):
        """Backfill elasticsearch with processed crashes read from HBase.

        Starts at ``config.end_date`` and walks backwards one day at a
        time for ``config.duration`` days. For each day, every processed
        crash returned by HBase is decoded from JSON, has its date-like
        values normalized, and is saved to elasticsearch.

        Returns 0.
        """
        es_storage = self.config.elasticsearch_storage_class(self.config)
        # NOTE(review): hb_storage is never used below. It is kept because
        # instantiating the storage class may have side effects - confirm
        # before removing.
        hb_storage = self.config.hbase_storage_class(self.config)
        hb_client = HBaseConnectionForCrashReports(
            self.config.hbase_host,
            self.config.hbase_port,
            self.config.hbase_timeout,
        )

        current_date = self.config.end_date
        one_day = datetime.timedelta(days=1)

        for _ in range(self.config.duration):
            day = current_date.strftime('%y%m%d')
            self.config.logger.info('backfilling crashes for %s', day)

            reports = hb_client.get_list_of_processed_json_for_date(
                day,
                number_of_retries=5
            )

            for report in reports:
                processed_crash = json.loads(report)

                # HBase returns dates in a format that elasticsearch does not
                # understand. To keep our elasticsearch mapping simple, we
                # transform all dates to a recognized format.
                for attr in processed_crash:
                    try:
                        processed_crash[attr] = datetimeutil.date_to_string(
                            datetimeutil.string_to_datetime(
                                processed_crash[attr]
                            )
                        )
                    except (ValueError, TypeError, ISO8601Error):
                        # the attribute is not a date
                        pass

                es_storage.save_processed(processed_crash)

            # Move on to the previous day.
            current_date -= one_day

        return 0
Example #30
0
    def test_get_comments(self):
        """Exercise ``Crashes.get_comments``.

        Covers a signature with comments, a signature without any, and
        the error raised when no signature is given.
        """
        crashes = Crashes(config=self.config)
        today = datetimeutil.date_to_string(self.now)

        def comment(uuid, text):
            # Build one expected hit; only uuid and text vary.
            return {
                "email": None,
                "date_processed": today,
                "uuid": uuid,
                "user_comments": text
            }

        # Test 1: results
        expected = {
            "hits": [comment("def", "hello"), comment("hij", "hah")],
            "total": 2
        }
        self.assertEqual(crashes.get_comments(signature="js"), expected)

        # Test 2: no results
        expected = {"hits": [], "total": 0}
        self.assertEqual(crashes.get_comments(signature="blah"), expected)

        # Test 3: missing parameter
        self.assertRaises(MissingArgumentError, crashes.get_comments)
Example #31
0
    def test_twoPeriodTopCrasherComparison(self):

        lastweek = self.now - datetime.timedelta(days=7)
        lastweek_str = datetimeutil.date_to_string(lastweek.date())
        two_weeks = datetimeutil.date_to_string(self.now.date() -
                                                datetime.timedelta(days=14))

        res = tcbs.twoPeriodTopCrasherComparison(self.connection, self.params)

        res_expected = {
            'totalPercentage':
            1.0,
            'end_date':
            lastweek_str,
            'start_date':
            two_weeks,
            'crashes': [{
                'count': 14L,
                'mac_count': 1L,
                'content_count': 0,
                'first_report': lastweek_str,
                'previousRank': 'null',
                'currentRank': 0,
                'startup_percent': None,
                'versions': 'plugin1, plugin2',
                'first_report_exact': lastweek_str + ' 00:00:00',
                'percentOfTotal': 0.58333333333333304,
                'changeInRank': 'new',
                'is_gc_count': 1L,
                'win_count': 12L,
                'changeInPercentOfTotal': 'new',
                'linux_count': 1L,
                'hang_count': 0L,
                'signature': 'Fake Signature #1',
                'versions_count': 2,
                'previousPercentOfTotal': 'null',
                'plugin_count': 0
            }, {
                'count': 10L,
                'mac_count': 2L,
                'content_count': 0,
                'first_report': lastweek_str,
                'previousRank': 'null',
                'currentRank': 1,
                'startup_percent': None,
                'versions': 'plugin1, plugin2, plugin3, '
                'plugin4, plugin5, plugin6',
                'first_report_exact': lastweek_str + ' 00:00:00',
                'percentOfTotal': 0.41666666666666702,
                'changeInRank': 'new',
                'is_gc_count': 3L,
                'win_count': 7L,
                'changeInPercentOfTotal': 'new',
                'linux_count': 1L,
                'hang_count': 0L,
                'signature': 'Fake Signature #2',
                'versions_count': 6,
                'previousPercentOfTotal': 'null',
                'plugin_count': 0
            }],
            'totalNumberOfCrashes':
            24L
        }

        eq_(res, res_expected)
Example #32
0
    def test_get(self):
        """Exercise ``Products.get`` with and without a versions filter.

        Most comparisons sort the dicts first, so they compare keys
        rather than values.
        """
        products = Products(config=self.config)
        now_str = datetimeutil.date_to_string(self.now.date())

        def version_hit(product, version):
            # Expected hit shape when a "versions" filter is passed.
            return {
                "product": product,
                "version": version,
                "start_date": now_str,
                "end_date": now_str,
                "is_featured": False,
                "build_type": "Release",
                "throttle": 10.0,
                "has_builds": False
            }

        def listing(product, version, throttle, release):
            # Expected hit shape when no filter is passed.
            return {
                "product": product,
                "version": version,
                "start_date": now_str,
                "end_date": now_str,
                "throttle": throttle,
                "featured": False,
                "release": release,
                "has_builds": False
            }

        #......................................................................
        # Test 1: find one exact match for one product and one version
        res = products.get(versions="Firefox:8.0")
        res_expected = {
            "hits": [version_hit("Firefox", "8.0")],
            "total": 1
        }

        eq_(sorted(res['hits'][0]), sorted(res_expected['hits'][0]))

        #......................................................................
        # Test 2: Find two different products with their correct versions
        res = products.get(versions=["Firefox:8.0", "Thunderbird:10.0.2b"])
        res_expected = {
            "hits": [
                version_hit("Firefox", "8.0"),
                version_hit("Thunderbird", "10.0.2b"),
            ],
            "total": 2
        }

        eq_(sorted(res['hits'][0]), sorted(res_expected['hits'][0]))

        #......................................................................
        # Test 3: empty result, no products:version found
        res = products.get(versions="Firefox:14.0")

        eq_(res, {"hits": [], "total": 0})

        #......................................................................
        # Test 4: Test products list is returned with no parameters
        # Note that the expired version is not returned
        res = products.get()
        res_expected = {
            "products": ["Firefox", "Thunderbird", "Fennec"],
            "hits": {
                "Firefox": [
                    listing("Firefox", "8.0", 10.00, "Release"),
                ],
                "Thunderbird": [
                    listing("Thunderbird", "10.0.2b", 10.00, "Release"),
                ],
                "Fennec": [
                    listing("Fennec", "12.0b1", 100.00, "Beta"),
                    listing("Fennec", "11.0.1", 10.00, "Release"),
                ]
            },
            "total": 4
        }

        eq_(res['total'], res_expected['total'])
        eq_(sorted(res['products']), sorted(res_expected['products']))
        eq_(sorted(res['hits']), sorted(res_expected['hits']))
        for name in sorted(res['hits']):
            eq_(
                sorted(res['hits'][name][0]),
                sorted(res_expected['hits'][name][0])
            )

        # test returned order of versions
        fennec_hits = res['hits']['Fennec']
        assert len(fennec_hits) == 2
        eq_(fennec_hits[0]['version'], '12.0b1')
        eq_(fennec_hits[1]['version'], '11.0.1')

        #......................................................................
        # Test 5: An invalid versions list is passed, all versions are returned
        res = products.get(versions=[1])
        eq_(res['total'], 4)
Example #33
0
    def test_get(self):
        """Exercise ``SignatureURLs.get``.

        Covers an exact product/version match, empty mandatory
        parameters, a query without results, the "ALL" wildcards, and a
        badly formatted ``versions`` value.
        """
        signature_urls = SignatureURLs(config=self.config)
        today = self.now
        midnight = datetime.datetime(today.year, today.month, today.day)
        now_str = datetimeutil.date_to_string(midnight)

        corrupt_dump = "EMPTY: no crashing thread identified; corrupt dump"

        def query(products, versions, signature=corrupt_dump,
                  start_date=now_str):
            # Assemble the keyword arguments for one call to get().
            return {
                "signature": signature,
                "start_date": start_date,
                "end_date": now_str,
                "products": products,
                "versions": versions
            }

        def hit(url):
            # Every expected hit carries a crash count of one.
            return {"url": url, "crash_count": 1}

        praxis_kit = "http://deusex.wikia.com/wiki/Praxis_kit"
        code_rush = "http://wikipedia.org/Code_Rush"
        mobile_yet = "http://arewemobileyet.org/"

        #......................................................................
        # Test 1: find one exact match for products and versions passed
        params = query(['Firefox'], ["Firefox:10.0", "Firefox:11.0"])
        res = signature_urls.get(**params)

        eq_(res, {"hits": [hit(praxis_kit)], "total": 1})

        #......................................................................
        # Test 2: Raise error if parameter is not passed
        params = query(
            ['Firefox'],
            ["Firefox:10.0", "Firefox:11.0"],
            signature="",
            start_date=""
        )
        assert_raises(MissingArgumentError, signature_urls.get, **params)

        #......................................................................
        # Test 3: Query returning no results
        params = query(['Fennec'], ["Fennec:10.0", "Fennec:11.0"])
        res = signature_urls.get(**params)

        eq_(res, {"hits": [], "total": 0})

        # Test 4: Return results for all version of Firefox
        params = query(['Firefox'], ["ALL"])
        res = signature_urls.get(**params)

        eq_(res, {"hits": [hit(praxis_kit), hit(code_rush)], "total": 2})

        # Test 5: Return results for all products and versions
        params = query(['ALL'], ["ALL"])
        res = signature_urls.get(**params)
        res_expected = {
            "hits": [hit(praxis_kit), hit(code_rush), hit(mobile_yet)],
            "total": 3
        }

        eq_(res, res_expected)

        # Test when we send incorrectly formatted 'versions' parameter
        params = query(['Firefox'], ['27.0a1'], signature='Does not exist')
        assert_raises(BadArgumentError, signature_urls.get, **params)
Example #34
0
    def build_query_from_params(params, config):
        """
        Build and return an ES query given a list of parameters.

        See socorro.lib.search_common.SearchCommon.get_parameters() for
        parameters and default values.

        """
        # Dates need to be strings for ES
        params["from_date"] = dtutil.date_to_string(params["from_date"])
        params["to_date"] = dtutil.date_to_string(params["to_date"])

        # Preparing the different elements of the json query
        query = {"match_all": {}}
        queries = []

        filters = {"and": []}

        # Creating the terms depending on the way we should search
        if params["terms"] and params["search_mode"] == "default":
            filters["and"].append(
                ElasticSearchBase.build_terms_query(
                    params["fields"], [x.lower() for x in params["terms"]]))

        elif (params["terms"] and params["search_mode"] == "is_exactly"
              and params["fields"] == ["signature"]):
            filters["and"].append(
                ElasticSearchBase.build_terms_query("signature.full",
                                                    params["terms"]))

        elif params["terms"]:
            params["terms"] = ElasticSearchBase.prepare_terms(
                params["terms"], params["search_mode"])
            queries.append(
                ElasticSearchBase.build_wildcard_query(params["fields"],
                                                       params["terms"]))

        # Generating the filters
        if params["products"]:
            filters["and"].append(
                ElasticSearchBase.build_terms_query("product.full",
                                                    params["products"]))
        if params["os"]:
            filters["and"].append(
                ElasticSearchBase.build_terms_query(
                    "os_name", [x.lower() for x in params["os"]]))
        if params["build_ids"]:
            filters["and"].append(
                ElasticSearchBase.build_terms_query("build",
                                                    params["build_ids"]))
        if params["reasons"]:
            filters["and"].append(
                ElasticSearchBase.build_terms_query(
                    "reason", [x.lower() for x in params["reasons"]]))
        if params["release_channels"]:
            filters["and"].append(
                ElasticSearchBase.build_terms_query(
                    "release_channel",
                    [x.lower() for x in params["release_channels"]]))

        # plugins filter
        if params['plugin_terms']:
            # change plugin field names to match what is in elasticsearch
            params['plugin_in'] = [
                'Plugin%s' % x.capitalize() for x in params['plugin_in']
            ]

            if params['plugin_search_mode'] == 'default':
                filters['and'].append(
                    ElasticSearchBase.build_terms_query(
                        params['plugin_in'],
                        [x.lower() for x in params['plugin_terms']]))
            elif (params['plugin_search_mode'] == 'is_exactly'
                  and len(params['plugin_in']) == 1):
                filters['and'].append(
                    ElasticSearchBase.build_terms_query(
                        '%s.full' % params['plugin_in'][0],
                        params['plugin_terms']))
            else:
                params['plugin_terms'] = ElasticSearchBase.prepare_terms(
                    params['plugin_terms'], params['plugin_search_mode'])
                queries.append(
                    ElasticSearchBase.build_wildcard_query(
                        ['%s.full' % x for x in params['plugin_in']],
                        params['plugin_terms']))

        filters["and"].append({
            "range": {
                "date_processed": {
                    "from": params["from_date"],
                    "to": params["to_date"]
                }
            }
        })

        if params["report_process"] == "browser":
            filters["and"].append({"missing": {"field": "process_type"}})
        elif params["report_process"] in ("plugin", "content"):
            filters["and"].append(
                ElasticSearchBase.build_terms_query("process_type",
                                                    params["report_process"]))

        if params["report_type"] == "crash":
            filters["and"].append({"missing": {"field": "hangid"}})
        elif params["report_type"] == "hang":
            filters["and"].append({"exists": {"field": "hangid"}})

        # Generating the filters for versions
        if params["versions"]:
            versions = ElasticSearchBase.format_versions(params["versions"])
            versions_info = params["versions_info"]

            # There are several pairs product:version
            or_filter = []
            for v in versions:
                version = v["version"]
                product = v["product"]

                if not version:
                    # There is no valid version here.
                    continue

                key = "%s:%s" % (product, version)

                version_data = {}
                if key in versions_info:
                    version_data = versions_info[key]

                if version_data and version_data["is_rapid_beta"]:
                    # If the version is a rapid beta, that means it's an
                    # alias for a list of other versions. We thus don't filter
                    # on that version, but on all versions listed in the
                    # version_data that we have.

                    # Get all versions that are linked to this rapid beta.
                    rapid_beta_versions = [
                        x for x in versions_info
                        if versions_info[x]["from_beta_version"] == key
                        and not versions_info[x]["is_rapid_beta"]
                    ]

                    for rapid_beta in rapid_beta_versions:
                        and_filter = ElasticSearchBase.build_version_filters(
                            product,
                            versions_info[rapid_beta]["version_string"],
                            versions_info[rapid_beta], config)

                        or_filter.append({"and": and_filter})
                else:
                    # This is a "normal" version, let's filter on it
                    and_filter = ElasticSearchBase.build_version_filters(
                        product, version, version_data, config)

                    or_filter.append({"and": and_filter})

            if or_filter:
                filters["and"].append({"or": or_filter})

        if len(queries) > 1:
            query = {"bool": {"must": queries}}
        elif len(queries) == 1:
            query = queries[0]

        # Generating the full query from the parts
        return {
            "size": params["result_number"],
            "from": params["result_offset"],
            "query": {
                "filtered": {
                    "query": query,
                    "filter": filters
                }
            }
        }
Example #35
0
def test_date_to_string_fail():
    """Feed `date_to_string` a plain string instead of a date object.

    NOTE(review): the test name suggests this call is expected to fail, but
    nothing in this function asserts it; presumably the check lives outside
    the function (e.g. in a decorator) -- TODO confirm.
    """
    not_a_date = '2012-01-03'
    datetimeutil.date_to_string(not_a_date)
Example #36
0
    def get_list(self, **kwargs):
        """
        List all crashes with a given signature and return them.

        Optional arguments: see SearchCommon.get_parameters()

        The `from` and `to` keyword arguments are accepted as aliases of
        `from_date` and `to_date`.

        Return None when no signature is given. Otherwise return a dict with
        two keys: "total", the value returned by the count query, and "hits",
        a list of dicts (one per report) in which datetime values have been
        converted to strings.

        The database connection is always closed before leaving this method,
        including on the early return and when an exception is raised.

        """
        # Names of the selected columns, in the order of the SELECT clause.
        fields = (
            "date_processed", "uptime", "user_comments", "uuid",
            "product", "version", "build", "signature", "url",
            "os_name", "os_version", "cpu_name", "cpu_info",
            "address", "reason", "last_crash", "install_age",
            "hangid", "process_type", "install_time", "duplicate_of",
        )

        # Constant parts of the sql query
        sql_select = """
            SELECT
                r.date_processed,
                r.uptime,
                r.user_comments,
                r.uuid,
                r.product,
                r.version,
                r.build,
                r.signature,
                r.url,
                r.os_name,
                r.os_version,
                r.cpu_name,
                r.cpu_info,
                r.address,
                r.reason,
                r.last_crash,
                r.install_age,
                r.hangid,
                r.process_type,
                (r.client_crash_date - (r.install_age * INTERVAL '1 second'))
                    AS install_time,
                rd.duplicate_of
        """

        sql_from_template = """%s
            LEFT OUTER JOIN reports_duplicates rd ON r.uuid = rd.uuid
        """

        sql_order = """
            ORDER BY r.date_processed DESC
        """

        # Creating the connection to the DB
        self.connection = self.database.connection()
        try:
            cur = self.connection.cursor()

            # aliases
            if "from" in kwargs and "from_date" not in kwargs:
                kwargs["from_date"] = kwargs.get("from")
            if "to" in kwargs and "to_date" not in kwargs:
                kwargs["to_date"] = kwargs.get("to")

            params = search_common.get_parameters(kwargs)

            if params["signature"] is None:
                return None

            params["terms"] = params["signature"]
            params["search_mode"] = "is_exactly"

            # Default mode falls back to starts_with for postgres
            if params["plugin_search_mode"] == "default":
                params["plugin_search_mode"] = "starts_with"

            # Limiting to a signature
            if params["terms"]:
                params["terms"] = self.prepare_terms(params["terms"],
                                                     params["search_mode"])

            # Searching for terms in plugins
            if (params["report_process"] == "plugin"
                    and params["plugin_terms"]):
                params["plugin_terms"] = " ".join(params["plugin_terms"])
                params["plugin_terms"] = self.prepare_terms(
                    params["plugin_terms"], params["plugin_search_mode"])

            # Get information about the versions
            util_service = Util(config=self.context)
            params["versions_info"] = util_service.versions_info(**params)

            # Parsing the versions
            params["versions_string"] = params["versions"]
            (params["versions"],
             params["products"]) = self.parse_versions(
                params["versions"], params["products"])

            if hasattr(self.context, 'webapi'):
                context = self.context.webapi
            else:
                # old middleware
                context = self.context
            # Changing the OS ids to OS names
            for i, elem in enumerate(params["os"]):
                for platform in context.platforms:
                    if platform["id"] == elem:
                        params["os"][i] = platform["name"]

            # Creating the parameters for the sql query
            sql_params = {}

            # Preparing the variable parts of the sql query
            sql_from = sql_from_template % self.build_reports_sql_from(params)

            (sql_where,
             sql_params) = self.build_reports_sql_where(
                params, sql_params, self.context)

            (sql_limit,
             sql_params) = self.build_reports_sql_limit(params, sql_params)

            # Assembling the query
            sql_query = " ".join(
                ("/* socorro.external.postgresql.report.Report.list */",
                 sql_select, sql_from, sql_where, sql_order, sql_limit))

            # Query for counting the results
            sql_count_query = " ".join(
                ("/* socorro.external.postgresql.report.Report.list */",
                 "SELECT count(*)", sql_from, sql_where))

            # Debug
            logger.debug(sql_count_query)
            logger.debug(cur.mogrify(sql_count_query, sql_params))

            # Querying the DB
            try:
                total = db.singleValueSql(cur, sql_count_query, sql_params)
            except db.SQLDidNotReturnSingleValue:
                total = 0
                util.reportExceptionAndContinue(logger)

            results = []

            # No need to call Postgres if we know there will be no results
            if total != 0:
                try:
                    results = db.execute(cur, sql_query, sql_params)
                except psycopg2.Error:
                    util.reportExceptionAndContinue(logger)

            json_result = {"total": total, "hits": []}

            # Transforming the results into what we want
            for crash in results:
                row = dict(zip(fields, crash))
                for i in row:
                    if isinstance(row[i], datetime.datetime):
                        row[i] = datetimeutil.date_to_string(row[i])
                json_result["hits"].append(row)

            return json_result
        finally:
            # Close on every exit path so that the early return and any
            # exception raised above do not leak the connection.
            self.connection.close()
Example #37
0
    def get_daily(self, **kwargs):
        """Return crashes by active daily users.

        The `from` and `to` keyword arguments are accepted as aliases of
        `from_date` and `to_date`. `product` and `versions` are mandatory:
        MissingOrBadArgumentError is raised when one of them is missing or
        empty.

        The returned dict has a single key, "hits". Its value maps a
        "product:version" key (with ":os_short" appended when the results
        are separated by OS) to a dict of rows indexed by date string.
        """
        today = datetimeutil.utc_now().date()
        week_ago = today - datetime.timedelta(weeks=1)

        accepted_args = [
            ("product", None, "str"),
            ("versions", None, ["list", "str"]),
            ("from_date", week_ago, "date"),
            ("to_date", today, "date"),
            ("os", None, ["list", "str"]),
            ("report_type", None, ["list", "str"]),
            ("separated_by", None, "str"),
            ("date_range_type", "date", "str"),
        ]

        # An alias is only used when the canonical argument is absent.
        for alias, canonical in (("from", "from_date"), ("to", "to_date")):
            if alias in kwargs and canonical not in kwargs:
                kwargs[canonical] = kwargs[alias]

        params = external_common.parse_arguments(accepted_args, kwargs)

        if not params.product:
            raise MissingOrBadArgumentError(
                "Mandatory parameter 'product' is missing or empty")

        if not (params.versions and params.versions[0]):
            raise MissingOrBadArgumentError(
                "Mandatory parameter 'versions' is missing or empty")

        params.versions = tuple(params.versions)

        # Both kinds of query pick their date column the same way.
        by_build = params.date_range_type == "build"
        date_range_field = "build_date" if by_build else "report_date"

        has_os = params.os and params.os[0]
        has_report_type = params.report_type and params.report_type[0]
        has_separator = params.separated_by and params.separated_by[0]

        if not (has_os or has_report_type or has_separator):
            # simple version, for home page graphs mainly
            if by_build:
                view_name = "home_page_graph_build_view"
            else:
                view_name = "home_page_graph_view"

            db_fields = ("product_name", "version_string", date_range_field,
                         "report_count", "adu", "crash_hadu")

            out_fields = ("product", "version", "date", "report_count", "adu",
                          "crash_hadu")

            sql = """
                /* socorro.external.postgresql.crashes.Crashes.get_daily */
                SELECT %(db_fields)s
                FROM %(table_to_use)s
                WHERE product_name=%%(product)s
                AND version_string IN %%(versions)s
                AND %(date_range_field)s BETWEEN %%(from_date)s
                    AND %%(to_date)s
            """ % {
                "db_fields": ", ".join(db_fields),
                "date_range_field": date_range_field,
                "table_to_use": view_name
            }
        else:
            # complex version, for daily crashes page mainly
            if by_build:
                view_name = "crashes_by_user_build_view"
            else:
                view_name = "crashes_by_user_view"

            db_fields = [
                "product_name", "version_string", date_range_field,
                "sum(adjusted_report_count)::bigint as report_count",
                "sum(adu)::bigint as adu",
                """crash_hadu(sum(report_count)::bigint, sum(adu)::bigint,
                              avg(throttle)) as crash_hadu""",
                "avg(throttle) as throttle"
            ]

            out_fields = [
                "product", "version", "date", "report_count", "adu",
                "crash_hadu", "throttle"
            ]

            db_group = ["product_name", "version_string", date_range_field]

            if params.separated_by == "os":
                os_columns = ["os_name", "os_short_name"]
                db_fields += os_columns
                db_group += os_columns
                out_fields += ["os", "os_short"]

            # Optional conditions added to the inner query.
            where_parts = []
            if has_os:
                where_parts.append("os_short_name IN %(os)s")
                params.os = tuple(x[0:3].lower() for x in params.os)

            if has_report_type:
                where_parts.append("crash_type_short IN %(report_type)s")
                params.report_type = tuple(params.report_type)

            extra_where = ''
            if where_parts:
                extra_where = "AND %s" % " AND ".join(where_parts)

            sql = """
                /* socorro.external.postgresql.crashes.Crashes.get_daily */
                SELECT %(db_fields)s
                FROM (
                    SELECT
                        product_name,
                        version_string,
                        %(date_range_field)s,
                        os_name,
                        os_short_name,
                        SUM(report_count)::int as report_count,
                        SUM(adjusted_report_count)::int
                            as adjusted_report_count,
                        MAX(adu) as adu,
                        AVG(throttle) as throttle
                    FROM %(table_to_use)s
                    WHERE product_name=%%(product)s
                    AND version_string IN %%(versions)s
                    AND %(date_range_field)s BETWEEN %%(from_date)s
                        AND %%(to_date)s
                    %(sql_where)s
                    GROUP BY product_name, version_string,
                             %(date_range_field)s, os_name, os_short_name
                ) as aggregated_crashes_by_user
            """ % {
                "db_fields": ", ".join(db_fields),
                "date_range_field": date_range_field,
                "table_to_use": view_name,
                "sql_where": extra_where
            }

            # db_group always holds at least the three base columns.
            sql = "%s GROUP BY %s" % (sql, ", ".join(db_group))

        sql = str(" ".join(sql.split()))  # better formatting of the sql string

        results = self.query(
            sql,
            params,
            error_message=(
                "Failed to retrieve daily crashes data from PostgreSQL"
            )
        )

        split_by_os = params.separated_by == "os"
        hits = {}
        for row in results:
            daily_data = dict(zip(out_fields, row))
            if "throttle" in daily_data:
                daily_data["throttle"] = float(daily_data["throttle"])
            daily_data["crash_hadu"] = float(daily_data["crash_hadu"])
            date_string = datetimeutil.date_to_string(daily_data["date"])
            daily_data["date"] = date_string

            key = "%s:%s" % (daily_data["product"], daily_data["version"])
            if split_by_os:
                key = "%s:%s" % (key, daily_data["os_short"])

            # The short OS name is only needed to build the key.
            daily_data.pop("os_short", None)

            hits.setdefault(key, {})[date_string] = daily_data

        return {"hits": hits}
Example #38
0
    def test_get_list(self):
        """Exercise Report.get_list with several combinations of filters."""
        now = self.now
        yesterday = datetimeutil.date_to_string(
            now - datetime.timedelta(days=1))
        report = Report(config=self.config)

        def expected_hit(**overrides):
            # Build one expected hit; only the fields that differ between
            # the expected reports are passed as overrides.
            hit = {
                'hangid': None,
                'product': 'WaterWolf',
                'os_name': 'Windows NT',
                'cpu_info': None,
                'url': None,
                'last_crash': None,
                'date_processed': yesterday,
                'cpu_name': None,
                'uptime': None,
                'release_channel': 'Release',
                'process_type': 'browser',
                'os_version': None,
                'reason': 'STACK_OVERFLOW',
                'version': '1.0',
                'build': '20001212010203',
                'install_age': None,
                'signature': 'sig1',
                'install_time': None,
                'duplicate_of': None,
                'address': None,
                'user_comments': None,
            }
            hit.update(overrides)
            return hit

        base_params = {
            'signature': 'sig1',
            'from_date': yesterday,
            'to_date': now,
        }

        # Basic test
        res = report.get_list(**base_params)
        eq_(res['total'], 5)
        eq_(len(res['hits']), 5)

        duplicates_map = {}
        for hit in res['hits']:
            if hit['duplicate_of']:
                duplicates_map[hit['uuid']] = hit['duplicate_of']
        eq_(duplicates_map['60597bdc-5dbe-4409-6b38-4309c0130828'],
            '60597bdc-5dbe-4409-6b38-4309c0130833')

        # Test with products and versions
        res = report.get_list(**dict(
            base_params,
            products='WaterWolf',
            versions='WaterWolf:2.0',
        ))
        eq_(res['total'], 1)

        # Test with os, build_ids and reasons
        res = report.get_list(**dict(
            base_params,
            products='WaterWolf',
            versions=['WaterWolf:1.0', 'WaterWolf:3.0'],
            os='win',
            build_ids='20001212010203',
            reasons='STACK_OVERFLOW',
        ))
        eq_(res['total'], 2)

        eq_(res, {
            'hits': [
                expected_hit(uuid='60597bdc-5dbe-4409-6b38-4309c0130831'),
                expected_hit(
                    uuid='60597bdc-5dbe-4409-6b38-4309c0130834',
                    process_type='plugin',
                    version='3.0',
                ),
            ],
            'total': 2,
        })

        # Test with a signature with strange characters
        res = report.get_list(**dict(
            base_params,
            signature='this/is+a=C|signature',
        ))
        eq_(res['total'], 1)

        eq_(res, {
            'hits': [
                expected_hit(
                    uuid='60597bdc-5dbe-4409-6b38-4309c0130837',
                    product='WindBear',
                    os_name='Linux',
                    signature='this/is+a=C|signature',
                ),
            ],
            'total': 1,
        })

        # Test plugins
        res = report.get_list(**dict(
            base_params,
            report_process='plugin',
            plugin_in='filename',
            plugin_terms='NPSWF',
            plugin_search_mode='contains',
        ))
        eq_(res['total'], 1)

        # Test plugins with 'starts_with' search mode
        res = report.get_list(**dict(
            base_params,
            report_process='plugin',
            plugin_in='name',
            plugin_terms='Flash',
            plugin_search_mode='starts_with',
        ))
        eq_(res['total'], 1)
Example #39
0
    def get(self, **kwargs):
        """Return product information, or version information for one
        or more product:version combinations.

        When `versions` is given, the work is delegated to `_get_versions`.
        Otherwise every version of the requested `type` ("desktop" or
        "webapp") is returned in a dict with the keys "products" (product
        names in order of first appearance), "hits" (list of versions per
        product) and "total" (number of rows).

        Raise BadArgumentError when `type` is not an accepted value.
        """
        accepted_args = [
            ("versions", None, ["list", "str"]),  # for legacy, to be removed
            ("type", "desktop", "str"),
        ]
        params = external_common.parse_arguments(accepted_args, kwargs)

        accepted_types = ("desktop", "webapp")
        if params.type not in accepted_types:
            raise BadArgumentError('type', params.type, accepted_types)

        if params.versions and params.versions[0]:
            return self._get_versions(params)

        # One query per accepted type; both return the same columns.
        sql_by_type = {
            "desktop": """
                /* socorro.external.postgresql.products.Products.get */
                SELECT
                    product_name,
                    version_string,
                    start_date,
                    end_date,
                    throttle,
                    is_featured,
                    build_type,
                    has_builds
                FROM product_info
                ORDER BY product_sort, version_sort DESC, channel_sort
            """,
            "webapp": """
                /* socorro.external.postgresql.products.Products.get */
                SELECT
                    product_name,
                    version,
                    NULL as start_date,
                    NULL as end_date,
                    1.0 as throttle,
                    FALSE as is_featured,
                    build_type,
                    FALSE as has_builds
                FROM bixie.raw_product_releases
                ORDER BY product_name, version DESC
            """,
        }

        results = self.query(
            sql_by_type[params.type],
            error_message=(
                "Failed to retrieve products/versions from PostgreSQL"
            )
        )

        # Output names of the selected columns, in SELECT order.
        columns = (
            'product',
            'version',
            'start_date',
            'end_date',
            'throttle',
            'featured',
            'release',
            'has_builds',
        )

        products = []
        versions_per_product = {}

        for row in results:
            version = dict(zip(columns, row))

            # Values that date_to_string rejects with a TypeError are
            # left as they are.
            for date_key in ('end_date', 'start_date'):
                try:
                    version[date_key] = datetimeutil.date_to_string(
                        version[date_key])
                except TypeError:
                    pass

            version['throttle'] = float(version['throttle'])

            product = version['product']
            if product not in products:
                products.append(product)

            versions_per_product.setdefault(product, []).append(version)

        return {
            'products': products,
            'hits': versions_per_product,
            'total': len(results)
        }
Example #40
0
    def get(self, **kwargs):
        """Return product information, or version information for one
        or more product:version combinations.

        When `versions` is given, the work is delegated to `_get_versions`.
        Otherwise every row of `product_info` is returned in a dict with
        the keys "products" (product names in order of first appearance),
        "hits" (list of versions per product) and "total" (number of rows).
        """
        accepted_args = [
            ("versions", None, ["list", "str"]),  # for legacy, to be removed
        ]
        params = external_common.parse_arguments(accepted_args, kwargs)
        if params.versions and params.versions[0]:
            return self._get_versions(params)

        sql = """
            /* socorro.external.postgresql.products.Products.get */
            SELECT
                product_name AS product,
                version_string AS version,
                start_date,
                end_date,
                throttle,
                is_featured AS featured,
                build_type AS release,
                has_builds
            FROM product_info
            ORDER BY product_sort, version_sort DESC, channel_sort
        """

        results = self.query(
            sql,
            error_message=(
                "Failed to retrieve products/versions from PostgreSQL"
            )
        )

        products = []
        versions_per_product = {}

        for version in results.zipped():
            # Values that date_to_string rejects with a TypeError are
            # left as they are.
            for date_attr in ('end_date', 'start_date'):
                try:
                    setattr(
                        version,
                        date_attr,
                        datetimeutil.date_to_string(
                            getattr(version, date_attr)
                        )
                    )
                except TypeError:
                    pass

            version.throttle = float(version.throttle)

            product = version.product
            if product not in products:
                products.append(product)

            versions_per_product.setdefault(product, []).append(version)

        return {
            'products': products,
            'hits': versions_per_product,
            'total': len(results)
        }
Example #41
0
    def _get_versions(self, params):
        """Return product information for one or more product:version
        combinations.

        `params["versions"]` is parsed with `parse_versions` and the flat
        result is stored in `params["products_versions"]`, read here as
        alternating product / version entries. The returned dict has the
        keys "hits" (the matching rows, with `start_date` and `end_date`
        converted by `date_to_string`) and "total" (the number of hits).
        """
        (params["products_versions"],
         _unused_products) = self.parse_versions(params["versions"], [])

        sql_select = """
            SELECT product_name as product,
                   version_string as version,
                   start_date,
                   end_date,
                   is_featured,
                   build_type,
                   throttle::float,
                   has_builds
            FROM product_info
        """

        # Split the flat list into parallel product and version lists.
        flat_pairs = params["products_versions"]
        products_list = []
        versions_list = []
        for index in range(0, len(flat_pairs), 2):
            products_list.append(flat_pairs[index])
            versions_list.append(flat_pairs[index + 1])

        # One "(product AND version)" condition per requested pair; the
        # numbered placeholders match the keys added to sql_params below.
        pair_template = (
            "(product_name = %%(product%d)s"
            " AND version_string = %%(version%d)s)"
        )
        sql_where = [
            pair_template % (position, position)
            for position in range(len(products_list))
        ]

        sql_params = add_param_to_dict({}, "product", products_list)
        sql_params = add_param_to_dict(sql_params, "version", versions_list)

        sql_query = sql_select
        if sql_where:
            sql_query = sql_select + " WHERE " + " OR ".join(sql_where)

        sql_query = """
            /* socorro.external.postgresql.Products.get_versions */
            %s
        """ % sql_query

        results = self.query(
            sql_query,
            sql_params,
            error_message=(
                "Failed to retrieve products versions from PostgreSQL"
            )
        )

        products = []
        for product in results.zipped():
            for date_attr in ('start_date', 'end_date'):
                setattr(
                    product,
                    date_attr,
                    datetimeutil.date_to_string(getattr(product, date_attr))
                )
            products.append(product)

        return {
            "hits": products,
            "total": len(products)
        }
Example #42
0
    def get(self, **kwargs):
        """Return a list of results and aggregations based on parameters.

        The list of accepted parameters (with types and default values) is in
        the database and can be accessed with the super_search_fields service.
        """
        # Filter parameters and raise potential errors.
        params = self.get_parameters(**kwargs)

        # Find the indices to use to optimize the elasticsearch query.
        indices = self.get_indices(params['date'])

        # Create and configure the search object.
        search = Search(
            using=self.get_connection(),
            index=indices,
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
        )

        # Create filters.
        filters = None

        for field, sub_params in params.items():
            sub_filters = None
            for param in sub_params:

                if param.name.startswith('_'):
                    if param.name == '_results_offset':
                        results_from = param.value[0]
                    elif param.name == '_results_number':
                        results_number = param.value[0]
                    # Don't use meta parameters in the query.
                    continue

                field_data = self.all_fields[param.name]

                name = '%s.%s' % (field_data['namespace'],
                                  field_data['in_database_name'])

                if param.data_type in ('date', 'datetime'):
                    param.value = datetimeutil.date_to_string(param.value)
                elif param.data_type == 'enum':
                    param.value = [x.lower() for x in param.value]
                elif param.data_type == 'str' and not param.operator:
                    param.value = [x.lower() for x in param.value]

                args = {}
                filter_type = 'term'
                filter_value = None
                if not param.operator:
                    # contains one of the terms
                    if len(param.value) == 1:
                        val = param.value[0]
                        if not isinstance(val, basestring) or (isinstance(
                                val, basestring) and ' ' not in val):
                            filter_value = val

                        # If the term contains white spaces, we want to perform
                        # a phrase query. Thus we do nothing here and let this
                        # value be handled later.
                    else:
                        filter_type = 'terms'
                        filter_value = param.value
                elif param.operator == '=':
                    # is exactly
                    if field_data['has_full_version']:
                        name = '%s.full' % name
                    filter_value = param.value
                elif param.operator == '>':
                    # greater than
                    filter_type = 'range'
                    filter_value = {'gt': param.value}
                elif param.operator == '<':
                    # lower than
                    filter_type = 'range'
                    filter_value = {'lt': param.value}
                elif param.operator == '>=':
                    # greater than or equal to
                    filter_type = 'range'
                    filter_value = {'gte': param.value}
                elif param.operator == '<=':
                    # lower than or equal to
                    filter_type = 'range'
                    filter_value = {'lte': param.value}
                elif param.operator == '__null__':
                    # is null
                    filter_type = 'missing'
                    args['field'] = name

                if filter_value is not None:
                    args[name] = filter_value

                if args:
                    if param.operator_not:
                        new_filter = ~F(filter_type, **args)
                    else:
                        new_filter = F(filter_type, **args)

                    if sub_filters is None:
                        sub_filters = new_filter
                    elif param.data_type == 'enum':
                        sub_filters |= new_filter
                    else:
                        sub_filters &= new_filter

                    continue

                # These use a wildcard and thus need to be in a query
                # instead of a filter.
                operator_wildcards = {
                    '~': '*%s*',  # contains
                    '$': '%s*',  # starts with
                    '^': '*%s'  # ends with
                }
                if param.operator in operator_wildcards:
                    if field_data['has_full_version']:
                        name = '%s.full' % name

                    query_type = 'wildcard'
                    args[name] = (operator_wildcards[param.operator] %
                                  param.value)
                elif not param.operator:
                    # This is a phrase that was passed down.
                    query_type = 'simple_query_string'
                    args['query'] = param.value[0]
                    args['fields'] = [name]
                    args['default_operator'] = 'and'

                if args:
                    query = Q(query_type, **args)
                    if param.operator_not:
                        query = ~query
                    search = search.query(query)
                else:
                    # If we reach this point, that means the operator is
                    # not supported, and we should raise an error about that.
                    raise NotImplementedError('Operator %s is not supported' %
                                              param.operator)

            if filters is None:
                filters = sub_filters
            elif sub_filters is not None:
                filters &= sub_filters

        search = search.filter(filters)

        # Pagination.
        results_to = results_from + results_number
        search = search[results_from:results_to]

        # Create facets.
        for param in params['_facets']:
            for value in param.value:
                try:
                    field_ = self.all_fields[value]
                except KeyError:
                    # That is not a known field, we can't facet on it.
                    raise BadArgumentError(
                        value,
                        msg='Unknown field "%s", cannot facet on it' % value)

                field_name = '%s.%s' % (field_['namespace'],
                                        field_['in_database_name'])

                if field_['has_full_version']:
                    # If the param has a full version, that means what matters
                    # is the full string, and not its individual terms.
                    field_name += '.full'

                search.aggs.bucket(value,
                                   'terms',
                                   field=field_name,
                                   size=self.config.facets_max_number)

        # Query and compute results.
        hits = []
        fields = [
            '%s.%s' % (x['namespace'], x['in_database_name'])
            for x in self.all_fields.values() if x['is_returned']
        ]
        search = search.fields(*fields)

        if params['_return_query'][0].value[0]:
            # Return only the JSON query that would be sent to elasticsearch.
            return {
                'query': search.to_dict(),
                'indices': indices,
            }

        # We call elasticsearch with a computed list of indices, based on
        # the date range. However, if that list contains indices that do not
        # exist in elasticsearch, an error will be raised. We thus want to
        # remove all failing indices until we either have a valid list, or
        # an empty list in which case we return no result.
        while True:
            try:
                results = search.execute()
                for hit in results:
                    hits.append(self.format_fields(hit.to_dict()))

                total = search.count()
                aggregations = self.format_aggregations(results.aggregations)
                break  # Yay! Results!
            except NotFoundError, e:
                missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0]
                if missing_index in indices:
                    del indices[indices.index(missing_index)]
                else:
                    # Wait what? An error caused by an index that was not
                    # in the request? That should never happen, but in case
                    # it does, better know it.
                    raise

                if indices:
                    # Update the list of indices and try again.
                    # Note: we need to first empty the list of indices before
                    # updating it, otherwise the removed indices never get
                    # actually removed.
                    search = search.index().index(*indices)
                else:
                    # There is no index left in the list, return an empty
                    # result.
                    hits = []
                    total = 0
                    aggregations = {}
                    break
Example #43
0
    def get_comments(self, **kwargs):
        """Fetch the user comments left on crash reports.

        The accepted filters (signature and others) are described in
        socorro.lib.search_common.get_parameters().

        Returns a dict with two keys: "hits", a list of dicts (each holding
        "date_processed", "user_comments", "uuid" and "email"), and "total",
        the value returned by the count query.
        """
        params = self.prepare_search_params(**kwargs)

        # WARNING: sensitive data is returned here (email). When there is
        # an authentication mechanism, a verification should be done here.
        sql_select = """
            SELECT
                r.date_processed,
                r.user_comments,
                r.uuid,
                CASE
                    WHEN r.email = '' THEN null
                    WHEN r.email IS NULL THEN null
                    ELSE r.email
                END
        """

        sql_count = """
            SELECT
                COUNT(r.uuid)
        """

        # Build the shared FROM / WHERE / LIMIT fragments; the helpers fill
        # in the dict of bound SQL parameters as they go.
        sql_from = self.build_reports_sql_from(params)
        sql_where, sql_params = self.build_reports_sql_where(
            params,
            {},
            self.context
        )
        sql_where = "%s AND r.user_comments IS NOT NULL" % sql_where
        sql_order = "ORDER BY email ASC, r.date_processed ASC"
        sql_limit, sql_params = self.build_reports_sql_limit(
            params,
            sql_params
        )

        # Both statements carry the same leading SQL comment.
        query_tag = "/* external.postgresql.crashes.Crashes.get_comments */"

        # Count first: when nothing matches, the select is never issued.
        count = self.count(
            " ".join((query_tag, sql_count, sql_from, sql_where)),
            sql_params
        )
        if not count:
            return {
                "hits": [],
                "total": count
            }

        sql_query = " ".join((
            query_tag, sql_select, sql_from, sql_where, sql_order, sql_limit
        ))
        rows = self.query(
            sql_query,
            sql_params,
            error_message="Failed to retrieve comments from PostgreSQL"
        )

        # Turn each row into a dict, in the order of the selected columns.
        column_names = ("date_processed", "user_comments", "uuid", "email")
        comments = []
        for row in rows:
            comment = dict(zip(column_names, row))
            comment["date_processed"] = datetimeutil.date_to_string(
                comment["date_processed"]
            )
            comments.append(comment)

        return {
            "hits": comments,
            "total": count
        }
Example #44
0
    def get(self, **kwargs):
        """Return a list of results and facets based on parameters.

        The list of accepted parameters (with types and default values) is in
        socorro.lib.search_common.SearchBase

        Returns a dict with three keys: 'hits' (the documents returned by the
        search, passed through ``format_field_names``), 'total'
        (``search.count()``) and 'facets' (``search.facet_counts()``).

        Raises NotImplementedError when a parameter uses an operator that is
        not handled below, and BadArgumentError when a requested facet is not
        a known field.
        """
        # Filter parameters and raise potential errors.
        params = self.get_parameters(**kwargs)

        # Find the indexes to use to optimize the elasticsearch query.
        indexes = self.get_indexes(params['date'])

        # Create and configure the search object.
        search = SuperS().es(
            urls=self.config.elasticsearch_urls,
            timeout=self.config.elasticsearch_timeout,
        )
        search = search.indexes(indexes)
        search = search.doctypes(self.config.elasticsearch_doctype)

        # Create filters.
        # Start from an empty filter; every non-wildcard parameter below is
        # AND-ed into it.
        filters = F()

        for field, sub_params in params.items():
            for param in sub_params:
                name = PARAM_TO_FIELD_MAPPING.get(param.name, param.name)
                name = self.prefix_field_name(name)

                # NOTE(review): this test runs on the *prefixed* name, so it
                # presumably relies on prefix_field_name leaving meta
                # parameters (leading underscore) untouched -- verify.
                if name.startswith('_'):
                    if name == '_results_offset':
                        results_from = param.value[0]
                    elif name == '_results_number':
                        results_number = param.value[0]
                    # Don't use meta parameters in the query.
                    continue

                # Normalize the value: dates become strings, enums and
                # operator-less strings are lowercased.
                if param.data_type in ('date', 'datetime'):
                    param.value = datetimeutil.date_to_string(param.value)
                elif param.data_type == 'enum':
                    param.value = [x.lower() for x in param.value]
                elif param.data_type == 'str' and not param.operator:
                    param.value = [x.lower() for x in param.value]

                # Keyword arguments for F(); the '__in', '__gt', ... suffixes
                # select the kind of filter applied to the field.
                args = {}
                if not param.operator:
                    # contains one of the terms
                    if len(param.value) == 1:
                        args[name] = param.value[0]
                    else:
                        args['%s__in' % name] = param.value
                elif param.operator == '=':
                    # is exactly
                    if name in FIELDS_WITH_FULL_VERSION:
                        name = '%s.full' % name
                    args[name] = param.value
                elif param.operator == '>':
                    # greater than
                    args['%s__gt' % name] = param.value
                elif param.operator == '<':
                    # lower than
                    args['%s__lt' % name] = param.value
                elif param.operator == '>=':
                    # greater than or equal to
                    args['%s__gte' % name] = param.value
                elif param.operator == '<=':
                    # lower than or equal to
                    args['%s__lte' % name] = param.value
                elif param.operator == '__null__':
                    # is null
                    args['%s__missing' % name] = param.value

                if args:
                    # The operator was handled as a filter: combine it (or
                    # its negation) with the filters gathered so far and move
                    # on to the next parameter.
                    if param.operator_not:
                        filters &= ~F(**args)
                    else:
                        filters &= F(**args)
                    continue

                # These use a wildcard and thus need to be in a query
                # instead of a filter.
                operator_wildcards = {
                    '~': '*%s*',  # contains
                    '$': '%s*',  # starts with
                    '^': '*%s'  # ends with
                }
                if param.operator in operator_wildcards:
                    if name in FIELDS_WITH_FULL_VERSION:
                        name = '%s.full' % name
                    args['%s__wildcard' % name] = \
                        operator_wildcards[param.operator] % param.value
                    # The negation flag is handed to the query as 'must_not'.
                    args['must_not'] = param.operator_not

                if args:
                    search = search.query(**args)
                else:
                    # If we reach this point, that means the operator is
                    # not supported, and we should raise an error about that.
                    raise NotImplementedError('Operator %s is not supported' %
                                              param.operator)

        search = search.filter(filters)

        # Pagination.
        # NOTE(review): results_from and results_number are only bound inside
        # the loop above, so this assumes get_parameters always supplies
        # '_results_offset' and '_results_number' -- confirm.
        results_to = results_from + results_number
        search = search[results_from:results_to]

        # Create facets.
        # The same processed filters are attached to every facet as its
        # 'facet_filter'. Note this goes through a private method of the
        # search object.
        processed_filters = search._process_filters(filters.filters)

        for param in params['_facets']:
            for value in param.value:
                filter_ = self.get_filter(value)
                if not filter_:
                    # That is not a known field, we can't facet on it.
                    raise BadArgumentError(
                        'Unknown field "%s", cannot facet on it' % value)

                field_name = PARAM_TO_FIELD_MAPPING.get(value, value)
                field_name = self.prefix_field_name(field_name)

                if field_name in FIELDS_WITH_FULL_VERSION:
                    # If the param has a full version, that means what matters
                    # is the full string, and not its individual terms.
                    field_name += '.full'

                # Raw facet definition, keyed by the name the caller asked
                # for.
                args = {
                    value: {
                        'terms': {
                            'field': field_name,
                            'size': self.config.facets_max_number,
                        },
                        'facet_filter': processed_filters,
                    }
                }
                search = search.facet_raw(**args)

        # Query and compute results.
        hits = []
        # Only the fields listed in PROCESSED_CRASH_FIELDS are requested.
        fields = ['processed_crash.%s' % x for x in PROCESSED_CRASH_FIELDS]
        for hit in search.values_dict(*fields):
            hits.append(self.format_field_names(hit))

        return {
            'hits': hits,
            'total': search.count(),
            'facets': search.facet_counts(),
        }
Example #45
0
    def test_get_comments(self):
        """Exercise Crashes.get_comments with matching, empty, missing,
        version-filtered and paginated requests."""
        crashes = Crashes(config=self.config)
        today = datetimeutil.date_to_string(self.now)

        def expected_comment(uuid, text):
            # Every expected hit shares today's date and an empty email.
            return {
                "email": None,
                "date_processed": today,
                "uuid": uuid,
                "user_comments": text,
            }

        # Test 1: results
        result = crashes.get_comments(signature="js")
        eq_(result, {
            "hits": [
                expected_comment("def", "hello"),
                expected_comment("hij", "hah"),
            ],
            "total": 2,
        })

        # Test 2: no results
        result = crashes.get_comments(signature="blah")
        eq_(result, {"hits": [], "total": 0})

        # Test 3: missing parameter
        assert_raises(MissingArgumentError, crashes.get_comments)

        # Test a valid rapid beta versions
        result = crashes.get_comments(
            signature="cool_sig",
            products="Firefox",
            versions="Firefox:14.0b",
        )
        eq_(result, {
            "hits": [expected_comment("nop", "hi!")],
            "total": 1,
        })

        # Test an invalid rapid beta versions
        result = crashes.get_comments(
            signature="cool_sig",
            versions="WaterWolf:2.0b",
        )
        eq_(result, {
            "hits": [expected_comment("qrs", "meow")],
            "total": 1,
        })

        # use pagination
        result = crashes.get_comments(
            signature="cool_sig",
            result_number=1,
            result_offset=0,
        )
        eq_(len(result['hits']), 1)
        eq_(result['total'], 2)
Example #46
0
    def setup_data(self):
        """Set ``self.now`` and build the fixtures in
        ``self.test_source_data``.

        Each fixture maps a name to a dict holding the 'params' to use and
        the expected result 'res_expected', which is ``[(True,)]`` for every
        entry.
        """
        self.now = datetimeutil.utc_now()
        today = self.now.date()
        now_str = datetimeutil.date_to_string(today)
        yesterday_str = datetimeutil.date_to_string(
            today - datetime.timedelta(days=1)
        )
        lastweek_str = datetimeutil.date_to_string(
            today - datetime.timedelta(days=7)
        )

        def fixture(**params):
            # A fresh dict and list on every call, so entries share nothing.
            return {'params': params, 'res_expected': [(True,)]}

        def daily():
            # Fixture that only takes the day to update.
            return fixture(update_day=yesterday_str)

        def ranged():
            # Fixture that takes a start and an end date.
            return fixture(start_date=yesterday_str, end_date=now_str)

        self.test_source_data = {
            # Test backfill_adu
            'adu': daily(),
            # Test backfill_all_dups
            'all_dups': ranged(),
            # Test backfill_build_adu
            'build_adu': daily(),
            # Test backfill_correlations
            'correlations': daily(),
            # Test backfill_crashes_by_user_build
            'crashes_by_user_build': daily(),
            # Test backfill_crashes_by_user
            'crashes_by_user': daily(),

            # TODO: Test backfill_daily_crashes ('daily_crashes', with
            # "update_day" set to now_str) tries to insert into a table
            # that does not exist. It can be fixed by creating a temporary
            # one.

            # Test backfill_exploitability
            'exploitability': daily(),
            # Test backfill_home_page_graph_build
            'home_page_graph_build': daily(),
            # Test backfill_home_page_graph
            'home_page_graph': daily(),
            # Test backfill_matviews
            'matviews': fixture(
                start_date=yesterday_str,
                reports_clean='false',
            ),
            # Test backfill_rank_compare
            'rank_compare': daily(),
            # Test backfill_reports_clean
            'reports_clean': ranged(),

            # TODO: Test backfill_reports_duplicates ('reports_duplicates',
            # with "start_date" yesterday_str and "end_date" now_str) tries
            # to insert into a table that does not exist. It can be fixed
            # by using the update function inside of the backfill.

            # TODO: Test backfill_signature_counts ('signature_counts',
            # with "start_date" yesterday_str and "end_date" now_str) tries
            # to insert into tables and to update functions that do not
            # exist.

            # Test backfill_tcbs_build
            'tcbs_build': daily(),
            # Test backfill_tcbs
            'tcbs': daily(),
            # Test backfill_weekly_report_partitions
            'weekly_report_partitions': fixture(
                start_date=lastweek_str,
                end_date=now_str,
                table_name='raw_crashes',
            ),

            # TODO: Update Backfill to support signature_summary backfill
            # through the API. The fixtures to enable then, each with
            # "update_day" set to yesterday_str, are:
            #   'signature_summary_products',
            #   'signature_summary_installations',
            #   'signature_summary_uptime',
            #   'signature_summary_os',
            #   'signature_summary_process_type',
            #   'signature_summary_architecture',
            #   'signature_summary_flash_version',
            #   'signature_summary_device',
            #   'signature_summary_graphics'.
        }
Example #47
0
    def get(self, **kwargs):
        """Return the latest server status rows along with the Socorro,
        Breakpad and database schema revisions.

        Accepts an optional "duration" argument (int, default 12) which is
        used as the LIMIT of the status query.

        Returns a dict with the keys "hits", "total", "socorro_revision",
        "breakpad_revision" and "schema_revision".
        """
        params = external_common.parse_arguments(
            [("duration", 12, "int")],
            kwargs
        )

        # Find the recent server status
        status_sql = """
            /* socorro.external.postgresql.server_status.ServerStatus.get */
            SELECT
                id,
                date_recently_completed,
                date_oldest_job_queued,
                avg_process_sec,
                avg_wait_sec,
                waiting_job_count,
                processors_count,
                date_created
            FROM server_status
            ORDER BY date_created DESC
            LIMIT %(duration)s
        """
        status_rows = self.query(
            status_sql,
            params,
            error_message="Failed to retrieve server status data from "
                          "PostgreSQL"
        )

        # Turn dates into strings for later JSON encoding
        date_columns = (
            "date_recently_completed",
            "date_oldest_job_queued",
            "date_created",
        )
        stats = []
        for row in status_rows.zipped():
            for column in date_columns:
                try:
                    row[column] = datetimeutil.date_to_string(row[column])
                except TypeError:
                    # Best effort: a value that cannot be converted
                    # (presumably a NULL date) is left untouched.
                    pass
            stats.append(row)

        # Find the current database version
        version_sql = """
            /* socorro.external.postgresql.server_status.ServerStatus.get */
            SELECT
                version_num
            FROM alembic_version
        """
        version_rows = self.query(
            version_sql,
            error_message="Failed to retrieve database version from "
                          "PostgreSQL"
        )
        if not version_rows:
            logger.warning("No version_num was found in table alembic_version")
            schema_revision = "Unknown"
        else:
            (schema_revision,) = version_rows[0]

        # Find the current breakpad and socorro revisions
        socorro_revision = resource_string('socorro', 'socorro_revision.txt')
        breakpad_revision = resource_string('socorro', 'breakpad_revision.txt')

        return {
            "hits": stats,
            "total": len(stats),
            "socorro_revision": socorro_revision,
            "breakpad_revision": breakpad_revision,
            "schema_revision": schema_revision,
        }
Example #48
0
    def test_get(self):
        """Check ServerStatus.get with its default arguments and with an
        explicit duration."""
        status = ServerStatus(config=self.config)
        now = datetimeutil.utc_now()

        # Four timestamps, 15 minutes apart, going back from today at noon.
        noon = datetime.datetime(
            now.year, now.month, now.day, 12, 0, 0, tzinfo=now.tzinfo
        )
        step = datetime.timedelta(minutes=15)
        date1, date2, date3, date4 = [
            datetimeutil.date_to_string(noon - step * i) for i in range(4)
        ]

        def status_row(row_id, job_date, created, avg_process_sec,
                       avg_wait_sec, waiting_job_count, processors_count):
            # job_date is used for both the completed and the queued dates.
            return {
                "id": row_id,
                "date_recently_completed": job_date,
                "date_oldest_job_queued": job_date,
                "avg_process_sec": avg_process_sec,
                "avg_wait_sec": avg_wait_sec,
                "waiting_job_count": waiting_job_count,
                "processors_count": processors_count,
                "date_created": created,
            }

        #......................................................................
        # Test 1: default behavior
        res = status.get()
        res_expected = {
            "hits": [
                status_row(1, date1, date1, 2, 5, 3, 2),
                status_row(2, date2, date2, 3, 3.12, 2, 2),
                status_row(3, date3, date3, 1, 2, 4, 1),
                status_row(4, None, date4, 1, 2, 4, 1),
            ],
            "socorro_revision": 42,
            "breakpad_revision": 43,
            "total": 4,
        }

        self.assertEqual(res, res_expected)

        #......................................................................
        # Test 2: with duration
        res = status.get(duration=1)
        res_expected = {
            "hits": [
                status_row(1, date1, date1, 2, 5, 3, 2),
            ],
            "socorro_revision": 42,
            "breakpad_revision": 43,
            "total": 1,
        }

        self.assertEqual(res, res_expected)
Example #49
0
    def setup_data(self):
        """Build the fixtures used by the signature summary tests.

        Fills ``self.test_source_data`` with one entry per test case. Each
        entry maps a case name to a dict holding the ``params`` to send and
        the ``res_expected`` list the result is compared against.
        """
        today = self.now.date()
        one_day_ago = today - datetime.timedelta(days=1)
        one_week_ago = today - datetime.timedelta(days=7)
        end_date = datetimeutil.date_to_string(today)
        yesterday_str = datetimeutil.date_to_string(one_day_ago)
        start_date = datetimeutil.date_to_string(one_week_ago)

        def request(report_type, versions="Firefox:8.0"):
            # Parameters common to every case; pass versions=None to leave
            # the version filter out entirely.
            query = {
                "report_type": report_type,
                "signature": "Fake Signature #1",
                "start_date": start_date,
                "end_date": end_date,
            }
            if versions is not None:
                query["versions"] = versions
            return query

        def case(params, *expected):
            # One test case: the request and the rows expected back.
            return {'params': params, 'res_expected': list(expected)}

        def product_share(version_string):
            return {
                "product_name": 'Firefox',
                "version_string": version_string,
                "report_count": 1,
                "percentage": '50.000',
            }

        def category_share(category, report_count):
            return {
                "category": category,
                "report_count": report_count,
                "percentage": '100.000',
            }

        def device_share(report_count):
            return {
                'cpu_abi': 'armeabi-v7a',
                'manufacturer': 'samsung',
                'model': 'GT-P5100',
                'version': '16 (REL)',
                'report_count': report_count,
                'percentage': '100.000',
            }

        cases = {}

        # Test 1: find exact match for one product version and signature
        cases['products'] = case(
            request("products"),
            product_share("8.0"),
            product_share("9.0"),
        )

        # Test 2: find ALL matches for all product versions and signature
        cases['products_no_version'] = case(
            request("products", versions=None),
            product_share("8.0"),
            product_share("9.0"),
        )

        # Test 3: find architectures reported for a given version and a
        # signature
        cases['architecture'] = case(
            request("architecture"),
            category_share('amd64', 1.0),
        )

        # Test 4: find architectures reported for a signature with no
        # specific version.
        cases['architecture_no_version'] = case(
            request("architecture", versions=None),
            category_share('amd64', 2),
        )

        # Test 5: find flash_versions reported for specific version and
        # a signature
        cases['flash_versions'] = case(
            request("flash_version"),
            category_share('1.0', 1.0),
        )

        # Test 6: find flash_versions reported with a signature and without
        # a specific version
        cases['flash_versions_no_version'] = case(
            request("flash_version", versions=None),
            category_share('1.0', 2.0),
        )

        # Test 7: find installations reported for a signature
        cases['distinct_install'] = case(
            request("distinct_install"),
            {
                "product_name": 'Firefox',
                "version_string": '8.0',
                "crashes": 10,
                "installations": 8,
            },
        )

        # Test 8: find os_version_strings reported for a signature
        cases['os'] = case(
            request("os"),
            category_share('Windows NT 6.4', 1),
        )

        # Test 9: find process_type reported for a signature
        cases['process_type'] = case(
            request("process_type"),
            category_share('plugin', 1),
        )

        # Test 10: find uptime reported for signature
        cases['uptime'] = case(
            request("uptime"),
            category_share('15-30 minutes', 1),
        )

        # Test 11: find exploitability reported for signature
        cases['exploitability'] = case(
            request("exploitability"),
            {
                'null_count': 1,
                'none_count': 2,
                'low_count': 3,
                'medium_count': 4,
                'high_count': 5,
                'report_date': yesterday_str,
            },
        )

        # Test 12: find mobile devices reported for signature with a
        # specific version
        cases['devices'] = case(
            request('devices'),
            device_share(123),
        )

        # Test 13: find mobile devices reported for signature
        cases['devices_no_version'] = case(
            request('devices', versions=None),
            device_share(246),
        )

        # Test 14: find graphics adapters reported for signature
        cases['graphics'] = case(
            request('graphics'),
            {
                'vendor_hex': '0x1234',
                'adapter_hex': '0x5678',
                'vendor_name': 'Test Vendor',
                'adapter_name': 'Test Adapter',
                'report_count': 123,
                'percentage': '100.000',
            },
        )

        self.test_source_data = cases
Example #50
0
    def get(self, **kwargs):
        """Return recent server status rows plus the code revisions.

        The only recognized argument is ``duration`` (parsed as an int,
        defaulting to 12); it is used as the SQL LIMIT on the number of
        ``server_status`` rows, newest first.

        Returns a dict with the rows under ``hits``, their number under
        ``total`` and the ``socorro_revision`` / ``breakpad_revision`` values
        read from the context. If PostgreSQL raises an error it is logged
        and an empty list of hits is returned.
        """
        columns = (
            "id",
            "date_recently_completed",
            "date_oldest_job_queued",
            "avg_process_sec",
            "avg_wait_sec",
            "waiting_job_count",
            "processors_count",
            "date_created",
        )
        date_columns = (
            "date_recently_completed",
            "date_oldest_job_queued",
            "date_created",
        )

        params = external_common.parse_arguments(
            [("duration", 12, "int")],
            kwargs
        )

        sql = """
            /* socorro.external.postgresql.server_status.ServerStatus.get */
            SELECT
                id,
                date_recently_completed,
                date_oldest_job_queued,
                avg_process_sec,
                avg_wait_sec,
                waiting_job_count,
                processors_count,
                date_created
            FROM server_status
            ORDER BY date_created DESC
            LIMIT %(duration)s
        """

        conn = None
        try:
            conn = self.database.connection()
            cur = conn.cursor()
            cur.execute(sql, params)
            rows = cur.fetchall()
        except psycopg2.Error:
            logger.error("Failed retrieving server status from PostgreSQL",
                         exc_info=True)
            rows = []
        finally:
            # The connection may never have been opened if connecting failed.
            if conn:
                conn.close()

        hits = []
        for record in rows:
            entry = dict(zip(columns, record))

            # Dates become strings so the result can be JSON encoded later;
            # values that make date_to_string raise TypeError stay untouched.
            for column in date_columns:
                try:
                    entry[column] = datetimeutil.date_to_string(entry[column])
                except TypeError:
                    pass

            hits.append(entry)

        response = {
            "hits": hits,
            "total": len(hits),
        }
        response["socorro_revision"] = self.context.socorro_revision
        response["breakpad_revision"] = self.context.breakpad_revision
        return response
Example #51
0
    def get_exploitability(self, **kwargs):
        """Return exploitability report rows for a range of report dates.

        Recognized arguments: ``start_date`` (defaults to one week before
        today, UTC), ``end_date`` (defaults to today, UTC), and the optional
        ``page`` / ``batch`` pair used for pagination.

        Returns a dict with the rows under ``hits`` (``report_date`` turned
        into a string) and, under ``total``, the number of rows in the date
        range regardless of pagination.

        Raises BadArgumentError when ``page`` is not positive and
        MissingArgumentError when ``page`` is given without ``batch``.

        See socorro.lib.external_common.parse_arguments() for all filters.
        """
        today = datetimeutil.utc_now().date()
        a_week_ago = today - datetime.timedelta(weeks=1)

        params = external_common.parse_arguments(
            [
                ("start_date", a_week_ago, "date"),
                ("end_date", today, "date"),
                ("page", None, "int"),
                ("batch", None, "int"),
            ],
            kwargs
        )

        # First count every matching row, ignoring pagination.
        count_sql_query = """
            /* external.postgresql.crashes.Crashes.get_exploitability */
            SELECT COUNT(*)
            FROM exploitability_reports
            WHERE
                report_date BETWEEN %(start_date)s AND %(end_date)s
        """
        count_rows = self.query(
            count_sql_query,
            params,
            error_message="Failed to retrieve exploitable crashes count"
        )
        (total,) = count_rows[0]

        sql_query = """
            /* external.postgresql.crashes.Crashes.get_exploitability */
            SELECT
                signature,
                report_date,
                null_count,
                none_count,
                low_count,
                medium_count,
                high_count
            FROM exploitability_reports
            WHERE
                report_date BETWEEN %(start_date)s AND %(end_date)s
            ORDER BY
                report_date DESC
        """

        page = params['page']
        if page is not None:
            # Pages are numbered from 1 and need a batch size to go with.
            if page <= 0:
                raise BadArgumentError('page', page, 'starts on 1')
            batch = params['batch']
            if batch is None:
                raise MissingArgumentError('batch')
            sql_query += """
            LIMIT %(limit)s
            OFFSET %(offset)s
            """
            params['limit'] = batch
            params['offset'] = batch * (page - 1)

        rows = self.query(
            sql_query,
            params,
            error_message=(
                "Failed to retrieve exploitable crashes from PostgreSQL"
            )
        )

        # Turn each row into a dict keyed by column name.
        columns = (
            "signature",
            "report_date",
            "null_count",
            "none_count",
            "low_count",
            "medium_count",
            "high_count",
        )
        hits = []
        for row in rows:
            hit = dict(zip(columns, row))
            hit["report_date"] = datetimeutil.date_to_string(
                hit["report_date"]
            )
            hits.append(hit)

        return {"hits": hits, "total": total}
Example #52
0
    def test_get_comments(self):
        """Exercise Crashes.get_comments with matching, empty and bad input."""
        service = Crashes(config=self.config)
        today = datetimeutil.date_to_string(self.now)

        def comment(uuid, text):
            # Shape of one expected hit; only uuid and comment text vary.
            return {
                "email": None,
                "date_processed": today,
                "uuid": uuid,
                "user_comments": text,
            }

        def expect(*hits):
            return {"hits": list(hits), "total": len(hits)}

        # Test 1: results, then Test 2: no results
        simple_cases = (
            ({"signature": "js"},
             expect(comment("def", "hello"), comment("hij", "hah"))),
            ({"signature": "blah"},
             expect()),
        )
        for params, res_expected in simple_cases:
            res = service.get_comments(**params)
            self.assertEqual(res, res_expected)

        # Test 3: missing parameter
        self.assertRaises(MissingArgumentError, service.get_comments)

        # Test a valid rapid beta versions
        rapid_beta = {
            "signature": "cool_sig",
            "products": "Firefox",
            "versions": "Firefox:14.0b",
        }
        self.assertEqual(
            service.get_comments(**rapid_beta),
            expect(comment('nop', 'hi!'))
        )

        # Test an invalid rapid beta versions
        bad_rapid_beta = {
            "signature": "cool_sig",
            "versions": "WaterWolf:2.0b",
        }
        self.assertTrue(service.get_comments(**bad_rapid_beta))
Example #53
0
    def get_list(self, **kwargs):
        """
        List all crashes with a given signature and return them.

        Both `from_date` and `to_date` (and their aliases `from` and `to`)
        are required and can not be greater than 30 days apart.

        Sorting: `sort` selects the column to order by (it must be one of
        the recognized keys listed in the body) and switches the direction
        to ascending; a truthy `reverse` turns that back into descending.
        `reverse` is ignored when `sort` is not passed. Without `sort` the
        result is ordered by `date_processed`, descending.

        A truthy `include_raw_crash` adds the JSON-decoded `raw_crash` to
        every hit.

        Optional arguments: see SearchCommon.get_parameters()

        Returns a dict with the crashes under "hits" and the number of
        matching reports under "total".

        Raises MissingArgumentError when `from_date`, `to_date` or
        `signature` is missing, and BadArgumentError when the date span is
        over 30 days or the sort key is not recognized.

        """
        # aliases
        if "from" in kwargs and "from_date" not in kwargs:
            kwargs["from_date"] = kwargs.get("from")
        if "to" in kwargs and "to_date" not in kwargs:
            kwargs["to_date"] = kwargs.get("to")

        if not kwargs.get('from_date'):
            raise MissingArgumentError('from_date')
        if not kwargs.get('to_date'):
            raise MissingArgumentError('to_date')

        from_date = datetimeutil.datetimeFromISOdateString(kwargs['from_date'])
        to_date = datetimeutil.datetimeFromISOdateString(kwargs['to_date'])
        # Only whole days are compared, so a span of 30 days plus a few
        # hours still passes.
        span_days = (to_date - from_date).days
        if span_days > 30:
            raise BadArgumentError(
                'Span between from_date and to_date can not be more than 30')

        # start with the default
        sort_order = {'key': 'date_processed', 'direction': 'DESC'}
        if 'sort' in kwargs:
            sort_order['key'] = kwargs.pop('sort')
            # The key is interpolated straight into the ORDER BY clause
            # below, so it has to be checked against this whitelist.
            _recognized_sort_orders = (
                'date_processed',
                'uptime',
                'user_comments',
                'uuid',
                'uuid_text',
                'product',
                'version',
                'build',
                'signature',
                'url',
                'os_name',
                'os_version',
                'cpu_name',
                'cpu_info',
                'address',
                'reason',
                'last_crash',
                'install_age',
                'hangid',
                'process_type',
                'release_channel',
                'install_time',
                'duplicate_of',
            )
            if sort_order['key'] not in _recognized_sort_orders:
                raise BadArgumentError(
                    '%s is not a recognized sort order key' %
                    sort_order['key'])
            # An explicit sort is ascending unless `reverse` is truthy.
            sort_order['direction'] = 'ASC'
            if 'reverse' in kwargs:
                if kwargs.pop('reverse'):
                    sort_order['direction'] = 'DESC'

        include_raw_crash = kwargs.get('include_raw_crash') or False
        params = search_common.get_parameters(kwargs)

        if not params["signature"]:
            raise MissingArgumentError('signature')

        # The signature is the search term and must match exactly.
        params["terms"] = params["signature"]
        params["search_mode"] = "is_exactly"

        # Default mode falls back to starts_with for postgres
        if params["plugin_search_mode"] == "default":
            params["plugin_search_mode"] = "starts_with"

        # Limiting to a signature
        if params["terms"]:
            params["terms"] = self.prepare_terms(params["terms"],
                                                 params["search_mode"])

        # Searching for terms in plugins
        if params["report_process"] == "plugin" and params["plugin_terms"]:
            params["plugin_terms"] = " ".join(params["plugin_terms"])
            params["plugin_terms"] = self.prepare_terms(
                params["plugin_terms"], params["plugin_search_mode"])

        # Get information about the versions
        util_service = Util(config=self.context)
        params["versions_info"] = util_service.versions_info(**params)

        # Parsing the versions
        params["versions_string"] = params["versions"]
        (params["versions"],
         params["products"]) = self.parse_versions(params["versions"],
                                                   params["products"])

        if hasattr(self.context, 'webapi'):
            context = self.context.webapi
        else:
            # old middleware
            context = self.context
        # Changing the OS ids to OS names
        # (an id matches when its first three characters equal those of
        # the requested value)
        for i, elem in enumerate(params["os"]):
            for platform in context.platforms:
                if platform["id"][:3] == elem[:3]:
                    params["os"][i] = platform["name"]

        # Creating the parameters for the sql query
        sql_params = {}

        # Preparing the different parts of the sql query
        sql_select = """
            SELECT
                r.date_processed,
                r.uptime,
                r.user_comments,
                r.uuid::uuid,
                r.uuid as uuid_text,
                r.product,
                r.version,
                r.build,
                r.signature,
                r.url,
                r.os_name,
                r.os_version,
                r.cpu_name,
                r.cpu_info,
                r.address,
                r.reason,
                r.last_crash,
                r.install_age,
                r.hangid,
                r.process_type,
                r.release_channel,
                (r.client_crash_date - (r.install_age * INTERVAL '1 second'))
                  AS install_time
        """
        # With raw crashes, duplicate_of is supplied by the `dupes` part of
        # wrapped_select below; otherwise it is selected directly from the
        # reports_duplicates join added to sql_from.
        if include_raw_crash:
            pass
        else:
            sql_select += """
                , rd.duplicate_of
            """

        # Template that wraps the base query (inserted at the single %s) so
        # duplicates and raw crashes are only joined against the selected
        # slice of reports. The doubled %% keeps the named placeholders
        # intact through the `%` formatting applied further down.
        wrapped_select = """
            WITH report_slice AS (
              %s
            ), dupes AS (
                SELECT
                    report_slice.uuid,
                    rd.duplicate_of
                FROM reports_duplicates rd
                JOIN report_slice ON report_slice.uuid_text = rd.uuid
                WHERE
                    rd.date_processed BETWEEN %%(from_date)s AND %%(to_date)s
            )

            SELECT
                rs.*,
                dupes.duplicate_of,
                rc.raw_crash
            FROM report_slice rs
            LEFT OUTER JOIN dupes USING (uuid)
            LEFT OUTER JOIN raw_crashes rc ON
                rs.uuid = rc.uuid
                AND
                rc.date_processed BETWEEN %%(from_date)s AND %%(to_date)s
        """

        sql_from = self.build_reports_sql_from(params)

        if not include_raw_crash:
            sql_from = """%s
                LEFT OUTER JOIN reports_duplicates rd ON r.uuid = rd.uuid
            """ % sql_from

        sql_where, sql_params = self.build_reports_sql_where(
            params, sql_params, self.context)

        # Both values were constrained above: the key comes from the
        # whitelist (or is the default) and the direction is ASC or DESC.
        sql_order = """
            ORDER BY %(key)s %(direction)s
        """ % sort_order

        sql_limit, sql_params = self.build_reports_sql_limit(
            params, sql_params)

        # Assembling the query
        # NOTE(review): both branches below build exactly the same string;
        # the raw crash variant only differs later, when the query is
        # wrapped in wrapped_select.
        if include_raw_crash:
            sql_query = "\n".join(
                ("/* socorro.external.postgresql.report.Report.list */",
                 sql_select, sql_from, sql_where, sql_order, sql_limit))
        else:
            sql_query = "\n".join(
                ("/* socorro.external.postgresql.report.Report.list */",
                 sql_select, sql_from, sql_where, sql_order, sql_limit))

        # Query for counting the results
        sql_count_query = "\n".join(
            ("/* socorro.external.postgresql.report.Report.list */",
             "SELECT count(*)", sql_from, sql_where))

        # Querying the DB
        with self.get_connection() as connection:

            total = self.count(
                sql_count_query,
                sql_params,
                error_message="Failed to count crashes from reports.",
                connection=connection)

            # No need to call Postgres if we know there will be no results
            if total:

                if include_raw_crash:
                    sql_query = wrapped_select % sql_query

                results = self.query(
                    sql_query,
                    sql_params,
                    error_message="Failed to retrieve crashes from reports",
                    connection=connection)
            else:
                results = []

        # Transforming the results into what we want
        # The names follow the column order of the SELECT. "uuid" appears
        # twice on purpose: when the row is zipped into a dict the second
        # column (uuid_text) overwrites the first under the same key.
        fields = (
            "date_processed",
            "uptime",
            "user_comments",
            "uuid",
            "uuid",  # the uuid::text one
            "product",
            "version",
            "build",
            "signature",
            "url",
            "os_name",
            "os_version",
            "cpu_name",
            "cpu_info",
            "address",
            "reason",
            "last_crash",
            "install_age",
            "hangid",
            "process_type",
            "release_channel",
            "install_time",
            "duplicate_of",
        )
        if include_raw_crash:
            fields += ("raw_crash", )
        crashes = []
        for row in results:
            crash = dict(zip(fields, row))
            if include_raw_crash and crash['raw_crash']:
                crash['raw_crash'] = json.loads(crash['raw_crash'])
            # Every value is offered to date_to_string; those that make it
            # raise TypeError are kept unchanged.
            for i in crash:
                try:
                    crash[i] = datetimeutil.date_to_string(crash[i])
                except TypeError:
                    pass
            crashes.append(crash)

        return {"hits": crashes, "total": total}
Example #54
0
    def test_get_list(self):
        """Check Report.get_list totals and hits for several filter sets."""
        now = datetimeutil.utc_now()
        yesterday = datetimeutil.date_to_string(
            now - datetime.timedelta(days=1)
        )
        report = Report(config=self.config)

        def hit(uuid, product, os_name, process_type, version, signature):
            # Expected crash entry; everything not passed in is identical
            # for all the reports these tests look at.
            entry = dict.fromkeys(
                (
                    'hangid',
                    'cpu_info',
                    'url',
                    'last_crash',
                    'cpu_name',
                    'uptime',
                    'os_version',
                    'install_age',
                    'install_time',
                    'duplicate_of',
                    'address',
                    'user_comments',
                ),
                None
            )
            entry['date_processed'] = yesterday
            entry['reason'] = 'STACK_OVERFLOW'
            entry['build'] = '20001212010203'
            entry['uuid'] = uuid
            entry['product'] = product
            entry['os_name'] = os_name
            entry['process_type'] = process_type
            entry['version'] = version
            entry['signature'] = signature
            return entry

        # Test 1
        res = report.get_list(**{'signature': 'sig1'})
        self.assertEqual(res['total'], 5)

        # Test 2
        res = report.get_list(**{
            'signature': 'sig1',
            'products': 'WaterWolf',
            'versions': 'WaterWolf:2.0'
        })
        self.assertEqual(res['total'], 1)

        # Test 3
        res = report.get_list(**{
            'signature': 'sig1',
            'products': 'WaterWolf',
            'versions': ['WaterWolf:1.0', 'WaterWolf:3.0'],
            'os': 'win',
            'build_ids': '20001212010203',
            'reasons': 'STACK_OVERFLOW'
        })
        self.assertEqual(res['total'], 2)
        self.assertEqual(res, {
            'hits': [
                hit('4', 'WaterWolf', 'Windows NT', 'browser', '1.0', 'sig1'),
                hit('7', 'WaterWolf', 'Windows NT', 'plugin', '3.0', 'sig1'),
            ],
            'total': 2
        })

        # Test 5
        odd_signature = 'this/is+a=C|signature'
        res = report.get_list(**{'signature': odd_signature})
        self.assertEqual(res['total'], 1)
        self.assertEqual(res, {
            'hits': [
                hit('10', 'WindBear', 'Linux', 'browser', '1.0',
                    odd_signature),
            ],
            'total': 1
        })

        # Tests 6 and 7: plugins, searched by filename then by name
        plugin_searches = (
            ('filename', 'NPSWF', 'contains'),
            ('name', 'Flash', 'starts_with'),
        )
        for plugin_in, plugin_terms, plugin_search_mode in plugin_searches:
            res = report.get_list(**{
                'signature': 'sig1',
                'report_process': 'plugin',
                'plugin_in': plugin_in,
                'plugin_terms': plugin_terms,
                'plugin_search_mode': plugin_search_mode,
            })
            self.assertEqual(res['total'], 1)
Example #55
0
    def setUp(self):
        """Create the crash storage and index the reports the tests search.

        One default report is saved as is, followed by variations of it that
        each change a few fields. The index is refreshed at the end so the
        documents are searchable when the tests start.
        """
        super(IntegrationElasticsearchSearch, self).setUp()

        with self.get_config_manager().context() as config:
            self.storage = crashstorage.ElasticSearchCrashStorage(config)

            # clear the indices cache so the index is created on every test
            self.storage.indices_cache = set()

        now = datetimeutil.utc_now()
        yesterday = datetimeutil.date_to_string(
            now - datetime.timedelta(days=1)
        )
        last_month = datetimeutil.date_to_string(
            now - datetime.timedelta(weeks=4)
        )

        # insert data into elasticsearch
        default_crash_report = {
            'uuid': 100,
            'signature': 'js::break_your_browser',
            'date_processed': yesterday,
            'product': 'WaterWolf',
            'version': '1.0',
            'release_channel': 'release',
            'os_name': 'Linux',
            'build': '1234567890',
            'reason': 'MOZALLOC_WENT_WRONG',
            'hangid': None,
            'process_type': None,
        }

        # Fields to override on top of the default report, in saving order.
        variations = (
            {'uuid': 1, 'product': 'EarthRaccoon'},
            {'uuid': 2, 'version': '2.0'},
            {'uuid': 3, 'release_channel': 'aurora'},
            {'uuid': 4, 'os_name': 'Windows NT'},
            {'uuid': 5, 'build': '0987654321'},
            {'uuid': 6, 'reason': 'VERY_BAD_EXCEPTION'},
            {'uuid': 7, 'hangid': '12'},
            {'uuid': 8, 'process_type': 'plugin'},
            {'uuid': 9, 'signature': 'my_bad'},
            {
                'uuid': 10,
                'date_processed': last_month,
                'signature': 'my_little_signature',
            },
            # for plugin terms test
            {
                'uuid': 11,
                'product': 'PluginSoft',
                'process_type': 'plugin',
                'PluginFilename': 'carly.dll',
                'PluginName': 'Hey I just met you',
                'PluginVersion': '1.2',
            },
            {
                'uuid': 12,
                'product': 'PluginSoft',
                'process_type': 'plugin',
                'PluginFilename': 'hey.dll',
                'PluginName': 'Hey Plugin',
                'PluginVersion': '10.7.0.2a',
            },
            {
                'uuid': 13,
                'product': 'EarlyOwl',
                'version': '11.0b1',
                'release_channel': 'beta',
            },
            {
                'uuid': 14,
                'product': 'EarlyOwl',
                'version': '11.0b2',
                'release_channel': 'beta',
            },
        )

        # The default report itself goes in first, then one copy per
        # variation, each copy being built right before it is saved.
        self.storage.save_processed(default_crash_report)
        for overrides in variations:
            self.storage.save_processed(
                dict(default_crash_report, **overrides)
            )

        # As indexing is asynchronous, we need to force elasticsearch to
        # make the newly created content searchable before we run the tests
        self.storage.es.refresh()
Example #56
0
    def get(self, **kwargs):
        """Return a list of results and aggregations based on parameters.

        The list of accepted parameters (with types and default values) is in
        the database and can be accessed with the super_search_fields service.

        The `_fields` argument is required: it maps every known field name
        to its definition and is stored on `self.all_fields`. All arguments
        are validated by `self.get_parameters`. Parameters whose name starts
        with an underscore are meta parameters (pagination, facets size,
        histogram intervals) and are never turned into filters.

        Filters built for a same field are OR-ed together, except for range
        filters which are AND-ed. The per-field filters are then combined in
        a single `bool` / `must` filter.

        When `_return_query` is true, nothing is sent to elasticsearch and a
        dict with the `query` and `indices` keys is returned instead.

        Raises:
            MissingArgumentError: if `_fields` is missing or empty.
            BadArgumentError: if `_results_number` is negative or greater
                than 1,000, if `_facets_size` is greater than 10,000, or if
                elasticsearch rejects a value that can be matched to one of
                the passed arguments.
        """
        # Require that the list of fields be passed.
        if not kwargs.get('_fields'):
            raise MissingArgumentError('_fields')
        self.all_fields = kwargs['_fields']

        # Filter parameters and raise potential errors.
        params = self.get_parameters(**kwargs)

        # Find the indices to use to optimize the elasticsearch query.
        indices = self.get_indices(params['date'])

        # Create and configure the search object.
        search = Search(
            using=self.get_connection(),
            index=indices,
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
        )

        # Operators needing wildcards, and the associated value
        # transformation with said wildcards.
        # These tables do not depend on the parameters, so they are built
        # once instead of once per parameter.
        operator_wildcards = {
            '~': '*%s*',  # contains
            '^': '%s*',  # starts with
            '$': '*%s'  # ends with
        }
        # Operators needing ranges, and the associated Elasticsearch
        # comparison operator.
        operator_range = {
            '>': 'gt',
            '<': 'lt',
            '>=': 'gte',
            '<=': 'lte',
        }

        # Create filters.
        filters = []
        histogram_intervals = {}

        for field, sub_params in params.items():
            sub_filters = None
            for param in sub_params:
                if param.name.startswith('_'):
                    # By default, all param values are turned into lists,
                    # even when they have and can have only one value.
                    # For those we know there can only be one value,
                    # so we just extract it from the made-up list.
                    if param.name == '_results_offset':
                        results_from = param.value[0]
                    elif param.name == '_results_number':
                        results_number = param.value[0]
                        if results_number > 1000:
                            raise BadArgumentError(
                                '_results_number',
                                msg=('_results_number cannot be greater '
                                     'than 1,000'))
                        if results_number < 0:
                            raise BadArgumentError(
                                '_results_number',
                                msg='_results_number cannot be negative')
                    elif param.name == '_facets_size':
                        facets_size = param.value[0]
                        # Why cap it?
                        # Because if the query is covering a lot of different
                        # things you can get a really really large query
                        # which can hog resources excessively.
                        # Downloading, as an example, 100k facets (and 0 hits)
                        # when there is plenty of data yields a 11MB JSON
                        # file.
                        if facets_size > 10000:
                            # Name the parameter first and pass the
                            # explanation as `msg`, like the
                            # `_results_number` checks above.
                            raise BadArgumentError(
                                '_facets_size',
                                msg=('_facets_size cannot be greater '
                                     'than 10,000'))

                    for f in self.histogram_fields:
                        if param.name == '_histogram_interval.%s' % f:
                            histogram_intervals[f] = param.value[0]

                    # Don't use meta parameters in the query.
                    continue

                field_data = self.all_fields[param.name]
                name = self.get_full_field_name(field_data)

                if param.data_type in ('date', 'datetime'):
                    param.value = datetimeutil.date_to_string(param.value)
                elif param.data_type == 'enum':
                    param.value = [x.lower() for x in param.value]
                elif param.data_type == 'str' and not param.operator:
                    param.value = [x.lower() for x in param.value]

                args = {}
                filter_type = 'term'
                filter_value = None

                if not param.operator:
                    # contains one of the terms
                    if len(param.value) == 1:
                        val = param.value[0]

                        if not isinstance(val, basestring) or ' ' not in val:
                            # There's only one term and no white space, this
                            # is a simple term filter.
                            filter_value = val
                        else:
                            # If the term contains white spaces, we want to
                            # perform a phrase query.
                            filter_type = 'query'
                            args = Q(
                                'simple_query_string',
                                query=param.value[0],
                                fields=[name],
                                default_operator='and',
                            ).to_dict()
                    else:
                        # There are several terms, this is a terms filter.
                        filter_type = 'terms'
                        filter_value = param.value
                elif param.operator == '=':
                    # is exactly
                    if field_data['has_full_version']:
                        name = '%s.full' % name
                    filter_value = param.value
                elif param.operator in operator_range:
                    filter_type = 'range'
                    filter_value = {
                        operator_range[param.operator]: param.value
                    }
                elif param.operator == '__null__':
                    filter_type = 'missing'
                    args['field'] = name
                elif param.operator == '__true__':
                    filter_type = 'term'
                    filter_value = True
                elif param.operator == '@':
                    filter_type = 'regexp'
                    if field_data['has_full_version']:
                        name = '%s.full' % name
                    filter_value = param.value
                elif param.operator in operator_wildcards:
                    filter_type = 'query'

                    # Wildcard operations are better applied to a non-analyzed
                    # field (called "full") if there is one.
                    if field_data['has_full_version']:
                        name = '%s.full' % name

                    q_args = {}
                    q_args[name] = (operator_wildcards[param.operator] %
                                    param.value)
                    query = Q('wildcard', **q_args)
                    args = query.to_dict()

                if filter_value is not None:
                    args[name] = filter_value

                if args:
                    new_filter = F(filter_type, **args)
                    if param.operator_not:
                        new_filter = ~new_filter

                    # Range filters on a same field narrow each other down
                    # (AND), any other kind of filter widens the match (OR).
                    if sub_filters is None:
                        sub_filters = new_filter
                    elif filter_type == 'range':
                        sub_filters &= new_filter
                    else:
                        sub_filters |= new_filter

            if sub_filters is not None:
                filters.append(sub_filters)

        search = search.filter(F('bool', must=filters))

        # Restricting returned fields.
        fields = []

        # We keep track of the requested columns in order to make sure we
        # return those column names and not aliases for example.
        self.request_columns = []
        for param in params['_columns']:
            for value in param.value:
                if not value:
                    continue

                self.request_columns.append(value)
                field_name = self.get_field_name(value, full=False)
                fields.append(field_name)

        search = search.fields(fields)

        # Sorting.
        sort_fields = []
        for param in params['_sort']:
            for value in param.value:
                if not value:
                    continue

                # Values starting with a '-' are sorted in descending order.
                # In order to retrieve the database name of the field, we
                # must first remove the '-' part and add it back later.
                # Example: given ['product', '-version'], the results will be
                # sorted by ascending product then descending version.
                desc = False
                if value.startswith('-'):
                    desc = True
                    value = value[1:]

                field_name = self.get_field_name(value)

                if desc:
                    # The underlying library understands that '-' means
                    # sorting in descending order.
                    field_name = '-' + field_name

                sort_fields.append(field_name)

        search = search.sort(*sort_fields)

        # Pagination.
        results_to = results_from + results_number
        search = search[results_from:results_to]

        # Create facets.
        if facets_size:
            self._create_aggregations(params, search, facets_size,
                                      histogram_intervals)

        if params['_return_query'][0].value[0]:
            # Return only the JSON query that would be sent to elasticsearch.
            return {
                'query': search.to_dict(),
                'indices': indices,
            }

        # Query and compute results.
        errors = []

        # We call elasticsearch with a computed list of indices, based on
        # the date range. However, if that list contains indices that do not
        # exist in elasticsearch, an error will be raised. We thus want to
        # remove all failing indices until we either have a valid list, or
        # an empty list in which case we return no result.
        while True:
            try:
                results = search.execute()
                # The list is rebuilt on every attempt so that a retry can
                # never accumulate hits from a previous attempt.
                hits = [
                    self.format_fields(hit.to_dict()) for hit in results
                ]

                total = search.count()

                aggregations = getattr(results, 'aggregations', {})
                if aggregations:
                    aggregations = self.format_aggregations(aggregations)

                shards = getattr(results, '_shards', {})

                break  # Yay! Results!
            except NotFoundError as e:
                found = re.findall(BAD_INDEX_REGEX, e.error)
                if not found or found[0] not in indices:
                    # Wait what? Either the error message could not be
                    # parsed, or the error was caused by an index that was
                    # not in the request. That should never happen, but in
                    # case it does, better know it: let the original error
                    # go through.
                    raise

                missing_index = found[0]
                indices.remove(missing_index)

                errors.append({
                    'type': 'missing_index',
                    'index': missing_index,
                })

                if indices:
                    # Update the list of indices and try again.
                    # Note: we need to first empty the list of indices before
                    # updating it, otherwise the removed indices never get
                    # actually removed.
                    search = search.index().index(*indices)
                else:
                    # There is no index left in the list, return an empty
                    # result.
                    hits = []
                    total = 0
                    aggregations = {}
                    shards = None
                    break
            except RequestError as exception:
                # Try to handle it gracefully if we can find out what
                # input was bad and caused the exception.
                try:
                    bad_input = ELASTICSEARCH_PARSE_EXCEPTION_REGEX.findall(
                        exception.error)[-1]
                    # Loop over the original parameters to try to figure
                    # out which *key* had the bad input.
                    for key, value in kwargs.items():
                        if value == bad_input:
                            raise BadArgumentError(key)
                except IndexError:
                    # Not an ElasticsearchParseException exception
                    pass
                raise
        # NOTE(review): nothing follows the loop in this excerpt, so `hits`,
        # `total`, `aggregations`, `shards` and `errors` are computed but
        # not returned here -- confirm against the complete source.
Example #57
0
    def test_get_signature_history(self):
        """Check Crashes.get_signature_history().

        Covers a signature with history, a version without any result,
        the default date range and the missing argument errors.
        """
        crashes = Crashes(config=self.config)
        end_date = self.now
        start_date = end_date - datetime.timedelta(days=7)

        # A signature with two rows of history over the last week.
        history = crashes.get_signature_history(
            product='Firefox',
            version='8.0',
            signature='signature1',
            start_date=start_date,
            end_date=end_date,
        )
        hits = history['hits']

        eq_(len(hits), 2)
        eq_(len(hits), history['total'])

        today = datetimeutil.date_to_string(end_date.date())
        for hit in hits:
            eq_(hit['date'], today)

        expected_counts = (5, 14)
        for hit, count in zip(hits, expected_counts):
            eq_(hit['count'], count)

        # Each row is expressed as a share of the 19 crashes in total.
        for hit, count in zip(hits, expected_counts):
            eq_(round(hit['percent_of_total'], 2),
                round(float(count) / 19.0 * 100, 2))

        # Test no results
        history = crashes.get_signature_history(
            product='Firefox',
            version='9.0',
            signature='signature1',
            start_date=start_date,
            end_date=end_date,
        )
        eq_(history, {'hits': [], 'total': 0})

        # Test default date parameters
        history = crashes.get_signature_history(
            product='Fennec',
            version='11.0.1',
            signature='signature3',
        )
        only_hit = {
            'date': end_date.date().isoformat(),
            'count': 14,
            'percent_of_total': 100
        }
        eq_(history, {'hits': [only_hit], 'total': 1})

        # Test missing parameters
        incomplete_arguments = (
            {},
            {'product': 'Firefox'},
            {'product': 'Firefox', 'version': '8.0'},
            {'signature': 'signature1', 'version': '8.0'},
        )
        for arguments in incomplete_arguments:
            assert_raises(MissingArgumentError,
                          crashes.get_signature_history,
                          **arguments)
Example #58
0
    def test_get(self):
        """Check Products.get() with and without a `versions` argument."""
        products = Products(config=self.config)
        now = datetimeutil.utc_now().date()
        now_str = datetimeutil.date_to_string(now)

        def version_hit(product, version):
            # Expected hit when a `versions` argument is passed.
            return {
                "product": product,
                "version": version,
                "start_date": now_str,
                "end_date": now_str,
                "is_featured": False,
                "build_type": "Release",
                "throttle": 10.0,
                "has_builds": False,
            }

        def product_hit(product, version):
            # Expected hit in the per-product listing returned when no
            # argument is passed. Note the different key names.
            return {
                "product": product,
                "version": version,
                "start_date": now_str,
                "end_date": now_str,
                "throttle": 10.00,
                "featured": False,
                "release": "Release",
                "has_builds": False,
            }

        #......................................................................
        # Test 1: find one exact match for one product and one version
        params = {"versions": "Firefox:8.0"}
        res = products.get(**params)
        res_expected = {
            "hits": [version_hit("Firefox", "8.0")],
            "total": 1,
        }

        self.assertEqual(res, res_expected)

        #......................................................................
        # Test 2: Find two different products with their correct versions
        params = {"versions": ["Firefox:8.0", "Thunderbird:10.0.2b"]}
        res = products.get(**params)
        res_expected = {
            "hits": [
                version_hit("Firefox", "8.0"),
                version_hit("Thunderbird", "10.0.2b"),
            ],
            "total": 2,
        }

        self.assertEqual(res, res_expected)

        #......................................................................
        # Test 3: empty result, no products:version found
        params = {"versions": "Firefox:14.0"}
        res = products.get(**params)
        res_expected = {"hits": [], "total": 0}

        self.assertEqual(res, res_expected)

        #......................................................................
        # Test 4: Test products list is returned with no parameters
        # Note that the expired version is not returned
        params = {}
        res = products.get(**params)
        res_expected = {
            "products": ["Firefox", "Thunderbird", "Fennec"],
            "hits": {
                "Firefox": [product_hit("Firefox", "8.0")],
                "Thunderbird": [product_hit("Thunderbird", "10.0.2b")],
                "Fennec": [product_hit("Fennec", "11.0.1")],
            },
            "total": 3,
        }

        self.assertEqual(res, res_expected)

        #......................................................................
        # Test 5: An invalid versions list is passed, all versions are
        # returned
        params = {'versions': [1]}
        res = products.get(**params)
        res_expected = {
            "hits": [
                version_hit("Fennec", "11.0.1"),
                version_hit("Firefox", "8.0"),
                version_hit("Thunderbird", "10.0.2b"),
            ],
            "total": 3,
        }

        self.assertEqual(res, res_expected)
Example #59
0
    def test_get_exploitibility_with_pagination(self):
        """Check date filtering and paging of Crashes.get_exploitability().

        Inserts 10 signatures and exploitability reports for each of the
        last three days, then queries them with and without `page` and
        `batch`.
        """
        crashes = Crashes(config=self.config)
        yesterday_date = (self.now - datetime.timedelta(days=1)).date()
        day_before_yesterday = (self.now - datetime.timedelta(days=2)).date()

        def random_count():
            return random.randint(0, 10)

        # some number so it's not used by other tests or fixtures
        counter = 100

        exploit_rows = []
        signature_rows = []
        for day in (day_before_yesterday, yesterday_date, self.now):
            for index in range(10):
                signature_id = counter + 1
                exploit_rows.append(
                    "(%s, 3, 'Signature%s%s', '%s', %s, %s, %s, %s, %s)" % (
                        signature_id, counter, index, day,
                        random_count(), random_count(), random_count(),
                        random_count(), random_count(),
                    )
                )
                signature_rows.append(
                    "(%s, 'Signature%s%s', %s, '%s')" % (
                        signature_id, counter, index,
                        day.strftime('%Y%m%d%H'), day,
                    )
                )
                counter += 1

        cursor = self.connection.cursor()

        signatures_sql = """
        INSERT INTO signatures
            (signature_id, signature, first_build, first_report)
        VALUES
        """
        cursor.execute(signatures_sql + ',\n'.join(signature_rows))

        reports_sql = """
        INSERT INTO exploitability_reports
           (signature_id, product_version_id, signature, report_date,
            null_count, none_count, low_count, medium_count, high_count)
        VALUES
        """
        cursor.execute(reports_sql + ',\n'.join(exploit_rows))
        self.connection.commit()

        # No argument at all: everything is returned at once.
        everything = crashes.get_exploitability()
        self.assertEqual(len(everything['hits']), everything['total'])
        self.assertTrue(everything['total'] >= 30)

        # Restricting the dates leaves the oldest day out.
        recent = crashes.get_exploitability(
            start_date=yesterday_date,
            end_date=self.now,
        )
        self.assertEqual(len(recent['hits']), recent['total'])
        self.assertTrue(recent['total'] >= 20)
        self.assertTrue(recent['total'] < 30)

        # passing a `page` without `batch` will yield an error
        self.assertRaises(
            MissingArgumentError, crashes.get_exploitability, page=2
        )
        # `page` starts on one so anything smaller is bad
        self.assertRaises(
            BadArgumentError, crashes.get_exploitability, page=0, batch=15
        )

        # Note, `page=1` is on number line starting on 1
        first_page = crashes.get_exploitability(page=1, batch=15)
        self.assertNotEqual(len(first_page['hits']), first_page['total'])
        self.assertEqual(len(first_page['hits']), 15)
        self.assertTrue(first_page['total'] >= 30)
        # since it's ordered by `report_date`...
        report_dates = [hit['report_date'] for hit in first_page['hits']]
        self.assertEqual(
            report_dates[0],
            datetimeutil.date_to_string(self.now.date())
        )
        self.assertEqual(
            report_dates[-1],
            datetimeutil.date_to_string(yesterday_date)
        )

        second_page = crashes.get_exploitability(
            page=2,
            batch=5,
            start_date=day_before_yesterday,
            end_date=yesterday_date,
        )

        self.assertEqual(len(second_page['hits']), 5)
        self.assertTrue(second_page['total'] >= 20)
        self.assertTrue(second_page['total'] < 30)
        report_dates = [hit['report_date'] for hit in second_page['hits']]
        self.assertEqual(
            report_dates[0],
            datetimeutil.date_to_string(yesterday_date)
        )
Example #60
0
    def get_signature_history(self, **kwargs):
        """Return the history of a signature.

        See http://socorro.readthedocs.org/en/latest/middleware.html

        The `product`, `version` and `signature` arguments are required.
        `end_date` and `start_date` are declared with the current UTC time
        and one week before it as their respective fallback values.

        Two special `signature` values are recognized: '##null##' looks for
        rows without any signature, and '##empty##' looks for the empty
        string.

        Returns a dict with a `hits` list, one dict per row with the `date`,
        `count` and `percent_of_total` keys, and a `total` equal to the
        number of hits.

        Raises MissingArgumentError for the first required argument that is
        missing or empty.
        """
        default_end = datetimeutil.utc_now()
        default_start = default_end - datetime.timedelta(days=7)

        params = external_common.parse_arguments(
            [
                ('product', None, 'str'),
                ('version', None, 'str'),
                ('signature', None, 'str'),
                ('end_date', default_end, 'datetime'),
                ('start_date', default_start, 'datetime'),
            ],
            kwargs
        )

        required = ('product', 'version', 'signature')
        missing = next((key for key in required if not params[key]), None)
        if missing is not None:
            raise MissingArgumentError(missing)

        # The clause is picked before the '##empty##' marker gets replaced.
        signature_where = (
            'AND signature IS NULL'
            if params.signature == '##null##'
            else 'AND signature = %(signature)s'
        )

        if params.signature == '##empty##':
            params.signature = ''

        # Only the signature clause is interpolated here, the doubled
        # percent signs leave the named placeholders for the query itself.
        sql = """
            /* external.postgresql.crashes.Crashes.get_signature_history */
            WITH hist AS (
                SELECT
                    report_date,
                    report_count
                FROM
                    tcbs JOIN signatures using (signature_id)
                         JOIN product_versions using (product_version_id)
                WHERE
                    report_date BETWEEN %%(start_date)s AND %%(end_date)s
                    AND product_name = %%(product)s
                    AND version_string = %%(version)s
                    %s
                GROUP BY
                    report_date, report_count
                ORDER BY 1
            ),
            scaling_window AS (
                SELECT
                    hist.*,
                    SUM(report_count) over () AS total_crashes
                FROM hist
            )
            SELECT
                report_date,
                report_count,
                report_count / total_crashes::float * 100 AS percent_of_total
            FROM scaling_window
            ORDER BY report_date DESC
        """ % signature_where

        rows = self.query(
            sql,
            params,
            error_message=(
                'Failed to retrieve signature history from PostgreSQL'
            )
        )

        # Transforming the results into what we want
        columns = ('date', 'count', 'percent_of_total')
        history = [dict(zip(columns, row)) for row in rows]
        for entry in history:
            entry['date'] = datetimeutil.date_to_string(entry['date'])

        return {
            'hits': history,
            'total': len(history)
        }