Code example #1
    def get_field_name(self, value, full=True):
        try:
            field_ = self.all_fields[value]
        except KeyError:
            raise BadArgumentError(
                value,
                msg='Unknown field "%s"' % value
            )

        if not field_['is_returned']:
            # Returning this field is not allowed.
            raise BadArgumentError(
                value,
                msg='Field "%s" is not allowed to be returned' % value
            )

        field_name = '%s.%s' % (
            field_['namespace'],
            field_['in_database_name']
        )

        if full and field_['has_full_version']:
            # If the param has a full version, that means what matters
            # is the full string, and not its individual terms.
            field_name += '.full'

        return field_name
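
A minimal usage sketch of the lookup above (hedged: `fields_service` and the field name 'product' are illustrative, not taken from the snippet):

    # Sketch: a known, returnable field resolves to
    # "<namespace>.<in_database_name>" (plus ".full" when a full version
    # exists); unknown or non-returned fields raise BadArgumentError.
    try:
        name = fields_service.get_field_name('product')
    except BadArgumentError as e:
        print('rejected: %s' % e)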
Code example #2
    def get_signatures(self, **kwargs):
        """Return top crashers by signatures.

        See http://socorro.readthedocs.org/en/latest/middleware.html#tcbs
        """
        filters = [
            ("product", None, "str"),
            ("version", None, "str"),
            ("crash_type", "all", "str"),
            ("to_date", datetimeutil.utc_now(), "datetime"),
            ("duration", datetime.timedelta(7), "timedelta"),
            ("os", None, "str"),
            ("limit", 100, "int"),
            ("date_range_type", None, "str")
        ]

        params = external_common.parse_arguments(filters, kwargs)
        params.logger = logger

        # twoPeriodTopCrasherComparison() derives its start date by
        # subtracting the duration from to_date.
        if params.duration > datetime.timedelta(days=30):
            raise BadArgumentError('Duration too long. Max 30 days.')

        with self.get_connection() as connection:
            return tcbs.twoPeriodTopCrasherComparison(connection, params)
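
A rough call sketch for the duration guard (hedged: `tcbs_service` is a placeholder for whatever object exposes `get_signatures`; the product and version values are illustrative):

    # Sketch: durations above 30 days are rejected before any database
    # work happens; everything else is delegated to
    # tcbs.twoPeriodTopCrasherComparison().
    tcbs_service.get_signatures(
        product='Firefox',
        version='30.0',
        duration=datetime.timedelta(days=40),  # raises BadArgumentError
    )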
Code example #3
File: correlations.py Project: jorgenpt/socorro
    def get(self, **kwargs):
        filters = [
            ("report_date", None, "datetime"),
            ("report_type", None, "str"),
            ("product", None, "str"),
            ("version", None, "str"),
            ("signature", None, "str"),
            ("platform", None, "str"),
            ("min_crashes", 10, "int"),
            ("min_baseline_diff", 0.005, "float"),
        ]

        params = external_common.parse_arguments(filters, kwargs)

        hits = []
        if params['report_type'] == 'interesting-addons':
            hits = self.interesting_addons(params)
        elif params['report_type'] == 'interesting-modules':
            hits = self.interesting_modules(params)
        elif params['report_type'] == 'interesting-addons-with-version':
            hits = self.interesting_addons_with_version(params)
        elif params['report_type'] == 'interesting-modules-with-version':
            hits = self.interesting_modules_with_version(params)
        elif params['report_type'] == 'core-counts':
            hits = self.core_counts(params)
        else:
            raise BadArgumentError(
                'report_type',
                received=params['report_type']
            )

        return {'hits': hits, 'total': len(hits)}
Code example #4
    def test_mapping(self, mapping):
        """Verify that a mapping is correct.

        This function does so by first creating a new, temporary index in
        elasticsearch using the mapping. It then takes some recent crash
        reports that are in elasticsearch and tries to insert them in the
        temporary index. Any failure in any of those steps will raise an
        exception. If any is raised, that means the mapping is incorrect in
        some way (either it doesn't validate against elasticsearch's rules,
        or it is not compatible with the data we currently store).

        If no exception is raised, the mapping is likely correct.

        This function is to be used in any place that can change the
        `storage_mapping` field in any Super Search Field.
        Methods `create_field` and `update_field` use it, see above.
        """
        temp_index = 'socorro_mapping_test'

        es_connection = self.get_connection()

        # Import at runtime to avoid dependency circle.
        from socorro.external.es.index_creator import IndexCreator
        index_creator = IndexCreator(self.config)
        try:
            index_creator.create_index(
                temp_index,
                mapping,
            )

            now = datetimeutil.utc_now()
            last_week = now - datetime.timedelta(days=7)
            current_indices = self.generate_list_of_indexes(last_week, now)

            crashes_sample = es_connection.search(
                index=current_indices,
                doc_type=self.config.elasticsearch.elasticsearch_doctype,
                size=self.config.elasticsearch.mapping_test_crash_number,
            )
            crashes = [x['_source'] for x in crashes_sample['hits']['hits']]

            for crash in crashes:
                es_connection.index(
                    index=temp_index,
                    doc_type=self.config.elasticsearch.elasticsearch_doctype,
                    body=crash,
                )
        except elasticsearch.exceptions.ElasticsearchException as e:
            raise BadArgumentError(
                'storage_mapping',
                msg='Indexing existing data in Elasticsearch failed with the '
                    'new mapping. Error is: %s' % str(e),
            )
        finally:
            try:
                index_creator.get_index_client().delete(temp_index)
            except elasticsearch.exceptions.NotFoundError:
                # If the index does not exist (if the index creation failed
                # for example), we don't need to do anything.
                pass
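
The docstring above names `create_field` and `update_field` as the intended callers; a hedged sketch of that guard (the method name and the persistence step are assumptions):

    # Sketch: validate a candidate mapping before persisting it, so an
    # invalid mapping never reaches a live index.
    def update_field_storage_mapping(self, new_mapping):
        # Raises BadArgumentError('storage_mapping', ...) on any
        # indexing failure in the temporary test index.
        self.test_mapping(new_mapping)
        # ...persist new_mapping into the Super Search Field here...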
Code example #5
File: search_common.py Project: Tchanders/socorro
    def fix_date_parameter(self, parameters):
        """Correct the date parameter.

        If there is no date parameter, set default values. Otherwise, make
        sure there is exactly one lower bound value and one upper bound
        value.
        """
        default_date_range = datetime.timedelta(
            days=self.config.search_default_date_range)
        maximum_date_range = datetime.timedelta(
            days=self.config.search_maximum_date_range)

        if not parameters.get('date'):
            now = datetimeutil.utc_now()
            lastweek = now - default_date_range

            parameters['date'] = []
            parameters['date'].append(
                SearchParam('date', lastweek, '>=', 'datetime'))
            parameters['date'].append(
                SearchParam('date', now, '<=', 'datetime'))
        else:
            lower_than = None
            greater_than = None
            for param in parameters['date']:
                if ('<' in param.operator
                        and (not lower_than or
                             (lower_than and lower_than.value > param.value))):
                    lower_than = param
                if ('>' in param.operator and
                    (not greater_than or
                     (greater_than and greater_than.value < param.value))):
                    greater_than = param

            # Remove all the existing parameters so we have exactly
            # one lower value and one greater value
            parameters['date'] = []

            if not lower_than:
                # add a lower than that is now
                lower_than = SearchParam('date', datetimeutil.utc_now(), '<=',
                                         'datetime')

            if not greater_than:
                # add a greater than that is lower_than minus the date range
                greater_than = SearchParam(
                    'date', lower_than.value - default_date_range, '>=',
                    'datetime')

            # Verify the date range is not too big.
            delta = lower_than.value - greater_than.value
            if delta > maximum_date_range:
                raise BadArgumentError(
                    'date',
                    msg='Date range is bigger than %s days' %
                    self.config.search_maximum_date_range)

            parameters['date'].append(lower_than)
            parameters['date'].append(greater_than)
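
To make the else-branch concrete, a small hedged example (only the upper bound is supplied; `search` stands for the object defining `fix_date_parameter`):

    # Sketch: given a single '<=' bound, fix_date_parameter() derives the
    # missing '>=' bound by subtracting the default date range.
    now = datetimeutil.utc_now()
    parameters = {'date': [SearchParam('date', now, '<=', 'datetime')]}
    search.fix_date_parameter(parameters)
    # parameters['date'] now holds exactly two SearchParam objects:
    # the '<=' bound at `now`, and a '>=' bound at
    # `now - default_date_range`.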
Code example #6
    def get(self, **kwargs):
        filters = [
            ("report_types", None, ["list", "str"]),
            ("report_type", None, "str"),
            ("signature", None, "str"),
            ("start_date", None, "datetime"),
            ("end_date", None, "datetime"),
            ("versions", None, ["list", "str"]),
        ]

        params = external_common.parse_arguments(filters, kwargs)
        if not params.get('report_types') and params.get('report_type'):
            # to support the legacy case
            individual_report = True
            report_types = [params['report_type']]
        else:
            individual_report = False
            report_types = params['report_types']

        # check that all the report types are recognized
        for report_type in report_types:
            query_params = report_type_sql.get(report_type, {})
            known_report_types = ('products', 'distinct_install',
                                  'exploitability', 'devices', 'graphics')
            if (report_type not in known_report_types
                    and 'first_col' not in query_params):
                raise BadArgumentError(report_type)

        products = []
        versions = []

        # Get information about the versions
        util_service = Util(config=self.context)
        versions_info = util_service.versions_info(**params)
        if versions_info:
            for elem in versions_info:
                products.append(versions_info[elem]["product_name"])
                versions.append(str(versions_info[elem]["version_string"]))

        # This MUST be a tuple otherwise it gets cast to an array
        params['product'] = tuple(products)
        params['version'] = tuple(versions)

        all_results = {}
        with self.get_connection() as connection:
            for report_type in report_types:
                result_cols, query_string, query_parameters = self._get_query(
                    report_type, params)
                sql_results = self.query(query_string,
                                         params=query_parameters,
                                         connection=connection)
                results = [dict(zip(result_cols, row)) for row in sql_results]
                all_results[report_type] = results

            if individual_report:
                return all_results.values()[0]
            else:
                return {'reports': all_results}
Code example #7
    def get_adu_by_signature(self, **kwargs):
        """Return a list of ADUs and crash counts by signature and ADU date
        """
        now = datetimeutil.utc_now().date()
        lastweek = now - datetime.timedelta(weeks=1)

        filters = [
            ("start_date", lastweek, "date"),
            ("end_date", now, "date"),
            ("signature", None, "str"),
            ("channel", None, "str"),
            ("product_name", None, "str"),
        ]

        params = external_common.parse_arguments(filters, kwargs)

        for param in ("start_date", "end_date", "signature", "channel"):
            if not params[param]:
                raise MissingArgumentError(param)

        if (params.end_date -
                params.start_date) > datetime.timedelta(days=365):
            raise BadArgumentError('Duration too long. Max 365 days.')

        sql_query = """
            SELECT
                product_name,
                signature,
                adu_date::TEXT,
                build_date::TEXT,
                buildid::TEXT,
                crash_count,
                adu_count,
                os_name,
                channel
            FROM crash_adu_by_build_signature
            WHERE adu_date BETWEEN %(start_date)s AND %(end_date)s
            AND product_name = %(product_name)s
            AND channel = %(channel)s
            AND signature = %(signature)s
            ORDER BY buildid
        """

        error_message = (
            "Failed to retrieve crash ADU by build signature from PostgreSQL")
        results = self.query(sql_query, params, error_message=error_message)

        fields = [
            'product_name', 'signature', 'adu_date', 'build_date', 'buildid',
            'crash_count', 'adu_count', 'os_name', 'channel'
        ]
        crashes = [dict(zip(fields, row)) for row in results]

        return {"hits": crashes, "total": len(crashes)}
Code example #8
File: query.py Project: rhelmer/socorro-lib
    def get(self, **kwargs):
        '''Return the result of a custom query. '''
        params = external_common.parse_arguments(self.filters, kwargs)

        if not params.query:
            raise MissingArgumentError('query')

        try:
            query = json.loads(params.query)
        except ValueError:
            raise BadArgumentError(
                'query',
                msg="Invalid JSON value for parameter 'query'"
            )

        es = pyelasticsearch.ElasticSearch(
            urls=self.config.elasticsearch_urls,
            timeout=self.config.elasticsearch_timeout_extended,
        )

        # Set indices.
        indices = []
        if not params.indices:
            # By default, use the last two indices.
            today = utc_now()
            last_week = today - datetime.timedelta(days=7)

            indices = self.generate_list_of_indexes(last_week, today)
        elif len(params.indices) == 1 and params.indices[0] == 'ALL':
            # If we want all indices, just do nothing.
            pass
        else:
            indices = params.indices

        search_args = {}
        if indices:
            search_args['index'] = indices
            search_args['doc_type'] = self.config.elasticsearch_doctype

        try:
            results = es.search(
                query,
                **search_args
            )
        except ElasticHttpNotFoundError, e:
            missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0]
            raise ResourceNotFound(
                "elasticsearch index '%s' does not exist" % missing_index
            )
Code example #9
File: bugs.py Project: rhelmer/socorro-lib
    def post(self, **kwargs):
        """Return a list of signatures-to-bug_ids or bug_ids-to-signatures
           associations. """
        params = external_common.parse_arguments(self.filters, kwargs)

        if not params['signatures'] and not params['bug_ids']:
            raise MissingArgumentError('specify one of signatures or bug_ids')
        elif params['signatures'] and params['bug_ids']:
            raise BadArgumentError('specify only one of signatures or bug_ids')

        sql_params = []
        if params['signatures']:
            sql_params.append(tuple(params.signatures))

            sql = """/* socorro.external.postgresql.bugs.Bugs.get */
                SELECT ba.signature, bugs.id
                FROM bugs
                    JOIN bug_associations AS ba ON bugs.id = ba.bug_id
                WHERE EXISTS(
                    SELECT 1 FROM bug_associations
                    WHERE bug_associations.bug_id = bugs.id
                    AND signature IN %s
                )
            """
        elif params['bug_ids']:
            sql_params.append(tuple(params.bug_ids))

            sql = """/* socorro.external.postgresql.bugs.Bugs.get */
                SELECT ba.signature, bugs.id
                FROM bugs
                    JOIN bug_associations AS ba ON bugs.id = ba.bug_id
                WHERE bugs.id IN %s
            """

        error_message = "Failed to retrieve bug associations from PostgreSQL"
        results = self.query(sql, sql_params, error_message=error_message)

        bugs = []
        for row in results:
            bug = dict(zip(("signature", "id"), row))
            bugs.append(bug)

        return {"hits": bugs, "total": len(bugs)}
Code example #10
File: backfill.py Project: rhelmer/socorro-lib
    def get(self, **kwargs):

        filters = [
            ("backfill_type", None, "str"),
            ("reports_clean", True, "bool"),
            ("check_period", '01:00:00', "str"),
            ("table_name", None, "str"),
            ("update_day", None, "datetime"),
            ("start_date", None, "datetime"),
            ("end_date", None, "datetime"),
        ]

        params = external_common.parse_arguments(filters, kwargs)

        if not params.backfill_type:
            raise MissingArgumentError('backfill_type')

        date_param = ['update_day', 'start_date', 'end_date']
        for i in date_param:
            if i in kwargs:
                params[i] = str(params[i].date())

        try:
            query = 'SELECT backfill_%(backfill_type)s (%(params)s); '
            required_params = BACKFILL_PARAMETERS[params.backfill_type]
            query_params = [(i, params[i]) for i in required_params]
            query_params_str = ', '.join('%(' + str(i[0]) + ')s'
                                         for i in query_params)
            query = query % {
                'backfill_type': params.backfill_type,
                'params': query_params_str
            }
        except KeyError:
            # Unknown backfill_type, or a required parameter was missing.
            raise BadArgumentError(kwargs['backfill_type'])

        error_message = "Failed to retrieve backfill %s from PostgreSQL"
        error_message = error_message % kwargs['backfill_type']
        results = self.query(query, params, error_message=error_message)
        return results
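
To illustrate the query assembly, a self-contained sketch (hedged: the 'adu' backfill type and its required parameter are assumed for the example, not read from BACKFILL_PARAMETERS):

    # Sketch reproducing the string assembly above for a hypothetical
    # backfill_type='adu' that requires only 'update_day'.
    required_params = ('update_day',)
    query_params_str = ', '.join('%(' + name + ')s'
                                 for name in required_params)
    query = 'SELECT backfill_%(backfill_type)s (%(params)s); ' % {
        'backfill_type': 'adu',
        'params': query_params_str,
    }
    assert query == 'SELECT backfill_adu (%(update_day)s); '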
Code example #11
File: report.py Project: rhelmer/socorro-lib
    def get_list(self, **kwargs):
        """
        List all crashes with a given signature and return them.

        Both `from_date` and `to_date` (and their aliases `from` and `to`)
        are required and cannot be more than 30 days apart.

        Optional arguments: see SearchCommon.get_parameters()

        """
        # aliases
        if "from" in kwargs and "from_date" not in kwargs:
            kwargs["from_date"] = kwargs.get("from")
        if "to" in kwargs and "to_date" not in kwargs:
            kwargs["to_date"] = kwargs.get("to")

        if not kwargs.get('from_date'):
            raise MissingArgumentError('from_date')
        if not kwargs.get('to_date'):
            raise MissingArgumentError('to_date')

        from_date = datetimeutil.datetimeFromISOdateString(kwargs['from_date'])
        to_date = datetimeutil.datetimeFromISOdateString(kwargs['to_date'])
        span_days = (to_date - from_date).days
        if span_days > 30:
            raise BadArgumentError(
                'Span between from_date and to_date cannot be more than '
                '30 days')

        # start with the default
        sort_order = {'key': 'date_processed', 'direction': 'DESC'}
        if 'sort' in kwargs:
            sort_order['key'] = kwargs.pop('sort')
            _recognized_sort_orders = (
                'date_processed',
                'uptime',
                'user_comments',
                'uuid',
                'uuid_text',
                'product',
                'version',
                'build',
                'signature',
                'url',
                'os_name',
                'os_version',
                'cpu_name',
                'cpu_info',
                'address',
                'reason',
                'last_crash',
                'install_age',
                'hangid',
                'process_type',
                'release_channel',
                'install_time',
                'duplicate_of',
            )
            if sort_order['key'] not in _recognized_sort_orders:
                raise BadArgumentError(
                    '%s is not a recognized sort order key' %
                    sort_order['key'])
            sort_order['direction'] = 'ASC'
            if 'reverse' in kwargs:
                if kwargs.pop('reverse'):
                    sort_order['direction'] = 'DESC'

        include_raw_crash = kwargs.get('include_raw_crash') or False
        params = search_common.get_parameters(kwargs)

        if not params["signature"]:
            raise MissingArgumentError('signature')

        params["terms"] = params["signature"]
        params["search_mode"] = "is_exactly"

        # Default mode falls back to starts_with for postgres
        if params["plugin_search_mode"] == "default":
            params["plugin_search_mode"] = "starts_with"

        # Limiting to a signature
        if params["terms"]:
            params["terms"] = self.prepare_terms(params["terms"],
                                                 params["search_mode"])

        # Searching for terms in plugins
        if params["report_process"] == "plugin" and params["plugin_terms"]:
            params["plugin_terms"] = " ".join(params["plugin_terms"])
            params["plugin_terms"] = self.prepare_terms(
                params["plugin_terms"], params["plugin_search_mode"])

        # Get information about the versions
        util_service = Util(config=self.context)
        params["versions_info"] = util_service.versions_info(**params)

        # Parsing the versions
        params["versions_string"] = params["versions"]
        (params["versions"],
         params["products"]) = self.parse_versions(params["versions"],
                                                   params["products"])

        if hasattr(self.context, 'webapi'):
            context = self.context.webapi
        else:
            # old middleware
            context = self.context
        # Changing the OS ids to OS names
        for i, elem in enumerate(params["os"]):
            for platform in context.platforms:
                if platform["id"][:3] == elem[:3]:
                    params["os"][i] = platform["name"]

        # Creating the parameters for the sql query
        sql_params = {}

        # Preparing the different parts of the sql query
        sql_select = """
            SELECT
                r.date_processed,
                r.uptime,
                r.user_comments,
                r.uuid::uuid,
                r.uuid as uuid_text,
                r.product,
                r.version,
                r.build,
                r.signature,
                r.url,
                r.os_name,
                r.os_version,
                r.cpu_name,
                r.cpu_info,
                r.address,
                r.reason,
                r.last_crash,
                r.install_age,
                r.hangid,
                r.process_type,
                r.release_channel,
                (r.client_crash_date - (r.install_age * INTERVAL '1 second'))
                  AS install_time
        """
        if not include_raw_crash:
            sql_select += """
                , rd.duplicate_of
            """

        wrapped_select = """
            WITH report_slice AS (
              %s
            ), dupes AS (
                SELECT
                    report_slice.uuid,
                    rd.duplicate_of
                FROM reports_duplicates rd
                JOIN report_slice ON report_slice.uuid_text = rd.uuid
                WHERE
                    rd.date_processed BETWEEN %%(from_date)s AND %%(to_date)s
            )

            SELECT
                rs.*,
                dupes.duplicate_of,
                rc.raw_crash
            FROM report_slice rs
            LEFT OUTER JOIN dupes USING (uuid)
            LEFT OUTER JOIN raw_crashes rc ON
                rs.uuid = rc.uuid
                AND
                rc.date_processed BETWEEN %%(from_date)s AND %%(to_date)s
        """

        sql_from = self.build_reports_sql_from(params)

        if not include_raw_crash:
            sql_from = """%s
                LEFT OUTER JOIN reports_duplicates rd ON r.uuid = rd.uuid
            """ % sql_from

        sql_where, sql_params = self.build_reports_sql_where(
            params, sql_params, self.context)

        sql_order = """
            ORDER BY %(key)s %(direction)s
        """ % sort_order

        sql_limit, sql_params = self.build_reports_sql_limit(
            params, sql_params)

        # Assembling the query (the same pieces are joined either way; the
        # raw-crash variant is wrapped with wrapped_select further down)
        sql_query = "\n".join(
            ("/* socorro.external.postgresql.report.Report.list */",
             sql_select, sql_from, sql_where, sql_order, sql_limit))

        # Query for counting the results
        sql_count_query = "\n".join(
            ("/* socorro.external.postgresql.report.Report.list */",
             "SELECT count(*)", sql_from, sql_where))

        # Querying the DB
        with self.get_connection() as connection:

            total = self.count(
                sql_count_query,
                sql_params,
                error_message="Failed to count crashes from reports.",
                connection=connection)

            # No need to call Postgres if we know there will be no results
            if total:

                if include_raw_crash:
                    sql_query = wrapped_select % sql_query

                results = self.query(
                    sql_query,
                    sql_params,
                    error_message="Failed to retrieve crashes from reports",
                    connection=connection)
            else:
                results = []

        # Transforming the results into what we want
        fields = (
            "date_processed",
            "uptime",
            "user_comments",
            "uuid",
            "uuid",  # the uuid::text one
            "product",
            "version",
            "build",
            "signature",
            "url",
            "os_name",
            "os_version",
            "cpu_name",
            "cpu_info",
            "address",
            "reason",
            "last_crash",
            "install_age",
            "hangid",
            "process_type",
            "release_channel",
            "install_time",
            "duplicate_of",
        )
        if include_raw_crash:
            fields += ("raw_crash", )
        crashes = []
        for row in results:
            crash = dict(zip(fields, row))
            if include_raw_crash and crash['raw_crash']:
                crash['raw_crash'] = json.loads(crash['raw_crash'])
            for i in crash:
                try:
                    crash[i] = datetimeutil.date_to_string(crash[i])
                except TypeError:
                    pass
            crashes.append(crash)

        return {"hits": crashes, "total": total}
Code example #12
File: signature_summary.py Project: walikhan/socorro
    def get(self, **kwargs):
        filters = [
            ("report_type", None, "str"),
            ("signature", None, "str"),
            ("start_date", None, "datetime"),
            ("end_date", None, "datetime"),
            ("versions", None, ["list", "str"]),
        ]

        params = external_common.parse_arguments(filters, kwargs)

        products = []
        versions = []

        # Get information about the versions
        util_service = Util(config=self.context)
        versions_info = util_service.versions_info(**params)
        if versions_info:
            for elem in versions_info:
                products.append(versions_info[elem]["product_name"])
                versions.append(str(versions_info[elem]["version_string"]))

        # This MUST be a tuple otherwise it gets cast to an array
        params['product'] = tuple(products)
        params['version'] = tuple(versions)

        if params['product'] and params['report_type'] != 'products':
            product_list = ' AND product_name IN %s '
        else:
            product_list = ''

        if params['version'] and params['report_type'] != 'products':
            version_list = ' AND version_string IN %s '
        else:
            version_list = ''

        query_params = report_type_sql.get(params['report_type'], {})
        known_report_types = ('products', 'distinct_install',
                              'exploitability', 'devices', 'graphics')
        if (params['report_type'] not in known_report_types
                and 'first_col' not in query_params):
            raise BadArgumentError('report_type')

        self.connection = self.database.connection()
        cursor = self.connection.cursor()

        if params['report_type'] == 'products':
            result_cols = ['product_name',
                           'version_string',
                           'report_count',
                           'percentage']
            query_string = """
            WITH crashes as (
                SELECT
                    product_name as category
                    , version_string
                    , SUM(report_count) as report_count
                FROM signature_summary_products
                    JOIN signatures USING (signature_id)
                WHERE signatures.signature = %s
                    AND report_date >= %s
                    AND report_date < %s
                GROUP BY product_name, version_string
            ),
            totals as (
                SELECT
                    category
                    , version_string
                    , report_count
                    , SUM(report_count) OVER () as total_count
                FROM crashes
            )
            SELECT category
                , version_string
                , report_count
                , round((report_count * 100::numeric)/total_count,3)::TEXT
                as percentage
            FROM totals
            ORDER BY report_count DESC"""
            query_parameters = (params['signature'],
                                params['start_date'],
                                params['end_date'])

        elif params['report_type'] == 'distinct_install':
            result_cols = ['product_name',
                           'version_string',
                           'crashes',
                           'installations']
            query_string = """
                SELECT product_name
                    , version_string
                    , SUM(crash_count) AS crashes
                    , SUM(install_count) AS installations
                FROM signature_summary_installations
                    JOIN signatures USING (signature_id)
                WHERE
                    signatures.signature = %s
                    AND report_date >= %s
                    AND report_date < %s
            """
            query_string += product_list
            query_string += version_list
            query_string += """
                GROUP BY product_name, version_string
                ORDER BY crashes DESC
            """
            query_parameters = (
                params['signature'],
                params['start_date'],
                params['end_date']
            )

            if product_list:
                query_parameters += (params['product'],)
            if version_list:
                query_parameters += (params['version'],)

        elif params['report_type'] == 'exploitability':
            # Note, even if params['product'] is something we can't use
            # that in this query
            result_cols = [
                'report_date',
                'null_count',
                'none_count',
                'low_count',
                'medium_count',
                'high_count',
            ]
            query_string = """
                SELECT
                    cast(report_date as TEXT),
                    SUM(null_count),
                    SUM(none_count),
                    SUM(low_count),
                    SUM(medium_count),
                    SUM(high_count)
                FROM exploitability_reports
                    JOIN signatures USING (signature_id)
                WHERE
                    signatures.signature = %s
                    AND report_date >= %s
                    AND report_date < %s
            """
            query_string += product_list
            query_string += version_list
            query_string += """
                GROUP BY report_date
                ORDER BY report_date DESC
            """
            query_parameters = (
                params['signature'],
                params['start_date'],
                params['end_date'],
            )

            if product_list:
                query_parameters += (params['product'],)
            if version_list:
                query_parameters += (params['version'],)

        elif params['report_type'] == 'devices':
            result_cols = [
                'cpu_abi',
                'manufacturer',
                'model',
                'version',
                'report_count',
                'percentage',
            ]
            query_string = """
                WITH crashes as (
                    SELECT
                        android_devices.android_cpu_abi as cpu_abi,
                        android_devices.android_manufacturer as manufacturer,
                        android_devices.android_model as model,
                        android_devices.android_version as version,
                        SUM(report_count) as report_count
                    FROM signature_summary_device
                        JOIN signatures USING (signature_id)
                        JOIN android_devices ON
                            signature_summary_device.android_device_id =
                            android_devices.android_device_id
                    WHERE signatures.signature = %s
                        AND report_date >= %s
                        AND report_date < %s
            """
            query_string += product_list
            query_string += version_list
            query_string += """
                    GROUP BY
                        android_devices.android_cpu_abi,
                        android_devices.android_manufacturer,
                        android_devices.android_model,
                        android_devices.android_version
                ),
                totals as (
                    SELECT
                        cpu_abi,
                        manufacturer,
                        model,
                        version,
                        report_count,
                        SUM(report_count) OVER () as total_count
                    FROM crashes
                )
                SELECT
                    cpu_abi,
                    manufacturer,
                    model,
                    version,
                    report_count,
                    round((report_count * 100::numeric)/total_count,3)::TEXT
                        as percentage
                FROM totals
                ORDER BY report_count DESC
            """
            query_parameters = (
                params['signature'],
                params['start_date'],
                params['end_date'],
            )

            if product_list:
                query_parameters += (params['product'],)
            if version_list:
                query_parameters += (params['version'],)

        elif params['report_type'] == 'graphics':
            result_cols = [
                'vendor_hex',
                'adapter_hex',
                'vendor_name',
                'adapter_name',
                'report_count',
                'percentage',
            ]
            query_string = """
                WITH crashes as (
                    SELECT
                        graphics_device.vendor_hex as vendor_hex,
                        graphics_device.adapter_hex as adapter_hex,
                        graphics_device.vendor_name as vendor_name,
                        graphics_device.adapter_name as adapter_name,
                        SUM(report_count) as report_count
                    FROM signature_summary_graphics
                        JOIN signatures USING (signature_id)
                        JOIN graphics_device ON
                            signature_summary_graphics.graphics_device_id =
                            graphics_device.graphics_device_id
                    WHERE signatures.signature = %s
                        AND report_date >= %s
                        AND report_date < %s
            """
            query_string += product_list
            query_string += version_list
            query_string += """
                    GROUP BY
                        graphics_device.graphics_device_id
                ),
                totals as (
                    SELECT
                        vendor_hex,
                        adapter_hex,
                        vendor_name,
                        adapter_name,
                        report_count,
                        SUM(report_count) OVER () as total_count
                    FROM crashes
                )
                SELECT
                    vendor_hex,
                    adapter_hex,
                    vendor_name,
                    adapter_name,
                    report_count,
                    round((report_count * 100::numeric)/total_count,3)::TEXT
                        as percentage
                FROM totals
                ORDER BY report_count DESC
            """
            query_parameters = (
                params['signature'],
                params['start_date'],
                params['end_date'],
            )

            if product_list:
                query_parameters += (params['product'],)
            if version_list:
                query_parameters += (params['version'],)

        elif params['report_type'] in report_type_columns:
            result_cols = ['category', 'report_count', 'percentage']
            query_string = """
                WITH crashes AS (
                    SELECT """
            query_string += report_type_columns[params['report_type']]
            query_string += """ AS category
                        , sum(report_count) AS report_count
                    FROM signature_summary_"""
            query_string += params['report_type']
            query_string += """
                        JOIN signatures USING (signature_id)
                    WHERE
                        signatures.signature = %s
                        AND report_date >= %s
                        AND report_date < %s
            """
            query_string += product_list
            query_string += version_list
            query_string += """
                    GROUP BY category
                ),
                totals AS (
                    SELECT
                        category
                        , report_count
                        , sum(report_count) OVER () as total_count
                    FROM crashes
                )
                SELECT category
                    , report_count
                    , round((report_count * 100::numeric)/total_count,3)::TEXT
                as percentage
                FROM totals
                ORDER BY report_count DESC
            """
            query_parameters = (
                params['signature'],
                params['start_date'],
                params['end_date']
            )

            if product_list:
                query_parameters += (params['product'],)
            if version_list:
                query_parameters += (params['version'],)

        sql_results = db.execute(cursor, query_string, query_parameters)
        results = []
        for row in sql_results:
            newrow = dict(zip(result_cols, row))
            results.append(newrow)

        # Closing the connection here because we're not using
        # the parent class' query()
        self.connection.close()
        return results
Code example #13
File: graphics_devices.py Project: snorp/socorro
    def post(self, **kwargs):
        try:
            data = json.loads(kwargs['data'])
            if data is None:
                raise BadArgumentError('POST data sent was null')
        except AttributeError:
            raise MissingArgumentError('No POST data sent')
        except ValueError:
            raise BadArgumentError('Posted data not valid JSON')
        except TypeError:
            # happens if kwargs['data'] is None
            raise BadArgumentError('POST data sent was empty')

        # make an upsert for each thing and rollback if any failed
        upsert = """
        WITH
        update_graphics_device AS (
            UPDATE graphics_device
            SET
                adapter_name = %(adapter_name)s,
                vendor_name = %(vendor_name)s
            WHERE
                vendor_hex = %(vendor_hex)s
                AND
                adapter_hex = %(adapter_hex)s
            RETURNING 1
        ),
        insert_graphics_device AS (
            INSERT INTO
                graphics_device
                (vendor_hex, adapter_hex, vendor_name, adapter_name)
            SELECT
                %(vendor_hex)s AS vendor_hex,
                %(adapter_hex)s AS adapter_hex,
                %(vendor_name)s AS vendor_name,
                %(adapter_name)s AS adapter_name
            WHERE NOT EXISTS (
                SELECT * FROM graphics_device
                WHERE
                    vendor_hex = %(vendor_hex)s
                    AND
                    adapter_hex = %(adapter_hex)s
                LIMIT 1
            )
            RETURNING 2
        )
        SELECT * FROM update_graphics_device
        UNION
        ALL SELECT * FROM insert_graphics_device
        """

        with self.get_connection() as connection:
            try:
                for row in data:
                    self.query(upsert, row, connection=connection)
                connection.commit()
                return True
            except (psycopg2.Error, KeyError):
                # KeyErrors happen if any of the rows don't have
                # all the required keys
                connection.rollback()
                return False
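
Usage sketch (hedged: `graphics_service` is a placeholder, and the vendor/adapter values are illustrative):

    # Sketch: the payload is a JSON-encoded list of rows, each carrying
    # the four keys the upsert binds; post() returns True on commit and
    # False after a rollback.
    payload = json.dumps([{
        'vendor_hex': '0x8086',
        'adapter_hex': '0x0046',
        'vendor_name': 'Intel Corporation',
        'adapter_name': 'HD Graphics',
    }])
    ok = graphics_service.post(data=payload)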
Code example #14
File: supersearch.py Project: walikhan/socorro
    def get(self, **kwargs):
        """Return a list of results and facets based on parameters.

        The list of accepted parameters (with types and default values) is in
        socorro.lib.search_common.SearchBase
        """
        # Filter parameters and raise potential errors.
        params = self.get_parameters(**kwargs)

        # Find the indexes to use to optimize the elasticsearch query.
        indexes = self.get_indexes(params['date'])

        # Create and configure the search object.
        search = SuperS().es(
            urls=self.config.elasticsearch_urls,
            timeout=self.config.elasticsearch_timeout,
        )
        search = search.indexes(*indexes)
        search = search.doctypes(self.config.elasticsearch_doctype)

        # Create filters.
        filters = F()

        for field, sub_params in params.items():
            sub_filters = F()
            for param in sub_params:

                if param.name.startswith('_'):
                    if param.name == '_results_offset':
                        results_from = param.value[0]
                    elif param.name == '_results_number':
                        results_number = param.value[0]
                    # Don't use meta parameters in the query.
                    continue

                field_data = self.all_fields[param.name]

                name = '%s.%s' % (field_data['namespace'],
                                  field_data['in_database_name'])

                if param.data_type in ('date', 'datetime'):
                    param.value = datetimeutil.date_to_string(param.value)
                elif param.data_type == 'enum':
                    param.value = [x.lower() for x in param.value]
                elif param.data_type == 'str' and not param.operator:
                    param.value = [x.lower() for x in param.value]

                args = {}
                if not param.operator:
                    # contains one of the terms
                    if len(param.value) == 1:
                        val = param.value[0]
                        if (not isinstance(val, basestring)
                                or ' ' not in val):
                            args[name] = val

                        # If the term contains white spaces, we want to perform
                        # a phrase query. Thus we do nothing here and let this
                        # value be handled later.
                    else:
                        args['%s__in' % name] = param.value
                elif param.operator == '=':
                    # is exactly
                    if field_data['has_full_version']:
                        name = '%s.full' % name
                    args[name] = param.value
                elif param.operator == '>':
                    # greater than
                    args['%s__gt' % name] = param.value
                elif param.operator == '<':
                    # lower than
                    args['%s__lt' % name] = param.value
                elif param.operator == '>=':
                    # greater than or equal to
                    args['%s__gte' % name] = param.value
                elif param.operator == '<=':
                    # lower than or equal to
                    args['%s__lte' % name] = param.value
                elif param.operator == '__null__':
                    # is null
                    args['%s__missing' % name] = param.value

                if args:
                    if param.operator_not:
                        new_filter = ~F(**args)
                    else:
                        new_filter = F(**args)

                    if param.data_type == 'enum':
                        sub_filters |= new_filter
                    else:
                        sub_filters &= new_filter

                    continue

                # These use a wildcard and thus need to be in a query
                # instead of a filter.
                operator_wildcards = {
                    '~': '*%s*',  # contains
                    '$': '%s*',  # starts with
                    '^': '*%s'  # ends with
                }
                if param.operator in operator_wildcards:
                    if field_data['has_full_version']:
                        name = '%s.full' % name
                    args['%s__wildcard' % name] = \
                        operator_wildcards[param.operator] % param.value
                    args['must_not'] = param.operator_not
                elif not param.operator:
                    # This is a phrase that was passed down.
                    args['%s__match_phrase' % name] = param.value[0]

                if args:
                    search = search.query(**args)
                else:
                    # If we reach this point, that means the operator is
                    # not supported, and we should raise an error about that.
                    raise NotImplementedError('Operator %s is not supported' %
                                              param.operator)

            filters &= sub_filters

        search = search.filter(filters)

        # Pagination.
        results_to = results_from + results_number
        search = search[results_from:results_to]

        # Create facets.
        processed_filters = search._process_filters(filters.filters)

        for param in params['_facets']:
            for value in param.value:
                try:
                    field_ = self.all_fields[value]
                except KeyError:
                    # That is not a known field, we can't facet on it.
                    raise BadArgumentError(
                        value,
                        msg='Unknown field "%s", cannot facet on it' % value)

                field_name = '%s.%s' % (field_['namespace'],
                                        field_['in_database_name'])

                if field_['has_full_version']:
                    # If the param has a full version, that means what matters
                    # is the full string, and not its individual terms.
                    field_name += '.full'

                args = {
                    value: {
                        'terms': {
                            'field': field_name,
                            'size': self.config.facets_max_number,
                        },
                        'facet_filter': processed_filters,
                    }
                }
                search = search.facet_raw(**args)

        # Query and compute results.
        hits = []
        fields = [
            '%s.%s' % (x['namespace'], x['in_database_name'])
            for x in self.all_fields.values() if x['is_returned']
        ]

        if params['_return_query'][0].value[0]:
            # Return only the JSON query that would be sent to elasticsearch.
            return {
                'query': search._build_query(),
                'indices': indexes,
            }

        # We call elasticsearch with a computed list of indices, based on
        # the date range. However, if that list contains indices that do not
        # exist in elasticsearch, an error will be raised. We thus want to
        # remove all failing indices until we either have a valid list, or
        # an empty list in which case we return no result.
        while True:
            try:
                for hit in search.values_dict(*fields):
                    hits.append(self.format_field_names(hit))

                total = search.count()
                facets = search.facet_counts()
                break  # Yay! Results!
            except ElasticHttpNotFoundError, e:
                missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0]
                if missing_index in indexes:
                    del indexes[indexes.index(missing_index)]
                else:
                    # Wait what? An error caused by an index that was not
                    # in the request? That should never happen, but in case
                    # it does, better know it.
                    raise

                if indexes:
                    # Update the list of indices and try again.
                    search = search.indexes(*indexes)
                else:
                    # There is no index left in the list, return an empty
                    # result.
                    hits = []
                    total = 0
                    facets = {}
                    break
Code example #15
    def get_exploitability(self, **kwargs):
        """Return a list of exploitable crash reports.

        See socorro.lib.external_common.parse_arguments() for all filters.
        """
        now = datetimeutil.utc_now().date()
        lastweek = now - datetime.timedelta(weeks=1)

        filters = [
            ("start_date", lastweek, "date"),
            ("end_date", now, "date"),
            ("product", None, "str"),
            ("version", None, "str"),
            ("page", None, "int"),
            ("batch", None, "int"),
        ]

        params = external_common.parse_arguments(filters, kwargs)

        sql_where = """
            report_date BETWEEN %(start_date)s AND %(end_date)s
            AND
            null_count + none_count + low_count + medium_count + high_count > 4
        """

        if params.product:
            sql_where += " AND pv.product_name = %(product)s"
        if params.version:
            sql_where += " AND pv.version_string = %(version)s"

        inner_with_sql = """
            SELECT
                signature,
                SUM(high_count) AS high_count,
                SUM(medium_count) AS medium_count,
                SUM(low_count) AS low_count,
                SUM(null_count) AS null_count,
                SUM(none_count) AS none_count,
                SUM(high_count) + SUM(medium_count) AS med_or_high
            FROM exploitability_reports
            JOIN product_versions AS pv USING (product_version_id)
            WHERE
                high_count + medium_count + null_count + none_count > 4
                AND
                %s
            GROUP BY signature
        """ % (sql_where, )

        count_sql_query = """
            /* external.postgresql.crashes.Crashes.get_exploitability */
            WITH sums AS (
                %s
            )
            SELECT
                count(signature)
            FROM sums
        """ % (inner_with_sql, )

        results = self.query(
            count_sql_query,
            params,
            error_message="Failed to retrieve exploitable crashes count")
        total_crashes_count, = results[0]

        sql_query = """
            /* external.postgresql.crashes.Crashes.get_exploitability */
            WITH sums AS (
                %s
            )
            SELECT
                signature,
                high_count,
                medium_count,
                low_count,
                null_count,
                none_count
            FROM sums
            ORDER BY
                med_or_high DESC, signature ASC
        """ % (inner_with_sql, )

        if params['page'] is not None:
            if params['page'] <= 0:
                raise BadArgumentError('page', params['page'], 'starts on 1')
            if params['batch'] is None:
                raise MissingArgumentError('batch')
            sql_query += """
            LIMIT %(limit)s
            OFFSET %(offset)s
            """
            params['limit'] = params['batch']
            params['offset'] = params['batch'] * (params['page'] - 1)

        error_message = (
            "Failed to retrieve exploitable crashes from PostgreSQL")
        results = self.query(sql_query, params, error_message=error_message)

        # Transforming the results into what we want
        crashes = []
        for row in results:
            crash = dict(
                zip(("signature", "high_count", "medium_count", "low_count",
                     "null_count", "none_count"), row))
            crashes.append(crash)

        return {"hits": crashes, "total": total_crashes_count}
Code example #16
File: products.py Project: walikhan/socorro
    def get(self, **kwargs):
        """ Return product information, or version information for one
         or more product:version combinations """
        filters = [
            ("versions", None, ["list", "str"]),  # for legacy, to be removed
            ("type", "desktop", "str"),
        ]
        params = external_common.parse_arguments(filters, kwargs)

        accepted_types = ("desktop", "webapp")
        if params.type not in accepted_types:
            raise BadArgumentError('type', params.type, accepted_types)

        if params.versions and params.versions[0]:
            return self._get_versions(params)

        if params.type == "desktop":
            sql = """
                /* socorro.external.postgresql.products.Products.get */
                SELECT
                    product_name,
                    version_string,
                    start_date,
                    end_date,
                    throttle,
                    is_featured,
                    build_type,
                    has_builds
                FROM product_info
                ORDER BY product_sort, version_sort DESC, channel_sort
            """
        elif params.type == "webapp":
            sql = """
                /* socorro.external.postgresql.products.Products.get */
                SELECT
                    product_name,
                    version,
                    NULL as start_date,
                    NULL as end_date,
                    1.0 as throttle,
                    FALSE as is_featured,
                    build_type,
                    FALSE as has_builds
                FROM bixie.raw_product_releases
                ORDER BY product_name, version DESC
            """

        error_message = "Failed to retrieve products/versions from PostgreSQL"
        results = self.query(sql, error_message=error_message)

        products = []
        versions_per_product = {}

        for row in results:
            version = dict(
                zip((
                    'product',
                    'version',
                    'start_date',
                    'end_date',
                    'throttle',
                    'featured',
                    'release',
                    'has_builds',
                ), row))

            try:
                version['end_date'] = datetimeutil.date_to_string(
                    version['end_date'])
            except TypeError:
                pass
            try:
                version['start_date'] = datetimeutil.date_to_string(
                    version['start_date'])
            except TypeError:
                pass

            version['throttle'] = float(version['throttle'])

            product = version['product']
            if product not in products:
                products.append(product)

            if product not in versions_per_product:
                versions_per_product[product] = [version]
            else:
                versions_per_product[product].append(version)

        return {
            'products': products,
            'hits': versions_per_product,
            'total': len(results)
        }
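
The loop above groups each result row under its product while recording products in the order they first appear. The same grouping pattern, isolated with made-up rows:

rows = [
    ('Firefox', '40.0'),
    ('Firefox', '39.0'),
    ('Thunderbird', '38.0'),
]

products = []
versions_per_product = {}
for product, version in rows:
    if product not in products:
        products.append(product)
    # setdefault covers both the "first version" and "append" branches.
    versions_per_product.setdefault(product, []).append(version)

assert products == ['Firefox', 'Thunderbird']
assert versions_per_product['Firefox'] == ['40.0', '39.0']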
コード例 #17
0
    def get_parameters(self, **kwargs):
        parameters = {}

        fields = kwargs['_fields']
        assert fields
        if fields:
            self.build_filters(fields)

        for param in self.filters:
            values = kwargs.get(param.name, param.default)

            if values in ('', []):
                # Those values are equivalent to None here.
                # Note that we cannot use bool(), because 0 is not equivalent
                # to None in our case.
                values = None

            if values is None and param.mandatory:
                raise MissingArgumentError(param.name)
            if values is None and param.default is not None:
                values = param.default

            # all values can be a list, so we make them all lists to simplify
            if values is not None and not isinstance(values, (list, tuple)):
                values = [values]

            if values is not None:
                # All values with no operator are stacked into a single
                # parameter, so we keep track of it here. Since negated
                # values must stay separate, we actually track two such
                # parameters: one for each possible value of "operator_not".
                no_operator_param = {True: None, False: None}

                for value in values:
                    operator = None
                    operator_not = False

                    operators = OPERATORS_MAP.get(param.data_type,
                                                  OPERATORS_MAP['default'])

                    if isinstance(value, basestring):
                        if value.startswith(OPERATOR_NOT):
                            operator_not = True
                            value = value[1:]

                        for ope in operators:
                            if value.startswith(ope):
                                operator = ope
                                value = value[len(ope):]
                                break

                    # ensure the right data type
                    try:
                        value = convert_to_type(value, param.data_type)
                    except ValueError:
                        raise BadArgumentError(
                            param.name,
                            msg='Bad value for parameter %s:'
                            ' "%s" is not a valid %s' %
                            (param.name, value, param.data_type))

                    if param.name not in parameters:
                        parameters[param.name] = []

                    if not operator:
                        if not no_operator_param[operator_not]:
                            no_operator_param[operator_not] = SearchParam(
                                param.name, [value], operator, param.data_type,
                                operator_not)
                        else:
                            no_operator_param[operator_not].value.append(value)
                    else:
                        parameters[param.name].append(
                            SearchParam(param.name, value, operator,
                                        param.data_type, operator_not))

                for value in no_operator_param.values():
                    if value:
                        parameters[value.name].append(value)

        self.fix_date_parameter(parameters)
        self.fix_process_type_parameter(parameters)
        self.fix_hang_type_parameter(parameters)
        self.fix_version_parameter(parameters)

        return parameters
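
get_parameters() peels an optional OPERATOR_NOT prefix ("!") off each string value, then the first matching comparison operator. A self-contained sketch of that parsing step, assuming an operator list in the spirit of OPERATORS_MAP (the exact contents may differ):

OPERATOR_NOT = '!'
# Two-character operators must come first so '>=' is not read as '>'.
OPERATORS = ('<=', '>=', '=', '<', '>', '~', '$', '^')

def parse_value(value):
    operator_not = value.startswith(OPERATOR_NOT)
    if operator_not:
        value = value[1:]
    for ope in OPERATORS:
        if value.startswith(ope):
            return ope, value[len(ope):], operator_not
    return None, value, operator_not

assert parse_value('>=2.0') == ('>=', '2.0', False)
assert parse_value('!$Fire') == ('$', 'Fire', True)
assert parse_value('Firefox') == (None, 'Firefox', False)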
コード例 #18
0
    def get_exploitability(self, **kwargs):
        """Return a list of exploitable crash reports.

        See socorro.lib.external_common.parse_arguments() for all filters.
        """
        now = datetimeutil.utc_now().date()
        lastweek = now - datetime.timedelta(weeks=1)

        filters = [
            ("start_date", lastweek, "date"),
            ("end_date", now, "date"),
            ("page", None, "int"),
            ("batch", None, "int"),
        ]

        params = external_common.parse_arguments(filters, kwargs)

        count_sql_query = """
            /* external.postgresql.crashes.Crashes.get_exploitability */
            SELECT COUNT(*)
            FROM exploitability_reports
            WHERE
                report_date BETWEEN %(start_date)s AND %(end_date)s
        """
        results = self.query(
            count_sql_query,
            params,
            error_message="Failed to retrieve exploitable crashes count")
        total_crashes_count, = results[0]

        sql_query = """
            /* external.postgresql.crashes.Crashes.get_exploitability */
            SELECT
                signature,
                report_date,
                null_count,
                none_count,
                low_count,
                medium_count,
                high_count
            FROM exploitability_reports
            WHERE
                report_date BETWEEN %(start_date)s AND %(end_date)s
            ORDER BY
                report_date DESC
        """

        if params['page'] is not None:
            if params['page'] <= 0:
                raise BadArgumentError('page', params['page'], 'starts at 1')
            if params['batch'] is None:
                raise MissingArgumentError('batch')
            sql_query += """
            LIMIT %(limit)s
            OFFSET %(offset)s
            """
            params['limit'] = params['batch']
            params['offset'] = params['batch'] * (params['page'] - 1)

        error_message = "Failed to retrieve exploitable crashes from PostgreSQL"
        results = self.query(sql_query, params, error_message=error_message)

        # Transforming the results into what we want
        crashes = []
        for row in results:
            crash = dict(
                zip(("signature", "report_date", "null_count", "none_count",
                     "low_count", "medium_count", "high_count"), row))
            crash["report_date"] = datetimeutil.date_to_string(
                crash["report_date"])
            crashes.append(crash)

        return {"hits": crashes, "total": total_crashes_count}
コード例 #19
0
    def get(self, **kwargs):
        """ Return urls for signature """
        filters = [
            ("signature", None, "str"),
            ("start_date", None, "datetime"),
            ("end_date", None, "datetime"),
            ("products", None, ["list", "str"]),
            ("versions", None, ["list", "str"]),
        ]

        params = external_common.parse_arguments(filters, kwargs)

        # Because no parameters are optional, we need to loop through
        # all parameters to ensure each has been set and is not None.
        missingParams = []
        for param in params:
            if not params[param]:
                if param == 'versions':
                    # force versions parameter to being 'ALL' if empty
                    params[param] = 'ALL'
                    continue
                missingParams.append(param)

        if len(missingParams) > 0:
            raise MissingArgumentError(", ".join(missingParams))

        all_products_versions_sql = """
        /* socorro.external.postgresql.signature_urls.SignatureURLs.get */
            SELECT url, count(*) as crash_count FROM reports_clean
            JOIN reports_user_info USING ( UUID )
            JOIN signatures USING ( signature_id )
            WHERE reports_clean.date_processed
                BETWEEN %(start_date)s AND %(end_date)s
            AND reports_user_info.date_processed
                BETWEEN %(start_date)s AND %(end_date)s
            AND signature = %(signature)s
            AND url <> ''
        """

        sql = """
        /* socorro.external.postgresql.signature_urls.SignatureURLs.get */
            SELECT url, count(*) as crash_count FROM reports_clean
            JOIN reports_user_info USING ( UUID )
            JOIN signatures USING ( signature_id )
            JOIN product_versions USING ( product_version_id )
            WHERE reports_clean.date_processed
                BETWEEN %(start_date)s AND %(end_date)s
            AND reports_user_info.date_processed
                BETWEEN %(start_date)s AND %(end_date)s
            AND signature = %(signature)s
            AND url <> ''
            AND (
        """

        sql_group_order = """ GROUP BY url
            ORDER BY crash_count DESC LIMIT 100"""
        sql_params = {
            "start_date": params.start_date,
            "end_date": params.end_date,
            "signature": params.signature
        }

        # if this query is for all products the 'ALL' keyword will be
        # the only item in the products list and this will then also
        # be for all versions.
        if 'ALL' in params['products']:
            sql_query = " ".join((all_products_versions_sql, sql_group_order))
        # if this query is for all versions the 'ALL' keyword will be
        # the only item in the versions list.
        elif 'ALL' in params['versions']:
            sql_products = " product_name IN %(products)s )"
            sql_params['products'] = tuple(params.products)

            sql_date_range_limit = """AND %(end_date)s BETWEEN
                product_versions.build_date
                    AND product_versions.sunset_date"""
            sql_query = " ".join((sql, sql_products,
                                  sql_date_range_limit, sql_group_order))
        else:
            products = []
            (params["products_versions"],
             products) = self.parse_versions(params["versions"], [])

            if len(params["products_versions"]) == 0:
                raise BadArgumentError(", ".join(params["versions"]))

            versions_list = []
            products_list = []
            for x in range(0, len(params["products_versions"]), 2):
                products_list.append(params["products_versions"][x])
                versions_list.append(params["products_versions"][x + 1])

            product_version_list = []
            for prod in params["products"]:
                versions = [versions_list[i]
                            for i, x in enumerate(products_list)
                            if x == prod]
                product_version_list.append(tuple(versions))

            sql_product_version_ids = [
                """( product_name = %%(product%s)s
                    AND version_string IN %%(version%s)s ) """
                % (x, x) for x in range(len(product_version_list))]

            sql_params = add_param_to_dict(sql_params, "version",
                                           product_version_list)

            sql_params = add_param_to_dict(sql_params, "product",
                                           params.products)

            sql_query = " ".join((sql, " OR ".join(sql_product_version_ids),
                                  " ) " + sql_group_order))

        error_message = "Failed to retrieve urls for signature from PostgreSQL"
        results = self.query(sql_query, sql_params,
                             error_message=error_message)
        urls = results.zipped()
        return {
            "hits": urls,
            "total": len(urls)
        }
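
The product/version branch emits one parenthesized clause per product and numbers the placeholders (product0, version0, ...) so the database driver can bind each tuple. A sketch of what add_param_to_dict presumably does (this stand-in is illustrative, not Socorro's implementation):

def add_param_to_dict(params, prefix, values):
    # Numbered keys, one per value: prefix0, prefix1, ...
    for i, value in enumerate(values):
        params['%s%s' % (prefix, i)] = value
    return params

sql_params = add_param_to_dict({}, 'product', ['Firefox', 'Thunderbird'])
sql_params = add_param_to_dict(sql_params, 'version',
                               [('40.0', '39.0'), ('38.0',)])

assert sql_params['product0'] == 'Firefox'
assert sql_params['version1'] == ('38.0',)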
コード例 #20
0
    def get(self, **kwargs):
        """Return a list of results and facets based on parameters.

        The list of accepted parameters (with types and default values) is in
        socorro.lib.search_common.SearchBase
        """
        # Filter parameters and raise potential errors.
        params = self.get_parameters(**kwargs)

        # Find the indexes to use to optimize the elasticsearch query.
        indexes = self.get_indexes(params['date'])

        # Create and configure the search object.
        search = SuperS().es(
            urls=self.config.elasticsearch_urls,
            timeout=self.config.elasticsearch_timeout,
        )
        search = search.indexes(indexes)
        search = search.doctypes(self.config.elasticsearch_doctype)

        # Create filters.
        filters = F()

        for field, sub_params in params.items():
            for param in sub_params:
                name = PARAM_TO_FIELD_MAPPING.get(param.name, param.name)
                name = self.prefix_field_name(name)

                if name.startswith('_'):
                    if name == '_results_offset':
                        results_from = param.value[0]
                    elif name == '_results_number':
                        results_number = param.value[0]
                    # Don't use meta parameters in the query.
                    continue

                if param.data_type in ('date', 'datetime'):
                    param.value = datetimeutil.date_to_string(param.value)
                elif param.data_type == 'enum':
                    param.value = [x.lower() for x in param.value]
                elif param.data_type == 'str' and not param.operator:
                    param.value = [x.lower() for x in param.value]

                args = {}
                if not param.operator:
                    # contains one of the terms
                    if len(param.value) == 1:
                        args[name] = param.value[0]
                    else:
                        args['%s__in' % name] = param.value
                elif param.operator == '=':
                    # is exactly
                    if name in FIELDS_WITH_FULL_VERSION:
                        name = '%s.full' % name
                    args[name] = param.value
                elif param.operator == '>':
                    # greater than
                    args['%s__gt' % name] = param.value
                elif param.operator == '<':
                    # lower than
                    args['%s__lt' % name] = param.value
                elif param.operator == '>=':
                    # greater than or equal to
                    args['%s__gte' % name] = param.value
                elif param.operator == '<=':
                    # lower than or equal to
                    args['%s__lte' % name] = param.value
                elif param.operator == '__null__':
                    # is null
                    args['%s__missing' % name] = param.value

                if args:
                    if param.operator_not:
                        filters &= ~F(**args)
                    else:
                        filters &= F(**args)
                    continue

                # These use a wildcard and thus need to be in a query
                # instead of a filter.
                operator_wildcards = {
                    '~': '*%s*',  # contains
                    '$': '%s*',  # starts with
                    '^': '*%s'  # ends with
                }
                if param.operator in operator_wildcards:
                    if name in FIELDS_WITH_FULL_VERSION:
                        name = '%s.full' % name
                    args['%s__wildcard' % name] = \
                        operator_wildcards[param.operator] % param.value
                    args['must_not'] = param.operator_not

                if args:
                    search = search.query(**args)
                else:
                    # If we reach this point, that means the operator is
                    # not supported, and we should raise an error about that.
                    raise NotImplementedError('Operator %s is not supported' %
                                              param.operator)

        search = search.filter(filters)

        # Pagination.
        results_to = results_from + results_number
        search = search[results_from:results_to]

        # Create facets.
        processed_filters = search._process_filters(filters.filters)

        for param in params['_facets']:
            for value in param.value:
                filter_ = self.get_filter(value)
                if not filter_:
                    # That is not a known field, we can't facet on it.
                    raise BadArgumentError(
                        'Unknown field "%s", cannot facet on it' % value)

                field_name = PARAM_TO_FIELD_MAPPING.get(value, value)
                field_name = self.prefix_field_name(field_name)

                if field_name in FIELDS_WITH_FULL_VERSION:
                    # If the param has a full version, that means what matters
                    # is the full string, and not its individual terms.
                    field_name += '.full'

                args = {
                    value: {
                        'terms': {
                            'field': field_name,
                            'size': self.config.facets_max_number,
                        },
                        'facet_filter': processed_filters,
                    }
                }
                search = search.facet_raw(**args)

        # Query and compute results.
        hits = []
        fields = ['processed_crash.%s' % x for x in PROCESSED_CRASH_FIELDS]
        for hit in search.values_dict(*fields):
            hits.append(self.format_field_names(hit))

        return {
            'hits': hits,
            'total': search.count(),
            'facets': search.facet_counts(),
        }
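
Each comparison operator above becomes a double-underscore suffix on the field name before being handed to elasticutils' F(). The mapping on its own, independent of elasticutils:

OPERATOR_SUFFIXES = {
    '>': '__gt', '<': '__lt',
    '>=': '__gte', '<=': '__lte',
    '__null__': '__missing',
}

def filter_args(name, operator, value):
    if not operator:
        # No operator: one term, or any of several terms.
        if isinstance(value, list):
            return {'%s__in' % name: value}
        return {name: value}
    if operator == '=':
        return {name: value}
    return {name + OPERATOR_SUFFIXES[operator]: value}

assert filter_args('build_id', '>=', 20150101000000) == \
    {'build_id__gte': 20150101000000}
assert filter_args('product', None, ['firefox', 'fennec']) == \
    {'product__in': ['firefox', 'fennec']}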
コード例 #21
0
    def get(self, **kwargs):
        """Return a list of results and aggregations based on parameters.

        The list of accepted parameters (with types and default values) is in
        the database and can be accessed with the super_search_fields service.
        """
        # Require that the list of fields be passed.
        if not kwargs.get('_fields'):
            raise MissingArgumentError('_fields')
        self.all_fields = kwargs['_fields']
        self._build_fields()

        # Filter parameters and raise potential errors.
        params = self.get_parameters(**kwargs)

        # Find the indices to use to optimize the elasticsearch query.
        indices = self.get_indices(params['date'])

        # Create and configure the search object.
        search = Search(
            using=self.get_connection(),
            index=indices,
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
        )

        # Create filters.
        filters = []
        histogram_intervals = {}

        for field, sub_params in params.items():
            sub_filters = None
            for param in sub_params:
                if param.name.startswith('_'):
                    # By default, all param values are turned into lists,
                    # even when they can only ever hold one value.
                    # For those, we simply extract the single value from
                    # the made-up list.
                    if param.name == '_results_offset':
                        results_from = param.value[0]
                    elif param.name == '_results_number':
                        results_number = param.value[0]
                        if results_number > 1000:
                            raise BadArgumentError('_results_number too large')
                    elif param.name == '_facets_size':
                        facets_size = param.value[0]

                    for f in self.histogram_fields:
                        if param.name == '_histogram_interval.%s' % f:
                            histogram_intervals[f] = param.value[0]

                    # Don't use meta parameters in the query.
                    continue

                field_data = self.all_fields[param.name]

                name = '%s.%s' % (field_data['namespace'],
                                  field_data['in_database_name'])

                if param.data_type in ('date', 'datetime'):
                    param.value = datetimeutil.date_to_string(param.value)
                elif param.data_type == 'enum':
                    param.value = [x.lower() for x in param.value]
                elif param.data_type == 'str' and not param.operator:
                    param.value = [x.lower() for x in param.value]

                # Operators needing wildcards, and the associated value
                # transformation with said wildcards.
                operator_wildcards = {
                    '~': '*%s*',  # contains
                    '$': '%s*',  # starts with
                    '^': '*%s'  # ends with
                }
                # Operators needing ranges, and the associated Elasticsearch
                # comparison operator.
                operator_range = {
                    '>': 'gt',
                    '<': 'lt',
                    '>=': 'gte',
                    '<=': 'lte',
                }

                args = {}
                filter_type = 'term'
                filter_value = None

                if not param.operator:
                    # contains one of the terms
                    if len(param.value) == 1:
                        val = param.value[0]

                        if not isinstance(val, basestring) or ' ' not in val:
                            # Only one term and no white space: this is a
                            # simple term filter.
                            filter_value = val
                        else:
                            # If the term contains white spaces, we want to
                            # perform a phrase query.
                            filter_type = 'query'
                            args = Q(
                                'simple_query_string',
                                query=param.value[0],
                                fields=[name],
                                default_operator='and',
                            ).to_dict()
                    else:
                        # There are several terms, this is a terms filter.
                        filter_type = 'terms'
                        filter_value = param.value
                elif param.operator == '=':
                    # is exactly
                    if field_data['has_full_version']:
                        name = '%s.full' % name
                    filter_value = param.value
                elif param.operator in operator_range:
                    filter_type = 'range'
                    filter_value = {
                        operator_range[param.operator]: param.value
                    }
                elif param.operator == '__null__':
                    filter_type = 'missing'
                    args['field'] = name
                elif param.operator in operator_wildcards:
                    filter_type = 'query'

                    # Wildcard operations are better applied to a non-analyzed
                    # field (called "full") if there is one.
                    if field_data['has_full_version']:
                        name = '%s.full' % name

                    q_args = {}
                    q_args[name] = (operator_wildcards[param.operator] %
                                    param.value)
                    query = Q('wildcard', **q_args)
                    args = query.to_dict()

                if filter_value is not None:
                    args[name] = filter_value

                if args:
                    new_filter = F(filter_type, **args)
                    if param.operator_not:
                        new_filter = ~new_filter

                    if sub_filters is None:
                        sub_filters = new_filter
                    elif filter_type == 'range':
                        sub_filters &= new_filter
                    else:
                        sub_filters |= new_filter

                    continue

            if sub_filters is not None:
                filters.append(sub_filters)

        search = search.filter(F('bool', must=filters))

        # Restricting returned fields.
        fields = []
        for param in params['_columns']:
            for value in param.value:
                if not value:
                    continue

                field_name = self.get_field_name(value, full=False)
                fields.append(field_name)

        search = search.fields(fields)

        # Sorting.
        sort_fields = []
        for param in params['_sort']:
            for value in param.value:
                if not value:
                    continue

                # Values starting with a '-' are sorted in descending order.
                # In order to retrieve the database name of the field, we
                # must first remove the '-' part and add it back later.
                # Example: given ['product', '-version'], the results will be
                # sorted by ascending product and descending version.
                desc = False
                if value.startswith('-'):
                    desc = True
                    value = value[1:]

                field_name = self.get_field_name(value, full=False)

                if desc:
                    # The underlying library understands that '-' means
                    # sorting in descending order.
                    field_name = '-' + field_name

                sort_fields.append(field_name)

        search = search.sort(*sort_fields)

        # Pagination.
        results_to = results_from + results_number
        search = search[results_from:results_to]

        # Create facets.
        for param in params['_facets']:
            self._add_second_level_aggs(
                param,
                search.aggs,
                facets_size,
                histogram_intervals,
            )

        # Create sub-aggregations.
        for key in params:
            if not key.startswith('_aggs.'):
                continue

            fields = key.split('.')[1:]

            if fields[0] not in self.all_fields:
                continue

            base_bucket = self._get_fields_agg(fields[0], facets_size)
            sub_bucket = base_bucket

            for field in fields[1:]:
                # For each field, make a bucket, then include that bucket in
                # the latest one, and then make that new bucket the latest.
                if field in self.all_fields:
                    tmp_bucket = self._get_fields_agg(field, facets_size)
                    sub_bucket.bucket(field, tmp_bucket)
                    sub_bucket = tmp_bucket

            for value in params[key]:
                self._add_second_level_aggs(
                    value,
                    sub_bucket,
                    facets_size,
                    histogram_intervals,
                )

            search.aggs.bucket(fields[0], base_bucket)

        # Create histograms.
        for f in self.histogram_fields:
            key = '_histogram.%s' % f
            if params.get(key):
                histogram_bucket = self._get_histogram_agg(
                    f, histogram_intervals)

                for param in params[key]:
                    self._add_second_level_aggs(
                        param,
                        histogram_bucket,
                        facets_size,
                        histogram_intervals,
                    )

                search.aggs.bucket('histogram_%s' % f, histogram_bucket)

        # Query and compute results.
        hits = []

        if params['_return_query'][0].value[0]:
            # Return only the JSON query that would be sent to elasticsearch.
            return {
                'query': search.to_dict(),
                'indices': indices,
            }

        # We call elasticsearch with a computed list of indices, based on
        # the date range. However, if that list contains indices that do not
        # exist in elasticsearch, an error will be raised. We thus want to
        # remove all failing indices until we either have a valid list, or
        # an empty list in which case we return no result.
        while True:
            try:
                results = search.execute()
                for hit in results:
                    hits.append(self.format_fields(hit.to_dict()))

                total = search.count()
                aggregations = self.format_aggregations(results.aggregations)
                break  # Yay! Results!
            except NotFoundError as e:
                missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0]
                if missing_index in indices:
                    indices.remove(missing_index)
                else:
                    # Wait what? An error caused by an index that was not
                    # in the request? That should never happen, but in case
                    # it does, better know it.
                    raise

                if indices:
                    # Update the list of indices and try again.
                    # Note: we need to first empty the list of indices before
                    # updating it, otherwise the removed indices never get
                    # actually removed.
                    search = search.index().index(*indices)
                else:
                    # There is no index left in the list, return an empty
                    # result.
                    hits = []
                    total = 0
                    aggregations = {}
                    break
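
The retry loop above narrows the index list every time elasticsearch reports one of them missing, until a query succeeds or nothing is left. The same narrowing logic in isolation (query_indices is a hypothetical stand-in for search.execute(), and LookupError for NotFoundError):

def narrow_indices(indices, query_indices):
    indices = list(indices)
    while indices:
        try:
            return query_indices(indices)
        except LookupError as e:
            missing = str(e)
            if missing not in indices:
                # An error about an index we never asked for: re-raise.
                raise
            indices.remove(missing)
    return []  # no index left, so no results

calls = []
def fake_query(indices):
    calls.append(list(indices))
    if 'socorro201452' in indices:
        raise LookupError('socorro201452')
    return ['hit']

assert narrow_indices(['socorro201452', 'socorro201501'], fake_query) == ['hit']
assert calls == [['socorro201452', 'socorro201501'], ['socorro201501']]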
コード例 #22
0
    def get(self, **kwargs):

        filters = [
            ('limit', 500, 'int'),
            ('offset', 0, 'int'),
        ]
        params = external_common.parse_arguments(filters, kwargs)
        if params['limit'] > 1000:
            raise BadArgumentError('Max limit is 1000')

        sql = """
            /* socorro.external.postgresql.laglog.LagLog.get */
            SELECT
                replica_name,
                lag,
                moment,
                master,
                AVG(lag)
            OVER (
                PARTITION BY replica_name
                ORDER BY moment DESC
                ROWS BETWEEN 0 FOLLOWING AND 11 FOLLOWING
            ) AS average
            FROM lag_log
            ORDER BY moment DESC
            LIMIT %(limit)s
            OFFSET %(offset)s
        """

        results = self.query(sql, params)
        averages = defaultdict(list)
        all_rows = defaultdict(list)
        for row in results:
            replica_name = row[0]
            lag = row[1]
            moment = row[2]
            master = row[3]
            average = row[4]
            timestamp = calendar.timegm(moment.utctimetuple())
            all_rows[replica_name].append({
                'x': timestamp,
                'y': lag,
                'master': master
            })
            if not average:
                average = 0
            averages[replica_name].append({'x': timestamp, 'y': int(average)})

        max_bytes_critical = self.context.laglog.max_bytes_critical
        max_bytes_warning = self.context.laglog.max_bytes_warning

        replicas = []
        for name, rows in all_rows.items():
            message = None
            last_average = averages[name][0]['y']
            last_value = rows[0]['y']
            if last_average > max_bytes_critical:
                message = 'CRITICAL'
            elif last_average > max_bytes_warning:
                message = 'WARNING'

            rows.reverse()
            averages_rows = averages[name]
            averages_rows.reverse()
            replicas.append({
                'name': name,
                'rows': rows,
                'averages': averages_rows,
                'message': message,
                'last_average': last_average,
                'last_value': last_value,
            })
        replicas.sort(key=lambda x: x['name'])

        return {'replicas': replicas}
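
The window clause averages each lag sample with the eleven that follow it in moment DESC order, i.e. a trailing moving average over up to twelve samples. The equivalent computation in plain Python, for one replica's lags ordered newest first:

def rolling_average(lags, window=12):
    # lags are ordered newest first, matching ORDER BY moment DESC.
    return [
        sum(lags[i:i + window]) / float(min(window, len(lags) - i))
        for i in range(len(lags))
    ]

lags = [120, 100, 80, 60]
averages = rolling_average(lags)
assert averages[0] == (120 + 100 + 80 + 60) / 4.0  # only 4 samples available
assert averages[-1] == 60.0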
コード例 #23
0
    def get_paireduuid(self, **kwargs):
        """Return paired uuid given a uuid and an optional hangid.

        If a hangid is passed, then return only one result. Otherwise, return
        all found paired uuids.

        """
        filters = [
            ("uuid", None, "str"),
            ("hangid", None, "str"),
        ]
        params = external_common.parse_arguments(filters, kwargs)

        if not params.uuid:
            raise MissingArgumentError('uuid')

        try:
            crash_date = datetimeutil.uuid_to_date(params.uuid)
        except ValueError:
            raise BadArgumentError(
                'uuid',
                'Date could not be extracted from %s' % (params.uuid, ))

        sql = """
            /* socorro.external.postgresql.crashes.Crashes.get_paireduuid */
            SELECT uuid
            FROM reports r
            WHERE r.uuid != %(uuid)s
            AND r.date_processed BETWEEN
                TIMESTAMP %(crash_date)s - CAST('1 day' AS INTERVAL) AND
                TIMESTAMP %(crash_date)s + CAST('1 day' AS INTERVAL)
        """
        sql_params = {"uuid": params.uuid, "crash_date": crash_date}

        if params.hangid is not None:
            sql = """%s
                AND r.hangid = %%(hangid)s
                LIMIT 1
            """ % sql
            sql_params["hangid"] = params.hangid
        else:
            sql = """%s
                AND r.hangid IN (
                    SELECT hangid
                    FROM reports r2
                    WHERE r2.date_processed BETWEEN
                        TIMESTAMP %%(crash_date)s - CAST('1 day' AS INTERVAL)
                        AND
                        TIMESTAMP %%(crash_date)s + CAST('1 day' AS INTERVAL)
                    AND r2.uuid = %%(uuid)s
                )
            """ % sql

        # Query the database
        error_message = "Failed to retrieve paired uuids from PostgreSQL"
        results = self.query(sql, sql_params, error_message=error_message)

        # Transforming the results into what we want
        uuids = [dict(zip(("uuid", ), row)) for row in results]

        return {"hits": uuids, "total": len(uuids)}
コード例 #24
0
    def get(self, **kwargs):
        self.context.logger.info('Running %s' % self.__class__.__name__)
        raise BadArgumentError('bad arg')
コード例 #25
0
    def create_field(self, **kwargs):
        """Create a new field in the database, to be used by supersearch and
        all Elasticsearch related services.
        """
        filters = [
            ('name', None, 'str'),
            ('data_validation_type', 'enum', 'str'),
            ('default_value', None, 'str'),
            ('description', None, 'str'),
            ('form_field_choices', None, ['list', 'str']),
            ('has_full_version', False, 'bool'),
            ('in_database_name', None, 'str'),
            ('is_exposed', False, 'bool'),
            ('is_returned', False, 'bool'),
            ('is_mandatory', False, 'bool'),
            ('query_type', 'enum', 'str'),
            ('namespace', None, 'str'),
            ('permissions_needed', None, ['list', 'str']),
            ('storage_mapping', None, 'json'),
        ]
        params = external_common.parse_arguments(filters, kwargs)

        mandatory_params = ('name', 'in_database_name')
        for param in mandatory_params:
            if not params[param]:
                raise MissingArgumentError(param)

        # Before making the change, make sure it does not break indexing.
        new_mapping = self.get_mapping(overwrite_mapping=params)

        # Try the mapping. If there is an error, an exception will be raised.
        # If an exception is raised, the new mapping will be rejected.
        self.test_mapping(new_mapping)

        es_connection = self.get_connection()

        try:
            es_connection.index(
                index=self.config.elasticsearch.elasticsearch_default_index,
                doc_type='supersearch_fields',
                body=params,
                id=params['name'],
                op_type='create',
                refresh=True,
            )
        except elasticsearch.exceptions.ConflictError:
            # This field exists in the database, it thus cannot be created!
            raise BadArgumentError(
                'name',
                msg='The field "%s" already exists in the database; '
                    'it cannot be created.' % params['name'],
            )

        if params.get('storage_mapping'):
            # If we made a change to the storage_mapping, log that change.
            self.config.logger.info(
                'elasticsearch mapping changed for field "%s", '
                'added new mapping "%s"',
                params['name'],
                params['storage_mapping'],
            )

        return True
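
op_type='create' is what makes the index call insert-only: elasticsearch refuses to overwrite an existing document id and raises ConflictError, which the service surfaces as a BadArgumentError. A minimal sketch of the same pattern with the elasticsearch-py client (assumes a reachable test cluster; the index name here is illustrative):

import elasticsearch

es = elasticsearch.Elasticsearch()

doc = {'name': 'product', 'in_database_name': 'product'}
try:
    es.index(
        index='socorro_default',
        doc_type='supersearch_fields',
        body=doc,
        id=doc['name'],
        op_type='create',  # fail instead of overwriting an existing id
        refresh=True,
    )
except elasticsearch.exceptions.ConflictError:
    print('field "%s" already exists' % doc['name'])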
コード例 #26
0
    def get(self, **kwargs):
        """Return a list of results and aggregations based on parameters.

        The list of accepted parameters (with types and default values) is in
        the database and can be accessed with the super_search_fields service.
        """
        # Filter parameters and raise potential errors.
        params = self.get_parameters(**kwargs)

        # Find the indices to use to optimize the elasticsearch query.
        indices = self.get_indices(params['date'])

        # Create and configure the search object.
        search = Search(
            using=self.get_connection(),
            index=indices,
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
        )

        # Create filters.
        filters = None

        for field, sub_params in params.items():
            sub_filters = None
            for param in sub_params:

                if param.name.startswith('_'):
                    if param.name == '_results_offset':
                        results_from = param.value[0]
                    elif param.name == '_results_number':
                        results_number = param.value[0]
                    # Don't use meta parameters in the query.
                    continue

                field_data = self.all_fields[param.name]

                name = '%s.%s' % (field_data['namespace'],
                                  field_data['in_database_name'])

                if param.data_type in ('date', 'datetime'):
                    param.value = datetimeutil.date_to_string(param.value)
                elif param.data_type == 'enum':
                    param.value = [x.lower() for x in param.value]
                elif param.data_type == 'str' and not param.operator:
                    param.value = [x.lower() for x in param.value]

                args = {}
                filter_type = 'term'
                filter_value = None
                if not param.operator:
                    # contains one of the terms
                    if len(param.value) == 1:
                        val = param.value[0]
                        if not isinstance(val, basestring) or ' ' not in val:
                            filter_value = val

                        # If the term contains white spaces, we want to perform
                        # a phrase query. Thus we do nothing here and let this
                        # value be handled later.
                    else:
                        filter_type = 'terms'
                        filter_value = param.value
                elif param.operator == '=':
                    # is exactly
                    if field_data['has_full_version']:
                        name = '%s.full' % name
                    filter_value = param.value
                elif param.operator == '>':
                    # greater than
                    filter_type = 'range'
                    filter_value = {'gt': param.value}
                elif param.operator == '<':
                    # lower than
                    filter_type = 'range'
                    filter_value = {'lt': param.value}
                elif param.operator == '>=':
                    # greater than or equal to
                    filter_type = 'range'
                    filter_value = {'gte': param.value}
                elif param.operator == '<=':
                    # lower than or equal to
                    filter_type = 'range'
                    filter_value = {'lte': param.value}
                elif param.operator == '__null__':
                    # is null
                    filter_type = 'missing'
                    args['field'] = name

                if filter_value is not None:
                    args[name] = filter_value

                if args:
                    if param.operator_not:
                        new_filter = ~F(filter_type, **args)
                    else:
                        new_filter = F(filter_type, **args)

                    if sub_filters is None:
                        sub_filters = new_filter
                    elif param.data_type == 'enum':
                        sub_filters |= new_filter
                    else:
                        sub_filters &= new_filter

                    continue

                # These use a wildcard and thus need to be in a query
                # instead of a filter.
                operator_wildcards = {
                    '~': '*%s*',  # contains
                    '$': '%s*',  # starts with
                    '^': '*%s'  # ends with
                }
                if param.operator in operator_wildcards:
                    if field_data['has_full_version']:
                        name = '%s.full' % name

                    query_type = 'wildcard'
                    args[name] = (operator_wildcards[param.operator] %
                                  param.value)
                elif not param.operator:
                    # This is a phrase that was passed down.
                    query_type = 'simple_query_string'
                    args['query'] = param.value[0]
                    args['fields'] = [name]
                    args['default_operator'] = 'and'

                if args:
                    query = Q(query_type, **args)
                    if param.operator_not:
                        query = ~query
                    search = search.query(query)
                else:
                    # If we reach this point, that means the operator is
                    # not supported, and we should raise an error about that.
                    raise NotImplementedError('Operator %s is not supported' %
                                              param.operator)

            if filters is None:
                filters = sub_filters
            elif sub_filters is not None:
                filters &= sub_filters

        search = search.filter(filters)

        # Pagination.
        results_to = results_from + results_number
        search = search[results_from:results_to]

        # Create facets.
        for param in params['_facets']:
            for value in param.value:
                try:
                    field_ = self.all_fields[value]
                except KeyError:
                    # That is not a known field, we can't facet on it.
                    raise BadArgumentError(
                        value,
                        msg='Unknown field "%s", cannot facet on it' % value)

                field_name = '%s.%s' % (field_['namespace'],
                                        field_['in_database_name'])

                if field_['has_full_version']:
                    # If the param has a full version, that means what matters
                    # is the full string, and not its individual terms.
                    field_name += '.full'

                search.aggs.bucket(value,
                                   'terms',
                                   field=field_name,
                                   size=self.config.facets_max_number)

        # Query and compute results.
        hits = []
        fields = [
            '%s.%s' % (x['namespace'], x['in_database_name'])
            for x in self.all_fields.values() if x['is_returned']
        ]
        search = search.fields(*fields)

        if params['_return_query'][0].value[0]:
            # Return only the JSON query that would be sent to elasticsearch.
            return {
                'query': search.to_dict(),
                'indices': indices,
            }

        # We call elasticsearch with a computed list of indices, based on
        # the date range. However, if that list contains indices that do not
        # exist in elasticsearch, an error will be raised. We thus want to
        # remove all failing indices until we either have a valid list, or
        # an empty list in which case we return no result.
        while True:
            try:
                results = search.execute()
                for hit in results:
                    hits.append(self.format_fields(hit.to_dict()))

                total = search.count()
                aggregations = self.format_aggregations(results.aggregations)
                break  # Yay! Results!
            except NotFoundError as e:
                missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0]
                if missing_index in indices:
                    indices.remove(missing_index)
                else:
                    # Wait what? An error caused by an index that was not
                    # in the request? That should never happen, but in case
                    # it does, better know it.
                    raise

                if indices:
                    # Update the list of indices and try again.
                    # Note: we need to first empty the list of indices before
                    # updating it, otherwise the removed indices never get
                    # actually removed.
                    search = search.index().index(*indices)
                else:
                    # There is no index left in the list, return an empty
                    # result.
                    hits = []
                    total = 0
                    aggregations = {}
                    break
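
Note how sub-filters combine in the loop above: within one field, repeated enum values are alternatives (OR) while other data types accumulate as conjunctions (AND), and each field's combined filter is then AND-ed into the whole. The rule reduced to plain booleans:

def combine(matches, data_type):
    # enum values on one field are alternatives; by contrast, two
    # range filters on one field must both hold.
    if data_type == 'enum':
        return any(matches)
    return all(matches)

# product=firefox OR product=fennec: matching either passes.
assert combine([False, True], 'enum') is True
# build_id > a AND build_id < b: both must match.
assert combine([True, False], 'int') is False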
コード例 #27
0
    def get_parameters(self, **kwargs):
        parameters = {}

        for param in self.filters:
            values = kwargs.get(param.name, param.default)

            if values in ('', []):
                # Those values are equivalent to None here.
                # Note that we cannot use bool(), because 0 is not equivalent
                # to None in our case.
                values = None

            if values is None and param.mandatory:
                raise MissingArgumentError(param.name)
            if values is None and param.default is not None:
                values = param.default

            # all values can be a list, so we make them all lists to simplify
            if values is not None and not isinstance(values, (list, tuple)):
                values = [values]

            if values is not None:
                no_operator_param = None
                for value in values:
                    operator = None
                    operator_not = False

                    operators = OPERATORS_MAP.get(param.data_type,
                                                  OPERATORS_MAP['default'])

                    if isinstance(value, basestring):
                        if value.startswith(OPERATOR_NOT):
                            operator_not = True
                            value = value[1:]

                        for ope in operators:
                            if value.startswith(ope):
                                operator = ope
                                value = value[len(ope):]
                                break

                    # ensure the right data type
                    try:
                        value = convert_to_type(value, param.data_type)
                    except ValueError:
                        raise BadArgumentError(
                            param.name,
                            msg='Bad value for parameter %s:'
                            ' "%s" is not a valid %s' %
                            (param.name, value, param.data_type))

                    if param.name not in parameters:
                        parameters[param.name] = []

                    if not operator:
                        if not no_operator_param:
                            no_operator_param = SearchParam(
                                param.name, [value], operator, param.data_type,
                                operator_not)
                        else:
                            no_operator_param.value.append(value)
                    else:
                        parameters[param.name].append(
                            SearchParam(param.name, value, operator,
                                        param.data_type, operator_not))

                if no_operator_param:
                    parameters[no_operator_param.name].append(
                        no_operator_param)

        self.fix_date_parameter(parameters)
        self.fix_process_type_parameter(parameters)
        self.fix_hang_type_parameter(parameters)

        return parameters
コード例 #28
0
def getListOfTopCrashersBySignature(connection, dbParams):
    """
    Answers a generator of tcbs rows
    """
    assertPairs = {
        'startDate': (datetime.date, datetime.datetime),
        'to_date': (datetime.date, datetime.datetime),
        'product': basestring,
        'version': basestring,
        'limit': int
    }

    for param in assertPairs:
        if not isinstance(dbParams[param], assertPairs[param]):
            raise BadArgumentError(type(dbParams[param]))

    order_by = 'report_count'  # default order field
    where = ['']  # trick for the later join
    if dbParams['crash_type'] != 'all':
        where.append("process_type = %s" %
                     (sqlutils.quote_value(dbParams['crash_type']), ))
    if dbParams['os']:
        abbreviated_os = dbParams['os'][0:3].lower()
        if abbreviated_os not in ('win', 'lin', 'mac'):
            # this check prevents possible SQL injections
            raise BadArgumentError('Invalid OS to order on')
        order_by = '%s_count' % abbreviated_os
        where.append("%s > 0" % order_by)

    where = ' AND '.join(where)

    table_to_use = 'tcbs'
    date_range_field = 'report_date'

    if dbParams['date_range_type'] == 'build':
        table_to_use = 'tcbs_build'
        date_range_field = 'build_date'

    sql = """
        WITH tcbs_r as (
        SELECT tcbs.signature_id,
                signature,
                pv.product_name,
                version_string,
                sum(report_count) as report_count,
                sum(win_count) as win_count,
                sum(lin_count) as lin_count,
                sum(mac_count) as mac_count,
                sum(hang_count) as hang_count,
                plugin_count(process_type,report_count) as plugin_count,
                content_count(process_type,report_count) as content_count,
                first_report,
                version_list,
                sum(startup_count) as startup_count,
                sum(is_gc_count) as is_gc_count
        FROM %s tcbs
            JOIN signatures USING (signature_id)
            JOIN product_versions AS pv USING (product_version_id)
            JOIN signature_products_rollup AS spr
                ON spr.signature_id = tcbs.signature_id
                AND spr.product_name = pv.product_name
        WHERE pv.product_name = %%s
            AND version_string = %%s
            AND tcbs.%s BETWEEN %%s AND %%s
            %s
        GROUP BY tcbs.signature_id, signature, pv.product_name, version_string,
             first_report, spr.version_list
        ),
        tcbs_window AS (
            SELECT tcbs_r.*,
            sum(report_count) over () as total_crashes,
                    dense_rank() over (order by report_count desc) as ranking
            FROM
                tcbs_r
        )
        SELECT signature,
                report_count,
                win_count,
                lin_count,
                mac_count,
                hang_count,
                plugin_count,
                content_count,
                first_report,
                version_list,
                %s / total_crashes::float as percent_of_total,
                startup_count / %s::float as startup_percent,
                is_gc_count,
                total_crashes::int
        FROM tcbs_window
        ORDER BY %s DESC
        LIMIT %s
    """ % (table_to_use, date_range_field, where, order_by, order_by, order_by,
           dbParams["limit"])
    params = (
        dbParams['product'],
        dbParams['version'],
        dbParams['startDate'],
        dbParams['to_date'],
    )
    try:
        cursor = connection.cursor()
        # db.execute returns a generator over the result rows.
        return db.execute(cursor, sql, params)
    except Exception:
        connection.rollback()
        raise
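
The query template above mixes two substitution layers: "%s" slots filled immediately by Python string formatting (identifiers validated against whitelists earlier) and "%%s" slots that survive the first pass as "%s" for the database driver to bind safely. A condensed sketch of that two-step expansion:

order_by = 'win_count'  # validated against a whitelist beforehand
template = "SELECT * FROM tcbs WHERE product_name = %%s ORDER BY %s DESC"

sql = template % order_by
# Only the driver placeholder survives the first pass:
assert sql == "SELECT * FROM tcbs WHERE product_name = %s ORDER BY win_count DESC"

# The driver then binds the value server-side, preventing injection:
# cursor.execute(sql, ('Firefox',))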