コード例 #1
0
ファイル: query.py プロジェクト: riccardo01/dati-ckan-docker
 def validate(self):
     '''Coerce and validate every search option held by this mapping.

     Options named in ``BOOLEAN_OPTIONS`` are converted with ``asbool`` and
     options named in ``INTEGER_OPTIONS`` with ``int``; the converted value
     is written back under the same key. Any other option is written back
     unchanged.

     Raises SearchQueryError if a boolean or integer option cannot be
     converted, or if an option is listed in ``UNSUPPORTED_OPTIONS``.
     '''
     for key, value in self.items():
         if key in self.BOOLEAN_OPTIONS:
             try:
                 value = asbool(value)
             except ValueError:
                 raise SearchQueryError('Value for search option %r must be True or False (1 or 0) but received %r' % (key, value))
         elif key in self.INTEGER_OPTIONS:
             try:
                 value = int(value)
             except (ValueError, TypeError):
                 # int() signals None, lists and dicts with TypeError rather
                 # than ValueError; report both as a bad search option.
                 raise SearchQueryError('Value for search option %r must be an integer but received %r' % (key, value))
         elif key in self.UNSUPPORTED_OPTIONS:
             raise SearchQueryError('Search option %r is not supported' % key)
         # Only existing keys are reassigned, so the mapping keeps its size
         # while it is being iterated.
         self[key] = value
コード例 #2
0
ファイル: query.py プロジェクト: frafra/ckan
def convert_legacy_parameters_to_solr(
        legacy_params: dict[str, Any]) -> dict[str, Any]:
    '''Translate API v1/v2 style search parameters into SOLR syntax.

    Every key of ``legacy_params`` that is not in ``VALID_SOLR_PARAMETERS``
    is either mapped onto a SOLR parameter (``all_fields`` -> ``fl``,
    ``offset`` -> ``start``, ``limit`` -> ``rows``, ``order_by`` -> ``sort``)
    or folded into the ``q`` string as a ``field:value`` term, and is then
    dropped from the result. ``legacy_params`` itself is left untouched; a
    new dict is returned.

    raises SearchQueryError on invalid params.
    '''
    # Validation is run only so that bad input raises; the validated
    # options object is not needed afterwards.
    QueryOptions(**legacy_params).validate()

    solr_params = legacy_params.copy()
    q_terms: list[str] = []
    free_text = solr_params.get('q')
    if free_text:
        q_terms.append(free_text.replace('+', ' '))

    legacy_only_keys = set(legacy_params.keys()) - VALID_SOLR_PARAMETERS
    for param_name in legacy_only_keys:
        raw_value = legacy_params[param_name]
        # '+' stands for a space in legacy string values.
        if isinstance(raw_value, str):
            value = raw_value.replace('+', ' ')
        else:
            value = raw_value

        if param_name == 'all_fields':
            if value:
                solr_params['fl'] = '*'
        elif param_name == 'offset':
            solr_params['start'] = value
        elif param_name == 'limit':
            solr_params['rows'] = value
        elif param_name == 'order_by':
            solr_params['sort'] = '%s asc' % value
        elif param_name == 'tags':
            # Tags use the raw value, i.e. without the '+' substitution.
            if isinstance(raw_value, list):
                tags = raw_value
            elif isinstance(raw_value, str):
                tags = [raw_value]
            else:
                raise SearchQueryError(
                    'Was expecting either a string or JSON list for the tags parameter: %r'
                    % value)
            for tag in tags:
                q_terms.append('tags:"%s"' % escape_legacy_argument(tag))
        elif len(value.strip()):
            # Any other legacy key becomes a fielded term; blank values
            # contribute nothing to the query.
            escaped = escape_legacy_argument(value)
            if ' ' in escaped:
                escaped = '"%s"' % escaped
            q_terms.append('%s:%s' % (param_name, escaped))

        # The legacy key must not be passed on to SOLR.
        del solr_params[param_name]

    solr_params['q'] = ' '.join(q_terms)
    if legacy_only_keys:
        log.debug('Converted legacy search params from %r to %r',
                  legacy_params, solr_params)
    return solr_params
コード例 #3
0
ファイル: query.py プロジェクト: shiJiangChen/ckan
    def run(self, query):
        '''
        Performs a dataset search using the given query.

        The ``query`` mapping is updated in place with the defaults and
        filters applied below before it is sent to SOLR.

        @param query - dictionary with keys like: q, fq, sort, rows, facet
        @return - dictionary with keys results and count

        May raise SearchQueryError or SearchError.
        '''
        assert isinstance(query, (dict, MultiDict))
        # check that query keys are valid
        invalid_params = set(query.keys()) - VALID_SOLR_PARAMETERS
        if invalid_params:
            raise SearchQueryError("Invalid search parameters: %s" %
                                   list(invalid_params))

        # default query is to return all documents
        q = query.get('q')
        if not q or q == '""' or q == "''":
            query['q'] = "*:*"

        # number of results (capped at 1000)
        rows_to_return = min(1000, int(query.get('rows', 10)))
        if rows_to_return > 0:
            # #1683 Work around problem of last result being out of order
            #       in SOLR 1.4
            rows_to_query = rows_to_return + 1
        else:
            rows_to_query = rows_to_return
        query['rows'] = rows_to_query

        # order by score if no 'sort' term given
        order_by = query.get('sort')
        if order_by == 'rank' or order_by is None:
            query['sort'] = 'score desc, name asc'

        # show only results from this CKAN instance
        fq = query.get('fq', '')
        if '+site_id:' not in fq:
            fq += ' +site_id:"%s"' % config.get('ckan.site_id')

        # filter for package status
        if '+state:' not in fq:
            fq += " +state:active"
        query['fq'] = fq

        # faceting
        query['facet'] = query.get('facet', 'true')
        query['facet.limit'] = query.get(
            'facet.limit', config.get('search.facets.limit', '50'))
        query['facet.mincount'] = query.get('facet.mincount', 1)

        # by default only the package name is returned
        query['fl'] = query.get('fl', 'name')

        # return results as json encoded string
        query['wt'] = query.get('wt', 'json')

        # A colon in the query marks it as a fielded search, which is left to
        # the default parser; only colon-free queries use dismax.
        if ':' not in query['q']:
            query['defType'] = 'dismax'
            query['tie'] = '0.1'
            # this minimum match is explained
            # http://wiki.apache.org/solr/DisMaxQParserPlugin#mm_.28Minimum_.27Should.27_Match.29
            query['mm'] = '2<-1 5<80%'
            query['qf'] = query.get('qf', QUERY_FIELDS)

        conn = make_connection()
        log.debug('Package query: %r', query)
        try:
            solr_response = conn.raw_query(**query)
        except SolrException as e:
            # "except X as e" is valid on Python 2.6+ and Python 3; the old
            # "except X, e" form is a SyntaxError on Python 3.
            raise SearchError(
                'SOLR returned an error running query: %r Error: %r' %
                (query, e.reason))
        # NOTE(review): the handling of solr_response that builds the
        # documented return value is not visible in this excerpt.
コード例 #4
0
ファイル: query.py プロジェクト: tbekkers/ckan
    def run(self, query, permission_labels=None, **kwargs):
        '''
        Performs a dataset search using the given query.

        The ``query`` mapping is updated in place with the defaults and
        filters applied below, and the outcome is also stored on
        ``self.count``, ``self.results`` and ``self.facets``. Extra keyword
        arguments are accepted but not used by this method.

        :param query: dictionary with keys like: q, fq, sort, rows, facet
        :type query: dict
        :param permission_labels: filter results to those that include at
            least one of these labels. None to not filter (return everything)
        :type permission_labels: list of unicode strings; or None

        :returns: dictionary with keys results and count

        May raise SearchQueryError or SearchError.
        '''
        assert isinstance(query, (dict, MultiDict))
        # check that query keys are valid
        invalid_params = set(query.keys()) - VALID_SOLR_PARAMETERS
        if invalid_params:
            raise SearchQueryError("Invalid search parameters: %s" %
                                   list(invalid_params))

        # default query is to return all documents
        q = query.get('q')
        if not q or q == '""' or q == "''":
            query['q'] = "*:*"

        # number of results (capped at 1000)
        rows_to_return = min(1000, int(query.get('rows', 10)))
        if rows_to_return > 0:
            # #1683 Work around problem of last result being out of order
            #       in SOLR 1.4
            rows_to_query = rows_to_return + 1
        else:
            rows_to_query = rows_to_return
        query['rows'] = rows_to_query

        # collect the caller's filter queries into a single list
        fq = []
        if 'fq' in query:
            fq.append(query['fq'])
        fq.extend(query.get('fq_list', []))

        # show only results from this CKAN instance
        fq.append('+site_id:%s' % solr_literal(config.get('ckan.site_id')))

        # filter for package status (only the caller's 'fq' is inspected)
        if '+state:' not in query.get('fq', ''):
            fq.append('+state:active')

        # only return things we should be able to see
        if permission_labels is not None:
            fq.append('+permission_labels:(%s)' %
                      ' OR '.join(solr_literal(p) for p in permission_labels))
        query['fq'] = fq

        # faceting
        query['facet'] = query.get('facet', 'true')
        query['facet.limit'] = query.get(
            'facet.limit', config.get('search.facets.limit', '50'))
        query['facet.mincount'] = query.get('facet.mincount', 1)

        # by default only the package name is returned
        query['fl'] = query.get('fl', 'name')

        # return results as json encoded string
        query['wt'] = query.get('wt', 'json')

        # The (e)dismax settings are applied when the query has no colon
        # (i.e. it is not a fielded search) or when edismax was explicitly
        # requested by the caller.
        defType = query.get('defType', 'dismax')
        if ':' not in query['q'] or defType == 'edismax':
            query['defType'] = defType
            query['tie'] = query.get('tie', '0.1')
            # this minimum match is explained
            # http://wiki.apache.org/solr/DisMaxQParserPlugin#mm_.28Minimum_.27Should.27_Match.29
            query['mm'] = query.get('mm', '2<-1 5<80%')
            query['qf'] = query.get('qf', QUERY_FIELDS)

        conn = make_connection(decode_dates=False)
        log.debug('Package query: %r', query)
        try:
            solr_response = conn.search(**query)
        except pysolr.SolrError as e:
            # Error with the sort parameter.  You see slightly different
            # error messages depending on whether the SOLR JSON comes back
            # or Jetty gets in the way converting it to HTML - not sure why
            #
            if e.args and isinstance(e.args[0], str):
                if "Can't determine a Sort Order" in e.args[0] or \
                        "Can't determine Sort Order" in e.args[0] or \
                        'Unknown sort order' in e.args[0]:
                    raise SearchQueryError('Invalid "sort" parameter')
            raise SearchError(
                'SOLR returned an error running query: %r Error: %r' %
                (query, e))
        self.count = solr_response.hits
        self.results = solr_response.docs

        # #1683 Filter out the last row that is sometimes out of order
        self.results = self.results[:rows_to_return]

        # get any extras and add to 'extras' dict
        for result in self.results:
            # Build a real list of keys first: on Python 3 ``filter`` is a
            # lazy iterator, so popping from ``result`` while consuming it
            # raises "dictionary changed size during iteration", and the
            # filter object is always truthy, defeating the check below.
            extra_keys = [
                key for key in result.keys() if key.startswith('extras_')
            ]
            extras = {}
            for extra_key in extra_keys:
                value = result.pop(extra_key)
                extras[extra_key[len('extras_'):]] = value
            if extra_keys:
                result['extras'] = extras

        # if just fetching the id or name, return a list instead of a dict
        if query.get('fl') in ['id', 'name']:
            self.results = [r.get(query.get('fl')) for r in self.results]

        # get facets and convert facets list to a dict
        # (each facet comes back as a flat [value, count, value, count, ...]
        # list, which is paired up here)
        self.facets = solr_response.facets.get('facet_fields', {})
        for field, values in six.iteritems(self.facets):
            self.facets[field] = dict(zip(values[0::2], values[1::2]))

        return {'results': self.results, 'count': self.count}
コード例 #5
0
    def run(self, query, permission_labels=None, **kwargs):
        '''
        Performs a dataset search using the given query.

        The ``query`` mapping is updated in place with the defaults and
        filters applied below before it is sent to SOLR. Extra keyword
        arguments are accepted but not used in the code shown here.

        :param query: dictionary with keys like: q, fq, sort, rows, facet
        :type query: dict
        :param permission_labels: filter results to those that include at
            least one of these labels. None to not filter (return everything)
        :type permission_labels: list of unicode strings; or None

        :returns: dictionary with keys results and count

        May raise SearchQueryError or SearchError.
        '''
        assert isinstance(query, (dict, MultiDict))
        # check that query keys are valid; plugins that provide an
        # ``update_valid_solr_parameters`` hook may replace the allowed set
        valid_solr_parameters = VALID_SOLR_PARAMETERS
        for item in plugins.PluginImplementations(plugins.IPackageController):
            if 'update_valid_solr_parameters' in dir(item):
                valid_solr_parameters = item.update_valid_solr_parameters(
                    valid_solr_parameters)

        invalid_params = set(query.keys()) - valid_solr_parameters
        if invalid_params:
            raise SearchQueryError("Invalid search parameters: %s" %
                                   list(invalid_params))

        # default query is to return all documents
        q = query.get('q')
        if not q or q == '""' or q == "''":
            query['q'] = "*:*"

        # number of results (capped at 1000)
        rows_to_return = min(1000, int(query.get('rows', 10)))
        if rows_to_return > 0:
            # #1683 Work around problem of last result being out of order
            #       in SOLR 1.4
            rows_to_query = rows_to_return + 1
        else:
            rows_to_query = rows_to_return
        query['rows'] = rows_to_query

        # collect the caller's filter queries into a single list
        fq = []
        if 'fq' in query:
            fq.append(query['fq'])
        fq.extend(query.get('fq_list', []))

        # show only results from this CKAN instance
        fq.append('+site_id:%s' % solr_literal(config.get('ckan.site_id')))

        # filter for package status (only the caller's 'fq' is inspected)
        if '+state:' not in query.get('fq', ''):
            fq.append('+state:active')

        # only return things we should be able to see
        if permission_labels is not None:
            fq.append('+permission_labels:(%s)' %
                      ' OR '.join(solr_literal(p) for p in permission_labels))
        query['fq'] = fq

        # faceting
        query['facet'] = query.get('facet', 'true')
        query['facet.limit'] = query.get(
            'facet.limit', config.get('search.facets.limit', '50'))
        query['facet.mincount'] = query.get('facet.mincount', 1)

        # by default only the package name is returned
        query['fl'] = query.get('fl', 'name')

        # return results as json encoded string
        query['wt'] = query.get('wt', 'json')

        # The (e)dismax settings are applied when the query has no colon
        # (i.e. it is not a fielded search) or when edismax was explicitly
        # requested by the caller.
        defType = query.get('defType', 'dismax')
        if ':' not in query['q'] or defType == 'edismax':
            query['defType'] = defType
            query['tie'] = query.get('tie', '0.1')
            # this minimum match is explained
            # http://wiki.apache.org/solr/DisMaxQParserPlugin#mm_.28Minimum_.27Should.27_Match.29
            query['mm'] = query.get('mm', '2<-1 5<80%')
            query['qf'] = query.get('qf', QUERY_FIELDS)

        conn = make_connection(decode_dates=False)
        log.debug('Package query: %r', query)
        try:
            solr_response = conn.search(**query)
        except pysolr.SolrError as e:
            # "except X as e" is valid on Python 2.6+ and Python 3; the old
            # "except X, e" form is a SyntaxError on Python 3.
            #
            # Error with the sort parameter.  You see slightly different
            # error messages depending on whether the SOLR JSON comes back
            # or Jetty gets in the way converting it to HTML - not sure why
            #
            if e.args and isinstance(e.args[0], str):
                if "Can't determine a Sort Order" in e.args[0] or \
                        "Can't determine Sort Order" in e.args[0] or \
                        'Unknown sort order' in e.args[0]:
                    raise SearchQueryError('Invalid "sort" parameter')
            raise SearchError(
                'SOLR returned an error running query: %r Error: %r' %
                (query, e))
        # NOTE(review): the handling of solr_response that builds the
        # documented return value is not visible in this excerpt.
コード例 #6
0
    def run(self, query):
        '''
        Performs a dataset search using the given query.

        The ``query`` mapping is updated in place with the defaults and
        filters applied below before it is sent to SOLR.

        @param query - dictionary with keys like: q, fq, sort, rows, facet
        @return - dictionary with keys results and count

        May raise SearchQueryError or SearchError.
        '''
        assert isinstance(query, (dict, MultiDict))
        # check that query keys are valid
        invalid_params = set(query.keys()) - VALID_SOLR_PARAMETERS
        if invalid_params:
            raise SearchQueryError("Invalid search parameters: %s" %
                                   list(invalid_params))

        # default query is to return all documents
        q = query.get('q')
        if not q or q == '""' or q == "''":
            query['q'] = "*:*"

        # number of results (capped at 1000)
        rows_to_return = min(1000, int(query.get('rows', 10)))
        if rows_to_return > 0:
            # #1683 Work around problem of last result being out of order
            #       in SOLR 1.4
            rows_to_query = rows_to_return + 1
        else:
            rows_to_query = rows_to_return
        query['rows'] = rows_to_query

        # show only results from this CKAN instance
        fq = query.get('fq', '')
        if '+site_id:' not in fq:
            fq += ' +site_id:"%s"' % config.get('ckan.site_id')

        # filter for package status
        if '+state:' not in fq:
            fq += " +state:active"
        # the combined filter string goes first, followed by any extra
        # filter queries supplied through 'fq_list'
        query['fq'] = [fq]

        fq_list = query.get('fq_list', [])
        query['fq'].extend(fq_list)

        # faceting
        query['facet'] = query.get('facet', 'true')
        query['facet.limit'] = query.get(
            'facet.limit', config.get('search.facets.limit', '50'))
        query['facet.mincount'] = query.get('facet.mincount', 1)

        # by default only the package name is returned
        query['fl'] = query.get('fl', 'name')

        # return results as json encoded string
        query['wt'] = query.get('wt', 'json')

        # The (e)dismax settings are applied when the query has no colon
        # (i.e. it is not a fielded search) or when edismax was explicitly
        # requested by the caller.
        defType = query.get('defType', 'dismax')
        if ':' not in query['q'] or defType == 'edismax':
            query['defType'] = defType
            query['tie'] = query.get('tie', '0.1')
            # this minimum match is explained
            # http://wiki.apache.org/solr/DisMaxQParserPlugin#mm_.28Minimum_.27Should.27_Match.29
            query['mm'] = query.get('mm', '2<-1 5<80%')
            query['qf'] = query.get('qf', QUERY_FIELDS)

        conn = make_connection(decode_dates=False)
        log.debug('Package query: %r', query)
        try:
            solr_response = conn.search(**query)
        except pysolr.SolrError as e:
            # "except X as e" is valid on Python 2.6+ and Python 3; the old
            # "except X, e" form is a SyntaxError on Python 3.
            #
            # Error with the sort parameter.  You see slightly different
            # error messages depending on whether the SOLR JSON comes back
            # or Jetty gets in the way converting it to HTML - not sure why
            #
            if e.args and isinstance(e.args[0], str):
                if "Can't determine a Sort Order" in e.args[0] or \
                        "Can't determine Sort Order" in e.args[0] or \
                        'Unknown sort order' in e.args[0]:
                    raise SearchQueryError('Invalid "sort" parameter')
            raise SearchError(
                'SOLR returned an error running query: %r Error: %r' %
                (query, e))
        # NOTE(review): the handling of solr_response that builds the
        # documented return value is not visible in this excerpt.