Ejemplo n.º 1
0
    def before_search(self, search_params):
        """Restrict a dataset search to the packages inside ``ext_bbox``.

        Nothing is changed unless ``search_params['extras']['ext_bbox']`` is
        present and truthy.  Otherwise the bounding box is validated and the
        matching extents are looked up:

        * with ``sort == 'spatial desc'`` the extents come from
          ``bbox_query_ordered``; the requested page size and offset plus the
          ``(package_id, spatial_ranking)`` pairs of that page are stored in
          ``extras`` (``ext_rows``, ``ext_start``, ``ext_spatial``) and
          ``sort`` is set to ``None``;
        * otherwise the extents come from ``bbox_query``.

        When nothing matches, ``abort_search`` is set to ``True``; otherwise
        ``q`` is rewritten to AND the existing query (``""`` when blank) with
        an OR-list of the matching package ids.

        :param search_params: search parameter dict; modified in place.
        :returns: the same ``search_params`` dict.
        :raises SearchError: if the bounding box does not validate, or if
            spatial ranking is combined with ``q`` or ``fq``.
        """
        if 'extras' not in search_params:
            return search_params
        extras = search_params['extras']
        if 'ext_bbox' not in extras or not extras['ext_bbox']:
            return search_params

        bbox = validate_bbox(extras['ext_bbox'])
        if not bbox:
            raise SearchError('Wrong bounding box provided')

        if search_params.get('sort') == 'spatial desc':
            # Letting SOLR filter and then sorting/paging the whole result
            # set here would be too inefficient, so the combination is
            # rejected outright.
            if search_params.get('q') or search_params.get('fq'):
                raise SearchError('Spatial ranking cannot be mixed with other search parameters')

            extents = bbox_query_ordered(bbox)
            nothing_found = not extents

            # Remember the requested page so the rankings stored below
            # describe exactly that page.
            page_size = search_params.get('rows', 50)
            extras['ext_rows'] = page_size
            page_start = search_params.get('start', 0)
            extras['ext_start'] = page_start

            # SOLR should not sort.
            search_params['sort'] = None

            page_end = page_start + page_size
            extras['ext_spatial'] = [
                (extent.package_id, extent.spatial_ranking)
                for extent in extents[page_start:page_end]
            ]
        else:
            extents = bbox_query(bbox)
            nothing_found = extents.count() == 0

        if nothing_found:
            # No dataset lies inside the box: the search can be skipped.
            search_params['abort_search'] = True
            return search_params

        # Keep the existing search but additionally filter by the ids of the
        # datasets within the bbox.
        id_clauses = ['id:%s' % extent.package_id for extent in extents]
        user_query = search_params.get('q', '').strip() or '""'
        search_params['q'] = '%s AND ' % user_query + '(%s)' % ' OR '.join(id_clauses)
        return search_params
Ejemplo n.º 2
0
    def before_search(self, search_params):
        """Limit a dataset search to the packages found inside ``ext_bbox``.

        The parameters are returned untouched unless
        ``search_params['extras']['ext_bbox']`` is present and truthy.  When
        it is, the box is validated and ``bbox_query`` is asked for the
        matching extents.  With no match ``abort_search`` is set to ``True``;
        otherwise ``q`` becomes ``<old q> AND (id:... OR id:...)`` (or just
        the id list when there was no previous query).

        :param search_params: search parameter dict; modified in place.
        :returns: the same ``search_params`` dict.
        :raises SearchError: if the bounding box does not validate.
        """
        if 'extras' not in search_params:
            return search_params
        extras = search_params['extras']
        if 'ext_bbox' not in extras or not extras['ext_bbox']:
            return search_params

        bbox = validate_bbox(extras['ext_bbox'])
        if not bbox:
            raise SearchError('Wrong bounding box provided')

        extents = bbox_query(bbox)
        if extents.count() == 0:
            # Nothing lies inside the box, so the search itself is pointless.
            search_params['abort_search'] = True
            return search_params

        # Run the existing search, additionally filtered by the ids of the
        # datasets within the bbox.
        id_filter = '(%s)' % ' OR '.join(
            ['id:%s' % extent.package_id for extent in extents])

        previous_q = search_params.get('q', '')
        prefix = '%s AND ' % previous_q if previous_q else ''
        search_params['q'] = prefix + id_filter
        return search_params
Ejemplo n.º 3
0
    def before_search(self, search_params):
        """Apply the ``ext_bbox`` spatial filter using the configured backend.

        When ``search_params['extras']['ext_bbox']`` is missing or falsy the
        parameters are returned unchanged.  Otherwise the box is validated,
        its ``minx``/``maxx`` values are shifted by whole multiples of 360
        until ``minx`` lies within ``[-180, 180]``, and the parameters are
        handed to the helper matching ``self.search_backend`` (``'solr'``,
        ``'solr-spatial-field'`` or ``'postgis'``).  Any other backend value
        leaves the parameters as they are.

        :param search_params: search parameter dict.
        :returns: the (possibly rewritten) search parameter dict.
        :raises SearchError: if the bounding box does not validate.
        """
        from ckanext.spatial.lib import validate_bbox
        from ckan.lib.search import SearchError

        extras = search_params.get('extras', None)
        if not extras or not extras.get('ext_bbox', None):
            return search_params

        bbox = validate_bbox(extras['ext_bbox'])
        if not bbox:
            raise SearchError('Wrong bounding box provided')

        # Adjust easting values: move both edges together so the width of
        # the box is preserved.
        while bbox['minx'] < -180:
            bbox['minx'] += 360
            bbox['maxx'] += 360
        while bbox['minx'] > 180:
            bbox['minx'] -= 360
            bbox['maxx'] -= 360

        backend = self.search_backend
        if backend == 'solr':
            return self._params_for_solr_search(bbox, search_params)
        if backend == 'solr-spatial-field':
            return self._params_for_solr_spatial_field_search(
                bbox, search_params)
        if backend == 'postgis':
            return self._params_for_postgis_search(bbox, search_params)
        return search_params
Ejemplo n.º 4
0
def edc_package_update(context, input_data_dict):
    '''
    Search for the package whose ``object_name`` matches the given one.

    The search is run through the ``package_search`` action with
    ``q = 'object_name:<object_name>'``, at most 2 rows, sorted by
    ``metadata_modified desc``.

    Originally described steps:
    1) Call __package_search to find the package
    2) Check the results (success == true), (count==1)
    3) Modify the data
    4) Call get_action(package_update) to update the package

    NOTE(review): only step 1 is present in the code shown here; the search
    result is assigned to ``query`` but never inspected or returned.

    :param context: action context passed through to ``package_search``.
    :param input_data_dict: dict that must contain a string ``object_name``.
    :raises TypeError: if ``object_name`` is missing (``None`` cannot be
        concatenated to the query prefix).
    :raises SearchError: if ``package_search`` raises one; it is logged and
        re-raised with the same message.
    '''
    from ckan.lib.search import SearchError

    # first, do the search
    data_dict = {
        'q': 'object_name:' + input_data_dict.get("object_name"),
        'fq': '',
        'start': 0,
        # Two rows are enough to tell "exactly one match" from "several".
        'rows': 2,
        'sort': 'metadata_modified desc'
    }

    try:
        # Use package_search to filter the list
        query = get_action('package_search')(context, data_dict)
    except SearchError as se:
        # "except X as e" parses on Python 2.6+ and Python 3 alike, unlike
        # the old "except X, e" spelling.
        log.error('Search error : %s', str(se))
        raise SearchError(str(se))
Ejemplo n.º 5
0
    def before_search(self, search_params):
        """Add a ``temporal_extent`` range filter to the search parameters.

        The time window is read from ``search_params['extras']``
        (``ext_timerange_start`` / ``ext_timerange_end``).  When there are no
        extras, or neither bound is supplied, the original dict is returned
        untouched.  Otherwise both bounds are passed through ``convert_date``
        and ``" +temporal_extent:[<begin> TO <end>]"`` is appended to ``fq``
        on a deep copy of the parameters, which is returned.

        NOTE(review): when only one bound is given, ``convert_date`` is
        called with ``None`` for the other one; how it handles that is not
        visible here.

        :param search_params: search parameter dict; never modified.
        :returns: ``search_params`` itself, or a modified deep copy.
        :raises SearchError: if a bound cannot be parsed.
        """
        # Always hand a dict back: falling off the end would return None to
        # the caller when no extras are present.
        if 'extras' not in search_params:
            return search_params

        extras = search_params['extras']
        begin_time = extras.get('ext_timerange_start')
        end_time = extras.get('ext_timerange_end')

        # temporal handling
        # if both begin and end time are none, no search window was provided
        if begin_time is None and end_time is None:
            return search_params

        try:
            log.debug(begin_time)
            convert_begin = convert_date(begin_time)
            log.debug(convert_begin)
            log.debug(end_time)
            convert_end = convert_date(end_time)
            log.debug(convert_end)
        except pendulum.parsing.exceptions.ParserError:
            log.exception("Error while parsing begin/end time")
            raise SearchError("Cannot parse provided time")

        log.debug(search_params)
        # fq should be defined in query params, but just in case, use .get
        # defaulting to empty string
        fq_contents = search_params.get('fq', '')
        fq_modified = ("{} +temporal_extent:[{} TO {}]".format(
            fq_contents, convert_begin, convert_end))

        # Copy only now that a modification is certain, so the pass-through
        # paths above do not pay for a deep copy.
        search_params_modified = copy.deepcopy(search_params)
        search_params_modified['fq'] = fq_modified
        log.debug(search_params_modified)
        return search_params_modified
Ejemplo n.º 6
0
    def _params_for_postgis_search(self, bbox, search_params):
        """Rewrite ``search_params`` so only packages inside ``bbox`` match.

        Two modes exist:

        * ``sort == 'spatial desc'`` with the
          ``ckanext.spatial.use_postgis_sorting`` option enabled: the extents
          come from ``bbox_query_ordered``; the requested ``rows``/``start``
          and the ``(package_id, spatial_ranking)`` pairs of that page are
          stored in ``extras`` (``ext_rows``, ``ext_start``, ``ext_spatial``)
          and ``sort`` is set to ``None``.
        * otherwise the extents come from ``bbox_query``.

        When nothing matches, ``abort_search`` is set to ``True``; otherwise
        ``q`` becomes the id OR-list, prefixed with ``<old q> AND`` when the
        old query is neither blank nor ``""``.

        :param bbox: validated bounding box, passed to the query helpers.
        :param search_params: search parameter dict; modified in place.
        :returns: the same ``search_params`` dict.
        :raises SearchError: if spatial ranking is combined with ``q``/``fq``.
        """
        from ckanext.spatial.lib import bbox_query, bbox_query_ordered
        from ckan.lib.search import SearchError

        # Note: This will be deprecated at some point in favour of the
        # Solr 4 spatial sorting capabilities
        rank_with_postgis = (
            search_params.get('sort') == 'spatial desc'
            and tk.asbool(
                config.get('ckanext.spatial.use_postgis_sorting', 'False')))

        if rank_with_postgis:
            # Letting SOLR filter and then sorting/paging the whole result
            # set here would be too inefficient, so the combination is
            # rejected outright.
            if search_params['q'] or search_params['fq']:
                raise SearchError(
                    'Spatial ranking cannot be mixed with other search parameters'
                )

            extents = bbox_query_ordered(bbox)
            nothing_found = not extents

            # Remember the requested page so the rankings stored below
            # describe exactly that page.
            page_size = search_params['rows']
            extras = search_params['extras']
            extras['ext_rows'] = page_size
            page_start = search_params['start']
            extras['ext_start'] = page_start

            # SOLR should not sort.
            search_params['sort'] = None

            page_end = page_start + page_size
            extras['ext_spatial'] = [
                (extent.package_id, extent.spatial_ranking)
                for extent in extents[page_start:page_end]
            ]
        else:
            extents = bbox_query(bbox)
            nothing_found = extents.count() == 0

        if nothing_found:
            # No dataset lies inside the box: the search can be skipped.
            search_params['abort_search'] = True
            return search_params

        # Keep the existing search but additionally filter by the ids of the
        # datasets within the bbox.
        id_filter = '(%s)' % ' OR '.join(
            ['id:%s' % extent.package_id for extent in extents])

        user_query = search_params.get('q', '').strip()
        # Note: `"" AND` query doesn't work in github ci, so a blank or
        # literal "" query gets no prefix at all.
        if user_query and user_query != '""':
            search_params['q'] = '%s AND ' % user_query + id_filter
        else:
            search_params['q'] = id_filter
        return search_params
Ejemplo n.º 7
0
Archivo: get.py Proyecto: pingali/ckan
def resource_search(context, data_dict):
    """Search resources by field values.

    ``data_dict['fields']`` maps a resource field name to its search terms
    (a string is split on whitespace; anything else is iterated as-is).
    Every term adds one case-insensitive ``ilike`` filter:

    * ``hash``: the value must start with the term;
    * a field listed by ``Resource.get_extra_columns()``: the ``extras``
      column text must contain ``"<field>": "...<term>..."``;
    * any other field: the value must contain the term.

    ``order_by`` is applied only when it names an attribute of the Resource
    model; ``offset`` and ``limit`` are applied after the total is counted.

    :param context: dict providing ``model`` and ``session``.
    :param data_dict: dict with ``fields`` and optional ``order_by``,
        ``offset`` and ``limit``.
    :returns: ``{'count': <total matches>, 'results': [<resource>, ...]}``.
    :raises SearchError: if a field is not one of ``Resource.get_columns()``.
    """
    model = context['model']
    # The value is not used below; the lookup is kept so a context without
    # 'session' still fails here.
    session = context['session']

    fields = data_dict['fields']
    order_by = data_dict.get('order_by')
    offset = data_dict.get('offset')
    limit = data_dict.get('limit')

    # TODO: should we check for user authentication first?
    q = model.Session.query(model.Resource)
    known_fields = model.Resource.get_columns()

    for field, terms in fields.items():
        if isinstance(terms, basestring):
            terms = terms.split()
        if field not in known_fields:
            raise SearchError('Field "%s" not recognised in Resource search.' %
                              field)
        for term in terms:
            column = getattr(model.Resource, field)
            if field == 'hash':
                condition = column.ilike(unicode(term) + '%')
            elif field in model.Resource.get_extra_columns():
                extras_column = model.Resource.extras
                # Two patterns: the key/value pair is either followed by
                # another pair (",") or closes the object ("}").
                followed_by_pair = u'''%%"%s": "%%%s%%",%%''' % (field, term)
                closing_object = u'''%%"%s": "%%%s%%"}''' % (field, term)
                condition = or_(extras_column.ilike(followed_by_pair),
                                extras_column.ilike(closing_object))
            else:
                condition = column.ilike('%' + unicode(term) + '%')
            q = q.filter(condition)

    if order_by is not None and hasattr(model.Resource, order_by):
        q = q.order_by(getattr(model.Resource, order_by))

    # Count before paging so 'count' reports every match.
    count = q.count()
    q = q.offset(offset).limit(limit)

    # Rows may arrive as tuples whose first item is the domain object (this
    # is the case for order_by rank due to the add_column); unwrap those.
    results = [
        row[0]
        if isinstance(row, tuple) and isinstance(row[0], model.DomainObject)
        else row
        for row in q
    ]

    return {'count': count, 'results': results}
Ejemplo n.º 8
0
    def before_search(self, search_params):
        """Apply the ``ext_bbox`` spatial filter using the configured backend.

        When ``search_params['extras']['ext_bbox']`` is missing or falsy the
        parameters are returned unchanged.  Otherwise the box is validated
        and the parameters are handed to the helper matching
        ``self.search_backend`` (``'solr'``, ``'solr-spatial-field'`` or
        ``'postgis'``).  Any other backend value leaves them as they are.

        :param search_params: search parameter dict.
        :returns: the (possibly rewritten) search parameter dict.
        :raises SearchError: if the bounding box does not validate.
        """
        extras = search_params.get('extras', None)
        if not extras or not extras.get('ext_bbox', None):
            return search_params

        bbox = validate_bbox(extras['ext_bbox'])
        if not bbox:
            raise SearchError('Wrong bounding box provided')

        backend = self.search_backend
        if backend == 'solr':
            return self._params_for_solr_search(bbox, search_params)
        if backend == 'solr-spatial-field':
            return self._params_for_solr_spatial_field_search(bbox, search_params)
        if backend == 'postgis':
            return self._params_for_postgis_search(bbox, search_params)
        return search_params
Ejemplo n.º 9
0
def edc_package_update_bcgw(context, input_data_dict):
    '''
    Search for the package whose ``object_name`` matches the given one.

    The input dict is first round-tripped through JSON, then the search is
    run through the ``package_search`` action with
    ``q = 'object_name:<object_name>'``, at most 2 rows, sorted by
    ``metadata_modified desc``.

    Originally described steps:
    1) Call __package_search to find the package
    2) Check the results (success == true), (count==1)
    3) Modify the data
    4) Call get_action(package_update) to update the package

    NOTE(review): only step 1 is present in the code shown here; ``update``
    and the search result ``query`` are assigned but never used afterwards.

    :param context: action context passed through to ``package_search``.
    :param input_data_dict: JSON-serialisable dict that must contain a
        string ``object_name``.
    :raises TypeError: if ``object_name`` is missing (``None`` cannot be
        concatenated to the query prefix).
    :raises SearchError: if ``package_search`` raises one; it is logged and
        re-raised with the same message.
    '''
    from ckan.lib.search import SearchError
    import json

    # Fixed unicode characters decoding problem: serialise without ASCII
    # escaping and parse the text back.
    # NOTE(review): the ``encoding`` keyword of json.loads only exists on
    # older Python versions (removed in 3.9) - confirm the target runtime.
    input_dict_str = json.dumps(input_data_dict, ensure_ascii=False)
    input_data_dict = json.loads(input_dict_str, encoding="cp1252")

    update = {}

    # first, do the search
    data_dict = {
        'q': 'object_name:' + input_data_dict.get("object_name"),
        'fq': '',
        'start': 0,
        # Two rows are enough to tell "exactly one match" from "several".
        'rows': 2,
        'sort': 'metadata_modified desc'
    }

    try:
        # Use package_search to filter the list
        query = get_action('package_search')(context, data_dict)
    except SearchError as se:
        # "except X as e" parses on Python 2.6+ and Python 3 alike, unlike
        # the old "except X, e" spelling.
        log.error('Search error : %s', str(se))
        raise SearchError(str(se))
Ejemplo n.º 10
0
def reverse_apicontroller_action(status, response):
    """
    Make an API call look like a direct action call by reversing the
    exception -> HTTP response translation that ApiController.action does

    :param status: HTTP status of the API response; only used when raising
        ``CKANAPIError``.
    :param response: raw response body, expected to be a JSON object.
    :returns: the parsed response dict when its ``success`` value is truthy.
    :raises SearchQueryError, SearchError, SearchIndexError, ParameterError,
        ValidationError, NotFound, NotAuthorized: according to the
        ``__type`` of the ``error`` object in the response.
    :raises CKANAPIError: when the body is not JSON, is not a JSON object,
        or carries an error type that is not recognised.
    """
    try:
        parsed = json.loads(response)
    except (ValueError, TypeError):
        # Not JSON at all (or not even a string): treat as unrecognised.
        parsed = None

    err = {}
    # Only a JSON object can carry 'success'/'error'.  Check the type before
    # calling .get() so that a list, string or number body ends up as
    # CKANAPIError instead of an AttributeError.
    if isinstance(parsed, dict):
        if parsed.get('success'):
            return parsed
        err = parsed.get('error', {})
        if not isinstance(err, dict):
            # e.g. "error": null or a bare string - nothing to translate
            err = {}

    etype = err.get('__type')
    # Drop everything up to the first ': ' in the message; a null message
    # is treated as empty.
    emessage = (err.get('message') or '').split(': ', 1)[-1]
    if etype == 'Search Query Error':
        # I refuse to eval(emessage), even if it would be more correct
        raise SearchQueryError(emessage)
    elif etype == 'Search Error':
        # I refuse to eval(emessage), even if it would be more correct
        raise SearchError(emessage)
    elif etype == 'Search Index Error':
        raise SearchIndexError(emessage)
    elif etype == 'Parameter Error':
        raise ParameterError(emessage)
    elif etype == 'Validation Error':
        raise ValidationError(err)
    elif etype == 'Not Found Error':
        raise NotFound(emessage)
    elif etype == 'Authorization Error':
        raise NotAuthorized()

    # don't recognize the error
    raise CKANAPIError(response, status)
Ejemplo n.º 11
0
    def before_view(self, pkg_dict):
        """
        Extend the group controller to show resource information
        The resource information will come from elastic search

        The ``q`` and ``page`` request parameters (also stored on ``c``)
        select what is queried: a missing ``q`` or ``*:*`` matches
        everything, anything else is sent as a query string with ``AND`` as
        the default operator.  Results are sorted by ``data`` descending and
        paged 20 at a time.

        :param pkg_dict: package dict; ``pkg_dict['name']`` is joined onto
            ``self.url`` to build the data store URL.  Modified in place.
        :returns: ``pkg_dict`` with ``elastic_resources`` (the ``_source``
            of every hit) and ``elastic_hits`` (the reported total) added.
        :raises SearchError: if the query fails for any reason.
        :raises ValueError: if ``page`` is not an integer string.
        """
        # unicode format (decoded from utf8)
        q = c.q = request.params.get('q', default=None)
        page = c.page = request.params.get('page', default=None)

        # TODO: put this as a parameter
        rows = 20

        # Start with the first element in this page (pages are 1-based).
        start = 0 if page is None else (int(page) - 1) * rows

        # format q to send to elastic search
        # Compare by value: an identity test against the literal '*:*' is
        # not reliable for a string that came from the request.
        if q is None or q == '*:*':
            match_clause = {"match_all": {}}
        else:
            match_clause = {
                "query_string": {
                    "query": q,
                    "default_operator": "AND"
                }
            }
        query = {
            "sort": {
                "data": {
                    "order": "desc"
                },
            },
            "query": match_clause,
            "size": rows,
            "from": start
        }

        # Now send query to elastic search
        self._load_elastic_config()
        client = DataStoreClient(urlparse.urljoin(self.url, pkg_dict['name']))
        client._headers = {'Authorization': self.user.get('apikey')}

        # Any failure of the query is logged and reported as a SearchError.
        try:
            response = client.query(query)
        except Exception:
            # there's an error in search params
            import traceback
            errmsg = 'Error searching query string \n %s \n Message\n%s' % (
                query, traceback.format_exc())
            log.error(errmsg)

            raise SearchError(errmsg)

        # Now we have to parse the result back to package dict
        hits = response.get('hits')
        if hits is not None:
            # Add new fields on pkg_dict
            pkg_dict['elastic_resources'] = [
                res['_source'] for res in hits['hits']]
            pkg_dict['elastic_hits'] = hits.get('total')
        else:
            # NOTE(review): an empty dict here, a list in the branch above.
            pkg_dict['elastic_resources'] = dict()
            pkg_dict['elastic_hits'] = 0

        return pkg_dict