def _run(self):
    """
    Run a field-only resource search against the SQL model.

    Starts from a query over ``model.Resource``, adds one case-insensitive
    ``ilike`` filter per (field, term) pair found in ``self.query.fields``,
    applies ``self.options.order_by`` when it names an attribute of
    ``model.Resource``, and hands the query to ``self._db_query``.

    Raises SearchError if free-text terms were supplied, or if a field is
    not among the columns reported by ``model.Resource.get_columns()``.
    """
    resource_cls = model.Resource
    q = model.Session.query(resource_cls)  # TODO authz

    if self.query.terms:
        raise SearchError(
            'Only field specific terms allowed in resource search.')

    # self._check_options_specified_are_allowed(
    #     'resource search', ['all_fields', 'offset', 'limit'])
    self.options.ref_entity_with_attr = 'id'  # has no name
    known_fields = resource_cls.get_columns()

    for field, terms in self.query.fields.items():
        # A plain string holds whitespace-separated terms.
        if isinstance(terms, basestring):
            terms = terms.split()
        if field not in known_fields:
            raise SearchError(
                'Field "%s" not recognised in Resource search.' % field)

        for term in terms:
            column = getattr(resource_cls, field)
            if field == 'hash':
                # Hashes are matched by prefix only.
                condition = column.ilike(unicode(term) + '%')
            elif field in resource_cls.get_extra_columns():
                # Extra columns are searched inside the 'extras' attribute:
                # the patterns look for `"field": "...term..."` followed by
                # either a comma or the closing brace.
                extras = getattr(resource_cls, 'extras')
                condition = or_(
                    extras.ilike(u'''%%"%s": "%%%s%%",%%''' % (field, term)),
                    extras.ilike(u'''%%"%s": "%%%s%%"}''' % (field, term)))
            else:
                # Everything else is a substring match.
                condition = column.ilike('%' + unicode(term) + '%')
            q = q.filter(condition)

    order_by = self.options.order_by
    if order_by is not None and hasattr(resource_cls, order_by):
        q = q.order_by(getattr(resource_cls, order_by))

    self._db_query(q)
def check_solr_schema_version(schema_file=None):
    '''
        Checks if the schema version of the SOLR server is compatible
        with this CKAN version.

        The schema is first requested from the SOLR server using the
        offset defined in SOLR_SCHEMA_FILE_OFFSET_MANAGED. If that request
        fails with an HTTP error status, the schema is requested again
        using the offset defined in SOLR_SCHEMA_FILE_OFFSET_CLASSIC (the
        status of this second response is not checked).

        The schema_file parameter allows to override this pointing to a
        different schema file, but it should only be used for testing
        purposes.

        If is_available() reports that the SOLR server cannot be used, a
        warning is logged and the function returns False, as the version
        check does not apply.

        If the SOLR server is available, a SearchError exception will be
        thrown if the version could not be extracted or it is not included
        in the supported versions list.

        :schema_file: Absolute path to an alternative schema file. Should
                      be only used for testing purposes (Default is None)

        :returns: True if the schema version is supported, False if the
                  SOLR server is not available.
    '''

    if not is_available():
        # Something is wrong with the SOLR server
        log.warning('Problems were found while connecting to the SOLR server')
        return False

    # Try to get the schema XML file to extract the version
    if not schema_file:
        try:
            # Try Managed Schema
            res = _get_schema_from_solr(SOLR_SCHEMA_FILE_OFFSET_MANAGED)
            res.raise_for_status()
        except requests.HTTPError:
            # Fallback to Manually Edited schema.xml
            res = _get_schema_from_solr(SOLR_SCHEMA_FILE_OFFSET_CLASSIC)
        schema_content = res.text
    else:
        with open(schema_file, 'rb') as f:
            schema_content = f.read()

    tree = xml.dom.minidom.parseString(schema_content)

    # getAttribute() returns an empty string when the attribute is missing
    version = tree.documentElement.getAttribute('version')
    if not version:
        msg = 'Could not extract version info from the SOLR schema'
        if schema_file:
            msg += ', using file {}'.format(schema_file)
        raise SearchError(msg)

    if version not in SUPPORTED_SCHEMA_VERSIONS:
        raise SearchError('SOLR schema version not supported: %s. Supported'
                          ' versions are [%s]'
                          % (version, ', '.join(SUPPORTED_SCHEMA_VERSIONS)))
    return True
def check_solr_schema_version(schema_file=None):
    '''
        Checks if the schema version of the SOLR server is compatible
        with this CKAN version.

        The schema will be retrieved from the SOLR server, using the
        offset defined in SOLR_SCHEMA_FILE_OFFSET. When SolrSettings
        provides both a user and a password, they are sent as HTTP Basic
        credentials.

        The schema_file parameter allows to override this pointing to a
        different schema file, but it should only be used for testing
        purposes.

        If is_available() reports that the SOLR server cannot be used, a
        warning is logged and the function returns False, as the version
        check does not apply.

        If the SOLR server is available, a SearchError exception will be
        thrown if the version could not be extracted or it is not included
        in the supported versions list.

        :schema_file: Absolute path to an alternative schema file. Should
                      be only used for testing purposes (Default is None)

        :returns: True if the schema version is supported, False if the
                  SOLR server is not available.
    '''
    import base64

    if not is_available():
        # Something is wrong with the SOLR server
        log.warning('Problems were found while connecting to the SOLR server')
        return False

    # Try to get the schema XML file to extract the version
    if not schema_file:
        solr_url, solr_user, solr_password = SolrSettings.get()

        http_auth = None
        if solr_user is not None and solr_password is not None:
            # b64encode() never inserts line breaks; the 'base64' codec
            # wraps its output every 76 characters, which would corrupt
            # the header for long credentials.
            credentials = solr_user + ':' + solr_password
            http_auth = 'Basic ' + base64.b64encode(credentials)

        url = solr_url.strip('/') + SOLR_SCHEMA_FILE_OFFSET

        req = urllib2.Request(url=url)
        if http_auth:
            req.add_header('Authorization', http_auth)

        res = urllib2.urlopen(req)
    else:
        url = 'file://%s' % schema_file
        res = urllib2.urlopen(url)

    # Always release the response, even if reading fails
    try:
        schema_content = res.read()
    finally:
        res.close()

    tree = xml.dom.minidom.parseString(schema_content)

    # getAttribute() returns an empty string when the attribute is missing
    version = tree.documentElement.getAttribute('version')
    if not version:
        raise SearchError('Could not extract version info from the SOLR'
                          ' schema, using file: \n%s' % url)

    if version not in SUPPORTED_SCHEMA_VERSIONS:
        raise SearchError('SOLR schema version not supported: %s. Supported'
                          ' versions are [%s]'
                          % (version, ', '.join(SUPPORTED_SCHEMA_VERSIONS)))
    return True
def query_for(_type):
    """
    Get a SearchQuery instance sub-class suitable for the specified type.

    The type is normalised with _normalize_type() and looked up in the
    _QUERIES registry; the registered class is then instantiated with no
    arguments and returned.

    Raises SearchError if no query class is registered for the type.
    """
    try:
        query_class = _QUERIES[_normalize_type(_type)]
    except KeyError:
        raise SearchError("Unknown search type: %s" % _type)
    # Instantiate outside the try block so that a KeyError raised while
    # constructing the query is not misreported as an unknown search type.
    return query_class()
def run(self, query=None, terms=[], fields={}, facet_by=[], options=None,
        **kwargs):
    """
    Placeholder search entry point: always raises SearchError.

    Every argument is accepted and ignored. The list/dict defaults are
    never mutated here, so sharing them between calls is harmless.
    """
    message = "SearchQuery.run() not implemented!"
    raise SearchError(message)
class PackageSearchQuery(SearchQuery):
    """Package queries answered through the connection returned by
    make_connection()."""

    def get_all_entity_ids(self, max_results=1000):
        """
        Return a list of the IDs of all indexed packages.

        Only active packages belonging to the configured 'ckan.site_id'
        are considered, and at most max_results IDs are requested.
        """
        query = "*:*"
        fq = "+site_id:\"%s\" " % config.get('ckan.site_id')
        fq += "+state:active "

        conn = make_connection()
        try:
            data = conn.query(query, fq=fq, rows=max_results, fields='id')
        finally:
            conn.close()

        return [r.get('id') for r in data.results]

    def get_index(self, reference):
        """
        Return the first index document whose name or id matches reference.

        The query is restricted to the configured 'ckan.site_id'.

        Raises SearchError if SOLR reports an error, if the response
        cannot be decoded, or if no document matches.
        """
        # NOTE(review): reference is interpolated into the query unquoted,
        # so values containing query syntax characters may not match as
        # intended - confirm what callers pass in.
        query = {
            'rows': 1,
            'q': 'name:%s OR id:%s' % (reference, reference),
            'wt': 'json',
            'fq': 'site_id:"%s"' % config.get('ckan.site_id')}

        conn = make_connection()
        try:
            log.debug('Package query: %r' % query)
            solr_response = conn.raw_query(**query)
        except SolrException as e:
            raise SearchError(
                'SOLR returned an error running query: %r Error: %r' %
                (query, e.reason))
        finally:
            # Release the connection, as get_all_entity_ids() does
            conn.close()

        try:
            response = json.loads(solr_response)['response']
            found = response['numFound'] != 0
            doc = response['docs'][0] if found else None
        except Exception as e:
            log.exception(e)
            raise SearchError(e)

        # Raised outside the try block: a missing dataset is an expected
        # outcome, so it is neither logged with a traceback nor re-wrapped.
        if not found:
            raise SearchError(
                'Dataset not found in the search index: %s' % reference)
        return doc
def get_index(self, reference):
    """
    Query the search index for one package whose name or id matches
    reference, restricted to the configured 'ckan.site_id'.

    Raises SearchError if SOLR reports an error while running the query.

    NOTE(review): as visible here the method stops right after the SOLR
    call, so the raw response is never decoded or returned and the
    connection is not closed - this looks like a truncated excerpt;
    confirm against the full method.
    """
    # Ask for a single JSON-encoded row; reference is interpolated into
    # the query string as-is.
    query = {
        'rows': 1,
        'q': 'name:%s OR id:%s' % (reference, reference),
        'wt': 'json',
        'fq': 'site_id:"%s"' % config.get('ckan.site_id') }

    conn = make_connection()
    log.debug('Package query: %r' % query)
    try:
        solr_response = conn.raw_query(**query)
    except SolrException, e:
        # Surface SOLR failures as SearchError, including the query sent
        raise SearchError(
            'SOLR returned an error running query: %r Error: %r' %
            (query, e.reason))
def run(self, query):
    '''
    Performs a dataset search using the given query.

    The query mapping is validated, filled in with defaults (in place)
    and sent to SOLR.

    @param query - dictionary with keys like: q, fq, sort, rows, facet

    May raise SearchQueryError (invalid parameter names) or SearchError
    (SOLR reported an error).

    NOTE(review): the code visible here ends after the SOLR call; the
    handling of the response is not shown.
    '''
    from solr import SolrException
    assert isinstance(query, (dict, MultiDict))

    # only parameters from the whitelist are accepted
    if not set(query.keys()) <= VALID_SOLR_PARAMETERS:
        invalid_params = list(set(query.keys()) - VALID_SOLR_PARAMETERS)
        raise SearchQueryError(
            "Invalid search parameters: %s" % invalid_params)

    # an empty or quotes-only query returns all documents
    q = query.get('q')
    if not q or q in ('""', "''"):
        query['q'] = "*:*"

    # number of results, capped at 1000
    requested_rows = int(query.get('rows', 10))
    query['rows'] = min(1000, requested_rows)

    # order by score if no 'sort' term given
    order_by = query.get('sort')
    if order_by is None or order_by == 'rank':
        query['sort'] = 'score desc, name asc'

    # restrict to this CKAN instance and to active packages, unless the
    # caller already filters on those fields
    fq = query.get('fq', '')
    if '+site_id:' not in fq:
        fq += ' +site_id:"%s"' % config.get('ckan.site_id')
    if '+state:' not in fq:
        fq += " +state:active"
    query['fq'] = fq

    # faceting, returned fields (package name by default) and response
    # format (json by default): keep the caller's value when present
    defaults = (
        ('facet', 'true'),
        ('facet.limit', config.get('search.facets.limit', '50')),
        ('facet.mincount', 1),
        ('fl', 'name'),
        ('wt', 'json'),
    )
    for key, fallback in defaults:
        query[key] = query.get(key, fallback)

    # query field weighting: disabled for now as solr 3.* is required for
    # the 'edismax' query parser, our current Ubuntu version only has
    # packages for 1.4
    #
    # query['defType'] = 'edismax'
    # query['tie'] = '0.5'
    # query['qf'] = query.get('qf', QUERY_FIELDS)

    conn = make_connection()
    log.debug('Package query: %r' % query)
    try:
        solr_response = conn.raw_query(**query)
    except SolrException as e:
        message = ('SOLR returned an error running query: %r Error: %r' %
                   (query, e.reason))
        raise SearchError(message)
class PackageSearchQuery(SearchQuery):
    """Package queries answered through the connection returned by
    make_connection()."""

    def get_all_entity_ids(self, max_results=1000):
        """
        Return a list of the IDs of all indexed packages.

        Only active packages belonging to the configured 'ckan.site_id'
        are considered, and at most max_results IDs are requested.
        """
        query = "*:*"
        fq = "+site_id:\"%s\" " % config.get('ckan.site_id')
        fq += "+state:active "

        conn = make_connection()
        try:
            data = conn.query(query, fq=fq, rows=max_results, fields='id')
        finally:
            conn.close()

        return [r.get('id') for r in data.results]

    def run(self, query):
        '''
        Performs a dataset search using the given query.

        The query mapping is validated and filled in with defaults (in
        place), sent to SOLR, and the decoded response is stored on
        self.count, self.results and self.facets.

        @param query - dictionary with keys like: q, fq, sort, rows, facet
        @return - dictionary with keys results and count

        May raise SearchQueryError (invalid parameter names) or
        SearchError (SOLR error, or a response that cannot be processed).
        '''
        from solr import SolrException
        assert isinstance(query, (dict, MultiDict))

        # check that query keys are valid
        if not set(query.keys()) <= VALID_SOLR_PARAMETERS:
            invalid_params = [
                s for s in set(query.keys()) - VALID_SOLR_PARAMETERS]
            raise SearchQueryError(
                "Invalid search parameters: %s" % invalid_params)

        # default query is to return all documents
        q = query.get('q')
        if not q or q == '""' or q == "''":
            query['q'] = "*:*"

        # number of results, capped at 1000
        query['rows'] = min(1000, int(query.get('rows', 10)))

        # order by score if no 'sort' term given
        order_by = query.get('sort')
        if order_by == 'rank' or order_by is None:
            query['sort'] = 'score desc, name asc'

        # show only results from this CKAN instance
        fq = query.get('fq', '')
        if '+site_id:' not in fq:
            fq += ' +site_id:"%s"' % config.get('ckan.site_id')

        # filter for package status
        if '+state:' not in fq:
            fq += " +state:active"
        query['fq'] = fq

        # faceting
        query['facet'] = query.get('facet', 'true')
        query['facet.limit'] = query.get(
            'facet.limit', config.get('search.facets.limit', '50'))
        query['facet.mincount'] = query.get('facet.mincount', 1)

        # return the package name unless other fields were requested
        query['fl'] = query.get('fl', 'name')

        # return results as json encoded string
        query['wt'] = query.get('wt', 'json')

        # query field weighting: disabled for now as solr 3.* is required
        # for the 'edismax' query parser, our current Ubuntu version only
        # has packages for 1.4
        #
        # query['defType'] = 'edismax'
        # query['tie'] = '0.5'
        # query['qf'] = query.get('qf', QUERY_FIELDS)

        conn = make_connection()
        try:
            log.debug('Package query: %r' % query)
            solr_response = conn.raw_query(**query)
        except SolrException as e:
            raise SearchError(
                'SOLR returned an error running query: %r Error: %r' %
                (query, e.reason))
        finally:
            # Release the connection, as get_all_entity_ids() does
            conn.close()

        try:
            data = json.loads(solr_response)
            response = data['response']
            self.count = response.get('numFound', 0)
            self.results = response.get('docs', [])

            # move every 'extras_<key>' field into an 'extras' dict
            prefix = 'extras_'
            for result in self.results:
                # materialise the key list first: result is modified below
                extra_keys = [key for key in result.keys()
                              if key.startswith(prefix)]
                extras = {}
                for extra_key in extra_keys:
                    extras[extra_key[len(prefix):]] = result.pop(extra_key)
                if extra_keys:
                    result['extras'] = extras

            # if just fetching the id or name, return a list instead of a
            # dict
            if query.get('fl') in ['id', 'name']:
                self.results = [r.get(query.get('fl')) for r in self.results]

            # facets arrive as flat [value, count, value, count, ...]
            # lists; convert each one to a {value: count} dict
            facet_fields = data.get('facet_counts', {}).get(
                'facet_fields', {})
            self.facets = dict(
                (field, dict(zip(values[0::2], values[1::2])))
                for field, values in facet_fields.items())
        except Exception as e:
            log.exception(e)
            raise SearchError(e)

        return {'results': self.results, 'count': self.count}
def run(self, query):
    '''
    Performs a dataset search using the given query.

    The query mapping is validated, filled in with defaults (in place)
    and sent to SOLR.

    @param query - dictionary with keys like: q, fq, sort, rows, facet

    May raise SearchQueryError (invalid parameter names) or SearchError
    (SOLR reported an error).

    NOTE(review): the code visible here ends after the SOLR call; the
    handling of the response is not shown.
    '''
    from solr import SolrException
    assert isinstance(query, (dict, MultiDict))

    # only parameters from the whitelist are accepted
    if not set(query.keys()) <= VALID_SOLR_PARAMETERS:
        invalid_params = list(set(query.keys()) - VALID_SOLR_PARAMETERS)
        raise SearchQueryError(
            "Invalid search parameters: %s" % invalid_params)

    # an empty or quotes-only query returns all documents
    q = query.get('q')
    if not q or q in ('""', "''"):
        query['q'] = "*:*"

    # number of results, capped at 1000
    rows_to_return = min(1000, int(query.get('rows', 10)))
    # #1683 Work around problem of last result being out of order
    # in SOLR 1.4: ask for one extra row whenever any row is wanted
    rows_to_query = (rows_to_return + 1 if rows_to_return > 0
                     else rows_to_return)
    query['rows'] = rows_to_query

    # order by score if no 'sort' term given
    order_by = query.get('sort')
    if order_by is None or order_by == 'rank':
        query['sort'] = 'score desc, name asc'

    # restrict to this CKAN instance and to active packages, unless the
    # caller already filters on those fields
    fq = query.get('fq', '')
    if '+site_id:' not in fq:
        fq += ' +site_id:"%s"' % config.get('ckan.site_id')
    if '+state:' not in fq:
        fq += " +state:active"
    query['fq'] = fq

    # faceting, returned fields (package name by default) and response
    # format (json by default): keep the caller's value when present
    defaults = (
        ('facet', 'true'),
        ('facet.limit', config.get('search.facets.limit', '50')),
        ('facet.mincount', 1),
        ('fl', 'name'),
        ('wt', 'json'),
    )
    for key, fallback in defaults:
        query[key] = query.get(key, fallback)

    # A colon in the query marks it as a fielded search, which does not
    # use dismax; only colon-free queries get the dismax settings.
    if ':' not in query['q']:
        for key, value in (('defType', 'dismax'),
                           ('tie', '0.1'),
                           ('mm', '1')):
            query[key] = value
        query['qf'] = query.get('qf', QUERY_FIELDS)

    conn = make_connection()
    log.debug('Package query: %r' % query)
    try:
        solr_response = conn.raw_query(**query)
    except SolrException as e:
        message = ('SOLR returned an error running query: %r Error: %r' %
                   (query, e.reason))
        raise SearchError(message)
# #1683 Filter out the last row that is sometimes out of order self.results = self.results[:rows_to_return] # get any extras and add to 'extras' dict for result in self.results: extra_keys = filter(lambda x: x.startswith('extras_'), result.keys()) extras = {} for extra_key in extra_keys: value = result.pop(extra_key) extras[extra_key[len('extras_'):]] = value if extra_keys: result['extras'] = extras # if just fetching the id or name, return a list instead of a dict if query.get('fl') in ['id', 'name']: self.results = [r.get(query.get('fl')) for r in self.results] # get facets and convert facets list to a dict self.facets = data.get('facet_counts', {}).get('facet_fields', {}) for field, values in self.facets.iteritems(): self.facets[field] = dict(zip(values[0::2], values[1::2])) except Exception, e: log.exception(e) raise SearchError(e) finally: conn.close() return {'results': self.results, 'count': self.count}