def _apply_index(self, request, cid=''):
    """Catalog hook: run a full-text query against ``self.index``.

    Returns (ranked results or docids, index id), or None when the
    request does not address this index or the query text is empty.
    """
    # parse the query options
    record = parseIndexRequest(request, self.getId(), self.query_options)
    if record.keys is None:
        return None

    # prepare query (must be unicode string)
    query = record.keys[0]
    if not isinstance(query, str):
        # assumes a non-str key is a bytes value decodable with the
        # request's or the index's default encoding -- TODO confirm
        query = str(query,
                    record.get('encoding', self.index.default_encoding),
                    'ignore')
    if not query:
        return None

    # collect only the options actually present on the request record;
    # 'marker' distinguishes "absent" from legitimate falsy values
    options = {}
    for k in ('parser', 'language', 'field', 'autoexpand',
              'similarity_ratio', 'thesaurus', 'ranking',
              'ranking_maxhits', 'search_all_fields'):
        v = getattr(record, k, marker)
        if v is not marker:
            options[k] = v

    result = self.index.search(query, **options)
    ranked_resultset = result.getRankedResults()
    if ranked_resultset:
        # ranked results carry relevance scores
        return ranked_resultset, self.id
    else:
        return result.getDocids(), self.id
def _apply_index(self, request):
    """Catalog hook: find all documents whose [since, until] interval
    overlaps the queried [qstart, qend] range.

    The overlap set is built as the union of three pieces:
    documents spanning qstart, documents spanning qend, and documents
    lying entirely inside the range.
    """
    record = parseIndexRequest(request, self.id)
    try:
        qstart, qend = record.keys
    except TypeError:
        # record.keys was None or not a 2-sequence: not our query
        return None

    # clamp the query bounds into the representable 64-bit range
    minint = BTrees.family64.minint
    maxint = BTrees.family64.maxint
    qstart = min(maxint, max(minint, qstart))
    qend = max(minint, min(maxint, qend))

    # documents whose interval spans qstart (since <= qstart <= until)
    start = multiunion(self._since_index.values(max=qstart))
    end = multiunion(self._until_index.values(min=qstart))
    start_into = intersection(start, end)

    # documents whose interval spans qend (since <= qend <= until)
    start = multiunion(self._since_index.values(max=qend))
    end = multiunion(self._until_index.values(min=qend))
    end_into = intersection(start, end)

    # NOTE(review): despite the variable name, this third piece selects
    # documents lying entirely INSIDE the range (since >= qstart and
    # until <= qend); together with the two "span" sets above the union
    # covers every overlapping interval.
    start = multiunion(self._since_index.values(min=qstart))
    end = multiunion(self._until_index.values(max=qend))
    start_before_end_after = intersection(start, end)

    result = union(start_into, end_into)
    result = union(result, start_before_end_after)

    # map matched keys back to document ids
    return multiunion(map(self._index.__getitem__, result)), (self.id,)
def _apply_index(self, request, cid=''):
    """hook for (Z)Catalog

    'request' -- mapping type (usually {"path": "..."}); additionally a
    parameter "path_level" might be passed to specify the level (see
    search())
    'cid' -- ???

    Returns (resultset, (index id,)) or None when the request does not
    address this index.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    # Fixed: compare against None by identity, not equality.
    if record.keys is None:
        return None

    level = record.get("level", 0)
    operator = record.get('operator', self.useOperator).lower()

    # depending on the operator we use intersection or union
    if operator == "or":
        set_func = union
    else:
        set_func = intersection

    res = None
    for k in record.keys:
        rows = self.search(k, level)
        res = set_func(res, rows)

    if res:
        return res, (self.id, )
    else:
        return IISet(), (self.id, )
def extendedpathindex_apply_index(self, request, res=None):
    """hook for (Z)Catalog

    'request' -- mapping type (usually {"path": "..."}); additionally a
    parameter "path_level" might be passed to specify the level (see
    search())

    Returns (resultset, (index id,)) or None when the request does not
    address this index.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    # Fixed: compare against None by identity, not equality.
    if record.keys is None:
        return None

    level = record.get("level", 0)
    operator = record.get('operator', self.useOperator).lower()
    depth = getattr(record, 'depth', -1)  # use getattr to get 0 value
    navtree = record.get('navtree', 0)
    navtree_start = record.get('navtree_start', 0)

    # depending on the operator we use intersection or union
    if operator == "or":
        set_func = union
    else:
        set_func = intersection

    result = None
    for k in record.keys:
        rows = self.search(k, level, depth, navtree, navtree_start,
                           tmpres=res)
        result = set_func(result, rows)

    if result:
        return result, (self.id, )
    else:
        return IISet(), (self.id, )
def parse_query(self, field, field_query):
    """Build a Solr 'fq' range clause for a date field.

    Supported 'range' values are 'min', 'max' and 'min:max'; without a
    range the superclass handles the query.  Returns None when the
    request carries no keys for this field.

    Raises AssertionError for an unknown range specifier.
    """
    name = field.name
    request = {name: field_query}
    record = parseIndexRequest(request, name, ('query', 'range'))
    if not record.keys:
        return None
    query_range = record.get('range', None)
    if query_range is None:
        return super(DateFieldHandler, self).parse_query(field, field_query)
    elif query_range == 'min':
        min_query = self.convert_one(min(record.keys))
        return {'fq': u'%s:[%s TO *]' % (name, solr_escape(min_query))}
    elif query_range == 'max':
        max_query = self.convert_one(max(record.keys))
        return {'fq': u'%s:[* TO %s]' % (name, solr_escape(max_query))}
    elif query_range == 'min:max':
        min_query = self.convert_one(min(record.keys))
        max_query = self.convert_one(max(record.keys))
        return {
            'fq': u'%s:[%s TO %s]' % (
                name, solr_escape(min_query), solr_escape(max_query))
        }
    else:
        # Fixed: previously interpolated the builtin 'range' instead of
        # the offending query value.
        raise AssertionError("Invalid range: %s" % query_range)
def _apply_index(self, request, cid=''):
    """hook for (Z)Catalog

    'request' -- mapping type (usually {"path": "..."}); additionally a
    parameter "path_level" might be passed to specify the level (see
    search())
    'cid' -- ???

    Returns (resultset, (index id,)) or None when the request does not
    address this index.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    # Fixed: compare against None by identity, not equality.
    if record.keys is None:
        return None

    level = record.get("level", 0)
    operator = record.get('operator', self.useOperator).lower()

    # depending on the operator we use intersection or union
    if operator == "or":
        set_func = union
    else:
        set_func = intersection

    res = None
    for k in record.keys:
        rows = self.search(k, level)
        res = set_func(res, rows)

    if res:
        return res, (self.id,)
    else:
        return IISet(), (self.id,)
def _apply_index(self, request, cid=''):
    """hook for (Z)Catalog

    request  mapping type (usually {"topic": "..."})
    cid  ???

    Evaluates each named filter and combines the per-filter results
    with the requested set operator.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    # Fixed: compare against None by identity, not equality.
    if record.keys is None:
        return None

    # experimental code for specifing the operator
    operator = record.get('operator', self.defaultOperator).lower()

    # depending on the operator we use intersection or union
    if operator == "or":
        set_func = union
    else:
        set_func = intersection

    res = None
    for filterId in record.keys:
        rows = self.search(filterId)
        res = set_func(res, rows)

    if res:
        return res, (self.id,)
    else:
        return IISet(), (self.id,)
def _apply_index(self, request):
    """See IPluggableIndex.

    Unpacks query arguments from the catalog request and maps them
    onto '_search', combining per-key results with the configured
    set operator.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    depth = record.get("level", 0)
    if record.get('operator', self.useOperator).lower() == "or":
        combine = union
    else:
        combine = intersection

    hits = None
    for term in record.keys:
        hits = combine(hits, self._search(term, depth))

    if hits:
        return hits, (self.id, )
    return IISet(), (self.id, )
def _apply_index(self, request, cid=''):
    """Catalog hook: run a full-text query against ``self.index``.

    Returns (ranked results or docids, index id), or None when the
    request does not address this index or the query text is empty.
    """
    # parse the query options
    record = parseIndexRequest(request, self.getId(), self.query_options)
    if record.keys is None:
        return None

    # prepare query (must be unicode string)
    query = record.keys[0]
    if not isinstance(query, unicode):
        # assumes non-unicode keys are encoded byte strings -- decoded
        # with the request's or the index's default encoding
        query = unicode(query,
                        record.get('encoding', self.index.default_encoding),
                        'ignore')
    if not query:
        return None

    # collect only the options actually present on the request record;
    # 'marker' distinguishes "absent" from legitimate falsy values
    options = {}
    for k in ('parser', 'language', 'field', 'autoexpand',
              'similarity_ratio', 'thesaurus', 'ranking',
              'ranking_maxhits', 'search_all_fields'):
        v = getattr(record, k, marker)
        if v is not marker:
            options[k] = v

    result = self.index.search(query, **options)
    ranked_resultset = result.getRankedResults()
    if ranked_resultset:
        # ranked results carry relevance scores
        return ranked_resultset, self.id
    else:
        return result.getDocids(), self.id
def extendedpathindex_apply_index(self, request, res=None):
    """hook for (Z)Catalog

    'request' -- mapping type (usually {"path": "..."}); additionally a
    parameter "path_level" might be passed to specify the level (see
    search())

    Returns (resultset, (index id,)) or None when the request does not
    address this index.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    # Fixed: compare against None by identity, not equality.
    if record.keys is None:
        return None

    level = record.get("level", 0)
    operator = record.get('operator', self.useOperator).lower()
    depth = getattr(record, 'depth', -1)  # use getattr to get 0 value
    navtree = record.get('navtree', 0)
    navtree_start = record.get('navtree_start', 0)

    # depending on the operator we use intersection or union
    if operator == "or":
        set_func = union
    else:
        set_func = intersection

    result = None
    for k in record.keys:
        rows = self.search(k, level, depth, navtree, navtree_start,
                           tmpres=res)
        result = set_func(result, rows)

    if result:
        return result, (self.id,)
    else:
        return IISet(), (self.id,)
def _apply_index(self, request, cid='', raw=False):
    """Apply query specified by request, a mapping containing the query.

    Returns two objects on success: the resultSet containing the
    matching record numbers, and a tuple containing the names of the
    fields used.

    Returns None if request is not valid for this index.

    If ``raw``, returns the raw response from the index server as a
    mapping.
    """
    record = parseIndexRequest(request, self.getId(), self.query_options)
    if record.keys is None:
        return None

    params = {'query': record.keys, 'range': record.range}
    log.debug("querying: %r", params)
    cm = self.connection_manager
    try:
        response = cm.connection.query(
            params['range'], params['query'])
        if raw:
            return response
        result = IIBTree()
        for item in response:
            # scale the float relevance score to an int for the IIBTree
            score = int(float(item.get('score', 0)) * 1000)
            result[int(item['id'])] = score
        return result, (self.getId(),)
    except Exception as e:
        # Fixed: Logger.warn is a deprecated alias of Logger.warning.
        log.warning("Failed to apply %s: %s", params, str(e))
        return None
def _apply_index(self, request):
    """See IPluggableIndex.

    Translates the catalog request into repeated '_search' calls and
    merges the partial results with the requested set operator.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    keys = record.keys
    if keys is None:
        return None

    level = record.get("level", 0)
    op = record.get('operator', self.useOperator).lower()
    merge = union if op == "or" else intersection

    accumulated = None
    for key in keys:
        accumulated = merge(accumulated, self._search(key, level))

    if accumulated:
        return accumulated, (self.id,)
    return IISet(), (self.id,)
def _apply_index(self, request, cid=''):
    """Apply the index to query parameters given in 'request', which
    should be a mapping object.

    If the request does not contain the needed parameters, then return
    None.

    If the request contains a parameter with the name of the column +
    "_usage", snif for information on how to handle applying the index.

    Otherwise return two objects.  The first object is a ResultSet
    containing the record numbers of the matching records.  The second
    object is a tuple containing the names of all data fields used.
    """
    record = parseIndexRequest(request, self.getId())
    if record.keys is None:
        return None

    term = self._convertDateTime(record.keys[0])

    # Aggregate sets for each bucket separately, to avoid
    # large-small union penalties.  (Cleaned up: removed the old
    # commented-out IISet()/map() implementation.)
    until_only = multiunion(self._until_only.values(term))
    since_only = multiunion(self._since_only.values(None, term))
    until = multiunion(self._until.values(term))
    since = multiunion(self._since.values(None, term))
    bounded = intersection(until, since)

    # Merge from smallest to largest.
    result = union(bounded, until_only)
    result = union(result, since_only)
    result = union(result, self._always)

    return result, (self._since_field, self._until_field)
def _apply_index(self, request):
    """Apply the index to query parameters given in 'request'.

    The argument should be a mapping object.

    If the request does not contain the needed parameters, then None
    is returned.

    If the request contains a parameter with the name of the column
    and this parameter is either a Record or a class instance then
    it is assumed that the parameters of this index are passed as
    attribute (Note: this is the recommended way to pass parameters
    since Zope 2.4)

    Otherwise two objects are returned.  The first object is a ResultSet
    containing the record numbers of the matching records.  The second
    object is a tuple containing the names of all data fields used.
    """
    # re-entrancy guard: skip entirely while queries are blocked
    if query_blocker.blocked:
        return
    record = parseIndexRequest(request, self.id)
    if record.keys is None:
        return None
    template_params = {
        'keys': record.keys,
    }
    # render the ElasticSearch query body from the configured template
    query_body = self._apply_template(template_params)
    logger.info(query_body)
    es_kwargs = dict(
        index=index_name(),
        body=query_body,
        size=BATCH_SIZE,
        scroll='1m',
        _source_include=['rid'],
    )
    es = get_query_client()
    result = es.search(**es_kwargs)

    # initial return value, other batches to be applied

    def score(record):
        # ES scores are floats; scale to int for the IIBTree
        return int(10000 * float(record['_score']))

    retval = IIBTree()
    for r in result['hits']['hits']:
        retval[r['_source']['rid']] = score(r)

    total = result['hits']['total']
    if total > BATCH_SIZE:
        # page through the remaining hits with the scroll API
        sid = result['_scroll_id']
        counter = BATCH_SIZE
        while counter < total:
            result = es.scroll(scroll_id=sid, scroll='1m')
            for record in result['hits']['hits']:
                retval[record['_source']['rid']] = score(record)
            counter += BATCH_SIZE
    return retval, (self.id,)
def _apply_index( self, request, cid='' ):
    """Apply the index to query parameters given in 'request', which
    should be a mapping object.

    If the request does not contain the needed parameters, then return
    None.

    If the request contains a parameter with the name of the column +
    "_usage", snif for information on how to handle applying the index.

    Otherwise return two objects.  The first object is a ResultSet
    containing the record numbers of the matching records.  The second
    object is a tuple containing the names of all data fields used.
    """
    record = parseIndexRequest( request, self.getId() )
    if record.keys is None:
        return None

    term = self._convertDateTime( record.keys[0] )

    # Aggregate sets for each bucket separately, to avoid
    # large-small union penalties.  (Cleaned up: removed the old
    # commented-out IISet()/map() implementation.)
    until_only = multiunion( self._until_only.values( term ) )
    since_only = multiunion( self._since_only.values( None, term ) )
    until = multiunion( self._until.values( term ) )
    since = multiunion( self._since.values( None, term ) )
    bounded = intersection( until, since )

    # Merge from smallest to largest.
    result = union( bounded, until_only )
    result = union( result, since_only )
    result = union( result, self._always )

    return result, ( self._since_field, self._until_field )
def _apply_index(self, request, cid=''): """ Apply the index to query parameters given in the argument, request The argument should be a mapping object. If the request does not contain the needed parameters, then None is returned. Otherwise two objects are returned. The first object is a ResultSet containing the record numbers of the matching records. The second object is a tuple containing the names of all data fields used. """ record = parseIndexRequest(request,self.id,self.query_options) if record.keys==None: return None # Changed for 2.4 # We use the default operator that can me managed via the ZMI qop = record.get('operator', self.useOperator) # We keep this for pre-2.4 compatibility # This stinking code should go away somewhere. A global # textindex_operator makes no sense when using multiple # text indexes inside a catalog. An index operator should # should be specified on a per-index base if request.has_key('textindex_operator'): qop = request['textindex_operator'] warnings.warn("The usage of the 'textindex_operator' " "is no longer recommended.\n" "Please use a mapping object and the " "'operator' key to specify the operator.") query_operator = operator_dict.get(qop) if query_operator is None: raise exceptions.RuntimeError, ("Invalid operator '%s' " "for a TextIndex" % escape(qop)) r = None for key in record.keys: key = key.strip() if not key: continue b = self.query(key, query_operator).bucket() w, r = weightedIntersection(r, b) if r is not None: return r, (self.id,) return (IIBucket(), (self.id,))
def _apply_index(self, request, cid=''): """ Apply the index to query parameters given in the argument, request The argument should be a mapping object. If the request does not contain the needed parameters, then None is returned. Otherwise two objects are returned. The first object is a ResultSet containing the record numbers of the matching records. The second object is a tuple containing the names of all data fields used. """ record = parseIndexRequest(request, self.id, self.query_options) if record.keys == None: return None # Changed for 2.4 # We use the default operator that can me managed via the ZMI qop = record.get('operator', self.useOperator) # We keep this for pre-2.4 compatibility # This stinking code should go away somewhere. A global # textindex_operator makes no sense when using multiple # text indexes inside a catalog. An index operator should # should be specified on a per-index base if request.has_key('textindex_operator'): qop = request['textindex_operator'] warnings.warn("The usage of the 'textindex_operator' " "is no longer recommended.\n" "Please use a mapping object and the " "'operator' key to specify the operator.") query_operator = operator_dict.get(qop) if query_operator is None: raise exceptions.RuntimeError, ("Invalid operator '%s' " "for a TextIndex" % escape(qop)) r = None for key in record.keys: key = key.strip() if not key: continue b = self.query(key, query_operator).bucket() w, r = weightedIntersection(r, b) if r is not None: return r, (self.id, ) return (IIBucket(), (self.id, ))
def parse_query(self, field, field_query):
    """Translate *field_query* into a raw Solr 'q' clause for *field*.

    Returns None when the request carries no keys for this field or
    the joined query text is empty.
    """
    name = field.name
    record = parseIndexRequest({name: field_query}, name, ('query',))
    if not record.keys:
        return None
    joined = ' '.join(record.keys)
    if not joined:
        return None
    return {'q': u'+%s:%s' % (name, quote_query(joined))}
def _apply_index(self, request, cid=''): """ Apply the index to query parameters given in the argument, request The argument should be a mapping object. If the request does not contain the needed parameters, then None is returned. Otherwise two objects are returned. The first object is a ResultSet containing the record numbers of the matching records. The second object is a tuple containing the names of all data fields used. """ record = parseIndexRequest(request, self.id, self.query_options) if record.keys==None: return None # extract some parameters from the request query_operator = record.get('operator','dummy') if query_operator is None: raise TXNGError, ("Invalid operator '%s' " "for a TextIndex" % query_operator) query_parser = record.get('parser', self.use_parser) if not ParserRegistry.is_registered(query_parser): raise TXNGError, "Unknown parser '%s'" % query_parser query = record.keys[0] encoding = record.get('encoding', self.default_encoding) if isinstance(query, str): query = unicode(query, encoding) P = ParserRegistry.get( query_parser ) parsed_query = P(query.strip(), operator=query_operator) if not parsed_query: raise TXNGError,"Bad query: '%s'" % q evaluator = Evaluator(self) evaluator.autoexpand = record.get('autoexpand', self.autoexpand) evaluator.near_distance = record.get('near_distance', self.near_distance) numhits = record.get('numhits', self.numhits) resultset = evaluator(parsed_query) if self.getStorage().providesWordFrequencies(): resultset.cosine_ranking(self, numhits) return resultset.result(), (self.id,) else: return resultset.docIds(), (self.id,)
def _apply_index(self,request, cid= ''):
    '''see 'PluggableIndex'.

    What is *cid* for???
    '''
    __traceback_info__ = self.id
    record= parseIndexRequest(request, self.id, self.query_options)
    terms= record.keys
    if terms is None: return

    __traceback_info__ = self.id, record.keys

    op= record.get('operator', self.useOperator)
    if op not in self.operators:
        raise ValueError("operator not permitted: %s" % op)
    # old-style conditional expression: union for 'or', else intersection
    combine= op == 'or' and union or intersection

    # optional incremental-search filtering support
    filteredSearch = None
    if record.get('isearch') and record.get('isearch_filter') \
       and self.supportFiltering and IFilter is not None:
        filteredSearch = self._getFilteredISearch(record)

    if filteredSearch is None:
        # expand glob/prefix style matches into concrete terms
        match = record.get('match')
        if match is not None:
            l = []; match = getattr(self, 'match' + match.capitalize())
            prenorm = self._prenormalizeTerm
            for t in terms:
                t = prenorm(t, None)
                if t is not None: l.extend(match(t))
            terms = l
        range= record.get('range')
        if range is not None:
            # range query: standardize terms, then enumerate the range
            terms= [self._standardizeTerm(t,elimStopTerm=0, prenormalize=not match) for t in terms]
            range= range.split(':'); lo= hi= None
            if 'min' in range: lo= min(terms)
            if 'max' in range: hi= max(terms)
            terms= self._enumerateRange(lo,hi)
        else:
            terms= [self._standardizeTerm(t, prenormalize=not match) for t in terms]

    if filteredSearch is None:
        r = self._search(terms,combine,record)
    else:
        r = filteredSearch
    if r is None: return
    return r, self.id
def _apply_index(self, request, cid=''):
    """Apply the index to the search parameters given in request.

    Per-language results are unioned; the 'fallback' option on the
    request overrides the index default.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    use_fallback = self.fallback
    if hasattr(record, 'fallback'):
        use_fallback = bool(record.fallback)

    hits = None
    for lang in record.keys:
        hits = ii_union(hits, self._search(lang, use_fallback))

    return (hits or IISet()), ('Language', )
def _apply_index(self, request, cid=''):
    """Apply the query in *request* (a mapping) to this text index.

    Returns the matching resultset and the used field names on
    success, or None when the request does not address this index or
    the query text is empty.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None
    text = ' '.join(record.keys)
    if not text:
        return None
    parsed = QueryParser(self.getLexicon()).parseQuery(text)
    return parsed.executeQuery(self.index), (self.id, )
def _apply_index(self, request, cid=''):
    """Run the parsed text query against the index.

    None is returned for requests that do not concern this index or
    that contain only empty query text; otherwise the matching
    resultset and the used field names are returned.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None
    joined = ' '.join(record.keys)
    if joined:
        query_tree = QueryParser(self.getLexicon()).parseQuery(joined)
        return query_tree.executeQuery(self.index), (self.id,)
    return None
def _apply_index(self, request, resultset=None):
    """Catalog hook for a boolean-style index.

    NOTE(review): both branches of the loop body return, so only the
    first key in the request is ever consulted.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    index = self._index
    indexed = self._index_value
    for key in record.keys:
        if bool(key) is bool(indexed):
            # key matches the indexed truth value: answer from the
            # forward index, bounded by resultset
            return (intersection(index, resultset), (self.id,))
        else:
            # key is the complement: everything unindexed minus the
            # forward index (or subtract from the given resultset)
            if resultset is None:
                return (union(difference(self._unindex, index),
                        IISet([])), (self.id,))
            else:
                return (difference(resultset, index), (self.id,))
    return (IISet(), (self.id,))
def _apply_index(self, request, resultset=None):
    """Catalog hook for a boolean-style index (True set stored).

    NOTE(review): both branches of the loop body return, so only the
    first key in the request is ever consulted.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    index = self._index
    for key in record.keys:
        if key:
            # truthy key: answer from the forward (True) index,
            # bounded by resultset
            return (intersection(index, resultset), (self.id, ))
        else:
            # falsy key: everything unindexed minus the True set (or
            # subtract from the given resultset)
            if resultset is None:
                return (union(difference(self._unindex, index),
                        IISet([])), (self.id, ))
            else:
                return (difference(resultset, index), (self.id, ))
    return (IISet(), (self.id, ))
def _apply_index(self, request):
    """hook for (Z)Catalog

    'request' -- mapping type (usually {"topic": "..."})
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    if record.get('operator', self.defaultOperator).lower() == 'or':
        merge = union
    else:
        merge = intersection

    found = None
    for fid in record.keys:
        found = merge(found, self.search(fid))

    if found:
        return found, (self.id,)
    return IITreeSet(), (self.id,)
def _apply_index(self, request):
    """Catalog hook: evaluate the stored filters named in the request.

    Combines per-filter results with union ('or') or intersection
    (anything else); returns an empty IITreeSet when nothing matched.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    op_name = record.get('operator', self.defaultOperator).lower()
    combine = union if op_name == 'or' else intersection

    matches = None
    for filter_id in record.keys:
        matches = combine(matches, self.search(filter_id))

    if matches:
        return matches, (self.id, )
    return IITreeSet(), (self.id, )
def _apply_index(self, request, cid=''):
    """Catalog hook: spatial query filtered by security, with each
    hit's parent document added to the result.
    """
    record = parseIndexRequest(request, self.getId(), self.query_options)
    if record.keys is None:
        return None
    catalog = getToolByName(self, 'portal_catalog')
    geoIndex = catalog._catalog.getIndex(self.geoindex_id)
    geoRequest = {}
    geoRequest[self.geoindex_id] = {
        'query': record.keys, 'range': record.range}
    # raw=True yields the index server's raw response (a mapping)
    # rather than an IIBTree resultset
    geo_response = geoIndex._apply_index(geoRequest, raw=True)
    # map each hit's rid to its physical path
    paths = {}
    for item in geo_response:
        paths[int(item['id'])] = item['properties']['path']
    rolesIndex = catalog._catalog.getIndex('allowedRolesAndUsers')
    user = _getAuthenticatedUser(self)
    # restrict hits to documents the current user may see
    perms_set = rolesIndex._apply_index(
        {'allowedRolesAndUsers': catalog._listAllowedRolesAndUsers(user)}
    )[0]
    r = intersection(perms_set, IISet(paths.keys()))
    if isinstance(r, int):
        # a singleton intersection may come back as a bare int
        r = IISet((r,))
    if r is None:
        return IISet(), (self.getId(),)
    else:
        url_tool = getToolByName(self, 'portal_url')
        portal_path = url_tool.getPortalObject().getPhysicalPath()
        root = list(portal_path)

        def up(path):
            # parent path: portal prefix + all but the last segment
            return '/'.join(root + path.strip('/').split('/')[:-1])

        # also include the rid of each hit's parent document
        return union(
            r, IISet([catalog.getrid(up(paths[lid])) for lid in r])
        ), (self.getId(),)
def languageindex_apply_index(self, request, cid='', res=None):
    """Apply the index to the search parameters given in request.

    Each requested language code is searched separately and the
    partial results are unioned; 'fallback' on the request overrides
    the index default.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    use_fallback = self.fallback
    if hasattr(record, 'fallback'):
        use_fallback = bool(record.fallback)

    combined = None
    for code in record.keys:
        combined = ii_union(combined,
                            self._search(code, use_fallback, res=res))

    return (combined or IISet()), ('Language', )
def _apply_index(self, request, cid=''):
    """hook for (Z)Catalog

    request  mapping type (usually {"path": "..."}); additionally a
    parameter "path_level" might be passed to specify the level (see
    search())
    cid  ???
    """
    record = parseIndexRequest(request,self.id,self.query_options)
    # Fixed: compare against None by identity, not equality.
    if record.keys is None:
        return None

    # deprecated pre-mapping way of passing the level
    if request.has_key('%s_level' % cid):
        warnings.warn("The usage of the '%s_level' "
                      "is no longer recommended.\n"
                      "Please use a mapping object and the "
                      "'level' key to specify the operator." % cid)

    # get the level parameter
    level = record.get("level",0)

    # experimental code for specifing the operator
    operator = record.get('operator',self.useOperator).lower()

    # depending on the operator we use intersection or union
    if operator=="or":
        set_func = union
    else:
        set_func = intersection

    res = None
    for k in record.keys:
        rows = self.search(k,level)
        res = set_func(res,rows)

    if res:
        return res, (self.id,)
    else:
        return IISet(), (self.id,)
def zctidx_ApplyIndexWithSynonymous(self, request, cid=''):
    """Apply query specified by request, a mapping containing the query.

    Returns two object on success, the resultSet containing the
    matching record numbers and a tuple containing the names of the
    fields used

    Returns None if request is not valid for this index.

    If this index id is listed in
    PloneGlossary.config.INDEX_SEARCH_GLOSSARY, the query tree is
    changed to look for terms and their variants found in general
    glossaries.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    query_str = ' '.join(record.keys)
    if not query_str:
        return None

    parseQuery = QueryParser(self.getLexicon()).parseQuery
    tree = parseQuery(query_str)

    if self.getId() in INDEX_SEARCH_GLOSSARY:
        gtool = getToolByName(self, PLONEGLOSSARY_TOOL)
        glossary_uids = gtool.getGeneralGlossaryUIDs()
        all_term_items = gtool._getGlossaryTermItems(glossary_uids)
        # get atoms from query and build related term query
        # text = ' '.join(flatten(tree.terms()))
        # NOTE(review): '__getNOTWords' undergoes name mangling when
        # this function sits in a class body (resolves to
        # _ClassName__getNOTWords) -- confirm the target exists.
        excluded = dict.fromkeys(__getNOTWords(tree), True)
        # rewrite the tree to also match glossary term variants
        tree = replaceWordsQuery(tree, parseQuery, gtool,
                                 all_term_items, excluded)

    results = tree.executeQuery(self.index)
    return results, (self.id,)
def parse_query(self, field, field_query):
    """Build a Solr 'fq' range clause for a date field.

    Supported 'range' values are 'min', 'max' and 'min:max'; without a
    range the superclass handles the query.  Returns None when the
    request carries no keys for this field.

    Raises AssertionError for an unknown range specifier.
    """
    name = field.name
    request = {name: field_query}
    record = parseIndexRequest(request, name, ('query', 'range'))
    if not record.keys:
        return None
    query_range = record.get('range', None)
    if query_range is None:
        return super(DateFieldHandler, self).parse_query(field, field_query)
    elif query_range == 'min':
        min_query = self.convert_one(min(record.keys))
        return {'fq': u'%s:[%s TO *]' % (name, solr_escape(min_query))}
    elif query_range == 'max':
        max_query = self.convert_one(max(record.keys))
        return {'fq': u'%s:[* TO %s]' % (name, solr_escape(max_query))}
    elif query_range == 'min:max':
        min_query = self.convert_one(min(record.keys))
        max_query = self.convert_one(max(record.keys))
        return {'fq': u'%s:[%s TO %s]' % (name,
                                          solr_escape(min_query),
                                          solr_escape(max_query))}
    else:
        # Fixed: previously interpolated the builtin 'range' instead of
        # the offending query value.
        raise AssertionError("Invalid range: %s" % query_range)
def languageindex_apply_index(self, request, cid='', res=None):
    """Search the language index for every language code in the request.

    Per-language results are unioned; an empty IISet is returned when
    nothing matched.  The 'fallback' query option overrides the index
    default.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    fb = bool(record.fallback) if hasattr(record, 'fallback') else self.fallback

    out = None
    for language in record.keys:
        out = ii_union(out, self._search(language, fb, res=res))

    if out:
        return out, ('Language',)
    return IISet(), ('Language',)
def parse_query(self, field, field_query):
    """Build a Solr 'fq' clause for *field* from *field_query*.

    A single value becomes an exact quoted match; multiple values are
    joined with the requested (or default) boolean operator.
    """
    name = field.name
    record = parseIndexRequest({name: field_query}, name,
                               ('query', 'operator'))
    if not record.keys:
        return None

    values = []
    for chunk in record.keys:
        values.extend(self.convert(chunk))
    if not values:
        return None

    if len(values) == 1:
        return {'fq': u'%s:"%s"' % (name, solr_escape(values[0]))}

    op = record.get('operator', self.default_operator)
    if op not in self.operators:
        raise AssertionError("Invalid operator: %s" % op)

    quoted = [u'"%s"' % solr_escape(v) for v in values]
    joiner = u' %s ' % op.upper()
    return {'fq': u'%s:(%s)' % (name, joiner.join(quoted))}
def _apply_index(self, request, cid='', type=type): """ """ record = parseIndexRequest(request, self.id, self.query_options) if record.keys==None: return None r = None operator = record.get('geometry_operator',self.useOperator) if not operator in self.operators : raise RuntimeError,"operator not valid: %s" % operator if operator=='disjoint': raise RuntimeError,"DISJOINT not supported yet" logger.debug('Operator: %s' % operator) # we only process one key key = record.keys[0] bbox = [float(c) for c in key.split(',')] #bboxAsTuple(key) intersection=self.rtree.intersection(bbox) set = [] for d in [l for l in intersection]: try: geom_wkt = self.backward.get( int(d), None ) except: logger.info('backward.get failed for %s : %s' %(str(d), str(int(d)))) continue if geom_wkt is not None: geom = wkt.loads(geom_wkt) if geom is not None: opr=getattr(geom, operator) mp = MultiPoint([bbox[:2],bbox[2:]]) if opr(mp.envelope): set.append(int(d)) r = IITreeSet(set) return r, (self.id,)
def dateindex_apply_index( self, request, cid='', type=type, res=None): record = parseIndexRequest( request, self.id, self.query_options ) if record.keys == None: return None keys = map( self._convert, record.keys ) index = self._index r = None opr = None #experimental code for specifing the operator operator = record.get( 'operator', self.useOperator ) if not operator in self.operators : raise RuntimeError, "operator not valid: %s" % operator # depending on the operator we use intersection or union if operator=="or": set_func = union else: set_func = intersection # range parameter range_arg = record.get('range',None) if range_arg: opr = "range" opr_args = [] if range_arg.find("min") > -1: opr_args.append("min") if range_arg.find("max") > -1: opr_args.append("max") if record.get('usage',None): # see if any usage params are sent to field opr = record.usage.lower().split(':') opr, opr_args = opr[0], opr[1:] if opr=="range": # range search if 'min' in opr_args: lo = min(keys) else: lo = None if 'max' in opr_args: hi = max(keys) else: hi = None if hi: setlist = index.values(lo,hi) else: setlist = index.values(lo) #for k, set in setlist: #if type(set) is IntType: #set = IISet((set,)) #r = set_func(r, set) # XXX: Use multiunion! r = multiunion(setlist) else: # not a range search for key in keys: set = index.get(key, None) if set is not None: if isinstance(set, int): set = IISet((set,)) else: # set can't be bigger than res set = intersection(set, res) r = set_func(r, set) if isinstance(r, int): r = IISet((r,)) if r is None: return IISet(), (self.id,) else: return r, (self.id,)
def _apply_index(self, request):
    """Apply the index to query parameters given in 'request'.

    The argument should be a mapping object.  If the request does not
    contain the needed parameters, then None is returned.

    If the request contains a parameter with the name of the column
    and this parameter is either a Record or a class instance then it
    is assumed that the parameters of this index are passed as attribute
    (Note: this is the recommended way to pass parameters since Zope 2.4)

    Otherwise two objects are returned.  The first object is an IIBTree
    mapping record number (rid) -> integer relevance score; the second is
    a tuple containing the names of all data fields used.

    Large result sets are fetched from ElasticSearch via the scroll API
    in batches of BATCH_SIZE.
    """
    record = parseIndexRequest(request, self.id)
    if record.keys is None:
        return None

    # Sanitize query terms: strip backslashes/quotes and guarantee
    # utf8-encoded bytes, which the query template expects.
    keys = []
    for key in record.keys:
        key = key.replace("\\", "").replace('"', "")
        if not isinstance(key, bytes):
            key = key.encode("utf8")
        keys.append(key)
    template_params = {"keys": keys}
    __traceback_info__ = "template parameters: {0}".format(template_params)
    query_body = self._apply_template(template_params)
    logger.info(query_body)
    es_kwargs = dict(
        index=index_name(),
        body=query_body,
        size=BATCH_SIZE,
        scroll="1m",
        _source_includes=["rid"],
    )
    es = get_query_client()
    try:
        result = es.search(**es_kwargs)
    except RequestError:
        logger.info("Query failed:\n{0}".format(query_body))
        return None
    except TransportError:
        logger.exception("ElasticSearch failed")
        return None

    def score(hit):
        # ES relevance scores are small floats; scale to int for IIBTree.
        return int(10000 * float(hit["_score"]))

    # initial return value; further scroll batches are merged in below
    retval = IIBTree()
    for hit in result["hits"]["hits"]:
        retval[hit["_source"]["rid"]] = score(hit)

    total = result["hits"]["total"]["value"]
    if total > BATCH_SIZE:
        sid = result["_scroll_id"]
        counter = BATCH_SIZE
        while counter < total:
            result = es.scroll(scroll_id=sid, scroll="1m")
            # BUGFIX: the scroll id may change between responses; the ES
            # scroll API requires using the id from the *most recent*
            # response, otherwise later batches can be lost.
            sid = result.get("_scroll_id", sid)
            for hit in result["hits"]["hits"]:
                retval[hit["_source"]["rid"]] = score(hit)
            counter += BATCH_SIZE
    return retval, (self.id,)
def _apply_index(self, request, resultset=None):
    """Apply the index to query parameters given in 'request', which
    should be a mapping object.

    If the request does not contain the needed parameters, then return
    None.

    Otherwise return two objects.  The first object is a ResultSet
    containing the record numbers of the matching records.  The second
    object is a tuple containing the names of all data fields used.

    Results are cached per request (RequestCache) keyed by index id and
    a coarsened term; when 'resultset' is given, the *inverse* set is
    computed/cached and subtracted from 'resultset' instead.
    """
    iid = self.id
    record = parseIndexRequest(request, iid, self.query_options)
    if record.keys is None:
        return None
    term = self._convertDateTime(record.keys[0])
    REQUEST = aq_get(self, 'REQUEST', None)
    if REQUEST is not None:
        catalog = aq_parent(aq_parent(aq_inner(self)))
        if catalog is not None:
            key = self._cache_key(catalog)
            cache = REQUEST.get(key, None)
            # coarsen the term to 10-unit buckets for the cache key
            # NOTE(review): under Python 3 `term / 10` yields a float;
            # presumably only ever used as a cache-key string — confirm
            tid = isinstance(term, int) and term / 10 or 'None'
            if resultset is None:
                cachekey = '_daterangeindex_%s_%s' % (iid, tid)
            else:
                cachekey = '_daterangeindex_inverse_%s_%s' % (iid, tid)
            if cache is None:
                cache = REQUEST[key] = RequestCache()
            else:
                cached = cache.get(cachekey, None)
                if cached is not None:
                    if resultset is None:
                        return (cached,
                                (self._since_field, self._until_field))
                    else:
                        return (difference(resultset, cached),
                                (self._since_field, self._until_field))
    if resultset is None:
        # Aggregate sets for each bucket separately, to avoid
        # large-small union penalties.
        until_only = multiunion(self._until_only.values(term))
        since_only = multiunion(self._since_only.values(None, term))
        until = multiunion(self._until.values(term))
        # Total result is bound by resultset
        if REQUEST is None:
            until = intersection(resultset, until)
        since = multiunion(self._since.values(None, term))
        bounded = intersection(until, since)
        # Merge from smallest to largest.
        result = multiunion([bounded, until_only, since_only,
                             self._always])
        if REQUEST is not None and catalog is not None:
            cache[cachekey] = result
        return (result, (self._since_field, self._until_field))
    else:
        # Compute the inverse and subtract from res
        until_only = multiunion(self._until_only.values(None, term - 1))
        since_only = multiunion(self._since_only.values(term + 1))
        until = multiunion(self._until.values(None, term - 1))
        since = multiunion(self._since.values(term + 1))
        result = multiunion([since, since_only, until_only, until])
        if REQUEST is not None and catalog is not None:
            cache[cachekey] = result
        return (difference(resultset, result),
                (self._since_field, self._until_field))
def unindex_apply_index(self, request, cid='', type=type, res=None):
    """Apply an UnIndex to the catalog query in 'request'.

    Supports 'or'/'and' operators, a 'range'/'usage' parameter (min/max
    bounds from the query keys), duplicate filtering, early bail-out for
    missing keys, and bounding intersections by an optional pre-computed
    result set 'res'.  Returns (result set, (self.id,)) or None when the
    query has no key for this index.
    NOTE(review): 'cid' and 'type' are unused legacy parameters.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys==None:
        return None
    index = self._index
    r = None
    opr = None
    # experimental code for specifing the operator
    operator = record.get('operator',self.useOperator)
    if not operator in self.operators :
        raise RuntimeError,"operator not valid: %s" % escape(operator)
    # depending on the operator we use intersection or union
    if operator=="or":
        set_func = union
    else:
        set_func = intersection
    # Range parameter
    range_parm = record.get('range',None)
    if range_parm:
        opr = "range"
        opr_args = []
        if range_parm.find("min")>-1:
            opr_args.append("min")
        if range_parm.find("max")>-1:
            opr_args.append("max")
    if record.get('usage',None):
        # see if any usage params are sent to field
        opr = record.usage.lower().split(':')
        opr, opr_args=opr[0], opr[1:]
    if opr=="range":   # range search
        if 'min' in opr_args:
            lo = min(record.keys)
        else:
            lo = None
        if 'max' in opr_args:
            hi = max(record.keys)
        else:
            hi = None
        if hi:
            setlist = index.values(lo,hi)
        else:
            setlist = index.values(lo)
        # If we only use 1 key (default setting), intersect and return immediately
        if len(setlist) == 1:
            result = setlist[0]
            if isinstance(result, int):
                result = IISet((result,))
            return result, (self.id,)
        if operator == 'or':
            r = multiunion(setlist)
        else:
            # For intersection, sort with smallest data set first
            tmp = []
            for s in setlist:
                if isinstance(s, int):
                    s = IISet((s,))
                tmp.append(s)
            if len(tmp) > 2:
                setlist = sorted(tmp, key=len)
            else:
                setlist = tmp
            # start from the (possibly None) pre-computed result set
            r = res
            for s in setlist:
                r = intersection(r, s)
    else: # not a range search
        # Filter duplicates, and sort by length
        keys = set(record.keys)
        setlist = []
        for k in keys:
            s = index.get(k, None)
            # If None, try to bail early
            if s is None:
                if operator == 'or':
                    # If union, we can't possibly get a bigger result
                    continue
                # If intersection, we can't possibly get a smaller result
                return IISet(), (self.id,)
            elif isinstance(s, int):
                s = IISet((s,))
            setlist.append(s)
        # If we only use 1 key (default setting), intersect and return immediately
        if len(setlist) == 1:
            result = setlist[0]
            if isinstance(result, int):
                result = IISet((result,))
            return result, (self.id,)
        if operator == 'or':
            # If we already get a small result set passed in, intersecting
            # the various indexes with it and doing the union later is faster
            # than creating a multiunion first.
            if res is not None and len(res) < 200:
                smalllist = []
                for s in setlist:
                    smalllist.append(intersection(res, s))
                r = multiunion(smalllist)
            else:
                r = multiunion(setlist)
        else:
            # For intersection, sort with smallest data set first
            if len(setlist) > 2:
                setlist = sorted(setlist, key=len)
            r = res
            for s in setlist:
                r = intersection(r, s)
    if isinstance(r, int):
        r=IISet((r,))
    if r is None:
        return IISet(), (self.id,)
    else:
        return r, (self.id,)
def dateindex_apply_index(self, request, cid="", type=type, res=None):
    """Apply a DateIndex to the catalog query in 'request'.

    Supports 'or'/'and' operators and a 'range'/'usage' parameter whose
    min/max bounds come from the converted query keys; 'res' optionally
    bounds non-range intersections.  Returns (result set, (self.id,)) or
    None when there is no query for this index.
    NOTE(review): 'cid' and 'type' appear unused (legacy signature).
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys == None:
        return None
    # normalize query values into the index's internal representation
    keys = map(self._convert, record.keys)
    index = self._index
    r = None
    opr = None
    # experimental code for specifing the operator
    operator = record.get("operator", self.useOperator)
    if not operator in self.operators:
        raise RuntimeError, "operator not valid: %s" % operator
    # depending on the operator we use intersection or union
    if operator == "or":
        set_func = union
    else:
        set_func = intersection
    # range parameter
    range_arg = record.get("range", None)
    if range_arg:
        opr = "range"
        opr_args = []
        if range_arg.find("min") > -1:
            opr_args.append("min")
        if range_arg.find("max") > -1:
            opr_args.append("max")
    if record.get("usage", None):
        # see if any usage params are sent to field
        opr = record.usage.lower().split(":")
        opr, opr_args = opr[0], opr[1:]
    if opr == "range":  # range search
        if "min" in opr_args:
            lo = min(keys)
        else:
            lo = None
        if "max" in opr_args:
            hi = max(keys)
        else:
            hi = None
        if hi:
            setlist = index.values(lo, hi)
        else:
            setlist = index.values(lo)
        # for k, set in setlist:
        #     if type(set) is IntType:
        #         set = IISet((set,))
        #     r = set_func(r, set)
        # XXX: Use multiunion!
        r = multiunion(setlist)
    else:  # not a range search
        for key in keys:
            set = index.get(key, None)
            if set is not None:
                if isinstance(set, int):
                    set = IISet((set,))
                else:
                    # set can't be bigger than res
                    set = intersection(set, res)
                r = set_func(r, set)
    if isinstance(r, int):
        r = IISet((r,))
    if r is None:
        return IISet(), (self.id,)
    else:
        return r, (self.id,)
def _apply_index(self, request, resultset=None):
    """Apply the index to query parameters given in the argument

    Normalize the 'query' arguments into integer values at minute
    precision before querying.

    Returns (result set, (self.id,)) or None when the request carries
    no query for this index.  'resultset' optionally bounds non-range
    intersections.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    # BUGFIX: materialize the converted keys.  Under Python 3 map()
    # returns a one-shot iterator, so the range branch below exhausted it
    # with min() and then got ValueError (or wrong bounds) from max().
    keys = [self._convert(k) for k in record.keys]

    index = self._index
    r = None
    opr = None

    # experimental code for specifing the operator
    operator = record.get('operator', self.useOperator)
    if not operator in self.operators:
        raise RuntimeError("operator not valid: %s" % operator)

    # depending on the operator we use intersection or union
    if operator == "or":
        set_func = union
    else:
        set_func = intersection

    # range parameter
    range_arg = record.get('range', None)
    if range_arg:
        opr = "range"
        opr_args = []
        if range_arg.find("min") > -1:
            opr_args.append("min")
        if range_arg.find("max") > -1:
            opr_args.append("max")

    if record.get('usage', None):
        # see if any usage params are sent to field
        opr = record.usage.lower().split(':')
        opr, opr_args = opr[0], opr[1:]

    if opr == "range":  # range search
        lo = min(keys) if 'min' in opr_args else None
        hi = max(keys) if 'max' in opr_args else None
        if hi:
            setlist = index.values(lo, hi)
        else:
            setlist = index.values(lo)
        r = multiunion(setlist)
    else:  # not a range search
        for key in keys:
            set = index.get(key, None)
            if set is not None:
                if isinstance(set, int):
                    set = IISet((set, ))
                else:
                    # set can't be bigger than resultset
                    set = intersection(set, resultset)
                r = set_func(r, set)

    if isinstance(r, int):
        r = IISet((r, ))
    if r is None:
        return IISet(), (self.id, )
    return r, (self.id, )
def _apply_index(self, request, resultset=None):
    """Apply the index to query parameters given in the request arg.

    The request argument should be a mapping object.  If the request
    does not have a key which matches the "id" of the index instance,
    then None is returned.

    If the request *does* have a key which matches the "id" of the index
    instance, one of a few things can happen:

    - if the value is a blank string, None is returned (in order to
      support requests from web forms where you can't tell a blank
      string from empty).
    - if the value is a nonblank string, turn the value into a
      single-element sequence, and proceed.
    - if the value is a sequence, return a union search.
    - If the value is a dict and contains a key of the form
      '<index>_operator' this overrides the default method ('or') to
      combine search results.  Valid values are "or" and "and".

    If None is not returned as a result of the abovementioned
    constraints, two objects are returned.  The first object is a
    ResultSet containing the record numbers of the matching records.
    The second object is a tuple containing the names of all data
    fields used.

    FAQ answer: to search a Field Index for documents that have a blank
    string as their value, wrap the request value up in a tuple ala:
    request = {'id':('',)}
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    index = self._index
    r = None
    opr = None

    # not / exclude parameter
    not_parm = record.get('not', None)
    if not record.keys and not_parm:
        # BUGFIX: materialize the conversion — under Python 3 map()
        # returns a one-shot iterator, so the `k not in not_parm`
        # membership tests below consumed it after the first key and
        # excluded nothing afterwards.
        not_parm = list(map(self._convert, not_parm))
        # we have only a 'not' query
        record.keys = [k for k in index.keys() if k not in not_parm]
    else:
        # BUGFIX: likewise materialize the converted keys; the range
        # branch consumes them twice via min() and max().
        record.keys = list(map(self._convert, record.keys))

    # experimental code for specifing the operator
    operator = record.get('operator', self.useOperator)
    if not operator in self.operators:
        raise RuntimeError("operator not valid: %s" % escape(operator))

    # Range parameter
    range_parm = record.get('range', None)
    if range_parm:
        opr = "range"
        opr_args = []
        if range_parm.find("min") > -1:
            opr_args.append("min")
        if range_parm.find("max") > -1:
            opr_args.append("max")

    if record.get('usage', None):
        # see if any usage params are sent to field
        opr = record.usage.lower().split(':')
        opr, opr_args = opr[0], opr[1:]

    if opr == "range":  # range search
        lo = min(record.keys) if 'min' in opr_args else None
        hi = max(record.keys) if 'max' in opr_args else None
        if hi:
            setlist = index.values(lo, hi)
        else:
            setlist = index.values(lo)

        # If we only use one key, intersect and return immediately
        if len(setlist) == 1:
            result = setlist[0]
            if isinstance(result, int):
                result = IISet((result,))
            if not_parm:
                exclude = self._apply_not(not_parm, resultset)
                result = difference(result, exclude)
            return result, (self.id,)

        if operator == 'or':
            tmp = []
            for s in setlist:
                if isinstance(s, int):
                    s = IISet((s,))
                tmp.append(s)
            r = multiunion(tmp)
        else:
            # For intersection, sort with smallest data set first
            tmp = []
            for s in setlist:
                if isinstance(s, int):
                    s = IISet((s,))
                tmp.append(s)
            if len(tmp) > 2:
                setlist = sorted(tmp, key=len)
            else:
                setlist = tmp
            r = resultset
            for s in setlist:
                # the result is bound by the resultset
                r = intersection(r, s)
    else:  # not a range search
        # Filter duplicates
        setlist = []
        for k in record.keys:
            if k is None:
                raise TypeError('None cannot be in an index.')
            s = index.get(k, None)
            # If None, try to bail early
            if s is None:
                if operator == 'or':
                    # If union, we can't possibly get a bigger result
                    continue
                # If intersection, we can't possibly get a smaller result
                return IISet(), (self.id,)
            elif isinstance(s, int):
                s = IISet((s,))
            setlist.append(s)

        # If we only use one key return immediately
        if len(setlist) == 1:
            result = setlist[0]
            if isinstance(result, int):
                result = IISet((result,))
            if not_parm:
                exclude = self._apply_not(not_parm, resultset)
                result = difference(result, exclude)
            return result, (self.id,)

        if operator == 'or':
            # If we already get a small result set passed in, intersecting
            # the various indexes with it and doing the union later is
            # faster than creating a multiunion first.
            if resultset is not None and len(resultset) < 200:
                smalllist = []
                for s in setlist:
                    smalllist.append(intersection(resultset, s))
                r = multiunion(smalllist)
            else:
                r = multiunion(setlist)
        else:
            # For intersection, sort with smallest data set first
            if len(setlist) > 2:
                setlist = sorted(setlist, key=len)
            r = resultset
            for s in setlist:
                r = intersection(r, s)

    if isinstance(r, int):
        r = IISet((r, ))
    if r is None:
        return IISet(), (self.id,)
    if not_parm:
        exclude = self._apply_not(not_parm, resultset)
        r = difference(r, exclude)
    return r, (self.id,)
def _apply_index(self, request, resultset=None):
    """Apply the index to query parameters given in the argument

    Normalize the 'query' arguments into integer values at minute
    precision before querying.

    Returns (result set, (self.id,)) or None when there is no query for
    this index; 'resultset' optionally bounds non-range intersections.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    # BUGFIX: list() the converted keys.  Python 3 map() is a one-shot
    # iterator; min() followed by max() in the range branch exhausted it.
    keys = [self._convert(k) for k in record.keys]

    index = self._index
    r = None
    opr = None

    # experimental code for specifing the operator
    operator = record.get('operator', self.useOperator)
    if not operator in self.operators:
        raise RuntimeError("operator not valid: %s" % operator)

    # depending on the operator we use intersection or union
    if operator == "or":
        set_func = union
    else:
        set_func = intersection

    # range parameter
    range_arg = record.get('range', None)
    if range_arg:
        opr = "range"
        opr_args = []
        if range_arg.find("min") > -1:
            opr_args.append("min")
        if range_arg.find("max") > -1:
            opr_args.append("max")

    if record.get('usage', None):
        # see if any usage params are sent to field
        opr = record.usage.lower().split(':')
        opr, opr_args = opr[0], opr[1:]

    if opr == "range":  # range search
        lo = min(keys) if 'min' in opr_args else None
        hi = max(keys) if 'max' in opr_args else None
        if hi:
            setlist = index.values(lo, hi)
        else:
            setlist = index.values(lo)
        r = multiunion(setlist)
    else:  # not a range search
        for key in keys:
            set = index.get(key, None)
            if set is not None:
                if isinstance(set, int):
                    set = IISet((set,))
                else:
                    # set can't be bigger than resultset
                    set = intersection(set, resultset)
                r = set_func(r, set)

    if isinstance(r, int):
        r = IISet((r,))
    if r is None:
        return IISet(), (self.id,)
    return r, (self.id,)
def _apply_index(self, request, resultset=None):
    """ Apply the index to query parameters given in 'request', which
    should be a mapping object.

    If the request does not contain the needed parameters, then return
    None.

    Otherwise return two objects.  The first object is a ResultSet
    containing the record numbers of the matching records.  The second
    object is a tuple containing the names of all data fields used.

    Results are cached per request (RequestCache) keyed by index id and
    a coarsened term; with 'resultset' given, the inverse set is cached
    and subtracted from 'resultset'.
    """
    iid = self.id
    record = parseIndexRequest(request, iid, self.query_options)
    if record.keys is None:
        return None
    term = self._convertDateTime(record.keys[0])
    REQUEST = aq_get(self, 'REQUEST', None)
    if REQUEST is not None:
        catalog = aq_parent(aq_parent(aq_inner(self)))
        if catalog is not None:
            key = self._cache_key(catalog)
            cache = REQUEST.get(key, None)
            # coarsen the term into 10-unit buckets for the cache key
            # NOTE(review): `term / 10` is float division on Python 3;
            # presumably only used as a cache-key string — confirm
            tid = isinstance(term, int) and term / 10 or 'None'
            if resultset is None:
                cachekey = '_daterangeindex_%s_%s' % (iid, tid)
            else:
                cachekey = '_daterangeindex_inverse_%s_%s' % (iid, tid)
            if cache is None:
                cache = REQUEST[key] = RequestCache()
            else:
                cached = cache.get(cachekey, None)
                if cached is not None:
                    if resultset is None:
                        return (cached,
                                (self._since_field, self._until_field))
                    else:
                        return (difference(resultset, cached),
                                (self._since_field, self._until_field))
    if resultset is None:
        # Aggregate sets for each bucket separately, to avoid
        # large-small union penalties.
        until_only = multiunion(self._until_only.values(term))
        since_only = multiunion(self._since_only.values(None, term))
        until = multiunion(self._until.values(term))
        # Total result is bound by resultset
        if REQUEST is None:
            until = intersection(resultset, until)
        since = multiunion(self._since.values(None, term))
        bounded = intersection(until, since)
        # Merge from smallest to largest.
        result = multiunion(
            [bounded, until_only, since_only, self._always])
        if REQUEST is not None and catalog is not None:
            cache[cachekey] = result
        return (result, (self._since_field, self._until_field))
    else:
        # Compute the inverse and subtract from res
        until_only = multiunion(self._until_only.values(None, term - 1))
        since_only = multiunion(self._since_only.values(term + 1))
        until = multiunion(self._until.values(None, term - 1))
        since = multiunion(self._since.values(term + 1))
        result = multiunion([until_only, since_only, until, since])
        if REQUEST is not None and catalog is not None:
            cache[cachekey] = result
        return (difference(resultset, result),
                (self._since_field, self._until_field))
# NOTE(review): truncated snippet — the `def` line (and opening of its
# docstring) as well as the remainder of this function are missing from
# the file; the fragment is kept byte-identical below rather than guessed at.
and "and". If None is not returned as a result of the abovementioned constraints, two objects are returned. The first object is a ResultSet containing the record numbers of the matching records. The second object is a tuple containing the names of all data fields used. FAQ answer: to search a Field Index for documents that have a blank string as their value, wrap the request value up in a tuple ala: request = {'id':('',)} """ record = parseIndexRequest(request, self.id, self.query_options) if record.keys==None: return None index = self._index r = None opr = None # experimental code for specifing the operator operator = record.get('operator',self.useOperator) if not operator in self.operators : raise RuntimeError,"operator not valid: %s" % escape(operator) # depending on the operator we use intersection of union if operator=="or": set_func = union else: set_func = intersection
def unindex_apply_index(self, request, cid='', type=type, res=None):
    """Apply an UnIndex to the catalog query in 'request'.

    Supports 'or'/'and' operators, a 'range'/'usage' parameter (min/max
    bounds from the query keys), duplicate filtering, early bail-out for
    missing keys, and bounding intersections by an optional pre-computed
    result set 'res'.  Returns (result set, (self.id,)) or None when the
    query has no key for this index.
    NOTE(review): 'cid' and 'type' are unused legacy parameters.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys == None:
        return None
    index = self._index
    r = None
    opr = None
    # experimental code for specifing the operator
    operator = record.get('operator', self.useOperator)
    if not operator in self.operators:
        raise RuntimeError, "operator not valid: %s" % escape(operator)
    # depending on the operator we use intersection or union
    if operator == "or":
        set_func = union
    else:
        set_func = intersection
    # Range parameter
    range_parm = record.get('range', None)
    if range_parm:
        opr = "range"
        opr_args = []
        if range_parm.find("min") > -1:
            opr_args.append("min")
        if range_parm.find("max") > -1:
            opr_args.append("max")
    if record.get('usage', None):
        # see if any usage params are sent to field
        opr = record.usage.lower().split(':')
        opr, opr_args = opr[0], opr[1:]
    if opr == "range":  # range search
        if 'min' in opr_args:
            lo = min(record.keys)
        else:
            lo = None
        if 'max' in opr_args:
            hi = max(record.keys)
        else:
            hi = None
        if hi:
            setlist = index.values(lo, hi)
        else:
            setlist = index.values(lo)
        # If we only use 1 key (default setting), intersect and return immediately
        if len(setlist) == 1:
            result = setlist[0]
            if isinstance(result, int):
                result = IISet((result, ))
            return result, (self.id, )
        if operator == 'or':
            r = multiunion(setlist)
        else:
            # For intersection, sort with smallest data set first
            tmp = []
            for s in setlist:
                if isinstance(s, int):
                    s = IISet((s, ))
                tmp.append(s)
            if len(tmp) > 2:
                setlist = sorted(tmp, key=len)
            else:
                setlist = tmp
            # start from the (possibly None) pre-computed result set
            r = res
            for s in setlist:
                r = intersection(r, s)
    else:  # not a range search
        # Filter duplicates, and sort by length
        keys = set(record.keys)
        setlist = []
        for k in keys:
            s = index.get(k, None)
            # If None, try to bail early
            if s is None:
                if operator == 'or':
                    # If union, we can't possibly get a bigger result
                    continue
                # If intersection, we can't possibly get a smaller result
                return IISet(), (self.id, )
            elif isinstance(s, int):
                s = IISet((s, ))
            setlist.append(s)
        # If we only use 1 key (default setting), intersect and return immediately
        if len(setlist) == 1:
            result = setlist[0]
            if isinstance(result, int):
                result = IISet((result, ))
            return result, (self.id, )
        if operator == 'or':
            # If we already get a small result set passed in, intersecting
            # the various indexes with it and doing the union later is faster
            # than creating a multiunion first.
            if res is not None and len(res) < 200:
                smalllist = []
                for s in setlist:
                    smalllist.append(intersection(res, s))
                r = multiunion(smalllist)
            else:
                r = multiunion(setlist)
        else:
            # For intersection, sort with smallest data set first
            if len(setlist) > 2:
                setlist = sorted(setlist, key=len)
            r = res
            for s in setlist:
                r = intersection(r, s)
    if isinstance(r, int):
        r = IISet((r, ))
    if r is None:
        return IISet(), (self.id, )
    else:
        return r, (self.id, )
def _apply_index(self, request):
    """Apply the index to query parameters given in 'request'.

    The argument should be a mapping object.  If the request does not
    contain the needed parameters, then None is returned.

    If the request contains a parameter with the name of the column
    and this parameter is either a Record or a class instance then it
    is assumed that the parameters of this index are passed as attribute
    (Note: this is the recommended way to pass parameters since Zope 2.4)

    Otherwise two objects are returned.  The first object is a ResultSet
    containing the record numbers of the matching records.  The second
    object is a tuple containing the names of all data fields used.

    Queries ElasticSearch (simple_query_string over the configured
    search fields), scores each rid, and stashes highlight fragments in
    request annotations for later rendering.
    """
    config = get_configuration()
    timeout = getattr(config, 'request_timeout', 20)
    search_fields = getattr(config, 'search_fields', None)
    if not search_fields:
        search_fields = SEARCH_FIELDS
    # fields are stored as a whitespace-separated string, e.g. "title^2 text"
    search_fields = search_fields.split()
    if query_blocker.blocked:
        return
    record = parseIndexRequest(request, self.id)
    if record.keys is None:
        return None
    es = get_query_client()
    search = Search(using=es, index=index_name())
    search = search.params(
        request_timeout=timeout,
        size=BATCH_SIZE,
        preserve_order=True,
    )
    search = search.source(include='rid')
    query_string = record.keys[0]
    if query_string and query_string.startswith('*'):
        # plone.app.querystring contains op sends a leading *, remove it
        query_string = query_string[1:]
    search = search.query('simple_query_string',
                          query=query_string,
                          fields=search_fields)
    # setup highlighting
    for field in search_fields:
        # strip any boost suffix ("title^2" -> "title")
        name = field.split('^')[0]
        if name == 'title':
            # title shows up in results anyway
            continue
        search = search.highlight(name, fragment_size=FRAGMENT_SIZE)
    try:
        result = search.scan()
    except TransportError:
        # No es client, return empty results
        logger.exception('ElasticSearch client not available.')
        return IIBTree(), (self.id, )
    # initial return value, other batches to be applied
    retval = IIBTree()
    highlights = OOBTree()
    for r in result:
        if getattr(r, 'rid', None) is None:
            # something was indexed with no rid. Ignore for now.
            # this is only for highlights, so no big deal if we
            # skip one
            continue
        # scale the float relevance score to an int for the IIBTree
        retval[r.rid] = int(10000 * float(r.meta.score))
        # Index query returns only rids, so we need
        # to save highlights for later use
        highlight_list = []
        if getattr(r.meta, 'highlight', None) is not None:
            for key in dir(r.meta.highlight):
                highlight_list.extend(r.meta.highlight[key])
            highlights[r.meta.id] = highlight_list
    # store highlights
    try:
        annotations = IAnnotations(self.REQUEST)
        annotations[HIGHLIGHT_KEY] = highlights
    except TypeError:
        # maybe we are in a test
        pass
    return retval, (self.id, )
def _apply_index(self, request, cid='', type=type):
    """Apply the index to query parameters given in the request arg.

    The request argument should be a mapping object.  If the request
    does not have a key which matches the "id" of the index instance,
    then None is returned.

    If the request *does* have a key which matches the "id" of the index
    instance, one of a few things can happen:

    - if the value is a blank string, None is returned (in order to
      support requests from web forms where you can't tell a blank
      string from empty).
    - if the value is a nonblank string, turn the value into a
      single-element sequence, and proceed.
    - if the value is a sequence, return a union search.

    If the request contains a parameter with the name of the column
    + '_usage', it is sniffed for information on how to handle applying
    the index.

    If the request contains a parameter with the name of the column
    = '_operator' this overrides the default method ('or') to combine
    search results.  Valid values are "or" and "and".

    If None is not returned as a result of the abovementioned
    constraints, two objects are returned.  The first object is a
    ResultSet containing the record numbers of the matching records.
    The second object is a tuple containing the names of all data
    fields used.

    FAQ answer: to search a Field Index for documents that have a blank
    string as their value, wrap the request value up in a tuple ala:
    request = {'id':('',)}
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys==None:
        return None
    index = self._index
    r = None
    opr = None
    # experimental code for specifing the operator
    operator = record.get('operator',self.useOperator)
    if not operator in self.operators :
        raise RuntimeError,"operator not valid: %s" % escape(operator)
    # depending on the operator we use intersection or union
    if operator=="or":
        set_func = union
    else:
        set_func = intersection
    # Range parameter
    range_parm = record.get('range',None)
    if range_parm:
        opr = "range"
        opr_args = []
        if range_parm.find("min")>-1:
            opr_args.append("min")
        if range_parm.find("max")>-1:
            opr_args.append("max")
    if record.get('usage',None):
        # see if any usage params are sent to field
        opr = record.usage.lower().split(':')
        opr, opr_args=opr[0], opr[1:]
    if opr=="range":   # range search
        if 'min' in opr_args:
            lo = min(record.keys)
        else:
            lo = None
        if 'max' in opr_args:
            hi = max(record.keys)
        else:
            hi = None
        if hi:
            setlist = index.items(lo,hi)
        else:
            setlist = index.items(lo)
        for k, set in setlist:
            if isinstance(set, int):
                set = IISet((set,))
            r = set_func(r, set)
    else: # not a range search
        for key in record.keys:
            set=index.get(key, None)
            if set is None:
                # missing key contributes an empty set (neutral for union,
                # empties the result for intersection)
                set = IISet(())
            elif isinstance(set, int):
                set = IISet((set,))
            r = set_func(r, set)
    if isinstance(r, int):
        r=IISet((r,))
    if r is None:
        return IISet(), (self.id,)
    else:
        return r, (self.id,)
def _apply_index(self, request, resultset=None):
    """Apply the index to query parameters given in the request arg.

    The request argument should be a mapping object.  If the request
    does not have a key which matches the "id" of the index instance,
    then None is returned.

    If the request *does* have a key which matches the "id" of the index
    instance, one of a few things can happen:

    - if the value is a blank string, None is returned (in order to
      support requests from web forms where you can't tell a blank
      string from empty).
    - if the value is a nonblank string, turn the value into a
      single-element sequence, and proceed.
    - if the value is a sequence, return a union search.
    - If the value is a dict and contains a key of the form
      '<index>_operator' this overrides the default method ('or') to
      combine search results.  Valid values are "or" and "and".

    If None is not returned as a result of the abovementioned
    constraints, two objects are returned.  The first object is a
    ResultSet containing the record numbers of the matching records.
    The second object is a tuple containing the names of all data
    fields used.

    FAQ answer: to search a Field Index for documents that have a blank
    string as their value, wrap the request value up in a tuple ala:
    request = {'id':('',)}
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    index = self._index
    r = None
    opr = None

    # not / exclude parameter
    not_parm = record.get('not', None)
    if not record.keys and not_parm:
        # BUGFIX: materialize the conversion — Python 3 map() returns a
        # one-shot iterator, so the `k not in not_parm` membership tests
        # below consumed it after the first key and excluded nothing
        # afterwards.
        not_parm = list(map(self._convert, not_parm))
        # we have only a 'not' query
        record.keys = [k for k in index.keys() if k not in not_parm]
    else:
        # BUGFIX: likewise materialize the converted keys; the range
        # branch consumes them twice via min() and max().
        record.keys = list(map(self._convert, record.keys))

    # experimental code for specifing the operator
    operator = record.get('operator', self.useOperator)
    if not operator in self.operators:
        raise RuntimeError("operator not valid: %s" % escape(operator))

    # Range parameter
    range_parm = record.get('range', None)
    if range_parm:
        opr = "range"
        opr_args = []
        if range_parm.find("min") > -1:
            opr_args.append("min")
        if range_parm.find("max") > -1:
            opr_args.append("max")

    if record.get('usage', None):
        # see if any usage params are sent to field
        opr = record.usage.lower().split(':')
        opr, opr_args = opr[0], opr[1:]

    if opr == "range":  # range search
        lo = min(record.keys) if 'min' in opr_args else None
        hi = max(record.keys) if 'max' in opr_args else None
        if hi:
            setlist = index.values(lo, hi)
        else:
            setlist = index.values(lo)

        # If we only use one key, intersect and return immediately
        if len(setlist) == 1:
            result = setlist[0]
            if isinstance(result, int):
                result = IISet((result, ))
            if not_parm:
                exclude = self._apply_not(not_parm, resultset)
                result = difference(result, exclude)
            return result, (self.id, )

        if operator == 'or':
            tmp = []
            for s in setlist:
                if isinstance(s, int):
                    s = IISet((s, ))
                tmp.append(s)
            r = multiunion(tmp)
        else:
            # For intersection, sort with smallest data set first
            tmp = []
            for s in setlist:
                if isinstance(s, int):
                    s = IISet((s, ))
                tmp.append(s)
            if len(tmp) > 2:
                setlist = sorted(tmp, key=len)
            else:
                setlist = tmp
            r = resultset
            for s in setlist:
                # the result is bound by the resultset
                r = intersection(r, s)
    else:  # not a range search
        # Filter duplicates
        setlist = []
        for k in record.keys:
            s = index.get(k, None)
            # If None, try to bail early
            if s is None:
                if operator == 'or':
                    # If union, we can't possibly get a bigger result
                    continue
                # If intersection, we can't possibly get a smaller result
                return IISet(), (self.id, )
            elif isinstance(s, int):
                s = IISet((s, ))
            setlist.append(s)

        # If we only use one key return immediately
        if len(setlist) == 1:
            result = setlist[0]
            if isinstance(result, int):
                result = IISet((result, ))
            if not_parm:
                exclude = self._apply_not(not_parm, resultset)
                result = difference(result, exclude)
            return result, (self.id, )

        if operator == 'or':
            # If we already get a small result set passed in, intersecting
            # the various indexes with it and doing the union later is
            # faster than creating a multiunion first.
            if resultset is not None and len(resultset) < 200:
                smalllist = []
                for s in setlist:
                    smalllist.append(intersection(resultset, s))
                r = multiunion(smalllist)
            else:
                r = multiunion(setlist)
        else:
            # For intersection, sort with smallest data set first
            if len(setlist) > 2:
                setlist = sorted(setlist, key=len)
            r = resultset
            for s in setlist:
                r = intersection(r, s)

    if isinstance(r, int):
        r = IISet((r, ))
    if r is None:
        return IISet(), (self.id, )
    if not_parm:
        exclude = self._apply_not(not_parm, resultset)
        r = difference(r, exclude)
    return r, (self.id, )
def daterangeindex_apply_index(self, request, cid='', res=None):
    """Apply a DateRangeIndex to the query parameters in `request`.

    `request` -- mapping with the catalog query; parsed via
        `parseIndexRequest` for this index's id.
    `cid` -- unused here; kept for the (Z)Catalog `_apply_index`
        calling convention.
    `res` -- optional result set computed by earlier indexes.  When
        given, the answer is computed as ``res - inverse(term)``
        instead of building the full match set from scratch.

    Returns ``(resultset, (since_field, until_field))``, or None when
    the request contains no query for this index.
    """
    record = parseIndexRequest(request, self.getId())
    if record.keys is None:
        return None

    # Normalize the query value into the index's internal time format.
    term = self._convertDateTime(record.keys[0])

    # Per-request cache: keyed on catalog id + catalog counter so the
    # cache is effectively invalidated when the catalog changes.
    REQUEST = getattr(self, 'REQUEST', None)
    if REQUEST is not None:
        catalog = aq_parent(aq_parent(aq_inner(self)))
        if catalog is not None:
            key = '%s_%s' % (catalog.getId(), catalog.getCounter())
            cache = REQUEST.get(key, None)
            # Bucket the cache key into 10-unit windows of the term
            # (presumably to raise hit rates for near-identical query
            # times within one request -- TODO confirm intent).
            tid = isinstance(term, int) and term / 10 or 'None'
            index_id = self.getId()
            # Forward and inverse lookups are cached under distinct keys.
            if res is None:
                cachekey = '_daterangeindex_%s_%s' % (index_id, tid)
            else:
                cachekey = '_daterangeindex_inverse_%s_%s' % (index_id, tid)
            if cache is None:
                cache = REQUEST[key] = RequestCache()
            else:
                cached = cache.get(cachekey, None)
                if cached is not None:
                    if res is None:
                        return cached, (self._since_field, self._until_field)
                    else:
                        return (difference(res, cached), (self._since_field, self._until_field))

    if res is None:
        #
        # Aggregate sets for each bucket separately, to avoid
        # large-small union penalties.
        # XXX Does this apply for multiunion?
        #
        until_only = multiunion(self._until_only.values(term))
        since_only = multiunion(self._since_only.values(None, term))
        until = multiunion(self._until.values(term))
        # Total result is bound by res
        # NOTE(review): `res` is None in this branch, so the
        # intersection below is a no-op (BTrees intersection with None
        # returns the other operand); the guard looks like it should be
        # `res is not None` -- confirm against the upstream index code.
        if REQUEST is None:
            until = intersection(res, until)
        since = multiunion(self._since.values(None, term))
        bounded = intersection(until, since)
        # Union of: always-valid docs, open-ended matches
        # (until_only / since_only), and fully bounded ranges.
        result = multiunion([bounded, until_only, since_only, self._always])
        if REQUEST is not None and catalog is not None:
            cache[cachekey] = result
        return result, (self._since_field, self._until_field)
    else:
        # Compute the inverse and subtract from res
        until_only = multiunion(self._until_only.values(None, term - 1))
        since_only = multiunion(self._since_only.values(term + 1))
        until = multiunion(self._until.values(None, term - 1))
        since = multiunion(self._since.values(term + 1))
        result = multiunion([until_only, since_only, until, since])
        if REQUEST is not None and catalog is not None:
            cache[cachekey] = result
        return difference(res, result), (self._since_field, self._until_field)
def _apply_index(self, request):
    """Apply the index to query parameters given in 'request'.

    The argument should be a mapping object.

    If the request does not contain the needed parameters, then
    None is returned.

    If the request contains a parameter with the name of the
    column and this parameter is either a Record or a class
    instance then it is assumed that the parameters of this
    index are passed as attribute (Note: this is the
    recommended way to pass parameters since Zope 2.4)

    Otherwise two objects are returned.  The first object is a
    ResultSet containing the record numbers of the matching
    records.  The second object is a tuple containing the names of
    all data fields used.
    """
    config = get_configuration()
    timeout = getattr(config, 'request_timeout', 20)
    search_fields = getattr(config, 'search_fields', None)
    if not search_fields:
        search_fields = SEARCH_FIELDS
    search_fields = search_fields.split()
    logger.info(search_fields)
    if query_blocker.blocked:
        # Another component asked to bypass ElasticSearch for this request.
        return
    record = parseIndexRequest(request, self.id)
    if record.keys is None:
        return None
    es = get_query_client()
    search = Search(using=es, index=index_name())
    search = search.params(request_timeout=timeout)
    # Stable sort on (rid, _id) so the 'search_after' pagination
    # below walks the result set deterministically.
    search = search.sort('rid', '_id')
    search = search.source(include='rid')
    query_string = record.keys[0].decode('utf8')
    logger.info(query_string)
    # simple_query_string treats '*' specially; neutralize user wildcards.
    if '*' in query_string:
        query_string = query_string.replace('*', ' ')
    query_string = query_string.strip()
    search = search.query('simple_query_string', query=query_string,
                          fields=search_fields)
    results_count = search.count()
    search = search.params(request_timeout=timeout, size=BATCH_SIZE,
                           track_scores=True)
    # Set up highlighting on every searched field except 'title',
    # which shows up in results anyway.
    for field in search_fields:
        name = field.split('^')[0]
        if name == 'title':
            continue
        search = search.highlight(name, fragment_size=FRAGMENT_SIZE)
    # Initial return value; further batches are merged in below.
    retval = IIBTree()
    highlights = OOBTree()
    last_seen = None
    # Ceiling division: one extra batch covers any partial remainder.
    # (Integer '//' also keeps this correct if run under Python 3,
    # where 'results_count / BATCH_SIZE' would yield a float.)
    batch_count = (results_count + BATCH_SIZE - 1) // BATCH_SIZE
    for i in xrange(batch_count):
        if last_seen is not None:
            search = search.update_from_dict({'search_after': last_seen})
        try:
            results = search.execute(ignore_cache=True)
        except TransportError:
            # No es client, return empty results
            logger.exception('ElasticSearch client not available.')
            return IIBTree(), (self.id, )
        for r in results:
            rid = getattr(r, 'rid', None)
            if rid is not None:
                # IIBTree stores ints only, so scale the float score.
                retval[rid] = int(10000 * float(r.meta.score))
                # Index query returns only rids, so we need
                # to save highlights for later use
                highlight_list = []
                if getattr(r.meta, 'highlight', None) is not None:
                    for key in dir(r.meta.highlight):
                        highlight_list.extend(r.meta.highlight[key])
                highlights[r.meta.id] = highlight_list
            # Cursor for search_after: must mirror the sort keys above.
            last_seen = [rid, r.meta.id]
    # store highlights
    try:
        annotations = IAnnotations(self.REQUEST)
        annotations[HIGHLIGHT_KEY] = highlights
    except TypeError:
        # maybe we are in a test
        pass
    return retval, (self.id, )