def testPairs(self):
    """Exercise weighted union/intersection over every ordered pair of
    mapping types (IIBTree and IIBucket) and several weight pairs,
    comparing against results computed the slow, obvious way.
    """
    t1 = IIBTree([(1, 10), (3, 30), (7, 70)])
    t2 = IIBTree([(3, 30), (5, 50), (7, 7), (9, 90)])
    allkeys = [1, 3, 5, 7, 9]
    b1 = IIBucket(t1)
    b2 = IIBucket(t2)
    for x in t1, t2, b1, b2:
        # Sanity check: every key of every container is a known key.
        for key in x.keys():
            self.assertEqual(key in allkeys, 1)
        for y in t1, t2, b1, b2:
            for w1, w2 in (0, 0), (1, 10), (10, 1), (2, 3):
                # Test the union.
                # Expected result built by brute force: weighted sum over
                # keys present in either mapping.
                expected = []
                for key in allkeys:
                    if x.has_key(key) or y.has_key(key):
                        result = x.get(key, 0) * w1 + y.get(key, 0) * w2
                        expected.append((key, result))
                expected.sort()
                got = mass_weightedUnion([(x, w1), (y, w2)])
                self.assertEqual(expected, list(got.items()))
                # Union must be order-independent.
                got = mass_weightedUnion([(y, w2), (x, w1)])
                self.assertEqual(expected, list(got.items()))
                # Test the intersection.
                expected = []
                for key in allkeys:
                    if x.has_key(key) and y.has_key(key):
                        result = x[key] * w1 + y[key] * w2
                        expected.append((key, result))
                expected.sort()
                got = mass_weightedIntersection([(x, w1), (y, w2)])
                self.assertEqual(expected, list(got.items()))
                # Intersection must be order-independent.
                got = mass_weightedIntersection([(y, w2), (x, w1)])
                self.assertEqual(expected, list(got.items()))
def convertScores(scores, type=type, TupleType=TupleType, IIBTree=IIBTree):
    """Return `scores` as-is when it is a tuple or an IIBTree; otherwise
    convert it to an IIBTree.

    The `type`, `TupleType` and `IIBTree` parameters are default-argument
    bindings for speed (and to freeze the names at definition time);
    callers normally pass only `scores`.
    """
    # Bug fix: the original compared ``type(scores) is not IIBTree()`` —
    # i.e. against a freshly created *instance*, not the class — so the
    # test could never recognize an existing IIBTree and always rebuilt
    # it.  Compare against the class itself.
    if type(scores) is not TupleType and type(scores) is not IIBTree:
        scores = IIBTree(scores)
    return scores
def __init__(self, lexicon): self._lexicon = lexicon # wid -> {docid -> weight}; t -> D -> w(D, t) # Different indexers have different notions of term weight, but we # expect each indexer to use ._wordinfo to map wids to its notion # of a docid-to-weight map. # There are two kinds of OOV words: wid 0 is explicitly OOV, # and it's possible that the lexicon will return a non-zero wid # for a word we don't currently know about. For example, if we # unindex the last doc containing a particular word, that wid # remains in the lexicon, but is no longer in our _wordinfo map; # lexicons can also be shared across indices, and some other index # may introduce a lexicon word we've never seen. # A word is in-vocabulary for this index if and only if # _wordinfo.has_key(wid). Note that wid 0 must not be a key. self._wordinfo = IOBTree() # docid -> weight # Different indexers have different notions of doc weight, but we # expect each indexer to use ._docweight to map docids to its # notion of what a doc weight is. self._docweight = IIBTree() # docid -> WidCode'd list of wids # Used for un-indexing, and for phrase search. self._docwords = IOBTree() # Use a BTree length for efficient length computation w/o conflicts self.length = Length() self.document_count = Length()
def search_phrase(self, phrase):
    """Return an IIBTree mapping docid -> weight for documents that
    contain `phrase` as a contiguous word sequence.
    """
    wids = self._lexicon.termToWordIds(phrase)
    cleaned_wids = self._remove_oov_wids(wids)
    if len(cleaned_wids) != len(wids):
        # At least one word is out-of-vocabulary, so no document can
        # possibly contain the whole phrase.
        return IIBTree()
    hits = mass_weightedIntersection(self._search_wids(wids))
    if not hits:
        return hits
    # Candidates contain every word; confirm the words appear adjacent
    # and in order by searching each doc's encoded word list.
    code = WidCode.encode(wids)
    result = IIBTree()
    for docid, weight in hits.items():
        if self._docwords[docid].find(code) >= 0:
            result[docid] = weight
    return result
def testIdentity(self):
    """A single (mapping, 1) pair must pass through both the union and
    the intersection unchanged.
    """
    tree = IIBTree([(1, 2)])
    bucket = IIBucket([(1, 2)])
    for mapping in (tree, bucket):
        for operation in (mass_weightedUnion, mass_weightedIntersection):
            outcome = operation([(mapping, 1)])
            self.assertEqual(len(outcome), 1)
            self.assertEqual(list(outcome.items()), list(mapping.items()))
def __init__(self, id=None, **kwargs):
    """Create the listing and its supporting index structures."""
    super(TracListing, self).__init__(id, **kwargs)
    # Parent-to-child index: int parent ticket id ->
    # PersistentList of int child ticket ids.
    self._children = IOBTree()
    # Score index: int ticket# -> int (sum/score).
    self._scores = IIBTree()
    # Reward-ratio index: int ticket# -> float (ratio).
    self._reward = IOBTree()
def _apply_index(self, request):
    """Apply the index to query parameters given in 'request'.

    The argument should be a mapping object.

    If the request does not contain the needed parameters, then
    None is returned.

    If the request contains a parameter with the name of the
    column and this parameter is either a Record or a class
    instance then it is assumed that the parameters of this
    index are passed as attribute (Note: this is the recommended
    way to pass parameters since Zope 2.4)

    Otherwise two objects are returned.  The first object is a
    ResultSet containing the record numbers of the matching
    records.  The second object is a tuple containing the names of
    all data fields used.
    """
    # Externally disabled (e.g. during indexing) — bail out early.
    if query_blocker.blocked:
        return
    record = parseIndexRequest(request, self.id)
    if record.keys is None:
        return None
    template_params = {
        'keys': record.keys,
    }
    query_body = self._apply_template(template_params)
    logger.info(query_body)
    # NOTE(review): `_source_include` is the older elasticsearch-py
    # keyword (newer clients use `_source_includes`) — confirm against
    # the pinned client version.
    es_kwargs = dict(
        index=index_name(),
        body=query_body,
        size=BATCH_SIZE,
        scroll='1m',
        _source_include=['rid'],
    )
    es = get_query_client()
    result = es.search(**es_kwargs)  # initial return value, other batches to be applied

    def score(record):
        # ES relevance scores are floats; scale to int for the IIBTree.
        return int(10000 * float(record['_score']))

    retval = IIBTree()
    for r in result['hits']['hits']:
        retval[r['_source']['rid']] = score(r)
    total = result['hits']['total']
    if total > BATCH_SIZE:
        # Pull the remaining hits via the scroll API, one batch at a time.
        sid = result['_scroll_id']
        counter = BATCH_SIZE
        while counter < total:
            result = es.scroll(scroll_id=sid, scroll='1m')
            for record in result['hits']['hits']:
                retval[record['_source']['rid']] = score(record)
            counter += BATCH_SIZE
    return retval, (self.id,)
def clear(self):
    """Complete reset of the index; the change counter survives resets."""
    self._index = IOBTree()
    self._unindex = IIBTree()
    self._length = Length()
    if self._counter is not None:
        self._increment_counter()
    else:
        self._counter = Length()
def clear(self):
    """Reset all index structures; bump the change counter if present."""
    self._index = IITreeSet()
    self._index_length = BTrees.Length.Length()
    self._index_value = 1
    self._unindex = IIBTree()
    self._length = BTrees.Length.Length()
    if self._counter is not None:
        self._increment_counter()
    else:
        self._counter = BTrees.Length.Length()
def __init__(self, name, root):
    """Set up the per-minute date lookup structures for versions."""
    # m_order maintains a newest-first mapping of int -> version id.
    # m_date maintains a mapping of a packed date (int # of minutes
    # since the epoch) to a lookup key in m_order.  The two structures
    # are separate because we only support minute precision for date
    # lookups (and multiple versions could be added in a minute).
    self.date_created = time.time()
    self.m_order = IOBTree()
    self.m_date = IIBTree()
    self.name = name
    self.root = root
def testScalarMultiply(self):
    """A single (mapping, factor) pair scales every value by the factor,
    for both union and intersection.
    """
    tree = IIBTree([(1, 2), (2, 3), (3, 4)])
    expected_keys = [1, 2, 3]
    bucket = IIBucket(tree)
    for mapping in (tree, bucket):
        self.assertEqual(list(mapping.keys()), expected_keys)
        for operation in (mass_weightedUnion, mass_weightedIntersection):
            for factor in (0, 1, 5, 10):
                scaled = operation([(mapping, factor)])
                self.assertEqual(expected_keys, list(scaled.keys()))
                for key in mapping.keys():
                    self.assertEqual(mapping[key] * factor, scaled[key])
def test_walk_w_normal_btree(self):
    """walk() on a populated IIBTree is expected to be unimplemented."""
    from BTrees.IIBTree import IIBTree
    obj = IIBTree()
    for value in range(1000):
        obj[value] = value
    walker = self._makeOne(obj)
    # Scaffolding locals mirroring the walker's expected interface;
    # currently unused because walk() raises before consuming them.
    path = '/'
    parent = object()
    is_mapping = True
    keys = []
    kids = []
    lo = 0
    hi = None
    self.assertRaises(NotImplementedError, walker.walk)
def _mass_add_wordinfo(self, wid2weight, docid):
    """Record docid -> weight for every wid in `wid2weight`, promoting a
    wid's mapping from dict to IIBTree once it reaches DICT_CUTOFF.
    """
    lookup = self._wordinfo.get
    cutoff = self.DICT_CUTOFF
    fresh_words = 0
    for wid, weight in wid2weight.items():
        mapping = lookup(wid)
        if mapping is None:
            fresh_words += 1
            mapping = {}
        elif isinstance(mapping, dict) and len(mapping) == cutoff:
            # Dict got big enough: promote it to an IIBTree.
            mapping = IIBTree(mapping)
        mapping[docid] = weight
        # Reassign so the persistence machinery sees the change
        # (not redundant).
        self._wordinfo[wid] = mapping
    self.length.change(fresh_words)
def optimize_dateindex(index):
    """Migrate a date index's _unindex from OIBTree to IIBTree.

    No-op when the index was already migrated.  A savepoint is created
    every 10000 items so very large indexes do not exhaust memory in a
    single transaction.
    """
    old_unindex = index._unindex
    if isinstance(old_unindex, IIBTree):
        return
    index._unindex = _unindex = IIBTree()
    # Use lazy %-style logger arguments instead of eager string
    # formatting: the message is only built when INFO is enabled, and
    # this matches the logging style used elsewhere.
    logger.info('Converting to IIBTree for index `%s`.', index.getId())
    for pos, (k, v) in enumerate(old_unindex.items()):
        _unindex[k] = v
        if pos and pos % 10000 == 0:
            transaction.savepoint(optimistic=True)
            logger.info('Processed %s items.', pos)
    transaction.savepoint(optimistic=True)
    logger.info('Finished conversion.')
def insertDocument(self, docid, widlist):
    """Store per-document word frequencies on top of the base insertion."""
    Storage.insertDocument(self, docid, widlist)

    counts = {}  # wid -> number of occurrences in widlist
    # NOTE(review): num_wids is computed but unused in this block —
    # TODO confirm whether a later revision dropped its use.
    num_wids = float(len(widlist))
    for wid in widlist:
        counts[wid] = counts.get(wid, 0) + 1

    self._frequencies[docid] = IIBTree()
    freq_tree = self._frequencies[docid]
    for wid, count in counts.items():
        freq_tree[wid] = count
def insertForwardIndexEntry(self, entry, documentId, score=1):
    """Uses the information provided to update the indexes.

    The basic logic for choice of data structure is based on the
    number of entries as follows:

        1      tuple
        2-3    dictionary
        4+     bucket.
    """
    index = self._index
    indexRow = index.get(entry, None)

    if indexRow is not None:
        if type(indexRow) is TupleType:
            # Tuples are only used for rows which have only
            # a single entry.  Since we now need more, we'll
            # promote it to a mapping object (dictionary).

            # First, make sure we're not already in it, if so
            # update the score if necessary.
            if indexRow[0] == documentId:
                if indexRow[1] != score:
                    indexRow = (documentId, score)
                    # Reassignment makes the change visible to the
                    # persistence machinery.
                    index[entry] = indexRow
            else:
                indexRow = {
                    indexRow[0]: indexRow[1],
                    documentId: score,
                }
                index[entry] = indexRow
        else:
            if indexRow.get(documentId, -1) != score:
                # score changed (or new entry)

                if type(indexRow) is DictType:
                    indexRow[documentId] = score
                    if len(indexRow) > 3:
                        # Big enough to give it's own database record
                        indexRow = IIBTree(indexRow)
                        index[entry] = indexRow
                else:
                    # Already an IIBTree: in-place mutation persists on
                    # its own buckets.
                    indexRow[documentId] = score
    else:
        # We don't have any information at this point, so we'll
        # put our first entry in, and use a tuple to save space
        index[entry] = (documentId, score)
def optimize_dateindex(index):
    """Migrate a date index's _unindex from OIBTree to IIBTree."""
    old_unindex = index._unindex
    if isinstance(old_unindex, IIBTree):
        # Already migrated; nothing to do.
        return
    new_unindex = index._unindex = IIBTree()
    logger.info('Converting to IIBTree for index `%s`.', index.getId())
    for pos, (key, value) in enumerate(old_unindex.items()):
        new_unindex[key] = value
        # Note: flake8 erroneously complains about module formatter.
        if pos and pos % 10000 == 0:  # noqa S001
            transaction.savepoint(optimistic=True)
            logger.info('Processed %s items.', pos)
    transaction.savepoint(optimistic=True)
    logger.info('Finished conversion.')
def testMany(self):
    """Feed many overlapping IIBTrees (in shuffled order) to the mass
    union/intersection and compare against brute-force expectations.
    """
    import random
    N = 15  # number of IIBTrees to feed in
    L = []
    commonkey = N * 1000
    allkeys = {commonkey: 1}
    for i in range(N):
        t = IIBTree()
        # Every tree contains commonkey, so it survives the intersection.
        t[commonkey] = i
        for j in range(N - i):
            key = i + j
            allkeys[key] = 1
            t[key] = N * i + j
        L.append((t, i + 1))
    random.shuffle(L)
    allkeys = allkeys.keys()
    allkeys.sort()

    # Test the union.
    expected = []
    for key in allkeys:
        sum = 0
        for t, w in L:
            if t.has_key(key):
                sum += t[key] * w
        expected.append((key, sum))
    # print 'union', expected
    got = mass_weightedUnion(L)
    self.assertEqual(expected, list(got.items()))

    # Test the intersection.
    expected = []
    for key in allkeys:
        sum = 0
        for t, w in L:
            if t.has_key(key):
                sum += t[key] * w
            else:
                break
        else:
            # We didn't break out of the loop so it's in the intersection.
            expected.append((key, sum))
    # print 'intersection', expected
    got = mass_weightedIntersection(L)
    self.assertEqual(expected, list(got.items()))
def insert(self, idx, results, relnames=None, treePrefix=None):
    """Accumulate per-path, per-depth hit counts from catalog brains.

    `idx` maps a path string to an IIBTree of depth -> count; each
    brain's path (and every ancestor path) gets its count incremented.
    NOTE(review): `relnames` is accepted but unused in this block —
    confirm against callers.
    """
    unindex = None
    for brain in results:
        # Use the first brain to get a reference to the index, then reuse
        # that reference
        unindex = unindex or brain.global_catalog._catalog.indexes[
            'path']._unindex
        path = brain.getPath()
        if treePrefix and not path.startswith(treePrefix):
            # Brain's primary path is outside the tree; look through the
            # unindexed paths for one under the requested prefix.
            for p in unindex[brain.getRID()]:
                if p.startswith(treePrefix):
                    path = p
                    break
        # Drop the first three path segments (e.g. leading /zport/dmd/).
        path = path.split('/', 3)[-1]
        # Walk up the tree, bumping the counter at each ancestor depth.
        for depth in xrange(path.count('/') + 1):
            comp = idx.setdefault(path, IIBTree())
            comp[depth] = comp.get(depth, 0) + 1
            path = path.rsplit('/', 1)[0]
def __init__(self, datafs, writable=0, trans=0, pack=0):
    """Open (or create) the ZODB-backed mail index at `datafs`.

    trans/pack are commit/pack frequency limits (0 disables).
    """
    self.trans_limit = trans
    self.pack_limit = pack
    self.trans_count = 0
    self.pack_count = 0
    self.stopdict = get_stopdict()
    self.mh = mhlib.MH()
    # Read-only storage unless explicitly writable.
    self.filestorage = FileStorage(datafs, read_only=(not writable))
    self.database = DB(self.filestorage)
    self.connection = self.database.open()
    self.root = self.connection.root()
    # Each root object is created lazily on first use.
    try:
        self.index = self.root["index"]
    except KeyError:
        self.index = self.root["index"] = TextIndex()
    try:
        self.docpaths = self.root["docpaths"]
    except KeyError:
        self.docpaths = self.root["docpaths"] = IOBTree()
    try:
        self.doctimes = self.root["doctimes"]
    except KeyError:
        self.doctimes = self.root["doctimes"] = IIBTree()
    try:
        self.watchfolders = self.root["watchfolders"]
    except KeyError:
        self.watchfolders = self.root["watchfolders"] = {}
    # Rebuild the in-memory reverse map path -> docid from docpaths.
    self.path2docid = OIBTree()
    for docid in self.docpaths.keys():
        path = self.docpaths[docid]
        self.path2docid[path] = docid
    try:
        self.maxdocid = max(self.docpaths.keys())
    except ValueError:
        # Empty index: no docids yet.
        self.maxdocid = 0
    print len(self.docpaths), "Document ids"
    print len(self.path2docid), "Pathnames"
    print self.index.lexicon.length(), "Words"
def _add_wordinfo(self, wid, f, docid):
    """Record weight `f` for (wid, docid), choosing dict vs IIBTree."""
    # Store a wordinfo in a dict as long as there are less than
    # DICT_CUTOFF docids in the dict.  Otherwise use an IIBTree.

    # The pickle of a dict is smaller than the pickle of an
    # IIBTree, substantially so for small mappings.  Thus, we use
    # a dictionary until the mapping reaches DICT_CUTOFF elements.

    # The cutoff is chosen based on the implementation
    # characteristics of Python dictionaries.  The dict hashtable
    # always has 2**N slots and is resized whenever it is 2/3s
    # full.  A pickled dict with 10 elts is half the size of an
    # IIBTree with 10 elts, and 10 happens to be 2/3s of 2**4.  So
    # choose 10 as the cutoff for now.

    # The IIBTree has a smaller in-memory representation than a
    # dictionary, so pickle size isn't the only consideration when
    # choosing the threshold.  The pickle of a 500-elt dict is 92%
    # of the size of the same IIBTree, but the dict uses more
    # space when it is live in memory.  An IIBTree stores two C
    # arrays of ints, one for the keys and one for the values.  It
    # holds up to 120 key-value pairs in a single bucket.
    doc2score = self._wordinfo.get(wid)
    if doc2score is None:
        # First document for this wid.
        doc2score = {}
        self.length.change(1)
    else:
        # _add_wordinfo() is called for each update.  If the map
        # size exceeds the DICT_CUTOFF, convert to an IIBTree.
        # Obscure:  First check the type.  If it's not a dict, it
        # can't need conversion, and then we can avoid an expensive
        # len(IIBTree).
        if (isinstance(doc2score, type({}))
                and len(doc2score) == self.DICT_CUTOFF):
            doc2score = IIBTree(doc2score)
    doc2score[docid] = f
    self._wordinfo[wid] = doc2score  # not redundant:  Persistency!
def convert_to_booleanindex(catalog, index):
    """Convert `index` in place to a BooleanIndex.

    Rebuilds _unindex as an IIBTree of docid -> 0/1 and _index as the
    IITreeSet of the *smaller* value class (BooleanIndex convention).
    No-op when already converted.
    """
    if isinstance(index, BooleanIndex):
        return
    # Use lazy %-style logger arguments instead of eager formatting, so
    # the message is only built when INFO is enabled (and to match the
    # logging style used elsewhere).
    logger.info('Converting index `%s` to BooleanIndex.', index.getId())
    index.__class__ = BooleanIndex
    index._p_changed = True
    catalog._catalog._p_changed = True
    # convert _unindex from IOBTree to IIBTree
    sets = {0: IITreeSet(), 1: IITreeSet()}
    old_unindex = index._unindex
    index._unindex = _unindex = IIBTree()
    for k, v in old_unindex.items():
        # docid to value (True, False)
        value = int(bool(v))
        _unindex[k] = value
        sets[value].add(k)
    del old_unindex
    # convert _index from OOBTree to IITreeSet and set lengths
    false_length = len(sets[0])
    true_length = len(sets[1])
    index._length = Length(false_length + true_length)
    # we put the smaller set into the index
    if false_length < true_length:
        index._index_value = 0
        index._index_length = Length(false_length)
        index._index = sets[0]
        del sets[1]
    else:
        index._index_value = 1
        index._index_length = Length(true_length)
        index._index = sets[1]
        del sets[0]
    transaction.savepoint(optimistic=True)
    logger.info('Finished conversion.')
def _apply_index(self, request):
    """Apply the index to query parameters given in 'request'.

    The argument should be a mapping object.

    If the request does not contain the needed parameters, then
    None is returned.

    If the request contains a parameter with the name of the
    column and this parameter is either a Record or a class
    instance then it is assumed that the parameters of this
    index are passed as attribute (Note: this is the recommended
    way to pass parameters since Zope 2.4)

    Otherwise two objects are returned.  The first object is a
    ResultSet containing the record numbers of the matching
    records.  The second object is a tuple containing the names of
    all data fields used.
    """
    record = parseIndexRequest(request, self.id)
    if record.keys is None:
        return None
    # Sanitize keys: strip backslashes and double quotes, and make sure
    # each key is bytes for the query template.
    keys = []
    for key in record.keys:
        key = key.replace("\\", "").replace('"', "")
        if not isinstance(key, bytes):
            key = key.encode("utf8")
        keys.append(key)
    template_params = {"keys": keys}
    __traceback_info__ = "template parameters: {0}".format(template_params)
    query_body = self._apply_template(template_params)
    logger.info(query_body)
    es_kwargs = dict(
        index=index_name(),
        body=query_body,
        size=BATCH_SIZE,
        scroll="1m",
        _source_includes=["rid"],
    )
    es = get_query_client()
    try:
        result = es.search(**es_kwargs)
    except RequestError:
        # Malformed query: report and treat as "index not applicable".
        logger.info("Query failed:\n{0}".format(query_body))
        return None
    except TransportError:
        logger.exception("ElasticSearch failed")
        return None
    # initial return value, other batches to be applied

    def score(record):
        # ES relevance scores are floats; scale to int for the IIBTree.
        return int(10000 * float(record["_score"]))

    retval = IIBTree()
    for r in result["hits"]["hits"]:
        retval[r["_source"]["rid"]] = score(r)
    total = result["hits"]["total"]["value"]
    if total > BATCH_SIZE:
        # Fetch the remaining hits through the scroll API.
        sid = result["_scroll_id"]
        counter = BATCH_SIZE
        while counter < total:
            result = es.scroll(scroll_id=sid, scroll="1m")
            for record in result["hits"]["hits"]:
                retval[record["_source"]["rid"]] = score(record)
            counter += BATCH_SIZE
    return retval, (self.id,)
def _apply_index(self, request):
    """Apply the index to query parameters given in 'request'.

    The argument should be a mapping object.

    If the request does not contain the needed parameters, then
    None is returned.

    If the request contains a parameter with the name of the
    column and this parameter is either a Record or a class
    instance then it is assumed that the parameters of this
    index are passed as attribute (Note: this is the recommended
    way to pass parameters since Zope 2.4)

    Otherwise two objects are returned.  The first object is a
    ResultSet containing the record numbers of the matching
    records.  The second object is a tuple containing the names of
    all data fields used.
    """
    config = get_configuration()
    timeout = getattr(config, 'request_timeout', 20)
    search_fields = getattr(config, 'search_fields', None)
    if not search_fields:
        search_fields = SEARCH_FIELDS
    search_fields = search_fields.split()
    logger.info(search_fields)
    if query_blocker.blocked:
        return
    record = parseIndexRequest(request, self.id)
    if record.keys is None:
        return None
    es = get_query_client()
    search = Search(using=es, index=index_name())
    search = search.params(request_timeout=timeout)
    # Deterministic sort so search_after pagination is stable.
    search = search.sort('rid', '_id')
    search = search.source(include='rid')
    query_string = record.keys[0].decode('utf8')
    logger.info(query_string)
    if '*' in query_string:
        query_string = query_string.replace('*', ' ')
    query_string = query_string.strip()
    search = search.query('simple_query_string', query=query_string,
                          fields=search_fields)
    results_count = search.count()
    search = search.params(request_timeout=timeout, size=BATCH_SIZE,
                           track_scores=True)
    # setup highlighting
    for field in search_fields:
        name = field.split('^')[0]
        if name == 'title':
            # title shows up in results anyway
            continue
        search = search.highlight(name, fragment_size=FRAGMENT_SIZE)
    # initial return value, other batches to be applied
    retval = IIBTree()
    highlights = OOBTree()
    last_seen = None
    count = 0
    # Ceiling division: number of search_after batches needed.
    batch_count = results_count / BATCH_SIZE
    if results_count % BATCH_SIZE != 0:
        batch_count = batch_count + 1
    for i in xrange(batch_count):
        if last_seen is not None:
            # Resume after the last (rid, _id) pair of the previous batch.
            search = search.update_from_dict({'search_after': last_seen})
        try:
            results = search.execute(ignore_cache=True)
        except TransportError:
            # No es client, return empty results
            logger.exception('ElasticSearch client not available.')
            return IIBTree(), (self.id, )
        for r in results:
            rid = getattr(r, 'rid', None)
            if rid is not None:
                retval[rid] = int(10000 * float(r.meta.score))
                # Index query returns only rids, so we need
                # to save highlights for later use
                highlight_list = []
                if getattr(r.meta, 'highlight', None) is not None:
                    for key in dir(r.meta.highlight):
                        highlight_list.extend(r.meta.highlight[key])
                highlights[r.meta.id] = highlight_list
            last_seen = [rid, r.meta.id]
            count = count + 1
    # store highlights
    try:
        annotations = IAnnotations(self.REQUEST)
        annotations[HIGHLIGHT_KEY] = highlights
    except TypeError:
        # maybe we are in a test
        pass
    return retval, (self.id, )
def clear(self):
    """Reset all index structures to their empty state."""
    self._index_value = 1
    self._index = IITreeSet()
    self._index_length = BTrees.Length.Length()
    self._unindex = IIBTree()
    self._length = BTrees.Length.Length()
def clear(self):
    """Complete reset of the forward and reverse indexes."""
    self._length = Length()
    self._index = IOBTree()
    self._unindex = IIBTree()
def _apply_index(self, request, cid=''):
    """Apply query specified by request, a mapping containing the query.

    Returns two objects on success: the resultSet containing the
    matching record numbers, and a tuple containing the names of
    the fields used.

    Returns None if request is not valid for this index.
    """
    if disable_solr:
        return None

    cm = self.connection_manager
    q = []           # List of query texts to pass as "q"
    queried = []     # List of field names queried
    stored = []      # List of stored field names
    solr_params = {}

    # Get the Solr parameters from the catalog query
    if request.has_key('solr_params'):
        solr_params.update(request['solr_params'])

    # Include parameters from field queries
    for field in cm.schema.fields:
        name = field.name
        if field.stored:
            stored.append(name)
        if not request.has_key(name):
            continue
        field_query = self._decode_param(request[name])
        field_params = field.handler.parse_query(field, field_query)
        if field_params:
            queried.append(name)
            for k in field_params:
                to_add = field_params[k]
                if k not in solr_params:
                    solr_params[k] = to_add
                else:
                    # add to the list
                    v = solr_params[k]
                    if not isinstance(v, list):
                        v = [v]
                        solr_params[k] = v
                    if isinstance(to_add, basestring):
                        v.append(to_add)
                    else:
                        v.extend(to_add)

    if not solr_params:
        return None

    solr_params['fields'] = cm.schema.uniqueKey

    # We only add highlighting for any field that is marked as stored.
    # 'queried' returns the list of fields queried,
    # a specific list of names will narrow the list.
    to_highlight = []
    hfields = solr_params.get('highlight', None)
    if hfields and stored:
        if hfields == 'queried':
            # NOTE(review): this assigns the queried-field list into
            # solr_params but then still iterates the original string
            # 'queried' below — looks suspicious; confirm intended
            # behavior against the upstream implementation.
            solr_params['highlight'] = queried
        for fname in hfields:
            if fname in stored:
                to_highlight.append(fname)
            else:
                log.debug(
                    "Requested field isn't marked as 'stored', "
                    "cannot enable highlighting: %s", fname)
        solr_params['highlight'] = to_highlight

    if not solr_params.get('q'):
        # Solr requires a 'q' parameter, so provide an
        # all-inclusive one.  If the query is using dismax, then
        # use the 'q.alt' parameter since dismax does not know how
        # to parse '*:*' in the 'q' param.
        if solr_params.get('defType', '') == 'dismax':
            solr_params['q.alt'] = '*:*'
            solr_params['q'] = ''
        else:
            solr_params['q'] = '*:*'

    # Additional fields can be added into the query above in the
    # field_params check.  The 'q' variable cannot be sent to solr
    # multiple times (as is the case when it is a list).  Only the
    # first instance of the 'q' param will be recognized by solr, so
    # we turn it back into a string here.
    #
    # XXX: Should the logic for field_params be changed above?
    if isinstance(solr_params['q'], list):
        solr_params['q'] = ' '.join(solr_params['q'])

    # Decode all strings using list from `expected_encodings`,
    # then transcode to UTF-8
    transcoded_params = self._transcode_params(solr_params)

    log.debug("querying: %r", solr_params)
    response = cm.connection.query(**transcoded_params)
    if request.has_key('solr_callback'):
        # Call a function with the Solr response object
        callback = request['solr_callback']
        callback(response)

    # Since highlighting can be either enabled by default in the Solr
    # config, or as a query parameter we just check to see if the
    # response has any highlighting returned.
    if hasattr(response, 'highlighting'):
        catalog = get_catalog(self, name=self.catalog_name)
        if catalog:
            hkey = tuple(
                sorted([(fname, request.get(fname))
                        for fname in queried]))
            if not issubclass(catalog._v_brains, HighlightingBrain) or \
               (hasattr(catalog._v_brains, 'highlighting_key') and \
                catalog._v_brains.highlighting_key != hkey) or \
               (hasattr(catalog._v_brains, 'catalog_name') and \
                catalog._v_brains.catalog_name != self.catalog_name):
                # We use an inline class here so that the brain has
                # enough data to retrieve the stored highlighting data
                class myhighlightingbrains(HighlightingBrain):
                    highlighting_key = hkey
                    highlighting = response.highlighting
                catalog.useBrains(myhighlightingbrains)
                log.debug("Creating new custom brain class, hkey: '%s'",
                          hkey)
            else:
                catalog._v_brains.highlighting = response.highlighting
                log.debug("Using existing custom brain class, hkey: '%s'",
                          hkey)
        else:
            log.debug(
                "Cannot retrieve catalog '%s', highlighting unavailable",
                self.catalog_name)

    uniqueKey = cm.schema.uniqueKey
    result = IIBTree()
    for r in response:
        # Scale float Solr scores to ints for the IIBTree.
        result[int(r[uniqueKey])] = int(r.get('score', 0) * 1000)

    return result, queried
def clear(self):
    """Drop all document/word mappings and reset the length counter."""
    # docid -> [wordids]
    self._doc2wid = IOBTree()
    # wordid -> [docids]
    self._wid2doc = IOBTree()
    # docid -> (# terms in document)
    self._docweight = IIBTree()
    self._length = BTrees.Length.Length()
def _apply_index(self, request):
    """Apply the index to query parameters given in 'request'.

    The argument should be a mapping object.

    If the request does not contain the needed parameters, then
    None is returned.

    If the request contains a parameter with the name of the
    column and this parameter is either a Record or a class
    instance then it is assumed that the parameters of this
    index are passed as attribute (Note: this is the recommended
    way to pass parameters since Zope 2.4)

    Otherwise two objects are returned.  The first object is a
    ResultSet containing the record numbers of the matching
    records.  The second object is a tuple containing the names of
    all data fields used.
    """
    config = get_configuration()
    timeout = getattr(config, 'request_timeout', 20)
    search_fields = getattr(config, 'search_fields', None)
    if not search_fields:
        search_fields = SEARCH_FIELDS
    search_fields = search_fields.split()
    if query_blocker.blocked:
        return
    record = parseIndexRequest(request, self.id)
    if record.keys is None:
        return None
    es = get_query_client()
    search = Search(using=es, index=index_name())
    search = search.params(
        request_timeout=timeout,
        size=BATCH_SIZE,
        preserve_order=True,
    )
    search = search.source(include='rid')
    query_string = record.keys[0]
    if query_string and query_string.startswith('*'):
        # plone.app.querystring contains op sends a leading *, remove it
        query_string = query_string[1:]
    search = search.query('simple_query_string', query=query_string,
                          fields=search_fields)
    # setup highlighting
    for field in search_fields:
        name = field.split('^')[0]
        if name == 'title':
            # title shows up in results anyway
            continue
        search = search.highlight(name, fragment_size=FRAGMENT_SIZE)
    try:
        # scan() iterates all hits via the scroll API.
        result = search.scan()
    except TransportError:
        # No es client, return empty results
        logger.exception('ElasticSearch client not available.')
        return IIBTree(), (self.id, )
    # initial return value, other batches to be applied
    retval = IIBTree()
    highlights = OOBTree()
    for r in result:
        if getattr(r, 'rid', None) is None:
            # something was indexed with no rid. Ignore for now.
            # this is only for highlights, so no big deal if we
            # skip one
            continue
        retval[r.rid] = int(10000 * float(r.meta.score))
        # Index query returns only rids, so we need
        # to save highlights for later use
        highlight_list = []
        if getattr(r.meta, 'highlight', None) is not None:
            for key in dir(r.meta.highlight):
                highlight_list.extend(r.meta.highlight[key])
        highlights[r.meta.id] = highlight_list
    # store highlights
    try:
        annotations = IAnnotations(self.REQUEST)
        annotations[HIGHLIGHT_KEY] = highlights
    except TypeError:
        # maybe we are in a test
        pass
    return retval, (self.id, )
def _makeOne(self):
    """Return a fresh, empty IIBTree — the object under test."""
    from BTrees.IIBTree import IIBTree as tree_factory
    return tree_factory()