class FilteredSetBase(Persistent):
    # Base class for filtered sets: an id, an expression and the set of
    # document ids currently stored for it.  Subclasses have to override
    # index_object(); getType() reads a `meta_type` attribute that this
    # base class does not define itself.

    def __init__(self, id, expr):
        self.id = id
        self.expr = expr
        self.clear()

    def clear(self):
        # Start over with an empty id set.
        self.ids = IISet()

    def index_object(self, documentId, obj):
        # Abstract hook: always raises RuntimeError here.
        raise RuntimeError('index_object not defined')

    def unindex_object(self, documentId):
        # Forget documentId.  An id that is not stored is ignored, which
        # keeps unindexing best-effort; any other error now surfaces
        # instead of being swallowed.
        try:
            self.ids.remove(documentId)
        except KeyError:
            pass

    def getId(self):
        return self.id

    def getExpression(self):
        return self.expr

    def getIds(self):
        return self.ids

    def getType(self):
        return self.meta_type

    def setExpression(self, expr):
        self.expr = expr

    def __repr__(self):
        # list() gives the same listing that map(None, ...) produced.
        return '%s: (%s) %s' % (self.id, self.expr, list(self.ids))

    __str__ = __repr__
def missing_entries_for_index(self, catalog, index_name):
    """ Compare the ids held by the catalog with those an index references.

    Returns a 2-tuple: the difference between the keys of
    ``catalog._catalog.paths`` and ``index.referencedObjects()``, and
    ``len(catalog)`` minus the number of referenced ids.
    """
    inner = catalog._catalog
    referenced = IISet(inner.getIndex(index_name).referencedObjects())
    known = IISet(inner.paths)
    missing = difference(known, referenced)
    shortfall = len(catalog) - len(referenced)
    return (missing, shortfall)
def _insertForwardIndexEntry(self, since, until, documentId):
    """ Record 'documentId' in the forward index chosen by 'since'/'until'.

    Both bounds None: the '_always' set.  Exactly one bound given: the
    '_until_only' or '_since_only' mapping, keyed by that bound.  Both
    given: one entry in '_since' and one in '_until'.
    """
    def id_set(mapping, key):
        # Fetch the id set stored under key, creating it on first use.
        ids = mapping.get(key, None)
        if ids is None:
            ids = mapping[key] = IISet()  # XXX: Store an int?
        return ids

    if since is None:
        if until is None:
            self._always.insert(documentId)
        else:
            id_set(self._until_only, until).insert(documentId)
    elif until is None:
        id_set(self._since_only, since).insert(documentId)
    else:
        id_set(self._since, since).insert(documentId)
        id_set(self._until, until).insert(documentId)
def test_lookup(self):
    # Benchmark rather than an assertion test: prints how long item
    # access and has_key() take on a one-million element IISet when the
    # probes are spread over the set, hit its last key, or hit its first.
    bigsize = 1000000
    smallsize = 1000
    large = IISet(xrange(bigsize))
    small = IISet(xrange(0, bigsize, bigsize/smallsize))

    began = time()
    for probe in small:
        large[probe]
    print("\ngetitem distributed %.6f" % (time()-began))

    began = time()
    for probe in small:
        large[bigsize-1]
    print("getitem end %.6f" % (time()-began))

    began = time()
    for probe in small:
        large[0]
    print("getitem start %.6f" % (time()-began))

    began = time()
    for probe in small:
        large.has_key(probe)
    print("\nhas_key distributed %.6f" % (time()-began))

    began = time()
    for probe in small:
        large.has_key(bigsize-1)
    print("has_key end %.6f" % (time()-began))

    began = time()
    for probe in small:
        large.has_key(0)
    print("has_key start %.6f" % (time()-began))
def _sort_iterate_index(self, actual_result_count, result, rs, limit,
                        merge, reverse, sort_index, sort_index_length,
                        sort_spec, second_indexes_key_map):
    # The result set is much larger than the sorted index,
    # so iterate over the sorted index for speed.
    # TODO: len(sort_index) isn't actually what we want for a keyword
    # index, as it's only the unique values, not the documents.
    # Don't use this case while using limit, as we return results of
    # non-flattened intsets, and would have to merge/unflattened those
    # before limiting.
    #
    # Returns (actual_result_count, length, result): `length` is the
    # number of ids matched across all sort keys and `result` holds
    # (sort key, ids, self.__getitem__) triples.  `limit` and `merge`
    # are accepted but not read here.
    single_index = sort_index_length == 1
    length = 0

    try:
        intersection(rs, IISet(()))
    except TypeError:
        # rs is not an object in the IIBTree family.
        # Try to turn rs into an IISet.
        rs = IISet(rs)

    for key, docs in sort_index.items():
        # We have an index that has a set of values for
        # each sort key, so we intersect with each set and
        # get a sorted sequence of the intersections.
        docs = intersection(rs, docs)
        if not docs:
            continue
        keys = getattr(docs, 'keys', None)
        if keys is not None:
            # Is this ever true?
            docs = keys()
        length += len(docs)
        if single_index:
            result.append((key, docs, self.__getitem__))
            continue
        # sort on secondary index
        grouped = defaultdict(list)
        for docid in docs:
            full_key = (key, )
            for key_map in second_indexes_key_map:
                try:
                    full_key += (key_map[docid], )
                except KeyError:
                    pass
            grouped[full_key].append(docid)
        for full_key, members in grouped.items():
            result.append((full_key, members, self.__getitem__))

    if single_index:
        result.sort(reverse=reverse)
    else:
        result = multisort(result, sort_spec)

    return (actual_result_count, length, result)
def test_even_dist(self):
    # Times intersections of an evenly spread small IISet against a
    # small tree set, a large tree set and a large set, in both
    # argument orders.
    bigsize = BIGSETSIZE
    smallsize = SMALLSETSIZE
    cases = (
        (IITreeSet, smallsize,
         'Intersection small set even distribution + small treeset',
         'Intersection small treeset + small set even distribution'),
        (IITreeSet, bigsize,
         'Intersection small set even distribution + large treeset',
         'Intersection large treeset + small set even distribution'),
        (IISet, bigsize,
         'Intersection small set even distribution + large set',
         'Intersection large set, small set even distribution'),
    )
    for factory, size, forward, backward in cases:
        small = IISet(xrange(0, bigsize, bigsize / smallsize))
        large = factory(xrange(size))
        self.timing(small, large, forward)
        self.timing(large, small, backward)
def _load(self, term, _isInstance=isinstance, _IntType=IntType):
    '''Return the document ids stored for *term*.

    An unknown term gives an empty ``IISet``; a value stored as a bare
    integer is wrapped in a one-element ``IISet``; anything else is
    returned exactly as stored.
    '''
    stored = self._index.get(term)
    if stored is None:
        return IISet()
    if not _isInstance(stored, _IntType):
        return stored
    return IISet((stored, ))
def train(self):
    """ Build training samples from the catalog and train the classifier.

    Each document found in both the 'Subject' and the 'noun_terms' index
    contributes one (noun presence dict, tag) sample per tag.
    ``self.classifier`` is only assigned when at least one sample exists.
    """
    catalog = getToolByName(self, 'portal_catalog')

    # Template feature dict: every noun known to the catalog, marked absent.
    absent = dict.fromkeys(catalog.uniqueValuesFor('noun_terms'), 0)

    inner = catalog._catalog
    subjects = inner.getIndex('Subject')
    noun_terms = inner.getIndex('noun_terms')

    # Internal catalog ids of the objects that have noun terms / subjects.
    with_nouns = IISet(noun_terms._unindex.keys())
    with_subjects = IISet(subjects._unindex.keys())

    samples = []
    for docid in intersection(with_subjects, with_nouns):
        features = absent.copy()
        doc_nouns = noun_terms._unindex[docid]
        doc_tags = subjects._unindex[docid]
        for noun in doc_nouns:
            features[noun] = 1
        # All samples of one document share the same features dict.
        samples.extend((features, tag) for tag in doc_tags)

    if samples:
        self.classifier = NaiveBayesClassifier.train(samples)
def test_search_inputresult(self):
    # Document 1 is indexed as True, document 2 as False; each query is
    # then run against a caller-supplied resultset.
    index = self._makeOne()
    for docid, flag in ((1, True), (2, False)):
        obj = Dummy(docid, flag)
        index._index_object(obj.id, obj, attr='truth')

    # (queried value, ids offered as resultset, ids expected back)
    cases = (
        (True, [], []),
        (True, [2], []),
        (True, [1], [1]),
        (True, [1, 2], [1]),
        (False, [1, 2], [2]),
    )
    for wanted, offered, expected in cases:
        res, idx = index._apply_index({'truth': wanted},
                                      resultset=IISet(offered))
        self.failUnlessEqual(idx, ('truth', ))
        self.failUnlessEqual(list(res), expected)
def test_heavy_start(self):
    # Times intersections where the small operand holds the lowest
    # values, against small/large sets and tree sets, in both orders.
    bigsize = BIGSETSIZE
    smallsize = SMALLSETSIZE
    cases = (
        (IISet, IITreeSet, smallsize,
         'Intersection small set low values + small treeset',
         'Intersection small treeset + small set low values'),
        (IISet, IITreeSet, bigsize,
         'Intersection small set low values + large treeset',
         'Intersection large treeset + small set low values'),
        (IISet, IISet, bigsize,
         'Intersection small set low values + large set',
         'Intersection large set + small set low values'),
        (IITreeSet, IISet, bigsize,
         'Intersection small treeset + large set',
         'Intersection large set + small treeset'),
    )
    for small_factory, large_factory, size, forward, backward in cases:
        small = small_factory(xrange(smallsize))
        large = large_factory(xrange(size))
        self.timing(small, large, forward)
        self.timing(large, small, backward)
def testLargerInputs(self):
    # Three rounds of random operands: every builder pairing is run
    # through union, intersection and difference and compared with the
    # matching simulator applied to IISets of the same keys.
    from BTrees.IIBTree import IISet
    from random import randint
    MAXSIZE = 200
    MAXVAL = 400

    def random_operand():
        # One random key list, built by every builder, plus its IISet.
        size = randint(0, MAXSIZE)
        keys = [randint(1, MAXVAL) for j in range(size)]
        variants = [makeset(keys) for makeset in self.builders()]
        return variants, IISet(keys)

    operations = ((self.union, self._union),
                  (self.intersection, self._intersection),
                  (self.difference, self._difference))

    for round_no in range(3):
        As, Akeys = random_operand()
        Bs, Bkeys = random_operand()
        for op, simulator in operations:
            for A in As:
                for B in Bs:
                    got = op(A, B)
                    want = simulator(Akeys, Bkeys)
                    self.assertEqual(list(got), want,
                                     (A, B, Akeys, Bkeys, list(got), want))
def count(self, brains, sequence=None):
    """ Count how many of *brains* match each value.

    Returns a dict of counts keyed by value, with the total stored under
    both "" and 'all'.  Values are decoded from UTF-8 to unicode, except
    in the branch for values without a catalog brain.  An empty dict is
    returned when a required piece (facet fields, sequence, index id or
    faceted catalog utility) is missing.
    """
    def as_unicode(value):
        # Byte strings are decoded as UTF-8; unicode passes through.
        if isinstance(value, unicode):
            return value
        return value.decode('utf-8')

    res = {}

    # by checking for facet_counts we assume this is a SolrResponse
    # from collective.solr
    if hasattr(brains, 'facet_counts'):
        facet_fields = brains.facet_counts.get('facet_fields')
        if not facet_fields:
            # no facet counts were returned. we exit anyway because
            # zcatalog methods throw an error on solr responses
            return res
        index_id = self.data.get('index')
        for value, num in facet_fields.get(index_id, {}).items():
            res[as_unicode(value)] = num
        res[""] = res['all'] = len(brains)
        return res

    # everything below is handled by the zcatalog
    if not sequence:
        sequence = [key for key, title in self.vocabulary()]
    if not sequence:
        return res

    index_id = self.data.get('index')
    if not index_id:
        return res

    portal_catalog = getToolByName(self.context, 'portal_catalog')
    index = portal_catalog._catalog.getIndex(index_id)
    ctool = queryUtility(IFacetedCatalog)
    if not ctool:
        return res

    brains = IISet(brain.getRID() for brain in brains)
    total = len(brains)
    res[""] = res['all'] = total
    for value in sequence:
        item = uuidToCatalogBrain(value)
        if not item:
            # No brain for this value: report the full count under the
            # undecoded value.
            res[value] = len(brains)
            continue
        matched = ctool.apply_index(self.context, index, item.getPath())[0]
        matched = weightedIntersection(brains, IISet(matched))[1]
        res[as_unicode(value)] = len(matched)
    return res
def _eval(query, cat):
    '''evaluate *query* in the context of *cat* (a 'Products.ZCatalog.Catalog.Catalog').

    A result that is not an 'ISearch' is returned unchanged.  An 'ISearch'
    is converted via 'asSet()' when it has one, via 'getTree()' when it is
    an 'IBTree', and otherwise by loading its hits into a new 'IISet'.
    '''
    rs = query._eval(_QueryContext(cat))
    if not isinstance(rs, ISearch):
        return rs
    if hasattr(rs, 'asSet'):
        return rs.asSet()
    if isinstance(rs, IBTree):
        return rs.getTree()
    hits = tuple(rs)
    loaded = IISet()
    loaded.__setstate__((hits,))
    return loaded
def testFixed1843(self):
    # keys() must reject string range bounds with TypeError.
    from BTrees.IIBTree import IISet
    single = IISet()
    single.insert(1)
    bad_bounds = (
        # This one used to fail to raise the TypeError when it occurred.
        ("", ),
        # This one used to segfault.
        (0, ""),
    )
    for bounds in bad_bounds:
        self.assertRaises(TypeError, single.keys, *bounds)
def group(self, seq):
    # Generator: splits the document ids in *seq* into groups that share
    # a sort-index entry, in index order (reversed when _sortReverse is
    # set).  Ids without an index value are yielded last under None.
    index = self._sortIndex
    reverse = self._sortReverse
    seq_size = len(seq)
    index_size = len(index)
    if seq_size >= 0.1 * index_size:
        # result large compared to index -- sort via index
        seen = IISet()
        seen_count = 0
        load = getattr(index, '_load', None)
        if load is None:
            # not an optimized index: walk (key, ids) pairs
            entries = index.items()
            load = lambda entry: entry[1]
            if reverse:
                entries.reverse()
        elif not reverse:
            entries = index._index.keys()
        else:
            reverse_order = getattr(index, 'getReverseOrder', None)
            entries = reverse_order and reverse_order()
            if entries is None:
                entries = list(index._index.keys())
                entries.reverse()
        for entry in entries:
            matched = intersection(seq, load(entry))
            if matched:
                seen.update(matched)
                seen_count += len(matched)
                # NOTE(review): for a non-optimized index `entry` is the
                # whole (key, ids) pair, not just the key -- confirm
                # that consumers expect this.
                yield entry, matched
        if seen_count != len(seq):
            yield None, difference(seq, seen)
    else:
        # result relatively small -- sort via result
        by_key = OOBTree()
        key_for = getattr(index, 'keyForDocument', None)
        # work around "nogopip" bug: it defines "keyForDocument" as an integer
        if not callable(key_for):
            # this will fail, when the index neither defines a reasonable
            # "keyForDocument" nor "documentToKeyMap". In this case,
            # the index cannot be used for sorting.
            doc_to_key = index.documentToKeyMap()

            def key_for(doc):
                return doc_to_key[doc]
        unkeyed = IITreeSet()
        for doc in seq.keys():
            try:
                key = key_for(doc)
            except KeyError:
                unkeyed.insert(doc)
                continue
            docs = by_key.get(key)
            if docs is None:
                docs = by_key[key] = IITreeSet()
            docs.insert(doc)
        groups = by_key.items()
        if reverse:
            groups = list(groups)
            groups.reverse()
        for pair in groups:
            yield pair
        if unkeyed:
            yield None, unkeyed
def get(self, pattern):
    """ Query the lexicon for words matching a pattern.

    Without wildcards the pattern is looked up directly and the result is
    an empty tuple or a one-element tuple.  With wildcards the result is
    an empty tuple or an IISet of the candidate ids whose word matches
    the regular expression built from the pattern.
    """
    # single word pattern produce a slicing problem below.
    # Because the splitter throws away single characters we can
    # return an empty tuple here.
    if len(pattern) == 1:
        return ()

    wildcards = [self.multi_wc, self.single_wc]
    digrams = []
    has_wildcard = False
    for pos, char in enumerate(pattern):
        if char in wildcards:
            has_wildcard = True
            continue
        if pos == 0:
            digrams.insert(pos, self.eow + char)
            digrams.append(char + pattern[pos + 1])
            continue
        try:
            following = pattern[pos + 1]
        except IndexError:
            # last character: pair it with the end-of-word marker
            digrams.append(char + self.eow)
        else:
            if following not in wildcards:
                digrams.append(char + following)

    if not has_wildcard:
        wid = self._lexicon.get(pattern, None)
        if wid is None:
            return ()
        return (wid, )

    ## now get all of the intsets that contain the result digrams
    candidates = None
    for digram in digrams:
        candidates = union(candidates, self._digrams.get(digram, None))
    if not candidates:
        return ()

    ## now we have narrowed the list of possible candidates
    ## down to those words which contain digrams.  However,
    ## some words may have been returned that match digrams,
    ## but do not match 'pattern'.  This is because some words
    ## may contain all matching digrams, but in the wrong
    ## order.
    matches = re.compile(self.createRegex(pattern)).match
    hits = IISet()
    for wid in candidates:
        if matches(self._inverseLex[wid]):
            hits.insert(wid)
    return hits
def test_large(self):
    # Times the intersection of two equally sized operands, first as
    # tree sets and then as sets.
    bigsize = BIGSETSIZE / 10
    cases = ((IITreeSet, 'Intersection Large tree sets'),
             (IISet, 'Intersection Large sets'))
    for factory, label in cases:
        small = factory(xrange(bigsize))
        large = factory(xrange(bigsize))
        self.timing(small, large, label)
def test_small(self):
    # Times the intersection of two small, equally sized operands, first
    # as tree sets and then as sets.
    smallsize = SMALLSETSIZE
    cases = ((IITreeSet, 'Intersection small tree sets'),
             (IISet, 'Intersection small sets'))
    for factory, label in cases:
        small = factory(xrange(smallsize))
        large = factory(xrange(smallsize))
        self.timing(small, large, label)
def get(self, pattern):
    """ Query the lexicon for words matching a pattern.

    A pattern without wildcard characters is looked up as is: the result
    is () or a 1-tuple.  A globbing pattern gives () or an IISet of the
    digram candidates whose word matches the pattern's regular expression.
    """
    # single word pattern produce a slicing problem below.
    # Because the splitter throws away single characters we can
    # return an empty tuple here.
    if len(pattern) == 1:
        return ()

    wc_chars = [self.multi_wc, self.single_wc]
    pairs = []
    globbing = False
    for idx, current in enumerate(pattern):
        if current in wc_chars:
            globbing = True
        elif idx == 0:
            pairs.insert(idx, self.eow + current)
            pairs.append(current + pattern[idx + 1])
        else:
            try:
                successor = pattern[idx + 1]
            except IndexError:
                # ran off the end: close the word with the eow marker
                pairs.append(current + self.eow)
                continue
            if successor not in wc_chars:
                pairs.append(current + successor)

    if not globbing:
        found = self._lexicon.get(pattern, None)
        if found is None:
            return ()
        return (found, )

    ## now get all of the intsets that contain the result digrams
    narrowed = None
    for pair in pairs:
        narrowed = union(narrowed, self._digrams.get(pair, None))

    if not narrowed:
        return ()

    ## now we have narrowed the list of possible candidates
    ## down to those words which contain digrams.  However,
    ## some words may have been returned that match digrams,
    ## but do not match 'pattern'.  This is because some words
    ## may contain all matching digrams, but in the wrong
    ## order.
    regex = re.compile(self.createRegex(pattern))
    hits = IISet()
    for word_id in narrowed:
        if regex.match(self._inverseLex[word_id]):
            hits.insert(word_id)
    return hits
def items(self):
    # return a list of value to int set of rid tuples
    #
    # First entry: the indexed truth value with the stored index.
    # Second entry: the opposite value with every rid in _unindex whose
    # stored value differs from the indexed one.
    marker = self._index_value
    stored = self._index
    truth = bool(marker)
    others = IISet()
    for rid, value in self._unindex.iteritems():
        if value != marker:
            others.add(rid)
    return [(truth, stored), (not truth, others)]
def test_lookup(self):
    # Benchmark only: reports the time taken by item access and by
    # has_key() on a one-million element IISet, probing spread-out keys,
    # the last key and the first key.
    bigsize = 1000000
    smallsize = 1000
    large = IISet(xrange(bigsize))
    small = IISet(xrange(0, bigsize, bigsize / smallsize))

    t0 = time()
    for key in small:
        large[key]
    elapsed = time() - t0
    print("\ngetitem distributed %.6f" % elapsed)

    t0 = time()
    for key in small:
        large[bigsize - 1]
    elapsed = time() - t0
    print("getitem end %.6f" % elapsed)

    t0 = time()
    for key in small:
        large[0]
    elapsed = time() - t0
    print("getitem start %.6f" % elapsed)

    t0 = time()
    for key in small:
        large.has_key(key)
    elapsed = time() - t0
    print("\nhas_key distributed %.6f" % elapsed)

    t0 = time()
    for key in small:
        large.has_key(bigsize - 1)
    elapsed = time() - t0
    print("has_key end %.6f" % elapsed)

    t0 = time()
    for key in small:
        large.has_key(0)
    elapsed = time() - t0
    print("has_key start %.6f" % elapsed)
def _eval(query, cat):
    '''evaluate *query* in the context of *cat* (a 'Products.ZCatalog.Catalog.Catalog').

    Incremental search results ('ISearch') are turned into a set or tree:
    through 'asSet()' if present, through 'getTree()' for an 'IBTree',
    else by copying the hits into a fresh 'IISet'.  Any other result is
    handed back untouched.
    '''
    outcome = query._eval(_QueryContext(cat))
    if not isinstance(outcome, ISearch):
        return outcome
    if hasattr(outcome, 'asSet'):
        return outcome.asSet()
    if isinstance(outcome, IBTree):
        return outcome.getTree()
    hits = tuple(outcome)
    as_set = IISet()
    as_set.__setstate__((hits, ))
    return as_set
def test_even_dist(self):
    # Times an evenly spread small IISet against a large tree set and
    # then against a large set, printing a heading before each run.
    bigsize = BIGSETSIZE
    smallsize = SMALLSETSIZE
    cases = (
        (IITreeSet,
         '\nDifference Small set even distribution + large treeset'),
        (IISet,
         '\nDifference Small set even distribution + large set'),
    )
    for factory, heading in cases:
        small = IISet(xrange(0, bigsize, bigsize / smallsize))
        large = factory(xrange(bigsize))
        print(heading)
        self.timing(small, large)
def test_heavy_end(self):
    # Times a small IISet holding the highest values against a large
    # tree set and then a large set, printing a heading before each run.
    bigsize = BIGSETSIZE
    smallsize = SMALLSETSIZE
    cases = (
        (IITreeSet, '\nDifference Small set high values + large treeset'),
        (IISet, '\nDifference Small set high values + large set'),
    )
    for factory, heading in cases:
        small = IISet(xrange(bigsize - smallsize, bigsize))
        large = factory(xrange(bigsize))
        print(heading)
        self.timing(small, large)
def test_empty(self):
    # Times an empty operand against a large one: empty set with large
    # tree set, then empty tree set with large set.
    bigsize = BIGSETSIZE
    smallsize = 0
    cases = (
        (IISet, IITreeSet, '\nIntersection empty set + large treeset'),
        (IITreeSet, IISet, '\nIntersection empty tree set + large set'),
    )
    for small_factory, large_factory, heading in cases:
        small = small_factory(xrange(smallsize))
        large = large_factory(xrange(bigsize))
        print(heading)
        self.timing(small, large)
def test_empty(self):
    # Times intersections of an empty operand with a large one, in both
    # argument orders: empty set / large tree set, then empty tree set /
    # large set.
    bigsize = BIGSETSIZE
    smallsize = 0
    cases = (
        (IISet, IITreeSet,
         'Intersection empty set + large treeset',
         'Intersection large treeset + empty set'),
        (IITreeSet, IISet,
         'Intersection empty tree set + large set',
         'Intersection large set + empty tree set'),
    )
    for small_factory, large_factory, forward, backward in cases:
        small = small_factory(xrange(smallsize))
        large = large_factory(xrange(bigsize))
        self.timing(small, large, forward)
        self.timing(large, small, backward)
def count(self, brains):
    """ Count how many of *brains* the index matches per vocabulary key.

    A key that names a country group is queried with the group's countries
    and this widget's operator; any other key is queried as is.  A falsy
    key gets the total number of brains.  Byte-string keys are stored
    decoded from UTF-8.  Returns an empty dict when the vocabulary, the
    index id or the index's '_apply_index' is missing.
    """
    res = {}
    sequence = [key for key, title in self.vocabulary()]
    if not sequence:
        return res

    index_id = self.data.get('index')
    if not index_id:
        return res

    ctool = getToolByName(self.context, 'portal_catalog')
    index = ctool._catalog.getIndex(index_id)
    apply_index = getattr(index, "_apply_index", None)
    if not apply_index:
        return res

    adapted = (self.context, self.request)
    country_groups = getMultiAdapter(adapted, name=u'getCountryGroups')()
    countries_of = getMultiAdapter(adapted, name=u'getCountriesByGroup')

    rids = IISet(brain.getRID() for brain in brains)
    for value in sequence:
        if not value:
            res[value] = len(rids)
            continue
        if value in country_groups:
            query = {
                index_id: countries_of(value),
                index_id + '_operator': self.operator,
            }
        else:
            query = {index_id: value}
        applied = apply_index(query)
        if not applied:
            continue
        matched, _used = applied
        _weight, matched = weightedIntersection(rids, IISet(matched))
        if isinstance(value, str):
            value = value.decode('utf-8', 'replace')
        res[value] = len(matched)
    return res
def query_index(self, record, resultset=None):
    # Answer a query for a single truth value.  Only the first entry of
    # record.keys is examined: every path inside the loop returns.
    # Without any key an empty IISet is returned.
    index = self._index
    indexed = self._index_value
    for key in record.keys:
        if bool(key) is not bool(indexed):
            # Asked for the value that is not indexed: remove the
            # indexed ids from resultset, or from _unindex when no
            # resultset was given.
            if resultset is not None:
                return difference(resultset, index)
            return union(difference(self._unindex, index), IISet([]))
        # If we match the indexed value, check index
        return intersection(index, resultset)
    return IISet()
def test_findlargesmallset(self):
    # Test different approaches to finding the large and small set
    bigsize = 10
    smallsize = 2
    first = IISet(xrange(bigsize))
    first_len = len(first)
    second = IISet(xrange(0, bigsize, bigsize / smallsize))
    second_len = len(second)

    # 3 approaches: if/else, sorted and max/min
    def with_branch():
        if first_len < second_len:
            ls = first_len
            small = first
            lb = second_len
            big = second
        else:
            ls = second_len
            small = second
            lb = first_len
            big = first
        return (ls, small), (lb, big)

    def with_sorted():
        return sorted(((second_len, second), (first_len, first)))

    def with_minmax():
        small = min((second_len, second), (first_len, first))
        big = max((second_len, second), (first_len, first))
        return small, big

    # All three approaches have to agree before they are timed.
    self.failUnlessEqual(list(with_branch()), list(with_sorted()))
    self.failUnlessEqual(list(with_branch()), list(with_minmax()))

    timed = (
        ('\nif/else took %.6f', with_branch),
        ('sorted took %.6f', with_sorted),
        ('minmax took %.6f', with_minmax),
    )
    for template, candidate in timed:
        start = time()
        for i in xrange(1000):
            candidate()
        print(template % (time() - start))
def insertEntry(self, comp, id, level, parent_path=None, object_path=None):
    """Insert an entry.

    comp, id and level are handed to PathIndex.insertEntry unchanged.

    parent_path is the path of the parent object; when given, id is added
    to the set of docids recorded for that parent.

    object_path is the object path, it is assumed to be unique, i.e. there
    is a one to one mapping between physical paths and docids.  This will
    be large, and is only used for breadcrumbs.

    id is the docid
    """
    PathIndex.insertEntry(self, comp, id, level)

    if parent_path is not None:
        by_parent = self._index_parents
        if not by_parent.has_key(parent_path):
            by_parent[parent_path] = IISet()
        by_parent[parent_path].insert(id)

    # We make the assumption that a full path corresponds one and only
    # one object.
    if object_path is not None:
        self._index_items[object_path] = id
def extendedpathindex_apply_index(self, request, res=None):
    """ Hook for (Z)Catalog.

    'request' -- mapping type (usually {"path": "..."}); the options
    "level", "operator", "depth", "navtree" and "navtree_start" are read
    from the parsed record and passed on to search().

    'res' -- handed to search() as 'tmpres'.

    Returns None when the request holds no keys for this index, otherwise
    a tuple (result set, (self.id,)); an empty IISet stands in for an
    empty result.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    # Identity test, not equality: None is the "no query" marker.
    if record.keys is None:
        return None

    level = record.get("level", 0)
    operator = record.get('operator', self.useOperator).lower()
    depth = getattr(record, 'depth', -1)  # use getattr to get 0 value
    navtree = record.get('navtree', 0)
    navtree_start = record.get('navtree_start', 0)

    # depending on the operator we use intersection or union
    if operator == "or":
        set_func = union
    else:
        set_func = intersection

    result = None
    for k in record.keys:
        rows = self.search(k, level, depth, navtree, navtree_start,
                           tmpres=res)
        result = set_func(result, rows)

    if result:
        return result, (self.id, )
    return IISet(), (self.id, )
def _apply_index(self, request):
    """ See IPluggableIndex.

    o Unpacks args from catalog and maps them onto '_search'.

    o Returns None when the request holds no keys for this index, else
      a tuple (result set, (self.id,)); an empty IISet replaces an empty
      result.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    level = record.get("level", 0)
    operator = record.get('operator', self.useOperator).lower()

    # "or" unions the per-key results, anything else intersects them
    combine = intersection
    if operator == "or":
        combine = union

    matched = None
    for key in record.keys:
        matched = combine(matched, self._search(key, level))

    if not matched:
        return IISet(), (self.id, )
    return matched, (self.id, )
def test_sortResults_reversed(self):
    # Sorting on the 'num' index with reverse=True must give 99 .. 0.
    catalog = self._make_one()
    rids = IISet([brain.getRID() for brain in catalog({'att1': 'att1'})])
    num_index = catalog.getIndex('num')
    ordered = catalog.sortResults(rids, num_index, reverse=True)
    self.assertEqual([brain.num for brain in ordered],
                     list(range(99, -1, -1)))
def items(self):
    # List the (key, value) pairs of the index; a value stored as a bare
    # int is returned as a one-element IISet.
    pairs = []
    for key, stored in self._index.items():
        if not isinstance(stored, int):
            pairs.append((key, stored))
        else:
            pairs.append((key, IISet((stored, ))))
    return pairs
def group(self, seq):
    # Generator yielding the document ids of *seq* grouped by their
    # entry in the sort index, in index order (reversed when
    # _sortReverse is set).  Ids without an index value come last,
    # grouped under None.
    sort_index = self._sortIndex
    descending = self._sortReverse
    n_seq = len(seq)
    n_index = len(sort_index)
    if n_seq >= 0.1 * n_index:
        # result large compared to index -- sort via index
        handled = IISet()
        handled_count = 0
        fetch = getattr(sort_index, '_load', None)
        if fetch is None:
            # not an optimized index: iterate (key, ids) pairs
            walk = sort_index.items()
            fetch = lambda pair: pair[1]
            if descending:
                walk.reverse()
        elif not descending:
            walk = sort_index._index.keys()
        else:
            get_reversed = getattr(sort_index, 'getReverseOrder', None)
            walk = get_reversed and get_reversed()
            if walk is None:
                walk = list(sort_index._index.keys())
                walk.reverse()
        for step in walk:
            ids = intersection(seq, fetch(step))
            if ids:
                handled.update(ids)
                handled_count += len(ids)
                # NOTE(review): with a non-optimized index `step` is a
                # (key, ids) pair rather than the bare key -- confirm.
                yield step, ids
        if handled_count != len(seq):
            yield None, difference(seq, handled)
    else:
        # result relatively small -- sort via result
        buckets = OOBTree()
        lookup = getattr(sort_index, 'keyForDocument', None)
        # work around "nogopip" bug: it defines "keyForDocument" as an integer
        if not callable(lookup):
            # this will fail, when the index neither defines a reasonable
            # "keyForDocument" nor "documentToKeyMap". In this case,
            # the index cannot be used for sorting.
            key_map = sort_index.documentToKeyMap()

            def lookup(doc):
                return key_map[doc]
        without_value = IITreeSet()
        for doc in seq.keys():
            try:
                key = lookup(doc)
            except KeyError:
                without_value.insert(doc)
                continue
            bucket = buckets.get(key)
            if bucket is None:
                bucket = buckets[key] = IITreeSet()
            bucket.insert(doc)
        ordered = buckets.items()
        if descending:
            ordered = list(ordered)
            ordered.reverse()
        for pair in ordered:
            yield pair
        if without_value:
            yield None, without_value
def lookupWordsBySimilarity(self, word):
    """ Perform a similarity lookup.

    Every (word, threshold) pair the lexicon reports as similar to *word*
    contributes its stored document ids; the ResultSet carries the union
    of those ids and a word -> threshold mapping.
    """
    similar_words = self._lexicon.getSimiliarWords(word)
    word_id_for = self._lexicon.getWordId

    thresholds = {}
    found = IISet()
    for similar, threshold in similar_words:
        thresholds[similar] = threshold
        found.update(self._storage.get(word_id_for(similar)))

    return ResultSet(found, thresholds)
class AccountingFolder(BaseFolder, BrowserDefaultMixin):
    """ Folder that is its own accounting root and records closing dates.
    """
    security = ClassSecurityInfo()
    implements(interfaces.IAccountingFolder)

    meta_type = 'AccountingFolder'
    _at_rename_after_creation = True

    schema = AccountingFolder_schema

    ##code-section class-header #fill in your manual code here
    ##/code-section class-header

    # Methods

    # Manually created methods

    def __init__(self, oid, **kwargs):
        BaseFolder.__init__(self, oid, **kwargs)
        # Closing dates, stored as integers (see registerClosingDate).
        self._closing_transfers = IISet()

    security.declareProtected(permissions.View, 'getAccountingRoot')
    def getAccountingRoot(self):
        ''' This folder is the accounting root: return 'self'.
        '''
        return self

    def displayContentsTab(self):
        """ Always False, which hides the contents tab.
        """
        return False

    def registerClosingDate(self, date):
        """ Register the date of a closing transfer.
        """
        # strip time before insert
        day = DateTime(date.Date())
        self._closing_transfers.insert(int(day))

    def getClosingDates(self):
        """ Return the set of all registered closing dates.
        """
        return self._closing_transfers
def _lookup(self, words, do_autoexpand=1):
    """ Search a word or a list of words in the lexicon and return a
        ResultSet of found documents.

        The ResultSet carries the document ids of all words plus a
        word -> weight mapping: 1.0 for every looked-up word and a
        TRUNC_WEIGHT entry for words added by autoexpansion.
    """
    found = IISet()
    weights = {}

    # remove stopwords from data
    if self.use_stopwords:
        words = self.use_stopwords.process(words)

    if self.use_thesaurus and self.thesaurus_mode == 'expand_always':
        thesaurus = ThesaurusRegistry.get(self.use_thesaurus)
        # Iterate over a copy: the expansion terms are appended to the
        # very list that is searched below.
        for term in words[:]:
            words.extend(thesaurus.getTermsFor(term))

    for word in words:
        # perform casefolding if necessary
        if self.splitter_casefolding:
            word = word.lower()

        if self.use_normalizer:
            normalizer = NormalizerRegistry.get(self.use_normalizer)
            word = normalizer.process(word)

        weights[word] = 1.0
        wid = self._lexicon.getWordId(word)

        # Retrieve list of docIds for this wordid
        if wid is not None:
            found.update(self._storage.get(wid))

        # perform autoexpansion of terms by performing
        # a search using right-truncation
        if not (do_autoexpand and self.autoexpand
                and len(word) >= self.autoexpand_limit):
            continue
        expanded = self.lookupWordsByTruncation(word, right=1)
        found.update(expanded.docIds())
        base_len = len(word)
        for longer in expanded.words().keys():
            weights[longer] = TRUNC_WEIGHT[len(longer) - base_len]

    return ResultSet(found, weights)
def setOperation(op, sets, isearch):
    '''perform *op* on *sets*. if *isearch*, return an incremental search.

    *op* may be '"and"' or '"or"'.

    Uses 'IncrementalSearch', if available.

    'None' as a member of *sets*, and 'None' as return value, stand for
    "all results".
    '''
    conjunctive = op == 'and'

    if not sets:
        if conjunctive:
            return  # None means all results
        if not isearch:
            return IISet()
        nothing = IOr()
        nothing.complete()
        return nothing

    # Note: "multiunion" is *much* faster than "IOr"!  Hence a plain
    # 'or' (no isearch requested) is left to the set based code below.
    if IAnd is not None and (isearch or (conjunctive and len(sets) > 1)):
        searches = []
        for operand in sets:
            if operand is None:
                # all results
                if conjunctive:
                    continue
                return
            if not isinstance(operand, ISearch):
                operand = IBTree(operand)
            searches.append(operand)
        if conjunctive and not searches:
            return  # empty 'and'
        if conjunctive:
            combiner = IAnd
        else:
            combiner = IOr
        if len(searches) == 1:
            # do not wrap a one element search
            search = searches[0]
        else:
            search = combiner(*searches)
            search.complete()
        if isearch:
            return search
        if hasattr(search, 'asSet'):
            return search.asSet()
        loaded = IISet()
        loaded.__setstate__((tuple(search),))
        return loaded

    if op == 'or' and len(sets) > 5:
        return multiunion(sets)

    if conjunctive:
        combine = intersection
    else:
        combine = union
    merged = None
    for operand in sets:
        merged = combine(merged, operand)
    if merged is None:
        if combine is not union:
            return
        merged = IISet()
    # With 'IAnd' available every isearch request is answered by the
    # branch above, so this wrap is only reached without it.
    if isearch:
        merged = IBTree(merged)
    return merged
class FilteredSetBase(object):
    """ Common base of all filtered sets.

        A filtered set keeps the ids of the documents that satisfy one
        shared condition, given as an expression.
    """

    implements(ITopicFilteredSet)

    def __init__(self, id, expr):
        self.id = id
        self.expr = expr
        self.clear()

    def clear(self):
        # Start over with an empty id set.
        self._ids = IISet()

    def index_doc(self, docid, context):
        # Abstract: concrete filtered sets decide whether to store docid.
        raise NotImplementedError

    def unindex_doc(self, docid):
        # Removing an id that is not stored is not an error.
        try:
            self._ids.remove(docid)
        except KeyError:
            pass

    def getId(self):
        return self.id

    def getExpression(self):
        return self.expr

    def setExpression(self, expr):
        self.expr = expr

    def getIds(self):
        return self._ids

    def __repr__(self):
        members = list(self._ids)
        return '%s: (%s) %s' % (self.id, self.expr, members)

    __str__ = __repr__
def group(self, seq):
    # Generator: yields the document ids of *seq* grouped by their sort
    # index entry, in index order (reversed when _sortReverse is set);
    # ids without an index value are yielded last under None.
    sort_index = self._sortIndex
    descending = self._sortReverse
    n_seq = len(seq)
    n_index = len(sort_index)
    if n_seq >= 0.1 * n_index:
        # result large compared to index -- sort via index
        covered = IISet()
        covered_count = 0
        fetch = getattr(sort_index, '_load', None)
        if fetch is None:
            # not an optimized index: iterate (key, ids) pairs
            walk = sort_index.items()
            fetch = lambda pair: pair[1]
            if descending:
                walk.reverse()
        elif not descending:
            walk = sort_index._index.keys()
        else:
            get_reversed = getattr(sort_index, 'getReverseOrder', None)
            walk = get_reversed and get_reversed()
            if walk is None:
                walk = list(sort_index._index.keys())
                walk.reverse()
        for step in walk:
            ids = intersection(seq, fetch(step))
            if ids:
                covered.update(ids)
                covered_count += len(ids)
                # NOTE(review): with a non-optimized index `step` is a
                # (key, ids) pair rather than the bare key -- confirm.
                yield step, ids
        if covered_count != len(seq):
            yield None, difference(seq, covered)
    else:
        # result relatively small -- sort via result
        key_of = sort_index.keyForDocument
        buckets = OOBTree()
        without_value = IITreeSet()
        for doc in seq.keys():
            try:
                key = key_of(doc)
            except KeyError:
                without_value.insert(doc)
                continue
            bucket = buckets.get(key)
            if bucket is None:
                bucket = buckets[key] = IITreeSet()
            bucket.insert(doc)
        ordered = buckets.items()
        if descending:
            ordered = list(ordered)
            ordered.reverse()
        for pair in ordered:
            yield pair
        if without_value:
            yield None, without_value
def count(self, context, facet, intersect=None):
    # Count the records matching this filter of *facet*, optionally
    # restricted to the records in *intersect* (query results, an IISet,
    # any other iterable of record ids, or None for "no restriction").
    #
    # A cached record set is reused unless *intersect* is an IISet that
    # overlaps the invalidated records; in that case the cache entries
    # are dropped and the catalog is queried again.
    if IQueryResults.providedBy(intersect):
        intersect = IISet(intersect.keys())
    sm = sitemanager_for(context)
    unique_name = '%s.%s' % (facet.name, self.name)
    cache_tools = queryUtility(ISetCacheTools, context=sm)
    invalidated = cache_tools.invalidated_records
    if not isinstance(invalidated, IISet):
        invalidated = IISet(invalidated)

    # Must be bound on every path: the cache lookup below reads it even
    # when intersect is None or a plain iterable.
    # NOTE(review): staleness is only checked for IISet intersects;
    # confirm whether other iterables should be checked as well.
    invalid = False
    if isinstance(intersect, IISet):
        invalid = len(intersection(intersect, invalidated)) > 0

    if unique_name in cache_tools.filter_setid_cache:
        setid = cache_tools.filter_setid_cache[unique_name]
        if setid in cache_tools.set_cache:
            if invalid:
                del cache_tools.set_cache[setid]
                del cache_tools.filter_setid_cache[unique_name]
            else:
                records = cache_tools.set_cache[setid]
                if intersect is None:
                    return len(records)
                if isinstance(intersect, IISet):
                    # optimal to cast smaller set to match IISet.
                    return len(intersection(intersect, IISet(records)))
                return len(set(intersect) & records)

    # otherwise, at this point, no cached value, so query catalog...
    qf = self(unique_name)
    runner = AdvancedQueryRunner(context)
    result = runner(qf)
    setid = result.setid
    cache_tools.set_cache[setid] = result.frozen
    cache_tools.filter_setid_cache[unique_name] = setid
    if intersect is None:
        return len(result)
    if isinstance(intersect, IISet):
        return len(intersection(intersect, IISet(result.frozen)))
    return len(set(intersect) & result.frozen)
def nearResultSets(sets, index, distance=5, bidirectional=1):
    """Perform a near search on result sets.

    Each element of *sets* provides ``words()`` (a mapping whose keys are
    the searched words); the sets are first intersected to find the
    documents containing all words, then the word positions inside each
    such document are checked against *distance*.

    sets          -- result sets to combine.
    index         -- object providing ``positionsFromDocumentLookup``.
    distance      -- passed to the position-map neighbourhood check.
    bidirectional -- true: use the bidirectional check, otherwise the
                     unidirectional one.

    Returns a ResultSet of the matching document ids in which every
    searched word is mapped to 1.0.
    """
    # Only documents that contain all the words can match.
    docids = intersectResultSets(sets).docIds()

    # Collect every word of every result set (duplicates are kept).
    words = []
    for result_set in sets:
        words.extend(result_set.words().keys())

    matching = IISet()
    for docId in docids:
        # the posMap is a list of tuples(word,IISet[positions])
        posMap = index.positionsFromDocumentLookup(docId, words)
        if bidirectional:
            hits = posMap.checkPositionMapBidirectional(distance)
        else:
            hits = posMap.checkPositionMapUnidirectional(distance)
        if len(hits) > 0:
            matching.insert(docId)

    return ResultSet(matching, dict.fromkeys(words, 1.0))
def __init__(self, id, title, skelton, fileattache, parent, elements):
    """Set up a new site: lexicon, catalog indexes, storage and skeleton.

    id, title   -- passed on to ``_buildIndexing``.
    skelton     -- name of the skeleton directory to load objects and
                   properties from.
    fileattache -- stored as ``self.fileattache``.
    parent      -- container used as acquisition context while the
                   indexes are built.
    elements    -- lexicon pipeline elements; when empty no lexicon is
                   added.
    """
    if elements:
        from Products.ZCTextIndex.ZCTextIndex import manage_addLexicon
        manage_addLexicon(self, id='lexicon', elements=elements)

    self.__of__(parent)._buildIndexing(id, title)

    # creation and modification time start out identical
    now = time()
    self.created = now
    self.modified = now
    self.fileattache = fileattache

    self.data = IOBTree()   # id -> Message
    self.ids = IISet()      # ids of children

    self.loadSkelton(None, skelton)
    self.loadProperties(skelton)
    self.skelton = skelton
def index_object(self, documentId, obj, threshold=None):
    """ Index an object.

    'documentId' is the integer id of the document

    'obj' is the object to be indexed; the text is read from the
    attribute named after this index's id (called if callable)

    'threshold' is accepted for interface compatibility; it is not
    referenced in this method body.

    Returns the number of distinct word ids indexed for the document,
    or 0 when the source attribute cannot be obtained.
    """
    attr_name = self.id

    # Sniff the object for our document source.
    try:
        source = getattr(obj, attr_name)
        if safe_callable(source):
            source = source()
        if not isinstance(source, UnicodeType):
            source = str(source)
    except (AttributeError, TypeError):
        return 0

    # Sniff the object for '<id>_encoding'; fall back to latin1.
    try:
        encoding = getattr(obj, attr_name + '_encoding')
        if safe_callable(encoding):
            encoding = encoding()
        encoding = str(encoding)
    except (AttributeError, TypeError):
        encoding = 'latin1'

    lexicon = self.getLexicon()
    splitter = lexicon.Splitter

    # Run through the words and score them.  Immediately repeated words
    # are counted once; quoted phrases are handed to _subindex.
    scores_by_word = OIBTree()
    previous = None
    for word in list(splitter(source, encoding=encoding)):
        if word[0] == '\"':
            previous = self._subindex(word[1:-1], scores_by_word,
                                      previous, splitter)
        elif word != previous:
            previous = word
            scores_by_word[word] = scores_by_word.get(word, 0) + 1

    # Convert scores to use wids:
    scores_by_wid = IIBucket()
    word_id = lexicon.getWordId
    for word, score in scores_by_word.items():
        scores_by_wid[word_id(word)] = score
    del scores_by_word

    known_wids = IISet(self._unindex.get(documentId, []))

    # Get rid of document words that are no longer indexed
    self.unindex_objectWids(documentId,
                            difference(known_wids, scores_by_wid))

    # Now index the words.  Note that the new xIBTrees are clever
    # enough to do nothing when there isn't a change.
    add_entry = self.insertForwardIndexEntry
    for wid, score in scores_by_wid.items():
        add_entry(wid, documentId, score)

    # Save the unindexing info if it's changed:
    new_wids = scores_by_wid.keys()
    if new_wids != known_wids.keys():
        self._unindex[documentId] = new_wids

    return len(new_wids)
def _build_degenerate_tree(self):
    """Hand-assemble an IITreeSet with needless levels of indirection.

    The tree is wired together through ``__setstate__`` so that its
    shape is fully controlled.  It is validated with ``_check()`` and
    ``BTrees.check.check`` before being returned.

    Returns ``(tree, keys)`` where *keys* is ``[1, 3, 5, 7, 11]``.
    """
    from BTrees.IIBTree import IISet
    from BTrees.IIBTree import IITreeSet
    from BTrees.check import check

    def linked_bucket(key, next_bucket):
        # One-key bucket whose successor in the chain is next_bucket.
        bucket = IISet()
        bucket.__setstate__(((key,), next_bucket))
        return bucket

    def node(children, first_bucket):
        # Interior node over `children`, pointing at its first bucket.
        tree = IITreeSet()
        tree.__setstate__((children, first_bucket))
        return tree

    # Build the buckets and chain them together (last one first).
    bucket11 = IISet([11])
    bucket7 = linked_bucket(7, bucket11)
    bucket5 = linked_bucket(5, bucket7)
    bucket3 = linked_bucket(3, bucket5)
    bucket1 = linked_bucket(1, bucket3)

    # Build the deepest layers of indirection nodes.
    tree1 = node((bucket1,), bucket1)
    tree3 = node((bucket3,), bucket3)
    tree5lower = node((bucket5,), bucket5)
    tree5 = node((tree5lower,), bucket5)
    tree7 = node((bucket7,), bucket7)
    tree11 = node((bucket11,), bucket11)

    # Paste together the middle layers.
    tree13 = node((tree1, 2, tree3), bucket1)
    tree5711lower = node((tree5, 6, tree7, 10, tree11), bucket5)
    tree5711 = node((tree5711lower,), bucket5)

    # One more.
    root = node((tree13, 4, tree5711), bucket1)
    root._check()
    check(root)
    return root, [1, 3, 5, 7, 11]
def checkCatalog(path, indexes):
    """ perform some consistency checks on a ZCatalog instance

    path    -- traversal path of the ZCatalog, resolved from the
               application root.
    indexes -- collection of index meta types to check; when empty all
               indexes are checked.

    Findings are printed; the process exits with status 1 when the
    catalog cannot be traversed to.
    """
    root = Zope2.app()
    try:
        catalog = root.unrestrictedTraverse(path)
    except AttributeError:
        print('Error: catalog object not found')
        sys.exit(1)

    # get Catalog instance
    _cat = catalog._catalog

    # check Catalog internal BTrees
    l_data = list(_cat.data.keys())
    l_data.sort()
    l_uids = list(_cat.uids.values())
    l_uids.sort()
    # Fixed: this used to be built from _cat.data as well, which made the
    # data/paths comparison below compare the data mapping with itself.
    l_paths = list(_cat.paths.keys())
    l_paths.sort()

    print("Checking catalog internal BTrees")
    print("\tINFO: Mapping data: %d entries" % len(l_data))
    print("\tINFO: Mapping uids: %d entries" % len(l_uids))
    print("\tINFO: Mapping paths: %d entries" % len(l_paths))

    if l_data == l_uids:
        print("\tOK: Mapping data equals Mapping uids")
    else:
        print("\tERR: Mapping data does not equal Mapping uids")

    if l_data == l_paths:
        print("\tOK: Mapping data equals Maaping paths")
    else:
        print("\tERR: Mapping data does not equal Maaping paths")

    # Every rid referenced by an index must exist in the data mapping.
    data_rids = IISet(_cat.data.keys())

    def report(direction, rids):
        # Print whether `rids` contains ids unknown to the catalog data;
        # `direction` is 'forward' or 'backward'.
        diff = difference(rids, data_rids)
        if len(diff) != 0:
            print('\tERR: Problem with %s entries' % direction)
            print('\tERR: too much %s entries: %s' % (direction, diff))
        else:
            print('\tOK: %s entries (%d entries)'
                  % (direction.capitalize(), len(rids)))

    # check BTrees of indexes
    for id, idx in _cat.indexes.items():

        if indexes and not idx.meta_type in indexes:
            continue

        print("Checking index '%s' (type: %s)" % (id, idx.meta_type))

        if idx.meta_type in ['FieldIndex', 'KeywordIndex']:
            # check forward entries; a value is either a single rid
            # (an int) or a set of rids
            RIDS = IISet()
            for key, rids in idx._index.items():
                if isinstance(rids, IntType):
                    RIDS.insert(rids)
                else:
                    for rid in rids.keys():
                        RIDS.insert(rid)
            report('forward', RIDS)

        elif idx.meta_type in ['PathIndex']:
            # check forward entries; only the first value of each
            # per-component mapping is inspected (as before)
            RIDS = IISet()
            for level_map in list(idx._index.values()):
                for rid in level_map.values()[0]:
                    RIDS.insert(rid)
            report('forward', RIDS)

        if idx.meta_type in ['FieldIndex', 'KeywordIndex', 'PathIndex']:
            # check backward entries
            report('backward', IISet(idx._unindex.keys()))
def search(self, path, default_level=0, depth=-1, navtree=0, navtree_start=0):
    """
    path is either a string representing a
    relative URL or a part of a relative URL or
    a tuple (path,level).

    level >= 0  starts searching at the given level
    level <  0  not implemented yet

    depth limits how far below the path results are collected
    (-1 means "not specified"); navtree switches to navigation-tree
    mode and navtree_start is the component level at which the
    navigation tree / breadcrumb trail starts.

    Returns a set of document ids.
    """
    if isinstance(path, basestring):
        startlevel = default_level
    else:
        # (path, level) tuple
        startlevel = int(path[1])
        path = path[0]

    absolute_path = isinstance(path, basestring) and path.startswith('/')

    # non-empty path components
    comps = filter(None, path.split('/'))

    # components of the full path, with a leading '' so that joining a
    # prefix yields an absolute path
    orig_comps = [''] + comps[:]
    # Optimization - avoid using the root set
    # as it is common for all objects anyway and add overhead
    # There is an assumption about catalog/index having
    # the same container as content
    if default_level == 0:
        indexpath = list(filter(None, self.getPhysicalPath()))
        # strip the leading components the query shares with the index's
        # own physical path, starting the search that much deeper
        while min(len(indexpath), len(comps)):
            if indexpath[0] == comps[0]:
                del indexpath[0]
                del comps[0]
                startlevel += 1
            else:
                break

    if len(comps) == 0:
        if depth == -1 and not navtree:
            # nothing left to match and no depth limit: everything indexed
            return IISet(self._unindex.keys())

    # Make sure that we get depth = 1 if in navtree mode
    # unless specified otherwise

    orig_depth = depth
    if depth == -1:
        depth = 0 or navtree

    # Optimized navtree starting with absolute path
    if absolute_path and navtree and depth == 1 and default_level==0:
        set_list = []
        # Insert root element
        if navtree_start >= len(orig_comps):
            navtree_start = 0
        # create a set of parent paths to search
        for i in range(len(orig_comps), navtree_start, -1):
            parent_path = '/'.join(orig_comps[:i])
            # an empty join result stands for the root
            parent_path = parent_path and parent_path or '/'
            try:
                set_list.append(self._index_parents[parent_path])
            except KeyError:
                pass
        return multiunion(set_list)
    # Optimized breadcrumbs
    elif absolute_path and navtree and depth == 0 and default_level==0:
        item_list = IISet()
        # Insert root element
        if navtree_start >= len(orig_comps):
            navtree_start = 0
        # create a set of parent paths to search
        for i in range(len(orig_comps), navtree_start, -1):
            parent_path = '/'.join(orig_comps[:i])
            parent_path = parent_path and parent_path or '/'
            try:
                item_list.insert(self._index_items[parent_path])
            except KeyError:
                pass
        return item_list
    # Specific object search
    elif absolute_path and orig_depth == 0 and default_level == 0:
        try:
            return IISet([self._index_items[path]])
        except KeyError:
            return IISet()
    # Single depth search
    elif absolute_path and orig_depth == 1 and default_level == 0:
        # only get objects contained in requested folder
        # NOTE(review): the stored set itself is returned here, not a copy
        try:
            return self._index_parents[path]
        except KeyError:
            return IISet()
    # Sitemaps, relative paths, and depth queries
    elif startlevel >= 0:

        # All three start as None; the BTrees set operations used below
        # are relied upon to accept a None operand -- TODO confirm against
        # the BTrees documentation.
        pathset = None # Same as pathindex
        navset = None # For collecting siblings along the way
        depthset = None # For limiting depth

        # self._index[None][level] is looked up as the set recorded under
        # the key None for that level
        if navtree and depth and \
               self._index.has_key(None) and \
               self._index[None].has_key(startlevel):
            navset = self._index[None][startlevel]

        # walk the path components first, then `depth` further levels
        for level in range(startlevel, startlevel+len(comps) + depth):
            if level-startlevel < len(comps):
                comp = comps[level-startlevel]
                if not self._index.has_key(comp) or not self._index[comp].has_key(level):
                    # Navtree is inverse, keep going even for
                    # nonexisting paths
                    if navtree:
                        pathset = IISet()
                    else:
                        return IISet()
                else:
                    pathset = intersection(pathset, self._index[comp][level])
                if navtree and depth and \
                       self._index.has_key(None) and \
                       self._index[None].has_key(level+depth):
                    navset = union(navset, intersection(pathset, self._index[None][level+depth]))
            if level-startlevel >= len(comps) or navtree:
                if self._index.has_key(None) and self._index[None].has_key(level):
                    depthset = union(depthset, intersection(pathset, self._index[None][level]))

        # `or IISet()` turns a None (or empty) result into an empty set
        if navtree:
            return union(depthset, navset) or IISet()
        elif depth:
            return depthset or IISet()
        else:
            return pathset or IISet()

    else:
        # negative start level: try the component sequence at every level
        results = IISet()
        for level in range(0,self._depth + 1):
            ids = None
            error = 0
            for cn in range(0,len(comps)):
                comp = comps[cn]
                try:
                    ids = intersection(ids,self._index[comp][level+cn])
                except KeyError:
                    # component not indexed at this level: this starting
                    # level contributes nothing
                    error = 1
            if error==0:
                results = union(results,ids)
        return results
def languageindex_search(self, language, fallback=True, res=None):
    # Return the docids indexed for *language* as an IISet, or None when
    # the main language is not indexed at all.
    #
    # language -- language tag, split into (main, sub) by splitLanguage.
    # fallback -- true: also include every other sub-language of the same
    #             main language; false: only the exact sub-language.
    # res      -- optional iterable of docids to restrict the search to;
    #             only honoured when fallback is disabled.
    main, sub = splitLanguage(language)

    if main not in self._index:
        return None

    if not fallback and res is not None:
        # We do not support any optimization when fallback is enabled.
        #
        # TODO: The core loop is not in C here. Casual benchmarks suggest
        # this is still more efficient than trying to move it to C. The
        # problem is that we only have an IISet of docids as an input. We
        # need to filter this per language. The available index structures
        # we have are:
        #
        # IndexEntry objects used as entries. Complex objects storing
        # docid, main and sub languages and UID of the canonical. Their
        # hash and compare function uses the canonical UID.
        #
        # self._index
        # An OOBTreeSet structure per language. In the outermost nodes we
        # have OOBTree's per language. Useful to get all items in a
        # language. Otherwise useless, as we would have to compare the
        # docid attribute of the object in the tree against our wanted
        # set, requiring a full loop over all items.
        #
        # self._unindex
        # An IOBTree of docid to entry. Better to match our docid wanted
        # set, but we would still have to compare the language code to the
        # entry object itself.
        #
        # self._sortindex
        # An IOBTree of docid to language tag. Looks like the best
        # candidate for us, as we can compare the language directly as a
        # simple string comparison.
        #
        # One thing to keep in mind, is that once we get a wanted set,
        # this will usually have gone through a path query already. This
        # means we will almost always already have matching set and won't
        # filter out any item at all. So the edge-case of a 100% match is
        # actually the most common one for us.
        #
        # Casual benchmarks show that trying to construct an IOBTree from
        # the wanted set and intersecting it with the sortindex is still
        # slower than having the core loop in Python code.
        tag = lang_tag(main, sub)
        result = IISet()
        for r in res:
            lang = self._sortindex.get(r)
            if lang == tag:
                result.insert(r)
        return result

    by_sub = self._index[main]
    if fallback:
        # Search in sorted order, specific sub tag first, None second
        subs = list(by_sub.keys())
        subs.sort()
        if sub in subs:
            subs.remove(sub)
            subs.insert(0, sub)
    else:
        subs = [sub]

    result = OOSet()
    for sublanguage in subs:
        entries = by_sub.get(sublanguage)
        if entries is None:
            # Exact sub-language not indexed (only possible without
            # fallback): contribute nothing instead of raising KeyError,
            # matching the empty IISet the `res` path returns above.
            continue
        result = oo_union(result, entries)

    return IISet(entry.docid for entry in result)
def clear(self):
    # Drop all stored ids by rebinding self._ids to a new, empty IISet.
    self._ids = IISet()
def __init__(self, oid, **kwargs):
    """Initialise the folder and start with no closing transfers.

    oid and any keyword arguments are passed through unchanged to
    BaseFolder.__init__.
    """
    BaseFolder.__init__(self, oid, **kwargs)
    # integer set, initially empty
    self._closing_transfers = IISet()
class ZchSite(ZCatalog.ZCatalog):
    """A Zch Site is a self contained web-based news publishing and discussion system"""
    # Articles and comments both live in ``self.data`` (id -> posting);
    # ``self.ids`` holds the ids of the articles only, each article keeps
    # the ids of its own comments.
    #
    # NOTE: methods below that have no docstring are documented with
    # comments on purpose -- see ``site_url``, whose docstring is
    # commented out in the original.  Do not turn these comments into
    # docstrings without reviewing the publishing implications.
    meta_type ='Zch Site'
    description='Zch Site'
    security = ClassSecurityInfo()
    security.setPermissionDefault(ManageZch,('Manager',))
    security.setPermissionDefault(AddArticleZch,('Manager',))
    security.setPermissionDefault(AddCommentZch,('Anonymous','Manager',))
    security.setPermissionDefault(View,('Anonymous','Manager',))
    icon ='misc_/Zch/Zch_img'
    _properties=({'id':'title', 'type':'string','mode':'w'},)
    fileattache=0
    sage=0

    manage_options=({'label':'Contents', 'icon':icon, 'action':'manage_main', 'target':'manage_main'},
                    {'label':'View', 'icon':'', 'action':'index_html', 'target':'manage_main'},
                    {'label':'Postings', 'icon':'', 'action':'manage_postings', 'target':'manage_main'},
                    {'label':'Options', 'icon':'', 'action':'manage_editForm', 'target':'manage_main'},
                    {'label':'Properties', 'icon':'', 'action':'manage_propertiesForm', 'target':'manage_main'},
                    {'label':'Catalog', 'icon':'', 'action':'manage_catalogView', 'target':'manage_main'},
                    {'label':'Indexes', 'icon':'', 'action':'manage_catalogIndexes', 'target':'manage_main'},
                    {'label':'Security', 'icon':'', 'action':'manage_access', 'target':'manage_main'},
                    {'label':'Undo', 'icon':'', 'action':'manage_UndoForm', 'target':'manage_main'}
                    )

    security.declareProtected(ManageZch, 'manage_postings')
    manage_postings = HTMLFile('dtml/manage_postings', globals())

    security.declareProtected(ManageZch, 'manage_editForm')
    manage_editForm = HTMLFile('dtml/editForm', globals())

    security.declarePrivate('_buildIndexing')
    def _buildIndexing(self, id, title):
        # (Re)build the catalog: default indexes and metadata columns.
        # Initialise ZCatalog
        if not hasattr(self,'_catalog'):
            ZCatalog.ZCatalog.__init__(self, id, title)

        # delete any existing indexes
        for name in self.indexes():
            self.delIndex(name)

        # add the default indexes
        for (name,index_type) in [('meta_type', 'FieldIndex'),
                                  ('author', 'FieldIndex'),
                                  ('body', 'ZCTextIndex'),
                                  ('title', 'ZCTextIndex'),
                                  ('date', 'FieldIndex')]:
            if index_type == 'ZCTextIndex':
                # text indexes are configured through an "extra" record
                extras = EmptyClass()
                extras.doc_attr = name
                extras.index_type = 'Okapi BM25 Rank'
                extras.lexicon_id = 'lexicon'
                self.addIndex(name, index_type, extra=extras)
            else:
                self.addIndex(name,index_type)

        # delete the default metadata columns
        for name in self.schema():
            self.delColumn(name)

        # Add the meta data columns for search results
        for name in ['id','title','absolute_url','author','date_posted','date','body', 'tnum']:
            self.addColumn(name,'')

    security.declareProtected(ManageZch, 'recatalogPostings')
    def recatalogPostings(self,REQUEST=None):
        """ Clear the Catalog and then Index all the postings. """
        self._catalog.clear()
        for article_id in self.ids:
            article = self.data[article_id].__of__(self)
            # bodies stored as a list of lines are flattened to one string
            if type(article.body)==type([]):
                article.body = join(article.body, '\n')
            for comment_id in article.ids:
                comment = self.data[comment_id].__of__(article)
                if type(comment.body)==type([]):
                    comment.body = join(comment.body, '\n')
                self.catalog_object(comment, join(comment.getPhysicalPath(), '/'))
            self.catalog_object(article, join(article.getPhysicalPath(), '/'))
        if REQUEST is not None:
            return REQUEST.RESPONSE.redirect(REQUEST['HTTP_REFERER'])

    security.declareProtected(ManageZch, 'loadSkelton')
    def loadSkelton(self, REQUEST, skelton='zch'):
        "Add Page Template PythonScript, DTMLMethod and Image read from skelton directory."
        skelton_dir = os.path.join(package_home(globals()), 'skelton', skelton)
        for entry in os.listdir(skelton_dir):
            if entry[-3:] == '.pt' or entry[-4:]=='.pys' or entry[-5:]=='.dtml' or entry[-4:]=='.gif':
                f=open(os.path.join(skelton_dir, entry), 'rb')
                try:
                    # close the handle even when the read fails
                    file=f.read()
                finally:
                    f.close()
                try:
                    if entry[-3:] == '.pt':
                        id = entry[:-3]
                        manage_addPageTemplate(self, id, '', file, encoding='utf-8')
                    elif entry[-4:] == '.pys':
                        id = entry[:-4]
                        manage_addPythonScript(self,id)
                        self._getOb(id).write(file)
                    elif entry[-5:] == '.dtml':
                        id = entry[:-5]
                        self.manage_addDTMLMethod(id,'',file)
                    elif entry[-4:] == '.gif':
                        id = entry[:-4]
                        self.manage_addImage(id,file,content_type='image/gif')
                except Exception:
                    # Best effort: an entry that cannot be added is
                    # skipped so the remaining entries still load.
                    pass
        if REQUEST is not None:
            return REQUEST.RESPONSE.redirect(REQUEST['HTTP_REFERER'])

    security.declarePrivate('loadProperties')
    def loadProperties(self, skelton):
        "Add properties from 'properties' file."
        # Each non-comment line has the form  <id>:<type>=<value>
        p = re.compile(r'(\w+?):(\w+?)=\s*(.*)\s*')
        f = open(os.path.join(package_home(globals()), 'skelton', skelton, 'properties'), 'r')
        try:
            lines = f.readlines()
        finally:
            f.close()

        # Parse the file once; lines starting with '#' are comments.
        matches = []
        for s in lines:
            if s.startswith('#'):
                continue
            m = p.match(s)
            if m:
                matches.append(m)

        # First declare all properties, then assign their values.
        newprop = list(self._properties)
        for m in matches:
            newprop.append({'id':m.group(1), 'type':m.group(2), 'mode': 'wd'})
        self._properties = tuple(newprop)
        for m in matches:
            self._updateProperty(m.group(1), m.group(3))

    security.declarePrivate('__init__')
    def __init__(self, id, title, skelton, fileattache, parent, elements):
        # Create the lexicon (when pipeline elements are given), the
        # catalog indexes, the posting storage, and load the skeleton.
        if elements:
            from Products.ZCTextIndex.ZCTextIndex import manage_addLexicon
            manage_addLexicon(self,id='lexicon',elements = elements)
        self.__of__(parent)._buildIndexing(id,title)

        t=time()
        self.created = t
        self.modified = t
        self.fileattache = fileattache

        self.data =IOBTree() # id -> Message
        self.ids =IISet() # ids of children

        self.loadSkelton(None, skelton)
        self.loadProperties(skelton)
        self.skelton = skelton

    security.declarePublic('__len__')
    def __len__(self):
        return len(self.ids) + 1

    security.declareProtected(View, '__getitem__')
    def __getitem__(self,id):
        """ Get a posting from the ZchSite data store """
        # make sure id is an integer
        try:
            if not isinstance(id,IntType):
                id=atoi(id)
        except ValueError:
            raise KeyError(id)

        # make sure it's in our list of children
        if not self.ids.has_key(id):
            raise KeyError(id)

        # return the posting
        return self.data[id].__of__(self)

    security.declareProtected(View, 'zchcrypt')
    def zchcrypt(self,word,key):
        # base64 of the HMAC of `word` keyed with `key`, minus the last
        # three characters of the encoded string
        import hmac, base64
        h = hmac.new(key)
        h.update(word)
        return base64.encodestring(h.digest())[:-3]

    security.declareProtected(View, 'zchfqdn')
    def zchfqdn(self,n):
        return getfqdn(n)

    security.declarePrivate('delItem')
    def delItem(self,id):
        # Remove an article (together with its comments) or a single
        # comment from the storage and from the catalog.
        #
        # Fixed: postings are cataloged under the '/'-joined physical
        # path (see recatalogPostings); uncataloging with the raw path
        # tuple never matched, leaving deleted postings in the catalog.
        if not self.data.has_key(id):
            return

        if self.ids.has_key(id): # article
            article = self.data[id].__of__(self)
            for comment_id in article.ids:
                obj = self.data[comment_id].__of__(article)
                self.uncatalog_object(join(obj.getPhysicalPath(), '/'))
                del self.data[comment_id]
            self.uncatalog_object(join(article.getPhysicalPath(), '/'))
            del self.data[id]
            self.ids.remove(id)
        else: # comment
            parent = self.data[self.data[id].parent_id].__of__(self)
            obj = self.data[id].__of__(parent)
            self.uncatalog_object(join(obj.getPhysicalPath(), '/'))
            del self.data[id]
            # remove it from its parent's list of ids
            parent.ids.remove(id)

    security.declarePrivate('createId')
    def createId(self):
        # current time in seconds, bumped until it is unused
        id=int(time())
        while self.data.has_key(id):
            id=id+1
        return id

    security.declarePrivate('data_map')
    def data_map(self,ids):
        # list of the postings for `ids`, each wrapped in this site
        result=[]
        for id in ids:
            result.append(self.data[id].__of__(self))
        return result

    security.declareProtected(View, 'article_list')
    def article_list(self, size=None):
        """ returns article items """
        # most recently modified first
        def cmp_by_modified(x, y):
            return cmp(y.modified, x.modified)
        items = self.data_map(self.ids)
        items.sort(cmp_by_modified)
        if size:
            items = items[:size]
        for i in range(len(items)):
            items[i].sequence_number = i + 1
        return items

    security.declareProtected(ManageZch, 'postingValues')
    postingValues = article_list

    security.declareProtected(View, 'tpId')
    def tpId(self):
        return self.id

    security.declareProtected(View, 'tpURL')
    def tpURL(self):
        return self.id

    security.declareProtected(View, 'this')
    def this(self):
        return self

    security.declareProtected(View, 'site_url')
    def site_url(self):
        # """ url of the Zch main page """
        return self.absolute_url()

    security.declareProtected(View, 'has_items')
    def has_items(self):
        return len(self.ids)

    security.declareProtected(View, 'item_count')
    def item_count(self):
        return len(self.data)

    security.declareProtected(AddArticleZch, 'addPosting')
    def addPosting(self,file='',REQUEST=None,RESPONSE=None, index=1):
        """ add an article """
        id=self.createId()
        msg=Article(id)
        err, sage = msg.__of__(self)._validation(REQUEST,RESPONSE,'delete attachment',file)
        if err:
            return err

        # Set thread number.
        msg.tnum = '1'

        self.ids.insert(id)
        self.data[id]=msg

        if index:
            msg.__of__(self).index()

        if RESPONSE:
            return self.showMessage(self, REQUEST=REQUEST,
                                    title='Article Posted',
                                    message ='Your article has been posted',
                                    action=self.absolute_url()
                                    )
        return id

    security.declareProtected(View, 'search')
    def search(self,REQUEST):
        """ fulfill a search request """
        if REQUEST.has_key('op') and REQUEST['op']=='articles':
            REQUEST.set('meta_type','Article')
        sr=self.__call__(REQUEST)
        rc=len(sr)
        return self.showSearchResults(self,REQUEST,search_results=sr,
                                      result_count=rc)

    security.declareProtected(ManageZch, 'manage_edit')
    def manage_edit(self, REQUEST=None, fileattache=0):
        """ edit Zch options """
        self.fileattache = fileattache
        if REQUEST is not None:
            return REQUEST.RESPONSE.redirect(REQUEST['HTTP_REFERER'])

    security.declareProtected(ManageZch, 'manage_delete')
    def manage_delete(self,ids=[],REQUEST=None):
        """ delete selected articles from a Zch site """
        # `ids` is only read and rebound here, never mutated in place
        ids=map(atoi, ids)
        for id in ids:
            self.delItem(id)
        if REQUEST is not None:
            return REQUEST.RESPONSE.redirect(REQUEST['HTTP_REFERER'])

    security.declarePrivate('list_skelton')
    def list_skelton(self):
        # names of the entries in the package's 'skelton' directory
        skelton = []
        for item in os.listdir(os.path.join(package_home(globals()), 'skelton')):
            skelton.append(item)
        return skelton

    # Searchable interface
    security.declareProtected(View, '__call__')
    def __call__(self, REQUEST=None, internal=0, **kw):
        # catalog search; with `internal` the brains are resolved to
        # their objects
        brains = self.searchResults(REQUEST, **kw)
        if internal:
            return [brain.getObject() for brain in brains]
        return brains
class Article(Posting):
    """ """
    # A top-level posting.  ``self.ids`` holds the ids of the comments
    # made on it; the postings themselves are looked up in ``self.data``.
    # Thread numbers (tnum) start at 1 for the article itself, so the
    # first comment has tnum 2 and lives at ``self.ids[0]``.
    #
    # NOTE: methods without a docstring are documented with comments only,
    # as in the original.

    security = ClassSecurityInfo()

    meta_type ='Article'
    icon ='misc_/Zch/posting_img'

    security.declarePrivate('__init__')
    def __init__(self, id):
        Posting.__init__(self, id)
        self.ids =IISet() #Article has sub ids.

    security.declareProtected(View, 'relative_path')
    def relative_path(self):
        return self.id

    security.declareProtected(View, 'index_html')
    def index_html(self,REQUEST):
        """ Zch article main page (the read more page) """
        return self.article_html(self,REQUEST)

    security.declareProtected(ManageZch, 'postingValues')
    def postingValues(self):
        """ return all replies """
        return self.data_map(self.ids)

    security.declareProtected(View, 'comment_list_size')
    def comment_list_size(self, start=0, size=0):
        """ returns comment items """
        # Falsy values count as 0, everything else is converted to int.
        start = int(start or 0)
        size = int(size or 0)

        # Adjust start to tnum: tnum 1 is the article itself, which uses
        # up one of the requested `size` slots.
        if start == 1:
            start = 2
            if size:
                size = size - 1

        comment_ids = list(self.ids)

        # Convert to ids[] index number
        if start:
            first = start - 2
            if size == 0:
                chosen = comment_ids[first:]
            else:
                chosen = comment_ids[first:first + size]
        elif size == 0:
            chosen = comment_ids[:]
        else:
            # no start given: the last `size` comments
            chosen = comment_ids[-size:]

        return self.data_map(chosen)

    security.declareProtected(View, 'comment_list_from_to')
    def comment_list_from_to(self, from_tnum=0, to_tnum=0):
        """ returns comment items """
        from_tnum = int(from_tnum)
        to_tnum = int(to_tnum)

        # A bound of 0 means "unbounded" on that side.
        chosen = []
        for comment_id in self.ids:
            if from_tnum != 0 and int(self.data[comment_id].tnum) < from_tnum:
                continue
            if to_tnum != 0 and int(self.data[comment_id].tnum) > to_tnum:
                continue
            chosen.append(comment_id)

        return self.data_map(chosen)

    security.declareProtected(AddCommentZch, 'addPosting')
    def addPosting(self, file='', REQUEST=None,RESPONSE=None):
        """ add a Comment """
        id = self.createId()
        msg = Comment(id, self.id)
        err, sage = msg.__of__(self)._validation(REQUEST, RESPONSE,
                                                 'delete attachment', file)
        if err:
            return err

        # Set thread number.
        msg.tnum = str(len(self.ids) + 2)

        # The article's modification stamp is only bumped when `sage`
        # is 0.
        if sage == 0:
            self.modified = id

        self.ids.insert(id)
        self.data[id] = msg

        # comments are always indexed
        msg.__of__(self).index()

        if not RESPONSE:
            return id
        return self.showMessage(self, REQUEST=REQUEST,
                                title='Comment Posted',
                                message ='Your reply has been posted',
                                action=self.absolute_url()
                                )

    security.declareProtected(View, 'recent_entry')
    def recent_entry(self):
        # body of the newest comment, or of the article when there is none
        if len(self.ids) == 0:
            return self.body
        return self.data[self.ids[-1]].body

    security.declareProtected(View, 'recent_creator')
    def recent_creator(self):
        # author of the newest comment, or of the article when there is none
        if len(self.ids) == 0:
            return self.author
        return self.data[self.ids[-1]].author

    security.declarePublic('__len__')
    def __len__(self):
        return len(self.ids) + 1

    security.declareProtected(View, '__getitem__')
    def __getitem__(self,id):
        """ Get a posting from the ZchSite data store """
        # make sure id is an integer
        try:
            if not isinstance(id, IntType):
                id = atoi(id)
        except ValueError:
            raise KeyError(id)

        # First try `id` as a posting id; failing that, treat it as a
        # thread number and map it onto the list of comment ids.
        try:
            return Posting.__getitem__(self, id)
        except KeyError:
            pass
        try:
            return self.data[self.ids[id-2]].__of__(self)
        except:
            raise KeyError(id)
def __init__(self, id):
    """Initialise the posting and start with an empty set of sub ids.

    id is passed through unchanged to Posting.__init__.
    """
    Posting.__init__(self, id)
    self.ids =IISet() #Article has sub ids.
def get(self, key, default=None):
    """Return the lexicon's entry for *key* as an IISet.

    The value is looked up with ``self._lexicon.get(key, default)``; the
    result is an IISet holding that single value, or an empty IISet when
    the lookup yields None.
    """
    wid = self._lexicon.get(key, default)
    if wid is None:
        return IISet()
    return IISet([wid])
def search(self, path, default_level=0, depth=-1, navtree=0, navtree_start=0):
    """
    path is either a string representing a
    relative URL or a part of a relative URL or
    a tuple (path,level).

    level >= 0  starts searching at the given level
    level <  0  not implemented yet

    Raises ValueError when depth > 0, or when the first path component
    is neither 'zport' nor 'dmd'.

    Returns a set of document ids.
    """
    if isinstance(path, basestring):
        startlevel = default_level
    else:
        # (path, level) tuple
        startlevel = int(path[1])
        path = path[0]

    absolute_path = isinstance(path, basestring) and path.startswith('/')

    # non-empty path components
    comps = filter(None, path.split('/'))

    # components of the full path, with a leading '' so that joining a
    # prefix yields an absolute path
    orig_comps = [''] + comps[:]

    if depth > 0:
        raise ValueError, "Can't do depth searches anymore"

    # Normalise the components: an empty path means 'dmd', a leading
    # 'zport' is dropped, anything else must start with 'dmd'.
    if not comps:
        comps = ['dmd']
        startlevel = 1
    elif comps[0] == 'zport':
        comps = comps[1:]
    elif comps[0] != 'dmd':
        raise ValueError, "Depth searches must start with 'dmd'"
    # NOTE: this overrides every startlevel value assigned above.
    startlevel = len(comps)
    #startlevel = len(comps)-1 if len(comps) > 1 else 1

    # comps can only be empty here when the path was just 'zport'
    if len(comps) == 0:
        if depth == -1 and not navtree:
            return IISet(self._unindex.keys())

    # Make sure that we get depth = 1 if in navtree mode
    # unless specified otherwise

    orig_depth = depth
    if depth == -1:
        depth = 0 or navtree

    # Optimized navtree starting with absolute path
    if absolute_path and navtree and depth == 1 and default_level==0:
        set_list = []
        # Insert root element
        if navtree_start >= len(orig_comps):
            navtree_start = 0
        # create a set of parent paths to search
        for i in range(len(orig_comps), navtree_start, -1):
            parent_path = '/'.join(orig_comps[:i])
            # an empty join result stands for the root
            parent_path = parent_path and parent_path or '/'
            try:
                set_list.append(self._index_parents[parent_path])
            except KeyError:
                pass
        return multiunion(set_list)
    # Optimized breadcrumbs
    elif absolute_path and navtree and depth == 0 and default_level==0:
        item_list = IISet()
        # Insert root element
        if navtree_start >= len(orig_comps):
            navtree_start = 0
        # create a set of parent paths to search
        for i in range(len(orig_comps), navtree_start, -1):
            parent_path = '/'.join(orig_comps[:i])
            parent_path = parent_path and parent_path or '/'
            try:
                item_list.insert(self._index_items[parent_path])
            except KeyError:
                pass
        return item_list
    # Specific object search
    elif absolute_path and orig_depth == 0 and default_level == 0:
        try:
            return IISet([self._index_items[path]])
        except KeyError:
            return IISet()
    # Single depth search
    # NOTE(review): orig_depth == 1 cannot occur here, depth > 0 raised above
    elif absolute_path and orig_depth == 1 and default_level == 0:
        # only get objects contained in requested folder
        try:
            return self._index_parents[path]
        except KeyError:
            return IISet()
    # Sitemaps, relative paths, and depth queries
    elif startlevel >= 0:

        pathset = None # Same as pathindex
        navset = None # For collecting siblings along the way
        depthset = None # For limiting depth

        if navtree and depth and \
               self._index.has_key(None) and \
               self._index[None].has_key(startlevel):
            navset = self._index[None][startlevel]

        for level in range(startlevel, startlevel+len(comps)):
            # startlevel == len(comps), so this holds on the first
            # iteration only; there the key is the whole joined path
            if level <= len(comps):
                comp = "/".join(comps[:level])
                if (not self._index.has_key(comp)
                    or not self._index[comp].has_key(level)):
                    # Navtree is inverse, keep going even for
                    # nonexisting paths
                    if navtree:
                        pathset = IISet()
                    else:
                        return IISet()
                else:
                    # found: the stored set is returned right away
                    return self._index[comp][level]
                if navtree and depth and \
                       self._index.has_key(None) and \
                       self._index[None].has_key(level+depth):
                    navset = union(navset, intersection(pathset, self._index[None][level+depth]))
            # level-startlevel is always < len(comps) inside this range,
            # so this branch is only entered in navtree mode
            if level-startlevel >= len(comps) or navtree:
                if (self._index.has_key(None)
                    and self._index[None].has_key(level)):
                    depthset = union(depthset, intersection(pathset, self._index[None][level]))

        # `or IISet()` turns a None (or empty) result into an empty set
        if navtree:
            return union(depthset, navset) or IISet()
        elif depth:
            return depthset or IISet()
        else:
            return pathset or IISet()

    else:
        # NOTE(review): startlevel is len(comps) at this point and so
        # never negative; this branch looks unreachable -- confirm.
        results = IISet()
        for level in range(0,self._depth + 1):
            ids = None
            error = 0
            for cn in range(0,len(comps)):
                comp = comps[cn]
                try:
                    ids = intersection(ids,self._index[comp][level+cn])
                except KeyError:
                    error = 1
            if error==0:
                results = union(results,ids)
        return results