Esempio n. 1
0
class FilteredSetBase(Persistent):
    """Base class for filtered sets.

    A filtered set pairs an id and an expression with the set of document
    ids (``self.ids``) collected for it.  Subclasses must override
    ``index_object``; ``getType`` reads a ``meta_type`` attribute that is
    not defined here and is expected to come from the subclass.
    """

    def __init__(self, id, expr):
        self.id   = id
        self.expr = expr
        self.clear()


    def clear(self):
        """Reset the collected document ids to an empty IISet."""
        self.ids  = IISet()


    def index_object(self, documentId, obj):
        """Must be overridden; the base implementation always raises."""
        raise RuntimeError('index_object not defined')


    def unindex_object(self, documentId):
        """Remove documentId from the set; ids not in the set are ignored."""
        # The original removed the undefined name ``Id`` and hid the
        # resulting NameError behind a bare except, so nothing was ever
        # removed.  Only the "not present" case is tolerated now.
        try:
            self.ids.remove(documentId)
        except KeyError:
            pass


    def getId(self):            return self.id
    def getExpression(self):    return self.expr
    def getIds(self):           return self.ids
    def getType(self):          return self.meta_type

    def setExpression(self, expr): self.expr = expr

    def __repr__(self):
        return '%s: (%s) %s' % (self.id, self.expr, list(self.ids))

    __str__ = __repr__
Esempio n. 2
0
 def missing_entries_for_index(self, catalog, index_name):
     """Report the catalog ids that the named index does not reference.

     Returns a 2-tuple: the difference between the catalog's ``paths``
     ids and the ids given by the index's ``referencedObjects()``, and
     ``len(catalog)`` minus the number of referenced ids.
     """
     inner_catalog = catalog._catalog
     target_index = inner_catalog.getIndex(index_name)
     referenced = IISet(target_index.referencedObjects())
     unreferenced = difference(IISet(inner_catalog.paths), referenced)
     shortfall = len(catalog) - len(referenced)
     return (unreferenced, shortfall)
Esempio n. 3
0
    def _insertForwardIndexEntry(self, since, until, documentId):
        """
            Insert 'documentId' into the appropriate set(s) based on
            'since' and 'until'.

            Both None: goes into '_always'.  Only one given: goes into
            '_until_only' or '_since_only' under that key.  Both given:
            goes into '_since' (keyed by since) and then '_until'
            (keyed by until).
        """
        if since is None and until is None:
            self._always.insert(documentId)
            return

        if since is None:
            targets = (('_until_only', until),)
        elif until is None:
            targets = (('_since_only', since),)
        else:
            targets = (('_since', since), ('_until', until))

        for attr_name, key in targets:
            mapping = getattr(self, attr_name)
            docs = mapping.get(key, None)
            if docs is None:
                docs = mapping[key] = IISet()  # XXX: Store an int?
            docs.insert(documentId)
    def test_lookup(self):
        """Print timings for IISet item access and has_key lookups.

        A large set is probed once per element of a small, evenly spread
        set: with the element itself, with the last position/value and
        with the first position/value.
        """
        bigsize = 1000000
        smallsize = 1000
        large = IISet(xrange(bigsize))
        small = IISet(xrange(0, bigsize, bigsize/smallsize))

        began = time()
        for probe in small:
            a = large[probe]
        print("\ngetitem distributed %.6f" % (time()-began))

        began = time()
        for probe in small:
            a = large[bigsize-1]
        print("getitem end %.6f" % (time()-began))

        began = time()
        for probe in small:
            a = large[0]
        print("getitem start %.6f" % (time()-began))

        began = time()
        for probe in small:
            a = large.has_key(probe)
        print("\nhas_key distributed %.6f" % (time()-began))

        began = time()
        for probe in small:
            a = large.has_key(bigsize-1)
        print("has_key end %.6f" % (time()-began))

        began = time()
        for probe in small:
            a = large.has_key(0)
        print("has_key start %.6f" % (time()-began))
Esempio n. 5
0
    def _sort_iterate_index(self, actual_result_count, result, rs,
                            limit, merge, reverse,
                            sort_index, sort_index_length, sort_spec,
                            second_indexes_key_map):
        """Sort 'rs' by walking the items of 'sort_index'.

        Each (key, intset) item of 'sort_index' is intersected with 'rs';
        non-empty intersections are appended to 'result' as
        (key, ids, self.__getitem__) tuples.  With a single sort index the
        entries are sorted in place; otherwise the key is extended with
        values from 'second_indexes_key_map' and 'multisort' is applied.

        'limit' and 'merge' are accepted but not used in this method.

        Returns the tuple (actual_result_count, length, result), where
        'length' is the total number of ids in the appended intersections.
        """
        # The result set is much larger than the sorted index,
        # so iterate over the sorted index for speed.
        # TODO: len(sort_index) isn't actually what we want for a keyword
        # index, as it's only the unique values, not the documents.
        # Don't use this case while using limit, as we return results of
        # non-flattened intsets, and would have to merge/unflattened those
        # before limiting.
        length = 0
        # Probe: intersecting with an empty IISet raises TypeError when
        # rs cannot be used with the IIBTree set operations.
        try:
            intersection(rs, IISet(()))
        except TypeError:
            # rs is not an object in the IIBTree family.
            # Try to turn rs into an IISet.
            rs = IISet(rs)

        if sort_index_length == 1:
            for k, intset in sort_index.items():
                # We have an index that has a set of values for
                # each sort key, so we intersect with each set and
                # get a sorted sequence of the intersections.
                intset = intersection(rs, intset)
                if intset:
                    keys = getattr(intset, 'keys', None)
                    if keys is not None:
                        # Is this ever true?
                        intset = keys()
                    length += len(intset)
                    result.append((k, intset, self.__getitem__))
            # Entries are tuples, so this orders them by sort key first.
            result.sort(reverse=reverse)
        else:
            for k, intset in sort_index.items():
                # We have an index that has a set of values for
                # each sort key, so we intersect with each set and
                # get a sorted sequence of the intersections.
                intset = intersection(rs, intset)
                if intset:
                    keys = getattr(intset, 'keys', None)
                    if keys is not None:
                        # Is this ever true?
                        intset = keys()
                    length += len(intset)
                    # sort on secondary index
                    # Group the ids by their full (primary, secondary...) key.
                    keysets = defaultdict(list)
                    for i in intset:
                        full_key = (k, )
                        for km in second_indexes_key_map:
                            try:
                                full_key += (km[i], )
                            except KeyError:
                                # No value for this id in that map: the key
                                # simply ends up shorter.
                                pass
                        keysets[full_key].append(i)
                    for k2, v2 in keysets.items():
                        result.append((k2, v2, self.__getitem__))
            # multisort returns the ordered sequence; 'result' is rebound.
            result = multisort(result, sort_spec)

        return (actual_result_count, length, result)
Esempio n. 6
0
    def test_even_dist(self):
        """Time intersections of an evenly spread small IISet.

        The small set is paired, in both argument orders, with a small
        tree set, a large tree set and a large set.
        """
        bigsize = BIGSETSIZE
        smallsize = SMALLSETSIZE

        scenarios = (
            (IITreeSet, smallsize,
             'Intersection small set even distribution + small treeset',
             'Intersection small treeset + small set even distribution'),
            (IITreeSet, bigsize,
             'Intersection small set even distribution + large treeset',
             'Intersection large treeset + small set even distribution'),
            (IISet, bigsize,
             'Intersection small set even distribution + large set',
             'Intersection large set, small set even distribution'),
        )
        for other_factory, other_size, forward, backward in scenarios:
            small = IISet(xrange(0, bigsize, bigsize / smallsize))
            large = other_factory(xrange(other_size))
            self.timing(small, large, forward)
            self.timing(large, small, backward)
Esempio n. 7
0
 def _load(self, term, _isInstance=isinstance, _IntType=IntType):
     '''Return the docId set stored for *term*.

     A missing term gives an empty IISet; a bare integer entry is
     wrapped in a one-element IISet; anything else is returned as is.
     '''
     stored = self._index.get(term)
     if stored is None:
         return IISet()
     if _isInstance(stored, _IntType):
         return IISet((stored, ))
     return stored
    def train(self):
        """Train ``self.classifier`` from the catalog's indexed data.

        For every catalog record present in both the 'noun_terms' and
        'Subject' indexes, one training sample is produced per subject
        tag: a dict marking which known nouns the record has (1) or lacks
        (0), paired with the tag.  Nothing is assigned when no samples
        were collected.
        """
        catalog = getToolByName(self, 'portal_catalog')

        # Baseline features: every known noun marked as absent.
        noun_baseline = dict.fromkeys(catalog.uniqueValuesFor('noun_terms'), 0)

        subject_index = catalog._catalog.getIndex('Subject')
        noun_index = catalog._catalog.getIndex('noun_terms')

        # Internal catalog ids of the records that have noun terms,
        # respectively subjects.
        noun_rids = IISet(noun_index._unindex.keys())
        subject_rids = IISet(subject_index._unindex.keys())

        samples = []
        for rid in intersection(subject_rids, noun_rids):
            features = noun_baseline.copy()
            record_nouns = noun_index._unindex[rid]
            record_tags = subject_index._unindex[rid]
            for noun in record_nouns:
                features[noun] = 1
            # All tags of one record share the same feature dict object.
            for tag in record_tags:
                samples.append((features, tag))

        if samples:
            self.classifier = NaiveBayesClassifier.train(samples)
Esempio n. 9
0
    def test_search_inputresult(self):
        """_apply_index must restrict its matches to the given resultset."""
        index = self._makeOne()
        for doc_id, truth in ((1, True), (2, False)):
            obj = Dummy(doc_id, truth)
            index._index_object(obj.id, obj, attr='truth')

        # (queried value, ids passed as resultset, expected ids)
        cases = (
            (True, [], []),
            (True, [2], []),
            (True, [1], [1]),
            (True, [1, 2], [1]),
            (False, [1, 2], [2]),
        )
        for wanted, candidates, expected in cases:
            res, idx = index._apply_index({'truth': wanted},
                                          resultset=IISet(candidates))
            self.failUnlessEqual(idx, ('truth', ))
            self.failUnlessEqual(list(res), expected)
Esempio n. 10
0
    def test_heavy_start(self):
        """Time intersections where the small operand holds the low values.

        Each scenario builds a small and a large operand from
        ``xrange(size)`` and times the intersection in both argument
        orders.
        """
        bigsize = BIGSETSIZE
        smallsize = SMALLSETSIZE

        scenarios = (
            (IISet, IITreeSet, smallsize,
             'Intersection small set low values + small treeset',
             'Intersection small treeset + small set low values'),
            (IISet, IITreeSet, bigsize,
             'Intersection small set low values + large treeset',
             'Intersection large treeset + small set low values'),
            (IISet, IISet, bigsize,
             'Intersection small set low values + large set',
             'Intersection large set + small set low values'),
            (IITreeSet, IISet, bigsize,
             'Intersection small treeset + large set',
             'Intersection large set + small treeset'),
        )
        for small_factory, large_factory, large_size, fwd, rev in scenarios:
            small = small_factory(xrange(smallsize))
            large = large_factory(xrange(large_size))
            self.timing(small, large, fwd)
            self.timing(large, small, rev)
Esempio n. 11
0
    def testLargerInputs(self):
        """Check union/intersection/difference against the simulators.

        Three rounds of random key lists are turned into every set type
        from ``self.builders()``; each operation on each pair of built
        sets must list the same keys as the simulator applied to the
        plain IISets of the keys.
        """
        from BTrees.IIBTree import IISet
        from random import randint
        MAXSIZE = 200
        MAXVAL = 400

        def random_keys():
            size = randint(0, MAXSIZE)
            return [randint(1, MAXVAL) for _ in range(size)]

        for _round in range(3):
            Akeys = random_keys()
            As = [build(Akeys) for build in self.builders()]
            Akeys = IISet(Akeys)

            Bkeys = random_keys()
            Bs = [build(Bkeys) for build in self.builders()]
            Bkeys = IISet(Bkeys)

            checks = ((self.union, self._union),
                      (self.intersection, self._intersection),
                      (self.difference, self._difference))
            for op, simulator in checks:
                for A in As:
                    for B in Bs:
                        got = op(A, B)
                        want = simulator(Akeys, Bkeys)
                        self.assertEqual(list(got), want,
                                         (A, B, Akeys, Bkeys, list(got), want))
Esempio n. 12
0
    def count(self, brains, sequence=None):
        """ Intersect results

        Return a dict mapping each value (as unicode) to the number of
        'brains' matching it; the keys "" and 'all' hold len(brains).

        If 'brains' has a 'facet_counts' attribute, the counts are read
        from its 'facet_fields' entry for this widget's index and the
        catalog is not queried.  Otherwise each value of 'sequence'
        (default: the keys of self.vocabulary()) is resolved with
        uuidToCatalogBrain and counted by intersecting 'brains' with the
        index result for that item's path.

        An empty dict is returned when there are no facet fields, no
        values, no configured index or no IFacetedCatalog utility.
        """
        res = {}
        # by checking for facet_counts we assume this is a SolrResponse
        # from collective.solr
        if hasattr(brains, 'facet_counts'):
            facet_fields = brains.facet_counts.get('facet_fields')
            if not facet_fields:
                # no facet counts were returned. we exit anyway because
                # zcatalog methods throw an error on solr responses
                return res
            index_id = self.data.get('index')
            facet_field = facet_fields.get(index_id, {})
            for value, num in facet_field.items():
                # Store every count under its own unicode key.  The
                # original assigned res[unicode_value] outside the else
                # branch, which raised NameError for a unicode value or
                # wrote its count under the previous key.
                if not isinstance(value, unicode):
                    value = value.decode('utf-8')
                res[value] = num
            res[""] = res['all'] = len(brains)
            return res

        # everything below is handled by the zcatalog
        if not sequence:
            sequence = [key for key, value in self.vocabulary()]

        if not sequence:
            return res

        index_id = self.data.get('index')
        if not index_id:
            return res

        ctool = getToolByName(self.context, 'portal_catalog')
        index = ctool._catalog.getIndex(index_id)
        # From here on 'ctool' is the faceted catalog utility.
        ctool = queryUtility(IFacetedCatalog)
        if not ctool:
            return res

        brains = IISet(brain.getRID() for brain in brains)
        res[""] = res['all'] = len(brains)
        for value in sequence:
            item = uuidToCatalogBrain(value)
            if not item:
                res[value] = len(brains)
                continue
            rset = ctool.apply_index(self.context, index, item.getPath())[0]
            rset = IISet(rset)
            rset = weightedIntersection(brains, rset)[1]
            if isinstance(value, unicode):
                res[value] = len(rset)
            else:
                res[value.decode('utf-8')] = len(rset)
        return res
Esempio n. 13
0
def _eval(query, cat):
  '''Evaluate *query* in the context of *cat* (a 'Products.ZCatalog.Catalog.Catalog').

  An 'ISearch' result is converted: via 'asSet()' when available, via
  'getTree()' for an 'IBTree', otherwise by loading its hits into a new
  'IISet'.  Any other result is returned unchanged.
  '''
  rs = query._eval(_QueryContext(cat))
  if not isinstance(rs, ISearch):
    return rs
  if hasattr(rs, 'asSet'):
    return rs.asSet()
  if isinstance(rs, IBTree):
    return rs.getTree()
  # Load the hits directly as the state of a fresh IISet.
  hits = tuple(rs)
  rs = IISet()
  rs.__setstate__((hits,))
  return rs
Esempio n. 14
0
 def testFixed1843(self):
     """IISet.keys() must raise TypeError for non-integer bounds."""
     from BTrees.IIBTree import IISet
     intset = IISet()
     intset.insert(1)
     # The first call used to fail to raise the TypeError when it
     # occurred; the second one used to segfault.
     for bad_bounds in (("",), (0, "")):
         self.assertRaises(TypeError, intset.keys, *bad_bounds)
Esempio n. 15
0
 def testFixed1843(self):
     """IISet.keys() must raise TypeError for non-integer bounds."""
     from BTrees.IIBTree import IISet
     intset = IISet()
     intset.insert(1)
     # The first call used to fail to raise the TypeError when it
     # occurred; the second one used to segfault.
     for bad_bounds in (("",), (0, "")):
         self.assertRaises(TypeError, intset.keys, *bad_bounds)
Esempio n. 16
0
    def group(self, seq):
        """Generate the documents of 'seq' grouped along the sort index.

        Yields pairs whose second element is a set of document ids.  When
        'seq' holds at least a tenth as many entries as the index, the
        index is walked and each entry's documents are intersected with
        'seq'; otherwise the documents of 'seq' are bucketed by their key.
        Documents left without a group are yielded last under None.
        """
        sortIndex = self._sortIndex
        sortReverse = self._sortReverse
        ns = len(seq)
        ni = len(sortIndex)
        if ns >= 0.1 * ni:
            # result large compared to index -- sort via index
            handled = IISet()
            hn = 0
            _load = getattr(sortIndex, '_load', None)
            if _load is None:
                # not an optimized index
                items = sortIndex.items()

                # Each item is a pair here; its second element is used as
                # the document set.
                _load = lambda (x1, x2): x2
                if sortReverse: items.reverse()
            elif sortReverse:
                # Use the index's own reverse ordering when it offers one,
                # otherwise reverse a copy of its keys.
                gRO = getattr(sortIndex, 'getReverseOrder', None)
                items = gRO and gRO()
                if items is None:
                    items = list(sortIndex._index.keys())
                    items.reverse()
            else:
                items = sortIndex._index.keys()
            for i in items:
                ids = intersection(seq, _load(i))
                if ids:
                    handled.update(ids)
                    hn += len(ids)
                    yield i, ids
            # Documents of seq that matched no index entry.
            if hn != len(seq): yield None, difference(seq, handled)
        else:
            # result relatively small -- sort via result
            m = OOBTree()
            keyFor = getattr(sortIndex, 'keyForDocument', None)
            # work around "nogopip" bug: it defines "keyForDocument" as an integer
            if not callable(keyFor):
                # this will fail, when the index neither defines a reasonable
                # "keyForDocument" nor "documentToKeyMap". In this case,
                # the index cannot be used for sorting.
                keyFor = lambda doc, map=sortIndex.documentToKeyMap(): map[doc]
            noValue = IITreeSet()
            for doc in seq.keys():
                try:
                    k = keyFor(doc)
                except KeyError:
                    # No key for this document: collect it separately.
                    noValue.insert(doc)
                    continue
                l = m.get(k)
                if l is None: l = m[k] = IITreeSet()
                l.insert(doc)
            items = m.items()
            if sortReverse:
                items = list(items)
                items.reverse()
            for i in items:
                yield i
            if noValue: yield None, noValue
Esempio n. 17
0
    def get(self, pattern):
        """ Query the lexicon for words matching a pattern.

        Without wildcards the pattern is looked up directly: the result
        is a 1-tuple holding the lexicon entry, or () when missing.  With
        wildcards, candidates are gathered through the pattern's digrams
        and filtered with the regex from createRegex(); the matching ids
        are returned as an IISet, or () when there are no candidates.
        """

        # single word pattern  produce a slicing problem below.
        # Because the splitter throws away single characters we can
        # return an empty tuple here.
        if len(pattern) == 1:
            return ()

        wildcards = [self.multi_wc, self.single_wc]

        digrams = []
        globbing = 0
        last = len(pattern) - 1
        for pos, char in enumerate(pattern):
            if char in wildcards:
                globbing = 1
                continue

            if pos == 0:
                digrams.insert(pos, (self.eow + char))
                digrams.append((char + pattern[pos + 1]))
            elif pos == last:
                # no following character: pair with the end-of-word marker
                digrams.append((char + self.eow))
            elif pattern[pos + 1] not in wildcards:
                digrams.append(char + pattern[pos + 1])

        if not globbing:
            exact = self._lexicon.get(pattern, None)
            if exact is None:
                return ()
            return (exact, )

        ## now get all of the intsets that contain the result digrams
        candidates = None
        for digram in digrams:
            candidates = union(candidates, self._digrams.get(digram, None))

        if not candidates:
            return ()

        ## now we have narrowed the list of possible candidates
        ## down to those words which contain digrams.  However,
        ## some words may have been returned that match digrams,
        ## but do not match 'pattern'.  This is because some words
        ## may contain all matching digrams, but in the wrong
        ## order.
        matcher = re.compile(self.createRegex(pattern))
        hits = IISet()
        for wid in candidates:
            if matcher.match(self._inverseLex[wid]):
                hits.insert(wid)
        return hits
Esempio n. 18
0
    def test_large(self):
        """Time the intersection of two equal large tree sets, then sets."""
        bigsize = BIGSETSIZE / 10
        scenarios = (
            (IITreeSet, 'Intersection Large tree sets'),
            (IISet, 'Intersection Large sets'),
        )
        for factory, label in scenarios:
            small = factory(xrange(bigsize))
            large = factory(xrange(bigsize))
            self.timing(small, large, label)
Esempio n. 19
0
    def test_small(self):
        """Time the intersection of two equal small tree sets, then sets."""
        smallsize = SMALLSETSIZE
        scenarios = (
            (IITreeSet, 'Intersection small tree sets'),
            (IISet, 'Intersection small sets'),
        )
        for factory, label in scenarios:
            small = factory(xrange(smallsize))
            large = factory(xrange(smallsize))
            self.timing(small, large, label)
Esempio n. 20
0
    def get(self, pattern):
        """ Query the lexicon for words matching a pattern.

        Without wildcards the pattern is looked up directly: the result
        is a 1-tuple holding the lexicon entry, or () when missing.  With
        wildcards, candidates are gathered through the pattern's digrams
        and filtered with the regex from createRegex(); the matching ids
        are returned as an IISet, or () when there are no candidates.
        """

        # single word pattern  produce a slicing problem below.
        # Because the splitter throws away single characters we can
        # return an empty tuple here.
        if len(pattern) == 1:
            return ()

        wildcards = [self.multi_wc, self.single_wc]

        digrams = []
        globbing = 0
        last = len(pattern) - 1
        for pos, char in enumerate(pattern):
            if char in wildcards:
                globbing = 1
                continue

            if pos == 0:
                digrams.insert(pos, (self.eow + char))
                digrams.append((char + pattern[pos + 1]))
            elif pos == last:
                # no following character: pair with the end-of-word marker
                digrams.append((char + self.eow))
            elif pattern[pos + 1] not in wildcards:
                digrams.append(char + pattern[pos + 1])

        if not globbing:
            exact = self._lexicon.get(pattern, None)
            if exact is None:
                return ()
            return (exact, )

        ## now get all of the intsets that contain the result digrams
        candidates = None
        for digram in digrams:
            candidates = union(candidates, self._digrams.get(digram, None))

        if not candidates:
            return ()

        ## now we have narrowed the list of possible candidates
        ## down to those words which contain digrams.  However,
        ## some words may have been returned that match digrams,
        ## but do not match 'pattern'.  This is because some words
        ## may contain all matching digrams, but in the wrong
        ## order.
        matcher = re.compile(self.createRegex(pattern))
        hits = IISet()
        for wid in candidates:
            if matcher.match(self._inverseLex[wid]):
                hits.insert(wid)
        return hits
 def items(self):
     """Return a two-entry list of (value, int set of rids) tuples.

     The first entry pairs the truth value of the indexed value with
     the index itself; the second pairs the opposite truth value with
     the rids from '_unindex' whose value differs from the indexed one.
     """
     indexed = self._index_value
     matching_entry = (bool(indexed), self._index)
     others = IISet()
     for rid, value in self._unindex.iteritems():
         if value != indexed:
             others.add(rid)
     return [matching_entry, (not bool(indexed), others)]
Esempio n. 22
0
 def items(self):
     """Return a two-entry list of (value, int set of rids) tuples.

     The first entry pairs the truth value of the indexed value with
     the index itself; the second pairs the opposite truth value with
     the rids from '_unindex' whose value differs from the indexed one.
     """
     indexed = self._index_value
     matching_entry = (bool(indexed), self._index)
     others = IISet()
     for rid, value in self._unindex.iteritems():
         if value != indexed:
             others.add(rid)
     return [matching_entry, (not bool(indexed), others)]
Esempio n. 23
0
    def test_lookup(self):
        """Print timings for IISet item access and has_key lookups.

        A large set is probed once per element of a small, evenly spread
        set: with the element itself, with the last position/value and
        with the first position/value.
        """
        bigsize = 1000000
        smallsize = 1000
        large = IISet(xrange(bigsize))
        small = IISet(xrange(0, bigsize, bigsize / smallsize))

        began = time()
        for probe in small:
            large[probe]
        print("\ngetitem distributed %.6f" % (time() - began))

        began = time()
        for probe in small:
            large[bigsize - 1]
        print("getitem end %.6f" % (time() - began))

        began = time()
        for probe in small:
            large[0]
        print("getitem start %.6f" % (time() - began))

        began = time()
        for probe in small:
            large.has_key(probe)
        print("\nhas_key distributed %.6f" % (time() - began))

        began = time()
        for probe in small:
            large.has_key(bigsize - 1)
        print("has_key end %.6f" % (time() - began))

        began = time()
        for probe in small:
            large.has_key(0)
        print("has_key start %.6f" % (time() - began))
Esempio n. 24
0
def _eval(query, cat):
    '''Evaluate *query* in the context of *cat* (a 'Products.ZCatalog.Catalog.Catalog').

    An 'ISearch' result is converted: via 'asSet()' when available, via
    'getTree()' for an 'IBTree', otherwise by loading its hits into a new
    'IISet'.  Any other result is returned unchanged.
    '''
    rs = query._eval(_QueryContext(cat))
    if not isinstance(rs, ISearch):
        return rs
    if hasattr(rs, 'asSet'):
        return rs.asSet()
    if isinstance(rs, IBTree):
        return rs.getTree()
    # Load the hits directly as the state of a fresh IISet.
    hits = tuple(rs)
    rs = IISet()
    rs.__setstate__((hits, ))
    return rs
Esempio n. 25
0
    def test_even_dist(self):
        """Time an evenly spread small IISet against a large tree set/set."""
        bigsize = BIGSETSIZE
        smallsize = SMALLSETSIZE
        scenarios = (
            (IITreeSet,
             '\nDifference Small set even distribution + large treeset'),
            (IISet,
             '\nDifference Small set even distribution + large set'),
        )
        for large_factory, banner in scenarios:
            small = IISet(xrange(0, bigsize, bigsize / smallsize))
            large = large_factory(xrange(bigsize))
            print(banner)
            self.timing(small, large)
Esempio n. 26
0
    def test_heavy_end(self):
        """Time a small IISet of the highest values against large operands."""
        bigsize = BIGSETSIZE
        smallsize = SMALLSETSIZE
        scenarios = (
            (IITreeSet, '\nDifference Small set high values + large treeset'),
            (IISet, '\nDifference Small set high values + large set'),
        )
        for large_factory, banner in scenarios:
            small = IISet(xrange(bigsize - smallsize, bigsize))
            large = large_factory(xrange(bigsize))
            print(banner)
            self.timing(small, large)
Esempio n. 27
0
    def test_empty(self):
        """Time an empty operand against a large one of the other type."""
        bigsize = BIGSETSIZE
        smallsize = 0
        scenarios = (
            (IISet, IITreeSet, '\nIntersection empty set + large treeset'),
            (IITreeSet, IISet, '\nIntersection empty tree set + large set'),
        )
        for small_factory, large_factory, banner in scenarios:
            small = small_factory(xrange(smallsize))
            large = large_factory(xrange(bigsize))
            print(banner)
            self.timing(small, large)
Esempio n. 28
0
    def test_empty(self):
        """Time intersections with an empty operand, in both orders."""
        bigsize = BIGSETSIZE
        smallsize = 0
        scenarios = (
            (IISet, IITreeSet,
             'Intersection empty set + large treeset',
             'Intersection large treeset + empty set'),
            (IITreeSet, IISet,
             'Intersection empty tree set + large set',
             'Intersection large set + empty tree set'),
        )
        for small_factory, large_factory, forward, backward in scenarios:
            small = small_factory(xrange(smallsize))
            large = large_factory(xrange(bigsize))
            self.timing(small, large, forward)
            self.timing(large, small, backward)
Esempio n. 29
0
    def count(self, brains):
        """ Intersect results

        Return a dict mapping each vocabulary key to the number of
        'brains' that the configured index matches for it.  A key found
        in the country groups is queried with that group's countries and
        this widget's operator.  Keys whose query returns nothing are
        left out; a false key gets len(brains).  Byte-string keys are
        stored decoded as UTF-8 (with 'replace').
        """
        res = {}
        sequence = [key for key, value in self.vocabulary()]
        if not sequence:
            return res

        index_id = self.data.get('index')
        if not index_id:
            return res

        ctool = getToolByName(self.context, 'portal_catalog')
        index = ctool._catalog.getIndex(index_id)
        apply_index = getattr(index, "_apply_index", None)
        if not apply_index:
            return res

        adapted = (self.context, self.request)
        countryGroups = getMultiAdapter(adapted, name=u'getCountryGroups')()

        getCountriesByGroupView = getMultiAdapter(
            (self.context, self.request), name=u'getCountriesByGroup')

        brains = IISet(brain.getRID() for brain in brains)
        for value in sequence:
            if not value:
                res[value] = len(brains)
                continue

            if value in countryGroups:
                # A group is queried with all of its countries at once.
                query = {
                    index_id: getCountriesByGroupView(value),
                    index_id + '_operator': self.operator,
                }
            else:
                query = {index_id: value}

            found = apply_index(query)
            if not found:
                continue
            rset, _used = found
            _weight, rset = weightedIntersection(brains, IISet(rset))
            if isinstance(value, str):
                value = value.decode('utf-8', 'replace')
            res[value] = len(rset)
        return res
Esempio n. 30
0
    def query_index(self, record, resultset=None):
        """Return the rids matching the first key of 'record'.

        Only the first key is considered, since every branch returns.  A
        key with the same truth value as the indexed value gives the
        index intersected with 'resultset'; otherwise the index is
        subtracted from 'resultset', or from '_unindex' when no resultset
        was given.  Without keys an empty IISet is returned.
        """
        index = self._index
        indexed = self._index_value

        for key in record.keys:
            if bool(key) is bool(indexed):
                # If we match the indexed value, check index
                return intersection(index, resultset)
            # Otherwise, remove from resultset or _unindex
            if resultset is None:
                return union(difference(self._unindex, index), IISet([]))
            return difference(resultset, index)
        return IISet()
Esempio n. 31
0
    def test_findlargesmallset(self):
        """Compare three ways of ordering two sets by length.

        The if/else, sorted() and min()/max() variants must agree on the
        (length, set) pairs; each variant is then run 1000 times and its
        elapsed time printed.
        """
        # Test different approaches to finding the large and small set
        bigsize = 10
        smallsize = 2
        o1 = IISet(xrange(bigsize))
        l1 = len(o1)
        o2 = IISet(xrange(0, bigsize, bigsize / smallsize))
        l2 = len(o2)

        # 3 approaches: if/else, sorted and max/min
        def alternative1():
            if l1 < l2:
                ls = l1
                small = o1
                lb = l2
                big = o2
            else:
                ls = l2
                small = o2
                lb = l1
                big = o1
            return (ls, small), (lb, big)

        def alternative2():
            return sorted(((l2, o2), (l1, o1)))

        def alternative3():
            small = min((l2, o2), (l1, o1))
            big = max((l2, o2), (l1, o1))
            return small, big

        reference = list(alternative1())
        self.failUnlessEqual(reference, list(alternative2()))
        self.failUnlessEqual(list(alternative1()), list(alternative3()))

        began = time()
        for _run in xrange(1000):
            alternative1()
        print('\nif/else took %.6f' % (time() - began))

        began = time()
        for _run in xrange(1000):
            alternative2()
        print('sorted took  %.6f' % (time() - began))

        began = time()
        for _run in xrange(1000):
            alternative3()
        print('minmax took  %.6f' % (time() - began))
Esempio n. 32
0
    def insertEntry(self, comp, id, level, parent_path=None, object_path=None):
        """Insert an entry.

           The entry is first inserted through PathIndex.insertEntry.

           parent_path is the path of the parent object; when given, the
           docid is added to the set kept for that parent.

           object_path is the object path, it is assumed to be unique,
           i.e. there is a one to one mapping between physical paths and
           docids; when given, it is mapped to the docid.

           id is the docid
        """

        PathIndex.insertEntry(self, comp, id, level)

        if parent_path is not None:
            parents = self._index_parents
            if not parents.has_key(parent_path):
                parents[parent_path] = IISet()
            parents[parent_path].insert(id)

        # We make the assumption that a full path corresponds one and only
        # one object.
        if object_path is not None:
            self._index_items[object_path] = id
Esempio n. 33
0
def extendedpathindex_apply_index(self, request, res=None):
    """ hook for (Z)Catalog

        'request' --  mapping type (usually {"path": "..." })
         additionally the parameters "level", "operator", "depth",
         "navtree" and "navtree_start" are read from the parsed request
         (see search())

        'res' -- passed on to search() as 'tmpres'

        Returns None when the request holds no keys for this index,
        otherwise a tuple '(docid set, (index id,))'.
    """

    record = parseIndexRequest(request, self.id, self.query_options)
    # identity test: "no query for this index" is signalled by None itself
    if record.keys is None:
        return None

    level = record.get("level", 0)
    operator = record.get('operator', self.useOperator).lower()
    depth = getattr(record, 'depth', -1)  # use getattr to get 0 value
    navtree = record.get('navtree', 0)
    navtree_start = record.get('navtree_start', 0)

    # depending on the operator we use intersection or union
    if operator == "or":
        set_func = union
    else:
        set_func = intersection

    result = None
    for k in record.keys:
        rows = self.search(k, level, depth, navtree, navtree_start, tmpres=res)
        result = set_func(result, rows)

    # an empty or missing result is always reported as an empty IISet
    if result:
        return result, (self.id, )
    else:
        return IISet(), (self.id, )
Esempio n. 34
0
    def _apply_index(self, request):
        """ See IPluggableIndex.

        o Unpacks the query from the catalog request and maps every key
          onto '_search'.  The per-key results are combined with 'union'
          for the "or" operator and with 'intersection' otherwise.
        """
        record = parseIndexRequest(request, self.id, self.query_options)
        keys = record.keys
        if keys is None:
            return None

        level = record.get("level", 0)
        operator = record.get('operator', self.useOperator).lower()

        # intersection unless the query explicitly asks for "or"
        combine = intersection
        if operator == "or":
            combine = union

        matched = None
        for key in keys:
            matched = combine(matched, self._search(key, level))

        # an empty or missing result is always reported as an empty IISet
        return (matched or IISet()), (self.id, )
Esempio n. 35
0
 def test_sortResults_reversed(self):
     # Sorting on the 'num' index with reverse=True must hand the records
     # back with 'num' running from 99 down to 0.
     catalog = self._make_one()
     rids = IISet([brain.getRID() for brain in catalog({'att1': 'att1'})])
     sort_index = catalog.getIndex('num')
     ordered = catalog.sortResults(rids, sort_index, reverse=True)
     expected = list(range(99, -1, -1))
     self.assertEqual([record.num for record in ordered], expected)
Esempio n. 36
0
 def items(self):
     # Return the '(key, value)' pairs of '_index' as a list; a value that
     # is a bare int is wrapped in a one-element IISet, every other value
     # is passed through untouched.
     def as_set(value):
         if isinstance(value, int):
             return IISet((value, ))
         return value

     return [(key, as_set(value)) for key, value in self._index.items()]
Esempio n. 37
0
 def group(self, seq):
   '''generate pairs that partition the documents of *seq* by sort value.

   When *seq* is large compared to the sort index, the index entries are
   visited (in reverse when 'self._sortReverse' is set) and every entry 'i'
   sharing documents with *seq* is yielded as '(i, ids)'.  Otherwise the
   documents of *seq* are bucketed by their key and the buckets are yielded
   in key order (reversed when requested).  In both cases the documents
   without a sort value come last, under the key 'None'.
   '''
   sortIndex = self._sortIndex; sortReverse = self._sortReverse
   ns = len(seq); ni = len(sortIndex)
   if ns >= 0.1 * ni:
     # result large compared to index -- sort via index
     handled = IISet(); hn = 0
     _load = getattr(sortIndex, '_load', None)
     if _load is None:
       # not an optimized index: walk its '(key, docids)' items.
       # Work on a copy so that reversing never touches a list owned by
       # the index.
       items = list(sortIndex.items())
       # pick the docids by position; a tuple parameter would be a syntax
       # error on Python 3
       _load = lambda item: item[1]
       if sortReverse: items.reverse()
     elif sortReverse:
       gRO = getattr(sortIndex, 'getReverseOrder', None)
       items = gRO and gRO()
       if items is None:
         items = list(sortIndex._index.keys()); items.reverse()
     else: items = sortIndex._index.keys()
     for i in items:
       ids = intersection(seq, _load(i))
       if ids:
         handled.update(ids); hn += len(ids)
         yield i, ids
     # documents no index entry accounted for have no sort value
     if hn != len(seq): yield None, difference(seq, handled)
   else:
     # result relatively small -- sort via result
     m = OOBTree()
     keyFor = getattr(sortIndex, 'keyForDocument', None)
     # work around "nogopip" bug: it defines "keyForDocument" as an integer
     if not callable(keyFor):
       # this will fail, when the index neither defines a reasonable
       # "keyForDocument" nor "documentToKeyMap". In this case,
       # the index cannot be used for sorting.
       keyFor = lambda doc, keymap=sortIndex.documentToKeyMap(): keymap[doc]
     noValue = IITreeSet()
     for doc in seq.keys():
       try: k = keyFor(doc)
       except KeyError: noValue.insert(doc); continue
       l = m.get(k)
       if l is None: l = m[k] = IITreeSet()
       l.insert(doc)
     items = m.items()
     if sortReverse: items = list(items); items.reverse()
     for i in items: yield i
     if noValue: yield None, noValue
Esempio n. 38
0
    def lookupWordsBySimilarity(self, word):
        """ perform a similarity lookup

            Every '(word, threshold)' pair the lexicon reports for *word*
            contributes its stored docids; the thresholds are returned as
            the word weights of the ResultSet.
        """
        similar = self._lexicon.getSimiliarWords(word)

        matched_docids = IISet()
        thresholds = {}
        word_id_for = self._lexicon.getWordId

        for candidate, threshold in similar:
            thresholds[candidate] = threshold
            matched_docids.update(self._storage.get(word_id_for(candidate)))

        return ResultSet(matched_docids, thresholds)
class AccountingFolder(BaseFolder, BrowserDefaultMixin):
    """Folder that reports itself as accounting root and records closing
    transfer dates.

    The dates are kept as integers in the '_closing_transfers' IISet that
    '__init__' creates.
    """
    security = ClassSecurityInfo()

    implements(interfaces.IAccountingFolder)

    meta_type = 'AccountingFolder'
    _at_rename_after_creation = True

    schema = AccountingFolder_schema

    ##code-section class-header #fill in your manual code here
    ##/code-section class-header

    # Methods

    # Manually created methods

    def __init__(self, oid, **kwargs):
        # Let BaseFolder set itself up first, then start with an empty
        # integer set of closing transfer dates.
        BaseFolder.__init__(self, oid, **kwargs)
        self._closing_transfers = IISet()

    security.declareProtected(permissions.View, 'getAccountingRoot')
    def getAccountingRoot(self):
        ''' Return 'self' as accounting root
        '''
        return self

    def displayContentsTab(self):
        """ Hide contents tab (always answers False)
        """
        return False

    def registerClosingDate(self, date):
        """ register closing transfer date

        'date' must offer a 'Date()' method (presumably a Zope DateTime --
        confirm against the callers).
        """
        # strip time before insert: a DateTime is rebuilt from the value of
        # 'Date()' only and stored as an integer
        date = int(DateTime(date.Date()))
        self._closing_transfers.insert(date)

    def getClosingDates(self):
        """ return all registered closing dates

        The internal set itself is returned, not a copy.
        """
        return self._closing_transfers
Esempio n. 40
0
    def _lookup(self, words, do_autoexpand=1):
        """ search a word or a list of words in the lexicon and 
            return a ResultSet of found documents.

            'words' is never modified: stop word removal and thesaurus
            expansion work on their own lists.

            'do_autoexpand' switches the right-truncation expansion on or
            off; it only takes effect when 'self.autoexpand' is set and the
            word is at least 'self.autoexpand_limit' characters long.
        """

        docids = IISet()
        used_words = {}

        #  remove stopwords from data
        if self.use_stopwords:
            words = self.use_stopwords.process(words)

        if self.use_thesaurus and self.thesaurus_mode == 'expand_always':
            TH = ThesaurusRegistry.get(self.use_thesaurus)
            # Expand into a fresh list: extending 'words' in place would
            # silently grow a list that still belongs to the caller
            # whenever no stop word processing replaced it above.
            expanded = list(words)
            for word in words:
                expanded.extend(TH.getTermsFor(word))
            words = expanded

        for word in words:

            # perform casefolding if necessary
            if self.splitter_casefolding:
                word = word.lower()

            if self.use_normalizer:
                word = NormalizerRegistry.get(self.use_normalizer).process(word)

            # words given (or derived) directly count with full weight
            used_words[word] = 1.0

            wid = self._lexicon.getWordId(word)

            # Retrieve list of docIds for this wordid
            if wid is not None:
                docids.update(self._storage.get(wid))

            # perform autoexpansion of terms by performing
            # a search using right-truncation
            if do_autoexpand and self.autoexpand and len(word) >= self.autoexpand_limit:
                rs = self.lookupWordsByTruncation(word, right=1)
                docids.update(rs.docIds())
                wlen = len(word)
                # the weight of an expanded word is looked up by the number
                # of characters it adds to the searched word
                for w in rs.words().keys():
                    used_words[w] = TRUNC_WEIGHT[len(w)-wlen]

        return ResultSet(docids, used_words)
def setOperation(op, sets, isearch):
  '''combine *sets* with *op*; with *isearch*, return an incremental search.

  *op* may be '"and"' or '"or"'.  A result of 'None' means "all results".

  Uses 'IncrementalSearch', if available.
  '''
  is_and = op == 'and'
  if not sets:
    if is_and: return None # None means all results
    if not isearch: return IISet()
    empty = IOr(); empty.complete()
    return empty
  # Note: "multiunion" is *much* faster than "IOr"!
  if IAnd is not None and (isearch or (is_and and len(sets) > 1)):
    searches = []
    for candidate in sets:
      if candidate is None:
        # all results: neutral for "and", absorbing for everything else
        if is_and: continue
        return None
      if not isinstance(candidate, ISearch): candidate = IBTree(candidate)
      searches.append(candidate)
    if is_and and not searches: return None # empty 'and'
    if len(searches) == 1:
      # do not wrap a one element search
      search = searches[0]
    else:
      if is_and: search = IAnd(*searches)
      else: search = IOr(*searches)
      search.complete()
    if isearch: return search
    if hasattr(search, 'asSet'): return search.asSet()
    result = IISet(); result.__setstate__((tuple(search),))
    return result
  if op == 'or' and len(sets) > 5:
    return multiunion(sets)
  if is_and: combine = intersection
  else: combine = union
  result = None
  for candidate in sets: result = combine(result, candidate)
  if result is None:
    if is_and: return None
    result = IISet()
  if isearch: result = IBTree(result)
  return result
Esempio n. 42
0
class FilteredSetBase(object):
    """ Base class for all filtered sets.
    
        A filtered set is a collection of documents represented
        by their document ids that match a common criteria given
        by a condition.

        Subclasses have to provide 'index_doc'; this base class only
        manages the id, the expression and the set of document ids.
    """

    implements(ITopicFilteredSet)

    def __init__(self, id, expr):
        self.id   = id
        self.expr = expr
        self.clear()

    def clear(self):
        # start over with an empty set of document ids
        self._ids  = IISet()

    def index_doc(self, docid, context):
        # deciding whether a document matches is up to the subclass
        raise NotImplementedError

    def unindex_doc(self, docid):
        # removing a document id that was never stored is not an error
        try: self._ids.remove(docid)
        except KeyError: pass

    def getId(self):            
        return self.id

    def getExpression(self):    
        return self.expr

    def setExpression(self, expr): 
        self.expr = expr

    def getIds(self): 
        return self._ids

    def __repr__(self):
        # list() yields the same list as the Python 2-only 'map(None, ...)'
        # idiom, and also works where 'map' requires a callable
        return '%s: (%s) %s' % (self.id, self.expr, list(self._ids))

    __str__ = __repr__
Esempio n. 43
0
 def group(self, seq):
   '''generate pairs that partition the documents of *seq* by sort value.

   When *seq* is large compared to the sort index, the index entries are
   visited (in reverse when 'self._sortReverse' is set) and every entry 'i'
   sharing documents with *seq* is yielded as '(i, ids)'.  Otherwise the
   documents of *seq* are bucketed by 'keyForDocument' and the buckets are
   yielded in key order (reversed when requested).  In both cases the
   documents without a sort value come last, under the key 'None'.
   '''
   sortIndex = self._sortIndex; sortReverse = self._sortReverse
   ns = len(seq); ni = len(sortIndex)
   if ns >= 0.1 * ni:
     # result large compared to index -- sort via index
     handled = IISet(); hn = 0
     _load = getattr(sortIndex, '_load', None)
     if _load is None:
       # not an optimized index: walk its '(key, docids)' items.
       # Work on a copy so that reversing never touches a list owned by
       # the index.
       items = list(sortIndex.items())
       # pick the docids by position; a tuple parameter would be a syntax
       # error on Python 3
       _load = lambda item: item[1]
       if sortReverse: items.reverse()
     elif sortReverse:
       gRO = getattr(sortIndex, 'getReverseOrder', None)
       items = gRO and gRO()
       if items is None:
         items = list(sortIndex._index.keys()); items.reverse()
     else: items = sortIndex._index.keys()
     for i in items:
       ids = intersection(seq, _load(i))
       if ids:
         handled.update(ids); hn += len(ids)
         yield i, ids
     # documents no index entry accounted for have no sort value
     if hn != len(seq): yield None, difference(seq, handled)
   else:
     # result relatively small -- sort via result
     keyFor = sortIndex.keyForDocument; m = OOBTree()
     noValue = IITreeSet()
     for doc in seq.keys():
       try: k = keyFor(doc)
       except KeyError: noValue.insert(doc); continue
       l = m.get(k)
       if l is None: l = m[k] = IITreeSet()
       l.insert(doc)
     items = m.items()
     if sortReverse: items = list(items); items.reverse()
     for i in items: yield i
     if noValue: yield None, noValue
Esempio n. 44
0
 def count(self, context, facet, intersect=None):
     """Count the records matching this filter for *facet*.

     *intersect*, when given, restricts the count to the records it
     contains as well; it may be query results (converted to an IISet of
     their keys), an IISet, or another iterable of record ids.

     A cached record set is used when one is stored under the name
     '<facet name>.<filter name>' and is not affected by invalidated
     records; otherwise the query is run and its frozen result cached.
     """
     if IQueryResults.providedBy(intersect):
         intersect = IISet(intersect.keys())
     sm = sitemanager_for(context)
     unique_name = '%s.%s' % (facet.name, self.name)
     cache_tools = queryUtility(ISetCacheTools, context=sm)
     invalidated = cache_tools.invalidated_records
     if not isinstance(invalidated, IISet):
         invalidated = IISet(invalidated)
     if isinstance(intersect, IISet):
         invalid = len(intersection(intersect, invalidated)) > 0
     else:
         # Without an IISet to compare against (no restriction at all, or
         # an arbitrary iterable) we cannot tell which invalidated records
         # matter, so any pending invalidation discards the cached set.
         # Leaving 'invalid' unbound here used to raise UnboundLocalError
         # as soon as a cached set was found.
         invalid = len(invalidated) > 0
     if unique_name in cache_tools.filter_setid_cache:
         setid = cache_tools.filter_setid_cache[unique_name]
         if setid in cache_tools.set_cache:
             if invalid:
                 # drop the stale entries and fall through to a new query
                 del cache_tools.set_cache[setid]
                 del cache_tools.filter_setid_cache[unique_name]
             else:
                 records = cache_tools.set_cache[setid]
                 if intersect is None:
                     return len(records)
                 if isinstance(intersect, IISet):
                     #optimal to cast smaller set to match IISet.
                     return len(intersection(intersect, IISet(records)))
                 return len(set(intersect) & records)
     #otherwise, at this point, no cached value, so query catalog...
     qf = self(unique_name)
     runner = AdvancedQueryRunner(context)
     result = runner(qf)
     setid = result.setid
     cache_tools.set_cache[setid] = result.frozen
     cache_tools.filter_setid_cache[unique_name] = setid
     if intersect is None:
         return len(result)
     if isinstance(intersect, IISet):
         return len(intersection(intersect, IISet(result.frozen)))
     return len(set(intersect) & result.frozen)
Esempio n. 45
0
def nearResultSets(sets, index, distance=5, bidirectional=1):
    """ perform near search on results sets

        Keeps those documents common to all *sets* whose position map
        reports at least one match within *distance*; *bidirectional*
        selects which of the two position checks is used.
    """

    # One resultset consists of an IISet() of documentIds and the words
    # that led to them.  Only documents that contain all the words can
    # qualify, so start from the intersection of the result sets.
    candidates = intersectResultSets(sets).docIds()

    # Collect the words of all result sets; for every candidate document
    # their positions are compared to determine neighbourship.
    words = [word for result in sets for word in result.words().keys()]

    near_docids = IISet()
    for docId in candidates:
        # the posMap is a list of tuples(word,IISet[positions])
        posMap = index.positionsFromDocumentLookup(docId, words)
        if bidirectional:
            hits = posMap.checkPositionMapBidirectional(distance)
        else:
            hits = posMap.checkPositionMapUnidirectional(distance)
        if len(hits) > 0:
            near_docids.insert(docId)

    # every searched word counts with full weight
    return ResultSet(near_docids, dict.fromkeys(words, 1.0))
Esempio n. 46
0
    def __init__(self, id, title, skelton, fileattache, parent, elements):
        """Set up indexing, timestamps, storage and the skelton of the site.

        A lexicon is only added when *elements* is given.
        """
        if elements:
            # imported here because it is only needed on this path
            from Products.ZCTextIndex.ZCTextIndex import manage_addLexicon
            manage_addLexicon(self, id='lexicon', elements=elements)

        # build the indexes on the instance wrapped in its parent
        wrapped = self.__of__(parent)
        wrapped._buildIndexing(id, title)

        # creation and modification time start out identical
        now = time()
        self.created = self.modified = now

        self.fileattache = fileattache

        self.data = IOBTree()  # id -> Message
        self.ids = IISet()  # ids of children

        self.loadSkelton(None, skelton)
        self.loadProperties(skelton)
        self.skelton = skelton
    def index_object(self, documentId, obj, threshold=None):
        """ Index an object:
        'documentId' is the integer id of the document

        'obj' is the object to be indexed

        'threshold' is the number of words to process between
        committing subtransactions.  If 'None' subtransactions are
        disabled.  (The value is not used anywhere in this method.)

        Returns the number of word ids recorded for the document, or 0
        when the source attribute cannot be read from 'obj'. """

        # sniff the object for our 'id', the 'document source' of the
        # index is this attribute.  If it smells callable, call it.
        try:
            source = getattr(obj, self.id)
            if safe_callable(source):
                source = source()

            # anything that is not already unicode is indexed via str()
            if not isinstance(source, UnicodeType):
                source = str(source)

        except (AttributeError, TypeError):
            return 0

        # sniff the object for 'id'+'_encoding'; fall back to latin1 when
        # the object does not provide it

        try:
            encoding = getattr(obj, self.id+'_encoding')
            if safe_callable(encoding ):
                encoding = str(encoding())
            else:
                encoding = str(encoding)
        except (AttributeError, TypeError):
            encoding = 'latin1'

        lexicon = self.getLexicon()

        splitter = lexicon.Splitter

        # word -> number of occurrences counted below
        wordScores = OIBTree()
        last = None

        # Run through the words and score them

        for word in list(splitter(source,encoding=encoding)):
            if word[0] == '\"':
                # a quoted word: its content (quotes stripped) is handed to
                # '_subindex', which also reports the new 'last' word
                last = self._subindex(word[1:-1], wordScores, last, splitter)
            else:
                # a word directly repeating its predecessor is not counted
                if word==last: continue
                last=word
                wordScores[word]=wordScores.get(word,0)+1

        # Convert scores to use wids:
        widScores=IIBucket()
        getWid=lexicon.getWordId
        for word, score in wordScores.items():
            widScores[getWid(word)]=score

        del wordScores

        # word ids stored for this document by an earlier indexing run
        currentWids=IISet(self._unindex.get(documentId, []))

        # Get rid of document words that are no longer indexed
        self.unindex_objectWids(documentId, difference(currentWids, widScores))

        # Now index the words. Note that the new xIBTrees are clever
        # enough to do nothing when there isn't a change. Woo hoo.
        insert=self.insertForwardIndexEntry
        for wid, score in widScores.items():
            insert(wid, documentId, score)

        # Save the unindexing info if it's changed:
        wids=widScores.keys()
        if wids != currentWids.keys():
            self._unindex[documentId]=wids

        return len(wids)
Esempio n. 48
0
    def _build_degenerate_tree(self):
        # Hand-assemble an IITreeSet out of one-key buckets and nested
        # one-child tree nodes, check it, and return it together with the
        # keys it holds.
        from BTrees.IIBTree import IISet
        from BTrees.IIBTree import IITreeSet
        from BTrees.check import check

        def chained_bucket(key, successor):
            # a bucket holding 'key' whose next bucket is 'successor'
            bucket = IISet()
            bucket.__setstate__(((key,), successor))
            return bucket

        def node(children, first_bucket):
            # a tree node with the given children and first bucket
            tree = IITreeSet()
            tree.__setstate__((children, first_bucket))
            return tree

        # Build the buckets and chain them together.
        bucket11 = IISet([11])
        bucket7 = chained_bucket(7, bucket11)
        bucket5 = chained_bucket(5, bucket7)
        bucket3 = chained_bucket(3, bucket5)
        bucket1 = chained_bucket(1, bucket3)

        # Build the deepest layers of indirection nodes.
        tree1 = node((bucket1,), bucket1)
        tree3 = node((bucket3,), bucket3)
        tree5 = node((node((bucket5,), bucket5),), bucket5)
        tree7 = node((bucket7,), bucket7)
        tree11 = node((bucket11,), bucket11)

        # Paste together the middle layers.
        tree13 = node((tree1, 2, tree3), bucket1)
        tree5711 = node(
            (node((tree5, 6, tree7, 10, tree11), bucket5),), bucket5)

        # One more.
        root = node((tree13, 4, tree5711), bucket1)
        root._check()
        check(root)
        return root, [1, 3, 5, 7, 11]
Esempio n. 49
0
def checkCatalog(path,indexes):
    """ perform some consistency checks on a ZCatalog instance

    'path' is traversed from the application root to find the catalog;
    the process exits with status 1 when that fails.

    'indexes' limits the per-index checks to indexes whose meta type is
    listed; an empty value checks every index.

    All findings are printed; nothing is returned.
    """

    root = Zope2.app()

    try:
        catalog = root.unrestrictedTraverse(path)
    except AttributeError:
        print('Error: catalog object not found')
        sys.exit(1)

    # get Catalog instance
    _cat = catalog._catalog

    # check Catalog internal BTrees
    l_data = sorted(_cat.data.keys())
    l_uids = sorted(_cat.uids.values())
    # Read the 'paths' mapping itself: taking the keys of 'data' a second
    # time made the data/paths comparison below compare data with itself.
    l_paths = sorted(_cat.paths.keys())

    print("Checking catalog internal BTrees")
    print("\tINFO: Mapping data:  %d entries" % len(l_data))
    print("\tINFO: Mapping uids:  %d entries" % len(l_uids))
    print("\tINFO: Mapping paths: %d entries" % len(l_paths))

    if l_data == l_uids:
        print("\tOK:  Mapping data equals Mapping uids")
    else:
        print("\tERR: Mapping data does not equal Mapping uids")

    if l_data == l_paths:
        print("\tOK:  Mapping data equals Maaping paths")
    else:
        print("\tERR: Mapping data does not equal Maaping paths")

    # record ids known to the catalog; index entries outside are stale
    known_rids = IISet(_cat.data.keys())

    def report(kind, rids):
        # print the verdict for one direction ('forward' or 'backward')
        diff = difference(rids, known_rids)
        if len(diff)!=0:
            print('\tERR: Problem with %s entries' % kind)
            print('\tERR: too much %s entries: %s' % (kind, diff))
        else:
            print('\tOK:  %s entries (%d entries)'
                  % (kind.capitalize(), len(rids)))

    # check BTrees of indexes

    for id,idx in _cat.indexes.items():

        if indexes and not idx.meta_type in indexes: continue

        print("Checking index '%s' (type: %s)" % (id, idx.meta_type))

        if idx.meta_type in ['FieldIndex','KeywordIndex']:

            # check forward entries: a value is either a single record id
            # or a set of them
            RIDS = IISet()
            for key, rids in idx._index.items():
                if isinstance(rids, int):
                    RIDS.insert(rids)
                else:
                    RIDS.update(rids.keys())

            report('forward', RIDS)

        elif idx.meta_type in ['PathIndex']:

            # only the first value stored per component is inspected
            RIDS = IISet()
            for levels in list(idx._index.values()):
                RIDS.update(levels.values()[0])

            report('forward', RIDS)

        if idx.meta_type in ['FieldIndex','KeywordIndex','PathIndex']:

            # check backward entries
            report('backward', IISet(idx._unindex.keys()))
    def search(self, path, default_level=0, depth=-1, navtree=0,
                                                             navtree_start=0):
        """
        Return the set of docids matching a path query.

        path is either a string representing a
        relative URL or a part of a relative URL or
        a tuple (path,level).

        level >= 0  starts searching at the given level
        level <  0  tries the path components at every level of the index

        depth, navtree and navtree_start select one of the specialised
        lookups below (navigation tree, breadcrumbs, single object, direct
        children) or limit how far below the path the search reaches.
        """

        # a tuple query carries its own start level
        if isinstance(path, basestring):
            startlevel = default_level
        else:
            startlevel = int(path[1])
            path = path[0]

        absolute_path = isinstance(path, basestring) and path.startswith('/')

        # path components without the empty strings produced by slashes
        comps = filter(None, path.split('/'))

        # components of the path as given, with a leading '' for the root
        orig_comps = [''] + comps[:]
        # Optimization - avoid using the root set
        # as it is common for all objects anyway and add overhead
        # There is an assumption about catalog/index having
        # the same container as content
        if default_level == 0:
            indexpath = list(filter(None, self.getPhysicalPath()))
            # strip the leading components shared with the index's own
            # physical path and start one level deeper for each of them
            while min(len(indexpath), len(comps)):
                if indexpath[0] == comps[0]:
                    del indexpath[0]
                    del comps[0]
                    startlevel += 1
                else:
                    break

        # nothing left to match and no limits: every indexed docid matches
        if len(comps) == 0:
            if depth == -1 and not navtree:
                return IISet(self._unindex.keys())

        # Make sure that we get depth = 1 if in navtree mode
        # unless specified otherwise

        orig_depth = depth
        if depth == -1:
            # '0 or navtree' evaluates to the value of navtree
            depth = 0 or navtree

        # Optimized navtree starting with absolute path
        if absolute_path and navtree and depth == 1 and default_level==0:
            set_list = []
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i])
                parent_path = parent_path and parent_path or '/'
                try:
                    set_list.append(self._index_parents[parent_path])
                except KeyError:
                    # no children recorded for this parent path
                    pass
            return multiunion(set_list)
        # Optimized breadcrumbs
        elif absolute_path and navtree and depth == 0 and default_level==0:
            item_list = IISet()
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i])
                parent_path = parent_path and parent_path or '/'
                try:
                    item_list.insert(self._index_items[parent_path])
                except KeyError:
                    # no object recorded for this path
                    pass
            return item_list
        # Specific object search
        elif absolute_path and orig_depth == 0 and default_level == 0:
            try:
                return IISet([self._index_items[path]])
            except KeyError:
                return IISet()
        # Single depth search
        elif absolute_path and orig_depth == 1 and default_level == 0:
            # only get objects contained in requested folder
            # (the stored set itself is returned, not a copy)
            try:
                return self._index_parents[path]
            except KeyError:
                return IISet()
        # Sitemaps, relative paths, and depth queries
        elif startlevel >= 0:

            pathset = None # Same as pathindex
            navset  = None # For collecting siblings along the way
            depthset = None # For limiting depth

            # entries stored under the key None are looked up by level
            if navtree and depth and \
                   self._index.has_key(None) and \
                   self._index[None].has_key(startlevel):
                navset = self._index[None][startlevel]

            for level in range(startlevel, startlevel+len(comps) + depth):
                if level-startlevel < len(comps):
                    # still inside the requested path: narrow pathset by
                    # the component expected at this level
                    comp = comps[level-startlevel]
                    if not self._index.has_key(comp) or not self._index[comp].has_key(level): 
                        # Navtree is inverse, keep going even for
                        # nonexisting paths
                        if navtree:
                            pathset = IISet()
                        else:
                            return IISet()
                    else:
                        pathset = intersection(pathset,
                                                     self._index[comp][level])
                    if navtree and depth and \
                           self._index.has_key(None) and \
                           self._index[None].has_key(level+depth):
                        navset  = union(navset, intersection(pathset,
                                              self._index[None][level+depth]))
                if level-startlevel >= len(comps) or navtree:
                    # below the requested path (or in navtree mode):
                    # collect what is recorded for this level
                    if self._index.has_key(None) and self._index[None].has_key(level):
                        depthset = union(depthset, intersection(pathset,
                                                    self._index[None][level]))

            # an empty or missing result is always an empty IISet
            if navtree:
                return union(depthset, navset) or IISet()
            elif depth:
                return depthset or IISet()
            else:
                return pathset or IISet()

        else:
            # negative start level: try the components at every level and
            # merge the levels where all of them were found
            results = IISet()
            for level in range(0,self._depth + 1):
                ids = None
                error = 0
                for cn in range(0,len(comps)):
                    comp = comps[cn]
                    try:
                        ids = intersection(ids,self._index[comp][level+cn])
                    except KeyError:
                        error = 1
                if error==0:
                    results = union(results,ids)
            return results
def languageindex_search(self, language, fallback=True, res=None):
    """Return the docids indexed for *language*.

    None is returned when the main language is not indexed at all,
    otherwise an IISet.  With *fallback* the entries of all sub tags of
    the main language are merged; without it only the given sub tag is
    used, and a given *res* is filtered directly instead.
    """
    main, sub = splitLanguage(language)

    if main not in self._index:
        return None

    if fallback:
        # Search in sorted order, specific sub tag first, None second
        subs = sorted(self._index[main].keys())
        if sub in subs:
            subs.remove(sub)
            subs.insert(0, sub)
    elif res is None:
        subs = [sub]
    else:
        # We do not support any optimization when fallback is enabled.
        #
        # TODO: The core loop is not in C here. Casual benchmarks suggest
        # this is still more efficient than trying to move it to C. The
        # problem is that we only have an IISet of docids as an input. We
        # need to filter this per language. The available index structures
        # we have are:
        #
        # IndexEntry objects used as entries. Complex objects storing
        # docid, main and sub languages and UID of the canonical. Their
        # hash and compare function uses the canonical UID.
        #
        # self._index
        # An OOBTreeSet structure per language. In the outermost nodes we
        # have OOBTree's per language. Useful to get all items in a
        # language. Otherwise useless, as we would have to compare the
        # docid attribute of the object in the tree against our wanted
        # set, requiring a full loop over all items.
        #
        # self._unindex
        # An IOBTree of docid to entry. Better to match our docid wanted
        # set, but we would still have to compare the language code to the
        # entry object itself.
        #
        # self._sortindex
        # An IOBTree of docid to language tag. Looks like the best
        # candidate for us, as we can compare the language directly as a
        # simple string comparison.
        #
        # One thing to keep in mind, is that once we get a wanted set,
        # this will usually have gone through a path query already. This
        # means we will almost always already have matching set and won't
        # filter out any item at all. So the edge-case of a 100% match is
        # actually the most common one for us.
        #
        # Casual benchmarks show that trying to construct an IOBTree from
        # the wanted set and intersecting it with the sortindex is still
        # slower than having the core loop in Python code.
        tag = lang_tag(main, sub)

        matching = IISet()
        for docid in res:
            if self._sortindex.get(docid) == tag:
                matching.insert(docid)
        return matching

    merged = OOSet()
    for sublanguage in subs:
        merged = oo_union(merged, self._index[main][sublanguage])

    return IISet(entry.docid for entry in merged)
Esempio n. 52
0
 def clear(self):
     """Drop every stored id by rebinding '_ids' to a new, empty IISet."""
     self._ids = IISet()
 def __init__(self, oid, **kwargs):
     """Run the BaseFolder set-up, then create an empty '_closing_transfers'
     IISet."""
     BaseFolder.__init__(self, oid, **kwargs)
     self._closing_transfers = IISet()
Esempio n. 54
0
class ZchSite(ZCatalog.ZCatalog):
    """A Zch Site is a self contained web-based news publishing and discussion system"""

    # NOTE(review): Zope's publisher traditionally refuses to publish methods
    # that have no docstring.  Methods in this class that had no docstring
    # are therefore documented with '#' comments only, so that nothing new
    # becomes reachable by URL.

    meta_type = 'Zch Site'
    description = 'Zch Site'

    security = ClassSecurityInfo()
    security.setPermissionDefault(ManageZch, ('Manager',))
    security.setPermissionDefault(AddArticleZch, ('Manager',))
    security.setPermissionDefault(AddCommentZch, ('Anonymous', 'Manager',))
    security.setPermissionDefault(View, ('Anonymous', 'Manager',))

    icon = 'misc_/Zch/Zch_img'

    _properties = ({'id': 'title', 'type': 'string', 'mode': 'w'},)

    # Class-level defaults; __init__ and manage_edit overwrite `fileattache`
    # on the instance.
    fileattache = 0
    sage = 0

    manage_options = (
        {'label': 'Contents', 'icon': icon, 'action': 'manage_main', 'target': 'manage_main'},
        {'label': 'View', 'icon': '', 'action': 'index_html', 'target': 'manage_main'},
        {'label': 'Postings', 'icon': '', 'action': 'manage_postings', 'target': 'manage_main'},
        {'label': 'Options', 'icon': '', 'action': 'manage_editForm', 'target': 'manage_main'},
        {'label': 'Properties', 'icon': '', 'action': 'manage_propertiesForm', 'target': 'manage_main'},
        {'label': 'Catalog', 'icon': '', 'action': 'manage_catalogView', 'target': 'manage_main'},
        {'label': 'Indexes', 'icon': '', 'action': 'manage_catalogIndexes', 'target': 'manage_main'},
        {'label': 'Security', 'icon': '', 'action': 'manage_access', 'target': 'manage_main'},
        {'label': 'Undo', 'icon': '', 'action': 'manage_UndoForm', 'target': 'manage_main'},
    )

    security.declareProtected(ManageZch, 'manage_postings')
    manage_postings = HTMLFile('dtml/manage_postings', globals())

    security.declareProtected(ManageZch, 'manage_editForm')
    manage_editForm = HTMLFile('dtml/editForm', globals())

    security.declarePrivate('_buildIndexing')
    def _buildIndexing(self, id, title):
        # (Re)build the catalog layout: initialise the ZCatalog base if it
        # has not been set up yet, then replace every index and metadata
        # column with the fixed set used by Zch.
        if not hasattr(self, '_catalog'):
            ZCatalog.ZCatalog.__init__(self, id, title)

        # delete any existing indexes
        for name in self.indexes():
            self.delIndex(name)

        # add the default indexes
        default_indexes = [('meta_type', 'FieldIndex'),
                           ('author', 'FieldIndex'),
                           ('body', 'ZCTextIndex'),
                           ('title', 'ZCTextIndex'),
                           ('date', 'FieldIndex')]
        for (name, index_type) in default_indexes:
            if index_type == 'ZCTextIndex':
                # Text indexes need an "extra" record naming the attribute,
                # the ranking algorithm and the lexicon to use.
                extras = EmptyClass()
                extras.doc_attr = name
                extras.index_type = 'Okapi BM25 Rank'
                extras.lexicon_id = 'lexicon'
                self.addIndex(name, index_type, extra=extras)
            else:
                self.addIndex(name, index_type)

        # delete the default metadata columns
        for name in self.schema():
            self.delColumn(name)

        # Add the meta data columns for search results
        for name in ['id', 'title', 'absolute_url', 'author', 'date_posted',
                     'date', 'body', 'tnum']:
            self.addColumn(name, '')

    security.declareProtected(ManageZch, 'recatalogPostings')
    def recatalogPostings(self, REQUEST=None):
        """ Clear the Catalog and then Index all the postings. """
        self._catalog.clear()
        for article_id in self.ids:
            article = self.data[article_id].__of__(self)
            # Bodies stored as a list of lines are flattened to one string
            # (and written back to the posting) before indexing.
            if isinstance(article.body, list):
                article.body = '\n'.join(article.body)
            for comment_id in article.ids:
                comment = self.data[comment_id].__of__(article)
                if isinstance(comment.body, list):
                    comment.body = '\n'.join(comment.body)
                self.catalog_object(comment, '/'.join(comment.getPhysicalPath()))

            self.catalog_object(article, '/'.join(article.getPhysicalPath()))

        if REQUEST is not None:
            return REQUEST.RESPONSE.redirect(REQUEST['HTTP_REFERER'])

    security.declareProtected(ManageZch, 'loadSkelton')
    def loadSkelton(self, REQUEST, skelton='zch'):
        "Add Page Template PythonScript, DTMLMethod and Image read from skelton directory."
        import logging

        skelton_dir = os.path.join(package_home(globals()), 'skelton', skelton)
        for entry in os.listdir(skelton_dir):
            # Only files with one of these suffixes are installed.
            for suffix in ('.pt', '.pys', '.dtml', '.gif'):
                if entry.endswith(suffix):
                    break
            else:
                continue

            # try/finally (not `with`) keeps this usable on old Python 2
            # releases while still closing the file if read() fails.
            f = open(os.path.join(skelton_dir, entry), 'rb')
            try:
                content = f.read()
            finally:
                f.close()

            object_id = entry[:-len(suffix)]
            try:
                if suffix == '.pt':
                    manage_addPageTemplate(self, object_id, '', content, encoding='utf-8')
                elif suffix == '.pys':
                    manage_addPythonScript(self, object_id)
                    self._getOb(object_id).write(content)
                elif suffix == '.dtml':
                    self.manage_addDTMLMethod(object_id, '', content)
                else:  # '.gif'
                    self.manage_addImage(object_id, content, content_type='image/gif')
            except Exception:
                # Installing a skeleton entry stays best-effort (one bad
                # file must not abort the rest), but the failure is now
                # logged instead of vanishing, and KeyboardInterrupt /
                # SystemExit are no longer swallowed.
                logging.getLogger('Zch').warning(
                    'loadSkelton: could not install %r', entry, exc_info=True)
        if REQUEST is not None:
            return REQUEST.RESPONSE.redirect(REQUEST['HTTP_REFERER'])

    security.declarePrivate('loadProperties')
    def loadProperties(self, skelton):
        "Add properties from 'properties' file."
        pattern = re.compile(r'(\w+?):(\w+?)=\s*(.*)\s*')
        path = os.path.join(package_home(globals()), 'skelton', skelton, 'properties')

        # Read the file once; it used to be opened and parsed twice.
        f = open(path, 'r')
        try:
            lines = f.readlines()
        finally:
            f.close()

        # Each usable line looks like "<id>:<type>=<value>"; lines starting
        # with '#' are comments, lines that do not match are ignored.
        entries = []
        for line in lines:
            if line.startswith('#'):
                continue
            m = pattern.match(line)
            if m:
                entries.append((m.group(1), m.group(2), m.group(3)))

        # All property definitions are registered first ...
        newprop = list(self._properties)
        for prop_id, prop_type, _value in entries:
            newprop.append({'id': prop_id, 'type': prop_type, 'mode': 'wd'})
        self._properties = tuple(newprop)

        # ... and only then are the values assigned, as before.
        for prop_id, _prop_type, value in entries:
            self._updateProperty(prop_id, value)

    security.declarePrivate('__init__')
    def __init__(self, id, title, skelton, fileattache, parent, elements):
        # Build a new site: optional lexicon, catalog layout, empty posting
        # storage, then the skeleton objects and properties.
        if elements:
            from Products.ZCTextIndex.ZCTextIndex import manage_addLexicon
            manage_addLexicon(self, id='lexicon', elements=elements)

        # The catalog is built on a copy of self wrapped in `parent`.
        self.__of__(parent)._buildIndexing(id, title)

        t = time()
        self.created = t
        self.modified = t

        self.fileattache = fileattache

        self.data = IOBTree()  # id -> Message
        self.ids = IISet()     # ids of children

        self.loadSkelton(None, skelton)
        self.loadProperties(skelton)
        self.skelton = skelton

    security.declarePublic('__len__')
    def __len__(self):
        # Number of articles plus one.
        # NOTE(review): presumably the +1 keeps an empty site truthy - confirm.
        return len(self.ids) + 1

    security.declareProtected(View, '__getitem__')
    def __getitem__(self, id):
        """ Get a posting from the ZchSite data store """

        # make sure id is an integer; anything that cannot be converted is
        # reported as a missing key rather than leaking ValueError/TypeError
        try:
            if not isinstance(id, IntType):
                id = atoi(id)
        except (ValueError, TypeError):
            raise KeyError(id)

        # make sure it's in our list of children
        if not self.ids.has_key(id):
            raise KeyError(id)

        # return the posting
        return self.data[id].__of__(self)

    security.declareProtected(View, 'zchcrypt')
    def zchcrypt(self, word, key):
        # Return the base64 text of an HMAC of `word` keyed with `key`,
        # minus its last three characters.
        # NOTE(review): hmac.new() is called without a digestmod, so the
        # library default applies (MD5 on Python 2, presumably) - confirm
        # this is acceptable for the callers.
        import base64
        import hmac
        h = hmac.new(key)
        h.update(word)
        return base64.encodestring(h.digest())[:-3]

    security.declareProtected(View, 'zchfqdn')
    def zchfqdn(self, n):
        # Thin wrapper around getfqdn().
        return getfqdn(n)

    security.declarePrivate('delItem')
    def delItem(self, id):
        # Remove posting `id` from storage and from the catalog.  Unknown
        # ids are ignored.  Deleting an article also deletes its comments.
        #
        # NOTE(review): uncatalog_object() receives the physical path
        # *tuple* here, whereas recatalogPostings() catalogs under the
        # '/'-joined *string*.  If Posting.index() also uses the string
        # form, these uncatalog calls will not find the entry - verify.
        if not self.data.has_key(id):
            return

        if self.ids.has_key(id):  # article
            article = self.data[id].__of__(self)
            for comment_id in article.ids:
                obj = self.data[comment_id].__of__(article)
                self.uncatalog_object(obj.getPhysicalPath())
                del self.data[comment_id]
            self.uncatalog_object(article.getPhysicalPath())
            del self.data[id]
            self.ids.remove(id)
        else:  # comment
            parent = self.data[self.data[id].parent_id].__of__(self)
            # remove it from its parent's list of ids
            obj = self.data[id].__of__(parent)
            self.uncatalog_object(obj.getPhysicalPath())
            del self.data[id]
            parent.ids.remove(id)

    security.declarePrivate('createId')
    def createId(self):
        # Use the current time in whole seconds as the id, bumping it by
        # one until it does not collide with an existing posting.
        id = int(time())
        while self.data.has_key(id):
            id = id + 1
        return id

    security.declarePrivate('data_map')
    def data_map(self, ids):
        # Map posting ids to the stored postings, each wrapped in self.
        return [self.data[id].__of__(self) for id in ids]

    security.declareProtected(View, 'article_list')
    def article_list(self, size=None):
        """ returns article items  """
        items = self.data_map(self.ids)
        # Most recently modified first; the sort is stable, so articles
        # with equal timestamps keep their id order.
        items.sort(key=lambda item: item.modified, reverse=True)
        if size:
            items = items[:size]
        # Number the returned articles 1..n in display order.
        for position, item in enumerate(items):
            item.sequence_number = position + 1
        return items

    security.declareProtected(ManageZch, 'postingValues')
    postingValues = article_list

    security.declareProtected(View, 'tpId')
    def tpId(self):
        return self.id

    security.declareProtected(View, 'tpURL')
    def tpURL(self):
        return self.id

    security.declareProtected(View, 'this')
    def this(self):
        return self

    security.declareProtected(View, 'site_url')
    def site_url(self):
        # """ url of the Zch main page """
        return self.absolute_url()

    security.declareProtected(View, 'has_items')
    def has_items(self):
        # Number of articles (used as a truth value by callers, presumably).
        return len(self.ids)

    security.declareProtected(View, 'item_count')
    def item_count(self):
        # Number of stored postings of any kind (everything in self.data).
        return len(self.data)

    security.declareProtected(AddArticleZch, 'addPosting')
    def addPosting(self, file='', REQUEST=None, RESPONSE=None, index=1):
        """ add an article """

        id = self.createId()

        msg = Article(id)
        # `sage` is returned by _validation but not used for articles.
        err, sage = msg.__of__(self)._validation(REQUEST, RESPONSE, 'delete attachment', file)
        if err:
            return err

        # Set thread number.
        msg.tnum = '1'

        self.ids.insert(id)
        self.data[id] = msg

        if index:
            msg.__of__(self).index()

        if RESPONSE:
            return self.showMessage(self, REQUEST=REQUEST,
                                    title='Article Posted',
                                    message='Your article has been posted',
                                    action=self.absolute_url()
                                    )

        return id

    security.declareProtected(View, 'search')
    def search(self, REQUEST):
        """ fulfill a search request """
        if REQUEST.has_key('op') and REQUEST['op'] == 'articles':
            REQUEST.set('meta_type', 'Article')

        sr = self.__call__(REQUEST)
        rc = len(sr)
        return self.showSearchResults(self, REQUEST, search_results=sr,
                                      result_count=rc)

    security.declareProtected(ManageZch, 'manage_edit')
    def manage_edit(self, REQUEST=None, fileattache=0):
        """ edit Zch options  """
        self.fileattache = fileattache

        if REQUEST is not None:
            return REQUEST.RESPONSE.redirect(REQUEST['HTTP_REFERER'])

    security.declareProtected(ManageZch, 'manage_delete')
    def manage_delete(self, ids=(), REQUEST=None):
        """ delete selected articles from a Zch site """
        # The default is an immutable empty tuple instead of a shared list.
        for id in [atoi(raw_id) for raw_id in ids]:
            self.delItem(id)
        if REQUEST is not None:
            return REQUEST.RESPONSE.redirect(REQUEST['HTTP_REFERER'])

    security.declarePrivate('list_skelton')
    def list_skelton(self):
        # Names of the entries in the package's 'skelton' directory.
        return list(os.listdir(os.path.join(package_home(globals()), 'skelton')))

    # Searchable interface
    security.declareProtected(View, '__call__')
    def __call__(self, REQUEST=None, internal=0, **kw):
        # Run a catalog search.  Returns the brains from searchResults(),
        # or the objects they point to when `internal` is true.
        brains = self.searchResults(REQUEST, **kw)
        if internal:
            return [brain.getObject() for brain in brains]
        return brains
Esempio n. 55
0
class Article(Posting):
    """ A top-level posting that keeps the ids of its comments in ``ids``. """

    # NOTE(review): Zope's publisher traditionally refuses to publish methods
    # that have no docstring.  Methods in this class that had no docstring
    # are documented with '#' comments only, so that nothing new becomes
    # reachable by URL.

    security = ClassSecurityInfo()

    meta_type = 'Article'
    icon = 'misc_/Zch/posting_img'

    security.declarePrivate('__init__')
    def __init__(self, id):
        Posting.__init__(self, id)
        self.ids = IISet()     # Article has sub ids.

    security.declareProtected(View, 'relative_path')
    def relative_path(self):
        return self.id

    security.declareProtected(View, 'index_html')
    def index_html(self, REQUEST):
        """ Zch article main page (the read more page) """
        return self.article_html(self, REQUEST)

    security.declareProtected(ManageZch, 'postingValues')
    def postingValues(self):
        """ return all replies """
        return self.data_map(self.ids)

    security.declareProtected(View, 'comment_list_size')
    def comment_list_size(self, start=0, size=0):
        """ returns comment items  """
        # Falsy values (None, '', 0) count as 0; everything else is
        # converted with int().
        start = int(start or 0)
        size = int(size or 0)

        # Adjust start to tnum: a start of 1 is moved to 2, and one slot of
        # a non-zero size is given up for it.
        # NOTE(review): start=1 with size=1 ends up as size 0, which below
        # means "no limit" - confirm that is intended.
        if start == 1:
            start = 2
            if size:
                size = size - 1

        all_ids = list(self.ids)
        if start:
            # Convert to ids[] index number
            offset = start - 2
            if size == 0:
                ids = all_ids[offset:]
            else:
                ids = all_ids[offset:offset + size]
        elif size == 0:
            ids = all_ids
        else:
            # No start given: return the last `size` comments.
            ids = all_ids[-size:]

        return self.data_map(ids)

    security.declareProtected(View, 'comment_list_from_to')
    def comment_list_from_to(self, from_tnum=0, to_tnum=0):
        """ returns comment items  """
        from_tnum = int(from_tnum)
        to_tnum = int(to_tnum)

        # A bound of 0 means "unbounded".  With no bounds at all the
        # comments' tnum values are never inspected.
        if from_tnum == 0 and to_tnum == 0:
            return self.data_map(list(self.ids))

        ids = []
        for id in self.ids:
            tnum = int(self.data[id].tnum)
            if from_tnum != 0 and tnum < from_tnum:
                continue
            if to_tnum != 0 and tnum > to_tnum:
                continue
            ids.append(id)
        return self.data_map(ids)

    security.declareProtected(AddCommentZch, 'addPosting')
    def addPosting(self, file='', REQUEST=None, RESPONSE=None):
        """ add a Comment """
        id = self.createId()
        msg = Comment(id, self.id)
        err, sage = msg.__of__(self)._validation(REQUEST, RESPONSE, 'delete attachment', file)
        if err:
            return err

        # Set thread number: existing comment count + 2.
        msg.tnum = str(len(self.ids) + 2)

        # Only a posting validated with sage == 0 updates the article's
        # `modified` value (which article_list sorts by).
        if sage == 0:
            self.modified = id

        self.ids.insert(id)
        self.data[id] = msg

        # Comments are always indexed.
        msg.__of__(self).index()

        if RESPONSE:
            return self.showMessage(self, REQUEST=REQUEST,
                                    title='Comment Posted',
                                    message='Your reply has been posted',
                                    action=self.absolute_url()
                                    )

        return id

    security.declareProtected(View, 'recent_entry')
    def recent_entry(self):
        # Body of the newest comment (highest id), or of the article itself
        # when there are no comments.
        if len(self.ids) != 0:
            return self.data[self.ids[-1]].body
        else:
            return self.body

    security.declareProtected(View, 'recent_creator')
    def recent_creator(self):
        # Author of the newest comment (highest id), or of the article
        # itself when there are no comments.
        if len(self.ids) != 0:
            return self.data[self.ids[-1]].author
        else:
            return self.author

    security.declarePublic('__len__')
    def __len__(self):
        # Number of comments plus one.
        return len(self.ids) + 1

    security.declareProtected(View, '__getitem__')
    def __getitem__(self, id):
        """ Get a posting from the ZchSite data store """
        # make sure id is an integer; anything that cannot be converted is
        # reported as a missing key
        try:
            if not isinstance(id, IntType):
                id = atoi(id)
        except (ValueError, TypeError):
            raise KeyError(id)

        try:
            return Posting.__getitem__(self, id)
        except KeyError:
            # Fall back to treating `id` as a position: id - 2 indexes into
            # the comment ids.
            # NOTE(review): ids 0 and 1 give negative indexes and so select
            # comments from the end - confirm that is wanted.
            try:
                return self.data[self.ids[id - 2]].__of__(self)
            except (KeyError, IndexError):
                # Only lookup misses become KeyError; other errors (for
                # example database conflicts) now propagate.
                raise KeyError(id)
Esempio n. 56
0
 def __init__(self, id):
     """Initialise through Posting, then start with an empty IISet in ``ids``."""
     Posting.__init__(self, id)
     self.ids     =IISet()     #Article has sub ids.
Esempio n. 57
0
 def get(self, key, default=None):
     """Return an IISet holding what the lexicon yields for ``key``.

     The lexicon is asked via ``self._lexicon.get(key, default)``; when
     that yields ``None`` the returned set is empty.
     """
     word_id = self._lexicon.get(key, default)
     matches = IISet()
     if word_id is None:
         return matches
     matches.insert(word_id)
     return matches
    def search(self, path, default_level=0, depth=-1, navtree=0,
                                                             navtree_start=0):
        """
        path is either a string representing a
        relative URL or a part of a relative URL or
        a tuple (path,level).

        level >= 0  starts searching at the given level
        level <  0  not implemented yet

        Returns a set of document ids (an IISet, or whatever the index
        structures / multiunion() hand back).

        Raises ValueError when depth > 0, or when the first path component
        is neither 'zport' nor 'dmd'.
        """

        # Accept either a bare path string or a (path, level) pair.
        if isinstance(path, basestring):
            startlevel = default_level
        else:
            startlevel = int(path[1])
            path = path[0]

        absolute_path = isinstance(path, basestring) and path.startswith('/')
        # filter(None, ...) drops the empty components that leading,
        # trailing or doubled slashes produce.
        comps = filter(None, path.split('/'))

        # Leading '' so that '/'.join() of a prefix gives an absolute path.
        orig_comps = [''] + comps[:]

        if depth > 0:
            raise ValueError, "Can't do depth searches anymore"

        # Normalise the root: an empty path means 'dmd', a leading 'zport'
        # is stripped, and any other first component is rejected.
        if not comps:
            comps = ['dmd']
            startlevel = 1
        elif comps[0] == 'zport':
            comps = comps[1:]
        elif comps[0] != 'dmd':
            raise ValueError, "Depth searches must start with 'dmd'"
        # NOTE(review): this unconditionally replaces every startlevel value
        # chosen above (default_level, the tuple's level, or 1) - confirm.
        startlevel = len(comps)
        #startlevel = len(comps)-1 if len(comps) > 1 else 1

        # comps is only empty here when 'zport' was the sole component.
        if len(comps) == 0:
            if depth == -1 and not navtree:
                return IISet(self._unindex.keys())

        # Make sure that we get depth = 1 if in navtree mode
        # unless specified otherwise

        orig_depth = depth
        if depth == -1:
            # `0 or navtree` evaluates to navtree.
            depth = 0 or navtree

        # Optimized navtree starting with absolute path
        if absolute_path and navtree and depth == 1 and default_level==0:
            set_list = []
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i])
                # An empty joined path (the root) is looked up as '/'.
                parent_path = parent_path and parent_path or '/'
                try:
                    set_list.append(self._index_parents[parent_path])
                except KeyError:
                    # Paths with no entry in _index_parents are skipped.
                    pass
            return multiunion(set_list)
        # Optimized breadcrumbs
        elif absolute_path and navtree and depth == 0 and default_level==0:
            item_list = IISet()
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i])
                # An empty joined path (the root) is looked up as '/'.
                parent_path = parent_path and parent_path or '/'
                try:
                    item_list.insert(self._index_items[parent_path])
                except KeyError:
                    # Paths with no entry in _index_items are skipped.
                    pass
            return item_list
        # Specific object search
        elif absolute_path and orig_depth == 0 and default_level == 0:
            try:
                return IISet([self._index_items[path]])
            except KeyError:
                return IISet()
        # Single depth search
        elif absolute_path and orig_depth == 1 and default_level == 0:
            # NOTE(review): depth > 0 raises ValueError near the top, so
            # orig_depth == 1 looks unreachable here - confirm.
            # only get objects contained in requested folder
            try:
                return self._index_parents[path]
            except KeyError:
                return IISet()
        # Sitemaps, relative paths, and depth queries
        elif startlevel >= 0:

            pathset = None # Same as pathindex
            navset  = None # For collecting siblings along the way
            depthset = None # For limiting depth

            if navtree and depth and \
                   self._index.has_key(None) and \
                   self._index[None].has_key(startlevel):
                navset = self._index[None][startlevel]
            # NOTE(review): startlevel equals len(comps) here, so the
            # `level <= len(comps)` test below can only pass on the first
            # iteration.  A hit returns straight away; a miss either
            # returns an empty set or (navtree) sets pathset to an empty
            # IISet.
            for level in range(startlevel, startlevel+len(comps)):
                if level <= len(comps):
                    comp = "/".join(comps[:level])
                    if (not self._index.has_key(comp)
                        or not self._index[comp].has_key(level)):
                        # Navtree is inverse, keep going even for
                        # nonexisting paths
                        if navtree:
                            pathset = IISet()
                        else:
                            return IISet()
                    else:
                        return self._index[comp][level]
                    if navtree and depth and \
                           self._index.has_key(None) and \
                           self._index[None].has_key(level+depth):
                        navset  = union(navset, intersection(pathset,
                                              self._index[None][level+depth]))
                if level-startlevel >= len(comps) or navtree:
                    if (self._index.has_key(None)
                        and self._index[None].has_key(level)):
                        depthset = union(depthset, intersection(pathset,
                                                    self._index[None][level]))

            # Falsy results are replaced with an empty IISet.
            if navtree:
                return union(depthset, navset) or IISet()
            elif depth:
                return depthset or IISet()
            else:
                return pathset or IISet()

        else:
            # NOTE(review): startlevel is always len(comps) (>= 0) by now,
            # so this branch looks unreachable - confirm before relying on it.
            results = IISet()
            for level in range(0,self._depth + 1):
                ids = None
                error = 0
                for cn in range(0,len(comps)):
                    comp = comps[cn]
                    try:
                        ids = intersection(ids,self._index[comp][level+cn])
                    except KeyError:
                        # A missing component/level disqualifies this level.
                        error = 1
                if error==0:
                    results = union(results,ids)
            return results