Example #1
 def testFixed1843(self):
     from BTrees.IIBTree import IISet
     t = IISet()
     t.insert(1)
     # This one used to fail to raise the TypeError when it occurred.
     self.assertRaises(TypeError, t.keys, "")
     # This one used to segfault.
     self.assertRaises(TypeError, t.keys, 0, "")
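
The test above exercises the optional range arguments of IISet.keys(): the bounds must be integers, and a non-integer bound has to raise TypeError instead of being silently ignored or crashing the process. A minimal sketch of that range form (illustrative only, not part of the quoted test):

from BTrees.IIBTree import IISet

s = IISet()
for i in (10, 20, 30, 40):
    s.insert(i)

list(s.keys())        # [10, 20, 30, 40] -- all keys, always in sorted order
list(s.keys(15, 35))  # [20, 30] -- only the keys inside the integer range
try:
    s.keys(15, "35")  # a string bound raises TypeError, as the test asserts
except TypeError:
    pass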
Example #2
 def testFixed1843(self):
     from BTrees.IIBTree import IISet
     t = IISet()
     t.insert(1)
     # This one used to fail to raise the TypeError when it occurred.
     self.assertRaises(TypeError, t.keys, "")
     # This one used to segfault.
     self.assertRaises(TypeError, t.keys, 0, "")
Example #3
    def get(self, pattern):
        """ Query the lexicon for words matching a pattern."""

        # A single-character pattern would produce a slicing problem below.
        # Because the splitter throws away single characters we can
        # return an empty tuple here.

        if len(pattern)==1: return ()

        wc_set = [self.multi_wc, self.single_wc]

        digrams = []
        globbing = 0
        for i in range(len(pattern)):
            if pattern[i] in wc_set:
                globbing = 1
                continue

            if i == 0:
                digrams.insert(i, (self.eow + pattern[i]) )
                digrams.append((pattern[i] + pattern[i+1]))
            else:
                try:
                    if pattern[i+1] not in wc_set:
                        digrams.append( pattern[i] + pattern[i+1] )

                except IndexError:
                    digrams.append( (pattern[i] + self.eow) )

        if not globbing:
            result =  self._lexicon.get(pattern, None)
            if result is None:
                return ()
            return (result, )

        ## now get all of the intsets that contain the result digrams
        result = None
        for digram in digrams:
            result=union(result, self._digrams.get(digram, None))

        if not result:
            return ()
        else:
            ## now we have narrowed the list of possible candidates
            ## down to those words which contain digrams.  However,
            ## some words may have been returned that match digrams,
            ## but do not match 'pattern'.  This is because some words
            ## may contain all matching digrams, but in the wrong
            ## order.

            expr = re.compile(self.createRegex(pattern))
            words = []
            hits = IISet()
            for x in result:
                if expr.match(self._inverseLex[x]):
                    hits.insert(x)
            return hits
Example #4
    def get(self, pattern):
        """ Query the lexicon for words matching a pattern."""

        # A single-character pattern would produce a slicing problem below.
        # Because the splitter throws away single characters we can
        # return an empty tuple here.

        if len(pattern) == 1: return ()

        wc_set = [self.multi_wc, self.single_wc]

        digrams = []
        globbing = 0
        for i in range(len(pattern)):
            if pattern[i] in wc_set:
                globbing = 1
                continue

            if i == 0:
                digrams.insert(i, (self.eow + pattern[i]))
                digrams.append((pattern[i] + pattern[i + 1]))
            else:
                try:
                    if pattern[i + 1] not in wc_set:
                        digrams.append(pattern[i] + pattern[i + 1])

                except IndexError:
                    digrams.append((pattern[i] + self.eow))

        if not globbing:
            result = self._lexicon.get(pattern, None)
            if result is None:
                return ()
            return (result, )

        ## now get all of the intsets that contain the result digrams
        result = None
        for digram in digrams:
            result = union(result, self._digrams.get(digram, None))

        if not result:
            return ()
        else:
            ## now we have narrowed the list of possible candidates
            ## down to those words which contain digrams.  However,
            ## some words may have been returned that match digrams,
            ## but do not match 'pattern'.  This is because some words
            ## may contain all matching digrams, but in the wrong
            ## order.

            expr = re.compile(self.createRegex(pattern))
            words = []
            hits = IISet()
            for x in result:
                if expr.match(self._inverseLex[x]):
                    hits.insert(x)
            return hits
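
Both variants above implement the same globbing strategy: decompose the pattern into digrams, union the per-digram IISets of word ids, then re-check the candidates against a regex because digram membership alone ignores ordering. A self-contained sketch of that flow; the end-of-word marker, the digram index, the word ids and the regex are assumptions made up for illustration:

from BTrees.IIBTree import IISet, union
import re

eow = '$'                   # assumed end-of-word marker, mirroring self.eow
digram_index = {            # hypothetical: digram -> IISet of word ids
    '$f': IISet([1, 2]), 'fo': IISet([1, 2]), 'oo': IISet([1]),
    'o$': IISet([1]), 'or': IISet([2]), 'r$': IISet([2]),
}
inverse_lex = {1: 'foo', 2: 'for'}

digrams = ['$f', 'fo']      # what the loop above derives from the pattern 'fo*'

candidates = None
for digram in digrams:
    candidates = union(candidates, digram_index.get(digram, None))

expr = re.compile('fo.*')   # roughly what createRegex('fo*') would build
hits = IISet()
for wid in candidates:
    if expr.match(inverse_lex[wid]):
        hits.insert(wid)
list(hits)                  # [1, 2] -- both 'foo' and 'for' match 'fo*'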
Example #5
class AccountingFolder(BaseFolder, BrowserDefaultMixin):
    """
    """
    security = ClassSecurityInfo()

    implements(interfaces.IAccountingFolder)

    meta_type = 'AccountingFolder'
    _at_rename_after_creation = True

    schema = AccountingFolder_schema

    ##code-section class-header #fill in your manual code here
    ##/code-section class-header

    # Methods

    # Manually created methods

    def __init__(self, oid, **kwargs):
        BaseFolder.__init__(self, oid, **kwargs)
        self._closing_transfers = IISet()

    security.declareProtected(permissions.View, 'getAccountingRoot')
    def getAccountingRoot(self):
        ''' Return 'self' as accounting root
        '''
        return self

    def displayContentsTab(self):
        """ Hide contents tab
        """
        return False

    def registerClosingDate(self, date):
        """ register closing transfer date
        """
        # strip time before insert
        date = int(DateTime(date.Date()))
        self._closing_transfers.insert(date)

    def getClosingDates(self):
        """ return all registered closing dates
        """
        return self._closing_transfers
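
registerClosingDate() works because IISet keys are plain machine integers, which is why the DateTime value is collapsed to an int before insertion. A sketch of the same constraint using an ordinary Unix timestamp in place of Zope's DateTime (an assumption for illustration only):

import time
from BTrees.IIBTree import IISet

closing_transfers = IISet()
day = int(time.time()) // 86400 * 86400     # strip the time-of-day part
closing_transfers.insert(day)

try:
    closing_transfers.insert("2024-01-01")  # non-integer keys are rejected
except TypeError:
    pass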
Example #6
def nearResultSets(sets, index, distance=5, bidirectional=1):
    """ perform near search on results sets """
    
    # One result set consists of an IISet() of documentIds and a
    # tuple whose first element is the word (from LexiconLookup())
    # First we perform an intersection to get the documentIds of
    # those documents that contain all the words

    docids =  intersectResultSets(sets).docIds()

    # Now we determine for every document the positions of all
    # the words inside the document. Then we compare all the positions
    # to determine whether the words occur near each other
    
    words = []
    for set in sets:
        for word in set.words().keys():
            words.append(word)

    res_docids = IISet()

    for docId in docids:
        # the posMap is a list of tuples(word,IISet[positions])
        posMap = index.positionsFromDocumentLookup(docId, words)

        if bidirectional:
            if len(posMap.checkPositionMapBidirectional(distance)) > 0:
                res_docids.insert(docId)
        else:
            if len(posMap.checkPositionMapUnidirectional(distance)) > 0:
                res_docids.insert(docId)

    d = {}
    for w in words: d[w] = 1.0

    return ResultSet(res_docids, d)       
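
The intersection step described in the comments is plain BTrees set algebra: intersecting the per-word IISets of documentIds leaves only the documents that contain every word, and the positional near-check then runs on that reduced set. A sketch with hypothetical docid sets (not the real ResultSet API):

from BTrees.IIBTree import IISet, intersection

docids_word_a = IISet([1, 2, 3, 5])
docids_word_b = IISet([2, 3, 8])

common = None
for s in (docids_word_a, docids_word_b):
    common = intersection(common, s)  # intersection(None, x) returns x
list(common)                          # [2, 3] -- documents containing both words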
Example #7
def checkCatalog(path, indexes):
    """ perform some consistency checks on a ZCatalog instance"""

    root = Zope2.app()

    try:
        catalog = root.unrestrictedTraverse(path)
    except AttributeError:
        print 'Error: catalog object not found'
        sys.exit(1)

    # get Catalog instance
    _cat = catalog._catalog

    # check Catalog internal BTrees
    l_data = list(_cat.data.keys())
    l_data.sort()
    l_uids = list(_cat.uids.values())
    l_uids.sort()
    l_paths = list(_cat.data.keys())
    l_paths.sort()

    print "Checking catalog internal BTrees"
    print "\tINFO: Mapping data:  %d entries" % len(l_data)
    print "\tINFO: Mapping uids:  %d entries" % len(l_uids)
    print "\tINFO: Mapping paths: %d entries" % len(l_paths)

    if l_data == l_uids:
        print "\tOK:  Mapping data equals Mapping uids"
    else:
        print "\tERR: Mapping data does not equal Mapping uids"

    if l_data == l_paths:
        print "\tOK:  Mapping data equals Maaping paths"
    else:
        print "\tERR: Mapping data does not equal Maaping paths"

    # check BTrees of indexes

    for id, idx in _cat.indexes.items():

        if indexes and not idx.meta_type in indexes: continue

        print "Checking index '%s' (type: %s)" % (id, idx.meta_type)

        if idx.meta_type in ['FieldIndex', 'KeywordIndex']:

            # check forward entries
            RIDS = IISet()
            for key, rids in idx._index.items():
                if isinstance(rids, IntType):
                    RIDS.insert(rids)
                else:
                    map(RIDS.insert, rids.keys())

            diff = difference(RIDS, IISet(_cat.data.keys()))
            if len(diff) != 0:
                print '\tERR: Problem with forward entries'
                print '\tERR: too many forward entries:', diff
            else:
                print '\tOK:  Forward entries (%d entries)' % (len(RIDS))

        elif idx.meta_type in ['PathIndex']:

            RIDS = IISet()

            for rids in map(None, idx._index.values()):
                map(RIDS.insert, rids.values()[0])

            diff = difference(RIDS, IISet(_cat.data.keys()))
            if len(diff) != 0:
                print '\tERR: Problem with forward entries'
                print '\tERR: too many forward entries:', diff
            else:
                print '\tOK:  Forward entries (%d entries)' % (len(RIDS))

        if idx.meta_type in ['FieldIndex', 'KeywordIndex', 'PathIndex']:

            # check backward entries
            RIDS = IISet(idx._unindex.keys())
            diff = difference(RIDS, IISet(_cat.data.keys()))
            if len(diff) != 0:
                print '\tERR: Problem with backward entries'
                print '\tERR: too many backward entries:', diff
            else:
                print '\tOK:  Backward entries (%d entries)' % (len(RIDS))
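
The forward and backward checks above boil down to BTrees difference(): record ids referenced by an index but absent from the catalog's data mapping show up as a non-empty difference. A sketch with hypothetical record ids:

from BTrees.IIBTree import IISet, difference

index_rids = IISet([1, 2, 3, 99])    # rids referenced by an index
catalog_rids = IISet([1, 2, 3])      # rids the catalog actually stores

stray = difference(index_rids, catalog_rids)
len(stray)    # 1
list(stray)   # [99] -- the dangling forward entry the script would report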
Example #8
    def search(self,
               path,
               default_level=0,
               depth=-1,
               navtree=0,
               navtree_start=0):
        """
        path is either a string representing a
        relative URL or a part of a relative URL or
        a tuple (path,level).

        level >= 0  starts searching at the given level
        level <  0  not implemented yet
        """

        if isinstance(path, basestring):
            startlevel = default_level
        else:
            startlevel = int(path[1])
            path = path[0]

        absolute_path = isinstance(path, basestring) and path.startswith('/')

        comps = filter(None, path.split('/'))

        orig_comps = [''] + comps[:]
        # Optimization - avoid using the root set
        # as it is common for all objects anyway and adds overhead
        # There is an assumption about catalog/index having
        # the same container as content
        if default_level == 0:
            indexpath = list(filter(None, self.getPhysicalPath()))
            while min(len(indexpath), len(comps)):
                if indexpath[0] == comps[0]:
                    del indexpath[0]
                    del comps[0]
                    startlevel += 1
                else:
                    break

        if len(comps) == 0:
            if depth == -1 and not navtree:
                return IISet(self._unindex.keys())

        # Make sure that we get depth = 1 if in navtree mode
        # unless specified otherwise

        orig_depth = depth
        if depth == -1:
            depth = 0 or navtree

        # Optimized navtree starting with absolute path
        if absolute_path and navtree and depth == 1 and default_level == 0:
            set_list = []
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i])
                parent_path = parent_path and parent_path or '/'
                try:
                    set_list.append(self._index_parents[parent_path])
                except KeyError:
                    pass
            return multiunion(set_list)
        # Optimized breadcrumbs
        elif absolute_path and navtree and depth == 0 and default_level == 0:
            item_list = IISet()
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i])
                parent_path = parent_path and parent_path or '/'
                try:
                    item_list.insert(self._index_items[parent_path])
                except KeyError:
                    pass
            return item_list
        # Specific object search
        elif absolute_path and orig_depth == 0 and default_level == 0:
            try:
                return IISet([self._index_items[path]])
            except KeyError:
                return IISet()
        # Single depth search
        elif absolute_path and orig_depth == 1 and default_level == 0:
            # only get objects contained in requested folder
            try:
                return self._index_parents[path]
            except KeyError:
                return IISet()
        # Sitemaps, relative paths, and depth queries
        elif startlevel >= 0:

            pathset = None  # Same as pathindex
            navset = None  # For collecting siblings along the way
            depthset = None  # For limiting depth

            if navtree and depth and \
                   self._index.has_key(None) and \
                   self._index[None].has_key(startlevel):
                navset = self._index[None][startlevel]

            for level in range(startlevel, startlevel + len(comps) + depth):
                if level - startlevel < len(comps):
                    comp = comps[level - startlevel]
                    if not self._index.has_key(
                            comp) or not self._index[comp].has_key(level):
                        # Navtree is inverse, keep going even for
                        # nonexisting paths
                        if navtree:
                            pathset = IISet()
                        else:
                            return IISet()
                    else:
                        pathset = intersection(pathset,
                                               self._index[comp][level])
                    if navtree and depth and \
                           self._index.has_key(None) and \
                           self._index[None].has_key(level+depth):
                        navset = union(
                            navset,
                            intersection(pathset,
                                         self._index[None][level + depth]))
                if level - startlevel >= len(comps) or navtree:
                    if self._index.has_key(None) and self._index[None].has_key(
                            level):
                        depthset = union(
                            depthset,
                            intersection(pathset, self._index[None][level]))

            if navtree:
                return union(depthset, navset) or IISet()
            elif depth:
                return depthset or IISet()
            else:
                return pathset or IISet()

        else:
            results = IISet()
            for level in range(0, self._depth + 1):
                ids = None
                error = 0
                for cn in range(0, len(comps)):
                    comp = comps[cn]
                    try:
                        ids = intersection(ids, self._index[comp][level + cn])
                    except KeyError:
                        error = 1
                if error == 0:
                    results = union(results, ids)
            return results
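
The heart of the relative-path branch is the per-level intersection: _index maps each path component to a level-keyed mapping of IISets, and intersecting across levels keeps only the documents whose path has the right component at every position. A reduced sketch with a hypothetical in-memory index:

from BTrees.IIBTree import IISet, intersection

_index = {                   # hypothetical: component -> {level: IISet of docids}
    'plone':  {0: IISet([1, 2, 3])},
    'folder': {1: IISet([2, 3, 7])},
    'doc':    {2: IISet([3])},
}

comps = ['plone', 'folder', 'doc']
pathset = None
for level, comp in enumerate(comps):
    pathset = intersection(pathset, _index[comp][level])
list(pathset)                # [3] -- the only docid matching /plone/folder/doc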
Example #9
class Article(Posting):     
    """ """

    security = ClassSecurityInfo()
    
    meta_type  ='Article'     
    icon   ='misc_/Zch/posting_img'

    security.declarePrivate('__init__')
    def __init__(self, id):     
        Posting.__init__(self, id)
        self.ids     =IISet()     #Article has sub ids.
     
    
    security.declareProtected(View, 'relative_path')
    def relative_path(self):
        return self.id

    security.declareProtected(View, 'index_html')
    def index_html(self,REQUEST):     
        """ Zch article main page (the read more page) """    
        return self.article_html(self,REQUEST)     
     
    security.declareProtected(ManageZch, 'postingValues')
    def postingValues(self):     
        """ return all replies """     
        return self.data_map(self.ids)     
     
    security.declareProtected(View, 'comment_list_size')
    def comment_list_size(self, start=0, size=0):
        """ returns comment items  """                          
        if start:
            start = int(start)
        else:
            start = 0
        if size:
            size = int(size)
        else:
            size = 0

        # Adjust start to tnum
        if start == 1:
            start = 2
            if size:
                size = size-1
        # Convert to ids[] index number
        if start:
            start = start -2
    
            if size == 0:
                ids = [id for id in self.ids][start:]
            else:
                ids = [id for id in self.ids][start:start+size]
        else:
            if size == 0:
                ids = [id for id in self.ids][:]
            else:
                ids = [id for id in self.ids][size*-1:]
    
        return self.data_map(ids)

    security.declareProtected(View, 'comment_list_from_to')
    def comment_list_from_to(self, from_tnum=0, to_tnum=0):
        """ returns comment items  """                          
        from_tnum = int(from_tnum)
        to_tnum = int(to_tnum)
        ids = [id for id in self.ids if (from_tnum == 0 or int(self.data[id].tnum) >= from_tnum) and (to_tnum == 0 or int(self.data[id].tnum) <= to_tnum)]
        return self.data_map(ids)
    
    security.declareProtected(AddCommentZch, 'addPosting')
    def addPosting(self, file='', REQUEST=None,RESPONSE=None):     
        """ add a Comment """
        index=1
        id=self.createId()
        msg=Comment(id, self.id)
        err, sage = msg.__of__(self)._validation(REQUEST,RESPONSE,'delete attachment',file)
        if err:
            return err
        # Set thread number. 
        msg.tnum = str(len(self.ids) + 2)

        if sage==0:
            self.modified=id     

        self.ids.insert(id)     
        self.data[id]=msg

        if index:
            msg.__of__(self).index()
          
        if RESPONSE:
            return self.showMessage(self, REQUEST=REQUEST, 
                                title='Comment Posted',
                                message  ='Your reply has been posted',
                                action=self.absolute_url()     
                                )

        return id
     
    security.declareProtected(View, 'recent_entry')
    def recent_entry(self):
        if len (self.ids) != 0:
            return self.data[self.ids[-1]].body
        else:
            return self.body

    security.declareProtected(View, 'recent_creator')
    def recent_creator(self):
        if len (self.ids) != 0:
            return self.data[self.ids[-1]].author
        else:
            return self.author

    security.declarePublic('__len__')
    def __len__(self):
        return len(self.ids) + 1

    security.declareProtected(View, '__getitem__')
    def __getitem__(self,id):
        """ Get a posting from the ZchSite data store """
        # make sure id is an integer
        try:
            if not isinstance(id,IntType):
                id=atoi(id)
        except ValueError:
            raise KeyError, id

        try:
            return Posting.__getitem__(self,id)
        except KeyError:
            try:
                return self.data[self.ids[id-2]].__of__(self)
            except:
                raise KeyError, id
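
recent_entry() and the slicing in comment_list_size() both lean on the fact that an IISet keeps its keys sorted and supports positional access, and that createId() derives ids from int(time()), so the last key is always the newest posting. A small sketch with made-up ids:

from BTrees.IIBTree import IISet

ids = IISet()
for posting_id in (1300000300, 1300000100, 1300000200):
    ids.insert(posting_id)

list(ids)         # [1300000100, 1300000200, 1300000300] -- always sorted
ids[0]            # 1300000100 -- oldest posting
ids[len(ids)-1]   # 1300000300 -- newest, what self.ids[-1] picks out above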
Example #10
    def search(self, path, default_level=0, depth=-1, navtree=0,
                                                             navtree_start=0):
        """
        path is either a string representing a
        relative URL or a part of a relative URL or
        a tuple (path,level).

        level >= 0  starts searching at the given level
        level <  0  not implemented yet
        """

        if isinstance(path, basestring):
            startlevel = default_level
        else:
            startlevel = int(path[1])
            path = path[0]

        absolute_path = isinstance(path, basestring) and path.startswith('/')
        comps = filter(None, path.split('/'))

        orig_comps = [''] + comps[:]

        if depth > 0:
            raise ValueError, "Can't do depth searches anymore"

        if not comps:
            comps = ['dmd']
            startlevel = 1
        elif comps[0] == 'zport':
            comps = comps[1:]
        elif comps[0] != 'dmd':
            raise ValueError, "Depth searches must start with 'dmd'"
        startlevel = len(comps)
        #startlevel = len(comps)-1 if len(comps) > 1 else 1

        if len(comps) == 0:
            if depth == -1 and not navtree:
                return IISet(self._unindex.keys())

        # Make sure that we get depth = 1 if in navtree mode
        # unless specified otherwise

        orig_depth = depth
        if depth == -1:
            depth = 0 or navtree

        # Optimized navtree starting with absolute path
        if absolute_path and navtree and depth == 1 and default_level==0:
            set_list = []
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i])
                parent_path = parent_path and parent_path or '/'
                try:
                    set_list.append(self._index_parents[parent_path])
                except KeyError:
                    pass
            return multiunion(set_list)
        # Optimized breadcrumbs
        elif absolute_path and navtree and depth == 0 and default_level==0:
            item_list = IISet()
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i])
                parent_path = parent_path and parent_path or '/'
                try:
                    item_list.insert(self._index_items[parent_path])
                except KeyError:
                    pass
            return item_list
        # Specific object search
        elif absolute_path and orig_depth == 0 and default_level == 0:
            try:
                return IISet([self._index_items[path]])
            except KeyError:
                return IISet()
        # Single depth search
        elif absolute_path and orig_depth == 1 and default_level == 0:
            # only get objects contained in requested folder
            try:
                return self._index_parents[path]
            except KeyError:
                return IISet()
        # Sitemaps, relative paths, and depth queries
        elif startlevel >= 0:

            pathset = None # Same as pathindex
            navset  = None # For collecting siblings along the way
            depthset = None # For limiting depth

            if navtree and depth and \
                   self._index.has_key(None) and \
                   self._index[None].has_key(startlevel):
                navset = self._index[None][startlevel]
            for level in range(startlevel, startlevel+len(comps)):
                if level <= len(comps):
                    comp = "/".join(comps[:level])
                    if (not self._index.has_key(comp)
                        or not self._index[comp].has_key(level)):
                        # Navtree is inverse, keep going even for
                        # nonexisting paths
                        if navtree:
                            pathset = IISet()
                        else:
                            return IISet()
                    else:
                        return self._index[comp][level]
                    if navtree and depth and \
                           self._index.has_key(None) and \
                           self._index[None].has_key(level+depth):
                        navset  = union(navset, intersection(pathset,
                                              self._index[None][level+depth]))
                if level-startlevel >= len(comps) or navtree:
                    if (self._index.has_key(None)
                        and self._index[None].has_key(level)):
                        depthset = union(depthset, intersection(pathset,
                                                    self._index[None][level]))

            if navtree:
                return union(depthset, navset) or IISet()
            elif depth:
                return depthset or IISet()
            else:
                return pathset or IISet()

        else:
            results = IISet()
            for level in range(0,self._depth + 1):
                ids = None
                error = 0
                for cn in range(0,len(comps)):
                    comp = comps[cn]
                    try:
                        ids = intersection(ids,self._index[comp][level+cn])
                    except KeyError:
                        error = 1
                if error==0:
                    results = union(results,ids)
            return results
Example #11
 def get(self, key, default=None):
     """Return the matched word against the key."""
     r=IISet()
     wid=self._lexicon.get(key, default)
     if wid is not None: r.insert(wid)
     return r
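
Wrapping the single word id in an IISet keeps the return type uniform, so callers can combine per-term results with the usual set operations without special-casing a one-hit lookup. A sketch with hypothetical word ids:

from BTrees.IIBTree import IISet, union, intersection

hit_for_cat = IISet([42])     # what get('cat') might return
hit_for_dog = IISet([7])      # what get('dog') might return
empty = IISet()               # get() of an unknown word

list(union(hit_for_cat, hit_for_dog))   # [7, 42]
list(intersection(hit_for_cat, empty))  # []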
Example #12
    def search(self, path, default_level=0, depth=-1, navtree=0,
                                                             navtree_start=0):
        """
        path is either a string representing a
        relative URL or a part of a relative URL or
        a tuple (path,level).

        level >= 0  starts searching at the given level
        level <  0  not implemented yet
        """

        if isinstance(path, basestring):
            startlevel = default_level
        else:
            startlevel = int(path[1])
            path = path[0]

        absolute_path = isinstance(path, basestring) and path.startswith('/')

        comps = filter(None, path.split('/'))

        orig_comps = [''] + comps[:]
        # Optimization - avoid using the root set
        # as it is common for all objects anyway and adds overhead
        # There is an assumption about catalog/index having
        # the same container as content
        if default_level == 0:
            indexpath = list(filter(None, self.getPhysicalPath()))
            while min(len(indexpath), len(comps)):
                if indexpath[0] == comps[0]:
                    del indexpath[0]
                    del comps[0]
                    startlevel += 1
                else:
                    break

        if len(comps) == 0:
            if depth == -1 and not navtree:
                return IISet(self._unindex.keys())

        # Make sure that we get depth = 1 if in navtree mode
        # unless specified otherwise

        orig_depth = depth
        if depth == -1:
            depth = 0 or navtree

        # Optimized navtree starting with absolute path
        if absolute_path and navtree and depth == 1 and default_level==0:
            set_list = []
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i])
                parent_path = parent_path and parent_path or '/'
                try:
                    set_list.append(self._index_parents[parent_path])
                except KeyError:
                    pass
            return multiunion(set_list)
        # Optimized breadcrumbs
        elif absolute_path and navtree and depth == 0 and default_level==0:
            item_list = IISet()
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i])
                parent_path = parent_path and parent_path or '/'
                try:
                    item_list.insert(self._index_items[parent_path])
                except KeyError:
                    pass
            return item_list
        # Specific object search
        elif absolute_path and orig_depth == 0 and default_level == 0:
            try:
                return IISet([self._index_items[path]])
            except KeyError:
                return IISet()
        # Single depth search
        elif absolute_path and orig_depth == 1 and default_level == 0:
            # only get objects contained in requested folder
            try:
                return self._index_parents[path]
            except KeyError:
                return IISet()
        # Sitemaps, relative paths, and depth queries
        elif startlevel >= 0:

            pathset = None # Same as pathindex
            navset  = None # For collecting siblings along the way
            depthset = None # For limiting depth

            if navtree and depth and \
                   self._index.has_key(None) and \
                   self._index[None].has_key(startlevel):
                navset = self._index[None][startlevel]

            for level in range(startlevel, startlevel+len(comps) + depth):
                if level-startlevel < len(comps):
                    comp = comps[level-startlevel]
                    if not self._index.has_key(comp) or not self._index[comp].has_key(level): 
                        # Navtree is inverse, keep going even for
                        # nonexisting paths
                        if navtree:
                            pathset = IISet()
                        else:
                            return IISet()
                    else:
                        pathset = intersection(pathset,
                                                     self._index[comp][level])
                    if navtree and depth and \
                           self._index.has_key(None) and \
                           self._index[None].has_key(level+depth):
                        navset  = union(navset, intersection(pathset,
                                              self._index[None][level+depth]))
                if level-startlevel >= len(comps) or navtree:
                    if self._index.has_key(None) and self._index[None].has_key(level):
                        depthset = union(depthset, intersection(pathset,
                                                    self._index[None][level]))

            if navtree:
                return union(depthset, navset) or IISet()
            elif depth:
                return depthset or IISet()
            else:
                return pathset or IISet()

        else:
            results = IISet()
            for level in range(0,self._depth + 1):
                ids = None
                error = 0
                for cn in range(0,len(comps)):
                    comp = comps[cn]
                    try:
                        ids = intersection(ids,self._index[comp][level+cn])
                    except KeyError:
                        error = 1
                if error==0:
                    results = union(results,ids)
            return results
Example #13
def checkCatalog(path,indexes):
    """ perform some consistency checks on a ZCatalog instance"""

    root = Zope2.app()

    try:
        catalog = root.unrestrictedTraverse(path)
    except AttributeError:
        print 'Error: catalog object not found'
        sys.exit(1)

    # get Catalog instance
    _cat = catalog._catalog

    # check Catalog internal BTrees
    l_data  = list(_cat.data.keys())
    l_data.sort()
    l_uids  = list(_cat.uids.values())
    l_uids.sort()
    l_paths = list(_cat.data.keys())
    l_paths.sort()

    print "Checking catalog internal BTrees"
    print "\tINFO: Mapping data:  %d entries" % len(l_data)
    print "\tINFO: Mapping uids:  %d entries" % len(l_uids)
    print "\tINFO: Mapping paths: %d entries" % len(l_paths)

    if l_data == l_uids:
        print "\tOK:  Mapping data equals Mapping uids"
    else:
        print "\tERR: Mapping data does not equal Mapping uids"

    if l_data == l_paths:
        print "\tOK:  Mapping data equals Maaping paths"
    else:
        print "\tERR: Mapping data does not equal Maaping paths"


    # check BTrees of indexes

    for id,idx in _cat.indexes.items():

        if indexes and not idx.meta_type in indexes: continue

        print "Checking index '%s' (type: %s)" % (id, idx.meta_type)

        if idx.meta_type in ['FieldIndex','KeywordIndex']:

            # check forward entries
            RIDS = IISet()
            for key, rids in idx._index.items():
                if isinstance(rids,IntType):
                    RIDS.insert(  rids  )
                else:
                    map(RIDS.insert , rids.keys())

            diff = difference(RIDS, IISet(_cat.data.keys()))
            if len(diff)!=0:
                print '\tERR: Problem with forward entries'
                print '\tERR: too many forward entries:', diff
            else:
                print '\tOK:  Forward entries (%d entries)'  % (len(RIDS))


        elif idx.meta_type in ['PathIndex']:

            RIDS = IISet()

            for rids in map(None,idx._index.values()):
                map(RIDS.insert , rids.values()[0])

            diff = difference(RIDS, IISet(_cat.data.keys()))
            if len(diff)!=0:
                print '\tERR: Problem with forward entries'
                print '\tERR: too many forward entries:', diff
            else:
                print '\tOK:  Forward entries (%d entries)'  % (len(RIDS))


        if idx.meta_type in ['FieldIndex','KeywordIndex','PathIndex']:

            # check backward entries
            RIDS = IISet(idx._unindex.keys())
            diff = difference(RIDS, IISet(_cat.data.keys()))
            if len(diff)!=0:
                print '\tERR: Problem with backward entries'
                print '\tERR: too many backward entries:', diff
            else:
                print '\tOK:  Backward entries (%d entries)'  % (len(RIDS))
Example #14
def languageindex_search(self, language, fallback=True, res=None):
    main, sub = splitLanguage(language)

    if main not in self._index:
        return None

    if fallback:
        # Search in sorted order, specific sub tag first, None second
        subs = list(self._index[main].keys())
        subs.sort()
        if sub in subs:
            subs.remove(sub)
            subs.insert(0, sub)
    else:
        subs = [sub]

    if not fallback and res is not None:
        # We do not support any optimization when fallback is enabled.
        #
        # TODO: The core loop is not in C here. Casual benchmarks suggest this
        # is still more efficient than trying to move it to C. The problem is
        # that we only have an IISet of docids as an input. We need to filter
        # this per language. The available index structures we have are:
        #
        # IndexEntry objects used as entries. Complex objects storing docid,
        # main and sub languages and UID of the canonical. Their hash and
        # compare function uses the canonical UID.
        #
        # self._index
        # An OOBTreeSet structure per language. In the outermost nodes we have
        # OOBTree's per language. Useful to get all items in a language.
        # Otherwise useless, as we would have to compare the docid attribute
        # of the object in the tree against our wanted set, requiring a full
        # loop over all items.
        #
        # self._unindex
        # An IOBTree of docid to entry. Better to match our docid wanted set,
        # but we would still have to compare the language code to the entry
        # object itself.
        #
        # self._sortindex
        # An IOBTree of docid to language tag. Looks like the best candidate
        # for us, as we can compare the language directly as a simple string
        # comparison.
        #
        # One thing to keep in mind, is that once we get a wanted set, this
        # will usually have gone through a path query already. This means
        # we will almost always already have a matching set and won't filter
        # out any item at all. So the edge-case of a 100% match is actually
        # the most common one for us.
        #
        # Casual benchmarks show that trying to construct an IOBTree from the
        # wanted set and intersecting it with the sortindex is still slower
        # than having the core loop in Python code.
        tag = lang_tag(main, sub)

        result = IISet()
        for r in res:
            lang = self._sortindex.get(r)
            if lang == tag:
                result.insert(r)
        return result

    result = OOSet()
    for sublanguage in subs:
        result = oo_union(result, self._index[main][sublanguage])

    return IISet(entry.docid for entry in result)
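
The fallback-free branch above is a straight filter over _sortindex. A sketch of that loop with hypothetical data, using an IOBTree of docid to language tag and a wanted IISet:

from BTrees.IIBTree import IISet
from BTrees.IOBTree import IOBTree

sortindex = IOBTree()                 # docid -> language tag
sortindex.update({1: 'de', 2: 'de-at', 3: 'en'})

wanted = IISet([1, 2, 3])
tag = 'de'

result = IISet()
for docid in wanted:
    if sortindex.get(docid) == tag:
        result.insert(docid)
list(result)                          # [1]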
Example #15
def languageindex_search(self, language, fallback=True, res=None):
    main, sub = splitLanguage(language)

    if main not in self._index:
        return None

    if fallback:
        # Search in sorted order, specific sub tag first, None second
        subs = list(self._index[main].keys())
        subs.sort()
        if sub in subs:
            subs.remove(sub)
            subs.insert(0, sub)
    else:
        subs = [sub]

    if not fallback and res is not None:
        # We do not support any optimization when fallback is enabled.
        #
        # TODO: The core loop is not in C here. Casual benchmarks suggest this
        # is still more efficient than trying to move it to C. The problem is
        # that we only have an IISet of docids as an input. We need to filter
        # this per language. The available index structures we have are:
        #
        # IndexEntry objects used as entries. Complex objects storing docid,
        # main and sub languages and UID of the canonical. Their hash and
        # compare function uses the canonical UID.
        #
        # self._index
        # An OOBTreeSet structure per language. In the outermost nodes we have
        # OOBTree's per language. Useful to get all items in a language.
        # Otherwise useless, as we would have to compare the docid attribute
        # of the object in the tree against our wanted set, requiring a full
        # loop over all items.
        #
        # self._unindex
        # An IOBTree of docid to entry. Better to match our docid wanted set,
        # but we would still have to compare the language code to the entry
        # object itself.
        #
        # self._sortindex
        # An IOBTree of docid to language tag. Looks like the best candidate
        # for us, as we can compare the language directly as a simple string
        # comparison.
        #
        # One thing to keep in mind, is that once we get a wanted set, this
        # will usually have gone through a path query already. This means
        # we will almost always already have a matching set and won't filter
        # out any item at all. So the edge-case of a 100% match is actually
        # the most common one for us.
        #
        # Casual benchmarks show that trying to construct an IOBTree from the
        # wanted set and intersecting it with the sortindex is still slower
        # than having the core loop in Python code.
        tag = lang_tag(main, sub)

        result = IISet()
        for r in res:
            lang = self._sortindex.get(r)
            if lang == tag:
                result.insert(r)
        return result

    result = OOSet()
    for sublanguage in subs:
        result = oo_union(result, self._index[main][sublanguage])

    return IISet(entry.docid for entry in result)
Example #16
class ZchSite(ZCatalog.ZCatalog):     
    """A Zch Site is a self contained web-based news publishing and discussion system"""     
    meta_type  ='Zch Site'     
    description='Zch Site'     
     
    security = ClassSecurityInfo()
    security.setPermissionDefault(ManageZch,('Manager',))
    security.setPermissionDefault(AddArticleZch,('Manager',))
    security.setPermissionDefault(AddCommentZch,('Anonymous','Manager',))
    security.setPermissionDefault(View,('Anonymous','Manager',))

    icon       ='misc_/Zch/Zch_img'     
    
    _properties=({'id':'title', 'type':'string','mode':'w'},)     
     
    fileattache=0
    sage=0

    manage_options=({'label':'Contents', 'icon':icon, 'action':'manage_main', 'target':'manage_main'},     
                    {'label':'View', 'icon':'', 'action':'index_html', 'target':'manage_main'},     
                    {'label':'Postings', 'icon':'', 'action':'manage_postings', 'target':'manage_main'},     
                    {'label':'Options', 'icon':'', 'action':'manage_editForm', 'target':'manage_main'},     
                    {'label':'Properties', 'icon':'', 'action':'manage_propertiesForm', 'target':'manage_main'},
                    {'label':'Catalog', 'icon':'', 'action':'manage_catalogView', 'target':'manage_main'},
                    {'label':'Indexes', 'icon':'', 'action':'manage_catalogIndexes', 'target':'manage_main'},
                    {'label':'Security', 'icon':'', 'action':'manage_access', 'target':'manage_main'},
                    {'label':'Undo', 'icon':'', 'action':'manage_UndoForm', 'target':'manage_main'}
                    )     

    security.declareProtected(ManageZch, 'manage_postings')
    manage_postings   = HTMLFile('dtml/manage_postings', globals())

    security.declareProtected(ManageZch, 'manage_editForm')
    manage_editForm   = HTMLFile('dtml/editForm', globals())     

    security.declarePrivate('_buildIndexing')
    def _buildIndexing(self, id, title):
        # Initialise ZCatalog
        if not hasattr(self,'_catalog'):
            ZCatalog.ZCatalog.__init__(self, id, title)

        # delete any existing indexes
        for name in self.indexes():
            self.delIndex(name)
            
        # add the default indexes
        for (name,index_type) in [('meta_type', 'FieldIndex'),
                                  ('author', 'FieldIndex'),
                                  ('body', 'ZCTextIndex'),
                                  ('title', 'ZCTextIndex'),
                                  ('date', 'FieldIndex')]:
            if index_type == 'ZCTextIndex':
                extras = EmptyClass()
                extras.doc_attr = name
                extras.index_type = 'Okapi BM25 Rank'
                extras.lexicon_id = 'lexicon'
                self.addIndex(name, index_type, extra=extras)
            else:
                self.addIndex(name,index_type)
                          
        # delete the default metadata columns
        for name in self.schema():
            self.delColumn(name)

        # Add the meta data columns for search results
        for name in ['id','title','absolute_url','author','date_posted','date','body', 'tnum']:
            self.addColumn(name,'')
      
    security.declareProtected(ManageZch, 'recatalogPostings')
    def recatalogPostings(self,REQUEST=None):
        """ Clear the Catalog and then Index all the postings. """
        self._catalog.clear()
        for article_id in self.ids:
            article = self.data[article_id].__of__(self)
            if type(article.body)==type([]):
                article.body = join(article.body, '\n')
            for comment_id in article.ids:
                comment = self.data[comment_id].__of__(article)
                if type(comment.body)==type([]):
                    comment.body = join(comment.body, '\n')
                self.catalog_object(comment, join(comment.getPhysicalPath(), '/'))
            
            self.catalog_object(article, join(article.getPhysicalPath(), '/'))
                
        if REQUEST is not None:
            return REQUEST.RESPONSE.redirect(REQUEST['HTTP_REFERER'])

    security.declareProtected(ManageZch, 'loadSkelton')
    def loadSkelton(self, REQUEST, skelton='zch'):
        "Add Page Template PythonScript, DTMLMethod and Image read from skelton directory."
        for entry in os.listdir(os.path.join(package_home(globals()), 'skelton', skelton)):
            if entry[-3:] == '.pt' or entry[-4:]=='.pys' or entry[-5:]=='.dtml' or entry[-4:]=='.gif':
                f=open(os.path.join(package_home(globals()), 'skelton', skelton, entry), 'rb') 
                file=f.read()     
                f.close()     
                try:
                    if entry[-3:] == '.pt':
                        id = entry[:-3]
                        manage_addPageTemplate(self, id, '', file, encoding='utf-8')
                    elif entry[-4:] == '.pys':
                        id = entry[:-4]
                        manage_addPythonScript(self,id)
                        self._getOb(id).write(file)
                    elif entry[-5:] == '.dtml':
                        id = entry[:-5]
                        self.manage_addDTMLMethod(id,'',file)     
                    elif entry[-4:] == '.gif':
                        id = entry[:-4]
                        self.manage_addImage(id,file,content_type='image/gif')
                except:
                    pass
        if REQUEST is not None:
            return REQUEST.RESPONSE.redirect(REQUEST['HTTP_REFERER'])


    security.declarePrivate('loadProperties')
    def loadProperties(self, skelton):
        "Add properties from 'properties' file."
        p = re.compile(r'(\w+?):(\w+?)=\s*(.*)\s*')
        newprop = list(self._properties)
        f = open(os.path.join(package_home(globals()), 'skelton', skelton, 'properties'), 'r')
        for s in f:
            if s[0] == '#':
                continue
            m = p.match(s)
            if m:
                newprop.append({'id':m.group(1), 'type':m.group(2), 'mode': 'wd'})
        f.close()
        self._properties = tuple(newprop)     
        f = open(os.path.join(package_home(globals()), 'skelton', skelton, 'properties'), 'r')
        for s in f:
            if s[0] == '#':
                continue
            m = p.match(s)
            if m:
                self._updateProperty(m.group(1), m.group(3))
        f.close()
    

    security.declarePrivate('__init__')
    def __init__(self, id, title, skelton, fileattache, parent, elements):
        if elements:
            from Products.ZCTextIndex.ZCTextIndex import manage_addLexicon
            manage_addLexicon(self,id='lexicon',elements = elements)

        self.__of__(parent)._buildIndexing(id,title)

        t=time()     
        self.created  = t     
        self.modified = t     

        self.fileattache = fileattache

        self.data     =IOBTree()  # id -> Message     
        self.ids      =IISet() # ids of children

        self.loadSkelton(None, skelton)
        self.loadProperties(skelton)
        self.skelton = skelton

    security.declarePublic('__len__')
    def __len__(self):
        return len(self.ids) + 1     
     
    security.declareProtected(View, '__getitem__')
    def __getitem__(self,id):
        """ Get a posting from the ZchSite data store """
    
        # make sure id is an integer
        try:
            if not isinstance(id,IntType):
                id=atoi(id)
        except ValueError:
            raise KeyError, id
    
        # make sure it's in our list of children
        if not self.ids.has_key(id):
            raise KeyError, id
            
        # return the posting
        return self.data[id].__of__(self)
     
    security.declareProtected(View, 'zchcrypt')
    def zchcrypt(self,word,key):        
        import hmac, base64
        h = hmac.new(key)
        h.update(word)
        return base64.encodestring(h.digest())[:-3]

    security.declareProtected(View, 'zchfqdn')
    def zchfqdn(self,n):        
        return getfqdn(n)

    security.declarePrivate('delItem')
    def delItem(self,id):
        if not self.data.has_key(id):
            return

        if self.ids.has_key(id): # article
            article = self.data[id].__of__(self)
            for comment_id in article.ids:     
                obj = self.data[comment_id].__of__(article)
                self.uncatalog_object(obj.getPhysicalPath())
                del self.data[comment_id]
            self.uncatalog_object(article.getPhysicalPath())
            del self.data[id]
            self.ids.remove(id)
        else: # comment
            parent = self.data[self.data[id].parent_id].__of__(self)
            # remove it from its parent's list of ids
            obj = self.data[id].__of__(parent)
            self.uncatalog_object(obj.getPhysicalPath())
            del self.data[id]
            parent.ids.remove(id)
     
    security.declarePrivate('createId')
    def createId(self):     
        id=int(time())     
        while self.data.has_key(id):     
            id=id+1     
        return id     
     
    security.declarePrivate('data_map')
    def data_map(self,ids):
        result=[]
        for id in ids:
            result.append(self.data[id].__of__(self))
        return result
    
    security.declareProtected(View, 'article_list')
    def article_list(self, size=None):
        """ returns article items  """                          
        def cmp_by_modified(x, y):
          return cmp(y.modified, x.modified)
        items = self.data_map(self.ids)
        items.sort(cmp_by_modified)
        if size:
            items = items[:size]
        for i in range(len(items)):
            items[i].sequence_number = i + 1
        return items

    security.declareProtected(ManageZch, 'postingValues')
    postingValues = article_list

    security.declareProtected(View, 'tpId')
    def tpId(self):     
        return self.id     
     
    security.declareProtected(View, 'tpURL')
    def tpURL(self):     
        return self.id     
     
    security.declareProtected(View, 'this')
    def this(self):     
        return self     
     
    security.declareProtected(View, 'site_url')
    def site_url(self):    
        # """ url of the Zch main page """ 
        return self.absolute_url()
     
    security.declareProtected(View, 'has_items')
    def has_items(self):     
        return len(self.ids)     
     
    security.declareProtected(View, 'item_count')
    def item_count(self):     
        return len(self.data)     
     
    security.declareProtected(AddArticleZch, 'addPosting')
    def addPosting(self,file='',REQUEST=None,RESPONSE=None, index=1):
        """ add an article """
        
        id=self.createId()     
     
        msg=Article(id)
        err, sage = msg.__of__(self)._validation(REQUEST,RESPONSE,'delete attachment',file)
        if err:
            return err

        # Set thread number. 
        msg.tnum = '1'

        self.ids.insert(id)     
        self.data[id]=msg

        if index:
            msg.__of__(self).index()

        if RESPONSE:
            return self.showMessage(self, REQUEST=REQUEST, 
                                title='Article Posted',     
                                message  ='Your article has been posted',
                                action=self.absolute_url()
                                )

        return id
     
    security.declareProtected(View, 'search')
    def search(self,REQUEST):     
        """ fulfill a search request """
        if REQUEST.has_key('op') and REQUEST['op']=='articles':
            REQUEST.set('meta_type','Article')
    
        sr=self.__call__(REQUEST)     
        rc=len(sr)     
        return self.showSearchResults(self,REQUEST,search_results=sr,     
                                  result_count=rc)     
     
    security.declareProtected(ManageZch, 'manage_edit')
    def manage_edit(self, REQUEST=None, fileattache=0):     
        """ edit Zch options  """     
        self.fileattache = fileattache

        if REQUEST is not None:
            return REQUEST.RESPONSE.redirect(REQUEST['HTTP_REFERER'])
     
    security.declareProtected(ManageZch, 'manage_delete')
    def manage_delete(self,ids=[],REQUEST=None):     
        """ delete selected articles from a Zch site """     
        ids=map(atoi, ids)     
        for id in ids:     
            self.delItem(id)
        if REQUEST is not None:
            return REQUEST.RESPONSE.redirect(REQUEST['HTTP_REFERER'])

    security.declarePrivate('list_skelton')
    def list_skelton(self):
        skelton = []
        for item in os.listdir(os.path.join(package_home(globals()), 'skelton')):
            skelton.append(item)
        return skelton

        
    # Searchable interface     
    security.declareProtected(View, '__call__')
    def __call__(self, REQUEST=None, internal=0, **kw):        
        brains = apply(self.searchResults,(REQUEST,),kw)
        if internal:
            return map(lambda x: x.getObject(), brains)
        return brains
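
The site's storage follows the same pattern as the Article class earlier: an IOBTree maps integer ids to posting objects while an IISet records which ids belong to a container, giving sorted iteration and cheap membership tests. A minimal sketch with stand-in values:

from BTrees.IIBTree import IISet
from BTrees.IOBTree import IOBTree

data = IOBTree()    # id -> posting object (plain strings here)
ids = IISet()       # ids of this container's children

new_id = 1300000000
data[new_id] = 'first article'
ids.insert(new_id)

ids.has_key(new_id)        # true -- the membership test used in __getitem__
[data[i] for i in ids]     # children in ascending id order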
Example #17
    def search(self,
               path,
               default_level=0,
               depth=-1,
               navtree=0,
               navtree_start=0):
        """
        path is either a string representing a
        relative URL or a part of a relative URL or
        a tuple (path,level).

        level >= 0  starts searching at the given level
        level <  0  not implemented yet
        """

        if isinstance(path, basestring):
            startlevel = default_level
        else:
            startlevel = int(path[1])
            path = path[0]

        absolute_path = isinstance(path, basestring) and path.startswith('/')
        comps = filter(None, path.split('/'))

        orig_comps = [''] + comps[:]

        if depth > 0:
            raise ValueError("Can't do depth searches anymore")
        if not comps:
            comps = ['dmd']
            startlevel = 1
        else:
            if comps[0] == getCSEConf().get('virtualroot',
                                            '').replace('/', ''):
                comps = comps[1:]
            if comps[0] == 'zport':
                comps = comps[1:]

        if comps[0] != 'dmd':
            raise ValueError("Depth searches must start with 'dmd'")
        startlevel = len(comps)

        if len(comps) == 0:
            if depth == -1 and not navtree:
                return IISet(self._unindex.keys())

        # Make sure that we get depth = 1 if in navtree mode
        # unless specified otherwise

        orig_depth = depth
        if depth == -1:
            depth = 0 or navtree

        # Optimized navtree starting with absolute path
        if absolute_path and navtree and depth == 1 and default_level == 0:
            set_list = []
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i])
                parent_path = parent_path and parent_path or '/'
                try:
                    set_list.append(self._index_parents[parent_path])
                except KeyError:
                    pass
            return multiunion(set_list)
        # Optimized breadcrumbs
        elif absolute_path and navtree and depth == 0 and default_level == 0:
            item_list = IISet()
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i])
                parent_path = parent_path and parent_path or '/'
                try:
                    item_list.insert(self._index_items[parent_path])
                except KeyError:
                    pass
            return item_list
        # Specific object search
        elif absolute_path and orig_depth == 0 and default_level == 0:
            try:
                return IISet([self._index_items[path]])
            except KeyError:
                return IISet()
        # Single depth search
        elif absolute_path and orig_depth == 1 and default_level == 0:
            # only get objects contained in requested folder
            try:
                return self._index_parents[path]
            except KeyError:
                return IISet()
        # Sitemaps, relative paths, and depth queries
        elif startlevel >= 0:

            pathset = None  # Same as pathindex
            navset = None  # For collecting siblings along the way
            depthset = None  # For limiting depth

            if navtree and depth and \
                   self._index.has_key(None) and \
                   self._index[None].has_key(startlevel):
                navset = self._index[None][startlevel]
            for level in range(startlevel, startlevel + len(comps)):
                if level <= len(comps):
                    comp = "/".join(comps[:level])
                    if (not self._index.has_key(comp)
                            or not self._index[comp].has_key(level)):
                        # Navtree is inverse, keep going even for
                        # nonexisting paths
                        if navtree:
                            pathset = IISet()
                        else:
                            return IISet()
                    else:
                        return self._index[comp][level]
                    if navtree and depth and \
                           self._index.has_key(None) and \
                           self._index[None].has_key(level+depth):
                        navset = union(
                            navset,
                            intersection(pathset,
                                         self._index[None][level + depth]))
                if level - startlevel >= len(comps) or navtree:
                    if (self._index.has_key(None)
                            and self._index[None].has_key(level)):
                        depthset = union(
                            depthset,
                            intersection(pathset, self._index[None][level]))

            if navtree:
                return union(depthset, navset) or IISet()
            elif depth:
                return depthset or IISet()
            else:
                return pathset or IISet()

        else:
            results = IISet()
            for level in range(0, self._depth + 1):
                ids = None
                error = 0
                for cn in range(0, len(comps)):
                    comp = comps[cn]
                    try:
                        ids = intersection(ids, self._index[comp][level + cn])
                    except KeyError:
                        error = 1
                if error == 0:
                    results = union(results, ids)
            return results
Example #18
 def get(self, key, default=None):
     """Return the matched word against the key."""
     r=IISet()
     wid=self._lexicon.get(key, default)
     if wid is not None: r.insert(wid)
     return r