def setup_catalog(context):
    """Create (if missing) and configure the 'marginalia_catalog' ZCatalog.

    Installs a plaintext lexicon and the 'edit_type', 'note' and
    'link_title' indexes on the portal's marginalia catalog.  Safe to run
    repeatedly: existing indexes and the lexicon are left untouched.

    :param context: setup context providing ``getSite()`` (the portal).
    """
    portal = context.getSite()
    catalog_name = 'marginalia_catalog'
    try:
        catalog = cmfutils.getToolByName(portal, catalog_name)
    except AttributeError:
        # Catalog tool not present yet -- register a fresh one on the portal.
        catalog = ZCatalog(catalog_name, u'Marginalia catalog', None, portal)
        portal._setObject(catalog_name, catalog)

    # Extra record required by ZCTextIndex: which lexicon to use and the
    # ranking algorithm.
    plaintext_extra = SimpleRecord(lexicon_id='plaintext_lexicon',
                                   index_type='Okapi BM25 Rank')
    indexes = catalog.indexes()
    # BUG FIX: removed unused local `columns = catalog.schema()` -- its value
    # was never read and the call has no side effects.

    # Install the lexicon used by the ZCTextIndex below, if not yet present.
    # NOTE(review): hasattr() on an acquisition-wrapped object can find an
    # inherited attribute; an objectIds() check would be stricter -- kept
    # as-is to preserve behavior.
    _id = 'plaintext_lexicon'
    if not hasattr(catalog, _id):
        lexicon = PLexicon(_id, '', Splitter(), CaseNormalizer(),
                           StopWordRemover())
        catalog._setObject(_id, lexicon)

    # Add any missing indexes; `extra` is only meaningful for ZCTextIndex.
    for indexName, indexType, extra in (
            ('edit_type', 'FieldIndex', None),
            ('note', 'ZCTextIndex', plaintext_extra),
            ('link_title', 'FieldIndex', None)):
        if indexName not in indexes:
            catalog.addIndex(indexName, indexType, extra=extra)
def setup_catalog(context):
    """Create (if missing) and configure the 'marginalia_catalog' ZCatalog.

    Idempotent installer: registers the catalog on the portal when absent,
    installs the plaintext lexicon, and adds the 'edit_type', 'note' and
    'link_title' indexes only when they do not already exist.

    :param context: setup context providing ``getSite()`` (the portal).
    """
    portal = context.getSite()
    catalog_name = 'marginalia_catalog'
    try:
        catalog = cmfutils.getToolByName(portal, catalog_name)
    except AttributeError:
        # register catalog
        catalog = ZCatalog(catalog_name, u'Marginalia catalog', None, portal)
        portal._setObject(catalog_name, catalog)

    # ZCTextIndex configuration: lexicon to use plus the ranking algorithm.
    plaintext_extra = SimpleRecord(lexicon_id='plaintext_lexicon',
                                   index_type='Okapi BM25 Rank')
    indexes = catalog.indexes()
    # BUG FIX: dropped the unused `columns = catalog.schema()` assignment;
    # the result was never consulted.

    # install lexicon
    _id = 'plaintext_lexicon'
    if not hasattr(catalog, _id):
        lexicon = PLexicon(
            _id, '', Splitter(), CaseNormalizer(), StopWordRemover())
        catalog._setObject(_id, lexicon)

    for indexName, indexType, extra in (
            ('edit_type', 'FieldIndex', None),
            ('note', 'ZCTextIndex', plaintext_extra),
            ('link_title', 'FieldIndex', None)):
        if indexName not in indexes:
            catalog.addIndex(indexName, indexType, extra=extra)
def setUp(self):
    from Products.ZCatalog.ZCatalog import ZCatalog
    # Minimal fixture: a folder acting as its own physical root,
    # holding a catalog with a single 'id' field index.
    app = Folder('')
    app.getPhysicalRoot = lambda: app
    zcat = ZCatalog('catalog')
    zcat.addIndex('id', 'FieldIndex')
    self.root = app
    self.root.catalog = zcat
def test_fixOkapiIndexes(self): catalog = ZCatalog('catalog') catalog.lexicon = PLexicon('lexicon') catalog.addIndex('test', ZCTextIndex('test', index_factory=OkapiIndex, caller=catalog, lexicon_id='lexicon')) catalog.Indexes['test'].index._totaldoclen = -1000 from plone.app.upgrade.v41.final import fixOkapiIndexes fixOkapiIndexes(catalog) self.assertEqual(0L, catalog.Indexes['test'].index._totaldoclen())
def test_fixOkapiIndexes(self):
    # Set up a catalog with a single Okapi BM25 text index whose
    # document-length accumulator has gone negative (the known bug).
    catalog = ZCatalog('catalog')
    catalog.lexicon = PLexicon('lexicon')
    broken = ZCTextIndex('test', index_factory=OkapiIndex,
                         caller=catalog, lexicon_id='lexicon')
    catalog.addIndex('test', broken)
    catalog.Indexes['test'].index._totaldoclen = -1000
    # Running the upgrade step should reset the accumulator to zero.
    from plone.app.upgrade.v41.final import fixOkapiIndexes
    fixOkapiIndexes(catalog)
    self.assertEqual(0, catalog.Indexes['test'].index._totaldoclen())
class CollaborationTool (UniqueObject, CollaborationFolder):
    """Portal tool for managing collaboration requests between users.

    Requests are tracked in an internal ZCatalog indexed by requester,
    status and user, so pending/accepted/declined collaborations can be
    queried efficiently.
    """

    __implements__ = ICollaborationTool

    id = 'portal_collaboration'
    meta_type = 'Collaboration Tool'
    _actions = ()
    security = ClassSecurityInfo()

    # ZMI tabs: tool overview page and direct access to the internal catalog,
    # prepended to the inherited folder management tabs.
    manage_options=( (
        { 'label' : 'Overview' , 'action' : 'manage_overview' },
        { 'label' : 'Catalog', 'action' : 'manage_catalog' },
        ) + CollaborationFolder.manage_options)

    #Define all the information needed for optional roles here:
    #The dictionary is keyed by the attribute that stores the role
    #The value is a tuple of the role name, the role display
    #name, and the byline for giving attribution for that role.
    #Note: The attribute name (key) is defined as:
    #role_name.lower()+'s'
    optional_role_info = {'editors':('Editor','Editors','Edited By'),
                          'translators':('Translator','Translators',
                                         'Translated By')}

    def __init__(self):
        # Create the ZCatalog instance that tracks collaboration requests.
        # Indexes support querying; columns make the same values available
        # as catalog-brain metadata without waking the objects.
        self.catalog = ZCatalog('catalog')
        self.catalog.addIndex('requester', 'FieldIndex')
        self.catalog.addIndex('status', 'FieldIndex')
        self.catalog.addIndex('user', 'FieldIndex')
        self.catalog.addColumn('requester')
        self.catalog.addColumn('roles')
        self.catalog.addColumn('status')
        self.catalog.addColumn('user')
        # Mark persistent state as changed so ZODB records the new catalog.
        self._p_changed=1

    #
    # ZMI methods
    #
    security.declareProtected(ManagePortal, 'manage_overview')
    manage_overview = PageTemplateFile('zpt/explainCollaborationTool',
                                       globals())

    def manage_catalog(self, REQUEST=None):
        """Access to the ZCatalog of objects"""
        # Redirect the browser straight to the catalog's ZMI view.
        if REQUEST is not None:
            REQUEST['RESPONSE'].redirect(self.catalog.absolute_url()+'/manage_catalogView')
class UnicodeTextIndexCatalogTest(unittest.TestCase):
    """Exercise TextIndex and KeywordIndex with mixed ASCII/unicode content.

    Catalogs five test objects (o1-o5), some containing non-ASCII German
    text, then checks that both plain-string and unicode queries find the
    expected objects.
    """

    def setUp(self):
        # Catalog with a text index + column and a keyword index + column.
        self.cat = ZCatalog("catalog")
        self.cat.addIndex('text',"TextIndex")
        self.cat.addColumn('text')
        self.cat.addIndex('kw','KeywordIndex')
        self.cat.addColumn('kw')

        # Test objects: TO(text, keywords).  t3/t4/t5 mix in unicode and
        # non-ASCII content to exercise encoding handling.
        t1 = TO('the quick brown fox jumps over the lazy dog',
                ['quick','fox'])
        t2 = TO('i am the nice alien from the future',
                ['alien','future'])
        t3 = TO('i am a brown fox dancing with a future alien',
                ['zerstört','könnten'])
        t4 = TO('i am a brown ' + unicode('fox') + ' dancing with a future alien',
                [])
        t5 = TO(""" Die USA und Großbritannien können nach der Zerstörung der afghanischen Luftabwehr nun rund um die Uhr Angriffe fliegen. Das gab Verteidigungsminister Donald Rumsfeld bekannt. Bei den dreitägigen Angriffen seien auch bis auf einen alle Flugplätze der Taliban zerstört worden. Rumsfeld erklärte weiter, er könne die Berichte nicht bestätigen, wonach bei den amerikanischen Angriffen vier afghanische Mitarbeiter einer von den UN finanzierten Hilfsorganisation getötet wurden. Diese könnten auch durch Gegenfeuer der Taliban getötet worden sein. 
""",[unicode('dreitägigen','latin1'),'zerstört'])

        self.cat.catalog_object(t1,"o1")
        self.cat.catalog_object(t2,"o2")
        self.cat.catalog_object(t3,"o3")
        self.cat.catalog_object(t4,"o4")
        self.cat.catalog_object(t5,"o5")

        # (query term, expected object ids) pairs for the text index ...
        self.tests = [('quick',('o1',)),
                      ('fox',('o1','o3','o4')),
                      ('afghanischen', ('o5',)),
                      ('dreitägigen',('o5',))
                      ]
        # ... and for the keyword index.
        self.kw_tests = [ ('quick',('o1',) ),
                          ('zerstört',('o3','o5')),
                          ('dreitägigen',('o5',))
                          ]

    def _doTests(self,tests,field,test_unicode=0):
        # Run each (query, expected) pair against `field`; when
        # test_unicode is set, the query string is decoded from latin-1
        # first so the search goes in as a unicode object.
        for q,objs in tests:
            if test_unicode:
                res=self.cat.searchResults({field:{'query':unicode(q,'latin1')}})
            else:
                res=self.cat.searchResults({field:{'query':q}})
            got = [ x.getURL() for x in res]
            got.sort()
            expected = list(objs)
            expected.sort()
            assert got == expected, \
                "%s: got: %s, expected: %s" % (q,got,expected)

    def testAsciiQuery(self):
        """ ascii query textindex """
        self._doTests(self.tests, 'text', test_unicode=0)

    def testUnicodeQuery(self):
        """ unicode query textindex """
        self._doTests(self.tests, 'text', test_unicode=1)
class TestIndexedAttrs(ZopeTestCase.ZopeTestCase):
    """Tests for ExtendedPathIndex's ``indexed_attrs`` customization.

    Verifies the default behavior (indexing ``getPhysicalPath``) and the
    various ways ``indexed_attrs`` can override the indexed attribute:
    record object, mapping, list, or comma-separated string.
    """

    def afterSetUp(self):
        # A catalog plus a Dummy object exposing three path flavors:
        # physical path, a custom tuple path, and a string path.
        self.catalog = ZCatalog("catalog")
        self.folder._setObject("dummy", Dummy("dummy"))
        self.dummy = self.folder.dummy
        self.physical_path = "/".join(self.dummy.getPhysicalPath())
        self.custom_path = "/".join(self.dummy.getCustomPath())
        self.string_path = self.dummy.getStringPath()

    def addIndex(self, id="path", extra=None):
        # Helper: add an ExtendedPathIndex and return the index object.
        self.catalog.addIndex(id, "ExtendedPathIndex", extra)
        return self.catalog.Indexes[id]

    def testAddIndex(self):
        self.catalog.addIndex("path", "ExtendedPathIndex")
        try:
            self.catalog.Indexes["path"]
        except KeyError:
            self.fail("Failed to create index")

    def testDefaultIndexedAttrs(self):
        # By default we don't have indexed_attrs at all
        idx = self.addIndex()
        self.failIf(hasattr(idx, "indexed_attrs"))

    def testDefaultIndexSourceNames(self):
        # However, getIndexSourceNames returns 'getPhysicalPath'
        idx = self.addIndex()
        self.assertEqual(idx.getIndexSourceNames(), ("getPhysicalPath",))

    def testDefaultIndexObject(self):
        # By default PathIndex indexes getPhysicalPath
        idx = self.addIndex()
        idx.index_object(123, self.dummy)
        self.assertEqual(idx.getEntryForObject(123), self.physical_path)

    def testDefaultSearchObject(self):
        # We can find the object in the catalog by physical path
        self.addIndex()
        self.catalog.catalog_object(self.dummy)
        self.assertEqual(len(self.catalog(path=self.physical_path)), 1)

    def testDefaultSearchDictSyntax(self):
        # PathIndex supports dictionary syntax for queries
        self.addIndex()
        self.catalog.catalog_object(self.dummy)
        self.assertEqual(len(self.catalog(path={"query": self.physical_path})), 1)

    def testExtraAsRecord(self):
        # 'extra' can be a record type object
        idx = self.addIndex(extra=Record(indexed_attrs="getCustomPath"))
        self.assertEqual(idx.indexed_attrs, ("getCustomPath",))

    def testExtraAsMapping(self):
        # or a dictionary
        idx = self.addIndex(extra={"indexed_attrs": "getCustomPath"})
        self.assertEqual(idx.indexed_attrs, ("getCustomPath",))

    def testCustomIndexSourceNames(self):
        # getIndexSourceNames returns the indexed_attrs
        idx = self.addIndex(extra={"indexed_attrs": "getCustomPath"})
        self.assertEqual(idx.getIndexSourceNames(), ("getCustomPath",))

    def testCustomIndexObject(self):
        # PathIndex indexes getCustomPath
        idx = self.addIndex(extra={"indexed_attrs": "getCustomPath"})
        idx.index_object(123, self.dummy)
        self.assertEqual(idx.getEntryForObject(123), self.custom_path)

    def testCustomSearchObject(self):
        # We can find the object in the catalog by custom path
        self.addIndex(extra={"indexed_attrs": "getCustomPath"})
        self.catalog.catalog_object(self.dummy)
        self.assertEqual(len(self.catalog(path=self.custom_path)), 1)

    def testStringIndexObject(self):
        # PathIndex accepts a path as tuple or string
        idx = self.addIndex(extra={"indexed_attrs": "getStringPath"})
        idx.index_object(123, self.dummy)
        self.assertEqual(idx.getEntryForObject(123), self.string_path)

    def testStringSearchObject(self):
        # And we can find the object in the catalog again
        self.addIndex(extra={"indexed_attrs": "getStringPath"})
        self.catalog.catalog_object(self.dummy)
        self.assertEqual(len(self.catalog(path=self.string_path)), 1)

    def testIdIndexObject(self):
        # PathIndex prefers an attribute matching its id over getPhysicalPath
        idx = self.addIndex(id="getId")
        idx.index_object(123, self.dummy)
        self.assertEqual(idx.getEntryForObject(123), "dummy")

    def testIdIndexObjectIndexedAttrsOverride(self):
        # BUG FIX: this method was also named 'testIdIndexObject', which
        # silently shadowed the previous test so it never ran; renamed so
        # both tests execute.
        # Using indexed_attr overrides the id-matching behavior
        idx = self.addIndex(id="getId",
                            extra={"indexed_attrs": "getCustomPath"})
        idx.index_object(123, self.dummy)
        self.assertEqual(idx.getEntryForObject(123), self.custom_path)

    def testListIndexedAttr(self):
        # indexed_attrs can be a list
        idx = self.addIndex(id="getId",
                            extra={"indexed_attrs": ["getCustomPath", "foo"]})
        # only the first attribute is used
        self.assertEqual(idx.getIndexSourceNames(), ("getCustomPath",))

    def testStringIndexedAttr(self):
        # indexed_attrs can also be a comma separated string
        idx = self.addIndex(id="getId",
                            extra={"indexed_attrs": "getCustomPath, foo"})
        # only the first attribute is used
        self.assertEqual(idx.getIndexSourceNames(), ("getCustomPath",))

    def testEmtpyListAttr(self):
        # Empty indexed_attrs falls back to defaults
        idx = self.addIndex(extra={"indexed_attrs": []})
        self.assertEqual(idx.getIndexSourceNames(), ("getPhysicalPath",))

    def testEmtpyStringAttr(self):
        # Empty indexed_attrs falls back to defaults
        idx = self.addIndex(extra={"indexed_attrs": ""})
        self.assertEqual(idx.getIndexSourceNames(), ("getPhysicalPath",))
class TestZCatalog(unittest.TestCase):
    """Unit tests for ZCatalog cataloging, metadata and object resolution.

    setUp catalogs ten zdummy objects keyed by stringified number; a
    custom resolve_path maps those uids back to the objects via self.d.
    """

    def setUp(self):
        from Products.ZCatalog.ZCatalog import ZCatalog
        self._catalog = ZCatalog('Catalog')
        # Resolve uids through our own dict instead of Zope traversal.
        self._catalog.resolve_path = self._resolve_num
        self._catalog.addIndex('title', 'KeywordIndex')
        self._catalog.addColumn('title')

        self.upper = 10
        self.d = {}
        for x in range(0, self.upper):
            # make uid a string of the number
            ob = zdummy(x)
            self.d[str(x)] = ob
            self._catalog.catalog_object(ob, str(x))

    def _resolve_num(self, num):
        # Stand-in for path resolution: uid -> test object.
        return self.d[num]

    def test_z2interfaces(self):
        # Zope 2 interface verification.
        from Interface.Verify import verifyClass
        from Products.ZCatalog.IZCatalog import IZCatalog
        from Products.ZCatalog.ZCatalog import ZCatalog
        verifyClass(IZCatalog, ZCatalog)

    def test_z3interfaces(self):
        # Zope 3 (zope.interface) verification.
        from Products.ZCatalog.interfaces import IZCatalog
        from Products.ZCatalog.ZCatalog import ZCatalog
        from zope.interface.verify import verifyClass
        verifyClass(IZCatalog, ZCatalog)

    def testGetMetadataForUID(self):
        testNum = str(self.upper - 3) # as good as any..
        data = self._catalog.getMetadataForUID(testNum)
        self.assertEqual(data['title'], testNum)

    def testGetIndexDataForUID(self):
        testNum = str(self.upper - 3)
        data = self._catalog.getIndexDataForUID(testNum)
        self.assertEqual(data['title'][0], testNum)

    def testSearch(self):
        # searchResults() and its search() alias should agree.
        query = {'title': ['5', '6', '7']}
        sr = self._catalog.searchResults(query)
        self.assertEqual(len(sr), 3)
        sr = self._catalog.search(query)
        self.assertEqual(len(sr), 3)

    def testUpdateMetadata(self):
        # update_metadata=0 keeps old metadata; =1 refreshes it.
        self._catalog.catalog_object(zdummy(1), '1')
        data = self._catalog.getMetadataForUID('1')
        self.assertEqual(data['title'], '1')
        self._catalog.catalog_object(zdummy(2), '1', update_metadata=0)
        data = self._catalog.getMetadataForUID('1')
        self.assertEqual(data['title'], '1')
        self._catalog.catalog_object(zdummy(2), '1', update_metadata=1)
        data = self._catalog.getMetadataForUID('1')
        self.assertEqual(data['title'], '2')
        # update_metadata defaults to true, test that here
        self._catalog.catalog_object(zdummy(1), '1')
        data = self._catalog.getMetadataForUID('1')
        self.assertEqual(data['title'], '1')

    def testReindexIndexDoesntDoMetadata(self):
        # reindexIndex must update only the index, never the metadata.
        self.d['0'].num = 9999
        self._catalog.reindexIndex('title', {})
        data = self._catalog.getMetadataForUID('0')
        self.assertEqual(data['title'], '0')

    def testReindexIndexesFalse(self):
        # setup
        false_id = self.upper + 1
        ob = zdummyFalse(false_id)
        self.d[str(false_id)] = ob
        self._catalog.catalog_object(ob, str(false_id))
        # test, object evaluates to false; there was bug which caused the
        # object to be removed from index
        ob.num = 9999
        self._catalog.reindexIndex('title', {})
        result = self._catalog(title='9999')
        self.assertEquals(1, len(result))

    def testBooleanEvalOn_manage_catalogObject(self):
        # Objects whose __len__/__nonzero__ raise must not be boolean-tested.
        self.d['11'] = dummyLenFail(11, self.fail)
        self.d['12'] = dummyNonzeroFail(12, self.fail)
        # create a fake response that doesn't bomb on manage_catalogObject()
        class myresponse:
            def redirect(self, url):
                pass
        # this next call should not fail
        self._catalog.manage_catalogObject(None, myresponse(), 'URL1',
                                           urls=('11', '12'))

    def testBooleanEvalOn_refreshCatalog_getobject(self):
        # wrap catalog under the fake parent providing unrestrictedTraverse()
        catalog = self._catalog.__of__(fakeparent(self.d))
        # replace entries to test refreshCatalog
        self.d['0'] = dummyLenFail(0, self.fail)
        self.d['1'] = dummyNonzeroFail(1, self.fail)
        # this next call should not fail
        catalog.refreshCatalog()
        for uid in ('0', '1'):
            rid = catalog.getrid(uid)
            # neither should these
            catalog.getobject(rid)

    def test_getobject_doesntMaskTraversalErrorsAndDoesntDelegateTo_resolve_url(
            self):
        # wrap catalog under the fake parent providing unrestrictedTraverse()
        catalog = self._catalog.__of__(fakeparent(self.d))
        # make resolve_url fail if ZCatalog falls back on it
        def resolve_url(path, REQUEST):
            self.fail(".resolve_url() should not be called by .getobject()")
        catalog.resolve_url = resolve_url
        # traversal should work at first
        rid0 = catalog.getrid('0')
        # lets set it up so the traversal fails
        del self.d['0']
        self.assertRaises(FakeTraversalError,
                          catalog.getobject, rid0, REQUEST=object())
        # and if there is a None at the traversal point, that's where it should return
        self.d['0'] = None
        self.assertEquals(catalog.getobject(rid0), None)
class Repository(UniqueObject, DynamicType, StorageManager, BTreeFolder2): """Rhaptos Version Repository tool""" __implements__ = (IRepository, StorageManager.__implements__, DynamicType.__implements__) meta_type = 'Repository' security = AccessControl.ClassSecurityInfo() # Fake out the types tool since we can't really use the Factory creation method portal_type = 'Repository' dsw = default_search_weights = {'fulltext':1,'abstract':1,'subject':10,'keyword':10, 'author':50, 'translator':40,'editor':20, 'maintainer':10, 'licensor':10, 'institution':10, 'exact_title':100, 'title':10, 'language':5, 'containedIn':200, 'parentAuthor':0, 'containedAuthor':0, 'objectid':1000} # placeholder attributes for otherwise-not-present catalog columns/indexes fields = {} matched = {} weight = 0 sortTitle = None # FIXME: I don't think this should even be a column; nothing provides it. A fine index, though. default_browse_batch_size = 50 __allow_access_to_unprotected_subobjects__ = 1 # ZMI methods manage_options=( BTreeFolder2.manage_options + ( { 'label' : 'Overview', 'action' : 'manage_overview' }, { 'label' : 'Catalog', 'action' : 'manage_catalog' }) ) manage_overview = DTMLFile( 'explainRepository', globals() ) def manage_catalog(self, REQUEST=None): """Access to the ZCatalog of versioned objects""" if REQUEST is not None: REQUEST['RESPONSE'].redirect(self.catalog.absolute_url()+'/manage_catalogView') def __init__(self, id, title=''): """Initialize Repository object""" StorageManager.__init__(self, id) self.OAI = OAIHandler('OAI') self.title = title self._create_catalog() self._create_cache() #results cache needs better invalidation code - consider fake=True if you're getting conflicts on publish # Copied from PortalContent def __call__(self): ''' Invokes the default view. 
''' view = _getViewFor(self) if getattr(aq_base(view), 'isDocTemp', 0): return view(self, self.REQUEST) else: return view() def _create_module(self, key, data): """Create a module in ZODB from data in the postgres db""" from Products.RhaptosModuleStorage.ModuleVersionFolder import \ ModuleVersionStub # Create a module version stub (e.g. /plone/content/m9001) storage = self.getStorageForType('Module') mvs = ModuleVersionStub(data['id'], storage=storage.id) self._setObject(data['id'], mvs, set_owner=0) logger.debug('Created ModuleVersionStub %s' % '/'.join(mvs.getPhysicalPath())) # Code copied from publishObject self.cache.clearSearchCache() #FIXME: these things shouldn't be done here, but with some sort of # event system hitcount update hitcount = getToolByName(self, 'portal_hitcount', None) if hitcount: hitcount.registerObject(data['id'], DateTime()) # Not going to trigger pdf production here # (storage.notifyObjectRevised), should be in cnx-publishing # instead pubobj = storage.getObject(data['id'], 'latest') self.catalog.catalog_object(pubobj) logger.debug('Add %s to catalog' % '/'.join(pubobj.getPhysicalPath())) def _create_collection(self, key, data): """Create a collection in ZODB from data in the postgres db""" moduledb_tool = getToolByName(self, 'portal_moduledb') # Get collection tree tree = moduledb_tool.sqlGetCollectionTree( id=data['id'], version=data['version'], aq_parent=self).tuples() if not tree or tree[0][0] is None: logger.debug('Unable to get collection tree for %s' % key) # can't get the collection tree, nothing to do raise KeyError(key) tree = simplejson.loads(tree[0][0].decode('utf-8')) # Create a version folder (e.g. 
/plone/content/col11554) storage = self.getStorageForType('Collection') if data['id'] not in self.objectIds(): vf = VersionFolder(data['id'], storage=storage.id) self._setObject(data['id'], vf, set_owner=0) logger.debug('Created VersionFolder %s' % '/'.join(vf.getPhysicalPath())) vf = getattr(self, data['id']) # Create a collection (e.g. /plone/content/col11554/1.1) collection = _createObjectByType('Collection', vf, data['version']) collection.objectId = data['id'] collection.version = data['version'] collection.created = DateTime(data['_created'].isoformat()) collection.revised = DateTime(data['_revised'].isoformat()) for k, v in dict(title=data['name'], authors=data['authors'], maintainers=data['maintainers'], licensors=data['licensors'], _parent_id=data['parent_id'], _parent_version=data['parent_version'], parentAuthors=data['parentAuthors'], language=data['language'], subject=data['_subject'].split(', ')).items(): setattr(collection, k, v) if data.has_key('print_style'): collection.parameters.manage_addProperty('printstyle',data['print_style'],'string') logger.debug('Created collection %s' % '/'.join(collection.getPhysicalPath())) logger.debug(str(collection.propertyItems())) # Code copied from publishObject self.cache.clearSearchCache() #FIXME: these things shouldn't be done here, but with some sort of # event system hitcount update hitcount = getToolByName(self, 'portal_hitcount', None) if hitcount: hitcount.registerObject(data['id'], DateTime()) # Not going to trigger pdf production here # (storage.notifyObjectRevised), should be in cnx-publishing # instead modules = [] def create_objects(contents, folder): # Create the top level objects for node in contents: new_folder = None if node['id'] == 'subcol': new_folder = _createObjectByType( 'SubCollection', folder, folder.generateUniqueId('SubCollection')) new_folder.title = node['title'] logger.debug('Created subcollection: %s' % new_folder) elif node['id'].startswith('m'): if node['id'] not in 
self.objectIds(): # Create the module if it doesn't exist module = self[node['id']] obj = _createObjectByType( 'PublishedContentPointer', folder, node['id']) obj.moduleId = node['id'] obj.version = 'latest' # FIXME - should track if original was set to latest, but # that info is not sent properly to the DB, nor returned # in the json # if node['latest']: # obj.version = 'latest' # else: # obj.version = node['version'] modules.append((node['id'], node['version'])) logger.debug('Created PublishedContentPointer %s@%s' % (obj.getModuleId(), obj.getVersion())) # Create all the objects in "contents" if new_folder: create_objects(node.get('contents') or [], new_folder) # Create SubCollections and PublishedContentPointer according to # the collection tree create_objects(tree['contents'], collection) # Copied from Products.RhaptosRepository.VersionFolder.checkinResource if 'latest' not in vf.objectIds(): addLatestReference(vf, 'latest', collection.Title(), collection.version) logger.debug('Added latest reference') collection.submitter = data['submitter'] collection.submitlog = data['submitlog'] collection.state = 'public' logger.debug('Finished creating collection') # Create collection.xml if it doesn't exist in postgres filenames = moduledb_tool.sqlGetModuleFilenames( id=data['id'], version=data['version']).tuples() if filenames and 'collection.xml' not in str(filenames): logger.debug('Create collection.xml for %s' % key) xml = collection.restrictedTraverse('source_create')() res = moduledb_tool.sqlInsertFile(file = Binary(xml), media_type='text/xml') fid = res[0].fileid moduledb_tool.sqlInsertModuleFile( moduleid=collection.objectId, version=collection.version, fileid=fid, filename='collection.xml', mimetype='text/xml', aq_parent=self) pubobj = storage.getObject(data['id'], 'latest') self.catalog.catalog_object(pubobj) logger.debug('Add %s to catalog' % '/'.join(pubobj.getPhysicalPath())) def __getitem__(self, key): try: # Try returning the object try: return 
getattr(self.aq_inner, key) except AttributeError: pass except TypeError: # key is None or not a string raise KeyError('bad type') # The key has to be either in the format of col12345 or m12345 m = re.match('(col|m)([0-9]+)$', key) if not m: raise KeyError(key) # The key is not in the ZODB, look for it in the postgres db moduledb_tool = getToolByName(self, 'portal_moduledb') data = moduledb_tool.sqlGetLatestModule(id=key).dictionaries() if not data: # The key isn't in the postgres db either raise KeyError(key) data = data[0] if m.group(1) == 'm': # Create a module logger.debug('Create module %s from postgres' % key) self._create_module(key, data) logger.debug('Created module %s from postgres' % key) elif m.group(1) == 'col': # Create a collection print('logger.level: %s' % logger.level) logger.debug('Create collection %s from postgres' % key) #History is descending in time- newest first history = moduledb_tool.sqlGetHistory(id=key).dictionaries() prev_ver='' #need to skip multiple minor versions from rewrite for item in history: if item['version'] == prev_ver: continue; data = moduledb_tool.sqlGetModule( id=key, version=item['version']).dictionaries() if data: data = data[0] logger.debug('Create collection %s version %s' % (data['id'], data['version'])) self._create_collection(key, data) prev_ver = item['version'] logger.debug('Created collection %s from postgres' % key) except KeyError: # No need to log raise except Exception: # This function often silently fails, so adding explicit logging logger.exception('Something failed in %s' % self.__getitem__) raise return getattr(self, key) index_html = __call__ security.declarePublic("Title") def Title(self): """Fulfil new-ish interface expectations for title (so we work with breadcrumbs, etc)""" return self.title security.declarePrivate("_create_catalog") def _create_catalog(self): """Creates the ZCatalog instance for versioned objects""" self.catalog = ZCatalog('catalog') lexicon = PLexicon('lexicon', '' , Splitter(), 
CaseNormalizer(), StopWordAndSingleCharRemover()) self.catalog._setObject('lexicon', lexicon) ZCText_extras = Empty() ZCText_extras.doc_attr = 'abstract' ZCText_extras.index_type = 'Okapi BM25 Rank' ZCText_extras.lexicon_id = 'lexicon' self.catalog.addIndex('abstract', 'ZCTextIndex', ZCText_extras) ZCText_extras.doc_attr = 'Title' self.catalog.addIndex('Title', 'ZCTextIndex', ZCText_extras) ZCText_extras.doc_attr = 'institution' self.catalog.addIndex('institution', 'ZCTextIndex', ZCText_extras) ZCText_extras.doc_attr = 'keywords' self.catalog.addIndex('keywordstext', 'ZCTextIndex', ZCText_extras) self.catalog.addIndex('atomicInstitution', 'FieldIndex', {'indexed_attrs':'institution'}) self.catalog.addIndex('authors', 'KeywordIndex') self.catalog.addIndex('parentAuthors', 'KeywordIndex') self.catalog.addIndex('maintainers', 'KeywordIndex') self.catalog.addIndex('language', 'KeywordIndex') self.catalog.addIndex('modified', 'DateIndex') self.catalog.addIndex('revised', 'DateIndex') self.catalog.addIndex('objectId', 'FieldIndex') self.catalog.addIndex('portal_type', 'FieldIndex') self.catalog.addIndex('containedModuleIds', 'KeywordIndex') self.catalog.addIndex('subject', 'KeywordIndex') extra=Empty() extra.indexed_attrs='keywords' self.catalog.addIndex('keywordscase', 'KeywordIndex',extra) ki= KeywordIndex('keywords') self.catalog._catalog.addIndex('keywords', ki) ki._updateProperty('PrenormalizeTerm', 'python: value.lower()') ki._updateProperty('TermType', 'string') ki.keywords._updateProperty('Normalizer', 'python: [k.lower() for k in value]') ki=KeywordIndex('baselanguage') self.catalog._catalog.addIndex('baselanguage',ki) ki._updateProperty('PrenormalizeTerm', "python: value[:(value.find('-') > 0 ) and value.find('-') or len(value)]") ki.baselanguage._updateProperty('Name','language') ki.baselanguage._updateProperty('Normalizer', "python: [value[:(value.find('-') > 0 ) and value.find('-') or len(value)]]") fi=FieldIndex('sortTitle') 
self.catalog._catalog.addIndex('sortTitle',fi) fi._updateProperty('PrenormalizeTerm', 'python: value.lower()') fi._updateProperty('TermType', 'string') fi.sortTitle._updateProperty('Name', 'Title') fi.sortTitle._updateProperty('Normalizer', 'python: here.stripArticles(value)') fi=FieldIndex('parent') self.catalog._catalog.addIndex('parent',fi) fi.parent._updateProperty('Name', 'getParent') fi.parent._updateProperty('Normalizer', 'python:value.objectId') ki=KeywordIndex('translators') self.catalog._catalog.addIndex('translators',ki) ki._delObject('translators') ee=ExpressionEvaluator() ee.id='translators' ki._setObject(ee.id,ee) ki.translators._updateProperty('Expression',"python: lambda o: o.roles['translators']") ki=KeywordIndex('editors') self.catalog._catalog.addIndex('editors',ki) ki._delObject('editors') ee=ExpressionEvaluator() ee.id='editors' ki._setObject(ee.id,ee) ki.editors._updateProperty('Expression',"python: lambda o: o.roles['editors']") self._set_metadata() self._p_changed=1 security.declarePrivate("_addColumn") def _addColumn(self, fieldname, *args, **kw): """Create a metadata field on the content catalog if it doesn't already exist. Call as you would 'self.catalog.addColumn'. Returns 'fieldname' if that name is actually added; None if it exists. """ if fieldname not in self.catalog.schema(): self.catalog.addColumn(fieldname, *args, **kw) return fieldname return None security.declarePrivate("_set_metadata") def _set_metadata(self): """Create the metadata fields on the content catalog. This is called by upgrade script and installation, so adding a role here plus reinstall is all that's necesary for additional metadata. Return tuple of added fields if we actually changed something (so the caller can update metadata.) Empty tuple (false) if no change. 
""" added = set([None]) added.add(self._addColumn('Title')) added.add(self._addColumn('abstract')) added.add(self._addColumn('authors')) added.add(self._addColumn('language')) added.add(self._addColumn('code')) added.add(self._addColumn('collectionType')) added.add(self._addColumn('created')) added.add(self._addColumn('fields', {})) added.add(self._addColumn('getHistoryCount')) added.add(self._addColumn('getIcon')) added.add(self._addColumn('institution')) added.add(self._addColumn('instructor')) added.add(self._addColumn('keywords')) added.add(self._addColumn('language')) added.add(self._addColumn('license')) added.add(self._addColumn('maintainers')) added.add(self._addColumn('matched', {})) added.add(self._addColumn('meta_type')) added.add(self._addColumn('objectId')) added.add(self._addColumn('portal_type')) added.add(self._addColumn('revised')) added.add(self._addColumn('roles')) added.add(self._addColumn('sortTitle')) added.add(self._addColumn('subject')) added.add(self._addColumn('submitter')) added.add(self._addColumn('url')) added.add(self._addColumn('version')) added.add(self._addColumn('weight', 0)) added.add(self._addColumn('harvestable', 1)) added.remove(None) return tuple(added) security.declarePrivate("_create_cache") def _create_cache(self,fake=False): """Creates the cache object for results sets""" if fake: self._setObject('cache', nocache('cache')) else: self._setObject('cache', cache('cache')) self._p_changed=1 def log(self, message, severity=zLOG.INFO): zLOG.LOG("RhaptosRepository", severity, "%s (%s)" % (message, self.REQUEST['PATH_INFO'])) def _getStorageForObjectId(self, id): """Return the storage implementation associated with the given ID""" stub = self[id] return self.getStorage(stub.storage) def hasRhaptosObject(self, id): """Returns true if an object with the given ID exists in the repository""" return bool(self.hasObject(id)) def countRhaptosObjects(self, portal_types=None): """Returns the number of objects in the repository of the given 
type, or all types""" # Build mapping of storage -> list of portal_types to query storages = {} # If no portal_types, search everything if not portal_types: for name in self.listStorages(): storages[name] = None while portal_types: pt = portal_types.pop() storage = self._storage_map.get(pt, self._default_storage) storages.setdefault(storage, []).append(pt) count = 0 for name, portal_types in storages.items(): storage = self.getStorage(name) count += storage.countObjects(portal_types) return count def getRhaptosObjectLanguageCounts(self, portal_types=None): """Returns a list of tuples of language codes and count of objects using them, ordered by number of objects, descending""" # Build mapping of storage -> list of portal_types to query storages = {} # If no portal_types, search everything if not portal_types: for name in self.listStorages(): storages[name] = None elif type(portal_types) == type(''): portal_types=[portal_types] while portal_types: pt = portal_types.pop() storage = self._storage_map.get(pt, self._default_storage) storages.setdefault(storage, []).append(pt) langdict = {} for name, portal_types in storages.items(): storage = self.getStorage(name) for l,c in storage.getLanguageCounts(portal_types): langdict[l] = langdict.setdefault(l,0) + c langs=langdict.items() langs.sort(lambda x,y: cmp(y[1],x[1])) return langs def langLookup(self,langs=None): """Accesses the languageConstants monkeypatch on PloneLanguageTool, which generates a static dictionary of language codes, native and English language names, and regional variant names from PLT's own specialized dictionaries.""" lcdict=languageConstants if type(langs)==type(''): langs=langs.split(',') if not langs: return lcdict else: returnDict={} for k in langs: returnDict[k]=lcdict[k] return returnDict def getRhaptosObject(self, id, version=None, **kwargs): """Returns the object with the specified ID""" return self._getStorageForObjectId(id).getObject(id, version, **kwargs) 
security.declarePublic("getHistory") def getHistory(self, id): """Returns the history of the object with the specified ID or None if there is no such ID in the repository""" try: return self._getStorageForObjectId(id).getHistory(id) except KeyError: return None security.declarePrivate("deleteRhaptosObject") def deleteRhaptosObject(self, objectId, version=None, **kwargs): """Deletes all the objects with the specified ID""" if not self.hasRhaptosObject(objectId): raise KeyError, objectId return self._getStorageForObjectId(objectId).deleteObject(objectId, version) self.cache.clearSearchCache() #FIXME: this shouldn't be done here, but with some sort of event system getToolByName(self,'portal_similarity').deleteSimilarity(objectId, version) getToolByName(self,'portal_linkmap').deleteLinks(objectId, version) def _doGet(self, id, version=None, **kwargs): return self._getStorageForObjectId(id).getObject(id, version, **kwargs) def getRhaptosObjects(self, objects): """Returns a list of objects as defined by the list of id,version tuples""" return [self.hasRhaptosObject(oid) and self._getStorageForObjectId(oid).getObject(oid,ver) for oid, ver in objects] def publishObject(self, object, message): """ Publish an object for the first time in the repository Creates a new folder to hold the version history of this object and create the first version of the object, returning the new unique ID for this object """ storage = self.getStorageForType(object.portal_type) objectId = storage.applyVersionControl(object) storage.createVersionFolder(object) user = AccessControl.getSecurityManager().getUser().getUserName() storage.checkinResource(object, message, user) self.cache.clearSearchCache() #FIXME: these things shouldn't be done here, but with some sort of event system # hitcount update hitcount = getToolByName(self, 'portal_hitcount', None) if hitcount: hitcount.registerObject(objectId, DateTime()) # storage events (mostly collection printing, at the moment) pubobj = 
storage.getObject(objectId, 'latest')  # continuation of `pubobj = ...` begun on the previous source line
storage.notifyObjectRevised(pubobj, None)
# Removing this 'event' until Lens V2
#if object.getParent():
### FIXME: We really want the Zope3 event system for this.
### Once we get that, we'll want to use something to the effect of:
### zope.event.notify(ObjectRevisionPublished)
#self.lens_tool.notifyLensDerivedObject(object)
### End Event System Hack
return objectId

def publishRevision(self, object, message):
    """ Publish a revision of an object in the repository object: the object to place under version control. It must implement IMetadata and IVersionedObject message: a string log message by the user baseVersion: the version of the object this is based on returns: unique ID string for the new object """
    # Refuse to "revise" something that was never published.
    if not self.isUnderVersionControl(object):
        raise CommitError, "Cannot publish revision of object %s not under version control" % object.getId()
    # handle to original object to preserve locked status, if necessary;
    # we could look this up after publication (and would have to with proper events),
    # but that would be version inspection
    origobj = object.getPublishedObject().latest
    storage = self.getStorageForType(object.portal_type)
    user = AccessControl.getSecurityManager().getUser().getUserName()
    storage.checkinResource(object, message, user)
    # Any cached search results may now be stale.
    self.cache.clearSearchCache()
    ### FIXME: We really want the Zope3 event system for these.
    ### Once we get that, we'll want to use something to the effect of:
    ### zope.event.notify(ObjectRevisionPublished)
    try:
        # Grab the now-published version
        pubobj = object.getPublishedObject().latest
    except AttributeError:
        # NOTE(review): if this except fires, `pubobj` is never bound and
        # the notify calls below raise NameError.  Presumably the
        # AttributeError cannot occur after a successful checkin -- confirm.
        pass
    # lens events
    self.lens_tool.notifyLensRevisedObject(pubobj)
    # storage events (mostly collection printing, at the moment)
    storage.notifyObjectRevised(pubobj, origobj)
    # notice of change to all containing collections, latest version only
    container_objs = self.catalog(containedModuleIds=pubobj.objectId)
    for col in container_objs:
        colobj = col.getObject()
        colobj.notifyContentsRevised()
    ### End Event System Hack

def isUnderVersionControl(self, object):
    """Returns true if the object is under version control"""
    return self.getStorageForType(object.portal_type).isUnderVersionControl(object)

def isLatestVersion(self, object):
    """Returns true if object is the most recent revision of an object"""
    return self.getStorageForType(object.portal_type).isLatestVersion(object)

def getVersionInfo(self, object):
    # Delegate to the storage responsible for this object's portal_type.
    return self.getStorageForType(object.portal_type).getVersionInfo(object)

def searchRepositoryByDate(self, start, end, REQUEST=None):
    """Search repository by date: start and end"""
    result = []
    # Only the module/collection storage supports date-range search here.
    s = self.getStorage('module_version_storage')
    objects = s.searchDateRange(start, end, ['Module','Collection'])
    result.extend(objects)
    # Sort ascending by revision date (Python 2 cmp-style comparator).
    result.sort(lambda x, y: cmp(x.revised, y.revised))
    return result

security.declarePublic("cookSearchTerms")

def cookSearchTerms(self, query):
    """return the cooked search terms, as well as the uncook dictionary, aggregated across storages"""
    allcooked = []
    alluncook = {}
    # Merge each storage's cooked terms; duplicate terms from different
    # storages have their uncook lists concatenated.
    for name in self.listStorages():
        s = self.getStorage(name)
        cooked,uncook = s.cookSearchTerms(query)
        for c in cooked:
            if not c in allcooked:
                allcooked.append(c)
            alluncook.setdefault(c,[]).extend(uncook[c])
        # Deal w/ stop words: must be stopped by _all_ searches
        # FIXME this code might now work, but is currently not exercised
        # since both storages use equivalent code for 
# cookSearchTerms  (continuation of the comment split across the source line break)
        if alluncook.has_key(''):
            # '' is the stop-word bucket: keep only the stop words this
            # storage also stopped (intersection across storages).
            # NOTE(review): this removes from a list while iterating it --
            # flagged by the FIXME above; currently not exercised.
            for s in alluncook['']:
                if s not in uncook['']:
                    alluncook[''].remove(s)
        else:
            alluncook.update(uncook)
    return allcooked,alluncook

def searchRepository(self, query, query_type="weakAND", weights=dsw, field_queries={}, sorton='weight',recent=False,use_cache=True,min_rating=0):
    """Search the repository: portal_types defaults to all types w/ storage objects Default weights are stored in default_search_weights on the repository """
    # NOTE(review): `field_queries={}` is a mutable default, and
    # `field_queries.pop('portal_types')` below mutates the caller's dict
    # (or the shared default) -- confirm callers always pass a fresh dict.
    if not weights:
        weights = self.default_search_weights #AKA: dsw
    # Hash of the full query spec, used as the results-cache key.
    fq_list = field_queries.items()
    fq_list.sort()
    searchhash = str(query) + str(query_type) + str(weights) + str(fq_list)
    cached_res = self.cache.resultsCacheLookup(searchhash, sorton, recent)
    if use_cache and cached_res:
        result,term_results = cached_res
        return result,term_results,searchhash
    else:
        cached_sort = None
    # Build mapping of storage -> list of portal_types to query
    storages = {}
    # If no portal_types, search everything
    if not field_queries.has_key('portal_types'):
        for name in self.listStorages():
            storages[name] = None
    else:
        for pt in field_queries.pop('portal_types')[0]:
            storage = self._storage_map.get(pt, self._default_storage)
            storages.setdefault(storage, []).append(pt)
    result = []
    skipped = []
    matched = []
    term_results = {}
    # restrict = [(t,v[0]) for t,v in field_queries.items() if v[1] == 'AND']
    restrict = None
    # First, the 'anywhere' query
    if query:
        for name, portal_types in storages.items():
            storage = self.getStorage(name)
            result.extend(storage.search(query, portal_types, weights, restrict, min_rating=min_rating))
        result,skipped,matched = applyQueryType(result,query,query_type)
        term_results['any'] = (skipped,matched)
    # Now the rest.
    fq_list = field_queries.items()
    # sort by limit - all limit fields after result fields: this is needed for the intersect logic
    fq_list.sort(lambda x,y: cmp(x[1][2],y[1][2]))
    for field,(fquery,fquery_type,f_limit) in fq_list:
        # A field key may be a tuple of field names sharing one sub-query.
        fq_weights = {}
        if type(field) == type(()):
            for f in field:
                fq_weights[f] = self.default_search_weights[f]
        else:
            fq_weights[field] = self.default_search_weights[field]
        fres = []
        for name, portal_types in storages.items():
            storage = self.getStorage(name)
            fres.extend(storage.search(fquery, portal_types, weights=fq_weights, restrict=restrict,min_rating=min_rating))
        fres,fskipped,fmatched = applyQueryType(fres,fquery,fquery_type)
        term_results[field] = (fskipped,fmatched)
        # intersect each result set with the previous ones. Each
        # field_query is ANDed with the others (including the
        # 'anywhere' query), IFF one of the previous searches had a matching term,
        # and this search had a matching term. This 'weakAND' drops any field that had
        # all of its terms dropped. The 'matched' dictionaries of each result object are updated
        # Since limit fields are last, they will not add to result set
        # if nothing before them matched.
        if fmatched:
            if matched:
                result_dict = dict([(r.objectId,r) for r in result])
                result = [r for r in fres if r.objectId in result_dict]
                for r in result:
                    # Merge match/field provenance and accumulate weight
                    # from the previously intersected result set.
                    for t,f in result_dict[r.objectId].matched.items():
                        r.matched.setdefault(t,[]).extend(f)
                    for t,f in result_dict[r.objectId].fields.items():
                        r.fields.setdefault(t,[]).extend(f)
                    r.weight += result_dict[r.objectId].weight
            elif not f_limit:
                result = fres
                matched = fmatched
    result = self.sortSearchResults(result, sorton, recent)
    self.cache.resultsCacheInject(searchhash, (result,term_results,sorton,recent))
    return self.wrapResults(result),term_results,searchhash

def wrapResults(self,results):
    """wrap list of results from pluggable brains or catalog record searches to standalone DBModuleSearch objects that can be pickled, and thus cached or stored in a session variable. This method is idempotent, so can safely be called on lists m """
    return [isinstance(res,DBModuleSearch) and res or DBModuleSearch(res) for res in results]

security.declarePublic("sortSearchResults")

def sortSearchResults(self, result, sorton,recent=False):
    """sort a result set"""
    # All comparators below are Python 2 cmp-style; sorts are descending
    # for weight/popularity/views/revised, ascending otherwise.
    def sort_rating(a, b):
        return cmp(getattr(b, 'rating', 0), getattr(a, 'rating', 0))
    if sorton=='weight':
        result.sort(lambda x,y: int(y.weight-x.weight or cmpTitle(x,y)))
    elif sorton=='popularity':
        hc_tool = getToolByName(self, 'portal_hitcount', None)
        result.sort(lambda x,y: cmp(hc_tool.getPercentileForObject(y.objectId,recent), hc_tool.getPercentileForObject(x.objectId,recent)))
    elif sorton=='views':
        hc_tool = getToolByName(self, 'portal_hitcount', None)
        result.sort(lambda x,y: cmp(hc_tool.getHitCountForObject(y.objectId,recent), hc_tool.getHitCountForObject(x.objectId,recent)))
    elif sorton=='language':
        result.sort(lambda x,y: int(cmp(x.language,y.language) or cmpTitle(x,y)))
    elif sorton=='revised':
        result.sort(lambda x,y: int(cmp(y.revised,x.revised) or cmpTitle(x,y)))
    elif sorton=='title':
        result.sort(cmpTitle)
    elif sorton=='portal_type':
        result.sort(lambda x,y: int(cmp(x.portal_type,y.portal_type) or hasattr(y,'weight') and hasattr(x,'weight') and (y.weight-x.weight) or cmpTitle(x,y)))
    elif sorton == 'rating':
        result.sort(sort_rating)
    return self.wrapResults(result)

def getContentByAuthor (self, authorid):
    """Return all content by a particular author"""
    return self.getContentByRole('author',authorid)

def getContentByRole(self, role, user_id):
    """Return all content by where the user has the specified role"""
    # Query every storage; the dict is only used as a unique-name set.
    storages = {}
    for name in self.listStorages():
        storages[name] = None
    content = []
    for name in storages.keys():
        storage = self.getStorage(name)
        content.extend(storage.getObjectsByRole(role, user_id))
    return content
class Repository(UniqueObject, DynamicType, StorageManager, BTreeFolder2): """Rhaptos Version Repository tool""" __implements__ = (IRepository, StorageManager.__implements__, DynamicType.__implements__) meta_type = 'Repository' security = AccessControl.ClassSecurityInfo() # Fake out the types tool since we can't really use the Factory creation method portal_type = 'Repository' dsw = default_search_weights = { 'fulltext': 1, 'abstract': 1, 'subject': 10, 'keyword': 10, 'author': 50, 'translator': 40, 'editor': 20, 'maintainer': 10, 'licensor': 10, 'institution': 10, 'exact_title': 100, 'title': 10, 'language': 5, 'containedIn': 200, 'parentAuthor': 0, 'containedAuthor': 0, 'objectid': 1000 } # placeholder attributes for otherwise-not-present catalog columns/indexes fields = {} matched = {} weight = 0 sortTitle = None # FIXME: I don't think this should even be a column; nothing provides it. A fine index, though. default_browse_batch_size = 50 __allow_access_to_unprotected_subobjects__ = 1 # ZMI methods manage_options = (BTreeFolder2.manage_options + ({ 'label': 'Overview', 'action': 'manage_overview' }, { 'label': 'Catalog', 'action': 'manage_catalog' })) manage_overview = DTMLFile('explainRepository', globals()) def manage_catalog(self, REQUEST=None): """Access to the ZCatalog of versioned objects""" if REQUEST is not None: REQUEST['RESPONSE'].redirect(self.catalog.absolute_url() + '/manage_catalogView') def __init__(self, id, title=''): """Initialize Repository object""" StorageManager.__init__(self, id) self.OAI = OAIHandler('OAI') self.title = title self._create_catalog() self._create_cache( ) #results cache needs better invalidation code - consider fake=True if you're getting conflicts on publish # Copied from PortalContent def __call__(self): ''' Invokes the default view. 
''' view = _getViewFor(self) if getattr(aq_base(view), 'isDocTemp', 0): return view(self, self.REQUEST) else: return view() def _create_module(self, key, data): """Create a module in ZODB from data in the postgres db""" from Products.RhaptosModuleStorage.ModuleVersionFolder import \ ModuleVersionStub # Create a module version stub (e.g. /plone/content/m9001) storage = self.getStorageForType('Module') mvs = ModuleVersionStub(data['id'], storage=storage.id) self._setObject(data['id'], mvs, set_owner=0) logger.debug('Created ModuleVersionStub %s' % '/'.join(mvs.getPhysicalPath())) # Code copied from publishObject self.cache.clearSearchCache() #FIXME: these things shouldn't be done here, but with some sort of # event system hitcount update hitcount = getToolByName(self, 'portal_hitcount', None) if hitcount: hitcount.registerObject(data['id'], DateTime()) # Not going to trigger pdf production here # (storage.notifyObjectRevised), should be in cnx-publishing # instead pubobj = storage.getObject(data['id'], 'latest') self.catalog.catalog_object(pubobj) logger.debug('Add %s to catalog' % '/'.join(pubobj.getPhysicalPath())) def _create_collection(self, key, data): """Create a collection in ZODB from data in the postgres db""" moduledb_tool = getToolByName(self, 'portal_moduledb') # Get collection tree tree = moduledb_tool.sqlGetCollectionTree(id=data['id'], version=data['version'], aq_parent=self).tuples() if not tree or tree[0][0] is None: logger.debug('Unable to get collection tree for %s' % key) # can't get the collection tree, nothing to do raise KeyError(key) tree = simplejson.loads(tree[0][0].decode('utf-8')) # Create a version folder (e.g. 
/plone/content/col11554) storage = self.getStorageForType('Collection') if data['id'] not in self.objectIds(): vf = VersionFolder(data['id'], storage=storage.id) self._setObject(data['id'], vf, set_owner=0) logger.debug('Created VersionFolder %s' % '/'.join(vf.getPhysicalPath())) vf = getattr(self, data['id']) # Create a collection (e.g. /plone/content/col11554/1.1) collection = _createObjectByType('Collection', vf, data['version']) collection.objectId = data['id'] collection.version = data['version'] collection.created = DateTime(data['_created'].isoformat()) collection.revised = DateTime(data['_revised'].isoformat()) collection.setKeywords(data['_keywords'].split(', ')) collection.setAbstract(data['abstract']) for k, v in dict(title=data['name'], authors=data['authors'], maintainers=data['maintainers'], licensors=data['licensors'], license=data['license'], _parent_id=data['parent_id'], _parent_version=data['parent_version'], parentAuthors=data['parentAuthors'], language=data['language'], subject=data['_subject'].split(', ')).items(): setattr(collection, k, v) if data.has_key('print_style'): collection.parameters.manage_addProperty('printstyle', data['print_style'], 'string') logger.debug('Created collection %s' % '/'.join(collection.getPhysicalPath())) logger.debug(str(collection.propertyItems())) # Code copied from publishObject self.cache.clearSearchCache() #FIXME: these things shouldn't be done here, but with some sort of # event system hitcount update hitcount = getToolByName(self, 'portal_hitcount', None) if hitcount: hitcount.registerObject(data['id'], DateTime()) # Not going to trigger pdf production here # (storage.notifyObjectRevised), should be in cnx-publishing # instead modules = [] def create_objects(contents, folder): # Create the top level objects for node in contents: new_folder = None if node['id'] == 'subcol' or node['id'].startswith('col'): new_folder = _createObjectByType( 'SubCollection', folder, folder.generateUniqueId('SubCollection')) 
new_folder.title = node['title'] logger.debug('Created subcollection: %s' % new_folder) elif node['id'].startswith('m'): if node['id'] not in self.objectIds(): # Create the module if it doesn't exist module = self[node['id']] obj = _createObjectByType('PublishedContentPointer', folder, node['id']) obj.moduleId = node['id'] obj.version = 'latest' # FIXME - should track if original was set to latest, but # that info is not sent properly to the DB, nor returned # in the json # if node['latest']: # obj.version = 'latest' # else: # obj.version = node['version'] modules.append((node['id'], node['version'])) logger.debug('Created PublishedContentPointer %s@%s' % (obj.getModuleId(), obj.getVersion())) # Create all the objects in "contents" if new_folder: create_objects(node.get('contents') or [], new_folder) # Create SubCollections and PublishedContentPointer according to # the collection tree create_objects(tree['contents'], collection) # Copied from Products.RhaptosRepository.VersionFolder.checkinResource if 'latest' not in vf.objectIds(): addLatestReference(vf, 'latest', collection.Title(), collection.version) logger.debug('Added latest reference') else: if collection.version.split('.') > vf.latest.version.split('.'): vf.latest.edit(collection.Title(), collection.version) collection.submitter = data['submitter'] collection.submitlog = data['submitlog'] collection.state = 'public' logger.debug('Finished creating collection') # Create collection.xml if it doesn't exist in postgres filenames = moduledb_tool.sqlGetModuleFilenames( id=data['id'], version=data['version']).tuples() if filenames and 'collection.xml' not in str(filenames): logger.debug('Create collection.xml for %s' % key) xml = collection.restrictedTraverse('source_create')() res = moduledb_tool.sqlInsertFile(file=Binary(xml), media_type='text/xml') fid = res[0].fileid moduledb_tool.sqlInsertModuleFile(moduleid=collection.objectId, version=collection.version, fileid=fid, filename='collection.xml', 
mimetype='text/xml', aq_parent=self) pubobj = storage.getObject(data['id'], 'latest') self.catalog.catalog_object(pubobj) logger.debug('Add %s to catalog' % '/'.join(pubobj.getPhysicalPath())) def __getitem__(self, key): try: # Try returning the object try: return getattr(self.aq_inner, key) except AttributeError: pass except TypeError: # key is None or not a string raise KeyError('bad type') # The key has to be either in the format of col12345 or m12345 m = re.match('(col|m)([0-9]+)$', key) if not m: raise KeyError(key) # The key is not in the ZODB, look for it in the postgres db moduledb_tool = getToolByName(self, 'portal_moduledb') data = moduledb_tool.sqlGetLatestModule(id=key).dictionaries() if not data: # The key isn't in the postgres db either raise KeyError(key) data = data[0] if m.group(1) == 'm': # Create a module logger.debug('Create module %s from postgres' % key) self._create_module(key, data) logger.debug('Created module %s from postgres' % key) elif m.group(1) == 'col': # Create a collection print('logger.level: %s' % logger.level) logger.debug('Create collection %s from postgres' % key) #History is descending in time- newest first history = moduledb_tool.sqlGetHistory(id=key).dictionaries() prev_ver = '' #need to skip multiple minor versions from rewrite for item in history: if item['version'] == prev_ver: continue data = moduledb_tool.sqlGetModule( id=key, version=item['version']).dictionaries() if data: data = data[0] logger.debug('Create collection %s version %s' % (data['id'], data['version'])) self._create_collection(key, data) prev_ver = item['version'] logger.debug('Created collection %s from postgres' % key) except KeyError: # No need to log raise except Exception: # This function often silently fails, so adding explicit logging logger.exception('Something failed in %s' % self.__getitem__) raise return getattr(self, key) index_html = __call__ security.declarePublic("Title") def Title(self): """Fulfil new-ish interface expectations for title 
(so we work with breadcrumbs, etc)""" return self.title security.declarePrivate("_create_catalog") def _create_catalog(self): """Creates the ZCatalog instance for versioned objects""" self.catalog = ZCatalog('catalog') lexicon = PLexicon('lexicon', '', Splitter(), CaseNormalizer(), StopWordAndSingleCharRemover()) self.catalog._setObject('lexicon', lexicon) ZCText_extras = Empty() ZCText_extras.doc_attr = 'abstract' ZCText_extras.index_type = 'Okapi BM25 Rank' ZCText_extras.lexicon_id = 'lexicon' self.catalog.addIndex('abstract', 'ZCTextIndex', ZCText_extras) ZCText_extras.doc_attr = 'Title' self.catalog.addIndex('Title', 'ZCTextIndex', ZCText_extras) ZCText_extras.doc_attr = 'institution' self.catalog.addIndex('institution', 'ZCTextIndex', ZCText_extras) ZCText_extras.doc_attr = 'keywords' self.catalog.addIndex('keywordstext', 'ZCTextIndex', ZCText_extras) self.catalog.addIndex('atomicInstitution', 'FieldIndex', {'indexed_attrs': 'institution'}) self.catalog.addIndex('authors', 'KeywordIndex') self.catalog.addIndex('parentAuthors', 'KeywordIndex') self.catalog.addIndex('maintainers', 'KeywordIndex') self.catalog.addIndex('language', 'KeywordIndex') self.catalog.addIndex('modified', 'DateIndex') self.catalog.addIndex('revised', 'DateIndex') self.catalog.addIndex('objectId', 'FieldIndex') self.catalog.addIndex('portal_type', 'FieldIndex') self.catalog.addIndex('containedModuleIds', 'KeywordIndex') self.catalog.addIndex('subject', 'KeywordIndex') extra = Empty() extra.indexed_attrs = 'keywords' self.catalog.addIndex('keywordscase', 'KeywordIndex', extra) ki = KeywordIndex('keywords') self.catalog._catalog.addIndex('keywords', ki) ki._updateProperty('PrenormalizeTerm', 'python: value.lower()') ki._updateProperty('TermType', 'string') ki.keywords._updateProperty('Normalizer', 'python: [k.lower() for k in value]') ki = KeywordIndex('baselanguage') self.catalog._catalog.addIndex('baselanguage', ki) ki._updateProperty( 'PrenormalizeTerm', "python: value[:(value.find('-') > 
0 ) and value.find('-') or len(value)]" ) ki.baselanguage._updateProperty('Name', 'language') ki.baselanguage._updateProperty( 'Normalizer', "python: [value[:(value.find('-') > 0 ) and value.find('-') or len(value)]]" ) fi = FieldIndex('sortTitle') self.catalog._catalog.addIndex('sortTitle', fi) fi._updateProperty('PrenormalizeTerm', 'python: value.lower()') fi._updateProperty('TermType', 'string') fi.sortTitle._updateProperty('Name', 'Title') fi.sortTitle._updateProperty('Normalizer', 'python: here.stripArticles(value)') fi = FieldIndex('parent') self.catalog._catalog.addIndex('parent', fi) fi.parent._updateProperty('Name', 'getParent') fi.parent._updateProperty('Normalizer', 'python:value.objectId') ki = KeywordIndex('translators') self.catalog._catalog.addIndex('translators', ki) ki._delObject('translators') ee = ExpressionEvaluator() ee.id = 'translators' ki._setObject(ee.id, ee) ki.translators._updateProperty( 'Expression', "python: lambda o: o.roles['translators']") ki = KeywordIndex('editors') self.catalog._catalog.addIndex('editors', ki) ki._delObject('editors') ee = ExpressionEvaluator() ee.id = 'editors' ki._setObject(ee.id, ee) ki.editors._updateProperty('Expression', "python: lambda o: o.roles['editors']") self._set_metadata() self._p_changed = 1 security.declarePrivate("_addColumn") def _addColumn(self, fieldname, *args, **kw): """Create a metadata field on the content catalog if it doesn't already exist. Call as you would 'self.catalog.addColumn'. Returns 'fieldname' if that name is actually added; None if it exists. """ if fieldname not in self.catalog.schema(): self.catalog.addColumn(fieldname, *args, **kw) return fieldname return None security.declarePrivate("_set_metadata") def _set_metadata(self): """Create the metadata fields on the content catalog. This is called by upgrade script and installation, so adding a role here plus reinstall is all that's necesary for additional metadata. 
Return tuple of added fields if we actually changed something (so the caller can update metadata.) Empty tuple (false) if no change. """ added = set([None]) added.add(self._addColumn('Title')) added.add(self._addColumn('abstract')) added.add(self._addColumn('authors')) added.add(self._addColumn('language')) added.add(self._addColumn('code')) added.add(self._addColumn('collectionType')) added.add(self._addColumn('created')) added.add(self._addColumn('fields', {})) added.add(self._addColumn('getHistoryCount')) added.add(self._addColumn('getIcon')) added.add(self._addColumn('institution')) added.add(self._addColumn('instructor')) added.add(self._addColumn('keywords')) added.add(self._addColumn('language')) added.add(self._addColumn('license')) added.add(self._addColumn('maintainers')) added.add(self._addColumn('matched', {})) added.add(self._addColumn('meta_type')) added.add(self._addColumn('objectId')) added.add(self._addColumn('portal_type')) added.add(self._addColumn('revised')) added.add(self._addColumn('roles')) added.add(self._addColumn('sortTitle')) added.add(self._addColumn('subject')) added.add(self._addColumn('submitter')) added.add(self._addColumn('url')) added.add(self._addColumn('version')) added.add(self._addColumn('weight', 0)) added.add(self._addColumn('harvestable', 1)) added.remove(None) return tuple(added) security.declarePrivate("_create_cache") def _create_cache(self, fake=False): """Creates the cache object for results sets""" if fake: self._setObject('cache', nocache('cache')) else: self._setObject('cache', cache('cache')) self._p_changed = 1 def log(self, message, severity=zLOG.INFO): zLOG.LOG("RhaptosRepository", severity, "%s (%s)" % (message, self.REQUEST['PATH_INFO'])) def _getStorageForObjectId(self, id): """Return the storage implementation associated with the given ID""" stub = self[id] return self.getStorage(stub.storage) def hasRhaptosObject(self, id): """Returns true if an object with the given ID exists in the repository""" return 
bool(self.hasObject(id)) def countRhaptosObjects(self, portal_types=None): """Returns the number of objects in the repository of the given type, or all types""" # Build mapping of storage -> list of portal_types to query storages = {} # If no portal_types, search everything if not portal_types: for name in self.listStorages(): storages[name] = None while portal_types: pt = portal_types.pop() storage = self._storage_map.get(pt, self._default_storage) storages.setdefault(storage, []).append(pt) count = 0 for name, portal_types in storages.items(): storage = self.getStorage(name) count += storage.countObjects(portal_types) return count def getRhaptosObjectLanguageCounts(self, portal_types=None): """Returns a list of tuples of language codes and count of objects using them, ordered by number of objects, descending""" # Build mapping of storage -> list of portal_types to query storages = {} # If no portal_types, search everything if not portal_types: for name in self.listStorages(): storages[name] = None elif type(portal_types) == type(''): portal_types = [portal_types] while portal_types: pt = portal_types.pop() storage = self._storage_map.get(pt, self._default_storage) storages.setdefault(storage, []).append(pt) langdict = {} for name, portal_types in storages.items(): storage = self.getStorage(name) for l, c in storage.getLanguageCounts(portal_types): langdict[l] = langdict.setdefault(l, 0) + c langs = langdict.items() langs.sort(lambda x, y: cmp(y[1], x[1])) return langs def langLookup(self, langs=None): """Accesses the languageConstants monkeypatch on PloneLanguageTool, which generates a static dictionary of language codes, native and English language names, and regional variant names from PLT's own specialized dictionaries.""" lcdict = languageConstants if type(langs) == type(''): langs = langs.split(',') if not langs: return lcdict else: returnDict = {} for k in langs: returnDict[k] = lcdict[k] return returnDict def getRhaptosObject(self, id, version=None, 
**kwargs): """Returns the object with the specified ID""" return self._getStorageForObjectId(id).getObject(id, version, **kwargs) security.declarePublic("getHistory") def getHistory(self, id): """Returns the history of the object with the specified ID or None if there is no such ID in the repository""" try: return self._getStorageForObjectId(id).getHistory(id) except KeyError: return None security.declarePrivate("deleteRhaptosObject") def deleteRhaptosObject(self, objectId, version=None, **kwargs): """Deletes all the objects with the specified ID""" if not self.hasRhaptosObject(objectId): raise KeyError, objectId return self._getStorageForObjectId(objectId).deleteObject( objectId, version) self.cache.clearSearchCache() #FIXME: this shouldn't be done here, but with some sort of event system getToolByName(self, 'portal_similarity').deleteSimilarity(objectId, version) getToolByName(self, 'portal_linkmap').deleteLinks(objectId, version) def _doGet(self, id, version=None, **kwargs): return self._getStorageForObjectId(id).getObject(id, version, **kwargs) def getRhaptosObjects(self, objects): """Returns a list of objects as defined by the list of id,version tuples""" return [ self.hasRhaptosObject(oid) and self._getStorageForObjectId(oid).getObject(oid, ver) for oid, ver in objects ] def publishObject(self, object, message): """ Publish an object for the first time in the repository Creates a new folder to hold the version history of this object and create the first version of the object, returning the new unique ID for this object """ storage = self.getStorageForType(object.portal_type) objectId = storage.applyVersionControl(object) storage.createVersionFolder(object) user = AccessControl.getSecurityManager().getUser().getUserName() storage.checkinResource(object, message, user) self.cache.clearSearchCache() #FIXME: these things shouldn't be done here, but with some sort of event system # hitcount update hitcount = getToolByName(self, 'portal_hitcount', None) if 
hitcount: hitcount.registerObject(objectId, DateTime()) # storage events (mostly collection printing, at the moment) pubobj = storage.getObject(objectId, 'latest') storage.notifyObjectRevised(pubobj, None) # Removing this 'event' until Lens V2 #if object.getParent(): ### FIXME: We really want the Zope3 event system for this. ### Once we get that, we'll want to use something to the effect of: ### zope.event.notify(ObjectRevisionPublished) #self.lens_tool.notifyLensDerivedObject(object) ### End Event System Hack return objectId def publishRevision(self, object, message): """ Publish a revision of an object in the repository object: the object to place under version control. It must implement IMetadata and IVersionedObject message: a string log message by the user baseVersion: the version of the object this is based on returns: unique ID string for the new object """ if not self.isUnderVersionControl(object): raise CommitError, "Cannot publish revision of object %s not under version control" % object.getId( ) # handle to original object to preserve locked status, if necessary; # we could look this up after publication (and would have to with proper events), # but that would be version inspection origobj = object.getPublishedObject().latest storage = self.getStorageForType(object.portal_type) user = AccessControl.getSecurityManager().getUser().getUserName() storage.checkinResource(object, message, user) self.cache.clearSearchCache() ### FIXME: We really want the Zope3 event system for these. 
        ### Once we get that, we'll want to use something to the effect of:
        ### zope.event.notify(ObjectRevisionPublished)
        try:
            # Grab the now-published version
            pubobj = object.getPublishedObject().latest
        except AttributeError:
            # NOTE(review): if this raises, pubobj is presumably still bound
            # from earlier in this method (its head is above this excerpt) --
            # confirm, otherwise the calls below would hit a NameError.
            pass
        # lens events
        self.lens_tool.notifyLensRevisedObject(pubobj)
        # storage events (mostly collection printing, at the moment)
        storage.notifyObjectRevised(pubobj, origobj)
        # notice of change to all containing collections, latest version only
        container_objs = self.catalog(containedModuleIds=pubobj.objectId)
        for col in container_objs:
            colobj = col.getObject()
            colobj.notifyContentsRevised()
        ### End Event System Hack

    def isUnderVersionControl(self, object):
        """Returns true if the object is under version control"""
        # Delegates to the per-portal_type storage.
        return self.getStorageForType(
            object.portal_type).isUnderVersionControl(object)

    def isLatestVersion(self, object):
        """Returns true if object is the most recent revision of an object"""
        return self.getStorageForType(
            object.portal_type).isLatestVersion(object)

    def getVersionInfo(self, object):
        # Return the storage's version-info record for this object.
        return self.getStorageForType(
            object.portal_type).getVersionInfo(object)

    def searchRepositoryByDate(self, start, end, REQUEST=None):
        """Search repository by date: start and end"""
        result = []
        # Only the module/collection storage supports date-range search here.
        s = self.getStorage('module_version_storage')
        objects = s.searchDateRange(start, end, ['Module', 'Collection'])
        result.extend(objects)
        # Oldest revision first.
        result.sort(lambda x, y: cmp(x.revised, y.revised))
        return result

    security.declarePublic("cookSearchTerms")

    def cookSearchTerms(self, query):
        """return the cooked search terms, as well as the uncook
        dictionary, aggregated across storages"""
        allcooked = []
        alluncook = {}
        for name in self.listStorages():
            s = self.getStorage(name)
            cooked, uncook = s.cookSearchTerms(query)
            for c in cooked:
                if not c in allcooked:
                    allcooked.append(c)
                # Aggregate the raw terms that cooked down to this term.
                alluncook.setdefault(c, []).extend(uncook[c])
        # Deal w/ stop words: must be stopped by _all_ searches
        # FIXME this code might not work, but is currently not exercised
        # since both storages use equivalent code for cookSearchTerms
        if alluncook.has_key(''):
            # NOTE(review): this removes from alluncook[''] while iterating
            # it, and 'uncook' here is whatever the *last* storage returned --
            # both look suspect; the FIXME above already flags this path.
            for s in alluncook['']:
                if s not in uncook['']:
                    alluncook[''].remove(s)
        else:
            alluncook.update(uncook)
        return allcooked, alluncook

    def searchRepository(self, query, query_type="weakAND", weights=dsw,
                         field_queries={}, sorton='weight', recent=False,
                         use_cache=True, min_rating=0):
        """Search the repository: portal_types defaults to all types w/
        storage objects

        Default weights are stored in default_search_weights on the
        repository
        """
        # NOTE(review): field_queries has a mutable default and is mutated
        # below via pop('portal_types') -- calls that rely on the default
        # (or pass a dict they reuse) share that mutation; worth fixing.
        if not weights:
            weights = self.default_search_weights  #AKA: dsw
        # Cache key: the full query spec, with field queries in stable order.
        fq_list = field_queries.items()
        fq_list.sort()
        searchhash = str(query) + str(query_type) + str(weights) + str(fq_list)
        cached_res = self.cache.resultsCacheLookup(searchhash, sorton, recent)
        if use_cache and cached_res:
            result, term_results = cached_res
            return result, term_results, searchhash
        else:
            cached_sort = None
        # Build mapping of storage -> list of portal_types to query
        storages = {}
        # If no portal_types, search everything
        if not field_queries.has_key('portal_types'):
            for name in self.listStorages():
                storages[name] = None
        else:
            for pt in field_queries.pop('portal_types')[0]:
                storage = self._storage_map.get(pt, self._default_storage)
                storages.setdefault(storage, []).append(pt)
        result = []
        skipped = []
        matched = []
        term_results = {}
        # restrict = [(t,v[0]) for t,v in field_queries.items() if v[1] == 'AND']
        restrict = None
        # First, the 'anywhere' query
        if query:
            for name, portal_types in storages.items():
                storage = self.getStorage(name)
                result.extend(
                    storage.search(query, portal_types, weights, restrict,
                                   min_rating=min_rating))
            result, skipped, matched = applyQueryType(
                result, query, query_type)
            term_results['any'] = (skipped, matched)
        # Now the rest.
        fq_list = field_queries.items()
        # sort by limit - all limit fields after result fields: this is
        # needed for the intersect logic
        fq_list.sort(lambda x, y: cmp(x[1][2], y[1][2]))
        for field, (fquery, fquery_type, f_limit) in fq_list:
            # Per-field weights: a field key may be a tuple of field names.
            fq_weights = {}
            if type(field) == type(()):
                for f in field:
                    fq_weights[f] = self.default_search_weights[f]
            else:
                fq_weights[field] = self.default_search_weights[field]
            fres = []
            for name, portal_types in storages.items():
                storage = self.getStorage(name)
                fres.extend(
                    storage.search(fquery, portal_types, weights=fq_weights,
                                   restrict=restrict,
                                   min_rating=min_rating))
            fres, fskipped, fmatched = applyQueryType(
                fres, fquery, fquery_type)
            term_results[field] = (fskipped, fmatched)
            # intersect each result set with the previous ones. Each
            # field_query is ANDed with the others (including the
            # 'anywhere' query), IFF one of the previous searches had a
            # matching term, and this search had a matching term. This
            # 'weakAND' drops any field that had all of its terms dropped.
            # The 'matched' dictionaries of each result object are updated.
            # Since limit fields are last, they will not add to result set
            # if nothing before them matched.
            if fmatched:
                if matched:
                    result_dict = dict([(r.objectId, r) for r in result])
                    result = [r for r in fres if r.objectId in result_dict]
                    for r in result:
                        # Merge match/field provenance and accumulate weight
                        # from the previous round into the surviving hits.
                        for t, f in result_dict[
                                r.objectId].matched.items():
                            r.matched.setdefault(t, []).extend(f)
                        for t, f in result_dict[r.objectId].fields.items():
                            r.fields.setdefault(t, []).extend(f)
                        r.weight += result_dict[r.objectId].weight
                elif not f_limit:
                    result = fres
                matched = fmatched
        result = self.sortSearchResults(result, sorton, recent)
        self.cache.resultsCacheInject(
            searchhash, (result, term_results, sorton, recent))
        return self.wrapResults(result), term_results, searchhash

    def wrapResults(self, results):
        """wrap list of results from pluggable brains or catalog record
        searches to standalone DBModuleSearch objects that can be pickled,
        and thus cached or stored in a session variable.

        This method is idempotent, so can safely be called on lists that
        already contain wrapped results.
        """
        # Py2 and-or idiom: keep existing DBModuleSearch objects, wrap the rest.
        return [
            isinstance(res, DBModuleSearch) and res or DBModuleSearch(res)
            for res in results
        ]

    security.declarePublic("sortSearchResults")

    def sortSearchResults(self, result, sorton, recent=False):
        """sort a result set"""
        def sort_rating(a, b):
            # Missing ratings sort as 0; highest rating first.
            return cmp(getattr(b, 'rating', 0), getattr(a, 'rating', 0))
        # Each branch sorts descending on its key, with title as tiebreaker.
        if sorton == 'weight':
            result.sort(
                lambda x, y: int(y.weight - x.weight or cmpTitle(x, y)))
        elif sorton == 'popularity':
            hc_tool = getToolByName(self, 'portal_hitcount', None)
            result.sort(lambda x, y: cmp(
                hc_tool.getPercentileForObject(y.objectId, recent),
                hc_tool.getPercentileForObject(x.objectId, recent)))
        elif sorton == 'views':
            hc_tool = getToolByName(self, 'portal_hitcount', None)
            result.sort(lambda x, y: cmp(
                hc_tool.getHitCountForObject(y.objectId, recent),
                hc_tool.getHitCountForObject(x.objectId, recent)))
        elif sorton == 'language':
            result.sort(lambda x, y: int(
                cmp(x.language, y.language) or cmpTitle(x, y)))
        elif sorton == 'revised':
            result.sort(
                lambda x, y: int(cmp(y.revised, x.revised) or cmpTitle(x, y)))
        elif sorton == 'title':
            result.sort(cmpTitle)
        elif sorton == 'portal_type':
            # Group by type, then by weight where both sides have one.
            result.sort(lambda x, y: int(
                cmp(x.portal_type, y.portal_type) or hasattr(
                    y, 'weight') and hasattr(x, 'weight') and
                (y.weight - x.weight) or cmpTitle(x, y)))
        elif sorton == 'rating':
            result.sort(sort_rating)
        return self.wrapResults(result)

    def getContentByAuthor(self, authorid):
        """Return all content by a particular author"""
        return self.getContentByRole('author', authorid)

    def getContentByRole(self, role, user_id):
        """Return all content by where the user has the specified role"""
        # Query every storage; keys are storage names, values unused.
        storages = {}
        for name in self.listStorages():
            storages[name] = None
        content = []
        for name in storages.keys():
            storage = self.getStorage(name)
            content.extend(storage.getObjectsByRole(role, user_id))
        return content
class TestZCatalog(unittest.TestCase):
    """Exercise the ZCatalog wrapper API: UID-based metadata and index
    access, searching, metadata updates, and object resolution."""

    def setUp(self):
        from Products.ZCatalog.ZCatalog import ZCatalog
        self._catalog = ZCatalog('Catalog')
        # Resolve catalogued paths through our local dictionary of dummies.
        self._catalog.resolve_path = self._resolve_num
        self._catalog.addIndex('title', 'KeywordIndex')
        self._catalog.addColumn('title')
        self.upper = 10
        self.d = {}
        for num in range(self.upper):
            # The uid is simply the stringified number.
            dummy = zdummy(num)
            self.d[str(num)] = dummy
            self._catalog.catalog_object(dummy, str(num))

    def _resolve_num(self, num):
        return self.d[num]

    def test_z2interfaces(self):
        from Interface.Verify import verifyClass
        from Products.ZCatalog.IZCatalog import IZCatalog
        from Products.ZCatalog.ZCatalog import ZCatalog
        verifyClass(IZCatalog, ZCatalog)

    def test_z3interfaces(self):
        from Products.ZCatalog.interfaces import IZCatalog
        from Products.ZCatalog.ZCatalog import ZCatalog
        from zope.interface.verify import verifyClass
        verifyClass(IZCatalog, ZCatalog)

    def testGetMetadataForUID(self):
        uid = str(self.upper - 3)  # as good as any..
        record = self._catalog.getMetadataForUID(uid)
        self.assertEqual(record['title'], uid)

    def testGetIndexDataForUID(self):
        uid = str(self.upper - 3)
        record = self._catalog.getIndexDataForUID(uid)
        self.assertEqual(record['title'][0], uid)

    def testSearch(self):
        # Both search entry points should agree on the hit count.
        query = {'title': ['5', '6', '7']}
        self.assertEqual(len(self._catalog.searchResults(query)), 3)
        self.assertEqual(len(self._catalog.search(query)), 3)

    def testUpdateMetadata(self):
        self._catalog.catalog_object(zdummy(1), '1')
        self.assertEqual(self._catalog.getMetadataForUID('1')['title'], '1')
        # Recataloguing with update_metadata=0 must leave metadata alone.
        self._catalog.catalog_object(zdummy(2), '1', update_metadata=0)
        self.assertEqual(self._catalog.getMetadataForUID('1')['title'], '1')
        # With update_metadata=1 the stored metadata follows the new object.
        self._catalog.catalog_object(zdummy(2), '1', update_metadata=1)
        self.assertEqual(self._catalog.getMetadataForUID('1')['title'], '2')
        # update_metadata defaults to true, test that here
        self._catalog.catalog_object(zdummy(1), '1')
        self.assertEqual(self._catalog.getMetadataForUID('1')['title'], '1')

    def testReindexIndexDoesntDoMetadata(self):
        self.d['0'].num = 9999
        self._catalog.reindexIndex('title', {})
        record = self._catalog.getMetadataForUID('0')
        self.assertEqual(record['title'], '0')

    def testReindexIndexesFalse(self):
        # setup: catalogue an object that evaluates to boolean false
        false_id = self.upper + 1
        falsy = zdummyFalse(false_id)
        self.d[str(false_id)] = falsy
        self._catalog.catalog_object(falsy, str(false_id))
        # test: there was a bug which caused false-valued objects to be
        # dropped from the index during reindexing
        falsy.num = 9999
        self._catalog.reindexIndex('title', {})
        hits = self._catalog(title='9999')
        self.assertEqual(len(hits), 1)

    def testBooleanEvalOn_manage_catalogObject(self):
        self.d['11'] = dummyLenFail(11, self.fail)
        self.d['12'] = dummyNonzeroFail(12, self.fail)

        # A fake response so manage_catalogObject() can redirect harmlessly.
        class myresponse:
            def redirect(self, url):
                pass

        # this next call should not fail
        self._catalog.manage_catalogObject(None, myresponse(), 'URL1',
                                           urls=('11', '12'))

    def testBooleanEvalOn_refreshCatalog_getobject(self):
        # wrap catalog under the fake parent providing unrestrictedTraverse()
        catalog = self._catalog.__of__(fakeparent(self.d))
        # replace entries to test refreshCatalog
        self.d['0'] = dummyLenFail(0, self.fail)
        self.d['1'] = dummyNonzeroFail(1, self.fail)
        # this next call should not fail
        catalog.refreshCatalog()
        for uid in '0', '1':
            # neither should resolving either record back to its object
            catalog.getobject(catalog.getrid(uid))

    def test_getobject_doesntMaskTraversalErrorsAndDoesntDelegateTo_resolve_url(self):
        # wrap catalog under the fake parent providing unrestrictedTraverse()
        catalog = self._catalog.__of__(fakeparent(self.d))

        # make resolve_url fail if ZCatalog falls back on it
        def resolve_url(path, REQUEST):
            self.fail(".resolve_url() should not be called by .getobject()")

        catalog.resolve_url = resolve_url
        # traversal should work at first
        rid0 = catalog.getrid('0')
        # then we break traversal and expect the error to propagate
        del self.d['0']
        self.assertRaises(FakeTraversalError,
                          catalog.getobject, rid0, REQUEST=object())
        # and if there is a None at the traversal point, that is what we get
        self.d['0'] = None
        self.assertEqual(catalog.getobject(rid0), None)
def CreateCatalog(self): """ creates ZCatalog object """ catalog = ZCatalog(DOCMANAGER_CATALOG, '') self._setObject(DOCMANAGER_CATALOG, catalog) catalog = self._getOb(DOCMANAGER_CATALOG) """ creates some indexes """ available_indexes = catalog.indexes() available_metadata = catalog.schema() if not ('id' in available_indexes): catalog.addIndex('id', 'FieldIndex') if not ('id' in available_metadata): catalog.addColumn('id') if not ('meta_type' in available_indexes): catalog.addIndex('meta_type', 'FieldIndex') if not ('meta_type' in available_metadata): catalog.addColumn('meta_type') if not ('title' in available_indexes): catalog.addIndex('title', 'TextIndex') if not ('title' in available_metadata): catalog.addColumn('title') if not ('path' in available_indexes): catalog.addIndex('path', 'PathIndex') try: catalog.Vocabulary(id='Vocabulary', title='') except: pass if not ('description' in available_indexes): catalog.addIndex('description', 'TextIndex') if not ('abstract' in available_indexes): catalog.addIndex('abstract', 'TextIndex') if not ('author' in available_indexes): catalog.addIndex('author', 'TextIndex') if not ('keywords' in available_indexes): catalog.addIndex('keywords', 'FieldIndex') if not ('coverage' in available_indexes): catalog.addIndex('coverage', 'FieldIndex') catalog.addIndex('approved', 'TextIndex') if not ('indexThematicArea' in available_indexes): catalog.addIndex('indexThematicArea', 'FieldIndex') try: if not ('PrincipiaSearchSource' in available_indexes): catalog.addIndex('PrincipiaSearchSource', 'TextIndexNG2', extra={'default_encoding': 'utf-8', 'use_converters':1, 'autoexpand':1}) except: pass
class LinkMapTool(UniqueObject, BTreeFolder2):
    """Portal tool holding ExtendedLink objects, indexed in a private
    ZCatalog so links can be searched by source and ranked by strength."""

    # BUG FIX: '(ILinkMapTool)' was not a tuple (no trailing comma), just a
    # parenthesised expression; declare the interface tuple properly.
    __implements__ = (ILinkMapTool,)

    id = 'portal_linkmap'
    meta_type = 'LinkMap Tool'

    security = AccessControl.ClassSecurityInfo()

    manage_options=(( {'label':'Overview', 'action':'manage_overview'},
                      { 'label' : 'Catalog', 'action' : 'manage_catalog'},
                    ) + BTreeFolder2.manage_options
                   )

    ## ZMI methods
    security.declareProtected(ManagePortal, 'manage_overview')
    manage_overview = PageTemplateFile('zpt/explainLinkMapTool', globals() )

    security.declareProtected(ManagePortal, 'manage_catalog')
    def manage_catalog(self, REQUEST=None):
        """Access to the ZCatalog"""
        # Just bounce the browser at the embedded catalog's ZMI view.
        if REQUEST is not None:
            REQUEST['RESPONSE'].redirect(self.catalog.absolute_url()+'/manage_catalogView')

    def __init__(self, *args, **kw):
        BTreeFolder2.__init__(self, *args, **kw)
        self._create_catalog()
        self._linkrange = (1,3)  # currently unused; just a marker

    security.declarePrivate("_create_catalog")
    def _create_catalog(self):
        """Creates the ZCatalog instance for searching links"""
        # self.catalog = ZCatalog('catalog').__of__(self)
        self.catalog = ZCatalog('catalog')
        self.catalog.addIndex('source', 'FieldIndex')
        self.catalog.addIndex('strength', 'FieldIndex')
        self.catalog.addColumn('target')
        self.catalog.addColumn('category')
        self.catalog.addColumn('strength')
        self.catalog.addColumn('title')
        self._p_changed=1

    security.declareProtected('LinkMap: Add Link', 'addLink')
    def addLink(self, source, target, title, category, strength, context=None):
        """Create a link"""
        id = self.generateId()
        self._setObject(id, ExtendedLink(id))
        ob = getattr(self, id)
        ob.edit(source, target, title, category, strength)
        self.catalog.catalog_object(ob)

    security.declarePublic('searchLinks')
    def searchLinks(self, source=None, context=None):
        """Return all links for a particular source and context"""
        # FIXME: do we have to worry about 'latest' translation?
        results = self.catalog(source=source,
                               sort_on='strength', sort_order='descending')
        return results

    def deleteLinks(self,objectId,version=None):
        """Delete all links for which the objectId is either source or
        target"""
        # This code assumes a ZRepository instance at /content
        myhost = urlparse.urlparse(self.REQUEST.SERVER_URL)[1]
        mypath = '/'.join(filter(None,['/content',objectId,version]))
        mylinks = []
        # FIXME: once a better storage and search interface exists, we can
        # use that
        for link in self.objectValues('Extended Link'):
            #Check source
            tokens = urlparse.urlparse(link.source)
            # A link counts as local when its host is ours (or missing) and
            # its path points under the object's repository path.
            if (tokens[1] or myhost) == myhost and tokens[2].startswith(mypath):
                mylinks.append(link.id)
            else:
                #Check target
                tokens = urlparse.urlparse(link.target)
                if (tokens[1] or myhost) == myhost and tokens[2].startswith(mypath):
                    mylinks.append(link.id)
        # Blow'em away!
        self.manage_delObjects(mylinks)

security.setPermissionDefault('LinkMap: Add Link', ('Manager', 'Owner',))
class LinkMapTool(UniqueObject, BTreeFolder2):
    """Portal tool that stores ExtendedLink objects and indexes them in an
    embedded ZCatalog, searchable by source and ranked by strength."""

    __implements__ = ILinkMapTool

    id = "portal_linkmap"
    meta_type = "LinkMap Tool"

    security = AccessControl.ClassSecurityInfo()

    # ZMI tabs for this tool, ahead of the inherited folder tabs.
    _zmi_tabs = (
        {"label": "Overview", "action": "manage_overview"},
        {"label": "Catalog", "action": "manage_catalog"},
    )
    manage_options = _zmi_tabs + BTreeFolder2.manage_options

    ## ZMI methods
    security.declareProtected(ManagePortal, "manage_overview")
    manage_overview = PageTemplateFile("zpt/explainLinkMapTool", globals())

    security.declareProtected(ManagePortal, "manage_catalog")
    def manage_catalog(self, REQUEST=None):
        """Access to the ZCatalog"""
        if REQUEST is None:
            return
        # Send the browser to the embedded catalog's ZMI view.
        target_url = self.catalog.absolute_url() + "/manage_catalogView"
        REQUEST["RESPONSE"].redirect(target_url)

    def __init__(self, *args, **kw):
        BTreeFolder2.__init__(self, *args, **kw)
        self._create_catalog()
        self._linkrange = (1, 3)  # currently unused; just a marker

    security.declarePrivate("_create_catalog")
    def _create_catalog(self):
        """Creates the ZCatalog instance for searching links"""
        # self.catalog = ZCatalog('catalog').__of__(self)
        cat = ZCatalog("catalog")
        for index_name in ("source", "strength"):
            cat.addIndex(index_name, "FieldIndex")
        for column_name in ("target", "category", "strength", "title"):
            cat.addColumn(column_name)
        self.catalog = cat
        self._p_changed = 1

    security.declareProtected("LinkMap: Add Link", "addLink")
    def addLink(self, source, target, title, category, strength, context=None):
        """Create a link"""
        link_id = self.generateId()
        self._setObject(link_id, ExtendedLink(link_id))
        link = getattr(self, link_id)
        link.edit(source, target, title, category, strength)
        self.catalog.catalog_object(link)

    security.declarePublic("searchLinks")
    def searchLinks(self, source=None, context=None):
        """Return all links for a particular source and context"""
        # FIXME: do we have to worry about 'latest' translation?
        return self.catalog(
            source=source, sort_on="strength", sort_order="descending")

    def deleteLinks(self, objectId, version=None):
        """Delete all links for which the objectId is either source or
        target"""
        # This code assumes a ZRepository instance at /content
        myhost = urlparse.urlparse(self.REQUEST.SERVER_URL)[1]
        mypath = "/".join(filter(None, ["/content", objectId, version]))

        def points_here(url):
            # True when the URL's host is ours (or absent) and its path
            # falls under the object's repository path.
            parts = urlparse.urlparse(url)
            return (parts[1] or myhost) == myhost and parts[2].startswith(mypath)

        # FIXME: once a better storage and search interface exists, we can use that
        doomed = [
            link.id
            for link in self.objectValues("Extended Link")
            # Check source first, then target.
            if points_here(link.source) or points_here(link.target)
        ]
        # Blow'em away!
        self.manage_delObjects(doomed)

security.setPermissionDefault("LinkMap: Add Link", ("Manager", "Owner"))
def __init__(self, id='portal_openflow'): self.id = id self._applications = {} #{'application_id':{'url':'application_url'},...} catalog = ZCatalog('Catalog', 'Default OpenFlow Catalog') try: # For ZCatalog 2.2.0 catalog.addIndex('id', 'FieldIndex') catalog.addColumn('id') catalog.addIndex('meta_type', 'FieldIndex') catalog.addColumn('meta_type') except: pass catalog.addIndex('description', 'FieldIndex') catalog.addColumn('description') catalog.addIndex('customer', 'FieldIndex') catalog.addColumn('customer') catalog.addIndex('creation_time', 'FieldIndex') catalog.addColumn('creation_time') catalog.addIndex('priority', 'FieldIndex') catalog.addColumn('priority') catalog.addIndex('status', 'FieldIndex') catalog.addColumn('status') catalog.addIndex('From', 'FieldIndex') catalog.addColumn('From') catalog.addIndex('To', 'FieldIndex') catalog.addColumn('To') catalog.addIndex('activity_id', 'FieldIndex') catalog.addColumn('activity_id') catalog.addIndex('process_id', 'FieldIndex') catalog.addColumn('process_id') catalog.addIndex('instance_id', 'FieldIndex') catalog.addColumn('instance_id') catalog.addIndex('workitems_from', 'FieldIndex') catalog.addColumn('workitems_from') catalog.addIndex('workitems_to', 'FieldIndex') catalog.addColumn('workitems_to') catalog.addIndex('actor', 'FieldIndex') catalog.addColumn('actor') catalog.addIndex('push_roles', 'KeywordIndex') catalog.addColumn('push_roles') catalog.addIndex('pull_roles', 'KeywordIndex') catalog.addColumn('pull_roles') self._setObject('Catalog', catalog)