def setup_catalog(context):
    """Ensure the 'marginalia_catalog' tool, its lexicon and its indexes exist.

    ``context`` must provide ``getSite()`` returning the portal.  The
    function is safe to run repeatedly: the catalog, the lexicon and each
    index are only created when they are not already present.
    """
    portal = context.getSite()

    catalog_name = 'marginalia_catalog'
    lexicon_id = 'plaintext_lexicon'

    try:
        catalog = cmfutils.getToolByName(portal, catalog_name)
    except AttributeError:
        # The tool is not installed yet: register a new catalog on the portal.
        catalog = ZCatalog(catalog_name, u'Marginalia catalog', None, portal)
        portal._setObject(catalog_name, catalog)

    # Extra arguments for the full-text index; it refers to the lexicon by id.
    plaintext_extra = SimpleRecord(lexicon_id=lexicon_id,
                                   index_type='Okapi BM25 Rank')

    # Snapshot of the index names present before anything is added.
    indexes = catalog.indexes()

    # Install the lexicon before the loop below adds the ZCTextIndex that
    # names it.
    if not hasattr(catalog, lexicon_id):
        lexicon = PLexicon(lexicon_id, '', Splitter(), CaseNormalizer(),
                           StopWordRemover())
        catalog._setObject(lexicon_id, lexicon)

    wanted_indexes = (
        ('edit_type', 'FieldIndex', None),
        ('note', 'ZCTextIndex', plaintext_extra),
        ('link_title', 'FieldIndex', None),
    )
    for index_name, index_type, extra in wanted_indexes:
        if index_name not in indexes:
            catalog.addIndex(index_name, index_type, extra=extra)
def setup_catalog(context):
    """Ensure the 'marginalia_catalog' tool, its lexicon and its indexes exist.

    ``context`` must provide ``getSite()`` returning the portal.  The
    function is safe to run repeatedly: the catalog, the lexicon and each
    index are only created when they are not already present.
    """
    portal = context.getSite()

    catalog_name = 'marginalia_catalog'
    lexicon_id = 'plaintext_lexicon'

    try:
        catalog = cmfutils.getToolByName(portal, catalog_name)
    except AttributeError:
        # The tool is not installed yet: register a new catalog on the portal.
        catalog = ZCatalog(catalog_name, u'Marginalia catalog', None, portal)
        portal._setObject(catalog_name, catalog)

    # Extra arguments for the full-text index; it refers to the lexicon by id.
    plaintext_extra = SimpleRecord(lexicon_id=lexicon_id,
                                   index_type='Okapi BM25 Rank')

    # Snapshot of the index names present before anything is added.
    indexes = catalog.indexes()

    # Install the lexicon before the loop below adds the ZCTextIndex that
    # names it.
    if not hasattr(catalog, lexicon_id):
        lexicon = PLexicon(lexicon_id, '', Splitter(), CaseNormalizer(),
                           StopWordRemover())
        catalog._setObject(lexicon_id, lexicon)

    wanted_indexes = (
        ('edit_type', 'FieldIndex', None),
        ('note', 'ZCTextIndex', plaintext_extra),
        ('link_title', 'FieldIndex', None),
    )
    for index_name, index_type, extra in wanted_indexes:
        if index_name not in indexes:
            catalog.addIndex(index_name, index_type, extra=extra)
def CreateCatalog(self):
    """ creates ZCatalog object """
    catalog = ZCatalog(DOCMANAGER_CATALOG, '')
    self._setObject(DOCMANAGER_CATALOG, catalog)
    # Re-fetch the catalog through the container after storing it.
    catalog = self._getOb(DOCMANAGER_CATALOG)

    # Snapshots taken once, before anything is added; every addition below
    # is checked against them.
    available_indexes = catalog.indexes()
    available_metadata = catalog.schema()

    def ensure_index(name, index_type, **kw):
        # Add the index only when the catalog did not already have it.
        if name not in available_indexes:
            catalog.addIndex(name, index_type, **kw)

    def ensure_column(name):
        # Add the metadata column only when it is not already in the schema.
        if name not in available_metadata:
            catalog.addColumn(name)

    # Attributes that are both indexed and stored as metadata.
    for name, index_type in (('id', 'FieldIndex'),
                             ('meta_type', 'FieldIndex'),
                             ('title', 'TextIndex')):
        ensure_index(name, index_type)
        ensure_column(name)

    ensure_index('path', 'PathIndex')

    try:
        catalog.Vocabulary(id='Vocabulary', title='')
    except Exception:
        # Best effort, as before: a failure here must not abort catalog
        # creation.  Unlike the former bare ``except:`` this no longer
        # swallows KeyboardInterrupt / SystemExit.
        pass

    # 'approved' used to be added without an existence check, unlike every
    # other index here; it now goes through the same guard.
    for name, index_type in (('description', 'TextIndex'),
                             ('abstract', 'TextIndex'),
                             ('author', 'TextIndex'),
                             ('keywords', 'FieldIndex'),
                             ('coverage', 'FieldIndex'),
                             ('approved', 'TextIndex'),
                             ('indexThematicArea', 'FieldIndex')):
        ensure_index(name, index_type)

    try:
        ensure_index('PrincipiaSearchSource', 'TextIndexNG2',
                     extra={'default_encoding': 'utf-8',
                            'use_converters': 1,
                            'autoexpand': 1})
    except Exception:
        # Best effort, as before: failing to add this index is tolerated.
        pass
class Repository(UniqueObject, DynamicType, StorageManager, BTreeFolder2):
    """Rhaptos Version Repository tool"""

    __implements__ = (IRepository, StorageManager.__implements__,
                      DynamicType.__implements__)
    meta_type = 'Repository'

    security = AccessControl.ClassSecurityInfo()
    # Fake out the types tool since we can't really use the Factory creation method
    portal_type = 'Repository'

    # Per-field weights handed to the storages' search(); also the default
    # for searchRepository(weights=...).
    dsw = default_search_weights = {
        'fulltext': 1,
        'abstract': 1,
        'subject': 10,
        'keyword': 10,
        'author': 50,
        'translator': 40,
        'editor': 20,
        'maintainer': 10,
        'licensor': 10,
        'institution': 10,
        'exact_title': 100,
        'title': 10,
        'language': 5,
        'containedIn': 200,
        'parentAuthor': 0,
        'containedAuthor': 0,
        'objectid': 1000,
    }

    # placeholder attributes for otherwise-not-present catalog columns/indexes
    fields = {}
    matched = {}
    weight = 0
    sortTitle = None  # FIXME: I don't think this should even be a column; nothing provides it. A fine index, though.

    default_browse_batch_size = 50

    __allow_access_to_unprotected_subobjects__ = 1

    # ZMI methods
    manage_options = (BTreeFolder2.manage_options + (
        {'label': 'Overview', 'action': 'manage_overview'},
        {'label': 'Catalog', 'action': 'manage_catalog'},
    ))

    manage_overview = DTMLFile('explainRepository', globals())

    def manage_catalog(self, REQUEST=None):
        """Access to the ZCatalog of versioned objects"""
        if REQUEST is not None:
            REQUEST['RESPONSE'].redirect(
                self.catalog.absolute_url() + '/manage_catalogView')

    def __init__(self, id, title=''):
        """Initialize Repository object"""
        StorageManager.__init__(self, id)
        self.OAI = OAIHandler('OAI')
        self.title = title
        self._create_catalog()
        # results cache needs better invalidation code - consider fake=True
        # if you're getting conflicts on publish
        self._create_cache()

    # Copied from PortalContent
    def __call__(self):
        '''
        Invokes the default view.
        '''
        view = _getViewFor(self)
        if getattr(aq_base(view), 'isDocTemp', 0):
            return view(self, self.REQUEST)
        else:
            return view()

    security.declarePrivate("_noteObjectPublished")
    def _noteObjectPublished(self, objectId):
        """Clear cached search results and register objectId for hit counting.

        The hit-count registration is skipped when no 'portal_hitcount'
        tool can be found.
        """
        self.cache.clearSearchCache()

        #FIXME: these things shouldn't be done here, but with some sort of
        # event system
        # hitcount update
        hitcount = getToolByName(self, 'portal_hitcount', None)
        if hitcount:
            hitcount.registerObject(objectId, DateTime())

    def _create_module(self, key, data):
        """Create a module in ZODB from data in the postgres db"""
        from Products.RhaptosModuleStorage.ModuleVersionFolder import \
            ModuleVersionStub

        # Create a module version stub (e.g. /plone/content/m9001)
        storage = self.getStorageForType('Module')
        mvs = ModuleVersionStub(data['id'], storage=storage.id)
        self._setObject(data['id'], mvs, set_owner=0)
        logger.debug('Created ModuleVersionStub %s'
                     % '/'.join(mvs.getPhysicalPath()))

        # Code copied from publishObject
        self._noteObjectPublished(data['id'])

        # Not going to trigger pdf production here
        # (storage.notifyObjectRevised), should be in cnx-publishing
        # instead
        pubobj = storage.getObject(data['id'], 'latest')
        self.catalog.catalog_object(pubobj)
        logger.debug('Add %s to catalog' % '/'.join(pubobj.getPhysicalPath()))

    def _create_collection(self, key, data):
        """Create a collection in ZODB from data in the postgres db

        ``data`` is one row (a dictionary) describing a single version of
        the collection.  Raises KeyError(key) when no collection tree can
        be fetched for that version.
        """
        moduledb_tool = getToolByName(self, 'portal_moduledb')

        # Get collection tree
        tree = moduledb_tool.sqlGetCollectionTree(
            id=data['id'], version=data['version'], aq_parent=self).tuples()
        if not tree or tree[0][0] is None:
            logger.debug('Unable to get collection tree for %s' % key)
            # can't get the collection tree, nothing to do
            raise KeyError(key)
        tree = simplejson.loads(tree[0][0].decode('utf-8'))

        # Create a version folder (e.g. /plone/content/col11554)
        storage = self.getStorageForType('Collection')
        if data['id'] not in self.objectIds():
            vf = VersionFolder(data['id'], storage=storage.id)
            self._setObject(data['id'], vf, set_owner=0)
            logger.debug('Created VersionFolder %s'
                         % '/'.join(vf.getPhysicalPath()))
        vf = getattr(self, data['id'])

        # Create a collection (e.g. /plone/content/col11554/1.1)
        collection = _createObjectByType('Collection', vf, data['version'])
        collection.objectId = data['id']
        collection.version = data['version']
        collection.created = DateTime(data['_created'].isoformat())
        collection.revised = DateTime(data['_revised'].isoformat())
        collection.setKeywords(data['_keywords'].split(', '))
        collection.setAbstract(data['abstract'])
        attributes = dict(title=data['name'],
                          authors=data['authors'],
                          maintainers=data['maintainers'],
                          licensors=data['licensors'],
                          license=data['license'],
                          _parent_id=data['parent_id'],
                          _parent_version=data['parent_version'],
                          parentAuthors=data['parentAuthors'],
                          language=data['language'],
                          subject=data['_subject'].split(', '))
        for k, v in attributes.items():
            setattr(collection, k, v)
        if 'print_style' in data:
            collection.parameters.manage_addProperty(
                'printstyle', data['print_style'], 'string')
        logger.debug('Created collection %s'
                     % '/'.join(collection.getPhysicalPath()))
        logger.debug(str(collection.propertyItems()))

        # Code copied from publishObject
        self._noteObjectPublished(data['id'])

        # Not going to trigger pdf production here
        # (storage.notifyObjectRevised), should be in cnx-publishing
        # instead

        # (module id, version) pairs seen while walking the tree; collected
        # but not consumed further in this method.
        modules = []

        def create_objects(contents, folder):
            # Create the top level objects
            for node in contents:
                new_folder = None
                if node['id'] == 'subcol' or node['id'].startswith('col'):
                    new_folder = _createObjectByType(
                        'SubCollection', folder,
                        folder.generateUniqueId('SubCollection'))
                    new_folder.title = node['title']
                    logger.debug('Created subcollection: %s' % new_folder)
                elif node['id'].startswith('m'):
                    if node['id'] not in self.objectIds():
                        # Create the module if it doesn't exist: the lookup
                        # goes through __getitem__, which builds it from
                        # postgres as a side effect.
                        self[node['id']]
                    obj = _createObjectByType('PublishedContentPointer',
                                              folder, node['id'])
                    obj.moduleId = node['id']
                    obj.version = 'latest'
                    # FIXME - should track if original was set to latest, but
                    # that info is not sent properly to the DB, nor returned
                    # in the json
                    # if node['latest']:
                    #     obj.version = 'latest'
                    # else:
                    #     obj.version = node['version']
                    modules.append((node['id'], node['version']))
                    logger.debug('Created PublishedContentPointer %s@%s'
                                 % (obj.getModuleId(), obj.getVersion()))

                # Create all the objects in "contents"
                if new_folder:
                    create_objects(node.get('contents') or [], new_folder)

        def is_newer(candidate, current):
            # Compare dotted versions numerically ('1.10' is newer than
            # '1.9').  Comparing the split strings directly, as before,
            # ordered them lexicographically and got that case wrong.  The
            # string comparison is kept as a fallback for non-numeric parts.
            candidate_parts = candidate.split('.')
            current_parts = current.split('.')
            try:
                return ([int(part) for part in candidate_parts]
                        > [int(part) for part in current_parts])
            except ValueError:
                return candidate_parts > current_parts

        # Create SubCollections and PublishedContentPointer according to
        # the collection tree
        create_objects(tree['contents'], collection)

        # Copied from Products.RhaptosRepository.VersionFolder.checkinResource
        if 'latest' not in vf.objectIds():
            addLatestReference(vf, 'latest', collection.Title(),
                               collection.version)
            logger.debug('Added latest reference')
        elif is_newer(collection.version, vf.latest.version):
            vf.latest.edit(collection.Title(), collection.version)
        collection.submitter = data['submitter']
        collection.submitlog = data['submitlog']
        collection.state = 'public'
        logger.debug('Finished creating collection')

        # Create collection.xml if it doesn't exist in postgres
        filenames = moduledb_tool.sqlGetModuleFilenames(
            id=data['id'], version=data['version']).tuples()
        if filenames and 'collection.xml' not in str(filenames):
            logger.debug('Create collection.xml for %s' % key)
            xml = collection.restrictedTraverse('source_create')()
            res = moduledb_tool.sqlInsertFile(file=Binary(xml),
                                              media_type='text/xml')
            fid = res[0].fileid
            moduledb_tool.sqlInsertModuleFile(moduleid=collection.objectId,
                                              version=collection.version,
                                              fileid=fid,
                                              filename='collection.xml',
                                              mimetype='text/xml',
                                              aq_parent=self)

        pubobj = storage.getObject(data['id'], 'latest')
        self.catalog.catalog_object(pubobj)
        logger.debug('Add %s to catalog' % '/'.join(pubobj.getPhysicalPath()))

    def __getitem__(self, key):
        # Return the sub-object called ``key``.  When it is not an attribute
        # of this folder and the key looks like a module ('m123') or
        # collection ('col123') id, the object is first created from the
        # postgres database.  Raises KeyError for anything else.
        try:
            # Try returning the object
            try:
                return getattr(self.aq_inner, key)
            except AttributeError:
                pass
            except TypeError:  # key is None or not a string
                raise KeyError('bad type')

            # The key has to be either in the format of col12345 or m12345
            m = re.match('(col|m)([0-9]+)$', key)
            if not m:
                raise KeyError(key)

            # The key is not in the ZODB, look for it in the postgres db
            moduledb_tool = getToolByName(self, 'portal_moduledb')
            data = moduledb_tool.sqlGetLatestModule(id=key).dictionaries()
            if not data:
                # The key isn't in the postgres db either
                raise KeyError(key)
            data = data[0]

            if m.group(1) == 'm':  # Create a module
                logger.debug('Create module %s from postgres' % key)
                self._create_module(key, data)
                logger.debug('Created module %s from postgres' % key)
            elif m.group(1) == 'col':  # Create a collection
                # This used to be a bare print to stdout; route it through
                # the logger like the rest of the diagnostics.
                logger.debug('logger.level: %s' % logger.level)
                logger.debug('Create collection %s from postgres' % key)
                #History is descending in time- newest first
                history = moduledb_tool.sqlGetHistory(id=key).dictionaries()
                prev_ver = ''
                #need to skip multiple minor versions from rewrite
                for item in history:
                    if item['version'] == prev_ver:
                        continue
                    data = moduledb_tool.sqlGetModule(
                        id=key, version=item['version']).dictionaries()
                    if data:
                        data = data[0]
                        logger.debug('Create collection %s version %s'
                                     % (data['id'], data['version']))
                        self._create_collection(key, data)
                        prev_ver = item['version']
                logger.debug('Created collection %s from postgres' % key)
        except KeyError:
            # No need to log
            raise
        except Exception:
            # This function often silently fails, so adding explicit logging
            logger.exception('Something failed in %s' % self.__getitem__)
            raise
        return getattr(self, key)

    index_html = __call__

    security.declarePublic("Title")
    def Title(self):
        """Fulfil new-ish interface expectations for title (so we work with breadcrumbs, etc)"""
        return self.title

    security.declarePrivate("_create_catalog")
    def _create_catalog(self):
        """Creates the ZCatalog instance for versioned objects"""
        self.catalog = ZCatalog('catalog')
        lexicon = PLexicon('lexicon', '', Splitter(), CaseNormalizer(),
                           StopWordAndSingleCharRemover())
        self.catalog._setObject('lexicon', lexicon)

        # Full-text indexes.  One extras record is reused; only doc_attr
        # changes between the addIndex calls.
        ZCText_extras = Empty()
        ZCText_extras.index_type = 'Okapi BM25 Rank'
        ZCText_extras.lexicon_id = 'lexicon'
        for index_name, doc_attr in (('abstract', 'abstract'),
                                     ('Title', 'Title'),
                                     ('institution', 'institution'),
                                     ('keywordstext', 'keywords')):
            ZCText_extras.doc_attr = doc_attr
            self.catalog.addIndex(index_name, 'ZCTextIndex', ZCText_extras)

        self.catalog.addIndex('atomicInstitution', 'FieldIndex',
                              {'indexed_attrs': 'institution'})
        for index_name, index_type in (('authors', 'KeywordIndex'),
                                       ('parentAuthors', 'KeywordIndex'),
                                       ('maintainers', 'KeywordIndex'),
                                       ('language', 'KeywordIndex'),
                                       ('modified', 'DateIndex'),
                                       ('revised', 'DateIndex'),
                                       ('objectId', 'FieldIndex'),
                                       ('portal_type', 'FieldIndex'),
                                       ('containedModuleIds', 'KeywordIndex'),
                                       ('subject', 'KeywordIndex')):
            self.catalog.addIndex(index_name, index_type)

        extra = Empty()
        extra.indexed_attrs = 'keywords'
        self.catalog.addIndex('keywordscase', 'KeywordIndex', extra)

        # The remaining indexes are built by hand and registered directly
        # on the inner catalog so that their properties can be set.
        ki = KeywordIndex('keywords')
        self.catalog._catalog.addIndex('keywords', ki)
        ki._updateProperty('PrenormalizeTerm', 'python: value.lower()')
        ki._updateProperty('TermType', 'string')
        ki.keywords._updateProperty('Normalizer',
                                    'python: [k.lower() for k in value]')

        ki = KeywordIndex('baselanguage')
        self.catalog._catalog.addIndex('baselanguage', ki)
        ki._updateProperty(
            'PrenormalizeTerm',
            "python: value[:(value.find('-') > 0 ) and value.find('-') or len(value)]")
        ki.baselanguage._updateProperty('Name', 'language')
        ki.baselanguage._updateProperty(
            'Normalizer',
            "python: [value[:(value.find('-') > 0 ) and value.find('-') or len(value)]]")

        fi = FieldIndex('sortTitle')
        self.catalog._catalog.addIndex('sortTitle', fi)
        fi._updateProperty('PrenormalizeTerm', 'python: value.lower()')
        fi._updateProperty('TermType', 'string')
        fi.sortTitle._updateProperty('Name', 'Title')
        fi.sortTitle._updateProperty('Normalizer',
                                     'python: here.stripArticles(value)')

        fi = FieldIndex('parent')
        self.catalog._catalog.addIndex('parent', fi)
        fi.parent._updateProperty('Name', 'getParent')
        fi.parent._updateProperty('Normalizer', 'python:value.objectId')

        # Role indexes: the sub-object created with the index is replaced
        # by an ExpressionEvaluator that reads the role list off the object.
        for role in ('translators', 'editors'):
            ki = KeywordIndex(role)
            self.catalog._catalog.addIndex(role, ki)
            ki._delObject(role)
            ee = ExpressionEvaluator()
            ee.id = role
            ki._setObject(ee.id, ee)
            getattr(ki, role)._updateProperty(
                'Expression', "python: lambda o: o.roles['%s']" % role)

        self._set_metadata()

        self._p_changed = 1

    security.declarePrivate("_addColumn")
    def _addColumn(self, fieldname, *args, **kw):
        """Create a metadata field on the content catalog if it doesn't already exist.
        Call as you would 'self.catalog.addColumn'.
        Returns 'fieldname' if that name is actually added; None if it exists.
        """
        if fieldname not in self.catalog.schema():
            self.catalog.addColumn(fieldname, *args, **kw)
            return fieldname
        return None

    security.declarePrivate("_set_metadata")
    def _set_metadata(self):
        """Create the metadata fields on the content catalog.
        This is called by upgrade script and installation, so adding a role here plus reinstall is all
        that's necessary for additional metadata.
        Return tuple of added fields if we actually changed something (so the caller can update metadata.)
        Empty tuple (false) if no change.  Names are returned in the order
        listed below.
        """
        # (column name, extra positional arguments for addColumn)
        columns = (
            ('Title', ()),
            ('abstract', ()),
            ('authors', ()),
            ('language', ()),
            ('code', ()),
            ('collectionType', ()),
            ('created', ()),
            ('fields', ({},)),
            ('getHistoryCount', ()),
            ('getIcon', ()),
            ('institution', ()),
            ('instructor', ()),
            ('keywords', ()),
            ('license', ()),
            ('maintainers', ()),
            ('matched', ({},)),
            ('meta_type', ()),
            ('objectId', ()),
            ('portal_type', ()),
            ('revised', ()),
            ('roles', ()),
            ('sortTitle', ()),
            ('subject', ()),
            ('submitter', ()),
            ('url', ()),
            ('version', ()),
            ('weight', (0,)),
            ('harvestable', (1,)),
        )
        added = []
        for name, extra_args in columns:
            if self._addColumn(name, *extra_args) is not None:
                added.append(name)
        return tuple(added)

    security.declarePrivate("_create_cache")
    def _create_cache(self, fake=False):
        """Creates the cache object for results sets"""
        if fake:
            self._setObject('cache', nocache('cache'))
        else:
            self._setObject('cache', cache('cache'))
        self._p_changed = 1

    # No docstring on purpose: ZPublisher only publishes objects that have
    # one, and adding it would change whether this method is reachable by
    # URL.
    def log(self, message, severity=zLOG.INFO):
        zLOG.LOG("RhaptosRepository", severity,
                 "%s (%s)" % (message, self.REQUEST['PATH_INFO']))

    def _getStorageForObjectId(self, id):
        """Return the storage implementation associated with the given ID"""
        stub = self[id]
        return self.getStorage(stub.storage)

    security.declarePrivate("_storagesForTypes")
    def _storagesForTypes(self, portal_types=None):
        """Build a mapping of storage name -> list of portal_types to query.

        With no portal_types every storage is listed with the value None
        ("search everything").  A single type may be given as a string.
        The caller's sequence is left untouched.
        """
        storages = {}
        # If no portal_types, search everything
        if not portal_types:
            for name in self.listStorages():
                storages[name] = None
            return storages

        if isinstance(portal_types, str):
            portal_types = [portal_types]

        # Walk a reversed copy: this yields the same grouping order as the
        # former ``while portal_types: portal_types.pop()`` loop without
        # emptying the list that was passed in.
        pending = list(portal_types)
        pending.reverse()
        for pt in pending:
            storage = self._storage_map.get(pt, self._default_storage)
            storages.setdefault(storage, []).append(pt)
        return storages

    def hasRhaptosObject(self, id):
        """Returns true if an object with the given ID exists in the repository"""
        return bool(self.hasObject(id))

    def countRhaptosObjects(self, portal_types=None):
        """Returns the number of objects in the repository of the given type, or all types"""
        count = 0
        for name, types in self._storagesForTypes(portal_types).items():
            storage = self.getStorage(name)
            count += storage.countObjects(types)
        return count

    def getRhaptosObjectLanguageCounts(self, portal_types=None):
        """Returns a list of tuples of language codes and count of objects using them, ordered by number of objects, descending"""
        langdict = {}
        for name, types in self._storagesForTypes(portal_types).items():
            storage = self.getStorage(name)
            for l, c in storage.getLanguageCounts(types):
                langdict[l] = langdict.setdefault(l, 0) + c
        # Highest count first; the sort is stable, so ties keep dict order.
        return sorted(langdict.items(), key=lambda item: item[1],
                      reverse=True)

    def langLookup(self, langs=None):
        """Accesses the languageConstants monkeypatch on PloneLanguageTool, which
        generates a static dictionary of language codes, native and English language
        names, and regional variant names from PLT's own specialized dictionaries.

        ``langs`` may be a comma-separated string or a sequence of codes;
        when empty the whole dictionary is returned.  An unknown code
        raises KeyError.
        """
        lcdict = languageConstants
        if isinstance(langs, str):
            langs = langs.split(',')
        if not langs:
            return lcdict
        returnDict = {}
        for k in langs:
            returnDict[k] = lcdict[k]
        return returnDict

    def getRhaptosObject(self, id, version=None, **kwargs):
        """Returns the object with the specified ID"""
        return self._getStorageForObjectId(id).getObject(id, version, **kwargs)

    security.declarePublic("getHistory")
    def getHistory(self, id):
        """Returns the history of the object with the specified ID or None if there is no such ID in the repository"""
        try:
            return self._getStorageForObjectId(id).getHistory(id)
        except KeyError:
            return None

    security.declarePrivate("deleteRhaptosObject")
    def deleteRhaptosObject(self, objectId, version=None, **kwargs):
        """Deletes all the objects with the specified ID

        Raises KeyError when the repository has no such object.  Returns
        whatever the storage's deleteObject() returns.
        """
        if not self.hasRhaptosObject(objectId):
            raise KeyError(objectId)

        # The clean-up below used to sit after the ``return`` and never
        # ran, so deleted objects stayed in the search cache and kept their
        # similarity/link records.
        result = self._getStorageForObjectId(objectId).deleteObject(
            objectId, version)

        self.cache.clearSearchCache()

        #FIXME: this shouldn't be done here, but with some sort of event system
        # Each tool is looked up with a None default so that a site without
        # it can still delete content.
        similarity = getToolByName(self, 'portal_similarity', None)
        if similarity is not None:
            similarity.deleteSimilarity(objectId, version)
        linkmap = getToolByName(self, 'portal_linkmap', None)
        if linkmap is not None:
            linkmap.deleteLinks(objectId, version)

        return result

    def _doGet(self, id, version=None, **kwargs):
        # Same lookup as getRhaptosObject.
        return self._getStorageForObjectId(id).getObject(id, version, **kwargs)

    def getRhaptosObjects(self, objects):
        """Returns a list of objects as defined by the list of id,version tuples

        An id that is not in the repository yields False in its position.
        """
        return [
            self.hasRhaptosObject(oid)
            and self._getStorageForObjectId(oid).getObject(oid, ver)
            for oid, ver in objects
        ]

    def publishObject(self, object, message):
        """
        Publish an object for the first time in the repository

        Creates a new folder to hold the version history of this
        object and create the first version of the object, returning
        the new unique ID for this object
        """
        storage = self.getStorageForType(object.portal_type)
        objectId = storage.applyVersionControl(object)
        storage.createVersionFolder(object)
        user = AccessControl.getSecurityManager().getUser().getUserName()
        storage.checkinResource(object, message, user)

        self._noteObjectPublished(objectId)

        # storage events (mostly collection printing, at the moment)
        pubobj = storage.getObject(objectId, 'latest')
        storage.notifyObjectRevised(pubobj, None)

        # Removing this 'event' until Lens V2
        #if object.getParent():
        ### FIXME: We really want the Zope3 event system for this.
        ### Once we get that, we'll want to use something to the effect of:
        ### zope.event.notify(ObjectRevisionPublished)
        #self.lens_tool.notifyLensDerivedObject(object)
        ### End Event System Hack

        return objectId

    def publishRevision(self, object, message):
        """
        Publish a revision of an object in the repository

        object: the object to place under version control.  It must
        implement IMetadata and IVersionedObject
        message: a string log message by the user

        Raises CommitError when the object is not under version control.
        Nothing is returned.
        """
        if not self.isUnderVersionControl(object):
            raise CommitError(
                "Cannot publish revision of object %s not under version control"
                % object.getId())

        # handle to original object to preserve locked status, if necessary;
        # we could look this up after publication (and would have to with proper events),
        # but that would be version inspection
        origobj = object.getPublishedObject().latest

        storage = self.getStorageForType(object.portal_type)
        user = AccessControl.getSecurityManager().getUser().getUserName()
        storage.checkinResource(object, message, user)
        self.cache.clearSearchCache()

        ### FIXME: We really want the Zope3 event system for these.
        ### Once we get that, we'll want to use something to the effect of:
        ### zope.event.notify(ObjectRevisionPublished)
        try:
            # Grab the now-published version
            pubobj = object.getPublishedObject().latest
        except AttributeError:
            # The lookup failure was already tolerated, but the code then
            # fell through and hit an unbound ``pubobj``.  Without the
            # published object there is nothing to notify about, so stop.
            logger.warning('Published version of %s not found; '
                           'skipping revision notifications'
                           % object.getId())
            return

        # lens events
        self.lens_tool.notifyLensRevisedObject(pubobj)

        # storage events (mostly collection printing, at the moment)
        storage.notifyObjectRevised(pubobj, origobj)

        # notice of change to all containing collections, latest version only
        container_objs = self.catalog(containedModuleIds=pubobj.objectId)
        for col in container_objs:
            colobj = col.getObject()
            colobj.notifyContentsRevised()
        ### End Event System Hack

    def isUnderVersionControl(self, object):
        """Returns true if the object is under version control"""
        return self.getStorageForType(
            object.portal_type).isUnderVersionControl(object)

    def isLatestVersion(self, object):
        """Returns true if object is the most recent revision of an object"""
        return self.getStorageForType(
            object.portal_type).isLatestVersion(object)

    # No docstring on purpose: ZPublisher only publishes objects that have
    # one, and adding it would change whether this method is reachable by
    # URL.  Delegates to the storage registered for the object's
    # portal_type.
    def getVersionInfo(self, object):
        return self.getStorageForType(
            object.portal_type).getVersionInfo(object)

    def searchRepositoryByDate(self, start, end, REQUEST=None):
        """Search repository by date: start and end"""
        result = []

        s = self.getStorage('module_version_storage')
        objects = s.searchDateRange(start, end, ['Module', 'Collection'])
        result.extend(objects)

        # Oldest revision first.
        result.sort(key=lambda obj: obj.revised)
        return result

    security.declarePublic("cookSearchTerms")
    def cookSearchTerms(self, query):
        """return the cooked search terms, as well as the uncook dictionary, aggregated across storages"""
        allcooked = []
        alluncook = {}
        for name in self.listStorages():
            storage = self.getStorage(name)
            cooked, uncook = storage.cookSearchTerms(query)
            for c in cooked:
                if c not in allcooked:
                    allcooked.append(c)
                    alluncook.setdefault(c, []).extend(uncook[c])

            # Deal w/ stop words: must be stopped by _all_ searches
            # FIXME this code might not work, but is currently not exercised
            # since both storages use equivalent code for cookSearchTerms
            if '' in alluncook:
                # Keep only the words this storage stopped as well.  The
                # list is rebuilt rather than pruned in place (removing
                # while iterating skipped entries), and a storage that
                # stopped nothing no longer raises KeyError.
                stopped_here = uncook.get('', [])
                alluncook[''] = [word for word in alluncook['']
                                 if word in stopped_here]
            else:
                alluncook.update(uncook)

        return allcooked, alluncook

    def searchRepository(self, query, query_type="weakAND", weights=dsw,
                         field_queries=None, sorton='weight', recent=False,
                         use_cache=True, min_rating=0):
        """Search the repository: portal_types defaults to all types w/ storage objects
        Default weights are stored in default_search_weights on the repository

        field_queries maps a field name (or tuple of field names) to a
        (query, query_type, limit_flag) triple; the optional
        'portal_types' entry restricts which storages are searched.  The
        mapping passed in is not modified.

        Returns (results, term_results, searchhash).
        """
        if not weights:
            weights = self.default_search_weights  #AKA: dsw

        # Work on a private copy: the 'portal_types' entry is popped below
        # and that must not leak into the caller's dict (nor into a shared
        # default argument).
        field_queries = dict(field_queries or {})

        fq_list = sorted(field_queries.items())
        searchhash = str(query) + str(query_type) + str(weights) + str(fq_list)
        cached_res = self.cache.resultsCacheLookup(searchhash, sorton, recent)
        if use_cache and cached_res:
            result, term_results = cached_res
            return result, term_results, searchhash

        # Build mapping of storage -> list of portal_types to query
        storages = {}
        # If no portal_types, search everything
        if 'portal_types' not in field_queries:
            for name in self.listStorages():
                storages[name] = None
        else:
            for pt in field_queries.pop('portal_types')[0]:
                storage = self._storage_map.get(pt, self._default_storage)
                storages.setdefault(storage, []).append(pt)

        result = []
        skipped = []
        matched = []
        term_results = {}

        # restrict = [(t,v[0]) for t,v in field_queries.items() if v[1] == 'AND']
        restrict = None

        # First, the 'anywhere' query
        if query:
            for name, portal_types in storages.items():
                storage = self.getStorage(name)
                result.extend(
                    storage.search(query, portal_types, weights, restrict,
                                   min_rating=min_rating))

            result, skipped, matched = applyQueryType(
                result, query, query_type)
            term_results['any'] = (skipped, matched)

        # Now the rest.
        # sort by limit - all limit fields after result fields: this is
        # needed for the intersect logic
        fq_list = sorted(field_queries.items(), key=lambda item: item[1][2])
        for field, (fquery, fquery_type, f_limit) in fq_list:
            fq_weights = {}
            if isinstance(field, tuple):
                for f in field:
                    fq_weights[f] = self.default_search_weights[f]
            else:
                fq_weights[field] = self.default_search_weights[field]

            fres = []
            for name, portal_types in storages.items():
                storage = self.getStorage(name)
                fres.extend(
                    storage.search(fquery, portal_types, weights=fq_weights,
                                   restrict=restrict, min_rating=min_rating))

            fres, fskipped, fmatched = applyQueryType(
                fres, fquery, fquery_type)
            term_results[field] = (fskipped, fmatched)

            # intersect each result set with the previous ones. Each
            # field_query is ANDed with the others (including the
            # 'anywhere' query), IFF one of the previous searches had a matching term,
            # and this search had a matching term. This 'weakAND' drops any field that had
            # all of its terms dropped. The 'matched' dictionaries of each result object are updated
            # Since limit fields are last, they will not add to result set
            # if nothing before them matched.
            if fmatched:
                if matched:
                    result_dict = dict([(r.objectId, r) for r in result])
                    result = [r for r in fres if r.objectId in result_dict]
                    for r in result:
                        previous = result_dict[r.objectId]
                        for t, f in previous.matched.items():
                            r.matched.setdefault(t, []).extend(f)
                        for t, f in previous.fields.items():
                            r.fields.setdefault(t, []).extend(f)
                        r.weight += previous.weight
                elif not f_limit:
                    result = fres
                    matched = fmatched

        result = self.sortSearchResults(result, sorton, recent)
        self.cache.resultsCacheInject(
            searchhash, (result, term_results, sorton, recent))
        return self.wrapResults(result), term_results, searchhash

    def wrapResults(self, results):
        """wrap list of results from pluggable brains or catalog record
        searches to standalone DBModuleSearch objects that can be pickled,
        and thus cached or stored in a session variable.
        Entries that are already DBModuleSearch objects are passed through,
        so the method can safely be called on lists that contain them.
        """
        return [
            isinstance(res, DBModuleSearch) and res or DBModuleSearch(res)
            for res in results
        ]

    security.declarePublic("sortSearchResults")
    def sortSearchResults(self, result, sorton, recent=False):
        """sort a result set

        ``result`` is sorted in place according to ``sorton`` ('weight',
        'popularity', 'views', 'language', 'revised', 'title',
        'portal_type' or 'rating'); any other value leaves the order
        alone.  Returns the wrapped results.
        """
        # NOTE: weights are compared with cmp() rather than by converting
        # their difference to int; the conversion truncated fractional
        # differences to 0, which made such results compare as equal and
        # skipped the title tie-break.
        if sorton == 'weight':
            result.sort(
                lambda x, y: int(cmp(y.weight, x.weight) or cmpTitle(x, y)))
        elif sorton == 'popularity':
            hc_tool = getToolByName(self, 'portal_hitcount', None)
            # key= asks the tool once per object instead of twice per
            # comparison; reverse=True keeps the sort stable.
            result.sort(
                key=lambda r: hc_tool.getPercentileForObject(r.objectId,
                                                             recent),
                reverse=True)
        elif sorton == 'views':
            hc_tool = getToolByName(self, 'portal_hitcount', None)
            result.sort(
                key=lambda r: hc_tool.getHitCountForObject(r.objectId,
                                                           recent),
                reverse=True)
        elif sorton == 'language':
            result.sort(lambda x, y: int(
                cmp(x.language, y.language) or cmpTitle(x, y)))
        elif sorton == 'revised':
            result.sort(
                lambda x, y: int(cmp(y.revised, x.revised) or cmpTitle(x, y)))
        elif sorton == 'title':
            result.sort(cmpTitle)
        elif sorton == 'portal_type':
            result.sort(lambda x, y: int(
                cmp(x.portal_type, y.portal_type)
                or hasattr(y, 'weight') and hasattr(x, 'weight')
                and cmp(y.weight, x.weight)
                or cmpTitle(x, y)))
        elif sorton == 'rating':
            # Highest rating first; objects without a rating count as 0.
            result.sort(key=lambda r: getattr(r, 'rating', 0), reverse=True)

        return self.wrapResults(result)

    def getContentByAuthor(self, authorid):
        """Return all content by a particular author"""
        return self.getContentByRole('author', authorid)

    def getContentByRole(self, role, user_id):
        """Return all content by where the user has the specified role"""
        content = []
        for name in self._storagesForTypes().keys():
            storage = self.getStorage(name)
            content.extend(storage.getObjectsByRole(role, user_id))

        return content
class Repository(UniqueObject, DynamicType, StorageManager, BTreeFolder2):
    """Rhaptos Version Repository tool

    Folder-like tool holding versioned content.  It owns a ``catalog``
    (ZCatalog) and a results ``cache`` created at construction time, and
    delegates object handling to named storages (``getStorage`` /
    ``listStorages`` / ``getStorageForType`` are not defined in this class;
    presumably inherited from ``StorageManager`` -- confirm).

    Ids of the form ``m123`` / ``col123`` that are missing from the ZODB are
    created on demand from the postgres module database; see
    ``__getitem__``.
    """

    __implements__ = (IRepository,
                      StorageManager.__implements__,
                      DynamicType.__implements__)

    meta_type = 'Repository'

    security = AccessControl.ClassSecurityInfo()

    # Fake out the types tool since we can't really use the Factory creation
    # method
    portal_type = 'Repository'

    # Per-field weights handed to storage.search(); 'dsw' is a short alias
    # used as the default of searchRepository's 'weights' parameter.
    dsw = default_search_weights = {
        'fulltext': 1, 'abstract': 1, 'subject': 10, 'keyword': 10,
        'author': 50, 'translator': 40, 'editor': 20, 'maintainer': 10,
        'licensor': 10, 'institution': 10, 'exact_title': 100, 'title': 10,
        'language': 5, 'containedIn': 200, 'parentAuthor': 0,
        'containedAuthor': 0, 'objectid': 1000}

    # placeholder attributes for otherwise-not-present catalog
    # columns/indexes
    fields = {}
    matched = {}
    weight = 0
    # FIXME: I don't think this should even be a column; nothing provides it.
    # A fine index, though.
    sortTitle = None

    default_browse_batch_size = 50

    __allow_access_to_unprotected_subobjects__ = 1

    # ZMI methods
    manage_options = (
        BTreeFolder2.manage_options +
        ({'label': 'Overview', 'action': 'manage_overview'},
         {'label': 'Catalog', 'action': 'manage_catalog'})
    )

    manage_overview = DTMLFile('explainRepository', globals())

    def manage_catalog(self, REQUEST=None):
        """Access to the ZCatalog of versioned objects"""
        if REQUEST is not None:
            REQUEST['RESPONSE'].redirect(
                self.catalog.absolute_url() + '/manage_catalogView')

    def __init__(self, id, title=''):
        """Initialize Repository object"""
        StorageManager.__init__(self, id)
        self.OAI = OAIHandler('OAI')
        self.title = title
        self._create_catalog()
        # results cache needs better invalidation code - consider fake=True
        # if you're getting conflicts on publish
        self._create_cache()

    # Copied from PortalContent
    def __call__(self):
        '''
        Invokes the default view.
        '''
        view = _getViewFor(self)
        if getattr(aq_base(view), 'isDocTemp', 0):
            return view(self, self.REQUEST)
        else:
            return view()

    security.declarePrivate("_registerHitcount")
    def _registerHitcount(self, objectId):
        """Register 'objectId' with portal_hitcount, if that tool exists."""
        # FIXME: these things shouldn't be done here, but with some sort of
        # event system
        hitcount = getToolByName(self, 'portal_hitcount', None)
        if hitcount:
            hitcount.registerObject(objectId, DateTime())

    def _create_module(self, key, data):
        """Create a module in ZODB from data in the postgres db

        'data' is a mapping for the module row; only data['id'] is read
        here.  'key' is accepted for symmetry with _create_collection and is
        not used.
        """
        from Products.RhaptosModuleStorage.ModuleVersionFolder import \
            ModuleVersionStub

        # Create a module version stub (e.g. /plone/content/m9001)
        storage = self.getStorageForType('Module')
        mvs = ModuleVersionStub(data['id'], storage=storage.id)
        self._setObject(data['id'], mvs, set_owner=0)
        logger.debug('Created ModuleVersionStub %s'
                     % '/'.join(mvs.getPhysicalPath()))

        # Code copied from publishObject
        self.cache.clearSearchCache()
        self._registerHitcount(data['id'])

        # Not going to trigger pdf production here
        # (storage.notifyObjectRevised), should be in cnx-publishing
        # instead

        pubobj = storage.getObject(data['id'], 'latest')
        self.catalog.catalog_object(pubobj)
        logger.debug('Add %s to catalog'
                     % '/'.join(pubobj.getPhysicalPath()))

    def _create_collection(self, key, data):
        """Create a collection in ZODB from data in the postgres db

        'key' is the requested id (used for logging and for the KeyError);
        'data' is the mapping for one collection version.

        Raises KeyError(key) when no collection tree can be fetched.
        """
        moduledb_tool = getToolByName(self, 'portal_moduledb')

        # Get collection tree
        tree = moduledb_tool.sqlGetCollectionTree(
            id=data['id'], version=data['version'], aq_parent=self).tuples()

        if not tree or tree[0][0] is None:
            logger.debug('Unable to get collection tree for %s' % key)
            # can't get the collection tree, nothing to do
            raise KeyError(key)

        tree = simplejson.loads(tree[0][0].decode('utf-8'))

        # Create a version folder (e.g. /plone/content/col11554)
        storage = self.getStorageForType('Collection')
        if data['id'] not in self.objectIds():
            vf = VersionFolder(data['id'], storage=storage.id)
            self._setObject(data['id'], vf, set_owner=0)
            logger.debug('Created VersionFolder %s'
                         % '/'.join(vf.getPhysicalPath()))
        vf = getattr(self, data['id'])

        # Create a collection (e.g. /plone/content/col11554/1.1)
        collection = _createObjectByType('Collection', vf, data['version'])
        collection.objectId = data['id']
        collection.version = data['version']
        collection.created = DateTime(data['_created'].isoformat())
        collection.revised = DateTime(data['_revised'].isoformat())
        # The dict is built completely before any attribute is assigned, so
        # a missing key in 'data' fails before the collection is touched.
        metadata = dict(title=data['name'],
                        authors=data['authors'],
                        maintainers=data['maintainers'],
                        licensors=data['licensors'],
                        _parent_id=data['parent_id'],
                        _parent_version=data['parent_version'],
                        parentAuthors=data['parentAuthors'],
                        language=data['language'],
                        subject=data['_subject'].split(', '))
        for k, v in metadata.items():
            setattr(collection, k, v)
        if 'print_style' in data:
            collection.parameters.manage_addProperty(
                'printstyle', data['print_style'], 'string')
        logger.debug('Created collection %s'
                     % '/'.join(collection.getPhysicalPath()))
        logger.debug(str(collection.propertyItems()))

        # Code copied from publishObject
        self.cache.clearSearchCache()
        self._registerHitcount(data['id'])

        # Not going to trigger pdf production here
        # (storage.notifyObjectRevised), should be in cnx-publishing
        # instead

        # NOTE: filled in by create_objects but not read again in this
        # method.
        modules = []

        def create_objects(contents, folder):
            # Create the top level objects
            for node in contents:
                new_folder = None
                if node['id'] == 'subcol':
                    new_folder = _createObjectByType(
                        'SubCollection', folder,
                        folder.generateUniqueId('SubCollection'))
                    new_folder.title = node['title']
                    logger.debug('Created subcollection: %s' % new_folder)
                elif node['id'].startswith('m'):
                    if node['id'] not in self.objectIds():
                        # Create the module if it doesn't exist: the item
                        # lookup is done only for its side effect (see
                        # __getitem__).
                        self[node['id']]
                    obj = _createObjectByType(
                        'PublishedContentPointer', folder, node['id'])
                    obj.moduleId = node['id']
                    obj.version = 'latest'
                    # FIXME - should track if original was set to latest, but
                    # that info is not sent properly to the DB, nor returned
                    # in the json
                    # if node['latest']:
                    #     obj.version = 'latest'
                    # else:
                    #     obj.version = node['version']
                    modules.append((node['id'], node['version']))
                    logger.debug('Created PublishedContentPointer %s@%s'
                                 % (obj.getModuleId(), obj.getVersion()))

                # Create all the objects in "contents"
                if new_folder:
                    create_objects(node.get('contents') or [], new_folder)

        # Create SubCollections and PublishedContentPointer according to
        # the collection tree
        create_objects(tree['contents'], collection)

        # Copied from Products.RhaptosRepository.VersionFolder.checkinResource
        if 'latest' not in vf.objectIds():
            addLatestReference(vf, 'latest', collection.Title(),
                               collection.version)
            logger.debug('Added latest reference')
        collection.submitter = data['submitter']
        collection.submitlog = data['submitlog']
        collection.state = 'public'
        logger.debug('Finished creating collection')

        # Create collection.xml if it doesn't exist in postgres
        filenames = moduledb_tool.sqlGetModuleFilenames(
            id=data['id'], version=data['version']).tuples()
        if filenames and 'collection.xml' not in str(filenames):
            logger.debug('Create collection.xml for %s' % key)
            xml = collection.restrictedTraverse('source_create')()
            res = moduledb_tool.sqlInsertFile(file=Binary(xml),
                                              media_type='text/xml')
            fid = res[0].fileid
            moduledb_tool.sqlInsertModuleFile(
                moduleid=collection.objectId, version=collection.version,
                fileid=fid, filename='collection.xml', mimetype='text/xml',
                aq_parent=self)

        pubobj = storage.getObject(data['id'], 'latest')
        self.catalog.catalog_object(pubobj)
        logger.debug('Add %s to catalog'
                     % '/'.join(pubobj.getPhysicalPath()))

    def __getitem__(self, key):
        """Return the sub-object 'key', creating it from postgres if needed.

        Lookup order: an existing attribute of this folder first; otherwise,
        for keys shaped like 'm<digits>' or 'col<digits>', the module
        database is queried and the module / every collection version is
        created in the ZODB before being returned.

        Raises KeyError for non-string keys, malformed ids, and ids unknown
        to the database.  Any other failure is logged and re-raised.
        """
        try:
            # Try returning the object
            try:
                return getattr(self.aq_inner, key)
            except AttributeError:
                pass
            except TypeError:  # key is None or not a string
                raise KeyError('bad type')

            # The key has to be either in the format of col12345 or m12345
            m = re.match('(col|m)([0-9]+)$', key)
            if not m:
                raise KeyError(key)

            # The key is not in the ZODB, look for it in the postgres db
            moduledb_tool = getToolByName(self, 'portal_moduledb')
            data = moduledb_tool.sqlGetLatestModule(id=key).dictionaries()
            if not data:
                # The key isn't in the postgres db either
                raise KeyError(key)

            data = data[0]
            kind = m.group(1)
            if kind == 'm':
                # Create a module
                logger.debug('Create module %s from postgres' % key)
                self._create_module(key, data)
                logger.debug('Created module %s from postgres' % key)
            elif kind == 'col':
                # Create a collection
                # (was a bare print to stdout; routed through the logger)
                logger.debug('logger.level: %s' % logger.level)
                logger.debug('Create collection %s from postgres' % key)
                # History is descending in time- newest first
                history = moduledb_tool.sqlGetHistory(id=key).dictionaries()
                prev_ver = ''
                # need to skip multiple minor versions from rewrite
                for item in history:
                    if item['version'] == prev_ver:
                        continue
                    data = moduledb_tool.sqlGetModule(
                        id=key, version=item['version']).dictionaries()
                    if data:
                        data = data[0]
                        logger.debug('Create collection %s version %s'
                                     % (data['id'], data['version']))
                        self._create_collection(key, data)
                        prev_ver = item['version']
                logger.debug('Created collection %s from postgres' % key)
        except KeyError:
            # No need to log
            raise
        except Exception:
            # This function often silently fails, so adding explicit logging
            logger.exception('Something failed in %s' % self.__getitem__)
            raise

        return getattr(self, key)

    index_html = __call__

    security.declarePublic("Title")
    def Title(self):
        """Fulfil new-ish interface expectations for title (so we work with
        breadcrumbs, etc)"""
        return self.title

    security.declarePrivate("_create_catalog")
    def _create_catalog(self):
        """Creates the ZCatalog instance for versioned objects

        Builds the lexicon, all indexes and (via _set_metadata) the metadata
        columns.  Indexes are added in a fixed order, kept from the original
        implementation.
        """
        self.catalog = ZCatalog('catalog')
        catalog = self.catalog
        lexicon = PLexicon('lexicon', '', Splitter(), CaseNormalizer(),
                           StopWordAndSingleCharRemover())
        catalog._setObject('lexicon', lexicon)

        # One 'extras' record is reused for every text index; only doc_attr
        # changes between addIndex calls.
        ZCText_extras = Empty()
        ZCText_extras.index_type = 'Okapi BM25 Rank'
        ZCText_extras.lexicon_id = 'lexicon'
        for index_name, doc_attr in (('abstract', 'abstract'),
                                     ('Title', 'Title'),
                                     ('institution', 'institution'),
                                     ('keywordstext', 'keywords')):
            ZCText_extras.doc_attr = doc_attr
            catalog.addIndex(index_name, 'ZCTextIndex', ZCText_extras)

        catalog.addIndex('atomicInstitution', 'FieldIndex',
                         {'indexed_attrs': 'institution'})
        for index_name, index_type in (('authors', 'KeywordIndex'),
                                       ('parentAuthors', 'KeywordIndex'),
                                       ('maintainers', 'KeywordIndex'),
                                       ('language', 'KeywordIndex'),
                                       ('modified', 'DateIndex'),
                                       ('revised', 'DateIndex'),
                                       ('objectId', 'FieldIndex'),
                                       ('portal_type', 'FieldIndex'),
                                       ('containedModuleIds', 'KeywordIndex'),
                                       ('subject', 'KeywordIndex')):
            catalog.addIndex(index_name, index_type)

        extra = Empty()
        extra.indexed_attrs = 'keywords'
        catalog.addIndex('keywordscase', 'KeywordIndex', extra)

        # The remaining indexes are instantiated directly and configured
        # through their property sheets.
        ki = KeywordIndex('keywords')
        catalog._catalog.addIndex('keywords', ki)
        ki._updateProperty('PrenormalizeTerm', 'python: value.lower()')
        ki._updateProperty('TermType', 'string')
        ki.keywords._updateProperty('Normalizer',
                                    'python: [k.lower() for k in value]')

        ki = KeywordIndex('baselanguage')
        catalog._catalog.addIndex('baselanguage', ki)
        ki._updateProperty(
            'PrenormalizeTerm',
            "python: value[:(value.find('-') > 0 ) and value.find('-') or len(value)]")
        ki.baselanguage._updateProperty('Name', 'language')
        ki.baselanguage._updateProperty(
            'Normalizer',
            "python: [value[:(value.find('-') > 0 ) and value.find('-') or len(value)]]")

        fi = FieldIndex('sortTitle')
        catalog._catalog.addIndex('sortTitle', fi)
        fi._updateProperty('PrenormalizeTerm', 'python: value.lower()')
        fi._updateProperty('TermType', 'string')
        fi.sortTitle._updateProperty('Name', 'Title')
        fi.sortTitle._updateProperty('Normalizer',
                                     'python: here.stripArticles(value)')

        fi = FieldIndex('parent')
        catalog._catalog.addIndex('parent', fi)
        fi.parent._updateProperty('Name', 'getParent')
        fi.parent._updateProperty('Normalizer', 'python:value.objectId')

        # Role indexes: the default value provider is replaced by an
        # ExpressionEvaluator reading the matching entry of 'roles'.
        for role in ('translators', 'editors'):
            ki = KeywordIndex(role)
            catalog._catalog.addIndex(role, ki)
            ki._delObject(role)
            ee = ExpressionEvaluator()
            ee.id = role
            ki._setObject(ee.id, ee)
            getattr(ki, role)._updateProperty(
                'Expression', "python: lambda o: o.roles['%s']" % role)

        self._set_metadata()

        self._p_changed = 1

    security.declarePrivate("_addColumn")
    def _addColumn(self, fieldname, *args, **kw):
        """Create a metadata field on the content catalog if it doesn't
        already exist. Call as you would 'self.catalog.addColumn'.
        Returns 'fieldname' if that name is actually added; None if it
        exists.
        """
        if fieldname in self.catalog.schema():
            return None
        self.catalog.addColumn(fieldname, *args, **kw)
        return fieldname

    security.declarePrivate("_set_metadata")
    def _set_metadata(self):
        """Create the metadata fields on the content catalog.
        This is called by upgrade script and installation, so adding a role
        here plus reinstall is all that's necessary for additional metadata.
        Return tuple of added fields if we actually changed something (so the
        caller can update metadata.) Empty tuple (false) if no change.
        The tuple lists names in declaration order.
        """
        # (name,) or (name, default) -- passed straight to _addColumn
        columns = (
            ('Title',),
            ('abstract',),
            ('authors',),
            ('language',),
            ('code',),
            ('collectionType',),
            ('created',),
            ('fields', {}),
            ('getHistoryCount',),
            ('getIcon',),
            ('institution',),
            ('instructor',),
            ('keywords',),
            ('license',),
            ('maintainers',),
            ('matched', {}),
            ('meta_type',),
            ('objectId',),
            ('portal_type',),
            ('revised',),
            ('roles',),
            ('sortTitle',),
            ('subject',),
            ('submitter',),
            ('url',),
            ('version',),
            ('weight', 0),
            ('harvestable', 1),
        )
        added = []
        for spec in columns:
            if self._addColumn(*spec) is not None:
                added.append(spec[0])
        return tuple(added)

    security.declarePrivate("_create_cache")
    def _create_cache(self, fake=False):
        """Creates the cache object for results sets

        With fake=True a 'nocache' object is installed instead of 'cache'.
        """
        if fake:
            self._setObject('cache', nocache('cache'))
        else:
            self._setObject('cache', cache('cache'))
        self._p_changed = 1

    # Writes 'message' to zLOG under the "RhaptosRepository" subsystem,
    # suffixed with the current request's PATH_INFO.
    # NOTE: documented with comments only -- under Zope 2 a docstring can
    # make a method publishable by URL, and the original had none.
    def log(self, message, severity=zLOG.INFO):
        zLOG.LOG("RhaptosRepository", severity,
                 "%s (%s)" % (message, self.REQUEST['PATH_INFO']))

    def _getStorageForObjectId(self, id):
        """Return the storage implementation associated with the given ID"""
        stub = self[id]
        return self.getStorage(stub.storage)

    security.declarePrivate("_getStoragesForTypes")
    def _getStoragesForTypes(self, portal_types):
        """Return a {storage name: [portal types] or None} mapping.

        A false 'portal_types' selects every storage (mapped to None, i.e.
        "all types").  A single type name may be given as a string.  The
        argument is never modified.
        """
        storages = {}
        if not portal_types:
            for name in self.listStorages():
                storages[name] = None
            return storages
        if isinstance(portal_types, basestring):
            portal_types = [portal_types]
        # Walk from the end: the previous implementation consumed the list
        # with pop(), and the per-storage ordering is kept identical.
        for pt in reversed(list(portal_types)):
            name = self._storage_map.get(pt, self._default_storage)
            storages.setdefault(name, []).append(pt)
        return storages

    def hasRhaptosObject(self, id):
        """Returns true if an object with the given ID exists in the
        repository"""
        return bool(self.hasObject(id))

    def countRhaptosObjects(self, portal_types=None):
        """Returns the number of objects in the repository of the given type,
        or all types"""
        count = 0
        storages = self._getStoragesForTypes(portal_types)
        for name, types in storages.items():
            count += self.getStorage(name).countObjects(types)
        return count

    def getRhaptosObjectLanguageCounts(self, portal_types=None):
        """Returns a list of tuples of language codes and count of objects
        using them, ordered by number of objects, descending"""
        langdict = {}
        storages = self._getStoragesForTypes(portal_types)
        for name, types in storages.items():
            storage = self.getStorage(name)
            for lang, num in storage.getLanguageCounts(types):
                langdict[lang] = langdict.get(lang, 0) + num
        return sorted(langdict.items(), key=lambda item: item[1],
                      reverse=True)

    def langLookup(self, langs=None):
        """Accesses the languageConstants monkeypatch on PloneLanguageTool,
        which generates a static dictionary of language codes, native and
        English language names, and regional variant names from PLT's own
        specialized dictionaries.

        'langs' may be a comma separated string or a sequence of codes; when
        empty the complete dictionary is returned.  Raises KeyError for an
        unknown code.
        """
        if isinstance(langs, basestring):
            langs = langs.split(',')
        if not langs:
            return languageConstants
        return dict([(code, languageConstants[code]) for code in langs])

    def getRhaptosObject(self, id, version=None, **kwargs):
        """Returns the object with the specified ID"""
        storage = self._getStorageForObjectId(id)
        return storage.getObject(id, version, **kwargs)

    security.declarePublic("getHistory")
    def getHistory(self, id):
        """Returns the history of the object with the specified ID or None if
        there is no such ID in the repository"""
        try:
            return self._getStorageForObjectId(id).getHistory(id)
        except KeyError:
            return None

    security.declarePrivate("deleteRhaptosObject")
    def deleteRhaptosObject(self, objectId, version=None, **kwargs):
        """Deletes all the objects with the specified ID

        Returns whatever the storage's deleteObject returns.  Raises
        KeyError when the id is unknown.  After the deletion the search
        cache is cleared and, when the tools are installed, similarity and
        link data for the object are removed (this cleanup used to sit after
        the 'return' and never ran).
        """
        if not self.hasRhaptosObject(objectId):
            raise KeyError(objectId)
        storage = self._getStorageForObjectId(objectId)
        deleted = storage.deleteObject(objectId, version)

        self.cache.clearSearchCache()

        # FIXME: this shouldn't be done here, but with some sort of event
        # system
        similarity = getToolByName(self, 'portal_similarity', None)
        if similarity is not None:
            similarity.deleteSimilarity(objectId, version)
        linkmap = getToolByName(self, 'portal_linkmap', None)
        if linkmap is not None:
            linkmap.deleteLinks(objectId, version)

        return deleted

    # Same lookup as getRhaptosObject.
    def _doGet(self, id, version=None, **kwargs):
        return self._getStorageForObjectId(id).getObject(id, version,
                                                         **kwargs)

    def getRhaptosObjects(self, objects):
        """Returns a list of objects as defined by the list of id,version
        tuples

        Ids that are not in the repository yield False at their position.
        """
        found = []
        for oid, ver in objects:
            if self.hasRhaptosObject(oid):
                storage = self._getStorageForObjectId(oid)
                found.append(storage.getObject(oid, ver))
            else:
                found.append(False)
        return found

    def publishObject(self, object, message):
        """
        Publish an object for the first time in the repository

        Creates a new folder to hold the version history of this
        object and create the first version of the object, returning
        the new unique ID for this object
        """
        storage = self.getStorageForType(object.portal_type)
        objectId = storage.applyVersionControl(object)
        storage.createVersionFolder(object)
        user = AccessControl.getSecurityManager().getUser().getUserName()
        storage.checkinResource(object, message, user)
        self.cache.clearSearchCache()

        # hitcount update
        self._registerHitcount(objectId)

        # storage events (mostly collection printing, at the moment)
        pubobj = storage.getObject(objectId, 'latest')
        storage.notifyObjectRevised(pubobj, None)

        # Removing this 'event' until Lens V2
        #if object.getParent():
            ### FIXME: We really want the Zope3 event system for this.
            ### Once we get that, we'll want to use something to the effect
            ### of: zope.event.notify(ObjectRevisionPublished)
            #self.lens_tool.notifyLensDerivedObject(object)
        ### End Event System Hack

        return objectId

    def publishRevision(self, object, message):
        """
        Publish a revision of an object in the repository

        object: the object to place under version control.  It must
        implement IMetadata and IVersionedObject
        message: a string log message by the user

        Raises CommitError when the object is not under version control, or
        when the freshly published version cannot be located afterwards.
        """
        if not self.isUnderVersionControl(object):
            raise CommitError(
                "Cannot publish revision of object %s not under version control"
                % object.getId())

        # handle to original object to preserve locked status, if necessary;
        # we could look this up after publication (and would have to with
        # proper events), but that would be version inspection
        origobj = object.getPublishedObject().latest

        storage = self.getStorageForType(object.portal_type)
        user = AccessControl.getSecurityManager().getUser().getUserName()
        storage.checkinResource(object, message, user)
        self.cache.clearSearchCache()

        ### FIXME: We really want the Zope3 event system for these.
        ### Once we get that, we'll want to use something to the effect of:
        ### zope.event.notify(ObjectRevisionPublished)
        try:
            # Grab the now-published version
            pubobj = object.getPublishedObject().latest
        except AttributeError:
            # Previously swallowed, which only deferred the failure to an
            # UnboundLocalError on the next statement.
            raise CommitError(
                "Cannot locate published revision of object %s after checkin"
                % object.getId())

        # lens events
        self.lens_tool.notifyLensRevisedObject(pubobj)

        # storage events (mostly collection printing, at the moment)
        storage.notifyObjectRevised(pubobj, origobj)

        # notice of change to all containing collections, latest version only
        container_objs = self.catalog(containedModuleIds=pubobj.objectId)
        for col in container_objs:
            colobj = col.getObject()
            colobj.notifyContentsRevised()
        ### End Event System Hack

    def isUnderVersionControl(self, object):
        """Returns true if the object is under version control"""
        storage = self.getStorageForType(object.portal_type)
        return storage.isUnderVersionControl(object)

    def isLatestVersion(self, object):
        """Returns true if object is the most recent revision of an object"""
        storage = self.getStorageForType(object.portal_type)
        return storage.isLatestVersion(object)

    # Delegates to the storage registered for the object's portal_type.
    # NOTE: documented with comments only; the original had no docstring and
    # adding one can make the method publishable under Zope 2.
    def getVersionInfo(self, object):
        return self.getStorageForType(object.portal_type).getVersionInfo(object)

    def searchRepositoryByDate(self, start, end, REQUEST=None):
        """Search repository by date: start and end

        Returns Module and Collection results from 'module_version_storage',
        ordered by ascending 'revised'.  REQUEST is accepted but not used.
        """
        s = self.getStorage('module_version_storage')
        result = list(s.searchDateRange(start, end, ['Module', 'Collection']))
        result.sort(key=lambda obj: obj.revised)
        return result

    security.declarePublic("cookSearchTerms")
    def cookSearchTerms(self, query):
        """return the cooked search terms, as well as the uncook dictionary,
        aggregated across storages

        The '' entry of the uncook dictionary holds the stop words; a word
        stays there only if every storage stopped it.
        """
        allcooked = []
        alluncook = {}
        for name in self.listStorages():
            storage = self.getStorage(name)
            cooked, uncook = storage.cookSearchTerms(query)
            for c in cooked:
                if c not in allcooked:
                    allcooked.append(c)
                    alluncook.setdefault(c, []).extend(uncook[c])
            # Deal w/ stop words: must be stopped by _all_ searches
            # FIXME this code might now work, but is currently not exercised
            # since both storages use equivalent code for cookSearchTerms
            if '' in alluncook:
                # Build a new list instead of removing while iterating,
                # which skipped entries; a storage without stop words counts
                # as having stopped nothing.
                stopped = uncook.get('', ())
                alluncook[''] = [w for w in alluncook[''] if w in stopped]
            else:
                alluncook.update(uncook)

        return allcooked, alluncook

    def searchRepository(self, query, query_type="weakAND", weights=dsw,
                         field_queries=None, sorton='weight', recent=False,
                         use_cache=True, min_rating=0):
        """Search the repository: portal_types defaults to all types w/
        storage objects
        Default weights are stored in default_search_weights on the
        repository

        field_queries maps a field name (or tuple of names) to a
        (query, query_type, limit_flag) triple; the optional 'portal_types'
        entry restricts which types are searched.  The mapping passed in is
        not modified.

        Returns (results, term_results, searchhash), where term_results maps
        'any' / each field to its (skipped, matched) terms.
        """
        if not weights:
            weights = self.default_search_weights  # AKA: dsw

        # Private copy: 'portal_types' is popped below and must not vanish
        # from the caller's dictionary.
        field_queries = dict(field_queries or {})

        fq_list = sorted(field_queries.items())
        searchhash = (str(query) + str(query_type) + str(weights)
                      + str(fq_list))
        cached_res = self.cache.resultsCacheLookup(searchhash, sorton, recent)
        if use_cache and cached_res:
            result, term_results = cached_res
            return result, term_results, searchhash

        # Build mapping of storage -> list of portal_types to query
        storages = {}
        # If no portal_types, search everything
        if 'portal_types' not in field_queries:
            for name in self.listStorages():
                storages[name] = None
        else:
            for pt in field_queries.pop('portal_types')[0]:
                storage = self._storage_map.get(pt, self._default_storage)
                storages.setdefault(storage, []).append(pt)

        result = []
        skipped = []
        matched = []
        term_results = {}

        # restrict = [(t,v[0]) for t,v in field_queries.items()
        #             if v[1] == 'AND']
        restrict = None

        # First, the 'anywhere' query
        if query:
            for name, portal_types in storages.items():
                storage = self.getStorage(name)
                result.extend(storage.search(query, portal_types, weights,
                                             restrict,
                                             min_rating=min_rating))

            result, skipped, matched = applyQueryType(result, query,
                                                      query_type)
            term_results['any'] = (skipped, matched)

        # Now the rest.
        # sort by limit - all limit fields after result fields: this is
        # needed for the intersect logic
        fq_list = sorted(field_queries.items(), key=lambda item: item[1][2])

        for field, (fquery, fquery_type, f_limit) in fq_list:
            fq_weights = {}
            if isinstance(field, tuple):
                for f in field:
                    fq_weights[f] = self.default_search_weights[f]
            else:
                fq_weights[field] = self.default_search_weights[field]

            fres = []
            for name, portal_types in storages.items():
                storage = self.getStorage(name)
                fres.extend(storage.search(fquery, portal_types,
                                           weights=fq_weights,
                                           restrict=restrict,
                                           min_rating=min_rating))

            fres, fskipped, fmatched = applyQueryType(fres, fquery,
                                                      fquery_type)
            term_results[field] = (fskipped, fmatched)

            # intersect each result set with the previous ones. Each
            # field_query is ANDed with the others (including the
            # 'anywhere' query), IFF one of the previous searches had a
            # matching term, and this search had a matching term. This
            # 'weakAND' drops any field that had all of its terms dropped.
            # The 'matched' dictionaries of each result object are updated.
            # Since limit fields are last, they will not add to result set
            # if nothing before them matched.
            if not fmatched:
                continue
            if matched:
                result_dict = dict([(r.objectId, r) for r in result])
                result = [r for r in fres if r.objectId in result_dict]
                for r in result:
                    previous = result_dict[r.objectId]
                    for t, f in previous.matched.items():
                        r.matched.setdefault(t, []).extend(f)
                    for t, f in previous.fields.items():
                        r.fields.setdefault(t, []).extend(f)
                    r.weight += previous.weight
            elif not f_limit:
                result = fres
                matched = fmatched

        result = self.sortSearchResults(result, sorton, recent)
        self.cache.resultsCacheInject(
            searchhash, (result, term_results, sorton, recent))
        return self.wrapResults(result), term_results, searchhash

    def wrapResults(self, results):
        """wrap list of results from pluggable brains or catalog record
        searches to standalone DBModuleSearch objects that can be pickled,
        and thus cached or stored in a session variable.  This method is
        idempotent, so can safely be called on lists that are already
        (partly) wrapped.
        """
        wrapped = []
        for res in results:
            if not isinstance(res, DBModuleSearch):
                res = DBModuleSearch(res)
            wrapped.append(res)
        return wrapped

    security.declarePublic("sortSearchResults")
    def sortSearchResults(self, result, sorton, recent=False):
        """sort a result set

        'result' is sorted in place according to 'sorton' ('weight',
        'popularity', 'views', 'language', 'revised', 'title',
        'portal_type' or 'rating'; anything else leaves the order alone) and
        returned wrapped (see wrapResults).  'popularity' and 'views' need
        portal_hitcount; without that tool the order is left unchanged.
        """
        if sorton == 'weight':
            # cmp() rather than int(difference): fractional differences
            # must not be truncated to "equal".
            result.sort(lambda x, y: int(cmp(y.weight, x.weight)
                                         or cmpTitle(x, y)))
        elif sorton in ('popularity', 'views'):
            hc_tool = getToolByName(self, 'portal_hitcount', None)
            if hc_tool is not None:
                if sorton == 'popularity':
                    lookup = hc_tool.getPercentileForObject
                else:
                    lookup = hc_tool.getHitCountForObject
                # key= asks the tool once per result; the stable reverse
                # sort matches the old descending cmp ordering.
                result.sort(key=lambda r: lookup(r.objectId, recent),
                            reverse=True)
        elif sorton == 'language':
            result.sort(lambda x, y: int(cmp(x.language, y.language)
                                         or cmpTitle(x, y)))
        elif sorton == 'revised':
            result.sort(lambda x, y: int(cmp(y.revised, x.revised)
                                         or cmpTitle(x, y)))
        elif sorton == 'title':
            result.sort(cmpTitle)
        elif sorton == 'portal_type':
            def by_type(x, y):
                # type ascending, then weight descending (when both sides
                # carry one), then title
                order = cmp(x.portal_type, y.portal_type)
                if (not order and hasattr(y, 'weight')
                        and hasattr(x, 'weight')):
                    order = cmp(y.weight, x.weight)
                return int(order or cmpTitle(x, y))
            result.sort(by_type)
        elif sorton == 'rating':
            result.sort(key=lambda r: getattr(r, 'rating', 0), reverse=True)

        return self.wrapResults(result)

    def getContentByAuthor(self, authorid):
        """Return all content by a particular author"""
        return self.getContentByRole('author', authorid)

    def getContentByRole(self, role, user_id):
        """Return all content by where the user has the specified role"""
        content = []
        for name in self.listStorages():
            storage = self.getStorage(name)
            content.extend(storage.getObjectsByRole(role, user_id))
        return content