def cleanHistory(self, obj):
    """Remove the complete version history of ``obj``.

    Looks up the shadow history for the object's history_id, empties every
    ZVC (Zope Version Control) history it references, drops the shadow
    storage entry and finally removes the ``version_id`` marker from the
    working copy so the object no longer looks versioned.
    """
    context, history_id = dereference(obj)
    historiesstorage = api.portal.get_tool(name='portal_historiesstorage')
    history = historiesstorage._getShadowHistory(history_id)
    if not history:
        # nothing was ever versioned for this object
        return
    # Collect the ZVC history keys used by the still-available versions.
    keys = set(
        historiesstorage._getZVCAccessInfo(history_id, selector, True)[0]
        for selector in history._available
    )
    versions_repo = historiesstorage._getZVCRepo()
    for key in keys:
        zope_version_history = versions_repo._histories.get(key, None)
        if zope_version_history:
            # BUG FIX: the original code rebound the *local* name to a fresh
            # OOBTree, which left the persistent history untouched.  Write
            # the empty tree back into the repository so the stored versions
            # are actually discarded.
            versions_repo._histories[key] = OOBTree()
    storage = historiesstorage._getShadowStorage()._storage
    # Drop the shadow-storage entry for this history (ignore if absent).
    storage.pop(history_id, None)
    dereferenced_obj = dereference(
        history_id=history_id, zodb_hook=self.context)[0]
    # Strip the versioning marker attribute from the working copy.
    if hasattr(dereferenced_obj, 'version_id'):
        delattr(dereferenced_obj, 'version_id')
def purge(self, obj=None, history_id=None, selector=None, metadata=None,
          countPurged=True):
    """See IPurgeSupport.

    Purge the version addressed by ``selector`` from the history of the
    given working copy (or ``history_id``), delegating the actual work to
    ``portal_historiesstorage``.

    Note: the original signature used the mutable default ``metadata={}``;
    a ``None`` sentinel is used instead to avoid a shared default dict,
    while still passing an empty dict through to the storage.
    """
    if metadata is None:
        metadata = {}
    storage = getToolByName(self, 'portal_historiesstorage')
    obj, history_id = dereference(obj, history_id, self)
    storage.purge(history_id, selector, metadata, countPurged)
def purge_history(site, portal_types_to_purge=None,
                  maxNumberOfVersionsToKeep=None, verbose=False):
    # Walk the catalog of ``site`` and trim each versionable object's
    # history via the purge policy's beforeSaveHook.  Optionally restricts
    # to ``portal_types_to_purge`` and/or temporarily overrides the
    # policy's maxNumberOfVersionsToKeep.  Python 2 script-style code.
    policy = site.portal_purgepolicy
    portal_repository = site.portal_repository
    # -1 means "keep everything"; without an explicit override there is
    # nothing to purge, so bail out early.
    if policy.maxNumberOfVersionsToKeep==-1 and not maxNumberOfVersionsToKeep:
        print "... maxNumberOfVersionsToKeep is -1; skipping"
        return
    # NOTE(review): the old value is saved here but never restored after the
    # loop — the policy keeps the new setting permanently.  Confirm this is
    # intentional.
    old_maxNumberOfVersionsToKeep = policy.maxNumberOfVersionsToKeep
    if maxNumberOfVersionsToKeep is not None:
        print "... Putting maxNumberOfVersionsToKeep from",
        print old_maxNumberOfVersionsToKeep, "to", maxNumberOfVersionsToKeep
        policy.maxNumberOfVersionsToKeep = maxNumberOfVersionsToKeep
    if portal_types_to_purge:
        results = site.portal_catalog(portal_type=portal_types_to_purge)
    else:
        results = site.portal_catalog()
    for x in results:
        try:
            obj = x.getObject()
            # skip objects the repository does not version at all
            if not portal_repository.isVersionable(obj):
                continue
            if verbose:
                print "... cleaning history for", x.getPath(), "(%s)" % x.portal_type
            obj, history_id = dereference(obj)
            # beforeSaveHook applies the purge policy, trimming the history
            # down to maxNumberOfVersionsToKeep
            policy.beforeSaveHook(history_id, obj)
            if shasattr(obj, 'version_id'):
                del obj.version_id
        except ConflictError:
            # never swallow ZODB conflict errors
            raise
        except Exception, inst:
            # sometimes, even with the spoofed request, the getObject failed
            print "ERROR purging", x.getPath(), "(%s)" % x.portal_type
            print "    ", inst
def isUpToDate(self, obj=None, history_id=None, selector=None,
               countPurged=True):
    """See IPurgeSupport.

    Compare the working copy's modification date against the stored
    version addressed by ``selector``; raises
    ``ArchivistUnregisteredError`` if the object was never registered.
    """
    obj, history_id = dereference(obj, history_id, self)
    storage = getToolByName(self, 'portal_historiesstorage')
    if not storage.isRegistered(history_id):
        raise ArchivistUnregisteredError(
            "The object %r is not registered" % obj)
    saved_date = storage.getModificationDate(history_id, selector,
                                             countPurged)
    return saved_date == obj.modified()
def prepare(self, obj, app_metadata=None, sys_metadata=None):
    """See IArchivist.

    Prepare the working copy for being saved: register it with the
    history-id handler on first use, clone it by pickling, run the
    before-save modifiers and bundle everything into a ``PreparedObject``.

    BUG FIX: the original signature used the mutable default
    ``sys_metadata={}`` and then *mutated* it via ``sys_metadata.update``,
    so metadata leaked between calls sharing the default.  A ``None``
    sentinel gives every call its own dict; callers that pass their own
    dict still see it updated, as before.
    """
    if sys_metadata is None:
        sys_metadata = {}
    storage = getToolByName(self, 'portal_historiesstorage')
    modifier = getToolByName(self, 'portal_modifier')
    obj, history_id = dereference(obj, zodb_hook=self)
    if storage.isRegistered(history_id):
        # already registered
        version_id = len(self.queryHistory(obj))
        is_registered = True
    else:
        # object isn't under version control yet
        # A working copy being under version control needs to have
        # a history_id, version_id (starts with 0) and a location_id
        # (the current implementation isn't able yet to handle multiple
        # locations. Nevertheless lets set the location id to a well
        # known default value)
        uidhandler = getToolByName(self, 'portal_historyidhandler')
        history_id = uidhandler.register(obj)
        version_id = obj.version_id = 0
        alsoProvides(obj, IVersioned)
        obj.location_id = 0
        is_registered = False
    # the hard work done here is:
    # 1. ask for all attributes that have to be passed to the
    #    history storage by reference
    # 2. clone the object with some modifications
    # 3. modify the clone further
    referenced_data = modifier.getReferencedAttributes(obj)
    approxSize, clone, inside_orefs, outside_orefs = \
        self._cloneByPickle(obj)
    metadata, inside_crefs, outside_crefs = \
        modifier.beforeSaveModifier(obj, clone)
    # extend the ``sys_metadata`` by the metadata returned by the
    # ``beforeSaveModifier`` modifier
    sys_metadata.update(metadata)
    # set the version id of the clone to be saved to the repository
    # location_id and history_id are the same as on the working copy
    # and remain unchanged
    clone.version_id = version_id
    # return the prepared infos (clone, refs, etc.)
    clone_info = ObjectData(clone, inside_crefs, outside_crefs)
    obj_info = ObjectData(obj, inside_orefs, outside_orefs)
    return PreparedObject(history_id, obj_info, clone_info,
                          referenced_data, app_metadata, sys_metadata,
                          is_registered, approxSize)
def getHistoryMetadata(self, obj=None, history_id=None):
    """Return the history's metadata blob for summary views.

    Either ``obj`` or ``history_id`` may be supplied; the other is
    resolved by dereferencing.  Raises ``ArchivistUnregisteredError``
    when the object was never placed under version control.
    """
    storage = getToolByName(self, 'portal_historiesstorage')
    obj, history_id = dereference(obj, history_id, self)
    try:
        return storage.getHistoryMetadata(history_id)
    except StorageUnregisteredError:
        raise ArchivistUnregisteredError(
            "Retrieving a version of an unregistered object is not "
            "possible. Register the object '%r' first. " % obj)
def __init__(self, archivist, obj, history_id, preserve, countPurged):
    """Set up a lazy history wrapper.

    ``obj`` is normally the working copy in the portal and
    ``history_id`` the storage key; if one of them is omitted it is
    resolved from the other via ``dereference``.
    """
    storage = getToolByName(archivist, 'portal_historiesstorage')
    self._modifier = getToolByName(archivist, 'portal_modifier')
    self._obj, history_id = dereference(obj, history_id, archivist)
    self._history = storage.getHistory(history_id, countPurged)
    self._preserve = preserve
def object_removed(obj, event):
    """Event handler: an object is being deleted - also delete its history.

    Skips non-contentish objects, working copies that were never
    versioned, and objects whose history is still referenced by a
    retrievable parent version (so reverting the parent can restore them).
    """
    if not IContentish.providedBy(obj):
        return
    try:
        histories_storage = getToolByName(obj, 'portal_historiesstorage')
        repo_tool = getToolByName(obj, 'portal_repository')
    except AttributeError:
        # XXX If tools are missing, there is nothing we can do.
        # This occurs in some Products.CMFDiffTool and
        # Products.CMFTestCase tests for 4.3.x. Maybe it should
        # be fixed there.
        return
    obj, histid = dereference(obj)
    if histid is None:
        # never versioned, nothing to purge
        return
    metadata = repo_tool.getHistoryMetadata(obj)
    try:
        num_versions = metadata.getLength(countPurged=False)
    except AttributeError:
        # portal_historiesstorage will return
        # an empty list in certain cases,
        # do nothing
        return
    # inspect the newest version's system metadata to find a possible
    # parent history (set when this object was saved inside a container)
    current = metadata.retrieve(num_versions - 1)
    sys_metadata = current['metadata']['sys_metadata']
    if ('parent' in sys_metadata) and \
            (sys_metadata['parent']['history_id'] != histid):
        try:
            # if the parent version is still retrievable, keep this
            # object's history so the parent can be reverted intact
            histories_storage.retrieve(
                history_id=sys_metadata['parent']['history_id'])
            return
        except StorageRetrieveError:
            pass
    length = len(histories_storage.getHistory(histid, countPurged=False))
    # purge selector 0 repeatedly: each purge removes the oldest remaining
    # version, so ``length`` iterations empty the whole history
    for i in range(length):
        histories_storage.purge(
            histid, 0,
            metadata={'sys_metadata': {'comment': 'purged'}},
            countPurged=False)
def _set(kls, obj, versions):
    """Replace the version history of ``obj`` with ``versions``.

    Purges every existing stored version first (newest to oldest), then
    saves each entry of ``versions`` as a fresh version.  No-op when
    ``versions`` is empty.
    """
    if not versions:
        return
    portal_storage = getToolByName(obj, 'portal_historiesstorage')
    # purge all existing first. Should only be one version though.
    repo_tool = getToolByName(obj, "portal_repository")
    history = repo_tool.getHistoryMetadata(obj)
    if history:
        length = history.getLength(countPurged=False)
        # BUG FIX: ``dereference`` returns an ``(obj, history_id)`` tuple;
        # the original bound the whole tuple to ``history_id`` and passed
        # it to ``purge``.  Only the id is wanted here.
        history_id = dereference(obj)[1]
        for i in xrange(length - 1, -1, -1):
            try:
                portal_storage.purge(history_id, i)
            except StoragePurgeError:
                # version may already be gone; best-effort cleanup
                pass
    for version in versions:
        kls.saveVersion(obj, version)
def object_removed(obj, event):
    """Event handler: when a content object is deleted, also delete its
    complete version history — unless a retrievable parent version still
    references it, in which case the history is kept so the parent can be
    reverted intact.
    """
    if not IContentish.providedBy(obj):
        return
    try:
        histories_storage = getToolByName(obj, 'portal_historiesstorage')
        repo_tool = getToolByName(obj, 'portal_repository')
    except AttributeError:
        # XXX If tools are missing, there is nothing we can do.
        # This occurs in some Products.CMFDiffTool and
        # Products.CMFTestCase tests for 4.3.x. Maybe it should
        # be fixed there.
        return
    obj, history_id = dereference(obj)
    if history_id is None:
        # never placed under version control
        return
    history_meta = repo_tool.getHistoryMetadata(obj)
    try:
        version_count = history_meta.getLength(countPurged=False)
    except AttributeError:
        # portal_historiesstorage will return
        # an empty list in certain cases,
        # do nothing
        return
    # look at the newest version's system metadata for a parent history
    newest = history_meta.retrieve(version_count - 1)
    sys_md = newest['metadata']['sys_metadata']
    if 'parent' in sys_md and sys_md['parent']['history_id'] != history_id:
        parent_hid = sys_md['parent']['history_id']
        try:
            histories_storage.retrieve(history_id=parent_hid)
            # parent version still retrievable: keep this history
            return
        except StorageRetrieveError:
            pass
    # purge the oldest remaining version (selector 0) once per stored
    # version until the whole history is gone
    remaining = len(histories_storage.getHistory(history_id,
                                                 countPurged=False))
    for _ in range(remaining):
        histories_storage.purge(
            history_id, 0,
            metadata={'sys_metadata': {'comment': 'purged'}},
            countPurged=False)
def retrieve(self, obj=None, history_id=None, selector=None, preserve=(),
             countPurged=True):
    # Memory-archivist retrieve: pull one saved state out of the in-memory
    # ``_archive`` and hand back a deep-copied ``VersionData``.
    obj, history_id = dereference(obj, history_id, self)
    if selector is None:
        # default to the newest entry (HEAD)
        selector = len(self._archive[history_id]) - 1
    self.log("%sretrieve %s: hid=%s, selector=%s" % (self.alog_indent,
             obj.getId(), history_id, selector))
    data = self._archive[history_id][selector]
    # Attributes handled by reference (container-internal bookkeeping plus
    # the ids of sub-objects on both clone and working copy) must not be
    # overwritten when the retrieved state is copied back.
    attr_handling_references = ['_objects', '_tree', '_count', '_mt_index',
                                '__annotations__']
    attr_handling_references.extend(data['clone'].object.objectIds())
    attr_handling_references.extend(obj.objectIds())
    vdata = VersionData(data['clone'], [], attr_handling_references,
                        data['referenced_data'], data['metadata'])
    # deep copy so callers cannot mutate the archived state
    return deepCopy(vdata)
def setReference(self, target_obj, remove_info=True):
    """See IVersionAwareReference
    """
    storage = getToolByName(target_obj, 'portal_historiesstorage')
    # save as much information as possible
    # it may be that the target object is not yet registered with the
    # storage (aka not under version control)
    target_obj, self.history_id = dereference(target_obj)
    if storage.isRegistered(self.history_id):
        self.version_id = target_obj.version_id
        # XXX the location id has to be gotten from the object directly
        self.location_id = 0  # XXX only one location possible currently
        # XXX store the information if the referenced working copy
        #     was unchanged since the last checkin. In this case the
        #     the exact state of the referenced object may be retrieved also.
        # XXX we really need a isUpToDate/isChanged methods!
    # drop any stale ``info`` payload unless the caller wants it kept
    if remove_info and hasattr(self, 'info'):
        del self.info
def del_old_notice(self):
    # Batch maintenance job (Python 2): walk the whole catalog and apply
    # the purge policy's beforeSaveHook to every versioned object, up to
    # 1000 purges per run, committing every 10000 catalog entries.
    print 'Start'
    policy = getToolByName(self.portal, 'portal_purgepolicy')
    catalog = getToolByName(self.portal, 'portal_catalog')
    removeCount = 0
    for count, brain in enumerate(catalog()):
        obj = brain.getObject()
        obj, history_id = dereference(obj)
        if history_id is not None:
            # beforeSaveHook trims the history per the purge policy
            policy.beforeSaveHook(history_id, obj)
            print 'purged object %s: %s' % (count, obj.absolute_url_path())
            removeCount += 1
        # stop after 1000 purges to keep the transaction bounded
        if removeCount == 1000:
            break
        # periodic progress report + commit (also fires at count == 0)
        if not count % 10000:
            print count
            transaction.commit()
    return
def dereference_by_id(self, history_id):
    """Resolve ``history_id`` via the utilities ``dereference`` helper,
    using the adapter context as the ZODB hook.
    """
    result = dereference(history_id=history_id, zodb_hook=self.context)
    return result
def getHistory(self, obj=None, history_id=None, preserve=()):
    """Return deep copies of every archived state for the object."""
    obj, history_id = dereference(obj, history_id, self)
    copies = []
    for entry in self._archive[history_id]:
        copies.append(deepCopy(entry))
    return copies
# Zope debug-shell script (Python 2): for every Plone site in the database,
# apply the purge policy to content older than 30 days.
# NOTE(review): relies on names provided by the zopectl/debug environment
# (``app``, ``PloneSite``, ``newSecurityManager``, ``getToolByName``) that
# are not imported here — confirm the execution context supplies them.
from zope.app.component.hooks import setSite
from Products.CMFEditions.utilities import dereference
import transaction
from DateTime import DateTime

sites = {}
for child_node in app.getChildNodes():
    # Find all plone sites hosted on this db
    if isinstance(child_node, PloneSite):
        sites[child_node.__name__] = child_node
print "Found sites in the db(sites): %s" % ", ".join(sites.keys())
for site in sites.values():
    setSite(site)
    # run as the portal admin so security checks pass
    admin = app.acl_users.getUserById("portaladmin")
    newSecurityManager(None, admin)
    policy = getToolByName(site, 'portal_purgepolicy')
    catalog = getToolByName(site, 'portal_catalog')
    for count, brain in enumerate(catalog()):
        obj = brain.getObject()
        # only purge old content
        if obj.created() < (DateTime() - 30):
            obj, history_id = dereference(obj)
            # beforeSaveHook trims the history per the purge policy
            policy.beforeSaveHook(history_id, obj)
            print 'purged object ' + obj.absolute_url_path()
            # commit after each purge to keep transactions small
            transaction.commit()
def isUpToDate(self, obj=None, history_id=None, selector=None):
    """Report whether the working copy is unmodified relative to the
    archived state addressed by ``selector``.
    """
    obj = dereference(obj=obj, history_id=history_id, zodb_hook=self)[0]
    mem = self.retrieve(obj=obj, history_id=history_id, selector=selector)
    archived_date = mem.data.object.modified()
    working_date = obj.modified()
    return archived_date == working_date
def getHistoryMetadata(self, obj=None, history_id=None):
    """Return the metadata dicts of all archived states, oldest first."""
    obj, history_id = dereference(obj, history_id, self)
    collected = []
    for entry in self._archive[history_id]:
        collected.append(entry['metadata'])
    return collected
def prepare(self, obj, app_metadata=None, sys_metadata={}):
    # Test/memory archivist ``prepare``: registers the object on first use,
    # simulates the clone and before-save modifiers for the three known
    # test attributes (doc1_inside, doc2_inside, doc3_outside), logs the
    # call and returns a PreparedObject.
    # NOTE(review): the mutable default ``sys_metadata={}`` is only read
    # here (never mutated), so it is harmless — but fragile if the body
    # ever changes.
    obj, history_id = dereference(obj)
    if history_id is None:
        # object isn't under version control yet
        # An working copy beeing under version control needs to have
        # a history_id, version_id (starts with 0) and a location_id
        # (the current implementation isn't able yet to handle multiple
        # locations. Nevertheless lets set the location id to a well
        # known default value)
        portal_hidhandler = getToolByName(obj, 'portal_historyidhandler')
        history_id = portal_hidhandler.register(obj)
        version_id = obj.version_id = 0
        obj.location_id = 0
        is_registered = False
    else:
        version_id = len(self.queryHistory(obj))
        is_registered = True
    base_obj = aq_base(obj)
    doc1_inside = getattr(base_obj, 'doc1_inside', None)
    doc2_inside = getattr(base_obj, 'doc2_inside', None)
    doc3_outside = getattr(base_obj, 'doc3_outside', None)
    # simulate clone modifiers: wrap each present sub-document of the
    # clone in a storage adapter (inside vs. outside references)
    icrefs = []
    ocrefs = []
    clone = deepCopy(base_obj)
    if doc1_inside is not None:
        icrefs.append(ObjectManagerStorageAdapter(clone, 'doc1_inside'))
    if doc2_inside is not None:
        icrefs.append(ObjectManagerStorageAdapter(clone, 'doc2_inside'))
    if doc3_outside is not None:
        ocrefs.append(ObjectManagerStorageAdapter(clone, 'doc3_outside'))
    crefs = icrefs + ocrefs
    # simulate before save modifier: collect the working copy's own
    # sub-documents as the original references
    iorefs = []
    oorefs = []
    if doc1_inside is not None:
        iorefs.append(getattr(obj, 'doc1_inside'))
    if doc2_inside is not None:
        iorefs.append(getattr(obj, 'doc2_inside'))
    if doc3_outside is not None:
        oorefs.append(getattr(obj, 'doc3_outside'))
    orefs = iorefs + oorefs
    # replace the clone's references by version-aware placeholders
    for cref in crefs:
        cref.setAttribute(VersionAwareReference())
    # log
    if sys_metadata['originator'] is None:
        self.log("")
    if orefs:
        self.log("%sprepare %s: hid=%s, refs=(%s)"
                 % (self.alog_indent, obj.getId(), history_id,
                    ', '.join([ref.getId() for ref in orefs])))
    else:
        self.log("%sprepare %s: hid=%s"
                 % (self.alog_indent, obj.getId(), history_id))
    self.alog_indent += '  '
    # prepare object structure
    original_info = ObjectData(obj, iorefs, oorefs)
    clone_info = ObjectData(clone, icrefs, ocrefs)
    approxSize = None
    return PreparedObject(history_id, original_info, clone_info, (),
                          app_metadata, sys_metadata, is_registered,
                          approxSize)
def dereference(self, target=None):
    # NOTE: the previous docstring ("Return the portal_historiesstorage
    # tool") was wrong — this delegates to the utilities ``dereference``
    # helper, falling back to the adapter context when no target is given.
    '''Dereference ``target`` (or, by default, ``self.context``) via the
    CMFEditions ``dereference`` utility.
    '''
    return dereference(target or self.context)
def __init__(self, obj, version_data):
    """Wrap one retrieved version of ``obj``.

    Keeps the dereferenced working copy, its history id, the version
    data itself (whose ``version_id`` becomes the selector) and a handle
    on the histories storage tool.
    """
    self.version_data = version_data
    self.selector = version_data.version_id
    self.obj, self.history_id = dereference(obj)
    self.storage = api.portal.get_tool('portal_historiesstorage')
def _recursiveRetrieve(self, obj=None, history_id=None, selector=None,
                       preserve=(), inplace=False, source=None,
                       fixup_queue=None, ignore_existing=False,
                       countPurged=True):
    """This is the real workhorse pulling objects out recursively.

    Retrieves the version addressed by ``selector`` into the working
    copy (rebuilding it if deleted or moved), then recurses into inside
    references and reattaches outside references.  Returns the
    VersionData of the retrieved state.
    """
    portal_archivist = getToolByName(self, 'portal_archivist')
    portal_reffactories = getToolByName(self, 'portal_referencefactories')
    if ignore_existing:
        # caller wants a fresh object regardless of an existing working copy
        obj = None
    else:
        obj, history_id = dereference(obj, history_id, self)
    hasBeenDeleted = obj is None
    # CMF's invokeFactory needs the added object be traversable from
    # itself to the root and from the root to the itself. This is the
    # reason why it is necessary to replace the working copies current
    # state with the one of the versions state retrieved. If the
    # operation is not an inplace operation (retrieve instead of
    # revert) this has to be reversed after having recursed into the
    # tree.
    if hasBeenDeleted:
        # if the object to retreive doesn't have a counterpart in the tree
        # build a new one before retrieving an old state
        vdata = portal_archivist.retrieve(obj, history_id, selector,
                                          preserve, countPurged)
        repo_clone = vdata.data.object
        obj = portal_reffactories.invokeFactory(repo_clone, source)
        hasBeenMoved = False
    else:
        if source is None:
            ##### the source has to be stored with the object at save time
            # I(gregweb)'m pretty sure the whole source stuff here gets
            # obsolete as soon as a va_ref to the source is stored also
            # XXX for now let's stick with this:
            source = aq_parent(aq_inner(obj))
        # in the special case the object has been moved the retrieved
        # object has to get a new history (it's like copying back back
        # the object and then retrieve an old state)
        hasBeenMoved = portal_reffactories.hasBeenMoved(obj, source)
        if hasBeenMoved:
            if getattr(aq_base(source), obj.getId(), None) is None:
                vdata = portal_archivist.retrieve(obj, history_id,
                                                  selector, preserve,
                                                  countPurged)
                repo_clone = vdata.data.object
                obj = portal_reffactories.invokeFactory(repo_clone, source)
            else:
                # What is the desired behavior
                pass
    vdata = portal_archivist.retrieve(obj, history_id, selector, preserve,
                                      countPurged)
    # Replace the objects attributes retaining identity.
    _missing = object()
    attrs_to_leave = vdata.attr_handling_references
    for key, val in vdata.data.object.__dict__.items():
        if key in attrs_to_leave:
            continue
        # NOTE(review): obj_val is computed but unused — presumably left
        # over from an earlier comparison; confirm before removing.
        obj_val = getattr(aq_base(obj), key, _missing)
        setattr(obj, key, val)
    # Delete reference attributes.
    for ref in vdata.refs_to_be_deleted:
        ref.remove(permanent=inplace)
    # retrieve all inside refs
    for attr_ref in vdata.data.inside_refs:
        # get the referenced working copy
        # XXX if the working copy we're searching for was moved to
        # somewhere else *outside* we generate an another object with
        # the same history_id. Unfortunately we're not able to handle
        # this correctly before multi location stuff is implemented.
        # XXX Perhaps there is a need for a workaround!
        va_ref = attr_ref.getAttribute()
        if va_ref is None:
            # a missing reference, the policy has changed,
            # don't try to replace it
            continue
        history_id = va_ref.history_id
        # retrieve the referenced version (always count purged versions
        # also!)
        ref_vdata= self._recursiveRetrieve(history_id=history_id,
                                           selector=va_ref.version_id,
                                           preserve=(),
                                           inplace=inplace,
                                           source=obj,
                                           fixup_queue=fixup_queue,
                                           ignore_existing=ignore_existing,
                                           countPurged=True)
        # reattach the python reference
        attr_ref.setAttribute(ref_vdata.data.object)
    # reattach all outside refs to the current working copy
    # XXX this is an implicit policy we can live with for now
    for attr_ref in vdata.data.outside_refs:
        va_ref = attr_ref.getAttribute()
        cur_value = attr_ref.getAttribute(alternate=obj)
        # If the attribute has been removed by a modifier, then we get
        # None, move on to the next ref.
        if va_ref is None:
            continue
        try:
            ref = dereference(history_id=va_ref.history_id,
                              zodb_hook=self)[0]
        except (TypeError, AttributeError):
            # get the attribute from the working copy
            ref = cur_value
        # If the object is not under version control just attach
        # the current working copy if it exists and is not already
        # in place
        if ref is not None and aq_base(ref) is not aq_base(va_ref):
            attr_ref.setAttribute(ref)
    # feed the fixup queue defined in revert() and retrieve() to
    # perform post-retrieve fixups on the object
    if fixup_queue is not None:
        fixup_queue.append(obj)
    return vdata
def _recursiveRetrieve(self, obj=None, history_id=None, selector=None,
                       preserve=(), inplace=False, source=None,
                       fixup_queue=None, ignore_existing=False,
                       countPurged=True):
    """This is the real workhorse pulling objects out recursively.

    Near-duplicate of the other ``_recursiveRetrieve`` in this codebase
    (lint-cleaned variant): retrieves the selected version into the
    working copy, rebuilding deleted/moved objects, then recurses into
    inside references and reattaches outside references.
    """
    portal_archivist = getToolByName(self, 'portal_archivist')
    portal_reffactories = getToolByName(self, 'portal_referencefactories')
    if ignore_existing:
        # caller wants a fresh object regardless of an existing working copy
        obj = None
    else:
        obj, history_id = dereference(obj, history_id, self)
    hasBeenDeleted = obj is None
    # CMF's invokeFactory needs the added object be traversable from
    # itself to the root and from the root to the itself. This is the
    # reason why it is necessary to replace the working copies current
    # state with the one of the versions state retrieved. If the
    # operation is not an inplace operation (retrieve instead of
    # revert) this has to be reversed after having recursed into the
    # tree.
    if hasBeenDeleted:
        # if the object to retreive doesn't have a counterpart in the tree
        # build a new one before retrieving an old state
        vdata = portal_archivist.retrieve(obj, history_id, selector,
                                          preserve, countPurged)
        repo_clone = vdata.data.object
        obj = portal_reffactories.invokeFactory(repo_clone, source)
        hasBeenMoved = False
    else:
        if source is None:
            # #### the source has to be stored with the object at save time
            # I(gregweb)'m pretty sure the whole source stuff here gets
            # obsolete as soon as a va_ref to the source is stored also
            # XXX for now let's stick with this:
            source = aq_parent(aq_inner(obj))
        # in the special case the object has been moved the retrieved
        # object has to get a new history (it's like copying back back
        # the object and then retrieve an old state)
        hasBeenMoved = portal_reffactories.hasBeenMoved(obj, source)
        if hasBeenMoved:
            if getattr(aq_base(source), obj.getId(), None) is None:
                vdata = portal_archivist.retrieve(obj, history_id,
                                                  selector, preserve,
                                                  countPurged)
                repo_clone = vdata.data.object
                obj = portal_reffactories.invokeFactory(repo_clone, source)
            else:
                # What is the desired behavior
                pass
    vdata = portal_archivist.retrieve(obj, history_id, selector, preserve,
                                      countPurged)
    # Replace the objects attributes retaining identity.
    _missing = object()
    attrs_to_leave = vdata.attr_handling_references
    for key, val in vdata.data.object.__dict__.items():
        if key in attrs_to_leave:
            continue
        # NOTE(review): computed but unused (hence the noqa) — presumably
        # left over from an earlier comparison; confirm before removing.
        obj_val = getattr(aq_base(obj), key, _missing)  # noqa
        setattr(obj, key, val)
    # Delete reference attributes.
    for ref in vdata.refs_to_be_deleted:
        ref.remove(permanent=inplace)
    # retrieve all inside refs
    for attr_ref in vdata.data.inside_refs:
        # get the referenced working copy
        # XXX if the working copy we're searching for was moved to
        # somewhere else *outside* we generate an another object with
        # the same history_id. Unfortunately we're not able to handle
        # this correctly before multi location stuff is implemented.
        # XXX Perhaps there is a need for a workaround!
        va_ref = attr_ref.getAttribute()
        if va_ref is None:
            # a missing reference, the policy has changed,
            # don't try to replace it
            continue
        history_id = va_ref.history_id
        # retrieve the referenced version (always count purged versions
        # also!)
        ref_vdata = self._recursiveRetrieve(
            history_id=history_id,
            selector=va_ref.version_id,
            preserve=(),
            inplace=inplace,
            source=obj,
            fixup_queue=fixup_queue,
            ignore_existing=ignore_existing,
            countPurged=True)
        # reattach the python reference
        attr_ref.setAttribute(ref_vdata.data.object)
    # reattach all outside refs to the current working copy
    # XXX this is an implicit policy we can live with for now
    for attr_ref in vdata.data.outside_refs:
        va_ref = attr_ref.getAttribute()
        cur_value = attr_ref.getAttribute(alternate=obj)
        # If the attribute has been removed by a modifier, then we get
        # None, move on to the next ref.
        if va_ref is None:
            continue
        try:
            ref = dereference(history_id=va_ref.history_id,
                              zodb_hook=self)[0]
        except (TypeError, AttributeError):
            # get the attribute from the working copy
            ref = cur_value
        # If the object is not under version control just attach
        # the current working copy if it exists and is not already
        # in place
        if ref is not None and aq_base(ref) is not aq_base(va_ref):
            attr_ref.setAttribute(ref)
    # feed the fixup queue defined in revert() and retrieve() to
    # perform post-retrieve fixups on the object
    if fixup_queue is not None:
        fixup_queue.append(obj)
    return vdata