def test_user(self): meta = { keys.ITEMID: make_uuid(), keys.REVID: make_uuid(), keys.NAME: [ u"user name", ], keys.NAMESPACE: u"userprofiles", keys.EMAIL: u"*****@*****.**", keys.SUBSCRIPTIONS: [ u"{0}:{1}".format(keys.ITEMID, make_uuid()), u"{0}::foo".format(keys.NAME), u"{0}::bar".format(keys.TAGS), u"{0}::".format(keys.NAMERE), u"{0}:userprofiles:a".format(keys.NAMEPREFIX), ] } invalid_meta = { keys.SUBSCRIPTIONS: [ u"", u"unknown_tag:123", u"{0}:123".format(keys.ITEMID), u"{0}:foo".format(keys.NAME), ] } state = { 'trusted': False, # True for loading a serialized representation or other trusted sources keys.NAME: u'somename', # name we decoded from URL path keys.ACTION: keys.ACTION_SAVE, keys.HOSTNAME: u'localhost', keys.ADDRESS: u'127.0.0.1', keys.WIKINAME: u'ThisWiki', keys.NAMESPACE: u'', keys.FQNAME: CompositeName(u'', u'', u'somename') } m = UserMetaSchema(meta) valid = m.validate(state) assert m[keys.CONTENTTYPE].value == CONTENTTYPE_USER if not valid: for e in m.children: print e.valid, e print m.valid, m assert valid m = UserMetaSchema(invalid_meta) valid = m.validate(state) assert not valid for e in m.children: if e.name in (keys.SUBSCRIPTIONS, ): for value in e: assert not value.valid
def __init__(self, path, uid):
    """
    Build meta and (empty) data for one user profile.

    :param path: stored as self.path
    :param uid: stored as self.uid and put into the meta under UID_OLD
    """
    self.path = path
    self.uid = uid
    profile = self._parse_userprofile()
    meta = self._process_usermeta(profile)
    # fixed entries every user revision gets, applied in this order
    fixed_entries = (
        (CONTENTTYPE, CONTENTTYPE_USER),
        (UID_OLD, uid),
        (ITEMID, make_uuid()),
        (REVID, make_uuid()),
        (SIZE, 0),
        (ACTION, ACTION_SAVE),
    )
    for key, value in fixed_entries:
        meta[key] = value
    self.meta = meta
    # a user revision carries no data, only metadata
    self.data = StringIO('')
def __init__(self, path, uid):
    """
    Build meta and (empty) data for one user profile.

    :param path: stored as self.path
    :param uid: stored as self.uid and put into the meta under UID_OLD
    """
    self.path = path
    self.uid = uid
    profile = self._parse_userprofile()
    meta = self._process_usermeta(profile)
    # fixed entries every user revision gets, applied in this order
    fixed_entries = (
        (CONTENTTYPE, CONTENTTYPE_USER),
        (UID_OLD, uid),
        (ITEMID, make_uuid()),
        (REVID, make_uuid()),
        (SIZE, 0),
        (ACTION, ACTION_SAVE),
    )
    for key, value in fixed_entries:
        meta[key] = value
    self.meta = meta
    # a user revision carries no data, only metadata
    self.data = StringIO("")
def test_user(self): meta = { keys.ITEMID: make_uuid(), keys.REVID: make_uuid(), keys.NAME: [u"user name", ], keys.NAMESPACE: u"userprofiles", keys.EMAIL: u"*****@*****.**", keys.SUBSCRIPTIONS: [u"{0}:{1}".format(keys.ITEMID, make_uuid()), u"{0}::foo".format(keys.NAME), u"{0}::bar".format(keys.TAGS), u"{0}::".format(keys.NAMERE), u"{0}:userprofiles:a".format(keys.NAMEPREFIX), ] } invalid_meta = { keys.SUBSCRIPTIONS: [u"", u"unknown_tag:123", u"{0}:123".format(keys.ITEMID), u"{0}:foo".format(keys.NAME), ] } state = {'trusted': False, # True for loading a serialized representation or other trusted sources keys.NAME: u'somename', # name we decoded from URL path keys.ACTION: keys.ACTION_SAVE, keys.HOSTNAME: u'localhost', keys.ADDRESS: u'127.0.0.1', keys.WIKINAME: u'ThisWiki', keys.NAMESPACE: u'', keys.FQNAME: CompositeName(u'', u'', u'somename') } m = UserMetaSchema(meta) valid = m.validate(state) assert m[keys.CONTENTTYPE].value == CONTENTTYPE_USER if not valid: for e in m.children: print e.valid, e print m.valid, m assert valid m = UserMetaSchema(invalid_meta) valid = m.validate(state) assert not valid for e in m.children: if e.name in (keys.SUBSCRIPTIONS,): for value in e: assert not value.valid
def store(self, meta, data):
    """
    Store one revision (meta + data) and return the id of its metadata.

    :param meta: metadata mapping, updated in place (DATAID, SIZE and the
                 HASH_ALGORITHM entry get set when the data is stored here)
    :param data: revision data; it is wrapped into a TrackingFileWrapper or
                 stored as is, and must offer read() when meta already has a
                 DATAID
    :returns: the metaid as returned by self._store_meta
    :raises ValueError: when the size or hash declared in meta differs from
                        the one computed while storing the data
    """
    # XXX Idea: we could check the type the store wants from us:
    # if it is a str/bytes (BytesStore), just use meta "as is",
    # if it is a file (FileStore), wrap it into StringIO and give that to the store.
    if DATAID not in meta:
        tfw = TrackingFileWrapper(data, hash_method=HASH_ALGORITHM)
        dataid = make_uuid()
        self.data_store[dataid] = tfw
        meta[DATAID] = dataid
        # check whether size and hash are consistent:
        size_expected = meta.get(SIZE)
        size_real = tfw.size
        if size_expected is not None and size_expected != size_real:
            raise ValueError("computed data size ({0}) does not match data size declared in metadata ({1})".format(size_real, size_expected))
        meta[SIZE] = size_real
        hash_expected = meta.get(HASH_ALGORITHM)
        hash_real = tfw.hash.hexdigest()
        if hash_expected is not None and hash_expected != hash_real:
            raise ValueError("computed data hash ({0}) does not match data hash declared in metadata ({1})".format(hash_real, hash_expected))
        meta[HASH_ALGORITHM] = hash_real
    else:
        dataid = meta[DATAID]
        # we will just assume stuff is correct if you pass it with a data id
        if dataid not in self.data_store:
            self.data_store[dataid] = data
        else:
            # The data is already stored, but we still have to consume the
            # source: otherwise deserialization does not work, because the
            # file position does not advance to the next record.
            while data.read(64 * 1024):
                pass
    # if something goes wrong below, the data shall be purged by a garbage collection
    metaid = self._store_meta(meta)
    return metaid
def test_content(self): class REV(dict): """ fake rev """ rev = REV() rev[keys.ITEMID] = make_uuid() rev[keys.REVID] = make_uuid() rev[keys.ACL] = u"All:read" meta = { keys.REVID: make_uuid(), keys.PARENTID: make_uuid(), keys.NAME: [ u"a", ], keys.NAMESPACE: u"", keys.ACL: u"All:read", keys.TAGS: [u"foo", u"bar"], } state = { 'trusted': False, # True for loading a serialized representation or other trusted sources keys.NAME: u'somename', # name we decoded from URL path keys.ACTION: keys.ACTION_SAVE, keys.HOSTNAME: u'localhost', keys.ADDRESS: u'127.0.0.1', keys.USERID: make_uuid(), keys.HASH_ALGORITHM: u'b9064b9a5efd8c6cef2d38a8169a0e1cbfdb41ba', keys.SIZE: 0, keys.WIKINAME: u'ThisWiki', keys.NAMESPACE: u'', 'rev_parent': rev, 'acl_parent': u"All:read", 'contenttype_current': u'text/x.moin.wiki;charset=utf-8', 'contenttype_guessed': u'text/plain;charset=utf-8', keys.FQNAME: CompositeName(u'', u'', u'somename'), } m = ContentMetaSchema(meta) valid = m.validate(state) assert m[keys.CONTENTTYPE].value == u'text/x.moin.wiki;charset=utf-8' if not valid: for e in m.children: print e.valid, e print m.valid, m assert valid
def test_content(self): class REV(dict): """ fake rev """ rev = REV() rev[keys.ITEMID] = make_uuid() rev[keys.REVID] = make_uuid() rev[keys.ACL] = u"All:read" meta = { keys.REVID: make_uuid(), keys.PARENTID: make_uuid(), keys.NAME: [u"a", ], keys.NAMESPACE: u"", keys.ACL: u"All:read", keys.TAGS: [u"foo", u"bar"], } state = {'trusted': False, # True for loading a serialized representation or other trusted sources keys.NAME: u'somename', # name we decoded from URL path keys.ACTION: keys.ACTION_SAVE, keys.HOSTNAME: u'localhost', keys.ADDRESS: u'127.0.0.1', keys.USERID: make_uuid(), keys.HASH_ALGORITHM: u'b9064b9a5efd8c6cef2d38a8169a0e1cbfdb41ba', keys.SIZE: 0, keys.WIKINAME: u'ThisWiki', keys.NAMESPACE: u'', 'rev_parent': rev, 'acl_parent': u"All:read", 'contenttype_current': u'text/x.moin.wiki;charset=utf-8', 'contenttype_guessed': u'text/plain;charset=utf-8', keys.FQNAME: CompositeName(u'', u'', u'somename'), } m = ContentMetaSchema(meta) valid = m.validate(state) assert m[keys.CONTENTTYPE].value == u'text/x.moin.wiki;charset=utf-8' if not valid: for e in m.children: print e.valid, e print m.valid, m assert valid
def __init__(self, item_name, attach_name, attpath, editlog, acl):
    """
    Build meta and data of one attachment revision.

    :param item_name: name of the item the attachment belongs to
    :param attach_name: name of the attachment; used for the edit-log lookup,
                        for the revision name and for guessing the content type
    :param attpath: filesystem path of the attachment file
    :param editlog: object offering find_attach(attach_name); a KeyError from
                    it makes us fall back to made-up metadata
    :param acl: ACL to put into the meta, or None to set no ACL

    The attachment file stays open after construction: it is exposed as
    self.data, positioned at offset 0.
    """
    try:
        meta = editlog.find_attach(attach_name)
    except KeyError:
        meta = {MTIME: int(os.path.getmtime(attpath)), ACTION: ACTION_SAVE}  # make something up
    meta[NAME] = [u"{0}/{1}".format(item_name, attach_name)]
    if acl is not None:
        meta[ACL] = acl
    meta[CONTENTTYPE] = unicode(MimeType(filename=attach_name).content_type())
    f = open(attpath, "rb")
    try:
        size, hash_name, hash_digest = hash_hexdigest(f)
        f.seek(0)  # hashing consumed the file, rewind it for the reader of self.data
    except Exception:
        # do not leak the file handle if we can not hand it over as self.data
        f.close()
        raise
    self.data = f
    meta[hash_name] = hash_digest
    meta[SIZE] = size
    meta[ITEMID] = make_uuid()
    meta[REVID] = make_uuid()
    meta[ITEMTYPE] = ITEMTYPE_DEFAULT
    self.meta = meta
def __init__(self, uid=None, name="", password=None, auth_username="", trusted=False, **kw):
    """
    Set up a User, loading an existing profile when one can be found.

    :param uid: (optional) user ID (user itemid)
    :param name: (optional) user name
    :param password: (optional) user password (unicode)
    :param auth_username: (optional) already authenticated user name
                          (e.g. when using http basic auth) (unicode)
    :param trusted: (optional) whether user instance is created by a
                    trusted auth method / session
    :keyword auth_method: method that was used for authentication,
                          default: 'internal'
    :keyword auth_attribs: tuple of user object attribute names that are
                           determined by auth method and should not be
                           changeable by preferences, default: ().
                           First tuple element was used for authentication.
    """
    self.profile = UserProfile()
    self._cfg = app.cfg
    self.valid = False
    self.trusted = trusted
    self.auth_method = kw.get('auth_method', 'internal')
    self.auth_attribs = kw.get('auth_attribs', ())

    # XXX currently we just support creating with 1 name:
    _name = name or auth_username

    # find the itemid: explicit uid first, then auth_username, then _name
    itemid = uid
    if auth_username and not itemid:
        matches = search_users(**{NAME_EXACT: auth_username})
        if matches:
            itemid = matches[0].meta[ITEMID]
    if not itemid and _name and _name != ANON:
        matches = search_users(**{NAME_EXACT: _name})
        if matches:
            itemid = matches[0].meta[ITEMID]

    if not itemid:
        # no existing profile: start a fresh one
        self.profile[ITEMID] = make_uuid()
        if _name:
            self.profile[NAME] = [_name]
        if password is not None:
            self.set_password(password)
    else:
        self.load_from_id(itemid, password)

    # "may" so we can say "if user.may.read(pagename):"
    policy_class = self._cfg.SecurityPolicy
    self.may = policy_class(self)
def _store_meta(self, meta):
    """
    Serialize meta, put it into self.meta_store and return its id.

    :param meta: metadata mapping; gets a fresh REVID if it has none yet
    :returns: the REVID of meta, which is also the key used in the meta store
    """
    if REVID in meta:
        # Item.clear_revision calls us with REVID already present
        revid = meta[REVID]
    else:
        revid = meta[REVID] = make_uuid()
    serialized = self._serialize(meta)
    # XXX Idea: we could check the type the store wants from us:
    # if it is a str/bytes (BytesStore), just use meta "as is",
    # if it is a file (FileStore), wrap it into StringIO and give that to the store.
    self.meta_store[revid] = serialized
    return revid
def itemid_validator(element, state):
    """
    an itemid is a uuid that identifies an item

    For untrusted state, or when the element has no raw value, the element is
    set from state (or gets a fresh uuid) before it is checked by
    uuid_validator.
    """
    # only look at element.raw when the state is trusted
    needs_value = element.raw is Unset if state['trusted'] else True
    if needs_value:
        new_itemid = state.get(keys.ITEMID)
        if new_itemid is None:
            # this is first revision of an item
            new_itemid = make_uuid()
        element.set(new_itemid)
    return uuid_validator(element, state)
def itemid_validator(element, state):
    """
    an itemid is a uuid that identifies an item

    For untrusted state, or when the element has no raw value, the element is
    set before it is checked by uuid_validator: from the fqname when that
    addresses the item by itemid, else from state, else with a fresh uuid.
    """
    # only look at element.raw when the state is trusted
    needs_value = element.raw is Unset if state['trusted'] else True
    if needs_value:
        fqname = state[keys.FQNAME]
        if fqname and fqname.field == keys.ITEMID:
            new_itemid = fqname.value
        else:
            new_itemid = state.get(keys.ITEMID)
        if new_itemid is None:
            # this is first revision of an item
            new_itemid = make_uuid()
        element.set(new_itemid)
    return uuid_validator(element, state)
def __init__(self, item_name, attach_name, attpath, editlog, acl):
    """
    Build meta and data of one attachment revision.

    :param item_name: name of the item the attachment belongs to
    :param attach_name: name of the attachment; used for the edit-log lookup,
                        for the revision name and for guessing the content type
    :param attpath: filesystem path of the attachment file
    :param editlog: object offering find_attach(attach_name); a KeyError from
                    it makes us fall back to made-up metadata
    :param acl: ACL to put into the meta, or None to set no ACL

    The attachment file stays open after construction: it is exposed as
    self.data, positioned at offset 0.
    """
    try:
        meta = editlog.find_attach(attach_name)
    except KeyError:
        meta = {  # make something up
            MTIME: int(os.path.getmtime(attpath)),
            ACTION: ACTION_SAVE,
        }
    meta[NAME] = [u'{0}/{1}'.format(item_name, attach_name)]
    if acl is not None:
        meta[ACL] = acl
    meta[CONTENTTYPE] = unicode(MimeType(filename=attach_name).content_type())
    f = open(attpath, 'rb')
    try:
        size, hash_name, hash_digest = hash_hexdigest(f)
        f.seek(0)  # hashing consumed the file, rewind it for the reader of self.data
    except Exception:
        # do not leak the file handle if we can not hand it over as self.data
        f.close()
        raise
    self.data = f
    meta[hash_name] = hash_digest
    meta[SIZE] = size
    meta[ITEMID] = make_uuid()
    meta[REVID] = make_uuid()
    meta[REV_NUMBER] = 1  # an attachment is created with a single revision here
    meta[ITEMTYPE] = ITEMTYPE_DEFAULT
    self.meta = meta
def __init__(self, uid=None, name="", password=None, auth_username="", trusted=False, **kw):
    """
    Set up a User, loading an existing profile when one can be found.

    :param uid: (optional) user ID (user itemid)
    :param name: (optional) user name
    :param password: (optional) user password (unicode)
    :param auth_username: (optional) already authenticated user name
                          (e.g. when using http basic auth) (unicode)
    :param trusted: (optional) whether user instance is created by a
                    trusted auth method / session
    :keyword auth_method: method that was used for authentication,
                          default: 'internal'
    :keyword auth_attribs: tuple of user object attribute names that are
                           determined by auth method and should not be
                           changeable by preferences, default: ().
                           First tuple element was used for authentication.
    """
    self.profile = UserProfile()
    self._cfg = app.cfg
    self.valid = False
    self.trusted = trusted
    self.auth_method = kw.get('auth_method', 'internal')
    self.auth_attribs = kw.get('auth_attribs', ())

    _name = name or auth_username

    # find the itemid: explicit uid first, then auth_username, then _name
    itemid = uid
    if auth_username and not itemid:
        matches = search_users(name_exact=auth_username)
        if matches:
            itemid = matches[0].meta[ITEMID]
    if not itemid and _name and _name != 'anonymous':
        matches = search_users(name_exact=_name)
        if matches:
            itemid = matches[0].meta[ITEMID]

    if not itemid:
        # no existing profile: start a fresh one
        self.profile[ITEMID] = make_uuid()
        if _name:
            self.profile[NAME] = _name
        if password is not None:
            self.set_password(password)
    else:
        self.load_from_id(itemid, password)

    # "may" so we can say "if user.may.read(pagename):"
    if not self._cfg.SecurityPolicy:
        # no policy configured: fall back to the default one
        from MoinMoin.security import Default
        self.may = Default(self)
    else:
        self.may = self._cfg.SecurityPolicy(self)
def __init__(self, backend, path, itemname):
    """
    Read the basic facts about one item directory.

    :param backend: stored as self.backend; its deleted_mode is consulted
    :param path: directory of the item, holding 'current', 'edit-log' and
                 'revisions'
    :param itemname: stored as self.name
    """
    self.backend = backend
    self.name = itemname
    self.path = path
    # 'current' holds the number of the current revision
    with open(os.path.join(self.path, 'current'), 'r') as current_file:
        self.current = int(current_file.read().strip())
    self.editlog = EditLog(os.path.join(self.path, 'edit-log'))
    self.acl = None  # TODO
    self.itemid = make_uuid()
    if backend.deleted_mode == DELETED_MODE_KILL:
        revision_name = '{0:08d}'.format(self.current)
        revpath = os.path.join(self.path, 'revisions', revision_name)
        # will raise exception if killing is requested
        PageRevision(self, self.current, revpath)
def test_user(self): meta = { keys.ITEMID: make_uuid(), keys.REVID: make_uuid(), keys.NAME: u"user name", keys.EMAIL: u"*****@*****.**", } state = {'trusted': False, # True for loading a serialized representation or other trusted sources keys.NAME: u'somename', # name we decoded from URL path keys.ACTION: u'SAVE', keys.HOSTNAME: u'localhost', keys.ADDRESS: u'127.0.0.1', keys.WIKINAME: u'ThisWiki', } m = UserMetaSchema(meta) valid = m.validate(state) assert m[keys.CONTENTTYPE].value == CONTENTTYPE_USER if not valid: for e in m.children: print e.valid, e print m.valid, m assert valid
def __init__(self, backend, path, itemname):
    """
    Read the basic facts about one item directory.

    :param backend: stored as self.backend; its deleted_mode is consulted
    :param path: directory of the item, holding 'current', 'edit-log' and
                 'revisions'
    :param itemname: stored as self.name and used in progress messages
    :raises KillRequested: in DELETED_MODE_KILL, when the file of the current
                           revision does not exist
    """
    self.backend = backend
    self.name = itemname
    self.path = path
    print (u"Processing item {0}".format(itemname)).encode('utf-8')
    # 'current' holds the number of the current revision
    with open(os.path.join(self.path, 'current'), 'r') as current_file:
        self.current = int(current_file.read().strip())
    self.editlog = EditLog(os.path.join(self.path, 'edit-log'))
    self.acl = None  # TODO
    self.itemid = make_uuid()
    if backend.deleted_mode == DELETED_MODE_KILL:
        revision_name = '{0:08d}'.format(self.current)
        revpath = os.path.join(self.path, 'revisions', revision_name)
        current_is_missing = not os.path.exists(revpath)
        if current_is_missing:
            print (u"    >> Deleted item not migrated: {0}, last revision no: {1}".format(itemname, self.current)).encode('utf-8')
            raise KillRequested('deleted_mode wants killing/ignoring')
def __init__(self, backend, path, itemname):
    """
    Read the basic facts about one item directory.

    :param backend: stored as self.backend; its deleted_mode is consulted
    :param path: directory of the item, holding "current", "edit-log" and
                 "revisions"
    :param itemname: stored as self.name and used in progress messages
                     (presumably unicode -- verify against the caller)
    :raises KillRequested: in DELETED_MODE_KILL, when the file of the current
                           revision does not exist
    """
    self.backend = backend
    self.name = itemname
    self.path = path
    # Format with a unicode template and encode explicitly: a byte string
    # template raises UnicodeEncodeError for item names that are not ASCII.
    print (u"Processing item {0}".format(itemname)).encode('utf-8')
    currentpath = os.path.join(self.path, "current")
    with open(currentpath, "r") as f:
        # "current" holds the number of the current revision
        self.current = int(f.read().strip())
    editlogpath = os.path.join(self.path, "edit-log")
    self.editlog = EditLog(editlogpath)
    self.acl = None  # TODO
    self.itemid = make_uuid()
    if backend.deleted_mode == DELETED_MODE_KILL:
        revpath = os.path.join(self.path, "revisions", "{0:08d}".format(self.current))
        if not os.path.exists(revpath):
            print (u"    >> Deleted item not migrated: {0}, last revision no: {1}".format(itemname, self.current)).encode('utf-8')
            raise KillRequested("deleted_mode wants killing/ignoring")
def store(self, meta, data):
    """
    Store one revision (meta + data) and return the id of its metadata.

    :param meta: metadata mapping, updated in place (DATAID, SIZE and the
                 HASH_ALGORITHM entry get set when the data is stored here)
    :param data: revision data; wrapped into a TrackingFileWrapper, stored as
                 is, or just read to its end when already stored
    :returns: the metaid as returned by self._store_meta
    :raises ValueError: when the size or hash declared in meta differs from
                        the one computed while storing the data
    """
    # XXX Idea: we could check the type the store wants from us:
    # if it is a str/bytes (BytesStore), just use meta "as is",
    # if it is a file (FileStore), wrap it into StringIO and give that to the store.
    if DATAID in meta:
        # we will just asume stuff is correct if you pass it with a data id
        dataid = meta[DATAID]
        if dataid in self.data_store:
            # this is reading the data to avoid this issue:
            # if we do not store if we already have the dataid in the store,
            # deserialization does not work as the fpos does not advance to the next record,
            # because we do not read from the source file. Remove the check?
            while data.read(64 * 1024):
                pass
        else:
            self.data_store[dataid] = data
    else:
        tracker = TrackingFileWrapper(data, hash_method=HASH_ALGORITHM)
        dataid = make_uuid()
        self.data_store[dataid] = tracker
        meta[DATAID] = dataid
        # check whether size and hash are consistent:
        declared_size = meta.get(SIZE)
        actual_size = tracker.size
        if declared_size is not None and declared_size != actual_size:
            raise ValueError("computed data size ({0}) does not match data size declared in metadata ({1})".format(
                actual_size, declared_size))
        meta[SIZE] = actual_size
        declared_hash = meta.get(HASH_ALGORITHM)
        actual_hash = tracker.hash.hexdigest()
        if declared_hash is not None and declared_hash != actual_hash:
            raise ValueError("computed data hash ({0}) does not match data hash declared in metadata ({1})".format(
                actual_hash, declared_hash))
        meta[HASH_ALGORITHM] = actual_hash
    # if something goes wrong below, the data shall be purged by a garbage collection
    return self._store_meta(meta)
def run(self, data_dir=None): flaskg.add_lineno_attr = False flaskg.item_name2id = {} userid_old2new = {} indexer = app.storage backend = indexer.backend # backend without indexing print "\nConverting Users...\n" for rev in UserBackend(os.path.join(data_dir, 'user')): # assumes user/ below data_dir global user_names user_names.append(rev.meta['name'][0]) userid_old2new[rev.uid] = rev.meta['itemid'] # map old userid to new userid backend.store(rev.meta, rev.data) print "\nConverting Pages/Attachments...\n" for rev in PageBackend(data_dir, deleted_mode=DELETED_MODE_KILL, default_markup=u'wiki'): for user_name in user_names: if rev.meta['name'][0] == user_name or rev.meta['name'][0].startswith(user_name + '/'): rev.meta['namespace'] = u'users' break backend.store(rev.meta, rev.data) # item_name to itemid xref required for migrating user subscriptions flaskg.item_name2id[rev.meta['name'][0]] = rev.meta['itemid'] print "\nConverting Revision Editors...\n" for mountpoint, revid in backend: meta, data = backend.retrieve(mountpoint, revid) if USERID in meta: try: meta[USERID] = userid_old2new[meta[USERID]] except KeyError: # user profile lost, but userid referred by revision print (u"Missing userid {0!r}, editor of {1} revision {2}".format(meta[USERID], meta[NAME][0], revid)).encode('utf-8') del meta[USERID] backend.store(meta, data) elif meta.get(CONTENTTYPE) == CONTENTTYPE_USER: meta.pop(UID_OLD, None) # not needed any more backend.store(meta, data) print "\nConverting last revision of Moin 1.9 items to Moin 2.0" self.conv_in = conv_in() self.conv_out = conv_out() for item_name, (revno, namespace) in sorted(last_moin19_rev.items()): print ' Processing item "{0}", namespace "{1}", revision "{2}"'.format(item_name, namespace, revno) if namespace == u'': namespace = u'default' meta, data = backend.retrieve(namespace, revno) data_in = ''.join(data.readlines()) dom = self.conv_in(data_in, 'text/x.moin.wiki;format=1.9;charset=utf-8') out = self.conv_out(dom) out = 
out.encode(CHARSET) size, hash_name, hash_digest = hash_hexdigest(out) out = StringIO(out) meta[hash_name] = hash_digest meta[SIZE] = size meta[REVID] = make_uuid() meta[REV_NUMBER] = meta[REV_NUMBER] + 1 meta[MTIME] = int(time.time()) meta[COMMENT] = 'Convert moin 1.9 markup to 2.0' meta[CONTENTTYPE] = 'text/x.moin.wiki;charset=utf-8' del meta['dataid'] out.seek(0) backend.store(meta, out) print "\nRebuilding the index..." indexer.close() indexer.destroy() indexer.create() indexer.rebuild() indexer.open() print "Finished conversion!"
def __init__(self, item, revno, path):
    """
    Build meta and data of one page revision.

    :param item: the item this revision belongs to; its name, itemid, editlog,
                 backend, path and current attributes are used
    :param revno: number of the revision
    :param path: path of the revision file; if it can not be read, the
                 revision is handled as a deleted one
    :raises KillRequested: if the current revision can not be read and the
                           backend is in DELETED_MODE_KILL
    :raises NoSuchRevisionError: if the revision file can not be read, there
                                 is no edit-log entry and revno is outside of
                                 0..item.current
    """
    item_name = item.name
    itemid = item.itemid
    editlog = item.editlog
    self.backend = item.backend
    # we just read the page and parse it here, makes the rest of the code simpler:
    try:
        with codecs.open(path, 'r', CHARSET) as f:
            content = f.read()
    except (IOError, OSError):
        if revno == item.current and self.backend.deleted_mode == DELETED_MODE_KILL:
            raise KillRequested('deleted_mode wants killing/ignoring')
        # handle deleted revisions (for all revnos with 0<=revno<=current) here
        # we prepare some values for the case we don't find a better value in edit-log:
        meta = {MTIME: -1,  # fake, will get 0 in the end
                NAME: item_name,  # will get overwritten with name from edit-log
                                  # if we have an entry there
                }
        try:
            # the constructor needs the path of the previous revision file and
            # publishes its metadata as .meta
            revpath = os.path.join(item.path, 'revisions', '{0:08d}'.format(revno - 1))
            previous_meta = PageRevision(item, revno - 1, revpath).meta
            # if this page revision is deleted, we have no on-page metadata.
            # but some metadata is required, thus we have to copy it from the
            # (non-deleted) revision revno-1:
            for key in [ACL, NAME, CONTENTTYPE, MTIME, ]:
                if key in previous_meta:
                    meta[key] = previous_meta[key]
        except NoSuchRevisionError:
            pass  # should not happen
        meta[MTIME] += 1  # it is now either 0 or prev rev mtime + 1
        data = u''
        try:
            editlog_data = editlog.find_rev(revno)
        except KeyError:
            if 0 <= revno <= item.current:
                editlog_data = {  # make something up
                    ACTION: u'SAVE/DELETE',
                }
            else:
                raise NoSuchRevisionError('Item {0!r} has no revision {1} (not even a deleted one)!'.format(item.name, revno))
    else:
        try:
            editlog_data = editlog.find_rev(revno)
        except KeyError:
            # NOTE(review): for a revno outside of 1..item.current, editlog_data
            # stays unbound and the update() below fails -- confirm this can not happen
            if 1 <= revno <= item.current:
                editlog_data = {  # make something up
                    NAME: item.name,
                    MTIME: int(os.path.getmtime(path)),
                    ACTION: u'SAVE',
                }
        meta, data = split_body(content)
    meta.update(editlog_data)
    page_format = meta.pop('format', self.backend.format_default)
    meta[CONTENTTYPE] = FORMAT_TO_CONTENTTYPE.get(page_format, CONTENTTYPE_DEFAULT)
    data = self._process_data(meta, data)
    data = data.encode(CHARSET)
    size, hash_name, hash_digest = hash_hexdigest(data)
    meta[hash_name] = hash_digest
    meta[SIZE] = size
    meta[ITEMID] = itemid
    meta[REVID] = make_uuid()
    # publish the meta with all list values turned into tuples
    self.meta = {}
    for k, v in meta.iteritems():
        if isinstance(v, list):
            v = tuple(v)
        self.meta[k] = v
    self.data = StringIO(data)

    acl_line = self.meta.get(ACL)
    if acl_line is not None:
        self.meta[ACL] = regenerate_acl(acl_line)
def __init__(self, item, revno, path):
    """
    Build meta and data of one page revision.

    :param item: the item this revision belongs to; its name, itemid, editlog,
                 backend, path and current attributes are used
    :param revno: number of the revision, stored in the meta as REV_NUMBER
    :param path: path of the revision file; if it can not be read, the
                 revision is handled as a deleted one
    :raises NoSuchRevisionError: if the revision file can not be read, there
                                 is no edit-log entry and revno is outside of
                                 0..item.current

    Side effect: for revisions with content type CONTENTTYPE_MOINWIKI, the
    module-level last_moin19_rev gets an entry for the item name.
    """
    item_name = item.name
    itemid = item.itemid
    editlog = item.editlog
    self.backend = item.backend
    editlog.to_begin()
    # we just read the page and parse it here, makes the rest of the code simpler:
    try:
        with codecs.open(path, 'r', CHARSET) as f:
            content = f.read()
    except (IOError, OSError):
        # handle deleted revisions (for all revnos with 0<=revno<=current) here
        # we prepare some values for the case we don't find a better value in edit-log:
        meta = {MTIME: -1,  # fake, will get 0 in the end
                NAME: [item_name],  # will get overwritten with name from edit-log
                                    # if we have an entry there
                }
        try:
            revpath = os.path.join(item.path, 'revisions', '{0:08d}'.format(revno - 1))
            previous_meta = PageRevision(item, revno - 1, revpath).meta
            # if this page revision is deleted, we have no on-page metadata.
            # but some metadata is required, thus we have to copy it from the
            # (non-deleted) revision revno-1:
            for key in [ACL, NAME, CONTENTTYPE, MTIME, ]:
                if key in previous_meta:
                    meta[key] = previous_meta[key]
        except NoSuchRevisionError:
            pass  # should not happen
        meta[MTIME] += 1  # it is now either 0 or prev rev mtime + 1
        data = u''
        try:
            editlog_data = editlog.find_rev(revno)
        except KeyError:
            print (u"    >> Missing edit log data item = {0}, revision = {1}".format(item_name, revno)).encode('utf-8')
            if 0 <= revno <= item.current:
                editlog_data = {  # make something up
                    ACTION: u'SAVE/DELETE',
                }
            else:
                raise NoSuchRevisionError('Item {0!r} has no revision {1} (not even a deleted one)!'.format(
                    item.name, revno))
    else:
        try:
            editlog_data = editlog.find_rev(revno)
        except KeyError:
            print (u"    >> Missing edit log data, name = {0}, revision = {1}".format(item_name, revno)).encode('utf-8')
            # NOTE(review): for a revno outside of 1..item.current, editlog_data
            # stays unbound and the update() below fails -- confirm this can not happen
            if 1 <= revno <= item.current:
                editlog_data = {  # make something up
                    NAME: [item.name],
                    MTIME: int(os.path.getmtime(path)),
                    ACTION: ACTION_SAVE,
                }
        meta, data = split_body(content)
    # edit-log values win over what we have collected so far
    meta.update(editlog_data)
    format = meta.pop('format', self.backend.format_default)
    meta[CONTENTTYPE] = FORMAT_TO_CONTENTTYPE.get(format, CONTENTTYPE_DEFAULT)
    data = self._process_data(meta, data)
    data = data.encode(CHARSET)
    size, hash_name, hash_digest = hash_hexdigest(data)
    meta[hash_name] = hash_digest
    meta[SIZE] = size
    meta[ITEMID] = itemid
    meta[REVID] = make_uuid()
    meta[REV_NUMBER] = revno
    meta[NAMESPACE] = NAMESPACE_DEFAULT
    meta[ITEMTYPE] = ITEMTYPE_DEFAULT
    # items named like "...Template" get tagged as templates
    if meta[NAME][0].endswith('Template'):
        if TAGS in meta:
            meta[TAGS].append(TEMPLATE)
        else:
            meta[TAGS] = [TEMPLATE]
    # publish the meta with all list values turned into tuples
    self.meta = {}
    for k, v in meta.iteritems():
        if isinstance(v, list):
            v = tuple(v)
        self.meta[k] = v
    self.data = StringIO(data)

    acl_line = self.meta.get(ACL)
    if acl_line is not None:
        self.meta[ACL] = regenerate_acl(acl_line)

    # note: this changes the local meta only, self.meta was already built above;
    # the namespace set here ends up in last_moin19_rev below
    for user_name in user_names:
        if meta['name'][0] == user_name or meta['name'][0].startswith(user_name + '/'):
            meta['namespace'] = u'users'
            break

    print (u"    Processed revision {0} of item {1}, revid = {2}, contenttype = {3}".format(revno, item_name, meta[REVID], meta[CONTENTTYPE])).encode('utf-8')

    global last_moin19_rev
    # remember revid and namespace of revisions in moin wiki markup, keyed by item name
    if meta[CONTENTTYPE] == CONTENTTYPE_MOINWIKI:
        last_moin19_rev[item_name] = (meta[REVID], meta[NAMESPACE])
def __init__(self, item, revno, path):
    """
    Build meta and data of one page revision.

    :param item: the item this revision belongs to; its name, itemid, editlog,
                 backend, path and current attributes are used
    :param revno: number of the revision
    :param path: path of the revision file; if it can not be read, the
                 revision is handled as a deleted one
    :raises NoSuchRevisionError: if the revision file can not be read, there
                                 is no edit-log entry and revno is outside of
                                 0..item.current

    Progress messages are formatted with unicode templates and encoded to
    UTF-8, because byte string templates raise UnicodeEncodeError for item
    names that are not ASCII (item names are presumably unicode -- verify
    against the caller).
    """
    item_name = item.name
    itemid = item.itemid
    editlog = item.editlog
    self.backend = item.backend
    editlog.to_begin()
    # we just read the page and parse it here, makes the rest of the code simpler:
    try:
        with codecs.open(path, "r", CHARSET) as f:
            content = f.read()
    except (IOError, OSError):
        # handle deleted revisions (for all revnos with 0<=revno<=current) here
        # we prepare some values for the case we don't find a better value in edit-log:
        meta = {
            MTIME: -1,  # fake, will get 0 in the end
            NAME: [item_name],  # will get overwritten with name from edit-log
            # if we have an entry there
        }
        try:
            revpath = os.path.join(item.path, "revisions", "{0:08d}".format(revno - 1))
            previous_meta = PageRevision(item, revno - 1, revpath).meta
            # if this page revision is deleted, we have no on-page metadata.
            # but some metadata is required, thus we have to copy it from the
            # (non-deleted) revision revno-1:
            for key in [ACL, NAME, CONTENTTYPE, MTIME]:
                if key in previous_meta:
                    meta[key] = previous_meta[key]
        except NoSuchRevisionError:
            pass  # should not happen
        meta[MTIME] += 1  # it is now either 0 or prev rev mtime + 1
        data = u""
        try:
            editlog_data = editlog.find_rev(revno)
        except KeyError:
            print (u"    >> Missing edit log data item = {0}, revision = {1}".format(item_name, revno)).encode('utf-8')
            if 0 <= revno <= item.current:
                editlog_data = {ACTION: u"SAVE/DELETE"}  # make something up
            else:
                raise NoSuchRevisionError(
                    "Item {0!r} has no revision {1} (not even a deleted one)!".format(item.name, revno)
                )
    else:
        try:
            editlog_data = editlog.find_rev(revno)
        except KeyError:
            print (u"    >> Missing edit log data, name = {0}, revision = {1}".format(item_name, revno)).encode('utf-8')
            # NOTE(review): for a revno outside of 1..item.current, editlog_data
            # stays unbound and the update() below fails -- confirm this can not happen
            if 1 <= revno <= item.current:
                editlog_data = {  # make something up
                    NAME: [item.name],
                    MTIME: int(os.path.getmtime(path)),
                    ACTION: ACTION_SAVE,
                }
        meta, data = split_body(content)
    # edit-log values win over what we have collected so far
    meta.update(editlog_data)
    format = meta.pop("format", self.backend.format_default)
    meta[CONTENTTYPE] = FORMAT_TO_CONTENTTYPE.get(format, CONTENTTYPE_DEFAULT)
    data = self._process_data(meta, data)
    data = data.encode(CHARSET)
    size, hash_name, hash_digest = hash_hexdigest(data)
    meta[hash_name] = hash_digest
    meta[SIZE] = size
    meta[ITEMID] = itemid
    meta[REVID] = make_uuid()
    meta[NAMESPACE] = NAMESPACE_DEFAULT
    meta[ITEMTYPE] = ITEMTYPE_DEFAULT
    # publish the meta with all list values turned into tuples
    self.meta = {}
    for k, v in meta.iteritems():
        if isinstance(v, list):
            v = tuple(v)
        self.meta[k] = v
    self.data = StringIO(data)

    acl_line = self.meta.get(ACL)
    if acl_line is not None:
        self.meta[ACL] = regenerate_acl(acl_line)

    print (u"    Processed revision {0} of item {1}, revid = {2}".format(revno, item_name, meta[REVID])).encode('utf-8')