def process_pagedirs(dir_from, dir_to):
    """Copy every page directory from dir_from to dir_to, then redo revisions.

    Each entry of dir_from is first duplicated wholesale with copy_dir().
    If the source page has a 'revisions' subdirectory, every file in it is
    afterwards passed through tocrlf() from the source tree into the
    destination tree.  Revision file names must parse as integers;
    otherwise int() raises ValueError.

    dir_from -- directory containing the source page directories
    dir_to -- directory receiving the processed page directories
    """
    for page in listdir(dir_from):
        src_page = opj(dir_from, page)
        dst_page = opj(dir_to, page)

        # copy everything up front, including the revisions redone below
        copy_dir(src_page, dst_page)

        src_revs = opj(src_page, 'revisions')
        dst_revs = opj(dst_page, 'revisions')
        if not os.path.exists(src_revs):
            continue

        for revname in listdir(src_revs):
            int(revname) # revision files are expected to have numeric names
            tocrlf(opj(src_revs, revname), opj(dst_revs, revname))
def generate_pages(dir_from, dir_to):
    for pagename in info2:
        entry = info2.get(pagename, {})
        tslist = entry.keys()
        if tslist:
            pagedir = opj(dir_to, 'pages', pagename)
            os.makedirs(opj(pagedir, 'revisions'))
            editlog_file = opj(pagedir, 'edit-log')
            f = open(editlog_file, 'w')
            rev = 0
            tslist.sort()
            for ts in tslist:
                rev += 1
                revstr = '%08d' % rev
                file_from, data = entry[ts]
                data[0] = str(ts)
                data[1] = revstr
                if data[2].endswith('/REVERT'):
                    # replace the timestamp with the revision number
                    revertts = long(data[7]) # must be long for py 2.2.x
                    try:
                        revertrev = int(entry[revertts][1][1])
                    except KeyError:
                        # never should trigger...
                        print "********* KeyError %s entry[%d][1][1] **********" % (pagename, revertts)
                        revertrev = 0
                    data[7] = '%08d' % revertrev
                f.write('\t'.join(data)+'\n')
                if file_from is not None:
                    file_to = opj(pagedir, 'revisions', revstr)
                    copy_file(file_from, file_to)
            f.close()
                
            curr_file = opj(pagedir, 'current')
            f = open(curr_file, 'w')
            f.write(revstr)
            f.close()

        att_from = opj(dir_from, 'pages', pagename, 'attachments')
        if os.path.exists(att_from):
            att_to = opj(pagedir, 'attachments')
            copy_dir(att_from, att_to)
def convert_pagedir(dir_from, dir_to, is_backupdir=0):
    """Convert all page directories below dir_from into a new dir_to.

    dir_to is created with os.mkdir(), so it must not exist yet and its
    parent must exist.  For every page directory:

    * the 'text' file is copied,
    * each file in 'backup' (if present) is copied under a new name, which
      is str(convert_ts(float(old_name))),
    * 'edit-log' is converted with convert_editlog(),
    * 'attachments' (if present) is copied; a failure there is ignored.

    The per-page 'cache' directory is not migrated (see the disabled code
    below).

    dir_from -- directory holding the old page directories
    dir_to -- directory to create and fill
    is_backupdir -- not used by this function
    """
    os.mkdir(dir_to)
    for pagedir in listdir(dir_from):
        text_from = opj(dir_from, pagedir, 'text')
        text_to = opj(dir_to, pagedir, 'text')
        os.mkdir(opj(dir_to, pagedir))
        copy_file(text_from, text_to)
        
        backupdir_from = opj(dir_from, pagedir, 'backup')
        backupdir_to = opj(dir_to, pagedir, 'backup')
        if os.path.exists(backupdir_from):
            os.mkdir(backupdir_to)
            for ts in listdir(backupdir_from):
                # backup files are named by timestamp; rename to the converted one
                ts_usec = str(convert_ts(float(ts)))
                backup_from = opj(backupdir_from, ts)
                backup_to = opj(backupdir_to, ts_usec)
                copy_file(backup_from, backup_to)
        
        editlog_from = opj(dir_from, pagedir, 'edit-log')
        editlog_to = opj(dir_to, pagedir, 'edit-log')
        convert_editlog(editlog_from, editlog_to)
        
        #cachedir_from = opj(dir_from, pagedir, 'cache')
        #cachedir_to = opj(dir_to, pagedir, 'cache')
        #if os.path.exists(cachedir_from):
        #    os.mkdir(cachedir_to)
        #    try:
        #        copy_file(
        #            opj(cachedir_from, 'hitcounts'),
        #            opj(cachedir_to, 'hitcounts'))
        #    except: pass

        attachdir_from = opj(dir_from, pagedir, 'attachments')
        attachdir_to = opj(dir_to, pagedir, 'attachments')
        if os.path.exists(attachdir_from):
            try:
                copy_dir(attachdir_from, attachdir_to)
            except:
                # best effort: any failure copying attachments is ignored so
                # that the remaining pages are still converted
                pass

origdir = 'data.pre-mig7'

try:
    os.rename('data', origdir)
except OSError:
    print "You need to be in the directory where your copy of the 'data' directory is located."
    sys.exit(1)

os.makedirs(opj('data','pages'))

process_pagedirs(opj(origdir, 'pages'), opj('data', 'pages'))

copy_dir(opj(origdir, 'plugin'), opj('data', 'plugin'))

copy_dir(opj(origdir, 'user'), opj('data', 'user'))

copy_file(opj(origdir, 'edit-log'), opj('data', 'edit-log'))
copy_file(opj(origdir, 'event-log'), opj('data', 'event-log'))

copy_file(opj(origdir, 'intermap.txt'), opj('data', 'intermap.txt'))



# Example #5
# 0
        try:
            os.remove(editlock)
        except:
            pass

        attachdir = os.path.join(pagedir, 'attachments')
        for root, dirs, files in walk(attachdir):
            for f in  files:
                try:
                    f.decode(to_encoding)
                except UnicodeDecodeError:
                    fnew = f.decode(from_encoding).encode(to_encoding)
                    os.rename(os.path.join(root,f), os.path.join(root, fnew))
                    print 'renamed', f, '\n ->', fnew, ' in dir:', root

        
origdir = 'data.pre-mig10'
destdir = 'data'

# Backup original dir and create new empty dir
try:
    os.rename(destdir, origdir)
except OSError:
    print "You need to be in the directory where your copy of the 'data' directory is located."
    sys.exit(1)

copy_dir(origdir, destdir)
migrate(destdir)


            key, val = kvpair.split('=')
            key = urllib.unquote(key)
            val = urllib.unquote(val)
            key = convert_string(key, enc_from, enc_to)
            val = convert_string(val, enc_from, enc_to)
            key = urllib.quote(key)
            val = urllib.quote(val)
            kvlist.append("%s=%s" % (key,val))
        fields[2] = '&'.join(kvlist)
        line = '\t'.join(fields) + '\n'
        file_to.write(line)

    file_to.close()
    file_from.close()
    st=os.stat(fname_from)
    os.utime(fname_to, (st.st_atime,st.st_mtime))

origdir = 'data.pre-mig6'

try:
    os.rename('data', origdir)
except OSError:
    print "You need to be in the directory where your copy of the 'data' directory is located."
    sys.exit(1)

copy_dir(origdir, 'data')
os.remove(opj('data','event-log')) # old format
convert_eventlog(opj(origdir, 'event-log'), opj('data', 'event-log'), from_encoding, to_encoding)


# Example #7
# 0
            f = open(file_to2, "a")
            f.write(data)
            f.close()
        except:
            pass


# Backup original dir and create new empty dir
try:
    os.rename("data", origdir)
    os.mkdir("data")
except OSError:
    print "You need to be in the directory where your copy of the 'data' directory is located."
    sys.exit(1)

convert_pagedir(opj(origdir, "pages"), opj("data", "pages"))

convert_textdir(opj(origdir, "text"), "data")

convert_textdir(opj(origdir, "backup"), "data", 1)

convert_editlog(opj(origdir, "editlog"), opj("data", "edit-log"), opj("data", "pages"))

copy_file(opj(origdir, "event.log"), opj("data", "event.log"))

copy_dir(opj(origdir, "plugin"), opj("data", "plugin"))

copy_dir(opj(origdir, "user"), opj("data", "user"))

copy_file(opj(origdir, "intermap.txt"), opj("data", "intermap.txt"))
def generate_pages(dir_from, dir_to):
    """Rebuild every page recorded in info with renumbered revisions.

    For each page that has edit-log entries this creates
    <dir_to>/pages/<pn>/ with a 'revisions' directory and an 'edit-log'.
    Entries are processed in ascending timestamp order.  Entries whose
    action is SAVE, SAVENEW or SAVE/REVERT get consecutive new revision
    numbers starting at 1, and their revision file is copied from
    <dir_from>/pages/<pn>/revisions/<old rev> to the new number.  All other
    entries are logged with the placeholder revision '99999999'.  For
    */REVERT actions the old revision number in the 'extra' field is
    replaced by the matching new revision number (0 if none matches).

    A 'current' file is written only when the source page's 'current' file
    can be read, so pages without one stay without one.  An 'attachments'
    directory in the source page is copied for every page.

    dir_from -- root of the data directory being migrated
    dir_to -- root of the data directory being generated
    """
    revactions = ['SAVE','SAVENEW','SAVE/REVERT',] # these actions create revisions
    for pn in info:
        entry = info.get(pn, {})
        tslist = entry.keys()
        # Must be set for every page, not only inside the branch below: the
        # attachment copy at the end of the loop body needs this page's own
        # directory even when the page has no edit-log entries.
        pagedir = opj(dir_to, 'pages', pn)
        if tslist:
            revdir = opj(pagedir, 'revisions')
            os.makedirs(revdir)
            editlog_file = opj(pagedir, 'edit-log')
            f = open(editlog_file, 'w')
            try:
                revnew = 0
                tslist.sort()
                for ts in tslist:
                    data = entry[ts][1]
                    datanew = data[:]
                    # the unpacking also enforces that a record has exactly 9 fields
                    (_timestamp, rev, action, _pagename, _ip, _host, _id, extra, _comment) = data
                    revstr = '%08d' % rev
                    if action in revactions:
                        revnew += 1
                        revnewstr = '%08d' % revnew
                        entry[ts][0] = revnew # remember what new revno we chose
                    else: # ATTNEW,ATTDRW,ATTDEL
                        revnewstr = '99999999'
                    if action.endswith('/REVERT'):
                        # replace the old revno with the correct new revno
                        revertrevold = int(extra)
                        revertrevnew = 0
                        for ts2 in tslist:
                            # field 1 of the record is the old revision number;
                            # if several entries match, the last one wins
                            if entry[ts2][1][1] == revertrevold:
                                revertrevnew = entry[ts2][0]
                        datanew[7] = '%08d' % revertrevnew

                    datanew[1] = revnewstr
                    f.write('\t'.join(datanew)+'\n') # does make a CRLF on win32 in the file

                    if action in revactions: # we DO have a page rev for this one
                        file_from = opj(dir_from, 'pages', pn, 'revisions', revstr)
                        file_to = opj(revdir, revnewstr)
                        copy_file(file_from, file_to)
            finally:
                # do not leak the handle if a record is malformed or a copy fails
                f.close()

            # check if page exists or is deleted in orig dir
            curr_file_from = opj(dir_from, 'pages', pn, 'current')
            try:
                f = open(curr_file_from)
                try:
                    f.read() # only probing readability, the content is not needed
                finally:
                    f.close()
                page_exists = 1
            except (IOError, OSError):
                page_exists = 0

            # re-make correct DELETED status!
            if page_exists:
                curr_file = opj(pagedir, 'current')
                f = open(curr_file, 'w')
                try:
                    f.write("%08d\n" % revnew) # we add a \n, so it is easier to hack in there manually
                finally:
                    f.close()

        att_from = opj(dir_from, 'pages', pn, 'attachments')
        if os.path.exists(att_from):
            # NOTE(review): for a page without edit-log entries pagedir has not
            # been created at this point; this relies on copy_dir() creating
            # missing parent directories -- confirm against its implementation.
            att_to = opj(pagedir, 'attachments')
            copy_dir(att_from, att_to)