예제 #1
0
def backupCopy(srcDir, dstDir, exlSrcDir, exlDstDir):
    """Analyse what has to change so that dstDir mirrors srcDir.

    This function itself copies and deletes nothing: it resets the
    module-level counters and work lists, then queues work in them.
    compareNcopy() is defined elsewhere -- presumably it compares the files
    of one folder pair and queues work the same way; confirm.

    Work queued directly here:
        rmtreeList   -- destination folders that have no source counterpart
        copytreeList -- [src, dst] pairs for source folders missing in dst

    Args:
        srcDir: source directory (passed through entityc.entity first).
        dstDir: destination directory (passed through entityc.entity first).
        exlSrcDir: source exclusions, parsed by func.listExl.
        exlDstDir: destination exclusions, parsed by func.listExl.

    Returns:
        0 if either directory is missing, both are the same path, or no
        work was queued; 1 if at least one work list is non-empty.
    """
    global totalSrcFile, totalDstFile, totalSrcCopy
    global totalDstRemove, totalDstRename
    global profile1Num, profile2Num, profile3Num

    def isInside(path, parent):
        #True when path is parent itself or lies below it.
        #A plain substring test would also match siblings such as
        #'/ab' vs '/abc', which made folders disappear from the plan.
        return path == parent or path.startswith(parent + delim)

    def countFiles(tree):
        #Number of files in tree, all sub-folders included
        return sum(len(files) for _, _, files in os.walk(tree))

    def listSubFolders(rootDir, exlList):
        #Sub-folders of rootDir as paths relative to rootDir, shortest
        #first so that a parent is always seen before its children
        found = []
        for folderName, subFolders, fileNames in os.walk(rootDir):
            #NOTE: exclusion is a substring match on the whole path
            if any(exl in folderName for exl in exlList):
                continue
            if folderName == rootDir:
                continue
            if folderName.split(delim)[-1] == pycache:
                continue
            #count=1: strip only the leading rootDir, never a later
            #repetition of the same text deeper in the path
            found.append(folderName.replace(rootDir, '', 1))
        found.sort(key=len)
        return found

    entity0 = entityc.entity(srcDir, dstDir, delim)
    srcDir = entity0.srcDir
    dstDir = entity0.dstDir
    dst_drive = entity0.dst_drive
    esList = func.listExl(exlSrcDir, delim)
    edList = func.listExl(exlDstDir, delim)
    print('Analysing sync from ' + srcDir + ' to ' + dstDir)
    print('Source directories to exclude:')
    print(esList)
    print('Destination directories to exclude:')
    print(edList)

    if (not os.path.isdir(srcDir) or not os.path.isdir(dstDir)
            or srcDir == dstDir):
        print('Error: source or destination directory not found\n')
        return 0

    #Global variables reset
    totalSrcFile = 0
    totalDstFile = 0
    totalSrcCopy = 0
    totalDstRemove = 0
    totalDstRename = 0
    rmtreeList.clear()
    copytreeList.clear()
    renameList.clear()
    removeList.clear()
    checkNcopyList.clear()

    #code profiling
    profile1.clear()
    profile2.clear()
    profile3.clear()
    profile1Num = 0
    profile2Num = 0
    profile3Num = 0

    #Files directly inside srcDir/dstDir, ignoring all subfolders
    if dstDir not in edList and srcDir not in esList:
        compareNcopy(srcDir, dstDir)

    dstFolderList = listSubFolders(dstDir, edList)
    srcFolderList = listSubFolders(srcDir, esList)
    srcFolderSet = set(srcFolderList)

    #Destination folders: queue removal of those without a source
    #counterpart, remember the ones present on both sides
    dstRemoved = []
    sameFolderSrc = set()
    for dstFolder in dstFolderList:
        if dstFolder in srcFolderSet:
            sameFolderSrc.add(dstFolder)
            continue
        #Already covered by the removal of a parent folder
        if any(isInside(dstFolder, gone) for gone in dstRemoved):
            continue
        tf = dstDir + dstFolder
        cpt = countFiles(tf)
        totalDstFile = totalDstFile + cpt
        totalDstRemove = totalDstRemove + cpt
        rmtreeList.append(tf)
        dstRemoved.append(dstFolder)

    #Source folders: compare file by file when the folder exists on both
    #sides, otherwise queue the whole tree for copying
    for srcFolder in srcFolderList:
        src = srcDir + srcFolder
        dst = dstDir + srcFolder
        if srcFolder in sameFolderSrc:
            compareNcopy(src, dst)
            continue
        #Nothing has been copied yet, so os.path.isdir(dst) cannot tell
        #whether a parent is already queued; look at copytreeList instead
        if any(isInside(src, pp[0]) for pp in copytreeList):
            continue
        cpt = countFiles(src)
        totalSrcCopy = totalSrcCopy + cpt
        totalSrcFile = totalSrcFile + cpt
        copytreeList.append([src, dst])

    sync_size_est(dst_drive)
    filecmp.clear_cache()
    print('Total source file count: ' + str(totalSrcFile))
    print('Total destination file count: ' + str(totalDstFile))
    print('Total source file to copy: ' + str(totalSrcCopy))
    print('Total destination file to remove: ' + str(totalDstRemove))
    print('Total destination file to rename: ' + str(totalDstRename))
    #Sanity check: after the sync dst should hold as many files as src
    if totalSrcFile != totalDstFile - totalDstRemove + totalSrcCopy:
        print("Issue encountered, please investigate")
    else:
        print("No issue encountered")
    print('Analysis finished\n')

    rets = (len(rmtreeList) + len(copytreeList) + len(renameList) +
            len(removeList) + len(checkNcopyList))
    return 0 if rets == 0 else 1
예제 #2
0
def validate(srcDir, dstDir, exlSrcDir, exlDstDir):
    """Verify by checksum that source and destination hold the same content.

    Two calling forms are supported:

    * srcDir is a list: every item is indexed as item[0..3]; the file built
      from (item[0], item[2]) is hashed against the file built from
      (item[1], item[3]). The other arguments are ignored.
      (presumably item = [srcFolder, dstFolder, srcName, dstName] -- confirm
      against the caller)
    * otherwise: srcDir and dstDir are directory trees. Every non-excluded
      file is listed on both sides, and the two listings must match in
      count, relative path and file hash.

    Args:
        srcDir: list of file pairs, or the source directory.
        dstDir: destination directory (directory form only).
        exlSrcDir: source exclusions, parsed by func.listExl.
        exlDstDir: destination exclusions, parsed by func.listExl.

    Returns:
        1 when everything matches, 0 on the first mismatch.
    """
    print('Checksum...')
    if isinstance(srcDir, list):
        for i in srcDir:
            p1 = func.buildPath(i[0], i[2], delim)
            p2 = func.buildPath(i[1], i[3], delim)
            fileHash1 = func.hasher(p1, False, hash_Mode)
            fileHash2 = func.hasher(p2, False, hash_Mode)
            if fileHash1 != fileHash2:
                print('Checksum: File checksum failed')
                return 0
        print('Checksum: File checksum passed')
        return 1

    def collectFiles(rootDir, exlList):
        #(relative path, full path) for every non-excluded file below
        #rootDir, sorted by relative path. os.walk yields entries in
        #arbitrary directory order, so both sides must be sorted before
        #they can be compared position by position.
        found = []
        for pf, sf, fn in os.walk(rootDir):
            #NOTE: exclusion is a substring match on the whole path
            if any(exl in pf for exl in exlList):
                continue
            for f in fn:
                fullPath = func.buildPath(pf, f, delim)
                #count=1: strip only the leading rootDir
                found.append((fullPath.replace(rootDir, '', 1), fullPath))
        found.sort()
        return found

    entity0 = entityc.entity(srcDir, dstDir, delim)
    srcDir = entity0.srcDir
    dstDir = entity0.dstDir
    esList = func.listExl(exlSrcDir, delim)
    edList = func.listExl(exlDstDir, delim)
    print('Source directories to exclude:')
    print(esList)
    print('Destination directories to exclude:')
    print(edList)

    srcList = collectFiles(srcDir, esList)
    dstList = collectFiles(dstDir, edList)

    if len(srcList) != len(dstList):
        print('Checksum: directory checksum failed')
        return 0

    for (tStr1, p1), (tStr2, p2) in zip(srcList, dstList):
        #Compare the relative names first; file hashing is the expensive
        #part and is pointless once the names already differ
        strHash1 = func.hasher(tStr1, True, hash_Mode)
        strHash2 = func.hasher(tStr2, True, hash_Mode)
        if strHash1 != strHash2:
            print('Checksum: directory checksum failed')
            return 0
        fileHash1 = func.hasher(p1, False, hash_Mode)
        fileHash2 = func.hasher(p2, False, hash_Mode)
        if fileHash1 != fileHash2:
            print('Checksum: directory checksum failed')
            return 0

    print('Checksum: directory checksum passed')
    return 1
예제 #3
0
def globalCompare(targetDir, cmpMode, exlDir):
    """Find duplicated files anywhere below targetDir and write a report.

    Two kinds of duplicates are reported:
      * same file name in different places (report only);
      * same content, as grouped by func.loadFileObj. For each group the
        file with the most recent ctime/atime is kept and the others are
        appended to the module-level dupFileList.

    The report is appended to 'findDupFiles_report.txt' in the current
    working directory.

    Args:
        targetDir: directory to scan (passed through entityc.entity first).
        cmpMode: 0 selects the filecmp label, anything else the hashlib
            label; the value is also forwarded to func.loadFileObj.
        exlDir: exclusions, parsed by func.listExl.

    Returns:
        '' when targetDir is not a directory or holds no files, otherwise
        '<total files>-<duplicated names>-<duplicated content groups>'.
    """
    entity0 = entityc.entity(targetDir, targetDir, delim)
    targetDir = entity0.targetDir
    eList = func.listExl(exlDir, delim)
    print('Directories to exclude:')
    print(eList)

    cwd = os.getcwd()
    if not os.path.isdir(targetDir):
        print('Error: Target directory not found\n')
        return ''

    sizeRows = []   #[folder, name, size]  -> content comparison
    stampRows = []  #[folder, name, mtime] -> name comparison
    groups = []     #filled by func.loadFileObj
    dupFileList.clear()
    opCounter = 0   #never incremented here; reported as-is
    totalFiles = 0

    for folderName, subFolders, fileNames in os.walk(targetDir):
        #Exclusion is a substring match on the whole folder path
        if any(eDir in folderName for eDir in eList):
            continue
        for fileName in fileNames:
            if fileName.lower() == thumbs.lower():
                continue
            totalFiles += 1
            tPath = func.buildPath(folderName, fileName, delim)
            fSize = os.path.getsize(tPath)
            mTime = os.path.getmtime(tPath)
            sizeRows.append([folderName, fileName, fSize])
            stampRows.append([folderName, fileName, mTime])

    if not sizeRows:
        print('There are no files in target directory\n')
        return ''

    if cmpMode == 0:
        cMode = 'Filecmp.cmp (shallow=False compare file content)'
    else:
        cMode = 'Hashlib (Checksum:SHA256)'

    #Duplicated names: after sorting by name, equal names are adjacent.
    #Every later entry is compared with the first entry of its run.
    dupName = []
    stampRows.sort(key=lambda row: row[1])
    anchor = stampRows[0]
    for row in stampRows[1:]:
        if row[1] != anchor[1]:
            anchor = row
            continue
        df1 = func.buildPath(anchor[0], anchor[1], delim)
        df2 = func.buildPath(row[0], row[1], delim)
        if anchor[2] > row[2]:
            df3 = 'latter'
        elif row[2] > anchor[2]:
            df3 = 'former'
        else:
            df3 = 'either'
        dupName.append(df1 + ' == ' + df2 + ', Remove: ' + df3)
    dnCounter = len(dupName)

    #Duplicated content: loadFileObj wants the list ordered by file size
    sizeRows.sort(key=lambda row: row[2])
    func.loadFileObj(sizeRows, groups, True, delim, cmpMode, hash_Mode)
    print('Finding duplicated files across all sub-directories...')

    dupList = []
    for grp in groups:
        #A group carries either one shared folder or one folder per file
        sharedDir = len(grp.fileDir) == 1

        #Two stamps per file: ctime at 2*j, atime at 2*j+1
        stamps = []
        for j, fname in enumerate(grp.fileNames):
            path = func.buildPath(grp.fileDir[0 if sharedDir else j], fname,
                                  delim)
            ct = os.path.getctime(path)
            at = os.path.getatime(path)
            stamps.append(ct)
            stamps.append(at)

        #First position holding the largest stamp (0 if none is > 0)
        newest = 0
        newestAt = 0
        for pos, stamp in enumerate(stamps):
            if stamp > newest:
                newest = stamp
                newestAt = pos
        cd = newestAt // 2  #back from stamp position to file index

        keepath = func.buildPath(grp.fileDir[0 if sharedDir else cd],
                                 grp.fileNames[cd], delim)
        pieces = ['To keep:\n' + keepath + '\nIn:\n' + keepath + '\n']
        for j, fname in enumerate(grp.fileNames):
            if j == cd:
                continue
            toRM = func.buildPath(grp.fileDir[0 if sharedDir else j], fname,
                                  delim)
            pieces.append(toRM + '\n')
            dupFileList.append(toRM)
        dupList.append(''.join(pieces))
    cpCounter = len(dupList)

    filecmp.clear_cache()
    reportPath = cwd + delim + 'findDupFiles_report.txt'
    #encoding arg is for writing non-English characters
    with open(reportPath, 'a', encoding='utf8') as outputFile:
        report = [
            '\n',
            'Target directory: ' + targetDir + '\n',
            'Find duplicated files across all sub-directories\n',
        ]
        if dupList:
            report.append('Files with duplicated content:\n')
            report.extend(entry + '\n' for entry in dupList)
        if dupName:
            report.append('Files with duplicated names:\n')
            report.extend(entry + '\n' for entry in dupName)
        report.extend([
            'Method of file comparison: ' + cMode + '\n',
            'Total file count: ' + str(totalFiles) + '\n',
            'Comparison Ops: ' + str(opCounter) + '\n',
            'Duplicated files: ' + str(cpCounter) + '\n',
            'Duplicated fileNames: ' + str(dnCounter) + '\n',
            'Only files with duplicated content are included in deletion\n',
            'Duplicated file naming require user investigation\n',
        ])
        outputFile.writelines(report)

    print('Method of file comparison: ' + cMode)
    print('Total file count: ' + str(totalFiles))
    print('Duplicated files: ' + str(cpCounter))
    print('Duplicated fileNames: ' + str(dnCounter))
    dup_size_est()
    print('Only files with duplicated content are included in deletion')
    print('Duplicated file naming require user investigation')
    print('Please find analysis report in ' + reportPath)
    print('Analysis completed\n')
    return '-'.join([str(totalFiles), str(dnCounter), str(cpCounter)])
예제 #4
0
def localCompare(targetDir, cmpMode, exlDir):
    """Find files with duplicated content inside each folder of targetDir.

    Every non-excluded folder is handled on its own: its files are sorted
    by size and grouped by func.loadFileObj. For each group the file with
    the most recent ctime/atime is kept and the others are appended to the
    module-level dupFileList.

    The report is appended to 'findDupFiles_report.txt' in the current
    working directory.

    Args:
        targetDir: directory to scan (passed through entityc.entity first).
        cmpMode: 0 selects the filecmp label, anything else the hashlib
            label; the value is also forwarded to func.loadFileObj.
        exlDir: exclusions, parsed by func.listExl.

    Returns:
        '' when targetDir is not a directory, otherwise
        '<total files>-0-<duplicated content groups>'.
    """
    entity0 = entityc.entity(targetDir, targetDir, delim)
    targetDir = entity0.targetDir
    eList = func.listExl(exlDir, delim)
    print('Directories to exclude:')
    print(eList)

    cwd = os.getcwd()
    if not os.path.isdir(targetDir):
        print('Error: Target directory not found\n')
        return ''

    dupFileList.clear()
    dupList = []
    opCounter = 0  #never incremented here; reported as-is
    totalFiles = 0
    if cmpMode == 0:
        cMode = 'Filecmp.cmp (shallow=False compare file content)'
    else:
        cMode = 'Hashlib (Checksum:SHA256)'

    print('Finding duplicated files within each sub-directory...')
    for folderName, subFolders, fileNames in os.walk(targetDir):
        #Exclusion is a substring match on the whole folder path
        if any(eDir in folderName for eDir in eList):
            continue

        sizeRows = []  #[folder, name, size] for this folder only
        groups = []    #filled by func.loadFileObj
        for fileName in fileNames:
            if fileName.lower() == thumbs.lower():
                continue
            totalFiles += 1
            fSize = os.path.getsize(
                func.buildPath(folderName, fileName, delim))
            sizeRows.append([folderName, fileName, fSize])
        sizeRows.sort(key=lambda row: row[2])
        func.loadFileObj(sizeRows, groups, True, delim, cmpMode, hash_Mode)

        for grp in groups:
            #Two stamps per file: ctime at 2*j, atime at 2*j+1
            stamps = []
            for fname in grp.fileNames:
                path = func.buildPath(grp.fileDir[0], fname, delim)
                ct = os.path.getctime(path)
                at = os.path.getatime(path)
                stamps.append(ct)
                stamps.append(at)

            #First position holding the largest stamp (0 if none is > 0)
            newest = 0
            newestAt = 0
            for pos, stamp in enumerate(stamps):
                if stamp > newest:
                    newest = stamp
                    newestAt = pos
            cd = newestAt // 2  #back from stamp position to file index

            summary = 'To keep: ' + grp.fileNames[cd] + ', to remove:'
            for j, fname in enumerate(grp.fileNames):
                if j == cd:
                    continue
                summary = summary + ' ' + fname
                dupFileList.append(
                    func.buildPath(grp.fileDir[0], fname, delim))
            dupList.append(summary + ', at ' + grp.fileDir[0])
    cpCounter = len(dupList)

    filecmp.clear_cache()
    reportPath = cwd + delim + 'findDupFiles_report.txt'
    #encoding arg is for writing non-English characters
    with open(reportPath, 'a', encoding='utf8') as outputFile:
        report = [
            '\n',
            'Target directory: ' + targetDir + '\n',
            'Find duplicated files within each sub-directory\n',
        ]
        if dupList:
            report.append('Files with duplicated content:\n')
            report.extend(entry + '\n' for entry in dupList)
        report.extend([
            'Method of file comparison: ' + cMode + '\n',
            'Total file count: ' + str(totalFiles) + '\n',
            'Comparison Ops: ' + str(opCounter) + '\n',
            'Duplicated files: ' + str(cpCounter) + '\n',
        ])
        outputFile.writelines(report)

    print('Method of file comparison: ' + cMode)
    print('Total file count: ' + str(totalFiles))
    print('Duplicated files: ' + str(cpCounter))
    dup_size_est()
    print('Please find analysis report in ' + reportPath)
    print('Analysis completed\n')
    return str(totalFiles) + '-0-' + str(cpCounter)
def delDirFile(wd, targetName, isFolder, exlDir):
    """List every file or folder below wd whose name equals targetName.

    Nothing is deleted here: matches are collected in the module-level
    delDFList (cleared first) and printed. Names are compared
    case-insensitively.

    Args:
        wd: directory to scan (passed through entityc.entity first).
        targetName: file name (with extension) or folder name to look for.
        isFolder: False to search files; any other value searches folders.
        exlDir: exclusions, parsed by func.listExl.

    Returns:
        Number of matches found; 0 also when wd is not a directory or
        targetName is rejected (file without extension, .sys/.dll file in
        any letter case, folder name starting with '.').
    """
    entity0 = entityc.entity(wd, wd, delim)
    wd = entity0.targetDir
    eList = func.listExl(exlDir, delim)
    print('Directories to exclude:')
    print(eList)

    if not os.path.isdir(wd):
        print('Error: target directory not found\n')
        return 0

    cpCounter = 0
    delDFList.clear()
    #Kept as an explicit comparison: only a value equal to False selects
    #the file search, so e.g. None still means "folder" as before
    wantFile = (isFolder == False)
    nameParts = targetName.split('.')

    if wantFile:
        #Matching below ignores case, so the protected-extension check must
        #ignore case too; otherwise 'X.SYS' would slip through and still
        #match 'x.sys' on disk
        if len(nameParts) == 1 or nameParts[-1].lower() in ('sys', 'dll'):
            print(
                'Error: file extension must be specified and cannot be .sys or .dll\n'
            )
            return 0
    elif nameParts[0] == '':
        print('Error: folder name cannot start with "."\n')
        return 0

    print('Finding specified file/folder for deletion...')
    wanted = targetName.lower()
    for folderName, subFolders, fileNames in os.walk(wd):
        #Exclusion is a substring match on the whole folder path
        if any(eDir in folderName for eDir in eList):
            continue
        if wantFile:
            for fileName in fileNames:
                if fileName.lower() == wanted:
                    delDFList.append(
                        func.buildPath(folderName, fileName, delim))
                    cpCounter = cpCounter + 1
        elif folderName != wd:
            #Last path component is the folder's own name
            workFolder = folderName.split(delim)[-1]
            if workFolder.lower() == wanted:
                delDFList.append(folderName)
                cpCounter = cpCounter + 1

    print('Number of instances found: ' + str(cpCounter))
    for i in delDFList:
        print(str(i))
    print('Analysis completed\n')
    return cpCounter