def backupCopy(srcDir, dstDir, exlSrcDir, exlDstDir):
    """Analyse what has to change in dstDir so that it mirrors srcDir.

    This function itself only plans: it resets the module-level counters and
    work lists, queues whole destination folders for removal (rmtreeList) and
    whole source folders for copying (copytreeList), and hands folders that
    exist on both sides to compareNcopy.  A summary is printed at the end.

    Args:
        srcDir: source directory (normalised through entityc.entity).
        dstDir: destination directory (normalised through entityc.entity).
        exlSrcDir: source exclusions, converted with func.listExl.
        exlDstDir: destination exclusions, converted with func.listExl.

    Returns:
        0 if either directory is missing, both are the same path, or none of
        rmtreeList / copytreeList / renameList / removeList / checkNcopyList
        received an entry; 1 otherwise.
    """
    global totalSrcFile, totalDstFile, totalSrcCopy, totalDstRemove
    global totalDstRename, profile1Num, profile2Num, profile3Num

    entity0 = entityc.entity(srcDir, dstDir, delim)
    srcDir = entity0.srcDir
    dstDir = entity0.dstDir
    dst_drive = entity0.dst_drive
    esList = func.listExl(exlSrcDir, delim)
    edList = func.listExl(exlDstDir, delim)
    print('Analysing sync from ' + srcDir + ' to ' + dstDir)
    print('Source directories to exclude:')
    print(esList)
    print('Destination directories to exclude:')
    print(edList)

    if (not os.path.isdir(srcDir) or not os.path.isdir(dstDir)
            or srcDir == dstDir):
        print('Error: source or destination directory not found\n')
        return 0

    # Reset the shared state only once the inputs are known to be usable, and
    # before compareNcopy runs (presumably it updates these - confirm there).
    totalSrcFile = 0
    totalDstFile = 0
    totalSrcCopy = 0
    totalDstRemove = 0
    totalDstRename = 0
    rmtreeList.clear()
    copytreeList.clear()
    renameList.clear()
    removeList.clear()
    checkNcopyList.clear()
    # Code profiling state.
    profile1.clear()
    profile2.clear()
    profile3.clear()
    profile1Num = 0
    profile2Num = 0
    profile3Num = 0

    # Files sitting directly in srcDir/dstDir, ignoring all sub-folders.
    if dstDir not in edList and srcDir not in esList:
        compareNcopy(srcDir, dstDir)

    # Both lists hold paths relative to their root, shortest first, so a
    # parent folder is always processed before any of its children.
    dstFolderList = _collectRelFolders(dstDir, edList)
    srcFolderList = _collectRelFolders(srcDir, esList)

    # Destination pass: queue folders missing from the source for removal and
    # remember the ones present on both sides.
    srcFolderSet = set(srcFolderList)
    sameFolderSrc = set()
    dstRemoved = []
    for dstFolder in dstFolderList:
        if dstFolder in srcFolderSet:
            sameFolderSrc.add(dstFolder)
            continue
        # A child of an already queued folder disappears with its parent.
        # Compare on "parent + delim" so that a sibling such as /foobar is
        # not mistaken for a child of /foo.
        if any(dstFolder.startswith(gone + delim) for gone in dstRemoved):
            continue
        tf = dstDir + dstFolder
        cpt = sum(len(files) for _root, _dirs, files in os.walk(tf))
        totalDstFile += cpt
        totalDstRemove += cpt
        rmtreeList.append(tf)
        dstRemoved.append(dstFolder)

    # Source pass: folders missing from the destination are queued for a
    # whole-tree copy; folders present on both sides go through compareNcopy.
    for srcFolder in srcFolderList:
        src = srcDir + srcFolder
        dst = dstDir + srcFolder
        if srcFolder in sameFolderSrc:
            compareNcopy(src, dst)
            continue
        # os.path.isdir(dst) cannot be used here because nothing has been
        # copied yet; rely on the queue instead.  Same "parent + delim" rule
        # as above to avoid matching siblings that share a name prefix.
        if any(src.startswith(pp[0] + delim) for pp in copytreeList):
            continue
        cpt = sum(len(files) for _root, _dirs, files in os.walk(src))
        totalSrcCopy += cpt
        totalSrcFile += cpt
        copytreeList.append([src, dst])

    sync_size_est(dst_drive)
    filecmp.clear_cache()

    print('Total source file count: ' + str(totalSrcFile))
    print('Total destination file count: ' + str(totalDstFile))
    print('Total source file to copy: ' + str(totalSrcCopy))
    print('Total destination file to remove: ' + str(totalDstRemove))
    print('Total destination file to rename: ' + str(totalDstRename))
    # Sanity check on the bookkeeping: after the planned operations the
    # destination should hold as many files as the source.
    if totalSrcFile != totalDstFile - totalDstRemove + totalSrcCopy:
        print("Issue encountered, please investigate")
    else:
        print("No issue encountered")
    print('Analysis finished\n')

    rets = (len(rmtreeList) + len(copytreeList) + len(renameList)
            + len(removeList) + len(checkNcopyList))
    return 0 if rets == 0 else 1


def _collectRelFolders(rootDir, exclList):
    """Return rootDir's sub-folders as paths relative to rootDir, shortest first.

    Folders whose path contains an entry of exclList, folders whose last path
    component equals the module-level pycache name, and rootDir itself are
    left out.
    """
    relFolders = []
    for folderName, _subFolders, _fileNames in os.walk(rootDir):
        if any(exDir in folderName for exDir in exclList):
            continue
        if folderName == rootDir:
            continue
        if folderName.split(delim)[-1] == pycache:
            continue
        # os.walk yields paths that begin with rootDir; strip only that
        # leading occurrence so a later repeat of the same text is kept.
        relFolders.append(folderName.replace(rootDir, '', 1))
    relFolders.sort(key=len)
    return relFolders
def validate(srcDir, dstDir, exlSrcDir, exlDstDir):
    """Verify by checksum that source and destination hold the same content.

    Two modes, selected by the type of srcDir:

    * srcDir is a list: every item is turned into two paths with
      func.buildPath(item[0], item[2]) and func.buildPath(item[1], item[3])
      and the two files are compared with func.hasher.  The other arguments
      are not used in this mode.
    * otherwise: every file below srcDir and dstDir (minus the excluded
      folders) is listed, the lists are paired up by relative path, and both
      the relative path and the file content must hash to the same value.

    Args:
        srcDir: source directory, or a list of 4-item records (see above).
        dstDir: destination directory.
        exlSrcDir: source exclusions, converted with func.listExl.
        exlDstDir: destination exclusions, converted with func.listExl.

    Returns:
        1 when everything matches, 0 on the first mismatch.
    """
    print('Checksum...')
    if isinstance(srcDir, list):
        for i in srcDir:
            p1 = func.buildPath(i[0], i[2], delim)
            p2 = func.buildPath(i[1], i[3], delim)
            fileHash1 = func.hasher(p1, False, hash_Mode)
            fileHash2 = func.hasher(p2, False, hash_Mode)
            if fileHash1 != fileHash2:
                print('Checksum: File checksum failed')
                return 0
        print('Checksum: File checksum passed')
        return 1

    entity0 = entityc.entity(srcDir, dstDir, delim)
    srcDir = entity0.srcDir
    dstDir = entity0.dstDir
    esList = func.listExl(exlSrcDir, delim)
    edList = func.listExl(exlDstDir, delim)
    print('Source directories to exclude:')
    print(esList)
    print('Destination directories to exclude:')
    print(edList)

    srcList = _listFilesUnder(srcDir, esList)
    dstList = _listFilesUnder(dstDir, edList)
    if len(srcList) != len(dstList):
        print('Checksum: directory checksum failed')
        return 0

    # os.walk returns entries in whatever order the filesystem lists them, so
    # two identical trees can be walked in different orders.  Sort both sides
    # by relative path so that entry i really is the same file on both sides.
    srcList.sort(key=lambda p: p.replace(srcDir, '', 1))
    dstList.sort(key=lambda p: p.replace(dstDir, '', 1))

    for p1, p2 in zip(srcList, dstList):
        # Strip only the leading root; a plain replace() would also delete
        # later occurrences of the same text inside the path.
        tStr1 = p1.replace(srcDir, '', 1)
        strHash1 = func.hasher(tStr1, True, hash_Mode)
        fileHash1 = func.hasher(p1, False, hash_Mode)
        tStr2 = p2.replace(dstDir, '', 1)
        strHash2 = func.hasher(tStr2, True, hash_Mode)
        fileHash2 = func.hasher(p2, False, hash_Mode)
        if strHash1 != strHash2 or fileHash1 != fileHash2:
            print('Checksum: directory checksum failed')
            return 0

    print('Checksum: directory checksum passed')
    return 1


def _listFilesUnder(rootDir, exclList):
    """Return the path of every file below rootDir, skipping excluded folders.

    A folder is skipped when its path contains any entry of exclList.
    """
    found = []
    for pf, _sf, fn in os.walk(rootDir):
        if any(exDir in pf for exDir in exclList):
            continue
        found.extend(func.buildPath(pf, f, delim) for f in fn)
    return found
def globalCompare(targetDir, cmpMode, exlDir):
    """Report duplicated files across every sub-directory of targetDir.

    Two kinds of duplicates are reported: files sharing a name (text report
    only) and files that func.loadFileObj groups as having the same content.
    For each content group the member with the most recent creation/access
    time is kept; the paths of the others are appended to the module-level
    dupFileList.  The report is appended to findDupFiles_report.txt in the
    current working directory.

    Args:
        targetDir: directory to scan (normalised through entityc.entity).
        cmpMode: 0 selects the filecmp label, anything else the hashlib label;
            the value is also forwarded to func.loadFileObj.
        exlDir: exclusions, converted with func.listExl.

    Returns:
        '' when targetDir is not a directory or contains no files, otherwise
        the string '<total files>-<duplicated names>-<content groups>'.
    """
    entity0 = entityc.entity(targetDir, targetDir, delim)
    targetDir = entity0.targetDir
    eList = func.listExl(exlDir, delim)
    print('Directories to exclude:')
    print(eList)
    cwd = os.getcwd()

    if not os.path.isdir(targetDir):
        print('Error: Target directory not found\n')
        return ''

    sizeRecords = []  # [folder, file name, size in bytes]
    nameRecords = []  # [folder, file name, modification time]
    groups = []  # filled by func.loadFileObj
    dupFileList.clear()
    opCounter = 0
    cpCounter = 0
    dnCounter = 0
    totalFiles = 0

    for folderName, _subFolders, fileNames in os.walk(targetDir):
        # A folder is skipped when any exclusion entry occurs in its path.
        if any(eDir in folderName for eDir in eList):
            continue
        for fileName in fileNames:
            if fileName.lower() == thumbs.lower():
                continue
            totalFiles += 1
            tPath = func.buildPath(folderName, fileName, delim)
            fSize = os.path.getsize(tPath)
            mTime = os.path.getmtime(tPath)
            sizeRecords.append([folderName, fileName, fSize])
            nameRecords.append([folderName, fileName, mTime])

    if not sizeRecords:
        print('There are no files in target directory\n')
        return ''

    cMode = ('Filecmp.cmp (shallow=False compare file content)'
             if cmpMode == 0 else 'Hashlib (Checksum:SHA256)')

    # Duplicated names: after sorting by name, the first record of each run
    # is the reference and every following record with the same name is
    # reported against it.
    dupName = []
    dupList = []
    nameRecords.sort(key=lambda rec: rec[1])
    anchor = None
    for rec in nameRecords:
        if anchor is None or rec[1] != anchor[1]:
            anchor = rec
            continue
        firstPath = func.buildPath(anchor[0], anchor[1], delim)
        secondPath = func.buildPath(rec[0], rec[1], delim)
        if anchor[2] > rec[2]:
            verdict = 'latter'
        elif rec[2] > anchor[2]:
            verdict = 'former'
        else:
            verdict = 'either'
        dupName.append(firstPath + ' == ' + secondPath + ', Remove: ' + verdict)
        dnCounter += 1

    # Duplicated content: loadFileObj receives the records ordered by size
    # and fills `groups` with the sets of duplicated files.
    sizeRecords.sort(key=lambda rec: rec[2])
    func.loadFileObj(sizeRecords, groups, True, delim, cmpMode, hash_Mode)
    print('Finding duplicated files across all sub-directories...')

    for group in groups:
        names = group.fileNames
        dirs = group.fileDir
        # With a single directory entry all names live in that directory;
        # otherwise directory j belongs to name j.
        stamps = []
        for j in range(len(names)):
            k = 0 if len(dirs) == 1 else j
            path = func.buildPath(dirs[k], names[j], delim)
            stamps.append(os.path.getctime(path))
            stamps.append(os.path.getatime(path))

        # First position holding the largest timestamp (ties keep the first).
        newestValue = 0
        newestPos = 0
        for pos, stamp in enumerate(stamps):
            if stamp > newestValue:
                newestValue = stamp
                newestPos = pos
        keepIdx = newestPos // 2  # two timestamps were stored per file

        k = 0 if len(dirs) == 1 else keepIdx
        keepath = func.buildPath(dirs[k], names[keepIdx], delim)
        pieces = ['To keep:\n' + keepath + '\nIn:\n' + keepath + '\n']
        for j in range(len(names)):
            if j == keepIdx:
                continue
            k = 0 if len(dirs) == 1 else j
            toRM = func.buildPath(dirs[k], names[j], delim)
            pieces.append(toRM + '\n')
            dupFileList.append(toRM)
        dupList.append(''.join(pieces))
        cpCounter += 1

    filecmp.clear_cache()

    reportPath = cwd + delim + 'findDupFiles_report.txt'
    reportLines = [
        '\n',
        'Target directory: ' + targetDir + '\n',
        'Find duplicated files across all sub-directories\n',
    ]
    if len(dupList) > 0:
        reportLines.append('Files with duplicated content:\n')
        reportLines.extend(entry + '\n' for entry in dupList)
    if len(dupName) > 0:
        reportLines.append('Files with duplicated names:\n')
        reportLines.extend(entry + '\n' for entry in dupName)
    reportLines.extend([
        'Method of file comparison: ' + cMode + '\n',
        'Total file count: ' + str(totalFiles) + '\n',
        'Comparison Ops: ' + str(opCounter) + '\n',
        'Duplicated files: ' + str(cpCounter) + '\n',
        'Duplicated fileNames: ' + str(dnCounter) + '\n',
        'Only files with duplicated content are included in deletion\n',
        'Duplicated file naming require user investigation\n',
    ])
    # The encoding argument allows non-English characters in the report.
    with open(reportPath, 'a', encoding='utf8') as outputFile:
        outputFile.writelines(reportLines)

    print('Method of file comparison: ' + cMode)
    print('Total file count: ' + str(totalFiles))
    print('Duplicated files: ' + str(cpCounter))
    print('Duplicated fileNames: ' + str(dnCounter))
    dup_size_est()
    print('Only files with duplicated content are included in deletion')
    print('Duplicated file naming require user investigation')
    print('Please find analysis report in ' + reportPath)
    print('Analysis completed\n')

    return str(totalFiles) + '-' + str(dnCounter) + '-' + str(cpCounter)
def localCompare(targetDir, cmpMode, exlDir):
    """Report duplicated files inside each sub-directory of targetDir.

    Every non-excluded folder is examined on its own: its files are passed to
    func.loadFileObj, and for each group of duplicates that comes back the
    member with the most recent creation/access time is kept while the paths
    of the others are appended to the module-level dupFileList.  The report
    is appended to findDupFiles_report.txt in the current working directory.

    Args:
        targetDir: directory to scan (normalised through entityc.entity).
        cmpMode: 0 selects the filecmp label, anything else the hashlib label;
            the value is also forwarded to func.loadFileObj.
        exlDir: exclusions, converted with func.listExl.

    Returns:
        '' when targetDir is not a directory, otherwise the string
        '<total files>-0-<duplicate groups>'.
    """
    entity0 = entityc.entity(targetDir, targetDir, delim)
    targetDir = entity0.targetDir
    eList = func.listExl(exlDir, delim)
    print('Directories to exclude:')
    print(eList)
    cwd = os.getcwd()

    if not os.path.isdir(targetDir):
        print('Error: Target directory not found\n')
        return ''

    dupFileList.clear()
    dupList = []
    opCounter = 0
    cpCounter = 0
    totalFiles = 0
    cMode = ('Filecmp.cmp (shallow=False compare file content)'
             if cmpMode == 0 else 'Hashlib (Checksum:SHA256)')

    print('Finding duplicated files within each sub-directory...')
    for folderName, _subFolders, fileNames in os.walk(targetDir):
        # A folder is skipped when any exclusion entry occurs in its path.
        if any(eDir in folderName for eDir in eList):
            continue

        sizeRecords = []  # [folder, file name, size in bytes]
        groups = []  # filled by func.loadFileObj
        for fileName in fileNames:
            if fileName.lower() == thumbs.lower():
                continue
            totalFiles += 1
            fSize = os.path.getsize(
                func.buildPath(folderName, fileName, delim))
            sizeRecords.append([folderName, fileName, fSize])

        # loadFileObj receives the records ordered by size and fills `groups`
        # with the sets of duplicated files of this folder.
        sizeRecords.sort(key=lambda rec: rec[2])
        func.loadFileObj(sizeRecords, groups, True, delim, cmpMode, hash_Mode)

        for group in groups:
            names = group.fileNames
            stamps = []
            for name in names:
                path = func.buildPath(group.fileDir[0], name, delim)
                stamps.append(os.path.getctime(path))
                stamps.append(os.path.getatime(path))

            # First position holding the largest timestamp (ties keep the
            # first one found).
            newestValue = 0
            newestPos = 0
            for pos, stamp in enumerate(stamps):
                if stamp > newestValue:
                    newestValue = stamp
                    newestPos = pos
            keepIdx = newestPos // 2  # two timestamps were stored per file

            summary = 'To keep: ' + names[keepIdx] + ', to remove:'
            for j, name in enumerate(names):
                if j == keepIdx:
                    continue
                summary += ' ' + name
                dupFileList.append(
                    func.buildPath(group.fileDir[0], name, delim))
            summary += ', at ' + group.fileDir[0]
            dupList.append(summary)
            cpCounter += 1

    filecmp.clear_cache()

    reportPath = cwd + delim + 'findDupFiles_report.txt'
    reportLines = [
        '\n',
        'Target directory: ' + targetDir + '\n',
        'Find duplicated files within each sub-directory\n',
    ]
    if len(dupList) > 0:
        reportLines.append('Files with duplicated content:\n')
        reportLines.extend(entry + '\n' for entry in dupList)
    reportLines.extend([
        'Method of file comparison: ' + cMode + '\n',
        'Total file count: ' + str(totalFiles) + '\n',
        'Comparison Ops: ' + str(opCounter) + '\n',
        'Duplicated files: ' + str(cpCounter) + '\n',
    ])
    # The encoding argument allows non-English characters in the report.
    with open(reportPath, 'a', encoding='utf8') as outputFile:
        outputFile.writelines(reportLines)

    print('Method of file comparison: ' + cMode)
    print('Total file count: ' + str(totalFiles))
    print('Duplicated files: ' + str(cpCounter))
    dup_size_est()
    print('Please find analysis report in ' + reportPath)
    print('Analysis completed\n')

    return str(totalFiles) + '-0-' + str(cpCounter)
def delDirFile(wd, targetName, isFolder, exlDir):
    """Find every file or folder named targetName below wd.

    Nothing is deleted here; the matching paths are stored in the
    module-level delDFList and printed.  Names are matched case-insensitively.

    Args:
        wd: directory to search (normalised through entityc.entity).
        targetName: file name (with extension) or folder name to look for.
        isFolder: False to look for files, anything else to look for folders.
        exlDir: exclusions, converted with func.listExl.

    Returns:
        The number of matches, or 0 when wd is not a directory or targetName
        is rejected (file without extension, .sys/.dll file, or a folder name
        that is empty or starts with a dot).
    """
    entity0 = entityc.entity(wd, wd, delim)
    wd = entity0.targetDir
    eList = func.listExl(exlDir, delim)
    print('Directories to exclude:')
    print(eList)

    if not os.path.isdir(wd):
        print('Error: target directory not found\n')
        return 0

    cpCounter = 0
    delDFList.clear()

    # "== False" is kept on purpose: callers passing e.g. None keep getting
    # the folder branch exactly as before.
    wantFile = isFolder == False  # noqa: E712
    nameParts = targetName.split('.')
    if wantFile:
        # The search below is case-insensitive, so the protected extensions
        # must be recognised case-insensitively as well; otherwise a target
        # such as "KERNEL32.DLL" would slip through and match kernel32.dll.
        if len(nameParts) == 1 or nameParts[-1].lower() in ('sys', 'dll'):
            print(
                'Error: file extension must be specified and cannot be .sys or .dll\n'
            )
            return 0
    elif nameParts[0] == '':
        print('Error: folder name cannot start with "."\n')
        return 0

    wanted = targetName.lower()
    print('Finding specified file/folder for deletion...')
    for folderName, _subFolders, fileNames in os.walk(wd):
        # A folder is skipped when any exclusion entry occurs in its path.
        if any(eDir in folderName for eDir in eList):
            continue
        if wantFile:
            for fileName in fileNames:
                if fileName.lower() == wanted:
                    delDFList.append(
                        func.buildPath(folderName, fileName, delim))
                    cpCounter += 1
        elif folderName != wd:
            # Compare the last path component only; wd itself never counts.
            workFolder = folderName.split(delim)[-1]
            if workFolder.lower() == wanted:
                delDFList.append(folderName)
                cpCounter += 1

    print('Number of instances found: ' + str(cpCounter))
    for i in delDFList:
        print(str(i))
    print('Analysis completed\n')
    return cpCounter