def extractAllFilesFromDirAndSubdirs(db, logger, extractDir, dirpathRoot):
    """Extract every file listed under ``dirpathRoot`` into ``extractDir``.

    The directory/file listing comes from
    ``miscQueries.getAllPathsAndFilenamesFromDirAndSubdirs``. For each listed
    file the destination directory is ``extractDir`` joined with the original
    directory path (with every ":" replaced by "_"); it is created on demand.
    The file is copied from the first depot root for which
    ``FileUtils.CopyFileFromDepot`` returns a truthy value. When neither
    depot yields the file, its status is set to "notFound".
    """
    depot_roots = ("I:\\objectstore1", "F:\\objectstore2")

    logger.log("extracting files from %s to %s" % (dirpathRoot, extractDir))
    files_by_dir = miscQueries.getAllPathsAndFilenamesFromDirAndSubdirs(
        db, dirpathRoot)
    logger.log("got %d dirs" % len(files_by_dir.keys()))

    for dirpath in files_by_dir:
        logger.log("directory %s" % dirpath)
        entries = files_by_dir[dirpath]
        logger.log("%d files" % len(entries))

        for filehash, filename in entries:
            logger.log("filehash: %s, filename: %s" % (filehash, filename))

            # Replace ":" so the original path can be nested under extractDir.
            fixedDirpath = dirpath.replace(":", "_")
            logger.log(fixedDirpath)
            destinationDir = os.path.join(extractDir, fixedDirpath)
            logger.log("destination: %s" % destinationDir)
            if not os.path.isdir(destinationDir):
                os.makedirs(destinationDir)

            # any() stops at the first depot that delivers the file, so the
            # second depot is only consulted when the first one fails.
            copied = any(
                FileUtils.CopyFileFromDepot(
                    db, root, destinationDir, filehash, filename)
                for root in depot_roots
            )
            if not copied:
                logger.log(
                    "#############################file not found, setting status"
                )
                miscQueries.setFileStatus(db, filehash, "notFound")
def setToRemoveStatusForFilesInDirUsingCache(db, dirhash, logger,
                                             origDirsForFileDict,
                                             origFilesForDirDict):
    """Flag files located only in ``dirhash`` as "toRemove" and drop the dir.

    Args:
        db: database handle passed through to ``miscQueries`` and
            ``deleteDirEntries``.
        dirhash: hash identifying the directory being removed.
        logger: object with a ``log(message)`` method.
        origDirsForFileDict: cache mapping filehash -> collection of dirhashes
            the file is recorded in. It is mutated: ``dirhash`` is removed
            from multi-location entries, and entries for files flagged here
            are replaced by ``None``.
        origFilesForDirDict: cache mapping dirhash -> iterable of
            ``(filehash, filename)`` pairs.

    ``deleteDirEntries`` is called for ``dirhash`` in every case, including
    when the directory has no cached files.
    """
    filelist = origFilesForDirDict.get(dirhash)
    if not filelist:
        deleteDirEntries(db, dirhash, logger)
        logger.log("no files found for this dir")
        return

    for filehash, filename in filelist:
        logger.log("filename: %s" % filename)
        dirs = origDirsForFileDict[filehash]

        if dirs is None:
            # The cache entry is set to None once a file has been flagged, so
            # a second listing of the same hash must not be measured with
            # len() or flagged again.
            logger.log("file already processed, skipping")
            continue

        if len(dirs) > 1:
            logger.log("multiple locations, not deleting file")
            dirs.remove(dirhash)
            continue

        # Only act when this directory is the file's sole recorded location.
        # The check is explicit because a `continue` inside a loop over
        # `dirs` would only skip that inner loop's iteration, not this file.
        if list(dirs) != [dirhash]:
            logger.log("dirhash does not match, not deleting")
            continue

        logger.log("set toremove status for %s" % filehash)
        miscQueries.setFileStatus(db, filehash, "toRemove")
        origDirsForFileDict[filehash] = None

    deleteDirEntries(db, dirhash, logger)
def extractFileOrSetDeleted(db, destinationDir, filehash, newFilename, logger):
    """Copy ``filehash`` out of a depot as ``newFilename`` or mark it deleted.

    The depot roots are tried in order; the function returns as soon as
    ``FileUtils.CopyFileFromDepot`` reports success (a truthy result). If no
    depot delivers the file, its status is set to "deleted".
    """
    for depot_root in ("I:\\objectstore1", "F:\\objectstore2"):
        if FileUtils.CopyFileFromDepot(db, depot_root, destinationDir,
                                       filehash, newFilename):
            return

    logger.log("file not found, setting status")
    miscQueries.setFileStatus(db, filehash, "deleted")
def moveOutCorruptedFilesFromDepot(db, logger):
    """Move files with status "corrupted" out of the depot.

    Each such file is moved (keeping its hash as the file name) from the
    depot root to the corrupt-files directory via
    ``FileUtils.MoveFileFromDepot``. Files for which the move reports success
    get the status "toReplace"; the others are left untouched.
    """
    depot_root = "F:\\objectstore2"
    corrupt_dir = "F:\\corruptFiles"

    corrupted_rows = miscQueries.getFilesWithStatus(db, "corrupted")
    for row in corrupted_rows:
        filehash = row[0]
        logger.log(filehash)

        if not FileUtils.MoveFileFromDepot(db, depot_root, corrupt_dir,
                                           filehash, filehash):
            continue

        logger.log("moved %s" % filehash)
        miscQueries.setFileStatus(db, filehash, "toReplace")
def deleteFilesInDirUsingCache(db, dirhash, logger, origDirsForFileDict=None,
                               origFilesForDirDict=None):
    """Delete files located only in ``dirhash`` and drop the dir's entries.

    Args:
        db: database handle passed through to ``miscQueries`` and
            ``deleteDirEntries``.
        dirhash: hash identifying the directory being removed.
        logger: object with a ``log(message)`` method.
        origDirsForFileDict: cache mapping filehash -> collection of dirhashes
            the file is recorded in. It is mutated: ``dirhash`` is removed
            from multi-location entries, and entries for deleted files are
            replaced by ``None``.
        origFilesForDirDict: cache mapping dirhash -> iterable of
            ``(filehash, filename)`` pairs.

    Raises:
        SystemExit: with code 1 when either cache is missing or empty; the
            uncached code path is not supported.
    """
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # `site` module and is not guaranteed to exist in every interpreter setup.
    if not origDirsForFileDict:
        logger.log("not supported as need to check code, exiting")
        raise SystemExit(1)
    if not origFilesForDirDict:
        logger.log("not supported as need to check code, exiting")
        raise SystemExit(1)

    filelist = origFilesForDirDict.get(dirhash)
    if not filelist:
        deleteDirEntries(db, dirhash, logger)
        logger.log("no files found for this dir")
        return

    for filehash, filename in filelist:
        logger.log("filename: %s" % filename)
        dirs = origDirsForFileDict[filehash]

        if dirs is None:
            # The cache entry is set to None once a file has been deleted, so
            # a second listing of the same hash must not be measured with
            # len() or deleted again.
            logger.log("file already processed, skipping")
            continue

        if len(dirs) > 1:
            logger.log("multiple locations, not deleting file")
            dirs.remove(dirhash)
            continue

        # Only delete when this directory is the file's sole recorded
        # location. The check is explicit because a `continue` inside a loop
        # over `dirs` would only skip that inner loop's iteration, not the
        # deletion below.
        if list(dirs) != [dirhash]:
            logger.log("dirhash does not match, not deleting")
            continue

        logger.log("deleting file %s" % filehash)
        deleteFileFromDisk(filehash, logger)
        origDirsForFileDict[filehash] = None
        logger.log("setting status to deleted")
        miscQueries.setFileStatus(db, filehash, "deleted")

    deleteDirEntries(db, dirhash, logger)
def getFilesFromBackup(db, logger):
    """Try to recover files with status "notFound" from the backup depot.

    Every file whose status is "notFound" is copied (keeping its hash as the
    file name) from the backup depot root into the replacements directory via
    ``FileUtils.CopyFileFromDepot``. Files for which the copy reports success
    get the status "foundReplacement"; the others keep their status.
    """
    # Backslashes are escaped explicitly: "\o" is not a valid escape sequence
    # and only worked because Python leaves unknown escapes in place.
    depotRootPath = "F:\\objectstore1p5"
    destinationDirPath = "I:\\replacements"

    missing_rows = miscQueries.getFilesWithStatus(db, "notFound")
    for row in missing_rows:
        filehash = row[0]
        copied = FileUtils.CopyFileFromDepot(db, depotRootPath,
                                             destinationDirPath, filehash,
                                             filehash)
        if copied:
            logger.log("copied %s" % filehash)
            miscQueries.setFileStatus(db, filehash, "foundReplacement")
def checkReplacements(db, depotRoot, replacementsDir, logger):
    """Verify replacement files by hash and move the good ones into the depot.

    Each entry of ``replacementsDir`` is hashed with
    ``Sha1HashUtilities.HashFile``. When the hash equals the file name
    (compared case-insensitively) the file is handed to
    ``MoveFileIntoDepotIfDoesNotExist`` and its status is set to "replaced".
    Otherwise the replacement is removed from disk.
    """
    for filename in os.listdir(replacementsDir):
        logger.log(filename)
        filepath = os.path.join(replacementsDir, filename)
        logger.log("file: %s" % filepath)
        filehash = Sha1HashUtilities.HashFile(filepath)
        logger.log("hash: %s" % filehash)

        if filehash.upper() != filename.upper():
            # The replacement's content does not match the hash it is named
            # after, so it is discarded.
            logger.log("hash failed, also corrupt")
            os.remove(filepath)
            continue

        logger.log("success")
        MoveFileIntoDepotIfDoesNotExist(depotRoot, filepath, filehash, logger)
        miscQueries.setFileStatus(db, filehash, "replaced")
sourceDir = "H:\\tryagain\91" destinationDir = "E:\\tryagain\91" if not os.path.isdir(destinationDir): os.mkdir(destinationDir) filelist = os.listdir(sourceDir) for filehash in filelist: sourcePath = os.path.join(sourceDir, filehash) destPath = os.path.join(destinationDir, filehash) try: shutil.copyfile(sourcePath, destPath) except: logger.log("cannot copy %s" % filehash) miscQueries.setFileStatus(db, filehash, "cannotCopy") ''' filelist = miscQueries.getFilesWithStatus(db, "cannotCopy") for fileinfo in filelist: #logger.log(fileinfo) filehash = fileinfo[0] logger.log(filehash) continue dirInfoList = miscQueries.getOriginalDirectoriesForFile(db, filehash) for dirInfo in dirInfoList: logger.log(dirInfo) dirhash = dirInfo[2] logger.log(dirhash) logger.log(miscQueries.getDirectoryPath(db, dirhash))
def markToReplace(db, filehash, logger):
    """Set the status of ``filehash`` to "toReplace".

    Afterwards ``logInfoForFirstCorruptFile`` is called.
    """
    new_status = "toReplace"
    miscQueries.setFileStatus(db, filehash, new_status)

    logInfoForFirstCorruptFile(db, logger)
def keep(db, filehash, logger):
    """Set the status of ``filehash`` to "keepForNow".

    Afterwards ``logInfoForLargestFile`` is called.
    """
    new_status = "keepForNow"
    miscQueries.setFileStatus(db, filehash, new_status)

    logInfoForLargestFile(db, logger)
def deleteFile(db, filehash, logger):
    """Delete ``filehash`` from disk and set its status to "deleted".

    Afterwards ``logInfoForFirstCorruptFile`` is called.
    """
    deleteFileFromDisk(filehash, logger)

    new_status = "deleted"
    miscQueries.setFileStatus(db, filehash, new_status)

    logInfoForFirstCorruptFile(db, logger)
import os.path
import FileUtils
import DbLogger
import CoreDb
import Sha1HashUtilities
import miscQueries

# Script: hash every file found under rootDirPath and, for each hash that is
# present in the listing database, set its status to "toDeleteCompletely".

logger = DbLogger.dbLogger()

# Alternative database location: "/Users/v724660/db/listingDb.sqlite"
dbpath = "C:\\depotListing\\listingDb.sqlite"
db = CoreDb.CoreDb(dbpath)

rootDirPath = "E:\\20141115_hss"
filelist = FileUtils.getListOfAllFilesInDir(rootDirPath)
logger.log("%d files" % len(filelist))

for dirpath, filename in filelist:
    logger.log("%s: %s" % (dirpath, filename))
    filepath = os.path.join(dirpath, filename)

    # The hash is upper-cased before it is looked up in the database.
    filehash = Sha1HashUtilities.HashFile(filepath).upper()

    if not miscQueries.checkIfFilehashInDatabase(db, filehash):
        logger.log("\t not in database")
        continue

    logger.log("\t exists in database, set to delete")
    miscQueries.setFileStatus(db, filehash, "toDeleteCompletely")
logger.log("%d: %s" % (count, filehash)) filepath = FileUtils.getFilepathOnDisk(filehash) if filepath: diskFileCount += 1 logger.log("\t filepath: %s" % str(filepath)) dirlist = miscQueries.getOriginalDirectoriesForFile(db, filehash) if dirlist: dirInfoCount += 1 for dirInfo in dirlist: logger.log("\tdirinfo: %s" % str(dirInfo)) if not filepath: logger.log("no filepath, setting status to notFound") miscQueries.setFileStatus(db, filehash, "notFound") if setStatus: if dirlist and filepath: # have all info, clear status logger.log("have dirinfo and filepath, setting status to None") miscQueries.setFileStatus(db, filehash, None) if dirlist and not filepath: # have dir info, but cannot find file, set status to NotFound logger.log( "have dirinfo but no filepath, setting status to notFound") miscQueries.setFileStatus(db, filehash, "notFound") logger.log("files with status %s: %d" % (status, statusCount)) logger.log("found dirinfo for %d files" % dirInfoCount)