def __init__(self):
    self.log = logging.getLogger('.'.join((config.MAIN_LOG_NAME, 'Main')))

    # All files will be stored here as hashObjects, indexed by their
    # filesize. The format follows:
    # {10000: [so1, so2], 1234: [so3], 4567: [so4, so5, so6]}
    self.scannedFiles = {}
    self.hashedFiles = {}

    # Basically the same as hashedFiles, but we only add duplicate entries.
    self.duplicateFilesIndex = {}

    # Clean up old tables.
    if config.DB_NAME != ":memory:" and os.path.exists(config.DB_NAME):
        os.remove(config.DB_NAME)

    # Set up the db.
    tables = [hashObject.HashObject, scanParent.ScanParent]
    for each in tables:
        cursor.Cursor.registerTable(each)
    self.cursor = cursor.Cursor()
    self.cursor.setupTables()

    # A simple timestamp of when we last committed to the database.
    # Useful for determining if a caller *really* wants the same data
    # in some methods.
    self.lastDBFetch = 0

    self.UPDATE_INTERVAL = config.UPDATE_INTERVAL
    self.updateCallbackFunction = None

    # Both these queues handle jobs. The file queue is for the file manager,
    # meant for scanning files. The file manager then hands off more work to
    # the hash queue, if it needs to.
    self.fileQueue = Queue.Queue()
    self.hashQueue = Queue.Queue()

    # Set up the file manager.
    self.fileManager = fileManager.FileManager(self.fileQueue, self.hashQueue,
                                               self.scannedFiles, self.updateProgress)
    self.fileManager.setDaemon(True)
    self.fileManager.start()

    # Set up the hasher.
    self.hasher = hasher.Hasher(self.hashQueue, self.hashedFiles,
                                self.duplicateFilesIndex, self.updateProgress)
    self.hasher.setDaemon(True)
    self.hasher.start()

    # Where we store update information and such about the hasher and fileManager.
    self.stats = {}
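The file manager and hasher above follow a classic two-queue producer/consumer hand-off: a cheap scanning pass feeds an expensive hashing pass. A minimal sketch of that pattern, using Python 2's Queue module as this code does; the worker functions and queue names below are illustrative, not part of this module:

import os
import Queue
import threading

file_queue = Queue.Queue()
hash_queue = Queue.Queue()

def file_worker():
    # Cheap pass: stat the file, then defer the real work to the hash queue.
    while True:
        path = file_queue.get()
        hash_queue.put((path, os.path.getsize(path)))
        file_queue.task_done()

def hash_worker():
    # Expensive pass: hash only what the file manager handed off.
    while True:
        path, size = hash_queue.get()
        # ... open and hash `path`, index the digest by `size` ...
        hash_queue.task_done()

for target in (file_worker, hash_worker):
    worker = threading.Thread(target=target)
    worker.setDaemon(True)  # daemon threads die with the main thread, as above
    worker.start()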
def hash_this(self, bufs, exp):
    with hasher.Hasher(hasher.FUZZY) as h:
        self.hash_it(h, bufs, exp)

def hash_this(self, bufs, exp):
    with hasher.Hasher(self.ALGS) as h:
        self.hash_it(h, bufs, exp)

def test_reset_after_use(self):
    with hasher.Hasher(hasher.ENTROPY) as h:
        self.process_it(h, (lc_alphabet,), lc_alphabet_entropy)
        h.reset()
        self.process_it(h, (), empty_entropy)

def test_clone(self):
    with hasher.Hasher(hasher.ENTROPY) as h1:
        self.process_it(h1, (lc_alphabet,), lc_alphabet_entropy)
        with h1.clone() as h2:
            self.assertEqual(h1.get_hashes(), h2.get_hashes())

def test_reset_before_use(self):
    with hasher.Hasher(hasher.ENTROPY) as h:
        h.reset()
        self.process_it(h, (), empty_entropy)

def process_this(self, bufs, exp):
    with hasher.Hasher(hasher.ENTROPY) as h:
        self.process_it(h, bufs, exp)

def test_clone(self):
    with hasher.Hasher(self.ALGS) as h1:
        self.hash_it(h1, (lc_alphabet,), lc_alphabet_hashes)
        with h1.clone() as h2:
            self.assertEqual(h1.get_hashes(), h2.get_hashes())

def test_reset_after_use(self):
    with hasher.Hasher(self.ALGS) as h:
        self.hash_it(h, (lc_alphabet,), lc_alphabet_hashes)
        h.reset()
        self.hash_it(h, (), empty_hashes)

def test_reset_before_use(self):
    with hasher.Hasher(self.ALGS) as h:
        h.reset()
        self.hash_it(h, (), empty_hashes)
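These tests all delegate to hash_it and process_it helpers that are not shown here. A plausible minimal sketch of them, assuming the Hasher exposes an update() feed method (an assumption; only get_hashes(), reset(), and clone() appear in the tests above):

def hash_it(self, h, bufs, exp):
    # Hypothetical helper: feed each buffer, then compare the digests.
    # update() is assumed; get_hashes() is taken from test_clone above.
    for buf in bufs:
        h.update(buf)
    self.assertEqual(h.get_hashes(), exp)

def process_it(self, h, bufs, exp):
    # Hypothetical helper: same feed loop, with the entropy result
    # assumed to be reported through the same get_hashes() interface.
    for buf in bufs:
        h.update(buf)
    self.assertEqual(h.get_hashes(), exp)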