def test_stat_symlink(self):
    """Check that stat_path reports on the link itself, not on its target.

    Only st_ino is compared: it is enough to tell whether the info comes
    from the link or from the linked file, and the full stat can not be
    compared because st_mode will be different.
    """
    symlink = os.path.join(self.basedir, 'foo')
    os.symlink(self.testfile, symlink)

    # os.stat follows the link, so its inode must differ from stat_path's
    followed_inode = os.stat(symlink).st_ino
    self.assertNotEqual(followed_inode, stat_path(symlink).st_ino)

    # os.lstat does not follow the link, so its inode must match
    link_inode = os.lstat(symlink).st_ino
    self.assertEqual(link_inode, stat_path(symlink).st_ino)
def _hash(self, path):
    """Hash the file at path, reading it in chunks of self.chunk_size.

    Returns a tuple (content hash, crc32, size, initial stat), where the
    size is the total length of the data read and the stat is taken before
    the file is opened.

    Raises StopHashing if, before reading any chunk, this path is found in
    self._should_cancel or self._stopped is true.  Whatever the outcome,
    self._should_cancel is reset to None on the way out.
    """
    hasher = content_hash_factory()
    crc = 0
    size = 0
    try:
        # stat first, so the returned stat describes the file as it was
        # before any content was read
        initial_stat = stat_path(path)
        with open_file(path, 'rb') as fh:
            while True:
                # checked before every chunk: stop hashing if this path was
                # flagged for cancellation or the hasher was stopped
                with self.mutex:
                    path_to_cancel = self._should_cancel
                if path_to_cancel == path or self._stopped:
                    raise StopHashing('hashing of %r was cancelled' % path)
                cont = fh.read(self.chunk_size)
                if not cont:
                    # an empty read means there is nothing left in the file
                    break
                hasher.update(cont)
                # crc32 is computed incrementally, feeding the previous value
                crc = crc32(cont, crc)
                size += len(cont)
    finally:
        # always clear the cancellation request, even on error
        with self.mutex:
            self._should_cancel = None
    return hasher.content_hash(), crc, size, initial_stat
def test_unique(self):
    """Paths inserted more than once should be reported only once."""
    # build the events the receiver is expected to get, in order
    should_be = []
    for idx in range(10):
        content_hasher = content_hash_factory()
        text = "supercalifragilistico" + str(idx)
        content_hasher.hash_object.update(text)
        tfile = os.path.join(self.test_dir, "tfile" + str(idx))
        with open_file(tfile, "wb") as fh:
            fh.write(text)
        expected = dict(path=tfile, hash=content_hasher.content_hash(),
                        crc32=crc32(text), size=len(text),
                        stat=stat_path(tfile))
        should_be.append(("HQ_HASH_NEW", expected))

    finished = defer.Deferred()

    class Helper(object):
        """Receiver that stores every pushed event."""
        # class-closure, cannot use self, pylint: disable-msg=E0213
        def __init__(innerself):
            innerself.store = []

        def push(innerself, event, **kwargs):
            """Store the event; fire the deferred on the tenth one."""
            innerself.store.append((event, kwargs))
            if len(innerself.store) != 10:
                return
            if innerself.store == should_be:
                finished.callback(True)
            else:
                finished.errback(Exception("are different!"))

    receiver = Helper()
    hq = hash_queue.HashQueue(receiver)
    self.addCleanup(hq.shutdown)

    # the running hasher is stopped so the duplicated items stay queued
    # and the uniqueness of the queue can be exercised
    hq.hasher.stop()
    self.log.debug('Hasher stopped (forced)')
    # give the hasher time to fully stop
    time.sleep(0.1)

    # replace it with a fresh hasher, built like HashQueue builds its own
    hq.hasher = hash_queue._Hasher(hq._queue, hq._end_mark, receiver)
    hq.hasher.setDaemon(True)

    # every file is sent to hash twice
    for idx in range(10):
        tfile = os.path.join(self.test_dir, "tfile" + str(idx))
        for _ in range(2):
            hq.insert(tfile, "mdid")

    # now let the hasher run
    self.log.debug('Hasher started (forced)')
    hq.hasher.start()

    # insert the last item once more, to check the uniqueness in the queue
    # while the hasher is running
    last_file = os.path.join(self.test_dir, "tfile" + str(9))
    hq.insert(last_file, "mdid")
    return finished
def __init__(self, path):
    """Open the hint file at path, or start a new one in a temp file.

    If path exists and its size is greater than 0, it is opened read-only
    (binary) and self.tempfile stays None.  Otherwise a temporary file is
    created in the same directory as path and opened for reading and
    writing (binary); its name is kept in self.tempfile.
    """
    self.path = path
    self.tempfile = None
    if path_exists(self.path) and stat_path(self.path).st_size > 0:
        # if it's there and size > 0, open only for read
        self.fd = open_file(self.path, "rb")
    else:
        # this is a new hint file, lets create it as a tempfile.
        # mkstemp creates the file atomically; mktemp only returns a name,
        # which something else could claim before we get to open it.
        handle, self.tempfile = tempfile.mkstemp(
            dir=os.path.dirname(self.path))
        # only the name is needed: the file is reopened through open_file
        os.close(handle)
        self.fd = open_file(self.tempfile, "w+b")
def check_info(result):
    """Check the info pushed by the hasher.

    result is the (event, kwargs) pair that was pushed.  The event must be
    HQ_HASH_NEW, the hash must be the content hash of "foobar", and the
    stat must agree with the current stat of testfile on every attribute
    checked below.
    """
    # pylint: disable-msg=W0612
    # unpacked here instead of in the signature: tuple parameters are
    # Python 2 only syntax
    event, kwargs = result
    self.assertEqual(event, "HQ_HASH_NEW")
    # calculate what we should receive
    realh = content_hash_factory()
    realh.hash_object.update("foobar")
    should_be = realh.content_hash()
    curr_stat = stat_path(testfile)
    self.assertEqual(should_be, kwargs['hash'])
    for attr in ('st_mode', 'st_ino', 'st_dev', 'st_nlink', 'st_uid',
                 'st_gid', 'st_size', 'st_ctime', 'st_mtime'):
        self.assertEqual(getattr(curr_stat, attr),
                         getattr(kwargs['stat'], attr))
def test_order(self):
    """Events should reach the receiver in the order items were queued."""
    # build the events the receiver is expected to get, in order
    should_be = []
    for idx in range(10):
        content_hasher = content_hash_factory()
        text = "supercalifragilistico" + str(idx)
        content_hasher.hash_object.update(text)
        tfile = os.path.join(self.test_dir, "tfile" + str(idx))
        with open_file(tfile, "wb") as fh:
            fh.write(text)
        expected = dict(path=tfile, hash=content_hasher.content_hash(),
                        crc32=crc32(text), size=len(text),
                        stat=stat_path(tfile))
        should_be.append(("HQ_HASH_NEW", expected))

    # pieces needed to create the hasher
    mark = object()
    queue = hash_queue.UniqueQueue()
    finished = defer.Deferred()

    class Helper(object):
        """Receiver that stores every pushed event."""
        # class-closure, cannot use self, pylint: disable-msg=E0213
        def __init__(innerself):
            innerself.store = []

        def push(innerself, event, **kwargs):
            """Store the event; on the tenth, stop the hasher and check."""
            innerself.store.append((event, kwargs))
            if len(innerself.store) != 10:
                return
            hasher_thread.stop()
            hasher_thread.join(timeout=5)
            if innerself.store == should_be:
                finished.callback(True)
            else:
                finished.errback(Exception("are different!"))

    receiver = Helper()
    hasher_thread = hash_queue._Hasher(queue, mark, receiver)

    # queue the work items
    for idx in range(10):
        tfile = os.path.join(self.test_dir, "tfile" + str(idx))
        queue.put(((tfile, "mdid"), FAKE_TIMESTAMP))

    # the hasher is started only after all the work items are queued
    hasher_thread.start()
    return finished
def _check_and_create_dirs(self, path):
    """Make sure path is not a regular file, creating it if it is missing.

    A regular file found at path is removed and path is created with
    make_dir.  A path that does not exist is created with make_dir.
    Anything else found at path is taken to be the directory already and
    is left untouched.

    Raises OSError for any failure other than ENOENT.
    """
    try:
        stat_result = stat_path(path)
        # is a regular file?
        if stat.S_ISREG(stat_result.st_mode):
            remove_file(path)
            make_dir(path, True)
        # else, the dir is already there
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise
        # the file or dir doesn't exist
        make_dir(path, True)
def check_stat(self, fullname, oldstat):
    """Tell whether the current stat of fullname differs from oldstat.

    A missing oldstat (None) always counts as different.  Otherwise only
    the relevant fields are compared, not the full stat:

    - st_ino: the data location changed in disk, may be something else
    - st_size: it changed in size, surely different content
    - st_mtime: the content could be different even having the same size
    """
    if oldstat is None:
        return True

    newstat = stat_path(fullname)
    relevant_fields = ('st_ino', 'st_size', 'st_mtime')
    different = any(getattr(newstat, field) != getattr(oldstat, field)
                    for field in relevant_fields)
    if not different:
        return different

    log_debug("stat differ for: %r "
              "Old: st_ino=%d st_size=%d st_mtime=%r "
              "New: st_ino=%d st_size=%d st_mtime=%r",
              fullname,
              oldstat.st_ino, oldstat.st_size, oldstat.st_mtime,
              newstat.st_ino, newstat.st_size, newstat.st_mtime)
    return different
def test_order(self):
    """Events should come out of the HashQueue in insertion order.

    Once ten events have arrived, all but the last one are compared
    against the expected list.
    """
    # build the events the receiver is expected to get, in order
    should_be = []
    for idx in range(10):
        content_hasher = content_hash_factory()
        text = "supercalifragilistico" + str(idx)
        content_hasher.hash_object.update(text)
        tfile = os.path.join(self.test_dir, "tfile" + str(idx))
        with open_file(tfile, "wb") as fh:
            fh.write(text)
        expected = dict(path=tfile, hash=content_hasher.content_hash(),
                        crc32=crc32(text), size=len(text),
                        stat=stat_path(tfile))
        should_be.append(("HQ_HASH_NEW", expected))

    finished = defer.Deferred()

    class Helper(object):
        """Receiver that stores every pushed event."""
        # class-closure, cannot use self, pylint: disable-msg=E0213
        def __init__(innerself):
            innerself.store = []

        def push(innerself, event, **kwargs):
            """Store the event; fire the deferred on the tenth one."""
            innerself.store.append((event, kwargs))
            if len(innerself.store) != 10:
                return
            if innerself.store[:-1] == should_be[:-1]:
                finished.callback(True)
            else:
                finished.errback(Exception("are different! "))

    receiver = Helper()
    hq = hash_queue.HashQueue(receiver)
    self.addCleanup(hq.shutdown)

    # send what to hash
    for idx in range(10):
        hq.insert(os.path.join(self.test_dir, "tfile" + str(idx)), "mdid")
    return finished
def test_stat_no_path(self):
    """Test that it raises proper error when no file is there.

    stat_path must raise an OSError whose errno is ENOENT; the test fails
    if no error is raised at all.
    """
    missing = os.path.join(self.basedir, 'nofile')
    try:
        stat_path(missing)
    except OSError as e:
        self.assertEqual(e.errno, errno.ENOENT)
    else:
        # without this branch a stat_path that raises nothing would pass
        self.fail("stat_path did not raise OSError for a missing path")
def test_stat_normal(self):
    """Check that stat_path on a normal file matches what os.stat returns."""
    # NOTE(review): presumably valid_path and testfile name the same file;
    # confirm against the test setup
    expected = os.stat(self.valid_path)
    actual = stat_path(self.testfile)
    self.assertEqual(expected, actual)
def insert(self, path, mdid):
    """Fake insert: push an HQ_HASH_NEW event for path right away.

    The event carries an empty hash and crc32, a size of 0 and the current
    stat of path.  mdid is accepted but not used.
    """
    push = self.eq.push
    fake_info = dict(path=path, hash="", crc32="", size=0,
                     stat=stat_path(path))
    push("HQ_HASH_NEW", **fake_info)
def hint_size(self):
    """Return the size of the hint file, or 0 when there is no hint."""
    if not self.has_hint:
        return 0
    return stat_path(self.hint_filename).st_size
def size(self):
    """Return the st_size that stat_path reports for self.filename."""
    file_stat = stat_path(self.filename)
    return file_stat.st_size