Esempio n. 1
0
    def check_backend_integrity(self, delete=False):
        """Verify every file of the backend against its recorded digest.

        Each entry returned by `self.list()' is fetched from the
        backend and read in chunks of CHUNK_SIZE while its digest is
        computed again. An entry whose recomputed digest differs from
        the recorded one is logged with ERROR severity and, when
        requested, deleted.

        delete (bool): whether files with a wrong digest have to be
            deleted.

        return (bool): True if no mismatch was found, False if at
            least one file has a wrong digest.

        """
        mismatch_found = False

        for recorded_digest, _ in self.list():
            hasher = Digester()

            # Stream the file so that it is never fully held in memory.
            with self.backend.get_file(recorded_digest) as stream:
                while True:
                    chunk = stream.read(self.CHUNK_SIZE)
                    if len(chunk) == 0:
                        break
                    hasher.update(chunk)

            actual_digest = hasher.digest()
            if recorded_digest != actual_digest:
                logger.error("File with hash %s actually has hash %s",
                             recorded_digest, actual_digest)
                if delete:
                    self.delete(recorded_digest)
                mismatch_found = True

        return not mismatch_found
Esempio n. 2
0
    def put_file_from_fobj(self, src, desc=""):
        """Store a file in the storage.

        The content is copied to a temporary file inside
        `self.temp_dir' while its digest is computed. If no file named
        after that digest exists yet in `self.file_dir' (the
        file-system cache) the temporary file is moved there, otherwise
        the temporary file is discarded. In both cases the file is then
        saved to the backend.

        If anything fails before the temporary file has been moved or
        discarded, the temporary file is removed and the original
        exception is propagated unchanged.

        The file is obtained from a file-object. Other interfaces are
        available as `put_file_content', `put_file_from_path'.

        src (fileobj): a readable binary file-like object from which
            to read the contents of the file.
        desc (unicode): the (optional) description to associate to the
            file.

        return (unicode): the digest of the stored file.

        """
        logger.debug("Reading input file to store on the database.")

        # Unfortunately, we have to read the whole file-obj to compute
        # the digest but we take that chance to save it to a temporary
        # path so that we then just need to move it. Hoping that both
        # locations will be on the same filesystem, that should be way
        # faster than reading the whole file-obj again (as it could be
        # compressed or require network communication).
        # XXX We're *almost* reimplementing copyfileobj.
        with tempfile.NamedTemporaryFile('wb', delete=False,
                                         dir=self.temp_dir) as dst:
            # The temporary file is created with delete=False, hence
            # nobody removes it for us: track whether we disposed of
            # it, so that a failure doesn't leave an orphan behind.
            # A flag plus `finally' (rather than `except' and a bare
            # `raise') keeps the original exception intact even if the
            # cleanup itself fails.
            disposed = False
            try:
                d = Digester()
                buf = src.read(self.CHUNK_SIZE)
                while len(buf) > 0:
                    d.update(buf)
                    # write() may store only part of the buffer: keep
                    # writing the remainder until nothing is left.
                    while len(buf) > 0:
                        written = dst.write(buf)
                        # Cooperative yield.
                        gevent.sleep(0)
                        # A write() that reports no count (None) is
                        # treated as having written the whole buffer.
                        if written is None:
                            break
                        buf = buf[written:]
                    buf = src.read(self.CHUNK_SIZE)
                digest = d.digest()
                dst.flush()

                logger.debug("File has digest %s.", digest)

                cache_file_path = os.path.join(self.file_dir, digest)

                if not os.path.exists(cache_file_path):
                    os.rename(dst.name, cache_file_path)
                else:
                    os.unlink(dst.name)
                disposed = True
            finally:
                if not disposed:
                    # Best-effort cleanup: the file may already be
                    # gone, and a failure here must not hide the
                    # exception that brought us here.
                    try:
                        os.unlink(dst.name)
                    except OSError:
                        pass

        # Store the file in the backend. We do that even if the file
        # was already in the cache (that is, we ignore the check above)
        # because there's a (small) chance that the file got removed
        # from the backend but somehow remained in the cache.
        self.save(digest, desc)

        return digest
Esempio n. 3
0
 def setUp(self):
     """Run the parent setUp, then create the Digester used by the tests.

     The new Digester instance is stored as `self.d'.

     """
     super(TestDigester, self).setUp()
     self.d = Digester()
Esempio n. 4
0
 def __init__(self):
     """Create a new Digester and store it as `self.digester'."""
     self.digester = Digester()
Esempio n. 5
0
 def __init__(self, dim):
     """Store `dim' and create a new Digester.

     dim: stored unchanged as `self.dim'; its meaning is not visible
         from this method (presumably a size -- TODO confirm against
         the rest of the class).

     """
     self.dim = dim
     self.digester = Digester()
Esempio n. 6
0
 def setUp(self):
     """Run the parent setUp, then create a new Digester.

     The new Digester instance is stored as `self.d'.

     """
     super().setUp()
     self.d = Digester()
Esempio n. 7
0
 def __init__(self, dim):
     """Store `dim', open a random byte source and create a Digester.

     dim: stored unchanged as `self.dim'; its meaning is not visible
         from this method (presumably a size -- TODO confirm against
         the rest of the class).

     """
     self.dim = dim
     # FIXME We could use os.urandom() instead.
     # The file is opened for binary reading and kept as `self.source';
     # it is not closed in this method.
     # NOTE(review): confirm that the class closes it somewhere else.
     self.source = io.open('/dev/urandom', 'rb')
     self.digester = Digester()