Beispiel #1
0
class HashingFile:
    """Write-only file-like sink that hashes what it receives.

    Nothing is stored anywhere: every chunk passed to write() is
    only fed to a Digester, whose result is available through the
    `digest' property.

    """
    def __init__(self):
        self.digester = Digester()

    def write(self, buf):
        """Feed buf to the digester, as if it were written to a file.

        buf (bytes): the chunk of content being "written"; it is
            handed unchanged to the digester.

        return (int): the number of bytes consumed, that is len(buf).

        """
        self.digester.update(buf)
        consumed = len(buf)
        return consumed

    @property
    def digest(self):
        """Digest of everything written so far.

        return (string): whatever the digester's digest() returns.

        """
        digester = self.digester
        return digester.digest()

    def close(self):
        """Close the "file".

        This is a no-op: there is no underlying file, the method only
        exists to complete the file-like interface.

        """
        return None
Beispiel #2
0
    def check_backend_integrity(self, delete=False):
        """Verify that every file in the backend matches its digest.

        Each file listed by self.list() is fetched from the backend,
        read back in chunks of CHUNK_SIZE and hashed again; the
        result is compared with the digest the file is recorded
        under.

        Every mismatch is logged with ERROR severity.

        delete (bool): whether files whose content doesn't match
            their digest have to be deleted.

        return (bool): True if no mismatch was found, False if there
            was at least one.

        """
        all_good = True
        for recorded, _ in self.list():
            hasher = Digester()
            with self.backend.get_file(recorded) as stream:
                while True:
                    chunk = stream.read(self.CHUNK_SIZE)
                    # An empty read marks the end of the file.
                    if len(chunk) == 0:
                        break
                    hasher.update(chunk)
            actual = hasher.digest()
            if recorded == actual:
                continue
            logger.error("File with hash %s actually has hash %s", recorded,
                         actual)
            all_good = False
            if delete:
                self.delete(recorded)

        return all_good
Beispiel #3
0
class HashingFile(object):
    """File-like object that only hashes the data written to it.

    The written content is never kept: it just goes through a
    Digester, and the outcome can be read from `digest'.

    """
    def __init__(self):
        self.digester = Digester()

    def write(self, buf):
        """Account for buf in the running digest.

        buf (string): the next piece of content of the file, passed
            as it is to the digester.

        return (int): len(buf), i.e. the amount of data "written".

        """
        hasher = self.digester
        hasher.update(buf)
        return len(buf)

    @property
    def digest(self):
        """Digest of all the data received by write() up to now.

        return (string): the value given by the digester's digest().

        """
        result = self.digester.digest()
        return result

    def close(self):
        """Do nothing.

        No real file backs this object, hence there is nothing to
        release; the method is here for file-like compatibility.

        """
        return
Beispiel #4
0
    def put_file_from_fobj(self, src, desc=""):
        """Store a file in the storage.

        If it's already (for some reason...) in the cache send that
        copy to the backend. Otherwise store it in the file-system
        cache first.

        The file is obtained from a file-object. Other interfaces are
        available as `put_file_content', `put_file_from_path'.

        If anything fails before the content has reached the cache,
        the temporary file used for the copy is removed and the
        original exception is propagated.

        src (fileobj): a readable binary file-like object from which
            to read the contents of the file.
        desc (unicode): the (optional) description to associate to the
            file.

        return (unicode): the digest of the stored file.

        """
        logger.debug("Reading input file to store on the database.")

        # Unfortunately, we have to read the whole file-obj to compute
        # the digest but we take that chance to save it to a temporary
        # path so that we then just need to move it. Hoping that both
        # locations will be on the same filesystem, that should be way
        # faster than reading the whole file-obj again (as it could be
        # compressed or require network communication).
        # XXX We're *almost* reimplementing copyfileobj.

        # Path of the temporary file for as long as we are the ones
        # responsible for removing it (it is created with delete=False,
        # so nobody else will). Reset to None as soon as the file has
        # been moved into the cache or unlinked.
        pending_tmp_path = None
        try:
            with tempfile.NamedTemporaryFile('wb', delete=False,
                                             dir=self.temp_dir) as dst:
                pending_tmp_path = dst.name
                d = Digester()
                buf = src.read(self.CHUNK_SIZE)
                while len(buf) > 0:
                    d.update(buf)
                    # write() may accept only part of the buffer: keep
                    # going until the whole chunk has been written.
                    while len(buf) > 0:
                        written = dst.write(buf)
                        # Cooperative yield.
                        gevent.sleep(0)
                        # Some file objects don't report the amount
                        # written: assume the whole buffer went through.
                        if written is None:
                            break
                        buf = buf[written:]
                    buf = src.read(self.CHUNK_SIZE)
                digest = d.digest()
                dst.flush()

                logger.debug("File has digest %s.", digest)

                cache_file_path = os.path.join(self.file_dir, digest)

                if not os.path.exists(cache_file_path):
                    os.rename(dst.name, cache_file_path)
                else:
                    os.unlink(dst.name)
                pending_tmp_path = None
        finally:
            # Only reached with a pending path when something above
            # failed: don't leave the partial copy behind.
            if pending_tmp_path is not None:
                try:
                    os.unlink(pending_tmp_path)
                except OSError:
                    # Best-effort cleanup: the error that brought us
                    # here is the one the caller has to see.
                    pass

        # Store the file in the backend. We do that even if the file
        # was already in the cache (that is, we ignore the check above)
        # because there's a (small) chance that the file got removed
        # from the backend but somehow remained in the cache.
        self.save(digest, desc)

        return digest
Beispiel #5
0
    def check_backend_integrity(self, delete=False):
        """Check that the backend files are consistent with their digests.

        All the files known to the backend are requested one by one;
        the digest of each is computed again from its content and
        compared with the one it is stored under.

        Mismatches are reported with ERROR severity.

        delete (bool): if True, the files found to have a wrong digest
            are deleted.

        return (bool): False if at least a mismatch was found, True
            otherwise.

        """
        mismatches = 0
        for expected, _ in self.list():
            digester = Digester()
            with self.backend.get_file(expected) as backend_file:
                # Hash the content chunk by chunk until an empty read.
                data = backend_file.read(self.CHUNK_SIZE)
                while len(data) != 0:
                    digester.update(data)
                    data = backend_file.read(self.CHUNK_SIZE)
            found = digester.digest()
            if expected != found:
                mismatches += 1
                logger.error("File with hash %s actually has hash %s",
                             expected, found)
                if delete:
                    self.delete(expected)

        return mismatches == 0
Beispiel #6
0
    def put_file_from_fobj(self, src, desc=""):
        """Store a file in the storage.

        If it's already (for some reason...) in the cache send that
        copy to the backend. Otherwise store it in the file-system
        cache first.

        The file is obtained from a file-object. Other interfaces are
        available as `put_file_content', `put_file_from_path'.

        Should an error occur before the content is in the cache, the
        temporary file holding the partial copy is deleted and the
        error is re-raised unchanged.

        src (fileobj): a readable binary file-like object from which
            to read the contents of the file.
        desc (unicode): the (optional) description to associate to the
            file.

        return (unicode): the digest of the stored file.

        """
        logger.debug("Reading input file to store on the database.")

        # Unfortunately, we have to read the whole file-obj to compute
        # the digest but we take that chance to save it to a temporary
        # path so that we then just need to move it. Hoping that both
        # locations will be on the same filesystem, that should be way
        # faster than reading the whole file-obj again (as it could be
        # compressed or require network communication).
        # XXX We're *almost* reimplementing copyfileobj.

        # The temporary file is created with delete=False, hence it is
        # up to us to get rid of it on failure. This holds its path
        # while that is the case, and None once the file has been
        # either moved to the cache or unlinked.
        tmp_to_remove = None
        try:
            with tempfile.NamedTemporaryFile('wb', delete=False,
                                             dir=self.temp_dir) as dst:
                tmp_to_remove = dst.name
                d = Digester()
                buf = src.read(self.CHUNK_SIZE)
                while len(buf) > 0:
                    d.update(buf)
                    # A single write() may be partial, so loop until
                    # the chunk is exhausted.
                    while len(buf) > 0:
                        written = dst.write(buf)
                        # Cooperative yield.
                        gevent.sleep(0)
                        # No count reported: take the write as complete.
                        if written is None:
                            break
                        buf = buf[written:]
                    buf = src.read(self.CHUNK_SIZE)
                digest = d.digest()
                dst.flush()

                logger.debug("File has digest %s.", digest)

                cache_file_path = os.path.join(self.file_dir, digest)

                if not os.path.exists(cache_file_path):
                    os.rename(dst.name, cache_file_path)
                else:
                    os.unlink(dst.name)
                tmp_to_remove = None
        finally:
            # Still set only if we are leaving because of an error.
            if tmp_to_remove is not None:
                try:
                    os.unlink(tmp_to_remove)
                except OSError:
                    # Cleanup is best-effort; let the original error
                    # reach the caller.
                    pass

        # Store the file in the backend. We do that even if the file
        # was already in the cache (that is, we ignore the check above)
        # because there's a (small) chance that the file got removed
        # from the backend but somehow remained in the cache.
        self.save(digest, desc)

        return digest
Beispiel #7
0
class RandomFile(object):
    """Simulate a random file with dim bytes, calculating its
    digest.

    The content comes from /dev/urandom; every chunk handed out by
    read() is also fed to a Digester, so that `digest' reflects
    exactly the data that has been read.

    """
    def __init__(self, dim):
        # Number of bytes that can still be read.
        self.dim = dim
        # FIXME We could use os.urandom() instead.
        self.source = io.open('/dev/urandom', 'rb')
        self.digester = Digester()

    def read(self, byte_num):
        """Read up to byte_num bytes from the source and return them,
        updating the hashing.

        byte_num (int|None): maximum number of bytes to read; None or
            a negative value mean "all the remaining bytes", as for
            standard file objects.

        return (bytes): the bytes read, never more than those left
            in the simulated file; empty once it is exhausted.

        """
        # A negative size must not reach the source: on /dev/urandom
        # it would ask for an unbounded amount of data.
        if byte_num is None or byte_num < 0 or byte_num > self.dim:
            byte_num = self.dim
        if byte_num <= 0:
            return b''
        buf = self.source.read(byte_num)
        # The source may return less than requested: account only for
        # what we actually got.
        self.dim -= len(buf)
        self.digester.update(buf)
        return buf

    def close(self):
        """Close the source file.

        """
        self.source.close()

    @property
    def digest(self):
        """Digest of the data read from the source file.

        return (string): digest.

        """
        return self.digester.digest()
Beispiel #8
0
class RandomFile(object):
    """Simulate a random file with dim bytes, calculating its
    digest.

    Data is taken from /dev/urandom and, while being returned by
    read(), is also passed to a Digester whose result is exposed as
    `digest'.

    """
    def __init__(self, dim):
        # How many bytes of the simulated file are still unread.
        self.dim = dim
        # FIXME We could use os.urandom() instead.
        self.source = io.open('/dev/urandom', 'rb')
        self.digester = Digester()

    def read(self, byte_num):
        """Read at most byte_num bytes from the source and return
        them, updating the hashing.

        byte_num (int|None): upper bound on the number of bytes to
            read; if None or negative, everything that is left is
            read (the usual file-object convention).

        return (bytes): the content read; it is capped to the bytes
            remaining in the simulated file and is empty at its end.

        """
        read_all = byte_num is None or byte_num < 0
        # Without this cap a negative size would be forwarded to the
        # source, i.e. an endless read from /dev/urandom.
        if read_all or byte_num > self.dim:
            byte_num = self.dim
        if byte_num <= 0:
            return b''
        buf = self.source.read(byte_num)
        # Use the real length: the source is allowed to return less.
        self.dim -= len(buf)
        self.digester.update(buf)
        return buf

    def close(self):
        """Close the source file.

        """
        self.source.close()

    @property
    def digest(self):
        """Digest of the data read from the source file.

        return (string): digest.

        """
        return self.digester.digest()
Beispiel #9
0
class RandomFile:
    """Readable file-like object yielding dim random bytes.

    The bytes come from os.urandom(); everything returned by read()
    is also fed to a Digester, whose result is exposed as `digest'.

    """
    def __init__(self, dim):
        self.dim = dim
        self.digester = Digester()

    def read(self, byte_num):
        """Produce the next chunk of random content and hash it.

        byte_num (int): how many bytes the caller would like to get.

        return (bytes): the new content: byte_num bytes, or fewer if
            less than that is left in the simulated file.

        """
        # Never hand out more than what is left.
        amount = min(byte_num, self.dim)
        if not amount:
            return b''
        chunk = os.urandom(amount)
        self.dim -= len(chunk)
        self.digester.update(chunk)
        return chunk

    def close(self):
        """Do nothing: there is no resource to release.

        """
        return None

    @property
    def digest(self):
        """Digest of the content returned by read() so far.

        return (string): the value given by the digester's digest().

        """
        digester = self.digester
        return digester.digest()
Beispiel #10
0
class RandomFile:
    """Fake file of dim random bytes that hashes what is read.

    Content is generated on the fly with os.urandom() and passed
    through a Digester as it is handed out by read().

    """
    def __init__(self, dim):
        self.dim = dim
        self.digester = Digester()

    def read(self, byte_num):
        """Generate up to byte_num random bytes, updating the hashing.

        byte_num (int): requested number of bytes.

        return (bytes): the generated content; shorter than byte_num
            when the simulated file has fewer bytes left.

        """
        remaining = self.dim
        wanted = remaining if byte_num > remaining else byte_num
        if wanted == 0:
            return b''
        data = os.urandom(wanted)
        self.dim = remaining - len(data)
        self.digester.update(data)
        return data

    def close(self):
        """No-op, nothing has been opened.

        """
        return

    @property
    def digest(self):
        """Digest of all the data generated by read() up to now.

        return (string): what the digester's digest() returns.

        """
        value = self.digester.digest()
        return value
Beispiel #11
0
class TestDigester(unittest.TestCase):
    """Tests for Digester: update() and digest()."""

    def setUp(self):
        super().setUp()
        self.d = Digester()

    def test_success(self):
        # A fresh digester yields the digest of no data at all...
        digester = self.d
        self.assertEqual(digester.digest(), _EMPTY_DIGEST)
        # ...and, once fed, the digest of what it was given.
        digester.update(b"content")
        self.assertEqual(digester.digest(), _CONTENT_DIGEST)

    def test_empty_update(self):
        # Updating with no bytes must leave the digest untouched.
        digester = self.d
        digester.update(b"")
        self.assertEqual(digester.digest(), _EMPTY_DIGEST)

    def test_string(self):
        # Only bytes are accepted: text has to be rejected.
        self.assertRaises(TypeError, self.d.update, "")
Beispiel #12
0
class TestDigester(unittest.TestCase):
    """Exercise Digester with empty, non-empty and invalid input."""

    def setUp(self):
        super().setUp()
        self.d = Digester()

    def test_success(self):
        # Digest before any update, then after hashing some bytes.
        before = self.d.digest()
        self.assertEqual(before, _EMPTY_DIGEST)
        self.d.update(b"content")
        after = self.d.digest()
        self.assertEqual(after, _CONTENT_DIGEST)

    def test_empty_update(self):
        # An update with an empty payload is allowed and changes
        # nothing.
        self.d.update(b"")
        result = self.d.digest()
        self.assertEqual(result, _EMPTY_DIGEST)

    def test_string(self):
        # A text string, even if empty, is not valid input.
        text = ""
        with self.assertRaises(TypeError):
            self.d.update(text)