Example #1
0
 def test_changing_version_should_not_affect_other_links(self):
     """Each link must report the version it was stored with."""
     file_storage = FileStorage(self.temp_dir)
     payload = BytesIO(b'hello')

     file_storage.store('hello.txt', payload, version=1)
     # Rewind the buffer before handing it to the storage a second time.
     payload.seek(0)
     file_storage.store('world.txt', payload, version=2)

     for link_name, expected in (('hello.txt', 1), ('world.txt', 2)):
         self.assertEqual(file_storage.stored_version(link_name), expected)
Example #2
0
    def test_store_should_respect_given_data_size(self):
        """With ``size=2`` only the first two input bytes are stored."""
        payload = BytesIO(b'hello')
        file_storage = FileStorage(self.temp_dir)

        file_storage.store('hello.txt', payload, version=1, size=2)

        # The stored link is read back through gzip.
        link_path = os.path.join(self.temp_dir, 'links', 'hello.txt')
        with gzip.open(link_path, 'rb') as stored:
            contents = stored.read()
        self.assertEqual(contents, b'he')
Example #3
0
    def test_store_should_add_file_to_storage(self):
        """A stored file is readable (via gzip) under the links directory."""
        payload = BytesIO(b'hello')
        file_storage = FileStorage(self.temp_dir)

        file_storage.store('hello.txt', payload, version=1)

        link_path = os.path.join(self.temp_dir, 'links', 'hello.txt')
        with gzip.open(link_path, 'rb') as stored:
            contents = stored.read()
        self.assertEqual(contents, b'hello')
Example #4
0
    def test_deleting_older_version_should_have_no_effect(self):
        """Deleting version 1 leaves the content stored at version 2 intact."""
        file_storage = FileStorage(self.temp_dir)
        file_storage.store('hello.txt', BytesIO(b'world'), version=2)

        file_storage.delete('hello.txt', version=1)

        link_path = os.path.join(self.temp_dir, 'links', 'hello.txt')
        with gzip.open(link_path, 'rb') as stored:
            contents = stored.read()
        self.assertEqual(contents, b'world')
Example #5
0
    def test_store_should_set_modified_time_to_version(self):
        """The link's own mtime (via ``lstat``) equals the stored version."""
        payload = BytesIO(b'hello')
        file_storage = FileStorage(self.temp_dir)

        file_storage.store('hello.txt', payload, version=1)

        link_path = os.path.join(self.temp_dir, 'links', 'hello.txt')
        # lstat inspects the link itself rather than whatever it points to.
        link_stat = os.lstat(link_path)
        self.assertEqual(1, link_stat.st_mtime)
Example #6
0
    def test_blob_should_be_deleted_with_the_last_reference(self):
        """After deleting the only link, no files remain in the blobs dir."""
        file_storage = FileStorage(self.temp_dir)
        payload = BytesIO(b'hello')
        file_storage.store('hello.txt', payload, version=1)

        file_storage.delete('hello.txt', version=1)

        # Leftover empty directories are fine; only regular files would
        # indicate a blob that was not cleaned up.
        for _dirpath, _dirnames, filenames in os.walk(file_storage.blobs_dir):
            self.assertEqual(len(filenames), 0)
Example #7
0
    def test_store_should_not_overwrite_newer_files(self):
        """Storing version 1 after version 2 must keep version 2's content."""
        file_storage = FileStorage(self.temp_dir)
        newer_payload = BytesIO(b'world')
        older_payload = BytesIO(b'hello')

        # Newer version goes in first, then the stale one is attempted.
        file_storage.store('hello.txt', newer_payload, version=2)
        file_storage.store('hello.txt', older_payload, version=1)

        link_path = os.path.join(self.temp_dir, 'links', 'hello.txt')
        with gzip.open(link_path, 'rb') as stored:
            contents = stored.read()
        self.assertEqual(contents, b'world')
Example #8
0
    def test_store_should_reuse_blobs(self):
        """Two links with identical content must point at the same target."""
        file_storage = FileStorage(self.temp_dir)
        payload = BytesIO(b'hello')

        file_storage.store('hello.txt', payload, version=1)
        # Rewind the buffer before handing it to the storage a second time.
        payload.seek(0)
        file_storage.store('world.txt', payload, version=1)

        links_dir = os.path.join(self.temp_dir, 'links')
        first_target = os.readlink(os.path.join(links_dir, 'hello.txt'))
        second_target = os.readlink(os.path.join(links_dir, 'world.txt'))

        self.assertEqual(first_target, second_target)
Example #9
0
    def test_store_should_reuse_blobs(self):
        """Two links with identical content must point at the same target."""
        file_storage = FileStorage(self.temp_dir)
        payload = BytesIO(b'hello')

        file_storage.store('hello.txt', payload, version=1)
        # Rewind the buffer before handing it to the storage a second time.
        payload.seek(0)
        file_storage.store('world.txt', payload, version=1)

        # Resolve both link targets, in the same order the names are listed.
        link_targets = [
            os.readlink(os.path.join(self.temp_dir, 'links', link_name))
            for link_name in ('hello.txt', 'world.txt')
        ]

        self.assertEqual(link_targets[0], link_targets[1])
Example #10
0
    def test_removing_one_reference_should_not_break_others(self):
        """Deleting one link must keep another link's content readable."""
        file_storage = FileStorage(self.temp_dir)
        payload = BytesIO(b'hello')

        file_storage.store('hello.txt', payload, version=1)
        # Rewind the buffer before handing it to the storage a second time.
        payload.seek(0)
        file_storage.store('world.txt', payload, version=1)

        file_storage.delete('hello.txt', version=1)

        surviving_link = os.path.join(self.temp_dir, 'links', 'world.txt')
        with gzip.open(surviving_link, 'rb') as stored:
            contents = stored.read()
        self.assertEqual(contents, b'hello')
Example #11
0
    def test_store_should_accept_digest_hints(self):
        """A store given a SHA-256 ``digest`` links to the same target."""
        file_storage = FileStorage(self.temp_dir)
        payload = BytesIO(b'hello')
        sha256_hex = hashlib.sha256(b'hello').hexdigest()

        # First store computes everything itself; the second gets the hint.
        file_storage.store('hello.txt', payload, version=1)
        payload.seek(0)
        file_storage.store('world.txt', payload, version=1, digest=sha256_hex)

        links_dir = os.path.join(self.temp_dir, 'links')
        first_target = os.readlink(os.path.join(links_dir, 'hello.txt'))
        second_target = os.readlink(os.path.join(links_dir, 'world.txt'))

        self.assertEqual(first_target, second_target)
Example #12
0
    def test_store_should_accept_digest_hints(self):
        """A store given a SHA-256 ``digest`` links to the same target."""
        sha256_hex = hashlib.sha256(b'hello').hexdigest()
        payload = BytesIO(b'hello')
        file_storage = FileStorage(self.temp_dir)

        # First store computes everything itself; the second gets the hint.
        file_storage.store('hello.txt', payload, version=1)
        payload.seek(0)
        file_storage.store('world.txt', payload, version=1, digest=sha256_hex)

        # Resolve both link targets, in the same order the names are listed.
        link_targets = [
            os.readlink(os.path.join(self.temp_dir, 'links', link_name))
            for link_name in ('hello.txt', 'world.txt')
        ]

        self.assertEqual(link_targets[0], link_targets[1])
Example #13
0
    def test_store_should_add_compressed_file_to_storage_as_is(self):
        """Input flagged ``compressed=True`` reads back as the raw bytes."""
        file_storage = FileStorage(self.temp_dir)
        plain_stream = BytesIO(b'hello')
        gzipped_stream = BytesIO()

        # Build an in-memory gzip stream from the plain payload.
        with gzip.GzipFile(fileobj=gzipped_stream, mode='wb') as gz_writer:
            shutil.copyfileobj(plain_stream, gz_writer)
        gzipped_stream.seek(0)

        file_storage.store(
            'hello.txt', gzipped_stream, version=1, compressed=True)

        link_path = os.path.join(self.temp_dir, 'links', 'hello.txt')
        with gzip.open(link_path, 'rb') as stored:
            contents = stored.read()
        self.assertEqual(contents, b'hello')
Example #14
0
class FiletrackerServer(base.Server):
    """A WSGI application providing a filetracker server.

    Files are kept in a ``FileStorage``; GET and list requests are served
    straight from its links directory (``self.dir``).

    Note that this wouldn't work as standalone server: a "manager"
    process should handle DB initialization and recovery, refer
    to ``filetracker.servers.run`` for more details.
    """

    def __init__(self, dir=None):
        """Create the server on top of a ``FileStorage``.

        ``dir`` is the storage working directory. When it is ``None`` the
        FILETRACKER_DIR environment variable is used instead.

        Raises ``AssertionError`` when neither source provides a directory.
        """
        if dir is None:
            if 'FILETRACKER_DIR' not in os.environ:
                raise AssertionError(
                    "LocalFileServer must have its working "
                    "directory specified either as a constructor argument "
                    "or passed via FILETRACKER_DIR environment variable.")
            dir = os.environ['FILETRACKER_DIR']
        self.storage = FileStorage(dir)
        self.dir = self.storage.links_dir

    def parse_query_params(self, environ):
        """Return the request's QUERY_STRING parsed with ``parse_qs``."""
        return parse_qs(environ.get('QUERY_STRING', ''))

    def _parse_last_modified(self, query_params):
        """Return the required ``last_modified`` parameter as a timestamp.

        ``query_params`` is the mapping returned by ``parse_query_params``.

        Raises ``base.HttpError`` (400) when the parameter is missing, empty
        or cannot be parsed as a date.
        """
        raw_value = query_params.get('last_modified', (None, ))[0]
        if not raw_value:
            raise base.HttpError('400 Bad Request',
                                 '"?last-modified=" is required')

        # parsedate_tz() signals an unparsable date by returning None;
        # feeding that to mktime_tz() would fail with a TypeError.
        parsed = email.utils.parsedate_tz(raw_value)
        if parsed is None:
            raise base.HttpError(
                '400 Bad Request',
                'Invalid "last_modified" date: "{}"'.format(raw_value))
        return email.utils.mktime_tz(parsed)

    def handle_PUT(self, environ, start_response):
        """Store the request body under ``/files/<path>``.

        Requires CONTENT_LENGTH in the environ and a ``last_modified`` query
        parameter. HTTP_CONTENT_ENCODING, HTTP_SHA256_CHECKSUM and
        HTTP_LOGICAL_SIZE are forwarded to the storage when present. The
        version returned by the storage is sent back as Last-Modified.

        Raises ``base.HttpError`` (400) for a wrong endpoint, a missing or
        non-integer content length, or a missing/invalid ``last_modified``.
        """
        endpoint, path = base.get_endpoint_and_path(environ)
        if endpoint != 'files':
            raise base.HttpError('400 Bad Request',
                                 'PUT can be only performed on "/files/..."')

        # CONTENT_LENGTH may be absent or empty in a WSGI environ; report
        # that as a client error instead of failing inside int().
        try:
            content_length = int(environ.get('CONTENT_LENGTH'))
        except (TypeError, ValueError):
            raise base.HttpError('400 Bad Request',
                                 'A valid Content-Length header is required')

        query_params = self.parse_query_params(environ)
        last_modified = self._parse_last_modified(query_params)

        compressed = environ.get('HTTP_CONTENT_ENCODING', None) == 'gzip'

        digest = environ.get('HTTP_SHA256_CHECKSUM', None)
        logical_size = environ.get('HTTP_LOGICAL_SIZE', None)

        # Requests missing any of the three hints are logged at a higher
        # level so they stand out.
        if compressed and digest and logical_size:
            logger.debug('Handling PUT %s.', path)
        else:
            logger.info(
                'Handling PUT %s with unusual headers: '
                'compressed=%s, digest=%s, logical_size=%s', path, compressed,
                digest, logical_size)

        version = self.storage.store(name=path,
                                     data=environ['wsgi.input'],
                                     version=last_modified,
                                     size=content_length,
                                     compressed=compressed,
                                     digest=digest,
                                     logical_size=logical_size)
        start_response('200 OK', [
            ('Content-Type', 'text/plain'),
            ('Last-Modified', email.utils.formatdate(version)),
        ])
        return []

    def _file_headers(self, name):
        """Build the response headers for the stored file ``name``."""
        link_path = os.path.join(self.dir, name)
        # lstat() describes the link itself (source of Last-Modified), while
        # stat() follows it to the target (source of Content-Length).
        link_st = os.lstat(link_path)
        blob_st = os.stat(link_path)
        logical_size = self.storage.logical_size(name)
        return [
            ('Content-Type', 'application/octet-stream'),
            ('Content-Length', str(blob_st.st_size)),
            ('Content-Encoding', 'gzip'),
            ('Last-Modified', email.utils.formatdate(link_st.st_mtime)),
            ('Logical-Size', str(logical_size)),
        ]

    def handle_GET(self, environ, start_response):
        """Dispatch a GET request to the list, version or files endpoint.

        For ``files`` the stored file is streamed back through
        ``_FileIterator``.

        Raises ``base.HttpError``: 404 when the file does not exist, 400 for
        an unknown endpoint.
        """
        endpoint, path = base.get_endpoint_and_path(environ)
        if endpoint == 'list':
            return self.handle_list(environ, start_response)
        elif endpoint == 'version':
            return self.handle_version(environ, start_response)
        elif endpoint == 'files':
            # NOTE(review): ``path`` comes from the request; confirm that
            # base.get_endpoint_and_path rejects ".." components.
            full_path = os.path.join(self.dir, path)

            if not os.path.isfile(full_path):
                raise base.HttpError('404 Not Found',
                                     'File "{}" not found'.format(full_path))

            start_response('200 OK', self._file_headers(path))
            return _FileIterator(open(full_path, 'rb'))
        else:
            raise base.HttpError(
                '400 Bad Request',
                'Unknown endpoint "{}", expected "files" or "list"'.format(
                    endpoint))

    def handle_DELETE(self, environ, start_response):
        """Delete ``/files/<path>`` at the version given by ``last_modified``.

        Raises ``base.HttpError``: 400 for a wrong endpoint or a
        missing/invalid ``last_modified``, 404 when the storage reports the
        file as not found.
        """
        endpoint, path = base.get_endpoint_and_path(environ)
        if endpoint != 'files':
            # Use base.HttpError like every other handler in this class.
            raise base.HttpError(
                '400 Bad Request',
                'DELETE can be only performed on "/files/..."')

        query_params = self.parse_query_params(environ)
        last_modified = self._parse_last_modified(query_params)

        logger.debug('Handling DELETE %s@%d', path, last_modified)

        try:
            self.storage.delete(name=path, version=last_modified)
        except FiletrackerFileNotFoundError:
            raise base.HttpError('404 Not Found', '')

        start_response('200 OK', [])
        return []

    def handle_list(self, environ, start_response):
        """List files below ``/list/<path>`` via ``_list_files_iterator``.

        ``last_modified`` is optional and defaults to the current time.

        Raises ``base.HttpError`` (400) when the path is not a directory.
        """
        _, path = base.get_endpoint_and_path(environ)
        query_params = self.parse_query_params(environ)

        # NOTE(review): a client-supplied value is passed on as the raw
        # query string while the default is an int; confirm which form
        # _list_files_iterator expects.
        last_modified = query_params.get('last_modified', (None, ))[0]
        if not last_modified:
            last_modified = int(time.time())

        # %s rather than %d: the value is a string when it came from the
        # query, and %d would make the logging call fail to format.
        logger.debug('Handling GET /list/%s (@%s)', path, last_modified)

        root_dir = os.path.join(self.dir, path)
        if not os.path.isdir(root_dir):
            raise base.HttpError('400 Bad Request',
                                 'Path doesn\'t exist or is not a directory')

        start_response('200 OK', [])
        return _list_files_iterator(root_dir, last_modified)

    def handle_version(self, environ, start_response):
        """Respond with a JSON document listing ``protocol_versions``."""
        start_response('200 OK', [('Content-Type', 'application/json')])
        response = {
            'protocol_versions': [2],
        }
        return [json.dumps(response).encode('utf8')]
Example #15
0
class FiletrackerServer(base.Server):
    """A WSGI application providing a filetracker server.

    Files are kept in a ``FileStorage``; GET and list requests are served
    straight from its links directory (``self.dir``).

    Note that this wouldn't work as standalone server: a "manager"
    process should handle DB initialization and recovery, refer
    to ``filetracker.servers.run`` for more details.
    """

    def __init__(self, dir=None):
        """Create the server on top of a ``FileStorage``.

        ``dir`` is the storage working directory. When it is ``None`` the
        FILETRACKER_DIR environment variable is used instead.

        Raises ``AssertionError`` when neither source provides a directory.
        """
        if dir is None:
            if 'FILETRACKER_DIR' not in os.environ:
                raise AssertionError(
                    "LocalFileServer must have its working "
                    "directory specified either as a constructor argument "
                    "or passed via FILETRACKER_DIR environment variable.")
            dir = os.environ['FILETRACKER_DIR']
        self.storage = FileStorage(dir)
        self.dir = self.storage.links_dir

    def parse_query_params(self, environ):
        """Return the request's QUERY_STRING parsed with ``parse_qs``."""
        return parse_qs(environ.get('QUERY_STRING', ''))

    def _parse_last_modified(self, query_params):
        """Return the required ``last_modified`` parameter as a timestamp.

        ``query_params`` is the mapping returned by ``parse_query_params``.

        Raises ``base.HttpError`` (400) when the parameter is missing, empty
        or cannot be parsed as a date.
        """
        raw_value = query_params.get('last_modified', (None,))[0]
        if not raw_value:
            raise base.HttpError('400 Bad Request',
                                 '"?last-modified=" is required')

        # parsedate_tz() signals an unparsable date by returning None;
        # feeding that to mktime_tz() would fail with a TypeError.
        parsed = email.utils.parsedate_tz(raw_value)
        if parsed is None:
            raise base.HttpError(
                '400 Bad Request',
                'Invalid "last_modified" date: "{}"'.format(raw_value))
        return email.utils.mktime_tz(parsed)

    def handle_PUT(self, environ, start_response):
        """Store the request body under ``/files/<path>``.

        Requires CONTENT_LENGTH in the environ and a ``last_modified`` query
        parameter. HTTP_CONTENT_ENCODING, HTTP_SHA256_CHECKSUM and
        HTTP_LOGICAL_SIZE are forwarded to the storage when present. The
        version returned by the storage is sent back as Last-Modified.

        Raises ``base.HttpError`` (400) for a wrong endpoint, a missing or
        non-integer content length, or a missing/invalid ``last_modified``.
        """
        endpoint, path = base.get_endpoint_and_path(environ)
        if endpoint != 'files':
            raise base.HttpError('400 Bad Request',
                                 'PUT can be only performed on "/files/..."')

        # CONTENT_LENGTH may be absent or empty in a WSGI environ; report
        # that as a client error instead of failing inside int().
        try:
            content_length = int(environ.get('CONTENT_LENGTH'))
        except (TypeError, ValueError):
            raise base.HttpError('400 Bad Request',
                                 'A valid Content-Length header is required')

        query_params = self.parse_query_params(environ)
        last_modified = self._parse_last_modified(query_params)

        compressed = environ.get('HTTP_CONTENT_ENCODING', None) == 'gzip'

        digest = environ.get('HTTP_SHA256_CHECKSUM', None)
        logical_size = environ.get('HTTP_LOGICAL_SIZE', None)

        # Requests missing any of the three hints are logged at a higher
        # level so they stand out.
        if compressed and digest and logical_size:
            logger.debug('Handling PUT %s.', path)
        else:
            logger.info('Handling PUT %s with unusual headers: '
                        'compressed=%s, digest=%s, logical_size=%s',
                        path, compressed, digest, logical_size)

        version = self.storage.store(name=path,
                                     data=environ['wsgi.input'],
                                     version=last_modified,
                                     size=content_length,
                                     compressed=compressed,
                                     digest=digest,
                                     logical_size=logical_size)
        start_response('200 OK', [
            ('Content-Type', 'text/plain'),
            ('Last-Modified', email.utils.formatdate(version)),
        ])
        return []

    def _file_headers(self, name):
        """Build the response headers for the stored file ``name``."""
        link_path = os.path.join(self.dir, name)
        # lstat() describes the link itself (source of Last-Modified), while
        # stat() follows it to the target (source of Content-Length).
        link_st = os.lstat(link_path)
        blob_st = os.stat(link_path)
        logical_size = self.storage.logical_size(name)
        return [
            ('Content-Type', 'application/octet-stream'),
            ('Content-Length', str(blob_st.st_size)),
            ('Content-Encoding', 'gzip'),
            ('Last-Modified', email.utils.formatdate(link_st.st_mtime)),
            ('Logical-Size', str(logical_size)),
        ]

    def handle_GET(self, environ, start_response):
        """Dispatch a GET request to the list, version or files endpoint.

        For ``files`` the stored file is streamed back through
        ``_FileIterator``.

        Raises ``base.HttpError``: 404 when the file does not exist, 400 for
        an unknown endpoint.
        """
        endpoint, path = base.get_endpoint_and_path(environ)
        if endpoint == 'list':
            return self.handle_list(environ, start_response)
        elif endpoint == 'version':
            return self.handle_version(environ, start_response)
        elif endpoint == 'files':
            # NOTE(review): ``path`` comes from the request; confirm that
            # base.get_endpoint_and_path rejects ".." components.
            full_path = os.path.join(self.dir, path)

            if not os.path.isfile(full_path):
                raise base.HttpError('404 Not Found',
                                     'File "{}" not found'.format(full_path))

            start_response('200 OK', self._file_headers(path))
            return _FileIterator(open(full_path, 'rb'))
        else:
            raise base.HttpError(
                '400 Bad Request',
                'Unknown endpoint "{}", expected "files" or "list"'
                .format(endpoint))

    def handle_DELETE(self, environ, start_response):
        """Delete ``/files/<path>`` at the version given by ``last_modified``.

        Raises ``base.HttpError``: 400 for a wrong endpoint or a
        missing/invalid ``last_modified``, 404 when the storage reports the
        file as not found.
        """
        endpoint, path = base.get_endpoint_and_path(environ)
        if endpoint != 'files':
            # Use base.HttpError like every other handler in this class.
            raise base.HttpError(
                '400 Bad Request',
                'DELETE can be only performed on "/files/..."')

        query_params = self.parse_query_params(environ)
        last_modified = self._parse_last_modified(query_params)

        logger.debug('Handling DELETE %s@%d', path, last_modified)

        try:
            self.storage.delete(name=path,
                                version=last_modified)
        except FiletrackerFileNotFoundError:
            raise base.HttpError('404 Not Found', '')

        start_response('200 OK', [])
        return []

    def handle_list(self, environ, start_response):
        """List files below ``/list/<path>`` via ``_list_files_iterator``.

        ``last_modified`` is optional and defaults to the current time.

        Raises ``base.HttpError`` (400) when the path is not a directory.
        """
        _, path = base.get_endpoint_and_path(environ)
        query_params = self.parse_query_params(environ)

        # NOTE(review): a client-supplied value is passed on as the raw
        # query string while the default is an int; confirm which form
        # _list_files_iterator expects.
        last_modified = query_params.get('last_modified', (None,))[0]
        if not last_modified:
            last_modified = int(time.time())

        # %s rather than %d: the value is a string when it came from the
        # query, and %d would make the logging call fail to format.
        logger.debug('Handling GET /list/%s (@%s)', path, last_modified)

        root_dir = os.path.join(self.dir, path)
        if not os.path.isdir(root_dir):
            raise base.HttpError('400 Bad Request',
                                 'Path doesn\'t exist or is not a directory')

        start_response('200 OK', [])
        return _list_files_iterator(root_dir, last_modified)

    def handle_version(self, environ, start_response):
        """Respond with a JSON document listing ``protocol_versions``."""
        start_response('200 OK', [('Content-Type', 'application/json')])
        response = {
            'protocol_versions': [2],
        }
        return [json.dumps(response).encode('utf8')]