def test_changing_version_should_not_affect_other_links(self):
    """Links stored with different versions each keep their own version."""
    fs = FileStorage(self.temp_dir)
    payload = BytesIO(b'hello')
    fs.store('hello.txt', payload, version=1)
    payload.seek(0)
    fs.store('world.txt', payload, version=2)
    self.assertEqual(fs.stored_version('hello.txt'), 1)
    self.assertEqual(fs.stored_version('world.txt'), 2)
def test_store_should_respect_given_data_size(self):
    """Only the first ``size`` bytes of the stream should be stored."""
    fs = FileStorage(self.temp_dir)
    fs.store('hello.txt', BytesIO(b'hello'), version=1, size=2)
    link = os.path.join(self.temp_dir, 'links', 'hello.txt')
    with gzip.open(link, 'rb') as stored:
        self.assertEqual(stored.read(), b'he')
def test_store_should_add_file_to_storage(self):
    """A stored file is readable back (gzip-compressed) via its link."""
    fs = FileStorage(self.temp_dir)
    fs.store('hello.txt', BytesIO(b'hello'), version=1)
    link = os.path.join(self.temp_dir, 'links', 'hello.txt')
    with gzip.open(link, 'rb') as stored:
        self.assertEqual(stored.read(), b'hello')
def test_deleting_older_version_should_have_no_effect(self):
    """Deleting with a version older than the stored one is a no-op."""
    fs = FileStorage(self.temp_dir)
    fs.store('hello.txt', BytesIO(b'world'), version=2)
    fs.delete('hello.txt', version=1)
    link = os.path.join(self.temp_dir, 'links', 'hello.txt')
    with gzip.open(link, 'rb') as stored:
        self.assertEqual(stored.read(), b'world')
def test_store_should_set_modified_time_to_version(self):
    """The link's mtime encodes the stored version number."""
    fs = FileStorage(self.temp_dir)
    fs.store('hello.txt', BytesIO(b'hello'), version=1)
    link = os.path.join(self.temp_dir, 'links', 'hello.txt')
    self.assertEqual(os.lstat(link).st_mtime, 1)
def test_blob_should_be_deleted_with_the_last_reference(self):
    """Removing the only link also removes the underlying blob."""
    fs = FileStorage(self.temp_dir)
    fs.store('hello.txt', BytesIO(b'hello'), version=1)
    fs.delete('hello.txt', version=1)
    # The blobs directory must contain no files at all
    # (leftover empty directories are acceptable).
    for _root, _dirs, files in os.walk(fs.blobs_dir):
        self.assertEqual(files, [])
def test_store_should_not_overwrite_newer_files(self):
    """Storing an older version under an existing name is ignored."""
    fs = FileStorage(self.temp_dir)
    fs.store('hello.txt', BytesIO(b'world'), version=2)
    fs.store('hello.txt', BytesIO(b'hello'), version=1)
    link = os.path.join(self.temp_dir, 'links', 'hello.txt')
    with gzip.open(link, 'rb') as stored:
        self.assertEqual(stored.read(), b'world')
def test_store_should_reuse_blobs(self):
    """Identical content stored under two names shares one blob."""
    fs = FileStorage(self.temp_dir)
    payload = BytesIO(b'hello')
    fs.store('hello.txt', payload, version=1)
    payload.seek(0)
    fs.store('world.txt', payload, version=1)
    links = os.path.join(self.temp_dir, 'links')
    self.assertEqual(
        os.readlink(os.path.join(links, 'hello.txt')),
        os.readlink(os.path.join(links, 'world.txt')))
def test_removing_one_reference_should_not_break_others(self):
    """Deleting one link to a shared blob leaves other links readable."""
    fs = FileStorage(self.temp_dir)
    payload = BytesIO(b'hello')
    fs.store('hello.txt', payload, version=1)
    payload.seek(0)
    fs.store('world.txt', payload, version=1)
    fs.delete('hello.txt', version=1)
    survivor = os.path.join(self.temp_dir, 'links', 'world.txt')
    with gzip.open(survivor, 'rb') as stored:
        self.assertEqual(stored.read(), b'hello')
def test_store_should_accept_digest_hints(self):
    """A correct sha256 digest hint still deduplicates into one blob."""
    fs = FileStorage(self.temp_dir)
    payload = BytesIO(b'hello')
    checksum = hashlib.sha256(b'hello').hexdigest()
    fs.store('hello.txt', payload, version=1)
    payload.seek(0)
    fs.store('world.txt', payload, version=1, digest=checksum)
    links = os.path.join(self.temp_dir, 'links')
    self.assertEqual(
        os.readlink(os.path.join(links, 'hello.txt')),
        os.readlink(os.path.join(links, 'world.txt')))
def test_store_should_add_compressed_file_to_storage_as_is(self):
    """Pre-gzipped input with compressed=True is stored without re-coding."""
    fs = FileStorage(self.temp_dir)
    gz_payload = BytesIO()
    with gzip.GzipFile(fileobj=gz_payload, mode='wb') as dst:
        shutil.copyfileobj(BytesIO(b'hello'), dst)
    gz_payload.seek(0)
    fs.store('hello.txt', gz_payload, version=1, compressed=True)
    link = os.path.join(self.temp_dir, 'links', 'hello.txt')
    with gzip.open(link, 'rb') as stored:
        self.assertEqual(stored.read(), b'hello')
class FiletrackerServer(base.Server):
    """A WSGI application providing a filetracker server.

    Note that this wouldn't work as standalone server: a "manager"
    process should handle DB initialization and recovery, refer to
    ``filetracker.servers.run`` for more details.
    """

    def __init__(self, dir=None):
        """Creates a server serving files from ``dir``.

        Falls back to the FILETRACKER_DIR environment variable when
        ``dir`` is not given; raises AssertionError if neither is set.
        """
        if dir is None:
            if 'FILETRACKER_DIR' not in os.environ:
                raise AssertionError(
                    "LocalFileServer must have its working "
                    "directory specified either as a constructor argument "
                    "or passed via FILETRACKER_DIR environment variable.")
            dir = os.environ['FILETRACKER_DIR']
        self.storage = FileStorage(dir)
        self.dir = self.storage.links_dir

    def parse_query_params(self, environ):
        """Returns the request's query string parsed into a dict of lists."""
        return parse_qs(environ.get('QUERY_STRING', ''))

    def _get_last_modified(self, environ):
        """Extracts the required ``last_modified`` query parameter.

        Returns it as a Unix timestamp (parsed from an RFC 2822 date).
        Raises a 400 error when the parameter is missing.  Shared by
        handle_PUT and handle_DELETE, which previously duplicated it.
        """
        query_params = self.parse_query_params(environ)
        last_modified = query_params.get('last_modified', (None,))[0]
        if not last_modified:
            # Message fixed: the parameter actually read is
            # "last_modified" (underscore), not "last-modified".
            raise base.HttpError('400 Bad Request',
                                 '"?last_modified=" is required')
        last_modified = email.utils.parsedate_tz(last_modified)
        return email.utils.mktime_tz(last_modified)

    def handle_PUT(self, environ, start_response):
        """Stores a new file version under ``/files/<path>``."""
        endpoint, path = base.get_endpoint_and_path(environ)
        if endpoint != 'files':
            raise base.HttpError('400 Bad Request',
                                 'PUT can be only performed on "/files/..."')

        content_length = int(environ.get('CONTENT_LENGTH'))
        last_modified = self._get_last_modified(environ)

        # Optional hints allowing storage to skip recompression/rehashing.
        compressed = environ.get('HTTP_CONTENT_ENCODING', None) == 'gzip'
        digest = environ.get('HTTP_SHA256_CHECKSUM', None)
        logical_size = environ.get('HTTP_LOGICAL_SIZE', None)

        if compressed and digest and logical_size:
            logger.debug('Handling PUT %s.', path)
        else:
            logger.info('Handling PUT %s with unusual headers: '
                        'compressed=%s, digest=%s, logical_size=%s',
                        path, compressed, digest, logical_size)

        version = self.storage.store(name=path,
                                     data=environ['wsgi.input'],
                                     version=last_modified,
                                     size=content_length,
                                     compressed=compressed,
                                     digest=digest,
                                     logical_size=logical_size)
        start_response('200 OK', [
            ('Content-Type', 'text/plain'),
            ('Last-Modified', email.utils.formatdate(version)),
        ])
        return []

    def _file_headers(self, name):
        """Builds response headers for serving the stored file ``name``."""
        link_st = os.lstat(os.path.join(self.dir, name))
        blob_st = os.stat(os.path.join(self.dir, name))
        logical_size = self.storage.logical_size(name)
        return [
            ('Content-Type', 'application/octet-stream'),
            ('Content-Length', str(blob_st.st_size)),
            ('Content-Encoding', 'gzip'),
            # The link's mtime encodes the file version.
            ('Last-Modified', email.utils.formatdate(link_st.st_mtime)),
            ('Logical-Size', str(logical_size)),
        ]

    def handle_GET(self, environ, start_response):
        """Dispatches GET to /list/, /version/ or /files/ handling."""
        endpoint, path = base.get_endpoint_and_path(environ)
        if endpoint == 'list':
            return self.handle_list(environ, start_response)
        elif endpoint == 'version':
            return self.handle_version(environ, start_response)
        elif endpoint == 'files':
            full_path = os.path.join(self.dir, path)
            if not os.path.isfile(full_path):
                raise base.HttpError('404 Not Found',
                                     'File "{}" not found'.format(full_path))
            start_response('200 OK', self._file_headers(path))
            return _FileIterator(open(full_path, 'rb'))
        else:
            raise base.HttpError(
                '400 Bad Request',
                'Unknown endpoint "{}", expected "files" or "list"'.format(
                    endpoint))

    def handle_DELETE(self, environ, start_response):
        """Deletes ``/files/<path>`` if stored version <= last_modified."""
        endpoint, path = base.get_endpoint_and_path(environ)
        if endpoint != 'files':
            # Bug fix: was a bare ``HttpError`` (NameError at runtime);
            # only ``base.HttpError`` is in scope here.
            raise base.HttpError(
                '400 Bad Request',
                'DELETE can be only performed on "/files/..."')

        last_modified = self._get_last_modified(environ)
        logger.debug('Handling DELETE %s@%d', path, last_modified)
        try:
            self.storage.delete(name=path, version=last_modified)
        except FiletrackerFileNotFoundError:
            raise base.HttpError('404 Not Found', '')
        start_response('200 OK', [])
        return []

    def handle_list(self, environ, start_response):
        """Streams a listing of files under ``/list/<path>``."""
        _, path = base.get_endpoint_and_path(environ)
        query_params = self.parse_query_params(environ)
        last_modified = query_params.get('last_modified', (None,))[0]
        if not last_modified:
            # Default to "now" when no version cutoff is given.
            last_modified = int(time.time())

        logger.debug('Handling GET /list/%s (@%d)', path, last_modified)

        root_dir = os.path.join(self.dir, path)
        if not os.path.isdir(root_dir):
            raise base.HttpError('400 Bad Request',
                                 'Path doesn\'t exist or is not a directory')
        start_response('200 OK', [])
        return _list_files_iterator(root_dir, last_modified)

    def handle_version(self, environ, start_response):
        """Reports the server's supported protocol versions as JSON."""
        start_response('200 OK', [('Content-Type', 'application/json')])
        response = {
            'protocol_versions': [2],
        }
        return [json.dumps(response).encode('utf8')]
class FiletrackerServer(base.Server):
    """A WSGI application providing a filetracker server.

    Note that this wouldn't work as standalone server: a "manager"
    process should handle DB initialization and recovery, refer to
    ``filetracker.servers.run`` for more details.
    """

    def __init__(self, dir=None):
        """Creates a server serving files from ``dir``.

        Falls back to the FILETRACKER_DIR environment variable when
        ``dir`` is not given; raises AssertionError if neither is set.
        """
        if dir is None:
            if 'FILETRACKER_DIR' not in os.environ:
                raise AssertionError(
                    "LocalFileServer must have its working "
                    "directory specified either as a constructor argument "
                    "or passed via FILETRACKER_DIR environment variable.")
            dir = os.environ['FILETRACKER_DIR']
        self.storage = FileStorage(dir)
        self.dir = self.storage.links_dir

    def parse_query_params(self, environ):
        """Returns the request's query string parsed into a dict of lists."""
        return parse_qs(environ.get('QUERY_STRING', ''))

    def _get_last_modified(self, environ):
        """Extracts the required ``last_modified`` query parameter.

        Returns it as a Unix timestamp (parsed from an RFC 2822 date).
        Raises a 400 error when the parameter is missing.  Shared by
        handle_PUT and handle_DELETE, which previously duplicated it.
        """
        query_params = self.parse_query_params(environ)
        last_modified = query_params.get('last_modified', (None,))[0]
        if not last_modified:
            # Message fixed: the parameter actually read is
            # "last_modified" (underscore), not "last-modified".
            raise base.HttpError('400 Bad Request',
                                 '"?last_modified=" is required')
        last_modified = email.utils.parsedate_tz(last_modified)
        return email.utils.mktime_tz(last_modified)

    def handle_PUT(self, environ, start_response):
        """Stores a new file version under ``/files/<path>``."""
        endpoint, path = base.get_endpoint_and_path(environ)
        if endpoint != 'files':
            raise base.HttpError('400 Bad Request',
                                 'PUT can be only performed on "/files/..."')

        content_length = int(environ.get('CONTENT_LENGTH'))
        last_modified = self._get_last_modified(environ)

        # Optional hints allowing storage to skip recompression/rehashing.
        compressed = environ.get('HTTP_CONTENT_ENCODING', None) == 'gzip'
        digest = environ.get('HTTP_SHA256_CHECKSUM', None)
        logical_size = environ.get('HTTP_LOGICAL_SIZE', None)

        if compressed and digest and logical_size:
            logger.debug('Handling PUT %s.', path)
        else:
            logger.info('Handling PUT %s with unusual headers: '
                        'compressed=%s, digest=%s, logical_size=%s',
                        path, compressed, digest, logical_size)

        version = self.storage.store(name=path,
                                     data=environ['wsgi.input'],
                                     version=last_modified,
                                     size=content_length,
                                     compressed=compressed,
                                     digest=digest,
                                     logical_size=logical_size)
        start_response('200 OK', [
            ('Content-Type', 'text/plain'),
            ('Last-Modified', email.utils.formatdate(version)),
        ])
        return []

    def _file_headers(self, name):
        """Builds response headers for serving the stored file ``name``."""
        link_st = os.lstat(os.path.join(self.dir, name))
        blob_st = os.stat(os.path.join(self.dir, name))
        logical_size = self.storage.logical_size(name)
        return [
            ('Content-Type', 'application/octet-stream'),
            ('Content-Length', str(blob_st.st_size)),
            ('Content-Encoding', 'gzip'),
            # The link's mtime encodes the file version.
            ('Last-Modified', email.utils.formatdate(link_st.st_mtime)),
            ('Logical-Size', str(logical_size)),
        ]

    def handle_GET(self, environ, start_response):
        """Dispatches GET to /list/, /version/ or /files/ handling."""
        endpoint, path = base.get_endpoint_and_path(environ)
        if endpoint == 'list':
            return self.handle_list(environ, start_response)
        elif endpoint == 'version':
            return self.handle_version(environ, start_response)
        elif endpoint == 'files':
            full_path = os.path.join(self.dir, path)
            if not os.path.isfile(full_path):
                raise base.HttpError('404 Not Found',
                                     'File "{}" not found'.format(full_path))
            start_response('200 OK', self._file_headers(path))
            return _FileIterator(open(full_path, 'rb'))
        else:
            raise base.HttpError(
                '400 Bad Request',
                'Unknown endpoint "{}", expected "files" or "list"'
                .format(endpoint))

    def handle_DELETE(self, environ, start_response):
        """Deletes ``/files/<path>`` if stored version <= last_modified."""
        endpoint, path = base.get_endpoint_and_path(environ)
        if endpoint != 'files':
            # Bug fix: was a bare ``HttpError`` (NameError at runtime);
            # only ``base.HttpError`` is in scope here.
            raise base.HttpError(
                '400 Bad Request',
                'DELETE can be only performed on "/files/..."')

        last_modified = self._get_last_modified(environ)
        logger.debug('Handling DELETE %s@%d', path, last_modified)
        try:
            self.storage.delete(name=path, version=last_modified)
        except FiletrackerFileNotFoundError:
            raise base.HttpError('404 Not Found', '')
        start_response('200 OK', [])
        return []

    def handle_list(self, environ, start_response):
        """Streams a listing of files under ``/list/<path>``."""
        _, path = base.get_endpoint_and_path(environ)
        query_params = self.parse_query_params(environ)
        last_modified = query_params.get('last_modified', (None,))[0]
        if not last_modified:
            # Default to "now" when no version cutoff is given.
            last_modified = int(time.time())

        logger.debug('Handling GET /list/%s (@%d)', path, last_modified)

        root_dir = os.path.join(self.dir, path)
        if not os.path.isdir(root_dir):
            raise base.HttpError('400 Bad Request',
                                 'Path doesn\'t exist or is not a directory')
        start_response('200 OK', [])
        return _list_files_iterator(root_dir, last_modified)

    def handle_version(self, environ, start_response):
        """Reports the server's supported protocol versions as JSON."""
        start_response('200 OK', [('Content-Type', 'application/json')])
        response = {
            'protocol_versions': [2],
        }
        return [json.dumps(response).encode('utf8')]