def test_bundle_with_invalid_metadata(bundle_maker, random_metadata):
    """Reject a bundle whose metadata has had its 'what' key removed."""
    # Strip the key before serializing; the test expects the loader to
    # respond with InvalidDatalakeMetadata.
    del random_metadata['what']
    encoded_metadata = json.dumps(random_metadata).encode('utf-8')

    bundle = bundle_maker(
        content='1234'.encode('utf-8'),
        metadata=encoded_metadata,
        version='0'.encode('utf-8'),
    )

    with pytest.raises(InvalidDatalakeMetadata):
        File.from_bundle(bundle)
def test_bundle_with_invalid_metadata(bundle_maker, random_metadata):
    """Loading a bundle with 'what' missing from its metadata must fail.

    The expected failure is InvalidDatalakeMetadata.
    """
    del random_metadata['what']

    # Every bundle member is handed to the maker as UTF-8 encoded bytes.
    payload = '1234'.encode('utf-8')
    metadata_bytes = json.dumps(random_metadata).encode('utf-8')
    version_bytes = '0'.encode('utf-8')
    bundle = bundle_maker(
        content=payload,
        metadata=metadata_bytes,
        version=version_bytes,
    )

    with pytest.raises(InvalidDatalakeMetadata):
        File.from_bundle(bundle)
def test_valid_bundle(tmpdir, random_metadata):
    """Round-trip a file through a bundle; metadata and content must match."""
    bundle_path = os.path.join(str(tmpdir), 'foo.tar')

    original = random_file(tmpdir, metadata=random_metadata)
    original.to_bundle(bundle_path)
    restored = File.from_bundle(bundle_path)

    assert original.metadata == restored.metadata

    original_content = original.read()
    restored_content = restored.read()
    # Require non-empty content so the equality check below is not vacuous.
    assert original_content
    assert original_content == restored_content
def _synchronous_push(self, filename):
    """Push the bundle at ``filename`` to the archive, then delete it.

    If the bundle cannot be loaded (``InvalidDatalakeBundle``), the error
    is logged and the method returns without pushing or deleting anything.
    After a push the file is unlinked and, when ``self._callback`` is set,
    it is called with ``filename``.
    """
    try:
        bundle_file = File.from_bundle(filename)
    except InvalidDatalakeBundle as e:
        log.exception('{}. Skipping upload.'.format(e.args[0]))
        return

    url = self._archive.push(bundle_file)
    log.info(
        'Pushed {}({}) to {}'.format(
            filename, bundle_file.metadata['path'], url
        )
    )

    # The local bundle is removed only once the push has returned.
    os.unlink(filename)

    if self._callback is None:
        return
    self._callback(filename)
def test_pre_python_3_bundle():
    """Make sure legacy (pre-python-3) bundles still load.

    Prior to python 3 support, we relied on python to choose the most
    suitable encoding for files. Now we do it explicitly, so bundles
    written the old way must keep working.
    """
    bundle_id = '7c72f3ab092445a08aa6983c864c087c'
    bundle_path = os.path.join(legacy_bundles, bundle_id + '.tar')

    legacy_file = File.from_bundle(bundle_path)

    assert legacy_file.metadata == {
        'end': 1474308636507,
        'hash': '70373dec2de49d566fc1e34bacca7561',
        'id': bundle_id,
        'path': '/home/brian/src/datalake/chicken.log',
        'start': 1474308548000,
        'version': 0,
        'what': 'chicken',
        'where': 'nomad',
        'work_id': None,
    }
    assert legacy_file.read() == b'Wake up.\nEat. Mmm.\nHappy hour.\nSleep.\n'
def test_bundle_not_tar(tmpfile):
    """File.from_bundle must raise InvalidDatalakeBundle for this input."""
    # NOTE(review): presumably the tmpfile fixture creates a temp file
    # holding 'foobar' (i.e. not a tar archive) -- confirm in the fixture.
    not_a_bundle = tmpfile('foobar')

    with pytest.raises(InvalidDatalakeBundle):
        File.from_bundle(not_a_bundle)
def test_bundle_with_non_json_metadata(bundle_maker):
    """A bundle whose metadata is not JSON must be rejected.

    The expected failure is InvalidDatalakeBundle.
    """
    garbage_metadata = 'not:a%json#'.encode('utf-8')
    bundle = bundle_maker(
        content='1234'.encode('utf-8'),
        metadata=garbage_metadata,
        version='0'.encode('utf-8'),
    )

    with pytest.raises(InvalidDatalakeBundle):
        File.from_bundle(bundle)
def test_bundle_without_content(bundle_maker, random_metadata):
    """A bundle built without a content member must be rejected.

    The expected failure is InvalidDatalakeBundle.
    """
    metadata_bytes = json.dumps(random_metadata).encode('utf-8')
    version_bytes = '0'.encode('utf-8')

    # No ``content`` argument is passed to the maker on purpose.
    bundle = bundle_maker(metadata=metadata_bytes, version=version_bytes)

    with pytest.raises(InvalidDatalakeBundle):
        File.from_bundle(bundle)
def test_bundle_without_version(bundle_maker, random_metadata):
    """A bundle built without a version member must be rejected.

    The expected failure is InvalidDatalakeBundle.
    """
    metadata_bytes = json.dumps(random_metadata).encode('utf-8')
    payload = '1234'.encode('utf-8')

    # No ``version`` argument is passed to the maker on purpose.
    bundle = bundle_maker(content=payload, metadata=metadata_bytes)

    with pytest.raises(InvalidDatalakeBundle):
        File.from_bundle(bundle)