def test_tsv():
    """A tab-separated file passes CSVChecker and renders the CSV thumbnail."""
    with TemporaryDirectory() as scratch:
        workdir = Path(scratch)
        req = CheckRequest(
            realpath=workdir / 'test.tsv',
            tmpdir=workdir,
            virtpath=Path('irrelevant'),
        )
        req.thumb = True
        # Same fixture as the CSV test, with commas swapped for tabs.
        tab_separated = _TEST_CSV.replace(',', '\t')
        req.realpath.write_text(tab_separated)

        res = CSVChecker().check(req)

        assert isinstance(res.recognizer, CSVChecker)
        assert res.errors == []
        # Store the rendered thumbnail under _IMGDIR as tsv.png.
        rendered_path = Path(_IMGDIR) / 'tsv.png'
        res.thumb.save(str(rendered_path))
        # The expected image is the CSV reference (tests/csv.png).
        reference_path = Path('tests') / 'csv.png'
        with Image.open(str(reference_path)) as img:
            assert res.thumb.tobytes() == img.tobytes()
def test_corrupt_tarfile():
    """A gzip-compressed tarball with one overwritten byte reports an error."""
    with TemporaryDirectory() as scratch:
        root = Path(scratch)
        req = CheckRequest(
            realpath=root / 'test.tar',
            tmpdir=root,
            virtpath='irrelevant',
        )

        alpha = root / 'alpha'
        alpha.mkdir()
        beta = root / 'beta'
        beta.mkdir()
        (alpha / 'file1').write_text('hello' * 10)
        (beta / 'file2').write_text('goodbye' * 10)

        with tarfile.open(req.realpath, 'w:gz') as archive:
            archive.add(alpha, 'alpha')
            archive.add(beta, 'beta')

        # Overwrite the byte at offset 30 to damage the archive.
        payload = bytearray(req.realpath.read_bytes())
        payload[30] = 10
        req.realpath.write_bytes(payload)

        res = TarChecker().check(req)

        assert res.recognizer is None  # TODO perhaps should be set
        assert res.extracted is None
        assert len(res.errors) == 1
        (error,) = res.errors
        assert 'Error -3' in str(error)
def test_image_valid():
    """A valid JPEG is recognized; a thumbnail appears only when requested."""
    with TemporaryDirectory() as scratch:
        workdir = Path(scratch)
        req = CheckRequest(
            realpath=Path('tests') / 'testimage.jpg',
            tmpdir=workdir,
            virtpath=Path('irrelevant'),
        )

        # First pass: no thumbnail requested.
        plain = ImageChecker().check(req)
        assert isinstance(plain.recognizer, ImageChecker)
        assert plain.errors == []
        assert plain.thumb is None

        # Second pass: thumbnail requested.
        req.thumb = True
        res = ImageChecker().check(req)
        assert res.errors == []
        res.thumb.save(str(Path(_IMGDIR) / 'image.png'))
        with Image.open(Path('tests') / 'image.png') as img:
            assert res.thumb.tobytes() == img.tobytes()
def test_blacklist():
    """Virtual paths matching a blacklist pattern are skipped without error."""
    with TemporaryDirectory() as scratch:
        root = Path(scratch)
        target = root / 'test'
        target.write_text('hello')
        req = CheckRequest(
            tmpdir=root,
            realpath=target,
            virtpath='test.zip/yes.txt',
        )
        checker = FileNameChecker.default()
        checker.blacklist.append('test.zip/no.*')

        # 'yes.txt' does not match the pattern, so it is checked normally.
        allowed = checker.check(req)
        assert isinstance(allowed.recognizer, PlaintextChecker)
        assert allowed.errors == []
        assert not allowed.skipped

        # 'no.txt' matches the pattern and is skipped by the checker itself.
        req.virtpath = 'test.zip/no.txt'
        blocked = checker.check(req)
        assert blocked.recognizer is checker
        assert blocked.errors == []
        assert blocked.skipped
def test_no_match():
    """A '.foo' virtual path yields no recognizer and no errors."""
    with TemporaryDirectory() as scratch:
        root = Path(scratch)
        req = CheckRequest(
            tmpdir=root,
            realpath=root / 'test',
            virtpath='test.foo',
        )
        req.realpath.write_text('whatevs')

        outcome = FileNameChecker.default().check(req)

        assert outcome.recognizer is None
        assert outcome.errors == []
def test_xml_valid():
    """Valid XML is recognized; a thumbnail appears only when requested."""
    with TemporaryDirectory() as scratch:
        workdir = Path(scratch)
        req = CheckRequest(
            realpath=workdir / 'test.xml',
            tmpdir=workdir,
            virtpath=Path('irrelevant'),
        )
        req.realpath.write_text(_TEST_XML)

        # First pass: no thumbnail requested.
        plain = XMLChecker().check(req)
        assert isinstance(plain.recognizer, XMLChecker)
        assert plain.errors == []
        assert plain.thumb is None

        # Second pass: thumbnail requested.
        req.thumb = True
        res = XMLChecker().check(req)
        assert res.errors == []
        res.thumb.save(str(Path(_IMGDIR) / 'xml.png'))
        with Image.open(Path('tests') / 'xml.png') as img:
            assert res.thumb.tobytes() == img.tobytes()
def test_not_zip():
    """Plain text named '.zip' is rejected with a 'not a zipfile' error."""
    with TemporaryDirectory() as scratch:
        root = Path(scratch)
        req = CheckRequest(
            realpath=root / 'test.zip',
            tmpdir=root,
            virtpath='irrelevant',
        )
        req.realpath.write_text('garbage')

        outcome = ZipChecker().check(req)

        assert outcome.recognizer is None
        assert outcome.extracted is None
        assert outcome.errors == ['not a zipfile']
def test_match():
    """A '.zip' virtual path over a real zip file is handled by ZipChecker."""
    with TemporaryDirectory() as scratch:
        root = Path(scratch)
        req = CheckRequest(
            tmpdir=root,
            realpath=root / 'test',
            virtpath='test.zip',
        )
        # Opening for write and closing immediately leaves an empty archive.
        with ZipFile(req.realpath, 'w'):
            pass

        outcome = FileNameChecker.default().check(req)

        assert isinstance(outcome.recognizer, ZipChecker)
        assert outcome.errors == []
        assert outcome.extracted
def test_xml_invalid():
    """Unterminated XML yields one 'no element found' error, no recognizer."""
    with TemporaryDirectory() as scratch:
        workdir = Path(scratch)
        req = CheckRequest(
            realpath=workdir / 'test.xml',
            tmpdir=workdir,
            virtpath=Path('irrelevant'),
        )
        req.realpath.write_text('<root>garbage')

        outcome = XMLChecker().check(req)

        assert outcome.recognizer is None
        assert len(outcome.errors) == 1
        first_error = outcome.errors[0]
        assert 'no element found' in str(first_error)
def test_csv_missing_cols():
    """A CSV whose data row is short a column fails delimiter detection."""
    with TemporaryDirectory() as scratch:
        workdir = Path(scratch)
        req = CheckRequest(
            realpath=workdir / 'test.csv',
            tmpdir=workdir,
            virtpath=Path('irrelevant'),
        )
        # Three header fields but only two values in the data row.
        req.realpath.write_text('a,b,c\n1,2')

        outcome = CSVChecker().check(req)

        assert outcome.recognizer is None
        assert len(outcome.errors) == 1
        first_error = outcome.errors[0]
        assert str(first_error) == 'Could not determine delimiter'
def test_plaintext_valid():
    """Valid text is recognized; a thumbnail appears only when requested."""
    with TemporaryDirectory() as scratch:
        workdir = Path(scratch)
        req = CheckRequest(
            realpath=workdir / 'test.txt',
            tmpdir=workdir,
            virtpath=Path('irrelevant'),
        )
        req.realpath.write_text('Hello, world!😀\nNo problems here!\n')

        # First pass: no thumbnail requested.
        plain = PlaintextChecker().check(req)
        assert isinstance(plain.recognizer, PlaintextChecker)
        assert plain.errors == []
        assert plain.thumb is None

        # Second pass: thumbnail requested.
        req.thumb = True
        res = PlaintextChecker().check(req)
        assert res.errors == []
        res.thumb.save(str(Path(_IMGDIR) / 'plaintext.png'))
        # The font I'm currently using doesn't handle emoji but I don't
        # really care right now
        reference_path = Path('tests') / 'plaintext.png'
        with Image.open(str(reference_path)) as img:
            assert res.thumb.tobytes() == img.tobytes()
def test_json_invalid():
    """Truncated JSON yields a single JSONDecodeError and no recognizer."""
    with TemporaryDirectory() as scratch:
        workdir = Path(scratch)
        req = CheckRequest(
            realpath=workdir / 'test.json',
            tmpdir=workdir,
            virtpath=Path('irrelevant'),
        )
        # The closing brace is missing.
        req.realpath.write_text('{"garbage": true')

        outcome = JSONChecker().check(req)

        assert outcome.recognizer is None
        assert len(outcome.errors) == 1
        first_error = outcome.errors[0]
        assert isinstance(first_error, json.JSONDecodeError)
def test_empty_zip():
    """An empty zip archive is recognized and extracts to nothing."""
    with TemporaryDirectory() as scratch:
        root = Path(scratch)
        req = CheckRequest(
            realpath=root / 'test.zip',
            tmpdir=root,
            virtpath='irrelevant',
        )
        # Opening for write and closing immediately leaves an empty archive.
        with ZipFile(req.realpath, 'w'):
            pass

        outcome = ZipChecker().check(req)

        assert outcome.recognizer
        assert outcome.extracted
        extracted_entries = list(outcome.extracted.glob('**/*'))
        assert extracted_entries == []
        assert outcome.errors == []
def test_image_invalid():
    """Non-image bytes named '.jpg' yield one UnidentifiedImageError."""
    with TemporaryDirectory() as scratch:
        workdir = Path(scratch)
        req = CheckRequest(
            realpath=workdir / 'test.jpg',
            tmpdir=workdir,
            virtpath=Path('irrelevant'),
        )
        req.realpath.write_text('garbage')

        outcome = ImageChecker().check(req)

        assert outcome.recognizer is None
        assert len(outcome.errors) == 1
        first_error = outcome.errors[0]
        assert isinstance(first_error, UnidentifiedImageError)
        assert outcome.thumb is None
def test_failed_match():
    """A '.zip' name over non-zip content reports both the failure and why."""
    with TemporaryDirectory() as scratch:
        root = Path(scratch)
        req = CheckRequest(
            tmpdir=root,
            realpath=root / 'test',
            virtpath='test.zip',
        )
        req.realpath.write_text('whatevs')
        checker = FileNameChecker.default()

        outcome = checker.check(req)

        expected_errors = [
            'not a zipfile',
            'expected to be recognized by ZipChecker '
            'because filename matched: *.zip',
        ]
        assert outcome.recognizer == checker
        assert outcome.errors == expected_errors
def test_unsupported():
    """QLChecker reports an error when qlmanage produces no png."""
    with TemporaryDirectory() as scratch:
        root = Path(scratch)
        # omit file extension so QuickLook won't know what to do with it
        extensionless = root / 'text'
        req = CheckRequest(
            realpath=extensionless,
            tmpdir=root,
            virtpath='irrelevant',
        )
        req.realpath.write_text(_TEST_CSV)

        outcome = QLChecker().check(req)

        assert outcome.recognizer is None
        assert outcome.extracted is None
        assert outcome.errors == ['no png produced by qlmanage']
        assert outcome.thumb is None
def test_supported():
    """QLChecker recognizes a '.csv' file and thumbnails it on request."""
    with TemporaryDirectory() as scratch:
        root = Path(scratch)
        req = CheckRequest(
            realpath=root / 'text.csv',
            tmpdir=root,
            virtpath='irrelevant',
        )
        req.realpath.write_text(_TEST_CSV)

        # First pass: no thumbnail requested.
        without_thumb = QLChecker().check(req)
        assert without_thumb.recognizer
        assert without_thumb.extracted is None
        assert without_thumb.errors == []
        assert without_thumb.thumb is None

        # Second pass: thumbnail requested.
        req.thumb = True
        with_thumb = QLChecker().check(req)
        assert with_thumb.recognizer
        assert with_thumb.extracted is None
        assert with_thumb.errors == []
        with_thumb.thumb.save(str(Path(_IMGDIR) / 'quicklook.png'))
        # I have no idea whether the Quick Look thumbnails are identical
        # across MacOS installations, let alone across different versions
        # of the OS, so this test may be extremely brittle
        reference_path = Path('tests') / 'quicklook.png'
        with Image.open(str(reference_path)) as img:
            assert with_thumb.thumb.tobytes() == img.tobytes()
def test_plaintext_encoding_error():
    """A stray 0xfe byte is reported as a single UnicodeDecodeError."""
    with TemporaryDirectory() as scratch:
        workdir = Path(scratch)
        req = CheckRequest(
            realpath=workdir / 'test.txt',
            tmpdir=workdir,
            virtpath=Path('irrelevant'),
        )
        # Two UTF-8 encoded fragments with a lone 0xfe byte between them.
        prefix = 'Hello, world!'.encode('utf-8')
        suffix = 'oops'.encode('utf-8')
        req.realpath.write_bytes(prefix + bytes([0xfe]) + suffix)

        outcome = PlaintextChecker().check(req)

        # The checker still counts as the recognizer despite the error.
        assert isinstance(outcome.recognizer, PlaintextChecker)
        assert len(outcome.errors) == 1
        first_error = outcome.errors[0]
        assert isinstance(first_error, UnicodeDecodeError)
        assert outcome.thumb is None
def test_valid_zipfile():
    """A well-formed zip is recognized and both members extract intact.

    Directory enumeration order is not guaranteed, so the extracted
    entries are sorted by name before being compared. They are also
    globbed only once, so names and contents come from the same listing.
    """
    with TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        req = CheckRequest(realpath=tmpdir.joinpath('test.zip'),
                           tmpdir=tmpdir,
                           virtpath='irrelevant')
        with ZipFile(req.realpath, 'w') as zf:
            zf.writestr('good.txt', 'nice to meet you!')
            zf.writestr('bad.txt', 'this works fine')
        res = ZipChecker().check(req)
        assert res.recognizer
        assert res.extracted
        # Sort so the assertions do not depend on filesystem ordering.
        extracted = sorted(res.extracted.glob('**/*'), key=lambda p: p.name)
        assert [p.name for p in extracted] == ['bad.txt', 'good.txt']
        assert ([p.read_text() for p in extracted]
                == ['this works fine', 'nice to meet you!'])
        assert res.errors == []
def test_corrupt_zip():
    """A zip member with altered content is reported as a CRC-32 error.

    The archive is written, then the bytes 'work' inside the 'bad.txt'
    payload are replaced with 'fail'. Both members are still expected to
    be extracted. Directory enumeration order is not guaranteed, so the
    extracted names are sorted before being compared.
    """
    with TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        req = CheckRequest(realpath=tmpdir.joinpath('test.zip'),
                           tmpdir=tmpdir,
                           virtpath='irrelevant')
        with ZipFile(req.realpath, 'w') as zf:
            zf.writestr('good.txt', 'nice to meet you!')
            zf.writestr('bad.txt', 'this works fine')
        # Same-length substitution: the payload changes, its size does not.
        old = req.realpath.read_bytes()
        corrupt = old.replace(bytes('work', 'utf-8'), bytes('fail', 'utf-8'))
        req.realpath.write_bytes(corrupt)
        res = ZipChecker().check(req)
        assert res.recognizer
        assert res.extracted
        # Sort so the assertion does not depend on filesystem ordering.
        extracted_names = sorted(p.name for p in res.extracted.glob('**/*'))
        assert extracted_names == ['bad.txt', 'good.txt']
        assert ([str(e) for e in res.errors]
                == ["Bad CRC-32 for file 'bad.txt'"])
def test_valid_tarfile():
    """Tarballs in each compression mode are recognized and fully extracted."""
    with TemporaryDirectory() as scratch:
        root = Path(scratch)
        req = CheckRequest(
            realpath=root / 'test.tar',
            tmpdir=root,
            virtpath='irrelevant',
        )

        alpha = root / 'alpha'
        alpha.mkdir()
        beta = root / 'beta'
        beta.mkdir()
        (alpha / 'file1').write_text('hello' * 10)
        (beta / 'file2').write_text('goodbye' * 10)

        def check_roundtrip(compression):
            """Build the archive in one mode, check it, then delete it."""
            with tarfile.open(req.realpath, f'w:{compression}') as archive:
                archive.add(alpha, 'alpha')
                archive.add(beta, 'beta')

            res = TarChecker().check(req)

            assert res.recognizer
            assert res.extracted
            # Only regular files count; directories are ignored.
            files = [
                entry
                for entry in res.extracted.glob('**/*')
                if entry.is_file()
            ]
            assert len(files) == 2
            first = res.extracted.joinpath('alpha', 'file1')
            second = res.extracted.joinpath('beta', 'file2')
            assert first in files
            assert second in files
            assert first.read_text() == 'hello' * 10
            assert second.read_text() == 'goodbye' * 10
            assert res.errors == []
            # Remove the archive so the next mode starts from scratch.
            req.realpath.unlink()

        # '' is uncompressed, followed by gzip, bzip2 and xz.
        for compression in ('', 'gz', 'bz2', 'xz'):
            check_roundtrip(compression)