Esempio n. 1
0
def test_tsv():
    """Check that CSVChecker accepts a tab-separated file and thumbnails it.

    The CSV fixture is converted to TSV by swapping commas for tabs.  The
    produced thumbnail is saved as ``tsv.png`` under ``_IMGDIR`` and must be
    byte-identical to the stored ``tests/csv.png`` image.
    """
    with TemporaryDirectory() as raw_dir:
        workdir = Path(raw_dir)
        request = CheckRequest(
            virtpath=Path('irrelevant'),
            tmpdir=workdir,
            realpath=workdir / 'test.tsv')
        request.thumb = True
        tab_separated = _TEST_CSV.replace(',', '\t')
        request.realpath.write_text(tab_separated)

        result = CSVChecker().check(request)
        assert isinstance(result.recognizer, CSVChecker)
        assert result.errors == []

        # Keep a copy of the rendered thumbnail for manual inspection.
        saved_copy = Path(_IMGDIR) / 'tsv.png'
        result.thumb.save(str(saved_copy))
        reference_path = Path('tests') / 'csv.png'
        with Image.open(str(reference_path)) as reference:
            assert result.thumb.tobytes() == reference.tobytes()
def test_corrupt_tarfile():
    """Check that TarChecker reports an error for a damaged gzip tarball.

    A ``w:gz`` archive holding two small directories is written, one byte of
    the file is overwritten, and the check is expected to yield exactly one
    error mentioning ``Error -3`` with nothing recognized or extracted.
    """
    with TemporaryDirectory() as raw_dir:
        workdir = Path(raw_dir)
        request = CheckRequest(virtpath='irrelevant',
                               tmpdir=workdir,
                               realpath=workdir / 'test.tar')

        # Archive payload: two directories, each holding one text file.
        alpha = workdir / 'alpha'
        beta = workdir / 'beta'
        alpha.mkdir()
        beta.mkdir()
        (alpha / 'file1').write_text('hello' * 10)
        (beta / 'file2').write_text('goodbye' * 10)
        with tarfile.open(request.realpath, 'w:gz') as archive:
            archive.add(alpha, 'alpha')
            archive.add(beta, 'beta')

        # Damage the archive by overwriting the byte at offset 30.
        damaged = bytearray(request.realpath.read_bytes())
        damaged[30] = 10
        request.realpath.write_bytes(damaged)

        result = TarChecker().check(request)
        assert result.recognizer is None  # TODO perhaps should be set
        assert result.extracted is None
        assert len(result.errors) == 1
        assert 'Error -3' in str(result.errors[0])
Esempio n. 3
0
def test_image_valid():
    """Check that ImageChecker recognizes the stored JPEG test image.

    The first check runs before ``thumb`` is set on the request and expects
    no thumbnail.  The second runs with ``thumb`` set; its thumbnail is saved
    as ``image.png`` under ``_IMGDIR`` and must be byte-identical to the
    stored ``tests/image.png``.
    """
    with TemporaryDirectory() as raw_dir:
        request = CheckRequest(
            virtpath=Path('irrelevant'),
            tmpdir=Path(raw_dir),
            realpath=Path('tests') / 'testimage.jpg')

        plain = ImageChecker().check(request)
        assert isinstance(plain.recognizer, ImageChecker)
        assert plain.errors == []
        assert plain.thumb is None

        request.thumb = True
        with_thumb = ImageChecker().check(request)
        assert with_thumb.errors == []
        saved_copy = Path(_IMGDIR) / 'image.png'
        with_thumb.thumb.save(str(saved_copy))
        with Image.open(Path('tests') / 'image.png') as reference:
            assert with_thumb.thumb.tobytes() == reference.tobytes()
def test_blacklist():
    """Check that FileNameChecker skips virtual paths on its blacklist.

    The same real file is checked twice under different virtual paths: one
    that does not match the appended pattern ``test.zip/no.*`` and one that
    does.
    """
    with TemporaryDirectory() as raw_dir:
        workdir = Path(raw_dir)
        target = workdir / 'test'
        target.write_text('hello')
        request = CheckRequest(virtpath='test.zip/yes.txt',
                               realpath=target,
                               tmpdir=workdir)
        name_checker = FileNameChecker.default()
        name_checker.blacklist.append('test.zip/no.*')

        # Virtual path outside the blacklist: handled as plain text.
        allowed = name_checker.check(request)
        assert isinstance(allowed.recognizer, PlaintextChecker)
        assert allowed.errors == []
        assert not allowed.skipped

        # Virtual path matching the blacklist: the checker itself is the
        # recognizer and the result is flagged as skipped.
        request.virtpath = 'test.zip/no.txt'
        blocked = name_checker.check(request)
        assert blocked.recognizer is name_checker
        assert blocked.errors == []
        assert blocked.skipped
def test_no_match():
    """Check the default FileNameChecker on the virtual path ``test.foo``.

    The result is expected to carry no recognizer and no errors.
    """
    with TemporaryDirectory() as raw_dir:
        workdir = Path(raw_dir)
        request = CheckRequest(virtpath='test.foo',
                               realpath=workdir / 'test',
                               tmpdir=workdir)
        request.realpath.write_text('whatevs')

        name_checker = FileNameChecker.default()
        result = name_checker.check(request)
        assert result.recognizer is None
        assert result.errors == []
Esempio n. 6
0
def test_xml_valid():
    """Check that XMLChecker recognizes the XML fixture and thumbnails it.

    The first check runs before ``thumb`` is set on the request and expects
    no thumbnail.  The second runs with ``thumb`` set; its thumbnail is saved
    as ``xml.png`` under ``_IMGDIR`` and must be byte-identical to the stored
    ``tests/xml.png``.
    """
    with TemporaryDirectory() as raw_dir:
        workdir = Path(raw_dir)
        request = CheckRequest(
            virtpath=Path('irrelevant'),
            tmpdir=workdir,
            realpath=workdir / 'test.xml')
        request.realpath.write_text(_TEST_XML)

        plain = XMLChecker().check(request)
        assert isinstance(plain.recognizer, XMLChecker)
        assert plain.errors == []
        assert plain.thumb is None

        request.thumb = True
        with_thumb = XMLChecker().check(request)
        assert with_thumb.errors == []
        saved_copy = Path(_IMGDIR) / 'xml.png'
        with_thumb.thumb.save(str(saved_copy))
        with Image.open(Path('tests') / 'xml.png') as reference:
            assert with_thumb.thumb.tobytes() == reference.tobytes()
def test_not_zip():
    """Check that ZipChecker rejects a ``.zip`` file holding plain text.

    The result must carry no recognizer, no extraction directory, and the
    single error ``'not a zipfile'``.
    """
    with TemporaryDirectory() as raw_dir:
        workdir = Path(raw_dir)
        request = CheckRequest(virtpath='irrelevant',
                               tmpdir=workdir,
                               realpath=workdir / 'test.zip')
        request.realpath.write_text('garbage')

        result = ZipChecker().check(request)
        assert result.recognizer is None
        assert result.extracted is None
        assert result.errors == ['not a zipfile']
def test_match():
    """Check that a ``*.zip`` virtual path is dispatched to ZipChecker.

    The real file has no extension; only the virtual path ``test.zip``
    carries the suffix.  The file itself is a valid, empty zip archive, so
    the check must succeed without errors and yield an extraction result.
    """
    with TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        req = CheckRequest(tmpdir=tmpdir,
                           realpath=tmpdir.joinpath('test'),
                           virtpath='test.zip')
        # Opening for writing and closing immediately produces an empty but
        # valid archive; the handle itself is not needed.
        with ZipFile(req.realpath, 'w'):
            pass
        res = FileNameChecker.default().check(req)
        assert isinstance(res.recognizer, ZipChecker)
        assert res.errors == []
        assert res.extracted
Esempio n. 9
0
def test_xml_invalid():
    """Check that XMLChecker reports an error for an unclosed document.

    The input ``<root>garbage`` never closes its root element; the result
    must carry no recognizer and a single error whose text contains
    ``no element found``.
    """
    with TemporaryDirectory() as raw_dir:
        workdir = Path(raw_dir)
        request = CheckRequest(
            virtpath=Path('irrelevant'),
            tmpdir=workdir,
            realpath=workdir / 'test.xml')
        request.realpath.write_text('<root>garbage')

        result = XMLChecker().check(request)
        assert result.recognizer is None
        assert len(result.errors) == 1
        message = str(result.errors[0])
        assert 'no element found' in message
Esempio n. 10
0
def test_csv_missing_cols():
    """Check that CSVChecker rejects a CSV whose data row is short a column.

    The header has three fields and the only data row has two; the result
    must carry no recognizer and the single error
    ``Could not determine delimiter``.
    """
    with TemporaryDirectory() as raw_dir:
        workdir = Path(raw_dir)
        request = CheckRequest(
            virtpath=Path('irrelevant'),
            tmpdir=workdir,
            realpath=workdir / 'test.csv')
        request.realpath.write_text('a,b,c\n1,2')

        result = CSVChecker().check(request)
        assert result.recognizer is None
        assert len(result.errors) == 1
        message = str(result.errors[0])
        assert message == 'Could not determine delimiter'
Esempio n. 11
0
def test_plaintext_valid():
    """Check that PlaintextChecker accepts valid text and thumbnails it.

    The first check runs before ``thumb`` is set on the request and expects
    no thumbnail.  The second runs with ``thumb`` set; its thumbnail is saved
    as ``plaintext.png`` under ``_IMGDIR`` and must be byte-identical to the
    stored ``tests/plaintext.png``.
    """
    with TemporaryDirectory() as td:
        td = Path(td)
        req = CheckRequest(
            realpath=td.joinpath('test.txt'),
            tmpdir=td,
            virtpath=Path('irrelevant'))
        # The fixture contains an emoji, so the encoding is pinned to UTF-8;
        # relying on the locale default makes the write itself fail on
        # platforms whose default codec cannot represent it.
        req.realpath.write_text('Hello, world!😀\nNo problems here!\n',
                                encoding='utf-8')
        res = PlaintextChecker().check(req)
        assert isinstance(res.recognizer, PlaintextChecker)
        assert res.errors == []
        assert res.thumb is None

        req.thumb = True
        res = PlaintextChecker().check(req)
        assert res.errors == []
        res.thumb.save(str(Path(_IMGDIR).joinpath('plaintext.png')))
        # The font I'm currently using doesn't handle emoji but I don't
        # really care right now
        with Image.open(str(Path('tests').joinpath('plaintext.png'))) as img:
            assert res.thumb.tobytes() == img.tobytes()
Esempio n. 12
0
def test_json_invalid():
    """Check that JSONChecker reports a decode error for truncated JSON.

    The input object is missing its closing brace; the result must carry no
    recognizer and a single ``json.JSONDecodeError``.
    """
    with TemporaryDirectory() as raw_dir:
        workdir = Path(raw_dir)
        request = CheckRequest(
            virtpath=Path('irrelevant'),
            tmpdir=workdir,
            realpath=workdir / 'test.json')
        request.realpath.write_text('{"garbage": true')

        result = JSONChecker().check(request)
        assert result.recognizer is None
        assert len(result.errors) == 1
        only_error = result.errors[0]
        assert isinstance(only_error, json.JSONDecodeError)
def test_empty_zip():
    """Check that ZipChecker accepts a valid archive with no members.

    The check must set a recognizer, yield an extraction directory that
    contains nothing, and report no errors.
    """
    with TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        req = CheckRequest(realpath=tmpdir.joinpath('test.zip'),
                           tmpdir=tmpdir,
                           virtpath='irrelevant')
        # Opening for writing and closing immediately produces an empty but
        # valid archive; the handle itself is not needed.
        with ZipFile(req.realpath, 'w'):
            pass
        res = ZipChecker().check(req)
        assert res.recognizer
        assert res.extracted
        assert list(res.extracted.glob('**/*')) == []
        assert res.errors == []
Esempio n. 14
0
def test_image_invalid():
    """Check that ImageChecker rejects a ``.jpg`` file holding plain text.

    The result must carry no recognizer, no thumbnail, and a single
    ``UnidentifiedImageError``.
    """
    with TemporaryDirectory() as raw_dir:
        workdir = Path(raw_dir)
        request = CheckRequest(
            virtpath=Path('irrelevant'),
            tmpdir=workdir,
            realpath=workdir / 'test.jpg')
        request.realpath.write_text('garbage')

        result = ImageChecker().check(request)
        assert result.recognizer is None
        assert len(result.errors) == 1
        only_error = result.errors[0]
        assert isinstance(only_error, UnidentifiedImageError)
        assert result.thumb is None
def test_failed_match():
    """Check the errors when a ``*.zip`` virtual path holds a non-zip file.

    The result's recognizer must equal the FileNameChecker, and the errors
    must be the ``not a zipfile`` message followed by a note that the
    filename pattern ``*.zip`` matched.
    """
    with TemporaryDirectory() as raw_dir:
        workdir = Path(raw_dir)
        request = CheckRequest(virtpath='test.zip',
                               realpath=workdir / 'test',
                               tmpdir=workdir)
        request.realpath.write_text('whatevs')

        name_checker = FileNameChecker.default()
        result = name_checker.check(request)
        assert result.recognizer == name_checker

        expected_errors = [
            'not a zipfile',
            'expected to be recognized by ZipChecker '
            'because filename matched: *.zip',
        ]
        assert result.errors == expected_errors
def test_unsupported():
    """Check that QLChecker reports an error when no PNG is produced.

    The result must carry no recognizer, no extraction directory, no
    thumbnail, and the single error ``'no png produced by qlmanage'``.
    """
    with TemporaryDirectory() as raw_dir:
        workdir = Path(raw_dir)
        # omit file extension so QuickLook won't know what to do with it
        extensionless = workdir / 'text'
        request = CheckRequest(virtpath='irrelevant',
                               tmpdir=workdir,
                               realpath=extensionless)
        request.realpath.write_text(_TEST_CSV)

        result = QLChecker().check(request)
        assert result.recognizer is None
        assert result.extracted is None
        assert result.errors == ['no png produced by qlmanage']
        assert result.thumb is None
def test_supported():
    """Check that QLChecker recognizes a ``.csv`` file and thumbnails it.

    The first check runs before ``thumb`` is set on the request and expects
    no thumbnail.  The second runs with ``thumb`` set; its thumbnail is saved
    as ``quicklook.png`` under ``_IMGDIR`` and must be byte-identical to the
    stored ``tests/quicklook.png``.
    """
    with TemporaryDirectory() as raw_dir:
        workdir = Path(raw_dir)
        request = CheckRequest(virtpath='irrelevant',
                               tmpdir=workdir,
                               realpath=workdir / 'text.csv')
        request.realpath.write_text(_TEST_CSV)

        plain = QLChecker().check(request)
        assert plain.recognizer
        assert plain.extracted is None
        assert plain.errors == []
        assert plain.thumb is None

        request.thumb = True
        with_thumb = QLChecker().check(request)
        assert with_thumb.recognizer
        assert with_thumb.extracted is None
        assert with_thumb.errors == []
        saved_copy = Path(_IMGDIR) / 'quicklook.png'
        with_thumb.thumb.save(str(saved_copy))
        # I have no idea whether the Quick Look thumbnails are identical
        # across MacOS installations, let alone across different versions
        # of the OS, so this test may be extremely brittle
        reference_path = Path('tests') / 'quicklook.png'
        with Image.open(str(reference_path)) as reference:
            assert with_thumb.thumb.tobytes() == reference.tobytes()
Esempio n. 18
0
def test_plaintext_encoding_error():
    """Check that PlaintextChecker reports a stray ``0xfe`` byte.

    The file is UTF-8 text with a single ``0xfe`` byte inserted.  The
    recognizer is still expected to be a PlaintextChecker, with a single
    ``UnicodeDecodeError`` recorded and no thumbnail.
    """
    with TemporaryDirectory() as raw_dir:
        workdir = Path(raw_dir)
        request = CheckRequest(
            virtpath=Path('irrelevant'),
            tmpdir=workdir,
            realpath=workdir / 'test.txt')
        prefix = 'Hello, world!'.encode('utf-8')
        suffix = 'oops'.encode('utf-8')
        request.realpath.write_bytes(prefix + bytes([0xfe]) + suffix)

        result = PlaintextChecker().check(request)
        assert isinstance(result.recognizer, PlaintextChecker)
        assert len(result.errors) == 1
        only_error = result.errors[0]
        assert isinstance(only_error, UnicodeDecodeError)
        assert result.thumb is None
def test_valid_zipfile():
    """Check that ZipChecker extracts every member of a valid archive.

    Two text members are written; after the check both must be present in
    the extraction directory with their original contents and no errors
    may be reported.
    """
    with TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        req = CheckRequest(realpath=tmpdir.joinpath('test.zip'),
                           tmpdir=tmpdir,
                           virtpath='irrelevant')
        with ZipFile(req.realpath, 'w') as zf:
            zf.writestr('good.txt', 'nice to meet you!')
            zf.writestr('bad.txt', 'this works fine')
        res = ZipChecker().check(req)
        assert res.recognizer
        assert res.extracted
        # glob() yields entries in filesystem order, which is not
        # guaranteed, so compare sorted (name, content) pairs instead of
        # relying on the order in which the members were written.
        extracted = sorted(
            (p.name, p.read_text()) for p in res.extracted.glob('**/*'))
        assert extracted == [
            ('bad.txt', 'this works fine'),
            ('good.txt', 'nice to meet you!'),
        ]
        assert res.errors == []
def test_corrupt_zip():
    """Check that ZipChecker reports a CRC failure for a tampered member.

    The bytes ``work`` in the archive are replaced with ``fail`` after it
    is written, so the content of ``bad.txt`` no longer matches its recorded
    checksum.  Both members are still expected in the extraction directory,
    and exactly one error naming ``bad.txt`` must be reported.
    """
    with TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        req = CheckRequest(realpath=tmpdir.joinpath('test.zip'),
                           tmpdir=tmpdir,
                           virtpath='irrelevant')
        with ZipFile(req.realpath, 'w') as zf:
            zf.writestr('good.txt', 'nice to meet you!')
            zf.writestr('bad.txt', 'this works fine')
        old = req.realpath.read_bytes()
        # Same-length replacement keeps the archive structure intact while
        # changing the member's content.
        corrupt = old.replace(bytes('work', 'utf-8'), bytes('fail', 'utf-8'))
        req.realpath.write_bytes(corrupt)
        res = ZipChecker().check(req)
        assert res.recognizer
        assert res.extracted
        # glob() yields entries in filesystem order, which is not
        # guaranteed, so compare the sorted names.
        names = sorted(p.name for p in res.extracted.glob('**/*'))
        assert names == ['bad.txt', 'good.txt']
        assert ([str(e)
                 for e in res.errors] == ["Bad CRC-32 for file 'bad.txt'"])
def test_valid_tarfile():
    """Check that TarChecker extracts valid tarballs in several compressions.

    The same two-directory payload is archived uncompressed and with gzip,
    bzip2 and xz in turn.  Each archive must be recognized and extracted
    with both files intact and no errors; the archive is deleted before the
    next one is written.
    """
    with TemporaryDirectory() as raw_dir:
        workdir = Path(raw_dir)
        request = CheckRequest(virtpath='irrelevant',
                               tmpdir=workdir,
                               realpath=workdir / 'test.tar')

        # Archive payload: two directories, each holding one text file.
        alpha = workdir / 'alpha'
        beta = workdir / 'beta'
        alpha.mkdir()
        beta.mkdir()
        (alpha / 'file1').write_text('hello' * 10)
        (beta / 'file2').write_text('goodbye' * 10)

        for compression in ('', 'gz', 'bz2', 'xz'):
            with tarfile.open(request.realpath, f'w:{compression}') as archive:
                archive.add(alpha, 'alpha')
                archive.add(beta, 'beta')

            result = TarChecker().check(request)
            assert result.recognizer
            assert result.extracted

            # Only regular files count; the glob also yields directories.
            extracted_files = [
                entry for entry in result.extracted.glob('**/*')
                if entry.is_file()
            ]
            assert len(extracted_files) == 2
            first = result.extracted / 'alpha' / 'file1'
            second = result.extracted / 'beta' / 'file2'
            assert first in extracted_files
            assert second in extracted_files
            assert first.read_text() == 'hello' * 10
            assert second.read_text() == 'goodbye' * 10
            assert result.errors == []

            request.realpath.unlink()