コード例 #1
0
    def test_total_examples(self, tmpdir, datadir):
        """Check the count printed by the ``total-examples`` CLI command.

        The command is invoked twice: once with a ``train-*`` pattern after
        info was generated for the train file only, and once with ``*`` after
        info was generated for the validation file as well.
        """
        sample_dir = datadir / 'tfrecords' / 'simple'
        for record_name in ('train-00000-of-00001', 'validation-00000-of-00001'):
            shutil.copy(sample_dir / record_name, tmpdir)

        # At this point file info exists for the train file only.
        generate_fileinfo(Path(tmpdir / 'train-00000-of-00001'))
        train_run = self._invoke_cli('total-examples', str(tmpdir / 'train-*'))
        assert 'Total number of examples: 10' in train_run.output

        # Now generate info for the validation file and query everything.
        generate_fileinfo(Path(tmpdir / 'validation-00000-of-00001'))
        full_run = self._invoke_cli('total-examples', str(tmpdir / '*'))
        assert 'Total number of examples: 20' in full_run.output
コード例 #2
0
ファイル: test_meta.py プロジェクト: deeplab-ai/deephub
    def test_invalid_hash(self, tmpdir, datadir):
        """Overwriting the start of a record file must be detected.

        File info is generated for a pristine copy, then the first bytes of
        the file are overwritten. A non-shallow ``get_fileinfo`` is expected
        to raise ``TFRecordValidationError``, and regenerating the info is
        expected to raise ``tf.errors.DataLossError``.
        """
        shutil.copy(datadir / 'tfrecords' / 'simple' / 'train-00000-of-00001', tmpdir)
        fpath = Path(tmpdir / 'train-00000-of-00001')

        # Generate info for the untouched file; the returned object is not
        # needed by this test.
        generate_fileinfo(fpath)

        # Change the content by overwriting (not appending) bytes at the
        # beginning of the file.
        with open(fpath, 'r+b') as f:
            f.seek(0, io.SEEK_SET)
            f.write(b'junk')

        with pytest.raises(TFRecordValidationError):
            get_fileinfo(fpath, shallow_check=False)

        # Try to regenerate, should fail because not valid tf records
        with pytest.raises(tf.errors.DataLossError):
            generate_fileinfo(fpath)
コード例 #3
0
ファイル: test_meta.py プロジェクト: deeplab-ai/deephub
    def test_invalid_size(self, tmpdir, datadir):
        """Appending bytes to a record file must fail the non-shallow check.

        Regenerating the info afterwards is expected to succeed and to yield
        a different info object with the new size and hash.
        """
        sample = datadir / 'tfrecords' / 'simple' / 'train-00000-of-00001'
        shutil.copy(sample, tmpdir)
        record_path = Path(tmpdir / 'train-00000-of-00001')

        info_before = generate_fileinfo(record_path)

        # Grow the file by appending four bytes.
        with open(record_path, 'ab') as stream:
            stream.write(b'junk')

        with pytest.raises(TFRecordValidationError):
            get_fileinfo(record_path, shallow_check=False)

        # Regeneration will not fail because tf can handle trailing rubbish.
        info_after = generate_fileinfo(record_path)
        assert info_before is not info_after
        assert info_before != info_after
        assert info_after.file_size == 354
        assert info_after.md5_hash == '76a086a01e382560309ccfc232711dec'
コード例 #4
0
def generate_metadata(pattern, force, compression_type):
    """
    Generate metadata for tfrecord files.

    With this util you can generate metadata from tfrecords based on a matching
    glob pattern.

    Example: Generate metadata for training dataset
      deep utils generate-metadata 'dataset/train-*'
    """
    # NOTE(review): `force` is accepted but never read in this body - confirm
    # whether it should be forwarded somewhere.
    matched = resolve_glob_pattern(pattern)
    click.echo(f"{len(matched)} files matched with the pattern.")

    with click.progressbar(matched) as progress:
        for record_path in progress:
            try:
                generate_fileinfo(record_path, compression_type=compression_type)
            except Exception as err:
                # Best effort: report the failing file and move on to the next.
                click.echo(f'Skipping file {record_path} because of: {err!s}')
    click.echo('Finished generating metadata')
コード例 #5
0
ファイル: test_meta.py プロジェクト: deeplab-ai/deephub
    def test_deleted(self, tmpdir, datadir):
        """A non-shallow check on a deleted file must raise FileNotFoundError."""
        sample = datadir / 'tfrecords' / 'simple' / 'train-00000-of-00001'
        shutil.copy(sample, tmpdir)
        record_path = Path(tmpdir / 'train-00000-of-00001')

        # Generate info while the file exists, then remove the file itself.
        generate_fileinfo(record_path)
        record_path.unlink()

        with pytest.raises(FileNotFoundError):
            get_fileinfo(record_path, shallow_check=False)
コード例 #6
0
ファイル: test_meta.py プロジェクト: deeplab-ai/deephub
    def test_contains_iter_get(self, tmpdir, datadir):
        """Check membership and item lookup on ``TFRecordMetadata.from_folder``."""
        record_names = ('train-00000-of-00001', 'validation-00000-of-00001')
        for record_name in record_names:
            shutil.copy(datadir / 'tfrecords' / 'simple' / record_name, tmpdir)

        train_path, validation_path = (Path(tmpdir / name) for name in record_names)

        # Generate info for both files before loading the folder metadata.
        generate_fileinfo(train_path)
        generate_fileinfo(validation_path)

        folder_meta = TFRecordMetadata.from_folder(train_path.parent)

        assert 'train-00000-of-00001' in folder_meta
        assert 'validation-00000-of-00001' in folder_meta
        assert 'unknown' not in folder_meta

        train_info = folder_meta['train-00000-of-00001']
        assert train_info.name == 'train-00000-of-00001'

        validation_info = folder_meta['validation-00000-of-00001']
        assert validation_info.name == 'validation-00000-of-00001'
コード例 #7
0
ファイル: test_meta.py プロジェクト: deeplab-ai/deephub
    def test_generation_multiple_file_info(self, tmpdir, datadir):
        """Generate info for two files, then read both back.

        The info returned by ``get_fileinfo`` must be equal to, but not the
        same object as, the info returned by ``generate_fileinfo``.
        """
        record_names = ('train-00000-of-00001', 'validation-00000-of-00001')
        for record_name in record_names:
            shutil.copy(datadir / 'tfrecords' / 'simple' / record_name, tmpdir)

        train_path, validation_path = (Path(tmpdir / name) for name in record_names)

        # Generate for both files
        generated_train = generate_fileinfo(train_path)
        generated_validation = generate_fileinfo(validation_path)

        assert generated_train.md5_hash == '3c8c216b7293fdef623b04e01bb5878a'
        assert generated_validation.md5_hash == 'fdebe01f545d90f127a15ea2f28d3d1d'

        # Get from stored metadata
        stored_train = get_fileinfo(train_path)
        stored_validation = get_fileinfo(validation_path)

        assert stored_train is not generated_train
        assert stored_train == generated_train

        assert stored_validation is not generated_validation
        assert stored_validation == generated_validation
コード例 #8
0
ファイル: test_meta.py プロジェクト: deeplab-ai/deephub
    def test_non_existing_meta_file_generate(self, tmpdir, datadir):
        """Reading info before generation fails; after generation it succeeds."""
        sample = datadir / 'tfrecords' / 'simple' / 'train-00000-of-00001'
        shutil.copy(sample, tmpdir)
        record_path = Path(tmpdir / 'train-00000-of-00001')

        # Nothing has been generated yet, so the lookup must fail.
        with pytest.raises(TFRecordInfoMissingError):
            get_fileinfo(record_path)

        # Generate the info and check its fields.
        generated = generate_fileinfo(record_path)
        assert generated.md5_hash == '3c8c216b7293fdef623b04e01bb5878a'
        assert generated.file_size == 350
        assert generated.name == 'train-00000-of-00001'
        assert generated.full_path == Path(record_path)

        # A second lookup now returns an equal, but distinct, info object.
        fetched = get_fileinfo(record_path)

        assert generated is not fetched
        assert generated == fetched
コード例 #9
0
ファイル: test_meta.py プロジェクト: deeplab-ai/deephub
    def test_get_info_non_existing_file(self, tmpdir):
        """Both lookup and generation must raise for a path that does not exist."""
        missing_path = Path(tmpdir / 'nonexisting.tfrecords')

        for operation in (get_fileinfo, generate_fileinfo):
            with pytest.raises(FileNotFoundError):
                operation(missing_path)