Example #1
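These snippets come from a pytest suite exercising Hangar's experimental TensorFlow dataloader. A common preamble along these lines is assumed throughout; the exact import path of make_tf_dataset is an assumption that may vary between Hangar releases, and fixtures such as repo_with_20_samples are defined elsewhere in the suite:

    from os import mkdir
    from os.path import join as pjoin

    import numpy as np
    import pytest
    import tensorflow as tf

    from hangar import Repository, make_tf_dataset  # import path assumed; may differ by release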
    def test_warns_experimental(self, repo_with_20_samples):
        repo = repo_with_20_samples
        co = repo.checkout()
        first_aset = co.arraysets['writtenaset']
        second_aset = co.arraysets['second_aset']
        with pytest.warns(UserWarning, match='Dataloaders are experimental'):
            make_tf_dataset([first_aset, second_aset])
        co.close()
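Outside the test harness the warning is simply emitted on first use; a minimal sketch of ordinary read-only consumption, acknowledging the experimental notice with the standard warnings module (same fixture assumptions as above):

    import warnings

    co = repo.checkout()  # read-only checkout; repo as in the fixtures above
    aset = co.arraysets['writtenaset']
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)  # acknowledge the experimental notice
        tf_dset = make_tf_dataset(aset)
    for (sample,) in tf_dset.take(1):  # elements are 1-tuples of tensors
        print(sample.shape)  # (5, 7) for this fixture
    co.close()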
Example #2
    def test_dataset_loader_fails_with_write_enabled_checkout(self, repo_with_20_samples):
        repo = repo_with_20_samples
        co = repo.checkout(write=True)
        first_aset = co.arraysets['writtenaset']
        second_aset = co.arraysets['second_aset']
        with pytest.raises(TypeError):
            make_tf_dataset([first_aset, second_aset])
        co.close()
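The TypeError is raised as soon as a column from a write-enabled checkout is passed in, presumably because data could be mutated while the dataset iterates over it; only read-only checkouts (repo.checkout() with no arguments, as in the sketch above) are accepted.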
Example #3
    def test_warns_arrayset_sample_size_mismatch(self, repo_with_20_samples):
        repo = repo_with_20_samples
        co = repo.checkout(write=True)
        second_aset = co.arraysets['second_aset']
        del second_aset['10']
        co.commit('deleting')
        co.close()

        co = repo.checkout()
        first_aset = co.arraysets['writtenaset']
        second_aset = co.arraysets['second_aset']
        with pytest.warns(UserWarning, match='Arraysets do not contain equal number of samples'):
            make_tf_dataset([first_aset, second_aset])
        co.close()
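When the arraysets hold different numbers of samples, the loader warns rather than fails; from the behaviour exercised here it appears to proceed over the keys the columns have in common, though that intersection semantics is an inference from the test, not a documented guarantee.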
Example #4
    def test_with_keys(self, repo_with_20_samples):
        repo = repo_with_20_samples
        co = repo.checkout()
        aset = co.arraysets['writtenaset']

        # with keys
        keys = ['2', '4', '5', '6', '7', '9', '15', '18', '19']
        bad_tensor0 = aset['0']
        bad_tensor1 = aset['1']
        bad_tensor3 = aset['3']
        bad_tensor8 = aset['8']

        tf_dset = make_tf_dataset(aset, keys=keys)
        tf_dset = tf_dset.batch(3)
        total_batches = 0
        for dset1 in tf_dset:
            total_batches += 1
            assert dset1[0].shape == tf.TensorShape((3, 5, 7))
            for sample in dset1[0]:
                assert not np.allclose(sample, bad_tensor0)
                assert not np.allclose(sample, bad_tensor1)
                assert not np.allclose(sample, bad_tensor3)
                assert not np.allclose(sample, bad_tensor8)
        assert total_batches == 3
        co.close()
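Nine keys batched in threes yields exactly three batches, and the allclose checks confirm the four omitted samples never appear. A common pattern is to derive the key subset from the column itself; a sketch, assuming Hangar columns expose a dict-like keys() accessor:

    co = repo.checkout()
    aset = co.arraysets['writtenaset']
    keys = [k for k in aset.keys() if k not in ('0', '1', '3', '8')]  # keys() assumed dict-like
    tf_dset = make_tf_dataset(aset, keys=keys).batch(3)
    # ... consume tf_dset before closing; the dataset reads lazily from the checkout
    co.close()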
Example #5
    def test_lots_of_data_with_multiple_backend(self, repo_with_10000_samples):
        repo = repo_with_10000_samples
        co = repo.checkout()
        aset = co.arraysets['aset']
        tf_dset = make_tf_dataset([aset])
        tf_dset = tf_dset.batch(1000)
        for data in tf_dset:
            assert data[0].shape == (1000, 5, 7)
        co.close()
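The shape assertion holds on every batch only because 10,000 divides evenly by 1,000; with a non-divisor batch size the final partial batch would fail it unless drop_remainder=True were passed to batch(). The point of the fixture is that samples stored across multiple storage backends stream through one tf.data pipeline transparently.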
Example #6
    def test_local_without_data_fails_data_unavailable(self, written_two_cmt_server_repo, managed_tmpdir):
        new_tmpdir = pjoin(managed_tmpdir, 'new')
        mkdir(new_tmpdir)
        server, _ = written_two_cmt_server_repo
        repo = Repository(path=new_tmpdir, exists=False)
        repo.clone('name', '[email protected]', server, remove_old=True)
        co = repo.checkout()
        aset = co.arraysets['writtenaset']
        with pytest.raises(FileNotFoundError):
            tf_dset = make_tf_dataset(aset, keys=['1', '2'])
        co.close()
        repo._env._close_environments()
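A clone copies commit history and sample references but not necessarily the sample data itself, so asking the loader for keys whose payloads were never fetched locally surfaces as a FileNotFoundError.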
Example #7
    def test_dataset_loader(self, repo_with_20_samples):
        repo = repo_with_20_samples
        co = repo.checkout()
        first_aset = co.arraysets['writtenaset']
        second_aset = co.arraysets['second_aset']

        # multiple datasets
        tf_dset = make_tf_dataset([first_aset, second_aset])
        tf_dset = tf_dset.batch(6)
        for dset1, dset2 in tf_dset.take(2):
            assert dset1.shape == tf.TensorShape((6, 5, 7))
            assert dset2.shape == tf.TensorShape((6, 5, 7))
        co.close()
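With a list of arraysets the dataset yields one tensor per column per element, in the order the columns were passed, which is why the loop unpacks (dset1, dset2); take(2) simply limits the check to the first two batches.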
Example #8
    def test_local_without_data_fails_no_common_no_local(
            self, written_two_cmt_server_repo, managed_tmpdir):
        new_tmpdir = pjoin(managed_tmpdir, 'new')
        mkdir(new_tmpdir)
        server, _ = written_two_cmt_server_repo
        repo = Repository(path=new_tmpdir, exists=False)
        repo.clone('name', '[email protected]', server, remove_old=True)
        co = repo.checkout()
        aset = co.columns['writtenaset']
        with pytest.raises(ValueError):
            tf_dset = make_tf_dataset(aset)
        co.close()
        repo._env._close_environments()
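Unlike the previous failure, no explicit keys are requested here, and with no sample data available locally there is nothing to build a dataset from, hence ValueError rather than FileNotFoundError. Note that this snippet accesses co.columns where the others use co.arraysets; it appears to come from a later Hangar release in which arraysets was renamed to columns.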
Example #9
    def test_variably_shaped(self, variable_shape_written_repo):
        # A variably shaped test is required since collation depends on
        # how the generator returns the data
        repo = variable_shape_written_repo
        co = repo.checkout(write=True)
        aset = co.arraysets['writtenaset']
        for i in range(5, 10):
            aset[i] = np.random.random((2, i))
        co.commit('added data')
        co.close()

        co = repo.checkout()
        aset = co.arraysets['writtenaset']
        tf_dset = make_tf_dataset(aset)
        shape_obj = tf.TensorShape((2, None))
        tf_dset = tf_dset.padded_batch(5, padded_shapes=(shape_obj,))
        for val in tf_dset:
            assert val[0].shape[0] == 5
            assert val[0].shape[1] == 2
            assert 11 > val[0].shape[2] > 4
        co.close()
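The loop writes samples of shape (2, 5) through (2, 9) alongside whatever the fixture already contains; padded_batch pads the ragged last axis to the longest sample in each batch of five, so the padded width must fall strictly between 4 and 11, which is exactly what the assertions check. padded_shapes=(shape_obj,) is a 1-tuple because each dataset element is itself a 1-tuple of tensors.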
Example #10
    def test_with_index_range(self, repo_with_20_samples):
        repo = repo_with_20_samples
        co = repo.checkout()
        aset = co.arraysets['writtenaset']

        # reference tensors for samples excluded by the index range
        bad_tensor0 = aset['0']
        bad_tensor1 = aset['1']

        # with index range
        index_range = slice(2, 20)
        tf_dset = make_tf_dataset(aset, index_range=index_range)
        tf_dset = tf_dset.batch(3)
        total_batches = 0
        for dset1 in tf_dset:
            total_batches += 1
            assert dset1[0].shape == tf.TensorShape((3, 5, 7))
            for sample in dset1[0]:
                assert not np.allclose(sample, bad_tensor0)
                assert not np.allclose(sample, bad_tensor1)
        assert total_batches == 6
        co.close()
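slice(2, 20) covers eighteen samples, hence six full batches of three, and the allclose checks confirm that samples '0' and '1' from before the range never appear. index_range appears to slice the column's ordered key sequence by position, a convenience over spelling out the equivalent keys list by hand.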