Example #1
def test_should_save_and_read_pairs_correctly(batch_size):
    images_dataset: DictsDataset
    paths_dataset: DictsDataset
    images_dataset, paths_dataset = gen.dicts_dataset(batch_size=batch_size,
                                                      paired=True,
                                                      save_on_disc=True)
    raw_dataset_fragment = testing_helpers.dicts_dataset_to_raw_dataset_fragment(
        images_dataset)

    dataset_desc = gen.dataset_desc(
        storage_method=DatasetStorageMethod.ON_DISC,
        image_dimensions=ImageDimensions(testing_consts.TEST_IMAGE_SIZE))
    dataset_spec = gen.dataset_spec(description=dataset_desc,
                                    raw_dataset_fragment=raw_dataset_fragment)

    tfrecord_full_path = preparing_data.save_to_tfrecord(
        paths_dataset.features, paths_dataset.labels, 'data', dataset_spec)

    assert utils.check_filepath(tfrecord_full_path,
                                is_directory=False,
                                is_empty=False)

    dataset = reading_tfrecords.assemble_dataset(tfrecord_full_path.parent,
                                                 dataset_spec)
    dataset = dataset.repeat()
    dataset = dataset.batch(batch_size)
    first_batch = dataset.make_one_shot_iterator().get_next()
    _check_paired_result(
        first_batch,
        (images_dataset.features.left, images_dataset.features.right),
        images_dataset.labels)
Example #2
def test_should_create_same_pairs_without_repeating_pairs(
        number_translation_features_dict):
    pairs, _ = create_same_pairs(number_translation_features_dict,
                                 MIN_PAIRS_NUM,
                                 gen.dataset_spec(repeating_pairs=False))

    seen = set()
    for left, right in pairs:
        assert (left.trans, right.trans) not in seen
        seen.add((left.trans, right.trans))
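The non-repetition checked above could be implemented by enumerating each class's distinct combinations and sampling from them; a minimal sketch under that assumption (not the project's actual create_same_pairs):

import itertools
import random

def sample_same_pairs_without_repeats(features_by_class, min_pairs_num):
    # enumerate every distinct same-class pair exactly once, then sample
    candidates = []
    for elements in features_by_class.values():
        candidates.extend(itertools.combinations(elements, 2))
    random.shuffle(candidates)
    return candidates[:min_pairs_num]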
Example #3
def test_processed_input_data_dirs_placement(encoding, paired):
    processed_input_data_dir = filenames.get_processed_input_data_dir(
        gen.dataset_spec(encoding=encoding, paired=paired))

    not_encoded_fragment = ('' if encoding
                            else consts.INPUT_DATA_NOT_ENCODED_DIR_FRAGMENT + '/')
    pairing_fragment = (consts.INPUT_DATA_PAIRED_DIR_FRAGMENT if paired
                        else consts.INPUT_DATA_NOT_PAIRED_DIR_FRAGMENT)
    expected = '/tf/datasets/' + not_encoded_fragment + pairing_fragment
    assert_that(str(processed_input_data_dir), ends_with(expected))
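For concreteness, assuming hypothetical fragment values of 'not_encoded', 'paired' and 'unpaired' (the real consts values may differ), the asserted suffixes would be:

# encoding=True,  paired=True   -> '/tf/datasets/paired'
# encoding=True,  paired=False  -> '/tf/datasets/unpaired'
# encoding=False, paired=True   -> '/tf/datasets/not_encoded/paired'
# encoding=False, paired=False  -> '/tf/datasets/not_encoded/unpaired'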
Example #4
def test_should_save_and_read_unpaired_correctly(batch_size):
    images_dataset: DictsDataset = gen.dicts_dataset(batch_size=batch_size,
                                                     paired=False)

    tfrecord_full_path = preparing_data.save_to_tfrecord(
        images_dataset.features, images_dataset.labels, 'data',
        gen.dataset_spec(paired=False))

    assert utils.check_filepath(tfrecord_full_path,
                                is_directory=False,
                                is_empty=False)

    dataset = reading_tfrecords.assemble_dataset(
        tfrecord_full_path.parent, gen.dataset_spec(paired=False))
    dataset = dataset.repeat()
    dataset = dataset.batch(batch_size)
    iterator = dataset.make_one_shot_iterator()
    first_batch = iterator.get_next()
    _check_result(first_batch, images_dataset.features.all,
                  images_dataset.labels)
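Both read-back tests above use the TF 1.x make_one_shot_iterator graph API. Under TF 2.x the same first batch could be fetched eagerly; a sketch of that assumed migration (not code from the project):

import tensorflow as tf

def first_batch_eager(dataset: tf.data.Dataset):
    # in eager mode a tf.data.Dataset is directly iterable,
    # so no one-shot iterator or session is needed
    return next(iter(dataset))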
Example #5
def test_should_save_image_correctly(thor_image_path, encoding):
    show = False

    if thor_image_path.endswith(".jpg"):
        from PIL import Image
        # PIL.Image.open returns an Image object, not an ndarray; convert it
        # so the batch-dimension slicing below works in both branches
        thor = np.array(Image.open(tf_helpers.get_string(thor_image_path)))
    else:
        thor = mpimg.imread(tf_helpers.get_string(thor_image_path))

    image_arr = thor[None, :]

    if show:
        plt.imshow(image_arr.squeeze())
        plt.title('before')
        plt.show()

    two_images = {
        consts.LEFT_FEATURE_IMAGE: image_arr,
        consts.RIGHT_FEATURE_IMAGE: image_arr
    }
    label_dict = gen.paired_labels_dict()

    tfrecord_full_path = preparing_data.save_to_tfrecord(
        two_images, label_dict, 'thor', gen.dataset_spec(encoding=encoding))

    dataset = reading_tfrecords.assemble_dataset(
        tfrecord_full_path.parent, gen.dataset_spec(encoding=encoding))

    left_images, _, _, _, _ = tf_helpers.unpack_first_batch(dataset)

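    # The reading pipeline apparently stores images zero-centered (inferred
    # from the assertions below); adding 0.5 undoes that shift before
    # comparing against the source image.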
    decoded_thor = left_images + 0.5

    if show:
        plt.imshow(decoded_thor)
        plt.title('after')
        plt.show()

    assert np.squeeze(decoded_thor).shape == np.squeeze(image_arr).shape
    assert np.allclose(decoded_thor, image_arr)
Example #6
def test_should_read_and_save_image_correctly(thor_image_path, resizing):
    show = False

    thor = mpimg.imread(tf_helpers.get_string(thor_image_path))
    thor = skimage.img_as_float(thor)
    image_arr = thor[None, :]

    if show:
        plt.imshow(image_arr.squeeze())
        plt.title('before')
        plt.show()

    features_as_paths = {
        consts.FEATURES: np.array([thor_image_path]),
    }

    labels = gen.unpaired_labels_dict()

    if resizing:
        shape = (100, 100, 3)
    else:
        shape = thor.shape
    dataset_desc = gen.dataset_desc(
        storage_method=DatasetStorageMethod.ON_DISC,
        image_dimensions=ImageDimensions(shape))
    raw_dataset_fragment = RawDatasetFragment(features=image_arr,
                                              labels=np.array(
                                                  list(labels.values())))
    dataset_spec = gen.dataset_spec(description=dataset_desc,
                                    raw_dataset_fragment=raw_dataset_fragment,
                                    paired=False)

    tfrecord_full_path = preparing_data.save_to_tfrecord(
        features_as_paths, labels, 'thor', dataset_spec)

    dataset = reading_tfrecords.assemble_dataset(tfrecord_full_path.parent,
                                                 dataset_spec)

    left_images, _ = tf_helpers.unpack_first_batch(dataset)

    decoded_thor = left_images + 0.5

    if show:
        plt.imshow(decoded_thor)
        plt.title('after')
        plt.show()

    assert np.squeeze(decoded_thor).shape == shape
    if not resizing:
        assert np.allclose(decoded_thor, image_arr, rtol=1.e-1, atol=1.e-1)
Example #7
def test_all_unpaired_dataset_providers_should_get_features_from_raw_data_provider(
        description, dataset_provider_cls_name):
    provider = dataset_provider_cls_name(
        FakeRawDataProvider(curated=True, description=description))

    image_dims = provider.raw_data_provider.description.image_dimensions
    batch_size = 12
    dataset_spec = gen.dataset_spec(description=description,
                                    type=DatasetType.TEST,
                                    with_excludes=False,
                                    encoding=False,
                                    paired=False)
    dataset = provider.supply_dataset(dataset_spec,
                                      batch_size=batch_size).take(100)
    images, labels = tf_helpers.unpack_first_batch(dataset)

    assert images.shape == (batch_size, *image_dims)

    assert labels.shape == (batch_size, )
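The (batch_size, *image_dims) assertion implies ImageDimensions unpacks like a tuple, while the earlier examples construct it from a single tuple or size. A compatible wrapper, sketched as an inference rather than the project's real class:

class ImageDimensionsSketch:
    # hypothetical stand-in for the project's ImageDimensions
    def __init__(self, dims):
        # accept a full (height, width, channels) tuple or a bare square size
        self.dims = tuple(dims) if isinstance(dims, (tuple, list)) else (dims, dims, 3)

    def __iter__(self):
        return iter(self.dims)

    def __getitem__(self, index):
        return self.dims[index]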
Example #8
def test_should_include_reduced_size_in_path(expected_size,
                                             should_image_size_be_reduced):
    images_dataset: DictsDataset
    paths_dataset: DictsDataset
    images_dataset, paths_dataset = gen.dicts_dataset(save_on_disc=True)

    dataset_desc = gen.dataset_desc(
        storage_method=DatasetStorageMethod.ON_DISC,
        image_dimensions=ImageDimensions(expected_size))
    raw_dataset_fragment = testing_helpers.dicts_dataset_to_raw_dataset_fragment(
        images_dataset)
    dataset_spec = gen.dataset_spec(description=dataset_desc,
                                    raw_dataset_fragment=raw_dataset_fragment,
                                    paired=False)
    tfrecord_full_path = preparing_data.save_to_tfrecord(
        paths_dataset.features, paths_dataset.labels, 'data', dataset_spec)

    parts = tfrecord_full_path.parts
    if should_image_size_be_reduced:
        assert ("size_" + str(expected_size[0])) in parts
    else:
        assert_that(parts, not_(contains("size_" + str(expected_size[0]))))
Example #9
import collections

import pytest
from hamcrest import assert_that, is_in

from src.data.processing.generating_pairs import create_same_pairs, create_different_pairs, determine_class_size, \
    get_random_element
from testing_utils import gen

DATASET_SPEC = gen.dataset_spec()

MIN_PAIRS_NUM = 30


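# Note: for min_pairs_num of 20 and 1000 the expected totals (21, 1002)
# exceed the requested minimum, presumably because determine_class_size
# rounds the per-class quota up so every class contributes equally (an
# inference from the parametrize values below, not documented behavior).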
@pytest.mark.parametrize('min_pairs_num, actual_pair_num', [(15, 15), (20, 21),
                                                            (1000, 1002)])
def test_should_create_correct_pair_number(number_translation_features_dict,
                                           min_pairs_num, actual_pair_num):
    pairs, same_labels = create_same_pairs(number_translation_features_dict,
                                           min_pairs_num, DATASET_SPEC)

    assert len(pairs) == len(same_labels) == actual_pair_num


def test_should_create_same_pairs(number_translation_features_dict):
    pairs, _ = create_same_pairs(number_translation_features_dict,
                                 MIN_PAIRS_NUM, DATASET_SPEC)
    assert len(pairs) == MIN_PAIRS_NUM

    for left, right in pairs:
        assert left.number == right.number
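These tests read left.number and right.trans off the fixture's values, so number_translation_features_dict plausibly maps each class to a list of number/translation records. A hypothetical fixture of that shape (an assumption, not the project's conftest):

from collections import namedtuple

NumberTranslation = namedtuple('NumberTranslation', ['number', 'trans'])

number_translation_features_dict = {
    1: [NumberTranslation(1, 'one'), NumberTranslation(1, 'uno')],
    2: [NumberTranslation(2, 'two'), NumberTranslation(2, 'dos')],
}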