# Example 1
def test_dataset_validation(dataset_dir, transformer_cfg_dataset, records,
                            batch_size, as_list):
    """A VALIDATION dataset applies the fitted transform to every batch."""
    # Build a TRAIN dataset first so the transformer params are fit and saved
    # to artifact_dir; the validation dataset then loads them.
    RecordDataset(
        artifact_dir=dataset_dir,
        cfg_dataset=transformer_cfg_dataset,
        records=_to_list(records, as_list),
        mode=RecordMode.TRAIN,
        batch_size=batch_size,
    )

    ds_val = RecordDataset(
        artifact_dir=dataset_dir,
        cfg_dataset=transformer_cfg_dataset,
        records=_to_list(records, as_list),
        mode=RecordMode.VALIDATION,
        batch_size=batch_size,
    )

    mean_x = np.array([49.5, 2])
    for batch_ind in range(len(ds_val)):
        actual = ds_val[batch_ind]

        # Recompute the expected transformed batch straight from the records.
        lo = batch_ind * batch_size
        inds = ds_val.sample_order[lo:lo + batch_size]
        rows = records.iloc[inds]
        n = len(inds)
        features = rows[["x1", "x2"]].values.reshape(n, 2) - mean_x
        out1 = rows["y1"].values.reshape(n, 1)
        out2 = rows["y2"].values.reshape(n, 1) % 5 - 2
        expected = ({"input_1": features},
                    {"output_1": out1, "output_2": out2})

        _assert_batch_equal(actual, expected)
# Example 2
def test_dataset_score(dataset_dir, transformer_cfg_dataset, records,
                       batch_size, as_list):
    """A SCORE dataset yields only transformed inputs (no targets)."""
    # Fitting a TRAIN dataset first persists the transformer params that the
    # SCORE dataset will load from artifact_dir.
    RecordDataset(
        artifact_dir=dataset_dir,
        cfg_dataset=transformer_cfg_dataset,
        records=_to_list(records, as_list),
        mode=RecordMode.TRAIN,
        batch_size=batch_size,
    )

    ds_score = RecordDataset(
        artifact_dir=dataset_dir,
        cfg_dataset=transformer_cfg_dataset,
        records=_to_list(records, as_list),
        mode=RecordMode.SCORE,
        batch_size=batch_size,
    )

    mean_x = np.array([49.5, 2])
    for batch_ind in range(len(ds_score)):
        actual = ds_score[batch_ind]

        # Expected batch: mean-centered features only, no output dict.
        lo = batch_ind * batch_size
        inds = ds_score.sample_order[lo:lo + batch_size]
        rows = records.iloc[inds]
        features = rows[["x1", "x2"]].values.reshape(len(inds), 2) - mean_x

        _assert_batch_equal(actual, ({"input_1": features}, ))
# Example 3
def test_dataset_init_transformers(dataset_dir, transformer_cfg_dataset,
                                   records, mode, sample_count):
    """TRAIN fits the transformer; non-train modes reload the same state."""

    def _assert_dict_array_equal(left, right):
        # Same key count, and every shared key maps to equal arrays.
        assert len(left) == len(right)
        for key in left:
            np.testing.assert_array_equal(left[key], right[key])

    transformer_cfg_dataset["sample_count"] = sample_count
    ds_train = RecordDataset(
        artifact_dir=dataset_dir,
        cfg_dataset=transformer_cfg_dataset,
        records=records,
        mode=RecordMode.TRAIN,
        batch_size=32,
    )

    # Values the transformer should have fit from the records.
    ref_obj = {"mean_input": np.array([49.5, 2]), "mean_output": np.array([2])}
    ref_network_params = {"num_inputs": 1, "num_outputs": 2}

    _assert_dict_array_equal(ds_train.transformer.obj, ref_obj)
    assert ds_train.transformer.network_params == ref_network_params

    # A non-train dataset must load the identical fitted state from disk.
    ds_non_train = RecordDataset(
        artifact_dir=dataset_dir,
        cfg_dataset=transformer_cfg_dataset,
        records=records,
        mode=mode,
        batch_size=32,
    )
    _assert_dict_array_equal(ds_non_train.transformer.obj, ref_obj)
# Example 4
def test_dataset_train(dataset_dir, transformer_cfg_dataset, records,
                       batch_size, sample_count, as_list):
    """TRAIN batches apply both the fitted transform and the augmentor."""
    transformer_cfg_dataset["sample_count"] = sample_count
    ds_train = RecordDataset(
        artifact_dir=dataset_dir,
        cfg_dataset=transformer_cfg_dataset,
        records=_to_list(records, as_list),
        mode=RecordMode.TRAIN,
        batch_size=batch_size,
    )

    mean_x = np.array([49.5, 2])
    for batch_ind in range(len(ds_train)):
        actual = ds_train[batch_ind]

        # Rebuild the expected batch: transform, then train-only augmentation
        # (+3 on inputs, x5 on the second output).
        lo = batch_ind * batch_size
        inds = ds_train.sample_order[lo:lo + batch_size]
        rows = records.iloc[inds]
        n = len(inds)
        features = rows[["x1", "x2"]].values.reshape(n, 2) - mean_x + 3
        out1 = rows["y1"].values.reshape(n, 1)
        out2 = (rows["y2"].values.reshape(n, 1) % 5 - 2) * 5
        expected = ({"input_1": features},
                    {"output_1": out1, "output_2": out2})

        _assert_batch_equal(actual, expected)
# Example 5
def test_dataset_sample_inds(dataset_dir, base_cfg_dataset, records, mode,
                             sample_count):
    """sample_inds expand per-record counts in TRAIN, identity otherwise."""
    base_cfg_dataset["sample_count"] = sample_count
    ds = RecordDataset(
        artifact_dir=dataset_dir,
        cfg_dataset=base_cfg_dataset,
        records=records,
        mode=mode,
        batch_size=32,
    )

    # Only a TRAIN dataset with a sample_count column repeats indices.
    if mode == RecordMode.TRAIN and sample_count is not None:
        expected = RecordDataset.convert_sample_count_to_inds(
            records[sample_count])
    else:
        expected = list(range(len(records)))
    assert ds.sample_inds == expected
# Example 6
def test_dataset_non_train_before_train(dataset_dir, transformer_cfg_dataset,
                                        records, mode):
    """Non-train modes fail fast when no transformer artifacts exist yet."""
    # No TRAIN dataset has been fit, so loading saved params must raise.
    with pytest.raises(FileNotFoundError):
        RecordDataset(
            artifact_dir=dataset_dir,
            cfg_dataset=transformer_cfg_dataset,
            records=records,
            mode=mode,
            batch_size=32,
        )
# Example 7
def test_dataset_sample_order(dataset_dir, base_cfg_dataset, records, mode,
                              sample_count):
    """TRAIN/VALIDATION shuffle sample_order; other modes keep it fixed."""
    base_cfg_dataset["sample_count"] = sample_count
    base_cfg_dataset["seed"] = 13
    ds = RecordDataset(
        artifact_dir=dataset_dir,
        cfg_dataset=base_cfg_dataset,
        records=records,
        mode=mode,
        batch_size=32,
    )

    if mode in (RecordMode.TRAIN, RecordMode.VALIDATION):
        # Order is a non-trivial permutation of the sample indices.
        assert ds.sample_order != ds.sample_inds
        assert sorted(ds.sample_order) == ds.sample_inds

        # Both shuffle() and on_epoch_end() reshuffle to a fresh permutation
        # of the same index multiset.
        before = ds.sample_order
        ds.shuffle()
        after_shuffle = ds.sample_order
        assert before != after_shuffle
        assert sorted(before) == sorted(after_shuffle)
        ds.on_epoch_end()
        after_epoch = ds.sample_order
        assert after_shuffle != after_epoch
        assert sorted(after_shuffle) == sorted(after_epoch)
    else:
        # Non-shuffling modes keep the identity order...
        assert ds.sample_order == ds.sample_inds

        # ...and neither shuffle() nor on_epoch_end() changes it.
        before = ds.sample_order
        ds.shuffle()
        after_shuffle = ds.sample_order
        assert before == after_shuffle
        ds.on_epoch_end()
        after_epoch = ds.sample_order
        assert after_shuffle == after_epoch
# Example 8
def test_dataset_length(dataset_dir, base_cfg_dataset, records, mode,
                        sample_count, batch_size):
    """len(ds) equals ceil(total samples / batch_size)."""
    base_cfg_dataset["sample_count"] = sample_count
    ds = RecordDataset(
        artifact_dir=dataset_dir,
        cfg_dataset=base_cfg_dataset,
        records=records,
        mode=mode,
        batch_size=batch_size,
    )

    # TRAIN with a sample_count column counts repeated samples; every other
    # configuration counts one sample per record.
    if mode == RecordMode.TRAIN and sample_count is not None:
        total = records[sample_count].sum()
    else:
        total = len(records)
    assert len(ds) == int(np.ceil(total / float(batch_size)))
# Example 9
def test_dataset_init_basics(dataset_dir, base_cfg_dataset, records, mode):
    """Constructor wires up records, mode, batch size, and components."""
    # The identity transformer saves no params, so any mode constructs
    # without a prior TRAIN fit.
    batch_size = 32
    ds = RecordDataset(
        artifact_dir=dataset_dir,
        cfg_dataset=base_cfg_dataset,
        records=records,
        mode=mode,
        batch_size=batch_size,
    )

    assert ds.num_records == len(records)
    assert ds.records == records.to_dict(orient="records")
    assert ds.mode == mode
    assert ds.batch_size == batch_size
    for attr in ("loader", "transformer"):
        assert hasattr(ds, attr)
    # Only training datasets carry an augmentor.
    assert hasattr(ds, "augmentor") == (mode == RecordMode.TRAIN)
    assert ds.transformer.network_params == {}
# Example 10
def test_convert_sample_count_to_inds(s, result):
    """Static helper turns per-record sample counts into an index list."""
    computed = RecordDataset.convert_sample_count_to_inds(s)
    assert computed == result