Example #1
0
def test_empty_inputs():
    """Expect ``RuntimeError`` when no input fields are given.

    ``TabularData.from_data_frame`` is called with both ``numerical_fields``
    and ``categorical_fields`` set to ``None``; only the target is named.
    """
    frame = TEST_DF_1.copy()
    # Gather the call arguments up front so the ``raises`` block wraps
    # nothing but the call under test.
    datamodule_kwargs = {
        "numerical_fields": None,
        "categorical_fields": None,
        "target_fields": "label",
        "train_data_frame": frame,
        "num_workers": 0,
        "batch_size": 1,
    }
    with pytest.raises(RuntimeError):
        TabularData.from_data_frame(**datamodule_kwargs)
Example #2
0
def test_categorical_target(tmpdir):
    """Check batch shapes when the ``label`` column has been cast to ``str``.

    Builds a ``TabularData`` from copies of ``TEST_DF_1`` (train) and
    ``TEST_DF_2`` (val and test), then pulls the first batch from each
    dataloader and asserts the shapes of the categorical input, the
    numerical input and the target.
    """
    train_df = TEST_DF_1.copy()
    val_df = TEST_DF_2.copy()
    test_df = TEST_DF_2.copy()

    # Cast the label column of every split to str before building the data module.
    for frame in (train_df, val_df, test_df):
        frame["label"] = frame["label"].astype(str)

    # NOTE(review): "scalar_b" is listed twice in numerical_fields -- possibly a
    # copy-paste slip for two distinct columns; confirm against the fixture frames.
    datamodule = TabularData.from_data_frame(
        categorical_fields=["category"],
        numerical_fields=["scalar_b", "scalar_b"],
        target_fields="label",
        train_data_frame=train_df,
        val_data_frame=val_df,
        test_data_frame=test_df,
        num_workers=0,
        batch_size=1,
    )

    loaders = (
        datamodule.train_dataloader(),
        datamodule.val_dataloader(),
        datamodule.test_dataloader(),
    )
    for loader in loaders:
        batch = next(iter(loader))
        categorical, numerical = batch[DefaultDataKeys.INPUT]
        labels = batch[DefaultDataKeys.TARGET]
        # batch_size=1: one categorical field, two numerical entries, one target.
        assert categorical.shape == (1, 1)
        assert numerical.shape == (1, 2)
        assert labels.shape == (1, )
Example #3
0
def test_classification(tmpdir):
    """Smoke-test fitting a ``TabularClassifier`` on data built from ``TEST_DF_1``.

    The train, validation and test splits are each an independent copy of
    ``TEST_DF_1``. The trainer runs with ``fast_dev_run=True`` and writes
    under ``tmpdir``.
    """
    # Three separate copies, one per split.
    train_df, val_df, test_df = (TEST_DF_1.copy() for _ in range(3))

    datamodule = TabularData.from_data_frame(
        categorical_fields=["category"],
        numerical_fields=["scalar_a", "scalar_b"],
        target_fields="label",
        train_data_frame=train_df,
        val_data_frame=val_df,
        test_data_frame=test_df,
        num_workers=0,
        batch_size=2,
    )

    # num_features=3 matches the one categorical plus two numerical fields above.
    classifier = TabularClassifier(
        num_features=3,
        num_classes=2,
        embedding_sizes=datamodule.emb_sizes,
    )

    runner = pl.Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    runner.fit(classifier, datamodule)
Example #4
0
def test_tabular_data(tmpdir):
    """Check the shapes of the first batch from each ``TabularData`` dataloader.

    The data module is built from a copy of ``TEST_DF_1`` (train) and copies
    of ``TEST_DF_2`` (val and test) using the ``categorical_cols`` /
    ``numerical_cols`` / ``target_col`` keyword arguments.
    """
    train_df = TEST_DF_1.copy()
    val_df = TEST_DF_2.copy()
    test_df = TEST_DF_2.copy()

    # NOTE(review): "scalar_b" is listed twice in numerical_cols -- possibly a
    # copy-paste slip for two distinct columns; confirm against the fixture frames.
    datamodule = TabularData.from_data_frame(
        categorical_cols=["category"],
        numerical_cols=["scalar_b", "scalar_b"],
        target_col="label",
        train_data_frame=train_df,
        val_data_frame=val_df,
        test_data_frame=test_df,
        num_workers=0,
        batch_size=1,
    )

    loaders = (
        datamodule.train_dataloader(),
        datamodule.val_dataloader(),
        datamodule.test_dataloader(),
    )
    for loader in loaders:
        batch = next(iter(loader))
        categorical, numerical = batch[DefaultDataKeys.INPUT]
        labels = batch[DefaultDataKeys.TARGET]
        # batch_size=1: one categorical column, two numerical entries, one target.
        assert categorical.shape == (1, 1)
        assert numerical.shape == (1, 2)
        assert labels.shape == (1, )