Esempio n. 1
0
def test_empty_inputs():
    """Expect ``RuntimeError`` when no input columns are supplied.

    ``TabularData.from_df`` is called with both ``numerical_cols`` and
    ``categorical_cols`` set to ``None``; the call must raise.
    """
    frame = TEST_DF_1.copy()
    # Neither numerical nor categorical inputs are given on purpose.
    from_df_kwargs = {
        "numerical_cols": None,
        "categorical_cols": None,
        "target_col": "label",
        "num_workers": 0,
        "batch_size": 1,
    }
    with pytest.raises(RuntimeError):
        TabularData.from_df(frame, **from_df_kwargs)
Esempio n. 2
0
def test_categorical_target(tmpdir):
    """Check first-batch shapes when the target column holds strings.

    The ``label`` column of every split is cast to ``str`` before the data
    module is built, then the first batch of the train, validation and test
    loaders is inspected.
    """
    frames = [TEST_DF_1.copy(), TEST_DF_2.copy(), TEST_DF_2.copy()]
    for frame in frames:
        # Turn the integer label into a string label.
        frame["label"] = frame["label"].astype(str)
    train_df, val_df, test_df = frames

    # NOTE(review): "scalar_b" is listed twice below; this may be a typo for
    # ["scalar_a", "scalar_b"] -- confirm against the fixture columns.
    datamodule = TabularData.from_df(
        train_df,
        categorical_cols=["category"],
        numerical_cols=["scalar_b", "scalar_b"],
        target_col="label",
        val_df=val_df,
        test_df=test_df,
        num_workers=0,
        batch_size=1,
    )

    loaders = (
        datamodule.train_dataloader(),
        datamodule.val_dataloader(),
        datamodule.test_dataloader(),
    )
    for loader in loaders:
        first_batch = next(iter(loader))
        (cat, num), target = first_batch
        # batch_size=1 with one categorical and two numerical entries.
        assert cat.shape == (1, 1)
        assert num.shape == (1, 2)
        assert target.shape == (1, )
Esempio n. 3
0
def test_classification(tmpdir):
    """Smoke-test fitting a ``TabularClassifier`` on ``TabularData``.

    Train, validation and test splits are all copies of ``TEST_DF_1``. The
    trainer is created with ``fast_dev_run=True`` and writes under ``tmpdir``.
    """
    train_df = TEST_DF_1.copy()
    held_out = {
        "val_df": TEST_DF_1.copy(),
        "test_df": TEST_DF_1.copy(),
    }
    datamodule = TabularData.from_df(
        train_df,
        categorical_cols=["category"],
        numerical_cols=["scalar_a", "scalar_b"],
        target_col="label",
        num_workers=0,
        batch_size=2,
        **held_out,
    )

    # num_features=3 presumably counts 1 categorical + 2 numerical columns.
    classifier = TabularClassifier(
        num_features=3,
        num_classes=2,
        embedding_sizes=datamodule.emb_sizes,
    )
    pl.Trainer(fast_dev_run=True, default_root_dir=tmpdir).fit(classifier, datamodule)
Esempio n. 4
0
def test_from_df(tmpdir):
    """Check first-batch shapes of loaders built via ``TabularData.from_df``.

    A data module is created from a training frame plus validation and test
    frames, and the first batch of each loader is inspected.
    """
    train_df = TEST_DF_1.copy()
    valid_df, test_df = TEST_DF_2.copy(), TEST_DF_2.copy()

    # NOTE(review): "scalar_b" is listed twice below; this may be a typo for
    # ["scalar_a", "scalar_b"] -- confirm against the fixture columns.
    datamodule = TabularData.from_df(
        train_df,
        categorical_input=["category"],
        numerical_input=["scalar_b", "scalar_b"],
        target="label",
        valid_df=valid_df,
        test_df=test_df,
        num_workers=0,
        batch_size=1,
    )

    loaders = (
        datamodule.train_dataloader(),
        datamodule.val_dataloader(),
        datamodule.test_dataloader(),
    )
    for loader in loaders:
        first_batch = next(iter(loader))
        (cat, num), target = first_batch
        # batch_size=1 with one categorical and two numerical entries.
        assert cat.shape == (1, 1)
        assert num.shape == (1, 2)
        assert target.shape == (1, )