def test_categorical_target(tmpdir):
    """Check batch shapes when the ``label`` target column holds strings.

    The ``label`` column of the train/valid/test frames is cast to ``str``
    before a ``TabularData`` is built from them. The first batch of every
    dataloader must then contain a categorical input of shape ``(1, 1)``,
    a numerical input of shape ``(1, 2)`` and a target of shape ``(1,)``.
    """
    train_df = TEST_DF_1.copy()
    valid_df = TEST_DF_2.copy()
    test_df = TEST_DF_2.copy()
    for df in (train_df, valid_df, test_df):
        # change int label to string
        df["label"] = df["label"].astype(str)

    dm = TabularData(
        train_df,
        categorical_input=["category"],
        # Use two distinct columns; listing "scalar_b" twice only duplicated
        # a single feature instead of exercising two numerical inputs.
        numerical_input=["scalar_a", "scalar_b"],
        target="label",
        valid_df=valid_df,
        test_df=test_df,
        num_workers=0,
        batch_size=1,
    )

    for dl in (dm.train_dataloader(), dm.val_dataloader(), dm.test_dataloader()):
        (cat, num), target = next(iter(dl))
        assert cat.shape == (1, 1)
        assert num.shape == (1, 2)
        assert target.shape == (1, )
def test_empty_inputs():
    """Expect ``RuntimeError`` from ``TabularData.from_df`` without feature columns.

    Both ``numerical_cols`` and ``categorical_cols`` are passed as ``None``.
    """
    frame = TEST_DF_1.copy()
    # Keyword arguments for a call that names a target but no input columns.
    no_feature_kwargs = dict(
        numerical_cols=None,
        categorical_cols=None,
        target_col="label",
        num_workers=0,
        batch_size=1,
    )
    with pytest.raises(RuntimeError):
        TabularData.from_df(frame, **no_feature_kwargs)
def test_empty_inputs():
    """Expect ``RuntimeError`` from ``TabularData.from_data_frame`` without feature fields.

    Both ``numerical_fields`` and ``categorical_fields`` are passed as ``None``.
    """
    frame = TEST_DF_1.copy()
    # Keyword arguments for a call that names a target but no input fields.
    no_feature_kwargs = dict(
        numerical_fields=None,
        categorical_fields=None,
        target_fields="label",
        train_data_frame=frame,
        num_workers=0,
        batch_size=1,
    )
    with pytest.raises(RuntimeError):
        TabularData.from_data_frame(**no_feature_kwargs)
def test_categorical_target(tmpdir):
    """Check batch shapes when the ``label`` target column holds strings.

    The ``label`` column of the train/val/test frames is cast to ``str``
    before ``TabularData.from_data_frame`` is called. The first batch of
    every dataloader must then contain a categorical input of shape
    ``(1, 1)``, a numerical input of shape ``(1, 2)`` and a target of shape
    ``(1,)``.
    """
    train_data_frame = TEST_DF_1.copy()
    val_data_frame = TEST_DF_2.copy()
    test_data_frame = TEST_DF_2.copy()
    for df in (train_data_frame, val_data_frame, test_data_frame):
        # change int label to string
        df["label"] = df["label"].astype(str)

    dm = TabularData.from_data_frame(
        categorical_fields=["category"],
        # Use two distinct columns; listing "scalar_b" twice only duplicated
        # a single feature instead of exercising two numerical inputs.
        numerical_fields=["scalar_a", "scalar_b"],
        target_fields="label",
        train_data_frame=train_data_frame,
        val_data_frame=val_data_frame,
        test_data_frame=test_data_frame,
        num_workers=0,
        batch_size=1,
    )

    for dl in (dm.train_dataloader(), dm.val_dataloader(), dm.test_dataloader()):
        data = next(iter(dl))
        (cat, num) = data[DefaultDataKeys.INPUT]
        target = data[DefaultDataKeys.TARGET]
        assert cat.shape == (1, 1)
        assert num.shape == (1, 2)
        assert target.shape == (1, )
def test_from_csv(tmpdir):
    """Check batch shapes of a ``TabularData`` built with ``from_csv``.

    ``TEST_DF_1`` is written to ``train.csv`` and ``TEST_DF_2`` to
    ``valid.csv`` inside ``tmpdir``; the validation and test splits read the
    same ``valid.csv`` file.
    """
    root = Path(tmpdir)
    train_csv = root / "train.csv"
    # Validation and test intentionally point at one and the same path.
    val_csv = root / "valid.csv"
    test_csv = val_csv

    TEST_DF_1.to_csv(train_csv)
    TEST_DF_2.to_csv(val_csv)
    TEST_DF_2.to_csv(test_csv)

    datamodule = TabularData.from_csv(
        categorical_fields=["category"],
        numerical_fields=["scalar_a", "scalar_b"],
        target_fields="label",
        train_file=str(train_csv),
        val_file=str(val_csv),
        test_file=str(test_csv),
        num_workers=0,
        batch_size=1,
    )

    loaders = (
        datamodule.train_dataloader(),
        datamodule.val_dataloader(),
        datamodule.test_dataloader(),
    )
    for loader in loaders:
        batch = next(iter(loader))
        cat, num = batch[DefaultDataKeys.INPUT]
        target = batch[DefaultDataKeys.TARGET]
        assert cat.shape == (1, 1)
        assert num.shape == (1, 2)
        assert target.shape == (1, )
def test_classification(tmpdir):
    """Fit a ``TabularClassifier`` on ``TEST_DF_1`` with ``fast_dev_run=True``.

    Train, validation and test splits are independent copies of
    ``TEST_DF_1``; the trainer writes under ``tmpdir``.
    """
    train_df, val_df, test_df = (TEST_DF_1.copy() for _ in range(3))

    datamodule = TabularData.from_df(
        train_df,
        categorical_cols=["category"],
        numerical_cols=["scalar_a", "scalar_b"],
        target_col="label",
        val_df=val_df,
        test_df=test_df,
        num_workers=0,
        batch_size=2,
    )

    # One categorical column plus two numerical columns -> three features.
    classifier = TabularClassifier(
        num_features=3,
        num_classes=2,
        embedding_sizes=datamodule.emb_sizes,
    )

    runner = pl.Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    runner.fit(classifier, datamodule)
def test_from_df(tmpdir):
    """Check batch shapes of a ``TabularData`` built with ``from_df``.

    The first batch of every dataloader must contain a categorical input of
    shape ``(1, 1)``, a numerical input of shape ``(1, 2)`` and a target of
    shape ``(1,)``.
    """
    train_df = TEST_DF_1.copy()
    val_df = TEST_DF_2.copy()
    test_df = TEST_DF_2.copy()

    dm = TabularData.from_df(
        train_df,
        categorical_cols=["category"],
        # Use two distinct columns; listing "scalar_b" twice only duplicated
        # a single feature instead of exercising two numerical inputs.
        numerical_cols=["scalar_a", "scalar_b"],
        target_col="label",
        val_df=val_df,
        test_df=test_df,
        num_workers=0,
        batch_size=1,
    )

    for dl in (dm.train_dataloader(), dm.val_dataloader(), dm.test_dataloader()):
        (cat, num), target = next(iter(dl))
        assert cat.shape == (1, 1)
        assert num.shape == (1, 2)
        assert target.shape == (1, )
def test_tabular_data(tmpdir):
    """Check batch shapes of a ``TabularData`` built with ``from_data_frame``.

    The first batch of every dataloader must contain a categorical input of
    shape ``(1, 1)``, a numerical input of shape ``(1, 2)`` and a target of
    shape ``(1,)``.
    """
    train_data_frame = TEST_DF_1.copy()
    val_data_frame = TEST_DF_2.copy()
    test_data_frame = TEST_DF_2.copy()

    dm = TabularData.from_data_frame(
        categorical_cols=["category"],
        # Use two distinct columns; listing "scalar_b" twice only duplicated
        # a single feature instead of exercising two numerical inputs.
        numerical_cols=["scalar_a", "scalar_b"],
        target_col="label",
        train_data_frame=train_data_frame,
        val_data_frame=val_data_frame,
        test_data_frame=test_data_frame,
        num_workers=0,
        batch_size=1,
    )

    for dl in (dm.train_dataloader(), dm.val_dataloader(), dm.test_dataloader()):
        data = next(iter(dl))
        (cat, num) = data[DefaultDataKeys.INPUT]
        target = data[DefaultDataKeys.TARGET]
        assert cat.shape == (1, 1)
        assert num.shape == (1, 2)
        assert target.shape == (1, )
def test_from_csv(tmpdir):
    """Check batch shapes of a ``TabularData`` built with ``from_csv``.

    ``TEST_DF_1`` is written to ``train.csv`` and ``TEST_DF_2`` to
    ``valid.csv`` inside ``tmpdir``. The first batch of every dataloader must
    contain a categorical input of shape ``(1, 1)``, a numerical input of
    shape ``(1, 2)`` and a target of shape ``(1,)``.
    """
    train_csv = Path(tmpdir) / "train.csv"
    # Validation and test share one file, so the second write of TEST_DF_2
    # below simply rewrites "valid.csv" with the same frame.
    val_csv = test_csv = Path(tmpdir) / "valid.csv"
    TEST_DF_1.to_csv(train_csv)
    TEST_DF_2.to_csv(val_csv)
    TEST_DF_2.to_csv(test_csv)

    dm = TabularData.from_csv(
        train_csv=train_csv,
        categorical_cols=["category"],
        # Use two distinct columns; listing "scalar_b" twice only duplicated
        # a single feature instead of exercising two numerical inputs.
        numerical_cols=["scalar_a", "scalar_b"],
        target_col="label",
        val_csv=val_csv,
        test_csv=test_csv,
        num_workers=0,
        batch_size=1,
    )

    for dl in (dm.train_dataloader(), dm.val_dataloader(), dm.test_dataloader()):
        (cat, num), target = next(iter(dl))
        assert cat.shape == (1, 1)
        assert num.shape == (1, 2)
        assert target.shape == (1, )
# See the License for the specific language governing permissions and
# limitations under the License.
from torchmetrics.classification import Accuracy, Precision, Recall

import flash
from flash.data.utils import download_data
from flash.tabular import TabularClassifier, TabularData

# 1. Download the data
download_data("https://pl-flash-data.s3.amazonaws.com/titanic.zip", "data/")

# 2. Load the data
# The two column lists are passed positionally, in this order, to `from_csv`.
categorical_columns = ["Sex", "Age", "SibSp", "Parch", "Ticket", "Cabin", "Embarked"]
numerical_columns = ["Fare"]
datamodule = TabularData.from_csv(
    categorical_columns,
    numerical_columns,
    target_field="Survived",
    train_file="./data/titanic/titanic.csv",
    test_file="./data/titanic/test.csv",
    val_split=0.25,
)

# 3. Build the model
classification_metrics = [Accuracy(), Precision(), Recall()]
model = TabularClassifier.from_data(datamodule, metrics=classification_metrics)

# 4. Create the trainer
trainer = flash.Trainer(fast_dev_run=True)

# 5. Train the model
trainer.fit(model, datamodule=datamodule)
# limitations under the License.
from torchmetrics.classification import Accuracy, Precision, Recall

import flash
from flash.data.utils import download_data
from flash.tabular import TabularClassifier, TabularData

# 1. Download the data
download_data("https://pl-flash-data.s3.amazonaws.com/titanic.zip", "data/")

# 2. Load the data
categorical_columns = ["Sex", "Age", "SibSp", "Parch", "Ticket", "Cabin", "Embarked"]
numerical_columns = ["Fare"]
datamodule = TabularData.from_csv(
    target_col="Survived",
    train_csv="./data/titanic/titanic.csv",
    test_csv="./data/titanic/test.csv",
    categorical_cols=categorical_columns,
    numerical_cols=numerical_columns,
    val_size=0.25,
)

# 3. Build the model
classification_metrics = [Accuracy(), Precision(), Recall()]
model = TabularClassifier.from_data(datamodule, metrics=classification_metrics)

# 4. Create the trainer
trainer = flash.Trainer(fast_dev_run=True)

# 5. Train the model
trainer.fit(model, datamodule=datamodule)