def test_init_train(tmpdir):
    """Smoke-test that a ``TabularClassifier`` can be constructed and fitted.

    A loader over ``DummyDataset`` (batch size 16) is fed to a ``Trainer``
    running in ``fast_dev_run`` mode, with ``tmpdir`` as its root directory.
    """
    loader = torch.utils.data.DataLoader(DummyDataset(), batch_size=16)

    # Sixteen identical (10, 32) embedding specs.
    embedding_specs = [(10, 32)] * 16
    classifier = TabularClassifier(
        num_classes=10,
        num_features=16 + 16,
        embedding_sizes=embedding_specs,
    )

    runner = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    runner.fit(classifier, loader)
def test_jit(tmpdir):
    """Trace a ``TabularClassifier`` with ``torch.jit.trace`` and check its output.

    The traced module is called on a fresh example input and must return a
    ``torch.Tensor`` of shape ``(1, 10)``.
    """

    def make_example():
        # An integer tensor drawn from [0, 10) and a random float tensor,
        # both of shape (1, 4), packed as a single tuple input.
        return torch.randint(0, 10, size=(1, 4)), torch.rand(1, 4)

    classifier = TabularClassifier(
        num_classes=10,
        num_features=8,
        embedding_sizes=[(10, 32)] * 4,
    )
    classifier.eval()

    # torch.jit.script doesn't work with tabnet
    traced = torch.jit.trace(classifier, (make_example(),))

    # TODO: torch.jit.save doesn't work with tabnet
    # path = os.path.join(tmpdir, "test.pt")
    # torch.jit.save(model, path)
    # model = torch.jit.load(path)

    result = traced(make_example())
    assert isinstance(result, torch.Tensor)
    assert result.shape == torch.Size([1, 10])
def test_classification(backbone, fields, tmpdir):
    """Fit a ``TabularClassifier`` built via ``from_data`` for one fast dev run.

    Args:
        backbone: Forwarded to ``TabularClassifier.from_data`` as ``backbone``.
        fields: Mapping unpacked as keyword arguments into
            ``TabularClassificationData.from_data_frame``.
        tmpdir: Used as the trainer's ``default_root_dir``.
    """
    # One independent copy of the fixture frame per split.
    train_frame, val_frame, test_frame = (TEST_DF_1.copy() for _ in range(3))

    datamodule = TabularClassificationData.from_data_frame(
        **fields,
        target_fields="label",
        train_data_frame=train_frame,
        val_data_frame=val_frame,
        test_data_frame=test_frame,
        num_workers=0,
        batch_size=2,
    )
    classifier = TabularClassifier.from_data(datamodule=datamodule, backbone=backbone)

    runner = pl.Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    runner.fit(classifier, datamodule)
def test_serve():
    """Build a ``TabularClassifier`` from a two-row frame and call ``serve()``."""
    frame = pd.DataFrame.from_dict(
        {
            "num_col": [1.4, 2.5],
            "cat_col": ["positive", "negative"],
            "target": [1, 2],
        }
    )
    datamodule = TabularData.from_data_frame("cat_col", "num_col", "target", frame)

    classifier = TabularClassifier.from_data(datamodule)
    # TODO: Currently only servable once a preprocess has been attached
    classifier._preprocess = datamodule.preprocess
    classifier.eval()
    classifier.serve()
def test_classification(tmpdir):
    """Fit a directly-constructed ``TabularClassifier`` for one fast dev run.

    The data module is created with ``TabularData.from_df`` from three copies
    of ``TEST_DF_1`` (train / val / test); ``tmpdir`` is the trainer root dir.
    """
    # One independent copy of the fixture frame per split.
    train_frame, val_frame, test_frame = (TEST_DF_1.copy() for _ in range(3))

    datamodule = TabularData.from_df(
        train_frame,
        categorical_cols=["category"],
        numerical_cols=["scalar_a", "scalar_b"],
        target_col="label",
        val_df=val_frame,
        test_df=test_frame,
        num_workers=0,
        batch_size=2,
    )
    classifier = TabularClassifier(
        num_features=3,
        num_classes=2,
        embedding_sizes=datamodule.emb_sizes,
    )

    runner = pl.Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    runner.fit(classifier, datamodule)
# 1. Download the data
download_data("https://pl-flash-data.s3.amazonaws.com/titanic.zip", "data/")

# 2. Load the data
# The two column lists are passed positionally to ``from_csv``.
first_columns = ["Sex", "Age", "SibSp", "Parch", "Ticket", "Cabin", "Embarked"]
second_columns = ["Fare"]
datamodule = TabularData.from_csv(
    first_columns,
    second_columns,
    target_field="Survived",
    train_file="./data/titanic/titanic.csv",
    test_file="./data/titanic/test.csv",
    val_split=0.25,
)

# 3. Build the model
metrics = [Accuracy(), Precision(), Recall()]
model = TabularClassifier.from_data(datamodule, metrics=metrics)

# 4. Create the trainer
trainer = flash.Trainer(fast_dev_run=True)

# 5. Train the model
trainer.fit(model, datamodule=datamodule)

# 6. Test model
trainer.test(model)

# 7. Save it!
trainer.save_checkpoint("tabular_classification_model.pt")
def test_module_import_error(tmpdir):
    """Check that constructing ``TabularClassifier`` raises a helpful import error.

    The raised ``ModuleNotFoundError`` must mention the literal text
    ``[tabular]``. Presumably this test is meant to run in an environment
    where the tabular extras are not installed -- confirm against the test
    configuration.

    Args:
        tmpdir: Unused; kept so the test signature is unchanged.
    """
    # ``match`` is a regular expression. Unescaped, "[tabular]" is a character
    # class that matches any single one of the letters t/a/b/u/l/r, so the
    # check would pass for almost any message. Escape the brackets so the
    # literal "[tabular]" is required.
    with pytest.raises(ModuleNotFoundError, match=r"\[tabular\]"):
        TabularClassifier(num_classes=10, num_features=16, embedding_sizes=[])
from flash.core.data import download_data
from flash.tabular import TabularClassifier

if __name__ == "__main__":
    data_url = "https://pl-flash-data.s3.amazonaws.com/titanic.zip"
    checkpoint_url = "https://flash-weights.s3.amazonaws.com/tabular_classification_model.pt"

    # 1. Download the data
    download_data(data_url, "data/")

    # 2. Load the model from a checkpoint
    model = TabularClassifier.load_from_checkpoint(checkpoint_url)

    # 3. Generate predictions from a sheet file! Who would survive?
    predictions = model.predict("data/titanic/titanic.csv")
    print(predictions)
def test_load_from_checkpoint_dependency_error():
    """``load_from_checkpoint`` must raise ``ModuleNotFoundError`` naming the extra.

    The error message has to contain the literal text
    ``'lightning-flash[tabular]'`` (quotes included).
    """
    # Escape the text so the brackets are matched literally, not as a regex class.
    expected_message = re.escape("'lightning-flash[tabular]'")
    with pytest.raises(ModuleNotFoundError, match=expected_message):
        TabularClassifier.load_from_checkpoint("not_a_real_checkpoint.pt")
import torch

import flash
from flash.core.data.utils import download_data
from flash.tabular import TabularClassificationData, TabularClassifier

# 1. Create the DataModule
download_data("https://pl-flash-data.s3.amazonaws.com/titanic.zip", "./data")

# The column list and "Fare" are passed positionally to ``from_csv``.
leading_columns = ["Sex", "Age", "SibSp", "Parch", "Ticket", "Cabin", "Embarked"]
datamodule = TabularClassificationData.from_csv(
    leading_columns,
    "Fare",
    target_fields="Survived",
    train_file="data/titanic/titanic.csv",
    val_split=0.1,
)

# 2. Build the task
model = TabularClassifier.from_data(datamodule)

# 3. Create the trainer and train the model
# ``gpus`` is set to however many CUDA devices torch reports.
gpu_count = torch.cuda.device_count()
trainer = flash.Trainer(max_epochs=3, gpus=gpu_count)
trainer.fit(model, datamodule=datamodule)

# 4. Generate predictions from a CSV
predictions = model.predict("data/titanic/titanic.csv")
print(predictions)

# 5. Save the model!
trainer.save_checkpoint("tabular_classification_model.pt")
# 1. Create the DataModule
download_data("https://pl-flash-data.s3.amazonaws.com/titanic.zip", "./data")

categorical_columns = ["Sex", "Age", "SibSp", "Parch", "Ticket", "Cabin", "Embarked"]
datamodule = TabularClassificationData.from_csv(
    categorical_fields=categorical_columns,
    numerical_fields="Fare",
    target_fields="Survived",
    train_file="data/titanic/titanic.csv",
    val_split=0.1,
    batch_size=8,
)

# 2. Build the task
model = TabularClassifier.from_data(datamodule, backbone="fttransformer")

# 3. Create the trainer and train the model
# ``gpus`` is set to however many CUDA devices torch reports.
gpu_count = torch.cuda.device_count()
trainer = flash.Trainer(max_epochs=3, gpus=gpu_count)
trainer.fit(model, datamodule=datamodule)

# 4. Generate predictions from a CSV
# The prediction data module reuses the ``parameters`` of the training one;
# they are read before ``datamodule`` is rebound.
training_parameters = datamodule.parameters
datamodule = TabularClassificationData.from_csv(
    predict_file="data/titanic/titanic.csv",
    parameters=training_parameters,
    batch_size=8,
)
predictions = trainer.predict(model, datamodule=datamodule, output="classes")
print(predictions)

# 5. Save the model!