def test_preprocess_train_data(self):
    """Check that ``_preprocess_and_save_data`` saves the preprocessed frame.

    A one-row CSV with ``text``, ``additional`` and ``label`` columns is
    written to ``train.csv``. ``_preprocess_data`` is replaced by a mock that
    drops the ``additional`` column, and the test then reads
    ``./preprocessed_train.csv`` and verifies that only the surviving columns
    are present.

    The files involved live in the current working directory, so the test
    removes any stale output before running and deletes both files afterwards.
    Otherwise an output file left by an earlier run could satisfy the
    assertions even if nothing was written this time.
    """
    # Function-scope imports keep this block self-contained.
    import contextlib
    import os

    source_csv = "train.csv"
    output_csv = "./preprocessed_train.csv"

    def discard(path):
        # Best-effort removal: a missing file is not an error here.
        with contextlib.suppress(FileNotFoundError):
            os.remove(path)

    def remove_additional(df):
        return df.drop(columns=["additional"])

    # Guard against a false positive from a previous run's output.
    discard(output_csv)
    try:
        train_df = pd.DataFrame(
            [["text", "additional", 1]],
            columns=["text", "additional", "label"],
        )
        # Written with pandas' default settings (index included), exactly as
        # the fixture was before; how BasicModel reads it is not visible here.
        train_df.to_csv(source_csv)

        config = {
            "data_folder": "./",
            "train_data_path": source_csv,
        }
        model = BasicModel(config)
        model._preprocess_data = MagicMock(side_effect=remove_additional)

        model._preprocess_and_save_data(config["train_data_path"])

        preprocessed_train_df = pd.read_csv(output_csv)
        assert "text" in preprocessed_train_df.columns
        assert "label" in preprocessed_train_df.columns
        assert "additional" not in preprocessed_train_df.columns
    finally:
        # Leave the working directory as we found it, pass or fail.
        discard(source_csv)
        discard(output_csv)
def test_init_model(self):
    """Check that ``init`` preprocesses every configured dataset and loads.

    ``_preprocess_and_save_data`` and ``_load`` are replaced with mocks, so
    the test only observes which calls ``init`` makes: one preprocessing call
    per configured path, and a ``_load`` call with no arguments.
    """
    # Arrange: a model whose I/O-facing hooks are mocked out.
    data_paths = {
        "train_data_path": "train",
        "unlabeled_data_path": "unlabeled",
        "test_data_path": "test",
    }
    model = BasicModel(data_paths)
    preprocess_mock = MagicMock()
    load_mock = MagicMock()
    model._preprocess_and_save_data = preprocess_mock
    model._load = load_mock

    # Act
    model.init()

    # Assert: each dataset path was handed to the preprocessing hook.
    for expected_path in ("train", "unlabeled", "test"):
        preprocess_mock.assert_any_call(expected_path)
    load_mock.assert_called_with()