def test_imputer_tfidf(test_dir, data_frame):
    """Fit a TF-IDF / bag-of-words imputer for one epoch and check its precision.

    The imputer is built from a ``TfIdfEncoder`` and ``BowFeaturizer`` on the
    ``'features'`` column plus a ``CategoricalEncoder`` on the label column,
    fitted on the generated data and then scored on that same data.

    Args:
        test_dir: Base directory; output goes to ``<test_dir>/tmp/out``.
        data_frame: Callable producing the data set (called with
            ``n_samples`` and ``label_col``).
    """
    label_col = 'label'
    samples = data_frame(n_samples=100, label_col=label_col)

    encoders = [TfIdfEncoder('features')]
    label_encoders = [CategoricalEncoder(label_col)]
    featurizers = [BowFeaturizer('features')]
    out_dir = os.path.join(test_dir, "tmp", "out")

    unfitted = Imputer(
        data_featurizers=featurizers,
        label_encoders=label_encoders,
        data_encoders=encoders,
        output_path=out_dir,
    )
    imputer = unfitted.fit(train_df=samples, num_epochs=1)

    # Only the metrics half of the returned pair is inspected.
    _unused, metrics = imputer.transform_and_compute_metrics(samples)
    label_metrics = metrics['label']
    assert label_metrics['avg_precision'] > 0.80
def test_inplace_prediction(test_dir, data_frame):
    """Check that ``predict(..., inplace=True)`` returns the object it was given.

    A TF-IDF / bag-of-words imputer is fitted for one epoch; the test then
    asserts that the value returned by ``predict`` is the very same object
    (identity, not equality) as the input passed in.

    Args:
        test_dir: Base directory; output goes to ``<test_dir>/tmp/out``.
        data_frame: Callable producing the data set (called with
            ``n_samples`` and ``label_col``).
    """
    label_col = 'label'
    samples = data_frame(n_samples=100, label_col=label_col)

    encoders = [TfIdfEncoder('features')]
    label_encoders = [CategoricalEncoder(label_col)]
    featurizers = [BowFeaturizer('features')]
    out_dir = os.path.join(test_dir, "tmp", "out")

    unfitted = Imputer(
        data_featurizers=featurizers,
        label_encoders=label_encoders,
        data_encoders=encoders,
        output_path=out_dir,
    )
    imputer = unfitted.fit(train_df=samples, num_epochs=1)

    result = imputer.predict(samples, inplace=True)
    assert result is samples
def test_non_writable_output_path(test_dir, data_frame):
    """Check that a saved imputer can be loaded and used from a read-only directory.

    An imputer is fitted for one epoch and saved with
    ``<test_dir>/non_writable`` as its output path. The test then loads it and
    calls ``predict`` twice:

    1. with the output directory and ``imputer.log`` both stripped of write
       permission, and
    2. with ``imputer.log`` removed and the directory made read-only again.

    Any exception raised during either scenario fails the test. Write
    permissions are restored in a ``finally`` clause so that a failure does
    not leave a read-only directory behind under ``test_dir``.

    Args:
        test_dir: Base directory under which the imputer output is written.
        data_frame: Callable producing the data set (called with
            ``n_samples`` and ``label_col``).
    """
    label_col = 'label'
    df = data_frame(n_samples=100, label_col=label_col)

    data_encoder_cols = [TfIdfEncoder('features')]
    label_encoder_cols = [CategoricalEncoder(label_col)]
    data_cols = [BowFeaturizer('features')]

    output_path = os.path.join(test_dir, 'non_writable')
    log_path = os.path.join(output_path, "imputer.log")

    # Permission masks used below, named once instead of repeated inline.
    read_only_dir = S_IREAD | S_IXUSR
    writable = S_IREAD | S_IXUSR | S_IWUSR

    Imputer(
        data_featurizers=data_cols,
        label_encoders=label_encoder_cols,
        data_encoders=data_encoder_cols,
        output_path=output_path
    ).fit(train_df=df, num_epochs=1).save()

    from datawig.utils import logger

    try:
        # make output dir of imputer read-only
        os.chmod(output_path, read_only_dir)
        # make log file read only
        os.chmod(log_path, S_IREAD)
        imputer = Imputer.load(output_path)
        _ = imputer.predict(df)
        logger.warning("this should not fail")

        # restore write access so the log file can be removed
        os.chmod(log_path, writable)
        os.chmod(output_path, writable)
        os.remove(log_path)

        # make output dir of imputer read-only again, this time without a log file
        os.chmod(output_path, read_only_dir)
        imputer = Imputer.load(output_path)
        _ = imputer.predict(df)
        logger.warning("this should not fail")
    except Exception as e:
        print(e)
        pytest.fail("This invocation should not raise any Exception")
    finally:
        # Runs on success and on failure alike: hand back a writable directory
        # (and log file, if it still exists) so later cleanup is not blocked.
        if os.path.isdir(output_path):
            os.chmod(output_path, writable)
            if os.path.exists(log_path):
                os.chmod(log_path, writable)
def test_explain_instance_without_label(test_dir, data_frame):
    """Check that ``explain_instance`` accepts an instance with no label entry.

    A TF-IDF / bag-of-words imputer is fitted for one epoch and asserted to be
    explainable. ``explain_instance`` is then called with a ``pd.Series`` that
    holds only the ``'features'`` entry; the test passes if no exception is
    raised.

    Args:
        test_dir: Base directory; output goes to ``<test_dir>/tmp/out``.
        data_frame: Callable producing the data set (called with
            ``n_samples`` and ``label_col``).
    """
    label_col = 'label'
    samples = data_frame(n_samples=100, label_col=label_col)

    encoders = [TfIdfEncoder('features')]
    label_encoders = [CategoricalEncoder(label_col)]
    featurizers = [BowFeaturizer('features')]
    out_dir = os.path.join(test_dir, "tmp", "out")

    unfitted = Imputer(
        data_featurizers=featurizers,
        label_encoders=label_encoders,
        data_encoders=encoders,
        output_path=out_dir,
    )
    imputer = unfitted.fit(train_df=samples, num_epochs=1)
    assert imputer.is_explainable

    # A single instance carrying only the feature entry, no label.
    unlabeled = pd.Series({'features': 'some feature text'})

    # explain_instance should not raise an exception; its result is discarded.
    imputer.explain_instance(unlabeled)

    # Reaching this line means the call above completed without raising.
    assert True
def test_fit_resumes(test_dir, data_frame):
    """Fit the same imputer twice and compare ``imputer.module`` across fits.

    The imputer starts with ``module`` set to ``None``. After each of two
    consecutive 20-epoch ``fit`` calls the current ``module`` is recorded, and
    the two recorded values must compare equal.

    NOTE(review): judging by the test name, this presumably guards against a
    second ``fit`` building a fresh module instead of continuing with the
    existing one — confirm against ``Imputer.fit``.

    Args:
        test_dir: Directory passed directly as the imputer's ``output_path``.
        data_frame: Callable producing the data set (called with
            ``feature_col`` and ``label_col``).
    """
    feature_col = "feature"
    label_col = "label"
    samples = data_frame(feature_col=feature_col, label_col=label_col)

    encoders = [TfIdfEncoder([feature_col])]
    featurizers = [datawig.mxnet_input_symbols.BowFeaturizer(feature_col)]
    label_encoders = [CategoricalEncoder(label_col)]
    imputer = Imputer(
        data_encoders=encoders,
        data_featurizers=featurizers,
        label_encoders=label_encoders,
        output_path=test_dir,
    )

    assert imputer.module is None

    # Record the module seen after each of the two fits.
    modules_after_fit = []
    for _ in range(2):
        imputer.fit(samples, num_epochs=20)
        modules_after_fit.append(imputer.module)

    assert modules_after_fit[0] == modules_after_fit[1]