# Fit-and-cache demo: train an AutoConverter/AutoLearn pair on the Titanic
# training data the first time through, persist both as pickles, then
# reload the cached artifacts and exercise the Evaluate helpers.
ac_filepath = "tmp/sample_ac.pickle"
al_filepath = "tmp/sample_al.pickle"

df = pd.read_csv('data/train.csv')

if not (os.path.exists(ac_filepath) and os.path.exists(al_filepath)):
    # No cached artifacts yet: fit, report, and persist them.
    ac = AutoConverter(target='Survived')
    X, y = ac.fit_transform(df)
    al = AutoLearn(
        customized_clf_list=[('LogisticRegression', LogisticRegression())],
        metric='roc_auc',
        cv_num=5,
        pos_label=1,
        n_jobs=1,
        verbose=0)
    results = al.learn(X, y)
    print(results['name'])
    print(results['eval_df'])
    pred = al.predict(X)
    print(pred)
    ac.save(ac_filepath)
    al.save(al_filepath)

# Round-trip through the pickled artifacts before evaluating.
ac = AutoConverter.load(ac_filepath)
al = AutoLearn.load(al_filepath)
e = Evaluate(ac, al)
orig_eval_s = e.evaluate_performance(df)
col_imp_df = e.calculate_column_importance(df)
def __reg_single_validation(X, y, est, validation_ratio=0.2, verbose=0):
    """Run single (hold-out) validation for regression.

    Args:
        X (np.array): Feature matrix
        y (np.array): Label vector
        est (sklearn.base.RegressorMixin): Regressor object
        validation_ratio (float): Fraction of the data held out for validation
        verbose (int): Controls the verbosity

    Returns:
        {'cv_df': pd.DataFrame(data_list,
                               columns=['metric_test', 'metric_train']),
         'train_eval_df': pd.DataFrame,
         'test_eval_df': pd.DataFrame,
         'sample_est': est}

    Raises:
        ValueError: If verbose is not an integer.
    """
    if not isinstance(verbose, int):
        raise ValueError('Verbose parameter must be an integer')
    if hasattr(est, 'verbose'):
        est.verbose = verbose

    data_list = []
    train_eval_s_list = []
    test_eval_s_list = []
    metric_func_dict = MetricCatalog.get_basic_metrics(task_type='regression')

    # Single shuffled train/validation split (fixed seed for repeatability).
    ss = ShuffleSplit(n_splits=1, test_size=validation_ratio, random_state=0)
    train_idx, test_idx = next(ss.split(X, y))
    y_train, y_test = y[train_idx], y[test_idx]
    X_train, X_test = X[train_idx], X[test_idx]
    est.fit(X_train, y_train)
    y_pred = est.predict(X_test)
    y_pred_train = est.predict(X_train)

    # Negate MAE so the metric is a "score" (higher is better), consistent
    # with __reg_cross_validation() and the 'neg_mean_absolute_error'
    # naming convention. The original returned the raw (positive) MAE here.
    metric_test = - mean_absolute_error(y_test, y_pred)
    metric_train = - mean_absolute_error(y_train, y_pred_train)
    data_list.append([metric_test, metric_train])

    train_eval_s = Evaluate.run_metric_functions(y_train, y_pred_train, None,
                                                 metric_func_dict,
                                                 task_type="regression")
    train_eval_s_list.append(train_eval_s)
    test_eval_s = Evaluate.run_metric_functions(y_test, y_pred, None,
                                                metric_func_dict,
                                                task_type="regression")
    test_eval_s_list.append(test_eval_s)

    return {'cv_df': pd.DataFrame(data_list,
                                  columns=['metric_test', 'metric_train']),
            'train_eval_df': pd.concat(train_eval_s_list, axis=1).T,
            'test_eval_df': pd.concat(test_eval_s_list, axis=1).T,
            'sample_est': est}
def __run_cross_validation(X, y, clf, metric, cv_num=5, pos_label=1,
                           verbose=0):
    """Run cross validation for evaluation.

    Args:
        X (np.array): Feature matrix
        y (np.array): Label vector
        clf (sklearn.base.ClassifierMixin): Classifier object
        metric (str): Evaluation metric, one of ['roc_auc', 'neg_log_loss'].
            'neg_mean_absolute_error' dispatches to __reg_cross_validation().
        cv_num (int): Number of folds for cross validation
        pos_label (int): Positive label name (used for binary classification)
        verbose (int): Controls the verbosity

    Returns:
        {'cv_df': pd.DataFrame(data_list,
                               columns=['metric_test', 'metric_train']),
         'train_eval_df': pd.DataFrame,
         'test_eval_df': pd.DataFrame,
         'y_error': y_error,
         'y_pred': y_pred,
         'sample_clf': clf}

    Raises:
        ValueError: If verbose is not an integer.
        Exception: If the metric is not supported.
    """
    if metric == 'neg_mean_absolute_error':
        # regression task: using separate function
        return __reg_cross_validation(X=X, y=y, est=clf, cv_num=cv_num,
                                      verbose=verbose)

    # Explicit check instead of assert: asserts are stripped under -O.
    if not isinstance(verbose, int):
        raise ValueError('Verbose parameter must be an integer')

    data_list = []
    num_class = len(np.unique(y))
    # random_state has no effect when shuffle=False and recent scikit-learn
    # versions raise a ValueError if it is passed anyway, so it is omitted.
    # TODO(Yoshi): decide whether folds should be shuffled (random_state).
    kf = StratifiedKFold(n_splits=cv_num)

    # accuracy, precision, recall
    metric_func_dict = MetricCatalog.get_basic_metrics()
    train_eval_s_list = []
    test_eval_s_list = []

    # TODO(Yoshi): If clf (e.g., GridSearchCV) has inner classifier object
    # that has `verbose` paramter, the below logic does not handle it.
    if hasattr(clf, 'verbose'):
        clf.verbose = verbose

    # Multi-class keeps per-class columns; binary keeps a flat vector.
    if num_class > 2:
        y_error = np.zeros((len(y), num_class))
        y_pred_all = np.zeros((len(y), num_class))
    else:
        y_error = np.zeros(len(y))
        y_pred_all = np.zeros(len(y))

    for train_idx, test_idx in kf.split(X, y):
        y_train, y_test = y[train_idx], y[test_idx]
        X_train, X_test = X[train_idx], X[test_idx]
        clf.fit(X_train, y_train)

        # Take out class information from estimator or GridSearch object
        if hasattr(clf, 'classes_'):
            classes_ = clf.classes_
        else:
            assert hasattr(clf.best_estimator_, 'classes_')
            classes_ = clf.best_estimator_.classes_

        # Wrap estimators without predict_proba so probability estimates
        # are available for the metrics below.
        if not hasattr(clf, 'predict_proba'):
            clf = CalibratedClassifierCV(clf, cv='prefit')
            clf.fit(X_train, y_train)

        # predict/predict_proba
        if metric in ['roc_auc']:
            assert num_class == 2
            # Binary classification
            y_pred = clf.predict(X_test)
            pos_idx = np.where(np.array(classes_) == pos_label)[0][0]
            y_prob = clf.predict_proba(X_test)[:, pos_idx]
            y_pred_train = clf.predict(X_train)
            y_prob_train = clf.predict_proba(X_train)[:, pos_idx]
            y_error[test_idx] = np.abs(y_test - y_prob)
            y_pred_all[test_idx] = y_prob

            # Calculate evaluation metric
            fpr_test, tpr_test, _ = roc_curve(y_test, y_prob,
                                              pos_label=pos_label)
            metric_test = auc(fpr_test, tpr_test)
            fpr_train, tpr_train, _ = roc_curve(y_train, y_prob_train,
                                                pos_label=pos_label)
            metric_train = auc(fpr_train, tpr_train)

            train_eval_s = Evaluate.run_metric_functions(y_train,
                                                         y_pred_train,
                                                         y_prob_train,
                                                         metric_func_dict,
                                                         "binary")
            train_eval_s_list.append(train_eval_s)
            test_eval_s = Evaluate.run_metric_functions(y_test, y_pred,
                                                        y_prob,
                                                        metric_func_dict,
                                                        "binary")
            test_eval_s_list.append(test_eval_s)
        elif metric in ['neg_log_loss']:
            # Multi-class classification - we should not run it with binary!
            y_pred = clf.predict(X_test)
            y_prob = clf.predict_proba(X_test)  # matrix
            y_pred_train = clf.predict(X_train)
            y_prob_train = clf.predict_proba(X_train)  # matrix
            y_pred_all[test_idx] = y_prob
            # TODO(Yoshi): Cannot simply define y_error for multi
            y_error[test_idx] = np.nan

            # Calculate evaluation metric.
            # Add the negative sign to make it a "score"
            metric_test = - log_loss(y_test, y_prob)
            metric_train = - log_loss(y_train, y_prob_train)

            train_eval_s = Evaluate.run_metric_functions(y_train,
                                                         y_pred_train,
                                                         y_prob_train,
                                                         metric_func_dict,
                                                         "multi")
            train_eval_s_list.append(train_eval_s)
            test_eval_s = Evaluate.run_metric_functions(y_test, y_pred,
                                                        y_prob,
                                                        metric_func_dict,
                                                        "multi")
            test_eval_s_list.append(test_eval_s)
        else:
            raise Exception("Metric not supported: {}".format(metric))

        data_list.append([metric_test, metric_train])

    # NOTE(review): 'y_pred' is the prediction from the LAST fold only,
    # while y_pred_all holds the out-of-fold values but is never returned
    # -- confirm which one callers actually expect.
    return {'cv_df': pd.DataFrame(data_list,
                                  columns=['metric_test', 'metric_train']),
            'train_eval_df': pd.concat(train_eval_s_list, axis=1).T,
            'test_eval_df': pd.concat(test_eval_s_list, axis=1).T,
            'y_error': y_error,
            'y_pred': y_pred,
            'sample_clf': clf}
def __reg_cross_validation(X, y, est, cv_num=5, n_jobs=1, verbose=0):
    """Cross validation for the regression case.

    Args:
        X (np.array): Feature matrix
        y (np.array): Label vector
        est (sklearn.base.RegressorMixin): Regressor object
        cv_num (int): Number of folds for cross validation
        n_jobs (int): The number of jobs to run parallel processes
        verbose (int): Controls the verbosity

    Returns:
        {'cv_df': pd.DataFrame(data_list,
                               columns=['metric_test', 'metric_train']),
         'train_eval_df': pd.DataFrame,
         'test_eval_df': pd.DataFrame,
         'sample_est': est}

    Raises:
        ValueError: If verbose is not an integer.
    """
    # Explicit check instead of assert: asserts are stripped under -O.
    if not isinstance(verbose, int):
        raise ValueError('Verbose parameter must be an integer')

    data_list = []
    train_eval_s_list = []
    test_eval_s_list = []
    metric_func_dict = MetricCatalog.get_basic_metrics(task_type='regression')
    # random_state has no effect when shuffle=False and recent scikit-learn
    # versions raise a ValueError if it is passed anyway, so it is omitted.
    # TODO(Yoshi): decide whether folds should be shuffled (random_state).
    kf = KFold(n_splits=cv_num)

    if hasattr(est, 'verbose'):
        est.verbose = verbose
    if hasattr(est, 'n_jobs'):
        est.n_jobs = n_jobs

    for train_idx, test_idx in kf.split(X, y):
        y_train, y_test = y[train_idx], y[test_idx]
        X_train, X_test = X[train_idx], X[test_idx]
        est.fit(X_train, y_train)
        y_pred = est.predict(X_test)
        y_pred_train = est.predict(X_train)

        # Negate MAE so the metric is a "score" (higher is better).
        metric_test = - mean_absolute_error(y_test, y_pred)
        metric_train = - mean_absolute_error(y_train, y_pred_train)
        data_list.append([metric_test, metric_train])

        train_eval_s = Evaluate.run_metric_functions(y_train, y_pred_train,
                                                     None, metric_func_dict,
                                                     "regression")
        train_eval_s_list.append(train_eval_s)
        test_eval_s = Evaluate.run_metric_functions(y_test, y_pred, None,
                                                    metric_func_dict,
                                                    "regression")
        test_eval_s_list.append(test_eval_s)

    return {'cv_df': pd.DataFrame(data_list,
                                  columns=['metric_test', 'metric_train']),
            'train_eval_df': pd.concat(train_eval_s_list, axis=1).T,
            'test_eval_df': pd.concat(test_eval_s_list, axis=1).T,
            'sample_est': est}
def __run_single_validation(X, y, clf, metric, validation_ratio=0.2,
                            pos_label=1, verbose=0):
    """Run single (hold-out) validation for evaluation.

    Args:
        X (np.array): Feature matrix
        y (np.array): Label vector
        clf (sklearn.base.ClassifierMixin): Classifier object
        metric (str): Evaluation metric, one of ['roc_auc', 'neg_log_loss']
        validation_ratio (float): Fraction of the data held out for validation
        pos_label (int): Positive label name (used for binary classification)
        verbose (int): Controls the verbosity

    Returns:
        {'cv_df': pd.DataFrame(data_list,
                               columns=['metric_test', 'metric_train']),
         'train_eval_df': pd.DataFrame,
         'test_eval_df': pd.DataFrame,
         'y_error': y_error,
         'y_pred': y_pred,
         'test_index': test_idx,
         'sample_clf': clf}

    Raises:
        Exception: If the metric is not supported.
    """
    # TODO(Yoshi): Overall function should be able to merge with
    # run_cross_validation()
    data_list = []
    metric_func_dict = MetricCatalog.get_basic_metrics()
    train_eval_s_list = []
    test_eval_s_list = []
    num_class = len(np.unique(y))

    # Single stratified train/validation split (fixed seed).
    sss = StratifiedShuffleSplit(n_splits=1, test_size=validation_ratio,
                                 random_state=0)
    train_idx, test_idx = next(sss.split(X, y))
    y_train, y_test = y[train_idx], y[test_idx]
    X_train, X_test = X[train_idx], X[test_idx]
    clf.fit(X_train, y_train)

    # predict/predict_proba
    if metric in ['roc_auc']:
        assert num_class == 2

        # Take out class information from estimator or GridSearch object
        if hasattr(clf, 'classes_'):
            classes_ = clf.classes_
        else:
            assert hasattr(clf.best_estimator_, 'classes_')
            classes_ = clf.best_estimator_.classes_

        # Binary classification
        pos_idx = np.where(np.array(classes_) == pos_label)[0][0]
        y_pred = clf.predict(X_test)
        y_prob = clf.predict_proba(X_test)[:, pos_idx]
        y_pred_train = clf.predict(X_train)
        y_prob_train = clf.predict_proba(X_train)[:, pos_idx]
        # Use the positive-class probability (not the hard label) so the
        # error is graded, consistent with __run_cross_validation().
        # The original used y_pred here.
        y_error = np.abs(y_test - y_prob)

        # Calculate evaluation metric
        fpr_test, tpr_test, _ = roc_curve(y_test, y_prob,
                                          pos_label=pos_label)
        metric_test = auc(fpr_test, tpr_test)
        fpr_train, tpr_train, _ = roc_curve(y_train, y_prob_train,
                                            pos_label=pos_label)
        metric_train = auc(fpr_train, tpr_train)

        train_eval_s = Evaluate.run_metric_functions(y_train, y_pred_train,
                                                     y_prob_train,
                                                     metric_func_dict,
                                                     "binary")
        train_eval_s_list.append(train_eval_s)
        test_eval_s = Evaluate.run_metric_functions(y_test, y_pred, y_prob,
                                                    metric_func_dict,
                                                    "binary")
        test_eval_s_list.append(test_eval_s)
    elif metric in ['neg_log_loss']:
        # Multi-class classification - we should not run it with binary!
        # TODO(Bublin): This y_pred don't have collect index
        # (do we have to return y_pred and y_error?)
        y_pred = clf.predict(X_test)  # matrix
        y_prob = clf.predict_proba(X_test)  # matrix
        y_pred_train = clf.predict(X_train)
        y_prob_train = clf.predict_proba(X_train)  # matrix
        # TODO(Yoshi): Cannot simply define y_error for multi
        y_error = np.nan

        # Calculate evaluation metric.
        # Add the negative sign so the value is a "score" (higher is
        # better), consistent with __run_cross_validation() and the
        # 'neg_log_loss' metric name. The original returned the raw loss.
        metric_test = - log_loss(y_test, y_prob)
        metric_train = - log_loss(y_train, y_prob_train)

        train_eval_s = Evaluate.run_metric_functions(y_train, y_pred_train,
                                                     y_prob_train,
                                                     metric_func_dict,
                                                     "multi")
        train_eval_s_list.append(train_eval_s)
        test_eval_s = Evaluate.run_metric_functions(y_test, y_pred, y_prob,
                                                    metric_func_dict,
                                                    "multi")
        test_eval_s_list.append(test_eval_s)
    else:
        raise Exception("Metric not supported: {}".format(metric))

    data_list.append([metric_test, metric_train])

    return {'cv_df': pd.DataFrame(data_list,
                                  columns=['metric_test', 'metric_train']),
            'train_eval_df': pd.concat(train_eval_s_list, axis=1).T,
            'test_eval_df': pd.concat(test_eval_s_list, axis=1).T,
            'y_error': y_error,
            'y_pred': y_pred,
            'test_index': test_idx,
            'sample_clf': clf}
def setUp(self):
    """Build Evaluate fixtures: Titanic (binary), Iris (multi-class),
    KKBox subtables, and a regression variant."""
    # --- Titanic / binary classification ---
    self.df1 = pd.read_csv('data/train.csv')
    self.assertTrue(True)
    titanic_ac = AutoConverter(target='Survived')
    self.assertTrue(True)
    X1, y1 = titanic_ac.fit_transform(self.df1)
    titanic_al = AutoLearn(level=1)
    titanic_al.learn(X1, y1)
    self.e1 = Evaluate(ac=titanic_ac, alearn=titanic_al)
    self.assertTrue(True)

    lr1 = LogisticRegression()
    lr1.fit(X1, y1)
    self.e1a = Evaluate(ac=titanic_ac, alearn=lr1)

    # Evaluate without an AutoConverter needs explicit feature names.
    with self.assertRaises(ValueError):
        Evaluate(alearn=titanic_al)
    self.e1b = Evaluate(alearn=titanic_al,
                        feature_names=titanic_ac.feature_names)

    # --- Iris / multi-class classification ---
    iris = datasets.load_iris()
    self.df2 = pd.DataFrame(np.c_[iris.target.reshape(-1, 1), iris.data],
                            columns=["class"] + iris.feature_names)
    iris_ac = AutoConverter(target="class")
    iris_al = AutoLearn(level=1)
    X2, y2 = iris_ac.fit_transform(self.df2)
    iris_al.learn(X2, y2)
    self.e2 = Evaluate(ac=iris_ac, alearn=iris_al)
    lr2 = LogisticRegression()
    lr2.fit(X2, y2)
    self.e2a = Evaluate(ac=iris_ac, alearn=lr2)

    # --- KKBox churn / subtable handling ---
    kkbox_dir = "data/kaggle-kkbox-churn-prediction-challenge-1k"
    members_df = pd.read_csv(os.path.join(kkbox_dir, "members_train.csv"))
    transactions_df = pd.read_csv(os.path.join(kkbox_dir,
                                               "transactions.csv"))
    user_logs_df = pd.read_csv(os.path.join(kkbox_dir, "user_logs.csv"))
    churn_subtables = {
        "transactions": {
            "table": transactions_df,
            "link_key": "msno",
            "group_key": "msno"
        },
        "user_logs": {
            "table": user_logs_df,
            "link_key": "msno",
            "group_key": "msno"
        }
    }
    churn_ac = AutoConverter(target="is_churn")
    X3, y3 = churn_ac.fit_transform(df=members_df,
                                    subtables=churn_subtables)
    churn_al = AutoLearn(level=1)
    churn_al.learn(X3, y3)
    self.e3 = Evaluate(ac=churn_ac, alearn=churn_al)
    self.df4 = members_df

    # --- regression variant on the KKBox members table ---
    reg_ac = AutoConverter(target="is_churn", task_type="regression")
    X4, y4 = reg_ac.fit_transform(df=members_df)
    reg_al = AutoLearn(level=1, task="regression")
    reg_al.learn(X4, y4)
    self.e4 = Evaluate(alearn=reg_al, ac=reg_ac)
class EvaluateTestCase(unittest.TestCase):
    """Tests for Evaluate across binary, multi-class, subtable and
    regression setups."""

    def setUp(self):
        # Binary classification fixtures (Titanic).
        self.df1 = pd.read_csv('data/train.csv')
        self.assertTrue(True)
        ac1 = AutoConverter(target='Survived')
        self.assertTrue(True)
        X1, y1 = ac1.fit_transform(self.df1)
        al1 = AutoLearn(level=1)
        al1.learn(X1, y1)
        self.e1 = Evaluate(ac=ac1, alearn=al1)
        self.assertTrue(True)

        clf1 = LogisticRegression()
        clf1.fit(X1, y1)
        self.e1a = Evaluate(ac=ac1, alearn=clf1)

        # Evaluate without an AutoConverter needs explicit feature names.
        with self.assertRaises(ValueError):
            Evaluate(alearn=al1)
        self.e1b = Evaluate(alearn=al1, feature_names=ac1.feature_names)

        # Multi-class classification fixtures (Iris).
        data = datasets.load_iris()
        self.df2 = pd.DataFrame(np.c_[data.target.reshape(-1, 1), data.data],
                                columns=["class"] + data.feature_names)
        ac2 = AutoConverter(target="class")
        al2 = AutoLearn(level=1)
        X2, y2 = ac2.fit_transform(self.df2)
        al2.learn(X2, y2)
        self.e2 = Evaluate(ac=ac2, alearn=al2)
        clf2 = LogisticRegression()
        clf2.fit(X2, y2)
        self.e2a = Evaluate(ac=ac2, alearn=clf2)

        # subtable
        dirpath = "data/kaggle-kkbox-churn-prediction-challenge-1k"
        members_df = pd.read_csv(os.path.join(dirpath, "members_train.csv"))
        transactions_df = pd.read_csv(os.path.join(dirpath,
                                                   "transactions.csv"))
        user_logs_df = pd.read_csv(os.path.join(dirpath, "user_logs.csv"))
        subtables3 = {
            "transactions": {
                "table": transactions_df,
                "link_key": "msno",
                "group_key": "msno"
            },
            "user_logs": {
                "table": user_logs_df,
                "link_key": "msno",
                "group_key": "msno"
            }
        }
        ac3 = AutoConverter(target="is_churn")
        X3, y3 = ac3.fit_transform(df=members_df, subtables=subtables3)
        al3 = AutoLearn(level=1)
        al3.learn(X3, y3)
        self.e3 = Evaluate(ac=ac3, alearn=al3)
        self.df4 = members_df

        # Regression fixtures on the KKBox members table.
        ac4 = AutoConverter(target="is_churn", task_type="regression")
        X4, y4 = ac4.fit_transform(df=members_df)
        al4 = AutoLearn(level=1, task="regression")
        al4.learn(X4, y4)
        e4 = Evaluate(alearn=al4, ac=ac4)
        self.e4 = e4

    def test_calculate_column_importance(self):
        for e in [self.e1, self.e1a, self.e2, self.e2a, self.e3, self.e4]:
            try:
                e.calculate_column_importance()
            # Renamed from `e`: the original shadowed the loop variable,
            # and Python 3 deletes the exception name after the handler.
            except Exception as exc:
                self.fail(str(exc))

    def test_evaluate(self):
        for e in [self.e1, self.e1a, self.e2, self.e2a, self.e3, self.e4]:
            orig_eval_s = e.evaluate_performance()
            col_imp_df = e.calculate_column_importance()
            self.assertEqual(orig_eval_s.index.tolist(),
                             col_imp_df.columns.tolist())
        # They should raise Errors as X and y are not given
        with self.assertRaises(ValueError):
            self.e1b.evaluate_performance()
        with self.assertRaises(ValueError):
            self.e1b.calculate_column_importance()

    def test_get_top_column(self):
        self.assertEqual(5, len(self.e1.get_top_columns(n=5)))
        for table_colname in self.e3.get_top_columns(n=3):
            tablename = table_colname.split("..")[0]
            self.assertTrue(tablename in
                            list(self.e3.ac.subtables_.keys()) + ["main"])

    def test_get_mispredictions(self):
        for e, df in [(self.e1, self.df1),
                      (self.e1a, self.df1),
                      (self.e2, self.df2),
                      (self.e2a, self.df2)]:
            mispred_df = e.get_mispredictions(df)
            orig_colset = set(df.columns.tolist())
            mispred_colset = set(mispred_df.columns.tolist())
            # All columns in mispred_df should be in df
            self.assertEqual(len(mispred_colset & orig_colset),
                             len(mispred_colset))

    def test_stratify_errors(self):
        for e, df in [(self.e1, self.df1), (self.e1a, self.df1)]:
            es = e.stratify_errors(df)
            self.assertIsNotNone(es)
            self.assertIsInstance(es, ErrorSummary)
            self.assertIsNotNone(es.diversity)
            self.assertIsNotNone(es.error_dist)
            self.assertIsNotNone(es.errors)
            self.assertEqual(es.error_dist.index.levels[0].tolist(),
                             es.diversity.index.tolist())
        # None should be returned for the Iris dataset
        self.assertIsNone(self.e2.stratify_errors(self.df2))
        self.assertIsNone(self.e2a.stratify_errors(self.df2))

    def test_get_explanations(self):
        e_df1 = self.e1.get_explanations(self.df1)
        self.assertEqual(e_df1.shape[0], self.df1.shape[0])
        e_df1a = self.e1a.get_explanations(self.df1)
        self.assertEqual(e_df1a.shape[0], self.df1.shape[0])
        e_df2 = self.e2.get_explanations(self.df2)
        self.assertEqual(e_df2.shape[0], self.df2.shape[0])
        e_df2a = self.e2a.get_explanations(self.df2)
        self.assertEqual(e_df2a.shape[0], self.df2.shape[0])
from learnit.autolearn.evaluate import Evaluate
from learnit.autolearn.blueprints import StackedXGBoost, AverageBlender


if __name__ == '__main__':
    # End-to-end smoke run on the Titanic training data.
    df = pd.read_csv('data/train.csv')
    ac = AutoConverter(target='Survived')
    X, y = ac.fit_transform(df)
    al = AutoLearn(
        customized_clf_list=[('LogisticRegression', LogisticRegression())],
        metric='roc_auc',
        cv_num=5,
        pos_label=1,
        n_jobs=1,
        verbose=0)
    results = al.learn(X, y)
    print(results['name'])
    print(results['eval_df'])
    pred = al.predict(X)
    print(pred)

    # Alternative classifier lists kept for reference:
    # name, clf, cv_result = autolearn(
    #     X, y, verbose=3,
    #     clf_list=[('AverageBlender',
    #                AverageBlender(scoring='roc_auc', random_state=1,
    #                               verbose=3))])
    # name, clf, cv_result = autolearn(
    #     X, y, verbose=3,
    #     clf_list=[('LogisticRegression', LogisticRegression())])

    e = Evaluate(ac, al)
    orig_eval_s = e.evaluate_performance()
    col_imp_df = e.calculate_column_importance()
    explain_df = e.get_explanations(df)
    X_test = ac.transform(df, prediction=True)