def _read_labelled_sheet(path, first_row):
    """Read feature rows and integer labels from the first sheet of a workbook.

    Args:
        path: Path of the Excel workbook, passed to ``xlrd.open_workbook``.
        first_row: Zero-based index of the first row to read; earlier rows
            are skipped.

    Returns:
        A ``(features, labels)`` tuple. ``features[k]`` holds the cell values
        of one row from column 1 onwards, and ``labels[k]`` is the value of
        column 0 of the same row converted with ``int``.
    """
    import xlrd

    sheet = xlrd.open_workbook(path).sheet_by_index(0)
    row_indices = range(first_row, sheet.nrows)
    features = [sheet.row_values(r, start_colx=1) for r in row_indices]
    labels = [int(sheet.cell_value(r, 0)) for r in row_indices]
    return features, labels


def test_cash_module():
    """Fit a Classifier on ``lyqdata.xlsx`` and score it on ``lyqtestdata.xlsx``.

    The classifier is restricted to the ``'mlp'`` model and fitted with the
    ``'auc'`` metric. The ROC AUC obtained on the test workbook is appended
    to a result list, which is printed and pickled to ``result.pkl``.

    Reads ``args.ensemble_size`` and ``args.run_count`` from a module-level
    ``args`` object that must exist when the function is called.
    """
    from alphaml.engine.components.data_manager import DataManager
    from alphaml.estimators.classifier import Classifier
    from sklearn.metrics import roc_auc_score
    import pickle

    result = []
    for _ in range(1):
        # NOTE(review): training data is read from row index 2 but test data
        # from row index 1 -- presumably the training workbook has one more
        # header row; confirm against the spreadsheets.
        X_train, y_train = _read_labelled_sheet("lyqdata.xlsx", first_row=2)

        dm = DataManager(X_train, y_train)
        cls = Classifier(
            # include_models=['liblinear_svc', 'libsvm_svc', 'random_forest', 'logistic_regression', 'mlp'],
            include_models=['mlp'],
            optimizer='smbo',
            cross_valid=False,
            ensemble_method='ensemble_selection',
            ensemble_size=args.ensemble_size,
            save_dir='data/save_models'
        )
        cls.fit(dm, metric='auc', runcount=args.run_count)

        X_test, y_test = _read_labelled_sheet("lyqtestdata.xlsx", first_row=1)
        pred = cls.predict_proba(X_test)
        # Only column 1 of the predicted probabilities is scored
        # (presumably the positive class -- verify against Classifier).
        result.append(roc_auc_score(y_test, pred[:, 1:2]))

    print(result)
    with open('result.pkl', 'wb') as f:
        pickle.dump(result, f)
def test_cash_module():
    """Fit a Classifier on a stratified split of one workbook and report AUC.

    Rows of the first sheet of ``ybai_Keratoconus_TJ_20190425.xlsx`` are read
    from row index 1 onwards: column 0 is used as the integer label and the
    remaining columns as features. The data is split 80/20 with
    stratification on the labels, a Classifier using ``'bagging'`` ensembling
    is fitted with the ``'auc'`` metric, and the ROC AUC between the one-hot
    encoded test labels and the predicted probabilities is appended to a
    result list. That list is printed and pickled to ``result.pkl``.

    Reads ``args.ensemble_size`` and ``args.run_count`` from a module-level
    ``args`` object that must exist when the function is called.
    """
    from alphaml.engine.components.data_manager import DataManager
    from alphaml.estimators.classifier import Classifier
    from sklearn.metrics import roc_auc_score
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import OneHotEncoder
    # Imported locally like every other dependency of this function, so the
    # function does not rely on a module-level ``np`` being present.
    import numpy as np
    import pickle
    import xlrd

    result = []
    for _ in range(1):
        sheet = xlrd.open_workbook("ybai_Keratoconus_TJ_20190425.xlsx").sheet_by_index(0)
        row_indices = range(1, sheet.nrows)
        X_all = [sheet.row_values(r, start_colx=1) for r in row_indices]
        y_all = [int(sheet.cell_value(r, 0)) for r in row_indices]

        # The encoder is fitted on the labels of the whole dataset, before
        # splitting, and later applied to the held-out labels.
        encoder = OneHotEncoder()
        encoder.fit(np.reshape(y_all, (len(y_all), 1)))

        X_train, X_test, y_train, y_test = train_test_split(
            X_all, y_all, test_size=0.2, stratify=y_all)

        dm = DataManager(X_train, y_train)
        cls = Classifier(
            # include_models=['liblinear_svc', 'libsvm_svc', 'xgboost', 'random_forest', 'logistic_regression', 'mlp'],
            optimizer='smbo',
            ensemble_method='bagging',
            ensemble_size=args.ensemble_size,
        )
        cls.fit(dm, metric='auc', runcount=args.run_count)

        pred = cls.predict_proba(X_test)
        print(pred)
        # One-hot encode the held-out labels before passing them to
        # roc_auc_score together with the full probability matrix.
        y_test_onehot = encoder.transform(
            np.reshape(y_test, (len(y_test), 1))).toarray()
        result.append(roc_auc_score(y_test_onehot, pred))

    print(result)
    with open('result.pkl', 'wb') as f:
        pickle.dump(result, f)
from time import time

# Suppress every warning for the remainder of the run.
warnings.filterwarnings("ignore")

# Command-line options: how many features to generate and which dataset to load.
parser = argparse.ArgumentParser()
parser.add_argument("--generated_feature", type=int, default=1)
parser.add_argument("--dataset", type=str)
args = parser.parse_args()

# load_data returns three values; only the first two are used below.
x, y, c = load_data(args.dataset)
dm = DataManager(x, y)

# Baseline: logistic regression fitted on the training split and scored on
# the validation split.
lr = LogisticRegression()
lr.fit(dm.train_X, dm.train_y)
y_pred = lr.predict(dm.val_X)
baseline_accuracy = accuracy_score(dm.val_y, y_pred)
print("original lr accu:", baseline_accuracy, flush=True)

# Optional feature generation; skipped when the option is zero or negative.
if args.generated_feature > 0:
    af = AutoFeature("accuracy", "auto_cross")
    af.fit(dm, args.generated_feature)
    dm = af.transform(dm)

# Fit the Classifier and report the wall-clock time spent in fit().
clf = Classifier()
start_time = time()
clf.fit(dm, metric="accuracy", runcount=50)
elapsed = time() - start_time
print("alphaml time:", elapsed)
print("dataset:", args.dataset)

alphaml_score = clf.score(dm.val_X, dm.val_y)
print("generated data:", args.generated_feature, ", alphaml score:", alphaml_score)