import pandas as pd
from sklearn.tree import DecisionTreeClassifier

import stdfunc  # local helper providing the plotting utilities (assumed import path)


def main():
    df_train = pd.read_csv('../train_dataset.csv')
    df_test = pd.read_csv('../test_dataset.csv')

    # Column 0 holds the label, column 1 is skipped, features start at column 2.
    X_train, y_train = df_train.iloc[:, 2:], df_train.iloc[:, 0]
    X_test, y_test = df_test.iloc[:, 2:], df_test.iloc[:, 0]
    unique_labels = sorted(y_train.unique().tolist())

    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)

    print("\n\n{}\n".format(clf.score(X_test, y_test)))
    y_predicted = clf.predict(X_test)

    print("Generating confusion matrix figure... \n")
    stdfunc.plot_confusion_matrix(
        y_test, y_predicted, ml_name='DT', classes=unique_labels,
        title='Confusion matrix for Decision Tree evaluation')

    print("Generating classification report figure... \n")
    stdfunc.plot_classification_report(
        y_test, y_predicted, ml_name='DT', classes=unique_labels,
        title='Classification report for Decision Tree evaluation')
import pandas as pd
from sklearn.svm import SVC

import stdfunc  # local helper providing the plotting utilities (assumed import path)


def main():
    df_train = pd.read_csv('../train_dataset.csv')
    df_test = pd.read_csv('../test_dataset.csv')

    # Column 0 holds the label, column 1 is skipped, features start at column 2.
    X_train, y_train = df_train.iloc[:, 2:], df_train.iloc[:, 0]
    X_test, y_test = df_test.iloc[:, 2:], df_test.iloc[:, 0]
    unique_labels = sorted(y_train.unique().tolist())

    print(X_train)
    print(X_test)

    # hyper-parameters inferred by running skopt
    clf = SVC(C=447.81051228628013, coef0=0.12426850569436687,
              decision_function_shape="ovr", degree=2,
              gamma=0.02413100813767344, kernel="rbf",
              tol=0.004948161298923479, verbose=True)
    clf.fit(X_train, y_train)

    print("\n\n{}\n".format(clf.score(X_test, y_test)))
    y_predicted = clf.predict(X_test)

    print("Generating confusion matrix figure... \n")
    stdfunc.plot_confusion_matrix(
        y_test, y_predicted, ml_name='SVM', classes=unique_labels,
        title='Confusion matrix for SVM evaluation')

    print("Generating classification report figure... \n")
    stdfunc.plot_classification_report(
        y_test, y_predicted, ml_name='SVM', classes=unique_labels,
        title='Classification report for SVM evaluation')
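# The SVC hyper-parameters above are quoted as the output of a skopt run; the
# search script itself is not part of this file. The helper below is a minimal,
# hypothetical sketch (not called by main()) of how such a search could be set
# up with skopt's BayesSearchCV -- the search ranges, fold count and iteration
# budget are assumptions, not the settings actually used.
def tune_svc_with_skopt(X_train, y_train):
    # Imported locally so the script still runs without scikit-optimize installed.
    from skopt import BayesSearchCV
    from skopt.space import Categorical, Integer, Real
    from sklearn.svm import SVC

    search = BayesSearchCV(
        SVC(),
        {
            'C': Real(1e-2, 1e3, prior='log-uniform'),
            'gamma': Real(1e-4, 1e0, prior='log-uniform'),
            'coef0': Real(0.0, 1.0),
            'degree': Integer(1, 5),
            'kernel': Categorical(['rbf', 'poly', 'sigmoid']),
            'tol': Real(1e-5, 1e-2, prior='log-uniform'),
        },
        n_iter=50,   # assumed evaluation budget
        cv=3,        # assumed fold count
        n_jobs=-1,
    )
    search.fit(X_train, y_train)
    print(search.best_params_)
    return search.best_estimator_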
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier

import stdfunc  # local helper providing the plotting utilities (assumed import path)


def main():
    df_train = pd.read_csv('../train_dataset.csv')
    df_test = pd.read_csv('../test_dataset.csv')

    # Column 0 holds the label, column 1 is skipped, features start at column 2.
    X_train, y_train = df_train.iloc[:, 2:], df_train.iloc[:, 0]
    X_test, y_test = df_test.iloc[:, 2:], df_test.iloc[:, 0]
    unique_labels = sorted(y_train.unique().tolist())

    # hyper-parameters inferred by running auto-sklearn
    # note: criterion='mse' targets an older scikit-learn; newer releases name it 'squared_error'
    clf = GradientBoostingClassifier(
        learning_rate=0.0433556140045585, n_estimators=388,
        subsample=0.8291104221904706, criterion='mse',
        min_samples_split=13, min_samples_leaf=15, max_depth=10,
        max_features=0.33000096635982235, verbose=True)

    # hyper-parameters inferred by running hyperopt-sklearn
    # clf = GradientBoostingClassifier(
    #     criterion="mse", learning_rate=0.28539836866041823, max_depth=9,
    #     max_features=0.3842196341383438, min_samples_leaf=14,
    #     min_samples_split=9, n_estimators=734, subsample=0.7421091918485163)

    clf.fit(X_train, y_train)

    print("\n\n{}\n".format(clf.score(X_test, y_test)))
    y_predicted = clf.predict(X_test)

    print("Generating confusion matrix figure... \n")
    stdfunc.plot_confusion_matrix(
        y_test, y_predicted, ml_name='GB', classes=unique_labels,
        title='Confusion matrix for Gradient Boosting evaluation')

    print("Generating classification report figure... \n")
    stdfunc.plot_classification_report(
        y_test, y_predicted, ml_name='GB', classes=unique_labels,
        title='Classification report for Gradient Boosting evaluation')
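# The hyper-parameter sets above (and the hyperopt-sklearn values used by the
# random forest and XGBoost scripts) are quoted as search results; the search
# code itself is not included. The helper below is a minimal, hypothetical
# sketch (not called by main()) of the hyperopt-sklearn workflow that can
# produce such values -- the evaluation budget and per-trial timeout are
# assumptions, not the settings actually used.
def tune_with_hyperopt_sklearn(X_train, y_train, X_test, y_test):
    # Imported locally so the script still runs without hpsklearn installed.
    from hpsklearn import HyperoptEstimator, any_classifier
    from hyperopt import tpe

    estim = HyperoptEstimator(
        classifier=any_classifier('clf'),  # search over hpsklearn's classifier space
        algo=tpe.suggest,                  # Tree-structured Parzen Estimator search
        max_evals=50,                      # assumed evaluation budget
        trial_timeout=300)                 # assumed per-trial timeout in seconds
    estim.fit(X_train, y_train)
    print(estim.score(X_test, y_test))
    print(estim.best_model())              # inspect the fitted estimator and its parameters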
import pandas as pd
from sklearn.gaussian_process import GaussianProcessClassifier

import stdfunc  # local helper providing the plotting utilities (assumed import path)


def main():
    df_train = pd.read_csv('../train_dataset.csv')
    df_test = pd.read_csv('../test_dataset.csv')

    # Column 0 holds the label, column 1 is skipped, features start at column 2.
    X_train, y_train = df_train.iloc[:, 2:], df_train.iloc[:, 0]
    X_test, y_test = df_test.iloc[:, 2:], df_test.iloc[:, 0]
    unique_labels = sorted(y_train.unique().tolist())

    clf = GaussianProcessClassifier(max_iter_predict=500, warm_start=True, n_jobs=-1)
    clf.fit(X_train, y_train)

    print("\n\n{}\n".format(clf.score(X_test, y_test)))
    y_predicted = clf.predict(X_test)

    print("Generating confusion matrix figure... \n")
    stdfunc.plot_confusion_matrix(
        y_test, y_predicted, ml_name='GP', classes=unique_labels,
        title='Confusion matrix for Gaussian Process evaluation')

    print("Generating classification report figure... \n")
    stdfunc.plot_classification_report(
        y_test, y_predicted, ml_name='GP', classes=unique_labels,
        title='Classification report for Gaussian Process evaluation')
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

import stdfunc  # local helper providing the plotting utilities (assumed import path)


def main():
    df_train = pd.read_csv('../train_dataset.csv')
    df_test = pd.read_csv('../test_dataset.csv')

    # Column 0 holds the label, column 1 is skipped, features start at column 2.
    X_train, y_train = df_train.iloc[:, 2:], df_train.iloc[:, 0]
    X_test, y_test = df_test.iloc[:, 2:], df_test.iloc[:, 0]
    unique_labels = sorted(y_train.unique().tolist())

    # hyper-parameters inferred by running hyperopt-sklearn
    clf = RandomForestClassifier(
        bootstrap=False, class_weight=None, criterion='entropy',
        max_depth=None, max_features='sqrt', max_leaf_nodes=None,
        min_impurity_decrease=0.0, min_samples_leaf=1,
        min_samples_split=2, min_weight_fraction_leaf=0.0,
        n_estimators=75, n_jobs=1, oob_score=False, random_state=1,
        verbose=False, warm_start=False)
    clf.fit(X_train, y_train)

    print("\n\n{}\n".format(clf.score(X_test, y_test)))
    y_predicted = clf.predict(X_test)

    print("Generating confusion matrix figure... \n")
    stdfunc.plot_confusion_matrix(
        y_test, y_predicted, ml_name='RF', classes=unique_labels,
        title='Confusion matrix for Random Forest evaluation')

    print("Generating classification report figure... \n")
    stdfunc.plot_classification_report(
        y_test, y_predicted, ml_name='RF', classes=unique_labels,
        title='Classification report for Random Forest evaluation')
import pandas as pd
from sklearn import preprocessing
from xgboost import XGBClassifier

import stdfunc  # local helper providing the plotting utilities (assumed import path)


def main():
    df_train = pd.read_csv('../train_dataset.csv')
    df_test = pd.read_csv('../test_dataset.csv')

    # Column 0 holds the label, column 1 is skipped, features start at column 2.
    X_train, y_train = df_train.iloc[:, 2:].values, df_train.iloc[:, 0].values
    X_test, y_test = df_test.iloc[:, 2:].values, df_test.iloc[:, 0].values
    unique_labels = sorted(set(y_train.tolist()))

    # XGBoost expects integer class labels, so encode them consistently
    # for both the training and the test split.
    le = preprocessing.LabelEncoder()
    le.fit(y_train)
    y_train = le.transform(y_train)
    y_test = le.transform(y_test)

    # hyper-parameters inferred by running hyperopt-sklearn
    clf = XGBClassifier(
        colsample_bylevel=0.8737745469231419, colsample_bytree=1.0,
        gamma=4.858229599937319e-07, learning_rate=0.4853267733199465,
        max_delta_step=0, max_depth=9, min_child_weight=0,
        n_estimators=64, reg_alpha=2.5693931492543614e-05,
        reg_lambda=6.027978487395207e-05,
        scale_pos_weight=73.0915750362818, subsample=0.5410531887103683)
    clf.fit(X_train, y_train)

    print("\n\n{}\n".format(clf.score(X_test, y_test)))
    y_predicted = clf.predict(X_test)

    print("Generating confusion matrix figure... \n")
    stdfunc.plot_confusion_matrix(
        y_test, y_predicted, ml_name='XG', classes=unique_labels,
        title='Confusion matrix for XGBoost evaluation')

    print("Generating classification report figure... \n")
    stdfunc.plot_classification_report(
        y_test, y_predicted, ml_name='XG', classes=unique_labels,
        title='Classification report for XGBoost evaluation')
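# Because the labels are integer-encoded before fitting, y_test and y_predicted
# above are integer codes while unique_labels holds the original names;
# LabelEncoder assigns codes in sorted order, so the two orderings agree. If
# the original names are wanted again, e.g. for a textual report, they can be
# recovered with inverse_transform. The helper below is a minimal, hypothetical
# sketch of that step and is not called by main().
def print_text_report(le, y_true_encoded, y_pred_encoded):
    from sklearn.metrics import classification_report

    # Map the integer codes back to the original label names before reporting.
    print(classification_report(le.inverse_transform(y_true_encoded),
                                le.inverse_transform(y_pred_encoded)))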
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier

import stdfunc  # local helper providing the plotting utilities (assumed import path)


def main():
    df_train = pd.read_csv('../train_dataset.csv')
    df_test = pd.read_csv('../test_dataset.csv')

    # Column 0 holds the label, column 1 is skipped, features start at column 2.
    X_train, y_train = df_train.iloc[:, 2:], df_train.iloc[:, 0]
    X_test, y_test = df_test.iloc[:, 2:], df_test.iloc[:, 0]
    unique_labels = sorted(y_train.unique().tolist())

    # Try k = 1..19 and keep the predictions of the best-scoring model.
    curr_pred, curr_score = None, 0
    for k in range(1, 20):
        clf = KNeighborsClassifier(n_neighbors=k, n_jobs=-1)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        y_predicted = clf.predict(X_test)
        if score > curr_score:
            print("K = {} -- {}".format(k, score))
            curr_pred = y_predicted
            curr_score = score

    print("Generating confusion matrix figure... \n")
    stdfunc.plot_confusion_matrix(
        y_test, curr_pred, ml_name='Nearest_Neighbor', classes=unique_labels,
        title='Confusion matrix for Nearest Neighbor evaluation')

    print("Generating classification report figure... \n")
    stdfunc.plot_classification_report(
        y_test, curr_pred, ml_name='Nearest_Neighbor', classes=unique_labels,
        title='Classification report for Nearest Neighbor evaluation')
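# The loop above picks k by its accuracy on the test split, so the same data
# both selects the model and reports its score. An alternative is to choose k
# by cross-validation on the training split only and then evaluate the chosen
# model once on the test split. The helper below is a minimal, hypothetical
# sketch of that approach (not called by main()); the 5-fold setting is an
# assumption, not taken from the original script.
def select_k_by_cross_validation(X_train, y_train, k_range=range(1, 20), folds=5):
    from sklearn.model_selection import cross_val_score
    from sklearn.neighbors import KNeighborsClassifier

    best_k, best_cv_score = None, 0.0
    for k in k_range:
        clf = KNeighborsClassifier(n_neighbors=k, n_jobs=-1)
        # Mean accuracy across the cross-validation folds of the training split.
        cv_score = cross_val_score(clf, X_train, y_train, cv=folds).mean()
        if cv_score > best_cv_score:
            best_k, best_cv_score = k, cv_score
    return best_k, best_cv_score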