Beispiel #1
0
def main():
    """Fit a default decision tree on the CSV split and plot its evaluation.

    Loads ``../train_dataset.csv`` and ``../test_dataset.csv``, taking column 0
    as the label and columns 2 onward as the features (column 1 is not used).
    Prints the classifier's ``score`` on the test split, then passes the test
    labels and predictions to ``stdfunc.plot_confusion_matrix`` and
    ``stdfunc.plot_classification_report``.
    """
    train_frame = pd.read_csv('../train_dataset.csv')
    test_frame = pd.read_csv('../test_dataset.csv')

    train_features = train_frame.iloc[:, 2:]
    train_labels = train_frame.iloc[:, 0]
    test_features = test_frame.iloc[:, 2:]
    test_labels = test_frame.iloc[:, 0]

    # Sorted distinct training labels, handed to both plots as ``classes``.
    label_names = train_labels.unique().tolist()
    label_names.sort()

    model = DecisionTreeClassifier()
    model.fit(train_features, train_labels)

    test_score = model.score(test_features, test_labels)
    print("\n\n{}\n".format(test_score))

    predictions = model.predict(test_features)

    # Keyword arguments common to both figures.
    shared_kwargs = {'ml_name': 'DT', 'classes': label_names}

    print("Generating confusion matrix figure... \n")
    stdfunc.plot_confusion_matrix(
        test_labels,
        predictions,
        title='Confusion matrix for Decision Tree evaluation',
        **shared_kwargs)

    print("Generating classification report figure... \n")
    stdfunc.plot_classification_report(
        test_labels,
        predictions,
        title='Classification report for Decision Tree evaluation',
        **shared_kwargs)
def main():
    """Fit a tuned RBF-kernel SVC on the CSV split and plot its evaluation.

    Loads ``../train_dataset.csv`` and ``../test_dataset.csv``, taking column 0
    as the label and columns 2 onward as the features (column 1 is not used).
    Both feature frames are printed before training. After fitting, the test
    ``score`` is printed and the test labels and predictions are passed to
    ``stdfunc.plot_confusion_matrix`` and ``stdfunc.plot_classification_report``.
    """
    train_frame = pd.read_csv('../train_dataset.csv')
    test_frame = pd.read_csv('../test_dataset.csv')

    train_features = train_frame.iloc[:, 2:]
    train_labels = train_frame.iloc[:, 0]
    test_features = test_frame.iloc[:, 2:]
    test_labels = test_frame.iloc[:, 0]

    # Sorted distinct training labels, handed to both plots as ``classes``.
    label_names = train_labels.unique().tolist()
    label_names.sort()

    # Dump both feature frames to stdout before training.
    for frame in (train_features, test_features):
        print(frame)

    # hyper-parameters inferred by running skopt
    svc_params = {
        'C': 447.81051228628013,
        'coef0': 0.12426850569436687,
        'decision_function_shape': "ovr",
        'degree': 2,
        'gamma': 0.02413100813767344,
        'kernel': "rbf",
        'tol': 0.004948161298923479,
        'verbose': True,
    }
    model = SVC(**svc_params)
    model.fit(train_features, train_labels)

    test_score = model.score(test_features, test_labels)
    print("\n\n{}\n".format(test_score))

    predictions = model.predict(test_features)

    print("Generating confusion matrix figure... \n")
    stdfunc.plot_confusion_matrix(
        test_labels,
        predictions,
        ml_name='SVM',
        classes=label_names,
        title='Confusion matrix for SVM evaluation',
    )

    print("Generating classification report figure... \n")
    stdfunc.plot_classification_report(
        test_labels,
        predictions,
        ml_name='SVM',
        classes=label_names,
        title='Classification report for SVM evaluation',
    )
def main():
    """Fit a tuned gradient-boosting classifier and plot its evaluation.

    Loads ``../train_dataset.csv`` and ``../test_dataset.csv``, taking column 0
    as the label and columns 2 onward as the features (column 1 is not used).
    Prints the classifier's ``score`` on the test split, then passes the test
    labels and predictions to ``stdfunc.plot_confusion_matrix`` and
    ``stdfunc.plot_classification_report``.

    The figures are labelled as Gradient Boosting (``ml_name='GB'``). The
    previous ``'DT'`` / "Decision Tree" labels were copied from the
    decision-tree script and misidentified the model.
    """
    df_train = pd.read_csv('../train_dataset.csv')
    df_test = pd.read_csv('../test_dataset.csv')

    X_train, y_train = df_train.iloc[:, 2:], df_train.iloc[:, 0]
    X_test, y_test = df_test.iloc[:, 2:], df_test.iloc[:, 0]
    # Sorted distinct training labels, handed to both plots as ``classes``.
    unique_labels = sorted(y_train.unique().tolist())

    # hyper-parameters inferred by running auto-sklearn
    # NOTE(review): newer scikit-learn releases spell criterion 'mse' as
    # 'squared_error' -- confirm against the installed version before upgrading.
    clf = GradientBoostingClassifier(
        learning_rate=0.0433556140045585,
        n_estimators=388,
        subsample=0.8291104221904706,
        criterion='mse',
        min_samples_split=13,
        min_samples_leaf=15,
        max_depth=10,
        max_features=0.33000096635982235,
        verbose=True)

    # Alternative hyper-parameters inferred by running hyperopt-sklearn:
    #   criterion="mse", learning_rate=0.28539836866041823, max_depth=9,
    #   max_features=0.3842196341383438, min_samples_leaf=14,
    #   min_samples_split=9, n_estimators=734, subsample=0.7421091918485163

    clf.fit(X_train, y_train)

    print("\n\n{}\n".format(clf.score(X_test, y_test)))

    y_predicted = clf.predict(X_test)

    print("Generating confusion matrix figure... \n")
    stdfunc.plot_confusion_matrix(
        y_test,
        y_predicted,
        ml_name='GB',
        classes=unique_labels,
        title='Confusion matrix for Gradient Boosting evaluation')

    print("Generating classification report figure... \n")
    stdfunc.plot_classification_report(
        y_test,
        y_predicted,
        ml_name='GB',
        classes=unique_labels,
        title='Classification report for Gradient Boosting evaluation')
def main():
    """Fit a Gaussian-process classifier on the CSV split and plot its evaluation.

    Loads ``../train_dataset.csv`` and ``../test_dataset.csv``, taking column 0
    as the label and columns 2 onward as the features (column 1 is not used).
    Prints the classifier's ``score`` on the test split, then passes the test
    labels and predictions to ``stdfunc.plot_confusion_matrix`` and
    ``stdfunc.plot_classification_report``.
    """
    train_frame = pd.read_csv('../train_dataset.csv')
    test_frame = pd.read_csv('../test_dataset.csv')

    train_features = train_frame.iloc[:, 2:]
    train_labels = train_frame.iloc[:, 0]
    test_features = test_frame.iloc[:, 2:]
    test_labels = test_frame.iloc[:, 0]

    # Sorted distinct training labels, handed to both plots as ``classes``.
    label_names = train_labels.unique().tolist()
    label_names.sort()

    model = GaussianProcessClassifier(
        max_iter_predict=500,
        warm_start=True,
        n_jobs=-1,
    )
    model.fit(train_features, train_labels)

    test_score = model.score(test_features, test_labels)
    print("\n\n{}\n".format(test_score))

    predictions = model.predict(test_features)

    # Keyword arguments common to both figures.
    shared_kwargs = {'ml_name': 'GP', 'classes': label_names}

    print("Generating confusion matrix figure... \n")
    stdfunc.plot_confusion_matrix(
        test_labels,
        predictions,
        title='Confusion matrix for Gaussian Process evaluation',
        **shared_kwargs)

    print("Generating classification report figure... \n")
    stdfunc.plot_classification_report(
        test_labels,
        predictions,
        title='Classification report for Gaussian Process evaluation',
        **shared_kwargs)
Beispiel #5
0
def main():
    """Fit a tuned random forest on the CSV split and plot its evaluation.

    Loads ``../train_dataset.csv`` and ``../test_dataset.csv``, taking column 0
    as the label and columns 2 onward as the features (column 1 is not used).
    Prints the classifier's ``score`` on the test split, then passes the test
    labels and predictions to ``stdfunc.plot_confusion_matrix`` and
    ``stdfunc.plot_classification_report``.
    """
    df_train = pd.read_csv('../train_dataset.csv')
    df_test = pd.read_csv('../test_dataset.csv')

    X_train, y_train = df_train.iloc[:, 2:], df_train.iloc[:, 0]
    X_test, y_test = df_test.iloc[:, 2:], df_test.iloc[:, 0]
    # Sorted distinct training labels, handed to both plots as ``classes``.
    unique_labels = sorted(y_train.unique().tolist())

    # hyper-parameters inferred from running hyperopt-sklearn
    # ``min_impurity_split`` is deliberately not passed: the original supplied
    # only its default (None), and scikit-learn 1.0 removed the keyword, so
    # passing it raises TypeError there. Omitting it keeps the same model on
    # older releases and lets the script run on newer ones.
    clf = RandomForestClassifier(bootstrap=False,
                                 class_weight=None,
                                 criterion='entropy',
                                 max_depth=None,
                                 max_features='sqrt',
                                 max_leaf_nodes=None,
                                 min_impurity_decrease=0.0,
                                 min_samples_leaf=1,
                                 min_samples_split=2,
                                 min_weight_fraction_leaf=0.0,
                                 n_estimators=75,
                                 n_jobs=1,
                                 oob_score=False,
                                 random_state=1,
                                 verbose=False,
                                 warm_start=False)

    clf.fit(X_train, y_train)

    print("\n\n{}\n".format(clf.score(X_test, y_test)))

    y_predicted = clf.predict(X_test)

    print("Generating confusion matrix figure... \n")
    stdfunc.plot_confusion_matrix(
        y_test,
        y_predicted,
        ml_name='RF',
        classes=unique_labels,
        title='Confusion matrix for Random Forest evaluation')

    print("Generating classification report figure... \n")
    stdfunc.plot_classification_report(
        y_test,
        y_predicted,
        ml_name='RF',
        classes=unique_labels,
        title='Classification report for Random Forest evaluation')
def main():
    """Fit a tuned XGBoost classifier on the CSV split and plot its evaluation.

    Loads ``../train_dataset.csv`` and ``../test_dataset.csv`` as NumPy arrays,
    taking column 0 as the label and columns 2 onward as the features
    (column 1 is not used). Labels are integer-encoded with a ``LabelEncoder``
    fitted on the training labels. The plots therefore receive encoded test
    labels and predictions, while ``classes`` carries the sorted original
    label values.
    """
    train_frame = pd.read_csv('../train_dataset.csv')
    test_frame = pd.read_csv('../test_dataset.csv')

    train_features = train_frame.iloc[:, 2:].values
    raw_train_labels = train_frame.iloc[:, 0].values
    test_features = test_frame.iloc[:, 2:].values
    raw_test_labels = test_frame.iloc[:, 0].values

    # Captured before encoding so the plots get the original label values.
    distinct_labels = set(raw_train_labels.tolist())
    label_names = sorted(distinct_labels)

    # Encoder is fitted on training labels only, then applied to both splits.
    encoder = preprocessing.LabelEncoder().fit(raw_train_labels)
    train_labels = encoder.transform(raw_train_labels)
    test_labels = encoder.transform(raw_test_labels)

    # hyper-parameters inferred by running hyperopt-sklearn
    xgb_params = {
        'colsample_bylevel': 0.8737745469231419,
        'colsample_bytree': 1.0,
        'gamma': 4.858229599937319e-07,
        'learning_rate': 0.4853267733199465,
        'max_delta_step': 0,
        'max_depth': 9,
        'min_child_weight': 0,
        'n_estimators': 64,
        'reg_alpha': 2.5693931492543614e-05,
        'reg_lambda': 6.027978487395207e-05,
        'scale_pos_weight': 73.0915750362818,
        'subsample': 0.5410531887103683,
    }
    model = XGBClassifier(**xgb_params)
    model.fit(train_features, train_labels)

    test_score = model.score(test_features, test_labels)
    print("\n\n{}\n".format(test_score))

    predictions = model.predict(test_features)

    # Keyword arguments common to both figures.
    shared_kwargs = {'ml_name': 'XG', 'classes': label_names}

    print("Generating confusion matrix figure... \n")
    stdfunc.plot_confusion_matrix(
        test_labels,
        predictions,
        title='Confusion matrix for XGBoost evaluation',
        **shared_kwargs)

    print("Generating classification report figure... \n")
    stdfunc.plot_classification_report(
        test_labels,
        predictions,
        title='Classification report for XGBoost evaluation',
        **shared_kwargs)
Beispiel #7
0
def main():
    """Scan k = 1..19 for k-nearest-neighbours and plot the best run.

    Loads ``../train_dataset.csv`` and ``../test_dataset.csv``, taking column 0
    as the label and columns 2 onward as the features (column 1 is not used).
    For each k a ``KNeighborsClassifier`` is fitted and scored on the test
    split. Whenever a k strictly improves on the best score so far,
    ``"K = <k> -- <score>"`` is printed and that model's test predictions are
    kept. The kept predictions are then passed to
    ``stdfunc.plot_confusion_matrix`` and ``stdfunc.plot_classification_report``.
    """
    df_train = pd.read_csv('../train_dataset.csv')
    df_test = pd.read_csv('../test_dataset.csv')

    X_train, y_train = df_train.iloc[:, 2:], df_train.iloc[:, 0]
    X_test, y_test = df_test.iloc[:, 2:], df_test.iloc[:, 0]
    # Sorted distinct training labels, handed to both plots as ``classes``.
    unique_labels = sorted(y_train.unique().tolist())

    best_pred, best_score = None, 0

    # NOTE(review): k is chosen by its score on the test split, so the
    # reported figures are optimistic -- consider a separate validation split.
    for k in range(1, 20):
        clf = KNeighborsClassifier(n_neighbors=k, n_jobs=-1)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        # Always accept the first candidate so the plots never receive None,
        # even if every k scores 0.
        if best_pred is None or score > best_score:
            print("K = {} -- {}".format(k, score))
            # Predict only for a new best k; other predictions are never used.
            best_pred = clf.predict(X_test)
            best_score = score

    print("Generating confusion matrix figure... \n")
    stdfunc.plot_confusion_matrix(
        y_test,
        best_pred,
        ml_name='Nearest_Neighbor',
        classes=unique_labels,
        title='Confusion matrix for Nearest Neighbor evaluation')

    print("Generating classification report figure... \n")
    stdfunc.plot_classification_report(
        y_test,
        best_pred,
        ml_name='Nearest_Neighbor',
        classes=unique_labels,
        title='Classification report for Nearest Neighbor evaluation')