Example #1
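All four examples below rely on scikit-learn utilities plus a project-local bdt helper module (providing getOneHotEncodedDataset and getBalancedDataset) whose import is not shown in the source; a minimal sketch of the scikit-learn imports they need would be:

# scikit-learn imports assumed by the examples below.
# The bdt module is project-specific (dataset loading, one-hot encoding,
# class balancing); its import path is not shown in the original source.
from sklearn import svm
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import confusion_matrix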
def gradient_boosting_model(_remove_extra_classes=False, metric="accuracy"):

    print("###Running Gradient Boosting model| Limited dataset:{}".format(
        _remove_extra_classes))
    dt = bdt.getOneHotEncodedDataset(
        remove_extra_classes=_remove_extra_classes)

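    # Columns 1-30 of the one-hot-encoded frame are the features; column 31 is the class label.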
    X = dt.iloc[:, 1:31]
    y = dt.iloc[:, 31]

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42,
                                                        stratify=y)

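    # Balance the classes in the training split only; the test split keeps its original distribution.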
    train = bdt.getBalancedDataset(X_train, y_train)
    X_train = train.iloc[:, :-1]
    y_train = train.iloc[:, -1]

    gb_clf = GradientBoostingClassifier(n_estimators=100,
                                        learning_rate=0.5,
                                        max_depth=3,
                                        random_state=42)
    gb_clf.fit(X_train, y_train)

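    # cross_val_score refits clones of the classifier on folds of the held-out test split.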
    scores = cross_val_score(gb_clf, X_test, y_test, cv=5, scoring=metric)

    y_pred = gb_clf.predict(X_test)
    conf_matrix = confusion_matrix(y_test, y_pred)
    tn, fp, fn, tp = conf_matrix.ravel()

    print('{} testing : {:.3f} (+-{:.3f})'.format(metric, scores.mean(),
                                                  scores.std()))
    print("confusion matrix:\n", conf_matrix)
    print(
        "True Negative:{0}, False Positive:{1} \nFalse Negative:{2}, True Positive:{3}"
        .format(tn, fp, fn, tp))

    print("###Finished running Gradient Boosting model")

    return
Example #2
def svm_model(_remove_extra_classes=False, metric="accuracy"):

    print("###Running SVM model| Limited dataset:{}".format(
        _remove_extra_classes))
    dt = bdt.getOneHotEncodedDataset(
        remove_extra_classes=_remove_extra_classes)

    X = dt.iloc[:, 1:31]
    y = dt.iloc[:, 31]
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42,
                                                        stratify=y)

    train = bdt.getBalancedDataset(X_train, y_train)
    X_train = train.iloc[:, :-1]
    y_train = train.iloc[:, -1]

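    # RBF-kernel SVC; the one-vs-one decision function only matters for multiclass labels.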
    clf = svm.SVC(gamma='scale',
                  decision_function_shape='ovo',
                  C=1.0,
                  cache_size=200,
                  kernel='rbf')
    clf.fit(X_train, y_train)

    scores = cross_val_score(clf, X_test, y_test, cv=5, scoring=metric)

    y_pred = clf.predict(X_test)
    conf_matrix = confusion_matrix(y_test, y_pred)
    tn, fp, fn, tp = conf_matrix.ravel()

    print('{} testing : {:.3f} (+-{:.3f})'.format(metric, scores.mean(),
                                                  scores.std()))
    print("confusion matrix:\n", conf_matrix)
    print(
        "True Negative:{0}, False Positive:{1} \nFalse Negative:{2}, True Positive:{3}"
        .format(tn, fp, fn, tp))
    print("###Finished running SVM model")

    return
Example #3
def mlp_model(_remove_extra_classes=False, metric="accuracy"):

    print("###Running MLP model| Limited dataset:{}".format(
        _remove_extra_classes))
    dt = bdt.getOneHotEncodedDataset(
        remove_extra_classes=_remove_extra_classes)

    X = dt.iloc[:, 1:31]
    y = dt.iloc[:, 31]
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42,
                                                        stratify=y)

    train = bdt.getBalancedDataset(X_train, y_train)
    X_train = train.iloc[:, :-1]
    y_train = train.iloc[:, -1]

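    # Two hidden layers (90 and 40 units) trained with the L-BFGS solver.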
    clf = MLPClassifier(solver='lbfgs',
                        alpha=1e-5,
                        hidden_layer_sizes=(90, 40),
                        random_state=1)
    clf.fit(X_train, y_train)

    scores = cross_val_score(clf, X_test, y_test, cv=5, scoring=metric)

    y_pred = clf.predict(X_test)
    conf_matrix = confusion_matrix(y_test, y_pred)
    tn, fp, fn, tp = conf_matrix.ravel()

    print('{} testing : {:.3f} (+-{:.3f})'.format(metric, scores.mean(),
                                                  scores.std()))
    print("confusion matrix:\n", conf_matrix)
    print(
        "True Negative:{0}, False Positive:{1} \nFalse Negative:{2}, True Positive:{3}"
        .format(tn, fp, fn, tp))

    print("###Finished running MLP model")

    return
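Because the three wrappers share the same signature, they can be compared back to back; a minimal sketch, assuming the imports above and the bdt module are importable:

# Run each model on the reduced dataset and score it with F1 instead of accuracy.
for run_model in (gradient_boosting_model, svm_model, mlp_model):
    run_model(_remove_extra_classes=True, metric="f1")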
Example #4
def main():
    dt = bdt.getOneHotEncodedDataset()
    #dt = bdt.getBalancedDataset(dataset)
    X = dt.iloc[:, 1:31]
    y = dt.iloc[:, 31]

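    # Hand-picked feature subsets (genotype columns) evaluated one at a time.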
    subsets = [
        ["IL-10 -592=CA", "TNF-308=GG"],
        ["MBL -221=YX", "IL-10 -819=CT", "TNF-308=GG"],
        ["TNF-308=GG"],
        ["PTX3 rs2305619=GG", "MPO C-463T=GG"],
        ["PTX3 rs2305619=AA", "IL-10 -592=CA"],
        ["PTX3 rs2305619=AA"],
        ["IL-10 -819=CT", "MPO C-463T=GG"],
        ["PTX3 rs1840680=AA", "IL-28b rs12979860=CT"],
        ["MPO C-463T=GG"],
        ["PTX3 rs1840680=AA", "MBL -221=XX"]
    ]
    print("###Running Experinmento1:")
    for sset in subsets:
        X_sset = X[sset]
        #print(X_sset.columns)
        X_train, X_test, y_train, y_test = train_test_split(X_sset, y, test_size=0.33, random_state=100, stratify=y)

        gb_clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.5, max_depth=3, random_state=100)
        gb_clf.fit(X_train, y_train)
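        # Unlike the wrapper functions above, cross-validation here is scored on the training split.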
        scores = cross_val_score(gb_clf, X_train, y_train, cv=5, scoring='accuracy')
        print("Score: {0:.3f} +-({1:.3f}))".format(scores.mean(), scores.std()))

        y_pred = gb_clf.predict(X_test)
        conf_matrix = confusion_matrix(y_test, y_pred)
        tn, fp, fn, tp = conf_matrix.ravel()

        print("confusion matrix:", conf_matrix)
        print("True Negative:{0}, False Positive:{1} \nFalse Negative:{2}, True Positive:{3}".format(tn, fp, fn, tp))

    print("###Finished Experinmento1")
    return