Exemple #1
0
def run_one_fold(model):
    """Train ``model`` on the module-level split, print metrics and save it.

    The model is compiled with the masked loss/accuracy helpers and fitted on
    ``X_train``/``y_train`` using ``X_val``/``y_val`` as validation data.
    Several ``ROC_PR`` metrics are then printed and the model is written to
    ``wnd_<random suffix>.h5`` in the current working directory.

    Args:
        model: object exposing ``compile``, ``fit`` and ``save``
            (presumably a Keras model -- confirm against the caller).

    Returns:
        Whatever ``ROC_PR.ROC_Score`` yields for the validation split.
    """
    model.compile(
        loss=masked_loss_function,
        optimizer='Adam',
        metrics=[masked_accuracy],
    )
    # ``shuffle`` is deliberately not passed; the fit result is not used.
    model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=128,
        verbose=2,
        validation_data=(X_val, y_val),
        callbacks=[MyCustomCallback()],
    )

    val_score = ROC_PR.ROC_Score(model, X_val, y_val)
    test_score = ROC_PR.ROC_Score(model, X_test, y_test)
    per_drug_score = ROC_PR.ROC(
        model, X_test, y_test, "wide-n-deepBO_delete", True)
    recall_at_spec, precision_recall = ROC_PR.PR(model, X_test, y_test)

    print('area under ROC curve for val:', val_score)
    print('area under ROC curve for test:', test_score)
    print(per_drug_score)
    print("recall at 95 spec: ", recall_at_spec)
    print("precision recall: ", precision_recall)

    # The random suffix keeps saved models from different runs apart.
    suffix = get_random_string(17)
    print(suffix)
    model.save('wnd_' + suffix + '.h5')

    return val_score
def get_model_SVM_new(kernel=0, degree=1, C=1, gamma=1):
    """Fit one SVC per label column and return the mean validation score.

    Reads the module-level splits ``X_train``/``y_train``, ``X_val``/``y_val``
    and ``X_test``/``y_test``. For every label column, rows whose label is
    neither 0.0 nor 1.0 are dropped (presumably missing labels) before the
    classifier is fitted and scored with ``ROC_PR.ROC_ML``.

    Args:
        kernel: truncated to int; 0 selects ``'linear'``, 1 selects
            ``'poly'``, any other value selects ``'rbf'``.
        degree: truncated to int; polynomial degree, used by ``'poly'`` only.
        C: exponent; the SVC ``C`` parameter is ``10 ** int(C)``.
        gamma: exponent; the SVC ``gamma`` parameter is ``10 ** int(gamma)``,
            used by ``'rbf'`` only.

    Returns:
        Sum of the per-label validation scores divided by the number of
        label columns.

    Side effects:
        Pickles each fitted model to ``svm<column><random>.sav`` in the
        current directory, appends the per-label result lists to the
        module-level ``rf_val_score``, ``rf_test_score`` and ``rf_sr_score``
        lists (shared names, despite the ``rf_`` prefix), and prints progress.
    """
    from sklearn.svm import SVC

    C = 10**(int(C))
    gamma = 10**(int(gamma))
    degree = int(degree)
    kernel = int(kernel)

    def _labeled_rows(features, targets, column):
        # Keep only the rows whose label in ``column`` is exactly 0 or 1.
        pairs = [
            (row, label)
            for row, label in zip(features.tolist(),
                                  targets[:, column].tolist())
            if label == 0.0 or label == 1.0
        ]
        return [row for row, _ in pairs], [label for _, label in pairs]

    n_labels = len(y_train[0])
    all_scores = 0
    res_test = []
    res_val = []
    res_sr = []
    res_pr = []
    string_random = get_random_string(20)

    for i in range(n_labels):
        X_train2, y_train2 = _labeled_rows(X_train, y_train, i)
        X_test2, y_test2 = _labeled_rows(X_test, y_test, i)
        X_val2, y_val2 = _labeled_rows(X_val, y_val, i)

        if kernel == 0:
            classifier = SVC(kernel='linear', C=C)
        elif kernel == 1:
            classifier = SVC(kernel='poly', C=C, degree=degree)
        else:
            classifier = SVC(kernel='rbf', C=C, gamma=gamma)
        svm_model = classifier.fit(X_train2, y_train2)

        # NOTE(review): the tag passed to ROC_ML is "LR" although this is an
        # SVM; left unchanged because ROC_ML's use of the tag is not visible.
        score_val, _, _ = ROC_PR.ROC_ML(svm_model, X_val2, y_val2, "LR", 0)
        score_test, score_sr, score_pr = ROC_PR.ROC_ML(
            svm_model, X_test2, y_test2, "LR", 0)

        print(i, flush=True)
        res_test.append(score_test)
        res_val.append(score_val)
        res_sr.append(score_sr)
        res_pr.append(score_pr)
        all_scores = all_scores + score_val

        model_path = 'svm' + str(i) + string_random + '.sav'
        print(model_path)
        # Context manager so the file handle is closed (and flushed) promptly.
        with open(model_path, 'wb') as model_file:
            pickle.dump(svm_model, model_file)

    mean_val_score = all_scores / n_labels
    # The mean is stored as the last entry of the validation list.
    res_val.append(mean_val_score)
    rf_val_score.append(res_val)
    rf_test_score.append(res_test)
    rf_sr_score.append(res_sr)

    print("val score", res_val)
    print("test score", res_test)
    print("recall at 95 spec: ", res_sr)
    print("precision recall: ", res_pr)
    print(mean_val_score, flush=True)
    print(string_random)
    return mean_val_score
def get_model_GBT(n_estimators=10,
                  min_samples_split=2,
                  max_depth=1,
                  random_state=0):
    """Fit one XGBoost model per label column; return the mean validation score.

    Reads the module-level splits ``X_train``/``y_train``, ``X_val``/``y_val``
    and ``X_test``/``y_test``. For every label column, rows whose label is
    neither 0.0 nor 1.0 are dropped (presumably missing labels) before the
    model is fitted and scored with ``ROC_PR.ROC_ML``.

    Args:
        n_estimators: the model uses ``10 * int(n_estimators)`` estimators.
        min_samples_split: truncated to int and forwarded to ``XGBModel``.
            NOTE(review): this looks like a scikit-learn tree parameter;
            confirm that xgboost actually honours it.
        max_depth: values above 15 mean "no limit" (``None``); otherwise the
            depth is ``10 * int(max_depth)``.
        random_state: negative values mean ``None``; otherwise truncated to
            int.

    Returns:
        Sum of the per-label validation scores divided by the number of
        label columns. A label whose fit or scoring fails contributes 0.

    Side effects:
        Pickles each fitted model to ``gbt<column><random>.sav`` in the
        current directory, appends the per-label result lists to the
        module-level ``rf_val_score``, ``rf_test_score`` and ``rf_sr_score``
        lists, and prints progress.
    """
    import xgboost.sklearn as xgb

    n_estimators = 10 * int(n_estimators)
    min_samples_split = int(min_samples_split)
    random_state = None if random_state < 0 else int(random_state)
    max_depth = None if max_depth > 15 else 10 * int(max_depth)

    def _labeled_rows(features, targets, column):
        # Keep only the rows whose label in ``column`` is exactly 0 or 1.
        pairs = [
            (row, label)
            for row, label in zip(features.tolist(),
                                  targets[:, column].tolist())
            if label == 0.0 or label == 1.0
        ]
        return [row for row, _ in pairs], [label for _, label in pairs]

    n_labels = len(y_train[0])
    all_scores = 0
    res_test = []
    res_val = []
    res_sr = []
    res_pr = []
    string_random = get_random_string(20)

    for i in range(n_labels):
        X_train2, y_train2 = _labeled_rows(X_train, y_train, i)
        X_test2, y_test2 = _labeled_rows(X_test, y_test, i)
        X_val2, y_val2 = _labeled_rows(X_val, y_val, i)

        print(n_estimators)
        print(min_samples_split)
        print(random_state)
        print(max_depth)
        try:
            gbt_model = xgb.XGBModel(n_estimators=n_estimators,
                                     min_samples_split=min_samples_split,
                                     random_state=random_state,
                                     max_depth=max_depth).fit(
                                         np.array(X_train2),
                                         np.array(y_train2))
            score_val, _, _ = ROC_PR.ROC_ML(gbt_model,
                                            np.array(X_val2),
                                            np.array(y_val2),
                                            "GBT",
                                            0,
                                            xgb=True)
            score_test, score_sr, score_pr = ROC_PR.ROC_ML(gbt_model,
                                                           np.array(X_test2),
                                                           np.array(y_test2),
                                                           "GBT",
                                                           0,
                                                           xgb=True)
            model_path = 'gbt' + str(i) + string_random + '.sav'
            print(model_path)
            # Context manager so the file handle is closed promptly.
            with open(model_path, 'wb') as model_file:
                pickle.dump(gbt_model, model_file)
        except Exception as exc:
            # The original ``except ():`` matched no exception at all, so this
            # fallback never ran. Catch real failures and report the cause
            # instead of hiding it.
            print("errorrrrrr in GBT", repr(exc), flush=True)
            score_test, score_sr, score_pr, score_val = 0, 0, 0, 0

        print(i, flush=True)
        res_test.append(score_test)
        res_val.append(score_val)
        res_sr.append(score_sr)
        res_pr.append(score_pr)
        all_scores = all_scores + score_val

    mean_val_score = all_scores / n_labels
    # The mean is stored as the last entry of the validation list.
    res_val.append(mean_val_score)
    rf_val_score.append(res_val)
    rf_test_score.append(res_test)
    rf_sr_score.append(res_sr)

    print("val score", res_val)
    print("test score", res_test)
    print("recall at 95 spec: ", res_sr)
    print("precision recall: ", res_pr)
    print(mean_val_score, flush=True)

    print(string_random)

    return mean_val_score
def get_model_RF(n_estimators=10,
                 min_samples_split=2,
                 max_depth=1,
                 bootstrap=0):
    """Fit one random forest per label column; return the mean validation score.

    Reads the module-level splits ``X_train``/``y_train``, ``X_val``/``y_val``
    and ``X_test``/``y_test``. For every label column, rows whose label is
    neither 0.0 nor 1.0 are dropped (presumably missing labels) before the
    forest is fitted and scored with ``ROC_PR.ROC_ML``.

    Args:
        n_estimators: the forest uses ``10 * int(n_estimators)`` trees.
        min_samples_split: truncated to int and passed through.
        max_depth: values above 15 mean "no limit" (``None``); otherwise the
            depth is ``10 * int(max_depth)``.
        bootstrap: negative values disable bootstrapping; zero and positive
            values enable it.

    Returns:
        Sum of the per-label validation scores divided by the number of
        label columns.

    Side effects:
        Pickles each fitted model to ``rf<column><random>.sav`` in the
        current directory, appends the per-label result lists to the
        module-level ``rf_val_score``, ``rf_test_score`` and ``rf_sr_score``
        lists, and prints progress.
    """
    from sklearn.ensemble import RandomForestClassifier

    n_estimators = 10 * int(n_estimators)
    min_samples_split = int(min_samples_split)
    bootstrap = not bootstrap < 0
    max_depth = None if max_depth > 15 else 10 * int(max_depth)

    def _labeled_rows(features, targets, column):
        # Keep only the rows whose label in ``column`` is exactly 0 or 1.
        pairs = [
            (row, label)
            for row, label in zip(features.tolist(),
                                  targets[:, column].tolist())
            if label == 0.0 or label == 1.0
        ]
        return [row for row, _ in pairs], [label for _, label in pairs]

    n_labels = len(y_train[0])
    all_scores = 0
    res_test = []
    res_val = []
    res_sr = []
    res_pr = []
    string_random = get_random_string(20)

    for i in range(n_labels):
        X_train2, y_train2 = _labeled_rows(X_train, y_train, i)
        X_test2, y_test2 = _labeled_rows(X_test, y_test, i)
        X_val2, y_val2 = _labeled_rows(X_val, y_val, i)

        forest = RandomForestClassifier(n_estimators=n_estimators,
                                        min_samples_split=min_samples_split,
                                        bootstrap=bootstrap,
                                        max_depth=max_depth)
        rf_model = forest.fit(X_train2, y_train2)

        score_val, _, _ = ROC_PR.ROC_ML(
            rf_model, X_val2, y_val2, "RF", 0, rf=True)
        score_test, score_sr, score_pr = ROC_PR.ROC_ML(
            rf_model, X_test2, y_test2, "RF", 0, rf=True)

        print(i, flush=True)
        res_test.append(score_test)
        res_val.append(score_val)
        res_sr.append(score_sr)
        res_pr.append(score_pr)
        all_scores = all_scores + score_val

        model_path = 'rf' + str(i) + string_random + '.sav'
        print(model_path)
        # Context manager so the file handle is closed (and flushed) promptly.
        with open(model_path, 'wb') as model_file:
            pickle.dump(rf_model, model_file)

    mean_val_score = all_scores / n_labels
    # The mean is stored as the last entry of the validation list.
    res_val.append(mean_val_score)
    rf_val_score.append(res_val)
    rf_test_score.append(res_test)
    rf_sr_score.append(res_sr)

    print("val score", res_val)
    print("test score", res_test)
    print("recall at 95 spec: ", res_sr)
    print("precision recall: ", res_pr)
    print(mean_val_score, flush=True)
    print(string_random)
    return mean_val_score
def get_model_LR_new(C=1, penalty=1, solver=1, l1_ratio=1, max_iter=2):
    """Fit one logistic regression per label column; return the mean validation score.

    Reads the module-level splits ``X_train``/``y_train``, ``X_val``/``y_val``
    and ``X_test``/``y_test``. For every label column, rows whose label is
    neither 0.0 nor 1.0 are dropped (presumably missing labels) before the
    model is fitted and scored with ``ROC_PR.ROC_ML``.

    Args:
        C: exponent; the inverse regularisation strength is ``10 ** int(C)``.
        penalty: truncated to int; 0 -> ``'l1'`` (liblinear), 1 -> ``'l2'``,
            2 -> ``'elasticnet'`` (saga), anything else -> ``'none'``.
        solver: truncated to int; only consulted for the ``'l2'`` penalty:
            0 -> ``'newton-cg'``, 1 -> ``'sag'``, anything else -> ``'lbfgs'``.
        l1_ratio: divided by 10 before use; only consulted for
            ``'elasticnet'``.
        max_iter: exponent; the iteration cap is ``int(10 ** max_iter)``.

    Returns:
        Sum of the per-label validation scores divided by the number of
        label columns.

    Side effects:
        Pickles each fitted model to ``lr<column><random>.sav`` in the
        current directory, appends the per-label result lists to the
        module-level ``rf_val_score``, ``rf_test_score`` and ``rf_sr_score``
        lists (shared names, despite the ``rf_`` prefix), and prints progress.
    """
    from sklearn.linear_model import LogisticRegression

    C = 10**(int(C))
    penalty = int(penalty)
    solver = int(solver)
    l1_ratio = l1_ratio / 10
    # Truncate like the other hyper-parameters: a float exponent would
    # otherwise produce a float iteration count, which recent scikit-learn
    # releases reject.
    max_iter = int(10**max_iter)
    print(max_iter)

    # The penalty/solver choice is the same for every label; decide it once.
    if penalty == 0:
        estimator_options = {'penalty': 'l1', 'solver': 'liblinear'}
    elif penalty == 1:
        l2_solver = {0: 'newton-cg', 1: 'sag'}.get(solver, 'lbfgs')
        estimator_options = {'penalty': 'l2', 'solver': l2_solver}
    elif penalty == 2:
        estimator_options = {
            'penalty': 'elasticnet',
            'solver': 'saga',
            'l1_ratio': l1_ratio,
        }
    else:
        # NOTE(review): newer scikit-learn releases appear to expect
        # ``penalty=None`` rather than the string 'none'; confirm against the
        # pinned version before changing.
        estimator_options = {'penalty': 'none'}

    def _labeled_rows(features, targets, column):
        # Keep only the rows whose label in ``column`` is exactly 0 or 1.
        pairs = [
            (row, label)
            for row, label in zip(features.tolist(),
                                  targets[:, column].tolist())
            if label == 0.0 or label == 1.0
        ]
        return [row for row, _ in pairs], [label for _, label in pairs]

    n_labels = len(y_train[0])
    all_scores = 0
    res_test = []
    res_val = []
    res_sr = []
    res_pr = []
    string_random = get_random_string(20)

    for i in range(n_labels):
        X_train2, y_train2 = _labeled_rows(X_train, y_train, i)
        X_test2, y_test2 = _labeled_rows(X_test, y_test, i)
        X_val2, y_val2 = _labeled_rows(X_val, y_val, i)

        lr_model = LogisticRegression(C=C,
                                      max_iter=max_iter,
                                      **estimator_options).fit(
                                          X_train2, y_train2)

        score_val, _, _ = ROC_PR.ROC_ML(lr_model, X_val2, y_val2, "LR", 0)
        score_test, score_sr, score_pr = ROC_PR.ROC_ML(
            lr_model, X_test2, y_test2, "LR", 0)

        print(i, flush=True)
        res_test.append(score_test)
        res_val.append(score_val)
        res_sr.append(score_sr)
        res_pr.append(score_pr)
        all_scores = all_scores + score_val

        model_path = 'lr' + str(i) + string_random + '.sav'
        print(model_path)
        # Context manager so the file handle is closed (and flushed) promptly.
        with open(model_path, 'wb') as model_file:
            pickle.dump(lr_model, model_file)

    mean_val_score = all_scores / n_labels
    # The mean is stored as the last entry of the validation list.
    res_val.append(mean_val_score)
    rf_val_score.append(res_val)
    rf_test_score.append(res_test)
    rf_sr_score.append(res_sr)

    print("val score", res_val)
    print("test score", res_test)
    print("recall at 95 spec: ", res_sr)
    print("precision recall: ", res_pr)
    print(mean_val_score, flush=True)
    print(string_random)
    return mean_val_score