Example #1
0
def learn(X_trains, X_tests, y_trains, y_tests, k_fold, clf_params):
    """Train an XGBoost binary classifier on each fold and return the test AUC
    computed over the concatenated per-fold scores.

    Parameters
    ----------
    X_trains, X_tests : per-fold feature matrices.
    y_trains, y_tests : per-fold label DataFrames with a 'Tag' column.
    k_fold : number of folds to iterate.
    clf_params : dict with keys 'max_depth', 'lr', 'estimators', 'gamma',
        'min_child_weight', 'lambda', 'alpha'.

    Returns
    -------
    float
        AUC over the flattened test-set scores (from calc_auc_on_flat_results).
    """
    # Create the classifier once; it is re-fitted from scratch on every fold.
    clf = XGBClassifier(max_depth=int(clf_params['max_depth']),
                        learning_rate=clf_params['lr'],
                        n_estimators=int(clf_params['estimators']),
                        objective='binary:logistic',
                        gamma=clf_params['gamma'],
                        min_child_weight=int(clf_params['min_child_weight']),
                        reg_lambda=clf_params['lambda'],
                        booster='gbtree',
                        alpha=clf_params['alpha'])
    y_train_scores, y_test_scores = [], []
    for i in range(k_fold):
        print('------------------------------\niteration number ' + str(i))
        X_train, X_test, y_train, y_test = (X_trains[i], X_tests[i],
                                            y_trains[i], y_tests[i])
        clf.fit(X_train, y_train)
        # Keep only the positive-class probability (column 1) as the score.
        y_score = clf.predict_proba(X_test)
        y_test_scores.append(y_score[:, 1])
        train_score = clf.predict_proba(X_train)
        y_train_scores.append(train_score[:, 1])

        print_auc_for_iter(np.array(y_tests[i]['Tag'].values),
                           np.array(y_score).T[1])

    # Flatten the per-fold 'Tag' labels into single arrays.
    all_y_train = np.array([y_trains[i]['Tag'].values
                            for i in range(k_fold)]).flatten()
    all_y_test = np.array([y_tests[i]['Tag'].values
                           for i in range(k_fold)]).flatten()

    y_train_scores = np.array(y_train_scores).flatten()
    y_test_scores = np.array(y_test_scores).flatten()

    # calc AUC on validation set
    _, test_auc, _, _ = calc_auc_on_flat_results(all_y_train, y_train_scores,
                                                 all_y_test, y_test_scores)
    return test_auc
Example #2
0
    def fit(self,
            X,
            y,
            X_train_ids,
            X_test_ids,
            y_train_ids,
            y_test_ids,
            params,
            bacteria,
            task_name_title,
            relative_path_to_save_results,
            pca_obj=None):
        """Grid-run XGBoost classifiers over K folds and persist all results.

        For every classifier produced by ``self.create_classifiers(params)``:
        fit on each fold, collect class-probability scores, accuracies and
        confusion matrices, create AUC / coefficient plots, and append a
        summary row to the shared ``all_xgb_results.csv`` file.

        ``X_train_ids`` / ``X_test_ids`` / ``y_train_ids`` / ``y_test_ids``
        hold the pre-computed per-fold index collections used with
        ``X.loc`` / ``y[...]``.

        NOTE(review): this method changes the process working directory
        (os.chdir) and never restores it — callers should not rely on the
        cwd afterwards.
        """
        # Create (and move into) the XGBOOST results directory.
        if not os.path.exists(
                os.path.join(relative_path_to_save_results, "XGBOOST")):
            os.makedirs(os.path.join(relative_path_to_save_results, "XGBOOST"))
        os.chdir(
            os.path.join(os.path.abspath(os.path.curdir),
                         relative_path_to_save_results, "XGBOOST"))

        print("XGBOOST...")

        # All classifier configurations share one results CSV; create it with
        # headers on first use.
        xgb_results_file = Path("all_xgb_results.csv")
        if not xgb_results_file.exists():
            all_xgb_results = pd.DataFrame(columns=[
                'LR', 'MAX-DEPTH', 'N-ESTIMATORS', 'OBJECTIVE', 'GAMMA',
                'MIN-CHILD-WEIGHT', 'BOOSTER', 'TRAIN-AUC', 'TRAIN-ACC',
                'TEST-AUC', 'TEST-ACC', 'PRECISION', 'RECALL'
            ])
            all_xgb_results.to_csv(xgb_results_file, index=False)

        num_of_classes = len(set(y))
        BINARY = num_of_classes == 2  # ROC-AUC plots only apply to 2 classes
        optional_classifiers = self.create_classifiers(params)

        for clf in optional_classifiers:
            # Re-read so rows appended by previous configurations are kept.
            all_xgb_results = pd.read_csv(xgb_results_file)
            clf_folder_name = "d=" + str(clf.max_depth) + "_lr=" + str(clf.learning_rate) + "_e=" + \
                              str(clf.n_estimators) + "_o=" + clf.objective + "_g=" + str(clf.gamma) + "_m=" + \
                              str(clf.min_child_weight) + "_b=" + clf.booster
            if not os.path.exists(clf_folder_name):
                os.makedirs(clf_folder_name)

            # Per-fold accumulators.
            X_trains, X_tests, y_trains, y_tests = [], [], [], []
            xgb_y_test_from_all_iter = np.array([])
            xgb_y_score_from_all_iter = np.array([])
            xgb_y_pred_from_all_iter = np.array([])
            xgb_coefs, bacteria_coeff_average, y_train_scores, y_test_scores = [], [], [], []
            train_accuracies, test_accuracies, confusion_matrixes, y_train_preds, y_test_preds = [], [], [], [], []

            for i in range(params["K_FOLD"]):
                print('------------------------------\niteration number ' +
                      str(i))
                X_train = np.array(X.loc[X_train_ids[i]])
                X_test = np.array(X.loc[X_test_ids[i]])
                y_train = np.array(y[y_train_ids[i]])
                y_test = np.array(y[y_test_ids[i]])
                X_trains.append(X_train)
                X_tests.append(X_test)
                y_trains.append(y_train)
                y_tests.append(y_test)

                clf.fit(X_train, y_train)
                # NOTE(review): column 0 of predict_proba is used as the score
                # here, while other code in this file uses column 1 — confirm
                # which column is the intended positive class.
                y_score = clf.predict_proba(X_test)
                y_pred = clf.predict(X_test)
                y_test_preds.append(y_pred)
                y_test_scores.append(y_score[:, 0])
                xgb_class_report = classification_report(y_test, y_pred)
                train_pred = clf.predict(X_train)
                train_score = clf.predict_proba(X_train)
                y_train_preds.append(train_pred)
                y_train_scores.append(train_score[:, 0])

                # Reuse train_pred instead of predicting again.
                train_accuracies.append(accuracy_score(y_train, train_pred))
                test_accuracies.append(accuracy_score(
                    y_test, y_pred))  # same as - clf.score(X_test, y_test)
                confusion_matrixes.append(confusion_matrix(y_test, y_pred))

                if BINARY:
                    self.print_auc_for_iter(np.array(y_test),
                                            np.array(y_score).T[0])

                self.save_y_test_and_score(y_test, y_pred, y_score,
                                           xgb_class_report)
                # --------------------------------------------! COEFF PLOTS -----------------------------------------
                if params["create_coeff_plots"]:
                    svm_coefs, coefficients, bacteria_coeff_average = \
                        self.calc_bacteria_coeff_average(num_of_classes, pca_obj, bacteria, clf, xgb_coefs, bacteria_coeff_average)

            # --------------------------------------------! AUC -----------------------------------------
            # Flatten per-fold results into single arrays for global metrics.
            all_y_train = np.array(y_trains).flatten()
            all_predictions_train = np.array(y_train_preds).flatten()
            y_train_scores = np.array(y_train_scores).flatten()
            all_test_real_tags = np.array(y_tests).flatten()
            all_test_pred_tags = np.array(y_test_preds).flatten()
            y_test_scores = np.array(y_test_scores).flatten()

            train_auc, test_auc, train_rho, test_rho = \
                calc_auc_on_flat_results(all_y_train, y_train_scores,
                                         all_test_real_tags, y_test_scores)

            # ----------------------------------------! CONFUSION MATRIX -------------------------------------
            print("------------------------------")
            names = params["CLASSES_NAMES"]
            confusion_matrix_average, confusion_matrix_acc = edit_confusion_matrix(
                confusion_matrixes, "XGB", names, BINARY=BINARY)
            if BINARY:
                _, _, _, xgb_roc_auc = roc_auc(all_test_real_tags.astype(int),
                                               y_test_scores,
                                               visualize=True,
                                               graph_title='XGB\n' +
                                               task_name_title.capitalize() +
                                               " AUC on all iterations",
                                               save=True,
                                               folder=clf_folder_name)
                # The results folder is named after the achieved test AUC.
                res_path = os.path.join(clf_folder_name,
                                        str(round(xgb_roc_auc, 5)))
            else:
                xgb_roc_auc = 0
                res_path = clf_folder_name

            if not os.path.exists(res_path):
                os.mkdir(res_path)

            if params["create_coeff_plots"]:
                self.plot_bacteria_coeff_average(bacteria_coeff_average,
                                                 len(set(y)),
                                                 params["TASK_TITLE"],
                                                 clf_folder_name, bacteria,
                                                 params["K_FOLD"], "XGB",
                                                 res_path, BINARY, names)

            print_confusion_matrix(confusion_matrix_average, names,
                                   confusion_matrix_acc, "XGB",
                                   task_name_title, res_path)

            if BINARY:
                _, _, _, xgb_train_roc_auc = roc_auc(all_y_train,
                                                     y_train_scores,
                                                     visualize=False,
                                                     graph_title="train auc",
                                                     save=False,
                                                     folder=res_path)
            else:
                xgb_train_roc_auc = 0
                multi_class_roc_auc(all_y_train.astype(int),
                                    y_train_scores,
                                    names,
                                    graph_title='XGB\n' +
                                    task_name_title.capitalize() +
                                    " AUC on all iterations",
                                    save=True,
                                    folder=res_path)

            # ----------------------------------------! SAVE RESULTS -------------------------------------
            self.save_results(task_name_title, train_auc, test_auc, train_rho,
                              test_rho, confusion_matrix_average,
                              confusion_matrix_acc, train_accuracies,
                              test_accuracies, xgb_y_score_from_all_iter,
                              xgb_y_pred_from_all_iter,
                              xgb_y_test_from_all_iter, "XGB", res_path)

            # Append this configuration's summary row and keep the shared file
            # sorted by the relevant test metric.
            all_xgb_results.loc[len(all_xgb_results)] = [
                clf.learning_rate, clf.max_depth, clf.n_estimators,
                clf.objective, clf.gamma, clf.min_child_weight, clf.booster,
                xgb_train_roc_auc,
                np.mean(train_accuracies), xgb_roc_auc,
                np.mean(test_accuracies),
                precision_score(all_test_real_tags.astype(int),
                                all_test_pred_tags,
                                average='micro'),
                recall_score(all_test_real_tags.astype(int),
                             all_test_pred_tags,
                             average='micro')
            ]
            if BINARY:
                all_xgb_results = all_xgb_results.sort_values(by=['TEST-AUC'],
                                                              ascending=False)
            else:
                all_xgb_results = all_xgb_results.sort_values(by=['TEST-ACC'],
                                                              ascending=False)

            all_xgb_results.to_csv(xgb_results_file, index=False)
Example #3
0
        y_score = clf_score.predict_proba(X_test)
        y_test_scores.append(y_score[:, 1])
        train_score = clf_score.predict_proba(X_train)
        y_train_scores.append(train_score[:, 1])
        #calc AUC per iteration
        fpr, tpr, thresholds = roc_curve(np.array(y_test),
                                         np.array(np.array(y_score).T[1]))
        roc_auc = auc(fpr, tpr)
        print('ROC AUC = ' + str(round(roc_auc, 4)))

    #calc AUC on all iterations
    all_y_train = []
    for i in range(k_fold):
        all_y_train.append(y_trains[i].values)
    all_y_train = np.array(all_y_train).flatten()

    all_y_test = []
    for i in range(k_fold):
        all_y_test.append(y_tests[i].values)
    all_y_test = np.array(all_y_test).flatten()

    y_train_scores = np.array(y_train_scores).flatten()
    y_test_scores = np.array(y_test_scores).flatten()

    train_auc, test_auc, train_rho, test_rho = calc_auc_on_flat_results(
        all_y_train, y_train_scores, all_y_test, y_test_scores)
    '''  
    test_auc, acc = nn_main(X, y, params, 'GDM_extra_features', Net, plot=True, k_fold=5)
    print('Final auc: ' +str(test_auc))
    nni.report_final_result(test_auc)
    '''
Example #4
0
    def fit(self,
            X,
            y,
            X_train_ids,
            X_test_ids,
            y_train_ids,
            y_test_ids,
            params,
            weights,
            bacteria,
            task_name_title,
            relative_path_to_save_results,
            pca_obj=None):
        """Grid-run SVM classifiers over K folds and persist all results.

        For every classifier produced by
        ``self.create_classifiers(params, weights)``: fit on each fold,
        collect decision-function scores, accuracies and confusion matrices,
        create AUC / coefficient plots, and append a summary row to the
        shared ``all_svm_results.csv`` file.

        ``X_train_ids`` / ``X_test_ids`` / ``y_train_ids`` / ``y_test_ids``
        hold the pre-computed per-fold index collections used with
        ``X.loc`` / ``y[...]``.

        NOTE(review): this method changes the process working directory
        (os.chdir) and never restores it — callers should not rely on the
        cwd afterwards.
        """
        # Create (and move into) the SVM results directory.
        if not os.path.exists(
                os.path.join(relative_path_to_save_results, "SVM")):
            os.makedirs(os.path.join(relative_path_to_save_results, "SVM"))
        os.chdir(
            os.path.join(os.path.abspath(os.path.curdir),
                         relative_path_to_save_results, "SVM"))
        print("SVM...")

        # update each classifier results in a mutual file
        svm_results_file = Path("all_svm_results.csv")
        if not svm_results_file.exists():
            all_svm_results = pd.DataFrame(columns=[
                'KERNEL', 'GAMMA', 'C', 'TRAIN-AUC', 'TRAIN-ACC', 'TEST-AUC',
                'TEST-ACC', 'PRECISION', 'RECALL'
            ])
            all_svm_results.to_csv(svm_results_file, index=False)

        num_of_classes = len(set(y))
        # ROC-AUC plotting below is only defined for the two-class case.
        BINARY = True if num_of_classes == 2 else False
        optional_classifiers = self.create_classifiers(params, weights)

        for clf in optional_classifiers:
            # Re-read so rows appended by previous configurations are kept.
            all_svm_results = pd.read_csv(svm_results_file)
            clf_folder_name = "k=" + clf.kernel + "_c=" + str(
                clf.C) + "_g=" + clf.gamma
            if not os.path.exists(clf_folder_name):
                os.makedirs(clf_folder_name)

            # Per-fold accumulators.
            X_trains, X_tests, y_trains, y_tests, svm_coefs = [], [], [], [], []
            svm_y_test_from_all_iter, svm_y_score_from_all_iter = np.array(
                []), np.array([])
            svm_y_pred_from_all_iter, svm_class_report_from_all_iter = np.array(
                []), np.array([])
            train_accuracies, test_accuracies, confusion_matrixes, y_train_preds, y_train_scores,\
            y_test_preds , y_test_scores = [], [], [], [], [], [], []

            bacteria_coeff_average = []

            for i in range(params["K_FOLD"]):
                print('------------------------------\niteration number ' +
                      str(i))
                X_train, X_test, y_train, y_test = X.loc[
                    X_train_ids[i]], X.loc[X_test_ids[i]], y[
                        y_train_ids[i]], y[y_test_ids[i]]
                X_trains.append(X_train)
                X_tests.append(X_test)
                y_trains.append(y_train)
                y_tests.append(y_test)

                # FIT
                clf.fit(X_train, y_train)
                # GET RESULTS
                # decision_function: signed distance from the separating
                # hyperplane, used as the ranking score for AUC.
                y_score = clf.decision_function(X_test)
                y_pred = clf.predict(X_test)
                y_test_preds.append(y_pred)
                svm_class_report = classification_report(y_test,
                                                         y_pred).split("\n")
                train_pred = clf.predict(X_train)
                train_score = clf.decision_function(X_train)
                y_train_preds.append(train_pred)
                y_train_scores.append(train_score)
                y_test_scores.append(y_score)
                # SAVE RESULTS
                train_accuracies.append(accuracy_score(y_train, train_pred))
                test_accuracies.append(accuracy_score(y_test, y_pred))
                confusion_matrixes.append(confusion_matrix(y_test, y_pred))

                if BINARY:
                    self.print_auc_for_iter(np.array(y_test),
                                            np.array(y_score))

                self.save_y_test_and_score(y_test, y_pred, y_score,
                                           svm_class_report)
                # --------------------------------------------! COEFF PLOTS -----------------------------------------
                if params["create_coeff_plots"]:
                    svm_coefs, coefficients, bacteria_coeff_average = \
                        self.calc_bacteria_coeff_average(num_of_classes, pca_obj, bacteria, clf, svm_coefs, bacteria_coeff_average)

            # --------------------------------------------! AUC -----------------------------------------
            # Flatten per-fold results into single arrays for global metrics.
            all_y_train = np.array(y_trains).flatten()
            all_predictions_train = np.array(y_train_preds).flatten()
            y_train_scores = np.array(y_train_scores).flatten()
            all_test_real_tags = np.array(y_tests).flatten()
            all_test_pred_tags = np.array(y_test_preds).flatten()
            y_test_scores = np.array(y_test_scores).flatten()

            train_auc, test_auc, train_rho, test_rho = \
                calc_auc_on_flat_results(all_y_train, y_train_scores,
                                           all_test_real_tags, y_test_scores)

            # ----------------------------------------! CONFUSION MATRIX -------------------------------------
            print("------------------------------")
            names = params["CLASSES_NAMES"]
            confusion_matrix_average, confusion_matrix_acc = edit_confusion_matrix(
                confusion_matrixes, "SVM", names, BINARY=BINARY)
            if BINARY:
                _, _, _, svm_roc_auc = roc_auc(all_test_real_tags.astype(int),
                                               y_test_scores,
                                               visualize=True,
                                               graph_title='SVM\n' +
                                               task_name_title.capitalize() +
                                               " AUC on all iterations",
                                               save=True,
                                               folder=clf_folder_name)
                # The results folder is named after the achieved test AUC.
                res_path = os.path.join(clf_folder_name,
                                        str(round(svm_roc_auc, 5)))
            else:
                svm_roc_auc = 0
                res_path = clf_folder_name

            if not os.path.exists(res_path):
                os.mkdir(res_path)

            if params["create_coeff_plots"]:
                self.plot_bacteria_coeff_average(bacteria_coeff_average,
                                                 len(set(y)),
                                                 params["TASK_TITLE"],
                                                 clf_folder_name, bacteria,
                                                 params["K_FOLD"], "SVM",
                                                 res_path, BINARY, names)

            print_confusion_matrix(confusion_matrix_average, names,
                                   confusion_matrix_acc, "SVM",
                                   task_name_title, res_path)

            if BINARY:
                _, _, _, svm_train_roc_auc = roc_auc(all_y_train,
                                                     y_train_scores,
                                                     visualize=False,
                                                     graph_title="train auc",
                                                     save=False,
                                                     folder=res_path)
            else:
                svm_train_roc_auc = 0
                multi_class_roc_auc(all_y_train.astype(int),
                                    y_train_scores,
                                    names,
                                    graph_title='SVM\n' +
                                    task_name_title.capitalize() +
                                    " AUC on all iterations",
                                    save=True,
                                    folder=res_path)

            # ----------------------------------------! SAVE RESULTS -------------------------------------
            self.save_results(task_name_title, train_auc, test_auc, train_rho,
                              test_rho, confusion_matrix_average,
                              confusion_matrix_acc, train_accuracies,
                              test_accuracies, svm_y_score_from_all_iter,
                              svm_y_pred_from_all_iter,
                              svm_y_test_from_all_iter, "SVM", res_path)

            # Append this configuration's summary row; keep the shared file
            # sorted by the relevant test metric.
            all_svm_results.loc[len(all_svm_results)] = [
                clf.kernel, clf.C, clf.gamma, svm_train_roc_auc,
                np.mean(train_accuracies), svm_roc_auc,
                np.mean(test_accuracies),
                precision_score(all_test_real_tags.astype(int),
                                all_test_pred_tags,
                                average='micro'),
                recall_score(all_test_real_tags.astype(int),
                             all_test_pred_tags,
                             average='micro')
            ]
            if BINARY:
                all_svm_results = all_svm_results.sort_values(by=['TEST-AUC'],
                                                              ascending=False)
            else:
                all_svm_results = all_svm_results.sort_values(by=['TEST-ACC'],
                                                              ascending=False)

            all_svm_results.to_csv(svm_results_file, index=False)
Example #5
0
def learn(X_trains, X_tests, y_trains, y_tests, k_fold, task):
    """Evaluate three model families (linear SVM, XGBoost, NN) on the same
    k-fold splits and return their train/test AUCs.

    Parameters
    ----------
    X_trains, X_tests : per-fold feature matrices.
    y_trains, y_tests : per-fold label DataFrames with a 'Tag' column.
    k_fold : number of folds.
    task : task name (kept for interface compatibility; it was only used by
        the now-removed ROC plotting code).

    Returns
    -------
    (SVM_train_auc, SVM_test_auc, XGB_train_auc, XGB_test_auc,
     NN_train_auc, NN_test_auc)
    """
    # Flatten the per-fold 'Tag' labels once; reused for SVM and XGB AUCs.
    all_y_train = np.array([y_trains[i]['Tag'].values
                            for i in range(k_fold)]).flatten()
    all_y_test = np.array([y_tests[i]['Tag'].values
                           for i in range(k_fold)]).flatten()

    # ---------------- SVM ----------------
    clf = svm.SVC(kernel='linear',
                  C=0.1,
                  gamma='scale',
                  class_weight='balanced')

    y_test_scores, y_train_scores = [], []
    for i in range(k_fold):
        print('------------------------------\niteration number ' + str(i))
        X_train, X_test, y_train, y_test = (X_trains[i], X_tests[i],
                                            y_trains[i], y_tests[i])
        # FIT
        clf.fit(X_train, y_train)
        # GET RESULTS: decision_function gives the signed distance from the
        # separating hyperplane, used as the ranking score for AUC.
        y_score = clf.decision_function(X_test)
        train_score = clf.decision_function(X_train)
        y_train_scores.append(train_score)
        y_test_scores.append(y_score)
        print_auc_for_iter(np.array(y_tests[i]['Tag'].values),
                           np.array(y_score).T)

    y_train_scores = np.array(y_train_scores).flatten()
    y_test_scores = np.array(y_test_scores).flatten()

    SVM_train_auc, SVM_test_auc, _, _ = calc_auc_on_flat_results(
        all_y_train, y_train_scores, all_y_test, y_test_scores)

    # ---------------- XGBOOST ----------------
    clf = XGBClassifier(max_depth=5,
                        learning_rate=0.01,
                        n_estimators=100,
                        objective='binary:logistic',
                        gamma=0.5,
                        min_child_weight=3,
                        booster='gbtree')

    y_test_scores, y_train_scores = [], []
    for i in range(k_fold):
        print('------------------------------\niteration number ' + str(i))
        X_train, X_test, y_train, y_test = (X_trains[i], X_tests[i],
                                            y_trains[i], y_tests[i])
        # FIT
        clf.fit(X_train, y_train)
        # GET RESULTS: keep the positive-class probability (column 1).
        y_score = clf.predict_proba(X_test)
        train_score = clf.predict_proba(X_train)
        y_train_scores.append(train_score[:, 1])
        y_test_scores.append(y_score[:, 1])

        print_auc_for_iter(np.array(y_tests[i]['Tag'].values),
                           np.array(y_score).T[1])

    y_train_scores = np.array(y_train_scores).flatten()
    y_test_scores = np.array(y_test_scores).flatten()

    XGB_train_auc, XGB_test_auc, _, _ = calc_auc_on_flat_results(
        all_y_train, y_train_scores, all_y_test, y_test_scores)

    # ---------------- NN ----------------
    # The NN helper reports per-fold AUCs directly; average them.
    NN_test_auc = 0
    NN_train_auc = 0
    for i in range(k_fold):
        Net = models_nn['relu_b']
        print('------------------------------\niteration number ' + str(i))
        X_train, X_test, y_train, y_test = (X_trains[i], X_tests[i],
                                            y_trains[i], y_tests[i])
        train_auc, test_auc = nn_model(X_train, X_test, y_train, y_test, Net)
        NN_train_auc += train_auc
        NN_test_auc += test_auc

    NN_train_auc /= k_fold
    NN_test_auc /= k_fold

    return SVM_train_auc, SVM_test_auc, XGB_train_auc, XGB_test_auc, NN_train_auc, NN_test_auc
Example #6
0
def learn_XGBOOST(X_trains, X_tests, y_trains, y_tests, k_fold, clf_params,
                  clf_ens_params, df_concat):
    """Two-stage XGBoost pipeline over k folds.

    Stage 1 ("inner" model) scores the raw features; stage 2 ("ensemble"
    model) is trained on data frames built from the inner model's scores
    joined with ``df_concat``. Per-fold and aggregate AUCs for both stages
    are printed; the ensemble model's aggregate test AUC is returned.

    Parameters
    ----------
    X_trains, X_tests : per-fold feature matrices.
    y_trains, y_tests : per-fold label DataFrames with a 'Tag' column.
    k_fold : number of folds.
    clf_params, clf_ens_params : hyper-parameter dicts for the inner and
        ensemble classifiers respectively.
    df_concat : extra data frame joined with the score frames
        (via create_concate_df_to_learn).

    Returns
    -------
    float
        The ensemble model's test AUC.
    """
    # Inner model: scores the raw features.
    clf_score = XGBClassifier(max_depth=int(clf_params['max_depth']),
                              learning_rate=clf_params['lr'],
                              n_estimators=int(clf_params['estimators']),
                              objective='binary:logistic',
                              gamma=clf_params['gamma'],
                              min_child_weight=int(
                                  clf_params['min_child_weight']),
                              reg_lambda=clf_params['lambda'],
                              booster='dart',
                              alpha=clf_params['alpha'])

    # Ensemble model: trained on the inner model's score predictions.
    ens_clf = XGBClassifier(max_depth=int(clf_ens_params['max_depth']),
                            learning_rate=clf_ens_params['lr'],
                            n_estimators=int(clf_ens_params['estimators']),
                            objective='binary:logistic',
                            gamma=clf_ens_params['gamma'],
                            min_child_weight=int(
                                clf_ens_params['min_child_weight']),
                            reg_lambda=clf_ens_params['lambda'],
                            booster='dart',
                            alpha=clf_ens_params['alpha'])

    y_train_scores, y_test_scores, y_train_scores_ens, y_test_scores_ens = [], [], [], []
    all_y_train_ens, all_y_test_ens = [], []

    for i in range(k_fold):
        print('------------------------------\niteration number ' + str(i))
        X_train, X_test, y_train, y_test = (X_trains[i], X_tests[i],
                                            y_trains[i], y_tests[i])
        # Train the inner XGBOOST model; keep the positive-class probability.
        clf_score.fit(X_train, y_train)
        y_score = clf_score.predict_proba(X_test)
        y_test_scores.append(y_score[:, 1])
        train_score = clf_score.predict_proba(X_train)
        y_train_scores.append(train_score[:, 1])

        # Build the data frames for the ensemble stage from the inner scores.
        score_train_df = build_score_df(X_train, train_score[:, 1])
        X_train_ens, y_train_ens = create_concate_df_to_learn(
            score_train_df, df_concat)
        all_y_train_ens.append(y_train_ens.values)
        score_test_df = build_score_df(X_test, y_score[:, 1])
        X_test_ens, y_test_ens = create_concate_df_to_learn(
            score_test_df, df_concat)
        all_y_test_ens.append(y_test_ens.values)

        # Train the ensemble model on the inner model's score predictions.
        ens_clf.fit(X_train_ens, y_train_ens)
        y_score_ens = ens_clf.predict_proba(X_test_ens)
        y_test_scores_ens.append(y_score_ens[:, 1])
        train_score_ens = ens_clf.predict_proba(X_train_ens)
        y_train_scores_ens.append(train_score_ens[:, 1])

        # Print the AUC of each fold for both stages.
        print('inner model')  # fixed typo: was 'iner model'
        print_auc_for_iter(np.array(y_test), np.array(y_score).T[1])
        print('ensemble model')
        print_auc_for_iter(np.array(y_test_ens), np.array(y_score_ens).T[1])

    # Aggregate AUC of the inner model (printed only; not returned).
    all_y_train = np.array([y_trains[i]['Tag'].values
                            for i in range(k_fold)]).flatten()
    all_y_test = np.array([y_tests[i]['Tag'].values
                           for i in range(k_fold)]).flatten()

    y_train_scores = np.array(y_train_scores).flatten()
    y_test_scores = np.array(y_test_scores).flatten()
    print('Inner Model')
    calc_auc_on_flat_results(all_y_train, y_train_scores,
                             all_y_test, y_test_scores)

    # Aggregate AUC of the ensemble model — this is the returned metric.
    all_y_train_ens = np.array(all_y_train_ens).flatten()
    all_y_test_ens = np.array(all_y_test_ens).flatten()
    y_train_scores = np.array(y_train_scores_ens).flatten()
    y_test_scores = np.array(y_test_scores_ens).flatten()

    print('Ensemble Model')
    _, test_auc, _, _ = calc_auc_on_flat_results(all_y_train_ens,
                                                 y_train_scores,
                                                 all_y_test_ens, y_test_scores)
    return test_auc