Example #1
File: cluster.py Project: migraf/mt
import hdbscan
from pprint import pprint

def hdbscan_clustering(data, min_cluster_size, min_samples=None, excluded_variables=None,
                       prepare_data=True, random_state=0, max_iter=1000, display=True, dims=3,
                       reduction_algorithm="isomap", evaluate_clusters=False, dim_red_params=None):
    """
    Perform clustering using hdb scan while at the same time reducing the dimensions using umap
    :param data: data to cluster
    :type data:
    :param min_cluster_size: min samples that should make up a cluster
    :type min_cluster_size:
    :return:
    :rtype:
    """

    if excluded_variables is None:
        excluded_variables = []
    if prepare_data:
        data = create_training_data(data, excluded_variables=excluded_variables, test_train_split=False)
    numpy_data = data.values

    labels = hdbscan.HDBSCAN(
        min_samples=min_samples,
        min_cluster_size=min_cluster_size,
    ).fit_predict(numpy_data)
    print(labels)

    if display:
        plot_clusters(numpy_data, labels, dims=dims, reduction_algorithm=reduction_algorithm)
    if evaluate_clusters:
        cluster_dfs = extract_clusters(data, labels)
        comparison_result = compare_clusters(data, cluster_dfs)
        pprint(comparison_result)

    return labels
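
For a quick sanity check, a minimal usage sketch; the toy DataFrame and the prepare_data=False/display=False flags are assumptions made here to keep the call free of the project-specific helpers:

import numpy as np
import pandas as pd

# Hypothetical toy data: two well-separated blobs
rng = np.random.default_rng(0)
df = pd.DataFrame(np.vstack([rng.normal(0, 1, (50, 3)),
                             rng.normal(8, 1, (50, 3))]),
                  columns=["a", "b", "c"])

labels = hdbscan_clustering(df, min_cluster_size=10,
                            prepare_data=False, display=False)
print(np.unique(labels))  # any -1 labels are points HDBSCAN treats as noise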
Example #2
from sklearn.linear_model import LogisticRegression

def logistic_regression(data=None,
                        num_cols=None,
                        cat_cols=None,
                        target=None,
                        train_data=None,
                        train_labels=None):
    """
    Train a classifier using logistic regression
    Parameters
    ----------
    data :
    num_cols :
    cat_cols :
    target :
    train_data :
    train_labels :

    Returns
    -------

    """
    if train_data is not None and train_labels is not None:
        # penalty="elasticnet" requires solver="saga" and an l1_ratio;
        # 0.5 is an arbitrary even L1/L2 mix
        clf = LogisticRegression(penalty="elasticnet", solver="saga", l1_ratio=0.5)
        clf.fit(train_data, train_labels)
        return clf
    else:
        x_train, x_test, y_train, y_test = create_training_data(
            data, num_cols, cat_cols, target)
        clf = LogisticRegression(penalty="elasticnet", solver="saga", l1_ratio=0.5)
        clf.fit(x_train, y_train)
        # TODO display results
        return clf
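
A quick smoke test for the pre-split path, assuming only scikit-learn; scaling is added because the saga solver converges slowly on unscaled features:

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X, y = load_breast_cancer(return_X_y=True)
X = StandardScaler().fit_transform(X)
x_tr, x_te, y_tr, y_te = train_test_split(X, y, random_state=0)

clf = logistic_regression(train_data=x_tr, train_labels=y_tr)
print(clf.score(x_te, y_te))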
Example #3
File: cluster.py Project: migraf/mt
from pprint import pprint
from sklearn.cluster import KMeans

def k_means_cluster(data, n_clusters, excluded_variables=None, prepare_data=True, random_state=0,
                    max_iter=1000, display=True, dims=3, reduction_algorithm="isomap",
                    evaluate_clusters=False, dim_red_params=None):
    """
    Attempts k means clustering for the selected data
    :param data:
    :type data:
    :return:
    :rtype:
    """
    if excluded_variables is None:
        excluded_variables = []
    if prepare_data:
        data = create_training_data(data, excluded_variables=excluded_variables, test_train_split=False)
    numpy_data = data.values
    # TODO give more configuration options
    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state, max_iter=max_iter).fit(numpy_data)
    labels = kmeans.predict(numpy_data)
    if display:
        plot_clusters(numpy_data, labels, dims=dims, reduction_algorithm=reduction_algorithm)
    if evaluate_clusters:
        cluster_dfs = extract_clusters(data, labels)
        comparison_result = compare_clusters(data, cluster_dfs)
        pprint(comparison_result)

    return labels
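
k-means needs n_clusters up front; a common way to sanity-check the choice is the silhouette score. A minimal sketch, assuming numeric data in a NumPy array X (hypothetical):

import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

rng = np.random.default_rng(0)
X = np.vstack([rng.normal(0, 1, (100, 2)), rng.normal(6, 1, (100, 2))])

# Higher silhouette is better; compare a few candidate cluster counts
for k in range(2, 6):
    labels = KMeans(n_clusters=k, random_state=0, n_init=10).fit_predict(X)
    print(k, silhouette_score(X, labels))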
Example #4
from sklearn.ensemble import RandomForestRegressor

def random_forest_regressor(data=None,
                            num_cols=None,
                            cat_cols=None,
                            target=None,
                            train_data=None,
                            train_labels=None):
    if train_data is not None and train_labels is not None:
        clf = RandomForestRegressor(random_state=0)
        clf.fit(train_data, train_labels)
        return clf
    else:
        x_train, x_test, y_train, y_test = create_training_data(
            data, num_cols, cat_cols, target)
        y_train = y_train.astype(float)
        y_test = y_test.astype(float)
        clf = RandomForestRegressor(random_state=0)
        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)
        print(clf.score(x_test, y_test))
        # Note: a regressor has no predict_proba, so the ROC/AUC plot used in
        # the classifier variant does not apply here
        # TODO plot regression result (e.g. predicted vs. actual values)
        feature_importances = plot_feature_importances(
            x_train, clf.feature_importances_)
        return clf
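
A minimal sketch of the pre-split path, assuming only scikit-learn:

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
x_tr, x_te, y_tr, y_te = train_test_split(X, y, random_state=0)

reg = random_forest_regressor(train_data=x_tr, train_labels=y_tr)
print(reg.score(x_te, y_te))  # R^2 on the held-out split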
Example #5
from sklearn.linear_model import LinearRegression

def linear_regression(data, num_cols, cat_cols, target):
    """
    Fit a basic linear regression model to the data
    :return:
    :rtype:
    """
    x_train, x_test, y_train, y_test = create_training_data(
        data, num_cols, cat_cols, target)
    print(len(x_train), len(x_test))
    regr = LinearRegression()
    regr.fit(x_train, y_train)
    print(regr.score(x_test, y_test))
    print(regr.coef_)
    return regr
Example #6
import matplotlib.pyplot as plt
import shap
from sklearn.linear_model import ElasticNet

def elastic_net(data=None,
                num_cols=None,
                cat_cols=None,
                target=None,
                train_data=None,
                train_labels=None):
    """
    Fit an elastic net regression model to the provided data
    :param data: numpy array containing numeric training data
    :type data: list-like numeric
    :param target: target column to predict
    :type target: list-like numeric
    :return:
    :rtype:
    """
    if train_data is not None and train_labels is not None:
        regr = ElasticNet()
        regr.fit(train_data, train_labels)
        return regr
    else:
        x_train, x_test, y_train, y_test = create_training_data(
            data, num_cols, cat_cols, target)
        y_train = y_train.astype(float)
        y_test = y_test.astype(float)
        # print(x_train)
        # print(list(y_train))
        clf = ElasticNet()
        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)
        print(clf.score(x_test, y_test))
        explainer = shap.KernelExplainer(clf.predict, x_train)
        shap_values = explainer.shap_values(x_train, nsamples=100)
        print(shap_values)
        # shap.summary_plot(shap_values, x_train)
        plt.tight_layout()
        # plt.show()
        print(explainer.expected_value)
        shap.force_plot(explainer.expected_value, shap_values[0],
                        x_train.iloc[0, :])
        plt.show()
        # Note: ElasticNet is a regressor, so the classifier-style ROC/AUC and
        # feature-importance plots from the classification helpers do not apply
        return clf
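
shap.KernelExplainer re-evaluates the model once per background sample, so using the full training set as background gets slow quickly. A common mitigation (an assumption here, not part of the original code) is to summarize the background with shap.kmeans:

import shap

# Hypothetical: clf is a fitted ElasticNet, x_train a feature DataFrame
background = shap.kmeans(x_train, 10)  # 10 weighted background points
explainer = shap.KernelExplainer(clf.predict, background)
shap_values = explainer.shap_values(x_train.iloc[:50], nsamples=100)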
Example #7
from sklearn.ensemble import RandomForestClassifier

def random_forest_classifier(data=None,
                             num_cols=None,
                             cat_cols=None,
                             target=None,
                             train_data=None,
                             train_labels=None):
    """
    Train a random forest model on the with the selected columns on the selected target using data from the
    given dataframe
    :param data:
    :type data:
    :param num_cols:
    :type num_cols:
    :param cat_cols:
    :type cat_cols:
    :param target:
    :type target:
    :return:
    :rtype:
    """
    # Only train the classifier
    if train_data is not None and train_labels is not None:
        clf = RandomForestClassifier(random_state=0)
        clf.fit(train_data, train_labels)
        return clf
    else:

        x_train, x_test, y_train, y_test = create_training_data(
            data, num_cols, cat_cols, target)
        y_train = y_train.astype("str")
        y_test = y_test.astype("str")
        print(x_train)
        print(list(y_train))
        clf = RandomForestClassifier(random_state=0)
        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)
        print(clf.score(x_test, y_test))
        # TODO make this more general
        if len(y_train.unique()) == 2:
            # ROC needs the positive-class probability, not the max over classes
            probs = clf.predict_proba(x_test)[:, 1]
            roc_auc = create_roc_auc_plot(y_test.values, probs)
        else:
            # TODO plot confusion matrix
            pass
        feature_importances = plot_feature_importances(
            x_train, clf.feature_importances_)
        return clf
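
And the matching smoke test for the classifier, again assuming only scikit-learn:

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
x_tr, x_te, y_tr, y_te = train_test_split(X, y, random_state=0)

clf = random_forest_classifier(train_data=x_tr, train_labels=y_tr)
print(clf.score(x_te, y_te))  # accuracy on the held-out split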
Example #8
from sklearn.svm import SVR

def svm_regression(data=None,
                   num_cols=None,
                   cat_cols=None,
                   target=None,
                   train_data=None,
                   train_labels=None):
    """
    Train an svm regressor with the given data and target
    :param cat_cols:
    :type cat_cols:
    :param num_cols:
    :type num_cols:
    :param data:
    :type data:
    :param target:
    :type target:
    :return:
    :rtype:
    """
    if train_data is not None and train_labels is not None:
        clf = SVR()
        clf.fit(train_data, train_labels)
        return clf
    else:
        x_train, x_test, y_train, y_test = create_training_data(
            data, num_cols, cat_cols, target, na_strategy="fill")
        y_train = y_train.astype(float)
        y_test = y_test.astype(float)
        # print(x_train)
        # print(list(y_train))
        clf = SVR()
        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)
        print(clf.score(x_test, y_test))
        # Note: SVR is a regressor, so the classifier-style ROC/AUC and
        # feature-importance plots do not apply here
        return clf
Example #9
from sklearn.svm import SVC

def svm_classifier(data=None,
                   num_cols=None,
                   cat_cols=None,
                   target=None,
                   train_data=None,
                   train_labels=None):
    """
    Train an svm classifier with the given data and target
    :param cat_cols:
    :type cat_cols:
    :param num_cols:
    :type num_cols:
    :param data: raw training data
    :type data:
    :param target: target column in the data
    :type target:
    :return:
    :rtype:
    """
    if train_data is not None and train_labels is not None:
        clf = SVC()
        clf.fit(train_data, train_labels)
        return clf
    else:

        x_train, x_test, y_train, y_test = create_training_data(
            data, num_cols, cat_cols, target, na_strategy="fill")
        # TODO multiclass classification
        y_train = y_train.astype("str")
        y_test = y_test.astype("str")
        print(x_train)
        print(list(y_train))
        clf = SVC(gamma="auto")
        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)
        print(y_pred)
        print(clf.score(x_test, y_test))
        # Note: predict_proba requires SVC(probability=True), so the ROC/AUC
        # plot is skipped here
        return clf
Example #10
File: cluster.py Project: migraf/mt
from pprint import pprint
from sklearn.mixture import BayesianGaussianMixture

def vbgmm_cluster(data, n_clusters, covariance_type="full", weight_concentration_prior=None,
                  excluded_variables=None, prepare_data=True, random_state=0, max_iter=1000,
                  display=True, dims=3, reduction_algorithm="isomap", evaluate_clusters=False,
                  dim_red_params=None):
    """
    Perform clustering using a variational bayesian gaussian mixture
    :param max_iter:
    :type max_iter:
    :param n_models:
    :type n_models:
    :param data:
    :type data:
    :param three_dimensional:
    :type three_dimensional:
    :return:
    :rtype:
    """
    if excluded_variables is None:
        excluded_variables = []
    if prepare_data:
        data = create_training_data(data, excluded_variables=excluded_variables, test_train_split=False)
    numpy_data = data.values
    # TODO give more configuration options
    bgmm = BayesianGaussianMixture(n_components=n_clusters,
                                   covariance_type=covariance_type,
                                   weight_concentration_prior=weight_concentration_prior,
                                   random_state=random_state,
                                   max_iter=max_iter).fit(numpy_data)
    # Assign each sample to a mixture component
    labels = bgmm.predict(numpy_data)

    if display:
        plot_clusters(numpy_data, labels, dims=dims, reduction_algorithm=reduction_algorithm)
    if evaluate_clusters:
        cluster_dfs = extract_clusters(data, labels)
        comparison_result = compare_clusters(data, cluster_dfs)
        pprint(comparison_result)

    return labels
Example #11
File: cluster.py Project: migraf/mt
from pprint import pprint
from sklearn.mixture import GaussianMixture

def gmm_cluster(data, n_clusters, covariance_type="full", excluded_variables=None, prepare_data=True,
                random_state=0, max_iter=1000, display=True, dims=3, reduction_algorithm="isomap",
                evaluate_clusters=False, dim_red_params=None):
    """
    Attempt clustering using Gaussian Mixture Models and different variations of
    Expectation Maximization Algorithms
    :param max_iter:
    :type max_iter:
    :param three_dimensional:
    :type three_dimensional:
    :param n_clusters:
    :type n_clusters:
    :param data:
    :type data:
    :return:
    :rtype:
    """
    if excluded_variables is None:
        excluded_variables = []
    if prepare_data:
        data = create_training_data(data, excluded_variables=excluded_variables, test_train_split=False)
    numpy_data = data.values
    gmm = GaussianMixture(n_components=n_clusters,
                          covariance_type=covariance_type,
                          random_state=random_state,
                          max_iter=max_iter).fit(numpy_data)
    # Assign each sample to a mixture component
    labels = gmm.predict(numpy_data)
    # Create a figure with the data grouped by associated cluster
    if display:
        plot_clusters(numpy_data, labels, dims=dims, reduction_algorithm=reduction_algorithm)
    if evaluate_clusters:
        cluster_dfs = extract_clusters(data, labels)
        comparison_result = compare_clusters(data, cluster_dfs)
        pprint(comparison_result)

    return labels
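
A Gaussian mixture also needs the component count up front; the Bayesian information criterion (BIC) is the usual model-selection tool. A minimal sketch, assuming numeric data in a NumPy array X (hypothetical):

import numpy as np
from sklearn.mixture import GaussianMixture

rng = np.random.default_rng(0)
X = np.vstack([rng.normal(0, 1, (100, 2)), rng.normal(6, 1, (100, 2))])

# Fit one mixture per candidate component count and keep the lowest BIC
bics = {k: GaussianMixture(n_components=k, random_state=0).fit(X).bic(X)
        for k in range(1, 6)}
print(bics, min(bics, key=bics.get))  # expect 2 for these two blobs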
Example #12
from sklearn.svm import SVC, SVR

def svm(data,
        target,
        excluded_variables=None,
        prediction_type=None,
        kernel='rbf',
        C=1.0,
        degree=3,
        cv=True,
        cv_params=None,
        display=True,
        shap=True,
        prepare_data=True):
    """
    Train an SVM classifier or regressor, depending on the detected or given
    prediction type, with optional cross-validated hyperparameter tuning.
    """
    if prediction_type:
        model_subtype = prediction_type
    else:
        model_subtype = detect_prediction_type(data, target)

    if prepare_data:
        x_train, x_test, y_train, y_test = create_training_data(
            data, target, excluded_variables or [])
    else:
        x_train, x_test = data[0], data[1]
        y_train, y_test = target[0], target[1]
    print(f"Creating a svm {model_subtype} model")
    if model_subtype in ["binary", "multi-class"]:
        # probability=True is required so predict_proba is available for the
        # SHAP-based feature importance display below
        pred = SVC(kernel=kernel, C=C, degree=degree, probability=True)
        y_train = y_train.astype("str")
        y_test = y_test.astype("str")
    else:
        pred = SVR(kernel=kernel, C=C, degree=degree)
        y_train = y_train.astype("float")
        y_test = y_test.astype("float")

    if cv:
        # Perform cross validation hyper parameter tuning
        if not cv_params:
            cv_params = {
                "C": [1, 10, 100],
                "kernel": ["linear", "poly", "rbf", "sigmoid"],
                "gamma": ["auto", "scale"]
            }
        pred, cv_results, param_results = cross_validation_tuning(
            pred, cv_params, x_train, y_train)
        print(param_results)
    else:
        pred.fit(x_train, y_train)

    shap_values = None  # only computed when display is enabled
    if display:
        display_model_performance(pred, model_subtype, x_test, y_test, target)
        if model_subtype != "regression":
            shap_values = display_feature_importances(pred.predict_proba,
                                                      x_train,
                                                      x_test,
                                                      return_shap=shap)
        else:
            shap_values = display_feature_importances(pred,
                                                      x_train,
                                                      x_test,
                                                      return_shap=shap)
    else:
        print(f"Score: {pred.score(x_test, y_test)}")
        # TODO print additional information
    if shap:
        return pred, shap_values
    return pred
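
A sketch that exercises the wrapper without the project helpers, passing pre-split data and disabling tuning, display, and SHAP; the dataset is an assumption:

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
x_tr, x_te, y_tr, y_te = train_test_split(X, y, random_state=0)

pred = svm([x_tr, x_te], [y_tr, y_te], prediction_type="binary",
           cv=False, display=False, shap=False, prepare_data=False)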
Example #13
from sklearn.linear_model import ElasticNet, LogisticRegression

def linear_model(data,
                 target,
                 excluded_variables=None,
                 prediction_type=None,
                 l1_ratio=0.2,
                 max_iter=1000,
                 cv=True,
                 cv_params=None,
                 display=True,
                 shap=True,
                 prepare_data=True):
    """
    Train an elastic-net linear model (LogisticRegression for classification,
    ElasticNet for regression), with optional cross-validated tuning.
    """
    if prediction_type:
        model_subtype = prediction_type
    else:
        model_subtype = detect_prediction_type(data, target)

    if prepare_data:
        x_train, x_test, y_train, y_test = create_training_data(
            data, target, excluded_variables or [])
    else:
        x_train, x_test = data[0], data[1]
        y_train, y_test = target[0], target[1]
    print(f"Creating a linear {model_subtype} model")
    if model_subtype in ["binary", "multi-class"]:
        # saga is the only solver that supports the elasticnet penalty
        pred = LogisticRegression(penalty="elasticnet",
                                  l1_ratio=l1_ratio,
                                  max_iter=max_iter,
                                  solver="saga")
        y_train = y_train.astype("str")
        y_test = y_test.astype("str")
    else:
        pred = ElasticNet(l1_ratio=l1_ratio, max_iter=max_iter)
        y_train = y_train.astype("float")
        y_test = y_test.astype("float")

    if cv:
        # Perform cross validation hyper parameter tuning
        if not cv_params:
            cv_params = {
                "l1_ratio": [0, 0.2, 0.5, 0.75, 1],
                "max_iter": [100, 1000, 10000]
            }
        pred, cv_results, param_results = cross_validation_tuning(
            pred, cv_params, x_train, y_train)
        print(param_results)
    else:
        pred.fit(x_train, y_train)

    shap_values = None  # only computed when display is enabled
    if display:
        display_model_performance(pred, model_subtype, x_test, y_test, target)
        if model_subtype != "regression":
            shap_values = display_feature_importances(pred.predict_proba,
                                                      x_train,
                                                      x_test,
                                                      return_shap=shap)
        else:
            shap_values = display_feature_importances(pred.predict,
                                                      x_train,
                                                      x_test,
                                                      return_shap=shap)
    else:
        print(f"Score: {pred.score(x_test, y_test)}")
        # TODO print additional information
    if shap:
        return pred, shap_values
    return pred
Example #14
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

def random_forest(data,
                  target,
                  excluded_variables=None,
                  prediction_type=None,
                  n_estimators=100,
                  criterion=None,
                  max_depth=None,
                  max_features=None,
                  min_samples_leaf=1,
                  cv=True,
                  cv_params=None,
                  display=True,
                  shap=True,
                  prepare_data=True):
    """
    Train a random forest classifier or regressor, depending on the detected
    or given prediction type, with optional cross-validated tuning.
    """
    if prediction_type:
        model_subtype = prediction_type
    else:
        model_subtype = detect_prediction_type(data, target)

    if prepare_data:
        x_train, x_test, y_train, y_test = create_training_data(
            data, target, excluded_variables or [])
    else:
        x_train, x_test = data[0], data[1]
        y_train, y_test = target[0], target[1]
    print(f"Creating a random forest {model_subtype} model")
    rf_kwargs = dict(random_state=0,
                     n_estimators=n_estimators,
                     max_depth=max_depth,
                     min_samples_leaf=min_samples_leaf,
                     max_features=max_features)
    # Only pass criterion when it is explicitly set, so the classifier and
    # the regressor each keep their own default criterion
    if criterion:
        rf_kwargs["criterion"] = criterion
    if model_subtype in ["binary", "multi-class"]:
        pred = RandomForestClassifier(**rf_kwargs)
        # TODO check if this is really necessary
        y_train = y_train.astype("str")
        y_test = y_test.astype("str")
    else:
        pred = RandomForestRegressor(**rf_kwargs)
        y_train = y_train.astype("float")
        y_test = y_test.astype("float")
    if cv:
        if not cv_params:
            cv_params = {
                "n_estimators": [10, 100, 500],
                "max_depth": [None, 6, 8],
                # "auto" was removed in recent scikit-learn releases; "sqrt"
                # is the equivalent for classifiers
                "max_features": [None, "sqrt", "log2"],
                "min_samples_leaf": [1, 5, 10]
            }
        pred, cv_results, param_results = cross_validation_tuning(
            pred, cv_params, x_train, y_train)
        print(param_results)

    else:
        pred.fit(x_train, y_train)

    shap_values = None  # only computed when display is enabled
    if display:
        display_model_performance(pred, model_subtype, x_test, y_test, target)
        shap_values = display_feature_importances(pred,
                                                  x_train,
                                                  x_test,
                                                  model_type="tree",
                                                  return_shap=shap)
    else:
        print(f"Score: {pred.score(x_test, y_test)}")
        # TODO print additional information
    if shap:
        return pred, shap_values
    return pred
Example #15
def multi_model_predictor(data,
                          target,
                          excluded_variables=None,
                          prediction_type=None,
                          linear_model_params=None,
                          svm_params=None,
                          random_forest_params=None,
                          gradient_boosting_params=None,
                          cv=True,
                          display=True,
                          shap=True,
                          prepare_data=True,
                          all_models=True):
    """
    Train a linear model, an SVM, a random forest, and a gradient-boosting
    model on the same split, then report and display the best-scoring one.
    """
    if prediction_type:
        model_subtype = prediction_type
    else:
        model_subtype = detect_prediction_type(data, target)

    if prepare_data:
        x_train, x_test, y_train, y_test, train_ind, test_ind = create_training_data(
            data, target, excluded_variables or [], test_train_indices=True)
    else:
        x_train, x_test = data[0], data[1]
        y_train, y_test = target[0], target[1]

    # Extract data for catboost pool
    # TODO

    # create the models
    print("Training models")
    lin_m = linear_model([x_train, x_test], [y_train, y_test],
                         prediction_type=model_subtype,
                         cv=cv,
                         display=False,
                         shap=False,
                         prepare_data=False,
                         **(linear_model_params or {}))

    svm_m = svm([x_train, x_test], [y_train, y_test],
                prediction_type=model_subtype,
                cv=cv,
                display=False,
                shap=False,
                prepare_data=False,
                **(svm_params or {}))

    rf_m = random_forest([x_train, x_test], [y_train, y_test],
                         prediction_type=model_subtype,
                         cv=cv,
                         display=False,
                         shap=False,
                         prepare_data=False,
                         **(random_forest_params or {}))

    gb_m, gb_m_score = gradient_boosted_trees(data=data,
                                              target=target,
                                              prediction_type=model_subtype,
                                              cv=cv,
                                              display=False,
                                              shap=False,
                                              score=True,
                                              **(gradient_boosting_params or {}))
    # Display scores
    lm_score = lin_m.score(x_test, y_test)
    print(f"Linear model score: {lm_score}")

    rf_score = rf_m.score(x_test, y_test)
    print(f"Random forest model score: {rf_score}")

    svm_score = svm_m.score(x_test, y_test)
    print(f"SVM model score: {svm_score}")

    print(f"Catboost model score: {gb_m_score}")

    models = [("linear_model", lm_score, lin_m),
              ("random forest", rf_score, rf_m), ("svm", svm_score, svm_m),
              ("catboost", gb_m_score, gb_m)]

    top_model = max(models, key=lambda m: m[1])
    print(top_model)
    top_pred = top_model[2]
    shap_values = None  # only computed when display is enabled
    if display:
        display_model_performance(top_pred, model_subtype, x_test, y_test,
                                  target)
        if top_model[0] in {"linear_model", "svm"}:
            if model_subtype == "regression":
                shap_values = display_feature_importances(top_pred.predict,
                                                          x_train,
                                                          x_test,
                                                          return_shap=shap)
            else:
                shap_values = display_feature_importances(
                    top_pred.predict_proba, x_train, x_test, return_shap=shap)
        else:
            shap_values = display_feature_importances(top_pred,
                                                      x_train,
                                                      x_test,
                                                      model_type="tree",
                                                      return_shap=shap)
    if shap:
        return top_pred, models, shap_values
    else:
        return top_pred, models
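
A sketch of the intended top-level call, assuming the project helpers (create_training_data, gradient_boosted_trees, the display functions) are importable and that df is a pandas DataFrame with a "label" column (both hypothetical):

best_model, all_scores, shap_values = multi_model_predictor(
    df,
    target="label",
    cv=False,  # skip hyperparameter tuning for a quick first pass
)
for name, score, model in all_scores:
    print(f"{name}: {score:.3f}")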
Example #16
    def train_models(self, tuning=False, verbose=True, test_set=True):
        """
        Train all available models that fit the selected target and store the results

        Parameters
        ----------
        tuning : bool indicating whether to perform cross validated hyper parameter tuning
        verbose : bool indicating the level of output the training generates
        test_set : bool setting wether to split the data into a training and testing set

        Returns
        -------

        """
        if test_set:
            x_train, x_test, y_train, y_test = create_training_data(
                self.data, self.num_cols, self.cat_cols, self.target)
            # TODO add gradient boosting models
            # Train regression models
            predictions = []
            if self.mode == "regression":
                if verbose:
                    print("Training ElasticNet model")
                    elastic_net_model = elastic_net(train_data=x_train,
                                                    train_labels=y_train)
                    print(
                        f"Elasticnet result: {elastic_net_model.score(x_test, y_test)}"
                    )
                    print("Training SVM model")
                    svm_regressor = svm_regression(train_data=x_train,
                                                   train_labels=y_train)
                    print(
                        f"SVM model result: {svm_regressor.score(x_test, y_test)}"
                    )
                    print(f"Training Random Forest Regressor")
                    rf_regressor = random_forest_regressor(
                        train_data=x_train, train_labels=y_train)
                    print(
                        f"Random Forest Results: {rf_regressor.score(x_test, y_test)}"
                    )

                else:
                    elastic_net_model = elastic_net(train_data=x_train,
                                                    train_labels=y_train)
                    svm_regressor = svm_regression(train_data=x_train,
                                                   train_labels=y_train)
                    rf_regressor = random_forest_regressor(
                        train_data=x_train, train_labels=y_train)

                predictions.append(
                    ("elastic_net", elastic_net_model.predict(x_test)))
                predictions.append(("svm", svm_regressor.predict(x_test)))
                predictions.append(
                    ("random forest", rf_regressor.predict(x_test)))

            # Classification models (binary and multi-class are trained identically)
            elif self.mode in ("binary", "multi-class"):
                if verbose:
                    print("Training Logistic regression classifier")
                    logreg_clf = logistic_regression(train_data=x_train,
                                                     train_labels=y_train)
                    print(
                        f"Logistic regression result: {logreg_clf.score(x_test, y_test)}"
                    )
                    print("Training SVM classifier")
                    svm_clf = svm_classifier(train_data=x_train,
                                             train_labels=y_train)
                    print(
                        f"SVM classifier result: {svm_clf.score(x_test, y_test)}"
                    )
                    print("Training RF Classifier")
                    rf_clf = random_forest_classifier(train_data=x_train,
                                                      train_labels=y_train)
                    print(
                        f"RF classifier score: {rf_clf.score(x_test, y_test)}")
                else:
                    logreg_clf = logistic_regression(train_data=x_train,
                                                     train_labels=y_train)
                    svm_clf = svm_classifier(train_data=x_train,
                                             train_labels=y_train)
                    rf_clf = random_forest_classifier(train_data=x_train,
                                                      train_labels=y_train)
                predictions.append(
                    ("logistic regression", logreg_clf.predict(x_test)))
                predictions.append(("svm", svm_clf.predict(x_test)))
                predictions.append(("random forest", rf_clf.predict(x_test)))

            self.display_results(predictions, y_test, self.mode)