Example #1
from sklearn.model_selection import train_test_split
from lazypredict.Supervised import LazyClassifier


def lazy_cls(X, y, output_csv=False):
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=42)

    clf = LazyClassifier()
    models, predictions = clf.fit(X_train, X_test, y_train, y_test)

    if output_csv:
        models.to_csv('data/lazy_cls.csv')

    print(models)
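
# A quick, hypothetical call of lazy_cls on a small built-in dataset
# (load_breast_cancer is only a stand-in for whatever X and y you have):
from sklearn.datasets import load_breast_cancer

data = load_breast_cancer()
lazy_cls(data.data, data.target, output_csv=False)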
Example #2

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

"""# **8. Apply Lazypredict**

Because the dataset is large, applying the <code>LazyClassifier</code> algorithm may crash the session due to insufficient RAM. Google Colab provides 12 GB of RAM for free, but running this algorithm on a big dataset needs more, so I'm using Google Colab Pro. Don't worry if you don't have Colab Pro; I'll provide the output in a CSV file.
"""

!pip install lazypredict==0.2.7

!pip install lightgbm

import lazypredict
from lazypredict.Supervised import LazyClassifier

clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
# fit returns two DataFrames: the model scoreboard and the per-model predictions
train, test = clf.fit(x_train, x_test, y_train, y_test)

train

"""NOTE: If the execution got failed or crashed then run the below cell to see the output of upper cells and make comment the above five cells"""

#  all_algorithm_df=pd.read_csv("lazypredict_algo.csv")
#  all_algorithm_df

"""# **9. Hyperparameter Tuning**"""

# This helper shows how much time is required to train a model: call it once to get a
# start time, then call it again with that start time to print the elapsed time.
from datetime import datetime

def timer(start_time=None):
  if not start_time:
    return datetime.now()
  print('Time taken: %.2f seconds' % (datetime.now() - start_time).total_seconds())
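
"""A quick, hypothetical use of the timer helper around a single model fit: RandomForestClassifier is only a placeholder, and x_train/y_train are the training splits created earlier in this notebook."""

from sklearn.ensemble import RandomForestClassifier

start_time = timer()                            # start the clock
RandomForestClassifier().fit(x_train, y_train)  # any estimator could go here
timer(start_time)                               # prints the elapsed training time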
Example #3
def build_model(df, c):
    #df = df.loc[:100] # FOR TESTING PURPOSE, COMMENT THIS OUT FOR PRODUCTION

    Y = df[c]  # Select column c as the target Y
    X = df.loc[:, df.columns !=
               c]  # Use all columns except c as the features X
    st.markdown('**1.2. Dataset dimension**')
    st.write('X')
    st.info(X.shape)
    st.write('Y')
    st.info(Y.shape)

    st.markdown('**1.3. Variable details**:')
    st.write('X variable (first 20 are shown)')
    st.info(list(X.columns[:20]))
    st.write('Y variable')
    st.info(Y.name)
    st.markdown('**1.4. Histograms**:')
    df.hist(alpha=0.5, figsize=(20, 10))
    st.pyplot()
    # Build lazy model
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=split_size, random_state=seed_number)
    if model == 'Regression':
        reg = LazyRegressor(verbose=0,
                            ignore_warnings=False,
                            custom_metric=None,
                            predictions=True)
    elif model == 'Classification':
        reg = LazyClassifier(verbose=0,
                             ignore_warnings=True,
                             custom_metric=None,
                             predictions=True)
    models_train, predictions_train = reg.fit(X_train, X_train, Y_train,
                                              Y_train)
    models_test, predictions_test = reg.fit(X_train, X_test, Y_train, Y_test)
    pr = ProfileReport(df, explorative=True)
    st.header('**2. Pandas Profiling Report (Exploratory Data Analysis)**')
    st_profile_report(pr)
    st.subheader('3. Table of Model Performance')

    st.write('Training set')
    st.write(models_train)
    st.markdown(filedownload(models_train, 'modeltraining.csv'),
                unsafe_allow_html=True)

    st.write('Test set')
    st.write(models_test)
    st.markdown(filedownload(models_test, 'modeltest.csv'),
                unsafe_allow_html=True)
    st.subheader('4. Predictions By the models')
    st.write('Training set')
    st.write(predictions_train)
    st.markdown(filedownload(predictions_train, 'predicttraining.csv'),
                unsafe_allow_html=True)

    st.write('Test set')
    st.write(predictions_test)
    st.markdown(filedownload(predictions_test, 'predicttest.csv'),
                unsafe_allow_html=True)

    st.subheader('5. Plot of Model Performance (Test set)')
    if model == 'Regression':
        with st.markdown('**R-squared**'):
            # Tall
            models_test["R-Squared"] = [
                0 if i < 0 else i for i in models_test["R-Squared"]
            ]
            plt.figure(figsize=(3, 9))
            sns.set_theme(style="whitegrid")
            ax1 = sns.barplot(y=models_test.index,
                              x="R-Squared",
                              data=models_test)
            ax1.set(xlim=(0, 1))
        st.markdown(imagedownload(plt, 'plot-r2-tall.pdf'),
                    unsafe_allow_html=True)
        # Wide
        plt.figure(figsize=(9, 3))
        sns.set_theme(style="whitegrid")
        ax1 = sns.barplot(x=models_test.index, y="R-Squared", data=models_test)
        ax1.set(ylim=(0, 1))
        plt.xticks(rotation=90)
        st.pyplot(plt)
        st.markdown(imagedownload(plt, 'plot-r2-wide.pdf'),
                    unsafe_allow_html=True)

        with st.markdown('**RMSE (capped at 50)**'):
            # Tall
            plt.figure(figsize=(3, 9))
            sns.set_theme(style="whitegrid")
            ax2 = sns.barplot(y=models_test.index, x="RMSE", data=models_test)
        st.markdown(imagedownload(plt, 'plot-rmse-tall.pdf'),
                    unsafe_allow_html=True)
        #Wide
        plt.figure(figsize=(9, 3))
        sns.set_theme(style="whitegrid")
        ax2 = sns.barplot(x=models_test.index, y="RMSE", data=models_test)
        plt.xticks(rotation=90)
        st.pyplot(plt)
        st.markdown(imagedownload(plt, 'plot-rmse-wide.pdf'),
                    unsafe_allow_html=True)

        with st.markdown('**Calculation time**'):
            # Tall
            models_test["Time Taken"] = [
                0 if i < 0 else i for i in models_test["Time Taken"]
            ]  #        plt.figure(figsize=(3, 9))
            sns.set_theme(style="whitegrid")
            ax3 = sns.barplot(y=models_test.index,
                              x="Time Taken",
                              data=models_test)
        st.markdown(imagedownload(plt, 'plot-calculation-time-tall.pdf'),
                    unsafe_allow_html=True)
        # Wide
        plt.figure(figsize=(9, 3))
        sns.set_theme(style="whitegrid")
        ax3 = sns.barplot(x=models_test.index,
                          y="Time Taken",
                          data=models_test)
        plt.xticks(rotation=90)
        st.pyplot(plt)
        st.markdown(imagedownload(plt, 'plot-calculation-time-wide.pdf'),
                    unsafe_allow_html=True)
    elif model == 'Classification':
        with st.markdown('**Accuracy**'):
            # Tall
            models_test["Accuracy"] = [
                0 if i < 0 else i for i in models_test["Accuracy"]
            ]
            plt.figure(figsize=(3, 9))
            sns.set_theme(style="whitegrid")
            ax1 = sns.barplot(y=models_test.index,
                              x="Accuracy",
                              data=models_test)
            ax1.set(xlim=(0, 1))
        st.markdown(imagedownload(plt, 'plot-accuracy-tall.pdf'),
                    unsafe_allow_html=True)
        # Wide
        plt.figure(figsize=(9, 3))
        sns.set_theme(style="whitegrid")
        ax1 = sns.barplot(x=models_test.index, y="Accuracy", data=models_test)
        ax1.set(ylim=(0, 1))
        plt.xticks(rotation=90)
        st.pyplot(plt)
        st.markdown(imagedownload(plt, 'plot-accuracy-wide.pdf'),
                    unsafe_allow_html=True)
        with st.markdown('**Balanced Accuracy**'):
            # Tall
            models_test["Balanced Accuracy"] = [
                0 if i < 0 else i for i in models_test["Balanced Accuracy"]
            ]
            plt.figure(figsize=(3, 9))
            sns.set_theme(style="whitegrid")
            ax1 = sns.barplot(y=models_test.index,
                              x="Balanced Accuracy",
                              data=models_test)
            ax1.set(xlim=(0, 1))
        st.markdown(imagedownload(plt, 'plot-balanced-accuracy-tall.pdf'),
                    unsafe_allow_html=True)
        # Wide
        plt.figure(figsize=(9, 3))
        sns.set_theme(style="whitegrid")
        ax1 = sns.barplot(x=models_test.index,
                          y="Balanced Accuracy",
                          data=models_test)
        ax1.set(ylim=(0, 1))
        plt.xticks(rotation=90)
        st.pyplot(plt)
        st.markdown(imagedownload(plt, 'plot-balanced-accuracy-wide.pdf'),
                    unsafe_allow_html=True)
        with st.markdown('**F1 Score**'):
            # Tall
            models_test["F1 Score"] = [
                0 if i < 0 else i for i in models_test["F1 Score"]
            ]
            plt.figure(figsize=(3, 9))
            sns.set_theme(style="whitegrid")
            ax1 = sns.barplot(y=models_test.index,
                              x="F1 Score",
                              data=models_test)
            ax1.set(xlim=(0, 1))
        st.markdown(imagedownload(plt, 'plot-F1-Score-tall.pdf'),
                    unsafe_allow_html=True)
        # Wide
        plt.figure(figsize=(9, 3))
        sns.set_theme(style="whitegrid")
        ax1 = sns.barplot(x=models_test.index, y="F1 Score", data=models_test)
        ax1.set(ylim=(0, 1))
        plt.xticks(rotation=90)
        st.pyplot(plt)
        st.markdown(imagedownload(plt, 'plot-F1-Score-wide.pdf'),
                    unsafe_allow_html=True)
        with st.markdown('**Calculation time**'):
            # Tall
            models_test["Time Taken"] = [
                0 if i < 0 else i for i in models_test["Time Taken"]
            ]  #        plt.figure(figsize=(3, 9))
            sns.set_theme(style="whitegrid")
            ax3 = sns.barplot(y=models_test.index,
                              x="Time Taken",
                              data=models_test)
        st.markdown(imagedownload(plt, 'plot-calculation-time-tall.pdf'),
                    unsafe_allow_html=True)
        # Wide
        plt.figure(figsize=(9, 3))
        sns.set_theme(style="whitegrid")
        ax3 = sns.barplot(x=models_test.index,
                          y="Time Taken",
                          data=models_test)
        plt.xticks(rotation=90)
        st.pyplot(plt)
        st.markdown(imagedownload(plt, 'plot-calculation-time-wide.pdf'),
                    unsafe_allow_html=True)
Example #4
import numpy as np
import lazypredict
import joblib
from lazypredict.Supervised import LazyClassifier
from sklearn.datasets import load_files, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

DATA_DIR = "/home/ila/Documents/repos/python-works/artificialintelligence/machine_learning/doc_classification/classifydata/dataset_5classes/"
# DATA_DIR = "/home/ila/Documents/900_docs/ocr_text/"

# data = load_files(DATA_DIR, encoding="utf-8", decode_error="replace")
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=.5,
                                                    random_state=123)
clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
print(models)
models
Example #5
print(X)

X_full = df.drop('good_cond', axis=1)
y_full = df['good_cond']


# Run LazyClassifier once to get the list of all models:

# In[ ]:


# Splitting
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

# Run LazyClassifier once on the reduced dataset
clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
modellist = list(models.index.values)  # List of model names, used below
models


# Perform replications with 75% of the data as the training set. The R2 score of each model is recorded, and the models are ranked according to these scores; the scores and ranks are then averaged across replications.

# In[ ]:


Nrep = 1000 # Number of replications, the higher the better
r2score = np.zeros((len(modellist),Nrep)) # Initialize the r2score
position = np.zeros((len(modellist),Nrep)) # Initialize the position (rank)
for LOOP in range(0,Nrep):
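    # Sketch of one replication (assumed, not from the original snippet): re-split
    # the data, refit LazyClassifier, and record each model's score and rank.
    # LazyClassifier's results table has no R-Squared column, so "Accuracy" is used
    # here as a stand-in for the recorded score.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
    clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
    models, _ = clf.fit(X_train, X_test, y_train, y_test)
    for k, name in enumerate(modellist):
        if name in models.index:
            r2score[k, LOOP] = models.loc[name, "Accuracy"]          # assumed metric
            position[k, LOOP] = list(models.index).index(name) + 1   # rank in this run

# Average score and rank per model across all replications
mean_score = r2score.mean(axis=1)
mean_rank = position.mean(axis=1)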
Example #6
def main():
    output_dir = os.path.dirname(__file__)

    experiments = [
        # "A_May24_11_08_ela_skresnext50_32x4d_fold0_fp16",
        # "A_May15_17_03_ela_skresnext50_32x4d_fold1_fp16",
        # "A_May21_13_28_ela_skresnext50_32x4d_fold2_fp16",
        # "A_May26_12_58_ela_skresnext50_32x4d_fold3_fp16",
        #
        # "B_Jun05_08_49_rgb_tf_efficientnet_b6_ns_fold0_local_rank_0_fp16",
        # "B_Jun09_16_38_rgb_tf_efficientnet_b6_ns_fold1_local_rank_0_fp16",
        # "B_Jun11_08_51_rgb_tf_efficientnet_b6_ns_fold2_local_rank_0_fp16",
        # "B_Jun11_18_38_rgb_tf_efficientnet_b6_ns_fold3_local_rank_0_fp16",
        #
        # "C_Jun24_22_00_rgb_tf_efficientnet_b2_ns_fold2_local_rank_0_fp16",
        #
        # "D_Jun18_16_07_rgb_tf_efficientnet_b7_ns_fold1_local_rank_0_fp16",
        # "D_Jun20_09_52_rgb_tf_efficientnet_b7_ns_fold2_local_rank_0_fp16",
        #
        # "E_Jun18_19_24_rgb_tf_efficientnet_b6_ns_fold0_local_rank_0_fp16",
        # "E_Jun21_10_48_rgb_tf_efficientnet_b6_ns_fold0_istego100k_local_rank_0_fp16",
        #
        # "F_Jun29_19_43_rgb_tf_efficientnet_b3_ns_fold0_local_rank_0_fp16",
        #
        "G_Jul03_21_14_nr_rgb_tf_efficientnet_b6_ns_fold0_local_rank_0_fp16",
        "G_Jul05_00_24_nr_rgb_tf_efficientnet_b6_ns_fold1_local_rank_0_fp16",
        "G_Jul06_03_39_nr_rgb_tf_efficientnet_b6_ns_fold2_local_rank_0_fp16",
        "G_Jul07_06_38_nr_rgb_tf_efficientnet_b6_ns_fold3_local_rank_0_fp16",
        #
        "H_Jul11_16_37_nr_rgb_tf_efficientnet_b7_ns_mish_fold2_local_rank_0_fp16",
        "H_Jul12_18_42_nr_rgb_tf_efficientnet_b7_ns_mish_fold1_local_rank_0_fp16",
    ]

    holdout_predictions = get_predictions_csv(experiments, "cauc", "holdout",
                                              "d4")
    test_predictions = get_predictions_csv(experiments, "cauc", "test", "d4")
    checksum = compute_checksum_v2(experiments)

    holdout_ds = get_holdout("", features=[INPUT_IMAGE_KEY])
    image_ids = [fs.id_from_fname(x) for x in holdout_ds.images]

    quality_h = F.one_hot(torch.tensor(holdout_ds.quality).long(),
                          3).numpy().astype(np.float32)

    test_ds = get_test_dataset("", features=[INPUT_IMAGE_KEY])
    quality_t = F.one_hot(torch.tensor(test_ds.quality).long(),
                          3).numpy().astype(np.float32)

    x, y = get_x_y_for_stacking(holdout_predictions,
                                with_logits=True,
                                tta_logits=True)
    print(x.shape, y.shape)

    x_test, _ = get_x_y_for_stacking(test_predictions,
                                     with_logits=True,
                                     tta_logits=True)
    print(x_test.shape)

    if True:
        x = np.column_stack([x, quality_h])
        x_test = np.column_stack([x_test, quality_t])

    group_kfold = GroupKFold(n_splits=5)

    for fold_index, (train_index, valid_index) in enumerate(
            group_kfold.split(x, y, groups=image_ids)):
        x_train, x_valid, y_train, y_valid = (x[train_index], x[valid_index],
                                              y[train_index], y[valid_index])

        clf = LazyClassifier(verbose=True,
                             ignore_warnings=False,
                             custom_metric=alaska_weighted_auc,
                             predictions=True)
        models, predictions = clf.fit(x_train, x_valid, y_train, y_valid)
        print(models)

        models.to_csv(
            os.path.join(output_dir,
                         f"lazypredict_models_{fold_index}_{checksum}.csv"))
        predictions.to_csv(
            os.path.join(output_dir,
                         f"lazypredict_preds_{fold_index}_{checksum}.csv"))
Example #7

# sm is presumably an imbalanced-learn over-sampler such as SMOTE; fit_sample is
# the older name of what newer versions call fit_resample
df_X, df_Y = sm.fit_sample(dfX, dfY.ravel())
df_Y = pd.DataFrame(df_Y, columns=['sex_b'])

# In[17]:

sns.countplot(x='sex_b', data=df_Y)

# #### Lazy predict classification

# In[18]:

X_train, X_test, y_train, y_test = train_test_split(df_X,
                                                    df_Y,
                                                    test_size=.33,
                                                    random_state=123)
clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
models

# ## DBSCAN (Density-Based Spatial Clustering of Applications with Noise)

# #### This is a clustering algorithm (an alternative to K-Means) that clusters points together and identifies any points not belonging to a cluster as outliers. It’s like K-means, except the number of clusters does not need to be specified in advance.
# #### The method, step-by-step:
# #### Randomly select a point not already assigned to a cluster or designated as an outlier. Determine if it’s a core point by seeing if there are at least min_samples points around it within epsilon distance.
# #### Create a cluster of this core point and all points within epsilon distance of it (all directly reachable points).
# #### Find all points that are within epsilon distance of each point in the cluster and add them to the cluster. Find all points that are within epsilon distance of all newly added points and add these to the cluster. Rinse and repeat. (i.e. perform “neighborhood jumps” to find all density-reachable points and add them to the cluster).

# ### Sklearn Implementation of DBSCAN:

# In[19]:
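# A minimal sketch of sklearn's DBSCAN on the resampled features from above
# (df_X is assumed to be available from earlier in this example; the eps and
# min_samples values are illustrative, not taken from the original):

from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

# Scale the features so the epsilon distance treats all columns comparably
X_scaled = StandardScaler().fit_transform(df_X)

db = DBSCAN(eps=0.5, min_samples=5).fit(X_scaled)
labels = db.labels_  # points labelled -1 do not belong to any cluster (outliers)

n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
print('Clusters found:', n_clusters, '| Outliers:', list(labels).count(-1))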
Example #8
import numpy as np
import pandas as pd

data = pd.read_csv(r'D:\Datasets\winequality-red.csv')
#print(data.head())

threshold = 5
data['quality'] = np.where(data['quality']>threshold,1,0)
#print(data.quality.value_counts())

x = data.drop('quality',axis=1)
y = data['quality']

from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25,random_state=75)

from lazypredict.Supervised import LazyClassifier
lpc = LazyClassifier()
models,predictions = lpc.fit(x_train,x_test,y_train,y_test)

print(models)

from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier()
rfc.fit(x_train,y_train)

y_pred = rfc.predict(x_test)

print("Without Hyperparamter Tuning :- ")

from sklearn import metrics
print("Accuracy Score :- ",metrics.accuracy_score(y_test,y_pred))
print("Confusion Matrix :- ",metrics.confusion_matrix(y_test,y_pred))
Example #9
        def build_model(df):
            l = len(df)

            #df = df.iloc[:100]
            X = df.iloc[:, :
                        -1]  # Use all columns except the last one as X
            Y = df.iloc[:, -1]  # Select the last column as Y

            st.markdown('**1.2. Dataset dimension**')
            st.write('X (Independent Axis)')
            st.info(X.shape)
            st.write('Y (Dependent Axis)')
            st.info(Y.shape)

            st.markdown('**1.3. Variable details**:')
            st.write('X variable (first few are shown)')
            st.info(list(X.columns[:int(l / 5)]))
            st.write('Y variable')
            st.info(Y.name)

            # Build lazy model
            X_train, X_test, Y_train, Y_test = train_test_split(
                X, Y, test_size=split_size, random_state=seed_number)
            clf = LazyClassifier(verbose=0,
                                 ignore_warnings=False,
                                 custom_metric=None)
            models_train, predictions_train = clf.fit(X_train, X_train,
                                                      Y_train, Y_train)
            models_test, predictions_test = clf.fit(X_train, X_test, Y_train,
                                                    Y_test)

            st.subheader('2. Model Performance Plot (Training Set)')

            st.write('Training set')
            st.write(predictions_train)
            st.markdown(filedownload(predictions_train, 'training.csv'),
                        unsafe_allow_html=True)

            st.write('Test set')
            st.write(predictions_test)
            st.markdown(filedownload(predictions_test, 'test.csv'),
                        unsafe_allow_html=True)

            st.subheader('3. Model Performance Plot (Test set)')

            with st.markdown('**Accuracy**'):
                # Tall
                predictions_test["Accuracy"] = [
                    0 if i < 0 else i for i in predictions_test["Accuracy"]
                ]
                plt.figure(figsize=(5, 12))
                sns.set_theme(style="darkgrid")
                ax1 = sns.barplot(y=predictions_test.index,
                                  x="Accuracy",
                                  data=predictions_test)
                ax1.set(xlim=(0, 1))
            st.markdown(imagedownload(plt, 'plot-r2-tall.pdf'),
                        unsafe_allow_html=True)
            # Wide
            plt.figure(figsize=(12, 5))
            sns.set_theme(style="darkgrid")
            ax1 = sns.barplot(x=predictions_test.index,
                              y="Accuracy",
                              data=predictions_test)
            ax1.set(ylim=(0, 1))
            plt.xticks(rotation=90)
            st.pyplot(plt)
            st.markdown(imagedownload(plt, 'plot-r2-wide.pdf'),
                        unsafe_allow_html=True)
Example #10
from lazypredict.Supervised import LazyClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

data = load_breast_cancer()
message = data.data
related = data.target

# splitting for train and test
x_train, x_test, y_train, y_test = train_test_split(message,
                                                    related,
                                                    train_size=0.9)

# Using lazy predict
clf = LazyClassifier(classifiers='all')
model, predictions = clf.fit(x_train, x_test, y_train, y_test)
print(model)
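
# If you only want to compare a handful of estimators, LazyClassifier also accepts a
# list of scikit-learn classifier classes for the `classifiers` argument (support for
# this may vary between lazypredict versions); a quick sketch:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

clf_subset = LazyClassifier(classifiers=[RandomForestClassifier, LogisticRegression])
models_subset, _ = clf_subset.fit(x_train, x_test, y_train, y_test)
print(models_subset)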
Example #11
def build_model(df, model, split_size, seed_number, label):
    # FOR TESTING PURPOSE, COMMENT THIS OUT FOR PRODUCTION
    progress = 0
    if label != "":
        dataTypeObj = df.dtypes[label]
        Y = df[[label]]
        # Using all column except for the last column as X
        X = df.drop([label], axis=1)
        progress = 1

    if progress == 1:
        st.markdown('**Dataset dimension**')
        st.write('X')
        st.info(X.shape)
        st.write('Y')
        st.info(Y.shape)

        st.markdown('**Variable details**:')
        st.write('X variable')
        st.info(list(X.columns))
        st.write('Y variable')
        st.info(label)
        # Build lazy model
        X_train, X_test, Y_train, Y_test = train_test_split(
            X, Y, test_size=split_size, random_state=seed_number)
        agree = st.selectbox("Run one model or all models?", ("One", "All"))
        if agree == "One":
            if model == 'Classification':
                l = [
                    "KNN", "SVM", "Random Forest", "BaggingClassifier",
                    "DecisionTreeClassifier", "ExtraTreeClassifier",
                    "GaussianProcessClassifier", "LinearSVC", "NuSVC",
                    "AdaBoostClassifier"
                ]
                classifier_name = st.selectbox("Select Classifier", l)
                params = add_parameter_ui(classifier_name)

                clf = get_classifier(classifier_name, params)
                clf.fit(X_train, Y_train)

                Y_pred = clf.predict(X_test)

                acc = accuracy_score(Y_test, Y_pred)
                mse = mean_squared_error(Y_pred, Y_test)

                rmse = math.sqrt(mse)

                st.write(f"Classifier = {classifier_name}")
                st.write(f"Accuracy = {acc}")
                st.write(f"rmse = {rmse}")
            else:
                l = [
                    "LinearRegression", "RandomForestRegressor",
                    "DecisionTreeRegressor", "GaussianProcessRegressor",
                    "ExtraTreeRegressor", "LGBMRegressor", "BaggingRegressor",
                    "KNeighborsRegressor", "Lars", "SVR", "NuSVR"
                ]
                model_name = st.selectbox("Select Regression Model", l)
                reg = get_linear_model(model_name)
                reg.fit(X_train, Y_train)
                Y_pred = reg.predict(X_test)
                acc = reg.score(X_test, Y_test)
                mse = mean_squared_error(Y_pred, Y_test)
                rmse = math.sqrt(mse)
                st.write(f"Regression Model = {model_name}")
                st.write(f"Accuracy = {acc}")
                st.write(f"rmse = {rmse}")

        elif agree == "All":

            if model == 'Regression':
                reg = LazyRegressor(verbose=0,
                                    ignore_warnings=False,
                                    custom_metric=None)
            elif model == 'Classification':
                reg = LazyClassifier(verbose=0,
                                     ignore_warnings=True,
                                     custom_metric=None)
            models_train, predictions_train = reg.fit(X_train, X_train,
                                                      Y_train, Y_train)
            models_test, predictions_test = reg.fit(X_train, X_test, Y_train,
                                                    Y_test)

            st.subheader('Table of Model Performance')
            st.write('Training set')
            st.write(predictions_train)
            st.markdown(filedownload(predictions_train, 'training.csv'),
                        unsafe_allow_html=True)

            st.write('Test set')
            st.write(Y_test)
            st.write(predictions_test)
            st.write(models_test)
            st.markdown(filedownload(predictions_test, 'test.csv'),
                        unsafe_allow_html=True)

            st.subheader('4. Plot of Model Performance (Test set)')

            with st.markdown('**R-squared**'):
                # Tall
                predictions_test["R-Squared"] = [
                    0 if i < 0 else i for i in predictions_test["R-Squared"]
                ]
                plt.figure(figsize=(3, 9))
                sns.set_theme(style="whitegrid")
                ax1 = sns.barplot(y=predictions_test.index,
                                  x="R-Squared",
                                  data=predictions_test)
                ax1.set(xlim=(0, 1))
            st.markdown(imagedownload(plt, 'plot-r2-tall.pdf'),
                        unsafe_allow_html=True)
            # Wide
            plt.figure(figsize=(9, 3))
            sns.set_theme(style="whitegrid")
            ax1 = sns.barplot(x=predictions_test.index,
                              y="R-Squared",
                              data=predictions_test)
            ax1.set(ylim=(0, 1))
            plt.xticks(rotation=90)
            st.pyplot(plt)
            st.markdown(imagedownload(plt, 'plot-r2-wide.pdf'),
                        unsafe_allow_html=True)

            with st.markdown('**RMSE (capped at 50)**'):
                # Tall
                plt.figure(figsize=(3, 9))
                sns.set_theme(style="whitegrid")
                ax2 = sns.barplot(y=predictions_test.index,
                                  x="RMSE",
                                  data=predictions_test)
            st.markdown(imagedownload(plt, 'plot-rmse-tall.pdf'),
                        unsafe_allow_html=True)
            # Wide
            plt.figure(figsize=(9, 3))
            sns.set_theme(style="whitegrid")
            ax2 = sns.barplot(x=predictions_test.index,
                              y="RMSE",
                              data=predictions_test)
            plt.xticks(rotation=90)
            st.pyplot(plt)
            st.markdown(imagedownload(plt, 'plot-rmse-wide.pdf'),
                        unsafe_allow_html=True)

            with st.markdown('**Calculation time**'):
                # Tall
                predictions_test["Time Taken"] = [
                    0 if i < 0 else i for i in predictions_test["Time Taken"]
                ]  # plt.figure(figsize=(3, 9))
                sns.set_theme(style="whitegrid")
                ax3 = sns.barplot(y=predictions_test.index,
                                  x="Time Taken",
                                  data=predictions_test)
            st.markdown(imagedownload(plt, 'plot-calculation-time-tall.pdf'),
                        unsafe_allow_html=True)
            # Wide
            plt.figure(figsize=(9, 3))
            sns.set_theme(style="whitegrid")
            ax3 = sns.barplot(x=predictions_test.index,
                              y="Time Taken",
                              data=predictions_test)
            plt.xticks(rotation=90)
            st.pyplot(plt)
            st.markdown(imagedownload(plt, 'plot-calculation-time-wide.pdf'),
                        unsafe_allow_html=True)