Beispiel #1
0
def main():
    """Semi Automated ML App with Streamlit.

    Renders the activity chosen in the sidebar:

    * ``EDA`` -- upload a CSV and inspect its shape, columns, summary
      statistics, value counts, correlation plots and a pie plot.
    * ``Plots`` -- upload a CSV and draw a value-count bar plot or a
      customizable plot of the selected columns.
    * ``Model Building`` -- upload a CSV, use the last column as the target
      and every other column as a feature, and report 10-fold
      cross-validated accuracy for six classifiers.
    * ``About`` -- currently renders nothing.
    """
    activities = ["EDA", "Plots", "Model Building", "About"]
    choice = st.sidebar.selectbox("Select Activities", activities)

    if choice == 'EDA':
        st.subheader("Exploratory Data Analysis")

        data = st.file_uploader("Upload a Dataset", type=["csv", "txt"])

        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            # Computed up front: "Show Selected Columns" needs the column
            # list even when the "Show Columns" checkbox is left unchecked.
            all_columns = df.columns.to_list()
            # Correlation is only defined for numeric data; recent pandas
            # raises on non-numeric columns instead of dropping them.
            numeric_df = df.select_dtypes(include=["number", "bool"])

            if st.checkbox("Show Shape"):
                st.write(df.shape)

            if st.checkbox("Show Columns"):
                st.write(all_columns)

            if st.checkbox("Summary"):
                st.write(df.describe())

            if st.checkbox("Show Selected Columns"):
                selected_columns = st.multiselect("Select Columns",
                                                  all_columns)
                st.dataframe(df[selected_columns])

            if st.checkbox("Show Value Counts"):
                st.write(df.iloc[:, -1].value_counts())

            # Every plot below draws on its own figure and hands that figure
            # to st.pyplot, instead of relying on matplotlib's global state.
            if st.checkbox("Correlation Plot(Matplotlib)"):
                fig, ax = plt.subplots()
                ax.matshow(numeric_df.corr())
                st.pyplot(fig)

            if st.checkbox("Correlation Plot(Seaborn)"):
                fig, ax = plt.subplots()
                sns.heatmap(numeric_df.corr(), annot=True, ax=ax)
                st.pyplot(fig)

            if st.checkbox("Pie Plot"):
                column_to_plot = st.selectbox("Select 1 Column", all_columns)
                fig, ax = plt.subplots()
                df[column_to_plot].value_counts().plot.pie(
                    autopct="%1.1f%%", ax=ax)
                st.pyplot(fig)

    elif choice == 'Plots':
        st.subheader("Data Visualization")
        data = st.file_uploader("Upload a Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            if st.checkbox("Show Value Counts"):
                fig, ax = plt.subplots()
                df.iloc[:, -1].value_counts().plot(kind='bar', ax=ax)
                st.pyplot(fig)

            # Customizable Plot
            all_columns_names = df.columns.tolist()
            type_of_plot = st.selectbox(
                "Select Type of Plot",
                ["area", "bar", "line", "hist", "box", "kde"])
            selected_columns_names = st.multiselect("Select Columns To Plot",
                                                    all_columns_names)

            if st.button("Generate Plot"):
                if not selected_columns_names:
                    # pandas cannot plot a frame without columns.
                    st.warning("Select at least one column to plot")
                else:
                    st.success(
                        "Generating Customizable Plot of {} for {}".format(
                            type_of_plot, selected_columns_names))
                    cust_data = df[selected_columns_names]

                    # Area, bar and line use Streamlit's native charts.
                    if type_of_plot == 'area':
                        st.area_chart(cust_data)
                    elif type_of_plot == 'bar':
                        st.bar_chart(cust_data)
                    elif type_of_plot == 'line':
                        st.line_chart(cust_data)
                    # hist, box and kde go through pandas/matplotlib.
                    else:
                        fig, ax = plt.subplots()
                        cust_data.plot(kind=type_of_plot, ax=ax)
                        st.pyplot(fig)

    elif choice == 'Model Building':
        st.subheader("Building ML Models")
        data = st.file_uploader("Upload a Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            # The last column is the target, everything before it a feature.
            X = df.iloc[:, 0:-1]
            Y = df.iloc[:, -1]
            seed = 7
            models = [
                ('LR', LogisticRegression()),
                ('LDA', LinearDiscriminantAnalysis()),
                ('KNN', KNeighborsClassifier()),
                ('CART', DecisionTreeClassifier()),
                ('NB', GaussianNB()),
                ('SVM', SVC()),
            ]
            scoring = 'accuracy'
            # random_state is only meaningful (and only accepted by current
            # scikit-learn) when shuffling is enabled.  The integer seed
            # makes every model see the same folds.
            kfold = model_selection.KFold(n_splits=10,
                                          shuffle=True,
                                          random_state=seed)

            all_models = []
            for name, model in models:
                cv_results = model_selection.cross_val_score(model,
                                                             X,
                                                             Y,
                                                             cv=kfold,
                                                             scoring=scoring)
                all_models.append({
                    "model name": name,
                    "model_accuracy": cv_results.mean(),
                    "standard deviation": cv_results.std()
                })

            if st.checkbox("Metrics As Table"):
                rows = [(result["model name"],
                         result["model_accuracy"],
                         result["standard deviation"])
                        for result in all_models]
                st.dataframe(
                    pd.DataFrame(rows,
                                 columns=["Algo", "Mean of Accuracy", "Std"]))

            if st.checkbox("Metrics As JSON"):
                st.json(all_models)
Beispiel #2
0

# Sensor page: optional raw-data table in the main area plus a set of charts
# for the sensor picked in the sidebar.
if option == "sensor":
    # Two loads with different arguments: the small one feeds the charts,
    # the large one feeds the raw-data table.
    # NOTE(review): the argument is presumably a row limit -- confirm
    # against load_sensor.
    sensor_data = load_sensor(20)
    sensor_data1 = load_sensor(100000)

    if st.sidebar.checkbox("Sensor dataset"):
        st.subheader("Sensor datasets")
        st.write(sensor_data1)

    dataframe = pd.DataFrame(
        {'first column': ["sensor_00", "sensor_01", "sensor_02"]})
    st.sidebar.subheader("Choose your sensor for ploting")
    sensor_option = st.sidebar.selectbox('', dataframe['first column'])

    if st.sidebar.button("Plot"):
        # Look the selected column up once and reuse it for every chart.
        selected_readings = sensor_data[sensor_option]

        st.subheader("Line chart")
        st.line_chart(selected_readings)

        st.subheader("Bar chart")
        st.bar_chart(selected_readings)

        # The histogram is drawn on matplotlib's current figure and then
        # rendered through the global st.pyplot() call, with the related
        # deprecation warning switched off first.
        selected_readings.hist()
        st.subheader("Histogram")
        st.set_option('deprecation.showPyplotGlobalUse', False)
        st.pyplot()

        st.subheader("Area chart")
        st.area_chart(selected_readings)
Beispiel #3
0
            "field": "a",
            "type": "quantitative"
        },
        "y": {
            "field": "b",
            "type": "quantitative"
        },
    },
}

# 5 empty charts
# None of these calls is given any data.
# NOTE(review): `spec` is presumably the dict literal that ends just above --
# its assignment is not visible here.
st.vega_lite_chart(spec)
st.pyplot()
st.line_chart()
st.bar_chart()
st.area_chart()

# 1 empty map
# NOTE(review): st.deck_gl_chart looks like a legacy Streamlit API -- confirm
# it exists in the Streamlit version this script targets.
st.deck_gl_chart()

# 6 errors
# Each call below is expected to raise; the exception is written to the page
# instead of aborting the script.  Only two of the six cases are visible here.
try:
    st.vega_lite_chart({})
except Exception as e:
    st.write(e)

# NOTE(review): `data` is not assigned anywhere in the visible part of this
# script -- confirm it is defined earlier, otherwise the error shown here is a
# NameError rather than a Vega-Lite one.
try:
    st.vega_lite_chart(data, {})
except Exception as e:
    st.write(e)
Beispiel #4
0
def _run_classification(spinner_text, import_data, interpret, data_type,
                        filename, amount):
    """Run one classifier, chart its data and list the matching articles.

    Args:
        spinner_text: Message shown in the spinner while the work runs.
        import_data: Callable taking ``data_type`` and returning a table
            with a ``"Health"`` column, which is drawn as an area chart.
        interpret: Callable taking ``data_type`` and returning the
            classifier's interpretation, passed on to ``health_query``.
        data_type: Forwarded unchanged to ``import_data`` and ``interpret``.
        filename: Path of the articles CSV queried by ``health_query``.
        amount: Forwarded to ``health_query.perform_methods``.
    """
    with st.spinner(spinner_text):
        new_data = import_data(data_type)
        st.area_chart(new_data["Health"])
        interpretation = interpret(data_type)
        results = health_query.perform_methods(filename, interpretation,
                                               amount)
        for _, article in results.iterrows():
            st.header(article["Titles"])
            st.write(article["Date Published"])
            st.write(article["Abstract"])
    st.success("Complete!")


def classify_data(dataset, data_type, filename=None):
    """Classify data and health risks with selected classification.

    Shows one checkbox per classification method.  For each ticked method
    the classifier's ``"Health"`` data is charted and the articles returned
    by ``health_query.perform_methods`` are listed (title, publication date
    and abstract).

    Args:
        dataset: Not used by this function; kept so existing callers work.
        data_type: Forwarded to the classifier modules.
        filename: Path of the articles CSV.  Defaults to the original
            hard-coded location so existing callers are unaffected.
            NOTE(review): that default is an absolute path on one
            developer's machine -- callers elsewhere should pass a path.
    """
    if filename is None:
        filename = "/home/maddykapfhammer/Documents/Allegheny/MozillaFellows/predictiveWellness/vigor/dataFiles/PubMedArticles.csv"
    amount = st.number_input("How much data would you like to be produced?", min_value=1)

    # All four checkboxes are created before any classifier runs so the
    # widgets appear together above the results.
    st.header("Please Choose Your Method of Classification:")
    naive_classification = st.checkbox("Naive Bayes Classification")
    gini_classification = st.checkbox("Gini Index Decision Tree Classification")
    entropy_classification = st.checkbox("Entropy Decision Tree Classification")
    svm_classification = st.checkbox("Support Vector Machine Classification")

    if naive_classification:
        _run_classification("Classifying with Naive Bayes...",
                            naive_bayes.import_data,
                            naive_bayes.perform_methods,
                            data_type, filename, amount)

    if gini_classification:
        _run_classification("Classifying with Gini Index...",
                            decision_tree.import_data,
                            decision_tree.perform_gini_index,
                            data_type, filename, amount)

    if entropy_classification:
        _run_classification("Classifying with Entropy...",
                            decision_tree.import_data,
                            decision_tree.perform_entropy,
                            data_type, filename, amount)

    if svm_classification:
        _run_classification("Classifying with Support Vector Machine...",
                            svm.import_data,
                            svm.perform_methods,
                            data_type, filename, amount)
elif choices == 'Visualize':
    # "Visualize" page: reads the dataset and offers one chart type per
    # sidebar checkbox.  The `if` this branch belongs to is not visible here.
    st.subheader("Visualize")
    df = pd.read_csv(data)
    all_col = df.columns.to_list()
    # NOTE(review): all_coll duplicates all_col and is not used in this branch.
    all_coll = df.columns.to_list()

    if st.sidebar.checkbox("Bar Chart"):
        # The label has a trailing space and an explicit key -- presumably to
        # keep this widget distinct from the "Select Features" multiselect
        # in the area-chart section.
        bar = st.multiselect("Select Features ", all_col, key='a')
        # NOTE(review): new_df is assigned but the chart uses df[bar] directly.
        new_df = df[bar]
        st.bar_chart(df[bar])

    if st.sidebar.checkbox("Area_chart"):

        area = st.multiselect("Select Features", all_col)
        # NOTE(review): new_dff is assigned but the chart uses df[area] directly.
        new_dff = df[area]
        st.area_chart(df[area])

    if st.sidebar.checkbox("Line Chart"):
        # Plots every column of the dataset.
        st.line_chart(df)

    if st.sidebar.checkbox("Plotly charts"):

        # Three column pickers; the distinct labels/keys presumably exist to
        # keep the widgets from colliding.
        X1 = st.selectbox("select ", all_col, key='b')
        X2 = st.selectbox("select", all_col, key='m')
        X3 = st.selectbox("select", all_col)
        hist_data = [df[X1], df[X2], df[X3]]
        group_labels = [X1, X2, X3]
        # One bin size per selected column, in the same order as hist_data.
        fig = ff.create_distplot(hist_data,
                                 group_labels,
                                 bin_size=[.1, .25, .5])
        st.plotly_chart(fig, use_container_width=True)
Beispiel #6
0
def main():
    """Run the wine alcohol-class Streamlit app.

    Nothing is rendered until a CSV is uploaded.  After that the sidebar
    selects one of four pages:

    * ``Homepage`` -- title plus the full uploaded table.
    * ``Exploration`` -- optional shape / columns / summary / value-count
      views, followed by a correlation heatmap and a pair plot.
    * ``Plots`` -- value-count bar plot and a customizable plot.
    * ``Prediction`` -- trains a model through ``train_model`` and predicts
      the class of the selected row.
    """
    data = st.file_uploader("Upload a Dataset", type=["csv", "txt"])
    if data is None:
        return

    df = pd.read_csv(data)
    page = st.sidebar.selectbox(
        "Choose a page",
        ['Homepage', 'Exploration', 'Plots', 'Prediction'])

    if page == 'Homepage':
        st.title('Wine Alcohol Class Prediction')
        st.text('Select a page in the sidebar')
        st.dataframe(df)

    elif page == 'Exploration':
        st.title('Explore the Wine Data-set')
        # Computed up front: "Show Selected Columns" needs the column list
        # even when the "Show Columns" checkbox is left unchecked.
        all_columns = df.columns.to_list()

        if st.checkbox("Show Shape"):
            st.dataframe(df.shape)

        if st.checkbox("Show Columns"):
            st.dataframe(all_columns)

        if st.checkbox('Show column descriptions'):
            st.dataframe(df.describe())

        if st.checkbox("Show Selected Columns"):
            selected_columns = st.multiselect("Select Columns",
                                              all_columns)
            st.dataframe(df[selected_columns])

        if st.checkbox("Show Value Counts"):
            st.dataframe(df.iloc[:, -1].value_counts())

        st.markdown('### Analysing column relations')
        st.text('Correlations:')
        # Figures are passed to st.pyplot explicitly rather than through
        # matplotlib's global "current figure".
        fig, ax = plt.subplots(figsize=(10, 10))
        sns.heatmap(df.corr(), annot=True, ax=ax)
        st.pyplot(fig)

        st.text('Effect of the different classes')
        # pairplot builds its own figure, exposed on the returned grid.
        pair_grid = sns.pairplot(df,
                                 vars=[
                                     'magnesium', 'flavanoids',
                                     'nonflavanoid_phenols', 'proline'
                                 ],
                                 hue='alcohol')
        st.pyplot(pair_grid.fig)

    elif page == 'Plots':
        st.subheader("Data Visualization")
        st.title('Plots')
        if st.checkbox("Show Value Counts"):
            fig, ax = plt.subplots()
            df.iloc[:, -1].value_counts().plot(kind='bar', ax=ax)
            st.pyplot(fig)

        # Customizable Plot
        all_columns_names = df.columns.tolist()
        type_of_plot = st.selectbox(
            "Select Type of Plot",
            ["area", "bar", "line", "hist", "box", "kde"])
        selected_columns_names = st.multiselect("Select Columns To Plot",
                                                all_columns_names)

        if st.button("Generate Plot"):
            if not selected_columns_names:
                # pandas cannot plot a frame without columns.
                st.warning("Select at least one column to plot")
            else:
                st.success("Generating Customizable Plot of {} for {}".format(
                    type_of_plot, selected_columns_names))
                cust_data = df[selected_columns_names]

                # Area, bar and line use Streamlit's native charts.
                if type_of_plot == 'area':
                    st.area_chart(cust_data)
                elif type_of_plot == 'bar':
                    st.bar_chart(cust_data)
                elif type_of_plot == 'line':
                    st.line_chart(cust_data)
                # hist, box and kde go through pandas/matplotlib.
                else:
                    fig, ax = plt.subplots()
                    cust_data.plot(kind=type_of_plot, ax=ax)
                    st.pyplot(fig)

    else:
        st.title('Modelling')
        model, accuracy = train_model(df)
        st.write('Accuracy: ' + str(accuracy))
        st.markdown('### Make prediction')
        st.dataframe(df)
        row_number = st.number_input('Select row',
                                     min_value=0,
                                     max_value=len(df) - 1,
                                     value=0)
        st.markdown('#### Predicted')
        # The widget is bounded by position (0 .. len(df) - 1), so the row is
        # selected positionally; 'alcohol' is the target and is dropped.
        features = df.drop(['alcohol'], axis=1)
        sample = features.iloc[row_number].values.reshape(1, -1)
        st.text(model.predict(sample)[0])
Beispiel #7
0
```python
import streamlit as st
import numpy as np
import pandas as pd

```

"""

# Charts
# Generate a random matrix with 20 rows and 3 columns
df = pd.DataFrame(np.random.rand(20, 3), columns=['a', 'b', 'c'])
# Display the charts
st.line_chart(df)  # line chart

st.area_chart(df)  # area chart

st.bar_chart(df)  # bar chart

# Display a map

# 100 random points: uniform values scaled down by 50 and shifted so they
# cluster around (35.69, 139.70).
df = pd.DataFrame(
    np.random.rand(100, 2) / [50, 50] + [35.69, 139.70],
    columns=['lat', 'lon']  # lat = latitude, lon = longitude
)
# st.write(df)
st.map(df)

# Display an image
st.write('Display Image')
def main():
    """Mortality Prediction App.

    Sidebar menu:

    * ``Home`` -- welcome text.
    * ``Login`` -- after a successful login the user picks an activity.
      ``Plot`` shows the cleaned data set with frequency charts.
      ``Prediction`` collects the patient features, predicts the outcome
      with the selected model and can explain that prediction with LIME.
      ``Metrics`` currently renders nothing.
    * ``SignUp`` -- currently renders nothing.
    """
    st.title("Hepatitis Disease Mortality Prediction App")

    menu = ["Home","Login","SignUp"]
    submenu = ["Plot","Prediction","Metrics"]
    # Pickled model file for each entry of the "Select Model" box.
    model_paths = {
        "LR": "models/LogisticReg_HepatitisB_model.pkl",
        "KNN": "models/KNN_HepatitisB_model.pkl",
        "DecisionTree": "models/decision_tree_clf_HepatitisB_model.pkl",
    }
    feature_names = ['age', 'sex', 'steroid', 'antivirals','fatigue','spiders', 'ascites','varices', 'bilirubin', 'alk_phosphate', 'sgot', 'albumin', 'protime','histology']

    choice = st.sidebar.selectbox("Menu",menu)
    if choice == "Home":
        st.subheader("Home")
        st.text("Welcome")

    elif choice == "Login":
        username = st.sidebar.text_input("Username")
        password = st.sidebar.text_input("Password",type="password")
        if st.sidebar.checkbox("Login"):
            create_usertable()
            # NOTE(review): the password is hashed and then verified against
            # its own hash before the lookup; the check therefore depends
            # entirely on login_user -- confirm against those helpers.
            hashed_pswd = generate_hashes(password)
            result = login_user(username,verify_hashes(password,hashed_pswd))
            if result:
                st.success("Welcome {}".format(username))

                activity = st.selectbox("Activity",submenu)
                if activity == "Plot":
                    st.subheader("Data Visualization Plot")
                    df = pd.read_csv("data/cleaned_data.csv")
                    st.dataframe(df)

                    # freq distribution plot
                    freq_df = pd.read_csv("data/Age_frq_Dist.csv")
                    st.bar_chart(freq_df["count"])
                    st.dataframe(freq_df)

                    # Drawn on an explicit figure so st.pyplot renders
                    # exactly this plot.
                    fig, ax = plt.subplots()
                    df['class'].value_counts().plot(kind="bar", ax=ax)
                    st.pyplot(fig)

                    if st.checkbox("Area Chart"):
                        all_columns = df.columns.to_list()
                        feat_choices = st.multiselect("Choose a feature" , all_columns)
                        new_df = df[feat_choices]
                        st.area_chart(new_df)

                # prediction
                elif activity == "Prediction":
                    st.subheader("Predictive Analytics")

                    age = st.number_input("Age",7,80)
                    sex = st.radio("Sex",tuple(gender_dict.keys()))
                    steroid = st.radio("Do You Take Steroids?",tuple(feature_dict.keys()))
                    antivirals = st.radio("Do You Take Antivirals?",tuple(feature_dict.keys()))
                    fatique = st.radio("Do You Have Fatique?",tuple(feature_dict.keys()))
                    spiders = st.radio("Presence of Spider Naeve",tuple(feature_dict.keys()))
                    ascites = st.selectbox("Ascites",tuple(feature_dict.keys()))
                    varices = st.selectbox("Presence of Varices",tuple(feature_dict.keys()))
                    bilirubin = st.number_input("bilirubin Content",0.0,8.0)
                    alk_phosphate = st.number_input("Alkaline Phosphate Content",0.0,296.0)
                    sgot = st.number_input("Sgot",0.0,648.0)
                    albumin = st.number_input("Albumin",0.0,6.4)
                    protime = st.number_input("Prothrombin Time",0.0,100.0)
                    histology = st.radio("Histology",tuple(feature_dict.keys()))

                    # Encoded in the same order as feature_names.
                    feature_list = [
                        age,
                        get_value(sex,gender_dict),
                        get_fvalue(steroid),
                        get_fvalue(antivirals),
                        get_fvalue(fatique),
                        get_fvalue(spiders),
                        get_fvalue(ascites),
                        get_fvalue(varices),
                        bilirubin,
                        alk_phosphate,
                        sgot,
                        albumin,
                        int(protime),
                        get_fvalue(histology),
                    ]
                    st.write(len(feature_list))
                    st.write(feature_list)
                    pretty_result = {"age":age,"sex":sex,"steroid":steroid,"antivirals":antivirals,"fatique":fatique,"spiders":spiders,
                                  "ascites":ascites , "varices":varices ,"bilirubin":bilirubin,"alk_phosphate":alk_phosphate,"sgot":sgot,
                                  "albumin":albumin,"protime":protime,"histology":histology}
                    st.json(pretty_result)
                    single_sample = np.array(feature_list).reshape(1,-1)

                    # Machine Learning models
                    model_choice = st.selectbox("Select Model",["LR","KNN","DecisionTree"])

                    if st.button("Predict"):
                        loaded_model = load_model(model_paths[model_choice])
                        prediction = loaded_model.predict(single_sample)
                        pred_prob = loaded_model.predict_proba(single_sample)

                        # Class 1 means "Die", class 2 means "Live".
                        patient_dies = prediction[0] == 1
                        if patient_dies:
                            st.warning("Patient Dies")
                        else:
                            st.success("Patient Lives")

                        pred_probability_score = {"Die":pred_prob[0][0]*100 , "Live":pred_prob[0][1]*100}
                        st.subheader("Prediction Probability Score using {}".format(model_choice))
                        st.json(pred_probability_score)

                        if patient_dies:
                            st.subheader("Prescriptive Analytics")
                            st.markdown(prescriptive_message_temp,unsafe_allow_html=True)

                    # The explanation needs model_choice and feature_list, so
                    # it lives inside the Prediction activity, and it runs for
                    # whichever model is selected.
                    if st.checkbox("Interpret"):
                        loaded_model = load_model(model_paths[model_choice])

                        # 1 Die and 2 Live
                        df = pd.read_csv("data/cleaned_data.csv")
                        x = df[feature_names]
                        class_names = ['Die(1)','Live(2)']
                        explainer = lime.lime_tabular.LimeTabularExplainer(x.values,feature_names=feature_names, class_names=class_names,discretize_continuous=True)
                        # The Explainer Instance
                        exp = explainer.explain_instance(np.array(feature_list), loaded_model.predict_proba,num_features=13, top_labels=1)
                        # NOTE(review): top_labels=1 presumably keeps only the
                        # top predicted label, while as_list() and
                        # as_pyplot_figure() use their default label --
                        # confirm both always refer to the same label.
                        new_exp = exp.as_list()
                        st.write(new_exp)
                        label_limits = [i[0] for i in new_exp]
                        label_scores = [i[1] for i in new_exp]

                        fig, ax = plt.subplots()
                        ax.barh(label_limits,label_scores)
                        st.pyplot(fig)

                        # as_pyplot_figure returns its own figure, which is
                        # handed to st.pyplot directly.
                        st.pyplot(exp.as_pyplot_figure())

            else:
                st.warning("Incorrect Username/Password")
 # Data visualization section: value-count bar plot plus a customizable plot
 # of the selected columns.
 st.info("Data Visualization")
 df = get_dataframe(csv_filename)

 if st.checkbox("Show Value Counts"):
     # Drawn on an explicit figure so st.pyplot renders exactly this plot.
     fig, ax = plt.subplots()
     df.iloc[:,-1].value_counts().plot(kind='bar', ax=ax)
     st.pyplot(fig)
 all_columns_names = df.columns.tolist()
 type_of_plot = st.selectbox("Select Type of Plot",["area","bar","line","hist","box","kde"])
 selected_columns_names = st.multiselect("Select Columns To Plot",all_columns_names)
 if st.button("Generate Plot"):
     st.success("Generating Customizable Plot of {} for {}".format(type_of_plot,selected_columns_names))
     # Area, bar and line use Streamlit's native charts.
     if type_of_plot == 'area':
         st.area_chart(df[selected_columns_names])
     elif type_of_plot == 'bar':
         st.bar_chart(df[selected_columns_names])
     elif type_of_plot == 'line':
         st.line_chart(df[selected_columns_names])
     # A membership test is required here: the previous
     # `== 'hist' or 'box' or 'kde'` was always true, so this branch also
     # ran for area, bar and line.
     elif type_of_plot in ('hist', 'box', 'kde'):
         # These plots work on exactly one column, as the warning says.
         if len(selected_columns_names) != 1:
             st.warning('select one coloumn')
         else:
             fig, ax = plt.subplots()
             df[get_singleton(selected_columns_names)].plot(kind=type_of_plot, ax=ax)
             # Render the figure; writing the returned Axes does not draw it.
             st.pyplot(fig)
# Chart Table
# 25 rows x 5 columns of random values, shown as a line chart.
chartTable = pd.DataFrame(
    np.random.rand(25, 5),
    columns=["A", "B", "X", "Y", "Z"]
)
st.subheader("Chart Table")
st.line_chart(chartTable)

# Colored Chart Table
# 25 rows x 3 columns of random values, shown as an area chart.
coloredChartTable = pd.DataFrame(
    np.random.rand(25, 3),
    columns=["A", "B", "C"]
)
st.subheader("Colored Chart")
st.area_chart(coloredChartTable)

# Bar Chart
# 25 rows x 3 columns of random values, shown as a bar chart.
barChart = pd.DataFrame(
    np.random.rand(25, 3),
    columns=["A", "B", "C"]
)
st.subheader("Bar Chart")
st.bar_chart(barChart)

# Displaying Plots
import matplotlib.pyplot as plt

# 100 samples from a normal distribution (mean 1, standard deviation 1),
# drawn as a 20-bin histogram on an explicit figure.
# NOTE(review): the figure is built but not rendered in the lines visible
# here -- confirm a st.pyplot(fig) call follows.
arr = np.random.normal(1, 1, size=100)
fig, ax = plt.subplots()
ax.hist(arr, bins=20)
Beispiel #11
0
        data = datasets.load_breast_cancer()
        st.write("""dataset breast""")
    else:
        data = datasets.load_wine()
        st.write("""dataset wine""")
    x = data.data
    y = data.target
    x1 = pd.DataFrame(x, columns=data.feature_names)
    x2 = pd.DataFrame(y)
    return x, y, x1, x2


# select_dataset returns the feature array, the target array, and a DataFrame
# built from each of them.
x, y, x1, x2 = select_dataset(dataset)
#x1=frame(dataset)
st.write(""" ## SHAPE OF DATA IS""", x.shape)
st.write(""" ## No of classes  """, len(np.unique(y)))
# Lets the user preview either the feature table or the target table.
box = st.selectbox(""" display data """, options=["inputdata", "targetdata"])
if box == "inputdata":
    st.write(""" ## Input Data""", x1.head())
elif box == "targetdata":
    st.write(""" ## target Data""", x2.head())
#st.write("""# heat map """,sn.heatmap(x1.corr(),annot=True))
#box1=st.selectbox("""## PLOT""",options=["inputdata","targetdata"])
# Charts of the whole feature table.
st.bar_chart(x1)
st.area_chart(x1)
# NOTE(review): x1 is a pandas DataFrame here; st.altair_chart presumably
# expects an Altair chart object -- confirm this call works as intended.
st.altair_chart(x1)

# NOTE(review): bs4 is imported but not used in the lines visible here.
import bs4
# The embed URL is written to the page as plain text.
Href = "https://app.powerbi.com/reportEmbed?reportId=55021219-4186-4a3b-b904-4df92fd29bb0&autoAuth=true&ctid=bf93bb5e-ecf0-4e3d-be0e-79b5cc527a48&config=eyJjbHVzdGVyVXJsIjoiaHR0cHM6Ly93YWJpLWluZGlhLWNlbnRyYWwtYS1wcmltYXJ5LXJlZGlyZWN0LmFuYWx5c2lzLndpbmRvd3MubmV0LyJ9"
st.write(Href)
Beispiel #12
0
def main():
    """Machine Learning Dataset Explorer.

    Lets the user pick a file from the current directory, loads it with
    ``pd.read_csv`` and offers widgets to inspect it (rows, column names,
    shape, selected columns, value counts, dtypes, summary) and to plot it
    (correlation heatmap, grouped value counts, pie plot and a customizable
    plot).
    """
    st.title("Machine Learning Dataset Explorer")
    st.subheader("Simple Data Science Explorer with Streamlit")

    # The outer <div> is closed explicitly so the banner does not swallow
    # the rest of the page markup.
    html_temp = """ 
    <div style="background-color:tomato;">
    <p style="color:white; font-size: 50px">Frase aleatória</p>
    </div>
    """
    st.markdown(html_temp, unsafe_allow_html=True)

    def file_selector(folder_path='.'):
        """Return the path of the entry of ``folder_path`` picked by the user."""
        filenames = os.listdir(folder_path)
        selected_filename = st.selectbox("Escolhar um arquivo", filenames)
        return os.path.join(folder_path, selected_filename)

    filename = file_selector()
    st.info("Você escolheu {}".format(filename))

    # Read the data
    df = pd.read_csv(filename)

    # Show the dataset
    if st.checkbox("Mostrar DataSet"):
        number = st.number_input("Número de linhas para visualizar", 5, 10)
        st.dataframe(df.head(number))

    # Show the column names
    if st.button("Nomes das Colunas"):
        st.write(df.columns)

    # Show the shape
    if st.checkbox("Formato do Dataset"):
        st.write(df.shape)
        data_dim = st.radio("Show Dimension By", ("Rows", "Columns"))
        if data_dim == 'Columns':
            st.text("Número de Colunas")
            st.write(df.shape[1])
        elif data_dim == "Rows":
            st.text("Número de linhas")
            st.write(df.shape[0])
        else:
            st.write(df.shape)

    # Choose columns
    if st.checkbox("Selecione as colunas desejadas"):
        all_columns = df.columns.tolist()
        selected_columns = st.multiselect("Escolha", all_columns)
        new_df = df[selected_columns]
        st.dataframe(new_df)

    # Show value counts of the first two and the last two columns
    if st.button("Valores"):
        st.text("Valores em classes")
        st.write(df.iloc[:, 0].value_counts())  # residents
        st.write(df.iloc[:, 1].value_counts())  # elderly
        st.write(df.iloc[:, -1].value_counts())  # children
        st.write(df.iloc[:, -2].value_counts())  # families

    # Show the data types
    if st.button("DataTypes"):
        st.write(df.dtypes)

    # Show the summary
    if st.checkbox("Sumário"):
        st.write(df.describe().T)

    # Visualization
    st.subheader("Visualização dos dados")
    # Every plot below draws on its own figure and hands that figure to
    # st.pyplot, instead of relying on matplotlib's global state.

    # Correlation (Seaborn)
    if st.checkbox("Seaborn Plot"):
        fig, ax = plt.subplots()
        sns.heatmap(df.corr(), annot=True, ax=ax)
        # st.pyplot has to be called; a bare `st.pyplot` renders nothing.
        st.pyplot(fig)

    # Count plot
    if st.checkbox("Plot of Value Counts"):
        st.text("Value Counts By Target")
        all_columns_names = df.columns.tolist()
        primary_col = st.selectbox("Primary Columm to GroupBy",
                                   all_columns_names)
        selected_columns_names = st.multiselect("Select Columns",
                                                all_columns_names)
        if st.button("Plot"):
            st.text("Generate Plot")
            if selected_columns_names:
                vc_plot = df.groupby(
                    primary_col)[selected_columns_names].count()
            else:
                # Without a selection, fall back to the last column.
                vc_plot = df.iloc[:, -1].value_counts()
            fig, ax = plt.subplots()
            vc_plot.plot(kind="bar", ax=ax)
            st.pyplot(fig)

    # Pie chart
    if st.checkbox("Pie Plot"):
        all_columns_names = df.columns.tolist()
        selected_column = st.selectbox("Selecione a coluna desejada",
                                       all_columns_names)
        if st.button("Gerar Pie Plot"):
            st.success("Gerando um Pie Plot")
            fig, ax = plt.subplots()
            df[selected_column].value_counts().plot.pie(autopct="%1.1f%%",
                                                        ax=ax)
            st.pyplot(fig)

    # Custom plot
    st.subheader("Plot Customizado")
    all_columns_names = df.columns.tolist()
    type_of_plot = st.selectbox("Selecione o tipo de plot",
                                ['area', 'bar', 'line', 'hist', 'box', 'kde'])
    selected_columns_names = st.multiselect("Selecione as colunas",
                                            all_columns_names)

    if st.button("Gerar Plot"):
        if not selected_columns_names:
            # pandas cannot plot a frame without columns.
            st.warning("Selecione pelo menos uma coluna")
        else:
            st.success("Gerando plot de {} para {}".format(
                type_of_plot, selected_columns_names))
            cust_data = df[selected_columns_names]

            # Area, bar and line use Streamlit's native charts.
            if type_of_plot == 'area':
                st.area_chart(cust_data)
            elif type_of_plot == 'bar':
                st.bar_chart(cust_data)
            elif type_of_plot == 'line':
                st.line_chart(cust_data)
            # hist, box and kde go through pandas/matplotlib.
            else:
                fig, ax = plt.subplots()
                cust_data.plot(kind=type_of_plot, ax=ax)
                st.pyplot(fig)
Beispiel #13
0
def main():
    """Automated ML App

    Streamlit entry point for the heart-failure demo. The sidebar menu picks
    one of five pages: "Home" (banner only), "EDA" (pandas-profiling report),
    "Plots" (value counts plus a customizable plot), "ML_Algorithms" (SVM,
    decision tree or random forest) and "Neural Network" (MLPClassifier).
    Every page except "Home" works on a label-encoded copy of
    ``heart_failure.csv`` and treats its last column as the target.
    """

    activities = ["Home", "EDA", "Plots", "ML_Algorithms", "Neural Network"]
    choice = st.sidebar.selectbox("Menu", activities)

    html_temp = """
        <div 
        style="background-color:royalblue;padding:10px;border-radius:10px">
        <h1 style="color:white;text-align:center;font-style: italic;">Classifying the survival of patients with heart failure using Various Machine Learning Algorithms</h1>
        </div>
        """
    components.html(html_temp)
    # The CSV is read once per script run; each page works on its own copy.
    # read_csv never returns None, so no "is not None" guard is needed.
    data = pd.read_csv('heart_failure.csv')

    def preview_and_encode():
        """Show the first raw rows, then return a label-encoded copy."""
        df = data.copy()
        st.dataframe(df.head())
        lable = preprocessing.LabelEncoder()
        # NOTE(review): every column is label-encoded, numeric ones included;
        # confirm that this is intended for the continuous features.
        for col in df.columns:
            df[col] = lable.fit_transform(df[col])
        return df

    def train_and_report(clf, X, y, classifer_name):
        """Fit ``clf`` on an 80/20 split and display its test-set metrics."""
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.2,
                                                            random_state=100)

        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        st.write(
            f'<div style="color: #1C2331; font-size: medium; font-style: italic; padding: 15px; background-color:#b2dfdb;border-radius:5px;">Classifier = {classifer_name}</div></br>',
            unsafe_allow_html=True)
        clf_report = classification_report(y_test, y_pred)
        st.success(f"Classification Report:\n\n {clf_report}")
        acc = accuracy_score(y_test, y_pred)
        st.warning(f"accuracy = {acc}")
        # Samples 1..9 are listed exactly as before; the upper bound is
        # clamped so a test split with fewer than ten rows cannot raise
        # IndexError.
        # NOTE(review): index 0 is skipped - confirm whether that is intended.
        for i in range(1, min(10, len(y_test))):
            st.write("Actual=%s, Predicted=%s" % (y_test[i], y_pred[i]))

    if choice == 'EDA':
        st.subheader("Exploratory Data Analysis using Pandas Profiling")
        df = preview_and_encode()
        # pandas profiling
        profile = ProfileReport(df)
        st_profile_report(profile)

    elif choice == 'Plots':
        st.subheader("Data Visualization")
        df = preview_and_encode()

        if st.checkbox("Show Value Counts"):
            st.write(df.iloc[:, -1].value_counts().plot(kind='bar'))
            st.pyplot()

        # Customized plot
        all_columns_names = df.columns.tolist()
        type_of_plot = st.selectbox(
            "Select Type of Plot",
            ["area", "bar", "line", "hist", "box", "kde"])
        selected_columns_names = st.multiselect("Select Columns To Plot",
                                                all_columns_names)
        if st.button("Generate Plot"):
            st.success("Generating Customizable Plot of {} for {}".format(
                type_of_plot, selected_columns_names))
            # Area, bar and line use Streamlit's native charts.
            if type_of_plot == 'area':
                cust_data = df[selected_columns_names]
                st.area_chart(cust_data)
            elif type_of_plot == 'bar':
                cust_data = df[selected_columns_names]
                st.bar_chart(cust_data)
            elif type_of_plot == 'line':
                cust_data = df[selected_columns_names]
                st.line_chart(cust_data)
            # Everything else goes through pandas/matplotlib.
            elif type_of_plot:
                cust_plot = df[selected_columns_names].plot(kind=type_of_plot)
                st.write(cust_plot)
                st.pyplot()

    elif choice == 'ML_Algorithms':
        st.subheader("Machine Learning Algorithms")
        df = preview_and_encode()

        if st.checkbox("Summary"):
            st.write(df.describe())

        # Last column is the target, everything before it is a feature.
        X = df.iloc[:, :-1].values
        y = df.iloc[:, -1].values

        st.write("Number of classes", len(np.unique(y)))
        classifer_name = st.sidebar.selectbox(
            "Select Classifer",
            ("SVM Linear", "SVM Radial", "Decision Tree", "Random Forest"))

        # Hyper-parameter sliders depend on the chosen classifier.
        if classifer_name in ("SVM Linear", "SVM Radial"):
            C = st.sidebar.slider("C", 0.01, 15.0)
            kernel = 'linear' if classifer_name == "SVM Linear" else 'rbf'
            clf = SVC(C=C, kernel=kernel)
        elif classifer_name == "Decision Tree":
            max_depth = st.sidebar.slider("max_depth", 2, 15)
            max_leaf_nodes = st.sidebar.slider("max_leaf_nodes", 2, 20)
            clf = DecisionTreeClassifier(max_depth=max_depth,
                                         max_leaf_nodes=max_leaf_nodes,
                                         random_state=100)
        else:  # "Random Forest", the only remaining selectbox option
            max_depth = st.sidebar.slider("max_depth", 2, 15)
            n_estimators = st.sidebar.slider("n_estimators", 1, 200)
            clf = RandomForestClassifier(n_estimators=n_estimators,
                                         max_depth=max_depth,
                                         random_state=100)

        train_and_report(clf, X, y, classifer_name)

    elif choice == 'Neural Network':
        st.subheader("Neural Networks (MLPClassifier)")
        df = preview_and_encode()

        X = df.iloc[:, :-1].values
        y = df.iloc[:, -1].values

        classifer_name = "MLPClassifier"
        max_iter = st.sidebar.slider("max_iter", 2, 30)
        clf = MLPClassifier(max_iter=max_iter)

        train_and_report(clf, X, y, classifer_name)
Beispiel #14
0
def data_display():
    """Walk through the ways Streamlit can display text, tables and charts.

    Takes no arguments and returns nothing; every step writes an element to
    the page via ``st.*`` calls. The sample data is random, so the charts
    differ from run to run.

    NOTE(review): the bare string expressions in the body are presumably
    rendered by Streamlit's "magic" feature - confirm that this function is
    called from the main script, otherwise they have no visible effect.
    """
    # Let’s add a title to test things out
    st.title("My Cool Streamlit App!!!")

    # Let's write something
    # write any text
    st.write("Here's our first attempt at using data to create a table:")
    # write dataframes
    st.write(
        pd.DataFrame({"first column": [1, 2, 3, 4], "second column": [10, 20, 30, 40]})
    )
    # write JSON
    st.write({"Name": "John", "Country": "USA"})

    # Emojis are love!
    st.write("Display some cool emojis :sunglasses:")

    # st.write accepts chart objects too!
    # The Altair chart is only built here; it is rendered further down by
    # st.altair_chart.
    df2 = pd.DataFrame(np.random.randn(200, 3), columns=["a", "b", "c"])
    c = (
        alt.Chart(df2)
        .mark_circle()
        .encode(x="a", y="b", size="c", color="c", tooltip=["a", "b", "c"])
    )
    # st.write(c)

    # you can also use
    st.text("you can also use st.text or st.markdown as well")

    # let me showcase some Magic!
    """
    # My Cool Streamlit App!!!
    Here's our first attempt at using data to create a table:
    """

    df = pd.DataFrame({"first column": [1, 2, 3, 4], "second column": [10, 20, 30, 40]})
    # df

    # you can also use st.dataframe and st.table
    "Display a df with `st.dataframe(df)` and `st.table(df)`"
    st.dataframe(df)
    st.table(df)

    # let's display some charts
    "let's display charts"
    # 50 rows of random values in three series, shared by the three charts below
    chart_data = pd.DataFrame(np.random.randn(50, 3), columns=["a", "b", "c"])
    # line_chart
    "Display a line chart figure with `st.line_chart`"
    st.line_chart(chart_data)
    # area_chart
    "Display a area chart figure with `st.area_chart`"
    st.area_chart(chart_data)

    # bar_chart
    "Display a bar chart figure with `st.bar_chart`"
    st.bar_chart(chart_data)

    # let's draw Map chart
    # 1000 random points scattered around (37.76, -122.4)
    map_data = pd.DataFrame(
        np.random.randn(1000, 2) / [50, 50] + [37.76, -122.4], columns=["lat", "lon"]
    )
    "Display a map figure with `st.map`"
    st.map(map_data)

    "Display a matplotlib.pyplot figure with `st.pyplot`"
    # Histogram of 100 normally distributed samples, drawn on an explicit figure
    arr = np.random.normal(1, 1, size=100)
    fig, ax = plt.subplots()
    ax.hist(arr, bins=20)
    st.pyplot(fig)

    "Display a chart using the Altair library with `st.altair_chart`"
    st.altair_chart(c, use_container_width=True)

    "Display an interactive Plotly chart using `st.plotly_chart`"

    # Add histogram data
    x1 = np.random.randn(200) - 2
    x2 = np.random.randn(200)
    x3 = np.random.randn(200) + 2

    # Group data together
    hist_data = [x1, x2, x3]

    group_labels = ["Group 1", "Group 2", "Group 3"]

    # Create distplot with custom bin_size
    # `fig` is rebound here from the matplotlib figure to the Plotly figure
    fig = ff.create_distplot(hist_data, group_labels, bin_size=[0.1, 0.25, 0.5])

    st.plotly_chart(fig, use_container_width=True)

    """
Beispiel #15
0
import streamlit as st
import pandas as pd
import numpy as np

# Twenty rows of standard-normal noise, one column per plotted series.
random_values = np.random.randn(20, 3)
chart_data = pd.DataFrame(random_values, columns=list('abc'))

# Render the three series as a stacked area chart.
st.area_chart(chart_data)
Beispiel #16
0
])

st.subheader('Total Energy Generation in ' + country + ' (MW)')

# Sidebar controls for the forecasting step.
forecast_horizon = st.sidebar.slider(label='Forecast Horizon (hours)',
                                     min_value=12,
                                     max_value=168,
                                     value=48)

window_length = st.sidebar.slider(label='Window Length', min_value=1, value=30)

country_code = COUNTRY_MAPPINGS[country]
df = get_energy_data(country_code)

# Plot total energy generation for the selected country.
st.area_chart(df, use_container_width=False, width=800)

cols_renewable = ['Wind Onshore', 'Wind Offshore', 'Solar']

# Keep only the renewable columns that are actually present in the frame.
# Index.intersection replaces `df.columns & cols_renewable`: using `&` as a
# set operation on an Index is no longer supported by newer pandas.
df = df[df.columns.intersection(cols_renewable)]

for item in df.columns:

    # NOTE(review): `smape` is not used in the visible part of this loop; it
    # is kept in case the script continues past this excerpt.
    smape = calculate_smape(df[[item]], regressor, forecast_horizon,
                            window_length)
    st.subheader(item + ' Energy Generation Forecast in ' + country + ' (MW)')
    # Generate a forecast for each renewable energy type.
    df_forecast = generate_forecast(df[[item]], regressor, forecast_horizon,
                                    window_length)
Beispiel #17
0
def main():
    """Semi Automated ML App With Streamlit

    Sidebar-driven page with two activities:

    * "Exploratory Data Analysis": upload a CSV/TXT file and optionally show
      its shape, columns, summary statistics, a column subset, the value
      counts of the last column, correlation plots and a pie plot.
    * "Plots": upload a file and draw the value counts of the last column or
      a customizable plot of the selected columns.
    """

    activities = ["Exploratory Data Analysis", "Plots"]
    choice = st.sidebar.selectbox("Select Activities", activities)

    if choice == 'Exploratory Data Analysis':
        st.subheader("Exploratory Data Analysis")

        data = st.file_uploader("Upload a Dataset of Your Choice",
                                type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            # Built once up front. "Show Selected Columns" used to rely on a
            # list that only existed while "Show Columns" was ticked and
            # raised an unbound-variable error otherwise.
            all_columns = df.columns.to_list()

            if st.checkbox("Show Shape"):
                st.write(df.shape)

            if st.checkbox("Show Columns"):
                st.write(all_columns)

            if st.checkbox("Summary"):
                st.write(df.describe())

            if st.checkbox("Show Selected Columns"):
                selected_columns = st.multiselect("Select Columns",
                                                  all_columns)
                new_df = df[selected_columns]
                st.dataframe(new_df)

            if st.checkbox("Show Value Counts"):
                st.dataframe(df.iloc[:, -1].value_counts())

            # Correlations are computed on numeric/bool columns only: since
            # pandas 2.0 DataFrame.corr() no longer drops other columns
            # silently and fails on string data.
            if st.checkbox("Correlation Plot(Matplotlib)"):
                numeric_df = df.select_dtypes(include=['number', 'bool'])
                plt.matshow(numeric_df.corr())
                st.pyplot()

            if st.checkbox("Correlation Plot(Seaborn)"):
                numeric_df = df.select_dtypes(include=['number', 'bool'])
                st.write(sns.heatmap(numeric_df.corr(), annot=True))
                st.pyplot()

            if st.checkbox("Pie plot"):
                column_to_plot = st.selectbox("Select Column", all_columns)
                pie_plot = df[column_to_plot].value_counts().plot.pie(
                    autopct="%1.1f%%")
                st.write(pie_plot)
                st.pyplot()

    elif choice == 'Plots':
        st.subheader("Data Visualization")
        # NOTE(review): "xlsx" is accepted by the uploader but the file is
        # always parsed with read_csv - confirm whether Excel input is meant
        # to be supported.
        data = st.file_uploader("Upload a Dataset",
                                type=["csv", "txt", "xlsx"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            if st.checkbox("Show Value Counts"):
                st.write(df.iloc[:, -1].value_counts().plot(kind='bar'))
                st.pyplot()

            # Customizable plot
            all_columns_names = df.columns.tolist()
            type_of_plot = st.selectbox(
                "Select Type of Plot",
                ["area", "bar", "line", "hist", "box", "kde"])
            selected_columns_names = st.multiselect("Select Columns To Plot",
                                                    all_columns_names)

            if st.button("Generate Plot"):
                st.success("Generating Customizable Plot of {} for {}".format(
                    type_of_plot, selected_columns_names))

                # Area, bar and line use Streamlit's native charts.
                if type_of_plot == 'area':
                    cust_data = df[selected_columns_names]
                    st.area_chart(cust_data)

                elif type_of_plot == 'bar':
                    cust_data = df[selected_columns_names]
                    st.bar_chart(cust_data)

                elif type_of_plot == 'line':
                    cust_data = df[selected_columns_names]
                    st.line_chart(cust_data)

                # Everything else goes through pandas/matplotlib.
                elif type_of_plot:
                    cust_plot = df[selected_columns_names].plot(
                        kind=type_of_plot)
                    st.write(cust_plot)
                    st.pyplot()
Beispiel #18
0
def main():
    """Run the drag-and-drop ML / data-analysis Streamlit app.

    Renders a banner and a sidebar menu with four activities:

    * "Exploratory Data Analysis": shape, columns, column subset, summary
      and value counts of an uploaded CSV/TXT file.
    * "Plot": correlation heatmap, pie chart and a customizable plot.
    * "Model Building": 10-fold cross-validated accuracy of six classifiers,
      using the last column as the target.
    * "About": developer information.
    """
    html_page = """
	<div style="text-align: center;">
	<div>
		<h1>Machine Learning | Pedictive Analysis | Data Analytics</h1>
	</div>
	<div>
		<h2>Drag and Drop any dataset and predict or analyse</h2>
	</div>
	</div>
	"""
    st.markdown(html_page, unsafe_allow_html=True)

    activities = [
        "Exploratory Data Analysis", "Plot", "Model Building", "About"
    ]
    choice = st.sidebar.selectbox("Select Activities", activities)
    if choice == "Exploratory Data Analysis":
        st.subheader("Exploratory Data Analysis")

        # Drag and Drop Feature
        data = st.file_uploader("Upload Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            # Shared by the two column-related checkboxes below.
            all_columns = df.columns.to_list()

            if st.checkbox("Show Shape of the dataset"):
                st.write(df.shape)

            if st.checkbox("Show Columns of the dataset"):
                st.write(all_columns)

            if st.checkbox("Select Columns to Show"):
                selected_columns = st.multiselect("Select the Columns",
                                                  all_columns)
                new_df = df[selected_columns]
                st.dataframe(new_df)

            if st.checkbox("Show Summary of the dataset"):
                st.write(df.describe())

            if st.checkbox("Show Value Counts"):
                st.write(df.iloc[:, -1].value_counts())

    elif choice == "Plot":
        st.subheader("Data Visualization")

        data = st.file_uploader("Upload Dataset", type=["csv", "txt"])
        # Everything below needs `df`, so it all lives under the upload
        # guard; previously these widgets ran without a file and failed on
        # the undefined `df`.
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            if st.checkbox("Correlation Map with Seaborn"):
                # Numeric/bool columns only: since pandas 2.0
                # DataFrame.corr() fails on string columns instead of
                # dropping them.
                numeric_df = df.select_dtypes(include=['number', 'bool'])
                st.write(sns.heatmap(numeric_df.corr(), annot=True))
                st.pyplot()

            all_columns_names = df.columns.to_list()

            if st.checkbox("Pie Chart"):
                columns_to_plot = st.selectbox("Select 1 Column",
                                               all_columns_names)
                pie_plot = df[columns_to_plot].value_counts().plot.pie(
                    autopct="%1.1f%%")
                st.write(pie_plot)
                st.pyplot()

            type_of_plot = st.selectbox(
                "Select Type of Plot",
                ["area", "bar", "line", "hist", "box", "kde"])
            selected_columns_names = st.multiselect("Select Columns To Plot",
                                                    all_columns_names)

            if st.button("Generate Plot"):
                st.success("Generating Customizable Plot of {} for {}".format(
                    type_of_plot, selected_columns_names))

                # Area, bar and line use Streamlit's native charts.
                if type_of_plot == 'area':
                    cust_data = df[selected_columns_names]
                    st.area_chart(cust_data)
                elif type_of_plot == 'bar':
                    cust_data = df[selected_columns_names]
                    st.bar_chart(cust_data)
                elif type_of_plot == 'line':
                    cust_data = df[selected_columns_names]
                    st.line_chart(cust_data)

                # Everything else goes through pandas/matplotlib.
                elif type_of_plot:
                    cust_plot = df[selected_columns_names].plot(
                        kind=type_of_plot)
                    st.write(cust_plot)
                    st.pyplot()

    elif choice == "Model Building":
        st.subheader("Building Machine Learning Model")

        data = st.file_uploader("Upload Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            # Last column is the target, everything before it is a feature.
            X = df.iloc[:, 0:-1]
            Y = df.iloc[:, -1]
            seed = 7

            models = [
                ("LR", LogisticRegression()),
                ("LDA", LinearDiscriminantAnalysis()),
                ("KNN", KNeighborsClassifier()),
                ("CART", DecisionTreeClassifier()),
                ("NB", GaussianNB()),
                ("SVM", SVC()),
            ]
            scoring = 'accuracy'

            # random_state only takes effect together with shuffle=True;
            # current scikit-learn rejects a seed on an unshuffled KFold.
            # The splitter is loop-invariant, so it is built once.
            kfold = model_selection.KFold(n_splits=10,
                                          shuffle=True,
                                          random_state=seed)

            # Evaluate each model in turn and collect its CV statistics.
            all_models = []
            for name, model in models:
                cv_results = model_selection.cross_val_score(model,
                                                             X,
                                                             Y,
                                                             cv=kfold,
                                                             scoring=scoring)
                all_models.append({
                    "model_name": name,
                    "model_accuracy": cv_results.mean(),
                    "Standard_deviation": cv_results.std()
                })

            if st.checkbox("Metrics as Table"):
                rows = [(m["model_name"], m["model_accuracy"],
                         m["Standard_deviation"]) for m in all_models]
                st.dataframe(
                    pd.DataFrame(rows,
                                 columns=[
                                     "Model Name", "Model Accuracy",
                                     "Standard Deviation"
                                 ]))

            if st.checkbox("Metrics as JSON"):
                st.json(all_models)

    elif choice == "About":
        st.subheader("About Developer")

        html_page = """
		<div style = "background-color:tomato; padding:50px">
   		<p style="font-size:25px"> SHAIL MODI <br> B. Tech - Computer Engineering <br> &#9993; [email protected] <br> K.J.Somaiya College Of Engineering </p>
   		<p>&copy; shailmodi<p>
		</div>
		"""
        st.markdown(html_page, unsafe_allow_html=True)
# Group
# NOTE(review): each section passes the raw ``groupby`` object to the chart
# call and then calls st.pyplot() although no matplotlib figure is drawn
# here. Confirm whether an aggregation (e.g. a per-species mean) was intended
# and whether the st.pyplot() calls are needed.
if st.checkbox("Show Bar Chart Plot"):
    v_group = data.groupby('species')
    st.bar_chart(v_group)
    st.pyplot()

# Line
if st.checkbox("Show Line Plot"):
    v_group = data.groupby('species')
    # Fixed: this section previously drew a second bar chart.
    st.line_chart(v_group)
    st.pyplot()

# Area
if st.checkbox("Show Area Plot"):
    v_group = data.groupby('species')
    st.area_chart(v_group)
    st.pyplot()


# Images
@st.cache
def load_image(img):
    """Open the image at path ``img`` and return the opened image object.

    The result is wrapped by ``st.cache``.
    """
    image_path = os.path.join(img)
    return Image.open(image_path)


# Let the user pick which iris species to illustrate.
species_options = ("setosa", "virginica", "versicolor")
species_type = st.radio("Select Species Type", species_options)

# Only the setosa choice has a branch in this part of the script.
if 'setosa' == species_type:
    st.text("Showing Setosa Species")
    setosa_image = load_image('imgs/iris_setosa.jpg')
    st.image(setosa_image)
Beispiel #20
0
def main():
    """Common ML Data Explorer

    Single-page Streamlit app. It shows every PNG found under ``images/``,
    lets the user pick a CSV file from ``./datasets`` and then offers:

    * dataset inspection (rows, column names, shape, dtypes, value counts of
      the last column, summary statistics),
    * visualizations (correlation plots, value-count, pie and barh plots and
      a customizable plot),
    * a preview of the features (all columns but the last) and the target
      (last column), plus a model-choice selector,
    * a button that zips the selected dataset and shows a link to it.
    """
    st.title("Common ML Data Explorer")
    st.subheader("Simple ML App with Streamlit")

    img_list = glob.glob("images/*.png")
    all_image = [Image.open(i) for i in img_list]
    st.image(all_image)

    def file_selector(folder_path='./datasets'):
        """Let the user pick a file in ``folder_path`` and return its path."""
        filenames = os.listdir(folder_path)
        selected_filename = st.selectbox('Select a file', filenames)
        return os.path.join(folder_path, selected_filename)

    filename = file_selector()
    st.write('You selected `%s`' % filename)
    df = pd.read_csv(filename)

    # Column names are needed by several widgets below; compute them once.
    all_columns_names = df.columns.tolist()

    if st.checkbox("Show DataSet"):
        # An integer widget: the default number_input yields a float, which
        # DataFrame.head() cannot use as a row count.
        number = st.number_input("Number of Rows to View",
                                 min_value=0,
                                 step=1)
        st.dataframe(df.head(int(number)))

    if st.button("Columns Names"):
        st.write(df.columns)

    if st.checkbox("Shape of Dataset"):
        st.write(df.shape)
        data_dim = st.radio("Show Dimension by", ("Rows", "Columns"))
        if data_dim == 'Rows':
            st.text("Number of  Rows")
            st.write(df.shape[0])
        elif data_dim == 'Columns':
            st.text("Number of Columns")
            st.write(df.shape[1])

    if st.checkbox("Select Columns To Show"):
        selected_columns = st.multiselect('Select', all_columns_names)
        new_df = df[selected_columns]
        st.dataframe(new_df)

    if st.button("Data Types"):
        st.write(df.dtypes)

    if st.button("Value Counts"):
        st.text("Value Counts By Target/Class")
        st.write(df.iloc[:, -1].value_counts())

    if st.checkbox("Summary"):
        st.write(df.describe())

    st.subheader("Data Visualization")
    # Correlation plots use numeric/bool columns only: since pandas 2.0
    # DataFrame.corr() fails on string columns instead of dropping them.

    # Matplotlib Plot
    if st.checkbox("Correlation Plot [Matplotlib]"):
        numeric_df = df.select_dtypes(include=['number', 'bool'])
        plt.matshow(numeric_df.corr())
        st.pyplot()
    # Seaborn Plot
    if st.checkbox("Correlation Plot with Annotation[Seaborn]"):
        numeric_df = df.select_dtypes(include=['number', 'bool'])
        st.write(sns.heatmap(numeric_df.corr(), annot=True))
        st.pyplot()

    # Counts Plots
    if st.checkbox("Plot of Value Counts"):
        st.text("Value Counts By Target/Class")

        primary_col = st.selectbox('Select Primary Column To Group By',
                                   all_columns_names)
        selected_column_names = st.multiselect('Select Columns',
                                               all_columns_names)
        if st.button("Plot"):
            st.text("Generating Plot for: {} and {}".format(
                primary_col, selected_column_names))
            # With a selection: per-group counts of the chosen columns.
            # Without one: value counts of the last column.
            if selected_column_names:
                vc_plot = df.groupby(
                    primary_col)[selected_column_names].count()
            else:
                vc_plot = df.iloc[:, -1].value_counts()
            st.write(vc_plot.plot(kind='bar'))
            st.pyplot()

    if st.checkbox("Pie Plot"):
        if st.button("Generate Pie Plot"):
            st.write(df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%"))
            st.pyplot()

    if st.checkbox("BarH Plot"):
        st.info("Please Choose the X and Y Column")
        x_column = st.selectbox('Select X Columns For Barh Plot',
                                all_columns_names)
        y_column = st.selectbox('Select Y Columns For Barh Plot',
                                all_columns_names)
        if st.button("Generate Barh Plot"):
            # Built only on demand; previously the plot was drawn on every
            # rerun, even before the button was pressed.
            barh_plot = df.plot.barh(x=x_column, y=y_column, figsize=(10, 10))
            st.write(barh_plot)
            st.pyplot()

    st.subheader("Customizable Plots")
    type_of_plot = st.selectbox("Select the Type of Plot",
                                ["area", "bar", "line", "hist", "box", "kde"])
    selected_column_names = st.multiselect('Select Columns To Plot',
                                           all_columns_names)

    if st.button("Generate Plot"):
        st.success("Generating A Customizable Plot of: {} for :: {}".format(
            type_of_plot, selected_column_names))
        # Area, bar and line use Streamlit's native charts.
        if type_of_plot == 'area':
            cust_data = df[selected_column_names]
            st.area_chart(cust_data)
        elif type_of_plot == 'bar':
            cust_data = df[selected_column_names]
            st.bar_chart(cust_data)
        elif type_of_plot == 'line':
            cust_data = df[selected_column_names]
            st.line_chart(cust_data)
        # Everything else goes through pandas/matplotlib; 'hist' keeps its
        # two-bin setting, the other kinds use the defaults.
        else:
            plot_kwargs = {'bins': 2} if type_of_plot == 'hist' else {}
            custom_plot = df[selected_column_names].plot(kind=type_of_plot,
                                                         **plot_kwargs)
            st.write(custom_plot)
            st.pyplot()

    html_temp = """
	<div style="background-color:powderblue;"><p style="color:blue;font-size:60px;"> Hello world colored</p></div>
	"""
    st.markdown(html_temp, unsafe_allow_html=True)

    st.subheader("Feature Engineering and ML Aspect")

    if st.checkbox("Show Features"):
        all_features = df.iloc[:, 0:-1]
        # `all_features` already excludes the target column, so every one of
        # its column names is listed (slicing again dropped the last feature).
        st.text('Features Names:: {}'.format(all_features.columns))
        st.dataframe(all_features.head(10))

    if st.checkbox("Show Target"):
        all_target = df.iloc[:, -1]
        st.text('Target/Class Name:: {}'.format(all_target.name))
        st.dataframe(all_target.head(10))

    all_ml_dict = {
        'LR': LogisticRegression(),
        'LDA': LinearDiscriminantAnalysis(),
        'KNN': KNeighborsClassifier(),
        'CART': DecisionTreeClassifier(),
        'NB': GaussianNB(),
        'SVM': SVC()
    }
    # The selection is not consumed yet; the widget is still rendered.
    st.multiselect('Model Choices', list(all_ml_dict.keys()))
    for key in all_ml_dict:
        if 'LDA' in key:
            st.write(key)

    # Make Downloadable file as zip, since markdown strips to html
    st.markdown("""[google.com](iris.zip)""")

    st.markdown("""[google.com](./iris.zip)""")

    def makezipfile(data):
        """Zip the file at path ``data`` and return the archive's path."""
        output_filename = '{}_zipped.zip'.format(data)
        with ZipFile(output_filename, "w") as z:
            z.write(data)
        return output_filename

    if st.button("Download File"):
        DOWNLOAD_TPL = f'[{filename}]({makezipfile(filename)})'
        st.text(DOWNLOAD_TPL)
        st.markdown(DOWNLOAD_TPL)
Beispiel #21
0
 def on_train_begin(self, logs=None):
     """Lay out the summary section and the training-log header.

     Writes a "Summary" header, an empty area chart and an epoch counter
     text element, keeping handles to the latter two on the instance, then
     writes the "Training Log" header. ``logs`` is accepted but not used.
     """
     st.header("Summary")
     chart = st.area_chart()
     self._summary_chart = chart
     counter_text = "%8s :  0" % "epoch"
     self._summary_stats = st.text(counter_text)
     st.header("Training Log")
Beispiel #22
0
    st.write(Final_Data)
    if st.checkbox("Show all the column Names"):
        st.write(Final_Data.columns)

# Dataset size: the detail checkboxes only appear once the parent is ticked.
if st.checkbox("Show size of dataset"):
    if st.checkbox("Show row size"):
        row_count = Final_Data.shape[0]
        st.write(row_count)
    if st.checkbox("Show column size"):
        column_count = Final_Data.shape[1]
        st.write(column_count)
    if st.checkbox("Show complete dataset size"):
        st.write(Final_Data.shape)
    if st.checkbox("Show desc of Ratings in final data"):
        # Bare expression kept as is.
        # NOTE(review): presumably displayed by Streamlit magic - confirm.
        Final_Data.describe()["Ratings"]

st.write("**displaying final dataset header lines using area chart**")
st.area_chart(Final_Data)

# Data-quality figures go to stdout, not to the page.
nan_counts = Final_Data.isnull().sum()
print(f"Number of NaN values = {nan_counts}")

duplicates = Final_Data.duplicated(["MovieID", "CustID", "Ratings"])
duplicate_total = duplicates.sum()
print(f"Number of duplicate rows = {duplicate_total}")

if st.checkbox("Show unique customer & movieId in Total Data:"):
    st.write("Total number of movie ratings = ", str(Final_Data.shape[0]))
    unique_users = np.unique(Final_Data["CustID"])
    st.write("Number of unique users = ", str(len(unique_users)))
    unique_movies = np.unique(Final_Data["MovieID"])
    st.write("Number of unique movies = ", str(len(unique_movies)))

if not os.path.isfile("Data/TrainData.pkl"):
    Final_Data.iloc[:int(Final_Data.shape[0] *
Beispiel #23
0
def main():
    def get_table_download_link2(df1, df2, frase):
        """Render a download link for a two-sheet Excel workbook.

        ``df1`` is written to the sheet 'Обучающая выборка' and ``df2`` to
        'Тестовая ваборка'. The workbook is built in memory, base64-encoded
        and embedded in a ``data:`` URI anchor whose visible text is
        ``frase``. The link is emitted with ``st.markdown``; nothing is
        returned.
        """
        xlsx_io = io.BytesIO()
        # Leaving the ``with`` block finalises the workbook.
        # ``ExcelWriter.save()`` no longer exists in pandas 2.x, and the sheet
        # name is passed by keyword because positional use is deprecated.
        with pd.ExcelWriter(xlsx_io, engine='xlsxwriter') as writer:
            df1.to_excel(writer, sheet_name='Обучающая выборка')
            df2.to_excel(writer, sheet_name='Тестовая ваборка')

        media_type = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        data = base64.b64encode(xlsx_io.getvalue()).decode("utf-8")
        href = f'<a href="data:{media_type};base64,{data}" download="RegressionResults.xlsx" >{frase}</a> (right-click and save)'
        st.markdown(href, unsafe_allow_html=True)

    def save_train_test_data_xlsx(trainX, trainY, testX, testY, predictions):
        """Show the train and test tables and offer them as an xlsx download.

        Column headers come from the enclosing scope: every name in
        ``all_clummn_names`` except the last entry of ``selected_y``, which is
        moved to the end with a descriptive suffix. The test table also gets a
        'Predictions' column placed before the real target values.
        """
        target = selected_y[-1]
        feature_names = [str(name) for name in all_clummn_names]
        feature_names.remove(target)

        # Training table: features followed by the chosen target.
        train_columns = feature_names + [str(target) + " ( Selected y )"]
        df1 = pd.DataFrame(np.concatenate((trainX, trainY), axis=1),
                           columns=train_columns)
        st.text("Обучающая выборка")
        st.write(df1)

        # Test table: features, model output, then the real target.
        test_columns = feature_names + [
            'Predictions',
            str(target) + " ( Real y value )",
        ]
        prediction_column = np.reshape(predictions, (predictions.shape[0], 1))
        features_and_prediction = np.concatenate((testX, prediction_column),
                                                 axis=1)
        df2 = pd.DataFrame(np.concatenate((features_and_prediction, testY),
                                          axis=1),
                           columns=test_columns)
        st.text("Тестовая выборка")
        st.write(df2)

        get_table_download_link2(df1, df2, "Сохранить результаты xlsx File")

    def save_exel(df, frase):
        """Render a download link for ``df`` as a one-sheet Excel workbook.

        The frame is written to the sheet 'Результаты кластеризации' in
        memory, base64-encoded and embedded in a ``data:`` URI anchor whose
        visible text is ``frase``. The link is emitted with ``st.markdown``;
        nothing is returned.
        """
        xlsx_io = io.BytesIO()
        # Leaving the ``with`` block finalises the workbook.
        # ``ExcelWriter.save()`` no longer exists in pandas 2.x, and the sheet
        # name is passed by keyword because positional use is deprecated.
        with pd.ExcelWriter(xlsx_io, engine='xlsxwriter') as writer:
            df.to_excel(writer, sheet_name='Результаты кластеризации')

        media_type = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        data = base64.b64encode(xlsx_io.getvalue()).decode("utf-8")
        href = f'<a href="data:{media_type};base64,{data}" download="ClusteringResults.xlsx" >{frase}</a> (right-click and save)'
        st.markdown(href, unsafe_allow_html=True)

    def get_file():
        """Show the CSV uploader and report its state.

        Returns the uploaded file object, or ``None`` while nothing has been
        uploaded. An info box under the uploader states which case applies.
        """
        uploaded = st.file_uploader(
            " Нажмите browse files, чтобы загрузить файл в формате .csv",
            type="csv")
        status_box = st.empty()

        if uploaded:
            status_box.info(" Загрузка файла выполнена успешно")
            return uploaded

        status_box.info(
            " Файл не загружен. Загрузите файл для анализа в формате .csv")
        return None

    def regression_errors_values(testY, predict, n_predict):
        """Write MAE and MSE for the plain and the normalised predictions.

        ``predict`` and ``n_predict`` are each scored against ``testY``; four
        captioned values are written to the page in a fixed order.
        """
        report = (
            ("Средняя абсолютная ошибка:", mean_absolute_error, predict),
            ("Средняя квадратичная ошибка:", mean_squared_error, predict),
            ("Средняя абсолютная ошибка (c нормализацией):",
             mean_absolute_error, n_predict),
            ("Средняя квадратичная ошибка (c нормализацией):",
             mean_squared_error, n_predict),
        )
        for caption, metric, estimate in report:
            st.write(caption, metric(testY, estimate))

    def regression_plot_show(trainX, testY, predict, n_predict, reg_type,
                             target_label):
        """Plot real targets against both prediction series and show the figure.

        Parameters:
            trainX: unused; kept so existing callers keep working.
            testY: real target values of the test split (red dots).
            predict: predictions from the raw features (blue line).
            n_predict: predictions from the normalised features (black line).
            reg_type: plot title.
            target_label: y-axis label.
        """
        # One x position per test row. Sizing the axis from trainX (as before)
        # gave mismatched x/y lengths whenever the test split was larger than
        # the training split.
        n_points = len(testY)
        xx = list(range(n_points))

        # An explicit figure is handed to Streamlit; plt.show() returns None,
        # which made st.pyplot fall back to the global figure.
        fig, ax = plt.subplots()
        ax.plot(xx, testY[:n_points], 'o', color='r', label='y')
        ax.plot(xx,
                predict[:n_points],
                color='b',
                linewidth=2,
                label='predicted y')
        ax.plot(xx,
                n_predict[:n_points],
                color='k',
                linewidth=2,
                label='predicted y with normalize')
        ax.set_ylabel(target_label)
        ax.set_xlabel('Line number in dataset')
        ax.legend(loc=4)
        ax.set_title(reg_type)

        st.pyplot(fig)
        # Release the figure so reruns do not pile up open figures.
        plt.close(fig)

    def model_linear_Regression(trainX, trainY, testX, testY):
        """Fit ordinary least squares on the training split and predict ``testX``.

        ``testY`` is not used; it is kept so all ``model_*`` helpers share one
        signature. Returns the model's predictions for ``testX``.
        """
        # ``normalize=False`` was the default and the parameter was removed in
        # scikit-learn 1.2; omitting it behaves the same on every release.
        model = LinearRegression()
        model.fit(trainX, trainY)
        return model.predict(testX)

    def model_ridge_Regression(trainX, trainY, testX, testY):
        """Fit a ridge regression on scaled features and predict ``testX``.

        ``testY`` is not used; it is kept so all ``model_*`` helpers share one
        signature. Returns the model's predictions for ``testX``.
        """
        # Local imports keep this helper self-contained.
        from sklearn.pipeline import make_pipeline
        from sklearn.preprocessing import StandardScaler

        # ``Ridge(normalize=True)`` was removed in scikit-learn 1.2. This
        # follows the migration recipe from the 1.0 deprecation notice: scale
        # with StandardScaler(with_mean=False) and multiply the former alpha
        # (1.0, the default) by the number of training samples.
        n_samples = len(trainX)
        model = make_pipeline(StandardScaler(with_mean=False),
                              Ridge(alpha=1.0 * n_samples))
        model.fit(trainX, trainY)
        return model.predict(testX)

    def model_lasso(trainX, trainY, testX, testY):
        """Fit a Lasso regression on the training split and predict ``testX``.

        ``testY`` is not used; it is kept so all ``model_*`` helpers share one
        signature. Returns the model's predictions for ``testX``.
        """
        # ``normalize=False`` was the default and the parameter was removed in
        # scikit-learn 1.2; omitting it behaves the same on every release.
        model = Lasso()
        model.fit(trainX, trainY)
        return model.predict(testX)

    def model_random_forest(trainX, trainY, testX, testY):
        """Fit a random-forest regressor (absolute-error splits) and predict.

        ``testY`` is not used; it is kept so all ``model_*`` helpers share one
        signature. Returns the model's predictions for ``testX``.
        """
        # "mae" was renamed to "absolute_error" in scikit-learn 1.0 and the
        # old spelling was removed in 1.2.
        model = RandomForestRegressor(criterion="absolute_error",
                                      bootstrap=True)
        model.fit(trainX, trainY)
        return model.predict(testX)

    st.title("Анализ данных & Машинное обучение")
    st.subheader("Загрузите файл с данными для анализа")

    file = get_file()
    if file:
        data = pd.read_csv(file, sep=',')

        # ---- Inspect the raw table ----
        if st.checkbox("Показать данные"):
            num = st.slider("Колличество строк для отображеия ", 5,
                            data.shape[0])
            st.dataframe(data.head(num))

        if st.checkbox("Заголовки столбцов"):
            st.write(data.columns)

        if st.checkbox("Показать размерность"):
            data_dm = st.radio("Размерность по", ("Строкам", "Столбцам"))
            if data_dm == "Столбцам":
                st.write("Число столбцов: " + str(data.shape[1]))
            elif data_dm == "Строкам":
                st.write("Число строк: " + str(data.shape[0]))

        if st.checkbox("Типы данных"):
            st.write(data.dtypes)

        if st.checkbox("Статистика по значениям"):
            st.write(data.describe().T)

        if st.checkbox("Рассмотреть отдельные столбцы"):
            sltd_columns = st.multiselect("Select", data.columns.tolist())
            st.dataframe(data[sltd_columns])
            # Everything below works on the narrowed frame, but only once at
            # least one column is picked. An empty selection used to replace
            # ``data`` with an empty frame and break every later step.
            if sltd_columns:
                data = data[sltd_columns]

        # ---- Visualisation ----
        # Every Matplotlib figure is created explicitly, passed to st.pyplot
        # and closed afterwards, instead of relying on the global figure.
        st.header("Визуализация данных")
        if st.checkbox("Построить корреляционную матрицу"):
            fig, ax = plt.subplots(figsize=(10, 10))
            ax.set_title('Correlation between different fearures')
            sns.heatmap(data.corr(),
                        vmax=1,
                        square=True,
                        annot=True,
                        cmap='gray_r',
                        ax=ax)
            st.pyplot(fig)
            plt.close(fig)

        st.subheader("Построение графиков")
        # Read later by the ML section and by save_train_test_data_xlsx.
        all_clummn_names = data.columns.tolist()
        plot_type = st.selectbox("Выберите тип графика",
                                 ["area", "bar", "line", "hist", "box", "kde"])
        selected_columns = st.multiselect(
            "Выберите столбцы для построения графика", all_clummn_names)

        if st.button("Построить график"):
            st.success("Построение графика {} для {}".format(
                plot_type, selected_columns))
            plot_data = data[selected_columns]
            if plot_type == 'area':
                st.area_chart(plot_data)
            elif plot_type == 'bar':
                st.bar_chart(plot_data)
            elif plot_type == 'line':
                st.line_chart(plot_data)
            elif plot_type:
                # hist / box / kde go through pandas' Matplotlib backend.
                fig, ax = plt.subplots()
                plot_data.plot(kind=plot_type, ax=ax)
                st.pyplot(fig)
                plt.close(fig)

        if st.checkbox("Круговая диаграмма"):
            if st.button("Построить круговую диаграмму"):
                st.success("Поостроение круговой диаграммы")
                # Value counts of the last column.
                fig, ax = plt.subplots()
                data.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%",
                                                         ax=ax)
                st.pyplot(fig)
                plt.close(fig)

        if st.checkbox("PCA проекция"):
            scaler = StandardScaler()
            pca = decomposition.PCA(n_components=2)
            X_reduced = pca.fit_transform(scaler.fit_transform(data))
            fig, ax = plt.subplots(figsize=(6, 6))
            # No colour values are supplied, so no colormap is passed either
            # (Matplotlib ignores ``cmap`` without ``c`` and warns).
            ax.scatter(X_reduced[:, 0],
                       X_reduced[:, 1],
                       edgecolor='none',
                       alpha=0.7,
                       s=40)
            ax.set_title('Feautures PCA projection')
            st.pyplot(fig)
            plt.close(fig)

        if st.checkbox("TSNE проекция"):
            scaler = StandardScaler()
            tsne = TSNE(random_state=17)
            tsne_representation = tsne.fit_transform(
                scaler.fit_transform(data))
            fig, ax = plt.subplots(figsize=(6, 6))
            ax.scatter(tsne_representation[:, 0],
                       tsne_representation[:, 1],
                       edgecolor='none',
                       alpha=0.7,
                       s=40)
            ax.set_title('Feautures T-sne projection ')
            st.pyplot(fig)
            plt.close(fig)
        st.subheader("Применение методов машинного обучения")
        if st.checkbox("Выбрать тип решаемой задачи"):
            problem_type = st.radio(
                "Выберите тип задачи",
                ("Регрессия", "Кластеризация", "Классификация"))

            if problem_type == "Регрессия":
                st.write("Выберите столбец у для задачи регрессии")
                selected_y = st.multiselect("", all_clummn_names)
                reg_type = st.radio("Выберите алгоритм решения",
                                    ("Все", "Линейная регрессия", "Ridge",
                                     "Lasso", "Случайный лес"))
                # Capped at 99 %: train_test_split rejects a fractional
                # test_size of 1.0, so 100 % could never work.
                test_size_slider = st.slider(
                    "Выберите размер тестовой выборки %", 1, 99)
                if st.checkbox("Выполнить"):

                    if selected_y:
                        st.success(
                            "Столбец {} выбран успешно".format(selected_y))

                        train, test = train_test_split(
                            data, test_size=test_size_slider / 100)
                        # ``columns=`` replaces the positional axis argument,
                        # which pandas 2.x no longer accepts.
                        trainX = np.array(train.drop(columns=selected_y))
                        trainY = np.array(train[selected_y])
                        testX = np.array(test.drop(columns=selected_y))
                        testY = np.array(test[selected_y])

                        st.write(selected_y)

                        # Row-normalised copies, computed once for all models.
                        normalized_trainX = preprocessing.normalize(trainX)
                        normalized_testX = preprocessing.normalize(testX)

                        # Display order used when "Все" (all) is selected.
                        regressors = (
                            ("Линейная регрессия", model_linear_Regression),
                            ("Ridge", model_ridge_Regression),
                            ("Случайный лес", model_random_forest),
                            ("Lasso", model_lasso),
                        )
                        for model_label, fit_predict in regressors:
                            if reg_type not in ("Все", model_label):
                                continue
                            # Each model runs twice: on the normalised and on
                            # the raw features; both are plotted and scored.
                            n_predictions = fit_predict(
                                normalized_trainX, trainY, normalized_testX,
                                testY)
                            predictions = fit_predict(trainX, trainY, testX,
                                                      testY)
                            regression_plot_show(trainX, testY, predictions,
                                                 n_predictions, model_label,
                                                 selected_y)
                            regression_errors_values(testY, predictions,
                                                     n_predictions)
                            save_train_test_data_xlsx(trainX, trainY, testX,
                                                      testY, predictions)
                    else:
                        st.warning("Выберите стоббец у")

            elif problem_type == "Кластеризация":
                # The minimum and the initial value are both 3, as before
                # (the bare positional 3 was ``min_value``).
                n_clusters = st.number_input("Введите количество кластеров:",
                                             min_value=3,
                                             value=3)
                dataset = data.copy()

                scaler = StandardScaler()
                X = scaler.fit_transform(dataset)
                km = KMeans(n_clusters=n_clusters)

                # Fit on the scaled features and attach the labels as a column.
                dataset['cluster'] = km.fit_predict(X)
                st.write(dataset['cluster'])
                st.write(dataset)

                def show_cluster_projection(points, title):
                    """Scatter a 2-D projection coloured by cluster label."""
                    fig, ax = plt.subplots(figsize=(6, 6))
                    # plt.get_cmap replaces plt.cm.get_cmap, which was removed
                    # in Matplotlib 3.9.
                    scatter = ax.scatter(points[:, 0],
                                         points[:, 1],
                                         edgecolor='none',
                                         alpha=0.7,
                                         s=40,
                                         c=dataset['cluster'],
                                         cmap=plt.get_cmap(
                                             'nipy_spectral', 10))
                    fig.colorbar(scatter, ax=ax)
                    ax.set_title(title)
                    # Pass the figure explicitly; plt.show() returns None.
                    st.pyplot(fig)
                    plt.close(fig)

                if st.checkbox("PCA"):
                    pca = decomposition.PCA(n_components=2)
                    show_cluster_projection(pca.fit_transform(X),
                                            'Feautures PCA projection')

                if st.checkbox("TSNE"):
                    tsne = TSNE(random_state=17)
                    show_cluster_projection(tsne.fit_transform(X),
                                            'Feautures T-sne projection ')

                save_exel(dataset, "Сохранить результаты xlsx File")

            elif problem_type == "Классификация":
                classification_type = st.radio(
                    "Выберите алгоритм решения",
                    ("Все", "KNeighborsClassifier", "SVC_model"))
                st.write("Выберите столбец у для задачи классификации")
                selected_y = st.multiselect("", all_clummn_names)
                if st.button("Выбрать"):
                    st.success("Столбец {} выбран успешно".format(selected_y))

                if not selected_y:
                    # Nothing can be fitted without a target; previously the
                    # split below crashed until a column was chosen.
                    st.warning("Выберите столбец у")
                else:
                    y = np.array(data[selected_y])
                    if y.shape[1] == 1:
                        # Single target: pass the 1-D vector the classifiers
                        # expect.
                        y = y.ravel()
                    # ``columns=`` replaces the positional axis argument,
                    # which pandas 2.x no longer accepts.
                    X = np.array(data.drop(columns=selected_y))

                    # 20 % of the rows are held out for testing; the fixed
                    # random_state makes the split reproducible.
                    X_train, X_test, y_train, y_test = train_test_split(
                        X, y, test_size=0.20, random_state=27)

                    # (caption, predictions) pairs, in display order.
                    evaluated = []

                    if classification_type in ("Все", "SVC_model"):
                        SVC_model = svm.SVC()
                        SVC_model.fit(X_train, y_train)
                        evaluated.append(
                            ("Оценка точности классификатора SVC_model",
                             SVC_model.predict(X_test)))

                    if classification_type in ("Все", "KNeighborsClassifier"):
                        st.write(
                            "В KNN-модели нужно указать параметр n_neighbors. Это число точек, на которое будет смотреть классификатор, чтобы определить, к какому классу принадлежит новая точка"
                        )
                        # Bounded by the training size: n_neighbors may not
                        # exceed the number of fitted samples.
                        nbr = st.slider("Число точек ", 3, len(X_train))
                        KNN_model = KNeighborsClassifier(n_neighbors=nbr)
                        KNN_model.fit(X_train, y_train)
                        evaluated.append(
                            ("Оценка точности классификатора KNeighborsClassifier",
                             KNN_model.predict(X_test)))

                    for caption, prediction in evaluated:
                        # scikit-learn metrics take (y_true, y_pred). The old
                        # swapped order transposed the confusion matrix and
                        # exchanged precision and recall in the report.
                        st.write(caption)
                        st.write(accuracy_score(y_test, prediction))
                        st.write(
                            "Матрица неточности и отчёт о классификации дадут больше информации о производительности"
                        )
                        st.write(confusion_matrix(y_test, prediction))
                        st.write(classification_report(y_test, prediction))
                        st.write(prediction.tolist())

        if st.button("Завершить работу"):
            st.balloons()
Beispiel #24
0
def main():
    st.write("""
# Heart Failure Prediction App

This app predicts the **Heart Failure** for a patient.

Data obtained from [here](https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-020-1023-5).
""")
    activities = ["EDA", "Plot", "Model Building", "Predict", "About"]

    choice = st.sidebar.selectbox("Select Activity", activities)

    if choice == 'EDA':
        st.subheader("Exploratory Data Analysis")
        df = pd.read_csv('heart_failure_clinical_records_dataset.csv')

        # Upload-based alternative, left disabled by the author:
        # data = st.file_uploader("Upload Dataset", type=["csv", "txt"])
        # if data is not None:
        #     df = pd.read_csv(data)
        st.dataframe(df)

        # Computed up front: the column picker below needs it even when the
        # "Show columns" box is not ticked (that used to raise NameError).
        all_columns = df.columns.to_list()

        if st.checkbox("Show shape"):
            st.write(df.shape)

        if st.checkbox("Show columns"):
            st.write(all_columns)

        if st.checkbox("Select Columns To Show"):
            selected_columns = st.multiselect("Select Columns", all_columns)
            st.dataframe(df[selected_columns])

        if st.checkbox("Show summary"):
            st.write(df.describe())

        if st.checkbox("Show value counts"):
            # Distribution of the last column.
            st.write(df.iloc[:, -1].value_counts())

    elif choice == 'Plot':
        st.subheader("Data Visualization")

        df = pd.read_csv('heart_failure_clinical_records_dataset.csv')
        st.dataframe(df)

        # Matplotlib-backed plots hand their own figure to st.pyplot instead
        # of calling it without arguments (global-figure use is deprecated).
        # clear_figure=True keeps the old "wipe after drawing" behaviour, and
        # the Axes object itself is no longer written to the page.
        if st.checkbox("Correlation with Seaborn"):
            heatmap_ax = sns.heatmap(df.corr())
            st.pyplot(heatmap_ax.get_figure(), clear_figure=True)

        all_columns = df.columns.tolist()

        if st.checkbox("Pie Chart"):
            colums_to_plot = st.selectbox("Select 1 column to plot",
                                          all_columns)
            pie_ax = df[colums_to_plot].value_counts().plot.pie()
            st.pyplot(pie_ax.get_figure(), clear_figure=True)

        type_of_plot = st.selectbox(
            "Select Type of Plot",
            ["area", "bar", "line", "hist", "box", "kde"])
        selected_columns_names = st.multiselect("selct Columns To plot",
                                                all_columns)

        if st.button("Generate Plot"):
            st.success("Generating Customizable Plot of {} for {}".format(
                type_of_plot, selected_columns_names))

            cust_data = df[selected_columns_names]

            # Native Streamlit charts.
            if type_of_plot == "area":
                st.area_chart(cust_data)

            elif type_of_plot == "bar":
                st.bar_chart(cust_data)

            elif type_of_plot == "line":
                st.line_chart(cust_data)

            # hist / box / kde go through pandas' Matplotlib backend.
            elif type_of_plot:
                cust_ax = cust_data.plot(kind=type_of_plot)
                st.pyplot(cust_ax.get_figure(), clear_figure=True)

    elif choice == 'Model Building':
        st.subheader("Building ML Model")

        df = pd.read_csv('heart_failure_clinical_records_dataset.csv')
        st.dataframe(df)

        # Features are every column but the last; the last one is the label.
        X = df.iloc[:, 0:-1]
        Y = df.iloc[:, -1]
        seed = 7

        # Candidate models; the alternatives were left disabled by the author.
        models = []
        # models.append(("LR", LogisticRegression()))
        # models.append(("LDA", LinearDiscriminantAnalysis()))
        # models.append(("KNN", KNeighborsClassifier()))
        # models.append(('CART', DecisionTreeClassifier()))
        # models.append(('NB', GaussianNB()))
        # models.append(('SVM', SVC(probability=True)))
        models.append(('RFC', RandomForestClassifier(n_estimators=100)))
        # models.append(('GBC', GradientBoostingClassifier()))

        scoring = 'accuracy'
        all_models = []

        for name, model in models:
            # shuffle=True is required for random_state to apply; recent
            # scikit-learn raises ValueError when a seed is given without it.
            kfold = model_selection.KFold(n_splits=10,
                                          shuffle=True,
                                          random_state=seed)
            cv_results = model_selection.cross_val_score(model,
                                                         X,
                                                         Y,
                                                         cv=kfold,
                                                         scoring=scoring)
            all_models.append({
                "model_name": name,
                "model_accuracy": cv_results.mean(),
                "standard_deviation": cv_results.std()
            })

            # Refit on the full data and persist it as '<name>.pkl', the file
            # name the 'Predict' page loads.
            model.fit(X, Y)
            with open(name + '.pkl', 'wb') as model_file:
                pickle.dump(model, model_file)

        st.dataframe(
            pd.DataFrame(
                [(entry["model_name"], entry["model_accuracy"],
                  entry["standard_deviation"]) for entry in all_models],
                columns=['Model Name', 'Model Accuracy',
                         'Standard Deviation']))

        st.json(all_models)

    elif choice == 'Predict':
        models = []
        # models.append("LR.pkl")
        # models.append("LDA.pkl")
        # models.append("KNN.pkl")
        # models.append('CART.pkl')
        # models.append('NB.pkl')
        # models.append('SVM.pkl')
        models.append('RFC.pkl')

        # models.append('GBC.pkl')

        def user_input_features():
            """Collect the twelve patient inputs and return them as one row.

            Widgets are rendered in the order of the dictionary below. The
            result is a single-row ``DataFrame`` (index 0) whose columns are
            the dictionary keys.
            """
            yes_no = (1, 0)
            answers = {
                'age':
                st.slider('Age of the patient(Years)', 40, 95, 50),
                'anaemia':
                st.selectbox(
                    'Anaemia-Decrease of red blood cells or hemoglobin(True-1, False-0)',
                    yes_no),
                'creatinine_phosphokinase':
                st.slider(
                    'Creatinine phosphokinase-Level of the CPK enzyme in the blood(mcg/L)',
                    23, 7861, 3300),
                'diabetes':
                st.selectbox(
                    'Diabetes-If the patient has diabetes(True-1, False-0)',
                    yes_no),
                'ejection_fraction':
                st.slider('Ejection fraction-Percentage of blood leaving', 14,
                          80, 30),
                'high_blood_pressure':
                st.selectbox(
                    'High blood pressure-If a patient has hypertension(True-1, False-0)',
                    yes_no),
                'platelets':
                st.slider('Platelets-Platelets in the blood(kiloplatelets/mL)',
                          25100, 850000, 40000),
                'serum_creatinine':
                st.slider(
                    'Serum creatinine-Level of creatinine in the blood(mg/dL)',
                    0.5, 9.4, 1.2),
                'serum_sodium':
                st.slider('Serum sodium-Level of sodium in the blood(mEq/L)',
                          113, 148, 120),
                'sex':
                st.selectbox('Sex-Woman or Man(Man-1,Women-0)', yes_no),
                'smoking':
                st.selectbox('Smoking-If the patient smokes(True-1, False-0)',
                             yes_no),
                'time':
                st.slider('Time-Follow-up period(Days)', 4, 285, 100),
            }
            return pd.DataFrame(answers, index=[0])

        input_df = user_input_features()
        st.dataframe(input_df)
        for name in models:
            # NOTE: unpickling runs arbitrary code. Only load files written by
            # the 'Model Building' page of this app.
            try:
                with open(name, 'rb') as model_file:
                    model = pickle.load(model_file)
            except FileNotFoundError:
                # The model is only written once 'Model Building' has run.
                st.error("Model file '{}' not found. "
                         "Run 'Model Building' first.".format(name))
                continue

            prediction_proba = model.predict_proba(input_df)
            # Strip the '.pkl' suffix for the caption.
            st.write('{} Predictions:'.format(name[0:-4]))
            st.write(prediction_proba)
            # Index of the more probable class for the single input row.
            index = np.argmax(prediction_proba)
            if index == 0:
                st.write('Not Dead')
            else:
                st.write('Dead')

    elif choice == 'About':
        st.subheader("About")
        st.write("Made By Rishab Koul with the Streamlit Library")
Beispiel #25
0
def main():
    """Render the Streamlit dataset-explorer page.

    The page is rebuilt from the current widget state on every rerun; each
    section is gated by a checkbox. The function reads the module-level
    ``df`` (the dataset) and ``numerics`` (the dtype selector passed to
    ``select_dtypes`` for numeric columns). The anomaly-detection sections
    access ``df.binary_result``, so that column must exist when they run.

    Widget nesting (kept as in the original layout): the "customizable
    plot" and "Alarm Report" sidebar toggles are only created while "plot
    of value counts" is ticked, and "Deviations" additionally requires
    "customizable plot".
    """
    st.header("project Dataset explorer")
    st.sidebar.header("OPTIONS")
    all_cols = df.columns.values
    numeric_cols = df.select_dtypes(include=numerics).columns.values

    if st.sidebar.checkbox("Data preview", True):
        st.subheader("Data preview")
        st.markdown(
            f"Shape of dataset : {df.shape[0]} rows, {df.shape[1]} columns")
        if st.checkbox("Data types"):
            st.dataframe(df.dtypes)
        if st.checkbox("Data Summary"):
            st.write(df.describe())

    if st.sidebar.checkbox("Pattern distribution", False):
        st.subheader("Plot numeric column distribution")
        # st.echo() shows the source of its body in the app, so the body is
        # left exactly as written (no comments added inside it).
        with st.echo():
            col = st.selectbox("Choose a column to display", numeric_cols)
            n_bins = st.number_input("Max number of bins ?", 5, 100, 10)
            chart = (alt.Chart(df).mark_bar().encode(
                alt.X(f"{col}:Q", bin=alt.Bin(maxbins=n_bins)),
                alt.Y("count()")))
            st.altair_chart(chart)
        st.markdown("---")

    if st.sidebar.checkbox("Scatterplot", False):
        st.subheader("Scatterplot")
        selected_cols = st.multiselect("Choose 2 columns :", numeric_cols)
        # Only draw once exactly two numeric columns have been picked.
        if len(selected_cols) == 2:
            color_by = st.selectbox("Color by column:",
                                    all_cols,
                                    index=len(all_cols) - 1)
            col1, col2 = selected_cols
            chart = (alt.Chart(df).mark_circle(size=20).encode(
                alt.X(f"{col1}:Q"), alt.Y(f"{col2}:Q"),
                alt.Color(f"{color_by}")).interactive())
            st.altair_chart(chart)
        st.markdown("---")

    # Seaborn correlation heatmap; the mask hides the upper triangle
    # (diagonal included) so each pair is shown once.
    if st.sidebar.checkbox("Correlation plot"):
        st.subheader("Correlation plot")
        cor = df.corr()
        mask = np.zeros_like(cor)
        mask[np.triu_indices_from(mask)] = True
        plt.figure(figsize=(12, 10))
        with sns.axes_style("white"):
            st.write(
                sns.heatmap(cor,
                            annot=True,
                            linewidth=2,
                            mask=mask,
                            cmap="magma"))
            st.pyplot()

    # Pie plot of the value counts of the last column.
    if st.sidebar.checkbox("pie plot"):
        st.subheader("Pie plot")
        st.success("Generating A pie plot")
        st.write(df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%"))
        st.pyplot()

    if st.sidebar.checkbox("plot of value counts"):
        st.subheader("Groupby columns")
        st.text("value counts by target")
        all_columns_name = df.columns.tolist()
        primary_col = st.selectbox("primary columns to groupby",
                                   all_columns_name)
        selected_columns_name = st.multiselect("select columns to plot",
                                               all_columns_name)
        plot_requested = st.button("plot")
        if plot_requested:
            st.text("Generate value plot")
        # Grouped counts are drawn only when the button was pressed AND
        # columns were chosen; every other case falls back to the value
        # counts of the last column. Previously, pressing the button with
        # no columns selected left vc_plot unbound (NameError).
        if plot_requested and selected_columns_name:
            vc_plot = df.groupby(primary_col)[selected_columns_name].count()
        else:
            vc_plot = df.iloc[:, -1].value_counts()
        st.write(vc_plot.plot(kind="bar"))
        st.pyplot()

        # Seeded before the optional branch below so the "Inliers &
        # Outliers" section can always use it; it used to be assigned only
        # inside the "customizable plot" branch, which raised NameError
        # when that checkbox was left unticked.
        significant_features = ["measurement"]

        # Customizable plot
        if st.sidebar.checkbox("customizable plot", False):
            st.subheader("Deviation")
            columns_names = df.columns.tolist()
            type_of_plot = st.selectbox(
                "select type of plot",
                ["area", "bar", "line", "hist", "box", "kde"])
            selected_columns_names = st.multiselect("select column to plot",
                                                    columns_names)

            if st.button("Show plot"):
                st.success("Generating customizable plot of {} for {}".format(
                    type_of_plot, selected_columns_names))

                # area/bar/line use Streamlit's native charts; the other
                # kinds go through pandas' matplotlib plotting.
                if type_of_plot == 'area':
                    cust_data = df[selected_columns_names]
                    st.area_chart(cust_data)
                elif type_of_plot == 'bar':
                    cust_data = df[selected_columns_names]
                    st.bar_chart(cust_data)
                elif type_of_plot == 'line':
                    cust_data = df[selected_columns_names]
                    st.line_chart(cust_data)
                elif type_of_plot:
                    cust_plot = df[selected_columns_names].plot(
                        kind=type_of_plot)
                    st.write(cust_plot)
                    st.pyplot()

            if st.sidebar.checkbox("Deviations"):
                st.subheader("Deviation plot")
                # One figure per feature, overlaying the distribution of
                # rows with binary_result == 1 and binary_result == 0.
                for feature in ['time', 'measurement', 'control_mode']:
                    ax = plt.subplot()
                    st.write(
                        sns.distplot(df[feature][df.binary_result == 1],
                                     bins=50,
                                     label='Anormal',
                                     kde_kws={'bw': 0.02}))
                    st.write(
                        sns.distplot(df[feature][df.binary_result == 0],
                                     bins=50,
                                     label='Normal',
                                     kde_kws={'bw': 0.02}))
                    ax.set_xlabel('')
                    ax.set_title('histogram of feature: ' + str(feature))
                    plt.legend(loc='best')
                    st.pyplot()

            def ztest(feature):
                """Return the z-score of *feature* between the two classes.

                Compares the mean over ``falsenegative`` rows with the mean
                over ``falsepositive`` rows, scaled by the standard error
                computed from ``falsepositive`` and ``sample_size``.
                """
                mean = falsepositive[feature].mean()
                std = falsepositive[feature].std()
                zScore = (falsenegative[feature].mean() -
                          mean) / (std / np.sqrt(sample_size))
                return zScore

            columns = df.drop('binary_result', axis=1).columns
            falsepositive = df[df.binary_result == 0]
            falsenegative = df[df.binary_result == 1]
            sample_size = len(falsepositive)
            # Absolute z-score at or above which a column is reported.
            setpoint = 70

            for i in columns:
                z_value = ztest(i)

                if (abs(z_value) >= setpoint):
                    st.write(i, " is critical alarm")
                    significant_features.append(i)

        st.subheader("Inliers & Outliers of Data")
        significant_features.append('binary_result')
        # NOTE(review): y is not consumed anywhere below in this function.
        y = df[significant_features]

        # Rows labelled 0 are treated as inliers and rows labelled 1 as
        # outliers; the label column itself is dropped before fitting.
        inliers = df[df.binary_result == 0]
        ins = inliers.drop(['binary_result'], axis=1)

        outliers = df[df.binary_result == 1]
        outs = outliers.drop(['binary_result'], axis=1)

        # Bare expression kept as-is: Streamlit "magic" may render it in
        # the app when enabled -- TODO confirm whether that is relied on.
        ins.shape, outs.shape

        def falsepositive_accuracy(values):
            """Return the share of predictions equal to 1, rounded to 4 places."""
            tp = list(values).count(1)
            total = values.shape[0]
            accuracy = np.round(tp / total, 4)

            return accuracy

        def falsenegative_accuracy(values):
            """Return the share of predictions equal to -1, rounded to 4 places."""
            tn = list(values).count(-1)
            total = values.shape[0]
            accuracy = np.round(tn / total, 4)

            return accuracy

        # Both detectors are fitted on the inlier rows only and then asked
        # to predict on the inlier and outlier sets separately.
        st.subheader("Accuracy score For Isolation forest")
        ISF = IsolationForest(random_state=42)
        ISF.fit(ins)

        falsepositive_isf = ISF.predict(ins)
        falsenegative_isf = ISF.predict(outs)

        in_accuracy_isf = falsepositive_accuracy(falsepositive_isf)
        out_accuracy_isf = falsenegative_accuracy(falsenegative_isf)
        st.write("Accuracy in Detecting falsepositive Alarm:", in_accuracy_isf)
        st.write("Accuracy in Detecting falsenegative Alarm:",
                 out_accuracy_isf)

        st.subheader("Accuracy score For Local Outlier Factor")
        LOF = LocalOutlierFactor(novelty=True)
        LOF.fit(ins)

        falsepositive_lof = LOF.predict(ins)
        falsenegative_lof = LOF.predict(outs)

        in_accuracy_lof = falsepositive_accuracy(falsepositive_lof)
        out_accuracy_lof = falsenegative_accuracy(falsenegative_lof)
        st.write("Accuracy in Detecting falsepositive Alarm :",
                 in_accuracy_lof)
        st.write("Accuracy in Detecting falsenegative Alarm:",
                 out_accuracy_lof)

        if st.sidebar.checkbox("Alarm Report", False):
            st.subheader("classification of Alarm")
            # Side-by-side horizontal bars: one axis per detector.
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=[16, 3])
            ax1.set_title("Accuracy of Isolation Forest", fontsize=20)
            st.write(
                sns.barplot(x=[in_accuracy_isf, out_accuracy_isf],
                            y=['falsepositive Alarm', 'falsenegative Alarm'],
                            label="classifiers",
                            color="b",
                            ax=ax1))
            ax1.set(xlim=(0, 1))

            ax2.set_title("Accuracy of Local Outlier Factor", fontsize=20)
            st.write(
                sns.barplot(x=[in_accuracy_lof, out_accuracy_lof],
                            y=['falsepositive Alarm', 'falsenegative Alarm'],
                            label="classifiers",
                            color="r",
                            ax=ax2))
            ax2.set(xlim=(0, 1))
            st.pyplot()
Beispiel #26
0
        # Slider value in the range 1..15 (default 5); the headings below
        # present it as the number of recent matches covered by the stats.
        rn = st.slider("", 1 , 15 , 5)
        # GetPerformanceStat is defined outside this snippet; it is unpacked
        # here into a table of stats and a display name (presumably the
        # player's name -- verify against its definition).
        df , name = GetPerformanceStat(int(PL_ID), rn)

        st.header(name + "'s Performance Stats")
        st.subheader('Select the options you want to see stats for')
        # NOTE(review): "chioce" is a misspelling of "choice"; it is used
        # consistently in the branches below.
        chioce = st.radio('', ('Overall','Raids','Tackles') )
        
        if chioce == 'Overall':
            # Total points stats
            st.subheader('Total points stats in recent '+str(rn)+' Matches')
            # Each checkbox shows the same three point columns in a
            # different form; the table view is transposed.
            if st.checkbox('Show Data'):
                st.write(df[['player_total_points','player_raid_points_total','player_tackle_points_total']].T )
            if st.checkbox('Show Bar Chart'):
                st.bar_chart(df[['player_total_points','player_raid_points_total','player_tackle_points_total']] )
            if st.checkbox('Show Area Chart'):
                st.area_chart(df[['player_total_points','player_raid_points_total','player_tackle_points_total']])

        elif chioce == 'Raids':
            # Raid stats
            # NOTE(review): the heading text lacks a space ("Statsin").
            st.subheader('Total Raids Statsin recent '+str(rn)+' Matches')
            if st.checkbox('Show Data'):
                st.write(df[['player_raids_total','player_raids_successful','player_raids_empty','player_raids_unsuccessful']].T )
            if st.checkbox('Show Bar Chart'):
                st.bar_chart(df[['player_raids_total','player_raids_successful','player_raids_empty','player_raids_unsuccessful']] )
            if st.checkbox('Show Area Chart'):
                st.area_chart(df[['player_raids_total','player_raids_successful','player_raids_empty','player_raids_unsuccessful']] )

        elif chioce == 'Tackles':
            # Tackle stats
            # NOTE(review): this heading still says "Raids" although the
            # branch is for tackles -- looks copy-pasted from the branch above.
            st.subheader('Total Raids Statsin recent '+str(rn)+' Matches')
            # The body of this checkbox is not visible in this snippet.
            if st.checkbox('Show Data'):
Beispiel #27
0
def main():
	"""Render the BPX Energy LE Forecast Tool page.

	Lets the user pick a CSV file from ``./datasets``, loads it with
	pandas, and exposes a set of checkbox/button driven views: raw rows,
	column names, shape, column subset, value counts of the last column,
	dtypes, summary statistics and several plots. Sidebar text is rendered
	at the end.
	"""
	st.title("BPX Energy LE Forecast Tool")
	st.subheader("SoHa Change Dev/Test")

	html_temp = """
	<div style="background-color:green;"><p style="color:white;font-size:50px;padding:10px">SoHa Change is Awesome</p></div>
	"""
	st.markdown(html_temp,unsafe_allow_html=True)

	def file_selector(folder_path='./datasets'):
		"""Return the path of the entry chosen from *folder_path* in a selectbox."""
		filenames = os.listdir(folder_path)
		selected_filename = st.selectbox("Select A file",filenames)
		return os.path.join(folder_path,selected_filename)

	filename = file_selector()
	st.info("You Selected {}".format(filename))

	# Read Data
	df = pd.read_csv(filename)

	# Show Dataset
	if st.checkbox("Show Dataset"):
		# The widget value used to be ignored (head(n=9) was hard-coded).
		# Integer min/value/step make number_input return an int; the
		# default of 9 keeps the row count that was shown before.
		number = st.number_input("Number of Rows to View",min_value=1,value=9,step=1)
		st.dataframe(df.head(n=int(number)))

	# Show Columns
	if st.button("Column Names"):
		st.write(df.columns)

	# Show Shape
	if st.checkbox("Shape of Dataset"):
		data_dim = st.radio("Show Dimension By ",("Rows","Columns"))
		if data_dim == 'Rows':
			st.text("Number of Rows")
			st.write(df.shape[0])
		elif data_dim == 'Columns':
			st.text("Number of Columns")
			st.write(df.shape[1])
		else:
			# Fallback for a value that is neither of the two radio options.
			st.write(df.shape)

	# Select Columns
	if st.checkbox("Select Columns To Show"):
		all_columns = df.columns.tolist()
		selected_columns = st.multiselect("Select",all_columns)
		new_df = df[selected_columns]
		st.dataframe(new_df)

	# Show Values (counts of the last column)
	if st.button("Value Counts"):
		st.text("Value Counts By Target/Class")
		st.write(df.iloc[:,-1].value_counts())

	# Show Datatypes
	if st.button("Data Types"):
		st.write(df.dtypes)

	# Show Summary
	if st.checkbox("Summary"):
		st.write(df.describe().T)

	## Plot and Visualization

	st.subheader("Data Visualization")
	# Correlation
	# Seaborn Plot
	if st.checkbox("Correlation Plot[Seaborn]"):
		st.write(sns.heatmap(df.corr(),annot=True))
		st.pyplot()

	# Pie Chart of the last column's value counts
	if st.checkbox("Pie Plot"):
		if st.button("Generate Pie Plot"):
			st.success("Generating A Pie Plot")
			st.write(df.iloc[:,-1].value_counts().plot.pie(autopct="%1.1f%%"))
			st.pyplot()

	# Count Plot
	if st.checkbox("Plot of Value Counts"):
		st.text("Value Counts By Target")
		all_columns_names = df.columns.tolist()
		primary_col = st.selectbox("Primary Columm to GroupBy",all_columns_names)
		selected_columns_names = st.multiselect("Select Columns",all_columns_names)
		if st.button("Plot"):
			st.text("Generate Plot")
			# Grouped counts when columns were chosen, otherwise the value
			# counts of the last column.
			if selected_columns_names:
				vc_plot = df.groupby(primary_col)[selected_columns_names].count()
			else:
				vc_plot = df.iloc[:,-1].value_counts()
			st.write(vc_plot.plot(kind="bar"))
			st.pyplot()

	# Customizable Plot

	st.subheader("Customizable Plot")
	all_columns_names = df.columns.tolist()
	type_of_plot = st.selectbox("Select Type of Plot",["area","bar","line","hist","box","kde"])
	selected_columns_names = st.multiselect("Select Columns To Plot",all_columns_names)

	if st.button("Generate Plot"):
		st.success("Generating Customizable Plot of {} for {}".format(type_of_plot,selected_columns_names))

		# area/bar/line are drawn with Streamlit's native charts
		if type_of_plot == 'area':
			cust_data = df[selected_columns_names]
			st.area_chart(cust_data)

		elif type_of_plot == 'bar':
			cust_data = df[selected_columns_names]
			st.bar_chart(cust_data)

		elif type_of_plot == 'line':
			cust_data = df[selected_columns_names]
			st.line_chart(cust_data)

		# The remaining kinds go through pandas' matplotlib plotting
		elif type_of_plot:
			cust_plot= df[selected_columns_names].plot(kind=type_of_plot)
			st.write(cust_plot)
			st.pyplot()

	if st.button("SoHa Change"):
		st.balloons()

	st.sidebar.header("LE Utility Tool")
	st.sidebar.info("The LE & Forecast Utility is a tool meant to improve the speed & consistency of production reporting for operations PEs.")

	st.sidebar.header("Get Datasets")
	# NOTE(review): the two adjacent literals concatenate to a markdown link
	# with an empty target -- the URL appears to be missing.
	st.sidebar.markdown("[Common ML Dataset Repo]("")")

	st.sidebar.header("Developed By")
	st.sidebar.info("*****@*****.**")
	st.sidebar.info("*****@*****.**")
	st.sidebar.text("SoHa Change Team lead: Kellen McLoughlin")
	st.sidebar.text("Web App maintained by MarkusJBPX")
Beispiel #28
0
""")

# Each entry pairs a user-facing Vietnamese heading (opening price, closing
# price, daily trading volume) with the chart function and the df column
# to draw under it.
for _heading, _draw, _column in (
    ("\n## Giá mở\n", st.line_chart, "Open"),
    ("\n## Giá đóng\n", st.line_chart, "Close"),
    ("\n## Số lượng giao dịch trong ngày\n", st.area_chart, "Volume"),
):
    st.write(_heading)
    _draw(getattr(df, _column))

file = st.file_uploader("Pick a file")

if file:
    df1 = pd.read_csv(file)
    # Take the Volume, Open and High columns, each as a single-column
    # selection converted to a NumPy array.
    Test_1, Test_2, Test_3 = (
        np.array(df1[[_name]]) for _name in ("Volume", "Open", "High"))

    # Take the Low column (the code for this step is not visible here)
Beispiel #29
0
def main():
    activities = ["EDA &VIZ", "Modelling"]
    choice = st.sidebar.selectbox("Select Activities", activities)
    if st.sidebar.checkbox('About'):
        st.sidebar.markdown("""
                           app work in progress .This is a beta release.
                           
                           version: b-0.0.1
                           
                           initial release:27/6/2020
                           
                           helpful suggestions are welcome.
                           
                           contact: [email protected]
                           """)

    if choice == 'EDA &VIZ':
        st.title('Play with ML')

        html_temp1 = """<img src="images/dobby1.jpeg" alt="It's dobby" width="120" height="150">"""
        st.markdown(html_temp1, unsafe_allow_html=True)
        html_temp = """
        <div style="background-color:coral;padding:12px">
        <h2 style="color:white;text-align:center;"> Play with ML App </h2>
        
        </div>
        """
        st.markdown(html_temp, unsafe_allow_html=True)
        st.markdown(
            'hey,tired of modelling and tuning ML Models,  wanna play with data & ML modles? Then upload a dataset here.. **_Dobby , a free elf_** is here for you '
        )
        st.subheader("Exploratory Data Analysis & Vizualization ")
        data = st.file_uploader("Upload a Dataset", type=["csv"])

        if data is not None:
            st.subheader('EDA')
            df = pd.read_csv(data)
            st.write('shape:', df.shape)

            if st.checkbox("Show Columns"):
                all_columns = df.columns.to_list()
                #st.write(all_columns)

            if st.checkbox("Null values"):
                st.write(df.isnull().sum())

            if st.checkbox("Information"):
                st.write(df.info())

            if st.checkbox("Summary"):
                st.write(df.describe())

            if st.checkbox("Show Selected Columns"):
                all_columns_names = df.columns.tolist()
                selected_columns = st.multiselect("Select Columns",
                                                  all_columns)
                df1 = df[selected_columns]
                #st.dataframe(df1)

            if st.checkbox("Correlation Plot(Seaborn)"):
                st.write(sns.heatmap(df.corr(), annot=True))
                st.pyplot()

            st.subheader('Data Visualization')

            if st.checkbox("Show Value Counts"):
                column = st.selectbox("Select a Column to show value counts",
                                      all_columns)
                st.write(df[column].value_counts())
                st.write(df[column].value_counts().plot(kind='bar'))
                st.pyplot()

            all_columns_names = df.columns.tolist()
            type_of_plot = st.selectbox("Select Type of Plot", [
                "area", "bar", "pie", "line", "hist", "box", "kde",
                "altair_chart"
            ])
            selected_columns_names = st.multiselect("Select Columns To Plot",
                                                    all_columns_names)

            if st.button("Generate Plot"):
                st.success("Generating   {} plot  for {}".format(
                    type_of_plot, selected_columns_names))

                if type_of_plot == 'area':
                    cust_data = df[selected_columns_names]
                    st.area_chart(cust_data)
                    st.pyplot()

                elif type_of_plot == 'bar':
                    cust_data = df[selected_columns_names]
                    st.bar_chart(cust_data)
                    st.pyplot()

                elif type_of_plot == "Pie Plot":
                    column_to_plot = st.selectbox("Select 1 Column",
                                                  selected_columns_names)
                    pie_plot = df[column_to_plot].value_counts().plot.pie(
                        autopct="%1.1f%%")
                    st.write(pie_plot)
                    st.pyplot()

                elif type_of_plot == 'line':
                    cust_data = df[selected_columns_names]
                    st.line_chart(cust_data)
                    st.pyplot()

                elif type_of_plot == 'altair_chart':
                    a = st.selectbox("Select X axis", all_columns)
                    b = st.selectbox("Select Y axis", all_columns)
                    c = st.selectbox("Select a column ", all_columns)
                    cust_data = pd.DataFrame([a, b, c])
                    c = alt.Chart(cust_data).mark_circle().encode(
                        x='a',
                        y='b',
                        size='c',
                        color='c',
                        tooltip=['a', 'b', 'c'])
                    st.altair_chart(c, use_container_width=True)
                    st.pyplot()

                elif type_of_plot:
                    cust_plot = df[selected_columns_names].plot(
                        kind=type_of_plot)
                    st.write(cust_plot)
                    st.pyplot()

    if choice == 'Modelling':
        html_temp = """
        <div style="background-color:coral;padding:12px">
        <h2 style="color:white;text-align:center;"> Play with ML App </h2>
        </div>
        """

        st.header('Training')
        st.markdown(
            "**_Hello Iam Dobby. Dobby has no master - Dobby is a free elf_**. Due to SARS-CoV-2 lockdown I dont have much work to do , So Iam here to make your model."
        )
        data = st.file_uploader("Upload a Dataset", type=["csv"])

        if data is not None:
            st.subheader('EDA')
            df = pd.read_csv(data)
            st.dataframe(df.head())
            st.write('shape:', df.shape)

            st.header('Data Preprocessing')

            all_columns = df.columns.tolist()
            features = st.multiselect("Select feature columns", all_columns)
            X = df[features]
            st.dataframe(X)
            st.write(X.head())
            st.write(X.shape)
            labels = st.selectbox("Select label column", all_columns)
            y = df[labels]
            st.dataframe(y)
            st.write(y.head())
            st.write(X.shape)

            all_columns = X.columns.tolist()

            if st.checkbox("Handling missing values"):

                radioval = st.radio("choose type", ('ffill', 'statistical'))
                if radioval == 'None':
                    print('handling missing values skipped')

                elif radioval == 'fbfill':
                    if st.checkbox("fbfill"):
                        X = X.ffill(axis=0)
                        X = X.ffill(axis=0)
                    st.markdown('**_missing values are fb filled_**')

                elif radioval == 'statistical':
                    if st.checkbox("handle with mean"):
                        selected_columns = st.multiselect(
                            "Select Columns to handle with mean ", all_columns)
                        X[selected_columns] = X[selected_columns].fillna(
                            X[selected_columns].mean(), inplace=True)
                        st.write('handled with mean')

                    elif st.checkbox("handle with median"):
                        selected_columns = st.multiselect(
                            "Select Columns to handle with median",
                            all_columns)
                        X[selected_columns] = X[selected_columns].fillna(
                            X[selected_columns].median(), inplace=True)
                        st.write('handled with median')

                    elif st.checkbox("handle with mode"):
                        selected_columns = st.multiselect(
                            "Select Columns to handle with mode", all_columns)
                        X[selected_columns] = X[selected_columns].fillna(
                            X[selected_columns].mode()[0], inplace=True)
                        st.write('handled with mode')
                    st.markdown(
                        '**_missing values are filled statistically_**')

                st.write('missing values:', X.isnull().sum())

            if st.checkbox("One hot encoding"):
                if st.checkbox("encode features"):
                    X = pd.get_dummies(X)
                    st.write("features are one hot encoded")
                if st.checkbox("encode labels"):
                    y = pd.get_dummies(y)
                    st.write("labels are one hot encoded")
                    st.dataframe(y)

            st.write('Train - val split')
            number = st.number_input('test split size',
                                     min_value=0.1,
                                     max_value=1.00)
            from sklearn.model_selection import train_test_split
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=number, random_state=0)
            st.write(X_train.shape)
            st.write(X_test.shape)

            if st.checkbox("Feature Scaling"):
                radioval = st.radio(
                    "choose type of feature scaling",
                    ('none', 'Standardization', 'Normalization'))
                if radioval == 'none':
                    st.write("you skipped feature scaling")
                if radioval == 'Standardization':
                    from sklearn.preprocessing import StandardScaler
                    sc_X = StandardScaler()
                    X_train = sc_X.fit_transform(X_train)
                    X_test = sc_X.transform(X_test)
                    #sc_y = StandardScaler()
                    #y_train = sc_y.fit_transform(y_train)
                if radioval == 'Normalization':
                    min_max_scaler = sklearn.preprocessing.MinMaxScaler()
                    X_train = min_max_scaler.fit_transform(X_train)
                    X_test = min_max_scaler.transform(X_test)

            st.header("Training")
            problem_types = ['Regression', 'Classification']
            problem_type = st.selectbox("Select Problem Type ", problem_types)
            st.sidebar.markdown("Hyperparameter Tuning")

            if problem_type == 'Classification':
                models = [
                    'Logistic Regression', 'KNN', 'SVM', 'DecisionTree',
                    'Random Forest', 'XgBoostClassifier'
                ]
                model = st.selectbox("Select  a model ", models)
                if model == 'Logistic Regression':
                    from sklearn.linear_model import LogisticRegression
                    classifier = LogisticRegression(random_state=0)

                if model == 'KNN':
                    n_neighbors = st.sidebar.slider('n_neighbors',
                                                    min_value=1,
                                                    max_value=5,
                                                    step=1)
                    p = st.sidebar.selectbox("P", [1, 2, 3, 4])
                    from sklearn.neighbors import KNeighborsClassifier
                    classifier = KNeighborsClassifier(n_neighbors=n_neighbors,
                                                      metric='minkowski',
                                                      p=p)

                if model == 'SVM':
                    from sklearn.svm import SVC
                    kernel_list = ['linear', 'poly', 'rbf', 'sigmoid']
                    kernel = st.sidebar.selectbox("P", kernel_list)
                    C = st.sidebar.slider('C',
                                          min_value=1,
                                          max_value=6,
                                          step=1)
                    degree = st.sidebar.slider('Degree',
                                               min_value=1,
                                               max_value=10,
                                               step=1)
                    classifier = SVC(kernel=kernel,
                                     C=C,
                                     random_state=0,
                                     degree=degree)

                if model == 'DecisionTree':
                    from sklearn.tree import DecisionTreeClassifier
                    criterion = st.sidebar.selectbox("criterion",
                                                     ["gini", "entropy"])
                    max_depth = st.sidebar.slider('max_depth',
                                                  min_value=1,
                                                  max_value=10,
                                                  step=1)
                    min_samples_leaf = st.sidebar.slider('min_samples_leaf',
                                                         min_value=1,
                                                         max_value=10,
                                                         step=1)
                    classifier = DecisionTreeClassifier(
                        criterion=criterion,
                        max_depth=max_depth,
                        min_samples_leaf=min_samples_leaf,
                        random_state=0)

                if model == 'Random Forest':
                    from sklearn.ensemble import RandomForestClassifier
                    criterion = st.sidebar.selectbox("criterion",
                                                     ["gini", "entropy"])
                    n_estimators = st.sidebar.number_input('n_estimators',
                                                           min_value=1,
                                                           max_value=500,
                                                           step=1)
                    max_depth = st.sidebar.slider('max_depth',
                                                  min_value=1,
                                                  max_value=10,
                                                  step=1)
                    classifier = RandomForestClassifier(
                        n_estimators=n_estimators,
                        criterion=criterion,
                        max_depth=max_depth,
                        random_state=0)

                if model == 'XgBoostClassifier':
                    from xgboost import XGBClassifier
                    n_estimators = st.sidebar.number_input('n_estimators',
                                                           min_value=1,
                                                           max_value=2000)
                    reg_lambda = st.sidebar.number_input('reg_lambda',
                                                         min_value=0.01,
                                                         max_value=5.00,
                                                         step=0.02)
                    max_depth = st.sidebar.slider('max_depth',
                                                  min_value=1,
                                                  max_value=10,
                                                  step=1)
                    colsample_bytree = st.sidebar.number_input(
                        'colsample_bytree',
                        min_value=0.50,
                        max_value=1.00,
                        step=0.05)
                    classifier = XGBClassifier(
                        n_estimators=n_estimators,
                        reg_lambda=reg_lambda,
                        max_depth=max_depth,
                        colsample_bytree=colsample_bytree)

                if st.button("Train"):
                    with st.spinner('model is training...'):
                        classifier.fit(X_train, y_train)
                    st.success('Model trained!')

                    y_pred = classifier.predict(X_test)
                    from sklearn.metrics import accuracy_score
                    acc = accuracy_score(y_test, y_pred)
                    st.write('val_accuracy:', acc)
                    from sklearn.metrics import confusion_matrix, classification_report
                    st.write(classification_report(y_test, y_pred))
                    cm = confusion_matrix(y_test, y_pred)
                    st.markdown("**_confusion matrix_**")
                    st.write(cm)
                    y_pred = pd.DataFrame(y_pred)
                    st.dataframe(y_pred)
                    st.write(y_pred[0].value_counts())
                    st.write(y_pred[0].value_counts().plot(kind='bar'))
                    st.pyplot()
                    st.balloons()

                def download_model(model):
                    """Render a link that lets the user download the pickled model.

                    The model is serialized with ``pickle``, base64-encoded and
                    embedded in a ``data:`` URI inside an HTML anchor; this
                    function itself does not write any file to disk.

                    Args:
                        model: The trained estimator to serialize. It must be
                            picklable.
                    """
                    output_model = pickle.dumps(model)
                    st.write("model saved as output_model ")
                    b64 = base64.b64encode(output_model).decode()
                    # The ``download`` attribute makes the browser save the payload
                    # under the announced file name instead of navigating to the
                    # data URI; ``application/octet-stream`` is the registered MIME
                    # type for arbitrary binary content. The base64 alphabet cannot
                    # break out of the quoted attribute, so the markup stays
                    # well-formed.
                    href = (
                        f'<a href="data:application/octet-stream;base64,{b64}" '
                        'download="output_model">Download Trained Model</a>'
                    )
                    # NOTE: the downloaded file is a pickle; it should only be
                    # unpickled by parties that trust its origin.
                    st.markdown(href, unsafe_allow_html=True)

                if st.button("save & Download model"):
                    download_model(classifier)

            if problem_type == 'Regression':
                models = [
                    'Linear Regression', 'SVR', 'DecisionTree',
                    'Random Forest', 'XgBoostRegression'
                ]
                model = st.selectbox("Select  a model ", models)

                if model == 'Linear Regression':
                    from sklearn.linear_model import LinearRegression
                    regressor = LinearRegression()

                if model == 'SVR':
                    from sklearn.svm import SVR
                    kernel_list = ['linear', 'poly', 'rbf', 'sigmoid']
                    kernel = st.sidebar.selectbox("P", kernel_list)
                    degree = st.sidebar.slider('Degree',
                                               min_value=1,
                                               max_value=10,
                                               step=1)
                    regressor = SVR(kernel=kernel, degree=degree)

                if model == 'DecisionTree':
                    from sklearn.tree import DecisionTreeRegressor
                    criterion = st.sidebar.selectbox(
                        "criterion", ["mse", "friedman_mse", "mae"])
                    max_depth = st.sidebar.slider('max_depth',
                                                  min_value=1,
                                                  max_value=10,
                                                  step=1)
                    min_samples_leaf = st.sidebar.slider('min_samples_leaf',
                                                         min_value=1,
                                                         max_value=10,
                                                         step=1)
                    regressor = DecisionTreeRegressor(
                        criterion=criterion,
                        max_depth=max_depth,
                        min_samples_leaf=min_samples_leaf,
                        random_state=0)

                if model == 'Random Forest':
                    from sklearn.ensemble import RandomForestRegressor
                    n_estimators = st.sidebar.number_input('n_estimators',
                                                           min_value=1,
                                                           max_value=500,
                                                           step=1)
                    max_depth = st.sidebar.slider('max_depth',
                                                  min_value=1,
                                                  max_value=10,
                                                  step=1)
                    criterion = st.sidebar.selectbox("criterion",
                                                     ["mse", "mae"])
                    regressor = RandomForestRegressor(
                        n_estimators=n_estimators,
                        criterion=criterion,
                        max_depth=max_depth,
                        random_state=0)

                if model == 'XgBoostRegression':
                    from xgboost import XGBRegressor
                    n_estimators = st.sidebar.number_input('n_estimators',
                                                           min_value=1,
                                                           max_value=2000)
                    reg_lambda = st.sidebar.number_input('reg_lambda',
                                                         min_value=0.01,
                                                         max_value=5.00,
                                                         step=0.02)
                    max_depth = st.sidebar.slider('max_depth',
                                                  min_value=1,
                                                  max_value=10,
                                                  step=1)
                    booster = st.sidebar.selectbox(
                        'booster', ["gbtree", "gblinear", "dart"])
                    learning_rate = st.sidebar.number_input('learning_rate',
                                                            min_value=0.05,
                                                            max_value=3.00,
                                                            step=0.01)
                    colsample_bytree = st.sidebar.number_input(
                        'colsample_bytree',
                        min_value=0.50,
                        max_value=1.00,
                        step=0.05)
                    regressor = XGBRegressor(n_estimators=n_estimators,
                                             learning_rate=learning_rate,
                                             booster=booster,
                                             reg_lambda=reg_lambda,
                                             max_depth=max_depth,
                                             colsample_bytree=colsample_bytree)

                if st.button("Train"):
                    with st.spinner('model is training...'):
                        regressor.fit(X_train, y_train)
                    st.success('Model trained!')

                    y_pred = regressor.predict(X_test)
                    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
                    mae_tr = mean_absolute_error(y_train,
                                                 regressor.predict(X_train))
                    mae = mean_absolute_error(y_test, y_pred)
                    mse = mean_squared_error(y_test, y_pred)
                    mse_tr = mean_squared_error(y_train,
                                                regressor.predict(X_train))
                    r2 = r2_score(y_test, y_pred)
                    r2_tr = r2_score(y_train, regressor.predict(X_train))
                    st.write('mean absolute error:')
                    st.write('train:', mae_tr, 'val:', mae)
                    st.write('mean squared error:')
                    st.write('train:', mse_tr, 'val:', mse)
                    st.write('r2:')
                    st.write('train:', r2_tr, 'val:', r2)
                    y_pred = pd.DataFrame(y_pred)
                    st.dataframe(y_pred)
                    st.balloons()

                def download_model(model):
                    """Offer the fitted regressor as a downloadable pickle file.

                    Serializes ``model`` with ``pickle``, base64-encodes the bytes
                    and renders an HTML anchor whose ``data:`` URI carries the
                    payload. No file is written to disk by this function.

                    Args:
                        model: The trained estimator to serialize. It must be
                            picklable.
                    """
                    output_model = pickle.dumps(model)
                    st.write("model saved as output_model ")
                    b64 = base64.b64encode(output_model).decode()
                    # Without a ``download`` attribute the browser chooses its own
                    # file name (or tries to open the data URI); naming the file
                    # keeps the link consistent with the message above.
                    # ``application/octet-stream`` is the registered MIME type for
                    # opaque binary data. Base64 output contains no quote or angle
                    # bracket characters, so it is safe inside the attribute.
                    href = (
                        f'<a href="data:application/octet-stream;base64,{b64}" '
                        'download="output_model">Download Trained Model</a>'
                    )
                    # NOTE: the downloaded file is a pickle; it should only be
                    # unpickled by parties that trust its origin.
                    st.markdown(href, unsafe_allow_html=True)

                if st.button("save & Download model"):
                    download_model(regressor)
Beispiel #30
0
# Pool names offered in the sidebar selector.
df = pd.DataFrame({
    'pool selection': ['Pool 1', 'Pool 2', 'Pool 3', 'Pool 4'],
})

option = st.sidebar.selectbox('Which pool do you want ?', df['pool selection'])

# Bare expression statement, kept as in the original; presumably rendered by
# Streamlit's "magic" feature (has no effect if magic is disabled).
'You selected : ', option

# Placeholders that are updated in place while the computation runs.
latest_iteration = st.empty()
bar = st.progress(0)

# Single starting point used to initialise the chart.
array = [0.0]

if st.button('Compute'):
    chart = st.area_chart(array)
    for i in range(100):

        # Random data to display
        latest_iteration.text(f'Computing {i+1}%')
        bar.progress(i + 1)

        # One-row frame holding the new sample. It is built from a list, not
        # a set literal: a set is unordered and is not a valid row container
        # for the DataFrame constructor.
        df = pd.DataFrame([abs((np.random.randn() + 10) / 10)])
        chart.add_rows(df)

        # Slow the loop down so the progress animation is visible.
        time.sleep(0.1)

    st.write("Finished !")