def main():
    """Semi-automated ML app built with Streamlit.

    A sidebar selectbox chooses the activity:

    * "EDA" -- upload a CSV and inspect its shape, columns, summary
      statistics, value counts of the last column, correlation plots and a
      pie plot of one column.
    * "Plots" -- upload a CSV and draw either a bar plot of the last column's
      value counts or a customizable plot of the selected columns.
    * "Model Building" -- upload a CSV, use the last column as the target and
      every other column as features, and report the 10-fold cross-validated
      accuracy of six classifiers as a table and/or JSON.
    * "About" -- listed in the menu; no content is rendered for it.
    """
    activities = ["EDA", "Plots", "Model Building", "About"]
    choice = st.sidebar.selectbox("Select Activities", activities)

    if choice == 'EDA':
        st.subheader("Exploratory Data Analysis")
        data = st.file_uploader("Upload a Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            # Bound up front: it used to be assigned only inside the
            # "Show Columns" branch, so ticking "Show Selected Columns" on
            # its own raised NameError.
            all_columns = df.columns.to_list()

            if st.checkbox("Show Shape"):
                st.write(df.shape)

            if st.checkbox("Show Columns"):
                st.write(all_columns)

            if st.checkbox("Summary"):
                st.write(df.describe())

            if st.checkbox("Show Selected Columns"):
                selected_columns = st.multiselect("Select Columns",
                                                  all_columns)
                st.dataframe(df[selected_columns])

            if st.checkbox("Show Value Counts"):
                st.write(df.iloc[:, -1].value_counts())

            if st.checkbox("Correlation Plot(Matplotlib)"):
                plt.matshow(df.corr())
                st.pyplot()

            if st.checkbox("Correlation Plot(Seaborn)"):
                st.write(sns.heatmap(df.corr(), annot=True))
                st.pyplot()

            if st.checkbox("Pie Plot"):
                column_to_plot = st.selectbox("Select 1 Column", all_columns)
                pie_plot = df[column_to_plot].value_counts().plot.pie(
                    autopct="%1.1f%%")
                st.write(pie_plot)
                st.pyplot()

    elif choice == 'Plots':
        st.subheader("Data Visualization")
        data = st.file_uploader("Upload a Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            if st.checkbox("Show Value Counts"):
                st.write(df.iloc[:, -1].value_counts().plot(kind='bar'))
                st.pyplot()

            # Customizable plot
            all_columns_names = df.columns.tolist()
            type_of_plot = st.selectbox(
                "Select Type of Plot",
                ["area", "bar", "line", "hist", "box", "kde"])
            selected_columns_names = st.multiselect("Select Columns To Plot",
                                                    all_columns_names)

            if st.button("Generate Plot"):
                st.success("Generating Customizable Plot of {} for {}".format(
                    type_of_plot, selected_columns_names))
                cust_data = df[selected_columns_names]

                # area/bar/line use Streamlit's native charts; the remaining
                # kinds go through pandas' matplotlib plotting.
                if type_of_plot == 'area':
                    st.area_chart(cust_data)
                elif type_of_plot == 'bar':
                    st.bar_chart(cust_data)
                elif type_of_plot == 'line':
                    st.line_chart(cust_data)
                elif type_of_plot:
                    cust_plot = cust_data.plot(kind=type_of_plot)
                    st.write(cust_plot)
                    st.pyplot()

    elif choice == 'Model Building':
        st.subheader("Building ML Models")
        data = st.file_uploader("Upload a Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            # Last column is the target, everything before it the features.
            X = df.iloc[:, 0:-1]
            Y = df.iloc[:, -1]
            seed = 7

            models = [
                ('LR', LogisticRegression()),
                ('LDA', LinearDiscriminantAnalysis()),
                ('KNN', KNeighborsClassifier()),
                ('CART', DecisionTreeClassifier()),
                ('NB', GaussianNB()),
                ('SVM', SVC()),
            ]
            scoring = 'accuracy'

            # shuffle=True is required for random_state to be accepted:
            # current scikit-learn raises ValueError when random_state is
            # set on an unshuffled KFold. The splitter is loop-invariant, so
            # it is built once.
            kfold = model_selection.KFold(n_splits=10, shuffle=True,
                                          random_state=seed)

            all_models = []
            for name, model in models:
                cv_results = model_selection.cross_val_score(
                    model, X, Y, cv=kfold, scoring=scoring)
                all_models.append({
                    "model name": name,
                    "model_accuracy": cv_results.mean(),
                    "standard deviation": cv_results.std()
                })

            if st.checkbox("Metrics As Table"):
                rows = [(m["model name"], m["model_accuracy"],
                         m["standard deviation"]) for m in all_models]
                st.dataframe(
                    pd.DataFrame(rows,
                                 columns=["Algo", "Mean of Accuracy", "Std"]))

            if st.checkbox("Metrics As JSON"):
                st.json(all_models)
# Sensor page: optionally shows the larger sensor table and plots one chosen
# sensor column from the smaller one.
if option=="sensor":
    # Two loads with different arguments; presumably the argument is a row
    # count -- TODO confirm against load_sensor.
    sensor_data=load_sensor(20)
    sensor_data1=load_sensor(100000)
    if st.sidebar.checkbox("Sensor dataset"):
        st.subheader("Sensor datasets")
        st.write(sensor_data1)
    # One-column frame that only serves as the option list of the selectbox.
    dataframe = pd.DataFrame({
        'first column': ["sensor_00","sensor_01","sensor_02"]})
    st.sidebar.subheader("Choose your sensor for ploting")
    sensor_option = st.sidebar.selectbox('',dataframe['first column'])
    # print(sensor_data[sensor_option])
    # print(sensor_data.columns)
    #df3=pd.DataFrame({"date":sensor_data["timestamp"],sensor_option:sensor_data[sensor_option]})
    if st.sidebar.button("Plot"):
        #df3=df3.rename(columns={'date':'index'}).set_index('index')
        st.subheader("Line chart")
        st.line_chart(sensor_data[sensor_option])
        st.subheader("Bar chart")
        st.bar_chart(sensor_data[sensor_option])
        # Draws the histogram on the current global matplotlib figure, which
        # the argument-less st.pyplot() below then renders.
        sensor_data[sensor_option].hist()
        st.subheader("Histogram")
        # Silences Streamlit's warning about calling st.pyplot() without a
        # figure argument.
        st.set_option('deprecation.showPyplotGlobalUse', False)
        st.pyplot()
        st.subheader("Area chart")
        st.area_chart(sensor_data[sensor_option])
        #st.line_chart(sensor_data[sensor_option])
        #st.subheader("Area chart")
        #st.area_chart(sensor_data[sensor_option])
"field": "a", "type": "quantitative" }, "y": { "field": "b", "type": "quantitative" }, }, } # 5 empty charts st.vega_lite_chart(spec) st.pyplot() st.line_chart() st.bar_chart() st.area_chart() # 1 empty map st.deck_gl_chart() # 6 errors try: st.vega_lite_chart({}) except Exception as e: st.write(e) try: st.vega_lite_chart(data, {}) except Exception as e: st.write(e)
def _show_classified_articles(spinner_text, classifier, method_name,
                              data_type, filename, amount):
    """Run one classifier and render its chart and matching articles.

    Inside a spinner labelled ``spinner_text`` this charts the "Health"
    column of ``classifier.import_data(data_type)``, calls
    ``getattr(classifier, method_name)(data_type)`` to obtain the
    interpretation, passes it to ``health_query.perform_methods`` together
    with ``filename`` and ``amount``, and writes the "Titles",
    "Date Published" and "Abstract" fields of every returned row. A success
    message follows once the spinner block has finished.
    """
    with st.spinner(spinner_text):
        new_data = classifier.import_data(data_type)
        st.area_chart(new_data["Health"])
        # Looked up lazily so a method is only resolved for a classifier the
        # user actually selected.
        interpretation = getattr(classifier, method_name)(data_type)
        results = health_query.perform_methods(filename, interpretation,
                                               amount)
        for _, article in results.iterrows():
            st.header(article["Titles"])
            st.write(article["Date Published"])
            st.write(article["Abstract"])
    st.success("Complete!")


def classify_data(dataset, data_type):
    """Classify data and health risks with selected classification.

    Args:
        dataset: Not used by this function; kept so existing callers keep
            working.
        data_type: Passed through to the ``import_data`` and classification
            functions of the selected classifier modules.

    The user picks how many results to produce and ticks any of four
    classification methods; each ticked method is run in turn and its
    results are rendered.
    """
    # NOTE(review): absolute path on a developer machine -- this will not
    # exist elsewhere; consider making it configurable.
    filename = "/home/maddykapfhammer/Documents/Allegheny/MozillaFellows/predictiveWellness/vigor/dataFiles/PubMedArticles.csv"

    amount = st.number_input("How much data would you like to be produced?",
                             min_value=1)
    st.header("Please Choose Your Method of Classification:")

    # (checkbox label, spinner text, classifier module, method name)
    methods = [
        ("Naive Bayes Classification",
         "Classifying with Naive Bayes...",
         naive_bayes, "perform_methods"),
        ("Gini Index Decision Tree Classification",
         "Classifying with Gini Index...",
         decision_tree, "perform_gini_index"),
        ("Entropy Decision Tree Classification",
         "Classifying with Entropy...",
         decision_tree, "perform_entropy"),
        ("Support Vector Machine Classification",
         "Classifying with Support Vector Machine...",
         svm, "perform_methods"),
    ]

    # Create all four checkboxes before running anything so they stay
    # grouped above the results, as in the original layout.
    selections = [(st.checkbox(label), spinner_text, classifier, method_name)
                  for label, spinner_text, classifier, method_name in methods]

    for checked, spinner_text, classifier, method_name in selections:
        if checked:
            _show_classified_articles(spinner_text, classifier, method_name,
                                      data_type, filename, amount)
elif choices == 'Visualize': st.subheader("Visualize") df = pd.read_csv(data) all_col = df.columns.to_list() all_coll = df.columns.to_list() if st.sidebar.checkbox("Bar Chart"): bar = st.multiselect("Select Features ", all_col, key='a') new_df = df[bar] st.bar_chart(df[bar]) if st.sidebar.checkbox("Area_chart"): area = st.multiselect("Select Features", all_col) new_dff = df[area] st.area_chart(df[area]) if st.sidebar.checkbox("Line Chart"): st.line_chart(df) if st.sidebar.checkbox("Plotly charts"): X1 = st.selectbox("select ", all_col, key='b') X2 = st.selectbox("select", all_col, key='m') X3 = st.selectbox("select", all_col) hist_data = [df[X1], df[X2], df[X3]] group_labels = [X1, X2, X3] fig = ff.create_distplot(hist_data, group_labels, bin_size=[.1, .25, .5]) st.plotly_chart(fig, use_container_width=True)
def main():
    """Wine alcohol-class app: upload a CSV, then browse one of four pages.

    Pages (chosen in the sidebar):

    * "Homepage" -- title plus the full uploaded table.
    * "Exploration" -- optional shape / columns / describe / column subset /
      value counts, followed by a correlation heatmap and a pair plot of
      'magnesium', 'flavanoids', 'nonflavanoid_phenols' and 'proline'
      coloured by 'alcohol'.
    * "Plots" -- value-count bar plot and a customizable plot.
    * "Prediction" -- trains a model via ``train_model(df)``, shows its
      accuracy and predicts the class of a user-selected row.
    """
    data = st.file_uploader("Upload a Dataset", type=["csv", "txt"])
    # Guard clause: every page below needs the uploaded data.
    if data is None:
        return
    df = pd.read_csv(data)

    page = st.sidebar.selectbox(
        "Choose a page", ['Homepage', 'Exploration', 'Plots', 'Prediction'])

    if page == 'Homepage':
        st.title('Wine Alcohol Class Prediction')
        st.text('Select a page in the sidebar')
        st.dataframe(df)

    elif page == 'Exploration':
        st.title('Explore the Wine Data-set')
        # Bound up front: it used to be assigned only inside the
        # "Show Columns" branch, so "Show Selected Columns" on its own
        # raised NameError.
        all_columns = df.columns.to_list()

        if st.checkbox("Show Shape"):
            st.dataframe(df.shape)
        if st.checkbox("Show Columns"):
            st.dataframe(all_columns)
        if st.checkbox('Show column descriptions'):
            st.dataframe(df.describe())
        if st.checkbox("Show Selected Columns"):
            selected_columns = st.multiselect("Select Columns", all_columns)
            st.dataframe(df[selected_columns])
        if st.checkbox("Show Value Counts"):
            st.dataframe(df.iloc[:, -1].value_counts())

        st.markdown('### Analysing column relations')
        st.text('Correlations:')
        fig, ax = plt.subplots(figsize=(10, 10))
        sns.heatmap(df.corr(), annot=True, ax=ax)
        # Hand over the figure that was just drawn instead of relying on
        # matplotlib's global "current figure".
        st.pyplot(fig)

        st.text('Effect of the different classes')
        sns.pairplot(df,
                     vars=[
                         'magnesium', 'flavanoids', 'nonflavanoid_phenols',
                         'proline'
                     ],
                     hue='alcohol')
        st.pyplot()

    elif page == 'Plots':
        st.subheader("Data Visualization")
        st.title('Plots')
        if st.checkbox("Show Value Counts"):
            st.write(df.iloc[:, -1].value_counts().plot(kind='bar'))
            st.pyplot()

        # Customizable plot
        all_columns_names = df.columns.tolist()
        type_of_plot = st.selectbox(
            "Select Type of Plot",
            ["area", "bar", "line", "hist", "box", "kde"])
        selected_columns_names = st.multiselect("Select Columns To Plot",
                                                all_columns_names)

        if st.button("Generate Plot"):
            st.success("Generating Customizable Plot of {} for {}".format(
                type_of_plot, selected_columns_names))
            cust_data = df[selected_columns_names]

            # area/bar/line use Streamlit's native charts; the remaining
            # kinds go through pandas' matplotlib plotting.
            if type_of_plot == 'area':
                st.area_chart(cust_data)
            elif type_of_plot == 'bar':
                st.bar_chart(cust_data)
            elif type_of_plot == 'line':
                st.line_chart(cust_data)
            elif type_of_plot:
                cust_plot = cust_data.plot(kind=type_of_plot)
                st.write(cust_plot)
                st.pyplot()

    else:
        st.title('Modelling')
        model, accuracy = train_model(df)
        st.write('Accuracy: ' + str(accuracy))
        st.markdown('### Make prediction')
        st.dataframe(df)
        row_number = st.number_input('Select row',
                                     min_value=0,
                                     max_value=len(df) - 1,
                                     value=0)
        st.markdown('#### Predicted')
        # Predict from every column except the 'alcohol' target, reshaped to
        # a single-sample 2-D array.
        features = df.drop(['alcohol'], axis=1).loc[row_number]
        st.text(model.predict(features.values.reshape(1, -1))[0])
```python import streamlit as st import numpy as np import pandas as pd ``` """

# chart
# Generate a random matrix with 20 rows and 3 columns
df = pd.DataFrame(np.random.rand(20, 3), columns=['a', 'b', 'c'])

# Display the charts
st.line_chart(df)  # line chart
st.area_chart(df)  # area chart
st.bar_chart(df)  # bar chart

# Display a map
df = pd.DataFrame(
    np.random.rand(100, 2) / [50, 50] + [35.69, 139.70],
    columns=['lat', 'lon']  # lat = latitude, lon = longitude
)
# st.write(df)

# Display an image
st.write('Display Image')
def _load_hepatitis_model(model_choice):
    """Load the pickled model matching the "Select Model" choice.

    "KNN" and "DecisionTree" map to their own files; any other value
    (i.e. "LR") loads the logistic-regression model.
    """
    model_paths = {
        "KNN": "models/KNN_HepatitisB_model.pkl",
        "DecisionTree": "models/decision_tree_clf_HepatitisB_model.pkl",
    }
    return load_model(
        model_paths.get(model_choice,
                        "models/LogisticReg_HepatitisB_model.pkl"))


def main():
    """Mortality Prediction App.

    Sidebar menu: "Home" shows a welcome text; "Login" asks for credentials
    and, once verified through ``login_user``, offers the "Plot" and
    "Prediction" activities ("Metrics" is listed but has no content here).
    No handler for "SignUp" is present in this function.
    """
    st.title("Hepatitis Disease Mortality Prediction App")
    # st.markdown(html_temp.format("royalblue"),unsafe_allow_html=True)

    menu = ["Home", "Login", "SignUp"]
    submenu = ["Plot", "Prediction", "Metrics"]

    choice = st.sidebar.selectbox("Menu", menu)
    if choice == "Home":
        st.subheader("Home")
        st.text("Welcome")

    elif choice == "Login":
        username = st.sidebar.text_input("Username")
        password = st.sidebar.text_input("Password", type="password")
        if st.sidebar.checkbox("Login"):
            create_usertable()
            hashed_pswd = generate_hashes(password)
            result = login_user(username,
                                verify_hashes(password, hashed_pswd))
            if result:
                st.success("Welcome {}".format(username))

                activity = st.selectbox("Activity", submenu)
                if activity == "Plot":
                    st.subheader("Data Visualization Plot")
                    df = pd.read_csv("data/cleaned_data.csv")
                    st.dataframe(df)

                    # Frequency distribution plot
                    freq_df = pd.read_csv("data/Age_frq_Dist.csv")
                    st.bar_chart(freq_df["count"])
                    st.dataframe(freq_df)

                    df['class'].value_counts().plot(kind="bar")
                    st.pyplot()

                    if st.checkbox("Area Chart"):
                        all_columns = df.columns.to_list()
                        feat_choices = st.multiselect("Choose a feature",
                                                      all_columns)
                        st.area_chart(df[feat_choices])

                elif activity == "Prediction":
                    st.subheader("Predictive Analytics")

                    # NOTE(review): the widget call was missing from the
                    # source for sex/steroid/antivirals/fatique/spiders/
                    # histology (only the arguments survived); st.radio is
                    # assumed -- confirm against the original app.
                    age = st.number_input("Age", 7, 80)
                    sex = st.radio("Sex", tuple(gender_dict.keys()))
                    steroid = st.radio("Do You Take Steroids?",
                                       tuple(feature_dict.keys()))
                    antivirals = st.radio("Do You Take Antivirals?",
                                          tuple(feature_dict.keys()))
                    fatique = st.radio("Do You Have Fatique?",
                                       tuple(feature_dict.keys()))
                    spiders = st.radio("Presence of Spider Naeve",
                                       tuple(feature_dict.keys()))
                    ascites = st.selectbox("Ascites",
                                           tuple(feature_dict.keys()))
                    varices = st.selectbox("Presence of Varices",
                                           tuple(feature_dict.keys()))
                    bilirubin = st.number_input("bilirubin Content", 0.0, 8.0)
                    alk_phosphate = st.number_input(
                        "Alkaline Phosphate Content", 0.0, 296.0)
                    sgot = st.number_input("Sgot", 0.0, 648.0)
                    albumin = st.number_input("Albumin", 0.0, 6.4)
                    protime = st.number_input("Prothrombin Time", 0.0, 100.0)
                    histology = st.radio("Histology",
                                         tuple(feature_dict.keys()))

                    # Encoded feature vector, in the column order used by the
                    # interpretation step further down.
                    feature_list = [
                        age, get_value(sex, gender_dict),
                        get_fvalue(steroid), get_fvalue(antivirals),
                        get_fvalue(fatique), get_fvalue(spiders),
                        get_fvalue(ascites), get_fvalue(varices), bilirubin,
                        alk_phosphate, sgot, albumin, int(protime),
                        get_fvalue(histology)
                    ]
                    st.write(len(feature_list))
                    st.write(feature_list)
                    pretty_result = {
                        "age": age, "sex": sex, "steroid": steroid,
                        "antivirals": antivirals, "fatique": fatique,
                        "spiders": spiders, "ascites": ascites,
                        "varices": varices, "bilirubin": bilirubin,
                        "alk_phosphate": alk_phosphate, "sgot": sgot,
                        "albumin": albumin, "protime": protime,
                        "histology": histology
                    }
                    st.json(pretty_result)
                    single_sample = np.array(feature_list).reshape(1, -1)

                    # Machine-learning model selection
                    model_choice = st.selectbox(
                        "Select Model", ["LR", "KNN", "DecisionTree"])
                    if st.button("Predict"):
                        loaded_model = _load_hepatitis_model(model_choice)
                        prediction = loaded_model.predict(single_sample)
                        pred_prob = loaded_model.predict_proba(single_sample)

                        # Class 1 is "Die", class 2 is "Live".
                        patient_dies = bool(prediction[0] == 1)
                        if patient_dies:
                            st.warning("Patient Dies")
                        else:
                            st.success("Patient Lives")

                        pred_probability_score = {
                            "Die": pred_prob[0][0] * 100,
                            "Live": pred_prob[0][1] * 100
                        }
                        st.subheader(
                            "Prediction Probability Score using {}".format(
                                model_choice))
                        st.json(pred_probability_score)

                        if patient_dies:
                            st.subheader("Prescriptive Analytics")
                            # Was misspelled `st.markdonw`, which raised
                            # AttributeError on this path.
                            st.markdown(prescriptive_message_temp,
                                        unsafe_allow_html=True)

                    if st.checkbox("Interpret"):
                        loaded_model = _load_hepatitis_model(model_choice)

                        # 1 Die and 2 Live
                        df = pd.read_csv("data/cleaned_data.csv")
                        feature_names = [
                            'age', 'sex', 'steroid', 'antivirals', 'fatigue',
                            'spiders', 'ascites', 'varices', 'bilirubin',
                            'alk_phosphate', 'sgot', 'albumin', 'protime',
                            'histology'
                        ]
                        x = df[feature_names]
                        class_names = ['Die(1)', 'Live(2)']
                        explainer = lime.lime_tabular.LimeTabularExplainer(
                            x.values,
                            feature_names=feature_names,
                            class_names=class_names,
                            discretize_continuous=True)

                        # The explainer instance for the entered sample
                        exp = explainer.explain_instance(
                            np.array(feature_list),
                            loaded_model.predict_proba,
                            num_features=13,
                            top_labels=1)
                        exp.show_in_notebook(show_table=True, show_all=False)
                        # exp.save_to_file('lime_oi.html')
                        new_exp = exp.as_list()
                        st.write(new_exp)

                        label_limits = [i[0] for i in new_exp]
                        label_scores = [i[1] for i in new_exp]
                        plt.barh(label_limits, label_scores)
                        st.pyplot()

                        plt.figure(figsize=(20, 10))
                        fig = exp.as_pyplot_figure()
                        st.pyplot(fig)

            else:
                st.warning("Incorrect Username/Password")


# NOTE(review): the statements below belong to a different snippet that the
# source fused onto the end of main(); its beginning (including the name of
# the call that receives "Data Visualization") is not visible. st.subheader
# is assumed and the enclosing definition, if any, is unknown -- confirm.
st.subheader("Data Visualization")
df = get_dataframe(csv_filename)

if st.checkbox("Show Value Counts"):
    fig, ax = plt.subplots()
    st.write(df.iloc[:, -1].value_counts().plot(kind='bar'))
    st.pyplot(fig)

all_columns_names = df.columns.tolist()
type_of_plot = st.selectbox("Select Type of Plot",
                            ["area", "bar", "line", "hist", "box", "kde"])
selected_columns_names = st.multiselect("Select Columns To Plot",
                                        all_columns_names)

if st.button("Generate Plot"):
    st.success("Generating Customizable Plot of {} for {}".format(
        type_of_plot, selected_columns_names))

    if type_of_plot == 'area':
        cust_data = df[selected_columns_names]
        st.area_chart(cust_data)

    if type_of_plot == 'bar':
        cust_data = df[selected_columns_names]
        st.bar_chart(cust_data)

    if type_of_plot == 'line':
        cust_data = df[selected_columns_names]
        st.line_chart(cust_data)

    # Was `type_of_plot == 'hist' or 'box' or 'kde'`, which is always true
    # because the non-empty strings 'box' and 'kde' are truthy.
    if type_of_plot in ('hist', 'box', 'kde'):
        if len(selected_columns_names) > 1:
            st.warning('select one coloumn')
        else:
            fig, ax = plt.subplots()
            cust_plot = df[get_singleton(selected_columns_names)].plot(
                kind=type_of_plot)
            st.write(cust_plot)
# Chart Table chartTable = pd.DataFrame( np.random.rand(25, 5), columns=["A", "B", "X", "Y", "Z"] ) st.subheader("Chart Table") st.line_chart(chartTable) # Colored Chart Table coloredChartTable = pd.DataFrame( np.random.rand(25, 3), columns=["A", "B", "C"] ) st.subheader("Colored Chart") st.area_chart(coloredChartTable) # Bar Chart barChart = pd.DataFrame( np.random.rand(25, 3), columns=["A", "B", "C"] ) st.subheader("Bar Chart") st.bar_chart(barChart) # Displaying Plots import matplotlib.pyplot as plt arr = np.random.normal(1, 1, size=100) fig, ax = plt.subplots() ax.hist(arr, bins=20)
data = datasets.load_breast_cancer() st.write("""dataset breast""") else: data = datasets.load_wine() st.write("""dataset wine""") x = y = x1 = pd.DataFrame(x, columns=data.feature_names) x2 = pd.DataFrame(y) return x, y, x1, x2 x, y, x1, x2 = select_dataset(dataset) #x1=frame(dataset) st.write(""" ## SHAPE OF DATA IS""", x.shape) st.write(""" ## No of classes """, len(np.unique(y))) box = st.selectbox(""" display data """, options=["inputdata", "targetdata"]) if box == "inputdata": st.write(""" ## Input Data""", x1.head()) elif box == "targetdata": st.write(""" ## target Data""", x2.head()) #st.write("""# heat map """,sn.heatmap(x1.corr(),annot=True)) #box1=st.selectbox("""## PLOT""",options=["inputdata","targetdata"]) st.bar_chart(x1) st.area_chart(x1) st.altair_chart(x1) import bs4 Href = "" st.write(Href)
def main():
    """Machine Learning Dataset Explorer.

    Lets the user pick a file from the current directory, reads it as CSV
    and offers: a row preview, column names, shape (by rows or columns), a
    column subset view, value counts of selected columns, dtypes, a summary,
    a seaborn correlation heatmap, a value-count bar plot, a pie plot and a
    customizable plot.
    """
    st.title("Machine Learning Dataset Explorer")
    st.subheader("Simple Data Science Explorer with Streamlit")

    html_temp = """ <div style="background-color:tomato;"> <p style="color:white; font-size: 50px">Frase aleatória</p> <div> """
    st.markdown(html_temp, unsafe_allow_html=True)

    def file_selector(folder_path='.'):
        """Offer every entry of ``folder_path`` and return the chosen path."""
        filenames = os.listdir(folder_path)
        selected_filename = st.selectbox("Escolhar um arquivo", filenames)
        return os.path.join(folder_path, selected_filename)

    filename = file_selector()
    # NOTE(review): the call name was missing from the source (only the
    # argument survived); st.info is assumed -- confirm.
    st.info("Você escolheu {}".format(filename))

    # Read the data
    df = pd.read_csv(filename)

    # Show the dataset
    if st.checkbox("Mostrar DataSet"):
        number = st.number_input("Número de linhas para visualizar", 5, 10)
        st.dataframe(df.head(number))

    # Show column names
    if st.button("Nomes das Colunas"):
        st.write(df.columns)

    # Show shape
    if st.checkbox("Formato do Dataset"):
        st.write(df.shape)
        # NOTE(review): the call name was missing from the source;
        # st.radio is assumed -- confirm.
        data_dim = st.radio("Show Dimension By", ("Rows", "Columns"))
        if data_dim == 'Columns':
            st.text("Número de Colunas")
            st.write(df.shape[1])
        elif data_dim == "Rows":
            st.text("Número de linhas")
            st.write(df.shape[0])
        else:
            st.write(df.shape)

    # Choose columns
    if st.checkbox("Selecione as colunas desejadas"):
        all_columns = df.columns.tolist()
        selected_columns = st.multiselect("Escolha", all_columns)
        new_df = df[selected_columns]
        st.dataframe(new_df)

    # Show value counts of the first two and last two columns
    if st.button("Valores"):
        st.text("Valores em classes")
        st.write(df.iloc[:, 0].value_counts())  # residents
        st.write(df.iloc[:, 1].value_counts())  # elderly
        st.write(df.iloc[:, -1].value_counts())  # children
        st.write(df.iloc[:, -2].value_counts())  # families

    # Show data types
    if st.button("DataTypes"):
        st.write(df.dtypes)

    # Show summary
    if st.checkbox("Sumário"):
        st.write(df.describe().T)

    # Visualization
    st.subheader("Visualização dos dados")

    # Correlation (seaborn)
    if st.checkbox("Seaborn Plot"):
        st.write(sns.heatmap(df.corr(), annot=True))
        # Was a bare `st.pyplot` reference, so the heatmap was never
        # rendered.
        st.pyplot()

    # Count plot
    if st.checkbox("Plot of Value Counts"):
        st.text("Value Counts By Target")
        all_columns_names = df.columns.tolist()
        primary_col = st.selectbox("Primary Columm to GroupBy",
                                   all_columns_names)
        selected_columns_names = st.multiselect("Select Columns",
                                                all_columns_names)
        if st.button("Plot"):
            st.text("Generate Plot")
            if selected_columns_names:
                vc_plot = df.groupby(
                    primary_col)[selected_columns_names].count()
            else:
                # No columns picked: fall back to the last column's counts.
                vc_plot = df.iloc[:, -1].value_counts()
            st.write(vc_plot.plot(kind="bar"))
            st.pyplot()

    # Pie chart
    if st.checkbox("Pie Plot"):
        all_columns_names = df.columns.tolist()
        selected_column = st.selectbox("Selecione a coluna desejada",
                                       all_columns_names)
        if st.button("Gerar Pie Plot"):
            st.success("Gerando um Pie Plot")
            st.write(
                df[selected_column].value_counts().plot.pie(
                    autopct="%1.1f%%"))
            st.pyplot()

    # Customizable plot
    st.subheader("Plot Customizado")
    all_columns_names = df.columns.tolist()
    type_of_plot = st.selectbox("Selecione o tipo de plot",
                                ['area', 'bar', 'line', 'hist', 'box', 'kde'])
    selected_columns_names = st.multiselect("Selecione as colunas",
                                            all_columns_names)

    if st.button("Gerar Plot"):
        st.success("Gerando plot de {} para {}".format(
            type_of_plot, selected_columns_names))
        cust_data = df[selected_columns_names]

        # area/bar/line use Streamlit's native charts; the remaining kinds
        # go through pandas' matplotlib plotting.
        if type_of_plot == 'area':
            st.area_chart(cust_data)
        elif type_of_plot == 'bar':
            st.bar_chart(cust_data)
        elif type_of_plot == 'line':
            st.line_chart(cust_data)
        elif type_of_plot:
            cust_plot = cust_data.plot(kind=type_of_plot)
            st.write(cust_plot)
            st.pyplot()
def _load_encoded_heart_data():
    """Read heart_failure.csv, show its head, then label-encode every column."""
    df = pd.read_csv('heart_failure.csv')
    # The preview is shown before encoding, i.e. with the raw values.
    st.dataframe(df.head())
    lable = preprocessing.LabelEncoder()
    for col in df.columns:
        df[col] = lable.fit_transform(df[col])
    return df


def _sidebar_params(clf_name):
    """Render the sidebar sliders for ``clf_name`` and return their values."""
    params = dict()
    if clf_name in ("SVM Linear", "SVM Radial"):
        params["C"] = st.sidebar.slider("C", 0.01, 15.0)
    elif clf_name == "Decision Tree":
        params["max_depth"] = st.sidebar.slider("max_depth", 2, 15)
        params["max_leaf_nodes"] = st.sidebar.slider("max_leaf_nodes", 2, 20)
    elif clf_name == "Random Forest":
        params["max_depth"] = st.sidebar.slider("max_depth", 2, 15)
        params["n_estimators"] = st.sidebar.slider("n_estimators", 1, 200)
    elif clf_name == "MLPClassifier":
        params["max_iter"] = st.sidebar.slider("max_iter", 2, 30)
    return params


def _build_classifier(clf_name, params):
    """Instantiate the estimator for ``clf_name``; None for unknown names."""
    clf = None
    if clf_name == "SVM Linear":
        clf = SVC(C=params["C"], kernel='linear')
    elif clf_name == "SVM Radial":
        clf = SVC(C=params["C"], kernel='rbf')
    elif clf_name == "Decision Tree":
        clf = DecisionTreeClassifier(
            max_depth=params["max_depth"],
            max_leaf_nodes=params["max_leaf_nodes"],
            random_state=100)
    elif clf_name == "Random Forest":
        clf = RandomForestClassifier(
            n_estimators=params["n_estimators"],
            max_depth=params["max_depth"],
            random_state=100)
    elif clf_name == "MLPClassifier":
        clf = MLPClassifier(max_iter=params["max_iter"])
    return clf


def _fit_and_report(clf, X, y, classifer_name):
    """Train on an 80/20 split and render report, accuracy and a preview."""
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=100)
    # NOTE(review): the fit call was truncated in the source (only
    # ", y_train)" survived); clf.fit(X_train, y_train) is assumed.
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    st.write(
        f'<div style="color: #1C2331; font-size: medium; font-style: italic; padding: 15px; background-color:#b2dfdb;border-radius:5px;">Classifier = {classifer_name}</div></br>',
        unsafe_allow_html=True)
    clf_report = classification_report(y_test, y_pred)
    st.success(f"Classification Report:\n\n {clf_report}")
    st.warning(f"accuracy = {acc}")

    # Preview of test rows 1..9 (index 0 was skipped by the original and is
    # kept that way); bounded so a small test set cannot raise IndexError.
    for i in range(1, min(10, len(y_test))):
        st.write("Actual=%s, Predicted=%s" % (y_test[i], y_pred[i]))


def main():
    """Automated ML App

    Sidebar menu over the heart-failure dataset ('heart_failure.csv'):

    * "Home" -- only the banner is rendered.
    * "EDA" -- pandas-profiling report of the label-encoded data.
    * "Plots" -- value-count bar plot and a customizable plot.
    * "ML_Algorithms" -- trains the selected classifier (linear/RBF SVM,
      decision tree or random forest) with sidebar-chosen hyperparameters.
    * "Neural Network" -- trains an MLPClassifier with a sidebar-chosen
      ``max_iter``.

    In every data page the last column is the target and the remaining
    columns are the features.
    """
    activities = ["Home", "EDA", "Plots", "ML_Algorithms", "Neural Network"]
    choice = st.sidebar.selectbox("Menu", activities)
    html_temp = """ <div style="background-color:royalblue;padding:10px;border-radius:10px"> <h1 style="color:white;text-align:center;font-style: italic;">Classifying the survival of patients with heart failure using Various Machine Learning Algorithms</h1> </div> """
    components.html(html_temp)

    if choice == 'EDA':
        st.subheader("Exploratory Data Analysis using Pandas Profiling")
        df = _load_encoded_heart_data()
        # pandas profiling
        profile = ProfileReport(df)
        st_profile_report(profile)

    elif choice == 'Plots':
        st.subheader("Data Visualization")
        df = _load_encoded_heart_data()

        if st.checkbox("Show Value Counts"):
            st.write(df.iloc[:, -1].value_counts().plot(kind='bar'))
            st.pyplot()

        # Customized plot
        all_columns_names = df.columns.tolist()
        type_of_plot = st.selectbox(
            "Select Type of Plot",
            ["area", "bar", "line", "hist", "box", "kde"])
        selected_columns_names = st.multiselect("Select Columns To Plot",
                                                all_columns_names)

        if st.button("Generate Plot"):
            st.success("Generating Customizable Plot of {} for {}".format(
                type_of_plot, selected_columns_names))
            cust_data = df[selected_columns_names]

            # area/bar/line use Streamlit's native charts; the remaining
            # kinds go through pandas' matplotlib plotting.
            if type_of_plot == 'area':
                st.area_chart(cust_data)
            elif type_of_plot == 'bar':
                st.bar_chart(cust_data)
            elif type_of_plot == 'line':
                st.line_chart(cust_data)
            elif type_of_plot:
                cust_plot = cust_data.plot(kind=type_of_plot)
                st.write(cust_plot)
                st.pyplot()

    elif choice == 'ML_Algorithms':
        st.subheader("Machine Learning Algorithms")
        df = _load_encoded_heart_data()

        if st.checkbox("Summary"):
            st.write(df.describe())

        X = df.iloc[:, :-1].values
        y = df.iloc[:, -1].values
        st.write("Number of classes", len(np.unique(y)))

        classifer_name = st.sidebar.selectbox(
            "Select Classifer",
            ("SVM Linear", "SVM Radial", "Decision Tree", "Random Forest"))
        params = _sidebar_params(classifer_name)
        clf = _build_classifier(classifer_name, params)
        _fit_and_report(clf, X, y, classifer_name)

    elif choice == 'Neural Network':
        st.subheader("Neural Networks (MLPClassifier)")
        df = _load_encoded_heart_data()

        X = df.iloc[:, :-1].values
        y = df.iloc[:, -1].values

        classifer_name = "MLPClassifier"
        params = _sidebar_params(classifer_name)
        clf = _build_classifier(classifer_name, params)
        _fit_and_report(clf, X, y, classifer_name)
def data_display(): "# In this section we will try to explore ways to display our data" # Let’s add a title to test things out st.title("My Cool Streamlit App!!!") # Let's write something # write any text st.write("Here's our first attempt at using data to create a table:") # write dataframes st.write( pd.DataFrame({"first column": [1, 2, 3, 4], "second column": [10, 20, 30, 40]}) ) # write JSON st.write({"Name": "John", "Country": "USA"}) # Emojis are love! st.write("Display some cool emojis :sunglasses:") # st.write accepts chart objects too! df2 = pd.DataFrame(np.random.randn(200, 3), columns=["a", "b", "c"]) c = ( alt.Chart(df2) .mark_circle() .encode(x="a", y="b", size="c", color="c", tooltip=["a", "b", "c"]) ) # st.write(c) # you can also use st.text("you can also use st.text or st.markdown as well") # let me showcase some Magic! """ # My Cool Streamlit App!!! Here's our first attempt at using data to create a table: """ df = pd.DataFrame({"first column": [1, 2, 3, 4], "second column": [10, 20, 30, 40]}) # df # you can also use st.dataframe and st.table "Display a df with `st.dataframe(df)` and `st.table(df)`" st.dataframe(df) st.table(df) # let's display some charts "let's display charts" chart_data = pd.DataFrame(np.random.randn(50, 3), columns=["a", "b", "c"]) # line_chart "Display a line chart figure with `st.line_chart`" st.line_chart(chart_data) # area_chart "Display a area chart figure with `st.area_chart`" st.area_chart(chart_data) # bar_chart "Display a bar chart figure with `st.bar_chart`" st.bar_chart(chart_data) # let's draw Map chart map_data = pd.DataFrame( np.random.randn(1000, 2) / [50, 50] + [37.76, -122.4], columns=["lat", "lon"] ) "Display a map figure with ``" "Display a matplotlib.pyplot figure with `st.pyplot`" arr = np.random.normal(1, 1, size=100) fig, ax = plt.subplots() ax.hist(arr, bins=20) st.pyplot(fig) "Display a chart using the Altair library with `st.altair_chart`" st.altair_chart(c, use_container_width=True) "Display an 
interactive Plotly chart using `st.plotly_chart`" # Add histogram data x1 = np.random.randn(200) - 2 x2 = np.random.randn(200) x3 = np.random.randn(200) + 2 # Group data together hist_data = [x1, x2, x3] group_labels = ["Group 1", "Group 2", "Group 3"] # Create distplot with custom bin_size fig = ff.create_distplot(hist_data, group_labels, bin_size=[0.1, 0.25, 0.5]) st.plotly_chart(fig, use_container_width=True) """
import streamlit as st
import pandas as pd
import numpy as np

# Minimal area-chart example: 20 rows of standard-normal samples in three
# columns, rendered with Streamlit's built-in area chart.
chart_data = pd.DataFrame(np.random.randn(20, 3), columns=['a', 'b', 'c'])
st.area_chart(chart_data)
]) st.subheader('Total Energy Generation in ' + country + ' (MW)') forecast_horizon = st.sidebar.slider(label='Forecast Horizon (hours)', min_value=12, max_value=168, value=48) window_length = st.sidebar.slider(label='Window Length', min_value=1, value=30) country_code = COUNTRY_MAPPINGS[country] df = get_energy_data(country_code) #Plotting total energy generation for selected country st.area_chart(df, use_container_width=False, width=800) cols_renewable = ['Wind Onshore', 'Wind Offshore', 'Solar'] #Selecting the renewable energy columns, #Only if they are available in the dataframe df = df[df.columns & cols_renewable] for item in df.columns: smape = calculate_smape(df[[item]], regressor, forecast_horizon, window_length) st.subheader(item + ' Energy Generation Forecast in ' + country + ' (MW)') #Generating and plotting a forecast for each renewable energy type df_forecast = generate_forecast(df[[item]], regressor, forecast_horizon, window_length)
def main():
    """Semi Automated ML App With Streamlit.

    The sidebar selects one of two pages. "Exploratory Data Analysis" shows
    the shape, columns, summary statistics, value counts, correlation plots
    and a pie chart of an uploaded dataset. "Plots" draws a bar chart of the
    last column's value counts plus a user-configured chart of chosen columns.
    """

    def _load_frame(uploaded):
        """Parse an upload as Excel when it is named *.xlsx, otherwise as CSV."""
        upload_name = getattr(uploaded, "name", "") or ""
        if upload_name.lower().endswith(".xlsx"):
            return pd.read_excel(uploaded)
        return pd.read_csv(uploaded)

    def _numeric_corr(frame):
        """Correlate numeric/bool columns only; text columns cannot be correlated."""
        return frame.select_dtypes(include=["number", "bool"]).corr()

    def _draw(plot_on):
        """Call ``plot_on(ax)`` on a fresh Axes, render its figure, then close it."""
        fig, ax = plt.subplots()
        plot_on(ax)
        # Passing the figure explicitly avoids relying on matplotlib's global
        # "current figure", which Streamlit deprecates for st.pyplot().
        st.pyplot(fig)
        plt.close(fig)

    activities = ["Exploratory Data Analysis", "Plots"]
    choice = st.sidebar.selectbox("Select Activities", activities)

    if choice == 'Exploratory Data Analysis':
        st.subheader("Exploratory Data Analysis")

        data = st.file_uploader("Upload a Dataset of Your Choice",
                                type=["csv", "txt"])
        if data is not None:
            df = _load_frame(data)
            # Bound once up front: several checkboxes below need the column
            # list regardless of whether "Show Columns" is ticked.
            all_columns = df.columns.to_list()
            st.dataframe(df.head())

            if st.checkbox("Show Shape"):
                st.write(df.shape)

            if st.checkbox("Show Columns"):
                st.write(all_columns)

            if st.checkbox("Summary"):
                st.write(df.describe())

            if st.checkbox("Show Selected Columns"):
                selected_columns = st.multiselect("Select Columns", all_columns)
                st.dataframe(df[selected_columns])

            if st.checkbox("Show Value Counts"):
                # The last column is treated as the target/class column.
                st.dataframe(df.iloc[:, -1].value_counts())

            if st.checkbox("Correlation Plot(Matplotlib)"):
                _draw(lambda ax: ax.matshow(_numeric_corr(df)))

            if st.checkbox("Correlation Plot(Seaborn)"):
                _draw(lambda ax: sns.heatmap(_numeric_corr(df), annot=True, ax=ax))

            if st.checkbox("Pie plot"):
                column_to_plot = st.selectbox("Select Column", all_columns)
                counts = df[column_to_plot].value_counts()
                _draw(lambda ax: counts.plot.pie(autopct="%1.1f%%", ax=ax))

    elif choice == 'Plots':
        st.subheader("Data Visualization")

        data = st.file_uploader("Upload a Dataset", type=["csv", "txt", "xlsx"])
        if data is not None:
            df = _load_frame(data)
            st.dataframe(df.head())

            if st.checkbox("Show Value Counts"):
                target_counts = df.iloc[:, -1].value_counts()
                _draw(lambda ax: target_counts.plot(kind='bar', ax=ax))

            # Customizable Plot
            all_columns_names = df.columns.tolist()
            type_of_plot = st.selectbox(
                "Select Type of Plot",
                ["area", "bar", "line", "hist", "box", "kde"])
            selected_columns_names = st.multiselect("Select Columns To Plot",
                                                    all_columns_names)

            if st.button("Generate Plot"):
                if not selected_columns_names:
                    # An empty selection cannot be plotted.
                    st.warning("Select at least one column to plot")
                else:
                    st.success("Generating Customizable Plot of {} for {}".format(
                        type_of_plot, selected_columns_names))
                    cust_data = df[selected_columns_names]

                    # Streamlit's native charts cover three kinds; the rest
                    # go through pandas/matplotlib.
                    native_charts = {
                        'area': st.area_chart,
                        'bar': st.bar_chart,
                        'line': st.line_chart,
                    }
                    if type_of_plot in native_charts:
                        native_charts[type_of_plot](cust_data)
                    else:
                        _draw(lambda ax: cust_data.plot(kind=type_of_plot, ax=ax))
def main():
    """Run the drag-and-drop dataset analysis app.

    Renders a page banner, then one of four sidebar-selected pages:
    "Exploratory Data Analysis", "Plot", "Model Building" (10-fold
    cross-validated accuracy of six classifiers, using the last column of the
    upload as the target) and "About".
    """

    def _numeric_corr(frame):
        """Correlate numeric/bool columns only; text columns cannot be correlated."""
        return frame.select_dtypes(include=["number", "bool"]).corr()

    def _draw(plot_on):
        """Call ``plot_on(ax)`` on a fresh Axes, render its figure, then close it."""
        fig, ax = plt.subplots()
        plot_on(ax)
        # Passing the figure explicitly avoids relying on matplotlib's global
        # "current figure", which Streamlit deprecates for st.pyplot().
        st.pyplot(fig)
        plt.close(fig)

    html_page = """
    <div style="text-align: center;">
    <div>
    <h1>Machine Learning | Pedictive Analysis | Data Analytics</h1>
    </div>
    <div>
    <h2>Drag and Drop any dataset and predict or analyse</h2>
    </div>
    </div>
    """
    st.markdown(html_page, unsafe_allow_html=True)

    activities = [
        "Exploratory Data Analysis", "Plot", "Model Building", "About"
    ]
    choice = st.sidebar.selectbox("Select Activities", activities)

    if choice == "Exploratory Data Analysis":
        st.subheader("Exploratory Data Analysis")

        # Drag and Drop Feature
        data = st.file_uploader("Upload Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            all_columns = df.columns.to_list()
            st.dataframe(df.head())

            if st.checkbox("Show Shape of the dataset"):
                st.write(df.shape)

            if st.checkbox("Show Columns of the dataset"):
                st.write(all_columns)

            if st.checkbox("Select Columns to Show"):
                selected_columns = st.multiselect("Select the Columns",
                                                  all_columns)
                st.dataframe(df[selected_columns])

            if st.checkbox("Show Summary of the dataset"):
                st.write(df.describe())

            if st.checkbox("Show Value Counts"):
                st.write(df.iloc[:, -1].value_counts())

    elif choice == "Plot":
        st.subheader("Data Visualization")

        data = st.file_uploader("Upload Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            all_columns_names = df.columns.to_list()
            st.dataframe(df.head())

            if st.checkbox("Correlation Map with Seaborn"):
                _draw(lambda ax: sns.heatmap(_numeric_corr(df), annot=True, ax=ax))

            if st.checkbox("Pie Chart"):
                columns_to_plot = st.selectbox("Select 1 Column",
                                               all_columns_names)
                counts = df[columns_to_plot].value_counts()
                _draw(lambda ax: counts.plot.pie(autopct="%1.1f%%", ax=ax))

            type_of_plot = st.selectbox(
                "Select Type of Plot",
                ["area", "bar", "line", "hist", "box", "kde"])
            selected_columns_names = st.multiselect("Select Columns To Plot",
                                                    all_columns_names)

            if st.button("Generate Plot"):
                if not selected_columns_names:
                    # An empty selection cannot be plotted.
                    st.warning("Select at least one column to plot")
                else:
                    st.success("Generating Customizable Plot of {} for {}".format(
                        type_of_plot, selected_columns_names))
                    cust_data = df[selected_columns_names]

                    # Streamlit's native charts cover three kinds; the rest
                    # go through pandas/matplotlib.
                    native_charts = {
                        'area': st.area_chart,
                        'bar': st.bar_chart,
                        'line': st.line_chart,
                    }
                    if type_of_plot in native_charts:
                        native_charts[type_of_plot](cust_data)
                    else:
                        _draw(lambda ax: cust_data.plot(kind=type_of_plot, ax=ax))

    elif choice == "Model Building":
        st.subheader("Building Machine Learning Model")

        data = st.file_uploader("Upload Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            # Every column but the last is a feature; the last is the target.
            X = df.iloc[:, 0:-1]
            Y = df.iloc[:, -1]
            seed = 7

            models = [
                ("LR", LogisticRegression()),
                ("LDA", LinearDiscriminantAnalysis()),
                ("KNN", KNeighborsClassifier()),
                ("CART", DecisionTreeClassifier()),
                ("NB", GaussianNB()),
                ("SVM", SVC()),
            ]
            scoring = 'accuracy'

            # random_state only has an effect when shuffle=True, and recent
            # scikit-learn rejects a seed without it. One splitter is shared
            # so every model is scored on the same folds.
            kfold = model_selection.KFold(n_splits=10, shuffle=True,
                                          random_state=seed)

            # Evaluate each model one by one.
            all_models = []
            for name, model in models:
                cv_results = model_selection.cross_val_score(model,
                                                             X,
                                                             Y,
                                                             cv=kfold,
                                                             scoring=scoring)
                all_models.append({
                    "model_name": name,
                    "model_accuracy": cv_results.mean(),
                    "Standard_deviation": cv_results.std()
                })

            if st.checkbox("Metrics as Table"):
                rows = [(m["model_name"], m["model_accuracy"],
                         m["Standard_deviation"]) for m in all_models]
                st.dataframe(
                    pd.DataFrame(rows,
                                 columns=[
                                     "Model Name", "Model Accuracy",
                                     "Standard Deviation"
                                 ]))

            if st.checkbox("Metrics as JSON"):
                st.json(all_models)

    elif choice == "About":
        st.subheader("About Developer")
        html_page = """
        <div style = "background-color:tomato; padding:50px">
        <p style="font-size:25px">
        SHAIL MODI <br>
        B. Tech - Computer Engineering <br>
        ✉ [email protected] <br>
        K.J.Somaiya College Of Engineering
        </p>
        <p>© shailmodi</p>
        </div>
        """
        st.markdown(html_page, unsafe_allow_html=True)
# Group if st.checkbox("Show Bar Chart Plot"): v_group = data.groupby('species') st.bar_chart(v_group) st.pyplot() # Line if st.checkbox("Show Line Plot"): v_group = data.groupby('species') st.bar_chart(v_group) st.pyplot() # Area if st.checkbox("Show Area Plot"): v_group = data.groupby('species') st.area_chart(v_group) st.pyplot() # Images @st.cache def load_image(img): im = return im species_type ="Select Species Type", ("setosa", "virginica", "versicolor")) if species_type == 'setosa': st.text("Showing Setosa Species") st.image(load_image('imgs/iris_setosa.jpg'))
def main():
    """Common ML Data Explorer.

    Lets the user pick a CSV from ``./datasets``, inspect it (rows, columns,
    shape, dtypes, summary), draw correlation / count / pie / barh / custom
    plots, preview the features and target (the last column), and build a
    zipped copy of the selected file.
    """

    def _draw(plot_on):
        """Call ``plot_on(ax)`` on a fresh Axes, render its figure, then close it."""
        fig, ax = plt.subplots()
        plot_on(ax)
        # Passing the figure explicitly avoids relying on matplotlib's global
        # "current figure", which Streamlit deprecates for st.pyplot().
        st.pyplot(fig)
        plt.close(fig)

    def _numeric_corr(frame):
        """Correlate numeric/bool columns only; text columns cannot be correlated."""
        return frame.select_dtypes(include=["number", "bool"]).corr()

    st.title("Common ML Data Explorer")
    st.subheader("Simple ML App with Streamlit")

    # st.image accepts a list of file paths, so the PNGs are passed as-is.
    img_list = glob.glob("images/*.png")
    st.image(img_list)

    def file_selector(folder_path='./datasets'):
        """Offer the files in *folder_path* and return the chosen file's path."""
        filenames = os.listdir(folder_path)
        selected_filename = st.selectbox('Select a file', filenames)
        return os.path.join(folder_path, selected_filename)

    filename = file_selector()
    st.write('You selected `%s`' % filename)
    df = pd.read_csv(filename)
    all_columns_names = df.columns.tolist()

    if st.checkbox("Show DataSet"):
        # Integer bounds make number_input return an int; DataFrame.head
        # rejects the float a bare number_input returns.
        number = st.number_input("Number of Rows to View",
                                 min_value=1, value=5, step=1)
        st.dataframe(df.head(number))

    if st.button("Columns Names"):
        st.write(df.columns)

    if st.checkbox("Shape of Dataset"):
        st.write(df.shape)
        data_dim = st.radio("Show Dimension by", ("Rows", "Columns"))
        if data_dim == 'Rows':
            st.text("Number of Rows")
            st.write(df.shape[0])
        elif data_dim == 'Columns':
            st.text("Number of Columns")
            st.write(df.shape[1])

    if st.checkbox("Select Columns To Show"):
        selected_columns = st.multiselect('Select', all_columns_names)
        st.dataframe(df[selected_columns])

    if st.button("Data Types"):
        st.write(df.dtypes)

    if st.button("Value Counts"):
        st.text("Value Counts By Target/Class")
        st.write(df.iloc[:, -1].value_counts())

    if st.checkbox("Summary"):
        st.write(df.describe())

    st.subheader("Data Visualization")

    # Correlation plots.
    if st.checkbox("Correlation Plot [Matplotlib]"):
        _draw(lambda ax: ax.matshow(_numeric_corr(df)))

    if st.checkbox("Correlation Plot with Annotation[Seaborn]"):
        _draw(lambda ax: sns.heatmap(_numeric_corr(df), annot=True, ax=ax))

    # Count plots.
    if st.checkbox("Plot of Value Counts"):
        st.text("Value Counts By Target/Class")
        primary_col = st.selectbox('Select Primary Column To Group By',
                                   all_columns_names)
        selected_column_names = st.multiselect('Select Columns',
                                               all_columns_names)
        if st.button("Plot"):
            st.text("Generating Plot for: {} and {}".format(
                primary_col, selected_column_names))
            if selected_column_names:
                vc_plot = df.groupby(
                    primary_col)[selected_column_names].count()
            else:
                # Nothing selected: fall back to counts of the last column.
                vc_plot = df.iloc[:, -1].value_counts()
            _draw(lambda ax: vc_plot.plot(kind='bar', ax=ax))

    if st.checkbox("Pie Plot"):
        if st.button("Generate Pie Plot"):
            target_counts = df.iloc[:, -1].value_counts()
            _draw(lambda ax: target_counts.plot.pie(autopct="%1.1f%%", ax=ax))

    if st.checkbox("BarH Plot"):
        st.info("Please Choose the X and Y Column")
        x_column = st.selectbox('Select X Columns For Barh Plot',
                                all_columns_names)
        y_column = st.selectbox('Select Y Columns For Barh Plot',
                                all_columns_names)
        # Plot only once requested, not on every rerun of the script.
        if st.button("Generate Barh Plot"):
            _draw(lambda ax: df.plot.barh(
                x=x_column, y=y_column, figsize=(10, 10), ax=ax))

    st.subheader("Customizable Plots")
    type_of_plot = st.selectbox("Select the Type of Plot",
                                ["area", "bar", "line", "hist", "box", "kde"])
    selected_column_names = st.multiselect('Select Columns To Plot',
                                           all_columns_names)

    if st.button("Generate Plot"):
        if not selected_column_names:
            # An empty selection cannot be plotted.
            st.warning("Select at least one column to plot")
        else:
            st.success("Generating A Customizable Plot of: {} for :: {}".format(
                type_of_plot, selected_column_names))
            cust_data = df[selected_column_names]

            # Streamlit's native charts cover three kinds; the rest go
            # through pandas/matplotlib.
            native_charts = {
                'area': st.area_chart,
                'bar': st.bar_chart,
                'line': st.line_chart,
            }
            if type_of_plot in native_charts:
                native_charts[type_of_plot](cust_data)
            elif type_of_plot == 'hist':
                _draw(lambda ax: cust_data.plot(kind=type_of_plot, bins=2, ax=ax))
            else:
                _draw(lambda ax: cust_data.plot(kind=type_of_plot, ax=ax))

    html_temp = """
    <div style="background-color:powderblue;"><p style="color:blue;font-size:60px;"> Hello world colored</p></div>
    """
    st.markdown(html_temp, unsafe_allow_html=True)

    st.subheader("Feature Engineering and ML Aspect")

    if st.checkbox("Show Features"):
        # Every column but the last is a feature.
        all_features = df.iloc[:, 0:-1]
        st.text('Features Names:: {}'.format(all_features.columns))
        st.dataframe(all_features.head(10))

    if st.checkbox("Show Target"):
        all_target = df.iloc[:, -1]
        st.text('Target/Class Name:: {}'.format(all_target.name))
        st.dataframe(all_target.head(10))

    all_ml_dict = {
        'LR': LogisticRegression(),
        'LDA': LinearDiscriminantAnalysis(),
        'KNN': KNeighborsClassifier(),
        'CART': DecisionTreeClassifier(),
        'NB': GaussianNB(),
        'SVM': SVC()
    }
    # NOTE(review): the selection is collected but no model is trained yet.
    model_choice = st.multiselect('Model Choices', list(all_ml_dict.keys()))
    for key in all_ml_dict:
        if 'LDA' in key:
            st.write(key)

    # NOTE(review): two hard-coded markdown download links used to follow
    # here; their link text and targets were empty, so they are not rendered.

    def makezipfile(data):
        """Zip the file at *data* into ``<data>.zip`` and return that path.

        The archive gets its own name: opening *data* itself for writing
        would truncate the file that is about to be archived.
        """
        output_filename = '{}.zip'.format(data)
        with ZipFile(output_filename, "w") as z:
            z.write(data)
        return output_filename

    if st.button("Download File"):
        # Markdown link whose text is the dataset path and target the archive.
        DOWNLOAD_TPL = f'[{filename}]({makezipfile(filename)})'
        st.text(DOWNLOAD_TPL)
        st.markdown(DOWNLOAD_TPL)
def on_train_begin(self, logs=None):
    """Lay out the training dashboard: a summary section, then a log section.

    Creates an empty area chart and a text line reading ``epoch : 0`` under
    the "Summary" header and keeps both on the instance as
    ``_summary_chart`` and ``_summary_stats``.

    Args:
        logs: Unused by this method.
    """
    st.header("Summary")

    # Elements are created in page order: chart first, stats line second.
    summary_chart = st.area_chart()
    summary_stats = st.text("%8s : 0" % "epoch")
    self._summary_chart = summary_chart
    self._summary_stats = summary_stats

    st.header("Training Log")
st.write(Final_Data) if st.checkbox("Show all the column Names"): st.write(Final_Data.columns) if st.checkbox("Show size of dataset"): if st.checkbox("Show row size"): st.write(Final_Data.shape[0]) if st.checkbox("Show column size"): st.write(Final_Data.shape[1]) if st.checkbox("Show complete dataset size"): st.write(Final_Data.shape) if st.checkbox("Show desc of Ratings in final data"): Final_Data.describe()["Ratings"] st.write("**displaying final dataset header lines using area chart**") st.area_chart(Final_Data) print("Number of NaN values = " + str(Final_Data.isnull().sum())) duplicates = Final_Data.duplicated(["MovieID", "CustID", "Ratings"]) print("Number of duplicate rows = " + str(duplicates.sum())) if st.checkbox("Show unique customer & movieId in Total Data:"): st.write("Total number of movie ratings = ", str(Final_Data.shape[0])) st.write("Number of unique users = ", str(len(np.unique(Final_Data["CustID"])))) st.write("Number of unique movies = ", str(len(np.unique(Final_Data["MovieID"])))) if not os.path.isfile("Data/TrainData.pkl"): Final_Data.iloc[:int(Final_Data.shape[0] *
def main():
    """Run the data-analysis app: upload a CSV, explore it, plot it, model it.

    After a CSV upload the page offers data inspection, plots (correlation
    heatmap, custom charts, pie chart, PCA / t-SNE projections) and one of
    three machine-learning tasks: regression, k-means clustering, or
    classification. Regression and clustering results can be downloaded as
    an .xlsx workbook.
    """

    def _xlsx_download_link(sheets, download_name, frase):
        """Render an HTML link that downloads *sheets* as one .xlsx workbook.

        Args:
            sheets: Iterable of ``(DataFrame, sheet name)`` pairs.
            download_name: File name offered to the browser.
            frase: Link text.
        """
        xlsx_io = io.BytesIO()
        # Leaving the context manager finalises the workbook into the buffer.
        with pd.ExcelWriter(xlsx_io, engine='xlsxwriter') as writer:
            for frame, sheet_name in sheets:
                frame.to_excel(writer, sheet_name=sheet_name)
        media_type = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        data = base64.b64encode(xlsx_io.getvalue()).decode("utf-8")
        href = f'<a href="data:{media_type};base64,{data}" download="{download_name}" >{frase}</a> (right-click and save)'
        st.markdown(href, unsafe_allow_html=True)

    def get_table_download_link2(df1, df2, frase):
        """Offer the train (df1) and test (df2) tables as RegressionResults.xlsx."""
        _xlsx_download_link(
            [(df1, 'Обучающая выборка'), (df2, 'Тестовая ваборка')],
            "RegressionResults.xlsx", frase)

    def save_train_test_data_xlsx(trainX, trainY, testX, testY, predictions):
        """Show the train and test tables and offer them as a workbook.

        Reads ``all_clummn_names`` and ``selected_y`` from the enclosing
        scope to label the columns.
        """
        cols = [str(i) for i in all_clummn_names]
        cols.remove(selected_y[-1])
        cols.append(str(selected_y[-1]) + " ( Selected y )")
        df1 = pd.DataFrame(np.concatenate((trainX, trainY), axis=1),
                           columns=cols)
        st.text("Обучающая выборка")
        st.write(df1)

        # The test table swaps the target column for prediction + real value.
        cols.pop()
        cols.append('Predictions')
        cols.append(str(selected_y[-1]) + " ( Real y value )")
        predictions = np.reshape(predictions, (predictions.shape[0], 1))
        df2 = pd.DataFrame(np.concatenate((testX, predictions, testY), axis=1),
                           columns=cols)
        st.text("Тестовая выборка")
        st.write(df2)
        get_table_download_link2(df1, df2, "Сохранить результаты xlsx File")

    def save_exel(df, frase):
        """Offer the clustering table as ClusteringResults.xlsx."""
        _xlsx_download_link([(df, 'Результаты кластеризации')],
                            "ClusteringResults.xlsx", frase)

    def get_file():
        """Show the CSV uploader and return the upload, or None if absent."""
        file = st.file_uploader(
            " Нажмите browse files, чтобы загрузить файл в формате .csv",
            type="csv")
        show_file = st.empty()
        if not file:
            show_file.info(
                " Файл не загружен. Загрузите файл для анализа в формате .csv")
            return None
        show_file.info(" Загрузка файла выполнена успешно")
        return file

    def regression_errors_values(testY, predict, n_predict):
        """Write MAE and MSE for the plain and the normalized predictions."""
        st.write("Средняя абсолютная ошибка:",
                 mean_absolute_error(testY, predict))
        st.write("Средняя квадратичная ошибка:",
                 mean_squared_error(testY, predict))
        st.write("Средняя абсолютная ошибка (c нормализацией):",
                 mean_absolute_error(testY, n_predict))
        st.write("Средняя квадратичная ошибка (c нормализацией):",
                 mean_squared_error(testY, n_predict))

    def regression_plot_show(trainX, testY, predict, n_predict, reg_type,
                             target_label):
        """Plot real test targets against both sets of predictions."""
        # The x axis is sized from the test set so x and y always match in
        # length, even when the test split is larger than the train split.
        xx = list(range(testY.size))
        fig = plt.figure()
        plt.plot(xx, testY[0:testY.size], 'o', color='r', label='y')
        plt.plot(xx, predict[0:testY.size], color='b', linewidth=2,
                 label='predicted y')
        plt.plot(xx, n_predict[0:testY.size], color='k', linewidth=2,
                 label='predicted y with normalize')
        plt.ylabel(target_label)
        plt.xlabel('Line number in dataset')
        plt.legend(loc=4)
        plt.title(reg_type)
        st.pyplot(fig)
        plt.close(fig)

    def model_linear_Regression(trainX, trainY, testX, testY):
        """Fit ordinary least squares on the train split; predict the test split."""
        model = LinearRegression()
        model.fit(trainX, trainY)
        return model.predict(testX)

    def model_ridge_Regression(trainX, trainY, testX, testY):
        """Fit ridge regression on the train split; predict the test split."""
        # NOTE(review): `normalize` is not accepted by recent scikit-learn
        # releases; confirm the pinned version before upgrading.
        model = Ridge(normalize=True)
        model.fit(trainX, trainY)
        return model.predict(testX)

    def model_lasso(trainX, trainY, testX, testY):
        """Fit lasso regression on the train split; predict the test split."""
        model = Lasso()
        model.fit(trainX, trainY)
        return model.predict(testX)

    def model_random_forest(trainX, trainY, testX, testY):
        """Fit a random forest on the train split; predict the test split."""
        # NOTE(review): recent scikit-learn spells this criterion
        # "absolute_error"; confirm the pinned version before upgrading.
        model = RandomForestRegressor(criterion="mae", bootstrap=True)
        model.fit(trainX, trainY)
        return model.predict(testX)

    def _scatter_projection(points, title, clusters=None):
        """Scatter a 2-D projection, coloured by *clusters* when given."""
        fig = plt.figure(figsize=(6, 6))
        if clusters is None:
            plt.scatter(points[:, 0], points[:, 1],
                        edgecolor='none', alpha=0.7, s=40)
        else:
            plt.scatter(points[:, 0], points[:, 1],
                        edgecolor='none', alpha=0.7, s=40, c=clusters,
                        cmap=plt.get_cmap('nipy_spectral', 10))
            plt.colorbar()
        plt.title(title)
        st.pyplot(fig)
        plt.close(fig)

    def _report_classifier(title, prediction, y_test):
        """Write accuracy, confusion matrix, report and raw predictions."""
        # scikit-learn metrics take (y_true, y_pred) in that order.
        st.write(title)
        st.write(accuracy_score(y_test, prediction))
        st.write(
            "Матрица неточности и отчёт о классификации дадут больше информации о производительности"
        )
        st.write(confusion_matrix(y_test, prediction))
        st.write(classification_report(y_test, prediction))
        st.write(prediction.tolist())

    st.title("Анализ данных & Машинное обучение")
    st.subheader("Загрузите файл с данными для анализа")
    file = get_file()
    if file:
        data = pd.read_csv(file, sep=',')
        if st.checkbox("Показать данные"):
            num = st.slider("Колличество строк для отображеия ", 5,
                            data.shape[0])
            st.dataframe(data.head(num))
        if st.checkbox("Заголовки столбцов"):
            st.write(data.columns)
        if st.checkbox("Показать размерность"):
            data_dm = st.radio("Размерность по", ("Строкам", "Столбцам"))
            if data_dm == "Столбцам":
                st.write("Число столбцов: " + str(data.shape[1]))
            elif data_dm == "Строкам":
                st.write("Число строк: " + str(data.shape[0]))
        if st.checkbox("Типы данных"):
            st.write(data.dtypes)
        if st.checkbox("Статистика по значениям"):
            st.write(data.describe().T)
        if st.checkbox("Рассмотреть отдельные столбцы"):
            all_data = data.columns.tolist()
            sltd_columns = st.multiselect("Select", all_data)
            # Everything below works on the narrowed frame.
            data = data[sltd_columns]
            st.dataframe(data)

        st.header("Визуализация данных")
        if st.checkbox("Построить корреляционную матрицу"):
            fig = plt.figure(figsize=(10, 10))
            plt.title('Correlation between different fearures')
            sns.heatmap(data.corr(), vmax=1, square=True, annot=True,
                        cmap='gray_r')
            st.pyplot(fig)
            plt.close(fig)

        st.subheader("Построение графиков")
        all_clummn_names = data.columns.tolist()
        plot_type = st.selectbox("Выберите тип графика",
                                 ["area", "bar", "line", "hist", "box", "kde"])
        selected_columns = st.multiselect(
            "Выберите столбцы для построения графика", all_clummn_names)
        if st.button("Построить график"):
            st.success("Построение графика {} для {}".format(
                plot_type, selected_columns))
            plot_data = data[selected_columns]
            native_charts = {
                'area': st.area_chart,
                'bar': st.bar_chart,
                'line': st.line_chart,
            }
            if plot_type in native_charts:
                native_charts[plot_type](plot_data)
            else:
                fig, ax = plt.subplots()
                plot_data.plot(kind=plot_type, ax=ax)
                st.pyplot(fig)
                plt.close(fig)

        if st.checkbox("Круговая диаграмма"):
            if st.button("Построить круговую диаграмму"):
                st.success("Поостроение круговой диаграммы")
                fig, ax = plt.subplots()
                data.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%",
                                                         ax=ax)
                st.pyplot(fig)
                plt.close(fig)

        if st.checkbox("PCA проекция"):
            scaler = StandardScaler()
            pca = decomposition.PCA(n_components=2)
            X_reduced = pca.fit_transform(scaler.fit_transform(data))
            _scatter_projection(X_reduced, 'Feautures PCA projection')

        if st.checkbox("TSNE проекция"):
            scaler = StandardScaler()
            tsne = TSNE(random_state=17)
            tsne_representation = tsne.fit_transform(
                scaler.fit_transform(data))
            _scatter_projection(tsne_representation,
                                'Feautures T-sne projection ')

        st.subheader("Применение методов машинного обучения")
        if st.checkbox("Выбрать тип решаемой задачи"):
            problem_type = st.radio(
                "Выберите тип задачи",
                ("Регрессия", "Кластеризация", "Классификация"))

            if problem_type == "Регрессия":
                st.write("Выберите столбец у для задачи регрессии")
                selected_y = st.multiselect("", all_clummn_names)
                reg_type = st.radio(
                    "Выберите алгоритм решения",
                    ("Все", "Линейная регрессия", "Ridge", "Lasso",
                     "Случайный лес"))
                # Capped at 99: a 100 % test split leaves nothing to train on
                # and is rejected by train_test_split.
                test_size_slider = st.slider(
                    "Выберите размер тестовой выборки %", 1, 99)

                # Insertion order is the order used when "Все" runs them all.
                regressors = {
                    "Линейная регрессия": model_linear_Regression,
                    "Ridge": model_ridge_Regression,
                    "Случайный лес": model_random_forest,
                    "Lasso": model_lasso,
                }

                if st.checkbox("Выполнить"):
                    if selected_y:
                        st.success(
                            "Столбец {} выбран успешно".format(selected_y))
                        train, test = train_test_split(
                            data, test_size=test_size_slider / 100)
                        trainX = np.array(train.drop(columns=selected_y))
                        trainY = np.array(train[selected_y])
                        testX = np.array(test.drop(columns=selected_y))
                        testY = np.array(test[selected_y])
                        st.write(selected_y)

                        if reg_type == "Все":
                            chosen = list(regressors.items())
                        else:
                            chosen = [(reg_type, regressors[reg_type])]

                        for label, fit_predict in chosen:
                            # Each model is run twice: on row-normalized and
                            # on raw features.
                            n_predictions = fit_predict(
                                preprocessing.normalize(trainX), trainY,
                                preprocessing.normalize(testX), testY)
                            predictions = fit_predict(trainX, trainY, testX,
                                                      testY)
                            regression_plot_show(trainX, testY, predictions,
                                                 n_predictions, label,
                                                 selected_y)
                            regression_errors_values(testY, predictions,
                                                     n_predictions)
                            save_train_test_data_xlsx(trainX, trainY, testX,
                                                      testY, predictions)
                    else:
                        st.warning("Выберите стоббец у")

            elif problem_type == "Кластеризация":
                n_clusters = st.number_input("Введите количество кластеров:",
                                             3)
                dataset = data.copy()
                scaler = StandardScaler()
                X = scaler.fit_transform(dataset)
                km = KMeans(n_clusters=n_clusters)
                # fit & predict clusters
                dataset['cluster'] = km.fit_predict(X)
                st.write(dataset['cluster'])
                st.write(dataset)
                if st.checkbox("PCA"):
                    pca = decomposition.PCA(n_components=2)
                    X_reduced = pca.fit_transform(X)
                    _scatter_projection(X_reduced, 'Feautures PCA projection',
                                        clusters=dataset['cluster'])
                if st.checkbox("TSNE"):
                    tsne = TSNE(random_state=17)
                    tsne_representation = tsne.fit_transform(X)
                    _scatter_projection(tsne_representation,
                                        'Feautures T-sne projection ',
                                        clusters=dataset['cluster'])
                save_exel(dataset, "Сохранить результаты xlsx File")

            elif problem_type == "Классификация":
                classification_type = st.radio(
                    "Выберите алгоритм решения",
                    ("Все", "KNeighborsClassifier", "SVC_model"))
                st.write("Выберите столбец у для задачи классификации")
                selected_y = st.multiselect("", all_clummn_names)

                use_svc = classification_type in ("Все", "SVC_model")
                use_knn = classification_type in ("Все",
                                                  "KNeighborsClassifier")
                nbr = None
                if use_knn:
                    st.write(
                        "В KNN-модели нужно указать параметр n_neighbors. Это число точек, на которое будет смотреть классификатор, чтобы определить, к какому классу принадлежит новая точка"
                    )
                    # Asked for before the button: a widget created inside a
                    # button branch disappears on the rerun it triggers.
                    # NOTE(review): values above the train-split size will be
                    # rejected by KNN at predict time.
                    nbr = st.slider("Число точек ", 3, data.shape[0])

                if st.button("Выбрать"):
                    if not selected_y:
                        st.warning("Выберите стоббец у")
                    else:
                        st.success(
                            "Столбец {} выбран успешно".format(selected_y))
                        y = np.array(data[selected_y])
                        X = np.array(data.drop(columns=selected_y))
                        # 20 % of the rows are held out for testing;
                        # random_state makes the split reproducible.
                        X_train, X_test, y_train, y_test = train_test_split(
                            X, y, test_size=0.20, random_state=27)

                        if use_svc:
                            SVC_model = svm.SVC()
                            SVC_model.fit(X_train, y_train)
                            SVC_prediction = SVC_model.predict(X_test)
                            _report_classifier(
                                "Оценка точности классификатора SVC_model",
                                SVC_prediction, y_test)
                        if use_knn:
                            KNN_model = KNeighborsClassifier(n_neighbors=nbr)
                            KNN_model.fit(X_train, y_train)
                            KNN_prediction = KNN_model.predict(X_test)
                            _report_classifier(
                                "Оценка точности классификатора KNeighborsClassifier",
                                KNN_prediction, y_test)

    if st.button("Завершить работу"):
        st.balloons()
def main():
    """Run the Heart Failure Prediction Streamlit app.

    A sidebar select box switches between five pages:

    * ``EDA`` - tabular exploration of the dataset.
    * ``Plot`` - correlation heatmap, pie chart and a customizable plot.
    * ``Model Building`` - cross-validates every configured classifier, fits
      it on the full data and pickles it to ``<name>.pkl``.
    * ``Predict`` - loads the pickled models and scores patient values
      entered through widgets.
    * ``About`` - credits.

    Every data page reads ``heart_failure_clinical_records_dataset.csv`` from
    the current working directory; the last column is used as the target.
    """
    dataset_path = 'heart_failure_clinical_records_dataset.csv'

    # NOTE(review): the markdown link target after "[here](" is missing in the
    # source text; restore the dataset URL so the link renders.
    st.write("""
    # Heart Failure Prediction App
    This app predicts the **Heart Failure** for a patient.
    Data obtained from [here](
    """)

    activities = ["EDA", "Plot", "Model Building", "Predict", "About"]
    choice = st.sidebar.selectbox("Select Activity", activities)

    if choice == 'EDA':
        st.subheader("Exploratory Data Analysis")
        df = pd.read_csv(dataset_path)
        st.dataframe(df)

        # Computed up front: "Select Columns To Show" needs it even when the
        # "Show columns" box is not ticked (previously a NameError).
        all_columns = df.columns.to_list()

        if st.checkbox("Show shape"):
            st.write(df.shape)
        if st.checkbox("Show columns"):
            st.write(all_columns)
        if st.checkbox("Select Columns To Show"):
            selected_columns = st.multiselect("Select Columns", all_columns)
            st.dataframe(df[selected_columns])
        if st.checkbox("Show summary"):
            st.write(df.describe())
        if st.checkbox("Show value counts"):
            # Value counts of the last column (the target).
            st.write(df.iloc[:, -1].value_counts())

    elif choice == 'Plot':
        st.subheader("Data Visualization")
        df = pd.read_csv(dataset_path)
        st.dataframe(df)
        all_columns = df.columns.to_list()

        if st.checkbox("Correlation with Seaborn"):
            corr = df.corr()
            st.write(sns.heatmap(corr))
            st.pyplot()

        if st.checkbox("Pie Chart"):
            column_to_plot = st.selectbox("Select 1 column to plot",
                                          all_columns)
            pie_plot = df[column_to_plot].value_counts().plot.pie()
            st.write(pie_plot)
            st.pyplot()

        type_of_plot = st.selectbox(
            "Select Type of Plot",
            ["area", "bar", "line", "hist", "box", "kde"])
        selected_columns_names = st.multiselect("selct Columns To plot",
                                                all_columns)

        if st.button("Generate Plot"):
            st.success("Generating Customizable Plot of {} for {}".format(
                type_of_plot, selected_columns_names))

            # Chart kinds Streamlit renders natively; everything else goes
            # through pandas/matplotlib.
            native_charts = {
                "area": st.area_chart,
                "bar": st.bar_chart,
                "line": st.line_chart,
            }
            cust_data = df[selected_columns_names]
            if type_of_plot in native_charts:
                native_charts[type_of_plot](cust_data)
            elif type_of_plot:
                cust_plot = cust_data.plot(kind=type_of_plot)
                st.write(cust_plot)
                st.pyplot()

    elif choice == 'Model Building':
        st.subheader("Building ML Model")
        df = pd.read_csv(dataset_path)
        st.dataframe(df)

        # All columns but the last are features; the last one is the label.
        X = df.iloc[:, 0:-1]
        Y = df.iloc[:, -1]
        seed = 7

        # Only the random forest is enabled. Other (name, estimator) pairs
        # (LR, LDA, KNN, CART, NB, SVM, GBC) can be appended here; the
        # 'Predict' page must then list the matching '<name>.pkl' files.
        models = [('RFC', RandomForestClassifier(n_estimators=100))]

        model_name = []
        model_mean = []
        model_std = []
        all_models = []
        scoring = 'accuracy'

        for name, model in models:
            # shuffle=True is required for random_state to be accepted:
            # current scikit-learn raises ValueError when a seed is given
            # while shuffling is off.
            kfold = model_selection.KFold(n_splits=10,
                                          shuffle=True,
                                          random_state=seed)
            cv_results = model_selection.cross_val_score(model,
                                                         X,
                                                         Y,
                                                         cv=kfold,
                                                         scoring=scoring)
            model_name.append(name)
            model_mean.append(cv_results.mean())
            model_std.append(cv_results.std())
            all_models.append({
                "model_name": name,
                "model_accuracy": cv_results.mean(),
                "standard_deviation": cv_results.std()
            })

            # cross_val_score fits clones, so fit the estimator itself on
            # the full data before persisting it for the 'Predict' page.
            model.fit(X, Y)
            with open(name + '.pkl', 'wb') as model_file:
                pickle.dump(model, model_file)

        st.dataframe(
            pd.DataFrame(
                zip(model_name, model_mean, model_std),
                columns=['Model Name', 'Model Accuracy',
                         'Standard Deviation']))
        st.json(all_models)

    elif choice == 'Predict':
        # Pickle files written by the 'Model Building' page.
        models = ['RFC.pkl']

        def user_input_features():
            """Collect one patient's values from widgets as a 1-row frame."""
            age = st.slider('Age of the patient(Years)', 40, 95, 50)
            anaemia = st.selectbox(
                'Anaemia-Decrease of red blood cells or hemoglobin(True-1, False-0)',
                (1, 0))
            creatinine_phosphokinase = st.slider(
                'Creatinine phosphokinase-Level of the CPK enzyme in the blood(mcg/L)',
                23, 7861, 3300)
            diabetes = st.selectbox(
                'Diabetes-If the patient has diabetes(True-1, False-0)',
                (1, 0))
            ejection_fraction = st.slider(
                'Ejection fraction-Percentage of blood leaving', 14, 80, 30)
            high_blood_pressure = st.selectbox(
                'High blood pressure-If a patient has hypertension(True-1, False-0)',
                (1, 0))
            platelets = st.slider(
                'Platelets-Platelets in the blood(kiloplatelets/mL)', 25100,
                850000, 40000)
            serum_creatinine = st.slider(
                'Serum creatinine-Level of creatinine in the blood(mg/dL)',
                0.5000, 9.4000, 1.2000)
            serum_sodium = st.slider(
                'Serum sodium-Level of sodium in the blood(mEq/L)', 113, 148,
                120)
            sex = st.selectbox('Sex-Woman or Man(Man-1,Women-0)', (1, 0))
            smoking = st.selectbox(
                'Smoking-If the patient smokes(True-1, False-0)', (1, 0))
            time = st.slider('Time-Follow-up period(Days)', 4, 285, 100)

            # Key order mirrors the widget order above; the fitted models
            # expect the same column order as the training CSV.
            data = {
                'age': age,
                'anaemia': anaemia,
                'creatinine_phosphokinase': creatinine_phosphokinase,
                'diabetes': diabetes,
                'ejection_fraction': ejection_fraction,
                'high_blood_pressure': high_blood_pressure,
                'platelets': platelets,
                'serum_creatinine': serum_creatinine,
                'serum_sodium': serum_sodium,
                'sex': sex,
                'smoking': smoking,
                'time': time
            }
            return pd.DataFrame(data, index=[0])

        input_df = user_input_features()
        if input_df is not None:
            st.dataframe(input_df)
            for name in models:
                try:
                    # Only load pickles this app wrote itself; unpickling
                    # untrusted files can execute arbitrary code.
                    with open(name, 'rb') as model_file:
                        model = pickle.load(model_file)
                except FileNotFoundError:
                    st.warning(
                        "{} not found - train it on the 'Model Building' "
                        "page first.".format(name))
                    continue

                prediction_proba = model.predict_proba(input_df)
                # name[0:-4] strips the '.pkl' suffix for display.
                st.write('{} Predictions:'.format(name[0:-4]))
                st.write(prediction_proba)
                # Index of the most probable class; 0 is reported as alive.
                index = np.argmax(prediction_proba)
                if index == 0:
                    st.write('Not Dead')
                else:
                    st.write('Dead')

    elif choice == 'About':
        st.subheader("About")
        st.write("Made By Rishab Koul with the Streamlit Library")
def main():
    """Render the "project Dataset explorer" Streamlit page.

    The function reads two names it does not define itself, ``df`` (the
    dataset) and ``numerics`` (dtypes treated as numeric); they are expected
    to exist at module level - not visible in this chunk, confirm.

    Sidebar check boxes toggle the individual sections (preview, histogram,
    scatter plot, correlation heatmap, pie plot, grouped value counts,
    customizable plot, per-feature distributions).  The z-test screening and
    the Isolation Forest / Local Outlier Factor scoring always run because
    the optional "Alarm Report" section charts their results.
    """
    st.header("project Dataset explorer")
    st.sidebar.header("OPTIONS")

    all_cols = df.columns.values
    numeric_cols = df.select_dtypes(include=numerics).columns.values

    if st.sidebar.checkbox("Data preview", True):
        st.subheader("Data preview")
        st.markdown(
            f"Shape of dataset : {df.shape[0]} rows, {df.shape[1]} columns")
        if st.checkbox("Data types"):
            st.dataframe(df.dtypes)
        if st.checkbox("Data Summary"):
            st.write(df.describe())

    if st.sidebar.checkbox("Pattern distribution", False):
        st.subheader("Plot numeric column distribution")
        # st.echo() also prints the source of the enclosed statements.
        with st.echo():
            col = st.selectbox("Choose a column to display", numeric_cols)
            n_bins = st.number_input("Max number of bins ?", 5, 100, 10)
            chart = (alt.Chart(df).mark_bar().encode(
                alt.X(f"{col}:Q", bin=alt.Bin(maxbins=n_bins)),
                alt.Y("count()")))
            st.altair_chart(chart)
        st.markdown("---")

    if st.sidebar.checkbox("Scatterplot", False):
        st.subheader("Scatterplot")
        selected_cols = st.multiselect("Choose 2 columns :", numeric_cols)
        # The chart is only drawn once exactly two columns are picked.
        if len(selected_cols) == 2:
            color_by = st.selectbox("Color by column:",
                                    all_cols,
                                    index=len(all_cols) - 1)
            col1, col2 = selected_cols
            chart = (alt.Chart(df).mark_circle(size=20).encode(
                alt.X(f"{col1}:Q"), alt.Y(f"{col2}:Q"),
                alt.Color(f"{color_by}")).interactive())
            st.altair_chart(chart)
        st.markdown("---")

    # seaborn plot
    if st.sidebar.checkbox("Correlation plot"):
        st.subheader("Correlation plot")
        cor = df.corr()
        # Mask the upper triangle (diagonal included) so every pair of
        # columns is shown once.
        mask = np.zeros_like(cor)
        mask[np.triu_indices_from(mask)] = True
        plt.figure(figsize=(12, 10))
        with sns.axes_style("white"):
            st.write(
                sns.heatmap(cor,
                            annot=True,
                            linewidth=2,
                            mask=mask,
                            cmap="magma"))
            st.pyplot()

    # Pie plot of the last column's value counts
    if st.sidebar.checkbox("pie plot"):
        st.subheader("Pie plot")
        st.success("Generating A pie plot")
        st.write(df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%"))
        st.pyplot()

    if st.sidebar.checkbox("plot of value counts"):
        st.subheader("Groupby columns")
        st.text("value counts by target")
        all_columns_name = df.columns.tolist()
        primary_col = st.selectbox("primary columns to groupby",
                                   all_columns_name)
        selected_columns_name = st.multiselect("select columns to plot",
                                               all_columns_name)
        if st.button("plot"):
            st.text("Generate value plot")
            if selected_columns_name:
                vc_plot = df.groupby(
                    primary_col)[selected_columns_name].count()
            else:
                # Nothing selected: fall back to the last column's counts.
                vc_plot = df.iloc[:, -1].value_counts()
            st.write(vc_plot.plot(kind="bar"))
            st.pyplot()

    # customizable plot
    if st.sidebar.checkbox("customizable plot", False):
        st.subheader("Deviation")
        columns_names = df.columns.tolist()
        type_of_plot = st.selectbox(
            "select type of plot",
            ["area", "bar", "line", "hist", "box", "kde"])
        selected_columns_names = st.multiselect("select column to plot",
                                                columns_names)
        if st.button("Show plot"):
            st.success("Generating customizable plot of {} for {}".format(
                type_of_plot, selected_columns_names))
            cust_data = df[selected_columns_names]
            if type_of_plot == 'area':
                st.area_chart(cust_data)
            elif type_of_plot == 'bar':
                st.bar_chart(cust_data)
            elif type_of_plot == 'line':
                st.line_chart(cust_data)
            elif type_of_plot:
                # hist / box / kde are drawn by pandas on matplotlib.
                cust_plot = cust_data.plot(kind=type_of_plot)
                st.write(cust_plot)
                st.pyplot()

    if st.sidebar.checkbox("Deviations"):
        st.subheader("Deviation plot")
        for feature in ['time', 'measurement', 'control_mode']:
            ax = plt.subplot()
            # Overlay the distributions of both binary_result groups.
            st.write(
                sns.distplot(df[feature][df.binary_result == 1],
                             bins=50,
                             label='Anormal',
                             kde_kws={'bw': 0.02}))
            st.write(
                sns.distplot(df[feature][df.binary_result == 0],
                             bins=50,
                             label='Normal',
                             kde_kws={'bw': 0.02}))
            ax.set_xlabel('')
            ax.set_title('histogram of feature: ' + str(feature))
            plt.legend(loc='best')
            st.pyplot()

    def ztest(feature):
        """Return the z-score of the binary_result == 1 mean for *feature*.

        The binary_result == 0 rows provide the reference mean, standard
        deviation and sample size.
        """
        mean = falsepositive[feature].mean()
        std = falsepositive[feature].std()
        zScore = (falsenegative[feature].mean() - mean) / (
            std / np.sqrt(sample_size))
        return zScore

    columns = df.drop('binary_result', axis=1).columns
    falsepositive = df[df.binary_result == 0]
    falsenegative = df[df.binary_result == 1]
    sample_size = len(falsepositive)
    significant_features = ["measurement"]
    setpoint = 70  # |z| threshold above which a column is reported

    for i in columns:
        z_value = ztest(i)
        if (abs(z_value) >= setpoint):
            st.write(i, " is critical alarm")
            significant_features.append(i)

    st.subheader("Inliers & Outliers of Data")
    significant_features.append('binary_result')
    inliers = df[df.binary_result == 0]
    ins = inliers.drop(['binary_result'], axis=1)
    outliers = df[df.binary_result == 1]
    outs = outliers.drop(['binary_result'], axis=1)
    # Bare expression kept on purpose: when this is the main script,
    # Streamlit "magic" writes it to the page.
    ins.shape, outs.shape

    def falsepositive_accuracy(values):
        """Return the share of predictions equal to 1 (inlier), 4 decimals."""
        tp = list(values).count(1)
        total = values.shape[0]
        accuracy = np.round(tp / total, 4)
        return accuracy

    def falsenegative_accuracy(values):
        """Return the share of predictions equal to -1 (outlier), 4 decimals."""
        tn = list(values).count(-1)
        total = values.shape[0]
        accuracy = np.round(tn / total, 4)
        return accuracy

    st.subheader("Accuracy score For Isolation forest")
    ISF = IsolationForest(random_state=42)
    # predict() on an unfitted estimator raises NotFittedError, so the model
    # must be fitted first.
    # NOTE(review): the fit call is absent from the source; training on the
    # binary_result == 0 rows is an assumption - confirm the intended set.
    ISF.fit(ins)
    falsepositive_isf = ISF.predict(ins)
    falsenegative_isf = ISF.predict(outs)
    in_accuracy_isf = falsepositive_accuracy(falsepositive_isf)
    out_accuracy_isf = falsenegative_accuracy(falsenegative_isf)
    st.write("Accuracy in Detecting falsepositive Alarm:", in_accuracy_isf)
    st.write("Accuracy in Detecting falsenegative Alarm:", out_accuracy_isf)

    st.subheader("Accuracy score For Local Outlier Factor")
    LOF = LocalOutlierFactor(novelty=True)
    # NOTE(review): same assumption as above about the training rows.
    LOF.fit(ins)
    falsepositive_lof = LOF.predict(ins)
    falsenegative_lof = LOF.predict(outs)
    in_accuracy_lof = falsepositive_accuracy(falsepositive_lof)
    out_accuracy_lof = falsenegative_accuracy(falsenegative_lof)
    st.write("Accuracy in Detecting falsepositive Alarm :", in_accuracy_lof)
    st.write("Accuracy in Detecting falsenegative Alarm:", out_accuracy_lof)

    if st.sidebar.checkbox("Alarm Report", False):
        st.subheader("classification of Alarm")
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=[16, 3])
        ax1.set_title("Accuracy of Isolation Forest", fontsize=20)
        st.write(
            sns.barplot(x=[in_accuracy_isf, out_accuracy_isf],
                        y=['falsepositive Alarm', 'falsenegative Alarm'],
                        label="classifiers",
                        color="b",
                        ax=ax1))
        ax1.set(xlim=(0, 1))
        ax2.set_title("Accuracy of Local Outlier Factor", fontsize=20)
        st.write(
            sns.barplot(x=[in_accuracy_lof, out_accuracy_lof],
                        y=['falsepositive Alarm', 'falsenegative Alarm'],
                        label="classifiers",
                        color="r",
                        ax=ax2))
        ax2.set(xlim=(0, 1))
        st.pyplot()
rn = st.slider("", 1 , 15 , 5) df , name = GetPerformanceStat(int(PL_ID), rn) st.header(name + "'s Performance Stats") st.subheader('Select the options you want to see stats for') chioce ='', ('Overall','Raids','Tackles') ) if chioce == 'Overall': # Total points stats st.subheader('Total points stats in recent '+str(rn)+' Matches') if st.checkbox('Show Data'): st.write(df[['player_total_points','player_raid_points_total','player_tackle_points_total']].T ) if st.checkbox('Show Bar Chart'): st.bar_chart(df[['player_total_points','player_raid_points_total','player_tackle_points_total']] ) if st.checkbox('Show Area Chart'): st.area_chart(df[['player_total_points','player_raid_points_total','player_tackle_points_total']]) elif chioce == 'Raids': # Raids Stats st.subheader('Total Raids Statsin recent '+str(rn)+' Matches') if st.checkbox('Show Data'): st.write(df[['player_raids_total','player_raids_successful','player_raids_empty','player_raids_unsuccessful']].T ) if st.checkbox('Show Bar Chart'): st.bar_chart(df[['player_raids_total','player_raids_successful','player_raids_empty','player_raids_unsuccessful']] ) if st.checkbox('Show Area Chart'): st.area_chart(df[['player_raids_total','player_raids_successful','player_raids_empty','player_raids_unsuccessful']] ) elif chioce == 'Tackles': # Tackel stats st.subheader('Total Raids Statsin recent '+str(rn)+' Matches') if st.checkbox('Show Data'):
def main():
    """Run the BPX Energy LE Forecast Tool Streamlit page.

    Lets the user pick a CSV file from ``./datasets``, then offers basic
    inspection widgets (rows, column names, shape, column subset, value
    counts, dtypes, summary) and several plots (seaborn correlation heatmap,
    pie plot, grouped value counts and a customizable plot).  The sidebar
    carries static information about the tool.
    """
    st.title("BPX Energy LE Forecast Tool")
    st.subheader("SoHa Change Dev/Test")

    html_temp = """
    <div style="background-color:green;"><p style="color:white;font-size:50px;padding:10px">SoHa Change is Awesome</p></div>
    """
    st.markdown(html_temp, unsafe_allow_html=True)

    def file_selector(folder_path='./datasets'):
        """Return the path of the file chosen from *folder_path*."""
        filenames = os.listdir(folder_path)
        selected_filename = st.selectbox("Select A file", filenames)
        return os.path.join(folder_path, selected_filename)

    filename = file_selector()
    st.info("You Selected {}".format(filename))

    # Read Data
    df = pd.read_csv(filename)

    # Show Dataset
    if st.checkbox("Show Dataset"):
        # Integer bounds make number_input return an int; the default keeps
        # the nine rows that were previously hard-coded, and the widget value
        # is now actually used.
        number = st.number_input("Number of Rows to View",
                                 min_value=1,
                                 value=9,
                                 step=1)
        st.dataframe(df.head(n=int(number)))

    # Show Columns
    if st.button("Column Names"):
        st.write(df.columns)

    # Show Shape
    if st.checkbox("Shape of Dataset"):
        data_dim = st.radio("Show Dimension By ", ("Rows", "Columns"))
        if data_dim == 'Rows':
            st.text("Number of Rows")
            st.write(df.shape[0])
        elif data_dim == 'Columns':
            st.text("Number of Columns")
            st.write(df.shape[1])
        else:
            st.write(df.shape)

    # Select Columns
    if st.checkbox("Select Columns To Show"):
        all_columns = df.columns.tolist()
        selected_columns = st.multiselect("Select", all_columns)
        st.dataframe(df[selected_columns])

    # Show Values (value counts of the last column)
    if st.button("Value Counts"):
        st.text("Value Counts By Target/Class")
        st.write(df.iloc[:, -1].value_counts())

    # Show Datatypes
    if st.button("Data Types"):
        st.write(df.dtypes)

    # Show Summary
    if st.checkbox("Summary"):
        st.write(df.describe().T)

    ## Plot and Visualization
    st.subheader("Data Visualization")

    # Correlation, Seaborn Plot
    if st.checkbox("Correlation Plot[Seaborn]"):
        st.write(sns.heatmap(df.corr(), annot=True))
        st.pyplot()

    # Pie Chart of the last column's value counts
    if st.checkbox("Pie Plot"):
        if st.button("Generate Pie Plot"):
            st.success("Generating A Pie Plot")
            st.write(
                df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%"))
            st.pyplot()

    # Count Plot
    if st.checkbox("Plot of Value Counts"):
        st.text("Value Counts By Target")
        all_columns_names = df.columns.tolist()
        primary_col = st.selectbox("Primary Columm to GroupBy",
                                   all_columns_names)
        selected_columns_names = st.multiselect("Select Columns",
                                                all_columns_names)
        if st.button("Plot"):
            st.text("Generate Plot")
            if selected_columns_names:
                vc_plot = df.groupby(
                    primary_col)[selected_columns_names].count()
            else:
                # Nothing selected: fall back to the last column's counts.
                vc_plot = df.iloc[:, -1].value_counts()
            st.write(vc_plot.plot(kind="bar"))
            st.pyplot()

    # Customizable Plot
    st.subheader("Customizable Plot")
    all_columns_names = df.columns.tolist()
    type_of_plot = st.selectbox(
        "Select Type of Plot", ["area", "bar", "line", "hist", "box", "kde"])
    selected_columns_names = st.multiselect("Select Columns To Plot",
                                            all_columns_names)

    if st.button("Generate Plot"):
        st.success("Generating Customizable Plot of {} for {}".format(
            type_of_plot, selected_columns_names))
        cust_data = df[selected_columns_names]

        # Plot By Streamlit
        if type_of_plot == 'area':
            st.area_chart(cust_data)
        elif type_of_plot == 'bar':
            st.bar_chart(cust_data)
        elif type_of_plot == 'line':
            st.line_chart(cust_data)
        # Custom Plot (hist / box / kde through pandas + matplotlib)
        elif type_of_plot:
            cust_plot = cust_data.plot(kind=type_of_plot)
            st.write(cust_plot)
            st.pyplot()

    if st.button("SoHa Change"):
        st.balloons()

    st.sidebar.header("LE Utility Tool")
    st.sidebar.info(
        "The LE & Forecast Utility is a tool meant to improve the speed & consistency of production reporting for operations PEs."
    )

    st.sidebar.header("Get Datasets")
    # NOTE(review): the link target is empty in the source; fill in the
    # repository URL so the link is usable.
    st.sidebar.markdown("[Common ML Dataset Repo]("")")

    st.sidebar.header("Developed By")
    st.sidebar.info("*****@*****.**")
    st.sidebar.info("*****@*****.**")
    st.sidebar.text("SoHa Change Team lead: Kellen McLoughlin")
    st.sidebar.text("Web App maintained by MarkusJBPX")
""") st.write(""" ## Giá mở """) st.line_chart(df.Open) st.write(""" ## Giá đóng """) st.line_chart(df.Close) st.write(""" ## Số lượng giao dịch trong ngày """) st.area_chart(df.Volume) file = st.file_uploader("Pick a file") if file: df1 = pd.read_csv(file) # Lấy cột Volume Test_1 = np.array(df1[['Volume']]) # Lấy cột Open Test_2 = np.array(df1[['Open']]) # Lấy cột High Test_3 = np.array(df1[['High']]) # Lấy cột Low
def _download_model(model):
    """Pickle *model* and render a base64 data-URI download link for it."""
    output_model = pickle.dumps(model)
    st.write("model saved as output_model ")
    b64 = base64.b64encode(output_model).decode()
    href = f'<a href="data:file/output_model;base64,{b64}">Download Trained Model</a>'
    st.markdown(href, unsafe_allow_html=True)


def _fill_missing_values(X, feature_columns):
    """Render the missing-value widgets and return the (possibly) filled X."""
    radioval = st.radio("choose type", ('ffill', 'statistical'))
    if radioval == 'ffill':
        if st.checkbox("fbfill"):
            # Forward fill first, then backward fill what is still missing
            # at the top of each column.
            X = X.ffill(axis=0)
            X = X.bfill(axis=0)
            st.markdown('**_missing values are fb filled_**')
    elif radioval == 'statistical':
        # fillna() is used without inplace=True: the in-place variant
        # returns None, which would overwrite the columns with missing
        # values when assigned back.
        if st.checkbox("handle with mean"):
            selected_columns = st.multiselect(
                "Select Columns to handle with mean ", feature_columns)
            if selected_columns:
                X[selected_columns] = X[selected_columns].fillna(
                    X[selected_columns].mean())
            st.write('handled with mean')
        elif st.checkbox("handle with median"):
            selected_columns = st.multiselect(
                "Select Columns to handle with median", feature_columns)
            if selected_columns:
                X[selected_columns] = X[selected_columns].fillna(
                    X[selected_columns].median())
            st.write('handled with median')
        elif st.checkbox("handle with mode"):
            selected_columns = st.multiselect(
                "Select Columns to handle with mode", feature_columns)
            if selected_columns:
                # DataFrame.mode() returns a frame; its first row holds the
                # most frequent value of every column.
                X[selected_columns] = X[selected_columns].fillna(
                    X[selected_columns].mode().iloc[0])
            st.write('handled with mode')
        st.markdown('**_missing values are filled statistically_**')
    st.write('missing values:', X.isnull().sum())
    return X


def _build_classifier(model):
    """Return an unfitted classifier for *model*, tuned via sidebar widgets."""
    if model == 'Logistic Regression':
        from sklearn.linear_model import LogisticRegression
        return LogisticRegression(random_state=0)
    if model == 'KNN':
        from sklearn.neighbors import KNeighborsClassifier
        n_neighbors = st.sidebar.slider('n_neighbors',
                                        min_value=1,
                                        max_value=5,
                                        step=1)
        p = st.sidebar.selectbox("P", [1, 2, 3, 4])
        return KNeighborsClassifier(n_neighbors=n_neighbors,
                                    metric='minkowski',
                                    p=p)
    if model == 'SVM':
        from sklearn.svm import SVC
        kernel_list = ['linear', 'poly', 'rbf', 'sigmoid']
        # Label fixed: this widget selects the kernel, not Minkowski's p.
        kernel = st.sidebar.selectbox("kernel", kernel_list)
        C = st.sidebar.slider('C', min_value=1, max_value=6, step=1)
        degree = st.sidebar.slider('Degree',
                                   min_value=1,
                                   max_value=10,
                                   step=1)
        return SVC(kernel=kernel, C=C, random_state=0, degree=degree)
    if model == 'DecisionTree':
        from sklearn.tree import DecisionTreeClassifier
        criterion = st.sidebar.selectbox("criterion", ["gini", "entropy"])
        max_depth = st.sidebar.slider('max_depth',
                                      min_value=1,
                                      max_value=10,
                                      step=1)
        min_samples_leaf = st.sidebar.slider('min_samples_leaf',
                                             min_value=1,
                                             max_value=10,
                                             step=1)
        return DecisionTreeClassifier(criterion=criterion,
                                      max_depth=max_depth,
                                      min_samples_leaf=min_samples_leaf,
                                      random_state=0)
    if model == 'Random Forest':
        from sklearn.ensemble import RandomForestClassifier
        criterion = st.sidebar.selectbox("criterion", ["gini", "entropy"])
        n_estimators = st.sidebar.number_input('n_estimators',
                                               min_value=1,
                                               max_value=500,
                                               step=1)
        max_depth = st.sidebar.slider('max_depth',
                                      min_value=1,
                                      max_value=10,
                                      step=1)
        return RandomForestClassifier(n_estimators=n_estimators,
                                      criterion=criterion,
                                      max_depth=max_depth,
                                      random_state=0)
    # Remaining option: 'XgBoostClassifier'
    from xgboost import XGBClassifier
    n_estimators = st.sidebar.number_input('n_estimators',
                                           min_value=1,
                                           max_value=2000)
    reg_lambda = st.sidebar.number_input('reg_lambda',
                                         min_value=0.01,
                                         max_value=5.00,
                                         step=0.02)
    max_depth = st.sidebar.slider('max_depth',
                                  min_value=1,
                                  max_value=10,
                                  step=1)
    colsample_bytree = st.sidebar.number_input('colsample_bytree',
                                               min_value=0.50,
                                               max_value=1.00,
                                               step=0.05)
    return XGBClassifier(n_estimators=n_estimators,
                         reg_lambda=reg_lambda,
                         max_depth=max_depth,
                         colsample_bytree=colsample_bytree)


def _build_regressor(model):
    """Return an unfitted regressor for *model*, tuned via sidebar widgets.

    NOTE(review): the "mse"/"mae" criterion names offered for the tree-based
    regressors were renamed in newer scikit-learn releases - confirm against
    the installed version.
    """
    if model == 'Linear Regression':
        from sklearn.linear_model import LinearRegression
        return LinearRegression()
    if model == 'SVR':
        from sklearn.svm import SVR
        kernel_list = ['linear', 'poly', 'rbf', 'sigmoid']
        # Label fixed: this widget selects the kernel.
        kernel = st.sidebar.selectbox("kernel", kernel_list)
        degree = st.sidebar.slider('Degree',
                                   min_value=1,
                                   max_value=10,
                                   step=1)
        return SVR(kernel=kernel, degree=degree)
    if model == 'DecisionTree':
        from sklearn.tree import DecisionTreeRegressor
        criterion = st.sidebar.selectbox("criterion",
                                         ["mse", "friedman_mse", "mae"])
        max_depth = st.sidebar.slider('max_depth',
                                      min_value=1,
                                      max_value=10,
                                      step=1)
        min_samples_leaf = st.sidebar.slider('min_samples_leaf',
                                             min_value=1,
                                             max_value=10,
                                             step=1)
        return DecisionTreeRegressor(criterion=criterion,
                                     max_depth=max_depth,
                                     min_samples_leaf=min_samples_leaf,
                                     random_state=0)
    if model == 'Random Forest':
        from sklearn.ensemble import RandomForestRegressor
        n_estimators = st.sidebar.number_input('n_estimators',
                                               min_value=1,
                                               max_value=500,
                                               step=1)
        max_depth = st.sidebar.slider('max_depth',
                                      min_value=1,
                                      max_value=10,
                                      step=1)
        criterion = st.sidebar.selectbox("criterion", ["mse", "mae"])
        return RandomForestRegressor(n_estimators=n_estimators,
                                     criterion=criterion,
                                     max_depth=max_depth,
                                     random_state=0)
    # Remaining option: 'XgBoostRegression'
    from xgboost import XGBRegressor
    n_estimators = st.sidebar.number_input('n_estimators',
                                           min_value=1,
                                           max_value=2000)
    reg_lambda = st.sidebar.number_input('reg_lambda',
                                         min_value=0.01,
                                         max_value=5.00,
                                         step=0.02)
    max_depth = st.sidebar.slider('max_depth',
                                  min_value=1,
                                  max_value=10,
                                  step=1)
    booster = st.sidebar.selectbox('booster',
                                   ["gbtree", "gblinear", "dart"])
    learning_rate = st.sidebar.number_input('learning_rate',
                                            min_value=0.05,
                                            max_value=3.00,
                                            step=0.01)
    colsample_bytree = st.sidebar.number_input('colsample_bytree',
                                               min_value=0.50,
                                               max_value=1.00,
                                               step=0.05)
    return XGBRegressor(n_estimators=n_estimators,
                        learning_rate=learning_rate,
                        booster=booster,
                        reg_lambda=reg_lambda,
                        max_depth=max_depth,
                        colsample_bytree=colsample_bytree)


def _render_eda_page():
    """Render the 'EDA &VIZ' page: upload a CSV, inspect it and plot it."""
    import io  # local import: only used to capture DataFrame.info() output

    st.title('Play with ML')
    html_temp1 = """<img src="images/dobby1.jpeg" alt="It's dobby" width="120" height="150">"""
    st.markdown(html_temp1, unsafe_allow_html=True)
    html_temp = """
    <div style="background-color:coral;padding:12px">
    <h2 style="color:white;text-align:center;"> Play with ML App </h2>
    </div>
    """
    st.markdown(html_temp, unsafe_allow_html=True)
    st.markdown(
        'hey,tired of modelling and tuning ML Models, wanna play with data & ML modles? Then upload a dataset here.. **_Dobby , a free elf_** is here for you '
    )
    st.subheader("Exploratory Data Analysis & Vizualization ")

    data = st.file_uploader("Upload a Dataset", type=["csv"])
    if data is None:
        return

    st.subheader('EDA')
    df = pd.read_csv(data)
    st.write('shape:', df.shape)

    # Computed once up front: several sections below need the column list
    # regardless of which check boxes are ticked (previously a NameError).
    all_columns = df.columns.to_list()

    if st.checkbox("Show Columns"):
        st.write(all_columns)
    if st.checkbox("Null values"):
        st.write(df.isnull().sum())
    if st.checkbox("Information"):
        # DataFrame.info() prints its report and returns None, so capture
        # the text in a buffer to display it.
        info_buffer = io.StringIO()
        df.info(buf=info_buffer)
        st.text(info_buffer.getvalue())
    if st.checkbox("Summary"):
        st.write(df.describe())
    if st.checkbox("Show Selected Columns"):
        selected_columns = st.multiselect("Select Columns", all_columns)
        st.dataframe(df[selected_columns])
    if st.checkbox("Correlation Plot(Seaborn)"):
        st.write(sns.heatmap(df.corr(), annot=True))
        st.pyplot()

    st.subheader('Data Visualization')
    if st.checkbox("Show Value Counts"):
        column = st.selectbox("Select a Column to show value counts",
                              all_columns)
        st.write(df[column].value_counts())
        st.write(df[column].value_counts().plot(kind='bar'))
        st.pyplot()

    type_of_plot = st.selectbox("Select Type of Plot", [
        "area", "bar", "pie", "line", "hist", "box", "kde", "altair_chart"
    ])
    selected_columns_names = st.multiselect("Select Columns To Plot",
                                            all_columns)

    if st.button("Generate Plot"):
        st.success("Generating {} plot for {}".format(
            type_of_plot, selected_columns_names))

        if type_of_plot == 'altair_chart':
            # NOTE(review): widgets created inside a button branch reset on
            # the next rerun, so only the default selections are plotted.
            a = st.selectbox("Select X axis", all_columns)
            b = st.selectbox("Select Y axis", all_columns)
            c = st.selectbox("Select a column ", all_columns)
            # Encode the chosen columns of the dataset itself rather than a
            # frame holding only the three column names.
            chart = alt.Chart(df).mark_circle().encode(x=a,
                                                       y=b,
                                                       size=c,
                                                       color=c,
                                                       tooltip=[a, b, c])
            st.altair_chart(chart, use_container_width=True)
        elif not selected_columns_names:
            st.warning("Select at least one column to plot.")
        elif type_of_plot == 'area':
            st.area_chart(df[selected_columns_names])
        elif type_of_plot == 'bar':
            st.bar_chart(df[selected_columns_names])
        elif type_of_plot == 'line':
            st.line_chart(df[selected_columns_names])
        elif type_of_plot == 'pie':
            # The select box offers "pie"; the branch used to test for
            # "Pie Plot" and was therefore unreachable.
            column_to_plot = st.selectbox("Select 1 Column",
                                          selected_columns_names)
            pie_plot = df[column_to_plot].value_counts().plot.pie(
                autopct="%1.1f%%")
            st.write(pie_plot)
            st.pyplot()
        elif type_of_plot:
            # hist / box / kde are drawn by pandas on matplotlib.
            cust_plot = df[selected_columns_names].plot(kind=type_of_plot)
            st.write(cust_plot)
            st.pyplot()


def _render_modelling_page():
    """Render the 'Modelling' page: preprocess, split, train and evaluate."""
    st.header('Training')
    st.markdown(
        "**_Hello Iam Dobby. Dobby has no master - Dobby is a free elf_**. Due to SARS-CoV-2 lockdown I dont have much work to do , So Iam here to make your model."
    )

    data = st.file_uploader("Upload a Dataset", type=["csv"])
    if data is None:
        return

    st.subheader('EDA')
    df = pd.read_csv(data)
    st.dataframe(df.head())
    st.write('shape:', df.shape)

    st.header('Data Preprocessing')
    all_columns = df.columns.tolist()
    features = st.multiselect("Select feature columns", all_columns)
    if not features:
        # Scalers and estimators reject a frame without columns; wait for a
        # selection instead of failing further down.
        st.info("Select at least one feature column to continue.")
        return

    # Copy so the fills below modify X itself, not a view of df.
    X = df[features].copy()
    st.dataframe(X)
    st.write(X.head())
    st.write(X.shape)

    labels = st.selectbox("Select label column", all_columns)
    y = df[labels]
    st.dataframe(y)
    st.write(y.head())
    st.write(y.shape)

    feature_columns = X.columns.tolist()
    if st.checkbox("Handling missing values"):
        X = _fill_missing_values(X, feature_columns)

    if st.checkbox("One hot encoding"):
        if st.checkbox("encode features"):
            X = pd.get_dummies(X)
            st.write("features are one hot encoded")
        if st.checkbox("encode labels"):
            y = pd.get_dummies(y)
            st.write("labels are one hot encoded")
            st.dataframe(y)

    st.write('Train - val split')
    # A float test_size must stay strictly below 1.0 for train_test_split.
    number = st.number_input('test split size',
                             min_value=0.1,
                             max_value=0.9)
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=number,
                                                        random_state=0)
    st.write(X_train.shape)
    st.write(X_test.shape)

    if st.checkbox("Feature Scaling"):
        radioval = st.radio("choose type of feature scaling",
                            ('none', 'Standardization', 'Normalization'))
        if radioval == 'none':
            st.write("you skipped feature scaling")
        if radioval == 'Standardization':
            from sklearn.preprocessing import StandardScaler
            sc_X = StandardScaler()
            # Fit on the training split only, then apply to both splits.
            X_train = sc_X.fit_transform(X_train)
            X_test = sc_X.transform(X_test)
        if radioval == 'Normalization':
            from sklearn.preprocessing import MinMaxScaler
            min_max_scaler = MinMaxScaler()
            X_train = min_max_scaler.fit_transform(X_train)
            X_test = min_max_scaler.transform(X_test)

    st.header("Training")
    problem_types = ['Regression', 'Classification']
    problem_type = st.selectbox("Select Problem Type ", problem_types)
    st.sidebar.markdown("Hyperparameter Tuning")

    if problem_type == 'Classification':
        models = [
            'Logistic Regression', 'KNN', 'SVM', 'DecisionTree',
            'Random Forest', 'XgBoostClassifier'
        ]
        model = st.selectbox("Select a model ", models)
        classifier = _build_classifier(model)

        if st.button("Train"):
            with st.spinner('model is training...'):
                classifier.fit(X_train, y_train)
            st.success('Model trained!')
            y_pred = classifier.predict(X_test)

            from sklearn.metrics import accuracy_score
            acc = accuracy_score(y_test, y_pred)
            st.write('val_accuracy:', acc)

            from sklearn.metrics import confusion_matrix, classification_report
            st.write(classification_report(y_test, y_pred))
            cm = confusion_matrix(y_test, y_pred)
            st.markdown("**_confusion matrix_**")
            st.write(cm)

            y_pred = pd.DataFrame(y_pred)
            st.dataframe(y_pred)
            st.write(y_pred[0].value_counts())
            st.write(y_pred[0].value_counts().plot(kind='bar'))
            st.pyplot()
            st.balloons()

        # NOTE(review): clicking this button reruns the script, so the
        # estimator pickled here is the freshly built, unfitted one; keeping
        # the fitted model across reruns needs persistent state.
        if st.button("save & Download model"):
            _download_model(classifier)

    if problem_type == 'Regression':
        models = [
            'Linear Regression', 'SVR', 'DecisionTree', 'Random Forest',
            'XgBoostRegression'
        ]
        model = st.selectbox("Select a model ", models)
        regressor = _build_regressor(model)

        if st.button("Train"):
            with st.spinner('model is training...'):
                regressor.fit(X_train, y_train)
            st.success('Model trained!')
            y_pred = regressor.predict(X_test)
            y_pred_train = regressor.predict(X_train)

            from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
            mae_tr = mean_absolute_error(y_train, y_pred_train)
            mae = mean_absolute_error(y_test, y_pred)
            mse = mean_squared_error(y_test, y_pred)
            mse_tr = mean_squared_error(y_train, y_pred_train)
            r2 = r2_score(y_test, y_pred)
            r2_tr = r2_score(y_train, y_pred_train)

            st.write('mean absolute error:')
            st.write('train:', mae_tr, 'val:', mae)
            st.write('mean squared error:')
            st.write('train:', mse_tr, 'val:', mse)
            st.write('r2:')
            st.write('train:', r2_tr, 'val:', r2)

            y_pred = pd.DataFrame(y_pred)
            st.dataframe(y_pred)
            st.balloons()

        # NOTE(review): same rerun caveat as for the classifier download.
        if st.button("save & Download model"):
            _download_model(regressor)


def main():
    """Run the "Play with ML" Streamlit app.

    The sidebar chooses between the 'EDA &VIZ' page (dataset inspection and
    plots) and the 'Modelling' page (preprocessing, train/validation split,
    training, metrics and model download).  An optional sidebar check box
    shows release information.
    """
    activities = ["EDA &VIZ", "Modelling"]
    choice = st.sidebar.selectbox("Select Activities", activities)

    if st.sidebar.checkbox('About'):
        st.sidebar.markdown("""
        app work in progress .This is a beta release.
        version: b-0.0.1
        initial release:27/6/2020
        helpful suggestions are welcome.
        contact: [email protected]
        """)

    if choice == 'EDA &VIZ':
        _render_eda_page()
    if choice == 'Modelling':
        _render_modelling_page()
# Pool picker shown in the sidebar; the options live in a one-column frame.
df = pd.DataFrame({
    'pool selection': ['Pool 1', 'Pool 2', 'Pool 3', 'Pool 4'],
})
option = st.sidebar.selectbox(
    'Which pool do you want ?',
    df['pool selection'],
)

# Bare expression: rendered by Streamlit "magic" when this is the main script.
'You selected : ', option

# Placeholders that the computation loop below updates in place.
latest_iteration = st.empty()
bar = st.progress(0)
array = [0.0]

if st.button('Compute'):
    chart = st.area_chart(array)
    for i in range(100):
        percent_done = i + 1
        latest_iteration.text(f'Computing {i+1}%')
        bar.progress(percent_done)

        # Random data to display: one positive sample appended per step.
        sample = abs((np.random.randn() + 10) / 10)
        df = pd.DataFrame({sample})
        chart.add_rows(df)

        time.sleep(0.1)
    st.write("Finished !")