'TLOCREV': 'Local Revenue', 'Z33': 'Teacher Salaries', 'Z34': 'Employee Benefits'}, inplace=True) #change the display format of currency values for col in features.columns: features[col] = features[col].apply(lambda x: "${:,.2f}k".format((x/1000))) st.table(features) #user select input df = original_districts.loc[original_districts['LEAID']==district_id] #set sliders for key features side_menu = st.checkbox('Would you like to play around?') if side_menu: #change Total Federal Revenue min_tfedrev = int(df['TFEDREV'].values[0]) max_tfedrev = int(df['TFEDREV'].values[0]*2) tfedrev = st.sidebar.slider('Total Federal Revenue', min_value=min_tfedrev, max_value=max_tfedrev, step=round(min_tfedrev/5)) df['TFEDREV'] = tfedrev #change Salaries - support service - instructional staff min_v14 = int(df['TSTREV'].values[0]) max_v14 = int(df['TSTREV'].values[0]*2) tstrev = st.sidebar.slider('Total State Revenue', min_value=min_v14, max_value=max_v14, step=round(min_v14/5)) df['TSTREV'] = tstrev #change Total Local Revenue min_tlocrev = int(df['TLOCREV'].values[0])
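# The slider block above writes back into a row sliced out of `original_districts`,
# which is the pattern that triggers pandas' SettingWithCopyWarning. A minimal sketch
# of the same what-if flow on an explicit copy; the frame and values here are made up,
# only the column and variable names mirror the snippet.
import pandas as pd
import streamlit as st

original_districts = pd.DataFrame({"LEAID": [100005], "TFEDREV": [500_000]})
district_id = 100005

# .copy() gives a private what-if row, so the slider write-back below is safe.
df = original_districts.loc[original_districts["LEAID"] == district_id].copy()

base = int(df["TFEDREV"].values[0])
tfedrev = st.sidebar.slider(
    "Total Federal Revenue",
    min_value=base,
    max_value=base * 2,
    step=max(base // 5, 1),  # guard against step=0 for tiny baselines
)
df["TFEDREV"] = tfedrev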
def main(): """Web App""" st.title("Diabetes Risk Prediction 💉") st.text("-- By Mrinal Gosain") activites = ["EDA","Plot","Model Building","About"] choice = st.sidebar.selectbox("Select Activity",activites) # Exploratory data analysis! if choice == 'EDA': st.subheader("Exploratory Data Analysis 🔍") data = st.file_uploader("Upload Dataset",type=["csv","txt"]) if data is not None: # If data isn't empty! df = pd.read_csv(data) st.dataframe(df.head()) if st.checkbox("Show shape"): st.write(df.shape) if st.checkbox("Show Columns"): all_columns = df.columns.to_list() st.write(all_columns) if st.checkbox("Select Columns To Show"): selected_columns = st.multiselect("Select Columns",all_columns) new_df = df[selected_columns] st.dataframe(new_df) if st.checkbox("Show Summary"): st.write(df.describe()) if st.checkbox("Show Value Counts"): st.write(df.iloc[:,-1].value_counts()) if st.checkbox("Correlation with Seaborn"): st.write(sns.heatmap(df.corr(),annot=True)) st.pyplot() if st.checkbox("Pie Chart"): all_columns = df.columns.to_list() columns_to_plot = st.selectbox("Select 1 Column ",all_columns) pie_plot = df[columns_to_plot].value_counts().plot.pie(autopct="%1.1f%%") st.write(pie_plot) st.pyplot() # Plotting ! elif choice == 'Plot': st.subheader("Data Visualization 📈") data = st.file_uploader("Upload Dataset",type=["csv","txt"]) if data is not None: df = pd.read_csv(data) st.dataframe(df.head()) all_columns_names = df.columns.tolist() type_of_plot = st.selectbox("Select Type of Plot",["area","bar","line","hist","box","kde"]) selected_columns_names = st.multiselect("Select Columns To Plot",all_columns_names) if st.button("Generate Plot"): st.success("Generating Customizable Plot of {} for {}".format(type_of_plot,selected_columns_names)) # Plot By Streamlit if type_of_plot == 'area': cust_data = df[selected_columns_names] st.area_chart(cust_data) elif type_of_plot == 'bar': cust_data = df[selected_columns_names] st.bar_chart(cust_data) elif type_of_plot == 'line': cust_data = df[selected_columns_names] st.line_chart(cust_data) # Custom Plot elif type_of_plot: cust_plot= df[selected_columns_names].plot(kind=type_of_plot) st.write(cust_plot) st.pyplot() # Model building! elif choice == 'Model Building': st.subheader("Building ML Model 👨💻") data = st.file_uploader("Upload Dataset",type=["csv","txt"]) if data is not None: df = pd.read_csv(data) st.dataframe(df.head()) # Model Building X = df.iloc[:,0:-1] Y = df.iloc[:,-1] #Make sure that the predicted column is the last one! 
seed = 42 # Model models = [] models.append(("LR",LogisticRegression())) models.append(("LDA",LinearDiscriminantAnalysis())) models.append(("KNN",KNeighborsClassifier())) models.append(('CART', DecisionTreeClassifier())) models.append(('NB', GaussianNB())) models.append(('SVM', SVC())) # evaluate each model in turn # List model_names = [] model_mean = [] model_std = [] all_models = [] scoring = 'accuracy' for name,model in models: kfold = model_selection.KFold(n_splits=10, random_state=seed) cv_results = model_selection.cross_val_score(model,X,Y,cv=kfold,scoring=scoring) model_names.append(name) model_mean.append(cv_results.mean()) model_std.append(cv_results.std()) accuracy_results = {"model_name":name,"model_accuracy":cv_results.mean(),"standard_deviation":cv_results.std()} all_models.append(accuracy_results) if st.checkbox("Metrics as Table"): st.dataframe(pd.DataFrame(zip(model_names,model_mean,model_std),columns=["Model Name","Model Accuracy","Standard Deviation"])) if st.checkbox("Metrics as JSON"): st.json(all_models) elif choice == 'About': st.subheader("About Me 👨🎓") st.text("Hi, I am Mrinal. I am a data science enthusiast who loves to build machine learning application end to end.") st.text("I take avid interests in breaking down complex problems and leverage data to drive business") if st.button("Github 🔗"): webbrowser.open_new_tab(github_url) if st.button("Linkedin 🔗"): webbrowser.open_new_tab(linkedin_url)
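# One detail worth flagging in the evaluation loop above: recent scikit-learn releases
# reject KFold(n_splits=10, random_state=seed) unless shuffle=True is also passed. A
# sketch of the same loop as a standalone helper with that fix applied; the function
# name is ours, everything else mirrors the snippet.
from sklearn import model_selection

def evaluate_models(models, X, Y, seed=42, scoring="accuracy"):
    """Cross-validate each (name, estimator) pair and collect mean/std accuracy."""
    # random_state is only accepted together with shuffle=True in current scikit-learn.
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    results = []
    for name, model in models:
        scores = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
        results.append({"model_name": name,
                        "model_accuracy": scores.mean(),
                        "standard_deviation": scores.std()})
    return results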
else:
    ## Read ad_list and keep it in session_state.
    # Upside: selecting the optimal design later is fast. Downside: since this does not
    # use st.cache, it will not refresh automatically once ad_list or the ad code
    # changes; Streamlit has to be restarted, which is easy to forget and makes
    # debugging inconvenient.
    if st.session_state.ad_list is None or st.session_state.folder_of_collection != last_folder_of_collection:
        st.session_state.ad_list = ad_list = SA.get_ad_list(selected_specifications)
        # st.session_state.folder_of_collection = last_folder_of_collection
    else:
        ad_list = st.session_state.ad_list
        for ad in ad_list:
            ad.read_swarm_data_json()

    ## Show the table of auto-selected optimal individuals and the Pareto front?
    if st.checkbox("Show Table and Pareto Front."):
        # st.checkbox("Great", value=True)
        df, fig = SA.inspect_swarm_and_show_table_plus_Pareto_front(ad_list)
        st.table(df)  # df.style.format("{:.2%}")
        st.pyplot(fig)

    ## Filter the optimal individuals matching the user's text_input criteria.
    if True:
        df = SA.select_optimal_designs_manually(st, st.session_state, None, ad_list, selected_specifications)
        st.table(df)

    ## Save the user input to file in JSON format.
    # st.write("Recorded values: ", st.session_state)
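# The comment above spells out the drawback of hand-rolled session_state caching: it
# never invalidates when ad_list or the ad code changes. A cached loader refreshes
# whenever its arguments change; a sketch assuming SA is importable, the arguments are
# hashable, and a Streamlit version that still ships st.cache (newer releases use
# st.cache_data instead).
import streamlit as st

@st.cache(allow_output_mutation=True)
def load_ad_list(selected_specifications, folder_of_collection):
    # folder_of_collection is part of the cache key, so switching folders re-runs this.
    ad_list = SA.get_ad_list(selected_specifications)
    for ad in ad_list:
        ad.read_swarm_data_json()
    return ad_list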
def main(): """Streamlit demo web app""" st.sidebar.title('Menu') choose_model = st.sidebar.selectbox("Choose the page or model", [ "Home", "Logistic Regression", "XGB", "Stock Backtesting", "Predict Volatility", "Technical Indicators", "Bollinger Band", "Stock Market", "Dashbord" ]) # Load data df, rows, columns, filename = load_data() data, drop_list = data_preprocessing(df) # Provide checkbox for uploading different training dataset if choose_model == "Home": if st.checkbox('Want to use other training set?'): uploaded_file = st.file_uploader("Choose a CSV file", type="csv") st.text( "Note: Don't easily change training set which may bring big influence on prediction" ) if uploaded_file: df, data, drop_list, filename, rows, columns = upload_different_data( uploaded_file) # Home page building if choose_model == "Home": home_page_builder(df, data, rows, columns) # Page for Logistic Regression if choose_model == "Logistic Regression": model_reg = logistic_page_builder(data) if (st.checkbox("Want to Use this model to predict on a new dataset?") ): logistic_predictor(model_reg, rows, columns, df, drop_list) # Page for XGB if choose_model == "XGB": model_xgb = xgb_page_builder(data) if (st.checkbox("Want to Use this model to predict on a new dataset?") ): xgb_predictor(model_xgb, rows, columns, df, drop_list) if choose_model == "Stock Backtesting": from Stock_Backtesting import ticker st.sidebar.header('Hyper Parameters') st.sidebar.markdown('Enter a new ticker') tickertxt = st.sidebar.text_input("Paste Aktie here", value='AMZN') model_xgb = ticker(tickertxt) if choose_model == "Predict Volatility": from PredictVolatility import tickerpv st.sidebar.header('Predickt') st.sidebar.markdown('Enter a new ticker') tickertxt = st.sidebar.text_input("Paste Aktie here", value='AMZN') model_xgb = tickerpv(tickertxt) if choose_model == "Technical Indicators": from Technical_Indicators import tickerti st.sidebar.header('Technical ') st.sidebar.markdown('Enter a new ticker') tickertxt = st.sidebar.text_input("Paste Aktie here", value='AMZN') model_xgb = tickerti(tickertxt) if choose_model == "Bollinger Band": from bollinger import tickerbol st.sidebar.header('Bollinger Nabd ') st.sidebar.markdown('Enter a new ticker') tickertxt = st.sidebar.text_input("Paste Aktie here", value='AMZN') model_xgb = tickerbol(tickertxt) if choose_model == "Stock Market": from Stockmarketscreening import stockmarket st.sidebar.header('Stock Market') st.sidebar.markdown('Enter a new ticker') tickertxt = st.sidebar.text_input("Paste Aktie here", value='AMZN') model_xgb = stockmarket(tickertxt) if choose_model == "Dashbord": st.sidebar.header('Dashboard') cwd = os.getcwd() data1 = pd.read_excel(cwd + "/Tabelle Mustermann-2.xls") # print(data1) st.table(data1)
def _save_best_model_by_val_score_options(config):
    config["save_best_model_by_val_score"] = st.checkbox(
        "Save the best model by evaluation score",
        value=False,
    )
    st.markdown("---")
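# Helpers in this style each write one key into a shared config dict and draw their own
# divider, so a settings page is assembled by calling them in order. A minimal usage
# sketch; the empty dict and the st.json echo are illustrative, not part of the module.
config = {}
_save_best_model_by_val_score_options(config)
st.json(config)  # e.g. {"save_best_model_by_val_score": false}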
def main(): """Tweet Classifier App with Streamlit """ # Creates a main title and subheader on your page - # these are static across all pages #st.title("Tweet Classifer") #st.subheader("Climate change tweet classification") # Creating sidebar with selection box - # you can create multiple pages this way st.sidebar.title('Navigation') options = [ "Overview", "Home", "About the Data", "Explore the data", "Prediction on the Go" ] selection = st.sidebar.radio("", options) # Building out the "Overview" page st.title("Climate Change Analysis") if selection == "Overview": st.header("Overview") capture_1 = Image.open("capture_1.png") st.image(capture_1, width=800) capture_2 = Image.open("capture_2.png") st.image(capture_2, width=800) # Building out the "Home" page if selection == "Home": st.subheader("Background") st.info( "Many companies are built around lessening one’s environmental impact or carbon footprint. They offer products and services that are environmentally friendly and sustainable, in line with their values and ideals. They would like to determine how people perceive climate change and whether or not they believe it is a real threat. This would add to their market research efforts in gauging how their product/service may be received." ) #pm = st.sidebar.button("Problem Statement",key="pm") #elif status == "Problem Statement": st.subheader("Problem Statement") st.info( "Create a model that determines whether or not a person believes in climate change or not based on their tweet" ) st.subheader("Data requirements") st.info( "The collection of this data was funded by a Canada Foundation for Innovation JELF Grant to Chris Bauch, University of Waterloo. The dataset aggregates tweets pertaining to climate change collected between Apr 27, 2015 and Feb 21, 2018. In total, 43943 tweets were collected. Each tweet is labelled as one of the following classes:" ) # Building out the "Information" page if selection == "About the Data": st.info("General Data Summaries") # You can read a markdown file from supporting resources folder st.markdown("Some information here") if st.checkbox("Preview Data"): status = st.radio(" ", ("First 5 Rows", "Show All Dataset")) if status == "First 5 Rows": st.dataframe(raw.head()) else: st.dataframe(raw) if st.button("Show data summaries"): st.text("Column names") st.write(raw.columns) st.text("Number of columns") st.write(raw.shape[1]) st.text("Number of rows") st.write(raw.shape[0]) st.text("Data types") st.write(raw.dtypes) st.text("Summary") st.write(raw.describe().T) #visualising the data if selection == "Explore the data": st.info( "N.B. it is recommended that the file is removed of all the noise data. For example – language stopwords (commonly used words of a language – is, am, the, of, in etc), URLs or links, social media entities (mentions, hashtags), punctuations and industry specific words. This step deals with removal of all types of noisy entities present in the text. 
To proceed click Preprocessing below" ) if st.button("Preprocessing"): raw['tidy_message'] = np.vectorize(remove_pattern)(raw['message'], "@[\w]*") # remove special characters, numbers, punctuations raw['tidy_message'] = raw['tidy_message'].str.replace( "[^a-zA-Z#]", " ") #remove short words of less than 3 letters in length raw['tidy_message'] = raw['tidy_message'].apply( lambda x: ' '.join([w for w in x.split() if len(w) > 3])) st.subheader("Visualising the data") col_names = raw.columns.tolist() plot_type = st.selectbox("select plot", ["bar", "hist", "box", "kde"]) select_col_names = st.multiselect("select columns to plot", col_names) if st.button("Generate plot"): st.success("{} plot for {}".format(plot_type, select_col_names)) if plot_type == "bar": s_grp = raw.groupby(["sentiment"]).count() st.bar_chart(s_grp) st.pyplot() #elif plot_type == 'area': # plot_data = raw[select_col_names] # st.area_chart(plot_data) elif plot_type == 'hist': plot_data = raw[select_col_names] st.bar_chart(plot_data) elif plot_type: cust_plot = raw[select_col_names].plot(kind=plot_type) st.write(cust_plot) st.pyplot() st.subheader("Visuals of common words used in the tweets") st.markdown( "The most frequent words appear in large size and the less frequent words appear in smaller sizes" ) cpw = st.checkbox("Common Positive Words", key="cpw") #cpw1 = st.text("Positive words: global warming, climate change, believe climate, change real") if cpw: positive_words = ' '.join( [text for text in raw['tidy_message'][raw['sentiment'] == 1]]) positive_words_cloud = WordCloud( width=800, height=500, random_state=21, max_font_size=110).generate(positive_words) plt.axis('off') plt.imshow(positive_words_cloud, interpolation="bilinear") plt.show() st.pyplot() #st.checkbox("Show/Hide") cnw = st.checkbox("Common negative Words", key="cnw") if cnw: negative_words = ' '.join( [text for text in raw['tidy_message'][raw['sentiment'] == -1]]) negative_words_cloud = WordCloud( width=800, height=500, random_state=21, max_font_size=110).generate(negative_words) plt.axis('off') plt.imshow(negative_words_cloud, interpolation="bilinear") plt.show() st.pyplot() #st.checkbox("Show/Hide") cnnw = st.checkbox("Common neutral/normal Words", key="cnnw") if cnnw: normal_words = ' '.join( [text for text in raw['tidy_message'][raw['sentiment'] == 0]]) normal_words_wordcloud = WordCloud( width=800, height=500, random_state=21, max_font_size=110).generate(normal_words) plt.axis('off') plt.imshow(normal_words_wordcloud, interpolation="bilinear") plt.show() st.pyplot() cnnww = st.checkbox("Common News Words", key="cnnww") if cnnww: news_words = ' '.join( [text for text in raw['tidy_message'][raw['sentiment'] == 2]]) news_words_wordcloud = WordCloud( width=800, height=500, random_state=21, max_font_size=110).generate(news_words) plt.axis('off') plt.imshow(news_words_wordcloud, interpolation="bilinear") plt.show() st.pyplot() # Building out the predication page if selection == "Prediction on the Go": st.sidebar.success( "The App allows only text to be entered. It will show any present entities,provides sentiments analysis and classifies the text as per the table on top. 
Enter the text in the text area provided and select the buttons of your choice below the text area" ) st.info("Prediction with ML Models") st.markdown("The table below shows the description of the sentiments") img = Image.open("class.png") st.image(img) # Creating a text box for user input tweet_text = st.text_area("Enter Text", "Type Here") #named entity if st.checkbox("Show Entities"): #if st.subheader("Extract entities from your text"): # ner= st.text_area("Enter your here","Type here",key="ner") # message = ner # if st.button("Extract"): nlp_result = entity_analyzer(tweet_text) st.write(nlp_result) #st.write=entity_analyzer(entity) #sentiment analysis if st.checkbox("Show Sentiment Analysis"): # if st.subheader("Sentiment of your Text"): # sa= st.text_area("Enter your here","Type here",key="sa") # message = sa # if st.button("Analyse"): sid = SentimentIntensityAnalyzer() res_sentiment = sid.polarity_scores(tweet_text) st.json(res_sentiment) if res_sentiment['compound'] == 0: st.write("The sentiment of your text is NEUTRAL") elif res_sentiment['compound'] > 0: st.success("The sentiment of your text is POSITIVE") else: st.warning("The sentiment of your text is NEGATIVE") news_vectorizer = open("resources/tfidfvect.pkl", "rb") tweet_cv = joblib.load(news_vectorizer) if st.checkbox("Classify"): # Transforming user input with vectorizer vect_text = tweet_cv.transform([tweet_text]).toarray() predictor = joblib.load( open(os.path.join("resources/Logistic_regression.pkl"), "rb")) prediction = predictor.predict(vect_text) # When model has successfully run, will print prediction # You can use a dictionary or similar structure to make this output # more human interpretable. st.success("Text Categorized as Class: {}".format(prediction)) Classifier = st.selectbox("Choose Classifier", ['Linear SVC', 'Logistic regression']) if st.button("Classify"): # Transforming user input with vectorizer # Load your .pkl file with the model of your choice + make predictions # Try loading in multiple models to give the user a choice if Classifier == 'Linear SVC': st.text("Using Linear SVC classifier ..") # Vectorizer news_vectorizer = open("resources/vectoriser.pkl", "rb") tweet_cv = joblib.load(news_vectorizer) predictor = joblib.load( open(os.path.join("resources/linearSVC.pkl"), "rb")) elif Classifier == 'Logistic regression': st.text("Using Logistic Regression Classifeir ..") # Vectorizer news_vectorizer = open("resources/tfidfvect.pkl", "rb") tweet_cv = joblib.load(news_vectorizer) predictor = joblib.load( open(os.path.join("resources/Logistic_regression.pkl"), "rb")) results = [] n = 0 while n < len(tweet_text): vect_text = tweet_cv.transform([tweet_text['message'][n] ]).toarray() prediction = predictor.predict(vect_text) results.append((tweet_text['message'][n], prediction)) n += 1 df = pd.DataFrame(results, columns=['Message', 'Sentiment']) #Table that tabulates the results predictions = st.table(df.head(size)) st.success( "Text Categorized as Class: {}".format(predictions))
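# A bug worth noting in the classification loop above: tweet_text comes from
# st.text_area and is a plain string, so len(tweet_text) counts characters and
# tweet_text['message'][n] raises TypeError. A sketch of classifying the single entered
# text instead, assuming the same pickled files under resources/ and the tweet_text
# entered above.
import os
import joblib
import streamlit as st

tweet_cv = joblib.load(open("resources/tfidfvect.pkl", "rb"))
predictor = joblib.load(open(os.path.join("resources", "Logistic_regression.pkl"), "rb"))

vect_text = tweet_cv.transform([tweet_text]).toarray()  # one document -> one row
prediction = predictor.predict(vect_text)[0]
st.success("Text Categorized as Class: {}".format(prediction))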
def display_data(conn: Connection):
    if st.checkbox("Display data in sqlite database"):
        st.dataframe(get_data(conn))
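# get_data and Connection are defined elsewhere in that app. A plausible minimal pair
# built on the standard library's sqlite3; the database path and the table name 'data'
# are placeholders, not the original project's values.
import sqlite3
from sqlite3 import Connection

import pandas as pd

def get_connection(path: str = "app.db") -> Connection:
    # check_same_thread=False lets Streamlit's reruns share one connection.
    return sqlite3.connect(path, check_same_thread=False)

def get_data(conn: Connection) -> pd.DataFrame:
    return pd.read_sql("SELECT * FROM data", conn)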
def main(): st.title("IST Automated EDA Test") st.info("JP EDA app test") """https://github.com/joeperrotta/streamlit""" activities = [ "Pandas Profile", "SweetViz", "General EDA", "EDA For Linear Models", "Model Building for Classification Problem" ] choice = st.sidebar.selectbox("Select Activities", activities) if choice == "Pandas Profile": st.subheader("Automated EDA with Pandas Profile") data_file = st.file_uploader("Upload CSV", type=['csv']) if data_file is not None: df = pd.read_csv(data_file) st.dataframe(df.head()) profile = ProfileReport(df) st_profile_report(profile) # elif choice == "Sweetviz": # st.subheader("Automated EDA with Sweetviz") # data = st.file_uploader("Upload a Dataset", type=["csv"]) # if data is not None: # df = pd.read_csv(data) # st.dataframe(df.head()) # if st.button("Generate Sweetviz Report"): # # Normal Workflow # report = sv.analyze(df) # report.show_html() # st_display_sweetviz("SWEETVIZ_REPORT.html") elif choice == 'General EDA': st.subheader("Exploratory Data Analysis") data = st.file_uploader("Upload a Dataset", type=["csv", "txt"]) if data is not None: df = load.read_csv(data) st.dataframe(df.head()) st.success("Data Frame Loaded successfully") if st.checkbox("Show dtypes"): st.write(dataframe.show_dtypes(df)) if st.checkbox("Show Columns"): st.write(dataframe.show_columns(df)) if st.checkbox("Show Missing"): st.write(dataframe.Show_Missing1(df)) if st.checkbox("column information"): st.write(info.Column_information(df)) if st.checkbox("Aggregation Tabulation"): st.write(dataframe.Tabulation(df)) if st.checkbox("Num Count Summary"): st.write(info.num_count_summary(df)) if st.checkbox("Statistical Summary"): st.write(info.statistical_summary(df)) # if st.checkbox("Show Selected Columns"): # selected_columns = st.multiselect("Select Columns",all_columns) # new_df = df[selected_columns] # st.dataframe(new_df) if st.checkbox("Show Selected Columns"): selected_columns = st.multiselect("Select Columns", dataframe.show_columns(df)) new_df = df[selected_columns] st.dataframe(new_df) if st.checkbox("Numerical Variables"): num_df = dataframe.Numerical_variables(df) numer_df = pd.DataFrame(num_df) st.dataframe(numer_df) if st.checkbox("Categorical Variables"): new_df = dataframe.categorical_variables(df) catego_df = pd.DataFrame(new_df) st.dataframe(catego_df) if st.checkbox("DropNA"): imp_df = dataframe.impute(num_df) st.dataframe(imp_df) if st.checkbox("Missing after DropNA"): st.write(dataframe.Show_Missing(imp_df)) all_columns_names = dataframe.show_columns(df) all_columns_names1 = dataframe.show_columns(df) selected_columns_names = st.selectbox( "Select Column 1 For Cross Tabultion", all_columns_names) selected_columns_names1 = st.selectbox( "Select Column 2 For Cross Tabultion", all_columns_names1) if st.button("Generate Cross Tab"): st.dataframe( pd.crosstab(df[selected_columns_names], df[selected_columns_names1])) all_columns_names3 = dataframe.show_columns(df) all_columns_names4 = dataframe.show_columns(df) selected_columns_name3 = st.selectbox( "Select Column 1 For Pearsonr Correlation (Numerical Columns)", all_columns_names3) selected_columns_names4 = st.selectbox( "Select Column 2 For Pearsonr Correlation (Numerical Columns)", all_columns_names4) if st.button("Generate Pearsonr Correlation"): df = pd.DataFrame(dataframe.Show_pearsonr( imp_df[selected_columns_name3], imp_df[selected_columns_names4]), index=['Pvalue', '0']) st.dataframe(df) spearmanr3 = dataframe.show_columns(df) spearmanr4 = dataframe.show_columns(df) spearmanr13 = st.selectbox( 
"Select Column 1 For spearmanr Correlation (Categorical Columns)", spearmanr4) spearmanr14 = st.selectbox( "Select Column 2 For spearmanr Correlation (Categorical Columns)", spearmanr4) if st.button("Generate spearmanr Correlation"): df = pd.DataFrame(dataframe.Show_spearmanr( catego_df[spearmanr13], catego_df[spearmanr14]), index=['Pvalue', '0']) st.dataframe(df) st.subheader("UNIVARIATE ANALYSIS") all_columns_names = dataframe.show_columns(df) selected_columns_names = st.selectbox( "Select Column for Histogram ", all_columns_names) if st.checkbox("Show Histogram for Selected variable"): st.write(dataframe.show_hist(df[selected_columns_names])) st.pyplot() all_columns_names = dataframe.show_columns(df) selected_columns_names = st.selectbox("Select Columns Distplot ", all_columns_names) if st.checkbox("Show DisPlot for Selected variable"): st.write(dataframe.Show_DisPlot(df[selected_columns_names])) st.pyplot() all_columns_names = dataframe.show_columns(df) selected_columns_names = st.selectbox("Select Columns CountPlot ", all_columns_names) if st.checkbox("Show CountPlot for Selected variable"): st.write(dataframe.Show_CountPlot(df[selected_columns_names])) st.pyplot() st.subheader("BIVARIATE ANALYSIS") Scatter1 = dataframe.show_columns(df) Scatter2 = dataframe.show_columns(df) Scatter11 = st.selectbox( "Select Column 1 For Scatter Plot (Numerical Columns)", Scatter1) Scatter22 = st.selectbox( "Select Column 2 For Scatter Plot (Numerical Columns)", Scatter2) if st.button("Generate PLOTLY Scatter PLOT"): st.pyplot(dataframe.plotly(df, df[Scatter11], df[Scatter22])) bar1 = dataframe.show_columns(df) bar2 = dataframe.show_columns(df) bar11 = st.selectbox("Select Column 1 For Bar Plot ", bar1) bar22 = st.selectbox("Select Column 2 For Bar Plot ", bar2) if st.button("Generate PLOTLY histogram PLOT"): st.pyplot(dataframe.plotly_histogram(df, df[bar11], df[bar22])) violin1 = dataframe.show_columns(df) violin2 = dataframe.show_columns(df) violin11 = st.selectbox("Select Column 1 For violin Plot", violin1) violin22 = st.selectbox("Select Column 2 For violin Plot", violin2) if st.button("Generate PLOTLY violin PLOT"): st.pyplot( dataframe.plotly_violin(df, df[violin11], df[violin22])) st.subheader("MULTIVARIATE ANALYSIS") if st.checkbox("Show Histogram"): st.write(dataframe.show_hist(df)) st.pyplot() if st.checkbox("Show HeatMap"): st.write(dataframe.Show_HeatMap(df)) st.pyplot() if st.checkbox("Show PairPlot"): st.write(dataframe.Show_PairPlot(df)) st.pyplot() if st.button("Generate Word Cloud"): st.write(dataframe.wordcloud(df)) st.pyplot() elif choice == 'EDA For Linear Models': st.subheader("EDA For Linear Models") data = st.file_uploader("Upload a Dataset", type=["csv", "txt", "xlsx", "tsv"]) if data is not None: df = load.read_csv(data) st.dataframe(df.head()) st.success("Data Frame Loaded successfully") all_columns_names = dataframe.show_columns(df) selected_columns_names = st.selectbox("Select Columns qqplot ", all_columns_names) if st.checkbox("Show qqplot for variable"): st.write(dataframe.qqplot(df[selected_columns_names])) st.pyplot() all_columns_names = dataframe.show_columns(df) selected_columns_names = st.selectbox("Select Columns outlier ", all_columns_names) if st.checkbox("Show outliers in variable"): st.write(dataframe.outlier(df[selected_columns_names])) # all_columns_names = show_columns(df) # selected_columns_names = st.selectbox("Select target ",all_columns_names) # if st.checkbox("Anderson Normality Test"): # st.write(Anderson_test(df[selected_columns_names])) if 
st.checkbox("Show Distplot Selected Columns"): selected_columns_names = st.selectbox( "Select Columns for Distplot ", all_columns_names) st.dataframe(dataframe.show_displot( df[selected_columns_names])) st.pyplot() con1 = dataframe.show_columns(df) con2 = dataframe.show_columns(df) conn1 = st.selectbox("Select 1st Columns for chi square test", con1) conn2 = st.selectbox("Select 2st Columns for chi square test", con2) if st.button("Generate chi square test"): st.write( dataframe.check_cat_relation(df[conn1], df[conn2], 0.5)) elif choice == 'Model Building for Classification Problem': st.subheader("Model Building for Classification Problem") data = st.file_uploader("Upload a Dataset", type=["csv", "txt", "xlsx", "tsv"]) if data is not None: df = load.read_csv(data) st.dataframe(df.head()) st.success("Data Frame Loaded successfully") if st.checkbox( "Select your Variables (Target Variable should be at last)" ): selected_columns_ = st.multiselect( "Select Columns for seperation ", dataframe.show_columns(df)) sep_df = df[selected_columns_] st.dataframe(sep_df) if st.checkbox("Show Indpendent Data"): x = sep_df.iloc[:, :-1] st.dataframe(x) if st.checkbox("Show Dependent Data"): y = sep_df.iloc[:, -1] st.dataframe(y) if st.checkbox("Dummay Variable"): x = dataframe.dummy(x) st.dataframe(x) if st.checkbox("IMpupter "): x = model.IMpupter(x) st.dataframe(x) if st.checkbox("Compute Principle Component Analysis"): x = dataframe.PCA(x) st.dataframe(x) st.subheader("TRAIN TEST SPLIT") if st.checkbox("Select X Train"): from sklearn.model_selection import train_test_split x_train, x_test, y_train, y_test = train_test_split( x, y, random_state=0) st.dataframe(x_train) if st.checkbox("Select x_test"): from sklearn.model_selection import train_test_split x_train, x_test, y_train, y_test = train_test_split( x, y, random_state=0) st.dataframe(x_test) if st.checkbox("Select y_train"): from sklearn.model_selection import train_test_split x_train, x_test, y_train, y_test = train_test_split( x, y, random_state=0) st.dataframe(y_train) if st.checkbox("Select y_test"): from sklearn.model_selection import train_test_split x_train, x_test, y_train, y_test = train_test_split( x, y, random_state=0) st.dataframe(y_test) st.subheader("MODEL BUILDING") st.write("Build youe BaseLine Model") if st.checkbox("Logistic Regression "): x = model.Logistic_Regression(x_train, y_train, x_test, y_test) st.write(x) if st.checkbox("Decision Tree "): x = model.Decision_Tree(x_train, y_train, x_test, y_test) st.write(x) if st.checkbox("Random Forest "): x = model.RandomForest(x_train, y_train, x_test, y_test) st.write(x) if st.checkbox("naive_bayes "): x = model.naive_bayes(x_train, y_train, x_test, y_test) st.write(x) if st.checkbox("XGB Classifier "): x = model.XGb_classifier(x_train, y_train, x_test, y_test) st.write(x) st.markdown('Automation is **_really_ _cool_**.') st.markdown('<style>h1{color: red;}</style>', unsafe_allow_html=True) st.title("Credits and Inspiration") """https://pycaret.org/"""
("Good", "Bad")) send_puntuation = st.empty() send_puntuation.error("No calification/Firts pomodoro of the session") if send_btn.button("Send Calification"): hour = st.experimental_get_query_params() hour = hour["starting_hour"][0] satisfaction = selections[selection] pomodoro_queries.add_pomodoro(cat_id, proj_id, hour, satisfaction=satisfaction) send_puntuation.success("Calification was sent") if st.checkbox( f"Enable to End/Cancel {'' if project == None else project.upper()}" ): # Cancel/End # End: project is actually ended # Cancel: project closed before the end end_cancel_selection = st.selectbox("Do you want to End/Cancel", ("End", "Cancel")) st.error( f"If you want to *{end_cancel_selection.upper()}* **{project}** hit the button" ) if st.button("End/Cancel"): if end_cancel_selection == "End": pomodoro_queries.end_project(proj_id) elif end_cancel_selection == "Cancel":
def main(): """A Simple Streamlit App For CSS Shape Generation """ st.title("Simple CSS Shape Generator") activity = [ 'Design', 'About', ] choice = st.sidebar.selectbox("Select Activity", activity) if choice == 'Design': st.subheader("Design") bgcolor = st.beta_color_picker("Pick a Background color") fontcolor = st.beta_color_picker("Pick a Font Color", "#fff") html_temp = """ <div style="background-color:{};padding:10px"> <h1 style="color:{};text-align:center;">Streamlit Simple CSS Shape Generator </h1> </div> """ st.markdown(html_temp.format(bgcolor, fontcolor), unsafe_allow_html=True) st.markdown( "<div><p style='color:{}'>Hello Streamlit</p></div>".format( bgcolor), unsafe_allow_html=True) st.subheader("Modify Shape") bgcolor2 = st.sidebar.beta_color_picker("Pick a Bckground color") height = st.sidebar.slider('Height Size', 50, 200, 50) width = st.sidebar.slider("Width Size", 50, 200, 50) # border = st.slider("Border Radius",10,60,10) top_left_border = st.sidebar.number_input('Top Left Border', 10, 50, 10) top_right_border = st.sidebar.number_input('Top Right Border', 10, 50, 10) bottom_left_border = st.sidebar.number_input('Bottom Left Border', 10, 50, 10) bottom_right_border = st.sidebar.number_input('Bottom Right Border', 10, 50, 10) border_style = st.sidebar.selectbox("Border Style", [ "dotted", "dashed", "solid", "double", "groove", "ridge", "inset", "outset", "none", "hidden" ]) border_color = st.sidebar.beta_color_picker("Pick a Border Color", "#654FEF") html_design = """ <div style="height:{}px;width:{}px;background-color:{};border-radius:{}px {}px {}px {}px;border-style:{};border-color:{}"> </div> """ st.markdown(html_design.format(height, width, bgcolor2, top_left_border, top_right_border, bottom_left_border, bottom_right_border, border_style, border_color), unsafe_allow_html=True) if st.checkbox("View Results"): st.subheader("Result") result_of_design = html_design.format(height, width, bgcolor2, top_left_border, top_right_border, bottom_left_border, bottom_right_border, border_style, border_color) st.code(result_of_design) if choice == "About": st.subheader("About") st.info("Jesus Saves @JCharisTech") st.text("By Jesse E.Agbe(JCharis)") st.success("Built with Streamlit")
st.line_chart(chart_data)
st.text("--------------------------------------")

map_data = pd.DataFrame(np.random.randn(500, 2) / [50, 50] + [32.26, -100],
                        columns=["lat", "lon"])
st.map(map_data)
st.text("--------------------------------------")

check_box_data = pd.DataFrame(np.random.randn(20, 3),
                              columns=['1st_col', '2nd_col', '3rd_col'])
if st.checkbox("show details"):
    st.dataframe(check_box_data)
st.text("--------------------------------------")

select_box_data = pd.DataFrame(np.random.randn(20, 3),
                               columns=['1st_col', '2nd_col', '3rd_col'])
col = st.selectbox('select the column to be displayed', select_box_data.columns)
st.line_chart(select_box_data[col])
st.text("--------------------------------------")

select_box_data_1 = pd.DataFrame(np.random.randn(20, 3),
def app(): st.markdown("<style>body {color: #fff;background-color: #111;}</style>",unsafe_allow_html=True,) st.markdown("<span style=“background-color:#121922”>",unsafe_allow_html=True) st.markdown( """<style> .css-19ih76x{text-align: left !important} </style> """, unsafe_allow_html=True) st.markdown("<h3 style='text-align: left; color: #008357;'>Campus BBLearn</h3>", unsafe_allow_html=True) #SHEET_ID = '12D4hfpuIkT7vM69buu-v-r-UYb8xx4wM1zi-34Fs9ck' #df = pd.read_csv('https://docs.google.com/spreadsheets/d/' + SHEET_ID + '/export?format=csv') df = pd.read_csv('https://docs.google.com/spreadsheets/d/1yvDAVczwETC2JwcNeaYx2h2PD5fGCSWdJoY4QoMqR48/export?format=csv&gid=883950483') #df = pd.read_csv('/mydrive/MyDrive/multiapps/bbc204.csv') df=df.sort_values(by=['SessionOwner']) CHOICES = {456987: "PAD", 7896321: "FCEyE", 43: "MED", 453: "ELM", 45783: "ING", 8123: "LENGUAS", 48123: "FHGT", 457123: "JURI", 457823: "FCS", 4578128: "FLEO", 4578123: "HGT", 578128:"FCJ"} def format_func(option): return CHOICES[option] buff, col, buff2 = st.beta_columns([1,3,1]) option = buff.selectbox("Seleccionar Unidad", options=list(CHOICES.keys()), format_func=format_func) #st.write(f"Seleccionaste {format_func(option)}" ) column = format_func(option) above_352 = df["SessionOwner"] == 'USAL_lti_production' sesionesu = df[above_352]['SessionName'].unique() df5=pd.value_counts(sesionesu) bool_series = df[above_352]["ContextIdentifier"].str.startswith(column, na = False) dupli=df[above_352][bool_series].drop_duplicates(subset = ['SessionName']) sesiones = df[above_352][bool_series]['SessionName'].unique() df6=pd.value_counts(sesiones) time = pd.DatetimeIndex(df[above_352][bool_series]['AttendeeTotalTimeInSession']) times1=time.hour * 60 + time.minute+ time.second/60 times=times1.values.sum() timeu = pd.DatetimeIndex(df[above_352]['AttendeeTotalTimeInSession']) times1u=timeu.hour * 60 + timeu.minute+ timeu.second/60 timesu=times1u.values.sum() times3=df6.index aulas=len(times3) times3t=df5.index aulast=len(times3t) df['RoomOpened'] = pd.to_datetime(df['RoomOpened']).dt.strftime('%d-%m-%y') minValue = df['RoomOpened'].min() maxValue = df['RoomOpened'].max() st.write('Período:',minValue,' al ',maxValue) st.write('Sesiones: ',aulas) st.sidebar.title('Consumos período') st.sidebar.write('Sesiones: ',aulast) st.sidebar.write('Minutos: ',round(timesu,1)) st.write('Minutos usados: ',round(times,1)) if st.checkbox('Mostrar Sesiones UA'): #page_size = 1000 #page_number = st.number_input( #label="Página número (c/500):", #min_value=1, #max_value=ceil(len(df)/page_size), #step=1, #) #current_start = (page_number-1)*page_size #current_end = page_number*page_size #st.write(df[current_start:current_end]) #st.write(df[above_352][['RoomOpened','SessionName','NameOfAttendee','AttendeeTotalTimeInSession']][current_start:current_end]) #dupli.index = [""] * len(dupli) #st.table(dupli[['RoomClosed','SessionName']]) #st.table(df[above_352][bool_series][['RoomOpened','NameOfAttendee','AttendeeTotalTimeInSession','ContextName']]) df['Minutos Usados']=round(pd.to_timedelta(df[above_352]['AttendeeTotalTimeInSession']).dt.total_seconds()/60) totalesua=df[above_352][bool_series].groupby("SessionName")['Minutos Usados'].sum() #dupli=dupli.sort_values(by=['RoomClosed']) st.table(totalesua) #st.markdown("### Sample Data") #df = create_table() #st.write(df) #st.write('Navigate to `Data Stats` page to visualize the data') if st.checkbox('Comparativo Salas x UA'): df['Minutos 
Usados']=round(pd.to_timedelta(df[above_352]['AttendeeTotalTimeInSession']).dt.total_seconds() / 60)
    totalesuas = df[above_352].groupby("ua")['Minutos Usados'].sum()
    st.bar_chart(totalesuas)
if st.checkbox('Tabla Salas x UA'):
    df['Minutos Usados'] = round(pd.to_timedelta(df[above_352]['AttendeeTotalTimeInSession']).dt.total_seconds() / 60)
    totalesuas = df[above_352].groupby("ua")['Minutos Usados'].sum()
    st.table(totalesuas)
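# Both pages above turn AttendeeTotalTimeInSession into minutes, and the file mixes two
# approaches: pd.DatetimeIndex hour/minute/second arithmetic and
# pd.to_timedelta(...).dt.total_seconds(). The timedelta route is the safer one, since
# parsing a duration as a datetime breaks once a value reaches 24 hours. A
# self-contained illustration with made-up values:
import pandas as pd

durations = pd.Series(["00:45:30", "01:10:00", "25:05:00"])  # HH:MM:SS strings
minutes = pd.to_timedelta(durations).dt.total_seconds() / 60
print(minutes.round(1).tolist())  # [45.5, 70.0, 1505.0] -- the 25h value survives intact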
def main(): st.image('img/gui_logo.jpeg', use_column_width=True) st.header('Bem vindo!') st.subheader('**Você está no sistema de recomendação de clientes**') st.markdown( 'O sistema recomendará novos clientes baseado em comparações com os seus atuais clientes de forma customizada a partir das características desejadas.' ) st.markdown( '### Precisamos que você nos forneça o **portifólio de seus clientes!**' ) st.markdown( ' *Obs.: Caso você não tenha um portifólio para usar, escolha um [desses](https://github.com/guireis1/Codenation-Final-Project/tree/master/data). *' ) file3 = st.file_uploader('Upload clientes.csv', type='csv') if file3 is not None: market_pre = pd.read_csv('data/data_preprocess.csv') market = pd.read_csv('data/market.csv') #market = pd.DataFrame(readcsv(file2)) #market= pd.read_csv(file2) #market_pre = pd.DataFrame(readcsv(file1)) #market_pre = pd.read_csv(file1) port = pd.DataFrame(readcsv(file3)) st.text('Loading data...done!') #Començando o processamento #market = pd.read_csv('market.csv') #market_pre = pd.read_csv('data_preprocess.csv') #port = pd.read_csv('data/estaticos_portfolio1.csv') market_pre.set_index('id', inplace=True) market.set_index(market_pre.index, inplace=True) market.drop('Unnamed: 0', axis=1, inplace=True) port = port.set_index('id') port.drop(port.columns, axis=1, inplace=True) port_market = market.merge(port, how='right', left_index=True, right_index=True) port_market_pre = market_pre.merge(port, how='right', left_index=True, right_index=True) st.markdown('DataFrame do Portofólio:') head(port_market) #Todos datasets prontos #st.sidebar.image(st.image('img/logo.png', use_column_width=True)) st.sidebar.header('Opções de análise do Portifólio:') sidemulti = st.sidebar.multiselect( 'Escolha: ', ('Visualização', 'Descritiva', 'Geolocalização')) if ('Visualização' in sidemulti): st.markdown('## **Visualização do Portifólio**') st.markdown('Perfil de clientes considerando features importantes') vis(port_market) st.markdown('*Para melhor visualização clique na imagem*') if ('Descritiva' in sidemulti): st.markdown('## **Análise Descritiva do Portifólio**') st.dataframe(descritiva(port_market)) missing(port_market, 'Visualização dos nulos do Portifólio') missing_dendo(port_market, 'Dendograma dos nulos do Portifólio') st.markdown('*Para melhor visualização clique na imagem*') if ('Geolocalização' in sidemulti): coordenadas = pd.read_csv( 'https://raw.githubusercontent.com/guireis1/Codenation-Final-Project/master/data/coordenadas' ) coordenadas.drop('Unnamed: 0', axis=1, inplace=True) st.markdown('## **Geolocalização do Portifólio**') st.markdown('Localização das empresas contidas no portifólio') cord_port = geoloc(port_market, coordenadas) cord_port_df = pd.DataFrame(cord_port, columns=('lat', 'lon')) st.map(cord_port_df) st.sidebar.header('Opções de análise do mercado:') sidemulti_market = st.sidebar.multiselect( 'Escolha: ', ('Visualização', 'Descritiva', 'Correlação', 'Análise dos Nulos', 'Colunas excluídas')) if ('Visualização' in sidemulti_market): st.markdown('## **Visualização do Mercado**') vis(market) st.markdown('*Para melhor visualização clique na imagem*') if ('Descritiva' in sidemulti_market): st.markdown('## **Análise Descritiva do Mercado**') st.dataframe(descritiva(market)) #missing(market,'Visualização dos nulos') #missing_dendo(market,'Dendograma nulos') if ('Correlação' in sidemulti_market): st.markdown('## **Correlações do Mercado**') st.markdown('Correlação padrão') st.image('img/corr_matrix.png', use_column_width=True) 
st.markdown('Correlação usando PPS') st.image('img/corr_pps.png', use_column_width=True) if ('Análise dos Nulos' in sidemulti_market): st.markdown('## **Análise dos nulos **') st.markdown('### **Colunas Numéricas:**') st.image('img/valores20.png', use_column_width=True) st.image('img/valores60.png', use_column_width=True) st.image('img/valores80.png', use_column_width=True) st.image('img/dendo_90.png', use_column_width=True) st.image('img/dendo100.png', use_column_width=True) st.markdown('### **Colunas Categoricas:**') st.image('img/valores_nulos.png', use_column_width=True) st.image('img/dendo_cat.png', use_column_width=True) if ('Colunas excluídas' in sidemulti_market): col_excluidas = [ 'sg_uf', 'idade_emp_cat', 'fl_me', 'fl_sa', 'fl_epp', 'fl_ltda', 'dt_situacao', 'fl_st_especial', 'nm_divisao', 'nm_segmento', 'fl_spa', 'vl_total_tancagem', 'vl_total_veiculos_antt', 'fl_optante_simples', 'qt_art', 'vl_total_veiculos_pesados_grupo', 'vl_total_veiculos_leves_grupo', 'vl_total_tancagem_grupo', 'vl_total_veiculos_antt_grupo', 'vl_potenc_cons_oleo_gas', 'fl_optante_simei', 'sg_uf_matriz', 'de_saude_rescencia', 'nu_meses_rescencia', 'de_indicador_telefone', 'fl_simples_irregular', 'vl_frota', 'qt_socios_pf', 'qt_socios_pj', 'idade_maxima_socios', 'idade_minima_socios', 'qt_socios_st_regular', 'qt_socios_st_suspensa', 'qt_socios_masculino', 'qt_socios_feminino', 'qt_socios_pep', 'qt_alteracao_socio_total', 'qt_alteracao_socio_90d', 'qt_alteracao_socio_180d', 'qt_alteracao_socio_365d', 'qt_socios_pj_ativos', 'qt_socios_pj_nulos', 'qt_socios_pj_baixados', 'qt_socios_pj_suspensos', 'qt_socios_pj_inaptos', 'vl_idade_media_socios_pj', 'vl_idade_maxima_socios_pj', 'vl_idade_minima_socios_pj', 'qt_coligados', 'qt_socios_coligados', 'qt_coligados_matriz', 'qt_coligados_ativo', 'qt_coligados_baixada', 'qt_coligados_inapta', 'qt_coligados_suspensa', 'qt_coligados_nula', 'idade_media_coligadas', 'idade_maxima_coligadas', 'idade_minima_coligadas', 'coligada_mais_nova_ativa', 'coligada_mais_antiga_ativa', 'idade_media_coligadas_ativas', 'coligada_mais_nova_baixada', 'coligada_mais_antiga_baixada', 'idade_media_coligadas_baixadas', 'qt_coligados_sa', 'qt_coligados_me', 'qt_coligados_mei', 'qt_coligados_ltda', 'qt_coligados_epp', 'qt_coligados_norte', 'qt_coligados_sul', 'qt_coligados_nordeste', 'qt_coligados_centro', 'qt_coligados_sudeste', 'qt_coligados_exterior', 'qt_ufs_coligados', 'qt_regioes_coligados', 'qt_ramos_coligados', 'qt_coligados_industria', 'qt_coligados_agropecuaria', 'qt_coligados_comercio', 'qt_coligados_serviço', 'qt_coligados_ccivil', 'qt_funcionarios_coligados', 'qt_funcionarios_coligados_gp', 'media_funcionarios_coligados_gp', 'max_funcionarios_coligados_gp', 'min_funcionarios_coligados_gp', 'vl_folha_coligados', 'media_vl_folha_coligados', 'max_vl_folha_coligados', 'min_vl_folha_coligados', 'vl_folha_coligados_gp', 'media_vl_folha_coligados_gp', 'max_vl_folha_coligados_gp', 'min_vl_folha_coligados_gp', 'faturamento_est_coligados', 'media_faturamento_est_coligados', 'max_faturamento_est_coligados', 'min_faturamento_est_coligados', 'faturamento_est_coligados_gp', 'media_faturamento_est_coligados_gp', 'max_faturamento_est_coligados_gp', 'min_faturamento_est_coligados_gp', 'total_filiais_coligados', 'media_filiais_coligados', 'max_filiais_coligados', 'min_filiais_coligados', 'qt_coligados_atividade_alto', 'qt_coligados_atividade_medio', 'qt_coligados_atividade_baixo', 'qt_coligados_atividade_mt_baixo', 'qt_coligados_atividade_inativo', 'qt_coligadas', 
'sum_faturamento_estimado_coligadas', 'de_faixa_faturamento_estimado', 'vl_faturamento_estimado_aux', 'vl_faturamento_estimado_grupo_aux', 'qt_ex_funcionarios', 'qt_funcionarios_grupo', 'percent_func_genero_masc', 'percent_func_genero_fem', 'idade_ate_18', 'idade_de_19_a_23', 'idade_de_24_a_28', 'idade_de_29_a_33', 'idade_de_34_a_38', 'idade_de_39_a_43', 'idade_de_44_a_48', 'idade_de_49_a_53', 'idade_de_54_a_58', 'idade_acima_de_58', 'grau_instrucao_macro_analfabeto', 'grau_instrucao_macro_escolaridade_fundamental', 'grau_instrucao_macro_escolaridade_media', 'grau_instrucao_macro_escolaridade_superior', 'grau_instrucao_macro_desconhecido', 'total', 'meses_ultima_contratacaco', 'qt_admitidos_12meses', 'qt_desligados_12meses', 'qt_desligados', 'qt_admitidos', 'media_meses_servicos_all', 'max_meses_servicos_all', 'min_meses_servicos_all', 'media_meses_servicos', 'max_meses_servicos', 'min_meses_servicos', 'qt_funcionarios_12meses', 'qt_funcionarios_24meses', 'tx_crescimento_12meses', 'tx_crescimento_24meses' ] st.markdown('## **Colunas excluídas**') st.markdown( 'Decidimos não utiliza-las por quantidade de linhas não preenchidas, grandes correlações com outrar variáveis, pouca importância para o modelo ou redundância!' ) st.markdown('**São elas:**') st.write(col_excluidas) st.sidebar.header('Sistema de recomendação') start_model = st.sidebar.checkbox( 'Aperte para começarmos a modelagem do sistema!') st.sidebar.markdown('**Desenvolvido por,**') st.sidebar.markdown('*Guilherme Reis Mendes*') st.sidebar.markdown( '[LinkedIn](https://www.linkedin.com/in/guilherme-reis-2862ab153/)' ) st.sidebar.markdown('[GitHub](https://github.com/guireis1/)') if start_model: st.header('**Modelagem**') st.subheader( '**Primeiro selecione as features que gostaria de usar**') st.markdown( '*Essas serão as colunas que serão utilizadas no sistema de recomendação!*' ) st.markdown('**Colunas que recomendamos:**') col_select = [] ramo = st.checkbox('de_ramo') idade = st.checkbox('idade_emp_cat') meso = st.checkbox('nm_meso_regiao') juridica = st.checkbox('natureza_juridica_macro') faturamento = st.checkbox('de_faixa_faturamento_estimado_grupo') filiais = st.checkbox('qt_filiais') mei = st.checkbox('fl_mei') rm = st.checkbox('fl_rm') st.markdown('**Colunas opcionais:**') setor = st.checkbox('setor') rotatividade = st.checkbox('tx_rotatividade') idade_socios = st.checkbox('idade_media_socios') socios = st.checkbox('qt_socios') renda = st.checkbox('empsetorcensitariofaixarendapopulacao') leve = st.checkbox('vl_total_veiculos_leves_grupo') pesado = st.checkbox('vl_total_veiculos_pesados_grupo') iss = st.checkbox('fl_passivel_iss') atividade = st.checkbox('de_nivel_atividade') saude = st.checkbox('de_saude_tributaria') veiculo = st.checkbox('fl_veiculo') antt = st.checkbox('fl_antt') telefone = st.checkbox('fl_telefone') email = st.checkbox('fl_email') matriz = st.checkbox('fl_matriz') if ramo: col_select.append('de_ramo') if idade: col_select.append('idade_emp_cat') if meso: col_select.append('nm_meso_regiao') meso_ohe = pd.get_dummies(market_pre['nm_meso_regiao'], drop_first=True) if faturamento: col_select.append('de_faixa_faturamento_estimado_grupo') if juridica: col_select.append('natureza_juridica_macro') juridico_ohe = pd.get_dummies( market_pre['natureza_juridica_macro'], drop_first=True) if filiais: col_select.append('qt_filiais') if mei: col_select.append('fl_mei') if rm: col_select.append('fl_rm') if setor: col_select.append('setor') setor_ohe = pd.get_dummies(market_pre['setor'], drop_first=True) if 
rotatividade: col_select.append('tx_rotatividade') if idade_socios: col_select.append('idade_media_socios') if socios: col_select.append('qt_socios') if renda: col_select.append('empsetorcensitariofaixarendapopulacao') if leve: col_select.append('vl_total_veiculos_leves_grupo') if pesado: col_select.append('vl_total_veiculos_pesados_grupo') if iss: col_select.append('fl_passivel_iss') if atividade: col_select.append('de_nivel_atividade') if saude: col_select.append('de_saude_tributaria') if veiculo: col_select.append('fl_veiculo') if antt: col_select.append('fl_antt') if telefone: col_select.append('fl_telefone') if email: col_select.append('fl_email') if matriz: col_select.append('fl_matriz') st.markdown('## **Podemos continuar?**') features_select = st.checkbox('Sim') if features_select: st.text('*Colunas selecionadas com sucesso!*') st.write('Colunas Selecionadas:', col_select) st.subheader( 'Agora escolha a quantidade de recomendações que deseja!') st.markdown( '**Estamos trabalhando com k-nearest Kneighbors. O valor selecionado será proporcional ao número de samples do portifólio!**' ) st.markdown( '*Lembrando que quanto maior o valor de K, mais recomendações, porém, menos preciso*' ) slider_nn = st.slider('Número de vizinhos:', 2, 10) market_col_select = market_pre[col_select] if 'setor' in market_col_select: market_col_select.drop('setor', axis=1, inplace=True) market_col_select = pd.concat( [market_col_select, setor_ohe], axis=1) if 'nm_meso_regiao' in market_col_select: market_col_select.drop('nm_meso_regiao', axis=1, inplace=True) market_col_select = pd.concat( [market_col_select, meso_ohe], axis=1) if 'setor' in market_col_select: market_col_select.drop('natureza_juridica_macro', axis=1, inplace=True) market_col_select = pd.concat( [market_col_select, juridico_ohe], axis=1) market_col_select_scaled = StandardScaler().fit_transform( market_col_select) market_col_select_scaled = pd.DataFrame( market_col_select_scaled, columns=market_col_select.columns, index=market_col_select.index) head(market_col_select_scaled) st.markdown('## **Recomendação**') button_model = st.checkbox('Aperte para iniciar o sistema') if button_model: st.text('Loading model...wait!') port_model = market_col_select_scaled.merge( port, how='right', left_index=True, right_index=True) port_model.dropna(inplace=True) suggestion = recommend(port_model, slider_nn, market_col_select_scaled, market) suggestion['id'] = suggestion.index st.text('Loading model...done!') st.markdown('**Sistema de recomendação completo!**') size_sug = suggestion.shape[0] st.write('Foram geraradas ', size_sug, ' recomendações!') st.markdown('Baixe aqui:') st.markdown(get_table_download_link(suggestion), unsafe_allow_html=True) coordenadas_market = pd.read_csv( 'https://raw.githubusercontent.com/guireis1/Codenation-Final-Project/master/data/coordenadas' ) coordenadas_market.drop('Unnamed: 0', axis=1, inplace=True) cord_reco = geoloc(suggestion, coordenadas_market) cord_reco_df = pd.DataFrame(cord_reco, columns=('lat', 'lon')) st.markdown('**Geolocalização das empresas recomendadas**') st.map(cord_reco_df) st.markdown('**Visualização das empresas recomendadas**') vis(suggestion)
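# recommend() is defined elsewhere in the project. A plausible sketch of a
# k-nearest-neighbours recommender matching how it is called above (portfolio rows as
# queries, the whole scaled market as candidates); the body and return shape here are
# assumptions, not the original implementation.
from sklearn.neighbors import NearestNeighbors

def recommend(port_scaled, k, market_scaled, market):
    """Return the k nearest market companies to each portfolio company."""
    nn = NearestNeighbors(n_neighbors=k).fit(market_scaled)
    _, idx = nn.kneighbors(port_scaled)
    ids = market_scaled.index[idx.ravel()].unique()
    ids = ids.difference(port_scaled.index)  # keep only companies not already clients
    return market.loc[ids]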
def write(): st.sidebar.title("Select your search params") #selected team is stored selected_team = st.sidebar.selectbox('Select blue team :', team) #selected season is stored selected_season = st.sidebar.multiselect('Select season', options=list( df['Season'].unique()), default=['Spring']) #selected year is stored selected_year = st.sidebar.slider('Select year :', min(years), max(years), (min(years), max(years))) #DYNAMIC TITLE #create list to hold items for title title_list = [] #selected season team for item in selected_team: title_list.append(item) #blanks space for after team name title_list.append(" ") #selected season if len(selected_season) < 2: #selected season item for item in selected_season: title_list.append(item + " ") else: #more than one item in list, put 'and' between items for item in selected_season: title_list.append(item + " ") title_list.append(" and ") #remove second 'and' title_list.pop(7) #selected year title_list.append(str(selected_year[0]) + ' to ' + str(selected_year[1])) #take list of selected item and concat a string def list_to_str(title_list): #init empty string title_string = "" #return concat string return title_string.join(title_list) #function output variable title_string = list_to_str(title_list) #print on title as subheader st.title(title_string) #return datafram with selected components def get_selected_data(selected_season, selected_year, selected_team, df): new_df = df.loc[ #the input from the sidebars queries the dataframe here #year is equal to or between selcted (df['Year'] >= selected_year[0]) & (df['Year'] <= selected_year[1]) & #and selected team is either blue side OR read side ((df['blueTeamTag'] == selected_team) | (df['redTeamTag'] == selected_team)) & #and selected season (df['Season'].isin(selected_season))] return new_df new_df = get_selected_data(selected_season, selected_year, selected_team, df) def get_win_loss(new_df, selected_team): #wins blue_wins = new_df.loc[(new_df['blueTeamTag'] == selected_team) & (new_df['bResult'] == 1)] red_wins = new_df.loc[(new_df['redTeamTag'] == selected_team) & (new_df['rResult'] == 1)] blue_wins = len(blue_wins.index) red_wins = len(red_wins.index) #losses blue_losses = new_df.loc[(new_df['blueTeamTag'] == selected_team) & (new_df['bResult'] == 0)] red_losses = new_df.loc[(new_df['redTeamTag'] == selected_team) & (new_df['rResult'] == 0)] blue_losses = len(blue_losses.index) red_losses = len(red_losses.index) #win/loss wins = blue_wins + red_wins losses = blue_losses + red_losses win_loss = wins / (wins + losses) return blue_wins, red_wins, blue_losses, red_losses, win_loss blue_wins, red_wins, blue_losses, red_losses, win_loss = get_win_loss( new_df, selected_team) ## Write content #print df, hide with checkbox if st.checkbox('Show raw data'): st.write(new_df.reset_index(drop=True)) st.title('------------------------') st.title('Overview') times_played_blue = new_df.blueTeamTag.value_counts() times_played_red = new_df.redTeamTag.value_counts() #Overview section st.subheader('Total games played: ' + str(times_played_blue[0] + times_played_red[0])) st.subheader('Times played blue side: ' + str(times_played_blue[0])) st.subheader('Times played red side: ' + str(times_played_red[0])) st.subheader('Wins: ' + str(blue_wins + red_wins)) st.subheader('Losses: ' + str(blue_losses + red_losses)) st.subheader('Win percentage: ' + str(round(win_loss, 2))) st.title('------------------------') #Team Members st.title('Team roles') blue_filter = new_df['blueTeamTag'] == selected_team red_filter = 
new_df['redTeamTag'] == selected_team #create filter for new dataframes blue_df = new_df.where(blue_filter) blue_df = blue_df.dropna() red_df = new_df.where(red_filter) red_df = red_df.dropna() #get all players to play top role, only one side matters top = blue_df['blueTop'].unique() #get counts of each champion played blue_top_champs = blue_df['blueTopChamp'].value_counts() red_top_champs = red_df['redTopChamp'].value_counts() #combine counts of both sides blue_top_champs.add(red_top_champs) #jungle jungle = blue_df['blueJungle'].unique() #get counts of each champion played blue_jungle_champs = blue_df['blueJungleChamp'].value_counts() red_jungle_champs = red_df['redJungleChamp'].value_counts() #combine counts of both sides blue_jungle_champs.add(red_jungle_champs) #middle mid = blue_df['blueMiddle'].unique() #get counts of each champion played blue_middle_champs = blue_df['blueMiddleChamp'].value_counts() red_middle_champs = red_df['redMiddleChamp'].value_counts() #combine counts of both sides blue_middle_champs.add(red_middle_champs) #adc adc = blue_df['blueADC'].unique() #get counts of each champion played blue_adc_champs = blue_df['blueADCChamp'].value_counts() red_adc_champs = red_df['redADCChamp'].value_counts() #combine counts of both sides blue_adc_champs.add(red_adc_champs) #support support = blue_df['blueSupport'].unique() #get counts of each champion played blue_support_champs = blue_df['blueSupportChamp'].value_counts() red_support_champs = red_df['redSupportChamp'].value_counts() #combine counts of both sides blue_support_champs.add(red_support_champs) #wite and plot #top st.subheader('Top') st.write(str(top)) #plot fig, ax = plt.subplots(figsize=(10, 5)) ax = sns.barplot(palette="Blues_r", alpha=0.8, y=blue_top_champs.index, x=blue_top_champs.values) st.pyplot(fig) #jungle st.subheader('Jungle') st.write(str(jungle)) #plot fig, ax = plt.subplots(figsize=(10, 5)) ax = sns.barplot(palette="Blues_r", alpha=0.8, y=blue_jungle_champs.index, x=blue_jungle_champs.values) st.pyplot(fig) #middle st.subheader('Middle') st.write(str(mid)) #plot fig, ax = plt.subplots(figsize=(10, 5)) ax = sns.barplot(palette="Blues_r", alpha=0.8, y=blue_middle_champs.index, x=blue_middle_champs.values) st.pyplot(fig) #adc st.subheader('ADC') st.write(str(adc)) #plot fig, ax = plt.subplots(figsize=(10, 5)) ax = sns.barplot(palette="Blues_r", alpha=0.8, y=blue_adc_champs.index, x=blue_adc_champs.values) st.pyplot(fig) #support st.subheader('Support') st.write(str(support)) #plot fig, ax = plt.subplots(figsize=(10, 5)) ax = sns.barplot(palette="Blues_r", alpha=0.8, y=blue_support_champs.index, x=blue_support_champs.values) st.pyplot(fig)
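# One pitfall in the role section above: Series.add returns a new Series, so calls like
# blue_top_champs.add(red_top_champs) silently discard the red-side counts and the bar
# plots end up showing blue-side games only. The sum has to be assigned, with
# fill_value=0 so champions seen on a single side are kept:
top_champs = blue_top_champs.add(red_top_champs, fill_value=0)
top_champs = top_champs.sort_values(ascending=False)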
def main():
    st.markdown("*Initializing the application...*")
    st.markdown(init_api())

    # Title and subtitle
    st.title("Implementing a credit scoring model")
    st.markdown("<i>API built for Project 7 of the OpenClassRoom Data Scientist path</i>",
                unsafe_allow_html=True)

    # General information in the sidebar
    st.sidebar.subheader("General information")

    # Logo
    logo = load_logo()
    st.sidebar.image(logo, width=200)

    # Client selectbox
    lst_id = load_selectbox()
    global id_client
    id_client = st.sidebar.selectbox("Client ID", lst_id)

    # General statistics
    nb_credits, rev_moy, credits_moy, targets = load_infos_gen()

    # Number of loans in the database
    st.sidebar.markdown("<u>Number of loans in the database:</u>", unsafe_allow_html=True)
    st.sidebar.text(nb_credits)

    # Pie chart of solvent vs. non-solvent clients
    st.sidebar.markdown("<u>Solvent / non-solvent split</u>", unsafe_allow_html=True)
    plt.pie(targets, explode=[0, 0.1], labels=["Solvent", "Non-solvent"],
            autopct='%1.1f%%', shadow=True, startangle=90)
    st.sidebar.pyplot()

    # Average income
    st.sidebar.markdown("<u>Average income (USD):</u>", unsafe_allow_html=True)
    st.sidebar.text(rev_moy)

    # Average loan amount
    st.sidebar.markdown("<u>Average loan amount (USD):</u>", unsafe_allow_html=True)
    st.sidebar.text(credits_moy)

    # Selected client ID
    st.write("You selected client:", id_client)

    # Client information
    st.header("**Client information**")
    if st.checkbox("Show client information?"):
        infos_client = identite_client()
        st.write("Family status:**", infos_client["NAME_FAMILY_STATUS"][0], "**")
        st.write("Number of children:**", infos_client["CNT_CHILDREN"][0], "**")
        st.write("Client age:", int(infos_client["DAYS_BIRTH"].values / -365), "years.")

        data_age = load_age_population()
        # Plot the distribution of ages in years
        plt.style.use('fivethirtyeight')
        plt.figure(figsize=(9, 9))
        plt.hist(data_age, edgecolor='k', bins=25)
        plt.axvline(int(infos_client["DAYS_BIRTH"].values / -365), color="red", linestyle=":")
        plt.title('Age of Client')
        plt.xlabel('Age (years)')
        plt.ylabel('Count')
        st.pyplot()

        st.subheader("*Income*")
        st.write("Client total income:", infos_client["AMT_INCOME_TOTAL"][0], "$")

        data_revenus = load_revenus_population()
        # Plot the distribution of incomes
        plt.style.use('fivethirtyeight')
        plt.figure(figsize=(9, 9))
        plt.hist(data_revenus, edgecolor='k')
        plt.axvline(infos_client["AMT_INCOME_TOTAL"][0], color="red", linestyle=":")
        plt.title('Client Income')
        plt.xlabel('Income ($ USD)')
        plt.ylabel('Count')
        st.pyplot()

        st.write("Loan amount:", infos_client["AMT_CREDIT"][0], "$")
        st.write("Loan annuity:", infos_client["AMT_ANNUITY"][0], "$")
        st.write("Price of the goods financed:", infos_client["AMT_GOODS_PRICE"][0], "$")
    else:
        st.markdown("<i>Information hidden</i>", unsafe_allow_html=True)

    # Client solvency analysis
    st.header("**Client file analysis**")
    st.markdown("<u>Probability of client default:</u>", unsafe_allow_html=True)
    prediction = load_prediction()
    st.write(round(prediction * 100, 2), "%")

    st.markdown("<u>Client data:</u>", unsafe_allow_html=True)
    st.write(identite_client())

    # Similar client files
    chk_voisins = st.checkbox("Show similar client files?")
    if chk_voisins:
        similar_id = load_voisins()
        st.markdown("<u>The 10 client files most similar to this one:</u>", unsafe_allow_html=True)
        st.write(similar_id)
        st.markdown("<i>Target 1 = defaulting client</i>", unsafe_allow_html=True)
    else:
        st.markdown("<i>Information hidden</i>", unsafe_allow_html=True)
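# load_voisins() is referenced above but not defined in this excerpt. A minimal
# sketch of how the 10 nearest client files could be retrieved with
# scikit-learn's NearestNeighbors -- the feature matrix `data_train` and its
# TARGET column are hypothetical names, not taken from the original app.
from sklearn.neighbors import NearestNeighbors

def load_voisins(n_neighbors=10):
    features = data_train.drop(columns=["TARGET"])  # assumed training features
    knn = NearestNeighbors(n_neighbors=n_neighbors).fit(features)
    client_row = features.loc[[id_client]]  # id_client is the sidebar selection
    _, idx = knn.kneighbors(client_row)
    return data_train.iloc[idx[0]]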
def app():
    st.title('Financial Scanner - Streamlit Beta')
    st.header('This is a header with an emoji :+1: ')
    st.subheader('This is a subheader that goes to the moon :rocket: :rocket: :rocket:')
    st.text('This is plain text; prefer using st.write')
    st.markdown('This is markdown and can format text like this: Streamlit is **_really_ cool**.')
    st.write('This is plain text for an application under development to evaluate the potential of Streamlit')
    st.write('The write command renders a string as markdown by default: Hello, *World!* :sunglasses:')
    df1 = pd.DataFrame({'col1': [1, 2, 3]})
    st.write('this is a number', 2, 'and below is a df', df1)
    test_dict = {'a': 'alejandro', 'b': 'ramirez'}
    st.write('and this is a dictionary', test_dict)
    code_str = '''def hello():
    print("Hello, Streamlit!")'''
    st.code(code_str)

    # ----------------------------------------------------------------
    if st.button('Refresh Data'):
        st.write('this just prints this message. will get it to download data')

    # ----------------------------------------------------------------
    with st.beta_container():
        st.title('Financial scanner')
        st.write('a stock filter with indicators')
        st.subheader('Filter')
        st.write('filter by market direction')

    with st.beta_container():
        col1, col2, col3 = st.beta_columns([1, 1, 1])
        # every widget needs its own key; reusing key='scanner' for all 15
        # checkboxes raises DuplicateWidgetID
        with col1:
            st.subheader('Bullish')
            bullish_labels = ['Stacked EMAs'] + ['checkbox {}'.format(i) for i in range(2, 6)]
            bullish = {label: st.checkbox(label, value=False, key='scanner_' + label)
                       for label in bullish_labels}
        with col2:
            st.subheader('Bearish')
            bearish = {label: st.checkbox(label, value=False, key='scanner_' + label)
                       for label in ['checkbox {}'.format(i) for i in range(6, 11)]}
        with col3:
            st.subheader('Sideways')
            sideways = {label: st.checkbox(label, value=False, key='scanner_' + label)
                        for label in ['checkbox {}'.format(i) for i in range(11, 16)]}
        checkbox_a1 = bullish['Stacked EMAs']
        checkboxes = bullish  # only the bullish set is reported below for now

    with st.beta_container():
        for label, checked in checkboxes.items():
            if checked:
                st.write('{} is checked. Great!'.format(label))

    st.subheader('Dataframe to be filtered')
    df = pd.read_pickle(S.PRICE_FILE)
    st.write('df is loaded from pickle and resides in cache, making filtering fast')
    st.write(df.head())

    def ema_multiple_periods(df, ema_list=None):
        # default to the classic Fibonacci EMA periods when none are given
        ema_list = [8, 21, 34, 55, 89] if ema_list is None else ema_list
        # initialize an empty data frame with empty multi-index columns
        multi_index_cols = pd.MultiIndex.from_product([[], []])
        ema = pd.DataFrame(index=df.index, columns=multi_index_cols)
        for period in ema_list:
            col = '{}{}'.format('ema', period)
            ema = ema.join(
                df.apply(lambda c: talib.EMA(c.values, period)).rename(
                    columns={'Close': col}))
        return ema

    def ema_stacked(ema, s):
        # True when the shorter EMAs sit above the longer ones on the last bar
        if (ema[-1:][s].ema8[0] > ema[-1:][s].ema21[0] > ema[-1:][s].ema34[0] >
                ema[-1:][s].ema55[0] > ema[-1:][s].ema89[0]):
            return True

    with st.beta_container():
        if checkbox_a1:
            st.subheader('Dataframe filtered')
            st.write('emas are stacked')
            df1 = df.xs('Close', axis=1, level=1, drop_level=False)
            ema_df = ema_multiple_periods(df1, ema_list=[8, 21, 34, 55, 89])
            for s in ema_df.columns.get_level_values(0).unique():
                if ema_stacked(ema_df, s):
                    st.write(s)
        else:
            st.subheader('Results')
            st.write('this does not filter yet and returns all stocks. Functions need to be refactored')
            scan_results = df.columns.get_level_values(0).unique().sort_values().to_list()
            for stock in scan_results:
                st.write(stock)
    return
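# If talib is unavailable, the same EMA columns can be built with pandas alone.
# A rough equivalent sketch -- pandas ewm seeds the average differently from
# talib, so the earliest values will differ slightly. close_df is assumed to
# hold one single-level column of closing prices per symbol, e.g. the result
# of df.xs('Close', axis=1, level=1):
def ema_multiple_periods_pandas(close_df, ema_list=None):
    ema_list = [8, 21, 34, 55, 89] if ema_list is None else ema_list
    frames = {f'ema{p}': close_df.ewm(span=p, adjust=False).mean() for p in ema_list}
    ema = pd.concat(frames, axis=1)                   # columns: (emaN, symbol)
    return ema.swaplevel(axis=1).sort_index(axis=1)   # columns: (symbol, emaN)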
def main():
    root_dir = '/root/caa/rp/monitor/'
    cred_path = '/root/caascript/res/cred.csv'
    user = st.text_input("Enter Your name")
    if not os.path.exists(cred_path):
        st.warning("No credentials found\n\nPlease save credentials via the Login Page")
        user = ""
    if user != "":
        monitoring_sheet = get_data_from_gsheet("RP_monitoring")
        if monitoring_sheet.shape[0] == 0:
            st.error("Unable To Fetch Data...")
        else:
            assigned_accounts = monitoring_sheet.loc[
                monitoring_sheet['intern_name'] == user.title()]
            if assigned_accounts.shape[0] == 0:
                st.warning("Sorry " + user + ", no account has been assigned to you!")
            else:
                st.write(assigned_accounts)
                left, right = st.beta_columns(2)
                start_date = left.date_input("Enter Start Date", datetime.date.today())
                end_date = right.date_input("Enter End Date", datetime.date.today())
                assigned_accounts_group_environment = assigned_accounts.groupby("environment")

                # one working directory per month, e.g. .../Jan_2021
                current_date = datetime.date.today()
                path_month_year = current_date.strftime("%b") + "_" + current_date.strftime("%Y")
                working_dir = os.path.join(root_dir, path_month_year)
                if not os.path.exists(working_dir):
                    os.makedirs(working_dir)
                    os.makedirs(os.path.join(working_dir, 'Output_Files', 'DB_Query_files'))
                    os.makedirs(os.path.join(working_dir, 'Output_Files', 'Script_Final_files'))
                    st.success("Directory Created Successfully")

                if st.checkbox(label="Start Monitoring"):
                    with st.spinner("Execution in progress...."):
                        for env, group in assigned_accounts_group_environment:
                            # each environment uses its own row of credentials
                            cred = pd.read_csv(cred_path)
                            if env == 'AWS US':
                                ldap_user = cred.Id.values[0]
                                ldap_pass = cred.Ldap_pass.values[0]
                                db_pass = cred.Db_pass.values[0]
                                host = "localhost"
                            elif env == 'AWS EU':
                                ldap_user = cred.Id.values[1]
                                ldap_pass = cred.Ldap_pass.values[1]
                                db_pass = cred.Db_pass.values[1]
                                host = "localhost"
                            elif env == 'AWS GOLD':
                                ldap_user = cred.Id.values[2]
                                ldap_pass = cred.Ldap_pass.values[2]
                                db_pass = cred.Db_pass.values[2]
                                host = "localhost"
                            for server_port, server_group_data in group.groupby('port'):
                                local_port = int(server_group_data['local_port'].values[0])
                                server = SSHTunnelForwarder(
                                    '172.27.128.59',
                                    ssh_username=ldap_user,
                                    ssh_password=ldap_pass,
                                    remote_bind_address=('localhost', server_port),
                                    local_bind_address=('0.0.0.0', local_port))
                                print(f"Establishing connection with destination server "
                                      f"port {server_port} and source port {local_port}")
                                try:
                                    server.start()
                                    st.success("Connection Successful")
                                    RP_monitoring_script(ldap_user, db_pass, host, start_date,
                                                         end_date, server_group_data, working_dir)
                                except Exception:
                                    st.error(f"Couldn't connect to {env}\n\nCheck logs for more details")
                                finally:
                                    server.stop()
                                    st.warning(f"Connection closed {server_port}")
                            rp_monitoring_merging(group, working_dir)
                        st.success("Monitoring Completed")
    else:
        st.error("Please enter your name to continue")
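# SSHTunnelForwarder also works as a context manager, which guarantees the
# tunnel is torn down even if the monitoring script raises. A minimal sketch
# of the inner loop above rewritten that way (same variable names as above):
from sshtunnel import SSHTunnelForwarder

for server_port, server_group_data in group.groupby('port'):
    local_port = int(server_group_data['local_port'].values[0])
    try:
        with SSHTunnelForwarder('172.27.128.59',
                                ssh_username=ldap_user,
                                ssh_password=ldap_pass,
                                remote_bind_address=('localhost', server_port),
                                local_bind_address=('0.0.0.0', local_port)):
            st.success("Connection Successful")
            RP_monitoring_script(ldap_user, db_pass, host, start_date,
                                 end_date, server_group_data, working_dir)
    except Exception:
        st.error(f"Couldn't connect to {env}\n\nCheck logs for more details")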
# view a 2D-3D map of the data, shift key for 3D '## Geo Data at %sh' % hour midpoint = (np.average(data["lat"]), np.average(data["lon"])) st.write( pdk.Deck( map_style="mapbox://styles/mapbox/light-v9", initial_view_state={ "latitude": midpoint[0], "longitude": midpoint[1], "zoom": 11, "pitch": 50, }, layers=[ pdk.Layer( "HexagonLayer", data=data, get_position=["lon", "lat"], radius=100, elevation_scale=4, elevation_range=[0, 1000], pickable=True, extruded=True, ), ], )) if st.checkbox('Show Raw Data'): '## Geo Data at %sh' % hour, data # displays the comment and the data
def main(): st.title("Stock Market Prices Demo") nav = st.sidebar.radio( "Navigation", ["Introduction", "Feature Engineering", "Prediction"]) if nav == "Introduction": st.header("Introduction") st.markdown( '''The stock market is known as a place where people can make a fortune if they can crack the mantra to successfully predict stock prices. The main goal of this demo is trying to do it using machine learning. The reason is clear - it will be useful to every business that is associated with the stock market.''') st.subheader('''Business task''') st.markdown( '''Select the most promising companies based on the predicted growth in stock prices for 2017.''' ) st.subheader('''Mathematical task''') st.markdown( '''Predict the stock price of a specific company based on the price history for the period from 2010 to 2016. Select the fastest-growing ones to the top.''' ) st.subheader("Dataset") st.markdown('''We will use the [Huge Stock Market Dataset] (https://www.kaggle.com/borismarjanovic/price-volume-data-for-all-us-stocks-etfs). High-quality financial data is expensive to acquire. Therefore, such data is rarely shared for free. The full historical daily prices and volume data for all US-based stocks and ETFs trading on the NYSE, NASDAQ, and NYSE MKT are provided. The dataset includes a lot of different companies. So, to show how our model works, we chose only some of them: Ford, Yandex, IBM, etc.''') st.subheader("Content") st.markdown( '''The data is presented in CSV format as follows: Date, Open, High, Low, Close, Volume. We will train the model on data from 2010 to 2016 because other data is way too old and has no significant information for the 2010s decade. The prediction will be built in 2017. Note that prices have been adjusted for dividends and splits. To demonstrate how data looks like, you can select any of them.''' ) option = st.selectbox("Choose company name:", sorted(COMPANY_NAMES_TO_STOCK_NAMES.keys())) df = get_data_frame_from_tigger(COMPANY_NAMES_TO_STOCK_NAMES[option]) st.markdown('''There you can see the head of the dataset:''') st.dataframe(df.head()) st.markdown( '''Let's analyze the description. This is the structure. It has ‘Date’ as the index feature. ‘High’ denotes the highest value of the day. ‘Low’ denotes the lowest. ‘Open’ is the opening Price and ‘Close’ is the closing for that Date. Now, sometimes close values are regulated by the companies. ‘Volume’ is the amount of Stock of that company traded on that date.''' ) st.subheader("Plotting dataset") st.markdown( "On the chart below you can see how the company's stock prices changed from 2010 to 2016." ) df_intro = df[["Date", "Open", "High", "Low", "Close"]] df_intro = df_intro[(df["Date"] <= datetime.datetime(2016, 12, 31))] df_intro.set_index("Date", inplace=True) st.line_chart(df_intro) if nav == "Feature Engineering": width = 1000 height = 500 st.header("Feature Engineering") st.subheader("Historical ETF prices") st.markdown( '''Data frame with historial prices for fund consists of 7 columns which are: *Date*, *Open/High/Low/Close* prices, *Volume* count and *Open Interest* number. *OpenInt column* has only 0 values, so we will just ignore it and focus on the rest of information. In tables below you can see sample prices from the data frame and also few statistics about each column e.g. 
min/max values, standard deviation, etc.'''
        )
        option = st.selectbox("Choose company name:",
                              sorted(COMPANY_NAMES_TO_STOCK_NAMES.keys()))
        df = get_data_frame_from_tigger(COMPANY_NAMES_TO_STOCK_NAMES[option])
        if st.checkbox("Show Head"):
            st.dataframe(df.head())
        if st.checkbox("Show Description"):
            st.dataframe(df.describe())

        st.subheader("I. OHLC Chart")
        st.markdown(
            '''An OHLC chart shows the *open, high, low and close* prices of a stock. It shows you how the price changed during a particular day and gives you a sense of, e.g., the momentum or volatility of the stock. The tips of the lines represent the low and high values and the horizontal segments represent the open and close values. Sample points where the close value is higher (lower) than the open value are called increasing (decreasing). By default, increasing items are drawn in green whereas decreasing ones are drawn in red.''')
        fig = go.Figure([
            go.Ohlc(x=df.Date, open=df.Open, high=df.High, low=df.Low, close=df.Close)
        ])
        fig.update(layout_xaxis_rangeslider_visible=False)
        fig.update_layout(width=width, height=height)
        st.plotly_chart(fig, use_container_width=True)

        st.subheader("II. Volume")
        st.markdown(
            '''*Volume* is a very basic measure that shows the number of shares traded (bought, sold) over a certain period of time, e.g. daily. It is a simple but often overlooked indicator. *Volume* is important because it basically represents the activity in stock trading. A higher volume value indicates higher interest in trading a stock.''')
        st.markdown('*2014*')
        df['Date'] = pd.to_datetime(df['Date'])
        fig = go.Figure(
            go.Bar(x=df[(df['Date'].dt.year == 2014)].Date,
                   y=df.Volume,
                   name='Volume',
                   marker_color='red'))
        fig.update(layout_xaxis_rangeslider_visible=False)
        fig.update_layout(width=width, height=height)
        st.plotly_chart(fig, use_container_width=True)

        st.subheader("III. Moving Averages")
        st.markdown(
            '''Moving Averages (MA) help to smooth out stock prices on a chart by filtering out short-term price fluctuations. We calculate moving averages over a defined period of time, e.g. the last 9, 50 or 200 days. The two most common averages used in technical analysis are:''')
        st.markdown(
            '''* Simple Moving Average (SMA) - a simple average calculated over the last N days, e.g. 50, 100 or 200
* Exponential Moving Average (EMA) - an average where greater weights are applied to recent prices''')
        df['EMA_9'] = df['Close'].ewm(5).mean().shift()
        df['SMA_50'] = df['Close'].rolling(50).mean().shift()
        df['SMA_100'] = df['Close'].rolling(100).mean().shift()
        df['SMA_200'] = df['Close'].rolling(200).mean().shift()
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=df.Date, y=df.EMA_9, name='EMA 9'))
        fig.add_trace(go.Scatter(x=df.Date, y=df.SMA_50, name='SMA 50'))
        fig.add_trace(go.Scatter(x=df.Date, y=df.SMA_100, name='SMA 100'))
        fig.add_trace(go.Scatter(x=df.Date, y=df.SMA_200, name='SMA 200'))
        fig.add_trace(
            go.Scatter(x=df.Date,
                       y=df.Close,
                       name='Close',
                       line_color='dimgray',
                       opacity=0.3))
        fig.update(layout_xaxis_rangeslider_visible=False)
        fig.update_layout(width=width, height=height)
        st.plotly_chart(fig, use_container_width=True)

        st.subheader("IV. RSI")
        st.markdown(
            '''Another commonly used indicator is the Relative Strength Index (RSI), which indicates the magnitude of recent price changes. It can show that a stock is either overbought or oversold. Typically an RSI value of 70 and above signals that a stock is becoming overbought/overvalued, while a value of 30 and below can mean that it is oversold. The full range of RSI is from 0 to 100.''')
        num_days = 365
        df['RSI'] = relative_strength_idx(df).fillna(0)
        fig = go.Figure(
            go.Scatter(x=df.Date.tail(num_days), y=df.RSI.tail(num_days)))
        fig.update(layout_xaxis_rangeslider_visible=False)
        fig.update_layout(width=width, height=height)
        st.plotly_chart(fig, use_container_width=True)

        st.subheader("V. MACD")
        st.markdown(
            '''Moving Average Convergence Divergence (MACD) is an indicator which shows the relationship between two exponential moving averages, i.e. the 12-day and 26-day EMAs. We obtain MACD by subtracting the 26-day EMA (also called the slow EMA) from the 12-day EMA (or fast EMA).''')
        ema_12 = pd.Series(df['Close'].ewm(span=12, min_periods=12).mean())
        ema_26 = pd.Series(df['Close'].ewm(span=26, min_periods=26).mean())
        macd = pd.Series(ema_12 - ema_26)
        macd_signal = pd.Series(macd.ewm(span=9, min_periods=9).mean())
        fig = make_subplots(rows=2, cols=1)
        fig.add_trace(go.Scatter(x=df.Date, y=df.Close, name='Close'), row=1, col=1)
        fig.add_trace(go.Scatter(x=df.Date, y=ema_12, name='EMA 12'), row=1, col=1)
        fig.add_trace(go.Scatter(x=df.Date, y=ema_26, name='EMA 26'), row=1, col=1)
        fig.add_trace(go.Scatter(x=df.Date, y=macd, name='MACD'), row=2, col=1)
        fig.add_trace(go.Scatter(x=df.Date, y=macd_signal, name='Signal line'), row=2, col=1)
        fig.update(layout_xaxis_rangeslider_visible=False)
        fig.update_layout(width=width, height=height)
        st.plotly_chart(fig, use_container_width=True)

        st.subheader("VI. Stochastic")
        st.markdown(
            '''The last technical tool in this notebook is the stochastic oscillator, which is quite similar to RSI in the sense that its values (also in the range 0-100) can indicate whether a stock is overbought/oversold or not. It is arguably the most complicated indicator compared to the ones introduced earlier. The stochastic can be calculated as:''')
        st.latex(r'''\%K = (\frac{C - L_{14}}{H_{14} - L_{14}}) \times 100''')
        st.markdown(
            '''where **C** is the most recent close price, and **L** and **H** are the lowest/highest prices traded in the last 14 days.''')
        st.markdown(
            '''This **%K** stochastic is often referred to as the *"fast" stochastic indicator*. There is also a *"slow" stochastic indicator* that can be obtained as:''')
        st.latex(r'''\%D = SMA_{3}(\%K)''')
        stochs = stochastic(df, k=14, d=3)
        fig = go.Figure()
        fig.add_trace(
            go.Scatter(x=df.Date.tail(365),
                       y=stochs.stoch_k.tail(365),
                       name='K stochastic'))
        fig.add_trace(
            go.Scatter(x=df.Date.tail(365),
                       y=stochs.stoch_d.tail(365),
                       name='D stochastic'))
        fig.update(layout_xaxis_rangeslider_visible=False)
        fig.update_layout(width=width, height=height)
        st.plotly_chart(fig, use_container_width=True)

    if nav == "Prediction":
        st.header('Stock prices prediction')
        st.markdown(
            "Let's see how trained models work for a range of companies.")
        st.markdown(
            'This plot shows predicted prices for the companies you choose, from 01-11-2016 to 31-10-2017.'
) company_names = st.multiselect( 'Choose company name(s):', sorted(COMPANY_NAMES_TO_STOCK_NAMES.keys()), default=[sorted(COMPANY_NAMES_TO_STOCK_NAMES.keys())[0]]) stock_names = create_list_of_stock_names(company_names) data_predicted_prices = load_data_for_predicted_prices_plot( stock_names) highlight_predicted_prices = alt.selection(type='single', on='mouseover', fields=['symbol'], nearest=True) chart_predicted_prices = alt.Chart( data_predicted_prices).mark_line().encode( x='date:T', y='predicted_price:Q', color=alt.Color('symbol:N', scale=alt.Scale(scheme='dark2')), strokeDash='symbol:N', tooltip=['symbol', 'date', 'predicted_price'], ) points_predicted_prices = chart_predicted_prices.mark_circle().encode( opacity=alt.value(0)).add_selection(highlight_predicted_prices) lines_predicted_prices = chart_predicted_prices.mark_line().encode( size=alt.condition(~highlight_predicted_prices, alt.value(1), alt.value(3))) layer_predicted_prices = (points_predicted_prices + lines_predicted_prices).interactive() st.altair_chart(layer_predicted_prices, use_container_width=True) st.markdown( '''This plot shows actual and predicted prices for a company you choose from 01-11-2016 to 31-10-2017. You can easily compare actual and predicted prices for a day. ''' ) company_name = st.selectbox( 'Choose company name:', sorted(COMPANY_NAMES_TO_STOCK_NAMES.keys())) stock_name = COMPANY_NAMES_TO_STOCK_NAMES[company_name] data_predicted_actual_prices, y_test, y_pred = load_data_for_predicted_actual_prices_plot( stock_name) nearest_predicted_actual_prices = alt.selection(type='single', nearest=True, on='mouseover', fields=['date'], empty='none') line = alt.Chart(data_predicted_actual_prices).mark_line( interpolate='basis').encode(x='date:T', y='price:Q', color=alt.Color( 'price_type:N', scale=alt.Scale(scheme='plasma'))) selectors_predicted_actual_prices = alt.Chart( data_predicted_actual_prices).mark_point().encode( x='date:T', opacity=alt.value(0), ).add_selection(nearest_predicted_actual_prices) points_predicted_actual_prices = line.mark_point().encode( opacity=alt.condition(nearest_predicted_actual_prices, alt.value( 1), alt.value(0))) text_predicted_actual_prices = line.mark_text( align='left', dx=10, dy=-10).encode(text=alt.condition( nearest_predicted_actual_prices, 'price:Q', alt.value(' '))) rules_predicted_actual_prices = alt.Chart( data_predicted_actual_prices).mark_rule(color='#f63366').encode( x='date:T', ).transform_filter(nearest_predicted_actual_prices) layer_predicted_actual_prices = alt.layer( line, selectors_predicted_actual_prices, points_predicted_actual_prices, rules_predicted_actual_prices, text_predicted_actual_prices).interactive() st.altair_chart(layer_predicted_actual_prices, use_container_width=True) st.markdown('''MSE: {}'''.format( np.round(mean_squared_error(y_test, y_pred), 6))) st.subheader('''Conclusion''') st.markdown( '''We analyzed stock prices on the stock market for American Airlines, Cern, Ford, IBM, Plug, Sony. Yandex for 2010-2016. Based on this, we made a price prediction for the same companies for 2017. Then, we built plots of these prices versus time, based on which investors can understand which stocks and when to buy or sell to get the greatest profit.''') st.markdown( '''The most promising companies for investment are Yandex and Cern. They show the most rapid growth in 2017 in contrast to the rest of the companies on the list.''')
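# relative_strength_idx() and stochastic() are called in the Feature
# Engineering section above but are not defined in this excerpt. Minimal
# pandas sketches consistent with how they are used (the stoch_k/stoch_d
# column names and the 14/3 windows come from the calls above; the smoothing
# details are assumptions):
import pandas as pd

def relative_strength_idx(df, n=14):
    # Wilder-style RSI from close-to-close changes
    delta = df['Close'].diff()
    gain = delta.clip(lower=0).ewm(alpha=1 / n, min_periods=n).mean()
    loss = -delta.clip(upper=0).ewm(alpha=1 / n, min_periods=n).mean()
    return 100 - 100 / (1 + gain / loss)

def stochastic(df, k=14, d=3):
    # %K compares the close to the k-day high/low range; %D is its d-day SMA
    low_k = df['Low'].rolling(k).min()
    high_k = df['High'].rolling(k).max()
    stoch_k = (df['Close'] - low_k) / (high_k - low_k) * 100
    return pd.DataFrame({'stoch_k': stoch_k, 'stoch_d': stoch_k.rolling(d).mean()})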
build_sim_sir_w_date_chart, build_table, ) # This is somewhat dangerous: # Hide the main menu with "Rerun", "run on Save", "clear cache", and "record a screencast" # This should not be hidden in prod, but removed # In dev, this should be shown st.markdown(hide_menu_style, unsafe_allow_html=True) p = display_sidebar(st, DEFAULTS) m = SimSirModel(p) display_header(st, m, p) if st.checkbox("Show more info about this tool"): notes = "The total size of the susceptible population will be the entire catchment area for our hospitals." display_more_info(st=st, model=m, parameters=p, defaults=DEFAULTS, notes=notes) st.subheader("New Admissions") st.markdown( "Projected number of **daily** COVID-19 admissions. \n\n _NOTE: Now including back-casting of prior admissions for comparison._" ) admits_chart = build_admits_chart(alt=alt, admits_df=m.admits_df, max_y_axis=p.max_y_axis) st.altair_chart(admits_chart, use_container_width=True)
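# hide_menu_style is referenced above but defined elsewhere in this app. The
# conventional snippet for hiding Streamlit's hamburger menu looks like this
# (a common community pattern, assumed rather than copied from the repo):
hide_menu_style = """
    <style>
    #MainMenu {visibility: hidden;}
    </style>
"""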
data = pd.read_csv(DATA_URL, nrows=nrows) lowercase = lambda x: str(x).lower() data.rename(lowercase, axis='columns', inplace=True) data[DATE_COLUMN] = pd.to_datetime(data[DATE_COLUMN]) return data # Create a text element and let the reader know the data is loading. data_load_state = st.text('Loading data...') # Load 10,000 rows of data into the dataframe. data = load_data(10000) # Notify the reader that the data was successfully loaded. data_load_state.text("Done! (using st.cache)") if st.checkbox('Show raw data'): st.subheader('Raw data') st.write(data) st.subheader('Number of pickups by hour') hist_values = np.histogram(data[DATE_COLUMN].dt.hour, bins=24, range=(0, 24))[0] st.bar_chart(hist_values) hour_to_filter = st.slider('hour', 0, 23, 17) # min: 0h, max: 23h, default: 17h = 17 filtered_data = data[data[DATE_COLUMN].dt.hour == hour_to_filter] st.subheader(f'Map of all pickups at {hour_to_filter}:00') st.map(filtered_data)
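# The top of load_data is cut off above. In the public Streamlit tutorial this
# snippet follows, the loader is wrapped in @st.cache so reruns skip the
# download; a sketch of the complete function under that assumption:
DATE_COLUMN = 'date/time'
DATA_URL = ('https://s3-us-west-2.amazonaws.com/'
            'streamlit-demo-data/uber-raw-data-sep14.csv.gz')

@st.cache
def load_data(nrows):
    data = pd.read_csv(DATA_URL, nrows=nrows)  # fetched once, then cached
    data.rename(lambda x: str(x).lower(), axis='columns', inplace=True)
    data[DATE_COLUMN] = pd.to_datetime(data[DATE_COLUMN])
    return data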
def main(): st.set_option('deprecation.showPyplotGlobalUse', False) image = Image.open('data/Reconn.png') st.image(image, use_column_width=False) def load_data(uploaded_file): df = pd.read_csv(uploaded_file) return df uploaded_file = st.file_uploader('Upload file to begin', type=("csv")) if uploaded_file is not None: df = load_data(uploaded_file) target_column = st.selectbox('Select Target Column', list(df.columns), key='target_column') st.sidebar.title('Know your dataset') if st.sidebar.checkbox("Preview Dataset"): st.markdown('## Dataset preview') if st.button("Head"): st.write(df.head(10)) elif st.button("Tail"): st.write(df.tail(10)) else: number = st.slider("Select No of Rows to show", 10, df.shape[0]) st.write(df.head(number)) if st.sidebar.checkbox("Show Column Names"): st.markdown('## Column names') st.write(df.columns) if st.sidebar.checkbox("Show Dimensions"): st.write(df.shape) if st.sidebar.checkbox('Describe', value=False): st.markdown('## Data Description') st.write(df.describe()) st.markdown('### Columns that are potential binary features') bin_cols = [] for col in df.columns: if len(df[col].value_counts()) == 2: bin_cols.append(col) st.write(bin_cols) st.markdown('### Columns Types') st.write(df.dtypes) if st.sidebar.checkbox('Missing Data', value=False): st.markdown('## Missing Data') total = df.isnull().sum().sort_values(ascending=False) percent = (df.isnull().sum() / df.isnull().count()).sort_values(ascending=False) missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent']) st.write(missing_data) try: sns.heatmap(df.isnull()) st.pyplot() except: st.warning('Error when showing plots') if st.sidebar.checkbox('Value Counts', value=False): st.markdown('## Value Counts') col = st.selectbox('Select Column', list(df.columns), key='val_col') st.write(df[col].value_counts()) if st.sidebar.checkbox('Unique elements', value=False): st.markdown('## Unique elements') if st.checkbox('Show all unique elements', value=False): st.write(df.nunique()) col = st.selectbox('Show columnwise unique elements', list(df.columns), key='unique_col') st.write(df[col].unique()) if st.sidebar.checkbox('Show Distribution', False): st.subheader(f'Distribution of {target_column}') try: sns.distplot(df[target_column]) st.write("Skewness: %.3f" % df[target_column].skew()) st.write("Kurtosis: %.3f" % df[target_column].kurt()) st.pyplot() except: st.error('Invalid Column') st.sidebar.title('Explore the Dataset') if target_column is not None: if st.sidebar.checkbox('Scatter Plot', value=False): scatter_cols = st.sidebar.multiselect('Select Column', list(df.columns), key='scatter_cols') st.markdown('## Scatter Plots') for col in scatter_cols: try: data = pd.concat([df[target_column], df[col]], axis=1) data.plot.scatter(x=col, y=target_column, ylim=(0, 800000)) st.pyplot() except: st.error('Invalid column') if st.sidebar.checkbox('Box Plot', value=False): box_cols = st.sidebar.multiselect('Select Column', list(df.columns), key='box_cols') st.markdown('## Box Plots') for col in box_cols: try: data = pd.concat([df[target_column], df[col]], axis=1) f, ax = plt.subplots(figsize=(8, 6)) fig = sns.boxplot(x=col, y=target_column, data=data) fig.axis(ymin=np.min(df[target_column]), ymax=np.max(df[target_column])) st.pyplot() except: st.error('Invalid column') if st.sidebar.checkbox('Pair Plot', value=False): pair_cols = st.sidebar.multiselect('Select Column', list(df.columns), key='pair_plot') plot_size = st.sidebar.number_input('Select Plot size', 1.0, 5.0, step=0.5, key='plot_size', value=2.5) 
st.markdown('## Pair Plots') cols = [target_column] for col in pair_cols: cols.append(col) try: sns.set() sns.pairplot(df[cols], height=plot_size) st.pyplot() except: st.error('Invalid column') if st.sidebar.checkbox('Correlation matrix', value=False): st.markdown('## Correlation matrix (heatmap style)') corrmat = df.corr() f, ax = plt.subplots(figsize=(12, 9)) sns.heatmap(corrmat, vmax=.8, square=True) st.pyplot() if st.checkbox('With Target Column', value=False): k = st.number_input( '# of Cols for heatmap', 3, len(df.columns), step=1, key='k') #number of variables for heatmap cols = corrmat.nlargest(k, target_column)[target_column].index cm = np.corrcoef(df[cols].values.T) sns.set(font_scale=1.25) hm = sns.heatmap(cm, cbar=True, annot=True, square=True, fmt='.2f', annot_kws={'size': 10}, yticklabels=cols.values, xticklabels=cols.values) st.pyplot() st.sidebar.title('Data processing') if st.sidebar.checkbox('Treat missing values'): st.markdown('## Treat missing values') # Select a column to treat missing values col_option = st.selectbox("Select Column to treat missing values", df.columns) # Specify options to treat missing values missing_values_clear = st.selectbox( "Select Missing values treatment method", ("Replace with Mean", "Replace with Median", "Replace with Mode")) if missing_values_clear == "Replace with Mean": replaced_value = df[col_option].mean() st.write("Mean value of column is :", replaced_value) elif missing_values_clear == "Replace with Median": replaced_value = df[col_option].median() st.write("Median value of column is :", replaced_value) elif missing_values_clear == "Replace with Mode": replaced_value = df[col_option].mode() st.write("Mode value of column is :", replaced_value) Replace = st.selectbox("Replace values of column?", ("No", "Yes")) if Replace == "Yes": df[col_option] = df[col_option].fillna(replaced_value) st.write("Null values replaced") elif Replace == "No": st.write("No changes made") if st.sidebar.checkbox('Encode categorical column'): st.markdown("## Encode categorical column") # Select a column to do encoding col_selected = st.selectbox( "Select Column to treat categorical values", df.columns) # Specify options to do encoding encoder_type = st.selectbox("Select Encoding method", ("Label Encoder", "")) if encoder_type == "Label Encoder": encoded_value = helper.labelEncoder.fit_transform( df[col_selected]) st.write("Label Encoded value of column is :", encoded_value) # elif encoder_type == "Ordinal Encoder": # encoded_value = helper.ordinalEncoder.fit_transform(df[col_selected]) # st.write("Ordinal Encoded value of column is :", encoded_value) Replace = st.selectbox("Replace values of column?", ("No", "Yes"), key='encoder') if Replace == "Yes": df[col_selected] = encoded_value st.write("Added encoded column in dataframe") st.write(df.head()) elif Replace == "No": st.write('No values replaced yet') if st.sidebar.checkbox('Scale column'): st.markdown("## Scaling column") col_scaled = st.selectbox("Select Column for feature scaling", df.columns) scaler_type = st.selectbox("Select Scaling method", ("Standard Scaler", "Min Max Scaler")) if scaler_type == "Standard Scaler": scaled_value = helper.standartScaler.fit_transform( df[col_scaled].values.reshape(-1, 1)) st.write("Standard scaled value of column is :", scaled_value) elif scaler_type == "Min Max Scaler": scaled_value = helper.minMaxScaler.fit_transform( df[col_scaled].values.reshape(-1, 1)) st.write("Min-Max scaled value of column is :", scaled_value) Replace = st.selectbox("Replace values of 
column?", ("No", "Yes"), key='scaler') if Replace == "Yes": df[col_scaled] = scaled_value st.write("Added scaled column in dataframe") st.write(df.head()) elif Replace == "No": st.write('No values replaced yet') st.sidebar.title('Download processed dataset') if st.sidebar.checkbox("download file"): st.sidebar.markdown(helper.get_table_download_link(df), unsafe_allow_html=True) if st.sidebar.button('Credits'): st.sidebar.markdown(''' **Md.Sadab Wasim** Get in touch: [Twitter](https://twitter.com/@sadab_wasim) Source Code: [Github](https://github.com/mdsadabwasim/reconn) ''')
encoder = OrdinalEncoder() encoder.fit(pd.concat([x_train[categorical_cols], x_test[categorical_cols]]).applymap(str).values) x_train[categorical_cols] = pd.DataFrame(encoder.transform(x_train[categorical_cols].applymap(str).values)) x_test[categorical_cols] = pd.DataFrame(encoder.transform(x_test[categorical_cols].applymap(str).values)) st.title("House Pricing Assistant -- Using Artificial Intelligence") train_data = pd.read_csv("train.csv") test_data = pd.read_csv("test.csv") desc = open("data_description.txt", "r") desc_text = desc.read() st.subheader("Use the window below to explore the unaltered dataset used to train the AI.") st.dataframe(train_data) show_key = st.checkbox('Show more information about training data') if show_key: st.success(desc_text) x_train = train_data.drop(columns=["SalePrice"]) y_train = train_data["SalePrice"] x_test = test_data preprocessing() train_button = st.checkbox("Visualize Data and Train Model") if train_button:
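# One subtlety in the encoder block above: pd.DataFrame(encoder.transform(...))
# creates a frame with a fresh RangeIndex, so assigning it back into
# x_train[categorical_cols] relies on x_train also having a default index; if
# the frames were ever sliced or shuffled, the index-aligned assignment would
# silently fill NaNs. An index-safe sketch of the same two assignments:
x_train[categorical_cols] = pd.DataFrame(
    encoder.transform(x_train[categorical_cols].applymap(str).values),
    columns=categorical_cols, index=x_train.index)
x_test[categorical_cols] = pd.DataFrame(
    encoder.transform(x_test[categorical_cols].applymap(str).values),
    columns=categorical_cols, index=x_test.index)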
import pandas as pd import streamlit as st st.info( "## Instructions:\n" + "1. Upload simple csv (like `data.csv` from this repo)\n" + "2. Check the box to choose column names\n" + "3. Change the column names selection and see the dataframe update in response\n" + "4. Uncheck the box to exit column name selection and see the dataframe go back to its previous state\n" ) csv_file = st.file_uploader("File", type="csv") if csv_file is not None: dataframe = pd.read_csv(csv_file) all_columns = list(dataframe.columns) if st.checkbox("Select Columns", False): columns = st.multiselect("Columns", all_columns, all_columns) else: columns = all_columns st.write(dataframe.filter(columns))
def main(): """Customer Lifetime Value & Sales Revenue Forecasting""" st.title("Customer Lifetime Value & Sales Revenue Forecasting") st.subheader( "Built with Streamlit,Lifetimes, fbProphet and Plotly library") # Menu menu = [ 'Exploratory Data Analysis', 'Customer Lifetime Value', 'Sales Revenue Forecasting', 'About' ] choices = st.sidebar.selectbox('Select Menu', menu) if choices == 'Exploratory Data Analysis': st.subheader('Exploratory Data Analysis') clean = pd.read_csv('data/clean_df.csv') clean = clean.drop('Unnamed: 0', axis=1) clean = clean.rename(columns={"Price": "Revenue"}) clean["Date"] = pd.to_datetime(clean["Date"]) clean["Month"] = clean["Date"].dt.strftime("%B") if st.checkbox('View Data'): st.dataframe(clean) st.subheader("Annual Aggregation") if st.checkbox('View Top 10 Items By Revenue'): revenue = clean.groupby( "Description")["Revenue"].sum().reset_index().sort_values( by="Revenue", ascending=False) revenue_head = revenue.head(10).sort_values(by="Revenue") fig1 = px.bar(revenue_head, x="Revenue", y="Description", orientation="h") st.plotly_chart(fig1) if st.checkbox('View Bottom 10 Items By Revenue'): revenue = clean.groupby( "Description")["Revenue"].sum().reset_index().sort_values( by="Revenue", ascending=False) revenue_tail = revenue.tail(10).sort_values(by="Revenue") fig2 = px.bar(revenue_tail, x="Revenue", y="Description", orientation="h") st.plotly_chart(fig2) if st.checkbox('View Top 10 Popular Items'): quantity = clean.groupby( "Description")["Quantity"].sum().reset_index().sort_values( by="Quantity", ascending=False) quantity_head = quantity.head(10).sort_values(by="Quantity") fig3 = px.bar(quantity_head, x="Quantity", y="Description", orientation="h") st.plotly_chart(fig3) if st.checkbox('View Least Popular Items'): qty1 = st.selectbox("Select Total Quantity Sold", [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], key="qty1") quantity = clean.groupby( "Description")["Quantity"].sum().reset_index().sort_values( by="Quantity", ascending=False) quantity_tail = quantity[quantity["Quantity"] == qty1].reset_index( drop=True) st.dataframe(quantity_tail[["Description"]]) st.subheader("Monthly Aggregation") if st.checkbox('View Monthly Top 10 Items By Revenue'): mth1 = st.selectbox("Select Month", [ "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December" ], key="mth1") monthrevenue = clean.groupby(["Month", "Description" ])["Revenue"].sum().reset_index() month_revenue = monthrevenue[monthrevenue["Month"] == mth1].sort_values(by="Revenue", ascending=False) month_revenue_head = month_revenue.head(10).sort_values( by="Revenue") fig4 = px.bar(month_revenue_head, x="Revenue", y="Description", orientation="h") st.plotly_chart(fig4) if st.checkbox('View Monthly Bottom 10 Items by Revenue'): mth2 = st.selectbox("Select Month", [ "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December" ], key="mth2") monthrevenue = clean.groupby(["Month", "Description" ])["Revenue"].sum().reset_index() month_revenue = monthrevenue[monthrevenue["Month"] == mth2].sort_values(by="Revenue", ascending=False) month_revenue_tail = month_revenue.tail(10).sort_values( by="Revenue") fig5 = px.bar(month_revenue_tail, x="Revenue", y="Description", orientation="h") st.plotly_chart(fig5) if st.checkbox('View Monthly Top 10 Popular Items'): mth3 = st.selectbox("Select Month", [ "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December" 
], key="mth3") monthquantity = clean.groupby(["Month", "Description" ])["Quantity"].sum().reset_index() month_quantity = monthquantity[monthquantity["Month"] == mth3].sort_values(by="Quantity", ascending=False) month_quantity_head = month_quantity.head(10).sort_values( by="Quantity") fig6 = px.bar(month_quantity_head, x="Quantity", y="Description", orientation="h") st.plotly_chart(fig6) if st.checkbox('View Monthly Least Popular Items'): mth4 = st.selectbox("Select Month", [ "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December" ], key="mth4") qty2 = st.selectbox("Select Total Quantity Sold", [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], key="qty2") monthquantity = clean.groupby(["Month", "Description" ])["Quantity"].sum().reset_index() month_quantity_tail = monthquantity[ (monthquantity["Month"] == mth4) & (monthquantity["Quantity"] == qty2)].reset_index(drop=True) st.dataframe(month_quantity_tail[["Description"]]) if choices == 'Customer Lifetime Value': st.subheader('Customer Lifetime Value') st.subheader("Model Based On 30 Days") output = pd.read_csv('data/output_df.csv') output["predicted_purchases"] = output["predicted_purchases"].round() output["expected_total_monetary_value"] = output[ "predicted_purchases"] * output["expected_monetary_value"] #output=output.rename(columns={"probability":"probability_alive"}) if st.checkbox('View Predictions'): #st.dataframe(output[["CustomerID","predicted_purchases","expected_monetary_value","expected_total_monetary_value","probability_alive"]]) st.dataframe(output[[ "CustomerID", "predicted_purchases", "expected_monetary_value", "expected_total_monetary_value" ]]) def get_table_download_link(df): csv = df.to_csv(index=False) b64 = base64.b64encode(csv.encode()).decode() return f'<a href="data:file/csv;base64,{b64}" download="data/output_df.csv">Download</a>' st.markdown(get_table_download_link(output), unsafe_allow_html=True) if st.checkbox('View More On Expected Total Monetary Value'): exp_tot = output["expected_total_monetary_value"].describe( ).to_frame() st.dataframe(exp_tot) st.subheader("Boxplot") fig7 = px.box(output, y="expected_total_monetary_value") st.plotly_chart(fig7) st.subheader("Histogram") fig8 = px.histogram(output, x="expected_total_monetary_value") st.plotly_chart(fig8) if choices == 'Sales Revenue Forecasting': st.subheader('Sales Revenue Forecasting') df_load_state = st.text('Loading data...') df = load_data('data/data.csv') df_load_state.text('Loading data... 
done!') chart = df.groupby(['InvoiceDate'])[['Revenue']].sum() def plot_fig(): fig = go.Figure() fig.add_trace( go.Scatter(x=chart.index, y=chart['Revenue'], name="Revenue")) fig.layout.update(title_text='UK Revenue for year 2011 ', xaxis_rangeslider_visible=True) st.plotly_chart(fig) return fig # plotting the figure of Actual Data plot_fig() if st.checkbox('Show raw data'): st.subheader('Raw data') st.write(chart) #shape the df w.r.t requirement by fbProphet df_prophet = df.groupby(['InvoiceDate'], as_index=False)[['Revenue']].sum() #remove negative value #fbprophet works with 'None' df_prophet.iloc[21, 1] = None df_prophet.columns = ['ds', 'y'] #function to remove outliers def outliers_to_na(ts, devs): median = ts['y'].median() #print(median) std = np.std(ts['y']) #print(std) for x in range(len(ts)): val = ts['y'][x] #print(ts['y'][x]) if (val < median - devs * std or val > median + devs * std): ts['y'][x] = None return ts # remove outliers based on 2 std dev outliers_to_na(df_prophet, 2) #st.write(df_prophet) #season_choice = st.selectbox('Seasonality Mode',['additive','multiplicative']) #model_choice = st.selectbox('Model Choice',['Logistic Regression','Neural Network']) #if changepoint_prior_scale == 'additive': m = Prophet(seasonality_mode='additive', changepoint_prior_scale=0.11) m.fit(df_prophet) future = m.make_future_dataframe(periods=3, freq='M') future = m.predict(future) #plot forecast fig1 = plot_plotly(m, future) if st.checkbox('Show forecast data'): st.subheader('forecast data') st.write(future.loc[305:, ['ds', 'yhat']]) st.write( 'Quarterly Sales Revenue for Dec 2011, Jan 2012 , Feb 2012') st.plotly_chart(fig1) #plot component wise forecast st.write("Component wise forecast") fig2 = m.plot_components(future) st.write(fig2) if choices == 'About': st.subheader('About')
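# output_df.csv is read above with predicted_purchases and
# expected_monetary_value columns, which match what the Lifetimes library
# (named in the app's subheader) produces. A plausible sketch of how that file
# could have been generated with a BG/NBD plus Gamma-Gamma model over the
# 30-day horizon mentioned above (penalizer values and paths are assumptions):
from lifetimes import BetaGeoFitter, GammaGammaFitter
from lifetimes.utils import summary_data_from_transaction_data

summary = summary_data_from_transaction_data(
    clean, 'CustomerID', 'Date', monetary_value_col='Revenue')

bgf = BetaGeoFitter(penalizer_coef=0.001)
bgf.fit(summary['frequency'], summary['recency'], summary['T'])
summary['predicted_purchases'] = bgf.conditional_expected_number_of_purchases_up_to_time(
    30, summary['frequency'], summary['recency'], summary['T'])

# the monetary model is fit on returning customers only
returning = summary[summary['frequency'] > 0]
ggf = GammaGammaFitter(penalizer_coef=0.001)
ggf.fit(returning['frequency'], returning['monetary_value'])
summary['expected_monetary_value'] = ggf.conditional_expected_average_profit(
    summary['frequency'], summary['monetary_value'])

summary.reset_index().to_csv('data/output_df.csv', index=False)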
def render(): with st.sidebar.beta_expander('Files', expanded=True): interface.info('Load or download a file from s3') info = st.empty() col1, col2 = st.beta_columns([0.3, 1]) with col1: st.markdown(f"<sup><p style='margin-bottom:22px'></p></sup>", unsafe_allow_html=True) load_raw = st.checkbox('Raw') with col2: st.markdown(f"<sup><p style='margin-bottom:22px'></p></sup>", unsafe_allow_html=True) apply_filter = st.checkbox('Filter', True) link = st.text_input('Load from s3', value='') if not os.path.exists(DFT.ROOT / 'h5'): os.mkdir(DFT.ROOT / 'h5') if not os.path.exists(DFT.ROOT / 'h5/downloads'): os.mkdir(DFT.ROOT / 'h5/downloads/') if not os.path.exists(DFT.ROOT / 'h5/analyzed'): os.mkdir(DFT.ROOT / 'h5/analyzed/') downloaded_files = np.array(os.listdir(DFT.ROOT / 'h5/downloads/')) analyzed_files = np.array(os.listdir(DFT.ROOT / 'h5/analyzed/')) filenames = list(analyzed_files[analyzed_files.argsort()]) + list( downloaded_files[downloaded_files.argsort()]) filenames = [None] + [f for f in filenames if f[-3:] == '.h5'] def shownames(name): nonlocal analyzed_files if name in analyzed_files: return '* ' + name else: return name kind = None selector = st.empty() file = selector.selectbox('Load existing file', filenames, format_func=shownames) if link != '': kind = DFT.S3 file = link elif file is not None: if file in downloaded_files: file = DFT.ROOT / f'h5/downloads/{file}' else: file = DFT.ROOT / f'h5/analyzed/{file}' kind = DFT.LOCAL typed_name = st.text_input('Save, download or delete the given file', value='') def _get_file_from_name(typed_name): if typed_name[-3:] == '.h5': typed_name = typed_name[:-3] if typed_name + '.h5' in analyzed_files: typed_name = DFT.ROOT / f'h5/analyzed/{typed_name}.h5' elif typed_name + '.h5' in downloaded_files: typed_name = DFT.ROOT / f'h5/downloads/{typed_name}.h5' else: interface.error( f'Cannot find "{typed_name}" in the available files') return typed_name col1, col2, col3 = st.beta_columns([0.25, 0.4, 0.4]) with col1: st.markdown('') should_save = st.button('Save') with col2: st.markdown('') if st.button('Download'): download_path = _get_file_from_name(typed_name) interface.download(download_path) with col3: st.markdown('') if st.button('Delete'): typed_name = _get_file_from_name(typed_name) if file is not None and typed_name == file: interface.error( 'Cannot delete the file used in the current analysis.') os.remove(typed_name) interface.rerun() return file, load_raw, apply_filter, kind, should_save, typed_name, info
st.write(fig)

st.header('Top 5 dangerous streets by affected type')
# the selectbox options must match the strings compared below, otherwise the
# pedestrian branch can never be reached
select = st.selectbox('Affected type of people', ['Pedestrians', 'Cyclists', 'Motorists'])
if select == 'Pedestrians':
    st.write(original_data.query('injured_pedestrians >= 1')[['on_street_name', 'injured_pedestrians']]
             .sort_values(by=['injured_pedestrians'], ascending=False).dropna(how='any')[:5])
elif select == 'Cyclists':
    st.write(original_data.query('injured_cyclists >= 1')[['on_street_name', 'injured_cyclists']]
             .sort_values(by=['injured_cyclists'], ascending=False).dropna(how='any')[:5])
else:
    st.write(original_data.query('injured_motorists >= 1')[['on_street_name', 'injured_motorists']]
             .sort_values(by=['injured_motorists'], ascending=False).dropna(how='any')[:5])

if st.checkbox('Show Raw Data', False):
    st.subheader('Raw Data')
    st.write(data)
def recommendation(data): ### color = st.color_picker('Pick A Color', '#95D6A4') img_fed_up = Image.open("Fed_up/data/samples/logo.png") img_fed_up_sidebar = Image.open("Fed_up/data/samples/sidebar_logo.png") st.image(img_fed_up, width=200) st.sidebar.image(img_fed_up_sidebar, width=200) st.sidebar.markdown("# Here are your results!") st.sidebar.markdown("#### Feel free to adjust your search.") st.sidebar.markdown(" ") time = st.sidebar.slider("How patient are you today? (Minutes)", 15, 120, 60) steps = st.sidebar.slider("Define complexity? (steps)", 3, 20, 7) number_recipes = st.sidebar.slider("Number of recipes to show", 5, 20, 5) filtered_df = data[data.minutes < time] filtered_df = filtered_df[filtered_df.minutes > 10] filtered_df = filtered_df[filtered_df.n_steps < steps - 1] ### List of recipe recommendations on main window here ### recipes_picked = {} headers = [i for i in filtered_df["name"]] ingredients = [i.split("',") for i in filtered_df["ingredients"]] steps_todo = [i.split("',") for i in filtered_df["steps"]] urls = [i for i in filtered_df["image_url"]] rating_avg = [i for i in filtered_df["rating_mean"]] rating_qty = [i for i in filtered_df["rating_count"]] minutes_list = [i for i in filtered_df["minutes"]] calories = [int(i) for i in filtered_df["calories"]] total_fat = [i for i in filtered_df["total_fat"]] saturated_fat = [i for i in filtered_df["saturated_fat"]] sugar = [i for i in filtered_df["sugar"]] sodium = [i for i in filtered_df["sodium"]] protein = [i for i in filtered_df["protein"]] carbs = [i for i in filtered_df["carbohydrates"]] for i in range(number_recipes): #################### ### Recipe Title ### #################### st.header(headers[i].replace(" s ", "'s ").upper()) head_col_1, head_col_2 = st.beta_columns([6, 2]) ####################### ### Picture & Stats ### ####################### with head_col_1: response_pic = requests.get( urls[i] ) #"https://cdn.pixabay.com/photo/2017/06/01/18/46/cook-2364221_1280.jpg") img = Image.open(BytesIO(response_pic.content)) st.image(img, width=500) with head_col_2: st.write(f'Calories: {calories[i]} Cal') st.write(f'Total fat: {total_fat[i]} %*') st.write(f'Saturated fat: {saturated_fat[i]} %*') st.write(f'Sugar: {sugar[i]} %*') st.write(f'Sodium: {sodium[i]} %*') st.write(f'Protein: {protein[i]} %*') st.write(f'Carbohydrates: {carbs[i]} %*') ############################## #### Add, Minutes, rating #### ############################## check, minutes, rating = st.beta_columns(3) with check: check_box = st.checkbox(f'Add no. {i+1} to selection') if check_box: recipes_picked[f'{i+1}'] = 1 else: recipes_picked[f'{i+1}'] = 0 with minutes: st.write(f'{int(minutes_list[i])} minutes to prepare') with rating: st.write( f'{round(float(rating_avg[i]),2)} Stars on {int(rating_qty[i])} reviews.' 
)

        ##############################
        ### Ingredients and Steps ####
        ##############################
        ing, steps = st.beta_columns([2, 4])
        with ing:
            st.subheader('Ingredients:')
            for j in ingredients[i]:
                ji = j.replace("[", "").replace("]", "").replace("'", "")
                st.text(ji.capitalize())
        with steps:
            st.subheader('Directions:')
            for index, step in enumerate(steps_todo[i]):
                step = step.replace("[", "").replace("]", "").replace("'", "")
                st.write(f'{index+1}: {step.capitalize()}')

    st.sidebar.subheader("Your selection:")
    for i in range(len(recipes_picked)):
        if recipes_picked[f'{i+1}'] == 1:
            st.sidebar.write(headers[i].replace(" s ", "'s ").capitalize())

    st.write("\* refers to the average person with a calorie intake of 2000 calories per day.")
    # the filtered dataframe can be inspected with: st.write(filtered_df.head(5))
    return
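# The split("',") parsing above only works because the ingredient and step
# lists were saved as stringified Python lists; ast.literal_eval parses them
# back directly and removes the need for the bracket/quote replace() clean-up.
# A sketch assuming the same dataframe columns:
import ast

ingredients = [ast.literal_eval(i) for i in filtered_df["ingredients"]]
steps_todo = [ast.literal_eval(s) for s in filtered_df["steps"]]
# each entry is now a clean list of strings, ready for st.text / st.write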
opacity=0.4, stroked=True, filled=True, radius_scale=100, radius_min_pixels=4, radius_max_pixels=40, line_width_min_pixels=0.5, get_radius='ncasualty', get_fill_color=[252, 136, 3], get_line_color=[255, 0, 0], tooltip="test test", ), ], )) st.subheader(f'Map of terrorist activities in {year}') if st.checkbox('Show sample raw data'): st.write(map_data[0:10]) # Key takeaways st.markdown( '''Up to 2011, the number of attacks never exceeded 5000 per year. However, there has been a major increase in the number of attacks since 2012, with a spike to almost 17k attacks in 2014''') # Bar graph to visualize terrorism around the globe year_casualties = data2.groupby('iyear').agg({ 'eventid': 'count', 'ncasualty': 'sum' }).rename(columns={'eventid': 'Number of attacks'}) year_casualties['Casualties/attack'] = year_casualties[
st.subheader("Breakdown by minute between %i:00 and %i:00"% (hour,(hour+1) %24)) filtered=data[ (data['date/time'].dt.hour>= hour) & (data['date/time'].dt.hour<(hour+1)) ] hist=np.histogram(filtered['date/time'].dt.minute,bins=60,range=(0,60))[0] chart_data=pd.DataFrame({'minute':range(60),'crashes':hist}) fig=px.bar(chart_data,x='minute',y='crashes',hover_data=['minute','crashes'],height=400) st.write(fig) st.header("Top 5 dangerous streets by affected type") select=st.selectbox('Affected type of people',['Pedestrians','Cyclists','Motorists']) if select =='Pedestrians': st.write(original_data.query("injured_pedestrians>=1")[["on_street_name","injured_pedestrians"]].sort_values(by=['injured_pedestrians'],ascending=False).dropna(how='any')[:5]) elif select =='Cyclists': st.write(original_data.query("injured_cyclist>=1")[["on_street_name","injured_cyclist"]].sort_values(by=['injured_cyclist'],ascending=False).dropna(how='any')[:5]) else: st.write(original_data.query("injured_motorists>=1")[["on_street_name","injured_motorists"]].sort_values(by=['injured_motorists'],ascending=False).dropna(how='any')[:5]) if st.checkbox("ShowRaw Data", False): st.subheader('Raw Data') st.write(data)