chem_data = pd.json_normalize(chem_data) chem_data = chem_data.fillna(0) #Adding chem_data to original DataFrame data = data.join(chem_data) #Modifying Data According To The Needs data = data.drop(columns=["Materials Id"]) #Writing The Data Into The App st.subheader("Data Information") st.dataframe(data) st.write(data.describe()) #Displaying The Bar Graph chart = st.bar_chart(data) #Taking User Input def get_user_input(): #No. Of Atoms Chosen By User li = st.sidebar.slider("Number Of Lithium Atoms", 0, 20, 2) mn = st.sidebar.slider("Number Of Manganese Atoms", 0, 20, 1) si = st.sidebar.slider("Number Of Silicon Atoms", 0, 20, 2) o = st.sidebar.slider("Number Of Oxygen Atoms", 0, 50, 8) fe = st.sidebar.slider("Number Of Iron Atoms", 0, 10, 1) co = st.sidebar.slider("Number Of Cobalt Atoms", 0, 10, 0) formation_energy = st.sidebar.slider("Formation Energy In eV", -3.0, 0.0, -2.61) e_above_hull = st.sidebar.slider("Energy Above Hull in eV", 0.0, 0.2, 0.0582)
# Pause briefly, then celebrate and announce that the results can be viewed.
time.sleep(5)
st.balloons()
st.success('The results are ready for viewing')
time.sleep(3)

# Show the first 20 rows of the credit-card dataset.
my_dataframe = pd.read_csv('dataset/creditcard.csv', nrows=20)
st.dataframe(my_dataframe)

# Work on a copy that keeps only the first column and the last two columns.
d_copy = my_dataframe.copy()
d_copy = d_copy.drop(columns=d_copy.columns[1:-2])
st.dataframe(d_copy)

# Render the reduced table with each chart type in turn.
st.area_chart(d_copy)
st.bar_chart(d_copy)
st.altair_chart(d_copy)

# Class index with the highest predicted score for every input row.
y_pred = np.argmax(bilstm_model.predict(pad_sequences), axis=1)

# Report the label of the first prediction once the user asks for it.
if st.button("Predict"):
    if y_pred[0] == 0:
        st.write("Fraud")
    elif y_pred[0] == 1:
        st.write("Normal")
def main():
    """Render the "Termômetro da Crise Covid 19" Streamlit page.

    Loads ``base_ajustada.csv`` (``;`` separated, ``,`` as decimal mark),
    derives count/percentage tables for several survey columns and lays out
    the sidebar, the explanatory text, the interactive tables and the charts.
    """
    df = pd.read_csv('base_ajustada.csv', sep=';', decimal=',')
    total = df.shape[0]

    def share_table(column, count_label):
        """Return value counts of ``column`` plus their percentage of all rows."""
        counts = column.value_counts()
        return pd.DataFrame({
            count_label: counts,
            '%_Partic': (counts / total) * 100,
        })

    por_sexo = share_table(df.SEXO, 'sexo')
    faixa = share_table(df.FAIXA_ETARIA, 'faixa_etaria')
    apoio_isol = share_table(df.APOIO_ISOL_SOCIAL, 'apoio_isol')
    comp_fam = share_table(df.COMP_FAM_ISOL_SOCIAL, 'comp_fam')

    # Side-by-side counts of the two isolation measures.
    cruz = pd.DataFrame({
        'comp_fam': df.COMP_FAM_ISOL_SOCIAL.value_counts(),
        'apoio_isol': df.APOIO_ISOL_SOCIAL.value_counts(),
    })

    # Respondent attributes used for the sex x age-range cross table.
    part = pd.DataFrame({
        'id': df.ID,
        'sexo': df.SEXO,
        'faixa_etaria': df.FAIXA_ETARIA,
    })

    # --- Sidebar: logos and credits ---------------------------------------
    sidebar = st.sidebar
    sidebar.image('logo.png', width=200)
    sidebar.image('Instituto-Olhar-Azul.png', width=200)
    sidebar.markdown(
        '**Créditos:** Instituto Olhar https://www.institutoolhar.com.br/')
    sidebar.markdown(
        '**Veja a pesquisa na íntegra em:** https://blog.institutoolhar.com.br/termometro-da-crise-do-covid-19-3a-onda-rmbh/'
    )

    # --- Introduction -------------------------------------------------------
    st.image('3onda.png', width=500)
    st.title('AceleraDev Data Science')
    st.header('**Termômetro da Crise Covid 19**')
    st.subheader(
        'Região Metropolitana de Belo Horizonte - 16 a 20 de Abril/2020')
    st.markdown(
        'O Instituto Olhar – Pesquisa e Informação Estratégica é uma empresa que se dedica a projetos de pesquisa e inteligência de mercado para auxiliar empresas e organizações governamentais e não-governamentais em seus processos de planejamento e tomada de decisão.'
    )
    st.subheader('Dimensões do Termômetro')
    st.markdown(
        'O Termômetro da Crise Covid-19 é formado por quatro medidas independentes, que variam de 0 a 10, sendo 0 para discorda totalmente e 10 para apoia totalmente:'
    )
    st.markdown(
        '**Isolamento Social**, que mede o quanto as pessoas apoiam e estão comprometidas com o isolamento; **Medo e da Presença do Covid-19** mede o quanto as pessoas estão temerosas e sentem a presença do Covid-19; **Atuação dos Governos** que mede a avaliação da população sobre a atuação dos governos Federal, Estadual e Municipais e **Economia**, referente a percepção sobre o impacto econômico gerado pelo Covid-19, em nível Mundial, Brasil, Estadual, Municipal e na Renda Familiar.'
    )
    st.markdown(
        'Neste exercício trabalharemos com a medida **Isolamento Social**.')

    # --- Raw data -----------------------------------------------------------
    st.subheader('Conhecendo os dados')
    st.markdown('')
    st.markdown('**Visualizando as informações**')
    if st.checkbox("Clique aqui para exibir dados"):
        st.write(df)
    st.write('Quantidade de pessoas entrevistadas:', total)
    st.markdown('')

    # --- Participation share by sex or age range ----------------------------
    st.subheader('**Percentual de participação dos entrevistados:**')
    recorte = st.radio('Escolha por:', ('Por Sexo', 'Por Faixa Etária'))
    if recorte == 'Por Sexo':
        st.write(por_sexo)
        st.bar_chart(por_sexo['%_Partic'])
    elif recorte == 'Por Faixa Etária':
        st.write(faixa)
        st.bar_chart(faixa['%_Partic'])
    st.markdown('')
    st.write(pd.crosstab(part['sexo'], part['faixa_etaria'], margins=True))

    # --- Behaviour towards social isolation ---------------------------------
    st.subheader('**Comportamento perante ao isolamento social**:')
    st.text('*Em quantidade de entrevistados, variando termômetro de 0 a 10')
    medida = st.radio(
        'Escolha por:',
        ('Apoio ao isolamento social', 'Comprometimento da família'))
    if medida == 'Apoio ao isolamento social':
        st.write(apoio_isol)
        st.area_chart(apoio_isol['%_Partic'])
    elif medida == 'Comprometimento da família':
        st.write(comp_fam)
        st.area_chart(comp_fam['%_Partic'])
    st.markdown('')

    # --- Family commitment versus support -----------------------------------
    st.subheader('**Comprometimento Familiar x Apoio ao Isolamento Social**')
    st.text('*Em quantidade de entrevistados, variando termômetro de 0 a 10')
    st.write(cruz)
    st.line_chart(cruz)

    st.markdown('**Matriz de Correlação:**')
    sns.heatmap(cruz.corr(),
                vmin=0,
                vmax=1,
                fmt='.2f',
                square=True,
                linewidths=1,
                annot=True)
    st.pyplot()

    st.markdown('**Distribuição dos dados categóricos:**')
    sns.catplot(x="FAIXA_ETARIA",
                y="ID",
                hue="SEXO",
                kind='swarm',
                data=df,
                height=10,
                aspect=0.6)
    st.pyplot()

    st.markdown(
        '**Veja a pesquisa na íntegra em:** https://blog.institutoolhar.com.br/termometro-da-crise-do-covid-19-3a-onda-rmbh/'
    )

    # --- Sidebar footer -----------------------------------------------------
    sidebar.markdown('')
    sidebar.markdown('')
    sidebar.markdown('Desenvolvido por:')
    sidebar.markdown('**Juliana Figueiras de Souza**')
    sidebar.markdown('AceleraDev Data Science')
DATE_COLUMN = 'date/time'
DATA_URL = ('https://s3-us-west-2.amazonaws.com/'
            'streamlit-demo-data/uber-raw-data-sep14.csv.gz')


@st.cache
def load_data(nrows):
    """Read the first ``nrows`` rows of ``DATA_URL`` into a DataFrame.

    Column names are lower-cased and the ``DATE_COLUMN`` column is converted
    to datetimes before the frame is returned.
    """
    frame = pd.read_csv(DATA_URL, nrows=nrows)
    frame.rename(columns=lambda name: str(name).lower(), inplace=True)
    frame[DATE_COLUMN] = pd.to_datetime(frame[DATE_COLUMN])
    return frame


# Show a placeholder while loading, then overwrite it once the data is ready.
data_load_state = st.text('Loading data...')
data = load_data(10000)
data_load_state.text("Done! (using st.cache)")

if st.checkbox('Show raw data'):
    st.subheader('Raw data')
    st.write(data)

# One bin per hour of the day.
st.subheader('Number of pickups by hour')
pickup_hours = data[DATE_COLUMN].dt.hour
hist_values = np.histogram(pickup_hours, bins=24, range=(0, 24))[0]
st.bar_chart(hist_values)

# Some number in the range 0-23
hour_to_filter = st.slider('hour', 0, 23, 17)
filtered_data = data[pickup_hours == hour_to_filter]

st.subheader(f'Map of all pickups at {hour_to_filter}:00')
st.map(filtered_data)
def main():
    """Render an exploratory-data-analysis page for an uploaded CSV file.

    After a file is uploaded the page shows: its dimensions, a preview of the
    first rows, per-column dtype / missing-value information, descriptive
    statistics, a histogram with skew/kurtosis for one numeric column,
    an aggregate (mean / median / standard deviation) of the numeric columns,
    and a mean/median imputation of the columns whose missing percentage is
    within a user-chosen limit, offered through ``get_table_download_link``.

    Nothing below the uploader is rendered until a file has been chosen.
    """
    st.image('logo.png', width=200)
    st.title('AceleraDev Data Science')
    st.subheader('Semana 3 - Análise de dados exploratória')

    file_in = st.file_uploader('Escolha um arquivo "csv":', type='csv')
    if file_in is None:
        return

    df = st.cache(pd.read_csv)(file_in)
    n_lin, n_col = df.shape
    if n_lin == 0:
        # Every section below needs at least one data row (row slider,
        # percentages, histogram), so stop early instead of crashing.
        st.warning('O arquivo não contém linhas de dados.')
        return

    # Per-column overview: dtype, number of missing values and their share.
    na_count = df.isna().sum()
    aux = pd.DataFrame({
        'types': df.dtypes,
        'NA #': na_count,
        'NA %': (na_count / n_lin * 100)
    })
    aux.reset_index(inplace=True)
    aux.rename(columns={'index': 'names'}, inplace=True)

    num_cols = list(aux[aux['types'] != 'object']['names'])
    cat_cols = list(aux[aux['types'] == 'object']['names'])

    st.markdown('**Número de linhas e colunas**')
    st.markdown('{} linhas e {} colunas'.format(n_lin, n_col))

    # Preview at most 100 rows. The default must not exceed the maximum,
    # and a slider needs a non-empty range, so a one-row file skips it.
    max_lin_slider = min(n_lin, 100)
    if max_lin_slider > 1:
        rows_to_show = st.slider(
            'Escolha a quantidade de linhas para espiar:',
            min_value=1,
            max_value=max_lin_slider,
            value=min(10, max_lin_slider))
    else:
        rows_to_show = max_lin_slider
    st.dataframe(df.head(rows_to_show))

    st.markdown(f'**Nomes das {len(cat_cols)} colunas categóricas:**')
    st.text(str(cat_cols).strip('[]').replace('\'', ''))
    st.markdown(f'**Nomes das {len(num_cols)} colunas núméricas:**')
    st.text(str(num_cols).strip('[]').replace('\'', ''))

    st.markdown('**Informações das colunas:**')
    st.table(aux)

    st.markdown('**Detalhes das colunas numéricas:**')
    described = df.describe()
    if st.checkbox('Transpor detalhes'):
        described = described.T
    st.dataframe(described)

    # --- Univariate analysis ------------------------------------------------
    st.subheader('Análise univariada')
    selected_column = st.selectbox(
        'Escolha uma coluna para análise univariada:', num_cols)
    column = df[selected_column]
    n_unique = column.nunique()
    hist_bin = 100 if n_unique <= 100 else round(n_unique / 10)
    # np.histogram cannot derive a range from NaN values, so drop them first.
    hist_values = np.histogram(column.dropna(), bins=hist_bin)
    hist_frame = pd.DataFrame(hist_values[0], index=hist_values[1][:-1])
    st.bar_chart(hist_frame)

    col_describe = column.describe()
    col_more = pd.Series(
        {
            'skew': column.skew(),
            'kurtosis': column.kurtosis()
        },
        name=col_describe.name)
    st.write(pd.concat([col_more, col_describe]))

    # --- Aggregates of the numeric columns ----------------------------------
    st.subheader('Cálculos das colunas numéricas:')
    # Display label -> DataFrame method name. The method is looked up with
    # getattr instead of building source code for exec().
    calc_methods = {
        'Média': 'mean',
        'Mediana': 'median',
        'Desvio Padrão': 'std'
    }
    calc_choosed = st.selectbox(
        'Escolha o tipo de cálculo para as colunas numéricas:',
        ['', *calc_methods])
    if calc_choosed != '':
        st.table(getattr(df[num_cols], calc_methods[calc_choosed])())

    st.markdown('**Percentual dos dados faltantes:**')
    st.table(aux[aux['NA #'] != 0][['types', 'NA %']])

    # --- Imputation ---------------------------------------------------------
    st.subheader('Imputação de dados numéricos faltantes')
    percentage = st.slider(
        'Escolha o limite percentual faltante das colunas a serem prenchidas:',
        min_value=0,
        max_value=100,
        value=0)
    col_list = list(aux[aux['NA %'] <= percentage]['names'])
    select_method = st.radio('Escolha um método de preenchimento:',
                             ('Média', 'Mediana'))
    candidates = df[col_list]
    # numeric_only keeps text columns from raising; they stay unfilled.
    if select_method == 'Média':
        fill_values = candidates.mean(numeric_only=True)
    else:
        fill_values = candidates.median(numeric_only=True)
    imputed_df = candidates.fillna(fill_values)

    impputed_exploration = pd.DataFrame({
        'names': imputed_df.columns,
        'types': imputed_df.dtypes,
        'NA #': imputed_df.isna().sum(),
        'NA %': (imputed_df.isna().sum() / n_lin * 100)
    })
    st.table(impputed_exploration[
        impputed_exploration['types'] != 'object']['NA %'])

    st.subheader('Arquivo com os dados imputados:')
    st.markdown(get_table_download_link(imputed_df), unsafe_allow_html=True)
st.write(''' ## 显示面积图 streamlit.area_chart(data=None, width=0, height=0, use_container_width=True) ''') chart_data = pd.DataFrame(np.random.randn(20, 3), columns=['a', 'b', 'c']) st.area_chart(chart_data) st.write(''' ## 显示条形图 streamlit.area_chart(data=None, width=0, height=0, use_container_width=True) ''') chart_data = pd.DataFrame(np.random.randn(20, 3), columns=['a', 'b', 'c']) st.bar_chart(chart_data) st.write(''' ## 显示matplotlib.pyplot图 streamlit.pyplot(fig=None, clear_figure=None, **kwargs) * Matplotlib支持几种不同类型的“后端”。如果使用Matplotlib和Streamlit遇到错误,请尝试将后端设置为“ TkAgg” * `echo "backend: TkAgg" >> ~/.matplotlib/matplotlibrc` ''') import matplotlib.pyplot as plt import numpy as np arr = np.random.normal(1, 1, size=100) plt.hist(arr, bins=20) st.pyplot() st.write('''
def main():
    """Data-mining app: explore and plot a CSV chosen from ``./datasets``.

    The page lets the user pick a file, inspect rows, columns, shape, dtypes
    and summary statistics, draw correlation / count / pie / barh / custom
    plots, look at the features and the target (the last column), and build
    a zip archive of the selected file.
    """
    st.title("資料探勘App")
    st.subheader("製作第一個Data App")

    html_temp = """
    <div style="background-color:tomato;"><p style="color:white;font-size:40px;"> App by Streamlit</p></div>
    """
    st.markdown(html_temp, unsafe_allow_html=True)

    def file_selector(folder_path='./datasets'):
        """Let the user pick a file in ``folder_path`` and return its path."""
        filenames = os.listdir(folder_path)
        selected_filename = st.selectbox('Select a file', filenames)
        return os.path.join(folder_path, selected_filename)

    filename = file_selector()
    st.write('You selected `%s`' % filename)
    df = pd.read_csv(filename)

    # Correlations are only defined for numeric columns; restricting the
    # frame up front avoids an error when the dataset has text columns.
    numeric_df = df.select_dtypes(include='number')

    # Show the dataset
    if st.checkbox("顯示資料集"):
        # Integer bounds make number_input return an int, which is what
        # DataFrame.head requires (the float default raises TypeError).
        number = st.number_input("Number of Rows to View",
                                 min_value=1,
                                 value=5,
                                 step=1)
        st.dataframe(df.head(int(number)))

    # Show the column names
    if st.button("顯示欄位"):
        st.write(df.columns)

    # Show the shape of the dataset
    if st.checkbox("資料集維度"):
        st.write(df.shape)
        data_dim = st.radio("Show Dimension by", ("Rows", "Columns"))
        if data_dim == 'Rows':
            st.text("Number of Rows")
            st.write(df.shape[0])
        elif data_dim == 'Columns':
            st.text("Number of Columns")
            st.write(df.shape[1])

    # Show only the columns the user selects
    if st.checkbox("顯示選擇欄位"):
        selected_columns = st.multiselect('Select', df.columns.tolist())
        st.dataframe(df[selected_columns])

    # Data types
    if st.button("資料型態"):
        st.write(df.dtypes)

    # Value counts of the last column
    if st.button("計數"):
        st.text("Value Counts By Target/Class")
        st.write(df.iloc[:, -1].value_counts())

    # Summary statistics
    if st.checkbox("摘要"):
        st.write(df.describe())

    st.subheader("Data Visualization")

    # Correlation plot with Matplotlib
    if st.checkbox("Correlation Plot [Matplotlib]"):
        plt.matshow(numeric_df.corr())
        st.pyplot()

    # Correlation plot with Seaborn
    if st.checkbox("Correlation Plot with Annotation[Seaborn]"):
        st.write(sns.heatmap(numeric_df.corr(), annot=True))
        st.pyplot()

    # Count plots
    if st.checkbox("Plot of Value Counts"):
        st.text("Value Counts By Target/Class")
        all_columns_names = df.columns.tolist()
        primary_col = st.selectbox('Select Primary Column To Group By',
                                   all_columns_names)
        selected_column_names = st.multiselect('Select Columns',
                                               all_columns_names)
        if st.button("Plot"):
            st.text("Generating Plot for: {} and {}".format(
                primary_col, selected_column_names))
            if selected_column_names:
                vc_plot = df.groupby(
                    primary_col)[selected_column_names].count()
            else:
                # Nothing selected: fall back to counts of the last column.
                vc_plot = df.iloc[:, -1].value_counts()
            st.write(vc_plot.plot(kind='bar'))
            st.pyplot()

    # Pie plot of the last column
    if st.checkbox("Pie Plot"):
        if st.button("Generate Pie Plot"):
            st.write(df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%"))
            st.pyplot()

    # Horizontal bar plot
    if st.checkbox("BarH Plot"):
        all_columns_names = df.columns.tolist()
        st.info("Please Choose the X and Y Column")
        x_column = st.selectbox('Select X Columns For Barh Plot',
                                all_columns_names)
        y_column = st.selectbox('Select Y Columns For Barh Plot',
                                all_columns_names)
        if st.button("Generate Barh Plot"):
            # Build the figure only on request so that reruns triggered by
            # the select boxes do not leave unused figures behind.
            barh_plot = df.plot.barh(x=x_column, y=y_column, figsize=(10, 10))
            st.write(barh_plot)
            st.pyplot()

    # Custom plots
    st.subheader("Customizable Plots")
    all_columns_names = df.columns.tolist()
    type_of_plot = st.selectbox("Select the Type of Plot",
                                ["area", "bar", "line", "hist", "box", "kde"])
    selected_column_names = st.multiselect('Select Columns To Plot',
                                           all_columns_names)

    if st.button("Generate Plot"):
        st.success("Generating A Customizable Plot of: {} for :: {}".format(
            type_of_plot, selected_column_names))
        if not selected_column_names:
            # Plotting an empty selection raises inside pandas/matplotlib.
            st.warning("Select at least one column to plot")
        else:
            cust_data = df[selected_column_names]
            # Chart types drawn natively by Streamlit.
            streamlit_charts = {
                'area': st.area_chart,
                'bar': st.bar_chart,
                'line': st.line_chart,
            }
            if type_of_plot in streamlit_charts:
                streamlit_charts[type_of_plot](cust_data)
            else:
                # Remaining types go through pandas/Matplotlib; histograms
                # keep the two-bin setting used by this app.
                plot_kwargs = {'bins': 2} if type_of_plot == 'hist' else {}
                st.write(cust_data.plot(kind=type_of_plot, **plot_kwargs))
                st.pyplot()

    st.subheader("Our Features and Target")

    if st.checkbox("Show Features"):
        # Every column except the last one is treated as a feature.
        all_features = df.iloc[:, 0:-1]
        st.text('Features Names:: {}'.format(all_features.columns))
        st.dataframe(all_features.head(10))

    if st.checkbox("Show Target"):
        all_target = df.iloc[:, -1]
        st.text('Target/Class Name:: {}'.format(all_target.name))
        st.dataframe(all_target.head(10))

    # Make a downloadable file as zip, since markdown strips to html
    st.markdown("""[google.com](iris.zip)""")
    st.markdown("""[google.com](./iris.zip)""")

    def makezipfile(data):
        """Zip the file at path ``data`` and return the archive's file name."""
        output_filename = '{}_zipped.zip'.format(data)
        with ZipFile(output_filename, "w") as z:
            z.write(data)
        return output_filename

    if st.button("Download File"):
        DOWNLOAD_TPL = f'[{filename}]({makezipfile(filename)})'
        st.text(DOWNLOAD_TPL)
        st.markdown(DOWNLOAD_TPL)
] novas_colunas.insert(0, 'Taxonom') return novas_colunas app = app_hub() option = st.sidebar.selectbox('Escolha o exercicio: ', app.dicionario) 'Voce selecionou a opcao: ', option if option == app.dicionario[0]: st.write( 'Para cada coluna identique a quantidade de linhas com dados faltantes (em alguns casos, o dado faltante é uma string vazia, em outros casos é uma string contendo algum valor do tipo: "sem informação"). Faça um método que retorna a média de dados faltantes por coluna' ) st.markdown('### Valores vazios ou faltantes: ') st.bar_chart(app.count_nulls(app.construir(app.linhas))) if st.checkbox('Mostrar lista de dados faltantes'): st.write(app.count_nulls(app.construir(app.linhas))) st.markdown('### Porcetagem de valores faltantes por coluna: ') st.bar_chart(app.media_nulls(app.count_nulls(app.construir(app.linhas)))) if st.checkbox('Mostrar lista de porcetagem'): st.write( list(app.media_nulls(app.count_nulls(app.construir(app.linhas))))) if option == app.dicionario[2]: mapa_bio = pd.read_csv('Arquivos/mapa_biodiversidade.csv', header=0) st.write( 'Monte filtros de ocorrências por estados, nome de espécie (nome exato ou parte do nome) e categoria de ameaça, e outros filtros que julgar relevante.' ) municipios = st.multiselect("Escolha os municipios", list(set(mapa_bio['Municipio'])), ["Londrina"])
columns=["stock 1", "stock 2"], index=pd.date_range("1/2/2011", periods=20, freq="M"), ) st.line_chart(chart_data2) st.subheader("Example of area chart") with st.echo(): st.area_chart(chart_data) st.subheader("Example of bar chart") with st.echo(): trimmed_data = chart_data[["pv", "uv"]].iloc[:10] st.bar_chart(trimmed_data) st.subheader("Matplotlib") st.write("You can use Matplotlib in Streamlit. " "Just use `st.pyplot()` instead of `plt.show()`.") try: # noqa: F401 with st.echo(): from matplotlib import cm, pyplot as plt from mpl_toolkits.mplot3d import Axes3D # Create some data X, Y = np.meshgrid(np.arange(-5, 5, 0.25), np.arange(-5, 5, 0.25)) Z = np.sin(np.sqrt(X**2 + Y**2))
def main():
    """Simple EDA app built from Streamlit components.

    A sidebar menu switches between: a home page, automated profiling
    (pandas-profiling and Sweetviz), custom plots of an uploaded CSV, and a
    binary-classification demo on ``mushrooms.csv`` offering SVM, logistic
    regression and random-forest models with optional metric plots.
    """
    menu = ["Home", 'EDA', "Sweetviz", "Custom Analysis", "ML", "About"]
    choice = st.sidebar.selectbox("Menu", menu)

    if choice == "Home":
        image = Image.open('Data_science.jpg')
        st.image(image, caption='commons.wikimedia.org')
        st.markdown(
            "Download here [data set](https://drive.google.com/file/d/1MAjahv92AkpGQ6-fPFrJbSXM8PkY6_00/view?usp=sharing) for checking stuff"
        )

    if choice == "EDA":
        st.title("Automated EDA with Pandas")
        st.markdown("You can upload your data in 'csv' format")
        data_file = st.file_uploader("Uplod CSV",
                                     type=['csv'],
                                     encoding=None,
                                     key='a')
        if data_file is not None:
            df = pd.read_csv(data_file)
            st.dataframe(df.head())
            profile = ProfileReport(df)
            st_profile_report(profile)

    elif choice == "Sweetviz":
        st.subheader("Automated EDA with Sweetviz")
        st.markdown("You can upload your data in 'csv' format")
        data_file = st.file_uploader("Uplod CSV",
                                     type=['csv'],
                                     encoding=None,
                                     key='a')
        if data_file is not None:
            df = pd.read_csv(data_file)
            st.dataframe(df.head())
            st.subheader("Analysis data with plots")
            if st.button("Sweetviz Report"):
                report = SV.analyze(df)
                report.show_html()
                st_display_sweetviz("SWEETVIZ_REPORT.html")

    elif choice == 'Custom Analysis':
        st.subheader("Data Visualization")
        data_file = st.file_uploader("Uplod CSV",
                                     type=['csv'],
                                     encoding=None,
                                     key='a')
        if data_file is not None:
            df = pd.read_csv(data_file)
            st.dataframe(df.head())

            if st.checkbox("Correlation Matrix"):
                # Uploaded files may contain text columns; correlation is
                # only defined for the numeric ones.
                numeric_df = df.select_dtypes(include='number')
                st.write(sns.heatmap(numeric_df.corr(), annot=True))
                st.pyplot()

            if st.checkbox("Pie Chart"):
                all_columns = df.columns.to_list()
                columns_to_plot = st.selectbox("Select one Column",
                                               all_columns)
                pie_plot = df[columns_to_plot].value_counts().plot.pie(
                    autopct="%1.1f%%")
                st.write(pie_plot)
                st.pyplot()

            all_columns = df.columns.to_list()
            type_of_plot = st.selectbox(
                "Select Type of Plot",
                ['Area', 'Line', 'Bar', 'hist', 'box', 'kde'])
            selected_col_names = st.multiselect('Select Columns To plot Data',
                                                all_columns)

            if st.button("Produce Plot"):
                st.success(
                    f"Creating Customizable Plot of {type_of_plot} for {selected_col_names}"
                )
                custom_data = df[selected_col_names]
                # Chart types drawn natively by Streamlit; everything else
                # is passed to pandas' Matplotlib plotting as ``kind``.
                streamlit_charts = {
                    'Area': st.area_chart,
                    'Line': st.line_chart,
                    'Bar': st.bar_chart,
                }
                if type_of_plot in streamlit_charts:
                    streamlit_charts[type_of_plot](custom_data)
                elif type_of_plot:
                    custom_plt = custom_data.plot(kind=type_of_plot)
                    st.write(custom_plt)
                    st.pyplot()

    elif choice == "ML":
        st.title("Binary Classification")
        st.markdown("The is an basic idea about ML")
        st.sidebar.title("Binary Classification Web App")
        st.markdown("Are Mushrooms edible or poisonous? 🍄")

        # One shared set of metric names. The multiselect widgets and
        # plot_metrics must use identical strings, otherwise a selected
        # metric is silently never plotted.
        metric_options = ("Confusion Matrix", "ROC Curve",
                          "Precision-Recall Curve")

        @st.cache(persist=True)
        def load_data():
            """Read ``mushrooms.csv`` and label-encode every column."""
            data = pd.read_csv("mushrooms.csv")
            labelEncoder = LabelEncoder()
            for col in data.columns:
                data[col] = labelEncoder.fit_transform(data[col])
            return data

        @st.cache(persist=True)
        def split(df):
            """Split ``df`` 80/20 into train/test features and ``type`` target."""
            y = df.type
            X = df.drop("type", axis=1)
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, random_state=0)
            return X_train, X_test, y_train, y_test

        def plot_metrics(model, metrics_list):
            """Plot each metric named in ``metrics_list`` for the fitted ``model``."""
            if "Confusion Matrix" in metrics_list:
                st.subheader("Confusion Matrix")
                plot_confusion_matrix(model, X_test, y_test)
                st.pyplot()
            if "ROC Curve" in metrics_list:
                st.subheader("ROC Curve")
                plot_roc_curve(model, X_test, y_test)
                st.pyplot()
            if "Precision-Recall Curve" in metrics_list:
                st.subheader("Precision-Recall Curve")
                plot_precision_recall_curve(model, X_test, y_test)
                st.pyplot()

        def train_and_report(model, metrics_list):
            """Fit ``model``, print accuracy/precision/recall, then plot metrics."""
            model.fit(X_train, y_train)
            accuracy = model.score(X_test, y_test)
            y_pred = model.predict(X_test)
            st.write("Accuracy: ", accuracy.round(2))
            st.write(
                "Precision: ",
                precision_score(y_test, y_pred, labels=class_names).round(2))
            st.write(
                "Recall: ",
                recall_score(y_test, y_pred, labels=class_names).round(2))
            plot_metrics(model, metrics_list)

        df = load_data()
        class_names = df['type']

        # NOTE(review): the original indentation is not recoverable here;
        # the four summary checkboxes below are assumed to belong to the
        # "Show row data" section - confirm against the intended layout.
        if st.sidebar.checkbox("Show row data", False):
            st.subheader("Mushroom Data Set (Classification)")
            st.write(df)
            st.write("The shape of data", df.shape)
            st.markdown(
                "This [data set](https://archive.ics.uci.edu/ml/datasets/Mushroom) includes descriptions of hypothetical samples corresponding to 23 species of gilled mushrooms "
                "in the Agaricus and Lepiota Family (pp. 500-525). Each species is identified as definitely edible, definitely poisonous, "
                "or of unknown edibility and not recommended. This latter class was combined with the poisonous one."
            )
            if st.checkbox("Show Summary"):
                st.write(df.describe().T)
            if st.checkbox("Show Columns"):
                st.write(df.columns.to_list())
            if st.checkbox("Select Columns To See Values"):
                all_columns = df.columns.to_list()
                selected_col = st.multiselect("Select Columns", all_columns)
                st.dataframe(df[selected_col])
            if st.checkbox("Show value counts"):
                st.write(df.iloc[:, 0].value_counts())

        X_train, X_test, y_train, y_test = split(df)

        st.sidebar.subheader("Choose a Classifier")
        Classifier = st.sidebar.selectbox(
            "Classifier", ("Support Vector Machine (SVM)",
                           "Logistic Regession", "Random Forest"))

        if Classifier == 'Support Vector Machine (SVM)':
            st.sidebar.subheader('Model Hyperparameters')
            C = st.sidebar.number_input("C (Regularization parameter)",
                                        0.01,
                                        10.0,
                                        step=0.01,
                                        key='C_SVM')
            kernel = st.sidebar.radio("Kernel", ("rbf", "linear"),
                                      key='kernel')
            gamma = st.sidebar.radio("Gamma (Kernel Coefficient)",
                                     ("scale", "auto"),
                                     key='gamma')
            metrics = st.sidebar.multiselect("Whitch metrics to plot?",
                                             metric_options)
            if st.sidebar.button("Classify", key="classify"):
                st.subheader("Support Vector Machine (SVM) Results")
                train_and_report(SVC(C=C, kernel=kernel, gamma=gamma),
                                 metrics)

        if Classifier == 'Logistic Regession':
            st.sidebar.subheader("Model Hyperparameters")
            C = st.sidebar.number_input("C (Regularization parameter)",
                                        0.01,
                                        10.0,
                                        step=0.01,
                                        key='C_LR')
            max_iter = st.sidebar.slider("Maximum Number of iterations",
                                         100,
                                         500,
                                         key='max_iter')
            metrics = st.sidebar.multiselect("Which metrics to plot?",
                                             metric_options)
            if st.sidebar.button("Classify", key='Classify'):
                st.subheader("Logistc Regression Results")
                train_and_report(
                    LogisticRegression(C=C, penalty='l2', max_iter=max_iter),
                    metrics)

        if Classifier == 'Random Forest':
            st.sidebar.subheader("Model Hyperparameters")
            n_estimators = st.sidebar.number_input(
                "The number of trees in the forest",
                10,
                5000,
                key='n_estimators')
            # Each widget needs its own key; reusing 'n_estimators' here
            # would clash with the widget above.
            max_depth = st.sidebar.number_input(
                "The maximum depth of the tree",
                1,
                20,
                step=1,
                key='max_depth')
            bootstrap = st.sidebar.radio(
                "Bootstrap samples when builidng tees", ("True", 'False'),
                key='bootstrap')
            metrics = st.sidebar.multiselect("Which metrics to plot?",
                                             metric_options)
            if st.sidebar.button("Classify", key='classify'):
                st.subheader("Random Forest Classifer Results")
                train_and_report(
                    RandomForestClassifier(
                        n_estimators=n_estimators,
                        max_depth=max_depth,
                        # The radio yields the strings "True"/"False";
                        # the estimator expects a real boolean.
                        bootstrap=(bootstrap == "True"),
                        n_jobs=-1),
                    metrics)
# # ------------------- Analise Data by Hour of Day -------------------------------------------------------------------- # st.subheader('Analysis of Data by Hour of Day') report_hist_values = np.histogram(report_df.time.dt.hour, bins=24, range=(0, 24))[0] incident_hist_values = np.histogram(incident_df.time.dt.hour, bins=24, range=(0, 24))[0] st.bar_chart( pd.DataFrame({ 'Reports': report_hist_values, 'Accidents': incident_hist_values, })) # Maps by Hour of Day hour_to_filter = st.slider('hour', 0, 23, 8) # min: 0h, max: 23h, default: 17h period = st.slider('period', 0, 23, 1) # min: 0h, max: 23h, default: 17h filtered_report_df = report_df[ (report_df.time.dt.hour >= hour_to_filter - period) & (report_df.time.dt.hour <= hour_to_filter + period)] report_map_data = filtered_report_df.loc[:, ['lat', 'lng']].copy() report_map_data.columns = ['lat', 'lon'] filtered_incident_df = incident_df[
DATA_URL = ('https://s3-us-west-2.amazonaws.com/'
            'streamlit-demo-data/uber-raw-data-sep14.csv.gz')


@st.cache
def load_data(nrows):
    """Read the first ``nrows`` rows of ``DATA_URL`` into a DataFrame.

    Column names are lower-cased and the ``DATE_TIME`` column is converted
    to datetimes before the frame is returned.
    """
    data = pd.read_csv(DATA_URL, nrows=nrows)
    data.rename(columns=lambda name: str(name).lower(), inplace=True)
    data[DATE_TIME] = pd.to_datetime(data[DATE_TIME])
    return data


data = load_data(100000)

# ``dt.hour`` only takes the values 0-23, so the slider stops at 23; an upper
# bound of 24 offered an hour that could never match any row.
hour = st.slider('selecciona la hora', 0, 23, 10, 1)

# From here on ``data`` holds only the rows of the selected hour.
data = data[data[DATE_TIME].dt.hour == hour]

if st.checkbox('Vista de datos'):
    st.subheader('Datos de solo %sh' % hour)
    st.write(data)

# One bin per minute of the selected hour.
st.subheader('Datos por minutos a %sh' % hour)
st.bar_chart(
    np.histogram(data[DATE_TIME].dt.minute, bins=60, range=(0, 60))[0])

st.subheader('Mapa de datos a %sh' % hour)
st.map(data)
def _get_deltas_that_melt_dataframes(self):
    """Return three one-argument callables: line, bar and area chart.

    Each callable forwards the dataframe it receives to the matching
    ``st.*_chart`` function and returns that call's result.
    """

    def line(df):
        return st.line_chart(df)

    def bar(df):
        return st.bar_chart(df)

    def area(df):
        return st.area_chart(df)

    return [line, bar, area]
import streamlit as st

# Bar chart of six values under the series name "d6".
st.bar_chart({"d6": [1, 5, 2, 6, 2, 1]})

# Collapsible section holding the explanation, a picture and its credit.
explanation = st.expander("See explanation")
with explanation:
    st.write(
        """
        The chart above shows some numbers I picked for you.
        I rolled actual dice for these, so they're *guaranteed* to
        be random.
    """
    )
    st.image("https://static.streamlit.io/examples/dice.jpg", width=200)
    st.markdown("Photo by [@brett_jordon](https://unsplash.com/photos/4aB1nGtD_Sg)")
CANVAS_SIZE = 192

# Two columns: the drawing canvas goes in the first, the preview in the second.
col1, col2 = st.beta_columns(2)

with col1:
    # White free-hand strokes on a black square canvas.
    canvas = st_canvas(fill_color='#000000',
                       stroke_width=20,
                       stroke_color='#FFFFFF',
                       background_color='#000000',
                       width=CANVAS_SIZE,
                       height=CANVAS_SIZE,
                       drawing_mode='freedraw',
                       key='canvas')

if canvas.image_data is not None:
    # Shrink the drawing to 28x28, then blow it back up (nearest neighbour)
    # so the preview shows exactly the pixels handed to the model.
    img = cv2.resize(canvas.image_data.astype(np.uint8), dsize=(28, 28))
    preview_img = cv2.resize(img,
                             dsize=(CANVAS_SIZE, CANVAS_SIZE),
                             interpolation=cv2.INTER_NEAREST)
    col2.image(preview_img)

    # Grayscale, reshaped to (-1, 28, 28, 1) for the model call.
    x = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).reshape((-1, 28, 28, 1))
    y = model.predict(x).squeeze()

    st.write(f'## Result: {np.argmax(y):d}')
    st.bar_chart(y)
def app():
    """Render the "Salas Collaborate" usage report page.

    Downloads the session export from a Google Sheets CSV link, drops the
    sessions of the excluded owners, and shows: an optional bar chart of
    minutes per session owner, the reporting period, the number of distinct
    rooms and minutes used by the selected owner, sidebar totals over all
    owners, and an optional table of the selected owner's rooms.
    """
    st.markdown(
        '<style>div[data-baseweb="select"] > div {text-transform: capitalize;}body{background-color:#008357;}</style>',
        unsafe_allow_html=True)
    st.markdown(
        "<h3 style='text-align: center; color: green;'>Salas Collaborate</h3>",
        unsafe_allow_html=True)

    df = pd.read_csv(
        'https://docs.google.com/spreadsheets/d/12D4hfpuIkT7vM69buu-v-r-UYb8xx4wM1zi-34Fs9ck/export?format=csv&gid=1912357955'
    )
    df = df.sort_values(by=['SessionOwner'])

    # Drop every row whose session owner is in the exclusion list.
    options = ['josemarcucci']
    df = df.loc[~df['SessionOwner'].isin(options)]
    countries = df['SessionOwner'].unique()

    # Attendance time is a duration, so parse it as a timedelta. Reading it
    # as a clock time breaks for sessions of 24 hours or more and turns the
    # whole sum into NaN when a single value is missing.
    session_minutes = pd.to_timedelta(
        df['AttendeeTotalTimeInSession']).dt.total_seconds() / 60
    df['Minutos'] = session_minutes.round()
    totales = df.groupby("SessionOwner")['Minutos'].sum()

    buff, col, buff2 = st.beta_columns([1, 3, 1])

    # NOTE(review): the bare expressions below are only rendered where
    # Streamlit "magic" is active; they are kept exactly as written.
    '## Tipo de plataforma'
    if st.checkbox('Ver comparativo UAs'):
        st.bar_chart(totales)
    '## Tipo de plataforma'
    country = buff.selectbox('Usuario BBC o plataforma', countries)
    df[df['SessionOwner'] == country]

    # Rows of the selected owner. An additional ``startswith(country)`` mask
    # was always true for these rows, so the equality test alone suffices.
    above_352 = df["SessionOwner"] == country
    selected = df[above_352]
    dupli = selected.drop_duplicates(subset=['SessionName'])

    # Minutes for the selected owner and for all owners.
    times = session_minutes[above_352].sum()
    timesu = session_minutes.sum()

    # Distinct (non-missing) session names: selected owner / all owners.
    aulas = selected['SessionName'].nunique()
    aulast = df['SessionName'].nunique()

    def format_day(moment):
        """Format a timestamp as dd-mm-yy; empty string when it is missing."""
        return '' if pd.isna(moment) else moment.strftime('%d-%m-%y')

    # Take min/max on real datetimes. Comparing the formatted 'dd-mm-yy'
    # strings is lexicographic and picks wrong dates across month borders.
    room_opened = pd.to_datetime(df['RoomOpened'])
    minValue = format_day(room_opened.min())
    maxValue = format_day(room_opened.max())
    df['RoomOpened'] = room_opened.dt.strftime('%d-%m-%y')

    st.write('Período:', minValue, ' al ', maxValue)
    st.write('Salas: ', aulas)
    st.write('Minutos usados: ', round(times, 1))

    st.sidebar.markdown(
        "<h3 style='text-align: left; color: black;font-weight:500;'>Minutos y salas (Semanal)</h3>",
        unsafe_allow_html=True)
    st.sidebar.write('Minutos: ', round(timesu, 1))
    st.sidebar.write('Salas: ', aulast)
    st.sidebar.markdown(
        "<h3 style='text-align: left; color: black;font-weight:500;'>Minutos totales</h3>",
        unsafe_allow_html=True)
    # NOTE(review): 23883910 is presumably the minute total accumulated
    # before this export - confirm the source of the constant.
    st.sidebar.write('Minutos: ', 23883910 + round(timesu, 1))

    # Blank index labels so the table shows no row numbers.
    dupli.index = [""] * len(dupli)
    if st.checkbox('Mostrar Salas'):
        dupli = dupli.sort_values(by=['RoomClosed'])
        st.table(dupli[['RoomClosed', 'SessionName']])
if st.sidebar.button('Generate Charts'):
    st.write(f"Fetching data for the {option!s} {codeSelect[:-1]!s}!")

    # Anything other than 'Traded Stock Codes' is fetched with index=True.
    history_kwargs = dict(symbol=option, start=startDate, end=endDate)
    if codeSelect != 'Traded Stock Codes':
        history_kwargs['index'] = True
    data = get_history(**history_kwargs)

    # Single-series charts: (heading, chart function, column of ``data``).
    single_charts = (
        ("### Closing Price Chart", st.line_chart, "Close"),
        ("### Opening Price Chart", st.line_chart, "Open"),
        ("### High Price Chart", st.bar_chart, "High"),
        ("### Low Price Chart", st.bar_chart, "Low"),
    )
    for heading, draw, column in single_charts:
        st.write(heading)
        draw(getattr(data, column))

    # Multi-series line charts: heading -> {legend label: column of ``data``}.
    combined_charts = (
        ("### Opening/Closing Price Chart",
         {"Opening": "Open", "Closing": "Close"}),
        ("### High/Low Price Chart",
         {"High": "High", "Low": "Low"}),
        ("### Combined Price Chart",
         {"Opening": "Open", "Closing": "Close", "High": "High", "Low": "Low"}),
    )
    for heading, labelled in combined_charts:
        st.write(heading)
        # The values are stacked side by side into a fresh frame, so the
        # chart is drawn against a plain 0..n-1 index.
        values = np.column_stack(
            [getattr(data, column) for column in labelled.values()])
        st.line_chart(pd.DataFrame(values, columns=list(labelled)))

    st.write("### Volume")
    st.line_chart(data.Volume)
import numpy as np import pandas as pd import streamlit as st with st.echo(code_location="below"): st.title('Python Project NYPD statistics') st.write("This is simple project with Python using NYPD data about incidents in New York city. Dataset is about gun involving accidents in period 2006-2019. https://www1.nyc.gov/site/nypd/stats/crime-statistics/crime-statistics-landing.page") a = pd.read_csv('NYPD_Shooting_Incident_Data__Historic_.csv', sep=';', header=0) st.write('Amount of attackers by race in New York city total by years') # FIRST CHART WITH AMOUNT OF ATTACKS WITH GUNS TOTAL OVER YEARS suspects = pd.DataFrame({'lab': ['BLACK', 'ASIAN', 'WHITE HISPANIC', 'ASIAN / PACIFIC ISLANDER', 'BLACK HISPANIC', 'UNKNOWN'], 'Amount people attacks per race total:': [a.PERP_RACE.str.count("BLACK").sum(), a.PERP_RACE.str.count( "ASIAN").sum(), a.PERP_RACE.str.count("WHITE HISPANIC").sum(), a.PERP_RACE.str.count("ASIAN / PACIFIC ISLANDER").sum(), a.PERP_RACE.str.count("BLACK HISPANIC").sum(), a.PERP_RACE.str.count("UNKNOWN").sum(), ]}) st.bar_chart(suspects.set_index('lab')) # SECOND CHART WITH AMOUNT OF VICTIMS TOTOL OVER YEARS victims = pd.DataFrame({'lab': ['BLACK', 'ASIAN', 'WHITE HISPANIC', 'ASIAN / PACIFIC ISLANDER', 'BLACK HISPANIC', 'UNKNOWN'], 'Peole race which was attacked per race total:': [a.VIC_RACE.str.count("BLACK").sum(), a.VIC_RACE.str.count( "ASIAN").sum(), a.VIC_RACE.str.count("WHITE HISPANIC").sum(), a.VIC_RACE.str.count("ASIAN / PACIFIC ISLANDER").sum(), a.VIC_RACE.str.count("BLACK HISPANIC").sum(), a.VIC_RACE.str.count("UNKNOWN").sum(), ]}) st.write('Amount of victims by race in New York city total by years') st.bar_chart(victims.set_index('lab')) # MAP DATA, st.write("Map of areas where acts took place") data = pd.DataFrame({ 'lat': a.Latitude, 'lon': a.Longitude
import streamlit as st
import numpy as np

# Shape of the random matrix that is plotted: 10 rows, 5 series.
CHART_SHAPE = (10, 5)

st.title("Wilde-Daten.de")
st.text(" Hier entsteht die Homepage von Wilde-Daten")
st.title("Ein Zufallsdiagramm")

# Keep the handle returned by Streamlit so the chart can be redrawn below.
chart = st.bar_chart(np.random.rand(*CHART_SHAPE))

# On click, draw a fresh random matrix through the chart's own handle.
if st.button("Zufallsgenerator!"):
    chart.bar_chart(np.random.rand(*CHART_SHAPE))
def app():
    """Render the "c4project - Black Lives Matter" page.

    Shows, in order:

    * the tweets read from ``sample_tweets.csv`` and the links read from
      ``urls.csv``, each preceded by a row count;
    * a bar chart of the TextBlob polarity of every entry in the ``Tweet``
      column, followed by a line chart of the ``Time`` column;
    * a spiral scatter plot driven by two sliders (number of points and
      number of turns).

    No matplotlib figure is created here: every chart goes through
    Streamlit or Altair, so nothing has to be closed between reruns.
    """
    st.title('c4project - Black Lives Matter')
    st.text("")
    st.text("")
    """ """

    data = pd.read_csv('sample_tweets.csv')
    st.write(f"There are {data.shape[0]} tweets available on BLM")
    st.dataframe(data, width=None, height=None)
    st.text("")
    st.text("")
    st.text("")

    data2 = pd.read_csv('urls.csv')
    st.write(f"There are {data2.shape[0]} urls associated with the tweets")
    st.dataframe(data2, width=None, height=None)
    """ """

    # One row per tweet: its polarity and the tweet text itself.
    sentiment_df = pd.DataFrame(
        [[blob.sentiment.polarity, str(blob)]
         for blob in map(TextBlob, data['Tweet'])],
        columns=["polarity", "tweet"])

    st.text("")
    st.text("")
    st.text("")
    st.write("Current Sentiments on Tweets on BLM as of 2 PM CST")
    st.bar_chart(sentiment_df.polarity)
    st.text("")
    st.text("")
    st.text("")
    st.line_chart(data.Time)

    total_points = st.slider("Number of points in tweets", 1, 5000, 200)
    num_turns = st.slider("Number of turns in spiral", 1, 100, 9)

    Point = namedtuple('Point', 'x y')
    points_per_turn = total_points / num_turns

    # Kept separate from ``data`` so the tweets frame is not shadowed.
    points = []
    for curr_point_num in range(total_points):
        # Which turn the point is on, and its position within that turn.
        curr_turn, i = divmod(curr_point_num, points_per_turn)
        angle = (curr_turn + 1) * 2 * math.pi * i / points_per_turn
        # The radius grows linearly from 0 towards 1 with the point number.
        radius = curr_point_num / total_points
        points.append(
            Point(radius * math.cos(angle), radius * math.sin(angle)))

    st.altair_chart(
        alt.Chart(pd.DataFrame(points), height=500,
                  width=500).mark_circle(color='#0068c9',
                                         opacity=0.5).encode(x='x:Q', y='y:Q'))
def inicializar(self, app=app_hub()):
    """Draw the page for the exercise currently stored in ``self.option``.

    ``self.option`` is compared with ``app.dicionario[0]`` .. ``[3]`` in turn
    and every matching exercise is rendered with Streamlit widgets:

    0. missing values per column, as counts or as ``app.media_nulls`` of
       those counts; the choice is stored in ``self.option_01``;
    1. a bar chart of ``verificaTaxonomia`` applied to the built table;
    2. a deck.gl map of ``Arquivos/mapa_biodiversidade.csv`` filtered by the
       selected municipalities;
    3. a static image.

    NOTE(review): the default ``app`` is created once, when this method is
    defined, and is shared by every call that omits the argument.
    """
    opcoes = [
        'Quantidade de valores nao preenchidos',
        'Porcetagem de dados faltantes por coluna'
    ]
    count_label, percent_label = opcoes

    # Small helpers; each call rebuilds its result, exactly where it is used.
    def build_table():
        return app.construir(app.linhas)

    def null_counts():
        return app.count_nulls(build_table())

    def null_share():
        return app.media_nulls(null_counts())

    # Exercise 01: empty / missing values
    if self.option == app.dicionario[0]:
        self.option_01 = st.selectbox('Exercicio 01: Escolha o grafico:',
                                      opcoes)
        st.write(
            'Para cada coluna identique a quantidade de linhas com dados faltantes (em alguns casos, o dado faltante é uma string vazia, em outros casos é uma string contendo algum valor do tipo: "sem informação"). Faça um método que retorna a média de dados faltantes por coluna'
        )

        if self.option_01 == count_label:
            st.markdown('### Valores vazios ou faltantes: ')
            st.bar_chart(null_counts())
            if st.checkbox('Mostrar lista de dados faltantes'):
                st.write(null_counts())

        if self.option_01 == percent_label:
            st.markdown('### Porcetagem de valores faltantes por coluna: ')
            st.bar_chart(null_share())
            if st.checkbox('Mostrar lista de porcetagem'):
                st.write(list(null_share()))

    # Exercise 02: taxonomic level
    if self.option == app.dicionario[1]:
        st.write(
            'Para cada item identifique até qual nível taxônomico a ocorrência foi identificada.'
        )
        # verificaTaxonomia is a helper imported from elsewhere in the project.
        st.bar_chart(verificaTaxonomia(build_table()))
        if st.checkbox('Valores por Coluna'):
            st.write(verificaTaxonomia(build_table()))

    # Exercise 03: filters
    if self.option == app.dicionario[2]:
        mapa_bio = pd.read_csv('Arquivos/mapa_biodiversidade.csv', header=0)
        st.write(
            'Monte filtros de ocorrências por estados, nome de espécie (nome exato ou parte do nome) e categoria de ameaça, e outros filtros que julgar relevante.'
        )
        municipios = st.multiselect("Escolha os municipios",
                                    list(set(mapa_bio['Municipio'])),
                                    ["Londrina"])
        data = mapa_bio.loc[mapa_bio['Municipio'].isin(municipios)]

        # Initial camera position of the map.
        viewport = {
            'latitude': -23.37,
            'longitude': -51.28,
            'zoom': 11,
            'pitch': 50,
        }
        # Extruded hexagon bins plus the raw points, both fed with ``data``.
        hexagon_layer = {
            'type': 'HexagonLayer',
            'data': data,
            'radius': 200,
            'elevationScale': 4,
            'elevationRange': [0, 1000],
            'pickable': True,
            'extruded': True,
        }
        scatter_layer = {
            'type': 'ScatterplotLayer',
            'data': data,
        }
        st.deck_gl_chart(viewport=viewport,
                         layers=[hexagon_layer, scatter_layer])

        if st.checkbox('Mostrar dados'):
            st.dataframe(data)

    # Exercise 04: geocode - check whether the data match
    if self.option == app.dicionario[3]:
        st.image('Arquivos/Erro.jpg')
# Create a text element and let the reader know the data is loading. data_load_state = st.text('Loading data...') # Load 10,000 rows of data into the dataframe. data = load_data() # Notify the reader that the data was successfully loaded. data_load_state.text("Done! (using st.cache)") NOW = dt.datetime(2011,12,10) segmented_rfm = buildRFM(data, NOW) st.subheader('RFM Score') st.write(segmented_rfm.head(5)) st.subheader('Monetary value') st.bar_chart(segmented_rfm.groupby('RFMScore').agg('monetary_value').mean()) st.subheader('Frequency') st.bar_chart(segmented_rfm.groupby('RFMScore').agg('frequency').mean()) st.subheader('Recency') st.bar_chart(segmented_rfm.groupby('RFMScore').agg('recency').mean()) df = pd.read_excel('Online Retail.xlsx', dtype={'CustomerID': str, 'InvoiceID': str}, parse_dates=['InvoiceDate'], infer_datetime_format=True) df.dropna(subset=['CustomerID'], inplace=True) n_orders = df.groupby(['CustomerID'])['InvoiceNo'].nunique() mult_orders_perc = np.sum(n_orders > 1) / df['CustomerID'].nunique() df = df[['CustomerID', 'InvoiceNo', 'InvoiceDate']].drop_duplicates() df['order_month'] = df['InvoiceDate'].dt.to_period('M') df['cohort'] = df.groupby('CustomerID')['InvoiceDate'] \ .transform('min') \
def main():
    """Semi Automated ML App with Streamlit.

    The sidebar selects one of two activities:

    * ``EDA``   -- upload a CSV/TXT file and inspect it: shape, columns,
      summary, a column subset, value counts of the last column,
      correlation plots and a pie plot.
    * ``Plots`` -- upload a CSV/TXT/XLSX file, optionally plot the value
      counts of the last column, and build a custom plot from the selected
      columns.

    Every matplotlib/seaborn plot is drawn on its own figure, handed to
    ``st.pyplot`` explicitly and closed afterwards.  Correlations are
    computed on the numeric and boolean columns only.
    """

    def show(fig):
        # Render one figure and release it so reruns do not pile figures up.
        st.pyplot(fig)
        plt.close(fig)

    def numeric_corr(frame):
        # Restrict to columns a correlation is defined for.
        return frame.select_dtypes(include=['number', 'bool']).corr()

    activities = ["EDA", "Plots"]
    choice = st.sidebar.selectbox("Select Activities", activities)

    if choice == 'EDA':
        st.subheader("Exploratory Data Analysis")
        data = st.file_uploader("Upload a Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            # Used by several widgets below, so it must not depend on the
            # "Show Columns" checkbox being ticked.
            all_columns = df.columns.to_list()

            if st.checkbox("Show Shape"):
                st.write(df.shape)

            if st.checkbox("Show Columns"):
                st.write(all_columns)

            if st.checkbox("Summary"):
                st.write(df.describe())

            if st.checkbox("Show Selected Columns"):
                selected_columns = st.multiselect("Select Columns",
                                                  all_columns)
                st.dataframe(df[selected_columns])

            if st.checkbox("Show Value Counts"):
                st.write(df.iloc[:, -1].value_counts())

            if st.checkbox("Correlation Plot(Matplotlib)"):
                fig, ax = plt.subplots()
                ax.matshow(numeric_corr(df))
                show(fig)

            if st.checkbox("Correlation Plot(Seaborn)"):
                fig, ax = plt.subplots()
                sns.heatmap(numeric_corr(df), annot=True, ax=ax)
                show(fig)

            if st.checkbox("Pie Plot"):
                column_to_plot = st.selectbox("Select 1 Column", all_columns)
                fig, ax = plt.subplots()
                df[column_to_plot].value_counts().plot.pie(
                    autopct="%1.1f%%", ax=ax)
                show(fig)

    elif choice == 'Plots':
        st.subheader("Data Visualization")
        data = st.file_uploader("Upload a Dataset",
                                type=["csv", "txt", "xlsx"])
        if data is not None:
            # The uploader accepts workbooks too, which read_csv cannot parse.
            is_excel = str(getattr(data, "name", "")).lower().endswith(".xlsx")
            df = pd.read_excel(data) if is_excel else pd.read_csv(data)
            st.dataframe(df.head())

            if st.checkbox("Show Value Counts"):
                fig, ax = plt.subplots()
                df.iloc[:, -1].value_counts().plot(kind='bar', ax=ax)
                show(fig)

            # Customizable Plot
            all_columns_names = df.columns.tolist()
            type_of_plot = st.selectbox(
                "Select Type of Plot",
                ["area", "bar", "line", "hist", "box", "kde"])
            selected_columns_names = st.multiselect("Select Columns To Plot",
                                                    all_columns_names)

            if st.button("Generate Plot"):
                if not selected_columns_names:
                    st.warning("Select at least one column to plot")
                else:
                    st.success(
                        "Generating Customizable Plot of {} for {}".format(
                            type_of_plot, selected_columns_names))
                    cust_data = df[selected_columns_names]

                    # Plot types Streamlit draws natively; the remaining
                    # ones go through pandas/matplotlib.
                    native_charts = {
                        'area': st.area_chart,
                        'bar': st.bar_chart,
                        'line': st.line_chart,
                    }
                    if type_of_plot in native_charts:
                        native_charts[type_of_plot](cust_data)
                    elif type_of_plot:
                        fig, ax = plt.subplots()
                        cust_data.plot(kind=type_of_plot, ax=ax)
                        show(fig)
st.write("Updated dataframe")
st.write(weed_df)

# Vega-Lite spec: one circle per "year_month" value at the mean of "sold".
mean_sold_spec = {
    'mark': 'circle',
    'encoding': {
        'x': {
            "field": "year_month",
            "type": "temporal"
        },
        'y': {
            "aggregate": "mean",
            "field": "sold",
            "type": "quantitative"
        },
    },
}
st.vega_lite_chart(weed_df, mean_sold_spec)

st.write("Chart of sold")
# Only the region and sold columns are handed to the bar chart.
bar_df = pd.DataFrame(weed_df.loc[:, ['region', 'sold']])
st.bar_chart(bar_df)

st.title("This one you can filter stuff with")
region_choices = np.unique(weed_df['region'])
option = st.multiselect("Select a region", region_choices)
# Bare expression on purpose: Streamlit "magic" displays it.
'You selected:', option

# Rows whose region is among the selected ones.
in_selected_regions = weed_df['region'].isin(option)
st.dataframe(weed_df[in_selected_regions])
def _data_app_show_figure(fig):
    """Render one matplotlib figure in the app and close it afterwards."""
    st.pyplot(fig=fig)
    plt.close(fig)


def _data_app_load_sheet():
    """Show the upload and sheet widgets; return the chosen sheet as a frame.

    Returns ``None`` while nothing has been uploaded or when the workbook
    cannot be read (the read error is reported in the app).
    """
    filename = st.file_uploader("Upload Excel File", type=['xls', 'xlsx'])
    if filename is None:
        return None
    try:
        # Open the workbook once; it provides both the sheet names and data.
        workbook = pd.ExcelFile(filename)
        sheet = st.selectbox("Sheet Sheet", workbook.sheet_names)
        return workbook.parse(sheet_name=sheet)
    except Exception as exc:  # UI boundary: report the problem, do not crash
        st.error(f"Could not read the uploaded workbook: {exc}")
        return None


def _data_app_overview(df):
    """Render the tabular inspection widgets (rows, columns, types, summary)."""
    if st.checkbox("Show Dataset"):
        number = st.number_input("Number of Rows to View", 5, 10)
        st.dataframe(df.head(number))

    if st.button("Column Names"):
        st.write(df.columns)

    if st.checkbox("Shape of Dataset"):
        st.write(df.shape)

    if st.checkbox("Select Columns To Show"):
        selected_columns = st.multiselect("Select", df.columns.tolist())
        st.dataframe(df[selected_columns])

    if st.button("Data Types"):
        st.write(df.dtypes)

    if st.checkbox("Summary"):
        st.write(df.describe().T)


def _data_app_visualizations(df):
    """Render the fixed plots: correlation, pie, value counts and contour."""
    st.subheader("Data Visualization")

    if st.checkbox("Correlation Plot[Seaborn]"):
        fig, ax = plt.subplots()
        # Correlation is only defined for numeric/boolean columns.
        corr = df.select_dtypes(include=['number', 'bool']).corr()
        sns.heatmap(corr, annot=True, ax=ax)
        _data_app_show_figure(fig)

    if st.checkbox("Pie Plot"):
        if st.button("Generate Pie Plot"):
            st.success("Generating A Pie Plot")
            fig, ax = plt.subplots()
            df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%", ax=ax)
            _data_app_show_figure(fig)

    if st.checkbox("Plot of Value Counts"):
        st.text("Value Counts By Target")
        all_columns_names = df.columns.tolist()
        primary_col = st.selectbox("Primary Columm to GroupBy",
                                   all_columns_names)
        selected_columns_names = st.multiselect("Select Columns",
                                                all_columns_names)
        if st.button("Plot"):
            st.text("Generate Plot")
            if selected_columns_names:
                vc_plot = df.groupby(
                    primary_col)[selected_columns_names].count()
            else:
                vc_plot = df.iloc[:, -1].value_counts()
            fig, ax = plt.subplots()
            vc_plot.plot(kind="bar", ax=ax)
            _data_app_show_figure(fig)

    if st.checkbox("Contour Plot "):
        st.text("3D Contour Plot")
        all_columns_names = df.columns.tolist()
        last = len(all_columns_names) - 1
        X = st.selectbox("Select X axis", all_columns_names)
        # Default to the 2nd / 3rd column, clamped for narrow sheets.
        Y = st.selectbox("Select Y axis", all_columns_names,
                         index=max(0, min(1, last)))
        VS = st.selectbox("Select Z axis", all_columns_names,
                          index=max(0, min(2, last)))
        if len({X, Y, VS}) < 3:
            st.warning("Select three different columns for the contour plot")
        else:
            # Rows of Z follow the Y values, columns follow the X values.
            Z_F = df.pivot_table(index=X, columns=Y, values=VS).T.values
            X_F, Y_F = np.meshgrid(np.sort(df[X].unique()),
                                   np.sort(df[Y].unique()))
            fig, ax = plt.subplots(1, 1)
            cp = ax.contourf(X_F, Y_F, Z_F)
            fig.colorbar(cp)  # Add a colorbar to a plot
            _data_app_show_figure(fig)


def _data_app_custom_plot(df):
    """Render the user-configurable plot and the closing button."""
    st.subheader("Customizable Plot")
    all_columns_names = df.columns.tolist()
    type_of_plot = st.selectbox(
        "Select Type of Plot", ["area", "bar", "line", "hist", "box", "kde"])
    selected_columns_names = st.multiselect("Select Columns To Plot",
                                            all_columns_names)

    if st.button("Generate Plot"):
        if not selected_columns_names:
            st.warning("Select at least one column to plot")
        else:
            st.success("Generating Customizable Plot of {} for {}".format(
                type_of_plot, selected_columns_names))
            cust_data = df[selected_columns_names]

            # Plot types Streamlit draws natively; the remaining ones go
            # through pandas/matplotlib.
            native_charts = {
                'area': st.area_chart,
                'bar': st.bar_chart,
                'line': st.line_chart,
            }
            if type_of_plot in native_charts:
                native_charts[type_of_plot](cust_data)
            elif type_of_plot:
                fig, ax = plt.subplots()
                cust_data.plot(kind=type_of_plot, ax=ax)
                _data_app_show_figure(fig)

    if st.button("Ready to ML !"):
        st.balloons()


def data_app():
    """ Data Processer and Visualizer

    Lets the user upload an Excel workbook and pick a sheet, then offers
    tabular inspection, a set of fixed plots and a customizable plot for
    that sheet.  Until a sheet has been loaded only a single hint is shown;
    the analysis widgets appear once data is available, so none of them can
    run against a missing frame.
    """
    st.title("Data Cake")
    st.subheader("A to Z Data Analysis")

    df = _data_app_load_sheet()
    if df is None:
        st.info("Please upload Excel file")
    else:
        _data_app_overview(df)
        _data_app_visualizations(df)
        _data_app_custom_plot(df)

    st.sidebar.header("Data Cake")
    st.sidebar.info("Built by Veera Ragavan")
retweets = spark_twitter.count_retweet(tweets)
tags = spark_twitter.count_tags(tweets)

if retweets != 0:
    t = f"<div><span class='highlight blue'><span class='bold'>Numero retweets relativi all'argomento: {retweets!s} </span></span></div>"
    st.markdown(t, unsafe_allow_html=True)

if len(tags) > 0:
    df_tags = pd.DataFrame(tags, columns=['Tags', 'occurences'])
    # Use the tag column as the index so the tags label the x axis.
    df_tags = df_tags.rename(columns={'Tags': 'index'}).set_index('index')
    df_tags_limit = df_tags.head(20)
    st.dataframe(df_tags_limit)
    # The tags already live in the index; chart only the counts. Asking for a
    # 'Tags' column here would add an all-NaN series to the chart.
    chat_data_tags = df_tags_limit[['occurences']]
    st.bar_chart(chat_data_tags)

dict_result = spark_twitter.word_count(tweets)
if len(dict_result) > 0:
    df = pd.DataFrame(dict_result, columns=['Hashtag', 'occurences'])
    # Use the hashtag column as the index so the hashtags label the x axis.
    df = df.rename(columns={'Hashtag': 'index'}).set_index('index')
    st.dataframe(df)
    df_limit = df.head(10)
    # Same as above: the hashtags are the index, so keep only the counts.
    chart_data = df_limit[['occurences']]
#%%
import altair as alt

#%%
# The stock picker lives in the sidebar.
selected_stock = st.sidebar.selectbox("Please selet the stock", Stocks)

st.subheader('Selected Stock')
st.line_chart(df2[selected_stock])

n_test = st.sidebar.slider("Months of prediction:", 1, 40)
# Each selected month is counted as 30 periods.
period = n_test * 30

#%%
st.subheader(f'{selected_stock!s} Monthly Return Data')

# Resample to monthly frequency (forward-filled), then take the
# period-over-period percentage change.
monthly_prices = df3[selected_stock].resample('M').ffill()
monthly_return = monthly_prices.pct_change()
df6 = monthly_return.to_frame()
st.bar_chart(df6)

#%%
st.header('The results of the forecast will be ready in a minute')

df2 = df2.reset_index()

# install prophet by: conda install -c conda-forge prophet

# forecasting
# Training frame: the date and the selected stock, renamed to "ds" and "y".
training_columns = {"Date": "ds", selected_stock: "y"}
df_train = df2[list(training_columns)].rename(columns=training_columns)

m = Prophet()
def bar_of_nulls(data):
    """Write a heading and a bar chart of the null count per column of ``data``."""
    st.write('Missing Values')

    # One entry per column of ``data``: how many of its values are null.
    nulls_per_column = data.isnull().sum()

    # A one-column frame, so the chart series is labelled 'Missing values'.
    st.bar_chart(nulls_per_column.to_frame(name='Missing values'))
cols = st.selectbox('Covid metric' , metrics) if cols in metrics: metricstoshow = cols if metricstoshow == 'confirmed': st.title("Confirmed cases") #confirmed_cases = st.slider("Number of confirmed cases", 1 , int(confirmed_melted["confirmed"].max())) fecha = st.selectbox("Select date" , confirmed_melted['fecha'].unique()) pais = st.selectbox("Select country to check data" , confirmed_melted['Country/Region'].unique()) confirmado_hasta_la_fecha = confirmed_melted[confirmed_melted["Country/Region"] == pais][confirmed_melted["fecha"] == fecha] st.text("Casos confirmados a la fecha : " + fecha) st.write(confirmado_hasta_la_fecha) confirmado_por_pais = confirmed_melted[confirmed_melted["Country/Region"] == pais]#[confirmed_melted["fecha"] == fecha] st.header("Casos de Covid en " + pais) st.bar_chart(confirmado_por_pais['confirmed']) st.text("Total de casos en todas las fechas del pais: " + pais) st.write(confirmado_por_pais) total1 = confirmed_melted.loc[confirmed_melted['Country/Region'] == pais][confirmed_melted["fecha"] == fecha]['confirmed'].sum() st.text( "Casos totales en " + pais + " a la fecha : " + fecha) st.text(total1) confirmed_melted['fecha'] = pd.to_datetime(confirmed_melted['fecha'],format= '%m/%d/%y', errors="ignore") # fecha1 = datetime.date(20,1,22) view = pdk.ViewState(latitude=0,longitude=0,zoom=0.2,) covidLayer1 = pdk.Layer( "ScatterplotLayer", data=confirmed_melted, pickable= True, opacity=0.3, stroked=True,
initial_view_state=initial_view_state, layers=[ pdk.Layer( 'HexagonLayer', data=data, get_position='[longitude, latitude]', radius=10000, get_elevation='properties.brightness', elevation_scale=200, # elevation_range=[0, 1000], pickable=True, extruded=True, ), pdk.Layer( 'ScatterplotLayer', data=data, get_position='[longitude, latitude]', get_color='[200, 30, 0, 160]', get_radius=10000, ), ], tooltip=tooltip )) month_data = df[df[DATE_COLUMN].dt.month ==date.month] month_data = month_data.set_index(DATE_COLUMN, drop=True) st.write(f'Fires per day in {month_name}') st.bar_chart(month_data.groupby([month_data.index.day]).count().brightness)