def plot_subr_treemap(filtered_json_week):
    """Build a two-level treemap (subreddit category -> subreddit).

    Args:
        filtered_json_week: JSON input handed unchanged to ``pd.read_json``
            with ``orient='split'``.

    Returns:
        The Plotly Express treemap figure, with tiles sized by the
        ``ISFLAG`` column and coloured by ``subreddit_cat``.
    """
    levels = ('subreddit_cat', 'subreddit')

    frame = pd.read_json(filtered_json_week, orient='split')

    # Aggregate over the whole frame. get_flag_perc is defined elsewhere;
    # presumably it yields one row per (category, subreddit) with an ISFLAG
    # column -- confirm against its definition.
    aggregated = get_flag_perc(frame, groups=list(levels))

    return px.treemap(
        aggregated,
        path=list(levels),
        values='ISFLAG',
        color='subreddit_cat',
    )
def ventas_totales_por_producto(df):
    """Return a treemap of total sales per product, grouped by category.

    Args:
        df: DataFrame containing ``pk_cid``, ``pk_partition`` and every
            column listed in the module-level ``PRODUCTS``.

    Returns:
        A Plotly Express treemap (1000x500) with path
        ``categoria -> Producto`` sized by ``Ventas_totales``.
    """
    # Product -> category lookup, written grouped by category.
    products_by_category = {
        'ahorro': ('long_term_deposit', 'pension_plan', 'short_term_deposit'),
        'inversion': ('funds', 'securities'),
        'financiacion': ('credit_card', 'loans', 'mortgage'),
        'cuentas': (
            'debit_card',
            'em_account_p',
            'em_account_pp',
            'em_acount products',
            'emc_account products',
            'payroll',
            'payroll_account',
            'emc_account',
            'em_acount',
        ),
    }
    category_of = {}
    for category, products in products_by_category.items():
        for product in products:
            category_of[product] = category

    selected = df.loc[:, ["pk_cid", "pk_partition"] + PRODUCTS]

    # Keep only rows whose product columns sum to at least 1, then add up
    # each product per customer.
    with_any_product = selected[selected[PRODUCTS].sum(axis=1) >= 1]
    per_customer = with_any_product.pivot_table(
        values=PRODUCTS, index='pk_cid', aggfunc='sum')

    # Per-customer sums above 3 are replaced by 1; sums of 3 or less are
    # kept unchanged.
    # NOTE(review): the business reason for this threshold is not visible
    # here -- confirm it is intended.
    for product in per_customer.columns:
        held = per_customer[product]
        per_customer[product] = np.where(held > 3, 1, held)

    totals = per_customer.apply(sum).sort_values(ascending=False)
    sales = totals.rename_axis('Producto').reset_index(name='Ventas_totales')
    sales['categoria'] = sales['Producto'].map(category_of)

    return px.treemap(
        sales,
        path=['categoria', 'Producto'],
        values='Ventas_totales',
        width=1000,
        height=500,
    )
def _show_figure(fig):
    """Render a Matplotlib figure on the Streamlit page, then close it."""
    # Pass the figure explicitly: calling st.pyplot() with no argument relies
    # on the global pyplot figure, which Streamlit has deprecated.
    st.pyplot(fig)
    # The script is re-executed on every widget interaction; closing the
    # figure stops pyplot from piling up one open figure per rerun.
    plt.close(fig)


def _plot_time_counts(data):
    """Render stacked count plots of MONTH, DAY_OF_WEEK and HOUR for ``data``."""
    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(7, 12))
    panels = (
        ('MONTH', "firebrick"),
        ('DAY_OF_WEEK', "steelblue"),
        ('HOUR', "mediumseagreen"),
    )
    for ax, (column, colour) in zip(axes, panels):
        sns.countplot(x=column, data=data, color=colour, order=None, ax=ax)
    _show_figure(fig)


def content():
    """Render the "Exploratory Data Analysis" Streamlit page.

    Loads the data with ``get_data()`` and renders, in order: an optional
    preview of the first rows and shape, a button-gated count plot per YEAR,
    count plots by MONTH / DAY_OF_WEEK / HOUR for all rows and for one
    selected year, a treemap of OFFENSE_CODE_GROUP counts, and a Folium
    heatmap of Lat/Long points for one selected offense group.

    Returns:
        None. Everything is written to the Streamlit page.
    """
    st.title('Exploratory Data Analysis 🤔')
    df = get_data()

    if st.checkbox("Show first rows & shape of the data"):
        st.write(df.head())
        st.write(df.shape)

    years = df['YEAR'].sort_values(ascending=True).unique()

    # NOTE(review): the source's indentation was lost, so the extent of this
    # button-gated block is reconstructed (first chart plus its caption).
    # The widgets further down are kept outside it because a selectbox nested
    # under st.button disappears on the rerun that follows a selection.
    # Confirm against the original layout.
    if st.button('Click Here to See Chart '):
        st.subheader("How has crime changed over the years?")
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.countplot(data=df, x='YEAR', palette="coolwarm", ax=ax)
        _show_figure(fig)
        st.markdown(
            'As you can see at the chart above, although the highest crime rates seem to be in 2016 and in 2017, '
            'the fact that the data set is from the 6th month of 2015 and till 10th of 2018 does not make such an inference possible. '
            '(See: Number of Crimes by Month, Day and Hour for Each Years) '
            'In this context, considering that the dataset contains only 6 months of 2015 and only 9 months of 2018, '
            'we can conclude that the number of crimes has not changed significantly according to years.'
        )

    st.subheader("Total Number of Crimes by Month, Day and Hour")
    _plot_time_counts(df)
    # NOTE(review): the source text was split by a line-break character after
    # 'Fridays. '; the two halves are joined here without it.
    st.markdown(
        'When we look at the general picture for all years, we can conclude that crimes are mostly committed in summer. '
        'Although the crime density on the days of the week seem almost equal, we can say that they are committed at a higher level on Fridays. '
        'On the other hand, contrary to the perception that crimes are usually committed at night, when we examine the above chart, '
        'we can say that it is surprisingly more intense between 5 and 7 o clock in the evening.'
        'Considering that the most common crime type is a motor vehicle accident response, '
        'I think that the high traffic and motor vehicle density at the specified time intervals may explain the reason for the high crime rate in this hour interval.'
    )

    st.subheader("Number of Crimes by Month, Day and Hour (Year by Year)")
    year_selecting = st.selectbox('YEAR', years)
    # Filter once and reuse the result for all three panels.
    _plot_time_counts(df[df['YEAR'] == year_selecting])

    st.subheader(
        "What can we say about the distribution of different offenses over the city?"
    )
    nr_crimes = df['OFFENSE_CODE_GROUP'].value_counts()
    counts = nr_crimes.values
    categories = pd.DataFrame(data=nr_crimes.index,
                              columns=["OFFENSE_CODE_GROUP"])
    categories['counts'] = counts
    fig = px.treemap(categories,
                     path=['OFFENSE_CODE_GROUP'],
                     values=counts,
                     height=800,
                     width=1000,
                     title='Top Crimes in Boston',
                     color_discrete_sequence=px.colors.sequential.Brwnyl)
    fig.data[0].textinfo = 'label+value'
    st.plotly_chart(fig)

    st.subheader("Heatmaps")
    offensecodegroups = df.OFFENSE_CODE_GROUP.sort_values(
        ascending=True).unique()
    OCG_selecting = st.selectbox('OFFENSE GROUPS', offensecodegroups)
    # Rows missing coordinates or a district are excluded from the heatmap.
    df_drop = df.dropna(subset=['Lat', 'Long', 'DISTRICT'])
    df_heatmap_mvar = df_drop[df_drop["OFFENSE_CODE_GROUP"] == OCG_selecting]
    map_mvar = folium.Map(location=[42.361145, -71.057083],
                          tiles='cartodbpositron',
                          zoom_start=12)
    # Add a heatmap layer to the base map.
    HeatMap(data=df_heatmap_mvar[['Lat', 'Long']], radius=10).add_to(map_mvar)
    folium_static(map_mvar)
# Keep only rows dated in the window (start_date, start_date + x days].
window_end = start_date + datetime.timedelta(days=x)
df = df[(df['date'] > start_date) & (df['date'] <= window_end)]

# Province selection
new_df = df[df['prname'].isin(pr)]

# Which count column to show
value = st.sidebar.selectbox("Showing", ["numtotal", "numdeaths"])

var = f"This is the trend for {pr}"
st.text(var)

# First plot: selected metric over time, one colour per province
fig = px.scatter(new_df, x='date', y=value, color=new_df['prname'])
st.plotly_chart(fig)

# NOTE(review): var1 is assigned but never displayed -- possibly a missing
# st.text(var1); confirm.
var1 = "This is the distribution in provinces"

# Second plot: treemap of the per-province maximum of the selected metric.
# NOTE(review): drop(index=2) removes whichever row carries label 2 in the
# grouped frame and raises KeyError when that label is absent; which
# province that is depends on the data -- confirm.
df1 = (
    df.groupby(['prname'], as_index=False)[value]
    .max()
    .drop(index=2)
)
fig = px.treemap(df1, path=['prname'], values=value)
st.plotly_chart(fig)

# Third element: table of the selected provinces
st.write(new_df)

# Notes for me:
#   - using Quicklab
#   - possibility to use it for data analysts
#   - Kubernetes
import pandas as pd
import streamlit as st
import plotly_express as px

# Wrap pd.read_csv with Streamlit's cache decorator before loading the file.
cached_read_csv = st.cache(pd.read_csv)
df = cached_read_csv('stonk.csv')

# Treemap hierarchy, outermost level first.
# NOTE(review): tiles are sized by the 'CIK' column, which looks like an
# identifier rather than a measure -- confirm this is intended.
hierarchy = ['sandp', 'GICS Sector', 'GICS Sub-Industry', 'Symbol']
fig = px.treemap(df, path=hierarchy, values='CIK')

st.plotly_chart(fig)
st.write(df)
# Melt the wide sales table to long form: one row per id-column combination
# and 'd' column, with the value stored in 'sold'; rows with missing values
# are dropped.
df = sales.melt(
    id_vars=['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id'],
    var_name='d',
    value_name='sold',
).dropna()

# Left-join the calendar on 'd', then prices on store / item / week.
# wm_yr_wk is not among the melted columns, so it presumably comes from
# `calendar` -- confirm.
df = df.merge(calendar, on='d', how='left')
df = df.merge(prices, on=['store_id', 'item_id', 'wm_yr_wk'], how='left')

# Tree map: number of item_id rows per state / store / category / department,
# under a single constant root level.
group = (
    sales.groupby(['state_id', 'store_id', 'cat_id', 'dept_id'],
                  as_index=False)['item_id']
    .count()
    .dropna()
    .assign(USA='United States of America')
    .rename(columns={
        'state_id': 'State',
        'store_id': 'Store',
        'cat_id': 'Category',
        'dept_id': 'Department',
        'item_id': 'Count',
    })
)

fig = px.treemap(
    group,
    path=['USA', 'State', 'Store', 'Category', 'Department'],
    values='Count',
    color='Count',
    color_continuous_scale=px.colors.sequential.Sunset,
    title='Walmart: Distribution of items',
)
fig.update_layout(template='seaborn')
fig.show()