Esempio n. 1
0
def plot_subr_treemap(filtered_json_week):
    """Return a plotly treemap of flag values per subreddit category/subreddit.

    Args:
        filtered_json_week: JSON in pandas' ``orient='split'`` layout, as
            accepted by ``pd.read_json``.

    Returns:
        The figure produced by ``px.treemap``: tiles are nested by
        ``subreddit_cat`` then ``subreddit``, sized by the ``ISFLAG`` column
        and coloured by ``subreddit_cat``.
    """
    levels = ('subreddit_cat', 'subreddit')
    week_frame = pd.read_json(filtered_json_week, orient='split')

    # Aggregate over the whole frame; get_flag_perc is defined elsewhere and
    # is expected to return a frame carrying both levels plus 'ISFLAG'.
    flag_summary = get_flag_perc(week_frame, groups=list(levels))

    return px.treemap(
        flag_summary,
        path=list(levels),
        values='ISFLAG',
        color='subreddit_cat',
    )
def ventas_totales_por_producto(df):
    """Return a treemap of total sales per product, grouped by category.

    Args:
        df: DataFrame holding ``pk_cid``, ``pk_partition`` and every column
            listed in the module-level ``PRODUCTS``.

    Returns:
        A 1000x500 plotly treemap whose tiles are nested by ``categoria``
        then ``Producto`` and sized by ``Ventas_totales``.
    """
    # Product name -> business category used as the top treemap level.
    categoria_por_producto = {
        'long_term_deposit': 'ahorro',
        'pension_plan': 'ahorro',
        'short_term_deposit': 'ahorro',
        'funds': 'inversion',
        'securities': 'inversion',
        'credit_card': 'financiacion',
        'loans': 'financiacion',
        'mortgage': 'financiacion',
        'debit_card': 'cuentas',
        'em_account_p': 'cuentas',
        'em_account_pp': 'cuentas',
        'em_acount products': 'cuentas',
        'emc_account products': 'cuentas',
        'payroll': 'cuentas',
        'payroll_account': 'cuentas',
        'emc_account': 'cuentas',
        'em_acount': 'cuentas'
    }

    columnas = ["pk_cid", "pk_partition"] + PRODUCTS
    df_productos = df.loc[:, columnas]

    # Keep only rows with at least one product, then add each product column
    # up per customer.
    con_producto = df_productos[PRODUCTS].sum(axis=1) >= 1
    df_permanencia = df_productos[con_producto].pivot_table(
        values=PRODUCTS, index='pk_cid', aggfunc='sum')

    # Per-customer totals above 3 are collapsed to 1; smaller totals are kept
    # unchanged.
    for columna in df_permanencia.columns:
        acumulado = df_permanencia[columna]
        df_permanencia[columna] = np.where(acumulado > 3, 1, acumulado)

    totales = df_permanencia.apply(sum).sort_values(ascending=False)
    df_ventas_totales = pd.DataFrame(totales, columns=['Ventas_totales'])
    df_ventas_totales = df_ventas_totales.reset_index(drop=False)
    df_ventas_totales.columns = ['Producto', 'Ventas_totales']
    df_ventas_totales['categoria'] = df_ventas_totales['Producto'].map(
        categoria_por_producto)

    return px.treemap(
        df_ventas_totales,
        path=['categoria', 'Producto'],
        values='Ventas_totales',
        width=1000,
        height=500,
    )
Esempio n. 3
0
def _plot_month_day_hour_counts(data):
    """Return a figure with stacked count plots of MONTH, DAY_OF_WEEK, HOUR."""
    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(7, 12))
    panels = (
        ('MONTH', "firebrick"),
        ('DAY_OF_WEEK', "steelblue"),
        ('HOUR', "mediumseagreen"),
    )
    for ax, (column, colour) in zip(axes, panels):
        sns.countplot(x=column, data=data, color=colour, ax=ax)
    return fig


def _show_figure(fig):
    """Render *fig* in the Streamlit page and release it from pyplot."""
    # Passing the figure explicitly avoids Streamlit's deprecated reliance on
    # the global pyplot figure; closing it keeps figures from piling up on
    # every script rerun.
    st.pyplot(fig)
    plt.close(fig)


def content():
    """Render the exploratory-data-analysis page.

    Loads the data with ``get_data()`` and writes, in order: an optional
    preview of the frame, an on-demand crimes-per-year chart, month/day/hour
    count plots for all years and for one selected year, a treemap of
    ``OFFENSE_CODE_GROUP`` frequencies, and a folium heat map of the selected
    offense group. Everything is emitted through Streamlit; nothing is
    returned.
    """
    st.title('Exploratory Data Analysis 🤔')

    df = get_data()
    if st.checkbox("Show first rows  & shape of the data"):
        st.write(df.head())
        st.write(df.shape)
    years = df['YEAR'].sort_values(ascending=True).unique()

    if st.button('Click Here to See Chart '):
        fig, ax = plt.subplots(figsize=(10, 6))
        st.subheader("How has crime changed over the years?")
        sns.countplot(data=df, x='YEAR', palette="coolwarm", ax=ax)
        _show_figure(fig)

    st.markdown(
        'As you can see at the chart above, although the highest crime rates seem to be in 2016 and in 2017, the fact that the data set is from the 6th month of 2015 and till 10th of 2018 does not make such an inference possible. (See: Number of Crimes by Month, Day and Hour for Each Years) In this context, considering that the dataset contains only 6 months of 2015 and only 9 months of 2018, we can conclude that the number of crimes has not changed significantly according to years.'
    )

    st.subheader("Total Number of Crimes by Month, Day and Hour")
    _show_figure(_plot_month_day_hour_counts(df))
    st.markdown(
        'When we look at the general picture for all years, we can conclude that crimes are mostly committed in summer. Although the crime density on the days of the week seem almost equal, we can say that they are committed at a higher level on Fridays. On the other hand, contrary to the perception that crimes are usually committed at night, when we examine the above chart, we can say that it is surprisingly more intense between 5 and 7 o clock in the evening.Considering that the most common crime type is a motor vehicle accident response, I think that the high traffic and motor vehicle density at the specified time intervals may explain the reason for the high crime rate in this hour interval.'
    )

    st.subheader("Number of Crimes by Month, Day and Hour (Year by Year)")

    year_selecting = st.selectbox('YEAR', years)
    # Filter once and reuse the same subset for all three panels.
    df_year = df[df['YEAR'] == year_selecting]
    _show_figure(_plot_month_day_hour_counts(df_year))

    st.subheader(
        "What can we say about the distribution of different offenses over the city?"
    )

    nr_crimes = df['OFFENSE_CODE_GROUP'].value_counts()
    categories = pd.DataFrame({
        "OFFENSE_CODE_GROUP": nr_crimes.index,
        "counts": nr_crimes.values,
    })

    fig = px.treemap(categories,
                     path=['OFFENSE_CODE_GROUP'],
                     values='counts',
                     height=800,
                     width=1000,
                     title='Top Crimes in Boston',
                     color_discrete_sequence=px.colors.sequential.Brwnyl)
    fig.data[0].textinfo = 'label+value'
    st.plotly_chart(fig)

    st.subheader("Heatmaps")

    offensecodegroups = df.OFFENSE_CODE_GROUP.sort_values(
        ascending=True).unique()
    OCG_selecting = st.selectbox('OFFENSE GROUPS', offensecodegroups)

    # Rows without coordinates or district are excluded from the heat map.
    df_drop = df.dropna(subset=['Lat', 'Long', 'DISTRICT'])
    df_heatmap_mvar = df_drop[df_drop["OFFENSE_CODE_GROUP"] == OCG_selecting]

    map_mvar = folium.Map(location=[42.361145, -71.057083],
                          tiles='cartodbpositron',
                          zoom_start=12)

    # Add a heatmap layer to the base map.
    HeatMap(data=df_heatmap_mvar[['Lat', 'Long']],
            radius=10).add_to(map_mvar)

    folium_static(map_mvar)
Esempio n. 4
0
# Keep rows dated strictly after start_date and at most x days later.
end_date = start_date + datetime.timedelta(days=x)
df = df[(df['date'] > start_date) & (df['date'] <= end_date)]

# Province selection
new_df = df[df['prname'].isin(pr)]

# Metric selection
value = st.sidebar.selectbox("Showing", ["numtotal", "numdeaths"])
var = "This is the trend for {}".format(pr)
st.text(var)

# First plot: selected metric over time, one colour per province.
fig = px.scatter(new_df, x='date', y=value, color='prname')
st.plotly_chart(fig)
var1 = "This is the distribution in provinces"

# Second plot: per-province maximum of the selected metric.
df1 = df.groupby(['prname'], as_index=False)[value].max()
# NOTE(review): label 2 is the third 'prname' in sorted order -- presumably a
# nationwide aggregate row; confirm and consider filtering by name instead.
# errors='ignore' prevents a KeyError when fewer than three groups remain
# after the date filter.
df1 = df1.drop(index=2, errors='ignore')
fig = px.treemap(df1, path=['prname'], values=value)
st.plotly_chart(fig)

# Third: table with the rows behind the first plot.
st.write(new_df)

###### Notes for me:
## using Quicklab
## possibility to use it for Data Analyst work
## Kubernetes
Esempio n. 5
0
import pandas as pd
import streamlit as st
import plotly_express as px

# Wrap pd.read_csv in Streamlit's cache and load the CSV through it.
cached_read_csv = st.cache(pd.read_csv)
df = cached_read_csv('stonk.csv')

# Treemap nested by the listed columns, with tiles sized by the 'CIK' column.
treemap_levels = ['sandp', 'GICS Sector', 'GICS Sub-Industry', 'Symbol']
fig = px.treemap(df, path=treemap_levels, values='CIK')

st.plotly_chart(fig)

# Show the underlying table below the chart.
st.write(df)
Esempio n. 6
0
# Reshape the wide sales table to long form (one row per id/day pair), drop
# missing rows, then attach calendar and price columns with left joins.
id_columns = ['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']
df = pd.melt(sales, id_vars=id_columns, var_name='d',
             value_name='sold').dropna()
df = df.merge(calendar, on='d', how='left')
df = df.merge(prices, on=['store_id', 'item_id', 'wm_yr_wk'], how='left')

# Treemap input: number of item_id entries per state/store/category/department.
display_names = {
    'state_id': 'State',
    'store_id': 'Store',
    'cat_id': 'Category',
    'dept_id': 'Department',
    'item_id': 'Count'
}
group = (
    sales.groupby(['state_id', 'store_id', 'cat_id', 'dept_id'],
                  as_index=False)['item_id']
    .count()
    .dropna()
)
# Constant column that serves as the single root of the hierarchy.
group['USA'] = 'United States of America'
group = group.rename(columns=display_names)

fig = px.treemap(
    group,
    path=['USA', 'State', 'Store', 'Category', 'Department'],
    values='Count',
    color='Count',
    color_continuous_scale=px.colors.sequential.Sunset,
    title='Walmart: Distribution of items',
)
fig.update_layout(template='seaborn')
fig.show()