Beispiel #1
0
def display_basket():
    """Render the "Basket Optimization" page.

    Writes an introduction, then feeds the frame from ``create_dataframe``
    through ``select_multiple_products``, ``create_basket`` and
    ``create_basket_set``. The resulting basket set is handed to
    ``build_frequent_sets`` (first 15 rows shown) and the frequent itemsets
    to ``build_rules`` (shown in full).
    """
    st.header("Basket Optimization")
    introduction = """Market Basket Analysis is one of the key techniques used by large retailers to uncover associations between items. It works by looking for combinations of items that occur together frequently in transactions. To put it another way, it allows retailers to identify relationships between the items that people buy.
     """
    st.write(introduction)

    # NOTE(review): select_multiple_products presumably narrows the data to
    # the products picked by the user -- confirm against its definition.
    orders = select_multiple_products(create_dataframe())
    encoded_basket = create_basket_set(create_basket(orders))

    # --- frequent itemsets ---
    st.subheader("Frequent Itemset")
    st.write(
        """ An itemset is considered as "frequent" if it meets a user-specified support threshold."""
    )
    itemsets = build_frequent_sets(encoded_basket)
    st.dataframe(itemsets.head(15))

    # --- association rules derived from those itemsets ---
    st.subheader("Association Rules")
    st.write(
        """Association rules are usually required to satisfy a user-specified minimum support and a user-specified minimum confidence at the same time."""
    )
    st.dataframe(build_rules(itemsets))
def _weekday_traces(dataset, weekdays, period):
    """Build one scatter trace per weekday for a single ``month_year`` value.

    Args:
        dataset: frame with the columns ``order_purchase_day``,
            ``month_year``, ``freight_value``, ``payment_value`` and
            ``price``.
        weekdays: values of ``order_purchase_day``; one trace is produced
            for each, in this order.
        period: the ``month_year`` value the traces are restricted to.

    Returns:
        A list of ``go.Scatter`` traces, one per entry of ``weekdays``.
    """
    traces = []
    for day in weekdays:
        subset = dataset[(dataset['order_purchase_day'] == day)
                         & (dataset['month_year'] == period)]
        traces.append(
            go.Scatter(
                x=subset['freight_value'],
                y=subset['payment_value'],
                text=subset['order_purchase_day'],
                mode='markers',
                marker=dict(
                    size=subset['price'],
                    sizemode="area",
                    # A colour array is matched to the points one-to-one,
                    # so it needs one value per row of this trace.
                    color=np.random.rand(len(subset)),
                    colorscale='rdylgn',
                    showscale=False),
                name=day))
    return traces


def display_bubble_plot():
    """Render the animated freight-vs-payment bubble chart.

    Takes a fixed 2000-row sample (``random_state=1``) of the frame returned
    by ``create_dataframe``, formats ``month_year`` as ``YYYY-MM`` and plots
    one trace per ``order_purchase_day`` value. Every ``month_year`` value
    gets an animation frame and a slider step, including the first one,
    which is also used as the initially displayed data.

    Returns:
        Whatever ``st.plotly_chart`` returns for the assembled figure.
    """
    st.header("Animated Bubble Plot")

    dataset = create_dataframe().sample(n=2000, random_state=1)
    dataset['month_year'] = pd.to_datetime(
        dataset['month_year']).dt.strftime('%Y-%m')

    periods = list(dataset['month_year'].sort_values().unique())
    weekdays = list(dataset['order_purchase_day'].sort_values().unique())

    layout = {
        'xaxis': {
            'title': 'Freight Value',
            'type': 'linear',
            'range': [0, 110],
            'showline': True,
            'showticklabels': True,
            'linecolor': 'rgb(204, 204, 204)',
            'linewidth': 2,
            'ticks': 'outside',
            'tickfont': dict(
                family='Arial',
                size=12,
                color='rgb(82, 82, 82)',
            ),
        },
        'yaxis': {
            'title': 'Payment Value',
            'range': [0, 2500],
            'showline': True,
            'showticklabels': True,
            'linecolor': 'rgb(204, 204, 204)',
            'linewidth': 2,
            'ticks': 'outside',
            'tickfont': dict(
                family='Arial',
                size=12,
                color='rgb(82, 82, 82)',
            ),
        },
        'hovermode': 'closest',
        'showlegend': True,
        'title_text': "Freight vs Payment Value per Weekday",
        'title_font': dict(family='Arial', size=20, color='rgb(82, 82, 82)'),
        'legend_title': "Weekday",
        'legend_traceorder': "grouped",
        'legend_title_font_color': "green",
        'legend_title_font': dict(
            family='Arial', size=15, color='rgb(82, 82, 82)'),
        'plot_bgcolor': 'rgb(223, 232, 243)',
        'updatemenus': [{
            'buttons': [
                {
                    'args': [
                        None, {
                            'frame': {'duration': 1200, 'redraw': True},
                            'fromcurrent': True,
                            'transition': {
                                'duration': 100,
                                'easing': 'quadratic-in-out',
                            },
                        }
                    ],
                    'label': 'Play',
                    'method': 'animate',
                },
                {
                    'args': [[None], {
                        'frame': {'duration': 0, 'redraw': False},
                        'mode': 'immediate',
                        'transition': {'duration': 0},
                    }],
                    'label': 'Pause',
                    'method': 'animate',
                },
            ],
            'direction': 'left',
            'pad': {'r': 10, 't': 80},
            'showactive': False,
            'type': 'buttons',
            'x': 0.1,
            'xanchor': 'right',
            'y': 0,
            'yanchor': 'top',
        }],
        'sliders': [{
            'active': 0,
            'yanchor': 'top',
            'xanchor': 'left',
            'currentvalue': {
                'font': {'size': 14},
                'prefix': 'Month-Year:',
                'visible': True,
                'xanchor': 'right',
            },
            'transition': {'duration': 500, 'easing': 'cubic-in-out'},
            'pad': {'b': 10, 't': 50},
            'len': 0.9,
            'x': 0.1,
            'y': 0,
            'steps': [],
        }],
    }

    # Data shown before any animation runs: the earliest period.
    initial_traces = _weekday_traces(dataset, weekdays, periods[0])

    # Build a frame and a slider step for EVERY period. Skipping the first
    # one would leave the slider without an entry for the initial month, so
    # it could never be revisited once the animation has moved on.
    frames = []
    slider_steps = layout['sliders'][0]['steps']
    for period in periods:
        frames.append(
            go.Frame(data=_weekday_traces(dataset, weekdays, period),
                     name=period))
        slider_steps.append({
            'args': [[period], {
                'frame': {'duration': 1200, 'redraw': True},
                'mode': 'immediate',
                'transition': {'duration': 500},
            }],
            'label': period,
            'method': 'animate',
        })

    fig = go.Figure(data=initial_traces, layout=layout, frames=frames)
    return st.plotly_chart(fig)
Beispiel #3
0
def display_segmentation():
    """Render the "Customer Segmentation" page based on RFM clustering.

    Runs the frame from ``create_dataframe`` through the RFM helpers
    (``create_rfm``, ``remove_outliers``, ``create_groups``,
    ``normalize_data``, ``scaled_data``, ``create_clusters``) and shows three
    charts: a parallel-coordinate plot, a "snake" plot of the melted metrics,
    and a bar chart of unique customers per ``RFM_Level``. The last two are
    followed by a collapsible explanation.
    """
    # ``st.beta_expander`` was renamed to ``st.expander`` and later removed;
    # use whichever this Streamlit version provides.
    expander = st.expander if hasattr(st, "expander") else st.beta_expander

    st.header("Customer Segmentation")

    # The heading is an <h4>, so it must be closed with </h4>.
    st.markdown("""
    <h4 style="color:#26608e;">Using RFM (Recency - Frequency - Monetary Value) Clustering</h4>
    <ul>
    <li>Low Value: Customers who are less active than others, not very frequent buyer/visitor and generates very low - zero - maybe negative revenue.</li>
    <li>Mid Value: Using platform fairly frequent and generates moderate revenue.</li>
    <li>High Value: High Revenue, Frequency and low Inactivity.</li>
    </ul>

    """,
                unsafe_allow_html=True)

    # --- RFM pipeline ---
    orders = create_dataframe()
    rfm = create_rfm(orders)
    rfm = remove_outliers(rfm)
    rfm = create_groups(rfm)
    rfm_log = normalize_data(rfm)
    rfm_scaled = scaled_data(rfm_log, rfm)
    rfm_scaled = create_clusters(rfm_scaled, rfm)

    st.plotly_chart(plot_parallel_coordinate(rfm_scaled))

    # Long format: one row per (customer, metric) pair for the snake plot.
    rfm_melted = pd.melt(
        frame=rfm_scaled,
        id_vars=['customer_unique_id', 'RFM_Level', 'K_Cluster'],
        var_name='Metrics',
        value_name='Value')
    st.plotly_chart(plot_snake(rfm_melted))
    with expander("See explanation"):
        st.write("""
        Low (1 & 0): The frequency is not too high compared to the nominal transaction, but the last time he was trading was fast
        Medium (2): The frequency is quite high and the transaction nominal is quite high, but the last time he was trading was quite long
        High (3): The frequency for spending is high and the nominal spent is also a lot, but the last transaction time is long
        """)

    # How many customers are there by category?
    customers_per_level = rfm_scaled.groupby(
        'RFM_Level')['customer_unique_id'].nunique().reset_index()
    fig = plot_bar(customers_per_level, 'RFM_Level', 'customer_unique_id',
                   'customer_unique_id', 'crimson',
                   'Customer Based on RFM Level', 'RFMLevel',
                   'Amount of Customer')
    st.plotly_chart(fig)
    # NOTE(review): the customer counts quoted below are hard-coded text and
    # are not derived from ``customers_per_level``.
    with expander("See explanation"):
        st.markdown("""
        <ul>
        <li>Bronze: A customer who doesn't make frequent purchases and the transaction nominal is low but the last time he made transactions was fast. There are 36,000 customers of this type.</li>
        <ul>
        <li>
        Action: You can try giving discounts or offers at an affordable nominal so that the conversion rate increases because there are quite a lot of customers in the Bronze category
        </li>
        </ul>
        </ul>
        <ul>
        <li>Silver: A customer who makes purchases quite often and the transaction nominal is quite high, but the last transaction time was quite long. There are 42,000 customers of this type.</li>
        <ul>
        <li>
        Action: Given a combination of discounts and post-transaction campaigns to increase purchases by using a personnelized email that can give a personal touch.</li>
        </ul>
        </ul>
        <ul>
        <li>Gold: Customers who frequently shop and have a lot of nominal transactions, but the last transaction took a long time. There are 15,000 customers of this type.
        </li>
        <ul>
        <li>
        Action: More often given the campaign after making transactions to make purchases again. You can also give rewards because they are more likely to make transactions and the nominal is high.</li>
        </ul>
        </ul> """,
                    unsafe_allow_html=True)
Beispiel #4
0
def display_retention():
    """Render the "Customer Retention" page.

    Shows the monthly retention and churn bar charts built from
    ``retention_table``, then the cohort view built from
    ``cohort_based_retention_rate``: an optional retention table, a heatmap,
    and an optional churn table computed as ``1 - retention``.
    """
    # ``st.beta_expander`` was renamed to ``st.expander`` and later removed;
    # use whichever this Streamlit version provides.
    expander = st.expander if hasattr(st, "expander") else st.beta_expander

    st.header("Customer Retention")
    # Adjacent string literals are joined with no separator, so every
    # continuation fragment carries its own leading space.
    st.write(
        "Retention analysis is key to understanding if customers come back to your buy your product"
        " and at what frequency. Inevitably, the percentage of customers coming back will decrease"
        " with time after their acquisition as some users fail to see the value"
        " or maybe they just don’t need it or did not like it.")

    st.subheader("Monthly Retention Rate")
    st.markdown(
        """<h4 style="color:#26608e;">Monthly Retention Rate = Retained Customers From Prev. Month/Active Customers Total</h4>""",
        unsafe_allow_html=True)

    # data
    df = create_dataframe()
    monthly = retention_table(df)

    fig = plot_bar(monthly, 'month_y', 'RetentionRate', 'RetentionRate',
                   'indianred', [0, .008], 'Monthly Retention Rate',
                   'Month - Year', 'Rate')
    st.plotly_chart(fig)

    with expander("See explanation"):
        st.write("""
        Monthly Retention Rate is significantly lower over time.
        """)

    fig = plot_bar(monthly, 'month_y', 'ChurnRate', 'ChurnRate',
                   'crimson', [0.97, 1], 'Monthly Churn Rate', 'Month - Year',
                   'Rate')
    st.plotly_chart(fig)

    with expander("See explanation"):
        st.write("""
        Monthly Churn Rate is significantly high with the maximum rate at January '18.
        """)

    st.subheader("Cohort Based Retention Rate")
    st.write(
        "Cohorts are determined as first purchase year-month of the customers."
        " You will be measuring what percentage of the customers retained after their"
        " first purchase in each month. This view will helps to see how recent and old cohorts"
        " differ regarding retention rate and if recent changes in customer experience affected"
        " new customer’s retention or not.")

    st.subheader("You can see: ")
    st.write("1. Size of each user cohort (new users per period)")
    st.write(
        "2. Percentage of returning users per subsequent period for each cohort"
    )

    user_retention = cohort_based_retention_rate(df)
    if st.checkbox('Show Retention Table'):
        st.dataframe(user_retention.style.highlight_max(axis=0))
    fig = plot_heatmap_log(user_retention, 'bluyl',
                           "Cohort Based Retention Rate")
    st.plotly_chart(fig)

    with expander("See explanation"):
        st.write("""
        First month natural log of retention rate is better with rate decreasing over time.
        """)

    # Churn is the complement of retention.
    user_churn = 1 - user_retention

    if st.checkbox('Show Churn Table'):
        st.dataframe(user_churn.style.highlight_max(axis=0))
def display_metrics():
    """Render the "Key Performance Indicators (KPIs)" page.

    After an introduction and an optional preview of the first 10 rows of
    the frame from ``create_dataframe``, the page shows, in order: monthly
    revenue, monthly revenue growth rate, monthly active customers, monthly
    order count, average revenue per order, monthly revenue for existing and
    for new customers, and the monthly new-customer ratio. Each chart is
    followed by a collapsible explanation.
    """
    # ``st.beta_expander`` was renamed to ``st.expander`` and later removed;
    # use whichever this Streamlit version provides.
    expander = st.expander if hasattr(st, "expander") else st.beta_expander

    st.header("Key Performance Indicators (KPIs)")

    st.markdown("""
    KPIs are the critical (key) indicators of progress toward an 
    intended result. 
    KPIs provides a focus for strategic and operational improvement, 
    create an analytical basis for decision making and help focus attention on what matters most. 
    As Peter Drucker famously said,
    <blockquote>
    <b>What gets measured gets done. </b>
    </blockquote>
    Managing with the use of KPIs includes setting targets (the desired level of performance) and tracking progress against that target. 
    Managing with KPIs often means working to improve leading indicators that 
    will later drive lagging benefits. 
    Leading indicators are precursors of future success; 
    lagging indicators show how successful the organization was at achieving results in the past. 
    """,
                unsafe_allow_html=True)

    # data
    df = create_dataframe()
    # The markdown link needs its closing parenthesis to render as a link.
    st.markdown(
        "Sample data of [Olist](https://www.kaggle.com/olistbr/brazilian-ecommerce)"
    )
    if st.checkbox('Show Online Data'):
        st.dataframe(df.head(10))

    # --- monthly revenue ---
    st.markdown(
        """<h4 style="color:#26608e;"> Revenue = Active Customer Count * Order Count * Average Revenue per Order</h4>""",
        unsafe_allow_html=True)
    revenue = monthly_revenue(df)
    fig = plot_scatter(revenue, 'month_year', 'payment_value', 'firebrick',
                       'Monthly Revenue', 'Month - Year', 'R$')
    st.plotly_chart(fig)
    with expander("See explanation"):
        st.write("""
        Revenue is growing steadily with the maximum revenue around November '17.
        """)

    # --- monthly revenue growth rate ---
    revenue_growth = monthly_revenue_growth_rate(df)
    fig = plot_scatter(revenue_growth, 'month_year', 'monthly_growth',
                       'royalblue', 'Monthly Revenue Growth Rate',
                       'Month - Year', '%')
    st.plotly_chart(fig)
    with expander("See explanation"):
        st.write("""
        The growth rate has not been steady with it decreasing over time.
        """)

    # --- monthly active customers ---
    active_customers = monthly_active_customers(df)
    fig = plot_bar(active_customers, 'month_year', 'customer_unique_id',
                   'customer_unique_id', 'crimson', 'Monthly Active Users',
                   'Month - Year', 'Number')
    st.plotly_chart(fig)
    with expander("See explanation"):
        st.write("""
        The number of monthly active users has been increasing steadily with Nov '17 showing the highest number.
        """)

    # --- monthly order count ---
    order_counts = monthly_order_count(df)
    fig = plot_bar(order_counts, 'month_year', 'order_status',
                   'Monthly Order Count', 'indianred',
                   'Monthly Total # of Order', 'Month - Year', 'Number')
    st.plotly_chart(fig)
    # NOTE(review): this explanation repeats the active-users text verbatim
    # although the chart shows order counts -- looks like a copy/paste
    # leftover; confirm the intended wording against the data.
    with expander("See explanation"):
        st.write("""
        The number of monthly active users has been increasing steadily with Nov '17 showing the highest number.
        """)

    # --- average revenue per order ---
    order_average = average_revenue_per_order(df)
    fig = plot_bar(order_average, 'month_year', 'payment_value',
                   'Monthly Order Average', 'lightslategray',
                   'Monthly Average # of Order', 'Month - Year', 'Number')
    st.plotly_chart(fig)
    with expander("See explanation"):
        st.write("""
        However, the average has almost stayed the same over the month.
        """)

    # --- revenue split by customer type ---
    user_type_revenue = dataframe_user_type_revenue(df)
    if st.checkbox('Show Revenue Data'):
        st.dataframe(user_type_revenue)

    fig = plot_scatter(user_type_revenue.query("usertype == 'Existing'"),
                       'month_year', 'payment_value', 'firebrick',
                       'Monthly Revenue (Existing Customers)', 'Month - Year',
                       'R$')
    st.plotly_chart(fig)
    with expander("See explanation"):
        st.write("""
        Revenue is growing steadily with the maximum revenue between April '18 to May '18.
        """)

    fig = plot_scatter(user_type_revenue.query("usertype == 'New'"),
                       'month_year', 'payment_value', 'royalblue',
                       'Monthly Revenue (New Customers)', 'Month - Year', 'R$')
    st.plotly_chart(fig)
    with expander("See explanation"):
        st.write("""
        Revenue is growing steadily with the maximum revenue around November '17. 
        However, the revenue from new customers is much higher than existing customers
        """)

    # --- new customer ratio ---
    new_customer_ratio = dataframe_new_customer_ratio(df)
    if st.checkbox('Show Ratio Data'):
        st.dataframe(new_customer_ratio)

    fig = plot_bar(new_customer_ratio, 'month_year', 'NewCusRatio',
                   'Monthly New Customer Ratio', 'crimson',
                   'Monthly New Customer Ratio', 'Month - Year', '%')
    st.plotly_chart(fig)
    with expander("See explanation"):
        st.write("""
        New Customer ratio has been decreasing over the months.
        """)