Exemple #1
0
def plot_history_alive_min_thresholds(model, summary, transaction_data,
                                      threshold):
    """Scatter each repeat customer's minimum alive probability against its
        cumulative rank.

    For every customer in ``summary`` whose ``frequency`` is greater than 0,
    the alive-probability path is computed with ``calculate_alive_path`` at
    '1D' resolution and its minimum is kept. The sorted minima are plotted
    against ``i / n`` for ``i = 0 .. n - 1``.

    Parameters
    ----------
    model : bgf model, fit to all data
        Passed through to ``calculate_alive_path``.
    summary : summary set of data, output from summary_data_from_transaction_data function
        Must be indexed by customer and contain ``frequency`` and ``T`` columns.
    transaction_data : transaction log
        Must contain ``CustomerNo`` and ``OrderDate`` columns.
    threshold
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    The object returned by ``plt.scatter``.
    """
    from lifetimes.utils import calculate_alive_path

    # only customers with at least one repeat purchase get a path
    repeat_customers = summary[summary['frequency'] > 0]

    min_paths = []
    for customer in repeat_customers.index:
        # ``.loc`` with a boolean mask replaces ``.ix``, which was removed in
        # pandas 1.0
        customer_transactions = transaction_data.loc[
            transaction_data['CustomerNo'] == customer]
        # scalar lookup instead of int() on a one-element Series
        observation_length = int(repeat_customers.at[customer, 'T'])
        path = calculate_alive_path(model, customer_transactions, 'OrderDate',
                                    observation_length, '1D')
        # only the minimum of each path is needed for the plot
        min_paths.append(path.min())

    # cumulative fraction for the sorted minima; float() keeps this a true
    # division regardless of interpreter division rules
    customer_count = len(min_paths)
    y = np.arange(0, customer_count) / float(customer_count)
    ax = plt.scatter(sorted(min_paths), y)
    plt.xlabel('min probability active')
    plt.ylabel('cumulative fraction of customers')
    plt.title('Fraction of customers with Min Probablity Active')
    return ax
Exemple #2
0
def test_calculate_alive_path(example_transaction_data, example_summary_data,
                              fitted_bg):
    """Check both ends of the alive path computed for customer 33."""
    customer_id = 33
    belongs_to_customer = example_transaction_data['id'] == customer_id
    customer_transactions = example_transaction_data[belongs_to_customer]
    frequency, recency, T = example_summary_data.loc[customer_id]

    alive_path = utils.calculate_alive_path(
        fitted_bg, customer_transactions, 'date', 205)

    # the first entry of the path is expected to be exactly 1
    assert alive_path[0] == 1

    # the entry at T is expected to equal the model's conditional probability
    observed = alive_path[T]
    expected = fitted_bg.conditional_probability_alive(frequency, recency, T)
    assert observed == expected
def get_customer_journey(transactions, bgf, id):
    """Build the P(alive) figure and the raw transaction rows for one user.

    Selects the rows of ``transactions`` whose ``user_id`` equals ``int(id)``,
    computes a daily alive path over 30 periods with ``calculate_alive_path``,
    and hands the path, its dates and the selected rows to
    ``create_palive_figure``.

    Returns a tuple ``(figure, rows)`` where ``figure`` is whatever
    ``create_palive_figure`` returns and ``rows`` is the selected rows as an
    array with the ``user_id`` column dropped.
    """
    customer_id = int(id)
    observation_periods = 30

    is_customer = transactions['user_id'] == customer_id
    customer_rows = transactions.loc[is_customer]
    rows_without_id = customer_rows.drop('user_id', axis=1).values

    alive_path = calculate_alive_path(bgf, customer_rows, 'event_time',
                                      observation_periods, freq="D")
    # the date axis starts at the customer's earliest event
    first_event = min(customer_rows['event_time'])
    alive_dates = pd.date_range(start=first_event, periods=len(alive_path),
                                freq="D")

    figure = create_palive_figure(alive_path, alive_dates, customer_rows)
    return figure, rows_without_id
Exemple #4
0
def plot_history_alive(model,
                       t,
                       transactions,
                       datetime_col,
                       freq='D',
                       **kwargs):
    """
    Draw a graph showing the probability of being alive for a customer in time.

    Parameters:
        model: A fitted lifetimes model
        t: the number of time units since the birth we want to draw the p_alive
        transactions: a Pandas DataFrame containing the transactions history of the customer_id
        datetime_col: the column in the transactions that denotes the datetime the purchase was made
        freq: Default 'D' for days. Other examples= 'W' for weekly
        **kwargs: two optional keys are read:
            start_date: left limit of the x axis; defaults to the earliest
                value in datetime_col
            ax: the axes to draw on; defaults to ``plt.subplot(111)``

    Returns:
        The axes the graph was drawn on.

    """
    from matplotlib import pyplot as plt

    first_purchase = min(transactions[datetime_col])
    start_date = kwargs.pop('start_date', first_purchase)
    ax = kwargs.pop('ax', None)
    if ax is None:
        ax = plt.subplot(111)

    # Count purchases per period. A Series of ones indexed by purchase time
    # keeps the aggregation on the counter only, so the datetime column itself
    # is never summed.
    purchase_counts = pd.Series(
        1, index=pd.DatetimeIndex(transactions[datetime_col]))
    purchase_counts = purchase_counts.resample(freq).sum()

    # plot alive_path
    path = calculate_alive_path(model, transactions, datetime_col, t, freq)
    path_dates = pd.date_range(start=first_purchase,
                               periods=len(path),
                               freq=freq)
    # Everything is drawn on ``ax`` rather than through ``plt`` so that a
    # caller-supplied axes is honoured even when it is not the current one.
    ax.plot(path_dates, path, '-', label='P_alive')

    # plot buying dates
    payment_dates = purchase_counts[purchase_counts >= 1].index
    ax.vlines(payment_dates.values,
              ymin=0,
              ymax=1,
              colors='r',
              linestyles='dashed',
              label='purchases')

    ax.set_ylim(0, 1.0)
    ax.set_yticks(np.arange(0, 1.1, 0.1))
    ax.set_xlim(start_date, path_dates[-1])
    ax.legend(loc=3)
    ax.set_ylabel('P_alive')
    ax.set_title('History of P_alive')

    return ax
def plot_history_alive(model, t, transactions, datetime_col, freq='D', **kwargs):
    """
    Draws a graph showing the probability of being alive for a customer in time
    :param model: A fitted lifetimes model
    :param t: the number of time units since the birth we want to draw the p_alive
    :param transactions: a Pandas DataFrame containing the transactions history of the customer_id
    :param datetime_col: the column in the transactions that denotes the datetime the purchase was made
    :param freq: Default 'D' for days. Other examples= 'W' for weekly
    :param kwargs: optional ``start_date`` (left limit of the x axis, defaults to the
        earliest value in datetime_col) and ``ax`` (axes to draw on, defaults to
        ``plt.subplot(111)``)
    :return: the axes the graph was drawn on
    """

    from matplotlib import pyplot as plt

    first_purchase = min(transactions[datetime_col])
    start_date = kwargs.pop('start_date', first_purchase)
    ax = kwargs.pop('ax', None)
    if ax is None:
        ax = plt.subplot(111)

    # Count purchases per period. ``resample(freq).sum()`` replaces the
    # ``how='sum'`` argument, which modern pandas no longer accepts, and the
    # dates are read from the resampled index directly instead of relying on
    # the column name that ``reset_index()`` produces.
    purchase_counts = pd.Series(1, index=pd.DatetimeIndex(transactions[datetime_col]))
    purchase_counts = purchase_counts.resample(freq).sum()

    # plot alive_path
    path = calculate_alive_path(model, transactions, datetime_col, t, freq)
    path_dates = pd.date_range(start=first_purchase, periods=len(path), freq=freq)
    # Drawn on ``ax`` rather than through ``plt`` so a caller-supplied axes is
    # honoured even when it is not the current one.
    ax.plot(path_dates, path, '-', label='P_alive')

    # plot buying dates
    payment_dates = purchase_counts[purchase_counts >= 1].index
    ax.vlines(payment_dates.values, ymin=0, ymax=1, colors='r', linestyles='dashed', label='purchases')

    ax.set_ylim(0, 1.0)
    ax.set_yticks(np.arange(0, 1.1, 0.1))
    ax.set_xlim(start_date, path_dates[-1])
    ax.legend(loc=3)
    ax.set_ylabel('P_alive')
    ax.set_title('History of P_alive')

    return ax
Exemple #6
0
def test_calculate_alive_path(example_transaction_data, example_summary_data, fitted_bg):
    """Alive path for customer 33 is 1 at position 0 and matches the model at T."""
    target_customer = 33
    rows_for_customer = example_transaction_data['id'] == target_customer
    frequency, recency, T = example_summary_data.loc[target_customer]

    alive_path = utils.calculate_alive_path(
        fitted_bg,
        example_transaction_data[rows_for_customer],
        'date',
        205,
    )

    # first entry of the path
    assert alive_path[0] == 1

    # entry at T compared with the model's conditional alive probability
    value_at_T = alive_path[T]
    model_value = fitted_bg.conditional_probability_alive(frequency, recency, T)
    assert value_at_T == model_value
Exemple #7
0
def plot_history_alive(model, t, transactions, datetime_col, freq='D',
                       start_date=None, ax=None, **kwargs):
    """
    Draw a graph showing the probability of being alive for a customer in time.

    Parameters
    ----------
    model: lifetimes model
        A fitted lifetimes model.
    t: int
        the number of time units since the birth we want to draw the p_alive
    transactions: pandas DataFrame
        DataFrame containing the transactions history of the customer_id
    datetime_col: str
        The column in the transactions that denotes the datetime the purchase was made
    freq: str, optional
        Default 'D' for days. Other examples= 'W' for weekly
    start_date: datetime, optional
        Limit xaxis to start date. Defaults to the earliest value in
        ``datetime_col``.
    ax: matplotlib.AxesSubplot, optional
        Using user axes. Defaults to ``plt.subplot(111)``.
    kwargs
        Passed into the plot call that draws the P_alive line. A ``label``
        given here replaces the default 'P_alive' label.

    Returns
    -------
    axes: matplotlib.AxesSubplot

    """
    from matplotlib import pyplot as plt

    first_purchase = min(transactions[datetime_col])
    if start_date is None:
        start_date = first_purchase

    if ax is None:
        ax = plt.subplot(111)

    # Count purchases per period. A Series of ones indexed by purchase time
    # keeps the aggregation on the counter only, so the datetime column itself
    # is never summed.
    purchase_counts = pd.Series(1, index=pd.DatetimeIndex(transactions[datetime_col]))
    purchase_counts = purchase_counts.resample(freq).sum()

    # plot alive_path
    path = calculate_alive_path(model, transactions, datetime_col, t, freq)
    path_dates = pd.date_range(start=first_purchase, periods=len(path), freq=freq)
    # Caller options are merged over the default label so that passing
    # ``label=...`` does not collide with it.
    line_options = {'label': 'P_alive'}
    line_options.update(kwargs)
    # Drawn on ``ax`` rather than through ``plt`` so a caller-supplied axes is
    # honoured even when it is not the current one.
    ax.plot(path_dates, path, '-', **line_options)

    # plot buying dates
    payment_dates = purchase_counts[purchase_counts >= 1].index
    ax.vlines(payment_dates.values, ymin=0, ymax=1, colors='r', linestyles='dashed', label='purchases')

    ax.set_ylim(0, 1.0)
    ax.set_yticks(np.arange(0, 1.1, 0.1))
    ax.set_xlim(start_date, path_dates[-1])
    ax.legend(loc=3)
    ax.set_ylabel('P_alive')
    ax.set_title('History of P_alive')

    return ax
Exemple #8
0
def plot_history_alive(model, t, transactions, datetime_col, freq='D',
                       start_date=None, end_date=None, ax=None,
                       title='Evolution of survival probability', **kwargs):
    """
    Draw a graph showing the probability of being alive for a customer in time.

    The curve is drawn solid up to the "last refresh" date (first transaction
    plus ``t`` days) and dashed from that date onwards. Vertical lines mark
    the orders, the last refresh and today.

    Parameters
    ----------
    model: lifetimes model
        A fitted lifetimes model.
    t: int
        Number of days after the first transaction at which the "last
        refresh" marker is placed.
    transactions: pandas DataFrame
        DataFrame containing the transactions history of the customer_id
    datetime_col: str
        The column in the transactions that denotes the datetime the purchase
        was made
    freq: str, optional
        Default 'D' for days. Other examples= 'W' for weekly
    start_date: datetime or str, optional
        Limit xaxis to start date. Defaults to 7 days before the first
        transaction.
    end_date: datetime or str, optional
        Date up to which the path length is computed. Defaults to today.
    ax: matplotlib.AxesSubplot, optional
        Using user axes
    title: str, optional
        Title of the plot.
    kwargs
        Accepted for call compatibility; not forwarded to any plotting call.

    Returns
    -------
    axes: matplotlib.AxesSubplot

    """
    from matplotlib import pyplot as plt

    # pd.to_datetime accepts 'YYYY-MM-DD' strings as well as datetime-like
    # values; normalize() keeps the date-only resolution used for the markers.
    first_transaction_date = pd.to_datetime(
        transactions[datetime_col].min()).normalize()
    today = pd.Timestamp.now().normalize()

    if start_date is None:
        start_date = first_transaction_date - dt.timedelta(days=7)
    else:
        start_date = pd.to_datetime(start_date)
    # end_date defaults to None in the signature, so it needs a fallback
    if end_date is None:
        end_date = today
    else:
        end_date = pd.to_datetime(end_date)
    if ax is None:
        ax = plt.subplot(111)

    # Count purchases per period. A Series of ones indexed by purchase time
    # keeps the aggregation on the counter only, so the datetime column itself
    # is never summed.
    purchase_counts = pd.Series(
        1, index=pd.DatetimeIndex(transactions[datetime_col]))
    purchase_counts = purchase_counts.resample(freq).sum()

    refresh_date = first_transaction_date + dt.timedelta(days=t)
    periods_to_plot = (end_date - start_date).days
    if freq == 'W':
        periods_to_plot = int(periods_to_plot/7.)

    # plot alive_path, expressed in percent
    path = np.asarray(
        calculate_alive_path(model, transactions, datetime_col,
                             periods_to_plot, freq)) * 100.
    # The path is anchored at the first transaction instead of being padded
    # with a fixed number of empty periods, so it stays aligned with the order
    # markers for any start_date and any freq.
    path_dates = pd.date_range(start=first_transaction_date,
                               periods=len(path), freq=freq)
    is_past = path_dates <= refresh_date
    is_future = path_dates >= refresh_date
    # Drawn on ``ax`` rather than through ``plt`` so a caller-supplied axes is
    # honoured even when it is not the current one.
    ax.plot(path_dates[is_past], path[is_past],
            color=glovo_gray, ls='-', label='P_alive')
    ax.plot(path_dates[is_future], path[is_future],
            color=glovo_gray, ls='--')

    # plot buying dates
    payment_dates = purchase_counts[purchase_counts >= 1].index
    ax.vlines(payment_dates.values, ymin=0, ymax=100,
              colors=glovo_green, linestyles='dashed', label='Orders')
    ax.vlines(refresh_date, ymin=0, ymax=100,
              colors=glovo_yellow, linestyles='solid', label='Last refresh')
    ax.vlines(today, ymin=0, ymax=100,
              colors='red', linestyles='solid', label='Today')
    ax.set_ylim(0, 105)
    ax.set_yticks(np.arange(0, 110, 10))
    ax.tick_params(axis='x', labelrotation=-20.)
    ax.set_xlim(start_date, path_dates[-1])
    ax.legend(loc=3)
    ax.set_ylabel('P_alive (%)')
    ax.set_title(title)

    return ax