def plot_history_alive_min_thresholds(model, summary, transaction_data, threshold):
    """Scatter each customer's minimum P(alive) against the cumulative customer fraction.

    For every customer in ``summary`` with ``frequency > 0`` the alive path is
    computed from that customer's transactions, its minimum is taken, and the
    sorted minima are plotted against ``i / n``.

    Parameters
    ----------
    model :
        bgf model, fit to all data; passed to ``calculate_alive_path``.
    summary :
        summary set of data, output from summary_data_from_transaction_data
        function. Must be indexed by customer and provide the ``frequency``
        and ``T`` columns.
    transaction_data :
        DataFrame of raw transactions with the ``CustomerNo`` and
        ``OrderDate`` columns.
    threshold :
        Not used by the current implementation; kept so existing callers
        keep working.

    Returns
    -------
    The object returned by ``plt.scatter`` for the plotted points. Nothing is
    written to disk.
    """
    from lifetimes.utils import calculate_alive_path

    # Keep only customers with at least one repeat purchase.
    summary_multiple = summary[summary['frequency'] > 0]

    # Minimum of the alive path for each of those customers.
    min_paths = []
    for customer, observed_periods in summary_multiple['T'].items():
        # Boolean-mask selection through .loc; DataFrame.ix no longer exists
        # in current pandas.
        sp_trans = transaction_data.loc[transaction_data['CustomerNo'] == customer]
        path = calculate_alive_path(model, sp_trans, 'OrderDate',
                                    int(observed_periods), '1D')
        min_paths.append(path.min())

    # Cumulative fraction i / n for the i-th smallest minimum. The float cast
    # keeps this a true division on every interpreter version.
    n_customers = len(min_paths)
    y = np.arange(0, n_customers) / float(n_customers)

    ax = plt.scatter(sorted(min_paths), y)
    plt.xlabel('min probability active')
    plt.ylabel('cumulative fraction of customers')
    plt.title('Fraction of customers with Min Probablity Active')
    return ax
def test_calculate_alive_path(example_transaction_data, example_summary_data, fitted_bg):
    """Check both ends of the alive path computed for customer 33."""
    customer_id = 33
    belongs_to_customer = example_transaction_data['id'] == customer_id
    customer_transactions = example_transaction_data[belongs_to_customer]
    frequency, recency, T = example_summary_data.loc[customer_id]

    # 205 is the number of periods requested from calculate_alive_path
    # (presumably this customer's T -- confirm against the fixture).
    alive_path = utils.calculate_alive_path(
        fitted_bg, customer_transactions, 'date', 205)

    # The path starts at certainty ...
    assert alive_path[0] == 1

    # ... and at index T matches the model's closed-form probability.
    path_value_at_T = alive_path[T]
    model_value_at_T = fitted_bg.conditional_probability_alive(
        frequency, recency, T)
    assert path_value_at_T == model_value_at_T
def get_customer_journey(transactions, bgf, id):
    """Build the P(alive) figure and the raw transaction rows for one user.

    Parameters
    ----------
    transactions : DataFrame with at least ``user_id`` and ``event_time``.
    bgf : model handed to ``calculate_alive_path``.
    id : user identifier; converted with ``int`` before filtering.

    Returns
    -------
    tuple
        ``(figure, rows)`` where ``figure`` is whatever
        ``create_palive_figure`` returns and ``rows`` holds the user's
        transactions as an array with the ``user_id`` column dropped.
    """
    user_id = int(id)
    horizon_days = 30  # number of periods requested from calculate_alive_path

    is_user = transactions['user_id'] == user_id
    user_transactions = transactions.loc[is_user]
    user_rows = user_transactions.drop('user_id', axis=1).values

    alive_path = calculate_alive_path(
        bgf, user_transactions, 'event_time', horizon_days, freq="D")

    # One date per path entry, starting at the user's first event.
    first_event = min(user_transactions['event_time'])
    alive_path_dates = pd.date_range(
        start=first_event, periods=len(alive_path), freq="D")

    figure = create_palive_figure(alive_path, alive_path_dates, user_transactions)
    return figure, user_rows
def plot_history_alive(model, t, transactions, datetime_col, freq='D', **kwargs):
    """
    Draw a graph showing the probability of being alive for a customer in time.

    Parameters:
        model: A fitted lifetimes model
        t: the number of time units since the birth we want to draw the p_alive
        transactions: a Pandas DataFrame containing the transactions history
            of the customer_id
        datetime_col: the column in the transactions that denotes the
            datetime the purchase was made
        freq: Default 'D' for days. Other examples= 'W' for weekly
        **kwargs: two optional keys are read. ``start_date`` is the lower
            x-axis limit (defaults to the earliest transaction date) and
            ``ax`` is the matplotlib axes to draw on (defaults to
            ``plt.subplot(111)``). Any other key is ignored.

    Returns:
        The matplotlib axes the graph was drawn on.
    """
    from matplotlib import pyplot as plt

    first_date = min(transactions[datetime_col])
    start_date = kwargs.pop('start_date', first_date)
    ax = kwargs.pop('ax', None)
    if ax is None:
        ax = plt.subplot(111)

    # Purchases per period. Only a counter is resampled, so the datetime
    # column itself is never summed (which recent pandas refuses to do).
    purchase_counts = pd.Series(
        1, index=pd.DatetimeIndex(transactions[datetime_col])
    ).resample(freq).sum()

    # Alive path. Everything is drawn through `ax` so that an axes supplied
    # by the caller is really the one that receives the plot.
    path = calculate_alive_path(model, transactions, datetime_col, t, freq)
    path_dates = pd.date_range(start=first_date, periods=len(path), freq=freq)
    ax.plot(path_dates, path, '-', label='P_alive')

    # Periods with at least one purchase.
    payment_dates = purchase_counts[purchase_counts >= 1].index
    ax.vlines(payment_dates.values, ymin=0, ymax=1,
              colors='r', linestyles='dashed', label='purchases')

    ax.set_ylim(0, 1.0)
    ax.set_yticks(np.arange(0, 1.1, 0.1))
    ax.set_xlim(start_date, path_dates[-1])
    ax.legend(loc=3)
    ax.set_ylabel('P_alive')
    ax.set_title('History of P_alive')
    return ax
def plot_history_alive(model, t, transactions, datetime_col, freq='D', **kwargs):
    """
    Draws a graph showing the probability of being alive for a customer in time

    :param model: A fitted lifetimes model
    :param t: the number of time units since the birth we want to draw the p_alive
    :param transactions: a Pandas DataFrame containing the transactions history of the customer_id
    :param datetime_col: the column in the transactions that denotes the datetime the purchase was made
    :param freq: Default 'D' for days. Other examples= 'W' for weekly
    :param kwargs: two optional keys are read. ``start_date`` is the lower
        x-axis limit (defaults to the earliest transaction date) and ``ax``
        is the matplotlib axes to draw on (defaults to ``plt.subplot(111)``).
        Any other key is ignored.
    :return: the matplotlib axes the graph was drawn on
    """
    from matplotlib import pyplot as plt

    first_date = min(transactions[datetime_col])
    start_date = kwargs.pop('start_date', first_date)
    ax = kwargs.pop('ax', None)
    if ax is None:
        ax = plt.subplot(111)

    # Purchases per period. `resample(freq, how='sum')` is no longer accepted
    # by pandas, so the aggregation is an explicit `.sum()`. Only a counter is
    # resampled and the dates are read from the resulting index, which avoids
    # depending on the column name `reset_index()` would generate.
    purchase_counts = pd.Series(
        1, index=pd.DatetimeIndex(transactions[datetime_col])
    ).resample(freq).sum()

    # Alive path. Everything is drawn through `ax` so that an axes supplied
    # by the caller is really the one that receives the plot.
    path = calculate_alive_path(model, transactions, datetime_col, t, freq)
    path_dates = pd.date_range(start=first_date, periods=len(path), freq=freq)
    ax.plot(path_dates, path, '-', label='P_alive')

    # Periods with at least one purchase.
    payment_dates = purchase_counts[purchase_counts >= 1].index
    ax.vlines(payment_dates.values, ymin=0, ymax=1,
              colors='r', linestyles='dashed', label='purchases')

    ax.set_ylim(0, 1.0)
    ax.set_yticks(np.arange(0, 1.1, 0.1))
    ax.set_xlim(start_date, path_dates[-1])
    ax.legend(loc=3)
    ax.set_ylabel('P_alive')
    ax.set_title('History of P_alive')
    return ax
def test_calculate_alive_path(example_transaction_data, example_summary_data, fitted_bg):
    """The alive path of customer 33 starts at 1 and agrees with the model at T."""
    customer = 33
    periods = 205  # periods requested (presumably the customer's T -- confirm)

    transactions_of_customer = example_transaction_data[
        example_transaction_data['id'] == customer
    ]
    summary_row = example_summary_data.loc[customer]
    frequency, recency, T = summary_row

    alive_path = utils.calculate_alive_path(
        fitted_bg, transactions_of_customer, 'date', periods)

    # First entry of the path must be exactly 1.
    assert alive_path[0] == 1
    # Entry at index T must equal the model's conditional P(alive).
    assert alive_path[T] == fitted_bg.conditional_probability_alive(
        frequency, recency, T)
def plot_history_alive(model, t, transactions, datetime_col, freq='D',
                       start_date=None, ax=None, **kwargs):
    """
    Draw a graph showing the probability of being alive for a customer in time.

    Parameters
    ----------
    model: lifetimes model
        A fitted lifetimes model.
    t: int
        the number of time units since the birth we want to draw the p_alive
    transactions: pandas DataFrame
        DataFrame containing the transactions history of the customer_id
    datetime_col: str
        The column in the transactions that denotes the datetime the purchase
        was made
    freq: str, optional
        Default 'D' for days. Other examples= 'W' for weekly
    start_date: datetime, optional
        Limit xaxis to start date. Defaults to the earliest transaction date.
    ax: matplotlib.AxesSubplot, optional
        Using user axes. Defaults to ``plt.subplot(111)``.
    kwargs
        Passed into the plot command that draws the P_alive line. ``label``
        defaults to ``'P_alive'`` when not supplied.

    Returns
    -------
    axes: matplotlib.AxesSubplot
    """
    from matplotlib import pyplot as plt

    first_date = min(transactions[datetime_col])
    if start_date is None:
        start_date = first_date
    if ax is None:
        ax = plt.subplot(111)

    # Purchases per period. Only a counter is resampled, so the datetime
    # column itself is never summed (which recent pandas refuses to do).
    purchase_counts = pd.Series(
        1, index=pd.DatetimeIndex(transactions[datetime_col])
    ).resample(freq).sum()

    # Alive path. Everything is drawn through `ax` so that an axes supplied
    # by the caller is really the one that receives the plot.
    path = calculate_alive_path(model, transactions, datetime_col, t, freq)
    path_dates = pd.date_range(start=first_date, periods=len(path), freq=freq)

    # Forward the caller's keyword arguments as documented; a caller-provided
    # label replaces the default instead of clashing with it.
    line_kwargs = dict(kwargs)
    line_kwargs.setdefault('label', 'P_alive')
    ax.plot(path_dates, path, '-', **line_kwargs)

    # Periods with at least one purchase.
    payment_dates = purchase_counts[purchase_counts >= 1].index
    ax.vlines(payment_dates.values, ymin=0, ymax=1,
              colors='r', linestyles='dashed', label='purchases')

    ax.set_ylim(0, 1.0)
    ax.set_yticks(np.arange(0, 1.1, 0.1))
    ax.set_xlim(start_date, path_dates[-1])
    ax.legend(loc=3)
    ax.set_ylabel('P_alive')
    ax.set_title('History of P_alive')
    return ax
def plot_history_alive(model, t, transactions, datetime_col, freq='D',
                       start_date=None, end_date=None, ax=None,
                       title='Evolution of survival probability', **kwargs):
    """
    Draw a graph showing the probability of being alive for a customer in time.

    The curve is drawn in percent. It is solid up to the refresh date (first
    transaction plus ``t`` days) and dashed after it. Vertical lines mark the
    orders, the refresh date and today.

    Parameters
    ----------
    model: lifetimes model
        A fitted lifetimes model.
    t: int
        Number of days between the first transaction and the last refresh
        of the data.
    transactions: pandas DataFrame
        DataFrame containing the transactions history of the customer_id
    datetime_col: str
        The column in the transactions that denotes the datetime the purchase
        was made. Values are parsed as ``'%Y-%m-%d'`` strings.
    freq: str, optional
        Default 'D' for days. Other examples= 'W' for weekly
    start_date: datetime, optional
        Limit xaxis to start date. Defaults to 7 days before the first
        transaction.
    end_date: str or datetime, optional
        Last date to cover, as a ``'%Y-%m-%d'`` string or a datetime.
        Defaults to today.
    ax: matplotlib.AxesSubplot, optional
        Using user axes. Defaults to ``plt.subplot(111)``.
    title: str, optional
        Title of the plot.
    kwargs
        Accepted for interface compatibility; currently not used.

    Returns
    -------
    axes: matplotlib.AxesSubplot
    """
    from matplotlib import pyplot as plt

    date_format = "%Y-%m-%d"
    lead_in_periods = 7  # empty periods placed in front of the alive path

    first_transaction = dt.datetime.strptime(
        transactions[datetime_col].min(), date_format)
    # Midnight of the current day.
    today = dt.datetime.combine(dt.datetime.now().date(), dt.time())

    if start_date is None:
        start_date = first_transaction - dt.timedelta(days=7)
    if end_date is None:
        # The signature's default used to crash in strptime; fall back to today.
        end_date = today
    elif isinstance(end_date, str):
        end_date = dt.datetime.strptime(end_date, date_format)
    if ax is None:
        ax = plt.subplot(111)

    # Purchases per period. Only a counter is resampled, so the date column
    # itself is never summed.
    purchase_counts = pd.Series(
        1, index=pd.DatetimeIndex(transactions[datetime_col])
    ).resample(freq).sum()

    # Kept as a datetime rather than a formatted string so that comparisons
    # and matplotlib receive real dates.
    refresh_date = first_transaction + dt.timedelta(days=t)

    periods_to_plot = (end_date - start_date).days
    if freq == 'W':
        periods_to_plot = int(periods_to_plot / 7.)

    # Alive path in percent, preceded by the empty lead-in.
    # NOTE(review): the fixed 7-period lead-in lines up with the default
    # start_date only for daily frequency -- confirm intent for other cases.
    alive_path = calculate_alive_path(
        model, transactions, datetime_col, periods_to_plot, freq) * 100.
    path = pd.concat([pd.Series([np.nan] * lead_in_periods), alive_path])
    path_dates = pd.date_range(start=start_date, periods=len(path), freq=freq)

    # Solid up to the refresh date, dashed afterwards. The refresh date itself
    # belongs to both segments so the two lines join.
    is_past = path_dates <= refresh_date
    is_future = path_dates >= refresh_date
    ax.plot(path_dates[is_past], path[is_past],
            color=glovo_gray, ls='-', label='P_alive')
    ax.plot(path_dates[is_future], path[is_future],
            color=glovo_gray, ls='--')

    # Periods with at least one order, plus the two reference dates.
    payment_dates = purchase_counts[purchase_counts >= 1].index
    ax.vlines(payment_dates.values, ymin=0, ymax=100,
              colors=glovo_green, linestyles='dashed', label='Orders')
    ax.vlines(refresh_date, ymin=0, ymax=100,
              colors=glovo_yellow, linestyles='solid', label='Last refresh')
    ax.vlines(today, ymin=0, ymax=100,
              colors='red', linestyles='solid', label='Today')

    ax.set_ylim(0, 105)
    ax.set_yticks(np.arange(0, 110, 10))
    ax.tick_params(axis='x', labelrotation=-20.)
    ax.set_xlim(start_date, path_dates[-1])
    ax.legend(loc=3)
    ax.set_ylabel('P_alive (%)')
    ax.set_title(title)
    return ax