def test_expected_cumulative_transactions_dedups_inside_a_time_period(
        fitted_bg, example_transaction_data):
    """Compare weekly against daily cumulative 'actual' counts.

    Runs ``utils.expected_cumulative_transactions`` for 10 periods at weekly
    and at daily frequency and asserts that every weekly 'actual' value is
    greater than or equal to the corresponding daily value.
    """
    # Weekly is evaluated first, then daily, one call per frequency.
    actual_by_freq = {
        freq: utils.expected_cumulative_transactions(
            fitted_bg, example_transaction_data, 'date', 'id', 10, freq=freq
        )['actual']
        for freq in ('W', 'D')
    }

    assert (actual_by_freq['W'] >= actual_by_freq['D']).all()
def test_expected_cumulative_transactions_dedups_inside_a_time_period(
    fitted_bg, example_transaction_data
):
    """Assert weekly cumulative 'actual' counts are never below daily ones.

    Both frames come from ``utils.expected_cumulative_transactions`` with
    ``t=10``; only the ``freq`` argument differs between the two calls.
    """

    def cumulative_actual(freq):
        # Same model, data and horizon on every call; only the bucket size varies.
        frame = utils.expected_cumulative_transactions(
            fitted_bg, example_transaction_data, "date", "id", 10, freq=freq
        )
        return frame["actual"]

    weekly_actual = cumulative_actual("W")
    daily_actual = cumulative_actual("D")

    assert (weekly_actual >= daily_actual).all()
def df_cum_transactions(cdnow_transactions):
    """Return the cumulative-transactions frame for the CDNOW sample.

    Summarises ``cdnow_transactions`` (daily frequency, frequency multiplier
    of 7, observation period ending '19970930'), fits a ``ParetoNBDFitter`` on
    the summary's frequency/recency/T columns, and returns the result of
    ``utils.expected_cumulative_transactions`` for ``25 * 7`` time units with
    ``set_index_date=False``.
    """
    id_col, date_col = 'id_sample', 'date'
    date_format, period = '%Y%m%d', 'D'
    multiplier = 7
    horizon = 25 * 7

    summary = utils.summary_data_from_transaction_data(
        cdnow_transactions,
        id_col,
        date_col,
        datetime_format=date_format,
        freq=period,
        freq_multiplier=multiplier,
        observation_period_end='19970930',
    ).reset_index()

    fitter = ParetoNBDFitter()
    fitter.fit(summary['frequency'], summary['recency'], summary['T'])

    return utils.expected_cumulative_transactions(
        fitter,
        cdnow_transactions,
        date_col,
        id_col,
        horizon,
        date_format,
        period,
        set_index_date=False,
        freq_multiplier=multiplier,
    )
def plot_incremental_transactions(model, transactions, datetime_col, customer_id_col, t, t_cal,
                                  datetime_format=None, freq='D', set_index_date=False,
                                  title='Tracking Daily Transactions', xlabel='day',
                                  ylabel='Transactions', ax=None, **kwargs):
    """
    Plot a figure of the predicted and actual incremental (per-period) transactions of users.

    The values are obtained by differencing the cumulative transactions
    returned by ``expected_cumulative_transactions``; the first row therefore
    has no predecessor and is NaN.

    Parameters
    ----------
    model: lifetimes model
        A fitted lifetimes model
    transactions: pandas DataFrame
        DataFrame containing the transactions history of the customer_id
    datetime_col: str
        The column in transactions that denotes the datetime the purchase was made.
    customer_id_col: str
        The column in transactions that denotes the customer_id
    t: float
        The number of time units since the beginning of data for which we want
        to calculate cumulative transactions
    t_cal: float
        Position at which a red dashed vertical line is drawn. When
        set_index_date is True it is truncated to int and used as a positional
        lookup into the date index; otherwise it is used as the x value directly.
    datetime_format: str, optional
        A string that represents the timestamp format. Useful if Pandas
        can't understand the provided format.
    freq: str, optional
        Default 'D' for days, 'W' for weeks, 'M' for months... etc. Full list here:
        http://pandas.pydata.org/pandas-docs/stable/timeseries.html#dateoffset-objects
    set_index_date: bool, optional
        When True set date as Pandas DataFrame index, default False - number of time units
    title: str, optional
        Figure title
    xlabel: str, optional
        Figure xlabel, if set_index_date is True it is overwritten with 'date'
    ylabel: str, optional
        Figure ylabel
    ax: matplotlib.AxesSubplot, optional
        Axes to draw on; a new ``plt.subplot(111)`` is created when None.
    kwargs
        Passed into the pandas.DataFrame.plot command.

    Returns
    -------
    axes: matplotlib.AxesSubplot
    """
    from matplotlib import pyplot as plt

    # Explicit None check instead of truthiness, mirroring plot_cumulative_transactions.
    if ax is None:
        ax = plt.subplot(111)

    df_cum_transactions = expected_cumulative_transactions(
        model, transactions, datetime_col, customer_id_col, t,
        datetime_format=datetime_format, freq=freq,
        set_index_date=set_index_date)

    # get incremental from cumulative transactions
    df_incremental = df_cum_transactions.diff()

    ax = df_incremental.plot(ax=ax, title=title, **kwargs)

    if set_index_date:
        # With a date index the marker must be an index label, not a period count.
        x_vline = df_incremental.index[int(t_cal)]
        xlabel = 'date'
    else:
        x_vline = t_cal

    ax.axvline(x=x_vline, color='r', linestyle='--')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    return ax
def test_expected_cumulative_transactions_date_index(cdnow_transactions):
    """
    Check date-indexed cumulative transactions produced with a BetaGeoFitter.

    Requests 14 daily periods of cdnow transactions with freq_multiplier = 1
    and set_index_date=True, then compares the last 4 rows against
    previously validated values:

    dates = ['1997-01-11', '1997-01-12', '1997-01-13', '1997-01-14']
    actual_trans = [11, 12, 15, 19]
    expected_trans = [10.67, 12.67, 14.87, 17.24]
    """
    id_col = "id_sample"
    date_col = "date"
    date_format = "%Y%m%d"
    period = "D"
    multiplier = 1

    summary = utils.summary_data_from_transaction_data(
        cdnow_transactions,
        id_col,
        date_col,
        datetime_format=date_format,
        freq=period,
        freq_multiplier=multiplier,
        observation_period_end="19970930",
    ).reset_index()

    bgf = BetaGeoFitter()
    bgf.fit(summary["frequency"], summary["recency"], summary["T"])

    cumulative = utils.expected_cumulative_transactions(
        bgf,
        cdnow_transactions,
        date_col,
        id_col,
        14,
        date_format,
        period,
        set_index_date=True,
        freq_multiplier=multiplier,
    )

    # Reference values for the final four days of the 14-day window.
    dates = ["1997-01-11", "1997-01-12", "1997-01-13", "1997-01-14"]
    actual_trans = [11, 12, 15, 19]
    expected_trans = [10.67, 12.67, 14.87, 17.24]

    tail = cumulative.iloc[-4:]
    date_index = tail.index.to_timestamp().astype(str)
    tail_actual = tail["actual"].values
    tail_predicted = tail["predicted"].values.round(2)

    assert all(dates == date_index)
    assert_allclose(tail_actual, actual_trans)
    assert_allclose(tail_predicted, expected_trans, atol=1e-2)
def df_cum_transactions(cdnow_transactions):
    """Fit a Pareto/NBD model on the CDNOW sample and return its cumulative frame.

    The transactions are summarised at daily frequency with a frequency
    multiplier of 7 and an observation period ending "19970930". A
    ``ParetoNBDFitter`` is fitted on the summary, and
    ``utils.expected_cumulative_transactions`` is evaluated for ``25 * 7``
    time units with ``set_index_date=False``.
    """
    columns = {"customer": "id_sample", "when": "date"}
    stamp_format = "%Y%m%d"
    unit = "D"
    units_per_step = 7

    rfm = utils.summary_data_from_transaction_data(
        cdnow_transactions,
        columns["customer"],
        columns["when"],
        datetime_format=stamp_format,
        freq=unit,
        freq_multiplier=units_per_step,
        observation_period_end="19970930",
    )
    rfm = rfm.reset_index()

    pareto = ParetoNBDFitter()
    pareto.fit(rfm["frequency"], rfm["recency"], rfm["T"])

    cumulative = utils.expected_cumulative_transactions(
        pareto,
        cdnow_transactions,
        columns["when"],
        columns["customer"],
        25 * 7,
        stamp_format,
        unit,
        set_index_date=False,
        freq_multiplier=units_per_step,
    )
    return cumulative
def plot_cumulative_transactions(model, transactions, datetime_col, customer_id_col, t, t_cal,
                                 datetime_format=None, freq='D', set_index_date=False,
                                 title='Tracking Cumulative Transactions', xlabel='day',
                                 ylabel='Cumulative Transactions', ax=None, **kwargs):
    """
    Plot a figure of the predicted and actual cumulative transactions of users.

    Parameters
    ----------
    model: lifetimes model
        A fitted lifetimes model
    transactions: pandas DataFrame
        DataFrame containing the transactions history of the customer_id
    datetime_col: str
        The column in transactions that denotes the datetime the purchase was made.
    customer_id_col: str
        The column in transactions that denotes the customer_id
    t: float
        The number of time units since the beginning of data for which we want
        to calculate cumulative transactions
    t_cal: float
        Position at which a red dashed vertical line is drawn. When
        set_index_date is True it is truncated to int and used as a positional
        lookup into the date index; otherwise it is used as the x value directly.
    datetime_format: str, optional
        A string that represents the timestamp format. Useful if Pandas
        can't understand the provided format.
    freq: str, optional
        Default 'D' for days, 'W' for weeks, 'M' for months... etc. Full list here:
        http://pandas.pydata.org/pandas-docs/stable/timeseries.html#dateoffset-objects
    set_index_date: bool, optional
        When True set date as Pandas DataFrame index, default False - number of time units
    title: str, optional
        Figure title
    xlabel: str, optional
        Figure xlabel; replaced by 'date' when set_index_date is True
    ylabel: str, optional
        Figure ylabel
    ax: matplotlib.AxesSubplot, optional
        Using user axes
    kwargs
        Passed into the pandas.DataFrame.plot command.

    Returns
    -------
    axes: matplotlib.AxesSubplot
    """
    from matplotlib import pyplot as plt

    axes = plt.subplot(111) if ax is None else ax

    cumulative = expected_cumulative_transactions(
        model,
        transactions,
        datetime_col,
        customer_id_col,
        t,
        datetime_format=datetime_format,
        freq=freq,
        set_index_date=set_index_date,
    )
    axes = cumulative.plot(ax=axes, title=title, **kwargs)

    # With a date index the marker must be an index label rather than a period count.
    if set_index_date:
        marker_x, x_axis_label = cumulative.index[int(t_cal)], 'date'
    else:
        marker_x, x_axis_label = t_cal, xlabel

    axes.axvline(x=marker_x, color='r', linestyle='--')
    axes.set_xlabel(x_axis_label)
    axes.set_ylabel(ylabel)
    return axes