def plot_history_alive_indiv(self, df, indiv):
    """Save the probability-alive history plot of one customer as a PNG.

    Calls ``plot_history_alive`` with ``self.bgf``, ``self.individual['T']``
    (converted with ``int``), ``self.sp_trans`` and ``'OrderDate'`` as the
    date-column argument. The current figure is then written to
    ``ProbabilityAliveByHistory_Customer<indiv>.png`` and closed.

    ``df`` is accepted but not used by this method.
    """
    horizon = int(self.individual['T'])
    plot_history_alive(self.bgf, horizon, self.sp_trans, 'OrderDate')

    output_name = 'ProbabilityAliveByHistory_Customer{}.png'.format(indiv)
    plt.savefig(output_name)
    plt.close()
def test_plot_customer_alive_history(self):
    """Draw the probability-alive history for one customer and return the figure.

    Opens a new figure, selects the transactions of customer ``35`` from
    ``load_transaction_data()``, plots them with
    ``plotting.plot_history_alive`` for ``200`` days since birth, and
    returns the current matplotlib figure.
    """
    plt.figure()
    transaction_data = load_transaction_data()
    # yes I know this is using the wrong data, but I'm testing plotting here.
    customer_id = 35  # renamed from ``id`` so the builtin is not shadowed
    days_since_birth = 200
    # ``DataFrame.ix`` was removed in pandas 1.0; ``.loc`` selects the same
    # rows for a boolean mask.
    sp_trans = transaction_data.loc[transaction_data['id'] == customer_id]
    plotting.plot_history_alive(bgf, days_since_birth, sp_trans, 'date')
    return plt.gcf()
def test_plot_customer_alive_history(self):
    """Draw the probability-alive history for one customer and show it.

    Selects the transactions of customer ``35`` from
    ``load_transaction_data()``, plots them on a new figure with
    ``plotting.plot_history_alive`` for ``200`` days since birth, and
    displays the result with ``plt.show()``.
    """
    from matplotlib import pyplot as plt

    transaction_data = load_transaction_data()
    # yes I know this is using the wrong data, but I'm testing plotting here.
    customer_id = 35  # renamed from ``id`` so the builtin is not shadowed
    days_since_birth = 200
    # ``DataFrame.ix`` was removed in pandas 1.0; ``.loc`` selects the same
    # rows for a boolean mask.
    sp_trans = transaction_data.loc[transaction_data['id'] == customer_id]

    plt.figure()
    plotting.plot_history_alive(bgf, days_since_birth, sp_trans, 'date')
    plt.show()
def test_plot_customer_alive_history(self, bgf):
    """Check what ``plotting.plot_history_alive`` draws for customer 35.

    Compares the first line's x/y data, the legend entries, the title and
    both axis labels of the returned axes against fixed expectations, then
    closes the current figure.
    """
    from datetime import datetime, timedelta

    # One expected x value per day from 2014-06-30 up to (not including)
    # 2015-01-17.
    expected_dates = np.arange(
        datetime(2014, 6, 30), datetime(2015, 1, 17), timedelta(days=1)
    )
    expected_p_alive = [
        1.0, 1.0, 1.0, 0.75, 0.72, 0.69, 0.67, 0.64, 0.62, 0.59, 0.57, 0.55,
        0.81, 0.79, 0.77, 0.75, 0.85, 0.87, 0.85, 0.82, 0.8, 0.89, 0.87, 0.84,
        0.82, 0.79, 0.76, 0.73, 0.69, 0.66, 0.62, 0.59, 0.9, 0.89, 0.87, 0.85,
        0.83, 0.81, 0.91, 0.9, 0.88, 0.87, 0.85, 0.83, 0.8, 0.78, 0.75, 0.73,
        0.7, 0.67, 0.64, 0.61, 0.57, 0.54, 0.51, 0.48, 0.45, 0.42, 0.39, 0.37,
        0.34, 0.32, 0.29, 0.27, 0.25, 0.23, 0.21, 0.2, 0.18, 0.92, 0.93, 0.92,
        0.91, 0.9, 0.89, 0.88, 0.87, 0.85, 0.84, 0.82, 0.8, 0.79, 0.77, 0.75,
        0.73, 0.71, 0.94, 0.93, 0.92, 0.91, 0.9, 0.89, 0.88, 0.87, 0.86, 0.85,
        0.83, 0.82, 0.8, 0.78, 0.77, 0.94, 0.93, 0.93, 0.92, 0.91, 0.9, 0.89,
        0.94, 0.94, 0.93, 0.92, 0.92, 0.91, 0.9, 0.89, 0.88, 0.87, 0.95, 0.94,
        0.94, 0.95, 0.95, 0.94, 0.93, 0.95, 0.95, 0.94, 0.94, 0.93, 0.92, 0.91,
        0.9, 0.89, 0.88, 0.87, 0.86, 0.96, 0.95, 0.95, 0.94, 0.93, 0.93, 0.92,
        0.91, 0.9, 0.89, 0.88, 0.87, 0.85, 0.84, 0.83, 0.81, 0.79, 0.78, 0.76,
        0.74, 0.72, 0.7, 0.68, 0.66, 0.63, 0.61, 0.59, 0.56, 0.54, 0.52, 0.49,
        0.47, 0.44, 0.42, 0.4, 0.38, 0.36, 0.34, 0.32, 0.3, 0.28, 0.26, 0.24,
        0.23, 0.21, 0.2, 0.18, 0.17, 0.16, 0.15, 0.14, 0.13, 0.12, 0.11, 0.1,
        0.1, 0.09, 0.08, 0.08, 0.07, 0.07, 0.06, 0.06, 0.05
    ]
    expected_legend = ['P_alive', 'purchases']

    # yes I know this is using the wrong data, but I'm testing plotting here.
    all_transactions = load_transaction_data()
    customer_id = 35
    horizon_days = 200
    customer_transactions = all_transactions.loc[
        all_transactions['id'] == customer_id
    ]

    ax = plotting.plot_history_alive(
        bgf, horizon_days, customer_transactions, 'date'
    )
    drawn_x, drawn_y = ax.lines[0].get_data()

    # y has some weird array shapes, so round element by element.
    rounded_y = [np.round(value, 5) for value in drawn_y]
    assert_allclose(rounded_y, expected_p_alive, atol=0.01)
    assert_array_equal(drawn_x, expected_dates)

    legend_texts = [entry.get_text() for entry in ax.legend_.get_texts()]
    assert_array_equal(legend_texts, expected_legend)

    assert_equal(ax.title.get_text(), "History of P_alive")
    assert_equal(ax.xaxis.get_label().get_text(), "")
    assert_equal(ax.yaxis.get_label().get_text(), "P_alive")
    plt.close()
# Plot period transactions for the model `bgf` and save the figure to disk.
from lifetimes.plotting import plot_period_transactions
plot_period_transactions(bgf)
plt.savefig('period_transactions.png', dpi=200)
# Clear the figure and axes, then close, before drawing the next plot.
plt.clf()
plt.cla()
plt.close()
# Load the transaction log used for the single-customer history plot.
transaction_data = pd.read_csv('transaction_data_clean.csv')
from lifetimes.plotting import plot_history_alive
# NOTE(review): `id` shadows the Python builtin of the same name.
id = 14096
days_since_birth = 200
# Keep only the rows whose CustomerID equals the chosen customer.
sp_trans = transaction_data.loc[transaction_data['CustomerID'] == id]
plot_history_alive(bgf, days_since_birth, sp_trans, 'InvoiceDate')
plt.savefig('history_alive.png', dpi=200)
plt.clf()
plt.cla()
plt.close()
# Restrict the summary table to customers with frequency > 0.
returning_customers_summary = data.loc[data['frequency'] > 0]
from lifetimes import GammaGammaFitter
# Fit a Gamma-Gamma model (penalizer_coef=0) on frequency and monetary value.
ggf = GammaGammaFitter(penalizer_coef=0)
ggf.fit(returning_customers_summary['frequency'], returning_customers_summary['monetary_value'])
print(ggf)
# NOTE(review): the call below is cut off in this view; its remaining
# arguments are not visible here.
bgf.fit(returning_customers_summary['frequency'],
# Show the summary row at position 9.
summary.iloc[9]
# Number of periods to predict purchases for.
# NOTE(review): the original comment said "10 periods" but t is 12 — confirm
# which horizon is intended.
t = 12
individual = summary.iloc[20]
# The below function is an alias to `bgf.conditional_expected_number_of_purchases_up_to_time`
bgf.predict(t, individual['frequency'], individual['recency'], individual['T'])
# 0.0576511 (presumably the value the call above returned when the author ran it)
df.head()
from lifetimes.plotting import plot_history_alive
customerId = 3
# the number of time units since the customer's birth over which to draw p_alive
days_since_birth = 24
# Keep only the transactions whose customerId column equals the chosen customer.
sp_trans = df.loc[df['customerId'] == customerId]
plot_history_alive(bgf, days_since_birth, sp_trans, 'date', freq='M')
df.columns
# Aggregate statistics of valueWithIVA for customer 3.
# NOTE(review): the id is hard-coded here rather than reusing `customerId`.
df.loc[df.customerId == 3].agg(
    {'valueWithIVA': ['sum', 'count', 'mean', 'std', 'min', 'max']})
summary.head(10)
# Keep only rows with positive frequency and positive monetary value.
summary_ggf = summary.loc[(summary.frequency > 0) & (summary.monetary_value > 0)]
summary_ggf.columns
# Correlation between frequency and monetary value on the filtered rows.
summary_ggf[['frequency', 'monetary_value']].corr()
# clear past visualization instructions plt.clf() # customer of interest CustomerID = '12383' # grab customer's metrics and transaction history cmetrics_pd = input_pd[input_pd['CustomerID'] == CustomerID] trans_history = orders_pd.loc[orders_pd['CustomerID'] == CustomerID] # calculate age at end of dataset days_since_birth = 400 # plot history of being "alive" plot_history_alive(model, days_since_birth, trans_history, 'InvoiceDate') display() # COMMAND ---------- # MAGIC %md From this chart, we can see this customer made his or her first purchase in January 2011 followed by a repeat purchase later that month. There was about a 1-month lull in activity during which the probability of the customer being alive declined slightly but with purchases in March, April and June of that year, the customer sent repeated signals that he or she was engaged. Since that last June purchase, the customer hasn't been seen in our transaction history, and our belief that the customer remains engaged has been dropping though as a moderate pace given the signals previously sent. # MAGIC # MAGIC How does the model arrive at these probabilities? The exact math is tricky but by plotting the probability of being alive as a heatmap relative to frequency and recency, we can understand the probabilities assigned to the intersections of these two values: # COMMAND ---------- from lifetimes.plotting import plot_probability_alive_matrix # set figure size plt.subplots(figsize=(12, 8))
def plot_hist_median_freq(bgf, conv, df):
    """Plot the alive-history of the customer with the median frequency.

    The customer is the index label of the first row of ``conv`` whose
    ``frequency`` is closest to the median of that column. When a row sits
    exactly on the median this is the same row an exact-match lookup
    returns. When the median falls between two observed values (for
    example with an even number of rows) the nearest row is used instead
    of failing on an empty selection.

    Parameters
    ----------
    bgf : fitted model forwarded to ``plot_history_alive``.
    conv : DataFrame with a ``frequency`` column, indexed by customer id.
    df : transaction DataFrame with ``id`` and ``date`` columns.

    Returns
    -------
    Whatever ``plot_history_alive`` returns for a 365-day history.

    Raises
    ------
    IndexError
        If ``conv`` has no row with a non-missing ``frequency``.
    """
    frequencies = conv['frequency']
    if not frequencies.notna().any():
        # Same exception type the exact-match lookup raised on an empty
        # selection, so existing callers see no change.
        raise IndexError("conv has no frequency value to take a median of")

    # Reduce only the column that is needed, so other columns of ``conv``
    # do not have to support ``median``.
    distance_to_median = (frequencies - frequencies.median()).abs()
    customer_id = distance_to_median.idxmin()

    days_since_birth = 365
    sp_trans = df.loc[df['id'] == customer_id]
    return plot_history_alive(bgf, days_since_birth, sp_trans, 'date')
def plot_hist_best_freq(bgf, conv, df):
    """Plot the alive-history of the customer with the highest frequency.

    Parameters
    ----------
    bgf : fitted model forwarded to ``plot_history_alive``.
    conv : DataFrame with a ``frequency`` column, indexed by customer id.
    df : transaction DataFrame with ``id`` and ``date`` columns.

    Returns
    -------
    Whatever ``plot_history_alive`` returns for a 365-day history.
    """
    # Take the arg-max of the ``frequency`` column only. Reducing the whole
    # frame computes an index for every column just to discard all but one,
    # and depends on every other column supporting ``idxmax``.
    customer_id = conv['frequency'].idxmax()

    days_since_birth = 365
    sp_trans = df.loc[df['id'] == customer_id]
    return plot_history_alive(bgf, days_since_birth, sp_trans, 'date')