def plot_history_alive_indiv(self, df, indiv):
    """
    Save the alive/active history plot of a single customer to a PNG file.

    The plot is drawn by ``plot_history_alive`` from state held on the
    instance: ``self.bgf``, ``self.individual['T']`` (truncated to an int)
    and ``self.sp_trans``, using the ``'OrderDate'`` column.

    :param df: not used by this method.
    :param indiv: value inserted into the output file name.
    """
    history_length = int(self.individual['T'])
    plot_history_alive(self.bgf, history_length, self.sp_trans, 'OrderDate')

    # Write the figure that was just drawn, then close it.
    target_file = 'ProbabilityAliveByHistory_Customer{}.png'.format(indiv)
    plt.savefig(target_file)
    plt.close()
Beispiel #2
0
 def test_plot_customer_alive_history(self):
     """Draw the alive-probability history of one customer on a new figure.

     Uses the module-level ``bgf`` model and the data returned by
     ``load_transaction_data()``.

     Returns the current matplotlib figure (``plt.gcf()``) -- presumably for
     an image-comparison test harness; confirm against the test runner.
     """
     plt.figure()
     transaction_data = load_transaction_data()
     # yes I know this is using the wrong data, but I'm testing plotting here.
     customer_id = 35  # renamed from ``id`` to stop shadowing the builtin
     days_since_birth = 200
     # ``DataFrame.ix`` was removed in pandas 1.0; a boolean-mask row
     # selection is spelled ``.loc`` and selects the same rows.
     sp_trans = transaction_data.loc[transaction_data['id'] == customer_id]
     plotting.plot_history_alive(bgf, days_since_birth, sp_trans, 'date')
     return plt.gcf()
Beispiel #3
0
 def test_plot_customer_alive_history(self):
     """Draw the alive-probability history of one customer on a new figure.

     Uses the module-level ``bgf`` model and the data returned by
     ``load_transaction_data()``.

     Returns the current matplotlib figure (``plt.gcf()``) -- presumably for
     an image-comparison test harness; confirm against the test runner.
     """
     plt.figure()
     transaction_data = load_transaction_data()
     # yes I know this is using the wrong data, but I'm testing plotting here.
     customer_id = 35  # renamed from ``id`` to stop shadowing the builtin
     days_since_birth = 200
     # ``DataFrame.ix`` was removed in pandas 1.0; a boolean-mask row
     # selection is spelled ``.loc`` and selects the same rows.
     sp_trans = transaction_data.loc[transaction_data['id'] == customer_id]
     plotting.plot_history_alive(bgf, days_since_birth, sp_trans, 'date')
     return plt.gcf()
    def test_plot_customer_alive_history(self):
        """Draw the alive-probability history of one customer and show it.

        Uses the module-level ``bgf`` model and the data returned by
        ``load_transaction_data()``. Nothing is asserted; the figure is
        handed to ``plt.show()``.
        """
        from matplotlib import pyplot as plt

        transaction_data = load_transaction_data()
        # yes I know this is using the wrong data, but I'm testing plotting here.
        customer_id = 35  # renamed from ``id`` to stop shadowing the builtin
        days_since_birth = 200
        # ``DataFrame.ix`` was removed in pandas 1.0; a boolean-mask row
        # selection is spelled ``.loc`` and selects the same rows.
        sp_trans = transaction_data.loc[transaction_data['id'] == customer_id]
        plt.figure()
        plotting.plot_history_alive(bgf, days_since_birth, sp_trans, 'date')
        plt.show()
Beispiel #5
0
    def test_plot_customer_alive_history(self):
        """Draw the alive-probability history of one customer and show it.

        Uses the module-level ``bgf`` model and the data returned by
        ``load_transaction_data()``. Nothing is asserted; the figure is
        handed to ``plt.show()``.
        """
        from matplotlib import pyplot as plt

        transaction_data = load_transaction_data()
        # yes I know this is using the wrong data, but I'm testing plotting here.
        customer_id = 35  # renamed from ``id`` to stop shadowing the builtin
        days_since_birth = 200
        # ``DataFrame.ix`` was removed in pandas 1.0; a boolean-mask row
        # selection is spelled ``.loc`` and selects the same rows.
        sp_trans = transaction_data.loc[transaction_data['id'] == customer_id]
        plt.figure()
        plotting.plot_history_alive(bgf, days_since_birth, sp_trans, 'date')
        plt.show()
Beispiel #6
0
    def test_plot_customer_alive_history(self, bgf):
        """Check the curve, legend, title and axis labels of the P_alive plot.

        Plots 200 days of history for customer 35 with
        ``plotting.plot_history_alive`` and compares the first line of the
        returned axes, the legend texts, the title and both axis labels
        against fixed expectations.
        """
        from datetime import datetime, timedelta

        # --- inputs -------------------------------------------------------
        # yes I know this is using the wrong data, but I'm testing plotting here.
        id_user = 35
        days_since_birth = 200
        transaction_data = load_transaction_data()
        belongs_to_user = transaction_data['id'] == id_user
        sp_trans = transaction_data.loc[belongs_to_user]

        # --- expectations -------------------------------------------------
        first_day = datetime(2014, 6, 30)
        end_day = datetime(2015, 1, 17)
        x_expected = np.arange(first_day, end_day, timedelta(days=1))
        y_expected = [
            1.0, 1.0, 1.0, 0.75, 0.72, 0.69, 0.67, 0.64, 0.62, 0.59, 0.57,
            0.55, 0.81, 0.79, 0.77, 0.75, 0.85, 0.87, 0.85, 0.82, 0.8, 0.89,
            0.87, 0.84, 0.82, 0.79, 0.76, 0.73, 0.69, 0.66, 0.62, 0.59, 0.9,
            0.89, 0.87, 0.85, 0.83, 0.81, 0.91, 0.9, 0.88, 0.87, 0.85, 0.83,
            0.8, 0.78, 0.75, 0.73, 0.7, 0.67, 0.64, 0.61, 0.57, 0.54, 0.51,
            0.48, 0.45, 0.42, 0.39, 0.37, 0.34, 0.32, 0.29, 0.27, 0.25, 0.23,
            0.21, 0.2, 0.18, 0.92, 0.93, 0.92, 0.91, 0.9, 0.89, 0.88, 0.87,
            0.85, 0.84, 0.82, 0.8, 0.79, 0.77, 0.75, 0.73, 0.71, 0.94, 0.93,
            0.92, 0.91, 0.9, 0.89, 0.88, 0.87, 0.86, 0.85, 0.83, 0.82, 0.8,
            0.78, 0.77, 0.94, 0.93, 0.93, 0.92, 0.91, 0.9, 0.89, 0.94, 0.94,
            0.93, 0.92, 0.92, 0.91, 0.9, 0.89, 0.88, 0.87, 0.95, 0.94, 0.94,
            0.95, 0.95, 0.94, 0.93, 0.95, 0.95, 0.94, 0.94, 0.93, 0.92, 0.91,
            0.9, 0.89, 0.88, 0.87, 0.86, 0.96, 0.95, 0.95, 0.94, 0.93, 0.93,
            0.92, 0.91, 0.9, 0.89, 0.88, 0.87, 0.85, 0.84, 0.83, 0.81, 0.79,
            0.78, 0.76, 0.74, 0.72, 0.7, 0.68, 0.66, 0.63, 0.61, 0.59, 0.56,
            0.54, 0.52, 0.49, 0.47, 0.44, 0.42, 0.4, 0.38, 0.36, 0.34, 0.32,
            0.3, 0.28, 0.26, 0.24, 0.23, 0.21, 0.2, 0.18, 0.17, 0.16, 0.15,
            0.14, 0.13, 0.12, 0.11, 0.1, 0.1, 0.09, 0.08, 0.08, 0.07, 0.07,
            0.06, 0.06, 0.05
        ]
        labels = ['P_alive', 'purchases']

        # --- exercise -----------------------------------------------------
        ax = plotting.plot_history_alive(
            bgf, days_since_birth, sp_trans, 'date')
        x, y = ax.lines[0].get_data()
        legend_texts = [entry.get_text() for entry in ax.legend_.get_texts()]

        # --- verify -------------------------------------------------------
        # y has some weird array shapes, so round it element by element.
        rounded_y = [np.round(value, 5) for value in y]
        assert_allclose(rounded_y, y_expected, atol=0.01)
        assert_array_equal(x, x_expected)
        assert_array_equal(legend_texts, labels)
        assert_equal(ax.title.get_text(), "History of P_alive")
        assert_equal(ax.xaxis.get_label().get_text(), "")
        assert_equal(ax.yaxis.get_label().get_text(), "P_alive")
        plt.close()
Beispiel #7
0
# Draw the period-transactions plot for the model object `bgf`
# (`bgf` and `plt` are defined outside this excerpt).
from lifetimes.plotting import plot_period_transactions
plot_period_transactions(bgf)

# Write the current figure to disk at 200 dpi, then reset pyplot state
# (clear the figure, clear the axes, close the figure) before the next plot.
plt.savefig('period_transactions.png', dpi=200)
plt.clf()
plt.cla()
plt.close()


# Load the transaction log from CSV (`pd` is imported outside this excerpt).
transaction_data = pd.read_csv('transaction_data_clean.csv')

from lifetimes.plotting import plot_history_alive
# Customer to inspect.
# NOTE(review): `id` shadows the builtin; it is left unchanged because it is a
# module-level name that code outside this excerpt may read.
id = 14096
# Length of history to plot, passed straight to plot_history_alive.
days_since_birth = 200
# Keep only the rows whose CustomerID equals the chosen customer.
sp_trans = transaction_data.loc[transaction_data['CustomerID'] == id]
# Plot that customer's alive history using the 'InvoiceDate' column.
plot_history_alive(bgf, days_since_birth, sp_trans, 'InvoiceDate')
# Save the figure at 200 dpi, then reset pyplot state for the next plot.
plt.savefig('history_alive.png', dpi=200)
plt.clf()
plt.cla()
plt.close()


# Keep only the rows of `data` whose frequency is greater than 0
# (`data` is defined outside this excerpt).
returning_customers_summary = data.loc[data['frequency'] > 0]
from lifetimes import GammaGammaFitter

# Fit a GammaGammaFitter, with the penalizer coefficient set to 0, on the
# frequency and monetary_value columns of those rows, then print the fitter.
ggf = GammaGammaFitter(penalizer_coef=0)
ggf.fit(returning_customers_summary['frequency'],
        returning_customers_summary['monetary_value'])
print(ggf)

bgf.fit(returning_customers_summary['frequency'],
Beispiel #8
0
# Notebook-style excerpt: bare expressions such as the one below only display
# a value in an interactive session and have no effect when run as a script.
summary.iloc[9]

t = 12  # number of periods to predict purchases for
individual = summary.iloc[20]
# The below function is an alias to `bgf.conditional_expected_number_of_purchases_up_to_time`
bgf.predict(t, individual['frequency'], individual['recency'], individual['T'])
# 0.0576511  (output recorded by the original author; presumably data-dependent)

df.head()

from lifetimes.plotting import plot_history_alive

customerId = 3
days_since_birth = 24  #the number of time units since the birth we want to draw the p_alive
# Transactions of the selected customer only.
sp_trans = df.loc[df['customerId'] == customerId]
# freq='M' is forwarded to plot_history_alive -- presumably monthly time
# units, so days_since_birth would be counted in months here; confirm.
plot_history_alive(bgf, days_since_birth, sp_trans, 'date', freq='M')

df.columns

# Summary statistics of valueWithIVA for customer 3.
df.loc[df.customerId == 3].agg(
    {'valueWithIVA': ['sum', 'count', 'mean', 'std', 'min', 'max']})

summary.head(10)

# Keep only customers with a positive frequency and a positive monetary_value.
summary_ggf = summary.loc[(summary.frequency > 0)
                          & (summary.monetary_value > 0)]

summary_ggf.columns

# Correlation between frequency and monetary_value on the filtered rows.
summary_ggf[['frequency', 'monetary_value']].corr()
# clear past visualization instructions
plt.clf()

# customer of interest (a string, compared against the CustomerID columns below)
CustomerID = '12383'

# grab customer's metrics and transaction history
# NOTE(review): cmetrics_pd is not used again in the visible excerpt.
cmetrics_pd = input_pd[input_pd['CustomerID'] == CustomerID]
trans_history = orders_pd.loc[orders_pd['CustomerID'] == CustomerID]

# age at end of dataset -- hard-coded to 400 here rather than calculated
days_since_birth = 400

# plot history of being "alive"
plot_history_alive(model, days_since_birth, trans_history, 'InvoiceDate')

# presumably the notebook environment's helper that renders the current
# figure; it is not defined in this excerpt -- confirm.
display()

# COMMAND ----------

# MAGIC %md From this chart, we can see this customer made his or her first purchase in January 2011 followed by a repeat purchase later that month.  There was about a 1-month lull in activity during which the probability of the customer being alive declined slightly, but with purchases in March, April and June of that year, the customer sent repeated signals that he or she was engaged. Since that last June purchase, the customer hasn't been seen in our transaction history, and our belief that the customer remains engaged has been dropping, though at a moderate pace given the signals previously sent.
# MAGIC
# MAGIC How does the model arrive at these probabilities? The exact math is tricky but by plotting the probability of being alive as a heatmap relative to frequency and recency, we can understand the probabilities assigned to the intersections of these two values:

# COMMAND ----------

from lifetimes.plotting import plot_probability_alive_matrix

# set figure size: create a new figure with figsize=(12, 8); the value
# returned by plt.subplots is discarded.
plt.subplots(figsize=(12, 8))
Beispiel #10
0
def plot_hist_median_freq(bgf, conv, df):
    """Plot the alive history of the customer with the median frequency.

    Parameters
    ----------
    bgf
        Model object forwarded unchanged to ``plot_history_alive``.
    conv
        DataFrame with a ``frequency`` column whose index labels are the
        customer ids matched against ``df['id']``.
    df
        Transactions DataFrame with ``id`` and ``date`` columns.

    Returns
    -------
    Whatever ``plot_history_alive`` returns for 365 time units of history.
    """
    frequency = conv['frequency']
    # Select the first customer whose frequency is closest to the median.
    # Filtering on exact equality breaks when the median is not an observed
    # value (e.g. the mean of the two middle values of an even-sized sample):
    # the filter is then empty and ``.iloc[0]`` raises IndexError. When an
    # exact match exists, this picks the same (first matching) customer.
    # Taking the median of this one column also avoids aggregating every
    # other, possibly non-numeric, column of ``conv``.
    customer_id = (frequency - frequency.median()).abs().idxmin()
    days_since_birth = 365
    sp_trans = df.loc[df['id'] == customer_id]
    return plot_history_alive(bgf, days_since_birth, sp_trans, 'date')
Beispiel #11
0
def plot_hist_best_freq(bgf, conv, df):
    """Plot the alive history of the customer with the highest frequency.

    Parameters
    ----------
    bgf
        Model object forwarded unchanged to ``plot_history_alive``.
    conv
        DataFrame with a ``frequency`` column whose index labels are the
        customer ids matched against ``df['id']``.
    df
        Transactions DataFrame with ``id`` and ``date`` columns.

    Returns
    -------
    Whatever ``plot_history_alive`` returns for 365 time units of history.
    """
    # Ask only the ``frequency`` column for its arg-max; computing
    # ``conv.idxmax()`` over the whole frame does the work for every column
    # and fails if any other column is not orderable.
    customer_id = conv['frequency'].idxmax()
    days_since_birth = 365
    sp_trans = df.loc[df['id'] == customer_id]
    return plot_history_alive(bgf, days_since_birth, sp_trans, 'date')