def conditional_probability_alive(self, m_periods_in_future, frequency, recency, n_periods):
    """
    Conditional probability alive.

    Probability that a customer with purchase history
    (frequency, recency, n_periods) is still alive at transaction
    opportunity n_periods + m_periods_in_future.

    See (A10) in Fader and Hardie 2010.

    Parameters
    ----------
    m_periods_in_future: array_like
        number of transaction opportunities beyond the observation window
    frequency: array_like
        historical frequency of customer
    recency: array_like
        historical recency of customer
    n_periods: array_like
        number of transaction opportunities observed

    Returns
    -------
    array_like
        alive probabilities
    """
    params = self._unload_params("alpha", "beta", "gamma", "delta")
    alpha, beta, gamma, delta = params

    # Joint log-probability of the observed purchases and of surviving past
    # n_periods + m_periods_in_future, each normalised by its beta prior.
    log_numerator = (
        betaln(alpha + frequency, beta + n_periods - frequency)
        - betaln(alpha, beta)
        + betaln(gamma, delta + n_periods + m_periods_in_future)
        - betaln(gamma, delta)
    )

    # Marginal log-likelihood of the observed history.
    log_denominator = self._loglikelihood(params, frequency, recency, n_periods)

    return exp(log_numerator) / exp(log_denominator)
def p2(j, x):
    """Sum (over remaining opportunities) of P(x purchases | died at opportunity i).

    NOTE(review): this is a closure fragment — `I`, `alpha`, `beta`, `gamma`,
    `delta` are expected from the enclosing scope; confirm against the caller.
    """
    opportunities = I[int(j) :]
    log_terms = (
        betaln(alpha + x, beta + opportunities - x)
        - betaln(alpha, beta)
        + betaln(gamma + 1, delta + opportunities)
        - betaln(gamma, delta)
    )
    return np.sum(binom(opportunities, x) * exp(log_terms))
def expected_number_of_transactions_in_first_n_periods(self, n):
    r"""
    Return expected number of transactions in the first n periods.

    Expected number of transactions occurring across first n transaction
    opportunities. Used by Fader and Hardie to assess in-sample fit.

    .. math:: Pr(X(n) = x| \alpha, \beta, \gamma, \delta)

    See (7) in Fader & Hardie 2010.

    Parameters
    ----------
    n: float
        number of transaction opportunities

    Returns
    -------
    DataFrame:
        Predicted values, indexed by x
    """
    params = self._unload_params("alpha", "beta", "gamma", "delta")
    alpha, beta, gamma, delta = params

    # Observed frequency distribution: total weight per distinct x value.
    x_counts = self.data.groupby("frequency")["weights"].sum()
    x = np.asarray(x_counts.index)

    # First term of (7): customer alive through all n opportunities,
    # computed in log space via betaln for numerical stability.
    p1 = binom(n, x) * exp(
        betaln(alpha + x, beta + n - x) - betaln(alpha, beta) + betaln(gamma, delta + n) - betaln(gamma, delta)
    )

    # Candidate death opportunities; starting at x.min() matches the smallest
    # observed frequency (a customer must have had at least x opportunities).
    I = np.arange(x.min(), n)

    @np.vectorize
    def p2(j, x):
        # Second term of (7): sum over death at opportunity i >= j of
        # P(x purchases in i opportunities) * P(death at i), both
        # normalised by their beta priors in log space.
        i = I[int(j) :]
        return np.sum(
            binom(i, x)
            * exp(
                betaln(alpha + x, beta + i - x)
                - betaln(alpha, beta)
                + betaln(gamma + 1, delta + i)
                - betaln(gamma, delta)
            )
        )

    # fromfunction passes the (float) index j for each x entry; p2 is
    # vectorized so the sum runs per frequency value.
    p1 += np.fromfunction(p2, (x.shape[0],), x=x)

    idx = pd.Index(x, name="frequency")
    # Scale probabilities by total observed weight to get expected counts.
    return DataFrame(p1 * x_counts.sum(), index=idx, columns=["model"])
def _loglikelihood(params, x, tx, T): warnings.simplefilter(action="ignore", category=FutureWarning) """Log likelihood for optimizer.""" alpha, beta, gamma, delta = params betaln_ab = betaln(alpha, beta) betaln_gd = betaln(gamma, delta) A = betaln(alpha + x, beta + T - x) - betaln_ab + betaln(gamma, delta + T) - betaln_gd B = 1e-15 * np.ones_like(T) recency_T = T - tx - 1 for j in np.arange(recency_T.max() + 1): ix = recency_T >= j B = B + ix * betaf(alpha + x, beta + tx - x + j) * betaf(gamma + 1, delta + tx + j) B = log(B) - betaln_gd - betaln_ab return logaddexp(A, B)
def _loglikelihood(params, x, tx, T): """Log likelihood for optimizer.""" alpha, beta, gamma, delta = params betaln_ab = betaln(alpha, beta) betaln_gd = betaln(gamma, delta) A = betaln(alpha + x, beta + T - x) - betaln_ab + betaln( gamma, delta + T) - betaln_gd B = 1e-15 * np.ones_like(T) recency_T = T - tx - 1 for j in np.arange(recency_T.max() + 1): ix = recency_T >= j B1 = betaln(alpha + x, beta + tx - x + j) B2 = betaln(gamma + 1, delta + tx + j) B = B + ix * (exp(B1 - betaln_ab)) * (exp(B2 - betaln_gd)) # v0.11.3 # B = B + ix * betaf(alpha + x, beta + tx - x + j) * betaf(gamma + 1, delta + tx + j) log_B = log(B) # v0.11.3 # B = log(B) - betaln_gd - betaln_ab result = logaddexp(A, log_B) return result
def conditional_expected_number_of_purchases_up_to_time(self, m_periods_in_future, frequency, recency, n_periods):
    r"""
    Conditional expected purchases in future time period.

    The expected number of future transactions across the next
    m_periods_in_future transaction opportunities by a customer with
    purchase history (frequency, recency, n_periods).

    .. math:: E(X(n_{periods}, n_{periods}+m_{periods_in_future})| \alpha, \beta, \gamma, \delta, frequency, recency, n_{periods})

    See (13) in Fader & Hardie 2010.

    Parameters
    ----------
    m_periods_in_future: array_like
        number of future transaction opportunities
    frequency: array_like
        historical frequency of customer
    recency: array_like
        historical recency of customer
    n_periods: array_like
        number of transaction opportunities observed

    Returns
    -------
    array_like
        predicted transactions
    """
    params = self._unload_params("alpha", "beta", "gamma", "delta")
    alpha, beta, gamma, delta = params

    # 1 / L: reciprocal of the likelihood of the observed history.
    likelihood = exp(self._loglikelihood(params, frequency, recency, n_periods))

    # Posterior purchase term (one extra success in the alpha prior).
    purchase_term = exp(betaln(alpha + frequency + 1, beta + n_periods - frequency) - betaln(alpha, beta))

    # Dropout-prior scaling constant; note the gamma > 1 requirement implied
    # by the (gamma - 1) denominator.
    dropout_scale = delta / (gamma - 1) * exp(gammaln(gamma + delta) - gammaln(1 + delta))

    # Survival factors at n and n + m; their difference gives the expected
    # number of opportunities the customer remains active for.
    survival_now = exp(gammaln(1 + delta + n_periods) - gammaln(gamma + delta + n_periods))
    survival_future = exp(
        gammaln(1 + delta + n_periods + m_periods_in_future)
        - gammaln(gamma + delta + n_periods + m_periods_in_future)
    )

    return (1 / likelihood) * purchase_term * dropout_scale * (survival_now - survival_future)
def betaprob(pred_values, lam, nu):
    """Log-likelihood of (lam, nu) for beta-distributed pred_values.

    Mean/precision parameterisation of the beta distribution:
    lam = alpha / (alpha + beta), nu = alpha + beta. A small epsilon
    (1e-10) keeps the logs finite at pred_values of exactly 0 or 1.
    """
    shape_a = lam * nu
    shape_b = (1.0 - lam) * nu
    log_norm = -spf.betaln(shape_a, shape_b)
    log_p = np.log(pred_values + 1e-10)
    log_q = np.log(1.0 - pred_values + 1e-10)
    return np.sum(log_norm + (shape_a - 1.0) * log_p + (shape_b - 1.0) * log_q)