def conditional_probability_alive(self, m_periods_in_future, frequency, recency, n_periods):
    """
    Conditional probability alive.

    Conditional probability customer is alive at transaction opportunity
    n_periods + m_periods_in_future.

    .. math:: P(alive at n_periods + m_periods_in_future|alpha, beta, gamma, delta, frequency, recency, n_periods)

    See (A10) in Fader and Hardie 2010.

    Parameters
    ----------
    m_periods_in_future: array_like
        transaction opportunities in the future
    frequency: array_like
        historical frequency of customer
    recency: array_like
        historical recency of customer
    n_periods: array_like
        historical transaction opportunities

    Returns
    -------
    array_like
        alive probabilities

    """
    params = self._unload_params("alpha", "beta", "gamma", "delta")
    alpha, beta, gamma, delta = params

    # Numerator pieces of (A10), kept in log space via betaln.
    p1 = betaln(alpha + frequency, beta + n_periods - frequency) - betaln(alpha, beta)
    p2 = betaln(gamma, delta + n_periods + m_periods_in_future) - betaln(gamma, delta)
    # Denominator: log-likelihood of the observed purchase history.
    p3 = self._loglikelihood(params, frequency, recency, n_periods)

    # exp(p1 + p2 - p3) is numerically safer than exp(p1 + p2) / exp(p3):
    # it avoids intermediate underflow when the likelihood is tiny.
    return exp(p1 + p2 - p3)
 # NOTE(review): stray duplicate of the vectorized helper defined inside
 # expected_number_of_transactions_in_first_n_periods below; it relies on
 # free variables (I, alpha, beta, gamma, delta) from an enclosing scope
 # that is not present at this indentation level -- presumably a
 # copy/paste artifact; confirm whether this top-level copy is ever used.
 def p2(j, x):
     # Tail of the transaction-opportunity grid, starting at index j.
     i = I[int(j) :]
     # Sum over possible death periods of the (exponentiated) log-probability
     # of making x purchases in i opportunities and then dying.
     return np.sum(
         binom(i, x)
         * exp(
             betaln(alpha + x, beta + i - x)
             - betaln(alpha, beta)
             + betaln(gamma + 1, delta + i)
             - betaln(gamma, delta)
         )
     )
    def expected_number_of_transactions_in_first_n_periods(self, n):
        r"""
        Return expected number of transactions in first n n_periods.

        Expected number of transactions occurring across first n transaction
        opportunities.
        Used by Fader and Hardie to assess in-sample fit.

        .. math:: Pr(X(n) = x| \alpha, \beta, \gamma, \delta)

        See (7) in Fader & Hardie 2010.

        Parameters
        ----------
        n: float
            number of transaction opportunities

        Returns
        -------
        DataFrame:
            Predicted values, indexed by x

        """
        alpha, beta, gamma, delta = self._unload_params("alpha", "beta", "gamma", "delta")

        # Observed frequency distribution (weighted counts per x).
        x_counts = self.data.groupby("frequency")["weights"].sum()
        x = np.asarray(x_counts.index)

        # Term for customers who stay alive through all n opportunities.
        first_term = binom(n, x) * exp(
            betaln(alpha + x, beta + n - x) - betaln(alpha, beta) + betaln(gamma, delta + n) - betaln(gamma, delta)
        )

        # Grid of opportunities at which a customer may die.
        I = np.arange(x.min(), n)

        def _death_term(j, xi):
            """Contribution from customers dying at opportunity I[j] or later."""
            tail = I[int(j) :]
            log_terms = (
                betaln(alpha + xi, beta + tail - xi)
                - betaln(alpha, beta)
                + betaln(gamma + 1, delta + tail)
                - betaln(gamma, delta)
            )
            return np.sum(binom(tail, xi) * exp(log_terms))

        second_term = np.array([_death_term(j, xi) for j, xi in enumerate(x)])

        probabilities = first_term + second_term
        idx = pd.Index(x, name="frequency")
        return DataFrame(probabilities * x_counts.sum(), index=idx, columns=["model"])
    def _loglikelihood(params, x, tx, T):
        warnings.simplefilter(action="ignore", category=FutureWarning)

        """Log likelihood for optimizer."""
        alpha, beta, gamma, delta = params

        betaln_ab = betaln(alpha, beta)
        betaln_gd = betaln(gamma, delta)

        A = betaln(alpha + x, beta + T - x) - betaln_ab + betaln(gamma, delta + T) - betaln_gd

        B = 1e-15 * np.ones_like(T)
        recency_T = T - tx - 1

        for j in np.arange(recency_T.max() + 1):
            ix = recency_T >= j
            B = B + ix * betaf(alpha + x, beta + tx - x + j) * betaf(gamma + 1, delta + tx + j)

        B = log(B) - betaln_gd - betaln_ab
        return logaddexp(A, B)
# Beispiel #5
# 0  (scraper artifact -- not code)
    def _loglikelihood(params, x, tx, T):
        """Log likelihood for optimizer."""
        alpha, beta, gamma, delta = params

        betaln_ab = betaln(alpha, beta)
        betaln_gd = betaln(gamma, delta)

        A = betaln(alpha + x, beta + T - x) - betaln_ab + betaln(
            gamma, delta + T) - betaln_gd

        B = 1e-15 * np.ones_like(T)
        recency_T = T - tx - 1

        for j in np.arange(recency_T.max() + 1):
            ix = recency_T >= j
            B1 = betaln(alpha + x, beta + tx - x + j)
            B2 = betaln(gamma + 1, delta + tx + j)
            B = B + ix * (exp(B1 - betaln_ab)) * (exp(B2 - betaln_gd))
            # v0.11.3
            # B = B + ix * betaf(alpha + x, beta + tx - x + j) * betaf(gamma + 1, delta + tx + j)

        log_B = log(B)
        # v0.11.3
        # B = log(B) - betaln_gd - betaln_ab
        result = logaddexp(A, log_B)
        return result
    def conditional_expected_number_of_purchases_up_to_time(self, m_periods_in_future, frequency, recency, n_periods):
        r"""
        Conditional expected purchases in future time period.

        The expected number of future transactions across the next
        m_periods_in_future transaction opportunities by a customer with
        purchase history (frequency, recency, n_periods).

        .. math:: E(X(n_{periods}, n_{periods}+m_{periods_in_future})| \alpha, \beta, \gamma, \delta, frequency, recency, n_{periods})

        See (13) in Fader & Hardie 2010.

        Parameters
        ----------
        m_periods_in_future: array_like
            number of future transaction opportunities
        frequency: array_like
            historical frequency of customer
        recency: array_like
            historical recency of customer
        n_periods: array_like
            historical transaction opportunities

        Returns
        -------
        array_like
            predicted transactions

        """
        x = frequency
        tx = recency
        n = n_periods

        params = self._unload_params("alpha", "beta", "gamma", "delta")
        alpha, beta, gamma, delta = params

        # 1 / L(history): exp(-loglik) avoids an intermediate 1/exp division.
        p1 = exp(-self._loglikelihood(params, x, tx, n))
        p2 = exp(betaln(alpha + x + 1, beta + n - x) - betaln(alpha, beta))
        # NOTE(review): requires gamma > 1 for the mean to be finite.
        p3 = delta / (gamma - 1) * exp(gammaln(gamma + delta) - gammaln(1 + delta))
        p4 = exp(gammaln(1 + delta + n) - gammaln(gamma + delta + n))
        p5 = exp(gammaln(1 + delta + n + m_periods_in_future) - gammaln(gamma + delta + n + m_periods_in_future))

        return p1 * p2 * p3 * (p4 - p5)
# Beispiel #7
# 0  (scraper artifact -- not code)
def betaprob(pred_values, lam, nu):
    """log-likelihood of lambda and nu for beta-distributed pred_values. 
    lam=alpha/(alpha+beta), nu=alpha+beta"""
    # Recover the standard Beta(a, b) parameters from the mean/size form.
    a = lam * nu
    b = (1.0 - lam) * nu
    # Small offset keeps the logs finite at pred_values of exactly 0 or 1.
    log_p = np.log(pred_values + 1e-10)
    log_q = np.log(1.0 - pred_values + 1e-10)
    # Sum of per-observation Beta log-densities.
    return np.sum(-spf.betaln(a, b) + (a - 1.0) * log_p + (b - 1.0) * log_q)