Esempio n. 1
0
    def fit(self,
            frequency,
            recency,
            T,
            iterative_fitting=1,
            initial_params=None,
            verbose=False,
            tol=1e-4,
            index=None):
        """
        Fit the BG/NBD model to per-customer summary vectors.

        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            recency: the recency vector of customers' purchases (denoted t_x in literature).
            T: the vector of customers' age (time since first purchase)
            iterative_fitting: perform iterative_fitting fits over random/warm-started initial params
            initial_params: set the initial parameters for the fitter.
            verbose: set to true to print out convergence diagnostics.
            tol: forwarded unchanged to the fitting routine (presumably the
                optimizer's termination tolerance -- confirm against _fit).
            index: optional index assigned to the DataFrame stored in self.data

        Returns:
            self, with additional properties and methods like params_ and predict

        """
        param_names = ('r', 'alpha', 'a', 'b')

        # Coerce the three inputs to arrays, then validate them together.
        frequency, recency, T = asarray(frequency), asarray(recency), asarray(T)
        _check_inputs(frequency, recency, T)

        # recency and T are multiplied by a factor derived from T before the
        # fit; the factor is kept on the instance because alpha is divided by
        # it once the fit is done.
        self._scale = _scale_time(T)
        recency_scaled = recency * self._scale
        T_scaled = T * self._scale

        likelihood_args = [frequency, recency_scaled, T_scaled, self.penalizer_coef]
        fitted, neg_ll = _fit(
            self._negative_log_likelihood,
            likelihood_args,
            iterative_fitting,
            initial_params,
            4,  # presumably the number of model parameters -- confirm against _fit
            verbose,
            tol,
        )
        self._negative_log_likelihood_ = neg_ll

        self.params_ = OrderedDict(zip(param_names, fitted))
        self.params_['alpha'] /= self._scale

        # self.data keeps the raw (unscaled) inputs.
        frame = DataFrame(vconcat[frequency, recency, T],
                          columns=['frequency', 'recency', 'T'])
        if index is not None:
            frame.index = index
        self.data = frame

        self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
            T, *self._unload_params(*param_names), size=size)

        self.predict = self.conditional_expected_number_of_purchases_up_to_time
        return self
Esempio n. 2
0
    def fit(self, frequency, recency, T, iterative_fitting=1, initial_params=None, verbose=False):
        """
        Fit the BG/NBD model to customer summary vectors.

        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            recency: the recency vector of customers' purchases (denoted t_x in literature).
            T: the vector of customers' age (time since first purchase)
            iterative_fitting: perform `iterative_fitting` additional fits to find the best
                parameters for the model. Setting to 0 will improve performance but possibly
                hurt estimates.
            initial_params: set the initial parameters for the fitter.
            verbose: set to true to print out convergence diagnostics.

        Returns:
            self, with additional properties and methods like params_ and predict

        """
        names = ("r", "alpha", "a", "b")

        # Array-ify the inputs before they are validated.
        frequency, recency, T = (asarray(values) for values in (frequency, recency, T))
        _check_inputs(frequency, recency, T)

        # The fit runs on rescaled recency/T; the factor is stored because
        # alpha is divided by it afterwards.
        self._scale = _scale_time(T)
        recency_for_fit = recency * self._scale
        T_for_fit = T * self._scale

        estimates, neg_ll = _fit(
            self._negative_log_likelihood,
            frequency,
            recency_for_fit,
            T_for_fit,
            iterative_fitting,
            self.penalizer_coef,
            initial_params,
            verbose,
        )
        self._negative_log_likelihood_ = neg_ll

        self.params_ = OrderedDict(zip(names, estimates))
        self.params_["alpha"] /= self._scale

        # The stored frame holds the raw (unscaled) inputs.
        summary = vconcat[frequency, recency, T]
        self.data = DataFrame(summary, columns=["frequency", "recency", "T"])

        self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
            T, *self._unload_params(*names), size=size
        )
        self.predict = self.conditional_expected_number_of_purchases_up_to_time
        return self
Esempio n. 3
0
    def fit(self,
            frequency,
            recency,
            T,
            iterative_fitting=1,
            initial_params=None,
            verbose=False):
        """
        Estimate BG/NBD parameters from frequency / recency / T vectors.

        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            recency: the recency vector of customers' purchases (denoted t_x in literature).
            T: the vector of customers' age (time since first purchase)
            iterative_fitting: perform `iterative_fitting` additional fits to find the best
                parameters for the model. Setting to 0 will improve performance but possibly
                hurt estimates.
            initial_params: set the initial parameters for the fitter.
            verbose: set to true to print out convergence diagnostics.

        Returns:
            self, with additional properties and methods like params_ and predict

        """
        # Validate the array versions of the inputs.
        frequency = asarray(frequency)
        recency = asarray(recency)
        T = asarray(T)
        _check_inputs(frequency, recency, T)

        # Keep the time-scaling factor on the instance; alpha is divided by it
        # after the fit.
        self._scale = _scale_time(T)
        fit_inputs = (frequency, recency * self._scale, T * self._scale)

        fit_result = _fit(
            self._negative_log_likelihood, *fit_inputs,
            iterative_fitting, self.penalizer_coef, initial_params, verbose)
        params, self._negative_log_likelihood_ = fit_result

        labels = ['r', 'alpha', 'a', 'b']
        self.params_ = OrderedDict(zip(labels, params))
        self.params_['alpha'] /= self._scale

        # self.data is built from the unscaled inputs.
        self.data = DataFrame(
            vconcat[frequency, recency, T],
            columns=['frequency', 'recency', 'T'],
        )
        self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
            T,
            *self._unload_params('r', 'alpha', 'a', 'b'),
            size=size)

        self.predict = self.conditional_expected_number_of_purchases_up_to_time
        return self
Esempio n. 4
0
    def fit(self, frequency, recency, T, iterative_fitting=1, initial_params=None, verbose=False, tol=1e-4):
        """
        Fit the BG/NBD model to the supplied customer summary data.

        Parameters:
            frequency: the frequency vector of customers' purchases (denoted x in literature).
            recency: the recency vector of customers' purchases (denoted t_x in literature).
            T: the vector of customers' age (time since first purchase)
            iterative_fitting: perform iterative_fitting fits over random/warm-started initial params
            initial_params: set the initial parameters for the fitter.
            verbose: set to true to print out convergence diagnostics.
            tol: forwarded unchanged to the fitting routine (presumably the
                optimizer's termination tolerance -- confirm against _fit).

        Returns:
            self, with additional properties and methods like params_ and predict

        """
        frequency, recency, T = asarray(frequency), asarray(recency), asarray(T)
        _check_inputs(frequency, recency, T)

        # The scale factor is stored because alpha is divided by it below.
        self._scale = _scale_time(T)
        time_scaled = {
            'recency': recency * self._scale,
            'T': T * self._scale,
        }

        minimizer_args = [frequency, time_scaled['recency'], time_scaled['T'], self.penalizer_coef]
        params, self._negative_log_likelihood_ = _fit(
            self._negative_log_likelihood,
            minimizer_args,
            iterative_fitting,
            initial_params,
            4,  # presumably the number of model parameters -- confirm against _fit
            verbose,
            tol,
        )

        self.params_ = OrderedDict(zip(['r', 'alpha', 'a', 'b'], params))
        self.params_['alpha'] /= self._scale

        # Raw (unscaled) inputs are what gets stored on self.data.
        raw_columns = vconcat[frequency, recency, T]
        self.data = DataFrame(raw_columns, columns=['frequency', 'recency', 'T'])

        self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
            T, *self._unload_params('r', 'alpha', 'a', 'b'), size=size
        )

        self.predict = self.conditional_expected_number_of_purchases_up_to_time
        return self
Esempio n. 5
0
def test_scale_time():
    """Check that utils._scale_time(T) equals 10 divided by the largest age in T."""
    upper = 200.
    ages = np.arange(upper)  # 0.0, 1.0, ..., upper - 1
    expected = 10. / (upper - 1)
    assert utils._scale_time(ages) == expected
Esempio n. 6
0
    def fit(self,
            frequency,
            recency,
            T,
            weights=None,
            iterative_fitting=1,
            initial_params=None,
            verbose=False,
            tol=1e-4,
            index=None,
            fit_method="Nelder-Mead",
            maxiter=2000,
            **kwargs):
        """
        Fit the Pareto/NBD model.

        Parameters
        ----------
        frequency: array_like
            the frequency vector of customers' purchases
            (denoted x in literature). Cast to ``int`` before use.
        recency: array_like
            the recency vector of customers' purchases
            (denoted t_x in literature).
        T: array_like
            customers' age (time units since first purchase)
        weights: None or array_like
            Number of customers with given frequency/recency/T,
            defaults to 1 if not specified. Fader and
            Hardie condense the individual RFM matrix into all
            observed combinations of frequency/recency/T. This
            parameter represents the count of customers with a given
            purchase pattern. Instead of calculating individual
            log-likelihood, the log-likelihood is calculated for each
            pattern and multiplied by the number of customers with
            that pattern.
        iterative_fitting: int, optional
            perform iterative_fitting fits over random/warm-started initial params
        initial_params: array_like, optional
            set the initial parameters for the fitter.
        verbose : bool, optional
            set to true to print out convergence diagnostics.
        tol : float, optional
            tolerance for termination of the function minimization process.
        index: array_like, optional
            index for resulted DataFrame which is accessible via self.data
        fit_method : string, optional
            fit_method to passing to scipy.optimize.minimize
        maxiter : int, optional
            max iterations for optimizer in scipy.optimize.minimize will be
            overwritten if set in kwargs.
        kwargs:
            key word arguments to pass to the scipy.optimize.minimize
            function as options dict

        Returns
        -------
        ParetoNBDFitter
            with additional properties like ``params_`` and methods like ``predict``

        """
        frequency = asarray(frequency).astype(int)
        recency, T = asarray(recency), asarray(T)

        # Without explicit weights, every row counts once.
        weights = (np.ones(recency.shape[0], dtype=np.int64)
                   if weights is None else asarray(weights))

        _check_inputs(frequency, recency, T)

        # The fit runs on rescaled recency/T; alpha and beta are divided by
        # the same factor afterwards.
        self._scale = _scale_time(T)
        minimizing_args = (
            frequency,
            recency * self._scale,
            T * self._scale,
            weights,
            self.penalizer_coef,
        )

        params, self._negative_log_likelihood_ = self._fit(
            minimizing_args,
            iterative_fitting,
            initial_params,
            4,  # presumably the number of model parameters -- confirm against self._fit
            verbose,
            tol,
            fit_method,
            maxiter,
            **kwargs)
        self._hessian_ = None

        self.params_ = pd.Series(params, ["r", "alpha", "s", "beta"])
        for scaled_name in ("alpha", "beta"):
            self.params_[scaled_name] /= self._scale

        # self.data holds the raw (unscaled) inputs plus the weights.
        columns = {
            "frequency": frequency,
            "recency": recency,
            "T": T,
            "weights": weights,
        }
        self.data = DataFrame(columns, index=index)

        self.generate_new_data = lambda size=1: pareto_nbd_model(
            T,
            *self._unload_params("r", "alpha", "s", "beta"),
            size=size)

        self.predict = self.conditional_expected_number_of_purchases_up_to_time
        return self
Esempio n. 7
0
def test_scale_time():
    """Verify utils._scale_time(T) returns 10 over the maximum value in T."""
    n_periods = 200.
    T = np.arange(n_periods)  # largest entry is n_periods - 1
    want = 10. / (n_periods - 1)
    got = utils._scale_time(T)
    assert got == want
    def fit(self,
            frequency,
            recency,
            T,
            weights=None,
            iterative_fitting=1,
            initial_params=None,
            verbose=False,
            tol=1e-4,
            index=None,
            fit_method="Nelder-Mead",
            maxiter=2000,
            covariates=None,
            dropout_rate_scale_parameter_covariates=None,
            **kwargs):
        """
        Pareto/NBD model fitter with optional time-independent covariates.

        Parameters
        ----------
        frequency: array_like
            the frequency vector of customers' purchases
            (denoted x in literature). Cast to ``int`` before use.
        recency: array_like
            the recency vector of customers' purchases
            (denoted t_x in literature).
        T: array_like
            customers' age (time units since first purchase)
        weights: None or array_like
            Number of customers with given frequency/recency/T,
            defaults to 1 if not specified. Fader and
            Hardie condense the individual RFM matrix into all
            observed combinations of frequency/recency/T. This
            parameter represents the count of customers with a given
            purchase pattern. Instead of calculating individual
            log-likelihood, the log-likelihood is calculated for each
            pattern and multiplied by the number of customers with
            that pattern.
        iterative_fitting: int, optional
            perform iterative_fitting fits over random/warm-started initial params
        initial_params: array_like, optional
            set the initial parameters for the fitter.
        verbose : bool, optional
            set to true to print out convergence diagnostics.
        tol : float, optional
            tolerance for termination of the function minimization process.
        index: array_like, optional
            index for resulted DataFrame which is accessible via self.data
        fit_method : string, optional
            fit_method to passing to scipy.optimize.minimize
        maxiter : int, optional
            max iterations for optimizer in scipy.optimize.minimize will be
            overwritten if set in kwargs.
        covariates: array_like, optional
            Array of time-independent customer features (n_customers x n_covariates).
            Must expose a 2-D ``.shape``.
        dropout_rate_scale_parameter_covariates: array_like, optional
            Array of time-independent customer features (n_customers x n_covariates)
            used exclusively for the dropout rate's scale parameter (denoted beta in
            the literature). If this is None and `covariates` isn't, the latter's
            values are used for both the transaction and dropout rate scale
            parameter derivation. Must expose a 2-D ``.shape``.
        kwargs:
            key word arguments to pass to the scipy.optimize.minimize
            function as options dict

        Returns
        -------
        ParetoNBDwithCovariatesFitter
            with additional properties like ``params_`` and methods like ``predict``

        Notes
        -----
        The flat parameter vector requested from ``self._fit`` has
        ``4 + sum(self.covariates_size)`` entries: the four base parameters
        (r, alpha_0, s, beta_0) followed by the ``gamma_1`` block and then the
        ``gamma_2`` block. ``params_`` stores the two gamma blocks as array
        slices, one per entry.
        """

        frequency = asarray(frequency).astype(int)
        recency = asarray(recency)
        T = asarray(T)

        if weights is None:
            # Without explicit weights, every row counts once.
            weights = np.ones(recency.shape[0], dtype=np.int64)
        else:
            weights = asarray(weights)

        _check_inputs(frequency, recency, T)

        # The fit runs on rescaled recency/T; alpha_0 and beta_0 are divided
        # by the same factor afterwards.
        self._scale = _scale_time(T)
        scaled_recency = recency * self._scale
        scaled_T = T * self._scale

        self.covariates = covariates
        self.dropout_rate_scale_parameter_covariates = dropout_rate_scale_parameter_covariates

        # Lengths of the [gamma_1, gamma_2] coefficient blocks; each defaults
        # to a single coefficient when no covariates are supplied.
        self.covariates_size = [1, 1]
        if self.covariates is not None:
            n_covariates = self.covariates.shape[1]
            # Shared covariates drive both scale parameters.
            self.covariates_size = [n_covariates, n_covariates]
        if self.dropout_rate_scale_parameter_covariates is not None:
            self.covariates_size[1] = \
                self.dropout_rate_scale_parameter_covariates.shape[1]

        # Request exactly as many parameters as the slicing below consumes.
        # The previous incremental count (6 + (k - 1) per supplied array) was
        # short by k - 1 when only `covariates` (with k > 1 columns) was given,
        # which truncated the gamma_2 block.
        n_base_params = 4
        params_size = n_base_params + sum(self.covariates_size)

        params, self._negative_log_likelihood_ = self._fit(
            minimizing_function_args=(frequency, scaled_recency, scaled_T,
                                      weights, self.penalizer_coef),
            iterative_fitting=iterative_fitting,
            initial_params=initial_params,
            params_size=params_size,
            disp=verbose,
            tol=tol,
            fit_method=fit_method,
            maxiter=maxiter,
            **kwargs)

        # Split the flat vector into 4 scalars plus the two gamma blocks.
        gamma_1_end = n_base_params + self.covariates_size[0]
        gamma_2_end = gamma_1_end + self.covariates_size[1]
        params = tuple(params[:n_base_params]) + (
            params[n_base_params:gamma_1_end],
            params[gamma_1_end:gamma_2_end],
        )

        self._hessian_ = None
        self.params_ = pd.Series(
            params, ["r", "alpha_0", "s", "beta_0", "gamma_1", "gamma_2"])
        self.params_["alpha_0"] /= self._scale
        self.params_["beta_0"] /= self._scale

        # self.data holds the raw (unscaled) inputs plus the weights.
        self.data = DataFrame(
            {
                "frequency": frequency,
                "recency": recency,
                "T": T,
                "weights": weights
            },
            index=index)
        self.generate_new_data = lambda size=1: pareto_nbd_model(
            T,
            *self._convert_parameters(
                self._unload_params("r", "alpha_0", "s", "beta_0", "gamma_1",
                                    "gamma_2")),
            size=size)

        self.predict = self.conditional_expected_number_of_purchases_up_to_time

        return self