def fit(self, frequency, recency, T, iterative_fitting=1, initial_params=None, verbose=False, tol=1e-4, index=None):
    """
    Fit the BG/NBD model to the given purchase summaries.

    Parameters:
        frequency: the frequency vector of customers' purchases
            (denoted x in literature).
        recency: the recency vector of customers' purchases
            (denoted t_x in literature).
        T: the vector of customers' age (time since first purchase)
        iterative_fitting: perform iterative_fitting fits over
            random/warm-started initial params
        initial_params: set the initial parameters for the fitter.
        verbose: set to true to print out convergence diagnostics.
        tol: tolerance value handed through to `_fit`.
        index: index for resulted DataFrame which is accessible via
            self.data

    Returns:
        self, with additional properties and methods like params_ and
        predict
    """
    frequency, recency, T = asarray(frequency), asarray(recency), asarray(T)
    _check_inputs(frequency, recency, T)

    # The fit runs on time values multiplied by self._scale; alpha is
    # divided by the same factor once the fit returns.
    self._scale = _scale_time(T)
    likelihood_args = [
        frequency,
        recency * self._scale,
        T * self._scale,
        self.penalizer_coef,
    ]

    param_names = ['r', 'alpha', 'a', 'b']
    fitted, self._negative_log_likelihood_ = _fit(
        self._negative_log_likelihood,
        likelihood_args,
        iterative_fitting,
        initial_params,
        len(param_names),
        verbose,
        tol,
    )

    self.params_ = OrderedDict(zip(param_names, fitted))
    self.params_['alpha'] /= self._scale

    # Keep the unscaled inputs around for later inspection.
    summary = DataFrame(vconcat[frequency, recency, T],
                        columns=['frequency', 'recency', 'T'])
    self.data = summary
    if index is not None:
        self.data.index = index

    self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
        T, *self._unload_params(*param_names), size=size)

    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def fit(self, frequency, recency, T, iterative_fitting=1, initial_params=None, verbose=False):
    """
    Fit the BG/NBD model to the given purchase summaries.

    Parameters:
        frequency: the frequency vector of customers' purchases
            (denoted x in literature).
        recency: the recency vector of customers' purchases
            (denoted t_x in literature).
        T: the vector of customers' age (time since first purchase)
        iterative_fitting: perform `iterative_fitting` additional fits to
            find the best parameters for the model. Setting to 0 will
            improve performance but possibly hurt estimates.
        initial_params: set the initial parameters for the fitter.
        verbose: set to true to print out convergence diagnostics.

    Returns:
        self, with additional properties and methods like params_ and
        predict
    """
    frequency = asarray(frequency)
    recency = asarray(recency)
    T = asarray(T)
    _check_inputs(frequency, recency, T)

    # Recency and T are multiplied by self._scale for the fit; alpha is
    # divided by the same factor afterwards.
    self._scale = _scale_time(T)
    recency_scaled = recency * self._scale
    age_scaled = T * self._scale

    fit_result = _fit(
        self._negative_log_likelihood,
        frequency,
        recency_scaled,
        age_scaled,
        iterative_fitting,
        self.penalizer_coef,
        initial_params,
        verbose,
    )
    estimates, self._negative_log_likelihood_ = fit_result

    self.params_ = OrderedDict(zip(["r", "alpha", "a", "b"], estimates))
    self.params_["alpha"] /= self._scale

    column_names = ["frequency", "recency", "T"]
    self.data = DataFrame(vconcat[frequency, recency, T], columns=column_names)

    self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
        T, *self._unload_params("r", "alpha", "a", "b"), size=size
    )

    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def fit(self, frequency, recency, T, iterative_fitting=1, initial_params=None, verbose=False):
    """
    Estimate the BG/NBD parameters from the given purchase summaries.

    Parameters:
        frequency: the frequency vector of customers' purchases
            (denoted x in literature).
        recency: the recency vector of customers' purchases
            (denoted t_x in literature).
        T: the vector of customers' age (time since first purchase)
        iterative_fitting: perform `iterative_fitting` additional fits to
            find the best parameters for the model. Setting to 0 will
            improve performance but possibly hurt estimates.
        initial_params: set the initial parameters for the fitter.
        verbose: set to true to print out convergence diagnostics.

    Returns:
        self, with additional properties and methods like params_ and
        predict
    """
    frequency, recency, T = asarray(frequency), asarray(recency), asarray(T)
    _check_inputs(frequency, recency, T)

    # The fit sees time values multiplied by self._scale.
    self._scale = _scale_time(T)
    fit_args = (
        frequency,
        recency * self._scale,
        T * self._scale,
        iterative_fitting,
        self.penalizer_coef,
        initial_params,
        verbose,
    )
    fitted_values, self._negative_log_likelihood_ = _fit(
        self._negative_log_likelihood, *fit_args)

    labels = ['r', 'alpha', 'a', 'b']
    self.params_ = OrderedDict(zip(labels, fitted_values))
    # alpha was estimated on the scaled time axis; divide the scale out.
    self.params_['alpha'] /= self._scale

    self.data = DataFrame(
        vconcat[frequency, recency, T],
        columns=['frequency', 'recency', 'T'],
    )

    self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
        T, *self._unload_params(*labels), size=size)

    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def fit(self, frequency, recency, T, iterative_fitting=1, initial_params=None, verbose=False, tol=1e-4):
    """
    Fit the BG/NBD model to the given purchase summaries.

    Parameters:
        frequency: the frequency vector of customers' purchases
            (denoted x in literature).
        recency: the recency vector of customers' purchases
            (denoted t_x in literature).
        T: the vector of customers' age (time since first purchase)
        iterative_fitting: perform iterative_fitting fits over
            random/warm-started initial params
        initial_params: set the initial parameters for the fitter.
        verbose: set to true to print out convergence diagnostics.
        tol: tolerance value handed through to `_fit`.

    Returns:
        self, with additional properties and methods like params_ and
        predict
    """
    frequency = asarray(frequency)
    recency = asarray(recency)
    T = asarray(T)
    _check_inputs(frequency, recency, T)

    # Time inputs are multiplied by self._scale for the fit and alpha is
    # divided by it afterwards.
    self._scale = _scale_time(T)
    minimizer_inputs = [
        frequency,
        recency * self._scale,
        T * self._scale,
        self.penalizer_coef,
    ]
    n_params = 4  # r, alpha, a, b

    outcome = _fit(
        self._negative_log_likelihood,
        minimizer_inputs,
        iterative_fitting,
        initial_params,
        n_params,
        verbose,
        tol,
    )
    values, self._negative_log_likelihood_ = outcome

    self.params_ = OrderedDict(zip(['r', 'alpha', 'a', 'b'], values))
    self.params_['alpha'] /= self._scale

    stacked = vconcat[frequency, recency, T]
    self.data = DataFrame(stacked, columns=['frequency', 'recency', 'T'])

    self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
        T, *self._unload_params('r', 'alpha', 'a', 'b'), size=size)

    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def test_scale_time():
    """Check `utils._scale_time` on ages 0..199 against 10 / (200 - 1)."""
    horizon = 200.
    ages = np.arange(horizon)
    expected_scale = 10. / (horizon - 1)
    assert utils._scale_time(ages) == expected_scale
def fit(self, frequency, recency, T, weights=None, iterative_fitting=1, initial_params=None, verbose=False, tol=1e-4, index=None, fit_method="Nelder-Mead", maxiter=2000, **kwargs):
    """
    Pareto/NBD model fitter.

    Parameters
    ----------
    frequency: array_like
        the frequency vector of customers' purchases
        (denoted x in literature).
    recency: array_like
        the recency vector of customers' purchases
        (denoted t_x in literature).
    T: array_like
        customers' age (time units since first purchase)
    weights: None or array_like
        Number of customers with given frequency/recency/T,
        defaults to 1 if not specified. Fader and
        Hardie condense the individual RFM matrix into all
        observed combinations of frequency/recency/T. This
        parameter represents the count of customers with a given
        purchase pattern. Instead of calculating individual
        log-likelihood, the log-likelihood is calculated for each
        pattern and multiplied by the number of customers with
        that pattern.
    iterative_fitting: int, optional
        perform iterative_fitting fits over random/warm-started initial params
    initial_params: array_like, optional
        set the initial parameters for the fitter.
    verbose : bool, optional
        set to true to print out convergence diagnostics.
    tol : float, optional
        tolerance for termination of the function minimization process.
    index: array_like, optional
        index for resulted DataFrame which is accessible via self.data
    fit_method : string, optional
        fit_method to passing to scipy.optimize.minimize
    maxiter : int, optional
        max iterations for optimizer in scipy.optimize.minimize will be
        overwritten if set in kwargs.
    kwargs:
        key word arguments to pass to the scipy.optimize.minimize
        function as options dict

    Returns
    -------
    ParetoNBDFitter
        with additional properties like ``params_`` and methods like
        ``predict``
    """
    frequency = asarray(frequency).astype(int)
    recency = asarray(recency)
    T = asarray(T)

    # Without explicit weights every row counts as one customer.
    weights = (
        np.ones(recency.shape[0], dtype=np.int64)
        if weights is None
        else asarray(weights)
    )

    _check_inputs(frequency, recency, T)

    # The fit runs on time values multiplied by self._scale; alpha and
    # beta are divided by the same factor once the fit returns.
    self._scale = _scale_time(T)
    likelihood_args = (
        frequency,
        recency * self._scale,
        T * self._scale,
        weights,
        self.penalizer_coef,
    )

    fitted, self._negative_log_likelihood_ = self._fit(
        likelihood_args,
        iterative_fitting,
        initial_params,
        4,
        verbose,
        tol,
        fit_method,
        maxiter,
        **kwargs)

    self._hessian_ = None
    self.params_ = pd.Series(fitted, index=["r", "alpha", "s", "beta"])
    for scaled_name in ("alpha", "beta"):
        self.params_[scaled_name] /= self._scale

    columns = {
        "frequency": frequency,
        "recency": recency,
        "T": T,
        "weights": weights,
    }
    self.data = DataFrame(columns, index=index)

    self.generate_new_data = lambda size=1: pareto_nbd_model(
        T, *self._unload_params("r", "alpha", "s", "beta"), size=size)

    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def test_scale_time():
    """`utils._scale_time` on ages 0..199 must equal 10 / 199 exactly."""
    max_T = 200.
    largest_age = max_T - 1
    T = np.arange(max_T)
    observed = utils._scale_time(T)
    assert observed == 10. / largest_age
def fit(self, frequency, recency, T, weights=None, iterative_fitting=1, initial_params=None, verbose=False, tol=1e-4, index=None, fit_method="Nelder-Mead", maxiter=2000, covariates=None, dropout_rate_scale_parameter_covariates=None, **kwargs):
    """
    Pareto/NBD model fitter.

    Parameters
    ----------
    frequency: array_like
        the frequency vector of customers' purchases
        (denoted x in literature).
    recency: array_like
        the recency vector of customers' purchases
        (denoted t_x in literature).
    T: array_like
        customers' age (time units since first purchase)
    weights: None or array_like
        Number of customers with given frequency/recency/T,
        defaults to 1 if not specified. Fader and
        Hardie condense the individual RFM matrix into all
        observed combinations of frequency/recency/T. This
        parameter represents the count of customers with a given
        purchase pattern. Instead of calculating individual
        log-likelihood, the log-likelihood is calculated for each
        pattern and multiplied by the number of customers with
        that pattern.
    iterative_fitting: int, optional
        perform iterative_fitting fits over random/warm-started initial params
    initial_params: array_like, optional
        set the initial parameters for the fitter.
    verbose : bool, optional
        set to true to print out convergence diagnostics.
    tol : float, optional
        tolerance for termination of the function minimization process.
    index: array_like, optional
        index for resulted DataFrame which is accessible via self.data
    fit_method : string, optional
        fit_method to passing to scipy.optimize.minimize
    maxiter : int, optional
        max iterations for optimizer in scipy.optimize.minimize will be
        overwritten if set in kwargs.
    covariates: array_like, optional
        Array of time-independent customer features
        (n_customers x n_covariates). Its ``.shape[1]`` is read, so it
        must be a 2-D array-like exposing ``shape``.
    dropout_rate_scale_parameter_covariates: array_like, optional
        Array of time-independent customer features
        (n_customers x n_covariates) used exclusively for the dropout
        rate's scale parameter (denoted beta in the literature).
        If this is None and `covariates` isn't, the latter's values are
        used for both the transaction and dropout rate scale parameter
        derivation. Its ``.shape[1]`` is read as well.
    kwargs:
        key word arguments to pass to the scipy.optimize.minimize
        function as options dict

    Returns
    -------
    ParetoNBDwithCovariatesFitter
        with additional properties like ``params_`` and methods like
        ``predict``
    """
    frequency = asarray(frequency).astype(int)
    recency = asarray(recency)
    T = asarray(T)

    if weights is None:
        weights = np.ones(recency.shape[0], dtype=np.int64)
    else:
        weights = asarray(weights)

    _check_inputs(frequency, recency, T)

    # The fit runs on time values multiplied by self._scale; alpha_0 and
    # beta_0 are divided by the same factor once the fit returns.
    self._scale = _scale_time(T)
    scaled_recency = recency * self._scale
    scaled_T = T * self._scale

    self.covariates = covariates
    self.dropout_rate_scale_parameter_covariates = dropout_rate_scale_parameter_covariates

    # covariates_size[0] / [1]: number of gamma_1 / gamma_2 coefficients.
    self.covariates_size = [1, 1]
    if self.covariates is not None:
        n_covariates = self.covariates.shape[1]
        self.covariates_size = [n_covariates, n_covariates]
    if self.dropout_rate_scale_parameter_covariates is not None:
        self.covariates_size[1] = \
            self.dropout_rate_scale_parameter_covariates.shape[1]

    # Four base parameters (r, alpha_0, s, beta_0) plus one coefficient
    # per gamma_1 and gamma_2 entry. Deriving the count from
    # covariates_size keeps it consistent with the slicing below; the
    # previous incremental count came up short when `covariates` had more
    # than one column and no separate dropout covariates were supplied.
    n_base_params = 4
    params_size = n_base_params + sum(self.covariates_size)

    params, self._negative_log_likelihood_ = self._fit(
        minimizing_function_args=(frequency, scaled_recency, scaled_T,
                                  weights, self.penalizer_coef),
        iterative_fitting=iterative_fitting,
        initial_params=initial_params,
        params_size=params_size,
        disp=verbose,
        tol=tol,
        fit_method=fit_method,
        maxiter=maxiter,
        **kwargs)

    # Regroup the flat parameter vector: four scalars followed by the
    # gamma_1 and gamma_2 coefficient slices.
    gamma_1_end = n_base_params + self.covariates_size[0]
    gamma_2_end = gamma_1_end + self.covariates_size[1]
    params = tuple(params[:n_base_params]) + \
        (params[n_base_params:gamma_1_end], ) + \
        (params[gamma_1_end:gamma_2_end], )

    self._hessian_ = None
    self.params_ = pd.Series(
        *(params, ["r", "alpha_0", "s", "beta_0", "gamma_1", "gamma_2"]))
    self.params_["alpha_0"] /= self._scale
    self.params_["beta_0"] /= self._scale

    self.data = DataFrame(
        {
            "frequency": frequency,
            "recency": recency,
            "T": T,
            "weights": weights
        },
        index=index)

    self.generate_new_data = lambda size=1: pareto_nbd_model(
        T,
        *self._convert_parameters(
            self._unload_params("r", "alpha_0", "s", "beta_0", "gamma_1",
                                "gamma_2")),
        size=size)

    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self