def fit(self, frequency, recency, T, iterative_fitting=1, initial_params=None, verbose=False):
    """
    Fit the Pareto/NBD model to transaction summary data.

    Parameters:
        frequency: the frequency vector of customers' purchases
            (denoted x in literature).
        recency: the recency vector of customers' purchases
            (denoted t_x in literature).
        T: the vector of customers' age (time since first purchase).
        iterative_fitting: number of additional fits performed to search for
            better parameters. Setting to 0 is faster but may hurt estimates.
        initial_params: set initial params for the iterative fitter.
        verbose: set to true to print out convergence diagnostics.

    Returns:
        self, with additional properties and methods like params_ and plot
    """
    frequency, recency, T = asarray(frequency), asarray(recency), asarray(T)
    _check_inputs(frequency, recency, T)

    # Minimize the negative log-likelihood over the 4 Pareto/NBD parameters.
    minimizer_args = [frequency, recency, T, self.penalizer_coef]
    fitted_params, log_likelihood = _fit(self._negative_log_likelihood,
                                         minimizer_args, iterative_fitting,
                                         initial_params, 4, verbose)
    self._negative_log_likelihood_ = log_likelihood
    self.params_ = OrderedDict(zip(['r', 'alpha', 's', 'beta'], fitted_params))

    self.data = DataFrame(vconcat[frequency, recency, T],
                          columns=['frequency', 'recency', 'T'])
    self.generate_new_data = lambda size=1: pareto_nbd_model(T, *fitted_params, size=size)
    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def simulate(pareto, mbg):
    """
    Compare cohorts simulated from a fitted Pareto/NBD model and a fitted
    Modified BG/NBD model over several observation horizons, printing the
    total purchase counts and the number of customers still alive for each.

    Parameters:
        pareto: a fitted Pareto/NBD model exposing ``params_``.
        mbg: a fitted Modified BG/NBD model exposing ``summary['coef']``.
    """
    r_par, alpha_par, s, beta = pareto.params_
    r_mbg, alpha_mbg, a, b = mbg.summary['coef']
    n_customers = 100

    for horizon in (10, 365, 3650, 36500):
        cohort_par = pareto_nbd_model(T=horizon, r=r_par, alpha=alpha_par,
                                      s=s, beta=beta, size=n_customers)
        cohort_mbg = modified_beta_geometric_nbd_model(
            T=horizon, r=r_mbg, alpha=alpha_mbg, a=a, b=b, size=n_customers)

        print("Duration: {}".format(horizon))
        print("Pareto/NBD : Modified BG/NBD")
        print("Number of Purchases: {} : Number of Purchases: {}".format(
            cohort_par['frequency'].sum(), cohort_mbg['frequency'].sum()))
        print("Number of Customers Alive: {} : Number of Customers Alive: {}".
              format(cohort_par['alive'].sum(), cohort_mbg['alive'].sum()))
        print(
            "-----------------------------------------------------------------"
        )
def fit(self, frequency, recency, T, iterative_fitting=1, initial_params=None,
        verbose=False, tol=1e-4, index=None):
    """
    Fit the Pareto/NBD model to transaction summary data.

    Parameters:
        frequency: the frequency vector of customers' purchases
            (denoted x in literature).
        recency: the recency vector of customers' purchases
            (denoted t_x in literature).
        T: the vector of customers' age (time since first purchase).
        iterative_fitting: number of fits performed over random/warm-started
            initial params.
        initial_params: set initial params for the iterative fitter.
        verbose: set to true to print out convergence diagnostics.
        tol: tolerance forwarded to the underlying minimizer.
        index: index for the resulting DataFrame accessible via self.data.

    Returns:
        self, with additional properties and methods like params_ and plot
    """
    frequency, recency, T = asarray(frequency), asarray(recency), asarray(T)
    _check_inputs(frequency, recency, T)

    # Minimize the negative log-likelihood over the 4 Pareto/NBD parameters.
    fitted_params, log_likelihood = _fit(
        self._negative_log_likelihood,
        [frequency, recency, T, self.penalizer_coef], iterative_fitting,
        initial_params, 4, verbose, tol)
    self._negative_log_likelihood_ = log_likelihood
    self.params_ = OrderedDict(zip(['r', 'alpha', 's', 'beta'], fitted_params))

    self.data = DataFrame(vconcat[frequency, recency, T],
                          columns=['frequency', 'recency', 'T'])
    if index is not None:
        self.data.index = index

    self.generate_new_data = lambda size=1: pareto_nbd_model(
        T, *fitted_params, size=size)
    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def fit(self, frequency, recency, T, iterative_fitting=1, initial_params=None,
        verbose=False):
    """
    Fit the Pareto/NBD model to transaction summary data.

    Parameters:
        frequency: the frequency vector of customers' purchases
            (denoted x in literature).
        recency: the recency vector of customers' purchases
            (denoted t_x in literature).
        T: the vector of customers' age (time since first purchase).
        iterative_fitting: number of additional fits performed to search for
            better parameters. Setting to 0 improves performance but may
            hurt estimates.
        initial_params: set initial params for the iterative fitter.
        verbose: set to true to print out convergence diagnostics.

    Returns:
        self, with additional properties and methods like params_ and plot
    """
    frequency, recency, T = asarray(frequency), asarray(recency), asarray(T)
    _check_inputs(frequency, recency, T)

    # Minimize the negative log-likelihood; note this variant of _fit takes
    # the data vectors as separate positional arguments.
    fitted_params, log_likelihood = _fit(
        self._negative_log_likelihood, frequency, recency, T,
        iterative_fitting, self.penalizer_coef, initial_params, verbose)
    self._negative_log_likelihood_ = log_likelihood
    self.params_ = OrderedDict(zip(['r', 'alpha', 's', 'beta'], fitted_params))

    self.data = DataFrame(vconcat[frequency, recency, T],
                          columns=['frequency', 'recency', 'T'])
    self.generate_new_data = lambda size=1: pareto_nbd_model(
        T, *fitted_params, size=size)
    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def fit(self, frequency, recency, T, weights=None, iterative_fitting=1,
        initial_params=None, verbose=False, tol=1e-4, index=None,
        fit_method="Nelder-Mead", maxiter=2000, **kwargs):
    """
    Pareto/NBD model fitter.

    Parameters
    ----------
    frequency: array_like
        the frequency vector of customers' purchases (denoted x in
        literature).
    recency: array_like
        the recency vector of customers' purchases (denoted t_x in
        literature).
    T: array_like
        customers' age (time units since first purchase)
    weights: None or array_like
        Number of customers with given frequency/recency/T, defaults to 1
        if not specified. Fader and Hardie condense the individual RFM
        matrix into all observed combinations of frequency/recency/T. This
        parameter represents the count of customers with a given purchase
        pattern. Instead of calculating individual log-likelihood, the
        log-likelihood is calculated for each pattern and multiplied by
        the number of customers with that pattern.
    iterative_fitting: int, optional
        perform iterative_fitting fits over random/warm-started initial
        params
    initial_params: array_like, optional
        set the initial parameters for the fitter.
    verbose : bool, optional
        set to true to print out convergence diagnostics.
    tol : float, optional
        tolerance for termination of the function minimization process.
    index: array_like, optional
        index for resulted DataFrame which is accessible via self.data
    fit_method : string, optional
        fit_method to passing to scipy.optimize.minimize
    maxiter : int, optional
        max iterations for optimizer in scipy.optimize.minimize will be
        overwritten if set in kwargs.
    kwargs:
        key word arguments to pass to the scipy.optimize.minimize
        function as options dict

    Returns
    -------
    ParetoNBDFitter
        with additional properties like ``params_`` and methods like
        ``predict``
    """
    frequency = asarray(frequency).astype(int)
    recency = asarray(recency)
    T = asarray(T)
    weights = (np.ones(recency.shape[0], dtype=np.int64)
               if weights is None else asarray(weights))
    _check_inputs(frequency, recency, T)

    # Rescale time before optimizing; the scale is divided back out of the
    # time-denominated parameters (alpha, beta) below.
    self._scale = _scale_time(T)
    scaled_recency = recency * self._scale
    scaled_T = T * self._scale

    fitted_params, log_likelihood = self._fit(
        (frequency, scaled_recency, scaled_T, weights, self.penalizer_coef),
        iterative_fitting, initial_params, 4, verbose, tol, fit_method,
        maxiter, **kwargs)
    self._negative_log_likelihood_ = log_likelihood
    self._hessian_ = None

    self.params_ = pd.Series(fitted_params, ["r", "alpha", "s", "beta"])
    self.params_["alpha"] /= self._scale
    self.params_["beta"] /= self._scale

    self.data = DataFrame(
        {
            "frequency": frequency,
            "recency": recency,
            "T": T,
            "weights": weights
        },
        index=index)
    self.generate_new_data = lambda size=1: pareto_nbd_model(
        T, *self._unload_params("r", "alpha", "s", "beta"), size=size)
    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def fit(self, frequency, recency, T, weights=None, iterative_fitting=1,
        initial_params=None, verbose=False, tol=1e-4, index=None,
        fit_method="Nelder-Mead", maxiter=2000, covariates=None,
        dropout_rate_scale_parameter_covariates=None, **kwargs):
    """
    Pareto/NBD model fitter (with customer covariates).

    Parameters
    ----------
    frequency: array_like
        the frequency vector of customers' purchases (denoted x in
        literature).
    recency: array_like
        the recency vector of customers' purchases (denoted t_x in
        literature).
    T: array_like
        customers' age (time units since first purchase)
    weights: None or array_like
        Number of customers with given frequency/recency/T, defaults to 1
        if not specified. Fader and Hardie condense the individual RFM
        matrix into all observed combinations of frequency/recency/T. This
        parameter represents the count of customers with a given purchase
        pattern. Instead of calculating individual log-likelihood, the
        log-likelihood is calculated for each pattern and multiplied by
        the number of customers with that pattern.
    iterative_fitting: int, optional
        perform iterative_fitting fits over random/warm-started initial
        params
    initial_params: array_like, optional
        set the initial parameters for the fitter.
    verbose : bool, optional
        set to true to print out convergence diagnostics.
    tol : float, optional
        tolerance for termination of the function minimization process.
    index: array_like, optional
        index for resulted DataFrame which is accessible via self.data
    fit_method : string, optional
        fit_method to passing to scipy.optimize.minimize
    maxiter : int, optional
        max iterations for optimizer in scipy.optimize.minimize will be
        overwritten if set in kwargs.
    covariates: array_like, optional
        Array of time-independent customer features
        (n_customers x n_covariates).
    dropout_rate_scale_parameter_covariates: array_like, optional
        Array of time-independent customer features
        (n_customers x n_covariates) used exclusively for the dropout
        rate's scale parameter (denoted beta in the literature). If this
        is None and `covariates` isn't, the latter's values are used for
        both the transaction and dropout rate scale parameter derivation.
    kwargs:
        key word arguments to pass to the scipy.optimize.minimize
        function as options dict

    Returns
    -------
    ParetoNBDwithCovariatesFitter
        with additional properties like ``params_`` and methods like
        ``predict``
    """
    frequency = asarray(frequency).astype(int)
    recency = asarray(recency)
    T = asarray(T)

    # Default to unit weight per row (one customer per RFM pattern).
    if weights is None:
        weights = np.ones(recency.shape[0], dtype=np.int64)
    else:
        weights = asarray(weights)

    _check_inputs(frequency, recency, T)

    # Rescale time before optimizing; the scale is divided back out of the
    # time-denominated parameters (alpha_0, beta_0) below.
    self._scale = _scale_time(T)
    scaled_recency = recency * self._scale
    scaled_T = T * self._scale

    # Base parameter count: r, alpha_0, s, beta_0 plus one gamma coefficient
    # for each of the two scale parameters. Each covariate matrix with k
    # columns replaces its single gamma with k coefficients (hence k - 1
    # additional params per matrix).
    params_size = 6
    self.covariates = covariates
    self.dropout_rate_scale_parameter_covariates = dropout_rate_scale_parameter_covariates
    # covariates_size[0]: number of gamma_1 coefficients (transaction rate
    # scale); covariates_size[1]: number of gamma_2 coefficients (dropout
    # rate scale).
    self.covariates_size = [1, 1]
    if self.covariates is not None:
        covariates_size = self.covariates.shape[1]
        params_size += (covariates_size - 1)
        # With no dedicated dropout covariates, `covariates` sizes both.
        self.covariates_size = [covariates_size, covariates_size]
    if self.dropout_rate_scale_parameter_covariates is not None:
        dropout_rate_scale_parameter_covariates_size = self.dropout_rate_scale_parameter_covariates.shape[
            1]
        params_size += (dropout_rate_scale_parameter_covariates_size - 1)
        self.covariates_size[
            1] = dropout_rate_scale_parameter_covariates_size

    params, self._negative_log_likelihood_ = self._fit(
        minimizing_function_args=(frequency, scaled_recency, scaled_T,
                                  weights, self.penalizer_coef),
        iterative_fitting=iterative_fitting,
        initial_params=initial_params,
        params_size=params_size,
        disp=verbose,
        tol=tol,
        fit_method=fit_method,
        maxiter=maxiter,
        **kwargs)

    # Split the flat parameter vector into the 4 scalars followed by the
    # gamma_1 and gamma_2 coefficient sub-vectors, using the sizes recorded
    # in self.covariates_size above.
    params = tuple(params[:4]) + \
        (params[4:4+self.covariates_size[0]], ) + \
        (params[4+self.covariates_size[0]:4+self.covariates_size[0]+self.covariates_size[1]], )

    self._hessian_ = None
    self.params_ = pd.Series(
        *(params, ["r", "alpha_0", "s", "beta_0", "gamma_1", "gamma_2"]))
    # Undo the time rescaling applied before fitting.
    self.params_["alpha_0"] /= self._scale
    self.params_["beta_0"] /= self._scale

    self.data = DataFrame(
        {
            "frequency": frequency,
            "recency": recency,
            "T": T,
            "weights": weights
        },
        index=index)
    self.generate_new_data = lambda size=1: pareto_nbd_model(
        T, *self._convert_parameters(
            self._unload_params("r", "alpha_0", "s", "beta_0", "gamma_1",
                                "gamma_2")), size=size)

    self.predict = self.conditional_expected_number_of_purchases_up_to_time

    return self