def fit(self, frequency, monetary_value, iterative_fitting=4,
        initial_params=None, verbose=False, tol=1e-4):
    """
    Estimate the Gamma/Gamma model parameters from purchase data.

    Parameters:
        frequency: the frequency vector of customers' purchases
            (denoted x in literature).
        monetary_value: the monetary value vector of customers' purchases
            (denoted m in literature).
        iterative_fitting: number of fits to perform over
            random/warm-started initial params.
        initial_params: initial params handed to the iterative fitter.
        verbose: set to true to print out convergence diagnostics.
        tol: tolerance for termination of the function minimization
            process.

    Returns:
        self, fitted and with parameters estimated
    """
    _check_inputs(frequency, monetary_value=monetary_value)

    likelihood_args = [frequency, monetary_value, self.penalizer_coef]
    estimates, neg_log_likelihood = _fit(
        self._negative_log_likelihood,
        likelihood_args,
        iterative_fitting,
        initial_params,
        3,
        verbose,
        tol,
    )
    self._negative_log_likelihood_ = neg_log_likelihood

    # Keep the inputs side by side so they can be inspected after fitting.
    column_names = ['frequency', 'monetary_value']
    self.data = DataFrame(vconcat[frequency, monetary_value],
                          columns=column_names)

    param_names = ['p', 'q', 'v']
    self.params_ = OrderedDict(zip(param_names, estimates))
    return self
def fit(self, frequency, monetary_value, iterative_fitting=4,
        initial_params=None, verbose=False, tol=1e-4):
    """
    Fit the Gamma/Gamma model to the supplied data.

    Parameters:
        frequency: the frequency vector of customers' purchases
            (denoted x in literature).
        monetary_value: the monetary value vector of customers' purchases
            (denoted m in literature).
        iterative_fitting: perform this many fits over
            random/warm-started initial params.
        initial_params: set initial params for the iterative fitter.
        verbose: set to true to print out convergence diagnostics.
        tol: tolerance for termination of the function minimization
            process.

    Returns:
        self, fitted and with parameters estimated
    """
    _check_inputs(frequency, monetary_value=monetary_value)

    fitted_values, self._negative_log_likelihood_ = _fit(
        self._negative_log_likelihood,
        [frequency, monetary_value, self.penalizer_coef],
        iterative_fitting, initial_params, 3, verbose, tol)

    # Store the observed inputs as a two-column frame.
    observed = vconcat[frequency, monetary_value]
    self.data = DataFrame(observed, columns=['frequency', 'monetary_value'])

    # Pair each estimate with its name, in the order p, q, v.
    self.params_ = OrderedDict(zip(['p', 'q', 'v'], fitted_values))

    return self
def fit(self, frequency, recency, T, iterative_fitting=1,
        initial_params=None, verbose=False):
    """
    Fit the Pareto/NBD model to the supplied data.

    Parameters:
        frequency: the frequency vector of customers' purchases
            (denoted x in literature).
        recency: the recency vector of customers' purchases
            (denoted t_x in literature).
        T: the vector of customers' age (time since first purchase)
        iterative_fitting: perform `iterative_fitting` additional fits to
            find the best parameters for the model. Setting to 0 will
            improve performance but possibly hurt estimates.
        initial_params: set initial params for the iterative fitter.
        verbose: set to true to print out convergence diagnostics.

    Returns:
        self, with additional properties and methods like params_ and plot
    """
    frequency, recency, T = asarray(frequency), asarray(recency), asarray(T)
    _check_inputs(frequency, recency, T)

    likelihood_args = [frequency, recency, T, self.penalizer_coef]
    estimates, neg_log_likelihood = _fit(
        self._negative_log_likelihood,
        likelihood_args,
        iterative_fitting,
        initial_params,
        4,
        verbose,
    )
    self._negative_log_likelihood_ = neg_log_likelihood

    param_names = ['r', 'alpha', 's', 'beta']
    self.params_ = OrderedDict(zip(param_names, estimates))

    column_names = ['frequency', 'recency', 'T']
    self.data = DataFrame(vconcat[frequency, recency, T],
                          columns=column_names)

    # Forwards the observed T and the fitted estimates to pareto_nbd_model.
    self.generate_new_data = lambda size=1: pareto_nbd_model(
        T, *estimates, size=size)

    # `predict` is an alias for the conditional expectation method.
    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def fit(self, frequency, recency, n, n_custs, verbose=False):
    """
    Fit the BG/BB model.

    Parameters:
        frequency: Total periods with observed transactions
        recency: Period of most recent transaction
        n: Number of transaction opportunities
        n_custs: Number of customers with given frequency/recency/T.
            Fader and Hardie condense the individual RFM matrix into all
            observed combinations of frequency/recency/T. This parameter
            represents the count of customers with a given purchase
            pattern. Instead of calculating individual loglikelihood, the
            loglikelihood is calculated for each pattern and multiplied by
            the number of customers with that pattern.
        verbose: forwarded to the fitting routine.

    Returns:
        self
    """
    frequency, recency = asarray(frequency), asarray(recency)
    n, n_custs = asarray(n), asarray(n_custs)
    _check_inputs(frequency, recency, n)

    likelihood_args = [frequency, recency, n, n_custs, self.penalizer_coef]
    start_values = np.ones(4)
    estimates, neg_log_likelihood = _fit(
        self._negative_log_likelihood,
        likelihood_args,
        0,
        start_values,
        4,
        verbose,
    )
    self._negative_log_likelihood_ = neg_log_likelihood

    param_names = ['alpha', 'beta', 'gamma', 'delta']
    self.params_ = OrderedDict(zip(param_names, estimates))

    column_names = ['frequency', 'recency', 'n', 'n_custs']
    self.data = DataFrame(vconcat[frequency, recency, n, n_custs],
                          columns=column_names)
    return self
def fit(self, frequency, recency, n, n_custs, verbose=False):
    """
    Estimate the BG/BB model parameters.

    Parameters:
        frequency: Total periods with observed transactions
        recency: Period of most recent transaction
        n: Number of transaction opportunities
        n_custs: Number of customers with given frequency/recency/T.
            Fader and Hardie condense the individual RFM matrix into all
            observed combinations of frequency/recency/T. This parameter
            represents the count of customers with a given purchase
            pattern. Instead of calculating individual loglikelihood, the
            loglikelihood is calculated for each pattern and multiplied by
            the number of customers with that pattern.
        verbose: forwarded to the fitting routine.

    Returns:
        self
    """
    # Normalise every input to an array before validating.
    frequency, recency, n, n_custs = (
        asarray(frequency), asarray(recency), asarray(n), asarray(n_custs))
    _check_inputs(frequency, recency, n)

    fitted_values, self._negative_log_likelihood_ = _fit(
        self._negative_log_likelihood,
        [frequency, recency, n, n_custs, self.penalizer_coef],
        0, np.ones(4), 4, verbose)

    self.params_ = OrderedDict(
        zip(['alpha', 'beta', 'gamma', 'delta'], fitted_values))

    observed = vconcat[frequency, recency, n, n_custs]
    self.data = DataFrame(
        observed, columns=['frequency', 'recency', 'n', 'n_custs'])

    return self
def fit(self, frequency, recency, T, iterative_fitting=1,
        initial_params=None, verbose=False, tol=1e-4, index=None):
    """
    Fit the BG/NBD model to the supplied data.

    Parameters:
        frequency: the frequency vector of customers' purchases
            (denoted x in literature).
        recency: the recency vector of customers' purchases
            (denoted t_x in literature).
        T: the vector of customers' age (time since first purchase)
        iterative_fitting: perform iterative_fitting fits over
            random/warm-started initial params
        initial_params: set the initial parameters for the fitter.
        verbose: set to true to print out convergence diagnostics.
        tol: forwarded to the fitting routine as its last argument.
        index: index for resulted DataFrame which is accessible via
            self.data

    Returns:
        self, with additional properties and methods like params_ and
        predict
    """
    frequency, recency, T = asarray(frequency), asarray(recency), asarray(T)
    _check_inputs(frequency, recency, T)

    # The likelihood is evaluated on rescaled recency and T.
    self._scale = _scale_time(T)
    scaled_recency = recency * self._scale
    scaled_T = T * self._scale

    likelihood_args = [frequency, scaled_recency, scaled_T,
                       self.penalizer_coef]
    estimates, neg_log_likelihood = _fit(
        self._negative_log_likelihood,
        likelihood_args,
        iterative_fitting,
        initial_params,
        4,
        verbose,
        tol,
    )
    self._negative_log_likelihood_ = neg_log_likelihood

    param_names = ['r', 'alpha', 'a', 'b']
    self.params_ = OrderedDict(zip(param_names, estimates))
    # alpha was fitted on rescaled time, so it is divided by the same factor
    # (presumably to express it in the original time units).
    self.params_['alpha'] /= self._scale

    # self.data holds the unscaled inputs.
    column_names = ['frequency', 'recency', 'T']
    self.data = DataFrame(vconcat[frequency, recency, T],
                          columns=column_names)
    if index is not None:
        self.data.index = index

    # Reads the parameters at call time via _unload_params.
    self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
        T, *self._unload_params('r', 'alpha', 'a', 'b'), size=size)

    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def fit(self, frequency, recency, T, iterative_fitting=1,
        initial_params=None, verbose=False):
    """
    Fit the BG/NBD model to the supplied data.

    Parameters:
        frequency: the frequency vector of customers' purchases
            (denoted x in literature).
        recency: the recency vector of customers' purchases
            (denoted t_x in literature).
        T: the vector of customers' age (time since first purchase)
        iterative_fitting: perform `iterative_fitting` additional fits to
            find the best parameters for the model. Setting to 0 will
            improve performance but possibly hurt estimates.
        initial_params: set the initial parameters for the fitter.
        verbose: set to true to print out convergence diagnostics.

    Returns:
        self, with additional properties and methods like params_ and
        predict
    """
    frequency, recency, T = asarray(frequency), asarray(recency), asarray(T)
    _check_inputs(frequency, recency, T)

    # The likelihood is evaluated on rescaled recency and T.
    self._scale = _scale_time(T)
    scaled_recency = recency * self._scale
    scaled_T = T * self._scale

    estimates, neg_log_likelihood = _fit(
        self._negative_log_likelihood,
        frequency,
        scaled_recency,
        scaled_T,
        iterative_fitting,
        self.penalizer_coef,
        initial_params,
        verbose,
    )
    self._negative_log_likelihood_ = neg_log_likelihood

    param_names = ["r", "alpha", "a", "b"]
    self.params_ = OrderedDict(zip(param_names, estimates))
    # alpha was fitted on rescaled time, so it is divided by the same factor
    # (presumably to express it in the original time units).
    self.params_["alpha"] /= self._scale

    # self.data holds the unscaled inputs.
    column_names = ["frequency", "recency", "T"]
    self.data = DataFrame(vconcat[frequency, recency, T],
                          columns=column_names)

    # Reads the parameters at call time via _unload_params.
    self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
        T, *self._unload_params("r", "alpha", "a", "b"), size=size)

    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def fit(self, frequency, recency, T, iterative_fitting=1,
        initial_params=None, verbose=False):
    """
    Estimate the BG/NBD model parameters from the supplied data.

    Parameters:
        frequency: the frequency vector of customers' purchases
            (denoted x in literature).
        recency: the recency vector of customers' purchases
            (denoted t_x in literature).
        T: the vector of customers' age (time since first purchase)
        iterative_fitting: perform `iterative_fitting` additional fits to
            find the best parameters for the model. Setting to 0 will
            improve performance but possibly hurt estimates.
        initial_params: set the initial parameters for the fitter.
        verbose: set to true to print out convergence diagnostics.

    Returns:
        self, with additional properties and methods like params_ and
        predict
    """
    # Normalise every input to an array before validating.
    frequency, recency, T = (
        asarray(frequency), asarray(recency), asarray(T))
    _check_inputs(frequency, recency, T)

    # Recency and T are multiplied by a common factor for the fit.
    time_scale = _scale_time(T)
    self._scale = time_scale

    fitted_values, self._negative_log_likelihood_ = _fit(
        self._negative_log_likelihood,
        frequency,
        recency * time_scale,
        T * time_scale,
        iterative_fitting,
        self.penalizer_coef,
        initial_params,
        verbose)

    self.params_ = OrderedDict(zip(['r', 'alpha', 'a', 'b'], fitted_values))
    # The fit ran on rescaled time; alpha is divided by that same factor.
    self.params_['alpha'] /= self._scale

    observed = vconcat[frequency, recency, T]
    self.data = DataFrame(observed, columns=['frequency', 'recency', 'T'])

    self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
        T, *self._unload_params('r', 'alpha', 'a', 'b'), size=size)
    self.predict = self.conditional_expected_number_of_purchases_up_to_time

    return self
def fit(self, frequency, recency, T, iterative_fitting=1,
        initial_params=None, verbose=False, tol=1e-4):
    """
    Fit the BG/NBD model to the supplied data.

    Parameters:
        frequency: the frequency vector of customers' purchases
            (denoted x in literature).
        recency: the recency vector of customers' purchases
            (denoted t_x in literature).
        T: the vector of customers' age (time since first purchase)
        iterative_fitting: perform iterative_fitting fits over
            random/warm-started initial params
        initial_params: set the initial parameters for the fitter.
        verbose: set to true to print out convergence diagnostics.
        tol: forwarded to the fitting routine as its last argument.

    Returns:
        self, with additional properties and methods like params_ and
        predict
    """
    frequency, recency, T = asarray(frequency), asarray(recency), asarray(T)
    _check_inputs(frequency, recency, T)

    # The likelihood is evaluated on rescaled recency and T.
    self._scale = _scale_time(T)
    scaled_recency = recency * self._scale
    scaled_T = T * self._scale

    likelihood_args = [frequency, scaled_recency, scaled_T,
                       self.penalizer_coef]
    estimates, neg_log_likelihood = _fit(
        self._negative_log_likelihood,
        likelihood_args,
        iterative_fitting,
        initial_params,
        4,
        verbose,
        tol,
    )
    self._negative_log_likelihood_ = neg_log_likelihood

    param_names = ['r', 'alpha', 'a', 'b']
    self.params_ = OrderedDict(zip(param_names, estimates))
    # alpha was fitted on rescaled time, so it is divided by the same factor
    # (presumably to express it in the original time units).
    self.params_['alpha'] /= self._scale

    # self.data holds the unscaled inputs.
    column_names = ['frequency', 'recency', 'T']
    self.data = DataFrame(vconcat[frequency, recency, T],
                          columns=column_names)

    # Reads the parameters at call time via _unload_params.
    self.generate_new_data = lambda size=1: beta_geometric_nbd_model(
        T, *self._unload_params('r', 'alpha', 'a', 'b'), size=size)

    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def fit(self, frequency, recency, T, iterative_fitting=1,
        initial_params=None, verbose=False, tol=1e-4):
    """
    Fit the Pareto/NBD model to the supplied data.

    Parameters:
        frequency: the frequency vector of customers' purchases
            (denoted x in literature).
        recency: the recency vector of customers' purchases
            (denoted t_x in literature).
        T: the vector of customers' age (time since first purchase)
        iterative_fitting: perform iterative_fitting fits over
            random/warm-started initial params
        initial_params: set initial params for the iterative fitter.
        verbose: set to true to print out convergence diagnostics.
        tol: forwarded to the fitting routine as its last argument.

    Returns:
        self, with additional properties and methods like params_ and plot
    """
    frequency, recency, T = asarray(frequency), asarray(recency), asarray(T)
    _check_inputs(frequency, recency, T)

    likelihood_args = [frequency, recency, T, self.penalizer_coef]
    estimates, neg_log_likelihood = _fit(
        self._negative_log_likelihood,
        likelihood_args,
        iterative_fitting,
        initial_params,
        4,
        verbose,
        tol,
    )
    self._negative_log_likelihood_ = neg_log_likelihood

    param_names = ['r', 'alpha', 's', 'beta']
    self.params_ = OrderedDict(zip(param_names, estimates))

    column_names = ['frequency', 'recency', 'T']
    self.data = DataFrame(vconcat[frequency, recency, T],
                          columns=column_names)

    # Forwards the observed T and the fitted estimates to pareto_nbd_model.
    self.generate_new_data = lambda size=1: pareto_nbd_model(
        T, *estimates, size=size)

    # `predict` is an alias for the conditional expectation method.
    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def test_check_inputs():
    """``utils._check_inputs`` returns None for good data and raises otherwise."""
    freq = np.array([0, 1, 2])
    recency = np.array([0, 1, 10])
    T = np.array([5, 6, 15])

    # Baseline: a consistent data set is accepted.
    assert utils._check_inputs(freq, recency, T) is None

    # Every recency value exceeds the matching T.
    recency_beyond_T = T + 1
    with pytest.raises(ValueError):
        utils._check_inputs(freq, recency_beyond_T, T)

    # First customer has frequency 0 but a non-zero recency.
    recency_without_purchase = recency.copy()
    recency_without_purchase[0] = 1
    with pytest.raises(ValueError):
        utils._check_inputs(freq, recency_without_purchase, T)

    # Frequency contains a non-integer value.
    fractional_freq = np.array([0, 0.5, 2])
    with pytest.raises(ValueError):
        utils._check_inputs(fractional_freq, recency, T)
def test_check_inputs():
    """``utils._check_inputs`` returns None for good data and raises otherwise."""
    frequency, recency, T, monetary_value = (
        np.array([0, 1, 2]),
        np.array([0, 1, 10]),
        np.array([5, 6, 15]),
        np.array([2.3, 490, 33.33]),
    )

    # Baseline: a consistent data set is accepted.
    assert utils._check_inputs(frequency, recency, T, monetary_value) is None

    # Every recency value exceeds the matching T.
    recency_beyond_T = T + 1
    with pytest.raises(ValueError):
        utils._check_inputs(frequency, recency_beyond_T, T)

    # First customer has frequency 0 but a non-zero recency.
    recency_without_purchase = recency.copy()
    recency_without_purchase[0] = 1
    with pytest.raises(ValueError):
        utils._check_inputs(frequency, recency_without_purchase, T)

    # Frequency contains a non-integer value.
    fractional_freq = np.array([0, 0.5, 2])
    with pytest.raises(ValueError):
        utils._check_inputs(fractional_freq, recency, T)

    # First monetary value is zero.
    zero_monetary_value = monetary_value.copy()
    zero_monetary_value[0] = 0
    with pytest.raises(ValueError):
        utils._check_inputs(frequency, recency, T, zero_monetary_value)
def test_summary_data_from_transaction_data_obeys_data_contraints(example_summary_data):
    """The summary-data fixture must pass ``utils._check_inputs``."""
    frequency = example_summary_data['frequency']
    recency = example_summary_data['recency']
    T = example_summary_data['T']

    # _check_inputs returns None when it finds nothing to complain about.
    outcome = utils._check_inputs(frequency, recency, T)
    assert outcome is None
def fit(self, frequency, recency, T, weights=None, iterative_fitting=1,
        initial_params=None, verbose=False, tol=1e-4, index=None,
        fit_method="Nelder-Mead", maxiter=2000, **kwargs):
    """
    Fit the Pareto/NBD model.

    Parameters
    ----------
    frequency: array_like
        the frequency vector of customers' purchases
        (denoted x in literature).
    recency: array_like
        the recency vector of customers' purchases
        (denoted t_x in literature).
    T: array_like
        customers' age (time units since first purchase)
    weights: None or array_like
        Number of customers with given frequency/recency/T,
        defaults to 1 if not specified. Fader and
        Hardie condense the individual RFM matrix into all
        observed combinations of frequency/recency/T. This
        parameter represents the count of customers with a given
        purchase pattern. Instead of calculating individual
        log-likelihood, the log-likelihood is calculated for each
        pattern and multiplied by the number of customers with
        that pattern.
    iterative_fitting: int, optional
        perform iterative_fitting fits over random/warm-started initial
        params
    initial_params: array_like, optional
        set the initial parameters for the fitter.
    verbose : bool, optional
        set to true to print out convergence diagnostics.
    tol : float, optional
        tolerance for termination of the function minimization process.
    index: array_like, optional
        index for resulted DataFrame which is accessible via self.data
    fit_method : string, optional
        fit_method to pass to scipy.optimize.minimize
    maxiter : int, optional
        max iterations for optimizer in scipy.optimize.minimize; will be
        overwritten if set in kwargs.
    kwargs:
        key word arguments to pass to the scipy.optimize.minimize
        function as options dict

    Returns
    -------
    ParetoNBDFitter
        with additional properties like ``params_`` and methods like
        ``predict``
    """
    # NOTE(review): frequency is cast to int before _check_inputs sees it,
    # so fractional values are presumably truncated, not rejected -- confirm
    # this is intended.
    frequency = asarray(frequency).astype(int)
    recency, T = asarray(recency), asarray(T)

    # With no weights given, every row counts as a single customer.
    weights = (np.ones(recency.shape[0], dtype=np.int64)
               if weights is None else asarray(weights))

    _check_inputs(frequency, recency, T)

    # The likelihood is evaluated on rescaled recency and T.
    self._scale = _scale_time(T)
    scaled_recency = recency * self._scale
    scaled_T = T * self._scale

    likelihood_args = (frequency, scaled_recency, scaled_T, weights,
                       self.penalizer_coef)
    estimates, neg_log_likelihood = self._fit(
        likelihood_args,
        iterative_fitting,
        initial_params,
        4,
        verbose,
        tol,
        fit_method,
        maxiter,
        **kwargs)
    self._negative_log_likelihood_ = neg_log_likelihood

    self._hessian_ = None
    self.params_ = pd.Series(estimates, ["r", "alpha", "s", "beta"])
    # alpha and beta were fitted on rescaled time, so both are divided by
    # the same factor (presumably to express them in the original units).
    for scaled_name in ("alpha", "beta"):
        self.params_[scaled_name] /= self._scale

    # self.data holds the unscaled inputs.
    observed = {
        "frequency": frequency,
        "recency": recency,
        "T": T,
        "weights": weights,
    }
    self.data = DataFrame(observed, index=index)

    # Reads the parameters at call time via _unload_params.
    self.generate_new_data = lambda size=1: pareto_nbd_model(
        T, *self._unload_params("r", "alpha", "s", "beta"), size=size)

    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def test_summary_data_from_transaction_data_obeys_data_contraints(
        example_summary_data):
    """Check that the summary-data fixture passes ``utils._check_inputs``."""
    columns = ('frequency', 'recency', 'T')
    frequency, recency, T = (example_summary_data[name] for name in columns)

    # A None return means no constraint was reported as violated.
    assert utils._check_inputs(frequency, recency, T) is None
def fit(self, frequency, recency, T, weights=None, iterative_fitting=1,
        initial_params=None, verbose=False, tol=1e-4, index=None,
        fit_method="Nelder-Mead", maxiter=2000, covariates=None,
        dropout_rate_scale_parameter_covariates=None, **kwargs):
    """
    Pareto/NBD model fitter with optional time-independent covariates.

    Parameters
    ----------
    frequency: array_like
        the frequency vector of customers' purchases
        (denoted x in literature).
    recency: array_like
        the recency vector of customers' purchases
        (denoted t_x in literature).
    T: array_like
        customers' age (time units since first purchase)
    weights: None or array_like
        Number of customers with given frequency/recency/T,
        defaults to 1 if not specified. Fader and
        Hardie condense the individual RFM matrix into all
        observed combinations of frequency/recency/T. This
        parameter represents the count of customers with a given
        purchase pattern. Instead of calculating individual
        log-likelihood, the log-likelihood is calculated for each
        pattern and multiplied by the number of customers with
        that pattern.
    iterative_fitting: int, optional
        perform iterative_fitting fits over random/warm-started initial
        params
    initial_params: array_like, optional
        set the initial parameters for the fitter.
    verbose : bool, optional
        set to true to print out convergence diagnostics.
    tol : float, optional
        tolerance for termination of the function minimization process.
    index: array_like, optional
        index for resulted DataFrame which is accessible via self.data
    fit_method : string, optional
        fit_method to pass to scipy.optimize.minimize
    maxiter : int, optional
        max iterations for optimizer in scipy.optimize.minimize; will be
        overwritten if set in kwargs.
    covariates: array_like, optional
        Array of time-independent customer features
        (n_customers x n_covariates). Must expose ``.shape``.
    dropout_rate_scale_parameter_covariates: array_like, optional
        Array of time-independent customer features
        (n_customers x n_covariates) used exclusively for the dropout
        rate's scale parameter (denoted beta in the literature). Must
        expose ``.shape``. If this is None and `covariates` isn't, the
        latter's values are used for both the transaction and dropout
        rate scale parameter derivation.
    kwargs:
        key word arguments to pass to the scipy.optimize.minimize
        function as options dict

    Returns
    -------
    ParetoNBDwithCovariatesFitter
        with additional properties like ``params_`` and methods like
        ``predict``
    """
    frequency = asarray(frequency).astype(int)
    recency = asarray(recency)
    T = asarray(T)

    # With no weights given, every row counts as a single customer.
    if weights is None:
        weights = np.ones(recency.shape[0], dtype=np.int64)
    else:
        weights = asarray(weights)

    _check_inputs(frequency, recency, T)

    # The likelihood is evaluated on rescaled recency and T.
    self._scale = _scale_time(T)
    scaled_recency = recency * self._scale
    scaled_T = T * self._scale

    self.covariates = covariates
    self.dropout_rate_scale_parameter_covariates = (
        dropout_rate_scale_parameter_covariates)

    # covariates_size = [len(gamma_1), len(gamma_2)]; one coefficient each
    # when no covariates are supplied.
    self.covariates_size = [1, 1]
    if self.covariates is not None:
        n_covariates = self.covariates.shape[1]
        # Shared covariates drive both coefficient vectors.
        self.covariates_size = [n_covariates, n_covariates]
    if self.dropout_rate_scale_parameter_covariates is not None:
        self.covariates_size[1] = (
            self.dropout_rate_scale_parameter_covariates.shape[1])

    # Four base parameters (r, alpha_0, s, beta_0) plus both coefficient
    # vectors. The previous running total added n_covariates - 1 only once
    # for shared covariates, so with more than one column too few
    # parameters were requested and the gamma_2 slice below was truncated.
    params_size = 4 + sum(self.covariates_size)

    params, self._negative_log_likelihood_ = self._fit(
        minimizing_function_args=(frequency, scaled_recency, scaled_T,
                                  weights, self.penalizer_coef),
        iterative_fitting=iterative_fitting,
        initial_params=initial_params,
        params_size=params_size,
        disp=verbose,
        tol=tol,
        fit_method=fit_method,
        maxiter=maxiter,
        **kwargs)

    # Split the flat vector into four scalars and the two gamma vectors.
    gamma_1_size, gamma_2_size = self.covariates_size
    gamma_1_end = 4 + gamma_1_size
    gamma_2_end = gamma_1_end + gamma_2_size
    params = tuple(params[:4]) + (
        params[4:gamma_1_end],
        params[gamma_1_end:gamma_2_end],
    )

    self._hessian_ = None
    self.params_ = pd.Series(
        params, ["r", "alpha_0", "s", "beta_0", "gamma_1", "gamma_2"])
    # alpha_0 and beta_0 were fitted on rescaled time, so both are divided
    # by the same factor (presumably to express them in the original units).
    self.params_["alpha_0"] /= self._scale
    self.params_["beta_0"] /= self._scale

    # self.data holds the unscaled inputs.
    self.data = DataFrame(
        {
            "frequency": frequency,
            "recency": recency,
            "T": T,
            "weights": weights,
        },
        index=index)

    # Reads the parameters at call time and passes them through
    # _convert_parameters before handing them to pareto_nbd_model.
    self.generate_new_data = lambda size=1: pareto_nbd_model(
        T,
        *self._convert_parameters(
            self._unload_params("r", "alpha_0", "s", "beta_0",
                                "gamma_1", "gamma_2")),
        size=size)

    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def test_summary_data_from_transaction_data_obeys_data_contraints(
        example_summary_data):
    """Verify the summary-data fixture satisfies ``utils._check_inputs``."""
    summary = example_summary_data
    check_result = utils._check_inputs(
        summary["frequency"],
        summary["recency"],
        summary["T"],
    )
    # A None return means no constraint was reported as violated.
    assert check_result is None