def test_beta_geometric_nbd_model_transactional_data(
    T, r, alpha, a, b, observation_period_end, freq, size
):
    """Simulated transactions must summarise to the directly simulated model data.

    The transaction-level generator and ``beta_geometric_nbd_model`` are each
    run right after seeding NumPy with the same value. The summarised
    transactions are then compared with the ``frequency`` / ``recency`` / ``T``
    columns of the direct simulation, whose recency is rounded up with
    ``np.ceil``.
    """
    seed = 188898

    # Generate transaction-level data, then summarise it.
    np.random.seed(seed)
    simulated_transactions = beta_geometric_nbd_model_transactional_data(
        T=T,
        r=r,
        alpha=alpha,
        a=a,
        b=b,
        observation_period_end=observation_period_end,
        freq=freq,
        size=size,
    )
    summarised = summary_data_from_transaction_data(
        transactions=simulated_transactions,
        customer_id_col="customer_id",
        datetime_col="date",
        observation_period_end=observation_period_end,
        freq=freq,
    )

    # Re-seed with the same value before running the direct simulation.
    np.random.seed(seed)
    compared_columns = ["frequency", "recency", "T"]
    simulated = beta_geometric_nbd_model(T=T, r=r, alpha=alpha, a=a, b=b, size=size)
    expected = simulated[compared_columns]
    expected["recency"] = expected["recency"].apply(np.ceil)

    # Drop both indexes so the comparison is purely positional.
    expected = expected.reset_index(drop=True)
    actual = summarised.reset_index(drop=True)
    assert expected.equals(actual)
def fit(
    self,
    frequency,
    recency,
    T,
    iterative_fitting=1,
    initial_params=None,
    verbose=False,
    tol=1e-4,
    index=None,
):
    """
    Fit the BG/NBD model to the given customer data.

    Parameters:
        frequency: the frequency vector of customers' purchases (denoted x in literature).
        recency: the recency vector of customers' purchases (denoted t_x in literature).
        T: the vector of customers' age (time since first purchase)
        iterative_fitting: perform iterative_fitting fits over random/warm-started initial params
        initial_params: set the initial parameters for the fitter.
        verbose: set to true to print out convergence diagnostics.
        tol: tolerance value passed through unchanged to the underlying
            fitting routine (presumably its convergence tolerance).
        index: index for resulted DataFrame which is accessible via self.data

    Returns:
        self, with additional properties and methods like params_ and predict
    """
    frequency, recency, T = asarray(frequency), asarray(recency), asarray(T)
    _check_inputs(frequency, recency, T)

    # Recency and T are multiplied by the factor from _scale_time before fitting.
    self._scale = _scale_time(T)
    recency_scaled = recency * self._scale
    T_scaled = T * self._scale

    likelihood_args = [frequency, recency_scaled, T_scaled, self.penalizer_coef]
    n_params = 4  # r, alpha, a, b
    fitted, self._negative_log_likelihood_ = _fit(
        self._negative_log_likelihood,
        likelihood_args,
        iterative_fitting,
        initial_params,
        n_params,
        verbose,
        tol,
    )

    param_names = ['r', 'alpha', 'a', 'b']
    self.params_ = OrderedDict(zip(param_names, fitted))
    # alpha was estimated on the scaled time values; divide by the same factor.
    self.params_['alpha'] /= self._scale

    # Keep the unscaled inputs around as a three-column frame.
    column_names = ['frequency', 'recency', 'T']
    self.data = DataFrame(vconcat[frequency, recency, T], columns=column_names)
    if index is not None:
        self.data.index = index

    def _generate_new_data(size=1):
        # Parameters are looked up at call time, not frozen at fit time.
        return beta_geometric_nbd_model(
            T, *self._unload_params('r', 'alpha', 'a', 'b'), size=size)

    self.generate_new_data = _generate_new_data
    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def fit(
    self,
    frequency,
    recency,
    T,
    iterative_fitting=1,
    initial_params=None,
    verbose=False,
):
    """
    Fit the BG/NBD model to the given customer data.

    Parameters:
        frequency: the frequency vector of customers' purchases (denoted x in literature).
        recency: the recency vector of customers' purchases (denoted t_x in literature).
        T: the vector of customers' age (time since first purchase)
        iterative_fitting: number of additional fits to perform in search of
            the best parameters for the model. Setting to 0 will improve
            performance but possibly hurt estimates.
        initial_params: set the initial parameters for the fitter.
        verbose: set to true to print out convergence diagnostics.

    Returns:
        self, with additional properties and methods like params_ and predict
    """
    frequency, recency, T = asarray(frequency), asarray(recency), asarray(T)
    _check_inputs(frequency, recency, T)

    # Recency and T are multiplied by the factor from _scale_time before fitting.
    self._scale = _scale_time(T)
    recency_scaled = recency * self._scale
    T_scaled = T * self._scale

    fitted, self._negative_log_likelihood_ = _fit(
        self._negative_log_likelihood,
        frequency,
        recency_scaled,
        T_scaled,
        iterative_fitting,
        self.penalizer_coef,
        initial_params,
        verbose,
    )

    param_names = ["r", "alpha", "a", "b"]
    self.params_ = OrderedDict(zip(param_names, fitted))
    # alpha was estimated on the scaled time values; divide by the same factor.
    self.params_["alpha"] /= self._scale

    # Keep the unscaled inputs around as a three-column frame.
    column_names = ["frequency", "recency", "T"]
    self.data = DataFrame(vconcat[frequency, recency, T], columns=column_names)

    def _generate_new_data(size=1):
        # Parameters are looked up at call time, not frozen at fit time.
        return beta_geometric_nbd_model(
            T, *self._unload_params("r", "alpha", "a", "b"), size=size
        )

    self.generate_new_data = _generate_new_data
    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def fit(self, frequency, recency, T,
        iterative_fitting=1, initial_params=None, verbose=False):
    """
    Fit the BG/NBD model to the given customer data.

    Parameters:
        frequency: the frequency vector of customers' purchases (denoted x in literature).
        recency: the recency vector of customers' purchases (denoted t_x in literature).
        T: the vector of customers' age (time since first purchase)
        iterative_fitting: number of additional fits to perform in search of
            the best parameters for the model. Setting to 0 will improve
            performance but possibly hurt estimates.
        initial_params: set the initial parameters for the fitter.
        verbose: set to true to print out convergence diagnostics.

    Returns:
        self, with additional properties and methods like params_ and predict
    """
    frequency = asarray(frequency)
    recency = asarray(recency)
    T = asarray(T)
    _check_inputs(frequency, recency, T)

    # The fitter works on time values multiplied by the _scale_time factor.
    scale = self._scale = _scale_time(T)
    fit_recency = recency * scale
    fit_T = T * scale

    estimates, self._negative_log_likelihood_ = _fit(
        self._negative_log_likelihood,
        frequency, fit_recency, fit_T,
        iterative_fitting,
        self.penalizer_coef,
        initial_params,
        verbose)

    self.params_ = OrderedDict(zip(['r', 'alpha', 'a', 'b'], estimates))
    # alpha was estimated on the scaled time values; divide by the same factor.
    self.params_['alpha'] /= self._scale

    # Store the original (unscaled) inputs side by side.
    stacked_inputs = vconcat[frequency, recency, T]
    self.data = DataFrame(stacked_inputs, columns=['frequency', 'recency', 'T'])

    def _simulate(size=1):
        # Reads the current parameters through _unload_params on every call.
        current = self._unload_params('r', 'alpha', 'a', 'b')
        return beta_geometric_nbd_model(T, *current, size=size)

    self.generate_new_data = _simulate
    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def fit(self, frequency, recency, T,
        iterative_fitting=1, initial_params=None, verbose=False, tol=1e-4):
    """
    Fit the BG/NBD model to the given customer data.

    Parameters:
        frequency: the frequency vector of customers' purchases (denoted x in literature).
        recency: the recency vector of customers' purchases (denoted t_x in literature).
        T: the vector of customers' age (time since first purchase)
        iterative_fitting: perform iterative_fitting fits over random/warm-started initial params
        initial_params: set the initial parameters for the fitter.
        verbose: set to true to print out convergence diagnostics.
        tol: tolerance value passed through unchanged to the underlying
            fitting routine (presumably its convergence tolerance).

    Returns:
        self, with additional properties and methods like params_ and predict
    """
    frequency = asarray(frequency)
    recency = asarray(recency)
    T = asarray(T)
    _check_inputs(frequency, recency, T)

    # The fitter works on time values multiplied by the _scale_time factor.
    scale = self._scale = _scale_time(T)
    fit_recency = recency * scale
    fit_T = T * scale

    n_params = 4  # r, alpha, a, b
    estimates, self._negative_log_likelihood_ = _fit(
        self._negative_log_likelihood,
        [frequency, fit_recency, fit_T, self.penalizer_coef],
        iterative_fitting,
        initial_params,
        n_params,
        verbose,
        tol)

    self.params_ = OrderedDict(zip(['r', 'alpha', 'a', 'b'], estimates))
    # alpha was estimated on the scaled time values; divide by the same factor.
    self.params_['alpha'] /= self._scale

    # Store the original (unscaled) inputs side by side.
    stacked_inputs = vconcat[frequency, recency, T]
    self.data = DataFrame(stacked_inputs, columns=['frequency', 'recency', 'T'])

    def _simulate(size=1):
        # Reads the current parameters through _unload_params on every call.
        current = self._unload_params('r', 'alpha', 'a', 'b')
        return beta_geometric_nbd_model(T, *current, size=size)

    self.generate_new_data = _simulate
    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self
def fit(self, frequency, recency, T, iterative_fitting=0, initial_params=None):
    """
    Fit the BG/NBD model to the given customer data.

    Parameters:
        frequency: the frequency vector of customers' purchases (denoted x in literature).
        recency: the recency vector of customers' purchases (denoted t_x in literature).
        T: the vector of customers' age (time since first purchase)
        iterative_fitting: number of additional fits to perform in search of
            the best parameters for the model. Setting to 0 will improve
            performance but possibly hurt estimates.
        initial_params: initial parameters passed through to the fitter.

    Returns:
        self, with params_, data, generate_new_data and predict set.
    """
    frequency = np.asarray(frequency)
    recency = np.asarray(recency)
    T = np.asarray(T)

    fit_result = _fit(
        self._negative_log_likelihood,
        frequency,
        recency,
        T,
        iterative_fitting,
        self.penalizer_coef,
        initial_params,
    )
    params, self._negative_log_likelihood_ = fit_result

    param_names = ['r', 'alpha', 'a', 'b']
    self.params_ = dict(zip(param_names, params))

    # Keep the inputs around as a three-column frame.
    column_names = ['frequency', 'recency', 'T']
    self.data = pd.DataFrame(np.c_[frequency, recency, T], columns=column_names)

    def _generate_new_data(size=1):
        # Uses the parameter values captured from this fit.
        return beta_geometric_nbd_model(T, *params, size=size)

    self.generate_new_data = _generate_new_data
    self.predict = self.conditional_expected_number_of_purchases_up_to_time
    return self