def test_consistence_over_T():
    """Check that Pareto/NBD predictions agree across different T windows.

    One synthetic data set is generated, then a fresh model is fitted on each
    of three overlapping ``T`` windows. The expected number of purchases at a
    horizon of 365 obtained from every fit must differ from the value of the
    first window by less than 5 combined errors (the two errors added in
    quadrature).
    """
    true_params = {'r': 1, 'alpha': 10, 's': 0.8, 'beta': 5}
    generated = gen.generate_pareto_data_for_T_N(60, 100, true_params)
    # NOTE: the generated data is used as-is; a ``comp.compress_data`` step
    # was commented out at this point in an earlier revision.
    windows = [(0, 30), (15, 45), (30, 60)]
    horizon = 365  # time horizon of the prediction

    fitted_pars = []
    fitted_Ex = []
    for t_min, t_max in windows:
        model = mod.ParetoNBDModel()
        # presumably keeps only the rows whose T falls inside the window;
        # verify against comp.filter_data_by_T
        subset = comp.filter_data_by_T(generated, t_min, t_max)
        model.fit(
            subset['frequency'],
            subset['recency'],
            subset['T'],
            bootstrap_size=10,
        )
        fitted_pars.append((model.params, model.params_C))
        expectation = model.expected_number_of_purchases_up_to_time(horizon)
        # assumes `.n` is the central value and `.s` its error - TODO confirm
        fitted_Ex.append((expectation.n, expectation.s))

    # The first window is the baseline every window is compared against
    # (including itself).
    reference, reference_err = fitted_Ex[0]
    for value, value_err in fitted_Ex:
        deviation = math.fabs(value - reference)
        combined_err = math.sqrt(value_err**2 + reference_err**2)
        assert deviation / combined_err < 5
def test_Pareto_expected_purchases_before_fitting():
    """Asking an unfitted Pareto/NBD model for a prediction raises ValueError.

    The test passes only when the call (or the unpacking of its result)
    raises ``ValueError``; any other outcome fails it.
    """
    unfitted = models.ParetoNBDModel()
    try:
        # The two-element unpacking is part of the guarded statement, so a
        # ValueError raised by the unpacking itself also satisfies the test.
        _, _ = unfitted.expected_number_of_purchases_up_to_time(1)
    except ValueError:
        raised_value_error = True
    else:
        raised_value_error = False
    assert raised_value_error
def test_Pareto_expected_number_of_purchases_with_error():
    """Compare the model's reported prediction error with a reference value.

    A Pareto/NBD model obtained from ``_fit_and_simulate`` is asked for the
    expected number of sessions up to ``t`` together with its error. Both
    values must be present, and the error must match
    ``EM_expected_number_of_purchases_up_to_time_error`` (evaluated with the
    model's fitter and parameter covariance) to within 10**-6.
    """
    pareto = _fit_and_simulate(models.ParetoNBDModel(), params, t)
    e_x, err_e_x = pareto.expected_number_of_sessions_up_to_time_with_errors(t)

    assert e_x is not None
    assert err_e_x is not None

    tolerance = 10**-6
    reference_error = EM_expected_number_of_purchases_up_to_time_error(
        pareto.fitter, t, pareto.params_C)
    assert math.fabs(err_e_x - reference_error) < tolerance
def test_params_covariance():
    """Check that the fitted parameter covariance matrix is well-formed.

    A Pareto/NBD model is obtained from ``_fit_and_simulate`` and two
    properties of its ``params_C`` are verified:

    * symmetry: every element differs from its transposed counterpart by
      less than 10**-6;
    * every diagonal entry is non-negative.
    """
    model = _fit_and_simulate(models.ParetoNBDModel(), params, t)

    # A plain ndarray replaces ``np.matrix``: NumPy no longer recommends the
    # matrix class and nothing here needs matrix semantics. ``atleast_2d``
    # keeps the promotion to two dimensions that ``np.matrix`` performed.
    covariance = np.atleast_2d(np.asarray(model.params_C))

    # Symmetry
    assert np.all(np.fabs(covariance - covariance.T) < 10**-6)

    # Diagonal: all entries checked in one vectorised comparison.
    assert np.all(covariance.diagonal() >= 0)
def test_models_fitting_and_simulation():
    """Every model starts without parameters and has them after fitting.

    Runs in two phases over the same three model instances: first all of
    them are checked to have ``params`` and ``params_C`` set to ``None``,
    then each one is passed to ``_fit_and_simulate`` and the returned object
    must expose non-``None`` ``params`` and ``params_C``.
    """
    candidates = (
        models.ParetoNBDModel(),
        models.BetaGeoModel(),
        models.ModifiedBetaGeoModel(),
    )

    # Phase 1: nothing has been fitted yet.
    for unfitted in candidates:
        assert unfitted.params is None
        assert unfitted.params_C is None

    # Phase 2: fit each instance and inspect what comes back.
    for candidate in candidates:
        fitted = _fit_and_simulate(model=candidate, parameters=params, t=t)
        assert fitted.params is not None
        assert fitted.params_C is not None
def generate_pareto_data_for_T_N(T, N, params):
    """
    Quick data generator over time.

    For every ``t`` in ``0..T`` (inclusive) one batch is produced with
    ``ParetoNBDModel.generateData(t, params, N)``; the batches are then
    stacked in increasing order of ``t``.

    :param T: Max T to generate (inclusive)
    :param N: How many users per T
    :param params: The pareto params
    :type params: dict
    :return: Generated data. The row labels of the individual batches are
        kept (the index is not reset). An empty ``DataFrame`` is returned
        when ``T`` is negative, since no batch is generated.
    """
    # Kept as a function-scope import, as before - presumably to avoid a
    # circular import; confirm before hoisting it to module level.
    from lifetimes import models

    pareto = models.ParetoNBDModel()

    # Collect every batch first and concatenate once: calling pd.concat
    # inside the loop re-copies all previously accumulated rows on each pass.
    batches = [pareto.generateData(t, params, N) for t in range(T + 1)]
    if not batches:
        # pd.concat raises on an empty list, so spell out the empty result.
        return pd.DataFrame()
    return pd.concat(batches)