def test_BG_integration_in_models_with_uncertainties():
    """Fit a BGModel on generated data and sanity-check its uncertain outputs.

    Checks that E[X(t)] carries non-negative nominal value and std-dev for a
    range of horizons, and that the model-level (uncertain) purchase
    probabilities agree with the underlying fitter's point probabilities.
    """
    T = 10
    size = 1000
    params = {'alpha': 0.17, 'beta': 1.18}
    # NOTE(review): the original also generated a second dataset here
    # (gen.bgext_model([1]*300 + [2]*200 + [3]*180 + [4]*37, ...)) whose
    # result was immediately overwritten by the next line -- removed as
    # dead work.
    data = gen.bgext_model(T, params['alpha'], params['beta'], size=size)
    data = compress_bgext_data(data)

    model = models.BGModel(penalizer_coef=0.1)
    # list(...) so initial_params is indexable, not a lazy dict_values view.
    model.fit(data['frequency'], data['T'], bootstrap_size=10, N=data['N'],
              initial_params=list(params.values()))

    print("Generation params")
    print(params)
    print("Fitted params")
    print(model.params)
    print(model.params_C)
    print("Uncertain parameters")
    print(model.uparams)

    print("E[X(t)] as a function of t")
    for t in [0, 1, 2, 3, 4, 5, 7, 10, 20, 50, 100, 1000, 10000]:
        Ex = model.expected_number_of_purchases_up_to_time(t)
        print(t, Ex)
        assert Ex.n >= 0
        assert Ex.s >= 0

    t = 10
    print("E[X(t) = n] as a function of n, t = " + str(t))
    tot_prob = 0.0
    for n in range(t + 1):
        prob = model.fitter.probability_of_n_purchases_up_to_time(t, n)
        print(n, prob)
        tot_prob += prob
        assert 1 >= prob >= 0
        # The uncertain probability's nominal value must match the fitter's.
        uprob = model.probability_of_n_purchases_up_to_time(t, n)
        print(uprob)
        assert is_almost_equal(uprob.n, prob)
    assert math.fabs(tot_prob - 1.0) < 0.00001
def get_estimates_from_bootstrap(params, daily_installs, observed_days, conversion_rate, free_trial_conversion, N):
    """Fit one BGModel and derive N bootstrap estimates of E[X(52)] + 1.

    Parameters
    ----------
    params: dict with 'alpha' and 'beta' generation parameters.
    daily_installs: installs per day, indexed by day of observation window.
    observed_days: total length of the observation window in days.
    conversion_rate, free_trial_conversion: funnel multipliers applied to installs.
    N: bootstrap sample size (and number of returned estimates).

    Returns
    -------
    (exs, point_estimate): list of uncertain per-bootstrap estimates that are
    finite, and the model's own uncertain E[X(52)] + 1.
    """
    model = BGModel(penalizer_coef=0.01)
    # One T entry (weeks of remaining observation) per converted user per
    # install day. Replaces the original O(n^2) reduce()-based list
    # concatenation with a flat comprehension.
    Ts = [
        (observed_days - day) / 7
        for day, installs in enumerate(daily_installs)
        for _ in range(int(math.floor(installs * conversion_rate * free_trial_conversion)))
    ]
    # Materialized as a list: the original left a lazy filter() iterator.
    Ts = [t for t in Ts if t > 0]

    gen_data = gen.bgext_model(Ts, params['alpha'], params['beta'])
    data = comp.compress_bgext_data(gen_data)
    model.fit(frequency=data["frequency"], T=data["T"], N=data["N"], bootstrap_size=N)

    exs = []
    for i in range(N):
        a = model.sampled_parameters[i]['alpha']
        b = model.sampled_parameters[i]['beta']
        cov = model.params_C
        # Rebuild (a, b) as correlated uncertain values from the fitted covariance.
        [a, b] = uncertainties.correlated_values([a, b], cov)
        Ex = model.wrapped_static_expected_number_of_purchases_up_to_time(a, b, 52) + 1
        # Keep only finite estimates; bootstrap fits can diverge.
        if not math.isnan(Ex.n) and not math.isinf(Ex.n):
            print((i, Ex))
            exs.append(Ex)
    return exs, model.expected_number_of_purchases_up_to_time(52) + 1
def test_goodness_of_test_BG():
    """Goodness-of-fit accepts BG-generated data and rejects non-BG counts."""
    params = {'alpha': 0.32, 'beta': 0.85}
    raw = gen.bgext_model(T=sample_T, alpha=params['alpha'], beta=params['beta'])
    gen_data = compress_bgext_data(raw)
    assert goodness_of_test(gen_data, fitter_class=est.BGFitter, verbose=True)

    # clearly not BG
    non_bg_data = pd.DataFrame({
        'frequency': [0, 1, 2, 3, 4, 5],
        'T': [5, 5, 5, 5, 5, 5],
        'N': [10, 13, 17, 30, 40, 40],
    })
    assert not goodness_of_test(non_bg_data, fitter_class=est.BGFitter, verbose=True)

    # borderline BG
    borderline_bg_data = pd.DataFrame({
        'frequency': [0, 1, 2, 3, 4, 5],
        'T': [5, 5, 5, 5, 5, 5],
        'N': [10, 10, 10, 30, 10, 40],
    })
    assert not goodness_of_test(borderline_bg_data, fitter_class=est.BGFitter, verbose=True)
def test_BG_fitting_compressed_or_not():
    """Fitting on raw vs. compressed data must yield identical parameters."""
    T = 10
    size = 1000
    params = {'alpha': 0.3, 'beta': 3.7}

    data = gen.bgext_model(T, params['alpha'], params['beta'], size=size)
    print(data)
    compressed_data = compress_bgext_data(data)

    fitter = est.BGFitter(penalizer_coef=0.1)
    fitter_compressed = est.BGFitter(penalizer_coef=0.1)

    fitter.fit(data['frequency'], data['T'], initial_params=params.values())
    fitter_compressed.fit(compressed_data['frequency'], compressed_data['T'],
                          N=compressed_data['N'], initial_params=params.values())

    print(params)
    print(fitter.params_)
    print(fitter_compressed.params_)

    for par_name in params.keys():
        delta = fitter.params_[par_name] - fitter_compressed.params_[par_name]
        assert math.fabs(delta) < 0.00001
def test_split_data():
    """A train/test split of generated data must still pass the goodness test."""
    params = {'alpha': 0.32, 'beta': 0.85}
    raw = gen.bgext_model(T=sample_T, alpha=params['alpha'], beta=params['beta'])
    full_data = compress_bgext_data(raw)
    train_data, test_data = split_dataset(full_data, 0.3)
    assert goodness_of_test(train_data, fitter_class=est.BGFitter,
                            verbose=True, test_data=test_data)
def test_generte_BGExt_for_external_studies():
    """Generate a compressed BGExt dataset and dump it to CSV for external use.

    BUGFIX: the original wrote to a hard-coded absolute path under a
    developer's home directory ("/Users/marcomeneghelli/Desktop/..."),
    which fails on any other machine; write to the system temp dir instead.
    (Function-name typo "generte" kept: renaming would change the public
    interface/test id.)
    """
    import os
    import tempfile

    params = {'alpha': 0.32, 'beta': 0.85}
    gen_data = gen.bgext_model(20, params['alpha'], params['beta'], size=10000)
    c_gen_data = compress_bgext_data(gen_data)
    out_path = os.path.join(tempfile.gettempdir(), "bg_data_2.csv")
    c_gen_data.to_csv(out_path)
def test_BGExt_generation():
    """Generated BGExt data exposes T/frequency/theta and the right row count."""
    params = {'alpha': 2.23, 'beta': 9.35}

    gen_data = gen.bgext_model(52, params['alpha'], params['beta'], size=1000)
    assert len(gen_data) == 1000
    for column in ('T', 'frequency', 'theta'):
        assert column in gen_data
    print(gen_data)
    print(compress_bgext_data(gen_data))

    # With an explicit list of T values the row count follows the list,
    # not the size argument.
    gen_data = gen.bgext_model([5, 5, 1, 1], params['alpha'], params['beta'], size=10)
    assert len(gen_data) == 4
    for column in ('T', 'frequency', 'theta'):
        assert column in gen_data
    print(gen_data)
def test_correlations_of_uparams_and_derivatives():
    """Correlations between uncertain params and derived quantities are sane."""
    T = 10
    size = 100
    params = {'alpha': 0.17, 'beta': 1.18}

    raw = gen.bgext_model(T, params['alpha'], params['beta'], size=size)
    data = compress_bgext_data(raw)

    model = models.BGModel(penalizer_coef=0.1)
    model.fit(data['frequency'], data['T'], bootstrap_size=10, N=data['N'],
              initial_params=params.values())

    print("Generation params")
    print(params)
    print("Fitted params")
    print(model.params)
    print(model.params_C)
    print("Uncertain parameters")
    print(model.uparams)

    alpha_u = model.uparams['alpha']
    # A quantity is perfectly correlated with itself...
    assert is_almost_equal(correlation_matrix([alpha_u, alpha_u])[0, 1], 1.0)
    # ...and only partially correlated once independent noise is added.
    assert 1.0 > correlation_matrix([alpha_u + ufloat(1, 1), alpha_u])[0, 1] > 0.0

    # stub of profile: E[X(1)] vs E[X(h)] for increasing horizons h
    for horizon in (2, 10, 100):
        p1 = model.expected_number_of_purchases_up_to_time(1)
        p2 = model.expected_number_of_purchases_up_to_time(horizon)
        assert 1.0 > correlation_matrix([p1, p2])[0, 1] > 0.0
def test_generate_BG_neg_likelihoods():
    """Simulated negative log-likelihoods have the requested size and spread."""
    params = {'alpha': 0.32, 'beta': 0.85}
    simulation_size = 100
    N_users = 1000

    raw = gen.bgext_model(T=sample_T, alpha=params['alpha'], beta=params['beta'])
    gen_data = compress_bgext_data(raw)

    fitter = est.BGFitter(0.1)
    fitter.fit(**gen_data)

    n_lls = generate_neg_likelihoods(fitter=fitter, size=N_users,
                                     simulation_size=simulation_size)
    assert len(n_lls) == simulation_size
    assert n_lls.std() > 0
def test_BG_additional_functions():
    """Exercise E[X(t)], its error propagation, and P[X(t)=n] on a BGFitter."""
    T = 10
    size = 1000
    params = {'alpha': 0.3, 'beta': 3.7}

    data = gen.bgext_model(T, params['alpha'], params['beta'], size=size)
    print(data)
    data = compress_bgext_data(data)

    fitter = est.BGFitter(penalizer_coef=0.1)
    fitter.fit(data['frequency'], data['T'], N=data['N'],
               initial_params=params.values())

    print("Generation params")
    print(params)
    print("Fitted params")
    print(fitter.params_)

    # Diagonal covariance with 10% relative error on each parameter.
    # BUGFIX: the original wrapped this matrix in np.cov(np.vstack(...)),
    # which computes the covariance OF the rows of its argument rather than
    # using the intended diagonal matrix; it was also rebuilt on every
    # loop iteration (loop-invariant -- hoisted).
    covariance_matrix = np.diag([(params['alpha'] * 0.1) ** 2,
                                 (params['beta'] * 0.1) ** 2])

    print("E[X(t)] as a function of t")
    for t in [1, 10, 100, 1000, 10000]:
        Ex = fitter.expected_number_of_purchases_up_to_time(t)
        Ex_err = fitter.expected_number_of_purchases_up_to_time_error(
            t, covariance_matrix)
        print(t, Ex, Ex / t, Ex_err)
        assert Ex >= 0
        assert Ex_err >= 0

    t = 10
    print("P[X(t) = n] as a function of n, t = " + str(t))
    tot_prob = 0.0
    for n in range(t + 1):
        prob = fitter.probability_of_n_purchases_up_to_time(t, n)
        print(n, prob)
        tot_prob += prob
        assert 1 >= prob >= 0
    assert math.fabs(tot_prob - 1.0) < 0.00001
def test_address_dispersion_of_fit_with_few_renewals():
    """Visualize the dispersion of E[X(52)]+1 estimates with few renewals.

    Repeats generation + fit N times on short observation windows and plots a
    histogram of the point estimates against the true value.
    """
    params = {'alpha': 2.26, 'beta': 8.13}  # similar to ReadIt

    print("True number of renewals:")
    true_Ex = est.BGFitter.static_expected_number_of_purchases_up_to_time(
        params['alpha'], params['beta'], 52) + 1
    print(true_Ex)

    print("Estimates:")
    N = 30
    conv_day = 8
    T = 2
    estimates = []
    for i in range(N):
        gen_data = gen.bgext_model([T - 2, T - 1, T] * (conv_day * 7),
                                   params['alpha'], params['beta'])
        data = compress_bgext_data(gen_data)
        model = models.BGModel(penalizer_coef=0.1)
        model.fit(
            data['frequency'],
            data['T'],
            bootstrap_size=30,
            N=data['N'],
        )  # initial_params=params.values()
        Ex = model.expected_number_of_purchases_up_to_time(52) + 1
        print(Ex)
        estimates.append(Ex.n)

    # BUGFIX: `normed` was removed from Axes.hist in Matplotlib 3.x;
    # `density=False` is the supported equivalent of normed=0 (raw counts).
    plt.hist(estimates, 50, density=False, facecolor='g', alpha=0.75)
    plt.xlabel('estimates')
    plt.title('Histogram of ' + str(N) + ' estimates (true value in red) - '
              + str(conv_day) + ' conv/day, T: ' + str(T))
    plt.axvline(x=true_Ex, color="red")
    plt.grid(True)
    plt.show()
def test_BG_on_simil_real_data():
    """Fit a BGModel on data shaped like a real app's (Spy Calc Free-like)."""
    T = 10
    size = 1000
    params = {'alpha': 0.17, 'beta': 1.18}

    # Case similar to Spy Calc Free: mostly short observation windows.
    observation_Ts = [1] * 3000 + [2] * 2000 + [3] * 1800 + [4] * 370
    raw = gen.bgext_model(observation_Ts, params['alpha'], params['beta'])
    data = compress_bgext_data(raw)

    model = models.BGModel(penalizer_coef=0.1)
    model.fit(data['frequency'], data['T'], bootstrap_size=10, N=data['N'],
              initial_params=params.values())

    print("Generation params")
    print(params)
    print("Fitted params")
    print(model.params)
    print(model.params_C)

    print("E[X(t)] as a function of t")
    for t in [0, 1, 10, 100, 1000, 10000]:
        Ex = model.expected_number_of_purchases_up_to_time(t)
        print(t, Ex)
        assert Ex.n >= 0
        assert Ex.s >= 0

    t = 10
    print("E[X(t) = n] as a function of n, t = " + str(t))
    tot_prob = 0.0
    for n in range(t + 1):
        prob = model.fitter.probability_of_n_purchases_up_to_time(t, n)
        print(n, prob)
        tot_prob += prob
        assert 1 >= prob >= 0
    assert math.fabs(tot_prob - 1.0) < 0.00001
def get_estimates(params, daily_installs, observed_days, conversion_rate, free_trial_conversion, N):
    """Run N independent generate-and-fit simulations; return E[X(52)]+1 per run.

    Parameters
    ----------
    params: dict with 'alpha' and 'beta' generation parameters.
    daily_installs: installs per day, indexed by day of observation window.
    observed_days: total length of the observation window in days.
    conversion_rate, free_trial_conversion: funnel multipliers applied to installs.
    N: number of simulation repetitions.

    Returns
    -------
    List of N uncertain estimates of E[X(52)] + 1.
    """
    # One T entry (weeks of remaining observation) per converted user per
    # install day. Replaces the O(n^2) reduce()-based list concatenation.
    Ts = [
        (observed_days - day) / 7
        for day, installs in enumerate(daily_installs)
        for _ in range(int(math.floor(installs * conversion_rate * free_trial_conversion)))
    ]
    # BUGFIX: the original assigned a lazy filter() iterator here; the first
    # loop iteration consumed it, so all later iterations generated data from
    # an EMPTY Ts. Materializing as a list makes Ts reusable.
    Ts = [t for t in Ts if t > 0]

    exs = []
    for i in range(N):
        gen_data = gen.bgext_model(Ts, params['alpha'], params['beta'])
        data = comp.compress_bgext_data(gen_data)
        model = BGModel(penalizer_coef=0.1)
        model.fit(data['frequency'], data['T'], bootstrap_size=30, N=data['N'])
        Ex = model.expected_number_of_purchases_up_to_time(52) + 1
        print((i, Ex))
        exs.append(Ex)
    return exs
# Funnel settings: installs -> trial conversions -> paying users.
conversion_rate = 0.06
free_trial_conversion = 0.6

# Reference value: model-true expected purchases up to week 52 (+1 for the
# initial conversion).
true_Ex = BGModel().fitter.static_expected_number_of_purchases_up_to_time(
    params['alpha'], params['beta'], 52) + 1

exss = []
fitted_e_x = []
percentiles_e_x = []
observed_days = 20
for n in range(N):
    if ((n + 1) % 10) == 0:
        print(n + 1)

    # One T entry (weeks of remaining observation) per converted user per
    # install day; replaces the O(n^2) reduce()-based concatenation and
    # materializes the lazy filter() as a list.
    Ts = [
        (observed_days - day) / 7
        for day, installs in enumerate(daily_installs)
        for _ in range(int(math.floor(installs * conversion_rate * free_trial_conversion)))
    ]
    Ts = [t for t in Ts if t > 0]

    current_model = BGModel()
    gen_data = gen.bgext_model(Ts, params['alpha'], params['beta'])
    data = comp.compress_bgext_data(gen_data)
    current_model.fit(frequency=data["frequency"], T=data["T"], N=data["N"],
                      bootstrap_size=100)

    ex = current_model.expected_number_of_purchases_up_to_time(52) + 1
    fitted_e_x.append(ex)

    # BUGFIX: the original kept percentiles_data as a lazy filter() object and
    # then called len() on it, which raises TypeError on Python 3. Build a
    # plain list of the finite bootstrap estimates instead.
    percentiles_data = [
        e for e in (
            BGFitter.static_expected_number_of_purchases_up_to_time(
                pars['alpha'], pars['beta'], 52) + 1
            for pars in current_model.sampled_parameters)
        if not (math.isnan(e) or math.isinf(e))
    ]
    if len(percentiles_data) > 0:
        # 16th/84th percentiles ~ +/- 1 sigma band of the bootstrap estimates.
        percentiles = (np.percentile(percentiles_data, 16),
                       np.percentile(percentiles_data, 84))
        percentiles_e_x.append(percentiles)
        # plt.hist(
        #     percentiles_data,
        #     bins=range(40), normed=0,
        #     alpha=0.3
        # )
        # plt.axvline(x=true_Ex, color='red', alpha =0.7)
        # plt.axvline(x=percentiles[0], color='blue', alpha =0.7)
def test_BG_compression():
    """Compression must preserve the total number of observations."""
    params = {'alpha': 2.23, 'beta': 9.35}
    raw = gen.bgext_model(52, params['alpha'], params['beta'], size=1000)
    compressed = compress_bgext_data(raw)
    assert sum(compressed['N']) == len(raw)
    # NOTE(review): this is the tail of a train/test splitting helper whose
    # `def` line falls outside this chunk. `data`, `test_N` and `train_N` are
    # presumably built earlier in that function -- verify against the full file.
    test_data['N'] = test_N
    # Drop strata that ended up with zero users after the split.
    test_data = test_data[test_data['N'] > 0]
    train_data = data.copy(deep=True)
    train_data['N'] = train_N
    train_data = train_data[train_data['N'] > 0]
    return train_data, test_data


if __name__ == "__main__":
    # Smoke-run: generate BG data over a spread of observation lengths and
    # run the goodness-of-fit test against a multinomially resampled copy.
    params = {'alpha': 0.32, 'beta': 0.85}
    gen_data = compress_bgext_data(
        gen.bgext_model(T=[2] * 1000 + [3] * 1000 + [4] * 1000 + [5] * 1000
                        + [6] * 1000 + [7] * 1000,
                        alpha=params['alpha'],
                        beta=params['beta']))
    test_n = multinomial_sample(gen_data['N'])
    test_data = gen_data.copy(deep=True)
    test_data['N'] = test_n
    print(goodness_of_test(
        data=gen_data,
        fitter_class=est.BGFitter,
        test_data=test_data,
        verbose=True))
    # simulation_size = 100
    # N_users = 10000
    # T_horizon = 10