Beispiel #1
0
def test_BG_integration_in_models_with_uncertainties():
    """Fit a ``BGModel`` on synthetic data and sanity-check its predictions.

    The data comes from ``gen.bgext_model`` with known ``alpha``/``beta``
    and is compressed before fitting. The test then checks that:

    * the expected number of purchases has non-negative ``.n`` and ``.s``
      for a range of horizons;
    * every probability returned by ``model.fitter`` lies in ``[0, 1]``;
    * the ``.n`` of the model-level probability matches the fitter's value;
    * the probabilities for ``n = 0..t`` sum to 1 within ``1e-5``.
    """
    T = 10
    size = 1000
    params = {'alpha': 0.17, 'beta': 1.18}

    # One cohort of `size` samples, all generated with the same T.
    data = gen.bgext_model(T, params['alpha'], params['beta'], size=size)
    data = compress_bgext_data(data)

    model = models.BGModel(penalizer_coef=0.1)
    model.fit(data['frequency'],
              data['T'],
              bootstrap_size=10,
              N=data['N'],
              initial_params=params.values())

    print("Generation params")
    print(params)

    print("Fitted params")
    print(model.params)
    print(model.params_C)

    print("Uncertain parameters")
    print(model.uparams)

    print("E[X(t)] as a function of t")
    for t in [0, 1, 2, 3, 4, 5, 7, 10, 20, 50, 100, 1000, 10000]:
        Ex = model.expected_number_of_purchases_up_to_time(t)
        print(t, Ex)
        # presumably .n / .s are the nominal value and standard deviation
        # of an `uncertainties` number -- TODO confirm
        assert Ex.n >= 0
        assert Ex.s >= 0

    t = 10
    # These are probabilities, so the label says P[...] (as the sibling
    # fitter-level test does), not E[...].
    print("P[X(t) = n] as a function of n, t = " + str(t))
    tot_prob = 0.0
    for n in range(t + 1):
        prob = model.fitter.probability_of_n_purchases_up_to_time(t, n)
        print(n, prob)
        tot_prob += prob
        assert 1 >= prob >= 0

        # The model-level value must agree with the plain fitter value.
        uprob = model.probability_of_n_purchases_up_to_time(t, n)
        print(uprob)
        assert is_almost_equal(uprob.n, prob)

    assert math.fabs(tot_prob - 1.0) < 0.00001
def get_estimates_from_bootstrap(params, daily_installs, observed_days,
                                 conversion_rate, free_trial_conversion, N):
    """Fit one simulated dataset and derive estimates from its bootstrap samples.

    Args:
        params: mapping with the ``'alpha'`` and ``'beta'`` used to generate
            the synthetic data.
        daily_installs: iterable of install counts, one entry per day
            (the position in the iterable is used as the day index).
        observed_days: total number of days covered by the observation.
        conversion_rate: multiplied with ``installs`` and
            ``free_trial_conversion`` to get the number of generated users
            per day (rounded down).
        free_trial_conversion: see ``conversion_rate``.
        N: bootstrap size passed to ``model.fit`` and number of sampled
            parameter sets that are evaluated.

    Returns:
        A tuple ``(exs, fitted)`` where ``exs`` is the list of finite
        per-sample estimates and ``fitted`` is
        ``model.expected_number_of_purchases_up_to_time(52) + 1``.
    """
    model = BGModel(penalizer_coef=0.01)

    # Build a concrete list of observation lengths, one entry per generated
    # user. A list (rather than a lazy `filter` object) keeps `len()` and
    # repeated iteration available to the generator, and an empty
    # `daily_installs` yields an empty list instead of a `reduce` TypeError.
    Ts = []
    for day, installs in enumerate(daily_installs):
        # NOTE(review): true division under Python 3 gives fractional
        # weeks -- confirm whether whole weeks were intended.
        weeks_observed = (observed_days - day) / 7
        if weeks_observed > 0:
            converted = int(math.floor(
                installs * conversion_rate * free_trial_conversion))
            Ts.extend([weeks_observed] * converted)

    gen_data = gen.bgext_model(Ts, params['alpha'], params['beta'])
    data = comp.compress_bgext_data(gen_data)
    model.fit(frequency=data["frequency"],
              T=data["T"],
              N=data["N"],
              bootstrap_size=N)

    # The covariance is not modified by the loop below, so read it once.
    cov = model.params_C
    exs = []
    for i in range(N):
        sampled = model.sampled_parameters[i]
        a, b = uncertainties.correlated_values(
            [sampled['alpha'], sampled['beta']], cov)
        Ex = model.wrapped_static_expected_number_of_purchases_up_to_time(
            a, b, 52) + 1
        # Drop samples whose nominal value is NaN or infinite.
        if not math.isnan(Ex.n) and not math.isinf(Ex.n):
            print((i, Ex))
            exs.append(Ex)
    return exs, model.expected_number_of_purchases_up_to_time(52) + 1
Beispiel #3
0
def test_goodness_of_test_BG():
    """Check ``goodness_of_test`` on BG-generated and hand-made datasets.

    Data generated by ``gen.bgext_model`` must be accepted; the two
    hand-written histograms below must be rejected.
    """
    alpha, beta = 0.32, 0.85

    simulated = gen.bgext_model(T=sample_T, alpha=alpha, beta=beta)
    bg_data = compress_bgext_data(simulated)
    assert goodness_of_test(bg_data, fitter_class=est.BGFitter, verbose=True)

    # Both hand-made datasets share the same frequencies and T values and
    # differ only in the counts N.
    frequencies = [0, 1, 2, 3, 4, 5]
    periods = [5, 5, 5, 5, 5, 5]

    # clearly not BG
    clearly_not_bg = pd.DataFrame({'frequency': frequencies,
                                   'T': periods,
                                   'N': [10, 13, 17, 30, 40, 40]})
    accepted = goodness_of_test(clearly_not_bg,
                                fitter_class=est.BGFitter,
                                verbose=True)
    assert not accepted

    # borderline BG
    borderline = pd.DataFrame({'frequency': frequencies,
                               'T': periods,
                               'N': [10, 10, 10, 30, 10, 40]})
    accepted = goodness_of_test(borderline,
                                fitter_class=est.BGFitter,
                                verbose=True)
    assert not accepted
Beispiel #4
0
def test_BG_fitting_compressed_or_not():
    """Fitting raw data and its compressed form must give the same parameters.

    One ``BGFitter`` is fitted on the generated rows directly, another on
    the output of ``compress_bgext_data`` (with its ``N`` column); each
    fitted parameter must agree within ``1e-5``.
    """
    params = {'alpha': 0.3, 'beta': 3.7}

    raw = gen.bgext_model(10, params['alpha'], params['beta'], size=1000)
    print(raw)
    compressed = compress_bgext_data(raw)

    raw_fitter = est.BGFitter(penalizer_coef=0.1)
    compressed_fitter = est.BGFitter(penalizer_coef=0.1)

    raw_fitter.fit(raw['frequency'],
                   raw['T'],
                   initial_params=params.values())
    compressed_fitter.fit(compressed['frequency'],
                          compressed['T'],
                          N=compressed['N'],
                          initial_params=params.values())

    for shown in (params, raw_fitter.params_, compressed_fitter.params_):
        print(shown)

    for name in params:
        gap = raw_fitter.params_[name] - compressed_fitter.params_[name]
        assert math.fabs(gap) < 0.00001
Beispiel #5
0
def test_split_data():
    """Split generated data with ``split_dataset`` and run the goodness test.

    The first part returned by ``split_dataset(..., 0.3)`` is passed as the
    data, the second as ``test_data``.
    """
    alpha, beta = 0.32, 0.85

    simulated = gen.bgext_model(T=sample_T, alpha=alpha, beta=beta)
    first_part, second_part = split_dataset(compress_bgext_data(simulated), 0.3)

    passed = goodness_of_test(first_part,
                              fitter_class=est.BGFitter,
                              verbose=True,
                              test_data=second_part)
    assert passed
Beispiel #6
0
def test_generte_BGExt_for_external_studies():
    """Generate 10000 samples, compress them and write the result to CSV."""
    alpha, beta = 0.32, 0.85

    raw = gen.bgext_model(20, alpha, beta, size=10000)
    compressed = compress_bgext_data(raw)

    # NOTE(review): the destination is a hard-coded, machine-specific path.
    destination = "/Users/marcomeneghelli/Desktop/bg_data_2.csv"
    compressed.to_csv(destination)
Beispiel #7
0
def test_BGExt_generation():
    """Check the size and columns of data produced by ``gen.bgext_model``.

    With a scalar ``T`` and ``size=1000`` the result must have 1000 entries;
    with a list of four ``T`` values it must have four entries even though
    ``size=10`` is passed. In both cases the ``'T'``, ``'frequency'`` and
    ``'theta'`` keys must be present.
    """
    alpha, beta = 2.23, 9.35
    required = ('T', 'frequency', 'theta')

    # Scalar T: the number of entries is given by `size`.
    cohort = gen.bgext_model(52, alpha, beta, size=1000)
    assert len(cohort) == 1000
    for key in required:
        assert key in cohort
    print(cohort)

    print(compress_bgext_data(cohort))

    # List of T values: the number of entries follows the list length.
    per_user = gen.bgext_model([5, 5, 1, 1], alpha, beta, size=10)
    assert len(per_user) == 4
    for key in required:
        assert key in per_user
    print(per_user)
Beispiel #8
0
def test_correlations_of_uparams_and_derivatives():
    """Check correlations between fitted parameters and derived quantities.

    After fitting a ``BGModel`` the test asserts that:

    * ``uparams['alpha']`` has correlation 1 with itself;
    * adding ``ufloat(1, 1)`` to it gives a correlation strictly between
      0 and 1;
    * the expected purchases at horizon 1 and at horizons 2, 10 and 100
      have a correlation strictly between 0 and 1.
    """
    T = 10
    size = 100
    params = {'alpha': 0.17, 'beta': 1.18}

    generated = gen.bgext_model(T, params['alpha'], params['beta'], size=size)
    compressed = compress_bgext_data(generated)

    model = models.BGModel(penalizer_coef=0.1)
    model.fit(compressed['frequency'],
              compressed['T'],
              bootstrap_size=10,
              N=compressed['N'],
              initial_params=params.values())

    print("Generation params")
    print(params)

    print("Fitted params")
    print(model.params)
    print(model.params_C)

    print("Uncertain parameters")
    print(model.uparams)

    self_corr = correlation_matrix([model.uparams['alpha'],
                                    model.uparams['alpha']])[0, 1]
    assert is_almost_equal(self_corr, 1.0)

    shifted_corr = correlation_matrix([
        model.uparams['alpha'] + ufloat(1, 1), model.uparams['alpha']
    ])[0, 1]
    assert 1.0 > shifted_corr > 0.0

    # stub of profile: horizon 1 against progressively longer horizons
    for horizon in (2, 10, 100):
        near = model.expected_number_of_purchases_up_to_time(1)
        far = model.expected_number_of_purchases_up_to_time(horizon)

        assert 1.0 > correlation_matrix([near, far])[0, 1] > 0.0
Beispiel #9
0
def test_generate_BG_neg_likelihoods():
    """``generate_neg_likelihoods`` must return one varying value per simulation.

    The result must have ``simulation_size`` entries and a strictly
    positive standard deviation.
    """
    alpha, beta = 0.32, 0.85
    n_simulations = 100
    n_users = 1000

    simulated = gen.bgext_model(T=sample_T, alpha=alpha, beta=beta)
    compressed = compress_bgext_data(simulated)

    fitter = est.BGFitter(0.1)
    # The compressed data is unpacked as keyword arguments for `fit`.
    fitter.fit(**compressed)

    neg_likelihoods = generate_neg_likelihoods(fitter=fitter,
                                               size=n_users,
                                               simulation_size=n_simulations)

    assert len(neg_likelihoods) == n_simulations
    assert neg_likelihoods.std() > 0
Beispiel #10
0
def test_BG_additional_functions():
    """Sanity-check the prediction helpers of a fitted ``BGFitter``.

    Checks that the expected number of purchases and its error are
    non-negative for several horizons, that each probability of ``n``
    purchases lies in ``[0, 1]``, and that the probabilities for
    ``n = 0..t`` sum to 1 within ``1e-5``.
    """
    T = 10
    size = 1000
    params = {'alpha': 0.3, 'beta': 3.7}

    generated = gen.bgext_model(T, params['alpha'], params['beta'], size=size)
    print(generated)
    compressed = compress_bgext_data(generated)

    fitter = est.BGFitter(penalizer_coef=0.1)
    fitter.fit(compressed['frequency'],
               compressed['T'],
               N=compressed['N'],
               initial_params=params.values())

    print("Generation params")
    print(params)

    print("Fitted params")
    print(fitter.params_)

    # The matrix passed to the error helper depends only on `params`, so it
    # is built once up front.
    # NOTE(review): `np.cov` is applied to what already looks like a
    # diagonal covariance matrix -- confirm this is intended.
    alpha_var = (params['alpha'] * 0.1)**2
    beta_var = (params['beta'] * 0.1)**2
    covariance_matrix = np.cov(np.vstack([[alpha_var, 0], [0, beta_var]]))

    print("E[X(t)] as a function of t")
    for t in [1, 10, 100, 1000, 10000]:
        Ex = fitter.expected_number_of_purchases_up_to_time(t)
        Ex_err = fitter.expected_number_of_purchases_up_to_time_error(
            t, covariance_matrix)
        print(t, Ex, Ex / t, Ex_err)
        assert Ex >= 0
        assert Ex_err >= 0

    t = 10
    print("P[X(t) = n] as a function of n, t = " + str(t))
    cumulative = 0.0
    for n in range(t + 1):
        p_n = fitter.probability_of_n_purchases_up_to_time(t, n)
        print(n, p_n)
        cumulative += p_n
        assert 1 >= p_n >= 0

    assert math.fabs(cumulative - 1.0) < 0.00001
Beispiel #11
0
def test_address_dispersion_of_fit_with_few_renewals():
    """Plot the spread of fitted estimates when observation times are short.

    ``N`` independent datasets are generated with observation lengths
    ``T - 2``, ``T - 1`` and ``T``, each fitted with a fresh ``BGModel``.
    The nominal value of
    ``expected_number_of_purchases_up_to_time(52) + 1`` is collected per
    fit and shown as a histogram, with the value computed from the true
    parameters drawn as a red vertical line.

    The function makes no assertions; it opens a plot via ``plt.show()``.
    """
    params = {'alpha': 2.26, 'beta': 8.13}  # similar to ReadIt

    print("True number of renewals:")
    true_Ex = est.BGFitter.static_expected_number_of_purchases_up_to_time(
        params['alpha'], params['beta'], 52) + 1
    print(true_Ex)

    print("Estimates:")
    N = 30
    conv_day = 8
    T = 2

    estimates = []
    for _ in range(N):
        gen_data = gen.bgext_model([T - 2, T - 1, T] * (conv_day * 7),
                                   params['alpha'], params['beta'])
        data = compress_bgext_data(gen_data)

        # No initial_params: the fit starts from the model's own defaults.
        model = models.BGModel(penalizer_coef=0.1)
        model.fit(
            data['frequency'],
            data['T'],
            bootstrap_size=30,
            N=data['N'],
        )

        Ex = model.expected_number_of_purchases_up_to_time(52) + 1
        print(Ex)
        estimates.append(Ex.n)

    # `normed` was removed from matplotlib's hist(); the original passed
    # normed=0, which is the default (raw counts), so the keyword is simply
    # omitted to work on old and new matplotlib alike.
    plt.hist(estimates, 50, facecolor='g', alpha=0.75)

    plt.xlabel('estimates')
    plt.title('Histogram of {} estimates (true value in red) - '
              '{} conv/day, T: {}'.format(N, conv_day, T))

    plt.axvline(x=true_Ex, color="red")
    plt.grid(True)
    plt.show()
Beispiel #12
0
def test_BG_on_simil_real_data():
    """Fit a ``BGModel`` on a synthetic dataset with mixed observation lengths.

    The data has 3000/2000/1800/370 samples with ``T`` equal to 1/2/3/4
    (a case similar to Spy Calc Free). After fitting, the test checks that
    the expected number of purchases has non-negative ``.n`` and ``.s``,
    that each probability of ``n`` purchases lies in ``[0, 1]``, and that
    the probabilities for ``n = 0..t`` sum to 1 within ``1e-5``.
    """
    params = {'alpha': 0.17, 'beta': 1.18}

    # let's take a case similar to Spy Calc Free:
    data = gen.bgext_model([1] * 3000 + [2] * 2000 + [3] * 1800 + [4] * 370,
                           params['alpha'], params['beta'])
    data = compress_bgext_data(data)

    model = models.BGModel(penalizer_coef=0.1)
    model.fit(data['frequency'],
              data['T'],
              bootstrap_size=10,
              N=data['N'],
              initial_params=params.values())

    print("Generation params")
    print(params)

    print("Fitted params")
    print(model.params)
    print(model.params_C)

    print("E[X(t)] as a function of t")
    for t in [0, 1, 10, 100, 1000, 10000]:
        Ex = model.expected_number_of_purchases_up_to_time(t)
        print(t, Ex)
        assert Ex.n >= 0
        assert Ex.s >= 0

    t = 10
    # These are probabilities, so the label says P[...] (as the sibling
    # fitter-level test does), not E[...].
    print("P[X(t) = n] as a function of n, t = " + str(t))
    tot_prob = 0.0
    for n in range(t + 1):
        prob = model.fitter.probability_of_n_purchases_up_to_time(t, n)
        print(n, prob)
        tot_prob += prob
        assert 1 >= prob >= 0

    assert math.fabs(tot_prob - 1.0) < 0.00001
def get_estimates(params, daily_installs, observed_days, conversion_rate,
                  free_trial_conversion, N):
    """Run ``N`` independent generate-and-fit rounds and collect the estimates.

    Args:
        params: mapping with the ``'alpha'`` and ``'beta'`` used to generate
            the synthetic data.
        daily_installs: iterable of install counts, one entry per day
            (the position in the iterable is used as the day index).
        observed_days: total number of days covered by the observation.
        conversion_rate: multiplied with ``installs`` and
            ``free_trial_conversion`` to get the number of generated users
            per day (rounded down).
        free_trial_conversion: see ``conversion_rate``.
        N: number of rounds.

    Returns:
        A list with one
        ``model.expected_number_of_purchases_up_to_time(52) + 1`` value per
        round.
    """
    # `Ts` is fed to the generator in every round, so it has to be a real
    # list: a lazy `filter` object would be exhausted by the first round and
    # all later rounds would silently run on empty input. Building it with a
    # plain loop also avoids the quadratic list concatenation of `reduce`
    # and its TypeError on an empty `daily_installs`.
    Ts = []
    for day, installs in enumerate(daily_installs):
        # NOTE(review): true division under Python 3 gives fractional
        # weeks -- confirm whether whole weeks were intended.
        weeks_observed = (observed_days - day) / 7
        if weeks_observed > 0:
            converted = int(math.floor(
                installs * conversion_rate * free_trial_conversion))
            Ts.extend([weeks_observed] * converted)

    exs = []
    for i in range(N):
        gen_data = gen.bgext_model(Ts, params['alpha'], params['beta'])
        data = comp.compress_bgext_data(gen_data)

        # A fresh model per round keeps the fits independent.
        model = BGModel(penalizer_coef=0.1)
        model.fit(data['frequency'], data['T'], bootstrap_size=30, N=data['N'])

        Ex = model.expected_number_of_purchases_up_to_time(52) + 1
        print((i, Ex))
        exs.append(Ex)
    return exs
 # Simulation fragment: repeats a generate-and-fit round N times and records,
 # per round, the fitted estimate and the 16th/84th percentiles of the
 # estimates computed from the model's sampled parameters.
 # `params`, `N` and `daily_installs` are defined outside the visible code.
 conversion_rate = 0.06
 free_trial_conversion = 0.6
 # Reference value computed from the generating parameters.
 true_Ex = BGModel().fitter.static_expected_number_of_purchases_up_to_time(params['alpha'], params['beta'], 52) + 1
 exss = []
 fitted_e_x = []
 percentiles_e_x = []
 observed_days = 20
 for n in range(N):
     # Progress output every 10 rounds.
     if ((n+1) % 10) == 0:
         print(n+1)
     # One observation length per generated user, flattened into one list.
     Ts = reduce(lambda x, y: x + y,
                 [[(observed_days - day) / 7] * int(math.floor(installs * conversion_rate * free_trial_conversion))
                  for day, installs in enumerate(daily_installs)])
     # NOTE(review): under Python 3 `filter` returns a lazy iterator, not a
     # list -- confirm that `gen.bgext_model` accepts one.
     Ts = filter(lambda x: x > 0, Ts)
     current_model = BGModel()
     gen_data = gen.bgext_model(Ts, params['alpha'], params['beta'])
     data = comp.compress_bgext_data(gen_data)
     current_model.fit(frequency=data["frequency"], T=data["T"], N=data["N"], bootstrap_size=100)
     ex = current_model.expected_number_of_purchases_up_to_time(52) + 1
     fitted_e_x.append(ex)
     # Estimates for every sampled parameter set, with NaN/inf dropped.
     percentiles_data = filter(lambda x: not (math.isnan(x) or math.isinf(x)), [BGFitter.static_expected_number_of_purchases_up_to_time(pars['alpha'], pars['beta'], 52) + 1 for pars in current_model.sampled_parameters])
     # NOTE(review): `len()` of a `filter` object raises TypeError under
     # Python 3 -- this line presumably relies on Python 2 semantics.
     if len(percentiles_data) > 0:
         percentiles = (np.percentile(percentiles_data, 16), np.percentile(percentiles_data, 84))
         percentiles_e_x.append(percentiles)
     # plt.hist(
     #     percentiles_data,
     #     bins=range(40), normed=0,
     #     alpha=0.3
     # )
     # plt.axvline(x=true_Ex, color='red', alpha =0.7)
     # plt.axvline(x=percentiles[0], color='blue', alpha =0.7)
Beispiel #15
0
def test_BG_compression():
    """The ``N`` column of the compressed data must sum to the original length."""
    alpha, beta = 2.23, 9.35

    raw = gen.bgext_model(52, alpha, beta, size=1000)
    compressed = compress_bgext_data(raw)

    total_count = sum(compressed['N'])
    assert len(raw) == total_count
Beispiel #16
0
    test_data['N'] = test_N
    test_data = test_data[test_data['N'] > 0]
    train_data = data.copy(deep=True)
    train_data['N'] = train_N
    train_data = train_data[train_data['N'] > 0]
    return train_data, test_data




if __name__ == "__main__":
    # Manual check: generate BG data, resample its counts and print the
    # result of the goodness test against the resampled copy.
    params = {'alpha': 0.32, 'beta': 0.85}

    # 1000 samples for each T from 2 to 7, compressed after generation.
    gen_data = compress_bgext_data(gen.bgext_model(T=[2] * 1000 + [3] * 1000
                                                     + [4] * 1000 + [5] * 1000
                                                     + [6] * 1000 + [7] * 1000,
                                                   alpha=params['alpha'],
                                                   beta=params['beta']))

    # The test set is a deep copy of the generated data whose 'N' column is
    # replaced by the output of multinomial_sample.
    test_n = multinomial_sample(gen_data['N'])
    test_data = gen_data.copy(deep=True)
    test_data['N'] = test_n
    print(goodness_of_test(
        data=gen_data,
        fitter_class=est.BGFitter,
        test_data=test_data,
        verbose=True))

    # simulation_size = 100
    # N_users = 10000
    # T_horizon = 10