Example #1
0
    def test_fit_with_and_without_weights(self, cdnow_customers):
        """Fit on raw rows and on weighted, de-duplicated rows; parameters must agree.

        The customer table is collapsed to its distinct
        (frequency, recency, T) triples, and the number of rows sharing each
        triple is passed to ``fit`` as the weight. The fitted
        (r, alpha, s, beta) of both models are compared to two decimals.
        """
        group_keys = ['frequency', 'recency', 'T']
        # groupby(...).size() yields an unnamed Series; after reset_index()
        # its values land in a column labelled 0, which is renamed 'weights'.
        weighted = (
            cdnow_customers.copy()
            .groupby(group_keys)
            .size()
            .reset_index()
            .rename(columns={0: 'weights'})
        )

        unweighted_fitter = estimation.ParetoNBDFitter()
        unweighted_fitter.fit(
            cdnow_customers['frequency'],
            cdnow_customers['recency'],
            cdnow_customers['T'],
        )

        weighted_fitter = estimation.ParetoNBDFitter()
        weighted_fitter.fit(
            weighted['frequency'],
            weighted['recency'],
            weighted['T'],
            weighted['weights'],
        )

        param_names = ('r', 'alpha', 's', 'beta')
        params_unweighted = np.array(unweighted_fitter._unload_params(*param_names))
        params_weighted = np.array(weighted_fitter._unload_params(*param_names))
        npt.assert_array_almost_equal(params_unweighted, params_weighted, decimal=2)
Example #2
0
    def test_fit_with_index(self, cdnow_customers):
        """Check that ``fit`` applies a caller-supplied ``index`` to ``ptf.data``.

        A reversed range is passed as ``index`` and every label of the fitted
        data's index must equal it. When ``index=None`` is passed instead, the
        resulting index must not be equal to that reversed range everywhere.
        """
        ptf = estimation.ParetoNBDFitter()
        index = range(len(cdnow_customers), 0, -1)
        ptf.fit(cdnow_customers['frequency'],
                cdnow_customers['recency'],
                cdnow_customers['T'],
                index=index)
        # Assert on truthiness directly instead of comparing to True with ==.
        assert (ptf.data.index == index).all()

        ptf = estimation.ParetoNBDFitter()
        ptf.fit(cdnow_customers['frequency'],
                cdnow_customers['recency'],
                cdnow_customers['T'],
                index=None)
        assert not (ptf.data.index == index).all()
Example #3
0
    def test_conditional_probability_alive_is_between_0_and_1(self, cdnow_customers):
        """P(alive) must lie in [0, 1] over a coarse grid of inputs.

        Frequency and recency each step through 0, 10, ..., 90; the third
        argument starts at the current recency and steps by 10 below 100.
        """
        fitter = estimation.ParetoNBDFitter()
        fitter.fit(
            cdnow_customers['frequency'],
            cdnow_customers['recency'],
            cdnow_customers['T'],
        )

        grid = np.arange(0, 100, 10.)
        for freq in grid:
            for recency in grid:
                for t in np.arange(recency, 100, 10.):
                    p_alive = fitter.conditional_probability_alive(freq, recency, t)
                    assert 0.0 <= p_alive <= 1.0
Example #4
0
    def test_conditional_probability_alive_is_between_0_and_1(self):
        """P(alive) must lie in [0, 1] over a coarse integer grid of inputs."""
        # NOTE(review): cdnow_customers is not a parameter of this test;
        # presumably it is a module-level dataset -- confirm.
        fitter = estimation.ParetoNBDFitter()
        fitter.fit(
            cdnow_customers['frequency'],
            cdnow_customers['recency'],
            cdnow_customers['T'],
        )

        for freq in range(0, 100, 10):
            for recency in range(0, 100, 10):
                # The third argument never starts below the recency value.
                for age in range(recency, 100, 10):
                    p_alive = fitter.conditional_probability_alive(freq, recency, age)
                    assert 0 <= p_alive <= 1.0
Example #5
0
    def test_conditional_probability_alive_matrix(self, cdnow_customers):
        """Each matrix cell must equal the scalar P(alive) for the same inputs.

        Cell ``Z[t_x][x]`` is compared with
        ``conditional_probability_alive(x, t_x, max_t)``, where ``max_t`` is
        the largest ``T`` in the fitted data truncated to an int.
        """
        fitter = estimation.ParetoNBDFitter()
        fitter.fit(
            cdnow_customers['frequency'],
            cdnow_customers['recency'],
            cdnow_customers['T'],
        )
        Z = fitter.conditional_probability_alive_matrix()
        max_t = int(fitter.data['T'].max())

        n_rows, n_cols = Z.shape[0], Z.shape[1]
        for t_x in range(n_rows):
            for x in range(n_cols):
                scalar_value = fitter.conditional_probability_alive(x, t_x, max_t)
                assert Z[t_x][x] == scalar_value
Example #6
0
    def test_expectation_returns_same_value_as_R_BTYD(self, cdnow_customers):
        """Expected purchases for t = 0..9 must match the reference values.

        Reference numbers from https://cran.r-project.org/web/packages/BTYD/BTYD.pdf
        """
        fitter = estimation.ParetoNBDFitter()
        fitter.fit(
            cdnow_customers['frequency'],
            cdnow_customers['recency'],
            cdnow_customers['T'],
            tol=1e-6,
        )

        reference = np.array([
            0.00000000, 0.05077821, 0.09916088, 0.14542507, 0.18979930,
            0.23247466, 0.27361274, 0.31335159, 0.35181024, 0.38909211,
        ])
        predicted = fitter.expected_number_of_purchases_up_to_time(range(10))
        npt.assert_allclose(reference, predicted, atol=0.01)
Example #7
0
 def test_overflow_error(self):
     """``_log_A_0`` must return finite, negative, non-null values for large frequencies."""
     fitter = estimation.ParetoNBDFitter()
     params = np.array([10.465, 7.98565181e-03, 3.0516, 2.820])
     freq = np.array([400., 500., 500.])
     rec = np.array([5., 1., 4.])
     age = np.array([6., 37., 37.])

     for value in fitter._log_A_0(params, freq, rec, age):
         assert value < 0
         assert not np.isinf(value)
         assert not pd.isnull(value)
Example #8
0
 def test_conditional_probability_alive_overflow_error(self):
     """P(alive) must stay a finite value in [0, 1] for large frequencies."""
     fitter = estimation.ParetoNBDFitter()
     param_names = ['r', 'alpha', 's', 'beta']
     param_values = [10.465, 7.98565181e-03, 3.0516, 2.820]
     # Parameters are injected directly so no fitting is required.
     fitter.params_ = OrderedDict(zip(param_names, param_values))

     freq = np.array([400., 500., 500.])
     rec = np.array([5., 1., 4.])
     age = np.array([6., 37., 37.])

     for p_alive in fitter.conditional_probability_alive(freq, rec, age):
         assert 0 <= p_alive <= 1
         assert not np.isinf(p_alive)
         assert not pd.isnull(p_alive)
Example #9
0
 def test_params_out_is_close_to_Hardie_paper(self, cdnow_customers):
     """Fitted (r, alpha, s, beta) must match the reference values to three decimals."""
     fitter = estimation.ParetoNBDFitter()
     fitter.fit(
         cdnow_customers['frequency'],
         cdnow_customers['recency'],
         cdnow_customers['T'],
         iterative_fitting=3,
     )

     reference = np.array([0.553, 10.578, 0.606, 11.669])
     fitted = np.array(fitter._unload_params('r', 'alpha', 's', 'beta'))
     npt.assert_array_almost_equal(reference, fitted, decimal=3)
Example #10
0
 def test_conditional_expectation_returns_same_value_as_R_BTYD(self, cdnow_customers):
     """Conditional expected purchases must be within 0.01 of the reference value.

     Reference from https://cran.r-project.org/web/packages/BTYD/vignettes/BTYD-walkthrough.pdf
     """
     fitter = estimation.ParetoNBDFitter()
     fitter.fit(
         cdnow_customers['frequency'],
         cdnow_customers['recency'],
         cdnow_customers['T'],
     )

     # Arguments in call order: (t, x, t_x, T).
     horizon, frequency, recency, age = 52, 26.00, 30.86, 31
     reference = 25.46
     predicted = fitter.conditional_expected_number_of_purchases_up_to_time(
         horizon, frequency, recency, age)
     assert abs(predicted - reference) < 0.01
Example #11
0
 def test_conditional_probability_alive(self, cdnow_customers):
     """
     P(alive) for fixed parameters must be within 0.001 of the target.

     Target taken from page 8,
     https://cran.r-project.org/web/packages/BTYD/vignettes/BTYD-walkthrough.pdf
     """
     fitter = estimation.ParetoNBDFitter()
     param_names = ['r', 'alpha', 's', 'beta']
     param_values = [0.5534, 10.5802, 0.6061, 11.6562]
     # Parameters are set by hand; the model is not fitted in this test.
     fitter.params_ = OrderedDict(zip(param_names, param_values))

     target = 0.9979
     p_alive = fitter.conditional_probability_alive(26.00, 30.86, 31.00)
     assert abs(p_alive - target) < 0.001
Example #12
0
    def test_conditional_expectation_underflow(self):
        """Two nearly identical inputs must give nearly identical expectations.

        ``conditional_expected_number_of_purchases_up_to_time`` is called with
        its second argument at 132 and then 133. The results should differ by
        less than 0.05; a larger gap would indicate the function is not
        guarding against numeric underflow.
        """
        fitter = estimation.ParetoNBDFitter()
        fitter.params_ = OrderedDict(
            {'alpha': 10.58, 'beta': 11.67, 'r': 0.55, 's': 0.61})

        expect = fitter.conditional_expected_number_of_purchases_up_to_time
        # Values noted alongside the original test:
        #   x=132 -> 6.2060517889632418, x=133 -> 6.2528722475748113
        at_132 = expect(10, 132, 200, 200)
        at_133 = expect(10, 133, 200, 200)
        assert abs(at_132 - at_133) < 0.05
Example #13
0
    def test_conditional_probability_of_n_purchases_up_to_time_is_between_0_and_1(
            self, cdnow_customers):
        """
        P(n purchases up to t) must lie in [0, 1] for randomly drawn inputs.

        Due to the large parameter space we take a random subset.
        """
        fitter = estimation.ParetoNBDFitter()
        fitter.fit(
            cdnow_customers['frequency'],
            cdnow_customers['recency'],
            cdnow_customers['T'],
        )
        prob_of_n = fitter.conditional_probability_of_n_purchases_up_to_time

        for freq in np.random.choice(100, 5):
            for recency in np.random.choice(100, 5):
                # Age is drawn as recency plus a non-negative offset.
                for age in recency + np.random.choice(100, 5):
                    for t in np.random.choice(100, 5):
                        for n in np.random.choice(10, 5):
                            probability = prob_of_n(n, t, freq, recency, age)
                            assert 0.0 <= probability <= 1.0
Example #14
0
    def test_Ex_estimation_and_error(self):
        """Check expected purchases and their error estimate after three fits.

        The model is fitted on a small hand-written dataset, then on the same
        dataset with duplicate rows removed, then on ``cdnow_customers``. After
        each fit the expected number of purchases up to ``t`` and its error
        (for the fixed matrix ``C``) are checked.
        """
        ptf = estimation.ParetoNBDFitter()

        t = 100
        C = [[0.02, 0, 0, 0], [0, 1.0, 0, 0], [0, 0, 0.03, 0.0],
             [0, 0, 0, 2.0]]

        frequency = [0, 0, 0, 1, 1, 0, 2, 5, 6, 6, 0, 10]
        recency = [0, 0, 0, 1, 10, 0, 8, 8, 9, 9, 0, 10]
        T = [10] * len(frequency)

        ptf.fit(frequency, recency, T, initial_params=[0.5, 2, 0.5, 0.5])

        Ex = ptf.expected_number_of_purchases_up_to_time(t)
        Ex_err = ptf.expected_number_of_purchases_up_to_time_error(t, C)

        assert 25 > Ex > 15
        assert Ex_err > 0

        compressed_frequency = [0, 1, 1, 2, 5, 6, 10]
        compressed_recency = [0, 1, 10, 8, 8, 9, 10]
        compressed_T = [10, 10, 10, 10, 10, 10, 10]

        ptf.fit(compressed_frequency,
                compressed_recency,
                compressed_T,
                initial_params=[0.5, 2, 0.5, 0.5])

        Ex2 = ptf.expected_number_of_purchases_up_to_time(t)
        Ex2_err = ptf.expected_number_of_purchases_up_to_time_error(t, C)

        # Check the results of THIS fit. Only positivity is asserted:
        # NOTE(review): the 15..25 range above is not known to hold for the
        # unweighted compressed data -- tighten once confirmed.
        assert Ex2 > 0
        assert Ex2_err > 0

        # NOTE(review): cdnow_customers is not a parameter of this test;
        # presumably it is a module-level dataset -- confirm.
        ptf.fit(cdnow_customers['frequency'], cdnow_customers['recency'],
                cdnow_customers['T'])
        Ex = ptf.expected_number_of_purchases_up_to_time(t)
        Ex_err = ptf.expected_number_of_purchases_up_to_time_error(t, C)

        assert Ex > 0
        assert Ex_err > 0
Example #15
0
    def test_conditional_probability_of_n_purchases_up_to_time_adds_up_to_1(
            self, cdnow_customers):
        """
        P(n purchases up to t), summed over n = 0..19, must be 1 to two decimals.

        Due to the large parameter space we take a random subset. We also restrict our limits to keep the number of
        values of n for which the probability needs to be calculated to a sane level.
        """
        fitter = estimation.ParetoNBDFitter()
        fitter.fit(
            cdnow_customers['frequency'],
            cdnow_customers['recency'],
            cdnow_customers['T'],
        )
        prob_of_n = fitter.conditional_probability_of_n_purchases_up_to_time

        for freq in np.random.choice(10, 5):
            for recency in np.random.choice(9, 5):
                # Age is sampled from recency..9, so it is never below recency.
                for age in np.random.choice(np.arange(recency, 10, 1), 5):
                    for t in 1 + np.random.choice(9, 5):
                        probabilities = [
                            prob_of_n(n, t, freq, recency, age)
                            for n in np.arange(0, 20, 1)
                        ]
                        total = np.sum(probabilities)
                        npt.assert_almost_equal(total, 1.0, decimal=2)