Esempio n. 1
0
    def test_find_strength_diff(self):
        """Check ``find_strength_diff`` on a hand-configured model.

        The model is not fitted here; its fitted state, entity lookups and
        parameter vectors are assigned directly so the expected output can
        be stated up front. The result for ``INDEXED_DATA_NORESCOL`` must
        match the expected array to 10 decimal places.
        """
        param_values = [0.3, 0.2, -0.2, -0.4]
        index_to_name = {0: 'A', 1: 'B', 2: 'C', 3: 'D'}

        model = BradleyTerry()
        model.is_fitted = True
        model.target_col_name = 'result'
        model.lkp = index_to_name
        model.rplc_lkp = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
        # Two independent arrays, so the attributes never alias each other.
        model._params = np.array(param_values)
        model.params_ = np.array(param_values)

        expected = np.array([-0.4, -0.1, 0.2, -0.2])
        actual = model.find_strength_diff(INDEXED_DATA_NORESCOL)

        np.testing.assert_array_almost_equal(actual, expected, decimal=10)
Esempio n. 2
0
 def test_unpack_data_for_choix(self):
     """Check ``unpack_data_for_choix`` on ``INDEXED_DATA``.

     The entity lookups are built from the fixture itself. The call is
     expected to return a dict holding a single ``'winner'`` key with the
     listed index pairs, together with an entity count of 4.
     """
     bt = BradleyTerry()
     bt.rplc_lkp, bt.lkp = generate_entity_lookup(
         get_distinct_entities(INDEXED_DATA))
     bt.target_col_name = 'result'
     data, n_ents = bt.unpack_data_for_choix(INDEXED_DATA,
                                             INDEXED_DATA.index.names)
     correct_data = {'winner': [(2, 1), (0, 1), (3, 2), (3, 2)]}
     correct_n_ents = 4
     self.assertEqual(n_ents, correct_n_ents)
     self.assertEqual(data.keys(), correct_data.keys())
     for key, expected_pairs in correct_data.items():
         actual_pairs = list(data[key])
         # Without this check a result with too few pairs (or none at all)
         # would pass, because the loop below only visits existing pairs.
         self.assertEqual(len(actual_pairs), len(expected_pairs))
         for actual, expected in zip(actual_pairs, expected_pairs):
             self.assertTupleEqual(actual, expected)
Esempio n. 3
0
    def test_unpack_data_for_pylogit(self):
        """Check the long-format frame built by ``unpack_data_for_pylogit``.

        Uses ``INDEXED_EVERYONE_WINS_ONCE_DATA`` with entity columns
        ``ent1``/``ent2`` and expects ``observation``, ``entity`` and
        ``CHOICE`` columns. Both frames are cast to ``int32`` before the
        comparison.
        """
        model = BradleyTerry()

        entities = get_distinct_entities(INDEXED_EVERYONE_WINS_ONCE_DATA)
        model.rplc_lkp, model.lkp = generate_entity_lookup(entities)
        model.target_col_name = 'result'

        actual = model.unpack_data_for_pylogit(
            INDEXED_EVERYONE_WINS_ONCE_DATA, ['ent1', 'ent2'])

        expected_columns = {
            'observation': [1, 1, 2, 2, 3, 3, 4, 4, 5, 5],
            'entity': [1, 2, 0, 1, 2, 3, 2, 3, 1, 3],
            'CHOICE': [0, 1, 1, 0, 0, 1, 0, 1, 1, 0],
        }
        expected = pd.DataFrame(expected_columns)

        assert_frame_equal(actual.astype('int32'), expected.astype('int32'))
Esempio n. 4
0
    def test_predict_proba(self):
        """Check ``predict_proba`` on a hand-configured model.

        The model's fitted state, lookups and parameter vectors are assigned
        directly, with ``pylogit_fit`` set to ``False``. The predictions for
        ``INDEXED_DATA_NORESCOL`` are expected to equal the logistic function
        applied to the listed values.
        """
        bt = BradleyTerry()
        bt.is_fitted = True
        bt.target_col_name = 'result'
        bt.lkp = {0: 'A', 1: 'B', 2: 'C', 3: 'D'}
        bt.rplc_lkp = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
        bt._params = np.array([0.3, 0.2, -0.2, -0.4])
        bt.params_ = np.array([0.3, 0.2, -0.2, -0.4])
        bt.pylogit_fit = False

        def logistic(x):
            return 1 / (1 + np.exp(-x))

        pred_probs = bt.predict_proba(INDEXED_DATA_NORESCOL)
        correct_probs = np.array(
            [logistic(-0.4),
             logistic(-0.1),
             logistic(0.2),
             logistic(-0.2)])

        # Compare with a tolerance: these are floating-point results, and an
        # exact comparison fails on harmless last-bit rounding differences
        # (e.g. a mathematically equivalent formulation of the sigmoid).
        np.testing.assert_array_almost_equal(pred_probs,
                                             correct_probs,
                                             decimal=10)
Esempio n. 5
0
    def test_join_up_dataframes(self):
        """Check ``join_up_dataframes`` on long-format pylogit data.

        Two scenarios are exercised:

        1. only ``df_i`` is supplied, and
        2. ``df_i`` is supplied together with ``merge_columns=['mergecol']``.

        In both, the joined frame must hold the ``observation``, ``entity``
        and ``CHOICE`` columns plus a ``feat1`` column. Frames are cast to
        ``int32`` before the comparison.
        """

        def expected_frame(feat1):
            # Only the feature column differs between the two scenarios.
            return pd.DataFrame({
                'observation': [1, 1, 2, 2, 3, 3, 4, 4, 5, 5],
                'entity': [1, 2, 0, 1, 2, 3, 2, 3, 1, 3],
                'CHOICE': [0, 1, 1, 0, 0, 1, 0, 1, 1, 0],
                'feat1': feat1
            })

        model = BradleyTerry()

        # Scenario 1: only df_i is fed in but df_j is meant to be the same
        entities = get_distinct_entities(INDEXED_EVERYONE_WINS_ONCE_DATA)
        model.rplc_lkp, model.lkp = generate_entity_lookup(entities)
        model.target_col_name = 'result'

        long_format = model.unpack_data_for_pylogit(
            INDEXED_EVERYONE_WINS_ONCE_DATA, ['ent1', 'ent2'])
        joined = model.join_up_dataframes(long_format,
                                          df_i=INDEXED_ENT1_ATTRIBUTES)

        expected = expected_frame([11, 12, 1, 11, 12, 15, 12, 15, 11, 15])
        assert_frame_equal(joined.astype('int32'), expected.astype('int32'))

        # NOTE(review): the scenarios below are disabled. Most of them unpack
        # three return values from join_up_dataframes, whereas the live calls
        # in this test use a single return value, so they presumably target
        # an older signature -- confirm before re-enabling.
        #
        # Testing the case when different df_i and df_j is fed in
        # x_comb_1_2 = \
        #     bt.join_up_dataframes(INDEXED_EVERYONE_WINS_ONCE_DATA,
        #                           INDEXED_ENT1_ATTRIBUTES,
        #                           INDEXED_ENT2_ATTRIBUTES)
        #
        # correct_x_comb_1_2 = pd.DataFrame(
        #     {'observation': [1, 1, 2, 2, 3, 3, 4, 4, 5, 5],
        #      'entity': [1, 2, 0, 1, 2, 3, 2, 3, 1, 3],
        #      'CHOICE': [0, 1, 1, 0, 0, 1, 0, 1, 1, 0],
        #      'feat1': [11, 12, 1, 11, 12, 15, 12, 15, 11, 15],
        #      'feat2'}
        # ).set_index(['ent1', 'ent2'])
        #
        # assert_frame_equal(x_comb_1_2, correct_x_comb_1_2)
        #
        # # Testing the case when df_j is fed in but df_i is meant to be the same
        # x_comb_2_1, x_comb_entnames_2_1, run_choix_2_1 = \
        #     bt.join_up_dataframes(INDEXED_DATA, df_j=INDEXED_ENT2_ATTRIBUTES)
        #
        # correct_x_comb_2_1 = pd.DataFrame(
        #     {'ent1': ['C', 'B', 'C', 'D'],
        #      'ent2': ['B', 'A', 'D', 'C'],
        #      'result': [1, 0, 0, 1],
        #      'feat1_ent2': [1, 1, 0, 1],
        #      'feat1_ent1': [1, 1, 1, 0]}
        # ).set_index(['ent1', 'ent2'])
        #
        # assert_frame_equal(x_comb_2_1, correct_x_comb_2_1)
        # self.assertListEqual(x_comb_entnames_2_1, ['ent1', 'ent2'])
        # self.assertFalse(run_choix_2_1)
        #
        # # Case when df_j is fed in but df_i is not meant to exist
        # x_comb_2, x_comb_entnames_2, run_choix_2 = \
        #     bt.join_up_dataframes(INDEXED_DATA, df_j=INDEXED_ENT2_ATTRIBUTES,
        #                           same_ent_data=False)
        #
        # correct_x_comb_2 = pd.DataFrame(
        #     {'ent1': ['C', 'B', 'C', 'D'],
        #      'ent2': ['B', 'A', 'D', 'C'],
        #      'result': [1, 0, 0, 1],
        #      'feat1': [1, 1, 0, 1]}
        # ).set_index(['ent1', 'ent2'])
        #
        # assert_frame_equal(x_comb_2, correct_x_comb_2)
        # self.assertListEqual(x_comb_entnames_2, ['ent1', 'ent2'])
        # self.assertFalse(run_choix_2)
        #
        # # Case when df_i is fed in but df_j does not exist
        # x_comb_1, x_comb_entnames_1, run_choix_1 = \
        #     bt.join_up_dataframes(INDEXED_DATA, df_i=INDEXED_ENT1_ATTRIBUTES,
        #                           same_ent_data=False)
        #
        # correct_x_comb_1 = pd.DataFrame(
        #     {'ent1': ['C', 'B', 'C', 'D'],
        #      'ent2': ['B', 'A', 'D', 'C'],
        #      'result': [1, 0, 0, 1],
        #      'feat1': [12, 11, 12, 15]}
        # ).set_index(['ent1', 'ent2'])
        #
        # assert_frame_equal(x_comb_1, correct_x_comb_1)
        # self.assertListEqual(x_comb_entnames_1, ['ent1', 'ent2'])
        # self.assertFalse(run_choix_1)
        #
        # # Case when simple Bradley Terry is fed in
        # x_comb_na, x_comb_entnames_na, run_choix_na = \
        #     bt.join_up_dataframes(INDEXED_DATA)
        #
        # assert_frame_equal(x_comb_na, INDEXED_DATA)
        # self.assertListEqual(x_comb_entnames_na, ['ent1', 'ent2'])
        # self.assertTrue(run_choix_na)
        #
        # Scenario 2: df_i is fed in with a merge column and df_j is meant to
        # be the same
        long_format_merge = model.unpack_data_for_pylogit(
            EVERYONE_WINS_ONCE_DATA_MERGECOL_INDEXED, ['ent1', 'ent2'])
        joined_merge = model.join_up_dataframes(
            long_format_merge,
            df_i=INDEXED_ENT1_ATTRIBUTES_MERGECOL,
            merge_columns=['mergecol'])

        expected_merge = expected_frame([11, 12, 1, 11, 12, 15, 3, 4, 2, 4])
        assert_frame_equal(joined_merge.astype('int32'),
                           expected_merge.astype('int32'))