def test_replace_entities_with_lkp(self):
    """replace_entities_with_lkp swaps entity labels for lookup codes."""
    entity_codes = {'A': 1, 'B': 2, 'C': 3, 'D': 4}
    expected = pd.DataFrame({
        'ent1': [3, 2, 3, 4],
        'ent2': [2, 1, 4, 3],
        'result': [1, 0, 0, 1],
    })

    model = BradleyTerry()
    actual = model.replace_entities_with_lkp(INDEXED_DATA, entity_codes)

    assert_frame_equal(actual, expected)
def test_unpack_data_for_choix(self):
    """unpack_data_for_choix returns the expected pairs and entity count.

    The lookup tables are built from INDEXED_DATA itself, then the
    unpacked result is compared key by key and tuple by tuple.
    """
    bt = BradleyTerry()
    bt.rplc_lkp, bt.lkp = generate_entity_lookup(
        get_distinct_entities(INDEXED_DATA))
    bt.target_col_name = 'result'

    data, n_ents = bt.unpack_data_for_choix(
        INDEXED_DATA, INDEXED_DATA.index.names)

    correct_data = {'winner': [(2, 1), (0, 1), (3, 2), (3, 2)]}
    correct_n_ents = 4

    self.assertEqual(n_ents, correct_n_ents)
    self.assertEqual(data.keys(), correct_data.keys())
    for key, expected_pairs in correct_data.items():
        actual_pairs = list(data[key])
        # Without this length check a truncated result would pass, because
        # the pairwise comparison below stops at the shorter sequence.
        self.assertEqual(len(actual_pairs), len(expected_pairs))
        for actual_pair, expected_pair in zip(actual_pairs, expected_pairs):
            self.assertTupleEqual(actual_pair, expected_pair)
def test_find_strength_diff(self):
    """find_strength_diff matches hand-computed values on preset params."""
    strengths = [0.3, 0.2, -0.2, -0.4]
    code_to_name = {0: 'A', 1: 'B', 2: 'C', 3: 'D'}
    expected_diff = np.array([-0.4, -0.1, 0.2, -0.2])

    # Fitted state is assigned by hand rather than produced by fit().
    model = BradleyTerry()
    model.is_fitted = True
    model.target_col_name = 'result'
    model.lkp = code_to_name
    model.rplc_lkp = {name: code for code, name in code_to_name.items()}
    model._params = np.array(strengths)
    model.params_ = np.array(strengths)

    actual_diff = model.find_strength_diff(INDEXED_DATA_NORESCOL)

    np.testing.assert_array_almost_equal(
        actual_diff, expected_diff, decimal=10)
def test_unpack_data_for_pylogit(self):
    """unpack_data_for_pylogit emits the expected long-format frame."""
    source = INDEXED_EVERYONE_WINS_ONCE_DATA

    model = BradleyTerry()
    model.rplc_lkp, model.lkp = generate_entity_lookup(
        get_distinct_entities(source))
    model.target_col_name = 'result'

    actual = model.unpack_data_for_pylogit(source, ['ent1', 'ent2'])

    expected = pd.DataFrame({
        'observation': [1, 1, 2, 2, 3, 3, 4, 4, 5, 5],
        'entity': [1, 2, 0, 1, 2, 3, 2, 3, 1, 3],
        'CHOICE': [0, 1, 1, 0, 0, 1, 0, 1, 1, 0],
    })

    # Both frames are cast to int32 so they are compared with one dtype.
    assert_frame_equal(actual.astype('int32'), expected.astype('int32'))
def test_predict_proba(self):
    """predict_proba equals the logistic function of the strength diffs.

    Fitted state is assigned by hand, with ``pylogit_fit`` set to False.
    """
    bt = BradleyTerry()
    bt.is_fitted = True
    bt.target_col_name = 'result'
    bt.lkp = {0: 'A', 1: 'B', 2: 'C', 3: 'D'}
    bt.rplc_lkp = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
    bt._params = np.array([0.3, 0.2, -0.2, -0.4])
    bt.params_ = np.array([0.3, 0.2, -0.2, -0.4])
    bt.pylogit_fit = False

    def exp_func(x):
        return 1 / (1 + np.exp(-x))

    pred_probs = bt.predict_proba(INDEXED_DATA_NORESCOL)
    correct_probs = np.array(
        [exp_func(-0.4), exp_func(-0.1), exp_func(0.2), exp_func(-0.2)])

    # Compare with a tolerance: the expected inputs are decimal literals,
    # while differences of float parameters (e.g. 0.2 - 0.3) are not
    # exactly representable, so bit-for-bit equality is brittle.
    np.testing.assert_array_almost_equal(pred_probs, correct_probs,
                                         decimal=10)
def test_check_for_no_new_entities(self):
    """check_for_no_new_entities raises on unseen labels only.

    The first call uses a frame containing labels that are not in
    ``bt.lkp`` and must raise; the second uses TRANSITIVE_DATA_INDEXED
    and must not.
    """
    bt = BradleyTerry()
    bt.is_fitted = True
    bt.target_col_name = 'result'
    bt.lkp = {1: 'A', 2: 'B', 3: 'C', 4: 'D'}

    # NOTE(review): 'A,' (trailing comma inside the string) may be a typo
    # for 'A'; 'K' is absent from bt.lkp either way -- confirm intent.
    wrong_data = pd.DataFrame({
        'ent1': ['K', 'A'],
        'ent2': ['A,', 'B'],
        'result': [1, 0]
    })
    wrong_data = wrong_data.set_index(['ent1', 'ent2'])

    with self.assertRaises(Exception):
        bt.check_for_no_new_entities(wrong_data)

    try:
        bt.check_for_no_new_entities(TRANSITIVE_DATA_INDEXED)
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit are not turned into failures.
        self.fail("check_for_no_new_entities failed unexpectedly")
def test_rank_entities(self):
    """rank_entities returns the expected order in both directions."""
    model = BradleyTerry()
    model.fit(TRANSITIVE_DATA_INDEXED, 'result')

    # (keyword arguments, expected ranking); the default call comes first.
    scenarios = (
        ({}, ['D', 'C', 'B', 'A']),
        ({'ascending': False}, ['A', 'B', 'C', 'D']),
    )
    for kwargs, expected_order in scenarios:
        self.assertListEqual(model.rank_entities(**kwargs), expected_order)
def test_join_up_dataframes(self):
    """join_up_dataframes adds the ``feat1`` column to long-format data.

    Two scenarios are exercised, both supplying only ``df_i``: a plain
    join, and a join that additionally passes ``merge_columns``.
    """
    # TODO: earlier drafts of this test also covered (a) distinct df_i and
    # df_j, (b) only df_j supplied, (c) df_i or df_j supplied with
    # same_ent_data=False, and (d) no attribute frames at all. Those
    # drafts unpacked three return values from join_up_dataframes, which
    # does not match the single frame used below, so they would need to
    # be rewritten before being restored.

    def expected_frame(feat1):
        # Only the feature column differs between the two scenarios.
        frame = pd.DataFrame({
            'observation': [1, 1, 2, 2, 3, 3, 4, 4, 5, 5],
            'entity': [1, 2, 0, 1, 2, 3, 2, 3, 1, 3],
            'CHOICE': [0, 1, 1, 0, 0, 1, 0, 1, 1, 0],
            'feat1': feat1,
        })
        return frame.astype('int32')

    model = BradleyTerry()
    model.rplc_lkp, model.lkp = generate_entity_lookup(
        get_distinct_entities(INDEXED_EVERYONE_WINS_ONCE_DATA))
    model.target_col_name = 'result'

    # Scenario 1: only df_i is fed in, df_j is meant to be the same.
    long_plain = model.unpack_data_for_pylogit(
        INDEXED_EVERYONE_WINS_ONCE_DATA, ['ent1', 'ent2'])
    joined_plain = model.join_up_dataframes(
        long_plain, df_i=INDEXED_ENT1_ATTRIBUTES)
    assert_frame_equal(
        joined_plain.astype('int32'),
        expected_frame([11, 12, 1, 11, 12, 15, 12, 15, 11, 15]))

    # Scenario 2: df_i is fed in with a merge column, df_j is meant to be
    # the same.
    long_merge = model.unpack_data_for_pylogit(
        EVERYONE_WINS_ONCE_DATA_MERGECOL_INDEXED, ['ent1', 'ent2'])
    joined_merge = model.join_up_dataframes(
        long_merge,
        df_i=INDEXED_ENT1_ATTRIBUTES_MERGECOL,
        merge_columns=['mergecol'])
    assert_frame_equal(
        joined_merge.astype('int32'),
        expected_frame([11, 12, 1, 11, 12, 15, 3, 4, 2, 4]))
def test_predict(self):
    """predict returns the expected 0/1 labels after fitting."""
    expected_labels = np.array([0, 0, 1, 0])

    model = BradleyTerry()
    model.fit(TRANSITIVE_DATA_INDEXED, 'result')
    actual_labels = model.predict(INDEXED_DATA_NORESCOL)

    np.testing.assert_array_equal(actual_labels, expected_labels)
def test_predict_choice(self):
    """predict_choice returns the expected entity label for each row."""
    expected_choices = np.array(['B', 'A', 'C', 'C'])

    model = BradleyTerry()
    model.fit(TRANSITIVE_DATA_INDEXED, 'result')
    actual_choices = model.predict_choice(INDEXED_DATA_NORESCOL)

    np.testing.assert_array_equal(actual_choices, expected_choices)