def test_compute_optimal_ranking_set(self):
    """EILD oracle returns items [2, 3] for user 1 at output cutoff 2.

    The fixture holds a single user ranked over items [1, 2, 3] and a
    hand-written distance matrix in which items 2 and 3 are the pair that
    lies farthest apart (0.9).
    """
    fold = 'u1'

    # One input ranking for user 1 over three candidate items.
    source_ranking = dataset_io.RankingSet(
        id=dataset_io.RankingSetId(fold, 'Alg'),
        matrix=np.array([[1, 2, 3]]),
        user_ids=[1])
    rankings = {source_ranking.id: source_ranking}

    # The base split is left empty; distances are injected by hand below.
    ratings = dataset_io.RatingSet(fold=fold)
    ratings.base = pd.DataFrame.from_records(
        data=[], columns=['user_id', 'item_id', 'rating'])

    oracle = eild_oracle.EILDOracle(rankings, {fold: ratings})
    pairwise = np.array([
        [0, 0.5, 0.2],
        [0.5, 0, 0.9],
        [0.2, 0.9, 0],
    ])
    oracle.distances_by_fold = {fold: (pairwise, [1, 2, 3])}

    optimal = oracle.compute_optimal_ranking_set(
        fold, input_cutoff=3, output_cutoff=2)

    self.assertEqual([1], optimal.user_ids)
    first_row = optimal.matrix[0, :]
    self.assertEqual([2, 3], list(first_row))
def test_compute_optimal_ranking_set(self):
    """EPC oracle output matches ascending injected popularity per user.

    Two users are ranked over four items each. With the popularity values
    injected below, the expected rows are the three least popular items of
    each input row, ordered from least to most popular.
    """
    fold = 'u1'

    source_ranking = dataset_io.RankingSet(
        id=dataset_io.RankingSetId(fold, 'Alg'),
        matrix=np.array([
            [1, 2, 3, 4],
            [5, 6, 7, 8],
        ]),
        user_ids=[1, 2])
    rankings = {source_ranking.id: source_ranking}

    # The base split is empty; popularity is supplied directly instead.
    ratings = dataset_io.RatingSet(fold=fold)
    ratings.base = pd.DataFrame.from_records(
        data=[], columns=['user_id', 'item_id', 'rating'])

    oracle = epc_oracle.EPCOracle(rankings, {fold: ratings})
    item_popularity = {
        1: 1., 2: 0.25, 3: 0.5, 4: 0.75,
        5: 0.8, 6: 0.3, 7: 0.6, 8: 0.01,
    }
    oracle.popularity_by_fold = {fold: item_popularity}

    optimal = oracle.compute_optimal_ranking_set(
        fold, input_cutoff=4, output_cutoff=3)

    self.assertEqual([1, 2], optimal.user_ids)
    result = optimal.matrix
    self.assertEqual([2, 3, 4], list(result[0, :]))
    self.assertEqual([8, 6, 7], list(result[1, :]))
def setUp(self):
    """Create an EILD instance over one fold with empty base/test splits."""
    super().setUp()

    columns = ['user_id', 'item_id', 'rating']
    empty_fold = dataset_io.RatingSet(fold='u1')
    empty_fold.base = pd.DataFrame.from_records(data=[], columns=columns)
    empty_fold.test = pd.DataFrame.from_records(data=[], columns=columns)

    # No rankings are registered; only the rating fold is provided.
    self.eild = eild.EILD(
        ranking_set_by_id={},
        rating_set_by_fold={'u1': empty_fold})
def test_computes_likers(self):
    """Likers are grouped per item from the base split of each fold.

    Base ratings are (user, item, rating) triples; the expected mapping
    sends each item id to the set of user ids that appear with it.
    """
    base_rows = [
        (1, 2, 5),
        (2, 2, 5),
        (2, 3, 5),
        (3, 3, 5),
    ]
    ratings = dataset_io.RatingSet(fold='u1')
    ratings.base = pd.DataFrame.from_records(
        data=base_rows, columns=['user_id', 'item_id', 'rating'])

    likers_by_fold = rating_utils._compute_likers_by_fold({'u1': ratings})

    self.assertIn('u1', likers_by_fold)
    expected_likers = {2: {1, 2}, 3: {2, 3}}
    self.assertDictEqual(expected_likers, likers_by_fold['u1'])
def test_ignores_test_and_validation(self):
    """Ratings in the test/validation splits must not produce likers.

    Only the (empty) base split should be consulted, so the per-item
    likers mapping for the fold is expected to be empty.
    """
    columns = ['user_id', 'item_id', 'rating']
    ratings = dataset_io.RatingSet(fold='u1')
    ratings.base = pd.DataFrame.from_records(data=[], columns=columns)
    ratings.test = pd.DataFrame.from_records(
        data=[(2, 3, 5)], columns=columns)
    ratings.validation = pd.DataFrame.from_records(
        data=[(1, 3, 5)], columns=columns)

    likers_by_fold = rating_utils._compute_likers_by_fold({'u1': ratings})

    self.assertIn('u1', likers_by_fold)
    self.assertDictEqual({}, likers_by_fold['u1'])
def test_compute_popularity(self):
    """Check the per-item popularity computed from a rating set's base split.

    The fixture has two users: item 1 appears with both of them, items 2
    and 3 with one user each. The expected values are 1.0, 0.5 and 0.5
    respectively.
    """
    rating_set = dataset_io.RatingSet(fold='u1')
    rating_set.base = pd.DataFrame.from_records(
        columns=['user_id', 'item_id', 'rating'],
        data=[
            (1, 1, 5),
            (1, 2, 5),
            (2, 1, 5),
            (2, 3, 5),
        ])

    # _compute_popularity takes the rating set itself, so no per-fold
    # mapping is needed here.
    popularity = set(rating_utils._compute_popularity(rating_set).items())

    expected_popularity = {(1, 1.), (2, 0.5), (3, 0.5)}
    self.assertEqual(expected_popularity, popularity)
def test_computes_distances(self):
    """Item-to-item distances are derived from the base split of a fold.

    Items 2 and 3 each appear with three users and share two of them; the
    expected matrix has zeros on the diagonal and 1/3 off the diagonal,
    checked with an absolute tolerance of 1e-8.
    """
    base_rows = [
        (1, 2, 5), (2, 2, 5), (4, 2, 5),
        (2, 3, 5), (3, 3, 5), (4, 3, 5),
    ]
    ratings = dataset_io.RatingSet(fold='u1')
    ratings.base = pd.DataFrame.from_records(
        data=base_rows, columns=['user_id', 'item_id', 'rating'])

    distance_by_fold = rating_utils.compute_distances_by_fold({'u1': ratings})

    self.assertIn('u1', distance_by_fold)
    actual, item_ids = distance_by_fold['u1']
    expected = np.array([
        [0., 1/3],
        [1/3, 0.],
    ])
    within_tolerance = np.abs(actual - expected) < 1e-8
    self.assertTrue(within_tolerance.all())
    self.assertSequenceEqual([2, 3], item_ids)
def test_ignores_test_and_validation(self):
    """Ratings in the test/validation splits must not produce distances.

    The base split is empty while test and validation each hold one rating
    for item 3, so the fold is expected to yield an empty distance matrix
    and an empty item id sequence.
    """
    columns = ['user_id', 'item_id', 'rating']
    rating_set = dataset_io.RatingSet(fold='u1')
    rating_set.base = pd.DataFrame.from_records(columns=columns, data=[])
    rating_set.test = pd.DataFrame.from_records(
        columns=columns, data=[(2, 3, 5)])
    rating_set.validation = pd.DataFrame.from_records(
        columns=columns, data=[(1, 3, 5)])
    rating_set_by_fold = {'u1': rating_set}

    distance_by_fold = rating_utils.compute_distances_by_fold(rating_set_by_fold)

    self.assertIn('u1', distance_by_fold)
    distance_matrix, item_ids = distance_by_fold['u1']

    # Broadcasting against an empty array yields an empty result whose
    # .all() is trivially True, so emptiness is asserted explicitly.
    self.assertEqual(0, np.size(distance_matrix))

    expected_matrix = np.array([])
    # Use the absolute error so negative deviations cannot slip through
    # the tolerance check.
    error_matrix = np.abs(distance_matrix - expected_matrix)
    are_almost_equal = (error_matrix < 1e-8).all()
    self.assertTrue(are_almost_equal)
    self.assertSequenceEqual([], item_ids)
def test_compute_rated_by_fold(self):
    """Rated items are grouped per user for the requested split.

    With split_name='base', each user id is expected to map to the set of
    item ids that appear with it in the base split.
    """
    base_rows = [
        (1, 1, 5),
        (1, 2, 5),
        (2, 1, 5),
        (2, 3, 5),
    ]
    ratings = dataset_io.RatingSet(fold='u1')
    ratings.base = pd.DataFrame.from_records(
        data=base_rows, columns=['user_id', 'item_id', 'rating'])

    rated_by_fold = rating_utils.compute_rated_by_fold(
        {'u1': ratings}, split_name='base')

    self.assertIn('u1', rated_by_fold)
    expected_rated = {1: {1, 2}, 2: {1, 3}}
    self.assertDictEqual(expected_rated, rated_by_fold['u1'])
def test_compute_optimal_ranking_set(self):
    """MAP oracle puts each user's test-split items at the top of the row.

    User 1 has items 2 and 3 in the test split and user 2 has items 1 and
    8, of which only 8 is in user 2's input ranking. Each output row must
    have three entries: the test items present in the input row come
    first (in any order), and the rest is filled from the remaining
    input items.
    """
    fold = 'u1'
    columns = ['user_id', 'item_id', 'rating']

    source_ranking = dataset_io.RankingSet(
        id=dataset_io.RankingSetId(fold, 'Alg'),
        matrix=np.array([
            [1, 2, 3, 4],
            [5, 6, 7, 8],
        ]),
        user_ids=[1, 2])
    rankings = {source_ranking.id: source_ranking}

    ratings = dataset_io.RatingSet(fold=fold)
    ratings.base = pd.DataFrame.from_records(
        data=[(4, 1, 5)], columns=columns)
    ratings.test = pd.DataFrame.from_records(
        data=[
            (1, 3, 5),
            (1, 2, 5),
            (2, 1, 5),
            (2, 8, 5),
        ],
        columns=columns)

    oracle = map_oracle.MAPOracle(rankings, {fold: ratings})
    optimal = oracle.compute_optimal_ranking_set(
        fold, input_cutoff=4, output_cutoff=3)

    self.assertEqual([1, 2], optimal.user_ids)
    result = optimal.matrix

    # User 1: both test items lead, one filler from the leftover inputs.
    self.assertEqual(3, len(result[0, :]))
    self.assertSetEqual({2, 3}, set(result[0, 0:2]))
    user1_fillers = set(result[0, 2:])
    self.assertTrue(user1_fillers <= {1, 4})

    # User 2: the single ranked test item leads, followed by two fillers.
    self.assertEqual(3, len(result[1, :]))
    self.assertSetEqual({8}, set(result[1, 0:1]))
    user2_fillers = set(result[1, 1:])
    self.assertTrue(user2_fillers <= {5, 6, 7})
def test_compute_hits_by_fold(self):
    """Hits per user match the test split rows, not the base split row.

    The base split holds one rating by user 4; the expected mapping only
    contains users 1 and 2 with the item ids from their test split rows.
    """
    columns = ['user_id', 'item_id', 'rating']
    ratings = dataset_io.RatingSet(fold='u1')
    ratings.base = pd.DataFrame.from_records(
        data=[(4, 1, 5)], columns=columns)
    ratings.test = pd.DataFrame.from_records(
        data=[
            (1, 1, 5),
            (1, 2, 5),
            (2, 1, 5),
            (2, 3, 5),
        ],
        columns=columns)

    hits_by_fold = rating_utils.compute_hits_by_fold({'u1': ratings})

    self.assertIn('u1', hits_by_fold)
    expected_hits = {1: {1, 2}, 2: {1, 3}}
    self.assertDictEqual(expected_hits, hits_by_fold['u1'])