def test_fit(self):
    """Fit a list of scores with NumberNormalizer, without and with decimal rounding.

    Each fitted value is compared pairwise (via ``zip``) against a hard-coded
    expected value using ``assertAlmostEqual`` with its default tolerance.
    """
    raw_scores = [1, 2, 5, 5, 3, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0, 10]

    # Default normalizer: compare against the unrounded targets
    normalized = NumberNormalizer().fit(raw_scores)
    targets = [
        -1.0, -0.77777777, -0.11111111, -0.11111111,
        -0.55555555, -0.44444444, -0.42222222, -0.39999999,
        -0.37777777, -0.35555555, -0.33333333, 1.0,
    ]
    for pair in zip(targets, normalized):
        self.assertAlmostEqual(*pair)

    # Same scores, normalizer configured with decimal_rounding=4
    normalized_rounded = NumberNormalizer(decimal_rounding=4).fit(raw_scores)
    targets_rounded = [
        -1.0, -0.7778, -0.1111, -0.1111,
        -0.5556, -0.4444, -0.4222, -0.4,
        -0.3778, -0.3556, -0.3333, 1.0,
    ]
    for pair in zip(targets_rounded, normalized_rounded):
        self.assertAlmostEqual(*pair)
def test_graph_creation_exo_missing(self):
    """Build NXFullGraph with exo representations/properties that do not exist.

    For every combination the test asserts that user and item nodes are
    present and that the graph holds no property node.
    """
    # Import ratings as DataFrame
    importer = RatingsImporter(
        source=CSVFile(ratings_filename),
        from_id_column='user_id',
        to_id_column='item_id',
        score_column='points',
        timestamp_column='timestamp',
        score_processor=NumberNormalizer()
    )
    frame = importer.import_ratings()

    # One entry per scenario; each dict is forwarded to NXFullGraph as keyword arguments
    missing_exo_cases = [
        # Only non-existent exo properties
        {
            'item_exo_properties': ['asdds', 'dsdds'],
            'user_exo_properties': ['vvvv'],
        },
        # Only non-existent exo representations
        {
            'item_exo_representation': "asdsa",
            'user_exo_representation': "dsdssd",
        },
        # Non-existent exo representations together with non-existent exo properties
        {
            'user_exo_representation': 'not_exist',
            'item_exo_representation': 'not_Exist2',
            'item_exo_properties': ["asdsa"],
            'user_exo_properties': ["dsdssd"],
        },
    ]

    for exo_kwargs in missing_exo_cases:
        graph = NXFullGraph(
            source_frame=frame,
            item_contents_dir=movies_dir,
            user_contents_dir=user_dir,
            **exo_kwargs
        )

        # Simple asserts just to make sure the graph is created
        self.assertGreater(len(graph.user_nodes), 0)
        self.assertGreater(len(graph.item_nodes), 0)
        self.assertEqual(len(graph.property_nodes), 0)
def test_graph_creation(self):
    """Create NXFullGraph in several configurations and sanity-check its node sets."""

    def check_nodes(graph, expect_properties):
        # User and item nodes must always exist; property nodes only when requested
        self.assertGreater(len(graph.user_nodes), 0)
        self.assertGreater(len(graph.item_nodes), 0)
        if expect_properties:
            self.assertGreater(len(graph.property_nodes), 0)
        else:
            self.assertEqual(len(graph.property_nodes), 0)

    # Import ratings as DataFrame
    importer = RatingsImporter(
        source=CSVFile(ratings_filename),
        rating_configs=[
            RatingsFieldConfig(field_name='points',
                               processor=NumberNormalizer(min_=1, max_=5))
        ],
        from_field_name='user_id',
        to_field_name='item_id',
        timestamp_field_name='timestamp',
    )
    frame = importer.import_ratings()

    # Properties given, representation not set: 'producer' and 'starring' are
    # taken from all exo representations, since several representations may
    # contain the same properties.
    # NOTE(review): '1' is presumably the column of the users DAT file holding
    # the gender — confirm against the dataset.
    by_properties = NXFullGraph(
        source_frame=frame,
        item_contents_dir=movies_dir,
        user_contents_dir=user_dir,
        item_exo_properties=['producer', 'starring'],
        user_exo_properties=['1']
    )
    check_nodes(by_properties, True)

    # Representation given, properties not set: ALL exo properties of
    # representation "0" are retrieved
    by_representation = NXFullGraph(
        source_frame=frame,
        item_contents_dir=movies_dir,
        user_contents_dir=user_dir,
        item_exo_representation="0",
        user_exo_representation="0"
    )
    check_nodes(by_representation, True)

    # Only the ratings frame: no property node is expected
    bare = NXFullGraph(frame)
    check_nodes(bare, False)
def test_graph_creation(self):
    """Create NXTripartiteGraph in several configurations and sanity-check its node sets."""
    # Import ratings as DataFrame
    importer = RatingsImporter(
        source=CSVFile(ratings_filename),
        rating_configs=[
            RatingsFieldConfig(field_name='points',
                               processor=NumberNormalizer(min_=1, max_=5))
        ],
        from_field_name='user_id',
        to_field_name='item_id',
        timestamp_field_name='timestamp',
    )
    frame = importer.import_ratings()

    # Configurations for which property nodes are expected in the graph
    cases_with_properties = [
        # Property 'starring' taken from representation '0'
        {'item_exo_representation': "0", 'item_exo_properties': ['starring']},
        # Only the exo representation is specified
        {'item_exo_representation': "0"},
        # Only the exo properties are specified
        {'item_exo_properties': ['starring']},
    ]
    for exo_kwargs in cases_with_properties:
        graph = NXTripartiteGraph(frame, movies_dir, **exo_kwargs)

        # Simple asserts just to make sure the graph is created
        self.assertGreater(len(graph.user_nodes), 0)
        self.assertGreater(len(graph.item_nodes), 0)
        self.assertGreater(len(graph.property_nodes), 0)

    # Graph built from the ratings only: no property node is expected
    graph = NXTripartiteGraph(frame)
    self.assertGreater(len(graph.user_nodes), 0)
    self.assertGreater(len(graph.item_nodes), 0)
    self.assertEqual(len(graph.property_nodes), 0)
def test_fit(self):
    """Check NumberNormalizer.fit on single values for several constructor argument pairs."""
    # (first constructor arg, second constructor arg, value passed to fit, expected result)
    cases = [
        (-10, -5, -6, .6),
        (-5, 4, 0.5, 0.222),
        (0, 5, 2, -0.2),
        (1, 5, 2, -0.5),
        (-7, 0, -6, -0.714),
        (0, 10, 0.5, -0.9),
        # Values lying outside the two constructor arguments
        (0, 10, 11, 10),
        (0, 10, -1, 0),
    ]

    for lower, upper, value, expected in cases:
        fitted = NumberNormalizer(lower, upper).fit(value)
        self.assertAlmostEqual(fitted, expected, places=3)
def test_graph_creation(self):
    """Create NXTripartiteGraph in several configurations and sanity-check its node sets."""

    def check_nodes(graph, expect_properties):
        # User and item nodes must always exist; property nodes only when requested
        self.assertGreater(len(graph.user_nodes), 0)
        self.assertGreater(len(graph.item_nodes), 0)
        if expect_properties:
            self.assertGreater(len(graph.property_nodes), 0)
        else:
            self.assertEqual(len(graph.property_nodes), 0)

    # Import ratings as DataFrame
    importer = RatingsImporter(
        source=CSVFile(ratings_filename),
        from_id_column='user_id',
        to_id_column='item_id',
        score_column='points',
        timestamp_column='timestamp',
        score_processor=NumberNormalizer()
    )
    frame = importer.import_ratings()

    # Property 'starring' from the representation addressed by index 0
    # NOTE(review): index 0 is presumably the 'dbpedia' representation — confirm
    check_nodes(
        NXTripartiteGraph(frame, movies_dir,
                          item_exo_representation=0,
                          item_exo_properties=['starring']),
        True,
    )

    # Only the exo representation is specified, addressed by name
    check_nodes(
        NXTripartiteGraph(frame, movies_dir,
                          item_exo_representation="dbpedia"),
        True,
    )

    # Only the exo properties are specified
    check_nodes(
        NXTripartiteGraph(frame, movies_dir,
                          item_exo_properties=['starring']),
        True,
    )

    # Graph built from the ratings only: no property node is expected
    check_nodes(NXTripartiteGraph(frame), False)
# NOTE(review): the triple-quote that closes this snippet has no visible partner
# here; presumably this test sits inside a triple-quoted string (i.e. it is
# disabled) — confirm before relying on it. Only '#' comments are used below so
# that no additional string delimiter is introduced.
def test_import_ratings(self):
    # Builds a RatingsImporter over a JSON source and runs import_ratings();
    # the return value is discarded and nothing is asserted.
    # `file_path` is not defined in this method: it is read from an enclosing scope.
    RatingsImporter(
        source=JSONFile(file_path=file_path),
        output_directory="test_ratings",
        rating_configs=[
            # Two fields processed with TextBlobSentimentAnalysis ...
            RatingsFieldConfig(field_name="review_title", processor=TextBlobSentimentAnalysis()),
            RatingsFieldConfig(field_name="text", processor=TextBlobSentimentAnalysis()),
            # ... and one field processed with NumberNormalizer(min_=0, max_=5)
            RatingsFieldConfig(field_name="stars", processor=NumberNormalizer(min_=0, max_=5))
        ],
        from_field_name="user_id",
        to_field_name="item_id",
        timestamp_field_name="timestamp").import_ratings()
"""
def test_import_ratings_w_score_processor(self):
    """Import ratings with a NumberNormalizer score processor.

    Checks that the resulting frame exposes exactly the columns
    ``from_id``, ``to_id`` and ``score`` (in that order) and that every
    imported score lies in the closed interval [-1, 1].
    """
    ri = RatingsImporter(source=self.raw_source,
                         from_id_column=0,
                         to_id_column=1,
                         score_column=4,
                         score_processor=NumberNormalizer())
    ratings = ri.import_ratings()

    expected_columns = ['from_id', 'to_id', 'score']
    result_columns = list(ratings.columns)
    self.assertEqual(expected_columns, result_columns)

    score_result = list(ratings['score'])
    # all() is required here: a bare generator expression is always truthy,
    # so passing it straight to assertTrue could never fail.
    self.assertTrue(all(-1 <= score <= 1 for score in score_result))
def test_import_ratings(self):
    """Run RatingsImporter on the test JSON dataset.

    The dataset path is resolved from two candidate locations: the first one
    is probed by opening it, and the second is used when that raises
    FileNotFoundError. The import result is not inspected.
    """
    dataset_path = '../../../datasets/test_import_ratings.json'
    try:
        # Probe only: the handle is closed as soon as the block exits
        with open(dataset_path):
            pass
    except FileNotFoundError:
        dataset_path = 'datasets/test_import_ratings.json'
    print(dataset_path)

    source = JSONFile(file_path=dataset_path)
    field_configs = [
        RatingsFieldConfig(preference_field_name="review_title",
                           processor=TextBlobSentimentAnalysis()),
        RatingsFieldConfig(preference_field_name="text",
                           processor=TextBlobSentimentAnalysis()),
        RatingsFieldConfig(preference_field_name="stars",
                           processor=NumberNormalizer(min_=0, max_=5)),
    ]
    importer = RatingsImporter(
        source=source,
        output_directory="test_ratings",
        rating_configs=field_configs,
        from_field_name="user_id",
        to_field_name="item_id",
        timestamp_field_name="timestamp",
    )
    importer.import_ratings()
from orange_cb_recsys.recsys.config import RecSysConfig THIS_DIR = os.path.dirname(os.path.abspath(__file__)) contents_path = os.path.join(THIS_DIR, "../../contents") datasets_path = os.path.join(THIS_DIR, "../../datasets") ratings_filename = os.path.join(datasets_path, "examples/new_ratings.csv") users_dir = os.path.join(contents_path, "examples/ex_1/users_1600355755.1935306") items_dir = os.path.join(contents_path, "examples/ex_1/movies_1600355972.49884") t_ratings = RatingsImporter( source=CSVFile(ratings_filename), rating_configs=[ RatingsFieldConfig(field_name='points', processor=NumberNormalizer(min_=1, max_=5)) ], from_field_name='user_id', to_field_name='item_id', timestamp_field_name='timestamp', ).import_ratings() class TestRankingEvalModel(TestCase): def test_fit(self): recsys_config = RecSysConfig(users_directory=users_dir, items_directory=items_dir, score_prediction_algorithm=None, ranking_algorithm=ClassifierRecommender( {'Plot': '0'}, SVM()),
def test_fit(self):
    """Smoke-test RankingAlgEvalModel.fit with a CentroidVector ranking algorithm.

    Ratings are imported from the example CSV file. A TypeError or ValueError
    raised while building or fitting the evaluation model is ignored, and the
    result of ``fit`` is not inspected.
    """
    # Disabled alternative, kept for reference: random synthetic ratings
    # instead of the CSV import.
    #
    # item_id_list = [
    #     'tt0112281', 'tt0112302', 'tt0112346', 'tt0112453', 'tt0112641',
    #     'tt0112760', 'tt0112896', 'tt0113041', 'tt0113101', 'tt0113189',
    #     'tt0113228', 'tt0113277', 'tt0113497', 'tt0113845', 'tt0113987',
    #     'tt0114319', 'tt0114388', 'tt0114576', 'tt0114709', 'tt0114885',
    # ]
    # record_list = []
    # for i in range(1, 7):
    #     extract_items = set([x for i, x in enumerate(item_id_list)
    #                          if np.random.randint(0, 2) == 1 and i < 10])
    #     for item in extract_items:
    #         record_list.append((str(i), item, str(np.random.randint(-0, 11) / 10)))
    # t_ratings = pd.DataFrame.from_records(record_list, columns=['from_id', 'to_id', 'score'])

    csv_path = 'datasets/examples/new_ratings.csv'
    importer = RatingsImporter(
        source=CSVFile(csv_path),
        rating_configs=[
            RatingsFieldConfig(field_name='points',
                               processor=NumberNormalizer(min_=1, max_=5))
        ],
        from_field_name='user_id',
        to_field_name='item_id',
        timestamp_field_name='timestamp',
    )
    ratings = importer.import_ratings()
    print(ratings)

    config = RecSysConfig(
        users_directory='contents/examples/ex_1/users_1600355755.1935306',
        items_directory='contents/examples/ex_1/movies_1600355972.49884',
        score_prediction_algorithm=None,
        ranking_algorithm=CentroidVector(item_field='Plot',
                                         field_representation='1',
                                         similarity=CosineSimilarity()),
        rating_frame=ratings,
    )

    try:
        # Everything is built inside the try so that errors raised by the
        # partitioning or metric constructors are covered by the handler too
        partitioning = KFoldPartitioning()
        metrics = [
            Precision(0.4),
            Recall(0.4),
            FNMeasure(1, 0.4),
            MRR(0.4),
            NDCG({0: (-1, 0), 1: (0, 1)}),
            Correlation('pearson'),
            Correlation('kendall'),
            Correlation('spearman'),
        ]
        RankingAlgEvalModel(config=config,
                            partitioning=partitioning,
                            metric_list=metrics).fit()
    except (TypeError, ValueError):
        # NOTE(review): swallowing these errors can hide real failures —
        # confirm this is intended
        pass