def get_records_to_predict_topn(self):
    """Create the top-N evaluator and choose the records to predict.

    Builds a ``TopNEvaluator`` from ``self.records`` and
    ``self.test_records``, initializes it and takes its important records.
    Depending on ``Constants``, the important records are then restricted
    to those whose ``HAS_CONTEXT_FIELD`` is ``True`` and/or randomly
    sampled down to ``MAX_SAMPLE_TEST_SET`` entries. The resulting list is
    written back to the evaluator before the records to predict are
    requested from it.

    Sets ``self.top_n_evaluator``, ``self.important_records`` and
    ``self.records_to_predict``; clears ``self.test_records`` and triggers
    a garbage collection.
    """
    timestamp = time.strftime("%Y/%m/%d-%H:%M:%S")
    print('get_records_to_predict_topn: %s' % timestamp)

    evaluator = TopNEvaluator(
        self.records, self.test_records, Constants.ITEM_TYPE, 10,
        Constants.TOPN_NUM_ITEMS)
    self.top_n_evaluator = evaluator
    evaluator.initialize()
    self.important_records = evaluator.important_records

    if Constants.TEST_CONTEXT_REVIEWS_ONLY:
        # Keep only the important records flagged as having context.
        self.important_records = ETLUtils.filter_records(
            self.important_records, Constants.HAS_CONTEXT_FIELD, [True])
        # NOTE(review): this result is overwritten at the end of the
        # method and is computed before the evaluator receives the
        # filtered records; the call looks redundant but is kept in case
        # the evaluator relies on its side effects -- confirm.
        self.records_to_predict = evaluator.get_records_to_predict()

    sample_limit = Constants.MAX_SAMPLE_TEST_SET
    if sample_limit is not None:
        num_important = len(self.important_records)
        print('important_records %d' % num_important)
        if num_important <= sample_limit:
            # Nothing to sample down: the limit already covers every record.
            print('WARNING max_sample_test_set is greater than the '
                  'number of important records')
        else:
            self.important_records = random.sample(
                self.important_records, sample_limit)

    # Hand the (possibly filtered/sampled) records back to the evaluator
    # so the records to predict are derived from them.
    evaluator.important_records = self.important_records
    self.records_to_predict = evaluator.get_records_to_predict()

    # Drop the reference to the test records and reclaim memory.
    self.test_records = None
    gc.collect()
def test_get_items_to_predict(self):
    """Run the evaluator end to end and print the records to predict.

    This is a smoke test: it makes no assertions and only passes if
    initialization, record selection and evaluation run without raising.
    """
    evaluator = TopNEvaluator(ratings, test_set)
    evaluator.I = 4
    evaluator.N = 2
    evaluator.initialize()

    records = evaluator.get_records_to_predict()

    # One dummy prediction per (test record, I + 1) slot.
    # NOTE(review): presumably I + 1 is the number of candidates per test
    # record -- confirm against TopNEvaluator.
    num_predictions = len(test_set) * (evaluator.I + 1)
    evaluator.evaluate([0] * num_predictions)

    print(records)
    for record in records:
        print(record)
def get_records_to_predict_topn(self):
    """Create the top-N evaluator and fetch the records to predict.

    Loads the pickled user-item map from ``Constants.USER_ITEM_MAP_FILE``,
    initializes a ``TopNEvaluator`` with it and stores the evaluator, its
    records to predict and its important records on ``self``. Afterwards
    ``self.test_records`` is cleared and a garbage collection is triggered.
    """
    timestamp = time.strftime("%Y/%m/%d-%H:%M:%S")
    print('get_records_to_predict_topn: %s' % timestamp)

    with open(Constants.USER_ITEM_MAP_FILE, 'rb') as map_file:
        user_item_map = pickle.load(map_file)

    evaluator = TopNEvaluator(
        self.records, self.test_records, Constants.ITEM_TYPE, 10,
        Constants.TOPN_NUM_ITEMS)
    self.top_n_evaluator = evaluator
    evaluator.initialize(user_item_map)

    self.records_to_predict = evaluator.get_records_to_predict()
    self.important_records = evaluator.important_records

    # Drop the reference to the test records and reclaim memory.
    self.test_records = None
    gc.collect()
def export(self):
    """Prepare the evaluator, the records to predict and the key reviews.

    Loads the pickled user-item map from ``Constants.USER_ITEM_MAP_FILE``,
    initializes a ``TopNEvaluator`` with it and stores the evaluator, its
    records to predict and its important records on ``self``. Also stores
    in ``self.important_reviews`` the entries of ``self.test_reviews``
    whose ``rating`` equals 5.
    """
    timestamp = time.strftime("%Y/%m/%d-%H:%M:%S")
    print('export: %s' % timestamp)

    with open(Constants.USER_ITEM_MAP_FILE, 'rb') as map_file:
        user_item_map = pickle.load(map_file)

    evaluator = TopNEvaluator(
        self.records, self.test_records, Constants.ITEM_TYPE, 10,
        Constants.TOPN_NUM_ITEMS)
    self.top_n_evaluator = evaluator
    evaluator.initialize(user_item_map)

    self.records_to_predict = evaluator.get_records_to_predict()
    self.important_records = evaluator.important_records

    # Collect the test reviews rated exactly 5.
    top_rated_reviews = []
    for review in self.test_reviews:
        if review.rating == 5:
            top_rated_reviews.append(review)
    self.important_reviews = top_rated_reviews
def test_calculate_recall(self):
    """Check the recall reported after three ``update_num_hits`` calls.

    The target item appears in the top-N list for two of the three calls,
    and the expected recall is 2/3.
    """
    top_n = 5
    evaluator = TopNEvaluator([], [], top_n)

    # (top-N list, target item) pairs fed to the evaluator in order.
    cases = (
        (['I1', 'I2', 'I3', 'I4', 'I5'], 'I3'),
        (['I1', 'I2', 'I3', 'I4', 'I5'], 'I6'),
        (['I1', 'I6', 'I3', 'I4', 'I5'], 'I4'),
    )
    for top_n_list, item in cases:
        evaluator.update_num_hits(top_n_list, item)

    self.assertEqual(2.0 / 3, evaluator.calculate_recall())
def test_get_irrelevant_items(self):
    """Check ``get_irrelevant_items`` for two known users and an unknown one.

    The expected items are compared ignoring order; asking for user
    ``'U6'`` must raise ``KeyError``.
    """
    evaluator = TopNEvaluator(ratings, None)
    evaluator.initialize()

    # (user, expected irrelevant items): 'I10'..'I16' for U1 and
    # 'I1'..'I15' for U5.
    expectations = (
        ('U1', ['I%d' % number for number in range(10, 17)]),
        ('U5', ['I%d' % number for number in range(1, 16)]),
    )
    for user, expected_items in expectations:
        actual_items = evaluator.get_irrelevant_items(user)
        self.assertItemsEqual(expected_items, actual_items)

    with self.assertRaises(KeyError):
        evaluator.get_irrelevant_items('U6')
def test_update_num_hits(self):
    """Check the hit and miss counters after each ``update_num_hits`` call.

    Both counters start at zero; each step lists the expected counter
    values once the call has been made.
    """
    evaluator = TopNEvaluator([], [])
    self.assertEqual(0, evaluator.num_generic_hits)
    self.assertEqual(0, evaluator.num_generic_misses)

    # (top-N list, target item, expected hits, expected misses)
    steps = (
        (['I1', 'I2', 'I3', 'I4', 'I5'], 'I3', 1, 0),
        (['I1', 'I2', 'I3', 'I4', 'I5'], 'I6', 1, 1),
        (['I1', 'I6', 'I3', 'I4', 'I5'], 'I4', 2, 1),
    )
    for top_n_list, item, expected_hits, expected_misses in steps:
        evaluator.update_num_hits(top_n_list, item)
        self.assertEqual(expected_hits, evaluator.num_generic_hits)
        self.assertEqual(expected_misses, evaluator.num_generic_misses)