Example #1
0
    def get_records_to_predict_topn(self):
        """Prepare the test set for top-N evaluation.

        Builds a ``TopNEvaluator`` over ``self.records`` / ``self.test_records``,
        optionally filters the important records down to context-bearing ones,
        optionally down-samples them, and finally stores the records to predict
        on ``self``. Frees ``self.test_records`` afterwards to reclaim memory.
        """
        # Timestamped progress log, matching the style of sibling methods.
        print('get_records_to_predict_topn: %s' %
              time.strftime("%Y/%m/%d-%H:%M:%S"))

        self.top_n_evaluator = TopNEvaluator(self.records, self.test_records,
                                             Constants.ITEM_TYPE, 10,
                                             Constants.TOPN_NUM_ITEMS)
        self.top_n_evaluator.initialize()
        self.important_records = self.top_n_evaluator.important_records

        if Constants.TEST_CONTEXT_REVIEWS_ONLY:
            # Keep only records flagged as having context.
            self.important_records = ETLUtils.filter_records(
                self.important_records, Constants.HAS_CONTEXT_FIELD, [True])

            # NOTE(review): this assignment is always overwritten below (after
            # the filtered/sampled records are pushed back onto the evaluator),
            # and it runs before the evaluator sees the filtered records.
            # It looks redundant — confirm get_records_to_predict() has no
            # needed side effects before removing it.
            self.records_to_predict =\
                self.top_n_evaluator.get_records_to_predict()

        if Constants.MAX_SAMPLE_TEST_SET is not None:
            print('important_records %d' % len(self.important_records))
            if len(self.important_records) > Constants.MAX_SAMPLE_TEST_SET:
                # Down-sample to cap evaluation cost.
                self.important_records = random.sample(
                    self.important_records, Constants.MAX_SAMPLE_TEST_SET)
            else:
                # Sampling skipped: cap is >= (or equal to) the record count.
                message = 'WARNING max_sample_test_set is greater than the ' \
                          'number of important records'
                print(message)

        # Push the (possibly filtered/sampled) records back before computing
        # the final prediction set.
        self.top_n_evaluator.important_records = self.important_records
        self.records_to_predict = self.top_n_evaluator.get_records_to_predict()
        # Release the test records; they are no longer needed.
        self.test_records = None
        gc.collect()
Example #2
0
    def test_get_items_to_predict(self):
        """Smoke-test the prediction-set generation and evaluation path."""
        evaluator = TopNEvaluator(ratings, test_set)
        evaluator.I = 4
        evaluator.N = 2
        evaluator.initialize()
        items_to_predict = evaluator.get_records_to_predict()

        # One zero prediction per test record, per irrelevant item plus one.
        num_predictions = len(test_set) * (evaluator.I + 1)
        evaluator.evaluate([0] * num_predictions)

        print(items_to_predict)

        for prediction_item in items_to_predict:
            print(prediction_item)
Example #3
0
    def get_records_to_predict_topn(self):
        """Load the user-item map, build the evaluator, and store the
        records to predict on ``self``; releases the test records after."""
        print('get_records_to_predict_topn: %s' %
              time.strftime("%Y/%m/%d-%H:%M:%S"))

        # The user-item map is precomputed and pickled on disk.
        with open(Constants.USER_ITEM_MAP_FILE, 'rb') as map_file:
            user_item_map = pickle.load(map_file)

        evaluator = TopNEvaluator(
            self.records, self.test_records, Constants.ITEM_TYPE, 10,
            Constants.TOPN_NUM_ITEMS)
        evaluator.initialize(user_item_map)

        self.top_n_evaluator = evaluator
        self.records_to_predict = evaluator.get_records_to_predict()
        self.important_records = evaluator.important_records

        # Test records are no longer needed; free the memory eagerly.
        self.test_records = None
        gc.collect()
    def export(self):
        """Build the evaluator from the pickled user-item map and collect
        the records/reviews needed for export (5-star reviews only)."""
        print('export: %s' % time.strftime("%Y/%m/%d-%H:%M:%S"))

        # The user-item map is precomputed and pickled on disk.
        with open(Constants.USER_ITEM_MAP_FILE, 'rb') as map_file:
            user_item_map = pickle.load(map_file)

        evaluator = TopNEvaluator(self.records, self.test_records,
                                  Constants.ITEM_TYPE, 10,
                                  Constants.TOPN_NUM_ITEMS)
        evaluator.initialize(user_item_map)

        self.top_n_evaluator = evaluator
        self.records_to_predict = evaluator.get_records_to_predict()
        self.important_records = evaluator.important_records
        # Only top-rated reviews are considered important for export.
        self.important_reviews = [
            review for review in self.test_reviews if review.rating == 5
        ]
Example #5
0
    def test_calculate_recall(self):
        """Recall after 2 hits out of 3 attempts must be 2/3."""
        N = 5
        top_n_evaluator = TopNEvaluator([], [], N)

        # (top-N list, target item): the first and third targets appear in
        # their lists (hits), the second does not (miss).
        attempts = [
            (['I1', 'I2', 'I3', 'I4', 'I5'], 'I3'),
            (['I1', 'I2', 'I3', 'I4', 'I5'], 'I6'),
            (['I1', 'I6', 'I3', 'I4', 'I5'], 'I4'),
        ]
        for top_n_list, item in attempts:
            top_n_evaluator.update_num_hits(top_n_list, item)

        self.assertEqual(2.0 / 3, top_n_evaluator.calculate_recall())
Example #6
0
    def test_get_irrelevant_items(self):
        """Irrelevant items for a user are all items the user has not rated;
        an unknown user raises ``KeyError``."""
        # unittest renamed assertItemsEqual (Python 2) to assertCountEqual
        # (Python 3.2+); resolve whichever this runtime provides so the test
        # runs on both interpreters.
        assert_same_items = getattr(
            self, 'assertCountEqual',
            getattr(self, 'assertItemsEqual', None))

        top_n_evaluator = TopNEvaluator(ratings, None)
        top_n_evaluator.initialize()

        actual_irrelevant_items = top_n_evaluator.get_irrelevant_items('U1')
        expected_irrelevant_items = [
            'I10', 'I11', 'I12', 'I13', 'I14', 'I15', 'I16'
        ]
        assert_same_items(expected_irrelevant_items,
                          actual_irrelevant_items)

        actual_irrelevant_items = top_n_evaluator.get_irrelevant_items('U5')
        expected_irrelevant_items = [
            'I1', 'I2', 'I3', 'I4', 'I5', 'I6', 'I7', 'I8', 'I9', 'I10', 'I11',
            'I12', 'I13', 'I14', 'I15'
        ]
        assert_same_items(expected_irrelevant_items,
                          actual_irrelevant_items)

        # An unseen user must raise rather than return an empty list.
        self.assertRaises(KeyError, top_n_evaluator.get_irrelevant_items, 'U6')
Example #7
0
    def test_update_num_hits(self):
        """Hit/miss counters start at zero and accumulate per attempt."""
        top_n_evaluator = TopNEvaluator([], [])
        self.assertEqual(0, top_n_evaluator.num_generic_hits)
        self.assertEqual(0, top_n_evaluator.num_generic_misses)

        # (top-N list, target item, expected hits so far, expected misses).
        scenarios = [
            (['I1', 'I2', 'I3', 'I4', 'I5'], 'I3', 1, 0),
            (['I1', 'I2', 'I3', 'I4', 'I5'], 'I6', 1, 1),
            (['I1', 'I6', 'I3', 'I4', 'I5'], 'I4', 2, 1),
        ]
        for top_n_list, item, expected_hits, expected_misses in scenarios:
            top_n_evaluator.update_num_hits(top_n_list, item)
            self.assertEqual(
                expected_hits, top_n_evaluator.num_generic_hits)
            self.assertEqual(
                expected_misses, top_n_evaluator.num_generic_misses)