Code example #1
    def evaluate(self, method='hits', topk=None, not_rated_penalty=None, on_feedback_level=None):
        feedback = self.data.fields.feedback
        if int(topk or 0) > self.topk:
            self.topk = topk  # will also flush old recommendations

        # support rolling back scenario for @k calculations
        recommendations = self.recommendations[:, :topk]  # will recalculate if empty

        eval_data = self.data.test.holdout
        if self.switch_positive is None:
            # all recommendations are considered positive predictions;
            # this is the proper setting for binary data problems (implicit feedback)
            # in this case, all unrated items recommended by an algorithm are
            # assumed to be "honest" false positives, therefore the penalty equals 1
            not_rated_penalty = 1 if not_rated_penalty is None else not_rated_penalty
            is_positive = None
        else:
            # if data is not binary (explicit feedback), the intuition is different:
            # it becomes unclear whether unrated items are "honest" false positives,
            # as these items can include both top-rated and down-rated ones
            # the default setting in this case is to ignore such items altogether
            # by setting the penalty to 0; it is adjustable, however
            not_rated_penalty = not_rated_penalty or 0
            is_positive = (eval_data[feedback] >= self.switch_positive).values

        scoring_data = assemble_scoring_matrices(recommendations, eval_data,
                                                 self._key, self._target,
                                                 is_positive, feedback=feedback)

        if method == 'relevance':  # no need for feedback
            if self.data.holdout_size == 1:
                scores = get_hr_score(scoring_data[1])
            else:
                scores = get_relevance_scores(*scoring_data, not_rated_penalty=not_rated_penalty)
        elif method == 'ranking':
            if self.data.holdout_size == 1:
                scores = get_mrr_score(scoring_data[1])
            else:
                ndcg_alternative = get_default('ndcg_alternative')
                topk = recommendations.shape[1]  # handle topk=None case
                # topk has to be passed explicitly, otherwise it's unclear how to
                # estimate ideal ranking for NDCG and NDCL metrics in get_ndcr_discounts
                scores = get_ranking_scores(*scoring_data, switch_positive=self.switch_positive, topk=topk, alternative=ndcg_alternative)
        elif method == 'hits':  # no need for feedback
            scores = get_hits(*scoring_data, not_rated_penalty=not_rated_penalty)
        else:
            raise NotImplementedError
        return scores
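
For reference, here is a minimal usage sketch for the single-metric evaluate() above. It assumes a polara-style workflow (a RecommenderData instance wrapped by a model such as SVDModel, built on MovieLens data); the import paths and the get_movielens_data helper are assumptions and may differ between polara versions, but the evaluate() calls follow the signature shown in this example.

# usage sketch -- imports and the dataset helper are assumed, not taken from the snippet above
from polara import get_movielens_data, RecommenderData, SVDModel

raw_data = get_movielens_data()                        # userid / movieid / rating dataframe
data_model = RecommenderData(raw_data, 'userid', 'movieid', 'rating')
data_model.prepare()                                   # builds the training and test.holdout parts

svd = SVDModel(data_model)
svd.build()

hits_scores = svd.evaluate('hits')                     # default method
ranking_scores = svd.evaluate('ranking', topk=10)      # nDCG-style scores at top-10
relevance_scores = svd.evaluate('relevance', topk=10)  # precision/recall-style scores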
Code example #2
File: models.py  Project: wahyudierwin/polara
    def evaluate(self,
                 metric_type='all',
                 topk=None,
                 not_rated_penalty=None,
                 switch_positive=None,
                 ignore_feedback=False,
                 simple_rates=False,
                 on_feedback_level=None):
        if metric_type == 'all':
            metric_type = ['hits', 'relevance', 'ranking', 'experience']

        if metric_type == 'main':
            metric_type = ['relevance', 'ranking']

        if not isinstance(metric_type, (list, tuple)):
            metric_type = [metric_type]

        # support rolling back scenario for @k calculations
        if int(topk or 0) > self.topk:
            self.topk = topk  # will also flush old recommendations

        # ORDER OF CALLS MATTERS!!!
        # make sure to call holdout before getting recommendations
        # this will ensure that model is renewed if data has changed
        holdout = self.data.test.holdout  # <-- call before getting recs
        recommendations = self.recommendations[:, :topk]  # will recalculate if empty

        switch_positive = switch_positive or self.switch_positive
        feedback = self.data.fields.feedback
        if (switch_positive is None) or (feedback is None):
            # all recommendations are considered positive predictions;
            # this is the proper setting for binary data problems (implicit feedback)
            # in this case, all unrated items recommended by an algorithm are
            # assumed to be "honest" false positives, therefore the penalty equals 1
            not_rated_penalty = 1 if not_rated_penalty is None else not_rated_penalty
            is_positive = None
        else:
            # if data is not binary (explicit feedback), the intuition is different:
            # it becomes unclear whether unrated items are "honest" false positives,
            # as these items can include both top-rated and down-rated ones
            # the default setting in this case is to ignore such items altogether
            # by setting the penalty to 0; it is adjustable, however
            not_rated_penalty = not_rated_penalty or 0
            is_positive = (holdout[feedback] >= switch_positive).values

        feedback = None if ignore_feedback else feedback
        scoring_data = assemble_scoring_matrices(recommendations,
                                                 holdout,
                                                 self._prediction_key,
                                                 self._prediction_target,
                                                 is_positive,
                                                 feedback=feedback)

        scores = []
        if 'relevance' in metric_type:  # no need for feedback
            if (self.data.holdout_size == 1) or simple_rates:
                scores.append(get_hr_score(scoring_data[1]))
            else:
                scores.append(
                    get_relevance_scores(*scoring_data,
                                         not_rated_penalty=not_rated_penalty))

        if 'ranking' in metric_type:
            if (self.data.holdout_size == 1) or simple_rates:
                scores.append(get_mrr_score(scoring_data[1]))
            else:
                ndcg_alternative = get_default('ndcg_alternative')
                topk = recommendations.shape[1]  # handle topk=None case
                # topk has to be passed explicitly, otherwise it's unclear how to
                # estimate ideal ranking for NDCG and NDCL metrics in get_ndcr_discounts
                scores.append(
                    get_ranking_scores(*scoring_data,
                                       switch_positive=switch_positive,
                                       topk=topk,
                                       alternative=ndcg_alternative))

        if 'experience' in metric_type:  # no need for feedback
            fields = self.data.fields
            # support custom scenarios, e.g. coldstart
            entity_type = fields._fields[fields.index(self._prediction_target)]
            entity_index = getattr(self.data.index, entity_type)
            try:
                n_entities = entity_index.shape[0]
            except AttributeError:
                n_entities = entity_index.training.shape[0]
            scores.append(get_experience_scores(recommendations, n_entities))

        if 'hits' in metric_type:  # no need for feedback
            scores.append(
                get_hits(*scoring_data, not_rated_penalty=not_rated_penalty))

        if not scores:
            raise NotImplementedError

        if len(scores) == 1:
            scores = scores[0]
        return scores
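
A short note on this multi-metric variant: the scores list is filled in the order the checks appear in the method body (relevance, ranking, experience, hits), not in the order given by metric_type, and a single requested family is returned unwrapped rather than as a one-element list. The sketch below assumes the same svd model as in the earlier sketch and only illustrates how the return value might be unpacked.

# assumes `svd` exposes the multi-metric evaluate() shown above
all_scores = svd.evaluate()  # metric_type='all'
for family, score in zip(['relevance', 'ranking', 'experience', 'hits'], all_scores):
    print(family, score)

# a single metric family returns its score tuple directly, not a one-element list
ranking_scores = svd.evaluate('ranking', topk=10)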