def get_product_templates_tfidf(context, product_ids, blocked_products=None):
    """ Retrieves the product templates of each given product, leaving out blocked templates and,
        when the context configures a near-identical filter, near-identical ones.

        :param context: A session context.
        :param product_ids: A list with the ids of the intended products.
        :param blocked_products: An iterable with ids of products that should not be fetched
            (None means that nothing is blocked).

        :returns: A map {product_id: list of [strength, template_id] pairs}. Only the products present
            in the map returned by the data proxy have an entry.
    """
    # Materialised once as a set: membership tests become constant-time, and a one-shot iterable
    # (e.g. a generator) is not exhausted by the first test.
    blocked = set(blocked_products) if blocked_products is not None else set()

    # Index 1 of each fetched tuple holds the template entries; the template id sits at index 1 of each entry.
    templates_map = context.data_proxy.fetch_product_templates(product_ids)
    result = {p_id: [t for t in templates_tuple[1] if t[1] not in blocked]
              for p_id, templates_tuple in templates_map.items()}

    # With a user context, the context's own product models dict is used (and possibly updated in place below).
    product_models = context.product_models if context.user_context is not None else {}

    all_products = set(product_ids)
    for templates_with_strengths in result.values():
        all_products.update(t[1] for t in templates_with_strengths)

    # Missing product models are only fetched under the AFTER_SCORING filter strategy.
    products_with_missing_product_models = all_products - product_models.keys()
    if products_with_missing_product_models and context.filter_strategy == ctx.AFTER_SCORING:
        product_models.update(context.data_proxy.fetch_product_models(list(products_with_missing_product_models)))

    near_identical_filter_is_on = (context.near_identical_filter_field is not None) and \
                                  (context.near_identical_filter_threshold is not None)
    if near_identical_filter_is_on:
        # Only existing keys are reassigned, so iterating over the items while updating is safe.
        for product_id, templates_with_strengths in result.items():
            # Templates without a product model cannot be compared, thus they are never disregarded.
            templates = [t[1] for t in templates_with_strengths if t[1] in product_models]
            templates_to_disregard = pinpoint_near_identical_products(context, templates, product_models,
                                                                      base_product_id=product_id)
            result[product_id] = [t for t in templates_with_strengths if t[1] not in templates_to_disregard]

    return result
Example #2
0
    def recommend(self, n_recommendations):
        """ Returns the top-scored recommendations for the target user.

            The candidate products are gathered, scored and post-processed. Under the BEFORE_SCORING
            filter strategy, pre-filtered candidates that received no score are appended with a zero score.
            The scored products are then ranked and, when the session context configures a near-identical
            filter, the near-identical products are only used to fill the positions left after all the
            other ranked products.

            :param n_recommendations: The intended number of recommendations.

            :returns: A list of ranked entries holding the score at index 0 and the product at index 1.
        """
        start_time = time()
        log.info("Retrieving {0} recommendations for user [{1}]".format(n_recommendations,
                                                                        self.session_context.user_id))
        # Obtains the candidate products.
        candidate_products_by_algorithm = self._gather_processed_candidate_products(
            max(500, 3 * n_recommendations)
            # Hack to add some slack and make sure we bring enough products to overcome a possible subsequent pruning
            # by history decay, deleted and already consumed products.
        )

        # Here again we leave some slack, so we can post-process and still retain the intended number of products.
        number_of_recommendations_to_ask_for = 3 * n_recommendations
        if self.session_context.filter_strategy == ctx.BEFORE_SCORING:
            # There is no point in asking for more products than those that passed the pre-filter.
            number_of_recommendations_to_ask_for = min(number_of_recommendations_to_ask_for,
                                                       len(candidate_products_by_algorithm[PRE_FILTER]))

        # Scores the products.
        scored_recommendations = self.gather_recommendation_scores(candidate_products_by_algorithm,
                                                                   number_of_recommendations_to_ask_for)
        if log.is_debug_enabled():
            log.debug('full recommendations: [{0}] => [{1}]'.format(len(scored_recommendations),
                                                                    scored_recommendations))
        else:
            log.info('full recommendations: [{0}]'.format(len(scored_recommendations)))

        # Post-processes the scores (boosts, decays, etc.).
        scored_recommendations = self.post_process_scores(scored_recommendations)

        if log.is_debug_enabled():
            log.debug('post-processed recommendations: [{0}] => [{1}]'.format(len(scored_recommendations),
                                                                              scored_recommendations))
        else:
            log.info('post-processed recommendations: [{0}]'.format(len(scored_recommendations)))

        # Makes sure that all pre-filtered products have made their way into the recommendations list.
        if self.session_context.filter_strategy == ctx.BEFORE_SCORING:
            all_candidates = candidate_products_by_algorithm[PRE_FILTER]
            if len(scored_recommendations) < len(all_candidates):
                recommended_products = {p[1] for p in scored_recommendations}
                missing_candidates = list(all_candidates - recommended_products)
                # Shuffles, so the zero-scored fill-ins do not come in any particular order.
                random.shuffle(missing_candidates)
                is_hybrid = self.is_hybrid()  # loop-invariant
                # Each fill-in gets its own fresh score list.
                scored_recommendations += [[["PRE-FILTER", 0] if is_hybrid else [0], missing_candidate]
                                           for missing_candidate in missing_candidates]

        should_worry_about_near_identical = (self.session_context.near_identical_filter_field is not None) and \
                                            (self.session_context.near_identical_filter_threshold is not None)

        # Ranks (asking for twice as many products when some of them may be demoted as near-identical).
        slack_for_near_identical = 2 if should_worry_about_near_identical else 1
        ranked_recommendations = self._nlargest(slack_for_near_identical * n_recommendations, scored_recommendations)

        if log.is_debug_enabled():
            log.debug('ranked recommendations: [{0}] => [{1}]'.format(len(ranked_recommendations),
                                                                      ranked_recommendations))
        else:
            log.info('ranked recommendations: [{0}]'.format(len(ranked_recommendations)))

        if should_worry_about_near_identical:
            # Identifies near-identical products within a same page and sends them to the end of the list.
            products = [r[1] for r in ranked_recommendations]
            products_to_disregard = pinpoint_near_identical_products(self.session_context, products,
                                                                     self.session_context.product_models)
            result = []
            near_identical = []
            count_recommendations = 0
            for score_and_product in ranked_recommendations:
                score, product = score_and_product[0], score_and_product[1]
                if product in products_to_disregard:
                    new_score = ["NI"] + score  # indicates it was decayed for being 'near-identical'
                    near_identical.append((new_score, product))
                    continue

                result.append(score_and_product)
                count_recommendations += 1
                if count_recommendations == n_recommendations:
                    break

            # Fills the remaining positions (if any) with the near-identical products seen so far.
            hole = n_recommendations - count_recommendations
            if hole > 0:
                result += near_identical[:hole]

            # Logs the filtered list (not the ranked list it was derived from).
            if log.is_debug_enabled():
                log.debug('recommendations after near-identical filter [count({0})] => [{1}]'.format(
                    len(result), result))
            else:
                log.info('recommendations after near-identical filter [count({0})]'.format(
                    len(result)))
        else:
            result = ranked_recommendations  # There was no need to filter near-identical products...

        log.info("Recommender{0} took [{1:2.6f}] seconds for user [{2}]".format(
            self.get_suffix(), time() - start_time, self.session_context.user_id))

        return result