def test_product_product_strengths_incremental_new_user_5_to_2_to_5star(self):
        """ Checks that product x product strengths updated incrementally are identical
            to those rebuilt from scratch after two consecutive activities.
            The same user/product pair first receives an activity taken from the rating-3
            bucket and, one minute later, an activity taken from the rating-5 bucket.
            The comparison runs once, after both activities have been saved.

            NOTE(review): the method name mentions a 2-star activity, but the first
            activity is read from `activities_by_rating[3]` -- confirm which one is intended.
        """
        user = "******"
        product = "p_mus_1"
        steps = ((3, "1988-11-06 9:00:00"),
                 (5, "1988-11-06 9:01:00"))

        for rating, timestamp in steps:
            activity = {
                "external_user_id": user,
                "external_product_id": product,
                "activity": self.session_context.activities_by_rating[rating][0],
                "created_at": pytz.utc.localize(dateutil.parser.parse(timestamp)),
            }
            # Templates are updated before the summaries for each activity
            pt.update_templates(self.session_context, activity)
            tasks.update_summaries(self.session_context, activity)

        self.compare_incremental_vs_from_scratch()
    def test_product_product_strengths_incremental_random(self):
        """ Randomized check that product x product strengths maintained step by step
            are identical to those rebuilt from scratch.
            Saves 100 activities with randomly drawn user, product and activity type,
            dated two seconds apart, comparing both approaches after each one of them.
            Does nothing unless `tests.INCLUDE_RANDOM_TESTS` is truthy.
        """
        if not tests.INCLUDE_RANDOM_TESTS:
            return

        users = list(self.db_proxy.fetch_all_user_ids())
        products = list(self.db_proxy.fetch_all_product_ids())
        first_date = pytz.utc.localize(dateutil.parser.parse("1988-11-06 6:00:00"))

        for step in range(100):
            # The draws are made in a fixed order (user, product, activity type)
            chosen_user = random.choice(users)
            chosen_product = random.choice(products)
            chosen_type = random.choice(self.session_context.supported_activities)

            activity = {
                "external_user_id": chosen_user,
                "external_product_id": chosen_product,
                "activity": chosen_type,
                "created_at": first_date + dt.timedelta(seconds=2 * step),
            }
            pt.update_templates(self.session_context, activity)
            tasks.update_summaries(self.session_context, activity)

            self.compare_incremental_vs_from_scratch()
def main(argv):
    """ Saves a single activity, described by command-line arguments, into the summaries.

        `argv` must hold at least five items, in this order: the environment,
        the external_user_id, the external_product_id, the activity type and
        the activity date (parsed with `dateutil.parser.parse`).

        Returns `{"success": True}` once the summaries have been updated. Otherwise
        returns a dict with `"success": False` and a `"message"`: the usage text when
        arguments are missing, or the formatted traceback of any exception raised
        while processing (such exceptions are also logged).
    """
    if len(argv) < 5:
        msg = ("You must specify the environment, the external_user_id, "
               "the external_product_id, the activity type and the activity date")
        log.error(msg)
        return {"success": False, "message": msg}

    try:
        # command-line arguments, in their positional order
        env, user, product, activity_type, raw_date = argv[:5]

        activity = {
            "external_user_id": user,
            "external_product_id": product,
            "activity": activity_type,
            "created_at": dateutil.parser.parse(raw_date),
        }
        maintenance.update_summaries(init_session(env), activity)
    except Exception:
        # Top-level boundary: the failure is logged and reported back to the caller
        log.exception('Exception on {0}:'.format(__name__))
        return {"success": False, "message": traceback.format_exc()}

    return {"success": True}
    def test_user_user_strengths_incremental_with_new_impressions_identified_users(self):
        """ Checks that user x user strengths updated incrementally are identical
            to those rebuilt from scratch when a non-anonymous user gets new impressions.
            For each scenario, two impressions are saved for the user, the user templates
            are regenerated, one activity of that same user is saved incrementally,
            and both approaches are compared.
        """
        scenarios = [("u_esp_4", "p_nonsense_1", "p_empty_with_missing_category", "p_filter_2", "buy")]
        base = dateutil.parser.parse("1988-11-06 6:00:00")

        for idx, (user, seen_1, seen_2, consumed, activity_type) in enumerate(scenarios):
            # Both impressions share the same timestamp
            impressions_date = pytz.utc.localize(base + dt.timedelta(seconds=(2 * idx + 1)))
            for seen in (seen_1, seen_2):
                self.db_proxy.increment_impression_summary(user_id=user, product_id=seen,
                                                           date=impressions_date, anonymous=False)

            # A from-scratch generation is needed here so that all new impressions are considered
            ut.generate_templates(self.session_context)

            # The activity is dated one second after the impressions
            activity = {
                "external_user_id": user,
                "external_product_id": consumed,
                "activity": activity_type,
                "created_at": pytz.utc.localize(base + dt.timedelta(seconds=(2 * idx + 2))),
            }
            ut.update_templates(self.session_context, activity)
            tasks.update_summaries(self.session_context, activity)

            targets = [user] if self.session_context.impressions_enabled else None
            self.compare_incremental_vs_from_scratch(target_users=targets)
    def test_user_user_strengths_incremental_with_new_impressions_two_new_products(self):
        """ Checks that user x user strengths updated incrementally are identical
            to those rebuilt from scratch when brand new products come into play.
            Two products with identical contents are inserted, and a single impression
            (user "u_eco_1", first twin) is saved. Then user "u_eco_2" performs a rating-5
            activity on each twin in turn; both approaches are compared after each activity.
        """
        twin_ids = ("p_tec_TWIN_1", "p_tec_TWIN_2")
        product_date = self.session_context.get_present_date() - dt.timedelta(days=2)

        # The twins differ in their ids only
        for twin_id in twin_ids:
            self.db_proxy.insert_product({
                "external_id": twin_id,
                "language": "english",
                "date": product_date,
                "resources": {"title": "Whatever Gets You Through The Night"},
                "full_content": "Begin. Technology. Technology. This is all we got. End.",
                "category": "Nonsense",
            })

        user1 = "u_eco_1"
        user2 = "u_eco_2"
        activity_type = self.session_context.activities_by_rating[5][0]

        # Only the first twin gets an impression, and only from user1
        impression_date = pytz.utc.localize(dateutil.parser.parse("1988-11-06 9:00:00"))
        self.db_proxy.increment_impression_summary(user_id=user1, product_id=twin_ids[0],
                                                   date=impression_date, anonymous=False)

        # user2 consumes the first twin, then the second, with a check after each activity
        for twin_id in twin_ids:
            activity = {
                "external_user_id": user2,
                "external_product_id": twin_id,
                "activity": activity_type,
                "created_at": self.session_context.get_present_date(),
            }
            ut.update_templates(self.session_context, activity)
            tasks.update_summaries(self.session_context, activity)

            targets = [user2] if self.session_context.impressions_enabled else None
            self.compare_incremental_vs_from_scratch(target_users=targets)
    def test_out_boost(self):
        """ Tests the effect of applying an out-boost on recommendations for some activity types.
            It applies to all user-based heuristics.

            The top recommendation for the target user is recorded. Then an out-boosted
            activity on that product is saved for every template user found to have recent
            activities on it. After refreshing the session, each score value of that
            product is expected to equal the old value multiplied by the out-boost.
        """
        target = "u_eco_2"
        session = tests.init_session(user_id=target, algorithm=self.algorithm)
        recommender = session.get_recommender()

        # Determines the index of the first actual value in the score tuples
        # produced by the recommender (note that hybrid recommenders use the first
        # position to indicate the algorithm number)
        start_index = 1 if recommender.is_hybrid() else 0

        recommendations = recommender.recommend(100)
        nose.tools.ok_(len(recommendations) > 0, "No recommendations were returned!")
        old_strength = recommendations[0][0]
        former_top_product = recommendations[0][1]

        # Meta-test: picks the first activity type whose out-boost is greater than 1,
        # which is exactly what the assertion below demands from the fixture
        boost_activity_type, out_boost = next(
            ((activity_type, boost)
             for activity_type, boost in self.session_context.out_boost_by_activity.items()
             if boost > 1),
            (None, 1))
        nose.tools.ok_(out_boost > 1, "Weak test fixture. There should be at least one out-boosted activity.")

        # Saves out-boosted activities for all templates who had consumed the former top product
        for template in [t[1] for t in session.user_templates]:
            recent_product_activities_of_template = session.recent_activities_by_product_by_template_user.get(
                template, {})
            if former_top_product not in recent_product_activities_of_template:
                continue
            activity = {"external_user_id": template,
                        "external_product_id": former_top_product,
                        "activity": boost_activity_type,
                        "created_at": session.get_present_date()}
            tasks.update_summaries(session, activity)

        session.refresh()
        recommendations = recommender.recommend(100)
        nose.tools.ok_(len(recommendations) > 0, "No recommendations were returned!")

        # Strength now assigned to the former top product (None if it is no longer recommended)
        new_strength = next((rec[0] for rec in recommendations if rec[1] == former_top_product), None)
        nose.tools.ok_(new_strength is not None,
                       "The former top recommendation should have been recommended again.")

        for i in range(start_index, len(new_strength)):
            ratio = new_strength[i] / old_strength[i]
            nose.tools.ok_(abs(ratio - out_boost) < tests.FLOAT_DELTA,
                           "Incorrect application of the activity out-boost")
    def test_multi_activities_blocking_vs_non_blocking(self):
        """ Checks that blocking activities prevent items from being recommended,
            and that non-blocking activities do not do so.

            For each "u_eco_<i>" user, the top recommended product receives a non-blocking
            activity (it must still be recommended afterwards) and then a blocking activity
            (it must no longer be recommended; this is verified only when the filter
            strategy is `ctx.AFTER_SCORING`).
        """
        def recommended_products(user_id):
            # Recommendations are always obtained from a brand new session for the user
            fresh_session = tests.init_session(user_id=user_id, algorithm=self.algorithm)
            recommendations = fresh_session.get_recommender().recommend(self.n_recommendations)
            return [r[1] for r in recommendations]

        supported_activities = self.session_context.supported_activities
        blocking_activities = self.session_context.blocking_activities
        # Keeps the order of the supported activities, so that the same non-blocking
        # activity is chosen on every run (a set difference has no stable order)
        blocking = set(blocking_activities)
        non_blocking_activities = [a for a in supported_activities if a not in blocking]

        # Meta-tests
        nose.tools.ok_(len(non_blocking_activities) > 0,
                       "Weak test fixture. There should be at least one non_blocking activity")
        nose.tools.ok_(len(blocking_activities) > 0,
                       "Weak test fixture. There should be at least one blocking activity")

        # Economia
        for i in range(1, dp.N_USR_ECONOMIA + 1):
            target = "u_eco_" + str(i)

            products = recommended_products(target)
            nose.tools.ok_(len(products) > 0, "Empty recommendation.")
            top_product = products[0]

            # Saves a non-blocking activity first
            activity = {"external_user_id": target,
                        "external_product_id": top_product,
                        "activity": non_blocking_activities[0],
                        "created_at": self.session_context.get_present_date()}
            tasks.update_summaries(self.session_context, activity)

            nose.tools.ok_(top_product in recommended_products(target),
                           "A non-blocking activity should not prevent a product from being recommended")

            # Then saves a blocking activity for the same product
            activity = {"external_user_id": target,
                        "external_product_id": top_product,
                        "activity": blocking_activities[0],
                        "created_at": self.session_context.get_present_date()}
            tasks.update_summaries(self.session_context, activity)

            products = recommended_products(target)
            if self.session_context.filter_strategy == ctx.AFTER_SCORING:
                nose.tools.ok_(top_product not in products,
                               "A blocking activity should prevent a product from being recommended")
    def test_user_user_strengths_incremental_with_new_impressions_random(self):
        """ Randomized check that user x user strengths maintained step by step
            are identical to those rebuilt from scratch when new impressions arrive.
            Runs 100 rounds; each round picks a random user, saves two impressions on
            random products, regenerates the user templates, saves one random activity
            of that user incrementally and compares both approaches.
            Does nothing unless `tests.INCLUDE_RANDOM_TESTS` is truthy.
        """
        if not tests.INCLUDE_RANDOM_TESTS:
            return

        users = list(self.db_proxy.fetch_all_user_ids())
        products = list(self.db_proxy.fetch_all_product_ids())
        first_date = pytz.utc.localize(dateutil.parser.parse("1988-11-06 6:00:00"))

        for round_no in range(100):
            user = random.choice(users)
            is_anonymous = config.is_anonymous(user)

            print("user: %s" % user)

            # Two impressions for the chosen user, sharing the same timestamp
            impressions_date = first_date + dt.timedelta(seconds=2 * round_no)
            seen_products = [random.choice(products) for _ in range(2)]
            for seen in seen_products:
                self.db_proxy.increment_impression_summary(user_id=user, product_id=seen,
                                                           date=impressions_date, anonymous=is_anonymous)

            print("impressions --> %s, %s" % tuple(seen_products))

            # A from-scratch generation is needed here so that all new impressions are considered
            ut.generate_templates(self.session_context)

            # One activity for that same user, one second after the impressions
            consumed = random.choice(products)
            activity_type = random.choice(self.session_context.supported_activities)
            activity = {
                "external_user_id": user,
                "external_product_id": consumed,
                "activity": activity_type,
                "created_at": first_date + dt.timedelta(seconds=2 * round_no + 1),
            }

            print("activity --> " + str(activity))

            ut.update_templates(self.session_context, activity)
            tasks.update_summaries(self.session_context, activity)

            targets = [user] if self.session_context.impressions_enabled else None
            self.compare_incremental_vs_from_scratch(target_users=targets)
 def test_unprocessed_base_product(self):
     """ Checks that the recommender still returns recommendations after the user
         has bought a product identified as "unprocessed_product" (according to the
         original description, a base product lacking a product model).
     """
     target = "u_tec_1"
     session = tests.init_session(user_id=target, algorithm=self.algorithm)

     purchase = {
         "external_user_id": target,
         "external_product_id": "unprocessed_product",
         "activity": "buy",
         "created_at": session.get_present_date(),
     }
     tasks.update_summaries(self.session_context, purchase)

     # The session is refreshed after the purchase and before recommending
     session.refresh()
     recommendations = session.get_recommender().recommend(100)
     nose.tools.ok_(len(recommendations) > 0, "Should have recommended even from an unprocessed base product")
# Example #10
# 0
    def test_previous_consumption_factor():
        """ Checks the previous consumption factor reported by the session context
            when it is customized to 0.1: after the target user buys "p_eco_2",
            the factor must be 0.1 for that product and 1 for "p_eco_1".
        """
        custom_factor = 0.1
        target_user = "******"
        session_context = tests.init_session(user_id=target_user,
                                             custom_settings={'previous_consumption_factor': custom_factor})

        purchase = {
            "external_user_id": target_user,
            "external_product_id": "p_eco_2",
            "activity": "buy",
            "created_at": session_context.get_present_date(),
        }
        tasks.update_summaries(session_context, purchase)
        session_context.refresh()

        factor_non_consumed = session_context.obtain_previous_consumption_factor("p_eco_1")
        nose.tools.eq_(factor_non_consumed, 1,
                       "Previous consumption factor should be 1 for non-consumed products")

        factor_consumed = session_context.obtain_previous_consumption_factor("p_eco_2")
        nose.tools.ok_(abs(factor_consumed - custom_factor) < tests.FLOAT_DELTA,
                       "Wrong previous consumption factor")
    def test_product_product_strengths_incremental_new_user_3star(self):
        """ Checks that product x product strengths updated incrementally are identical
            to those rebuilt from scratch after a single activity from the rating-3 bucket
            (per the original description, on a product the user had never consumed).
        """
        three_star_activity = {
            "external_user_id": "******",
            "external_product_id": "p_mus_1",
            "activity": self.session_context.activities_by_rating[3][0],
            "created_at": self.session_context.get_present_date(),
        }

        # Templates are updated before the summaries
        pt.update_templates(self.session_context, three_star_activity)
        tasks.update_summaries(self.session_context, three_star_activity)

        self.compare_incremental_vs_from_scratch()
def populate_activities(session_context, date=None):
    """ Saves dummy "buy" activities read from the "activities" collection.

        Each record provides a user ("user_id") and a list of "products"; one activity
        is saved per product. The activities of a record are dated one second apart,
        starting from `date` for every record. When `date` is None, the day before
        the present date of the session context is used.

        As stated by the original description of the fixture (not verifiable here),
        test users consume only products of their main interest area,
        e.g. "u_eco_X" consumes only products "p_eco_Y", and user *empty* consumes nothing.
    """
    records = _load_collection("activities")
    start_date = date if date is not None else session_context.get_present_date() - dt.timedelta(days=1)

    for record in records:
        buyer = record['user_id']
        for offset, product in enumerate(record["products"]):
            purchase = {
                "external_user_id": buyer,
                "external_product_id": product,
                "activity": "buy",
                "created_at": start_date + dt.timedelta(seconds=offset),
            }
            tasks.update_summaries(session_context, purchase)
    def test_user_user_strengths_incremental_old_product_5_to_3star(self):
        """ Checks that user x user strengths updated incrementally are identical
            to those rebuilt from scratch after an activity from the rating-3 bucket
            is saved for product "p_eco_2" (per the original description, a product
            the target user had previously consumed with a 5-star activity).
        """
        user = "******"
        downgrade = {
            "external_user_id": user,
            "external_product_id": "p_eco_2",
            "activity": self.session_context.activities_by_rating[3][0],
            "created_at": self.session_context.get_present_date(),
        }

        ut.update_templates(self.session_context, downgrade)
        tasks.update_summaries(self.session_context, downgrade)

        # The comparison is restricted to the target user only when impressions are enabled
        targets = [user] if self.session_context.impressions_enabled else None
        self.compare_incremental_vs_from_scratch(target_users=targets)
    def test_base_product_democracy(self):
        """ Tests whether all base products can send their templates to the final recommendation list.

            A new user buys one product of each type ("esp", "tec", "eco"), one second apart.
            The first recommendations are then expected to follow the reverse order of
            consumption, one product per type; the type is read from characters 2 to 4
            of the recommended product id (e.g. "p_eco_1" -> "eco").
        """
        user_id = "new_user"
        session = tests.init_session(user_id=user_id, algorithm=self.algorithm)
        types = ["esp", "tec", "eco"]
        for idx, product_type in enumerate(types):
            activity = {"external_user_id": user_id,
                        "external_product_id": "p_" + product_type + "_1",
                        "activity": "buy",
                        "created_at": session.get_present_date() + dt.timedelta(seconds=idx)}
            tasks.update_summaries(self.session_context, activity)

        session.refresh()

        recommender = session.get_recommender()
        results = recommender.recommend(self.n_recommendations)

        # Fails with a clear message (rather than an IndexError below) if the list is too short
        nose.tools.ok_(len(results) >= len(types),
                       "Expected at least %d recommendations, but got %d" % (len(types), len(results)))

        for idx, product_type in enumerate(reversed(types)):
            nose.tools.eq_(results[idx][1][2:5], product_type,
                           "A product of type '%s' should have appeared at position %d in the list"
                           % (product_type, idx))
    def test_recommend(self, test_recommendation_quality=True):
        """ Tests whether the recommendations conform to the top-popularity criterion.

            Every user except the target buys "p_TOP_POPULAR", and every one of those users
            except "u_user_dummy" also buys "p_2ndTOP_POPULAR". The target user is then
            expected to get those two products, in that order, as top recommendations.

            The `test_recommendation_quality` argument is not read by this method:
            the parent test is always invoked with `test_recommendation_quality=False`.
        """
        target = "u_user_empty"

        all_users = self.db_proxy.fetch_all_user_ids()
        for user in all_users:
            if user == target:
                continue

            # "u_user_dummy" skips the second product, which makes it strictly less popular
            products = ["p_TOP_POPULAR"]
            if user != "u_user_dummy":
                products.append("p_2ndTOP_POPULAR")

            for product in products:
                activity = {"external_user_id": user,
                            "external_product_id": product,
                            "activity": "buy",
                            "created_at": self.session_context.get_present_date()}
                tasks.update_summaries(self.session_context, activity)

        # Checks whether all users got recommendations
        super().test_recommend(test_recommendation_quality=False)

        # Checks whether the recommendations conform to the top-popularity criterion

        session = tests.init_session(user_id=target, algorithm=self.algorithm)
        recommender = session.get_recommender()
        recommendations = recommender.recommend(2)

        nose.tools.ok_(len(recommendations) > 0,
                       "No recommendations were retrieved")

        nose.tools.eq_(recommendations[0][1], "p_TOP_POPULAR",
                       "Weird recommendation -- should be the most popular product")

        # Fails with a clear message (rather than an IndexError below) if a single product came back
        nose.tools.ok_(len(recommendations) > 1,
                       "Only one recommendation was retrieved -- the 2nd most popular product is missing")

        nose.tools.eq_(recommendations[1][1], "p_2ndTOP_POPULAR",
                       "Weird recommendation -- should be the 2nd most popular product")
# Example #16
# 0
    def test_increment_product_popularity(self):
        """ Checks the popularity of product "p_mus_1" as fetched from the data proxy
            after each of three "buy" activities saved for user "u_eco_1".
            The expected values are 2, 2 and 4/3, starting from an initial popularity of 3;
            product "p_empty" is expected to have no popularity entry at the start.
        """
        product_1 = "p_mus_1"
        product_2 = "p_empty"
        product_ids = [product_1, product_2]
        data_proxy = self.session_context.data_proxy

        # sanity check
        initial_popularity = data_proxy.fetch_product_popularity(product_ids=product_ids)
        nose.tools.eq_(initial_popularity[product_1], 3, "Wrong initial popularity")
        nose.tools.eq_(initial_popularity.get(product_2), None, "Popularity should be None since no one consumed it")

        # (days before the present date, expected popularity after saving the activity)
        steps = (
            (2, 2),      # first activity
            (2, 2),      # another activity by the same user, without extending the date range
            (3, 4/3),    # another activity by the same user, now extending the date range
        )
        for days_back, expected_popularity in steps:
            activity = {
                "external_user_id": "u_eco_1",
                "external_product_id": product_1,
                "activity": "buy",
                "created_at": self.session_context.get_present_date() - dt.timedelta(days_back),
            }
            tasks.update_summaries(self.session_context, activity)

            current_popularity = data_proxy.fetch_product_popularity(product_ids=product_ids)
            nose.tools.ok_(abs(current_popularity[product_1] - expected_popularity) < tests.FLOAT_DELTA,
                           "Wrong popularity")
    def test_in_boost(self):
        """ Tests the effect of applying an in-boost on recommendations for some activity types.
            It applies to all recommendation heuristics.

            First part (history decay disabled): the target user performs an in-boosted
            activity on the top recommended product, and each score value of that product
            is expected to equal the old value multiplied by the in-boost.
            Second part (exponential history decay, halflife 2): after two impressions
            of that product are saved for the target user, each score value is expected
            to equal the old value multiplied by half the in-boost.
        """
        def strength_of(product, recommendations):
            # Strength tuple of the given product, or None if it was not recommended
            return next((rec[0] for rec in recommendations if rec[1] == product), None)

        def check_strength_ratio(new_strength, expected_ratio, message):
            # Compares the new strength against the original one, value by value
            nose.tools.ok_(new_strength is not None,
                           "The former top recommendation should have been recommended again.")
            for i in range(start_index, len(new_strength)):
                ratio = new_strength[i] / old_strength[i]
                nose.tools.ok_(abs(ratio - expected_ratio) < tests.FLOAT_DELTA, message)

        target = "u_eco_2"
        history_decay = {'history_decay_function_name': None}
        session = tests.init_session(user_id=target, custom_settings=history_decay, algorithm=self.algorithm)
        recommender = session.get_recommender()

        # Determines the index of the first actual value in the score tuples
        # produced by the recommender (note that hybrid recommenders use the first
        # position to indicate the algorithm number)
        start_index = 1 if recommender.is_hybrid() else 0

        recommendations = recommender.recommend(100)
        nose.tools.ok_(len(recommendations) > 0, "No recommendations were returned!")
        old_strength = recommendations[0][0]
        former_top_product = recommendations[0][1]

        # Meta-test: picks the first activity type whose in-boost is greater than 1,
        # which is exactly what the assertion below demands from the fixture
        boost_activity_type, in_boost = next(
            ((activity_type, boost)
             for activity_type, boost in self.session_context.in_boost_by_activity.items()
             if boost > 1),
            (None, 1))
        nose.tools.ok_(in_boost > 1, "Weak test fixture. There should be at least one in-boosted activity.")

        activity = {"external_user_id": target,
                    "external_product_id": former_top_product,
                    "activity": boost_activity_type,
                    "created_at": self.session_context.get_present_date()}
        tasks.update_summaries(self.session_context, activity)

        session.refresh()
        recommendations = recommender.recommend(100)
        nose.tools.ok_(len(recommendations) > 0, "No recommendations were returned!")
        check_strength_ratio(strength_of(former_top_product, recommendations), in_boost,
                             "Incorrect application of the activity in-boost")

        # Two impressions of the former top product are saved for the target user
        for _ in range(2):
            self.db_proxy.increment_impression_summary(user_id=target,
                                                       product_id=former_top_product,
                                                       date=self.session_context.get_present_date(),
                                                       anonymous=False)

        history_decay = {'history_decay_function_name': 'exponential', 'history_decay_exponential_function_halflife': 2}
        session = tests.init_session(user_id=target, custom_settings=history_decay, algorithm=self.algorithm)
        recommender = session.get_recommender()

        recommendations = recommender.recommend(100)
        nose.tools.ok_(len(recommendations) > 0, "No recommendations were returned!")
        # presumably two impressions with a halflife of 2 halve the score -- hence in_boost / 2
        check_strength_ratio(strength_of(former_top_product, recommendations), in_boost / 2,
                             "Incorrect application of the in-boost and history decay together")
    def test_near_identical(self):
        """ Checks that two 'near-identical' products are never recommended together
            (i.e., within the same page) when the filtering strategy is AFTER_SCORING.

            Two products sharing the very same full content (their titles differ only slightly)
            are inserted and consumed by every user but the target one. Recommendations are then
            requested for the target user twice: first with the near-identical filter turned off,
            and then with the filter turned on.
        """
        target = "u_tec_1"

        id_twin_product_1 = "p_tec_TWIN_1"
        id_twin_product_2 = "p_tec_TWIN_2"
        twin_ids = (id_twin_product_1, id_twin_product_2)

        publication_date = self.session_context.get_present_date() - dt.timedelta(days=1)

        # Both twins are identical, except for their ids and titles
        twin_titles = ((id_twin_product_1, "Whatever Gets You Through The Night"),
                       (id_twin_product_2, "Whatever Gets You Through This Night is Alright"))

        for twin_id, twin_title in twin_titles:
            twin_product = {"external_id": twin_id,
                            "language": "english",
                            "date": publication_date,
                            "expiration_date": publication_date + dt.timedelta(days=30),
                            "resources": {"title": twin_title},
                            "full_content": """Begin. Technology. Technology. This is all we got. End.""",
                            "category": "Nonsense"}
            self.db_proxy.insert_product(twin_product)
            tasks.process_product(self.session_context, twin_id)

        # Every user other than the target consumes (and has impressions on) both twins
        for user in self.db_proxy.fetch_all_user_ids():
            if user == target:
                continue

            for twin_id in twin_ids:
                purchase = {"external_user_id": user,
                            "external_product_id": twin_id,
                            "activity": "buy",
                            "created_at": self.session_context.get_present_date()}
                tasks.update_summaries(self.session_context, purchase)

            if not self.session_context.impressions_enabled:
                continue

            is_anonymous = config.is_anonymous(user)
            for twin_id in twin_ids:
                self.db_proxy.increment_impression_summary(user,
                                                           twin_id,
                                                           date=self.session_context.get_present_date(),
                                                           anonymous=is_anonymous)

        # The tf-idf templates must be rebuilt from scratch as well,
        # otherwise the df's of the twins will be different.
        ut.generate_templates(self.session_context)
        pt.generate_templates(self.session_context)
        pttfidf.generate_templates(self.session_context)

        # Step 1: recommends WITHOUT near-identical filtering,
        # so as to check that the twins really appear consecutively.

        unfiltered_settings = {'near_identical_filter_field': None,
                               'near_identical_filter_threshold': None}

        session = tests.init_session(user_id=target, custom_settings=unfiltered_settings, algorithm=self.algorithm)
        session.refresh()

        recommender = session.get_recommender()

        # This check is meaningless for hybrid recommenders, hence it is skipped for them.
        if not recommender.is_hybrid():
            recommendations = recommender.recommend(100)

            # Lazily yields the positions of the twins, so the scan stops at the second twin
            twin_positions = (position for position, recommendation in enumerate(recommendations)
                              if recommendation[1].startswith("p_tec_TWIN_"))
            first_twin_position = next(twin_positions, None)
            second_twin_position = next(twin_positions, None)

            if second_twin_position is not None:
                nose.tools.eq_(second_twin_position - first_twin_position, 1,
                               "The two near-identical products should appear consecutively without filtering")

        # Step 2: recommends WITH near-identical filtering

        recommendation_page_size = 5
        filtered_settings = {'near_identical_filter_field': 'resources.title',
                             'near_identical_filter_threshold': 2,
                             'recommendations_page_size': recommendation_page_size}

        session = tests.init_session(user_id=target, custom_settings=filtered_settings, algorithm=self.algorithm)
        session.refresh()

        recommender = session.get_recommender()
        recommendations = recommender.recommend(100)

        # Sanity check: the test would be void if no twin was recommended at all
        recommended_products = {recommendation[1] for recommendation in recommendations}
        recommended_twins = recommended_products & {id_twin_product_1, id_twin_product_2}
        nose.tools.ok_(len(recommended_twins) > 0,
                       "At least one of the twins should have been recommended, otherwise the test is meaningless")

        # Actual tests. The starting value lies far enough behind position 0 for the first twin to pass.
        previous_twin_position = -(recommendation_page_size + 1)
        for position, recommendation in enumerate(recommendations):
            if not recommendation[1].startswith("p_tec_TWIN_"):
                continue
            # it suffices to show that the twins have been separated
            nose.tools.ok_(position - previous_twin_position > 1,
                           "Two near-identical products should not appear within the same recommendations page")
            previous_twin_position = position
    def test_product_age_decay_exponential(self):
        """ Tests the effect of applying a product age decay factor based on an exponential
            function on recommendations. It applies to all recommendation heuristics.

            Two products with identical content and title are inserted, one of them dated
            2 days (the configured half life) before the other. Every user but the target one
            consumes both products. The scores obtained by the older product for the target user
            are then expected to be half of those obtained by the newer product.
        """
        target = "u_tec_1"

        id_twin_product_old = "p_tec_TWIN_OLD"
        id_twin_product_new = "p_tec_TWIN_NEW"

        # makes it so that the oldest twin is 2 days (the configured half life) older
        old_date = self.session_context.get_present_date() - dt.timedelta(days=2)
        new_date = self.session_context.get_present_date()

        twin_product_old = {"external_id": id_twin_product_old,
                            "language": "english",
                            "date": old_date,
                            "expiration_date": old_date + dt.timedelta(days=30),
                            "resources": {"title": "Whatever Gets You Through The Night"},
                            "full_content": """Begin. Technology. Technology. This is all we got. End.""",
                            "category": "Nonsense"}

        twin_product_new = {"external_id": id_twin_product_new,
                            "language": "english",
                            "date": new_date,
                            "expiration_date": new_date + dt.timedelta(days=30),
                            "resources": {"title": "Whatever Gets You Through The Night"},
                            "full_content": """Begin. Technology. Technology. This is all we got. End.""",
                            "category": "Nonsense"}

        self.db_proxy.insert_product(twin_product_old)
        tasks.process_product(self.session_context, id_twin_product_old)
        self.db_proxy.insert_product(twin_product_new)
        tasks.process_product(self.session_context, id_twin_product_new)

        # makes it so that all users consume (and have impressions on) the twins, except for the target user
        users = self.db_proxy.fetch_all_user_ids()
        for user in users:
            if user != target:
                activity = {"external_user_id": user,
                            "external_product_id": id_twin_product_old,
                            "activity": "buy",
                            "created_at": self.session_context.get_present_date()}
                tasks.update_summaries(self.session_context, activity)

                activity = {"external_user_id": user,
                            "external_product_id": id_twin_product_new,
                            "activity": "buy",
                            "created_at": self.session_context.get_present_date()}
                tasks.update_summaries(self.session_context, activity)

                if self.session_context.impressions_enabled:
                    is_anonymous = config.is_anonymous(user)
                    self.db_proxy.increment_impression_summary(user,
                                                               id_twin_product_old,
                                                               date=self.session_context.get_present_date(),
                                                               anonymous=is_anonymous)
                    self.db_proxy.increment_impression_summary(user,
                                                               id_twin_product_new,
                                                               date=self.session_context.get_present_date(),
                                                               anonymous=is_anonymous)

        ut.generate_templates(self.session_context)
        pt.generate_templates(self.session_context)
        pttfidf.generate_templates(self.session_context)  # Unfortunately we need to regenerate from scratch,
                                                          # otherwise the df's of the twins will be different.

        # Enables the exponential age decay (half life = 2) and disables near-identical filtering
        custom_settings = {'product_age_decay_function_name': 'exponential',
                           'product_age_decay_exponential_function_halflife': 2,
                           'near_identical_filter_field': None, 'near_identical_filter_threshold': None}

        session = tests.init_session(user_id=target, custom_settings=custom_settings, algorithm=self.algorithm)
        session.refresh()

        recommender = session.get_recommender()

        # Determines the index of the first actual value in the score tuples
        # produced by the recommender (note that hybrid recommenders use the first
        # position to indicate the algorithm number)
        if recommender.is_hybrid():
            start_index = 1
        else:
            start_index = 0

        recommendations = recommender.recommend(100)
        nose.tools.ok_(len(recommendations) > 0, "No recommendations were returned!")

        strength_old_twin = None
        strength_new_twin = None

        for rec in recommendations:
            if rec[1] == id_twin_product_old:
                strength_old_twin = rec[0]
            if rec[1] == id_twin_product_new:
                strength_new_twin = rec[0]

        # Fails with a clear message (rather than with a TypeError on None)
        # when any of the twins is missing from the recommendations
        nose.tools.ok_(strength_old_twin is not None,
                       "The old twin should have been recommended, otherwise the test is meaningless")
        nose.tools.ok_(strength_new_twin is not None,
                       "The new twin should have been recommended, otherwise the test is meaningless")

        # The old twin is exactly one half life older, so each of its scores should be halved
        for i in range(start_index, len(strength_old_twin)):
            old_strength_value = strength_old_twin[i]
            new_strength_value = strength_new_twin[i]
            nose.tools.ok_(abs(old_strength_value / new_strength_value - 0.5) < tests.FLOAT_DELTA,
                           "Incorrect application of the product age decay")