Example #1
0
 def test_invalid_attribute_pre_filter(self):
     """ Starts a session that filters on the attribute "xxxx" while using the
         BEFORE_SCORING filter strategy.

         NOTE(review): the test name suggests the session start is expected to be
         rejected; the expected-failure check is not visible in this block.
     """
     user = "******"
     settings = {'filter_strategy': context.BEFORE_SCORING}
     bogus_filter = '{"xxxx": "yyyy"}'
     tests.init_session(user_id=user,
                        custom_settings=settings,
                        context_filter_string=bogus_filter)
Example #2
0
 def test_invalid_date_post_filter(self):
     """ Starts a session that filters on the date value "1234X" while using the
         AFTER_SCORING filter strategy.

         NOTE(review): the test name suggests the session start is expected to be
         rejected; the expected-failure check is not visible in this block.
     """
     user = "******"
     settings = {'filter_strategy': context.AFTER_SCORING}
     bogus_filter = '{"date": "1234X"}'
     tests.init_session(user_id=user,
                        custom_settings=settings,
                        context_filter_string=bogus_filter)
Example #3
0
    def test_multi_activities_blocking_vs_non_blocking(self):
        """ Checks that blocking activities prevent items from being recommended,
            and that non-blocking activities do not do so.

            For every user "u_eco_1" .. "u_eco_<dp.N_USR_ECONOMIA>", the top recommended
            product first receives a non-blocking activity (it must remain recommended)
            and then a blocking activity (it must no longer be recommended; this is only
            verified when the filter strategy is AFTER_SCORING).
        """
        # Economia
        for i in range(1, dp.N_USR_ECONOMIA + 1):
            target = "u_eco_" + str(i)

            session = tests.init_session(user_id=target, algorithm=self.algorithm)
            recommender = session.get_recommender()
            recommendations = recommender.recommend(self.n_recommendations)

            # ok_ raises on failure, so past this point the list is known to be non-empty
            nose.tools.ok_(len(recommendations) > 0, "Empty recommendation.")
            top_product = recommendations[0][1]

            supported_activities = self.session_context.supported_activities
            blocking_activities = self.session_context.blocking_activities
            non_blocking_activities = list(set(supported_activities) - set(blocking_activities))

            # Meta-tests
            nose.tools.ok_(len(non_blocking_activities) > 0,
                           "Weak test fixture. There should be at least one non_blocking activity")
            nose.tools.ok_(len(blocking_activities) > 0,
                           "Weak test fixture. There should be at least one blocking activity")

            # Saves a non-blocking activity first
            activity = {"external_user_id": target,
                        "external_product_id": top_product,
                        "activity": non_blocking_activities[0],
                        "created_at": self.session_context.get_present_date()}
            tasks.update_summaries(self.session_context, activity)

            session = tests.init_session(user_id=target, algorithm=self.algorithm)
            recommender = session.get_recommender()
            recommendations = recommender.recommend(self.n_recommendations)
            recommended_products = [r[1] for r in recommendations]
            nose.tools.ok_(top_product in recommended_products,
                           "A non-blocking activity should not prevent a product from being recommended")

            # Then saves a blocking activity for the same product
            activity = {"external_user_id": target,
                        "external_product_id": top_product,
                        "activity": blocking_activities[0],
                        "created_at": self.session_context.get_present_date()}
            tasks.update_summaries(self.session_context, activity)

            session = tests.init_session(user_id=target, algorithm=self.algorithm)
            recommender = session.get_recommender()
            recommendations = recommender.recommend(self.n_recommendations)
            recommended_products = [r[1] for r in recommendations]
            # The exclusion is only asserted when filtering happens after scoring
            if self.session_context.filter_strategy == ctx.AFTER_SCORING:
                nose.tools.ok_(top_product not in recommended_products,
                               "A blocking activity should prevent a product from being recommended")
Example #4
0
    def test_history_decay_exponential(self):
        """ Checks the exponential history decay applied to recommendation scores.

            The top product for "u_eco_2" is first scored without any history decay.
            Its impressions are then topped up to 3, which equals the half-life
            configured for the second session, so every score component of that
            product is expected to be halved.
        """
        target = "u_eco_2"

        # Baseline session: history decay switched off
        no_decay_settings = {'history_decay_function_name': None}
        baseline_session = tests.init_session(user_id=target, custom_settings=no_decay_settings,
                                              algorithm=self.algorithm)
        recommender = baseline_session.get_recommender()

        # Hybrid recommenders keep the algorithm number in the first position of the
        # score tuple, so the actual values start one position later
        start_index = 1 if recommender.is_hybrid() else 0

        baseline = recommender.recommend(100)
        nose.tools.ok_(len(baseline) > 0, "No recommendations were returned!")
        former_top_product = baseline[0][1]
        old_strength = baseline[0][0]

        summary = self.db_proxy.fetch_impressions_summary(
            user_ids=[target],
            product_ids=[former_top_product],
            group_by_product=False,
            anonymous=False)
        impressions_count = summary.get(target, {}).get(former_top_product, (0, None))[0]

        # Adds whatever impressions are missing to reach a total of 3
        for _ in range(3 - impressions_count):
            self.db_proxy.increment_impression_summary(user_id=target,
                                                       product_id=former_top_product,
                                                       date=self.session_context.get_present_date(),
                                                       anonymous=False)

        decay_settings = {'history_decay_function_name': 'exponential',
                          'history_decay_exponential_function_halflife': 3}
        decayed_session = tests.init_session(user_id=target, custom_settings=decay_settings,
                                             algorithm=self.algorithm)
        decayed = decayed_session.get_recommender().recommend(100)
        nose.tools.ok_(len(decayed) > 0, "No recommendations were returned!")

        # Score of the former top product in the new ranking (None when it is absent)
        new_strength = next((rec[0] for rec in decayed if rec[1] == former_top_product), None)

        nose.tools.ok_(new_strength is not None,
                       "The former top recommendation should have been recommended again.")
        for position in range(start_index, len(new_strength)):
            ratio = new_strength[position] / old_strength[position]
            nose.tools.ok_(abs(ratio - 0.5) < tests.FLOAT_DELTA,
                           "Incorrect application of the exponential history decay")
 def test_recommend(self, test_recommendation_quality=True):
     """ Runs the inherited recommendation test with the quality checks disabled, then
         checks that the user "u_user_empty" receives exactly the 5 requested recommendations.

         :param test_recommendation_quality: accepted for signature compatibility; this
             override does not read it and always passes False to the parent test.
     """
     super().test_recommend(test_recommendation_quality=False)
     target = "u_user_empty"
     n_requested = 5
     session = tests.init_session(user_id=target, algorithm=self.algorithm)
     recommender = session.get_recommender()
     recommendations = recommender.recommend(n_requested)
     # Reports the actual count, since the check is about the exact size and not mere emptiness
     nose.tools.eq_(len(recommendations), n_requested,
                    "Expected %d recommendations, got %d" % (n_requested, len(recommendations)))
    def test_merge_algorithm_contributions(self):
        """ Tests the merge based on fixed slices without bonuses.

            Feeds hand-made rankings for four algorithms (with some products shared
            among them) to the recommender and checks the resulting product order.
        """
        def ranked(*entries):
            # Turns (score, product) pairs into [[score], product] rows, keeping the order
            return [[[score], product] for score, product in entries]

        recommendations_by_alg = {
            "UBCF": ranked((50, "a"), (30, "b"), (10, "c"), (5, "d"), (2, "e")),
            "PBCF": ranked((50, "f"), (30, "b"), (10, "d"), (5, "g")),
            "CB": ranked((50, "g"), (40, "h"), (30, "c"), (20, "d"), (10, "i"),
                         (9, "j"), (8, "k"), (7, "l"), (4, "m")),
            "POP": ranked((50, "POP_1"), (30, "POP_2"), (10, "POP_3"), (5, "POP_4"),
                          (4, "POP_5"), (3, "POP_6"), (4, "POP_7")),
        }
        expected_rank = ["f", "g", "d", "c", "h", "b", "i", "a", "j", "k", "l", "m", "e"]

        session = tests.init_session(user_id="u_eco_1", algorithm=self.algorithm)
        merged_recommendations = session.get_recommender().merge_algorithm_contributions(
            recommendations_by_alg, 20)
        products_rank = [merged[1] for merged in merged_recommendations]
        nose.tools.eq_(products_rank, expected_rank, "Wrong rank after merge")
Example #7
0
 def test_recommend_non_existing_user(self):
     """ Checks that a non-empty recommendation list is returned for an unknown user id.
     """
     unknown_user_session = tests.init_session(user_id="Invalid user id", algorithm=self.algorithm)
     results = unknown_user_session.get_recommender().recommend(self.n_recommendations)
     nose.tools.ok_(len(results) > 0,
                    "Recommender 0 should recommend even for non-existing users.")
Example #8
0
 def test_recommend_anonymous_user(self):
     """ Checks that a non-empty recommendation list is returned for an anonymous user.
     """
     anonymous_session = tests.init_session(user_id="hmrtmp_AnonymousUser1", algorithm=self.algorithm)
     results = anonymous_session.get_recommender().recommend(self.n_recommendations)
     nose.tools.ok_(len(results) > 0,
                    "Recommender 0 should recommend even for anonymous users.")
Example #9
0
    def test_history_decay_step(self):
        """ Checks the step-function history decay applied to recommendations.

            With a TTL of 3 configured, the top product recommended to "u_eco_2" has
            its impressions topped up to 3; after refreshing the session it must no
            longer show up among the recommendations.
        """
        target = "u_eco_2"

        step_settings = {'history_decay_function_name': 'step', 'history_decay_step_function_ttl': 3}
        session = tests.init_session(user_id=target, custom_settings=step_settings, algorithm=self.algorithm)
        recommender = session.get_recommender()
        first_round = recommender.recommend(self.n_recommendations)

        nose.tools.ok_(len(first_round) > 0, "No recommendations were returned!")
        former_top_product = first_round[0][1]

        summary = self.db_proxy.fetch_impressions_summary(
            user_ids=[target],
            product_ids=[former_top_product],
            group_by_product=False,
            anonymous=False)
        impressions_count = summary.get(target, {}).get(former_top_product, (0, None))[0]

        # Adds whatever impressions are missing to reach a total of 3
        for _ in range(3 - impressions_count):
            self.db_proxy.increment_impression_summary(user_id=target,
                                                       product_id=former_top_product,
                                                       date=self.session_context.get_present_date(),
                                                       anonymous=False)

        # The same recommender object is queried again after the session refresh
        session.refresh()
        second_round = recommender.recommend(self.n_recommendations)
        still_recommended = [rec[1] for rec in second_round]
        nose.tools.ok_(former_top_product not in still_recommended,
                       "Incorrect application of the step history decay")
 def test_recommend_anonymous_user(self):
     """ Checks that an anonymous user gets recommendations and that the id of the
         first recommended product starts with "p_mus".
     """
     anonymous_session = tests.init_session(user_id="hmrtmp_AnonymousUser1", algorithm=self.algorithm)
     results = anonymous_session.get_recommender().recommend(self.n_recommendations)
     nose.tools.ok_(len(results) > 0,
                    "Product-based recommenders should recommend even for anonymous users")
     top_product_id = results[0][1]
     nose.tools.ok_(top_product_id.startswith("p_mus"), "Anonymous user got a weird recommendation")
 def test_recommend_non_existing_user(self):
     """ Checks that a non-empty recommendation list is returned even when the
         session is created for an unknown user id.
     """
     unknown_user_session = tests.init_session(user_id="Invalid user id", algorithm=self.algorithm)
     results = unknown_user_session.get_recommender().recommend(self.n_recommendations)
     nose.tools.ok_(len(results) > 0,
                    "The random recommender should recommend even for non-existing users")
Example #12
0
 def test_recommend_non_existing_user(self):
     """ Checks that no recommendations at all are returned when the session is
         created for an unknown user id.
     """
     unknown_user_session = tests.init_session(user_id="Invalid user id", algorithm=self.algorithm)
     results = unknown_user_session.get_recommender().recommend(self.n_recommendations)
     n_results = len(results)
     nose.tools.eq_(n_results, 0, "No recommendations should have been returned.")
    def test_out_boost(self):
        """ Tests the effect of applying an out-boost on recommendations for some activity types.
            It applies to all user-based heuristics.

            The top product recommended to "u_eco_2" is scored once; then every template
            user holding a recent activity on that product gets an additional activity of
            an out-boosted type. After refreshing the session, every score component of
            that product is expected to have been multiplied by the out-boost factor.
        """
        target = "u_eco_2"
        session = tests.init_session(user_id=target, algorithm=self.algorithm)
        recommender = session.get_recommender()

        # Determines the index of the first actual value in the score tuples
        # produced by the recommender (note that hybrid recommenders use the first
        # position to indicate the algorithm number)
        start_index = 1 if recommender.is_hybrid() else 0

        recommendations = recommender.recommend(100)
        nose.tools.ok_(len(recommendations) > 0, "No recommendations were returned!")
        former_top_product = recommendations[0][1]
        old_strength = recommendations[0][0]

        # Meta-test: picks the first activity type whose out-boost is greater than 1, so that
        # a factor below 1 found earlier cannot hide a suitable activity type
        boost_activity_type, out_boost = next(
            ((activity_type, boost)
             for activity_type, boost in self.session_context.out_boost_by_activity.items()
             if boost > 1),
            (None, 1))
        nose.tools.ok_(out_boost > 1, "Weak test fixture. There should be at least one out-boosted activity.")

        # Saves out-boosted activities for all templates who had consumed the former top product
        templates = [t[1] for t in session.user_templates]
        for template in templates:
            recent_product_activities_of_template = session.recent_activities_by_product_by_template_user.get(
                template, {})
            if former_top_product in recent_product_activities_of_template:
                activity = {"external_user_id": template,
                            "external_product_id": former_top_product,
                            "activity": boost_activity_type,
                            "created_at": session.get_present_date()}
                tasks.update_summaries(session, activity)

        session.refresh()
        recommendations = recommender.recommend(100)
        nose.tools.ok_(len(recommendations) > 0, "No recommendations were returned!")

        # Score of the former top product in the new ranking (None when it is absent)
        new_strength = next((rec[0] for rec in recommendations if rec[1] == former_top_product), None)

        nose.tools.ok_(new_strength is not None,
                       "The former top recommendation should have been recommended again.")
        for i in range(start_index, len(new_strength)):
            old_strength_value = old_strength[i]
            new_strength_value = new_strength[i]
            nose.tools.ok_(abs(new_strength_value / old_strength_value - out_boost) < tests.FLOAT_DELTA,
                           "Incorrect application of the activity out-boost")
 def test_deleted_base_product(self):
     """ Checks that the recommender keeps working after a base product has been deleted.

         Product "p_tec_2" is deleted and the session of user "u_tec_1" is refreshed
         before recommendations are requested.
     """
     session = tests.init_session(user_id="u_tec_1", algorithm=self.algorithm)
     prd.delete_product(session, "p_tec_2")
     session.refresh()
     recommendations = session.get_recommender().recommend(100)
     nose.tools.ok_(len(recommendations) > 0,
                    "Should have recommended even from a deleted base product")
Example #15
0
    def setup_class(cls):
        """ Rebuilds the test database and saves a backup of it.

            The database is dropped, repopulated with products, users, activities and
            impressions, and indexed (without TTL indexes) before the backup is taken.
        """
        cls.session_context = context = tests.init_session()
        cls.db_proxy = proxy = context.data_proxy

        # Starts from an empty database
        proxy.drop_database()

        dp.populate_products(context)
        dp.populate_users(context)
        dp.populate_activities(context)
        dp.populate_impressions(context)
        proxy.ensure_indexes(create_ttl_indexes=False)

        proxy.backup_database()
 def test_unprocessed_base_product(self):
     """ Checks that the recommender keeps working when the user has consumed a base
         product which has not yet been processed (i.e., it lacks a product model).
     """
     user_id = "u_tec_1"
     session = tests.init_session(user_id=user_id, algorithm=self.algorithm)

     # Registers a purchase of a product id used nowhere else in this test
     purchase = {
         "external_user_id": user_id,
         "external_product_id": "unprocessed_product",
         "activity": "buy",
         "created_at": session.get_present_date(),
     }
     tasks.update_summaries(self.session_context, purchase)
     session.refresh()

     recommendations = session.get_recommender().recommend(100)
     nose.tools.ok_(len(recommendations) > 0,
                    "Should have recommended even from an unprocessed base product")
Example #17
0
    def setup_class(cls):
        """ Rebuilds the test database using the class' custom settings and saves a backup of it.

            The database is dropped and indexed (without TTL indexes), then populated with
            users and products; products are processed from scratch before the activities
            are populated.
        """
        # NOTE(review): custom_settings is passed positionally here, whereas other call
        # sites pass it by keyword -- confirm it is the first parameter of init_session
        cls.session_context = context = tests.init_session(cls.custom_settings)
        cls.db_proxy = proxy = context.data_proxy

        proxy.drop_database()
        proxy.ensure_indexes(create_ttl_indexes=False)

        # Fills the database
        dp.populate_users(context)
        dp.populate_products(context)
        product_maintenance.process_products_from_scratch(context)
        dp.populate_activities(context)

        # The backup allows the database to be sent back to this state after each test
        proxy.backup_database()
Example #18
0
    def test_previous_consumption_factor():
        """ Checks the previous consumption factor of consumed and non-consumed products.

            With the factor configured as 0.1, a purchase of "p_eco_2" is registered;
            "p_eco_1" must then yield a factor of 1 and "p_eco_2" a factor of 0.1.
        """
        target_user = "******"
        factor_settings = {'previous_consumption_factor': 0.1}
        session_context = tests.init_session(user_id=target_user, custom_settings=factor_settings)

        purchase = {
            "external_user_id": target_user,
            "external_product_id": "p_eco_2",
            "activity": "buy",
            "created_at": session_context.get_present_date(),
        }
        tasks.update_summaries(session_context, purchase)
        session_context.refresh()

        non_consumed_factor = session_context.obtain_previous_consumption_factor("p_eco_1")
        nose.tools.eq_(non_consumed_factor, 1,
                       "Previous consumption factor should be 1 for non-consumed products")

        consumed_factor = session_context.obtain_previous_consumption_factor("p_eco_2")
        nose.tools.ok_(abs(consumed_factor - 0.1) < tests.FLOAT_DELTA,
                       "Wrong previous consumption factor")
    def test_merge_algorithm_contributions(self):
        """ Tests the merge based on fixed slices.

            Feeds hand-made rankings for four algorithms (with no product shared among
            them) to the recommender and checks the resulting product order.
        """
        def fixed_ranking(algorithm, scores):
            # Builds [[score], "<algorithm>_<position>"] rows, positions counted from 1
            return [[[score], "%s_%d" % (algorithm, position)]
                    for position, score in enumerate(scores, start=1)]

        recommendations = {
            "UBCF": fixed_ranking("UBCF", [50, 30, 10, 5, 2]),
            "PBCF": fixed_ranking("PBCF", [50, 30, 10, 5]),
            "CB": fixed_ranking("CB", [50, 40, 30, 20, 10, 9, 8, 7, 4]),
            "POP": fixed_ranking("POP", [50, 30, 10, 5, 4, 3, 4]),
        }
        expected_rank = ['UBCF_1', 'PBCF_1', 'CB_1', 'CB_2',
                         'UBCF_2', 'PBCF_2', 'CB_3', 'CB_4',
                         'UBCF_3', 'PBCF_3', 'CB_5', 'CB_6',
                         'UBCF_4', 'PBCF_4', 'CB_7', 'CB_8',
                         'UBCF_5', 'CB_9']

        session = tests.init_session(user_id="u_eco_1", algorithm=self.algorithm)
        merged_recommendations = session.get_recommender().merge_algorithm_contributions(recommendations, 20)
        products_rank = [merged[1] for merged in merged_recommendations]
        nose.tools.eq_(products_rank, expected_rank, "Wrong rank after merge")
Example #20
0
    def setup_class(cls):
        """ Rebuilds the test database with decay functions disabled and saves a backup of it.

            The database is dropped and indexed (without TTL indexes); history decay and
            product age decay are switched off on the shared session context; products are
            populated and processed, followed by users, activities and impressions.
        """
        cls.session_context = context = tests.init_session()
        cls.db_proxy = proxy = context.data_proxy

        proxy.drop_database()
        proxy.ensure_indexes(create_ttl_indexes=False)

        # No decay function of either kind is configured on the shared context
        context.history_decay_function_name = None
        context.product_age_decay_function_name = None

        # Fills the database
        dp.populate_products(context)
        tasks.process_products(context)
        dp.populate_users(context)
        dp.populate_activities(context)
        dp.populate_impressions(context)

        # The backup allows the database to be sent back to this state after each test
        proxy.backup_database()
Example #21
0
    def test_recommendation_slack(self):
        """ Checks that the recommender still returns products after the first half of
            its results has been buried with impressions.

            Using a step history decay with a TTL of 3, half of the products the
            recommender is able to fetch are requested, each of them receives enough
            impressions to reach the TTL, and the same number of products is requested
            again; the second answer must not be empty.
        """
        target_user = "******"
        ttl = 3
        custom_settings = {
            'history_decay_function_name': 'step',
            'history_decay_step_function_ttl': ttl
        }
        session = tests.init_session(user_id=target_user, custom_settings=custom_settings, algorithm=self.algorithm)
        recommender = session.get_recommender()

        # this first query just measures how many products we're able to fetch
        max_results = len(recommender.recommend(1000))

        # retrieves half of those products
        initial_query = recommender.recommend(max_results // 2)

        recommended_products = [result[1] for result in initial_query]

        # product_ids takes the flat list of ids (it used to be wrapped in another list)
        impressions_summary = self.db_proxy.fetch_impressions_summary(
            user_ids=[target_user],
            product_ids=recommended_products,
            group_by_product=False,
            anonymous=False).get(target_user, {})

        # bury the first recommendation set with impressions so that those products do not appear again
        for product_id in recommended_products:
            # generate enough impressions to bury that product
            for _ in range(ttl - impressions_summary.get(product_id, (0, None))[0]):
                self.db_proxy.increment_impression_summary(user_id=target_user,
                                                           product_id=product_id,
                                                           date=self.session_context.get_present_date(),
                                                           anonymous=False)
        session.refresh()

        # this query should still return some products if the recommender is internally adding a slack when fetching
        # products from the database
        recommender = session.get_recommender()
        second_query = recommender.recommend(max_results // 2)

        nose.tools.ok_(len(second_query) > 0, "Recommender '{0}' did not pass the 'slack test'".format(self.algorithm))
    def test_base_product_democracy(self):
        """ Checks that every base product contributes to the final recommendation list.

            A new user buys one product of each type, one second apart; the top
            recommendations are then expected to follow the types in reverse
            purchase order (latest purchase first).
        """
        user_id = "new_user"
        session = tests.init_session(user_id=user_id, algorithm=self.algorithm)
        types = ["esp", "tec", "eco"]

        for offset, bought_type in enumerate(types):
            purchase = {
                "external_user_id": user_id,
                "external_product_id": "p_" + bought_type + "_1",
                "activity": "buy",
                "created_at": session.get_present_date() + dt.timedelta(seconds=offset),
            }
            tasks.update_summaries(self.session_context, purchase)

        session.refresh()

        results = session.get_recommender().recommend(self.n_recommendations)

        for position, expected_type in enumerate(reversed(types)):
            # Characters 2..4 of the product id are compared against the type
            # (presumably ids look like "p_<type>_<n>", as the ones bought above)
            actual_type = results[position][1][2:5]
            nose.tools.eq_(actual_type, expected_type,
                           "A product of type '%s' should have appeared at position %d in the list"
                           % (expected_type, position))
Example #23
0
    def test_recommend(self, test_recommendation_quality=True):
        """ Checks that the two most bought products come first in the recommendations.

            Every user except "u_user_empty" buys "p_TOP_POPULAR", and every one of those
            except "u_user_dummy" also buys "p_2ndTOP_POPULAR". After the inherited test
            (run with quality checks disabled), the two recommendations requested for
            "u_user_empty" must be those products, in that order.

            :param test_recommendation_quality: not read by this override.
        """
        target = "u_user_empty"

        def register_purchase(user_id, product_id):
            # Saves a "buy" activity dated at the present date of the shared context
            tasks.update_summaries(self.session_context,
                                   {"external_user_id": user_id,
                                    "external_product_id": product_id,
                                    "activity": "buy",
                                    "created_at": self.session_context.get_present_date()})

        for user in self.db_proxy.fetch_all_user_ids():
            if user == target:
                continue
            register_purchase(user, "p_TOP_POPULAR")
            if user == "u_user_dummy":
                continue
            register_purchase(user, "p_2ndTOP_POPULAR")

        # Checks whether all users got recommendations
        super().test_recommend(test_recommendation_quality=False)

        # Checks whether the recommendations conform to the top-popularity criterion
        session = tests.init_session(user_id=target, algorithm=self.algorithm)
        recommendations = session.get_recommender().recommend(2)

        nose.tools.ok_(len(recommendations) > 0, "No recommendations were retrieved")

        first_product = recommendations[0][1]
        nose.tools.eq_(first_product, "p_TOP_POPULAR",
                       "Weird recommendation -- should be the most popular product")

        second_product = recommendations[1][1]
        nose.tools.eq_(second_product, "p_2ndTOP_POPULAR",
                       "Weird recommendation -- should be the 2nd most popular product")
Example #24
0
def test_date_type():
    """ Checks how the product model factory converts the 'date' attribute.

        Three ISO-like strings are fed through the factory, and the resulting attribute
        is compared against the expected datetime:
          - fractional seconds, no 'Z' suffix -> naive datetime with microseconds;
          - 'Z' suffix, no fractional seconds -> UTC datetime;
          - fractional seconds and 'Z' suffix -> UTC datetime with microseconds.
    """
    session_context = tests.init_session()
    model_factory = session_context.product_model_factory

    def build_model(date_string):
        """ Builds the model of a product which differs from the others only by its date. """
        raw_product = {'language': 'portuguese',
                       'full_content': 'empty',
                       'resources': {'title': 'whatever'},
                       'date': date_string,
                       'expiration_date': '2018-01-01T12:34:56.7890'}
        return model_factory.build('product1', raw_product)

    # fractional seconds, no timezone designator
    product_model = build_model('2014-01-01T12:34:56.7890')
    nose.tools.eq_(product_model.get_attribute('date'),
                   dt.datetime(2014, 1, 1, 12, 34, 56, 789000))

    # timezone designator, no fractional seconds
    product_model = build_model('2014-01-01T12:34:56Z')
    nose.tools.eq_(product_model.get_attribute('date'),
                   dt.datetime(2014, 1, 1, 12, 34, 56, tzinfo=pytz.utc))

    # fractional seconds and timezone designator
    product_model = build_model('2014-01-01T12:34:56.7890Z')
    nose.tools.eq_(product_model.get_attribute('date'),
                   dt.datetime(2014, 1, 1, 12, 34, 56, 789000, tzinfo=pytz.utc))
Example #25
0
    def test_in_boost(self):
        """ Tests the effect of applying an in-boost on recommendations for some activity types.
            It applies to all recommendation heuristics.

            Outline:
              1. With history decay disabled, takes the target user's top recommendation
                 and records its score tuple.
              2. Registers, for that product, an activity whose in-boost is greater than 1,
                 and checks that every score component got multiplied by the in-boost.
              3. Registers two impressions of that product, turns on an exponential history
                 decay with half-life 2, and checks that every score component now equals
                 the original one multiplied by in_boost / 2.
        """
        target = "u_eco_2"

        def strength_of(product_id, recs):
            """ Returns the score tuple of the given product within recs (None when absent). """
            return next((rec[0] for rec in recs if rec[1] == product_id), None)

        history_decay = {'history_decay_function_name': None}
        session = tests.init_session(user_id=target, custom_settings=history_decay, algorithm=self.algorithm)
        recommender = session.get_recommender()

        # Determines the index of the first actual value in the score tuples
        # produced by the recommender (note that hybrid recommenders use the first
        # position to indicate the algorithm number)
        start_index = 1 if recommender.is_hybrid() else 0

        recommendations = recommender.recommend(100)
        nose.tools.ok_(len(recommendations) > 0, "No recommendations were returned!")
        former_top_product = recommendations[0][1]
        old_strength = recommendations[0][0]

        # Meta-test: picks the first activity type that is actually boosted (> 1).
        # Entries with an in-boost below 1 are skipped instead of aborting the search,
        # since the assertion right below demands a value greater than 1.
        boost_activity_type, in_boost = next(
            ((activity_type, boost)
             for activity_type, boost in self.session_context.in_boost_by_activity.items()
             if boost > 1),
            (None, 1))
        nose.tools.ok_(in_boost > 1, "Weak text fixture. There should be at least one in-boosted activity.")

        activity = {"external_user_id": target,
                    "external_product_id": former_top_product,
                    "activity": boost_activity_type,
                    "created_at": self.session_context.get_present_date()}
        tasks.update_summaries(self.session_context, activity)

        session.refresh()
        recommendations = recommender.recommend(100)
        nose.tools.ok_(len(recommendations) > 0, "No recommendations were returned!")
        new_strength = strength_of(former_top_product, recommendations)

        nose.tools.ok_(new_strength is not None,
                       "The former top recommendation should have been recommended again.")
        for i in range(start_index, len(new_strength)):
            old_strength_value = old_strength[i]
            new_strength_value = new_strength[i]
            nose.tools.ok_(abs(new_strength_value / old_strength_value - in_boost) < tests.FLOAT_DELTA,
                           "Incorrect application of the activity in-boost")

        # Two impressions of the product are registered before the history decay is enabled
        for _ in range(2):
            self.db_proxy.increment_impression_summary(user_id=target,
                                                       product_id=former_top_product,
                                                       date=self.session_context.get_present_date(),
                                                       anonymous=False)

        history_decay = {'history_decay_function_name': 'exponential', 'history_decay_exponential_function_halflife': 2}
        session = tests.init_session(user_id=target, custom_settings=history_decay, algorithm=self.algorithm)
        recommender = session.get_recommender()

        recommendations = recommender.recommend(100)
        nose.tools.ok_(len(recommendations) > 0, "No recommendations were returned!")
        new_strength = strength_of(former_top_product, recommendations)

        nose.tools.ok_(new_strength is not None,
                       "The former top recommendation should have been recommended again.")
        for i in range(start_index, len(new_strength)):
            old_strength_value = old_strength[i]
            new_strength_value = new_strength[i]
            nose.tools.ok_(abs(new_strength_value / old_strength_value - in_boost / 2) < tests.FLOAT_DELTA,
                           "Incorrect application of the in-boost and history decay together")
Example #26
0
    def test_product_age_decay_exponential(self):
        """ Tests the effect of applying a product age decay factor based on an exponential
            function on recommendations. It applies to all recommendation heuristics.

            Two products with the same content are inserted, one dated at the present date and
            the other one 2 days earlier (the half-life configured for the decay). All users
            but the target consume both of them. Each score component of the older twin is
            then expected to be half of the corresponding component of the newer twin.
        """
        target = "u_tec_1"

        id_twin_product_old = "p_tec_TWIN_OLD"
        id_twin_product_new = "p_tec_TWIN_NEW"

        # makes it so that the oldest twin is 2 days (the configured half life) older
        old_date = self.session_context.get_present_date() - dt.timedelta(days=2)
        new_date = self.session_context.get_present_date()

        def make_twin(product_id, date):
            """ Builds a twin product; the twins differ only by id and dates. """
            return {"external_id": product_id,
                    "language": "english",
                    "date": date,
                    "expiration_date": date + dt.timedelta(days=30),
                    "resources": {"title": "Whatever Gets You Through The Night"},
                    "full_content": """Begin. Technology. Technology. This is all we got. End.""",
                    "category": "Nonsense"}

        twins = [(id_twin_product_old, make_twin(id_twin_product_old, old_date)),
                 (id_twin_product_new, make_twin(id_twin_product_new, new_date))]

        for twin_id, twin_product in twins:
            self.db_proxy.insert_product(twin_product)
            tasks.process_product(self.session_context, twin_id)

        # makes it so that all users consume (and have impressions on) the twins, except for the target user
        users = self.db_proxy.fetch_all_user_ids()
        for user in users:
            if user == target:
                continue

            for twin_id, _ in twins:
                activity = {"external_user_id": user,
                            "external_product_id": twin_id,
                            "activity": "buy",
                            "created_at": self.session_context.get_present_date()}
                tasks.update_summaries(self.session_context, activity)

            if self.session_context.impressions_enabled:
                is_anonymous = config.is_anonymous(user)
                for twin_id, _ in twins:
                    self.db_proxy.increment_impression_summary(user,
                                                               twin_id,
                                                               date=self.session_context.get_present_date(),
                                                               anonymous=is_anonymous)

        ut.generate_templates(self.session_context)
        pt.generate_templates(self.session_context)
        pttfidf.generate_templates(self.session_context)  # Unfortunately we need to regenerate from scratch,
                                                          # otherwise the df's of the twins will be different.

        # Enables the exponential product age decay and disables near-identical filtering
        # (otherwise the twins could not both be recommended)
        custom_settings = {'product_age_decay_function_name': 'exponential',
                           'product_age_decay_exponential_function_halflife': 2,
                           'near_identical_filter_field': None, 'near_identical_filter_threshold': None}

        session = tests.init_session(user_id=target, custom_settings=custom_settings, algorithm=self.algorithm)
        session.refresh()

        recommender = session.get_recommender()

        # Determines the index of the first actual value in the score tuples
        # produced by the recommender (note that hybrid recommenders use the first
        # position to indicate the algorithm number)
        start_index = 1 if recommender.is_hybrid() else 0

        recommendations = recommender.recommend(100)
        nose.tools.ok_(len(recommendations) > 0, "No recommendations were returned!")

        strength_old_twin = None
        strength_new_twin = None

        for rec in recommendations:
            if rec[1] == id_twin_product_old:
                strength_old_twin = rec[0]
            if rec[1] == id_twin_product_new:
                strength_new_twin = rec[0]

        # Without these checks, a missing twin would surface as a TypeError on None
        # instead of as a test failure.
        nose.tools.ok_(strength_old_twin is not None,
                       "The old twin should have been recommended.")
        nose.tools.ok_(strength_new_twin is not None,
                       "The new twin should have been recommended.")

        for i in range(start_index, len(strength_old_twin)):
            old_strength_value = strength_old_twin[i]
            new_strength_value = strength_new_twin[i]
            nose.tools.ok_(abs(old_strength_value / new_strength_value - 0.5) < tests.FLOAT_DELTA,
                           "Incorrect application of the product age decay")
Example #27
0
 def __init__(self):
     """ Creates a session through tests.init_session() (default arguments)
         and keeps it in self.context.
     """
     session_context = tests.init_session()
     self.context = session_context
Example #28
0
    def test_recommend(self, test_recommendation_quality=True):
        """ Tests whether meaningful recommendations were obtained.

            One query is generated per user of each category ('eco', 'esp', 'mus' and 'tec').
            For every query, recommendations are requested and must not be empty.

            :param test_recommendation_quality: when True, the leading recommendations must
                additionally be products of the user's own category (ids starting with
                "p_<category>_").
        """
        # pre-generates a session context and use it for all recommendation tests below
        session = tests.init_session(algorithm=self.algorithm)

        def generate_queries_for_category(category, user_count, product_count):
            """ Yields one query dict per user of the given category.
                :param category: category abbreviation used in user and product ids
                :param user_count: number of users of that category (ids are 1-based)
                :param product_count: number of products of that category
            """
            for i in range(1, user_count + 1):
                # user ids follow the pattern u_<category>_<index>, e.g. "u_eco_2"
                target_user = 'u_{0}_{1}'.format(category, str(i))
                result = {
                    'target_user': target_user,
                    'category': category,
                    'product_count': product_count
                }
                yield result

        def recommend(target_user):
            """ Returns recommendations for a certain user.
                :param target_user: user to recommend
                :return: list of recommendations
            """
            # updates the session's user context
            session.user_id = target_user
            session.refresh()
            recommender = session.get_recommender()
            return recommender.recommend(self.n_recommendations)

        def verify_recommendations(_query, _recommendations):
            """ Verify that the recommendation was successful.
                :param _query: query parameters
                :param _recommendations: recommendation result set
            """
            # NOTE(review): the session is shared with the worker thread, so the activities read
            # here are those of whichever user the session was last refreshed for -- possibly
            # not the user of _query. TODO confirm whether this matters for the fixtures.
            recent_activities = session.user_context.recent_activities
            products_consumed = list({act["external_product_id"] for act in recent_activities})
            n_products_consumed = len(products_consumed)

            nose.tools.ok_(len(_recommendations) > 0, "No recommendations were retrieved")
            if test_recommendation_quality:
                # only as many leading recommendations as there are unconsumed products of the
                # category are required to belong to that category
                for j in range(min(_query['product_count'] - n_products_consumed, len(_recommendations))):
                    nose.tools.eq_(_recommendations[j][1][:6], "p_{0}_".format(_query['category']),
                                   "Questionable recommendations were obtained " +
                                   "for user %s: %s" % (_query['target_user'], _recommendations))
        queries = []
        # Economia
        queries += generate_queries_for_category('eco', dp.N_USR_ECONOMIA, dp.N_PROD_ECONOMIA)
        # Esportes
        queries += generate_queries_for_category('esp', dp.N_USR_ESPORTES, dp.N_PROD_ESPORTES)
        # Música
        queries += generate_queries_for_category('mus', dp.N_USR_MUSICA, dp.N_PROD_MUSICA)
        # Tecnologia
        queries += generate_queries_for_category('tec', dp.N_USR_TECNOLOGIA, dp.N_PROD_TECNOLOGIA)

        # We did an experiment trying to parallelize the test recommendations, but there was no speedup because the
        # overhead is too cumbersome.
        n_workers = 1  # For some reason a thread pool with 1 worker is slightly faster than the nonconcurrent version
        with concurrent.futures.ThreadPoolExecutor(max_workers=n_workers) as executor:
            future_to_query = {executor.submit(wrap(recommend), q['target_user']): q
                               for q in queries}

            for future in concurrent.futures.as_completed(future_to_query):
                query = future_to_query[future]
                recommendations = future.result()
                verify_recommendations(query, recommendations)
Example #29
0
    def test_near_identical(self):
        """ Tests that two products considered 'near-identical' are not recommended at the same time
            (within the same page) when the filtering strategy is AFTER_SCORING.

            Two products with the same content and slightly different titles are inserted and
            consumed by every user but the target. Recommendations are first obtained with the
            near-identical filter disabled (the twins should then be neighbours in the ranking,
            a check skipped for hybrid recommenders), and afterwards with the filter enabled on
            'resources.title' (the twins should then be kept apart).
        """
        target = "u_tec_1"
        twin_prefix = "p_tec_TWIN_"

        id_twin_product_1 = "p_tec_TWIN_1"
        id_twin_product_2 = "p_tec_TWIN_2"
        twin_ids = (id_twin_product_1, id_twin_product_2)

        twin_date = self.session_context.get_present_date() - dt.timedelta(days=1)

        def make_twin(product_id, title):
            """ Builds a twin product; the twins differ only by id and title. """
            return {"external_id": product_id,
                    "language": "english",
                    "date": twin_date,
                    "expiration_date": twin_date + dt.timedelta(days=30),
                    "resources": {"title": title},
                    "full_content": """Begin. Technology. Technology. This is all we got. End.""",
                    "category": "Nonsense"}

        twin_products = [
            make_twin(id_twin_product_1, "Whatever Gets You Through The Night"),
            make_twin(id_twin_product_2, "Whatever Gets You Through This Night is Alright"),
        ]

        for twin_id, twin_product in zip(twin_ids, twin_products):
            self.db_proxy.insert_product(twin_product)
            tasks.process_product(self.session_context, twin_id)

        # makes it so that all users consume (and have impressions on) the twins, except for the target user
        all_users = self.db_proxy.fetch_all_user_ids()
        for user in all_users:
            if user == target:
                continue

            for twin_id in twin_ids:
                activity = {"external_user_id": user,
                            "external_product_id": twin_id,
                            "activity": "buy",
                            "created_at": self.session_context.get_present_date()}
                tasks.update_summaries(self.session_context, activity)

            if self.session_context.impressions_enabled:
                is_anonymous = config.is_anonymous(user)
                for twin_id in twin_ids:
                    self.db_proxy.increment_impression_summary(user,
                                                               twin_id,
                                                               date=self.session_context.get_present_date(),
                                                               anonymous=is_anonymous)

        # Unfortunately we need to regenerate the tf-idf templates from scratch,
        # otherwise the df's of the twins will be different.
        ut.generate_templates(self.session_context)
        pt.generate_templates(self.session_context)
        pttfidf.generate_templates(self.session_context)

        # First, we recommend WITHOUT near-identical filtering, to check that the twins really appear consecutively.

        unfiltered_settings = {'near_identical_filter_field': None,
                               'near_identical_filter_threshold': None}

        session = tests.init_session(user_id=target, custom_settings=unfiltered_settings, algorithm=self.algorithm)
        session.refresh()

        recommender = session.get_recommender()

        # For hybrid recommenders, this check is meaningless.
        if not recommender.is_hybrid():
            recommendations = recommender.recommend(100)

            first_twin_position = None
            for position, recommendation in enumerate(recommendations):
                if not recommendation[1].startswith(twin_prefix):
                    continue
                if first_twin_position is None:
                    first_twin_position = position
                    continue
                nose.tools.eq_(position - first_twin_position, 1,
                               "The two near-identical products should appear consecutively without filtering")
                break

        # Now we recommend WITH near-identical filtering

        recommendation_page_size = 5
        filtered_settings = {'near_identical_filter_field': 'resources.title',
                             'near_identical_filter_threshold': 2,
                             'recommendations_page_size': recommendation_page_size}

        session = tests.init_session(user_id=target, custom_settings=filtered_settings, algorithm=self.algorithm)
        session.refresh()

        recommender = session.get_recommender()
        recommendations = recommender.recommend(100)

        # Sanity check
        recommended_products = {recommendation[1] for recommendation in recommendations}
        recommended_twins = recommended_products.intersection(twin_ids)
        nose.tools.ok_(len(recommended_twins) > 0,
                       "At least one of the twins should have been recommended, otherwise the test is meaningless")

        # Actual tests
        last_twin_position = -recommendation_page_size - 1  # initial value, so the first twin passes the test
        for position, recommendation in enumerate(recommendations):
            if not recommendation[1].startswith(twin_prefix):
                continue
            # it suffices to show that the twins have been separated
            nose.tools.ok_(position - last_twin_position > 1,
                           "Two near-identical products should not appear within the same recommendations page")
            last_twin_position = position