コード例 #1
0
    def test_user_user_strengths_incremental_with_new_impressions_identified_users(self):
        """ Checks that the user x user strengths obtained through incremental updates
            are the same as those obtained by a full regeneration, using impressions
            saved as non-anonymous.

            Each scenario saves two impressions for a user, regenerates the templates,
            processes one new activity of that same user incrementally and finally
            compares the outcome against a from-scratch generation.
        """
        scenarios = [("u_esp_4", "p_nonsense_1", "p_empty_with_missing_category", "p_filter_2", "buy")]
        base_datetime = dateutil.parser.parse("1988-11-06 6:00:00")

        for position, scenario in enumerate(scenarios):
            user_id, seen_product_a, seen_product_b, consumed_product, activity_name = scenario

            # Two impressions for the chosen user, both carrying the same timestamp
            impressions_date = pytz.utc.localize(base_datetime + dt.timedelta(seconds=(2 * position + 1)))
            for seen_product in (seen_product_a, seen_product_b):
                self.db_proxy.increment_impression_summary(user_id=user_id, product_id=seen_product,
                                                           date=impressions_date, anonymous=False)

            # The templates must be rebuilt from scratch so that all new impressions are considered
            ut.generate_templates(self.session_context)

            # One activity for that same user, one second after the impressions
            activity_date = pytz.utc.localize(base_datetime + dt.timedelta(seconds=(2 * position + 2)))
            new_activity = {"external_user_id": user_id,
                            "external_product_id": consumed_product,
                            "activity": activity_name,
                            "created_at": activity_date}

            ut.update_templates(self.session_context, new_activity)
            tasks.update_summaries(self.session_context, new_activity)

            # With impressions enabled, only the user who triggered the update is checked
            if self.session_context.impressions_enabled:
                users_to_check = [user_id]
            else:
                users_to_check = None
            self.compare_incremental_vs_from_scratch(target_users=users_to_check)
コード例 #2
0
    def compare_incremental_vs_from_scratch(self, target_users=None):
        """ Helper that checks the incrementally generated strengths and templates
            against those produced by a full (from-scratch) generation.
            When using impressions we only care about target users whose activities triggered the updates.

            The current (incremental) strengths and templates are fetched first; then the
            templates are regenerated and the same data is fetched again for the comparison.

            :param target_users: If not None, only pairs whose first element (the target user)
                                 belongs to this collection are compared.
        """
        def assert_same_strength(pair, incremental, from_scratch):
            value_incremental = incremental[pair]
            value_from_scratch = from_scratch[pair]
            is_close = abs(value_incremental - value_from_scratch) < 0.00001
            values_text = "[incremental --> %.6f] [from scratch --> %.6f]" % (value_incremental,
                                                                              value_from_scratch)
            nose.tools.ok_(is_close, "Strengths do not match for " + str(pair) + ": " + values_text)

        def assert_same_templates(user, incremental, from_scratch):
            found_incremental = incremental[user]
            found_from_scratch = from_scratch[user]
            values_text = "[incremental --> %s] [from scratch --> %s]" % (str(found_incremental),
                                                                          str(found_from_scratch))
            nose.tools.eq_(found_incremental, found_from_scratch,
                           "Templates do not match for " + str(user) + ": " + values_text)

        if target_users is None:
            users = list(self.db_proxy.fetch_all_user_ids())
        else:
            users = target_users

        # snapshot of what the incremental updates produced
        strengths_incremental = self.db_proxy.fetch_user_user_strengths()
        templates_incremental = self.db_proxy.fetch_user_templates(users)

        # full regeneration
        ut.generate_templates(self.session_context)

        # snapshot of what the full regeneration produced
        strengths_from_scratch = self.db_proxy.fetch_user_user_strengths()
        templates_from_scratch = self.db_proxy.fetch_user_templates(users)

        def check_pairs(pairs):
            for pair in pairs:
                # The incremental (on-the-fly) updates take care of "user as target" strengths only.
                # This is ok, since "user as template" updates will be triggered indirectly
                # (by other target users).
                if target_users is not None and pair[0] not in target_users:
                    continue
                assert_same_strength(pair, strengths_incremental, strengths_from_scratch)
                assert_same_templates(pair[0], templates_incremental, templates_from_scratch)

        # Both directions are walked, so every pair present in either result is looked up in both
        check_pairs(strengths_from_scratch)
        check_pairs(strengths_incremental)
コード例 #3
0
    def test_user_user_strengths_incremental_with_new_impressions_random(self):
        """ Checks that the user x user strengths obtained through incremental updates
            are the same as those obtained by a full regeneration.

            Runs 100 rounds; each one picks a random user, saves two impressions on random
            products, regenerates the templates, processes one random activity incrementally
            and compares the outcome against a from-scratch generation.
            Does nothing unless tests.INCLUDE_RANDOM_TESTS is set.
        """
        if not tests.INCLUDE_RANDOM_TESTS:
            return

        candidate_users = list(self.db_proxy.fetch_all_user_ids())
        candidate_products = list(self.db_proxy.fetch_all_product_ids())
        base_date = pytz.utc.localize(dateutil.parser.parse("1988-11-06 6:00:00"))

        for round_number in range(100):
            chosen_user = random.choice(candidate_users)
            anonymous_user = config.is_anonymous(chosen_user)

            print("user: %s" % chosen_user)

            # Two impressions for the chosen user, both carrying the same timestamp
            impressions_date = base_date + dt.timedelta(seconds=2 * round_number)
            seen_product_a = random.choice(candidate_products)
            seen_product_b = random.choice(candidate_products)
            for seen_product in (seen_product_a, seen_product_b):
                self.db_proxy.increment_impression_summary(user_id=chosen_user, product_id=seen_product,
                                                           date=impressions_date, anonymous=anonymous_user)

            print("impressions --> %s, %s" % (seen_product_a, seen_product_b))

            # The templates must be rebuilt from scratch so that all new impressions are considered
            ut.generate_templates(self.session_context)

            # One activity for that same user, one second after the impressions
            consumed_product = random.choice(candidate_products)
            activity_name = random.choice(self.session_context.supported_activities)
            activity_date = base_date + dt.timedelta(seconds=2 * round_number + 1)

            new_activity = {"external_user_id": chosen_user,
                            "external_product_id": consumed_product,
                            "activity": activity_name,
                            "created_at": activity_date}

            print("activity --> " + str(new_activity))

            ut.update_templates(self.session_context, new_activity)
            tasks.update_summaries(self.session_context, new_activity)

            # With impressions enabled, only the user who triggered the update is checked
            if self.session_context.impressions_enabled:
                users_to_check = [chosen_user]
            else:
                users_to_check = None
            self.compare_incremental_vs_from_scratch(target_users=users_to_check)
コード例 #4
0
def main(argv):
    """ Generates the templates for the environment named by the first argument.

        :param argv: sequence of command-line arguments; argv[0] is the environment.
        :returns: a dict with a boolean "success" entry. On failure it also carries a
                  "message" entry holding either the usage error or the formatted traceback.
    """
    if len(argv) == 0:
        usage_error = "You must specify the environment"
        log.error(usage_error)
        return {"success": False, "message": usage_error}

    try:
        # command-line arguments
        environment = argv[0]

        ut.generate_templates(init_session(environment))
    except Exception:
        # Any failure is logged and reported to the caller instead of propagating
        log.exception('Exception on {0}:'.format(__name__))
        outcome = {"success": False, "message": traceback.format_exc()}
    else:
        outcome = {"success": True}

    return outcome
コード例 #5
0
 def setup(self):
     """ Runs the parent class setup and then calls ut.generate_templates
         for the session context.
     """
     super().setup()
     ut.generate_templates(self.session_context)
コード例 #6
0
 def setup(self):
     """ Runs the parent class setup, populates the impressions and then calls
         ut.generate_templates for the session context.
     """
     super().setup()
     # Impressions are populated before the templates are generated
     dp.populate_impressions(self.session_context)
     ut.generate_templates(self.session_context)
コード例 #7
0
    def test_near_identical(self):
        """ Checks that two products considered 'near-identical' are not recommended at the
            same time (within the same page) when the filtering strategy is AFTER_SCORING.

            Two products with the same content and similar titles are inserted and consumed
            by every user but the target. Recommendations for the target are then computed
            without the near-identical filter (where, for non-hybrid recommenders, the twins
            are expected to appear consecutively) and with it (where they must be apart).
        """
        target_user = "u_tec_1"
        twin_prefix = "p_tec_TWIN_"

        first_twin_id = "p_tec_TWIN_1"
        second_twin_id = "p_tec_TWIN_2"
        twin_ids = (first_twin_id, second_twin_id)

        release_date = self.session_context.get_present_date() - dt.timedelta(days=1)

        def build_twin(external_id, title):
            # The twins differ only in their id and title
            return {"external_id": external_id,
                    "language": "english",
                    "date": release_date,
                    "expiration_date": release_date + dt.timedelta(days=30),
                    "resources": {"title": title},
                    "full_content": """Begin. Technology. Technology. This is all we got. End.""",
                    "category": "Nonsense"}

        twins = [build_twin(first_twin_id, "Whatever Gets You Through The Night"),
                 build_twin(second_twin_id, "Whatever Gets You Through This Night is Alright")]

        for twin_id, twin in zip(twin_ids, twins):
            self.db_proxy.insert_product(twin)
            tasks.process_product(self.session_context, twin_id)

        # makes it so that all users consume (and have impressions on) the twins, except for the target user
        for user in self.db_proxy.fetch_all_user_ids():
            if user == target_user:
                continue

            for twin_id in twin_ids:
                purchase = {"external_user_id": user,
                            "external_product_id": twin_id,
                            "activity": "buy",
                            "created_at": self.session_context.get_present_date()}
                tasks.update_summaries(self.session_context, purchase)

            if not self.session_context.impressions_enabled:
                continue

            anonymous_user = config.is_anonymous(user)
            for twin_id in twin_ids:
                self.db_proxy.increment_impression_summary(user,
                                                           twin_id,
                                                           date=self.session_context.get_present_date(),
                                                           anonymous=anonymous_user)

        # Unfortunately we need to regenerate from scratch,
        # otherwise the df's of the twins will be different.
        ut.generate_templates(self.session_context)
        pt.generate_templates(self.session_context)
        pttfidf.generate_templates(self.session_context)

        def recommender_with(settings):
            # Opens a refreshed session for the target user with the given custom settings
            fresh_session = tests.init_session(user_id=target_user, custom_settings=settings,
                                               algorithm=self.algorithm)
            fresh_session.refresh()
            return fresh_session.get_recommender()

        # First, we recommend WITHOUT near-identical filtering, to check that the twins really appear consecutively.

        unfiltered_recommender = recommender_with({'near_identical_filter_field': None,
                                                   'near_identical_filter_threshold': None})

        # For hybrid recommenders, this check is meaningless.
        if not unfiltered_recommender.is_hybrid():
            unfiltered = unfiltered_recommender.recommend(100)

            # Lazily walks the ranking; only the first two twin positions are ever requested
            twin_positions = (position for position, entry in enumerate(unfiltered)
                              if entry[1].startswith(twin_prefix))
            first_position = next(twin_positions, None)
            second_position = next(twin_positions, None)
            if second_position is not None:
                nose.tools.eq_(second_position - first_position, 1,
                               "The two near-identical products should appear consecutively without filtering")

        # Now we recommend WITH near-identical filtering

        recommendation_page_size = 5
        filtered_recommender = recommender_with({'near_identical_filter_field': 'resources.title',
                                                 'near_identical_filter_threshold': 2,
                                                 'recommendations_page_size': recommendation_page_size})
        filtered = filtered_recommender.recommend(100)

        # Sanity check
        recommended_ids = {entry[1] for entry in filtered}
        twins_recommended = sum(1 for twin_id in twin_ids if twin_id in recommended_ids)
        nose.tools.ok_(twins_recommended > 0,
                       "At least one of the twins should have been recommended, otherwise the test is meaningless")

        # Actual tests
        last_twin_position = -(recommendation_page_size + 1)  # initial value, so the first twin passes the test
        for position, entry in enumerate(filtered):
            if not entry[1].startswith(twin_prefix):
                continue
            nose.tools.ok_(position - last_twin_position > 1,  # it suffices to show that the twins have been separated
                           "Two near-identical products should not appear within the same recommendations page")
            last_twin_position = position
コード例 #8
0
    def test_product_age_decay_exponential(self):
        """ Tests the effect of applying a product age decay factor based on an exponential
            function on recommendations. It applies to all recommendation heuristics.

            Two products with identical title, content and category are inserted, one dated
            two days (the configured half life) before the other. Every user but the target
            consumes both. Each score component of the older twin is then expected to be
            half of the corresponding component of the newer twin.
        """
        target = "u_tec_1"

        id_twin_product_old = "p_tec_TWIN_OLD"
        id_twin_product_new = "p_tec_TWIN_NEW"

        # makes it so that the oldest twin is 2 days (the configured half life) older
        old_date = self.session_context.get_present_date() - dt.timedelta(days=2)
        new_date = self.session_context.get_present_date()

        twin_product_old = {"external_id": id_twin_product_old,
                            "language": "english",
                            "date": old_date,
                            "expiration_date": old_date + dt.timedelta(days=30),
                            "resources": {"title": "Whatever Gets You Through The Night"},
                            "full_content": """Begin. Technology. Technology. This is all we got. End.""",
                            "category": "Nonsense"}

        twin_product_new = {"external_id": id_twin_product_new,
                            "language": "english",
                            "date": new_date,
                            "expiration_date": new_date + dt.timedelta(days=30),
                            "resources": {"title": "Whatever Gets You Through The Night"},
                            "full_content": """Begin. Technology. Technology. This is all we got. End.""",
                            "category": "Nonsense"}

        self.db_proxy.insert_product(twin_product_old)
        tasks.process_product(self.session_context, id_twin_product_old)
        self.db_proxy.insert_product(twin_product_new)
        tasks.process_product(self.session_context, id_twin_product_new)

        # makes it so that all users consume (and have impressions on) the twins, except for the target user
        users = self.db_proxy.fetch_all_user_ids()
        for user in users:
            if user != target:
                activity = {"external_user_id": user,
                            "external_product_id": id_twin_product_old,
                            "activity": "buy",
                            "created_at": self.session_context.get_present_date()}
                tasks.update_summaries(self.session_context, activity)

                activity = {"external_user_id": user,
                            "external_product_id": id_twin_product_new,
                            "activity": "buy",
                            "created_at": self.session_context.get_present_date()}
                tasks.update_summaries(self.session_context, activity)

                if self.session_context.impressions_enabled:
                    is_anonymous = config.is_anonymous(user)
                    self.db_proxy.increment_impression_summary(user,
                                                               id_twin_product_old,
                                                               date=self.session_context.get_present_date(),
                                                               anonymous=is_anonymous)
                    self.db_proxy.increment_impression_summary(user,
                                                               id_twin_product_new,
                                                               date=self.session_context.get_present_date(),
                                                               anonymous=is_anonymous)

        ut.generate_templates(self.session_context)
        pt.generate_templates(self.session_context)
        pttfidf.generate_templates(self.session_context)  # Unfortunately we need to regenerate from scratch,
                                                          # otherwise the df's of the twins will be different.

        # Exponential decay with a half life of 2; the near-identical filtering is disabled
        # so that both twins can be recommended together
        custom_settings = {'product_age_decay_function_name': 'exponential',
                           'product_age_decay_exponential_function_halflife': 2,
                           'near_identical_filter_field': None, 'near_identical_filter_threshold': None}

        session = tests.init_session(user_id=target, custom_settings=custom_settings, algorithm=self.algorithm)
        session.refresh()

        recommender = session.get_recommender()

        # Determines the index of the first actual value in the score tuples
        # produced by the recommender (note that hybrid recommenders use the first
        # position to indicate the algorithm number)
        start_index = 1 if recommender.is_hybrid() else 0

        recommendations = recommender.recommend(100)
        nose.tools.ok_(len(recommendations) > 0, "No recommendations were returned!")

        strength_old_twin = None
        strength_new_twin = None

        for rec in recommendations:
            if rec[1] == id_twin_product_old:
                strength_old_twin = rec[0]
            if rec[1] == id_twin_product_new:
                strength_new_twin = rec[0]

        # Sanity check: fail with a clear message (rather than a TypeError on None below)
        # when either twin is missing from the recommendations
        nose.tools.ok_(strength_old_twin is not None and strength_new_twin is not None,
                       "Both twins should have been recommended, otherwise the test is meaningless")

        # After one half life, every score component of the old twin should be half of the new twin's
        for i in range(start_index, len(strength_old_twin)):
            old_strength_value = strength_old_twin[i]
            new_strength_value = strength_new_twin[i]
            nose.tools.ok_(abs(old_strength_value / new_strength_value - 0.5) < tests.FLOAT_DELTA,
                           "Incorrect application of the product age decay")