def test_product_product_strengths_incremental_new_user_5_to_2_to_5star(self):
    """ Checks that incrementally updated product x product strengths equal those built from scratch.

        Two activities are saved in sequence for the same user/product pair, one minute apart:
        first the leading activity type registered under rating 3, then the leading activity
        type registered under rating 5. Once both have been processed, the incremental results
        are compared against a from-scratch generation.

        NOTE(review): the method name mentions a 2-star step, yet the first activity is taken
        from rating 3 -- confirm which one is intended.
    """
    user = "******"
    product = "p_mus_1"
    steps = (
        (3, "1988-11-06 9:00:00"),
        (5, "1988-11-06 9:01:00"),
    )

    for rating, timestamp in steps:
        activity = {
            "external_user_id": user,
            "external_product_id": product,
            "activity": self.session_context.activities_by_rating[rating][0],
            "created_at": pytz.utc.localize(dateutil.parser.parse(timestamp)),
        }
        pt.update_templates(self.session_context, activity)
        tasks.update_summaries(self.session_context, activity)

    self.compare_incremental_vs_from_scratch()
def test_product_product_strengths_incremental_random(self):
    """ Checks that incrementally updated product x product strengths equal those built from scratch.

        Saves 100 activities, each with a randomly chosen user, product and supported
        activity type, spaced two seconds apart, and compares the incremental results
        against a from-scratch generation.
        Does nothing when tests.INCLUDE_RANDOM_TESTS is falsy.
    """
    if not tests.INCLUDE_RANDOM_TESTS:
        return

    candidate_users = list(self.db_proxy.fetch_all_user_ids())
    candidate_products = list(self.db_proxy.fetch_all_product_ids())

    for step in range(100):
        chosen_user = random.choice(candidate_users)
        chosen_product = random.choice(candidate_products)
        chosen_type = random.choice(self.session_context.supported_activities)
        base_date = pytz.utc.localize(dateutil.parser.parse("1988-11-06 6:00:00"))

        activity = {
            "external_user_id": chosen_user,
            "external_product_id": chosen_product,
            "activity": chosen_type,
            "created_at": base_date + dt.timedelta(seconds=2 * step),
        }
        pt.update_templates(self.session_context, activity)
        tasks.update_summaries(self.session_context, activity)

        # NOTE(review): assumed to run after every single activity rather than once
        # after the loop -- confirm against the intended test granularity.
        self.compare_incremental_vs_from_scratch()
def test_product_product_strengths_incremental_with_new_impressions_two_new_products(self):
    """ Checks that incrementally updated product x product strengths equal those built from scratch.

        Inserts two new products sharing the same title, language, date and category,
        registers an impression of user "u_eco_1" on the first of them only, and then saves
        one 5-rated activity of user "u_eco_2" on each product in turn. The incremental
        results are compared against a from-scratch generation after each activity.
    """
    first_twin_id = "p_tec_TWIN_1"
    second_twin_id = "p_tec_TWIN_2"
    creation_date = self.session_context.get_present_date() - dt.timedelta(days=2)

    def build_twin(external_id, full_content):
        # Returns a fresh product document; only the id and the content vary.
        return {
            "external_id": external_id,
            "language": "english",
            "date": creation_date,
            "resources": {"title": "Whatever Gets You Through The Night"},
            "full_content": full_content,
            "category": "Nonsense",
        }

    # Initially, no users have impressions on either of the new products.
    first_twin = build_twin(
        first_twin_id, "Begin. Technology. Technology. This is all we got. End.")
    # The second content carries a line break right before "End.".
    second_twin = build_twin(
        second_twin_id, "Begin. Technology. Technology. This is all we got. \nEnd.")
    self.db_proxy.insert_product(first_twin)
    self.db_proxy.insert_product(second_twin)

    impression_user = "u_eco_1"
    active_user = "u_eco_2"
    five_star_type = self.session_context.activities_by_rating[5][0]

    # An impression is saved on just one of the new products.
    impression_date = pytz.utc.localize(dateutil.parser.parse("1988-11-06 9:00:00"))
    self.db_proxy.increment_impression_summary(
        user_id=impression_user, product_id=first_twin_id, date=impression_date, anonymous=False)

    # The other user consumes both new products, one at a time.
    for twin_id in (first_twin_id, second_twin_id):
        activity = {
            "external_user_id": active_user,
            "external_product_id": twin_id,
            "activity": five_star_type,
            "created_at": self.session_context.get_present_date(),
        }
        pt.update_templates(self.session_context, activity)
        tasks.update_summaries(self.session_context, activity)
        self.compare_incremental_vs_from_scratch()
def test_product_product_strengths_incremental_new_user_3star(self):
    """ Checks that incrementally updated product x product strengths equal those built from scratch.

        Saves a single activity -- the leading activity type registered under rating 3 --
        for a fixed user/product pair, dated at the session's present date, and then
        compares the incremental results against a from-scratch generation.
    """
    context = self.session_context
    activity = {
        "external_user_id": "******",
        "external_product_id": "p_mus_1",
        "activity": context.activities_by_rating[3][0],
        "created_at": context.get_present_date(),
    }

    pt.update_templates(context, activity)
    tasks.update_summaries(context, activity)

    self.compare_incremental_vs_from_scratch()
def test_product_product_strengths_incremental_with_new_impressions_random(self):
    """ Checks that incrementally updated product x product strengths equal those built from scratch.

        Runs 100 rounds. In each round a random user receives impressions on two randomly
        chosen products, the product templates are regenerated from scratch, and one random
        activity of that same user is saved one second later. The incremental results are
        then compared against a from-scratch generation.
        Does nothing when tests.INCLUDE_RANDOM_TESTS is falsy.
    """
    if not tests.INCLUDE_RANDOM_TESTS:
        return

    candidate_users = list(self.db_proxy.fetch_all_user_ids())
    candidate_products = list(self.db_proxy.fetch_all_product_ids())

    for step in range(100):
        chosen_user = random.choice(candidate_users)
        anonymous = config.is_anonymous(chosen_user)

        # A couple of impressions for the chosen user; both products are drawn up front.
        impressions_date = (pytz.utc.localize(dateutil.parser.parse("1988-11-06 6:00:00"))
                            + dt.timedelta(seconds=2 * step))
        impression_products = [random.choice(candidate_products) for _ in range(2)]
        for impression_product in impression_products:
            self.db_proxy.increment_impression_summary(
                user_id=chosen_user, product_id=impression_product,
                date=impressions_date, anonymous=anonymous)

        # It is important to regenerate from scratch (with all new impressions).
        pt.generate_templates(self.session_context)

        # One activity for that same user, dated one second after the impressions.
        activity_product = random.choice(candidate_products)
        chosen_type = random.choice(self.session_context.supported_activities)
        activity_date = (pytz.utc.localize(dateutil.parser.parse("1988-11-06 6:00:00"))
                         + dt.timedelta(seconds=2 * step + 1))
        activity = {
            "external_user_id": chosen_user,
            "external_product_id": activity_product,
            "activity": chosen_type,
            "created_at": activity_date,
        }
        pt.update_templates(self.session_context, activity)
        tasks.update_summaries(self.session_context, activity)

        # NOTE(review): assumed to run at the end of every round rather than once
        # after the loop -- confirm against the intended test granularity.
        self.compare_incremental_vs_from_scratch()
def update_collaborative_filtering_strengths(session_context, activity):
    """ Refreshes the user-user and the product-product strengths touched by an activity.

        Activities of anonymous users are ignored altogether. For any other user, the
        current user/product activity summary and -- when impressions are enabled in the
        session context -- the date of the first impression are fetched once here and
        handed over to both template updaters, whose own lookup flags are switched off.

        :param session_context: The session context.
        :param activity: The activity which triggered the updates.
    """
    user = activity["external_user_id"]
    if config.is_anonymous(user):
        # Anonymous users must NOT influence collaborative filtering strengths.
        return

    product = activity["external_product_id"]
    act_type = activity["activity"]

    log.info("Processing strengths [user=%s, product=%s, type=%s]..." % (user, product, act_type))
    start = time()

    u_p_activity_summary = _get_current_user_product_summary(session_context, user, product, anonymous=False)

    if session_context.impressions_enabled:
        summaries_by_product = session_context.data_proxy.fetch_impressions_summary(
            product_ids=[product], user_ids=[user], group_by_product=True, anonymous=False)
        summaries_by_user = summaries_by_product.get(product, {})
        # Falls back to (0, None) when the pair has no summary; item 1 is used as the date.
        first_impression_date = summaries_by_user.get(user, (0, None))[1]
    else:
        first_impression_date = None

    # Both updaters receive the data fetched above, so their own lookups are disabled.
    lookup_flags = {
        "should_lookup_activities_summary": False,
        "should_lookup_first_impression": False,
    }

    log.info("Updating user-user strengths affected by user/product pair ({0}, {1})...".format(user, product))
    ut.update_templates(session_context, activity, u_p_activity_summary, first_impression_date,
                        **lookup_flags)

    log.info("Updating product-product strengths affected by user/product pair ({0}, {1})...".format(user, product))
    pt.update_templates(session_context, activity, u_p_activity_summary, first_impression_date,
                        **lookup_flags)

    log.info("---Done processing strengths [user=%s, product=%s, type=%s] (took %.6f seconds)" %
             (user, product, act_type, time() - start))