def main(argv):
    """
        Runs one batch of user template consolidation.

        :param argv: The command-line arguments: argv[0] is the environment name handed to init_session;
            the optional argv[1] is a comma-separated list of user ids, which is split and passed on to
            consolidate_user_templates (None is passed when it is absent).

        :returns: A dict {"success": True} when the batch completes, or
            {"success": False, "message": reason} when the environment is missing, when the latest recorded
            batch still has status "running", or when any exception is raised (the message is then the
            formatted traceback).
    """
    if not argv:
        msg = "You must specify the environment"
        log.error(msg)
        return {"success": False, "message": msg}

    # Bound before the try block so that the exception handler can tell how far the setup went:
    # a failure in init_session() or get_present_date() must not turn into an UnboundLocalError there.
    session = None
    timestamp = None

    try:
        # command-line arguments
        env = argv[0]
        session = init_session(env)

        user_ids = None
        if len(argv) >= 2:
            user_ids = argv[1].split(",")

        timestamp = session.get_present_date()
        start = time()

        latest_run = session.data_proxy.fetch_latest_batch_info_user_template_consolidation()
        if latest_run:
            if latest_run.get("status") == "running":
                msg = "An old consolidation batch is still running. Won't start another one."
                log.info(msg)
                return {"success": False, "message": msg}

        session.data_proxy.save_timestamp_user_template_consolidation(
            status="running", timestamp=timestamp)

        consolidate_user_templates(session, user_ids)
        session.data_proxy.ensure_indexes_cache()

        elapsed_time = time() - start

        session.data_proxy.save_timestamp_user_template_consolidation(
            status="success", timestamp=timestamp, elapsed_time=elapsed_time)

        return {"success": True}

    except Exception:
        log.exception('Exception on {0}:'.format(__name__))
        # Captured right away, so the message always describes the original failure.
        error_trace = traceback.format_exc()

        # The "failed" status can only be recorded once both the session and the batch timestamp exist.
        if session is not None and timestamp is not None:
            try:
                session.data_proxy.save_timestamp_user_template_consolidation(
                    status="failed", timestamp=timestamp)
            except Exception:
                # Best effort: the caller must still receive the original failure below.
                log.exception('Could not save the failed status on {0}:'.format(__name__))

        return {"success": False, "message": error_trace}
def generate_templates(session_context):
    """
        Generates the templates from scratch: calls generate_strengths and then
        consolidate_user_templates, in that order, both with the given session context.

        :param session_context: The session context.
    """
    stages = (generate_strengths, consolidate_user_templates)
    for stage in stages:
        stage(session_context)
def update_templates(session_context, new_activity, u_p_activities_summary=None, first_impression_date=None,
                     should_lookup_activities_summary=True, should_lookup_first_impression=True):
    """
        Updates user x user strengths based on a single new activity.

        The function returns early, without saving anything, when the activity user is anonymous, when the
        activity type has no rating in session_context.rating_by_activity, or when the activity repeats the
        rating of the latest processed activity while impressions are disabled.

        Otherwise it saves the affected strengths (existing pairs and new pairs separately), consolidates the
        templates of the affected users if session_context.should_consolidate_user_templates_on_the_fly is set,
        and finally records the activity as the latest one processed for user-user strengths.

        :param session_context: The session context.
        :param new_activity: a dict {"external_user_id": user_id,
                                     "external_product_id": product_id,
                                     "activity": activity_type,
                                     "created_at": datetime}.
        :param u_p_activities_summary: The summary of activities for that (user, product) pair, if any,
            in the form of a dict {"external_user_id": the user id,
                                   "external_product_id": the product id,
                                   "activity": the latest activity type,
                                   "created_at": the datetime of the latest activity,
                                   "uu_latest_type": the type of the latest activity to be processed
                                                     for that pair during u-u strengths calculation,
                                   "uu_latest_date": the date of the latest activity to be processed
                                                     for that pair during u-u strengths calculation}.
        :param first_impression_date: The date of the first impression, if any, the activity user has received
            on the activity product.
        :param should_lookup_activities_summary: If True and u_p_activities_summary is None, it queries
            the database for the activities summary of the (user, product) pair.
        :param should_lookup_first_impression: If True, impressions are enabled and first_impression_date
            is None, it queries the database for the first impression.

        :returns: None.
    """
    log.info("Computing user-user strengths...")

    user = new_activity["external_user_id"]
    if config.is_anonymous(user):
        log.info("Anonymous users should not affect user-user strengths! Exiting now.")
        return

    product = new_activity["external_product_id"]
    activity_date = new_activity["created_at"]
    activity_type = new_activity["activity"]
    rating = session_context.rating_by_activity.get(activity_type)
    if rating is None:
        log.error("Unsupported activity type: %s" % activity_type)
        return

    # The cutoff date is the most recent between the one persisted by the latest batch (if any)
    # and the one suggested by the configured window.
    suggested_cutoff_date = session_context.get_present_date() - \
        dt.timedelta(session_context.user_user_strengths_window)
    latest_batch_info = session_context.data_proxy.fetch_latest_batch_info_user_user_strengths()
    if latest_batch_info is not None:
        latest_batch_timestamp = latest_batch_info["timestamp"]
        persisted_cutoff_date = latest_batch_info.get("cutoff_date")
        if persisted_cutoff_date is None:
            cutoff_date = suggested_cutoff_date
        else:
            cutoff_date = max(persisted_cutoff_date, suggested_cutoff_date)
    else:
        latest_batch_timestamp = None
        cutoff_date = suggested_cutoff_date

    # Looks up the first impression only if the caller did not provide it (and did not forbid the lookup).
    if session_context.impressions_enabled and first_impression_date is None and should_lookup_first_impression:
        product_user_impressions_summary = session_context.data_proxy.fetch_impressions_summary(
            product_ids=[product], user_ids=[user], group_by_product=True,
            anonymous=False).get(product, {}).get(user, (0, None))
        # presumably the summary is a (count, first impression date) pair --- TODO confirm against data_proxy
        first_impression_date = product_user_impressions_summary[1]

    # Looks up the activities summary only if the caller did not provide it (and did not forbid the lookup).
    if u_p_activities_summary is None and should_lookup_activities_summary:
        u_p_activities_summary_as_singleton_list = session_context.data_proxy.fetch_activity_summaries_by_user(
            user_ids=[user], product_ids=[product], indexed_fields_only=False, anonymous=False).get(user, [])
        if len(u_p_activities_summary_as_singleton_list) > 0:
            u_p_activities_summary = u_p_activities_summary_as_singleton_list[0]

    # A previous rating of 0 stands for "no previous activity processed for this pair".
    previous_activity_rating = 0
    if u_p_activities_summary is not None:
        previous_activity_type = u_p_activities_summary.get("uu_latest_type")
        if previous_activity_type is not None:
            previous_activity_rating = session_context.rating_by_activity[previous_activity_type]
            # NOTE(review): previous_activity_date is bound only in this branch; it is read further below
            # whenever previous_activity_rating reaches the minimum thresholds, which assumes that those
            # thresholds are greater than 0 --- confirm.
            previous_activity_date = u_p_activities_summary["uu_latest_date"]

    if previous_activity_rating == rating and not session_context.impressions_enabled:
        return  # repeating the latest activity --- there is nothing to do here
                # (if using impressions, must recalculate anyway to account for latest impressions)

    # Variations to be applied to the operands of the affected strengths.
    # numerator_diff is indexed by CONSERVATIVE and AGGRESSIVE (presumably 0 and 1 --- defined elsewhere).
    numerator_diff = [0, 0]
    denominator_diff = 0

    remove_previous_activity_contribution = \
        previous_activity_rating >= min(session_context.min_rating_conservative,
                                        session_context.min_rating_recommendable_from_user)

    if remove_previous_activity_contribution:
        if session_context.impressions_enabled:
            if first_impression_date is not None:
                # must remove former contribution if impression was already processed incrementally
                remove_previous_activity_contribution = previous_activity_date >= first_impression_date
                # must remove also if generation from scratch happened after the first impression
                if not remove_previous_activity_contribution and latest_batch_timestamp is not None:
                    remove_previous_activity_contribution = latest_batch_timestamp >= first_impression_date

    # Removes the former contribution of the previous commanding activity for that (user, product) pair.
    if remove_previous_activity_contribution:
        if previous_activity_rating >= session_context.min_rating_conservative:
            numerator_diff[CONSERVATIVE] -= 1
        if previous_activity_rating >= session_context.min_rating_aggressive:
            numerator_diff[AGGRESSIVE] -= 1
        if previous_activity_rating >= session_context.min_rating_recommendable_from_user:
            denominator_diff -= 1

    # Adds the contribution of this activity
    if rating >= session_context.min_rating_conservative:
        numerator_diff[CONSERVATIVE] += 1
    if rating >= session_context.min_rating_aggressive:
        numerator_diff[AGGRESSIVE] += 1
    if rating >= session_context.min_rating_recommendable_from_user:
        denominator_diff += 1

    # Fetches all the users who consumed this product
    users_by_rating = session_context.data_proxy.fetch_users_by_rating_by_product(
        product_ids=[product], min_date=cutoff_date,
        max_date=session_context.get_present_date())[0].get(product, {})

    # Includes the user of the current activity (remember: this activity might not have been saved yet)
    users_set = users_by_rating.get(rating, set())
    users_set.add(user)
    users_by_rating[rating] = users_set
    # NOTE(review): when the previous rating equals the current one, the lines below remove the user
    # from the very set it has just been added to --- confirm that this is intended.
    if u_p_activities_summary is not None:
        users_set = users_by_rating.get(previous_activity_rating, set())
        if user in users_set:
            users_set.remove(user)
            users_by_rating[previous_activity_rating] = users_set

    # Gathers the users by rating threshold.
    # The upper bound 6 is hard-coded (presumably the maximum rating is 5 --- TODO confirm).
    users_who_rated_conservatively_high = set()
    for r in range(session_context.min_rating_conservative, 6):
        users_who_rated_conservatively_high |= users_by_rating.get(r, set())
    users_who_rated_aggressively_high = set()
    for r in range(session_context.min_rating_aggressive, 6):
        users_who_rated_aggressively_high |= users_by_rating.get(r, set())
    users_who_rated_sufficiently_for_recommendation = set()
    for r in range(session_context.min_rating_recommendable_from_user, 6):
        users_who_rated_sufficiently_for_recommendation |= users_by_rating.get(r, set())

    # The operand maps remain None unless the corresponding section below fetches them.
    numerators_with_user_as_target = None
    denominators_with_user_as_target = None
    numerators_with_user_as_template = None
    denominators_with_user_as_template = None

    # Maps keyed by (target user, template user) pairs; the values are the docs to be saved,
    # with the keys "nc", "na" and "denominator".
    strengths_map_for_insert = {}
    strengths_map_for_update = {}

    # This user as TARGET

    # If this user has consumed this product without previous impressions, then it shall not contribute
    # for user-user strengths with this user as target.
    update_user_as_target = True
    if session_context.impressions_enabled:
        update_user_as_target = first_impression_date is not None

    # Existing pairs with user as target.
    if update_user_as_target and numerator_diff != [0, 0]:
        strength_operands_with_user_as_target = session_context.data_proxy.fetch_user_user_strength_operands(
            users=[user])
        numerators_with_user_as_target = strength_operands_with_user_as_target[0]
        denominators_with_user_as_target = strength_operands_with_user_as_target[1]

        # Only values of existing keys are reassigned here, so the dict size does not change while iterating.
        for user_and_template, numerator_tuple in numerators_with_user_as_target.items():
            template = user_and_template[1]
            if template in users_who_rated_sufficiently_for_recommendation:
                new_numerator_tuple = [numerator_tuple[0] + numerator_diff[0],
                                       numerator_tuple[1] + numerator_diff[1]]
                numerators_with_user_as_target[user_and_template] = new_numerator_tuple
                update_doc = strengths_map_for_update.get(user_and_template, {})
                update_doc["nc"] = new_numerator_tuple[CONSERVATIVE]
                update_doc["na"] = new_numerator_tuple[AGGRESSIVE]
                strengths_map_for_update[user_and_template] = update_doc

    # New pairs with user as target.
    # (numerator_diff[0] == 1 implies numerator_diff != [0, 0], hence the operands were fetched above)
    if update_user_as_target and numerator_diff[0] == 1:
        # if this user has *just* rated this product high...
        new_templates = []
        for template in users_who_rated_sufficiently_for_recommendation:
            if template != user and (user, template) not in numerators_with_user_as_target:
                # new pair
                new_templates += [template]
                new_numerator_tuple = [1 if rating >= session_context.min_rating_conservative else 0,
                                       1 if rating >= session_context.min_rating_aggressive else 0]
                numerators_with_user_as_target[(user, template)] = new_numerator_tuple
                update_doc = strengths_map_for_insert.get((user, template), {})
                update_doc["nc"] = new_numerator_tuple[CONSERVATIVE]
                update_doc["na"] = new_numerator_tuple[AGGRESSIVE]
                strengths_map_for_insert[(user, template)] = update_doc

        products_by_rating_by_new_template = session_context.data_proxy.fetch_products_by_rating_by_user(
            user_ids=new_templates, min_date=cutoff_date, max_date=session_context.get_present_date())[0]
        for new_template in new_templates:
            recommendable_products = set()
            for r in range(session_context.min_rating_recommendable_from_user, 6):
                # NOTE(review): plain indexing --- assumes an entry exists for every new template
                # and every rating (e.g. nested defaultdicts); confirm against data_proxy.
                recommendable_products |= products_by_rating_by_new_template[new_template][r]
            if session_context.impressions_enabled:
                # Retrieves the intersection of the recommendable products of the template user
                # with the products with impressions for the target user
                recommendable_products_with_impressions = \
                    session_context.data_proxy.fetch_products_with_impressions_by_user(
                        user_ids=[user], product_ids=list(recommendable_products),
                        anonymous=False).get(user, set())
                new_denominator = len(recommendable_products_with_impressions)
            else:
                new_denominator = len(recommendable_products)
            denominators_with_user_as_target[(user, new_template)] = new_denominator
            insert_doc = strengths_map_for_insert.get((user, new_template), {})
            insert_doc["denominator"] = new_denominator
            strengths_map_for_insert[(user, new_template)] = insert_doc

    # This user as TEMPLATE

    # Existing pairs with user as template.
    if session_context.bidirectional_uu_strength_updates and denominator_diff != 0:
        user_user_strength_operands = session_context.data_proxy.fetch_user_user_strength_operands(
            templates=[user])
        numerators_with_user_as_template = user_user_strength_operands[0]
        denominators_with_user_as_template = user_user_strength_operands[1]

        for user_and_template in denominators_with_user_as_template:
            # updates the denominator...
            denominator = denominators_with_user_as_template[user_and_template]
            new_denominator = denominator + denominator_diff
            denominators_with_user_as_template[user_and_template] = new_denominator
            update_doc = strengths_map_for_update.get(user_and_template, {})
            update_doc["denominator"] = new_denominator
            strengths_map_for_update[user_and_template] = update_doc

            # ...and the numerator, in case the target user has consumed this product
            if user_and_template[0] in users_who_rated_conservatively_high and \
                    user_and_template in numerators_with_user_as_template:
                numerator_tuple = numerators_with_user_as_template[user_and_template]
                numerator_tuple[CONSERVATIVE] += denominator_diff
                if user_and_template[0] in users_who_rated_aggressively_high:
                    numerator_tuple[AGGRESSIVE] += denominator_diff
                numerators_with_user_as_template[user_and_template] = numerator_tuple
                update_doc = strengths_map_for_update.get(user_and_template, {})
                update_doc["nc"] = numerator_tuple[CONSERVATIVE]
                update_doc["na"] = numerator_tuple[AGGRESSIVE]
                strengths_map_for_update[user_and_template] = update_doc

    # New pairs with user as template.
    # (denominator_diff == 1 implies denominator_diff != 0, hence the operands were fetched above)
    if session_context.bidirectional_uu_strength_updates and denominator_diff == 1:
        # if this user has *just* rated this product aggressively high...
        new_targets = []
        for target in users_who_rated_conservatively_high:
            if target != user and (target, user) not in denominators_with_user_as_template:
                # it is a new pair indeed
                new_targets += [target]

        if len(new_targets) > 0:
            products_of_user_as_template = session_context.data_proxy.fetch_products_by_rating_by_user(
                user_ids=[user], min_date=cutoff_date,
                max_date=session_context.get_present_date())[0].get(user, {})
            # Includes the product of the current activity (remember again: this activity might not have been saved yet)
            products_set = products_of_user_as_template.get(rating, set())
            products_set.add(product)
            products_of_user_as_template[rating] = products_set

            recommendable_products_of_user_as_template = set()
            for r in range(session_context.min_rating_recommendable_from_user, 6):
                recommendable_products_of_user_as_template |= products_of_user_as_template.get(r, set())

            if session_context.impressions_enabled:
                product_impressions_by_target = session_context.data_proxy.fetch_impressions_summary(
                    user_ids=new_targets, product_ids=list(recommendable_products_of_user_as_template),
                    group_by_product=False, anonymous=False)

            # First pass: the denominators of the new pairs.
            for new_target in new_targets:
                if session_context.impressions_enabled:
                    new_denominator = len(product_impressions_by_target.get(new_target, []))
                else:
                    new_denominator = len(recommendable_products_of_user_as_template)
                denominators_with_user_as_template[(new_target, user)] = new_denominator
                insert_doc = strengths_map_for_insert.get((new_target, user), {})
                insert_doc["denominator"] = new_denominator
                strengths_map_for_insert[(new_target, user)] = insert_doc

            # Second pass: the numerators of the new pairs.
            for new_target in new_targets:
                if new_target in users_who_rated_conservatively_high:
                    numerator_tuple = numerators_with_user_as_template.get((new_target, user), [0, 0])
                    numerator_tuple[CONSERVATIVE] += 1
                    if new_target in users_who_rated_aggressively_high:
                        numerator_tuple[AGGRESSIVE] += 1
                    numerators_with_user_as_template[(new_target, user)] = numerator_tuple
                    insert_doc = strengths_map_for_insert.get((new_target, user), {})
                    insert_doc["nc"] = numerator_tuple[CONSERVATIVE]
                    insert_doc["na"] = numerator_tuple[AGGRESSIVE]
                    strengths_map_for_insert[(new_target, user)] = insert_doc

    # Computes all affected strengths for UPDATE
    if len(strengths_map_for_update) > 0:
        _prepare_strengths_map(session_context, user, strengths_map_for_update,
                               numerators_with_user_as_target, denominators_with_user_as_target,
                               numerators_with_user_as_template, denominators_with_user_as_template)
        log.info("Saving user-user strengths (UPDATE)...")
        session_context.data_proxy.save_uu_strengths(strengths_map_for_update, upsert=True)
        log.info("[{0}] user-user strengths updated".format(len(strengths_map_for_update)))
    else:
        log.info("No old strengths to update.")

    # Computes all affected strengths for INSERT
    if len(strengths_map_for_insert) > 0:
        _prepare_strengths_map(session_context, user, strengths_map_for_insert,
                               numerators_with_user_as_target, denominators_with_user_as_target,
                               numerators_with_user_as_template, denominators_with_user_as_template)
        log.info("Saving user-user strengths (INSERT)...")
        session_context.data_proxy.save_uu_strengths(strengths_map_for_insert, upsert=False)
        log.info("[{0}] user-user strengths inserted".format(len(strengths_map_for_insert)))
    else:
        log.info("No new strengths to insert.")

    # Consolidates cached user templates
    log.info("Determining users whose templates must be consolidated...")
    # Every target user of a new pair is consolidated; target users of updated pairs are checked below.
    users_to_consolidate = {user_and_template[0] for user_and_template in strengths_map_for_insert}
    updated_users = {user_and_template[0] for user_and_template in strengths_map_for_update}
    old_templates_map = session_context.data_proxy.fetch_user_templates(list(updated_users))
    for user_and_template, strength_doc in strengths_map_for_update.items():
        target_user = user_and_template[0]
        template_user = user_and_template[1]
        # presumably a list of (strength, template id) entries, strongest first --- TODO confirm
        old_templates = old_templates_map.get(target_user)
        if old_templates:
            cutoff_strength = old_templates[-1][0]  # the strength of the weakest template
            if isinstance(cutoff_strength, str):
                # a string in the strength position is treated as "no cutoff"
                cutoff_strength = 0
            old_template_ids = {t[1] for t in old_templates}
        else:
            cutoff_strength = 0
            old_template_ids = set()
        # The "strength" key is not set in this function
        # (presumably _prepare_strengths_map fills it in --- TODO confirm).
        if strength_doc["strength"] > cutoff_strength or \
                template_user in old_template_ids or \
                len(old_template_ids) < session_context.user_templates_count:
            users_to_consolidate.add(target_user)

    if session_context.should_consolidate_user_templates_on_the_fly:
        if len(users_to_consolidate) > 0:
            log.info("Consolidating templates of %d users..." % len(users_to_consolidate))
            consolidate_user_templates(session_context, users_list=list(users_to_consolidate))
        else:
            log.info("No users with templates to consolidate.")

    # Records this activity as the latest one processed for the (user, product) pair.
    session_context.data_proxy.save_latest_activity_for_user_user_strengths(
        user, product, activity_type, activity_date)

    log.info("UU strengths and templates updated successfully.")