def filter_and_compose_recommend_item_list(user_info, reason, item_id_list_proto, result):
    """
    Add the not-yet-pushed item ids of item_id_list_proto to result under reason.

    Nothing is added when check_reason(user_info, reason) is falsy, or when
    every candidate id is already in user_info.recent_push_item_id_list.
    Otherwise each selected id is added to user_info.recent_push_item_id_list,
    the selected items are stored through result.add(reason, items) and the
    reason is recorded with add_reason(user_info, reason).

    @param user_info: object exposing recent_push_item_id_list (used with
                      `in` and .add())
    @param reason: key under which the items are stored in result
    @param item_id_list_proto: object whose item_id field is iterated for
                               candidate ids
    @param result: container updated in place via result.add(reason, items)
    @return None
    """
    logger.debug("try to add recommend, reason:%s, item_id_list_proto:%s" % (reason, item_id_list_proto))
    if not check_reason(user_info, reason):
        logger.debug("existing feature_type, ignore:%s" % reason)
        return

    resource_type = ResourceType.get_resource_type(reason)
    # The cap depends only on the resource type, so look it up once instead
    # of once per appended item.
    max_recommend_item_count_per_reason = get_max_recommend_item_count_per_resource(resource_type)

    result_item_id_list = []
    for item_id in item_id_list_proto.item_id:
        if item_id in user_info.recent_push_item_id_list:
            continue
        result_item_id_list.append(item_id)
        # Mark immediately so a duplicate id later in the same list is skipped.
        user_info.recent_push_item_id_list.add(item_id)
        if len(result_item_id_list) >= max_recommend_item_count_per_reason:
            logger.debug("real_list size:%s, select part:%s" % (len(item_id_list_proto.item_id), max_recommend_item_count_per_reason))
            break

    if not result_item_id_list:
        logger.info("all item id list has been pushed")
        logger.debug("all item id list has been pushed:%s" % item_id_list_proto)
        return

    result_recommend_item = [RecommendItem(item_id=item_id) for item_id in result_item_id_list]
    result.add(reason, result_recommend_item)
    add_reason(user_info, reason)
def get_item_list_by_item(user_info, resource_type, count):
    """
    Build recommendations from the items related to the user's favor items.

    Steps:
      1. load the user's favor item ids for resource_type via
         model.get_online_user_favor_item_list;
      2. keep the ids whose reason
         (resource_type + ITEM_RECOMMEND_FEATURE_TYPE + item_id) passes
         check_reason, and process them in reversed order;
      3. load the related item lists via
         model.get_offline_item_recommend_item_list and feed them to
         filter_and_compose_recommend_item_list until result.size() reaches
         count.
    Reasons without a related-item entry are passed to ignore_reason.

    @param user_info: object exposing uid; also handed to the reason helpers
    @param resource_type: key looked up in the favor-item dict and prefix of
                          every reason built here
    @param count: stop once result.size() >= count
    @return RecommendItemContainer (left empty when a lookup fails)
    """
    logger.debug("begin get_item_list_by_item")
    uid = user_info.uid
    result = RecommendItemContainer()

    # get the user's favor item_id list
    errCode, kv_dict = model.get_online_user_favor_item_list(uid, [resource_type])
    if not is_valid_data(errCode, kv_dict, "online_user_favor_item_list", {"uid":uid, "resource_type":resource_type}):
        return result
    if not kv_dict:
        return result
    assert resource_type in kv_dict

    try:
        reason_prefix = resource_type + ITEM_RECOMMEND_FEATURE_TYPE

        favor_item_id_list_proto = ItemIdList()
        favor_item_id_list_proto.ParseFromString(kv_dict[resource_type])

        item_id_list = [
            item_id for item_id in favor_item_id_list_proto.item_id
            if check_reason(user_info, reason_prefix + item_id)
        ]
        if not item_id_list:
            logger.info("all item id list have no item2item result:%s" % item_id_list)
            return result

        #@note: use latest item first
        item_id_list.reverse()

        # recommend by item id
        errCode, kv_dict = model.get_offline_item_recommend_item_list(item_id_list)
        if not is_valid_data(errCode, kv_dict, "offline_item_recommend_item_list", {"item_id_list":"..."}):
            return result
        if not kv_dict:
            for item_id in item_id_list:
                ignore_reason(user_info, reason_prefix + item_id)
            return result

        for item_id in item_id_list:
            if result.size() >= count:
                break
            # Build the reason before any branch uses it: previously the
            # "missing" branch read `reason` before it was assigned for this
            # item (NameError on the first item, stale value afterwards).
            reason = reason_prefix + item_id
            if item_id not in kv_dict:
                logger.info("no item2itemid list for id:%s" % item_id)
                ignore_reason(user_info, reason)
                continue
            related_item_id_list_proto = ItemIdList()
            related_item_id_list_proto.ParseFromString(kv_dict[item_id])
            filter_and_compose_recommend_item_list(user_info, reason, related_item_id_list_proto, result)
    except Exception:
        # Best effort: log and return whatever was collected so far.
        logger.error("bad get_item_list_by_item:%s" % traceback.format_exc())
    return result
def get_item_list_by_feature_list(user_info, resource_type, feature_name_list_proto, count):
    """
    Build recommendations for the features in feature_name_list_proto.

    Features whose name fails check_reason are dropped first. For
    ResourceType.RESOURCE_TYPE_NEWS the remaining features are delegated to
    get_news_online_item_by_feature_list. For every other resource type the
    item lists come from model.get_offline_feature_hot_item_list and are fed,
    in feature order, to filter_and_compose_recommend_item_list until
    result.size() reaches count. Features without an entry are passed to
    ignore_reason.

    @param user_info: user state handed to the reason helpers
    @param resource_type: compared against ResourceType.RESOURCE_TYPE_NEWS to
                          pick the data source
    @param feature_name_list_proto: object whose feature field is iterated;
                                    each entry exposes feature_name
    @param count: stop once result.size() >= count
    @return RecommendItemContainer
    """
    logger.debug("begin get_item_list_by_feature_list")
    result = RecommendItemContainer()

    # Keep the surviving names both as a plain list (offline lookup) and as
    # a proto (news lookup).
    feature_name_list = []
    filtered_feature_name_list_proto = UserFeatureList()
    for feature in feature_name_list_proto.feature:
        feature_name = feature.feature_name
        if not check_reason(user_info, feature_name):
            continue
        feature_name_list.append(feature_name)
        cur = filtered_feature_name_list_proto.feature.add()
        cur.feature_name = feature_name

    if not feature_name_list:
        logger.info("no new reason from feature list,current feature_name size:%s" % len(feature_name_list_proto.feature))
        logger.debug("current feature_name_list_proto:%s" % feature_name_list_proto)
        return result

    # news like
    if ResourceType.RESOURCE_TYPE_NEWS == resource_type:
        return get_news_online_item_by_feature_list(user_info, resource_type, filtered_feature_name_list_proto, count)

    # items like
    errCode, kv_dict = model.get_offline_feature_hot_item_list(feature_name_list)
    if not is_valid_data(errCode, kv_dict, "offline_feature_hot_item_list", {"feature_name_list":"..."}):
        return result
    if not kv_dict:
        for feature_name in feature_name_list:
            ignore_reason(user_info, feature_name)
        return result

    try:
        for feature_name in feature_name_list:  #@note:ordered
            logger.debug("check result with feature_name:%s" % feature_name)
            if result.size() >= count:
                logger.debug("fetch enough recommend result, count:%s" % count)
                break
            if feature_name not in kv_dict:
                logger.info("no result for feature_name:%s" % feature_name)
                ignore_reason(user_info, feature_name)
                continue
            item_id_list = ItemIdList()
            item_id_list.ParseFromString(kv_dict[feature_name])
            filter_and_compose_recommend_item_list(user_info, feature_name, item_id_list, result)
        logger.debug("get_item_list_by_feature_list,result_size:%s, uid:%s, resource_type:%s" % (result.size(), user_info.uid, resource_type))
    except Exception:
        # Best effort: log and return whatever was collected so far.
        # `except Exception` (not a bare except) lets SystemExit and
        # KeyboardInterrupt propagate.
        logger.error("bad ItemIdList for get_item_list_by_feature_list:%s" %
                     traceback.format_exc())
    return result
def raw_get_news_online_item_by_feature_list(user_info, resource_type, feature_name_list_proto, count, max_recommend_item_count_per_reason, fetch_more_times, backup_feature_count):
    """
    Query the online news service for items matching the given features.

    Builds a FeatureNameLimitList from the features that pass check_reason
    and do not start with any NEWS_IGNORE_FEATURE_NAME_PREFIX entry (at most
    count + backup_feature_count of them), posts it to
    NEWS_ONLINE_URL_SEARCH_BY_QUERY and parses the reply as a
    NewsFeatureItemList. For each returned feature, items not already in
    user_info.recent_push_item_id_list are collected up to
    max_recommend_item_count_per_reason. A feature that yields fewer items
    than that is passed to ignore_reason; otherwise its items are stored with
    result.add and the feature is recorded with add_reason.

    @param user_info: object exposing recent_push_item_id_list (used with
                      `in` and .add()); also handed to the reason helpers
    @param resource_type: only used in a log message
    @param feature_name_list_proto: object whose feature field is iterated;
                                    each entry exposes feature_name
    @param count: stop once result.size() >= count
    @param max_recommend_item_count_per_reason: number of items required and
                                                kept per feature
    @param fetch_more_times: multiplier applied to the per-feature limit
                             sent to the service
    @param backup_feature_count: extra features requested on top of count
    @return RecommendItemContainer (empty on request or parse failure)
    @note: updates user_info (recent_push_item_id_list and reasons)
    """
    result = RecommendItemContainer()
    request_proto = FeatureNameLimitList()
    logger.debug("feature list proto:%s" % feature_name_list_proto)

    # Same limit for every requested feature, so compute it once.
    per_feature_limit = int(count * max_recommend_item_count_per_reason * fetch_more_times)
    max_feature_count = count + backup_feature_count

    for feature in feature_name_list_proto.feature:
        feature_name = feature.feature_name
        if not check_reason(user_info, feature_name):
            continue
        #@note: temp logic to ignore some feature type
        if any(feature_name.startswith(prefix) for prefix in NEWS_IGNORE_FEATURE_NAME_PREFIX):
            logger.info("ignore feature:%s" % feature_name)
            continue
        cur = request_proto.feature_name_limit.add()
        cur.feature_name = feature_name
        cur.limit = per_feature_limit
        if len(request_proto.feature_name_limit) >= max_feature_count:
            logger.info("break for enough feature count:%s,feature list:%s" % (len(request_proto.feature_name_limit), request_proto))
            break

    if not request_proto.feature_name_limit:
        logger.info("no new feature to get for news:%s, feature_list size:%s" % (resource_type, len(feature_name_list_proto.feature)))
        logger.debug("current feature_list :%s" % (feature_name_list_proto))
        return result

    try:
        socket_out = urllib2.urlopen(NEWS_ONLINE_URL_SEARCH_BY_QUERY, request_proto.SerializeToString(), timeout=online_clicklog_feedback.NEWS_TIMEOUT_SECONDS)
        try:
            return_data = socket_out.read()
        finally:
            # Always release the connection, even when read() fails.
            socket_out.close()
    except Exception:
        logger.error("fail to talk with online news server,featurelists:%s,exceptions:%s" % (request_proto, traceback.format_exc()))
        return result

    try:
        output = NewsFeatureItemList()
        output.ParseFromString(return_data)
        logger.debug("get news online item list result,user_info:%s,result:%s" % (user_info, output))
    except Exception:
        logger.error("bad parse news return value:%s" % traceback.format_exc())
        return result

    for reason_item_id_list in output.feature_item:
        if not len(reason_item_id_list.item_feature_info):
            continue
        reason = reason_item_id_list.feature_name
        item_list = []
        for item_info in reason_item_id_list.item_feature_info:
            if item_info.item_id in user_info.recent_push_item_id_list:
                logger.debug("ignore existing item id:%s" % item_info.item_id)
                continue
            item = RecommendItem()
            item.item_id = item_info.item_id
            item.item_info_json = item_info.item_info
            item.feature_name_list = item_info.item_feature_list
            item_list.append(item)
            user_info.recent_push_item_id_list.add(item_info.item_id)
            logger.info("item_list:%s" %item_list)
            if len(item_list) >= max_recommend_item_count_per_reason:
                logger.debug("real_list size:%s, select part:%s" % (len(reason_item_id_list.item_feature_info), max_recommend_item_count_per_reason))
                break

        if not item_list or len(item_list) < max_recommend_item_count_per_reason:
            # fetch not enough data, discard this reason
            # NOTE(review): the ids collected above stay in
            # recent_push_item_id_list although the reason is discarded -
            # confirm this is intended.
            logger.info("not enough data, discard:%s" % (item_list))
            ignore_reason(user_info, reason)
            continue

        result.add(reason, item_list)
        add_reason(user_info, reason)
        if result.size() >= count:
            break

    # NOTE(review): relies on the truthiness of RecommendItemContainer -
    # confirm it defines __len__/__nonzero__, otherwise this never logs.
    if not result:
        logger.info("get empty get_news_online_item_by_feature_list")
    return result