def update_user_feature_list_for_show_action(user_info, recommend_result):
    """
    Feed the "show" (pv) action of a recommendation result back into the
    user's feature lists.

    The reasons in recommend_result.reason_list are grouped by the resource
    type that ResourceType.get_resource_type() reports for them, and each
    group is sent in a single
    online_clicklog_feedback.raw_update_user_action() call with
    favor_type='show'.

    @param user_info: provides uid and the user_feature_info mapping
    @param recommend_result: provides reason_list
    @return None
    """
    uid = user_info.uid

    # group the recommended reasons by resource type
    reasons_by_type = {}
    for reason in recommend_result.reason_list:
        reasons_by_type.setdefault(ResourceType.get_resource_type(reason), []).append(reason)

    # report one show action per resource type
    for resource_type, reason_names in reasons_by_type.iteritems():
        if resource_type in user_info.user_feature_info:
            user_feature_list = user_info.user_feature_info[resource_type]
        else:
            # not available on user_info: fetch it, and skip this resource
            # type when the fetch reports a falsy status
            fetched_ok, user_feature_list = get_user_feature_list(uid, resource_type)
            if not fetched_ok:
                continue

        shown_features = ItemFeatureList()
        for name in reason_names:
            shown_features.feature.add().feature_name = name

        errCode = online_clicklog_feedback.raw_update_user_action(
            uid, resource_type, user_feature_list, shown_features,
            card_feature='', favor_type='show')
        logger.info("update user action info, uid:%s,resource_type:%s,errCode:%s" % (uid, resource_type, errCode))
    return None
def filter_and_compose_recommend_item_list(user_info, reason, item_id_list_proto, result):
    """
    Try to register the items of item_id_list_proto under `reason` in result.

    Nothing is added when check_reason() rejects the reason, or when every
    candidate id is already in user_info.recent_push_item_id_list.
    Otherwise the fresh ids are wrapped in RecommendItem objects, added to
    result under `reason`, and the reason is recorded via add_reason().

    Side effect: every id that is kept is also added to
    user_info.recent_push_item_id_list.

    @param user_info: per-user state; its recent_push_item_id_list is updated
    @param reason: feature name the items are recommended for
    @param item_id_list_proto: message whose item_id field lists the candidates
    @param result: container receiving result.add(reason, items)
    @return None
    """
    logger.debug("try to add recommend, reason:%s, item_id_list_proto:%s" % (reason, item_id_list_proto))
    if not check_reason(user_info, reason):
        logger.debug("existing feature_type, ignore:%s" % reason)
        return

    resource_type = ResourceType.get_resource_type(reason)
    # The cap depends only on the resource type, so look it up once instead
    # of after every appended item.
    max_recommend_item_count_per_reason = get_max_recommend_item_count_per_resource(resource_type)

    result_item_id_list = []
    for item_id in item_id_list_proto.item_id:
        if item_id in user_info.recent_push_item_id_list:
            continue
        result_item_id_list.append(item_id)
        user_info.recent_push_item_id_list.add(item_id)
        # The cap is tested after appending, so the first fresh id is always
        # kept, whatever the cap is.
        if len(result_item_id_list) >= max_recommend_item_count_per_reason:
            logger.debug("real_list size:%s, select part:%s" % (len(item_id_list_proto.item_id), max_recommend_item_count_per_reason))
            break

    if not result_item_id_list:
        logger.info("all item id list has been pushed")
        logger.debug("all item id list has been pushed:%s" % item_id_list_proto)
        return

    result.add(reason, [RecommendItem(item_id=item_id) for item_id in result_item_id_list])
    add_reason(user_info, reason)
    return
def update_user_info(user_info, recommend_result):
    """
    Record a recommendation result on user_info and store the updated state
    through the model layer.

    Steps, in order:
      1. add every reason of recommend_result.reason2itemidlist via
         add_reason() and bump pv_count of the VisitInfo kept per resource
         type in user_info.recent_resource_visit_info, once per item;
      2. serialize user_info.recent_push_reason_info (reason -> weight) and
         pass it to model.set_online_user_feature_reason();
      3. pass every serialized VisitInfo to
         model.set_online_user_resource_visitinfo();
      4. call update_user_feature_list_for_show_action().

    A falsy status from a model setter is only logged as a warning.

    @return None
    """
    # NOTE: for now the item_feature structure is not fetched (for news and
    #       item resources alike; it would serve to compute the pv part of the
    #       user's features and feature preferences).
    # NOTE: pushing of the item_id list is guaranteed by the outer caller
    #       (Shirong's side).
    visit_info_by_type = user_info.recent_resource_visit_info
    for reason, item_list in recommend_result.reason2itemidlist.iteritems():
        add_reason(user_info, reason)
        for item in item_list:
            resource_type = ResourceType.get_resource_type(item.item_id)
            assert(resource_type)
            visit_info_by_type.setdefault(resource_type, VisitInfo()).pv_count += 1

    # store the pushed reasons together with their weights
    reason_features = ItemFeatureList()
    for reason, weight in user_info.recent_push_reason_info.iteritems():
        entry = reason_features.feature.add()
        entry.feature_name = reason
        entry.weight = weight

    uid = user_info.uid
    if not model.set_online_user_feature_reason(uid, reason_features.SerializeToString()):
        logger.warn("fail to set_online_user_feature_reason, uid:%s" % uid)

    # store the per-resource visit counters
    for resource_type, visit_info in visit_info_by_type.iteritems():
        stored = model.set_online_user_resource_visitinfo(uid, resource_type, visit_info.SerializeToString())
        if not stored:
            logger.warn("fail to set_online_user_resource_visitinfo, uid:%s, resource_type:%s" % (uid, resource_type))

    # feedback for the show action
    update_user_feature_list_for_show_action(user_info, recommend_result)
def update_user_favor_item_id_list(uid, resource_type, item_id, favor_type):
    """
    Append item_id to the user's online favor item id list and store it back.

    The current list is read with model.get_online_user_favor_item_list(),
    item_id is appended, the list is passed through
    limit_protobuf_repeated_scala_field_size() with
    MAX_FAVOR_ITEM_LIST_COUNT_PER_PERSON, and the result is written with
    model.set_online_user_favor_item_list().

    @param uid: user id
    @param resource_type: ignored; the resource type is always re-derived
        from item_id (the parameter is kept for interface compatibility)
    @param item_id: item id to append
    @param favor_type: not used by this function
    @return False when the stored list cannot be read or parsed; otherwise
        the status returned by model.set_online_user_favor_item_list()
        (falsy on failure)
    @raise AssertionError: when no resource type can be derived from item_id
        (only while assertions are enabled)
    """
    # The caller-supplied value is deliberately overwritten: the resource
    # type derived from item_id is the one used for every lookup below.
    resource_type = ResourceType.get_resource_type(item_id)
    assert(resource_type)

    errCode, kv_dict = model.get_online_user_favor_item_list(uid, [resource_type])
    if not is_valid_data(errCode, kv_dict, "online_user_favor_item_list", {"uid":uid, "resource_type":resource_type}):
        return False
    logger.debug("resource_type:%s, kv_dict:%s" % (resource_type, kv_dict))

    item_id_list_proto = ItemIdList()
    try:
        # an empty kv_dict means nothing is stored yet: start from an empty list
        if kv_dict:
            assert(resource_type in kv_dict)
            item_id_list_proto.ParseFromString(kv_dict[resource_type])
        logger.debug("favor item_id_list:%s" % item_id_list_proto)
    except Exception:
        # Exception rather than a bare except, so SystemExit and
        # KeyboardInterrupt are not swallowed and reported as bad data.
        logger.error("bad ItemIdList from get_online_user_favor_item_list:uid:%s, resource_type:%s, exception:%s" % (uid, resource_type, traceback.format_exc()))
        return False

    item_id_list_proto.item_id.append(item_id)
    limit_protobuf_repeated_scala_field_size(item_id_list_proto, "item_id", MAX_FAVOR_ITEM_LIST_COUNT_PER_PERSON)
    logger.debug("new item_id_list:%s" % item_id_list_proto)

    errCode = model.set_online_user_favor_item_list(uid, resource_type, item_id_list_proto.SerializeToString())
    if not errCode:
        logger.warn("fail to set_online_user_favor_item_list, uid:%s, resource_type:%s" % (uid, resource_type))
    else:
        logger.info("successfully to set_online_user_favor_item_list, uid:%s, resource_type:%s" % (uid, resource_type))
    return errCode
def to_list(self, user_info):
    """
    Flatten this container into a list of
    ((reason_key, display_reason), items) entries, in self.reason_list order.

    For each key, translate_reason.translate_reason() yields two candidate
    display reasons. The first one is used when the user's feature list for
    the key's resource type contains a feature named like the key with
    click_count > 0; otherwise the second one is used.

    Keys of self.reason_list that are missing from self.reason2itemidlist
    are logged as errors and skipped.

    @param user_info: provides the user_feature_info mapping
    @return list of ((key, reason), self.reason2itemidlist[key]) tuples
    """
    for resource_type, user_feature_list in user_info.user_feature_info.iteritems():
        logger.debug("resource_type:%s, user_info:%s" % (resource_type, user_feature_list))

    entries = []
    for key in self.reason_list:
        if key not in self.reason2itemidlist:
            logger.error("reason not existing:%s" % key)
            continue

        reason_if_clicked, reason_otherwise = translate_reason.translate_reason(key)
        key_resource_type = ResourceType.get_resource_type(key)

        if key_resource_type in user_info.user_feature_info:
            known_features = user_info.user_feature_info[key_resource_type].feature
        else:
            known_features = ()
        # any() stops at the first matching feature
        clicked_before = any(
            feature.feature_name == key and feature.visit_info.click_count > 0
            for feature in known_features)

        chosen = reason_if_clicked if clicked_before else reason_otherwise
        logger.debug("reason_old:%s,reason_new:%s,cur:%s" % (reason_if_clicked, reason_otherwise, chosen))
        entries.append(((key, chosen), self.reason2itemidlist[key]))
    return entries
def merge_user_recent_feature(log_dict):
    """
    Interface for the online log collector: merge one feedback log entry
    into the user's online state.

    @note: log_dict must contain the keys LogFeedBackKey.UID, ITEM_ID,
           CARD_FEATURE and TYPE, and item_id must be longer than
           ResourceType.RESOURCE_TYPE_LENTH.
    @return False when the entry is rejected by the checks above; otherwise
            the `and` of the three update results, so a falsy value also
            signals that at least one update failed
    """
    # NOTE: no strategy has been decided yet for de-duplicating repeated clicks
    logger.debug("begin merge_user_recent_feature")
    logger.info("log_dict:%s" % log_dict)

    for key, complaint in ((LogFeedBackKey.UID, "uid missing"),
                           (LogFeedBackKey.ITEM_ID, "item_id missing")):
        if key not in log_dict:
            logger.warn(complaint)
            return False
    uid = log_dict[LogFeedBackKey.UID]
    item_id = log_dict[LogFeedBackKey.ITEM_ID]

    # the item_id format is validated before the remaining keys are checked
    if not len(item_id) > ResourceType.RESOURCE_TYPE_LENTH:
        logger.warn("invalid item_id format")
        return False

    for key, complaint in ((LogFeedBackKey.CARD_FEATURE, "card_feature missing"),
                           (LogFeedBackKey.TYPE, "type missing")):
        if key not in log_dict:
            logger.warn(complaint)
            return False
    card_feature = log_dict[LogFeedBackKey.CARD_FEATURE]
    favor_type = log_dict[LogFeedBackKey.TYPE]

    resource_type = ResourceType.get_resource_type(item_id)

    # Overall strategy: update as much correct information as possible, even
    # if only part of it succeeds -- all three updates run unconditionally.
    # update the user's recent feature list
    feature_ok = update_user_recent_feature_list(uid, resource_type, item_id, card_feature, favor_type)
    # update the favor item id list
    favor_ok = update_user_favor_item_id_list(uid, resource_type, item_id, favor_type)
    # update the user resource visit info
    visit_ok = update_user_recent_resource_visit_info(uid, resource_type, favor_type)
    return feature_ok and favor_ok and visit_ok
def raw_get_news_online_item_by_feature_list(user_info, resource_type, feature_name_list_proto, count, max_recommend_item_count_per_reason, fetch_more_times, backup_feature_count):
    """
    Ask the online news server for items matching the user's features.

    A FeatureNameLimitList request is built from the features of
    feature_name_list_proto that pass check_reason() and do not start with
    one of NEWS_IGNORE_FEATURE_NAME_PREFIX; at most
    count + backup_feature_count features are requested. The serialized
    request is POSTed to NEWS_ONLINE_URL_SEARCH_BY_QUERY and the reply is
    parsed as a NewsFeatureItemList.

    For every returned feature, items already in
    user_info.recent_push_item_id_list are dropped and at most
    max_recommend_item_count_per_reason items are kept. A feature that
    yields fewer items than that is discarded via ignore_reason(); otherwise
    it is added to the result and recorded via add_reason(). Collection
    stops once result.size() reaches count.

    Side effects on user_info: accepted item ids are added to
    recent_push_item_id_list (also for features that end up discarded), and
    reasons are recorded through add_reason() / ignore_reason().

    @param user_info: per-user state, read and updated as described above
    @param resource_type: only used in the "no new feature" log message
    @param feature_name_list_proto: message whose feature entries carry feature_name
    @param count: number of reasons wanted in the result
    @param max_recommend_item_count_per_reason: items required/kept per reason
    @param fetch_more_times: multiplier applied to the per-feature fetch limit
    @param backup_feature_count: extra features requested beyond count
    @return RecommendItemContainer; empty when there is nothing to request or
            when the request or the parsing of the reply fails
    """
    result = RecommendItemContainer()
    request = FeatureNameLimitList()
    logger.debug("feature list proto:%s" % feature_name_list_proto)

    # both values are the same for every feature, so compute them once
    per_feature_limit = int(count * max_recommend_item_count_per_reason * fetch_more_times)
    max_feature_count = count + backup_feature_count

    for feature in feature_name_list_proto.feature:
        name = feature.feature_name
        if not check_reason(user_info, name):
            continue
        #@note: temp logic to ignore some feature type
        if any(name.startswith(prefix) for prefix in NEWS_IGNORE_FEATURE_NAME_PREFIX):
            logger.info("ignore feature:%s" % name)
            continue
        entry = request.feature_name_limit.add()
        entry.feature_name = name
        entry.limit = per_feature_limit
        if len(request.feature_name_limit) >= max_feature_count:
            logger.info("break for enough feature count:%s,feature list:%s" % (len(request.feature_name_limit), request))
            break

    if not request.feature_name_limit:
        logger.info("no new feature to get for news:%s, feature_list size:%s" % (resource_type, len(feature_name_list_proto.feature)))
        logger.debug("current feature_list :%s" % (feature_name_list_proto))
        return result

    try:
        socket_out = urllib2.urlopen(NEWS_ONLINE_URL_SEARCH_BY_QUERY, request.SerializeToString(), timeout=online_clicklog_feedback.NEWS_TIMEOUT_SECONDS)
        try:
            return_data = socket_out.read()
        finally:
            # always release the connection, even when read() fails
            socket_out.close()
    except Exception:
        logger.error("fail to talk with online news server,featurelists:%s,exceptions:%s" % (request, traceback.format_exc()))
        return result

    try:
        output = NewsFeatureItemList()
        output.ParseFromString(return_data)
        logger.debug("get news online item list result,user_info:%s,result:%s" % (user_info, output))
    except Exception:
        logger.error("bad parse news return value:%s" % traceback.format_exc())
        return result

    for feature_items in output.feature_item:
        if not len(feature_items.item_feature_info):
            continue
        reason = feature_items.feature_name
        item_list = []
        for info in feature_items.item_feature_info:
            if info.item_id in user_info.recent_push_item_id_list:
                logger.debug("ignore existing item id:%s" % info.item_id)
                continue
            item = RecommendItem()
            item.item_id = info.item_id
            item.item_info_json = info.item_info
            item.feature_name_list = info.item_feature_list
            item_list.append(item)
            user_info.recent_push_item_id_list.add(info.item_id)
            logger.info("item_list:%s" % item_list)
            if len(item_list) >= max_recommend_item_count_per_reason:
                logger.debug("real_list size:%s, select part:%s" % (len(feature_items.item_feature_info), max_recommend_item_count_per_reason))
                break

        # `not item_list` still matters when the per-reason cap is <= 0
        if not item_list or len(item_list) < max_recommend_item_count_per_reason:
            logger.info("not enough data, discard:%s" % (item_list))
            ignore_reason(user_info, reason)  # fetch not enough data, discard this reason
            continue

        result.add(reason, item_list)
        add_reason(user_info, reason)
        if result.size() >= count:
            break

    # NOTE(review): this relies on RecommendItemContainer's truthiness
    # reflecting emptiness -- confirm the class defines it accordingly.
    if not result:
        logger.info("get empty get_news_online_item_by_feature_list")
    return result