def load_weibo(uid_weibo):
    """Score each user's words against every topic domain.

    Args:
        uid_weibo: mapping of uid -> the per-user value handed to ``com_p``
            as its first argument (presumably a word-frequency dict;
            confirm against callers).

    Returns:
        A tuple ``(result_data, p_data)``:
        * ``result_data`` maps uid -> a dict with the keys produced by
          ``sta_dict()``, each value replaced by the ``com_p`` result for
          that domain.
        * ``p_data`` maps uid -> ``rank_result(domain_p)`` for that user.
        Both dicts are empty when ``uid_weibo`` is empty.
    """
    result_data = {}
    p_data = {}
    # ``.items()`` rather than ``.iteritems()``: the latter exists only on
    # Python 2, while ``.items()`` iterates the same pairs on 2 and 3.
    for uid, word_freq in uid_weibo.items():
        domain_p = sta_dict()
        # Snapshot the keys so the dict is never iterated while being written.
        for domain in list(domain_p):
            # Per the original comment: probability that the document
            # belongs to this class.
            domain_p[domain] = com_p(
                word_freq,
                DOMAIN_DICT[domain],
                DOMAIN_COUNT[domain],
                LEN_DICT[domain],
                TOTAL,
            )
        result_data[uid] = domain_p
        p_data[uid] = rank_result(domain_p)
    return result_data, p_data
def topic_classfiy(uid_list, uid_weibo):
    """Main entry point for user topic classification.

    Args:
        uid_list: list of uids to report on, e.g. ``[uid1, uid2, ...]``.
            May be empty or ``None``, in which case the keys of
            ``uid_weibo`` are used.
        uid_weibo: word-frequency dict after word segmentation, e.g.
            ``{uid1: {'key1': f1, 'key2': f2, ...}, ...}``. May be empty
            or ``None``.

    Returns:
        A tuple ``(result_data, uid_topic)``:
        * ``result_data``: per-user topic distribution, e.g.
          ``{uid1: {'art': 0.1, 'social': 0.2, ...}, ...}`` (the original
          documentation describes 18 topics).
        * ``uid_topic``: the topics each user follows most, e.g.
          ``{uid1: ['art', 'social', 'media'], ...}`` (at most 3 according
          to the original documentation).
        Any uid in ``uid_list`` with no classification result gets
        ``sta_dict()`` as its distribution and ``['life']`` as its topics.
    """
    # Treat None like an empty container instead of crashing in len().
    # len() is kept for uid_list so any sized sequence keeps working.
    has_weibo = bool(uid_weibo)
    has_uids = uid_list is not None and len(uid_list) > 0

    if not has_weibo:
        # Nothing to classify: every requested uid (if any) gets the default.
        result_data = {}
        uid_topic = {}
        if has_uids:
            for uid in uid_list:
                result_data[uid] = sta_dict()
                uid_topic[uid] = ['life']
        return result_data, uid_topic

    if not has_uids:
        # No explicit uid list: report on every user that has text.
        uid_list = list(uid_weibo)

    result_data, uid_topic = load_weibo(uid_weibo)

    # Back-fill requested uids that the classifier produced nothing for.
    for uid in uid_list:
        # ``in`` replaces dict.has_key(), which was removed in Python 3.
        if uid not in result_data:
            result_data[uid] = sta_dict()
            uid_topic[uid] = ['life']
    return result_data, uid_topic