コード例 #1
0
def domain_classfiy(uid_list, uid_weibo):
    '''
    Main entry point for user domain classification.

    Input:
    uid_list: list of uids, [uid1, uid2, uid3, ...]
    uid_weibo: per-user word-frequency dict produced by word segmentation,
               {uid1: {'key1': f1, 'key2': f2, ...}, ...}

    Output (a 2-tuple):
    domain: label dict, {uid1: [label1, label2, label3], ...}
        label1 is the seed-user field or, failing that, the result of the
        friend/follower-structure classifier; label2 is the result of the
        verification-type classifier; label3 is the result of the text
        classifier.
    r_domain: recommended-label dict, {uid1: label, uid2: label, ...}

    Both dicts are keyed by str(uid) and contain one entry per item
    returned by get_user(uid_list).
    '''
    users = get_user(uid_list)
    friends_by_uid = get_friends(uid_list)

    domain = dict()
    r_domain = dict()
    # .items() and the `in` operator behave the same on Python 2 and 3,
    # unlike iteritems()/has_key().
    for uid, profile in users.items():
        result_label = []

        # label1: a seed user already carries its field; 'Null' means the
        # uid is not a seed user and has to be classified from its friends.
        field1 = getFieldFromProtou(uid, protou_dict=train_users)
        if field1 == 'Null':
            friend_list = friends_by_uid[uid]
            if len(friend_list):
                # The second return value (per-field scores) is not used here.
                field1, _ = user_domain_classifier_v1(
                    friend_list, fields_value=txt_labels, protou_dict=proto_users)
            else:
                field1 = 'other'
        result_label.append(field1)

        # label2: 'other' marks a user without a verification-type field.
        has_profile = profile != 'other'
        if has_profile:
            field2 = user_domain_classifier_v2(profile)
        else:
            field2 = 'other'
        result_label.append(field2)

        # label3: classify by text only when word frequencies were supplied.
        if uid in uid_weibo:
            field_dict, _ = domain_classfiy_by_text({uid: uid_weibo[uid]})
            field3 = field_dict[uid]
        else:
            field3 = 'other'
        result_label.append(field3)

        domain[str(uid)] = result_label

        verified_type = profile['verified_type'] if has_profile else 'other'
        r_domain[str(uid)] = get_recommend_result(verified_type, result_label)

    return domain, r_domain
コード例 #2
0
def domain_classfiy(uid_list, uid_weibo):
    '''
    Main entry point for user domain classification.

    Input:
    uid_list: list of uids, [uid1, uid2, uid3, ...]; when empty, the keys
              of uid_weibo are used instead.
    uid_weibo: per-user word-frequency dict produced by word segmentation,
               {uid1: {'key1': f1, 'key2': f2, ...}, ...}

    Output:
    domain: label dict, {uid1: [label1, label2, label3, ...], ...}

    A single dict is returned on every path. The early exits previously
    returned a (domain, r_domain) tuple, which did not match the main path
    or this documented contract.
    '''
    if not len(uid_weibo):
        # No text data at all: every requested uid (possibly none) is 'other'.
        # NOTE(review): keys here are the raw uids, while the main path below
        # uses str(uid) - confirm callers pass string uids.
        return {uid: ['other'] for uid in uid_list}
    if not len(uid_list):
        # list(...) keeps this a real list on Python 3 as well.
        uid_list = list(uid_weibo)

    users = get_user(uid_list)
    friends_by_uid = get_friends(uid_list)

    domain = dict()
    # .items() and the `in` operator behave the same on Python 2 and 3,
    # unlike iteritems()/has_key().
    for uid, profile in users.items():
        # Seed users already carry their label list; a non-empty result
        # is stored as-is.
        seed_labels = getFieldFromProtou(uid, protou_dict=train_users)
        if len(seed_labels):
            domain[str(uid)] = seed_labels
            continue

        # Classification by friend/follower relations (returns a list).
        friend_list = friends_by_uid[uid]
        if len(friend_list):
            field1 = user_domain_classifier_v1(
                friend_list, fields_value=txt_labels, protou_dict=train_users)
        else:
            field1 = ['other']

        # Classification by profile description (returns a list).
        # 'other' marks a user without a verification-type field.
        if profile == 'other':
            field_d = ['other']
        else:
            field_d = domain_classify_by_des({uid: profile['description']})[uid]

        # Classification by the user's text (returns a list).
        if uid in uid_weibo and len(uid_weibo[uid]):
            field3 = domain_classfiy_by_text({uid: uid_weibo[uid]})[uid]
        else:
            field3 = ['other']

        verified_type = 'other' if profile == 'other' else profile['verified_type']
        domain[str(uid)] = get_recommend_result(verified_type, field1, field_d, field3)

    return domain
コード例 #3
0
def domain_classfiy(uid_list, uid_weibo):
    '''
    Main entry point for user domain classification.

    Input:
    uid_list: list of uids, [uid1, uid2, uid3, ...]; when it is empty the
              keys of uid_weibo are classified instead.
    uid_weibo: per-user word-frequency dict produced by word segmentation,
               {uid1: {'key1': f1, 'key2': f2, ...}, ...}

    Output:
    domain: label dict, {uid1: [label1, label2, label3, ...], ...}

    Every path returns this one dict. The short-circuit exits used to hand
    back a (domain, r_domain) tuple, which disagreed with the main path.
    '''
    no_text = not len(uid_weibo)
    no_uids = not len(uid_list)

    if no_text:
        # Without any text data each requested uid is labelled 'other';
        # with no uids either, this is simply an empty dict.
        # NOTE(review): these keys are the raw uids whereas the loop below
        # stores str(uid) - confirm callers pass string uids.
        fallback = dict()
        for uid in uid_list:
            fallback[uid] = ['other']
        return fallback
    if no_uids:
        # Materialise the keys so a real list is passed on under Python 3 too.
        uid_list = list(uid_weibo)

    users = get_user(uid_list)
    friends_by_uid = get_friends(uid_list)

    domain = dict()
    # items() / `in` replace the Python-2-only iteritems() / has_key().
    for uid, profile in users.items():
        key = str(uid)

        # A non-empty list means the uid is a seed user; use it unchanged.
        labels = getFieldFromProtou(uid, protou_dict=train_users)
        if len(labels):
            domain[key] = labels
            continue

        # Label list derived from the friend/follower relations.
        friend_list = friends_by_uid[uid]
        if len(friend_list):
            by_relation = user_domain_classifier_v1(
                friend_list, fields_value=txt_labels, protou_dict=train_users)
        else:
            by_relation = ['other']

        # Label list derived from the profile description.
        # 'other' marks a user without a verification-type field.
        if profile == 'other':
            by_description = ['other']
        else:
            by_description = domain_classify_by_des({uid: profile['description']})[uid]

        # Label list derived from the user's text.
        if uid in uid_weibo and len(uid_weibo[uid]):
            by_text = domain_classfiy_by_text({uid: uid_weibo[uid]})[uid]
        else:
            by_text = ['other']

        if profile == 'other':
            verified_type = 'other'
        else:
            verified_type = profile['verified_type']
        domain[key] = get_recommend_result(
            verified_type, by_relation, by_description, by_text)

    return domain
コード例 #4
0
def domain_classfiy(uid_weibo):
    '''
    Main entry point for user domain classification.

    Input:
    uid_weibo: dict mapping each uid to its list of weibo records,
               {uid1: [weibo1, weibo2, weibo3, ...]}; the 'text' entry of
               every record is read.

    Output (a 2-tuple):
    domain: label dict, {uid1: [label1, label2, label3], ...}
        label1 is the seed-user field or, failing that, the result of the
        friend/follower-structure classifier; label2 is the result of the
        verification-type classifier; label3 is the result of the text
        classifier.
    r_domain: recommended-label dict, {uid1: label, uid2: label, ...}

    The second element used to be only the last user's label (and raised
    NameError when get_user() returned nothing); it is now the full
    per-user dict described above.
    '''
    weibo_text = dict()
    uidlist = []
    for uid, weibos in uid_weibo.items():
        # Every cleaned text is prefixed with ',' - this reproduces the
        # leading comma of the former `item + ',' + text` concatenation.
        weibo_text[uid] = ''.join(',' + re_cut(weibo['text']) for weibo in weibos)
        uidlist.append(uid)

    users = get_user(uidlist)
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('len(users): %s' % len(users))
    print(len(uidlist))

    domain = dict()
    r_domain = dict()
    for uid in users:
        result_label = []

        # label1: a seed user already carries its field; 'Null' means the
        # uid is not a seed user and has to be classified from its friends.
        field1 = getFieldFromProtou(uid, protou_dict=train_users)
        if field1 == 'Null':
            friends = get_friends([uid])[str(uid)]
            if len(friends):
                # The second return value (per-field scores) is not used here.
                field1, _ = user_domain_classifier_v1(
                    friends, fields_value=txt_labels, protou_dict=proto_users)
            else:
                field1 = 'other'
        result_label.append(field1)

        # label2: 'other' from read_by_xapian marks a user without a
        # verification-type field.
        r = read_by_xapian(xs, uid)
        if r == 'other':
            field2 = 'other'
        else:
            field2 = user_domain_classifier_v2(r)
        result_label.append(field2)

        # label3: classification by the concatenated weibo text.
        field_dict, _ = domain_classfiy_by_text({uid: weibo_text[uid]})
        result_label.append(field_dict[uid])

        domain[str(uid)] = result_label

        verified_type = 'other' if r == 'other' else r['verified_type']
        r_domain[str(uid)] = get_recommend_result(verified_type, result_label)

    return domain, r_domain
コード例 #5
0
ファイル: test_domain_v2.py プロジェクト: SwoJa/ruman
def domain_classfiy(uid_list, uid_weibo):
    '''
    Main entry point for user domain classification.

    Input:
    uid_list: list of uids, [uid1, uid2, uid3, ...]; when empty, the keys
              of uid_weibo are used instead.
    uid_weibo: per-user word-frequency dict produced by word segmentation,
               {uid1: {'key1': f1, 'key2': f2, ...}, ...}

    Output (a 2-tuple):
    domain: label dict, {uid1: [label1, label2, label3], ...}
        label1 is the seed-user field or, failing that, the result of the
        friend/follower-structure classifier; label2 is the result of the
        verification-type classifier; label3 is the result of the text
        classifier.
    r_domain: recommended-label dict, {uid1: label, uid2: label, ...}
    '''
    if not len(uid_weibo):
        # No text data at all: every requested uid (possibly none) is 'other'
        # in both results.
        # NOTE(review): on this path keys are the raw uids and both values
        # are ['other'] lists, whereas the main path stores str(uid) keys,
        # three-label lists and get_recommend_result() output - confirm
        # callers handle both shapes.
        domain = {uid: ['other'] for uid in uid_list}
        r_domain = {uid: ['other'] for uid in uid_list}
        return domain, r_domain
    if not len(uid_list):
        # list(...) keeps this a real list on Python 3 as well.
        uid_list = list(uid_weibo)

    users = get_user(uid_list)
    friends_by_uid = get_friends(uid_list)

    domain = dict()
    r_domain = dict()
    # .items() and the `in` operator behave the same on Python 2 and 3,
    # unlike iteritems()/has_key().
    for uid, profile in users.items():
        result_label = []

        # label1: a seed user already carries its field; 'Null' means the
        # uid is not a seed user and has to be classified from its friends.
        field1 = getFieldFromProtou(uid, protou_dict=train_users)
        if field1 == 'Null':
            friend_list = friends_by_uid[uid]
            if len(friend_list):
                # The second return value (per-field scores) is not used here.
                field1, _ = user_domain_classifier_v1(
                    friend_list, fields_value=txt_labels, protou_dict=proto_users)
            else:
                field1 = 'other'
        result_label.append(field1)

        # label2: 'other' marks a user without a verification-type field.
        has_profile = profile != 'other'
        if has_profile:
            field2 = user_domain_classifier_v2(profile)
        else:
            field2 = 'other'
        result_label.append(field2)

        # label3: classify by text only when non-empty word frequencies exist.
        if uid in uid_weibo and len(uid_weibo[uid]):
            field_dict, _ = domain_classfiy_by_text({uid: uid_weibo[uid]})
            field3 = field_dict[uid]
        else:
            field3 = 'other'
        result_label.append(field3)

        domain[str(uid)] = result_label

        verified_type = profile['verified_type'] if has_profile else 'other'
        r_domain[str(uid)] = get_recommend_result(verified_type, result_label)

    return domain, r_domain