Example #1
def get_show_trace_followers(xnr_user_no):

    es_get_result = es.get(index=tw_xnr_fans_followers_index_name,doc_type=tw_xnr_fans_followers_index_type,\
                    id=xnr_user_no)['_source']

    trace_follow_list = es_get_result['trace_follow_list']

    weibo_user_info = []

    if trace_follow_list:
        mget_results = es.mget(index=twitter_user_index_name,doc_type=twitter_user_index_type,\
                            body={'ids':trace_follow_list})['docs']
        # print 'mget_results::',mget_results
        for result in mget_results:
            if result['found']:
                weibo_user_info.append(result['_source'])
            else:
                uid = result['_id']

                weibo_user_info.append({
                    'uid': uid,
                    'statusnum': 0,
                    'fansnum': 0,
                    'friendsnum': 0,
                    'photo_url': '',
                    'sex': '',
                    'nick_name': uid,
                    'user_location': ''
                })

    return weibo_user_info
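The snippet above leans on two mget behaviors: docs come back one per requested id, in request order, and each carries a 'found' flag, so missing users can be backfilled with a placeholder profile. A minimal usage sketch, assuming the client and index settings from the snippet are configured (the id below is hypothetical):

# 'WXNR0001' is a hypothetical virtual-account id.
followers = get_show_trace_followers('WXNR0001')
for user in followers:
    # placeholder entries have nick_name == uid and zeroed counters
    print user['nick_name'], user['fansnum']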
Example #2
def my_topic_classfiy(uid_list, datetime_list):
    topic_dict_results = {}
    topic_string_results = {}
    # Save computed results to the database; before computing, check whether the database already holds previously stored results, to save work
    uids = uid_list
    unresolved_uids = []
    res = es.mget(index=fb_portrait_index_name, doc_type=fb_portrait_index_type, body={'ids': uids})['docs']
    for r in res:
        uid = r['_id']
        if 'found' in r:
            found = r['found']
            if found and 'topic' in r['_source']:
                topic = r['_source']['topic']
                topic_string = r['_source']['topic_string']
                topic_dict_results[uid] = json.loads(topic)
                topic_string_results[uid] = [topic_ch2en_dict[ch_topic] for ch_topic in topic_string.split('&')]
            else:
                unresolved_uids.append(uid)
        else:   # no record for this uid in the ES index yet
            unresolved_uids.append(uid)

    # uids not found in the database: compute and store
    user_topic_dict = {}
    user_topic_list = {}
    if unresolved_uids:
        fb_flow_text_index_list = []
        for datetime in datetime_list:
            fb_flow_text_index_list.append(flow_text_index_name_pre + datetime)
        user_topic_data = get_filter_keywords(fb_flow_text_index_list, unresolved_uids)
        user_topic_dict, user_topic_list = topic_classfiy(unresolved_uids, user_topic_data)

        user_topic_string = {}
        for uid, topic_list in user_topic_list.items():
            li = []
            for t in topic_list:
                li.append(zh_data[name_list.index(t)].decode('utf8'))
            user_topic_string[uid] = '&'.join(li)
        user_topic = {}
        for uid in unresolved_uids:
            if uid in user_topic_dict:
                user_topic[uid] = {
                    'filter_keywords': json.dumps(user_topic_data[uid]),
                    'topic': json.dumps(user_topic_dict[uid]),
                    'topic_string': user_topic_string[uid]
                }
            else:
                user_topic[uid] = {
                    'filter_keywords': json.dumps({}),
                    'topic': json.dumps({}),
                    'topic_string': ''
                }
        save_data2es(user_topic)

    # merge cached and newly computed results
    user_topic_dict.update(topic_dict_results)
    user_topic_list.update(topic_string_results)
    return user_topic_dict, user_topic_list
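Examples #2, #9 and #10 all follow the same cache-aside shape: mget against a portrait index, compute only the misses, persist them, then merge cached and fresh results. A stripped-down sketch of that shape, with hypothetical compute_fn/save_fn standing in for topic_classfiy and save_data2es:

def cached_classify(es, index, doc_type, uids, field, compute_fn, save_fn):
    cached, misses = {}, []
    docs = es.mget(index=index, doc_type=doc_type, body={'ids': uids})['docs']
    for doc in docs:
        if doc.get('found') and field in doc.get('_source', {}):
            cached[doc['_id']] = doc['_source'][field]
        else:
            misses.append(doc['_id'])
    fresh = compute_fn(misses) if misses else {}
    if fresh:
        save_fn(fresh)  # persist so the next call is a cache hit
    fresh.update(cached)
    return fresh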
Example #3
def get_recommend_at_user(xnr_user_no):
    #_id  = user_no2_id(user_no)
    es_result = es.get(index=tw_xnr_index_name,
                       doc_type=tw_xnr_index_type,
                       id=xnr_user_no)['_source']
    #print 'es_result:::',es_result
    uid = es_result['uid']
    daily_interests = es_result['daily_interests']
    if S_TYPE == 'test':
        now_ts = datetime2ts(S_DATE_TW)
    else:
        now_ts = int(time.time())
    datetime = ts2datetime(now_ts - 24 * 3600)

    index_name = twitter_flow_text_index_name_pre + datetime
    nest_query_list = []
    daily_interests_list = daily_interests.split('&')

    es_results_daily = es.search(index=index_name,doc_type=twitter_flow_text_index_type,\
                        body={'query':{'match_all':{}},'size':200,\
                        'sort':{'timestamp':{'order':'desc'}}})['hits']['hits']

    uid_list = []
    if es_results_daily:
        for result in es_results_daily:
            result = result['_source']
            uid_list.append(result['uid'])

    ## get nick_name from the user index by uid
    uid_nick_name_dict = dict()  # uid never changes, but nick_name may
    es_results_user = es.mget(index=twitter_user_index_name,
                              doc_type=twitter_user_index_type,
                              body={'ids': uid_list})['docs']
    i = 0
    for result in es_results_user:

        if result['found']:
            result = result['_source']
            uid = result['uid']
            nick_name = result['name']
            if nick_name:
                i += 1
                uid_nick_name_dict[uid] = nick_name
        if i >= DAILY_AT_RECOMMEND_USER_TOP:
            break

    return uid_nick_name_dict
Example #4
def filter_mid(mid_list):
    llen = len(mid_list)
    l_1000 = llen / 1000  # number of full 1000-id blocks (Python 2 integer division)

    result = []

    for i in range(l_1000+1):
        tmp = mid_list[i*1000:(i+1)*1000]
        if tmp:
            es_results = es_xnr.mget(index="social_sensing_text", doc_type="text", body={"ids":tmp}, _source=False)["docs"]
            #print 'es_results:::',es_results
            for item in es_results:
                #print 'item::',item
                if not item["found"]:
                    result.append(item["_id"])

    return result
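Example #4 bounds each mget body to 1000 ids; llen/1000 is Python 2 integer division (Python 3 would need llen // 1000), and iterating to l_1000 + 1 covers the final partial slice. The slicing idiom generalizes to a small helper, sketched here with no assumptions beyond the standard library:

def chunks(seq, size=1000):
    # yield consecutive slices of at most `size` items
    for i in range(0, len(seq), size):
        yield seq[i:i + size]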
Example #5
def get_forward_numerical_info(task_name, ts):
    results = []
    ts_series = []
    for i in range(1, forward_n + 1):
        ts_series.append(ts - i * time_interval)

    # check if detail es of task exists
    doctype = task_name
    index_exist = es_xnr.indices.exists_type(index_sensing_task, doctype)
    if not index_exist:
        print "new create task detail index"
        mappings_sensing_task(doctype)

    if ts_series:
        search_results = es_xnr.mget(index=index_sensing_task,
                                     doc_type=doctype,
                                     body={"ids": ts_series})['docs']
        found_count = 0
        average_total = []
        average_negative = []
        for item in search_results:
            if item['found']:
                temp = item['_source']
                sentiment_dict = json.loads(temp['sentiment_distribution'])
                average_total.append(int(temp['weibo_total_number']))
                average_negative.append(
                    int(sentiment_dict["2"]) + int(sentiment_dict['3']) +
                    int(sentiment_dict['4']) + int(sentiment_dict['5']) +
                    int(sentiment_dict['6']))
                found_count += 1

        if found_count > initial_count:
            number_mean = np.mean(average_total)
            number_std = np.std(average_total)
            sentiment_mean = np.mean(average_negative)
            sentiment_std = np.std(average_negative)
            results = [
                1, number_mean, number_std, sentiment_mean, sentiment_std
            ]
        else:
            results = [0]

    return results
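Example #5 returns either [0] (too little history) or [1, number_mean, number_std, sentiment_mean, sentiment_std], which a caller can turn into z-scores for burst detection. A hedged sketch of that downstream use; the task name, observed count, and threshold are all illustrative, not from the original code:

import time

ts = int(time.time())
stats = get_forward_numerical_info('demo_task', ts)  # hypothetical task name
current_total = 120  # hypothetical count observed in the current interval
if stats and stats[0] == 1:
    _, n_mean, n_std, s_mean, s_std = stats
    z = (current_total - n_mean) / n_std if n_std else 0.0
    if z > 3.0:  # illustrative threshold
        print 'burst detected'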
Example #6
def create_event_warning(xnr_user_no,today_datetime,write_mark):
    # get event names
    hashtag_list = get_hashtag(today_datetime)
    print 'hashtag_list:', hashtag_list

    twitter_flow_text_index_name=get_timets_set_indexset_list(twitter_flow_text_index_name_pre,today_datetime,today_datetime)

    # look up the followers list
    lookup_type='followers_list'
    followers_list=lookup_xnr_fans_followers(xnr_user_no,lookup_type)

    event_warming_list=[]
    for event_item in hashtag_list:
        event_warming_content=dict()     # event name, main participating users, representative tweets, event influence, average event time
        event_warming_content['event_name']=event_item['event_name']
        event_influence_sum=0
        event_time_sum=0       
        query_body={
            'query':{
                'filtered':{
                    'filter':{
                        'bool':{
                            'must':[
                                {'term':{'hashtag':event_item['event_name']}},
                                {'range':{'sensitive':{'gte':1}}}
                            ]
                        }
                    }
                }
            },
            'size':MAX_WARMING_SIZE,
            'sort':{'sensitive':{'order':'desc'}}
        }       
        event_results=es_xnr.search(index=twitter_flow_text_index_name,doc_type=twitter_flow_text_index_type,body=query_body)['hits']['hits']
        if event_results:
            twitter_result=[]
            alluser_num_dict=dict()
            #print 'sencond_time:::',int(time.time())
            for item in event_results:
                # look up the three metric fields
                tid_result=lookup_tid_attend_index(item['_source']['tid'],today_datetime)
                if tid_result:
                    item['_source']['comment']=tid_result['comment']
                    item['_source']['share']=tid_result['share']
                    item['_source']['favorite']=tid_result['favorite']
                else:
                    item['_source']['comment']=0
                    item['_source']['share']=0
                    item['_source']['favorite']=0  
                #print 'event_content:',item['_source']['text']          
                # tally user activity
                uid_str = str(item['_source']['uid'])
                if uid_str in alluser_num_dict:
                    followers_mark = set_intersection(item['_source']['uid'], followers_list)
                    if followers_mark > 0:
                        alluser_num_dict[uid_str] += 2  # tweets from followed users count double
                    else:
                        alluser_num_dict[uid_str] += 1
                else:
                    alluser_num_dict[uid_str] = 1
                                   

                # compute influence
                origin_influence_value=(1+item['_source']['comment']+item['_source']['share']+item['_source']['favorite'])*(1+item['_source']['sensitive'])
                followers_value=judge_user_type(item['_source']['uid'],followers_list)
                item['_source']['twitter_influence_value']=origin_influence_value*followers_value
                
                # look up user nickname
                item['_source']['nick_name']=get_user_nickname(item['_source']['uid'])
                
                twitter_result.append(item['_source'])

                # accumulate influence and time
                event_influence_sum=event_influence_sum+item['_source']['twitter_influence_value']
                event_time_sum=event_time_sum+item['_source']['timestamp']            
        
            # print 'third_time:::',int(time.time())
            # representative tweets
            twitter_result.sort(key=lambda k:(k.get('twitter_influence_value',0)),reverse=True)
            event_warming_content['main_twitter_info']=json.dumps(twitter_result)

            # event influence and average event time
            number=len(event_results)
            event_warming_content['event_influence']=event_influence_sum/number
            event_warming_content['event_time']=event_time_sum/number


            # sort users by activity count
            alluser_num_dict = sorted(alluser_num_dict.items(), key=lambda d: d[1], reverse=True)
            main_userid_list = [item[0] for item in alluser_num_dict]

            # main participating users' info
            main_user_info=[]
            user_es_result=es_xnr.mget(index=twitter_user_index_name,doc_type=twitter_user_index_type,body={'ids':main_userid_list})['docs']
            # print 'user_es_result:',user_es_result
            for item in user_es_result:
                user_dict = dict()
                user_dict['uid'] = item['_id']
                if item['found']:
                    source = item['_source']
                    user_dict['username'] = source['username']
                    user_dict['profileimageurl'] = source.get('profileimageurl', '')
                    user_dict['statuscount'] = source.get('statuscount', 0)
                    user_dict['followerscount'] = source.get('followerscount', 0)
                    user_dict['friendscount'] = source.get('friendscount', 0)
                else:
                    user_dict['username'] = ''
                    user_dict['profileimageurl'] = ''
                    user_dict['statuscount'] = 0
                    user_dict['followerscount'] = 0
                    user_dict['friendscount'] = 0
                main_user_info.append(user_dict)
            event_warming_content['main_user_info']=json.dumps(main_user_info)



            # print 'fourth_time:::',int(time.time())
            event_warming_content['xnr_user_no']=xnr_user_no
            event_warming_content['validity']=0
            event_warming_content['timestamp']=today_datetime
            now_time=int(time.time())
            task_id=xnr_user_no+'_'+str(now_time) 
        
            # write to the database
            if write_mark:
                # print 'today_datetime:::',ts2datetime(today_datetime)
                mark=write_envent_warming(today_datetime,event_warming_content,task_id)
                event_warming_list.append(mark)
            else:
                event_warming_list.append(event_warming_content)

        # print 'fifth_time:::',int(time.time())
    return event_warming_list
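The influence score above is (1 + comment + share + favorite) * (1 + sensitive), scaled by the follower weight from judge_user_type. A worked check with illustrative numbers (the weight of 2 is hypothetical):

comment, share, favorite, sensitive = 3, 5, 2, 1
origin_influence_value = (1 + comment + share + favorite) * (1 + sensitive)  # 11 * 2 = 22
followers_value = 2  # hypothetical weight for a tweet from a followed user
print origin_influence_value * followers_value  # 44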
Example #7
def get_hot_sensitive_recommend_at_user(sort_item):

    if S_TYPE == 'test':
        now_ts = datetime2ts(S_DATE_TW)
    else:
        now_ts = int(time.time())
    datetime = ts2datetime(now_ts - 24 * 3600)

    #sort_item = 'sensitive'
    sort_item_2 = 'timestamp'

    index_name = twitter_flow_text_index_name_pre + datetime

    query_body = {
        'query': {
            'match_all': {}
        },
        'sort': {
            sort_item: {
                'order': 'desc'
            }
        },
        'size': HOT_EVENT_TOP_USER,
        '_source': ['uid', 'user_fansnum', 'retweeted', 'timestamp']
    }

    es_results = es.search(index=index_name,
                           doc_type=twitter_flow_text_index_type,
                           body=query_body)['hits']['hits']

    uid_fansnum_dict = dict()
    if es_results:
        for result in es_results:
            result = result['_source']
            uid = result['uid']
            uid_fansnum_dict[uid] = {}
            uid_fansnum_dict[uid][sort_item_2] = result[sort_item_2]

    uid_fansnum_dict_sort_top = sorted(uid_fansnum_dict.items(),
                                       key=lambda x: x[1][sort_item_2],
                                       reverse=True)

    uid_set = set()

    for item in uid_fansnum_dict_sort_top:
        uid_set.add(item[0])

    uid_list = list(uid_set)

    ## get nick_name from the user index by uid
    uid_nick_name_dict = dict()  # uid never changes, but nick_name may
    es_results_user = es.mget(index=twitter_user_index_name,
                              doc_type=twitter_user_index_type,
                              body={'ids': uid_list})['docs']
    i = 0
    for result in es_results_user:
        if result['found']:
            result = result['_source']
            uid = result['uid']
            nick_name = result['username']
            if nick_name:
                i += 1
                uid_nick_name_dict[uid] = nick_name
        if i >= HOT_AT_RECOMMEND_USER_TOP:
            break

    return uid_nick_name_dict
Example #8
def detect_by_seed_users(seed_users):
    retweet_mark = 1  # only partial data available so far
    comment_mark = 0  # no data yet

    group_uid_list = set()
    all_union_result_dict = {}
    #get retweet/comment es db_number
    now_ts = time.time()
    db_number = get_db_num(now_ts)

    #step1: mget retweet and be_retweet
    if retweet_mark == 1:
        # retweet_index_name = retweet_index_name_pre + str(db_number)
        be_retweet_index_name = be_retweet_index_name_pre + str(db_number)
        #mget retwet
        '''
        try:
            retweet_result = es.mget(index=retweet_index_name, doc_type=retweet_index_type, \
                                             body={'ids':seed_users}, _source=True)['docs']
        except:
            retweet_result = []
        '''
        #mget be_retweet
        try:
            be_retweet_result = es.mget(index=be_retweet_index_name, doc_type=be_retweet_index_type, \
                                                body={'ids':seed_users} ,_source=True)['docs']
        except:
            be_retweet_result = []
    '''
    #step2: mget comment and be_comment
    if comment_mark == 1:
        comment_index_name = comment_index_name_pre + str(db_number)
        be_comment_index_name = be_comment_index_name_pre + str(db_number)
        #mget comment
        try:
            comment_result = es.mget(index=comment_index_name, doc_type=comment_index_type, \
                                             body={'ids':seed_users}, _source=True)['docs']
        except:
            comment_result = []
        #mget be_comment
        try:
            be_comment_result = es.mget(index=be_comment_index_name, doc_type=be_comment_index_type, \
                                            body={'ids':seed_users}, _source=True)['docs']
        except:
            be_comment_result = []
    '''
    #step3: union retweet/be_retweet/comment/be_comment result
    # union_count walks the mget results, which are aligned with seed_users
    for union_count, iter_search_uid in enumerate(seed_users):
        try:
            uid_retweet_dict = json.loads(
                retweet_result[union_count]['_source']['uid_retweet'])
        except:
            uid_retweet_dict = {}
        try:
            uid_be_retweet_dict = json.loads(
                be_retweet_result[union_count]['_source']['uid_be_retweet'])
        except:
            uid_be_retweet_dict = {}
        try:
            uid_comment_dict = json.loads(
                comment_result[union_count]['_source']['uid_comment'])
        except:
            uid_comment_dict = {}
        try:
            uid_be_comment_dict = json.loads(
                be_comment_result[union_count]['_source']['uid_be_comment'])
        except:
            uid_be_comment_dict = {}
        #union four type user set
        union_result = union_dict(uid_retweet_dict, uid_be_retweet_dict,
                                  uid_comment_dict, uid_be_comment_dict)
        all_union_result_dict[iter_search_uid] = union_result
    '''
    !!!! conversion/extraction step:
    pull every uid out of all_union_result_dict
    '''
    for seeder_uid, inter_dict in all_union_result_dict.iteritems():
        for uid, inter_count in inter_dict.iteritems():
            group_uid_list.add(uid)

    group_uid_list = list(group_uid_list)

    return group_uid_list
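Example #8 pairs each seed uid with its mget result positionally, which is valid because mget returns docs in the same order as the requested ids. A minimal check of that alignment, assuming the client and index names from the snippet are configured:

docs = es.mget(index=be_retweet_index_name, doc_type=be_retweet_index_type,
               body={'ids': seed_users})['docs']
for uid, doc in zip(seed_users, docs):
    assert doc['_id'] == uid  # mget preserves request order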
Example #9
def my_domain_classfiy(uid_list, datetime_list):
    domain_results = {}
    # Save computed results to the database; before computing, check whether the database already holds previously stored results, to save work
    uids = uid_list
    unresolved_uids = []
    res = es.mget(index=fb_portrait_index_name,
                  doc_type=fb_portrait_index_type,
                  body={'ids': uids})['docs']
    for r in res:
        uid = r['_id']
        if 'found' in r:
            found = r['found']
            if found and 'domain' in r['_source']:
                domain = r['_source']['domain']
                domain_results[uid] = domain
            else:
                unresolved_uids.append(uid)
        else:  # no record for this uid in the ES index yet
            unresolved_uids.append(uid)

    # uids not found in the database: compute and store
    user_domain = {}
    user_domain_temp = {}
    if unresolved_uids:
        fb_flow_text_index_list = []
        for datetime in datetime_list:
            fb_flow_text_index_list.append(flow_text_index_name_pre + datetime)

        user_domain_data = {}
        #load num of text
        count_result = count_text_num(unresolved_uids, fb_flow_text_index_list)
        #load baseinfo
        fb_user_query_body = {
            'query': {
                "filtered": {
                    "filter": {
                        "bool": {
                            "must": [
                                {
                                    "terms": {
                                        "uid": unresolved_uids
                                    }
                                },
                            ]
                        }
                    }
                }
            },
            'size': MAX_SEARCH_SIZE,
            "fields":
            ["bio", "about", "description", "quotes", "category", "uid"]
        }
        try:
            search_results = es.search(index=facebook_user_index_name,
                                       doc_type=facebook_user_index_type,
                                       body=fb_user_query_body)['hits']['hits']
            for item in search_results:
                content = item['fields']
                uid = content['uid'][0]
                if not uid in user_domain_data:
                    text_num = count_result[uid]
                    user_domain_data[uid] = {
                        'bio_str': '',
                        'bio_list': [],
                        'category': '',
                        'number_of_text': text_num
                    }
                # For long text, Goslate splits the input at punctuation and line
                # breaks into sub-texts of roughly 2000 bytes each, queries them one
                # by one, and concatenates the translations before returning; that is
                # how it works around the per-request length limit.
                category = content.get('category', [''])[0]
                # Some user descriptions run to 3000+ characters, add nothing useful,
                # and trip up translation, so truncate to 1000.
                description = content.get('description', [''])[0][:1000]
                quotes = content.get('quotes', [''])[0][:1000]
                bio = content.get('bio', [''])[0][:1000]
                about = content.get('about', [''])[0][:1000]
                user_domain_data[uid]['bio_list'] = [
                    quotes, bio, about, description
                ]
                user_domain_data[uid]['category'] = category
        except Exception, e:
            print e
        # translating one user per request is too slow, so batch the translations
        trans_uid_list = []
        untrans_bio_data = []
        cut = 100
        n = len(user_domain_data) / cut  # number of full batches (Python 2 integer division)
        for uid, content in user_domain_data.items():
            trans_uid_list.append(uid)
            untrans_bio_data.extend(content['bio_list'])
            content.pop('bio_list')
            if n:
                if len(trans_uid_list) % cut == 0:
                    temp_trans_bio_data = trans_bio_data(untrans_bio_data)
                    for i in range(len(trans_uid_list)):
                        uid = trans_uid_list[i]
                        user_domain_data[uid]['bio_str'] = '_'.join(
                            temp_trans_bio_data[4 * i:4 * i + 4])
                    trans_uid_list = []
                    untrans_bio_data = []
                    n = n - 1
            else:
                if len(trans_uid_list) == (len(user_domain_data) % cut):
                    temp_trans_bio_data = trans_bio_data(untrans_bio_data)
                    for i in range(len(trans_uid_list)):
                        uid = trans_uid_list[i]
                        user_domain_data[uid]['bio_str'] = '_'.join(
                            temp_trans_bio_data[4 * i:4 * i + 4])
                    trans_uid_list = []
                    untrans_bio_data = []
        # domain classification
        user_domain_temp = domain_main(user_domain_data)
        for uid in unresolved_uids:
            if uid in user_domain_temp:
                user_domain[uid] = {'domain': user_domain_temp[uid]}
            else:
                user_domain_temp[uid] = 'other'
                user_domain[uid] = {'domain': 'other'}
        save_data2es(user_domain)

    # merge cached and newly computed results
    user_domain_temp.update(domain_results)
    return user_domain_temp
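Example #9 flattens four bio fields per user into one list before the batch translation, then re-slices the output with temp_trans_bio_data[4*i:4*i+4]; the slice width must match the per-user field count. A toy round trip, with an identity 'translation' standing in for trans_bio_data:

users = ['u1', 'u2']
fields = {'u1': ['q1', 'b1', 'a1', 'd1'], 'u2': ['q2', 'b2', 'a2', 'd2']}
flat = [f for uid in users for f in fields[uid]]
translated = flat  # identity stand-in for trans_bio_data(flat)
for i, uid in enumerate(users):
    print uid, '_'.join(translated[4 * i:4 * i + 4])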
Example #10
def my_domain_classfiy(uid_list, datetime_list):
    domain_results = {}
    # Save computed results to the database; before computing, check whether the database already holds previously stored results, to save work
    uids = uid_list
    unresolved_uids = []
    res = es.mget(index=tw_portrait_index_name,
                  doc_type=tw_portrait_index_type,
                  body={'ids': uids})['docs']
    for r in res:
        uid = r['_id']
        if 'found' in r:
            found = r['found']
            if found and 'domain' in r['_source']:
                domain = r['_source']['domain']
                domain_results[uid] = domain
            else:
                unresolved_uids.append(uid)
        else:  # no record for this uid in the ES index yet
            unresolved_uids.append(uid)

    # uids not found in the database: compute and store
    user_domain = {}
    user_domain_temp = {}
    if unresolved_uids:
        tw_flow_text_index_list = []
        for datetime in datetime_list:
            tw_flow_text_index_list.append(flow_text_index_name_pre + datetime)

        user_domain_data = {}
        #load num of text
        count_result = count_text_num(unresolved_uids, tw_flow_text_index_list)
        #load baseinfo
        tw_user_query_body = {
            'query': {
                "filtered": {
                    "filter": {
                        "bool": {
                            "must": [
                                {
                                    "terms": {
                                        "uid": unresolved_uids
                                    }
                                },
                            ]
                        }
                    }
                }
            },
            'size': MAX_SEARCH_SIZE,
            "fields": ["location", "username", "description", "uid"]
        }
        try:
            search_results = es.search(index=twitter_user_index_name,
                                       doc_type=twitter_user_index_type,
                                       body=tw_user_query_body)['hits']['hits']
            for item in search_results:
                content = item['fields']
                uid = content['uid'][0]
                if not uid in user_domain_data:
                    text_num = count_result[uid]
                    user_domain_data[uid] = {
                        'location': '',
                        'username': '',
                        'description': '',
                        'number_of_text': text_num
                    }
                location = content.get('location', [''])[0]
                description = content.get('description', [''])[0][:1000]
                username = content.get('username', [''])[0]
                user_domain_data[uid]['location'] = location
                user_domain_data[uid]['username'] = username
                user_domain_data[uid]['description'] = description
        except Exception, e:
            print e
        # translating one user per request is too slow, so batch the translations
        trans_uid_list = []
        untrans_bio_data = []
        cut = 100
        n = len(user_domain_data) / cut  # number of full batches (Python 2 integer division)
        for uid, content in user_domain_data.items():
            trans_uid_list.append(uid)
            untrans_bio_data.extend(
                [content['location'], content['description']])
            if n:
                if len(trans_uid_list) % cut == 0:
                    temp_trans_bio_data = trans_bio_data(untrans_bio_data)
                    for i in range(len(trans_uid_list)):
                        uid = trans_uid_list[i]
                        user_domain_data[uid]['location'] = temp_trans_bio_data[2 * i]
                        user_domain_data[uid]['description'] = temp_trans_bio_data[2 * i + 1]
                    trans_uid_list = []
                    untrans_bio_data = []
                    n = n - 1
            else:
                if len(trans_uid_list) == (len(user_domain_data) % cut):
                    temp_trans_bio_data = trans_bio_data(untrans_bio_data)
                    for i in range(len(trans_uid_list)):
                        uid = trans_uid_list[i]
                        user_domain_data[uid]['location'] = temp_trans_bio_data[2 * i]
                        user_domain_data[uid]['description'] = temp_trans_bio_data[2 * i + 1]
                    trans_uid_list = []
                    untrans_bio_data = []
        # domain classification
        user_domain_temp = domain_main(user_domain_data)
        for uid in unresolved_uids:
            if uid in user_domain_temp:
                user_domain[uid] = {'domain': user_domain_temp[uid]}
            else:
                user_domain_temp[uid] = 'other'
                user_domain[uid] = {'domain': 'other'}
        save_data2es(user_domain)

    # merge cached and newly computed results
    user_domain_temp.update(domain_results)
    return user_domain_temp