Example #1
0
def retweet_dict2results(uid, item_results):
    results = []
    uid_list = []
    sort_list = []
    sorted_list = sorted(item_results.iteritems(), key = lambda x:x[0], reverse=True)
    count = 0
    for key, value in sorted_list:
        if (key == 'None' or key == uid):
            continue
        count += 1
        uid_list.append(key)
        sort_list.append(value)
        if (count == 100):
            break
    # look up background profile info
    if uid_list:
        profile_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list})["docs"]
        for item in profile_result:
            _id = item['_id']
            index = profile_result.index(item)
            tmp = []
            if item['found']:
                item = item['_source']
                tmp.append(item['uid'])
                tmp.append(item['nick_name'])
            else:
                tmp.extend([_id,''])
            value = sort_list[index]
            tmp.append(value)
            results.append(tmp)
    return results
def get_user_profile_weibo(user_list):
    user_info_dict = {}
    try:
        user_profile_dict = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, \
                body={'ids': user_list})['docs']
    except:
        user_profile_dict = []
    if user_profile_dict:
        for user_dict in user_profile_dict:
            source_dict = {}  # build a fresh dict for each user
            if user_dict['found'] == True:
                source = user_dict['_source']
                source_dict['uid'] = source['uid']
                source_dict['uname'] = source['nick_name']
                source_dict['location'] = source['location']
                source_dict['photo_url'] = source['photo_url']
                source_dict['fansnum'] = source['fansnum']
                source_dict['friendsnum'] = source['friendsnum']
                source_dict['statusnum'] = source['statusnum']
                source_dict['description'] = source['description']
            else:
                source_dict['uid'] = user_dict['_id']
                source_dict['uname'] = 'unknown'
                source_dict['location'] = 'unknown'
                source_dict['photo_url'] = ''
                source_dict['fansnum'] = 0
                source_dict['friendsnum'] = 0
                source_dict['statusnum'] = 0
                source_dict['description'] = ''

            user_info_dict[user_dict['_id']] = source_dict
    return user_info_dict
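# A minimal, hypothetical setup for the module-level globals that
# get_user_profile_weibo (and most helpers in these examples) assume. The host
# is an assumption; the 'weibo_user'/'user' index names do appear elsewhere in
# these examples.
from elasticsearch import Elasticsearch

es_user_profile = Elasticsearch(['http://localhost:9200'])
profile_index_name = 'weibo_user'
profile_index_type = 'user'

# Hedged usage sketch: the result maps uid -> {'uid', 'uname', 'location', ...}
# user_info = get_user_profile_weibo(['1234567890', '2345678901'])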
def retweet_dict2results(uid, item_results):
    results = []
    uid_list = []
    sort_list = []
    for key in item_results:
        if (key == uid):
            continue
        uid_list.append(key)
        sort_list.append(item_results[key])
    
    # look up background profile info
    if uid_list:
        profile_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list})["docs"]
        for item in profile_result:
            _id = item['_id']
            index = profile_result.index(item)
            tmp = []
            if item['found']:
                item = item['_source']
                tmp.append(item['uid'])
                tmp.append(item['nick_name'])
            else:
                tmp.extend([_id,''])
            value = sort_list[index]
            tmp.append(value)
            results.append(tmp)
    return results
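# Hedged usage sketch for retweet_dict2results: item_results is assumed to be a
# {uid: retweet_count} mapping, and every returned row has the form
# [uid, nick_name, count], with nick_name == '' when no profile was found.
# The uids below are made up.
#
#   rows = retweet_dict2results('1111111111', {'2222222222': 30, '3333333333': 12})
#   for row_uid, row_uname, row_count in rows:
#       print row_uid, row_uname, row_count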
def query_vary_top_k(index_name, doctype, top_k, sort_index="vary"):
    query_body = {
        "query": {
            "match_all": {}
        },
        "size": top_k,
        "sort": [{sort_index: {"order": "desc"}}]
    }

    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    uid_list = []
    for item in result:
        uid_list.append(item['_id'])

    portrait_result = es_portrait.mget(index="user_portrait", doc_type="user", body={"ids":uid_list}, _source=True)['docs']
    profile_result = es_profile.mget(index="weibo_user",doc_type="user", body={"ids":uid_list}, _source=True)['docs']

    return_list = []
    rank = 1
    for i in range(len(result)):
        info = ['','','','','']
        info[0] = rank
        if profile_result[i]['found']:
            info[1] = profile_result[i]['_source'].get('photo_url','')
            info[3] = profile_result[i]['_source'].get('nick_name','')
        info[2] = result[i].get('_id','')
        info[4] = result[i]['_source']['vary']
        if portrait_result[i]['found']:
            info.append('1')
        else:
            info.append('0')
        return_list.append(info)
        rank += 1

    return return_list
def search_portrait_user(es,
                         number,
                         active_index,
                         active_type,
                         portrait_index,
                         portrait_type,
                         field="user_index"):

    return_list = []
    index_exist = es.indices.exists(index=active_index)
    if not index_exist:
        return "no active_index exist"

    count_s = 0
    count_c = 0
    start = 0
    rank = 1
    while 1:
        search_list = []
        user_list = search_k(es, active_index, active_type, start, field, 100)
        if not user_list:  # no more active users to page through
            return return_list
        start += 100
        for item in user_list:
            if field == "vary":
                uid = item.get('uid',
                               '0')  # obtain uid, notice "uid" or "user"
            else:
                uid = item.get('user', '0')
            search_list.append(uid)  # uid list

        search_result = es_portrait.mget(index=portrait_index,
                                         doc_type=portrait_type,
                                         body={"ids": search_list},
                                         _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user",
                                         doc_type="user",
                                         body={"ids": search_list},
                                         _source=True)["docs"]

        for item in search_result:
            if item["found"]:
                info = ['', '', '', '', '', '']
                info[0] = rank
                index = search_result.index(item)

                if profile_result[index]['found']:
                    info[1] = profile_result[index]['_source'].get(
                        'photo_url', '')
                    info[3] = profile_result[index]['_source'].get(
                        'nick_name', '')
                info[2] = search_result[index].get('_id', '')
                info[4] = user_list[index][field]
                info[5] = "1"
                return_list.append(info)
                rank += 1
                count_c += 1

                if count_c >= int(number):
                    return return_list
Example #7
0
def get_user_detail(date, input_result, status):
    results = []
    if status=='show_in':
        uid_list = input_result
    if status=='show_compute':
        uid_list = input_result.keys()
    if status=='show_in_history':
        uid_list = input_result.keys()
    if date!='all':
        index_name = 'bci_' + ''.join(date.split('-'))
    else:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        index_name = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids':uid_list}, _source=True)['docs']
    user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list}, _source=True)['docs']
    max_evaluate_influ = get_evaluate_max(index_name)
    for i in range(0, len(uid_list)):
        uid = uid_list[i]
        bci_dict = user_bci_result[i]
        profile_dict = user_profile_result[i]
        try:
            bci_source = bci_dict['_source']
        except:
            bci_source = None
        if bci_source:
            influence = bci_source['user_index']
            influence = math.log(influence/max_evaluate_influ['user_index'] * 9 + 1 ,10)
            influence = influence * 100
        else:
            influence = ''
        try:
            profile_source = profile_dict['_source']
        except:
            profile_source = None
        if profile_source:
            uname = profile_source['nick_name'] 
            location = profile_source['user_location']
            fansnum = profile_source['fansnum']
            statusnum = profile_source['statusnum']
        else:
            uname = ''
            location = ''
            fansnum = ''
            statusnum = ''
        if status == 'show_in':
            results.append([uid, uname, location, fansnum, statusnum, influence])
        if status == 'show_compute':
            in_date = json.loads(input_result[uid])[0]
            compute_status = json.loads(input_result[uid])[1]
            if compute_status == '1':
                compute_status = '3'
            results.append([uid, uname, location, fansnum, statusnum, influence, in_date, compute_status])
        if status == 'show_in_history':
            in_status = input_result[uid]
            results.append([uid, uname, location, fansnum, statusnum, influence, in_status])

    return results
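# The influence value above is scaled with log10(value / max * 9 + 1) * 100,
# which maps 0..max onto 0..100 (value == max gives exactly 100). A small,
# self-contained sketch of that normalization; the helper name is an
# assumption, not part of the original code.
import math

def normalize_by_max(value, max_value):
    # 0 stays 0, max_value becomes 100, everything in between is log-scaled
    if not max_value or value <= 0:
        return 0.0
    return math.log(float(value) / max_value * 9 + 1, 10) * 100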
def get_recommentation(submit_user):
    if RUN_TYPE:
        now_ts = time.time()
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)

    in_portrait_set = set(r.hkeys("compute"))
    result = []
    for i in range(7):
        iter_ts = now_ts - i*DAY
        iter_date = ts2datetime(iter_ts)
        submit_user_recomment = "recomment_" + submit_user + "_" + str(iter_date)
        bci_date = ts2datetime(iter_ts - DAY)
        submit_user_recomment = r.hkeys(submit_user_recomment)
        bci_index_name = "bci_" + bci_date.replace('-', '')
        exist_bool = es_cluster.indices.exists(index=bci_index_name)
        if not exist_bool:
            continue
        if submit_user_recomment:
            user_bci_result = es_cluster.mget(index=bci_index_name, doc_type="bci", body={'ids':submit_user_recomment}, _source=True)['docs']
            user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':submit_user_recomment}, _source=True)['docs']
            max_evaluate_influ = get_evaluate_max(bci_index_name)
            for i in range(len(submit_user_recomment)):
                uid = submit_user_recomment[i]
                bci_dict = user_bci_result[i]
                profile_dict = user_profile_result[i]
                try:
                    bci_source = bci_dict['_source']
                except:
                    bci_source = None
                if bci_source:
                    influence = bci_source['user_index']
                    influence = math.log(influence/max_evaluate_influ['user_index'] * 9 + 1 ,10)
                    influence = influence * 100
                else:
                    influence = ''
                try:
                    profile_source = profile_dict['_source']
                except:
                    profile_source = None
                if profile_source:
                    uname = profile_source['nick_name']
                    location = profile_source['user_location']
                    fansnum = profile_source['fansnum']
                    statusnum = profile_source['statusnum']
                else:
                    uname = ''
                    location = ''
                    fansnum = ''
                    statusnum = ''
                if uid in in_portrait_set:
                    in_portrait = "1"
                else:
                    in_portrait = "0"
                recomment_day = iter_date
                result.append([iter_date, uid, uname, location, fansnum, statusnum, influence, in_portrait])

    return result    
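# get_recommentation reads Redis hashes named "recomment_<submit_user>_<date>"
# whose keys are the recommended uids, and flags each uid by whether it already
# sits in the "compute" hash. A hedged sketch of that assumed key layout, with
# made-up uid and values:
#
#   r.hset('recomment_admin_2013-09-02', '1234567890', '1')
#   r.hset('compute', '1234567890', json.dumps(['2013-09-02', '1']))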
def search_portrait_user_in_activity(es, number, active_index, active_type, portrait_index, portrait_type, field="user_index"):

    return_list = []
    index_exist = es.indices.exists(index=active_index)
    if not index_exist:
        return "no active_index exist"

    count_s = 0
    count_c = 0
    start = 0
    rank = 1
    while 1:
        search_list = []
        user_list = search_k(es, active_index, active_type, start, field, 100)
        if not user_list:  # no more active users to page through
            return return_list
        start += 100
        for item in user_list:
            if field == "vary":
                uid = item.get('uid', '0') # obtain uid, notice "uid" or "user"
            else:
                uid = item.get('user', '0')
            search_list.append(uid) # uid list

        search_result = es_portrait.mget(index=portrait_index, doc_type=portrait_type, body={"ids": search_list}, _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user", doc_type="user", body={"ids": search_list}, _source=True)["docs"]

        key_list = ["origin_weibo_retweeted_total_number", "origin_weibo_retweeted_average_number", "origin_weibo_retweeted_top_number", "origin_weibo_retweeted_brust_average", \
                   "origin_weibo_comment_total_number", "origin_weibo_comment_average_number", "origin_weibo_comment_top_number", "origin_weibo_retweeted_brust_average", \
                   "retweeted_weibo_retweeted_total_number", "retweeted_weibo_retweeted_average_number", "retweeted_weibo_retweeted_top_number", "retweeted_weibo_retweeted_brust_average", \
                   "retweeted_weibo_comment_total_number", "retweeted_weibo_comment_average_number", "retweeted_weibo_comment_top_number", "retweeted_weibo_retweeted_brust_average"]
        for item in search_result:
            if item["found"]:
                info = ['','','','','','']
                info[0] = rank
                index = search_result.index(item)

                if profile_result[index]['found']:
                    info[1] = profile_result[index]['_source'].get('photo_url','')
                    info[3] = profile_result[index]['_source'].get('nick_name','')
                info[2] = search_result[index].get('_id','')
                info[4] = user_list[index]['user_index']
                info[5] = "1"
                if field == 'origin_weibo_retweeted_brust_average':
                    info.append(user_list[index]['origin_weibo_retweeted_brust_average'])
                    for key in key_list:
                        info.append(user_list[index][key])
                elif field == 'origin_weibo_comment_brust_average':
                    info.append(user_list[index]['origin_weibo_comment_brust_average'])
                    for key in key_list:
                        info.append(user_list[index][key])
                else:
                    pass
                return_list.append(info)
                rank += 1
                count_c += 1

                if count_c >= int(number):
                    return return_list
Example #10
0
def search_user_info(es,index_name,doc_type,uid,result_name):
    try:
        retweet_result = es.get(index=index_name, doc_type=doc_type, id=uid)['_source']
    except:
        return None
    if retweet_result:
        retweet_dict = json.loads(retweet_result[result_name])
        sorted_list = sorted(retweet_dict.iteritems(),key=lambda x:x[1],reverse=True)[:20]
        uid_list = [i[0] for i in sorted_list if i[0] != uid]
        portrait_result = []
        try:
            user_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={'ids':uid_list})['docs']
        except:
            user_result = []
        try:
            bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={'ids':uid_list}, fields=fields)['docs']    
        except:
            bci_history_result = []
        #print bci_history_result
        iter_count = 0
        out_portrait_list = []
        for out_user_item in user_result:
            uid = out_user_item['_id']
            if out_user_item['found'] == True:
                source = out_user_item['_source']
                uname = source['nick_name']
                photo_url = source['photo_url']
                if uname == '':
                    uname = u'未知'
                #location = source['user_location']
                friendsnum = source['friendsnum']
            else:
                uname = u'未知'
                location = ''
                friendsnum = ''
                photo_url = 'unknown'
            #add index from bci_history
            try:
                bci_history_item = bci_history_result[iter_count]
            except:
                bci_history_item = {'found': False}
            if bci_history_item['found']==True:
                fansnum = bci_history_item['fields'][fields[0]][0]
                user_weibo_count = bci_history_item['fields'][fields[1]][0]
                user_friendsnum = bci_history_item['fields'][fields[2]][0]
                influence = bci_history_item['fields'][fields[3]][0]
            else:
                fansnum = ''
                user_weibo_count = ''
                user_friendsnum = ''
                influence = ''
            #retweet_count = int(retweet_dict[uid])
            count = retweet_dict[uid]
            out_portrait_list.append({'uid':uid,'photo_url':photo_url,'count':count,'uname':uname,'influence':influence,'fansnum':fansnum, 'friendsnum':user_friendsnum,'weibo_count':user_weibo_count})#location,
            iter_count += 1
        return out_portrait_list
    else:
        return None
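# search_user_info relies on a module-level `fields` list for the bci_history
# mget; judging from how the hit is unpacked above, it names four fields in the
# order fansnum, weibo count, friends count, influence. The concrete names
# below are assumptions (only user_fansnum and weibo_month_sum are seen
# elsewhere in these examples):
#
#   fields = ['user_fansnum', 'weibo_month_sum', 'user_friendsnum', 'influence']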
def show_keywords_rank(task_id, sort_type, count):
    try:
        task_found = es_network_task.get(index=network_keywords_index_name, \
                doc_type=network_keywords_index_type, id=task_id)['_source']
    except:
        task_found = {}
        return task_found
    
    search_results = json.loads(task_found['results'])
    sort_results = search_results[sort_type]
    results = []
    uid_list = []
    sort_list = []
    for source_uid, sort_value in sort_results:
        uid_list.append(source_uid)
        sort_list.append(sort_value)
    
    # look up background profile info
    if uid_list:
        profile_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list})["docs"]
        for item in profile_result:
            _id = item['_id']
            index = profile_result.index(item)
            tmp = []
            if item['found']:
                item = item['_source']
                tmp.append(item['uid'])
                tmp.append(item['nick_name'])
                tmp.append(item['user_location'])
            else:
                tmp.extend([_id,'',''])
            value = sort_list[index]
            tmp.append(value)
            results.append(tmp)
    
    if uid_list:
        count = 0
        history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={"ids":uid_list})["docs"]
        for item in history_result:
            if item['found']:
                item = item['_source']
                results[count].extend([item['user_fansnum'], item['weibo_month_sum']])
            else:
                results[count].extend(['',''])
            count += 1
    
    if uid_list:
        count = 0
        portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":uid_list})["docs"]
        for item in portrait_result:
            if item['found']:
                results[count].append("1")
            else:
                results[count].append("0")
            count += 1

    return results
def search_top_index(index_name, top_k=1, index_type="bci", top=False, sort_order="user_index"):
    query_body = {
        "query": {
            "match_all": {}
        },
        "size": top_k,
        "sort": [{sort_order: {"order": "desc"}}]
    }

    if top:
        result = es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits'][0]['_source'][sort_order]
    else:
        search_result = es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']

        uid_list = []
        for item in search_result:
            uid_list.append(item['_id'])
        profile_result = es_profile.mget(index="weibo_user",doc_type="user", body={"ids":uid_list}, _source=True)['docs']
        portrait_result = es_portrait.mget(index="user_portrait", doc_type="user", body={"ids":uid_list}, _source=True)['docs']

        result = []
        rank = 1
        for i in range(len(search_result)):
            info = ['','','','']
            info[0] = rank
            if profile_result[i]['found']:
                info[1] = profile_result[i]['_source'].get('photo_url','')
                info[3] = profile_result[i]['_source'].get('nick_name','')

            info[2] = search_result[i].get('_id','')
            if sort_order in ["user_index","origin_weibo_retweeted_brust_average","origin_weibo_comment_brust_average"]:
                info.append(search_result[i]['_source'][sort_order])
                if portrait_result[i]['found']:
                    info.append("1")
                else:
                    info.append("0")
            elif sort_order == "origin_weibo_retweeted_top_number":
                info.append(search_result[i]['_source']['origin_weibo_retweeted_top_number'])
                mid = search_result[i]['_source']['origin_weibo_top_retweeted_id']
                info.append(weiboinfo2url(info[2],mid))
                if portrait_result[i]['found']:
                    info.append("1")
                else:
                    info.append("0")
            elif sort_order == "origin_weibo_comment_top_number":
                info.append(search_result[i]['_source']['origin_weibo_comment_top_number'])
                mid = search_result[i]['_source']['origin_weibo_top_comment_id']
                info.append(weiboinfo2url(info[2],mid))
                if portrait_result[i]['found']:
                    info.append("1")
                else:
                    info.append("0")

            rank += 1
            result.append(info)

    return result
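# search_top_index has two modes: with top=True it returns only the maximum
# value of sort_order, otherwise it returns ranked rows of the form
# [rank, photo_url, uid, nick_name, metric, (weibo_url,) in_portrait_flag].
# Hedged usage sketch; the 'bci_' + date index name follows the pattern used in
# these examples, but the concrete date is made up:
#
#   max_bci = search_top_index('bci_20130902', top_k=1, top=True)
#   rows = search_top_index('bci_20130902', top_k=50, sort_order='user_index')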
Example #13
0
def show_daily_rank(period, sort_type, count):
    index_name = 'user_portrait_network'
    index_type = 'network'
    if (len(sort_type.split('_')) > 1):
        sort = 'rank_' + sort_type + '_' + str(period)   #pr_0
    else:
        sort = sort_type + '_' + str(period)   #pr_0
    query_body = {
        'sort':[{sort:{'order': 'desc'}}],
        'size': count
        }

    try:
        search_results = es_network_task.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
    except:
        search_results = []
    results = []
    uid_list = []
    sort_list = []
    for item in search_results:
        source = item['_source']
        if sort in source:
            uid_list.append(source['uid'])
            sort_list.append(source[sort])
    
    # look up background profile info
    if uid_list:
        profile_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list})["docs"]
        for item in profile_result:
            _id = item['_id']
            index = profile_result.index(item)
            tmp = []
            if item['found']:
                item = item['_source']
                tmp.append(item['uid'])
                tmp.append(item['nick_name'])
                tmp.append(item['statusnum'])
                tmp.append(item['user_location'])
                tmp.append(item['fansnum'])
            else:
                tmp.extend([_id,'','','',''])
            value = sort_list[index]
            tmp.append(value)
            results.append(tmp)
    
    if uid_list:
        count = 0
        portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":uid_list})["docs"]
        for item in portrait_result:
            if item['found']:
                results[count].append("1")
            else:
                results[count].append("0")
            count += 1

    return results
def search_tag(es, number, active_index, active_type, portrait_index,
               portrait_type, tag):

    #field_dict = {"domain":"art"}
    return_list = []
    count_s = 0
    count_c = 0
    start = 0
    rank = 1

    while 1:
        search_list = []
        user_list = search_k(es, active_index, active_type, start,
                             "user_index", 10000)
        start += 10000
        for item in user_list:
            uid = item.get('user', '0')
            search_list.append(uid)  # uid list

        search_result = es_portrait.mget(index=portrait_index,
                                         doc_type=portrait_type,
                                         body={"ids": search_list},
                                         _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user",
                                         doc_type="user",
                                         body={"ids": search_list},
                                         _source=True)["docs"]
        for item in search_result:
            count_s += 1
            if item['found'] and tag in item['_source']['domain']:
                info = ['', '', '', '', '', '', '']
                info[0] = rank
                index = search_result.index(item)

                if profile_result[index]['found']:
                    info[1] = profile_result[index]['_source'].get(
                        'photo_url', '')
                    info[3] = profile_result[index]['_source'].get(
                        'nick_name', '')
                info[2] = search_result[index].get('_id', '')
                info[4] = user_list[index]['user_index']
                info[5] = search_result[index]['_source'].get('activeness', '')
                info[6] = search_result[index]['_source'].get('importance', '')

                rank += 1
                return_list.append(info)

                if rank >= int(number) + 1:
                    return return_list

        if count_s > 100000:
            return return_list
def search_max_single_field(field, index_name, doctype, top_k=3):

    # field = "origin_weibo_retweeted_top_number", "origin_weibo_comment_top_number"
    query_body = {
        "query": {
            "match_all": {}
        },
        "sort": [{field: {"order": "desc"}}],
        "size": top_k
    }

    
    return_list = []
    rank = 1
    count_c = 0
    start = 0

    while 1:
        search_list = []
        user_list = search_k(es, index_name, doctype, start, field, 100)
        if not user_list:  # no more candidates; return what has been collected
            return return_list
        start += 100
        for item in user_list:
            uid = item.get('user','0')
            search_list.append(uid) # uid list

        search_result = es_portrait.mget(index="user_portrait", doc_type="user", body={"ids": search_list}, _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user", doc_type="user", body={"ids": search_list}, _source=True)["docs"]

        for i in range(len(search_result)):
            if search_result[i]['found']:
                info = ['','','','','','','1']
                info[0] = rank
                info[2] = search_result[i].get('_id','')

                if profile_result[i]['found']:
                    info[1] = profile_result[i]['_source'].get('photo_url','')
                    info[3] = profile_result[i]['_source'].get('nick_name','')

                if 'retweeted' in field:
                    temp_mid = user_list[i]['origin_weibo_top_retweeted_id']
                    info[5] = weiboinfo2url(info[2], temp_mid)
                    info[4] = user_list[i]['origin_weibo_retweeted_top_number']
                else:
                    temp_mid = user_list[i]['origin_weibo_top_comment_id']
                    info[5] = weiboinfo2url(info[2], temp_mid)
                    info[4] = user_list[i]['origin_weibo_comment_top_number']

                rank += 1
                return_list.append(info)

                if rank >= int(top_k)+1:
                    return return_list
Example #16
0
def search_yangshi_attention(uid, top_count):

    results = {}
    now_ts = time.time()
    db_number = get_db_num(now_ts)
    index_name = retweet_index_name_pre + str(db_number)
    center_uid = uid
    # print es_retweet,index_name,retweet_index_type,uid
    try:
        retweet_result = es_retweet.get(index=index_name,
                                        doc_type=retweet_index_type,
                                        id=uid)['_source']
    except:
        return None
    if retweet_result:
        retweet_dict = json.loads(retweet_result['uid_retweet'])
        sorted_list = sorted(retweet_dict.iteritems(),
                             key=lambda x: x[1],
                             reverse=True)[:20]
        uid_list = [i[0] for i in sorted_list if i[0] != center_uid]
        portrait_result = []
        try:
            user_result = es_user_profile.mget(index=profile_index_name,
                                               doc_type=profile_index_type,
                                               body={'ids': uid_list})['docs']
        except:
            user_result = []

        iter_count = 0
        out_portrait_list = []
        for out_user_item in user_result:
            uid = out_user_item['_id']
            if out_user_item['found'] == True:
                source = out_user_item['_source']
                uname = source['nick_name']
                if uname == '':
                    uname = u'未知'

            else:
                uname = u'未知'

            count = retweet_dict[uid]
            out_portrait_list.append({
                'uid': uid,
                'count': count,
                'uname': uname,
            })  #location,
            iter_count += 1
        return out_portrait_list
    else:
        return None
Example #17
0
def identify_user_out(input_uid_list):
    out_user_list = []
    in_user_list = []
    input_len = len(input_uid_list)
    iter_count = 0
    print 'identify user out'
    #get user list who is out user_portrait
    while iter_count < input_len:
        iter_user_list = input_uid_list[iter_count: iter_count+DETECT_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids':iter_user_list}, _source=False)['docs']
        except:
            portrait_result = []
        for item in portrait_result:
            uid = item['_id']
            if item['found'] != True:
                out_user_list.append(uid)
            else:
                in_user_list.append(uid)
        iter_count += DETECT_ITER_COUNT
    print 'get out user portrait information'
    #get user profile information for out user_portrait
    iter_count = 0
    out_user_count = len(out_user_list)
    out_user_result = []
    while iter_count < out_user_count:
        iter_user_list = out_user_list[iter_count: iter_count+DETECT_ITER_COUNT]
        try:
            profile_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={'ids':iter_user_list}, _source=True)['docs']
        except:
            profile_result = []
        for item in profile_result:
            uid = item['_id']
            if item['found']==True:
                source = item['_source']
                uname = source['nick_name']
                fansnum = source['fansnum']
                statusnum = source['statusnum']
                friendsnum = source['friendsnum']
            else:
                uname =  u'未知'
                fansnum =  u'未知'
                statusnum =  u'未知'
                friendsnum =  u'未知'
            out_user_result.append([uid, uname, fansnum, statusnum, friendsnum])
        iter_count += DETECT_ITER_COUNT 
    
    sort_out_user_result = sorted(out_user_result, key=lambda x:x[2], reverse=True)

    return in_user_list, sort_out_user_result
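# identify_user_out walks the uid list in DETECT_ITER_COUNT-sized chunks so
# that each mget request stays small. A generic, self-contained sketch of that
# chunking pattern (the helper name and the default chunk size of 100 are
# assumptions):
def iter_uid_chunks(uid_list, chunk_size=100):
    # yield consecutive slices of uid_list, chunk_size uids at a time
    for start in range(0, len(uid_list), chunk_size):
        yield uid_list[start:start + chunk_size]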
Example #19
0
def search_user_profile_by_user_ids(users):
    users = list(users)
    user_profile_return = dict()
    try:
        user_result = es_user_profile.mget(index=profile_index_name,
                                           doc_type=profile_index_type,
                                           body={'ids': users})['docs']
    except:
        user_result = []

    for out_user_item in user_result:
        if out_user_item['found']:
            uid = out_user_item['_id']
            user_profile_return[uid] = out_user_item['_source']
    return user_profile_return
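# Hedged usage sketch: the return value maps uid -> profile _source dict for
# every uid that was found, so missing users are simply absent from the dict.
#
#   profiles = search_user_profile_by_user_ids(['1234567890'])
#   print profiles.get('1234567890', {}).get('nick_name', 'unknown')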
Example #20
0
def compare_user_profile(uid_list):
    results = {}
    index_name = 'weibo_user'
    index_type = 'user'
    search_results = es_user_profile.mget(index=index_name, doc_type=index_type, body={'ids':uid_list})['docs']
    #print 'results:', search_results
    for result in search_results:
        uid = result['_id']
        results[uid] = []
        try:
            item = result['_source']
        except:
            continue  # skip users whose profile document was not found
        photo_url = item['photo_url']
        results[uid] = photo_url
    #print 'results:', results
    return results
def portrait_user_vary(es, number, active_index, active_type, portrait_index, portrait_type, field="vary"):

    return_list = []
    index_exist = es.indices.exists(index=active_index)
    if not index_exist:
        return "no active_index exist"

    count_s = 0
    count_c = 0
    start = 0
    rank = 1
    try:
        while 1:
            search_list = []
            user_list = search_k(es, active_index, active_type, start, field, 100)
            start += 100
            for item in user_list:
                uid = item.get('uid', '0') # obtain uid, notice "uid" or "user"
                search_list.append(uid) # uid list
            search_result = es_portrait.mget(index="user_portrait", doc_type="user", body={"ids": search_list}, _source=True)["docs"]
            profile_result = es_profile.mget(index="weibo_user", doc_type="user", body={"ids": search_list}, _source=True)["docs"]

            for item in search_result:
                count_c += 1
                if item["found"]:
                    info = ['','','','','','1']
                    info[0] = rank
                    index = search_result.index(item)

                    if profile_result[index]['found']:
                        info[1] = profile_result[index]['_source'].get('photo_url','')
                        info[3] = profile_result[index]['_source'].get('nick_name','')
                    info[2] = search_result[index].get('_id','')
                    info[4] = user_list[index]['vary']
                    return_list.append(info)
                    rank += 1
                    if rank == int(number)+1:
                        return return_list

            if count_c > 10000:
                break
    except RequestError:
        print "timeout"

    return return_list
Example #22
0
def query_vary_top_k(index_name, doctype, top_k, sort_index="vary"):
    query_body = {
        "query": {
            "match_all": {}
        },
        "size": top_k,
        "sort": [{
            sort_index: {
                "order": "desc"
            }
        }]
    }

    result = es.search(index=index_name, doc_type=doctype,
                       body=query_body)['hits']['hits']
    uid_list = []
    for item in result:
        uid_list.append(item['_id'])

    portrait_result = es_portrait.mget(index="user_portrait",
                                       doc_type="user",
                                       body={"ids": uid_list},
                                       _source=True)['docs']
    profile_result = es_profile.mget(index="weibo_user",
                                     doc_type="user",
                                     body={"ids": uid_list},
                                     _source=True)['docs']

    return_list = []
    rank = 1
    for i in range(len(result)):
        info = ['', '', '', '', '']
        info[0] = rank
        if profile_result[i]['found']:
            info[1] = profile_result[i]['_source'].get('photo_url', '')
            info[3] = profile_result[i]['_source'].get('nick_name', '')
        info[2] = result[i].get('_id', '')
        info[4] = result[i]['_source']['vary']
        if portrait_result[i]['found']:
            info.append('1')
        else:
            info.append('0')
        return_list.append(info)
        rank += 1

    return return_list
def get_user_url(uid_list):
    results = []
    try:
        es_results = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids": uid_list})['docs']
    except:
        es_results = {}
    for item in es_results:
        temp = []
        if item['found']:
            temp.append(item['_source']["photo_url"])
            temp.append(item['_source']['nick_name'])
            temp.append(item['_id'])
        else:
            temp.append("unknown")
            temp.append("unknown")
            temp.append(item['_id'])
        results.append(temp)
    return results
Example #25
0
def search_attention(uid):
    stat_results = dict()
    results = dict()
    for db_num in R_DICT:
        r = R_DICT[db_num]
        ruid_results = r.hgetall('retweet_'+str(uid))
        if ruid_results:
            for ruid in ruid_results:
                if ruid != uid:
                    try:
                        stat_results[ruid] += int(ruid_results[ruid])
                    except:
                        stat_results[ruid] = int(ruid_results[ruid])
    # print 'results:', stat_results
    if not stat_results:
        return [None, 0]
    try:
        sort_state_results = sorted(stat_results.items(), key=lambda x:x[1], reverse=True)[:20]
    except:
        return [None, 0]
    print 'sort_state_results:', sort_state_results
    uid_list = [item[0] for item in sort_state_results]
    es_profile_results = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list})['docs']
    es_portrait_results = es_user_portrait.mget(index='user_portrait', doc_type='user', body={'ids':uid_list})['docs']
    result_list = []
    for i in range(len(es_profile_results)):
        item = es_profile_results[i]
        uid = item['_id']
        try:
            source = item['_source']
            uname = source['nick_name']
        except:
            uname = u'未知'
        # identify uid is in the user_portrait
        portrait_item = es_portrait_results[i]
        try:
            source = portrait_item['_source']
            in_status = 1
        except:
            in_status = 0

        result_list.append([uid,[uname, stat_results[uid], in_status]])
       
    return [result_list[:20], len(stat_results)]
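# search_attention and search_follower assume per-database Redis hashes keyed
# 'retweet_<uid>' and 'be_retweet_<uid>', each mapping another uid to a retweet
# count. A minimal sketch of how such counters could be maintained; the helper
# name and the connection settings are assumptions, not from the original code.
import redis

def record_retweet(r, uid, retweeted_uid):
    # uid retweeted a weibo of retweeted_uid: bump both directions of the edge
    r.hincrby('retweet_' + str(uid), str(retweeted_uid), 1)
    r.hincrby('be_retweet_' + str(retweeted_uid), str(uid), 1)

# r = redis.StrictRedis(host='localhost', port=6379, db=0)
# record_retweet(r, '1111111111', '2222222222')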
def search_tag(es, number, active_index, active_type, portrait_index, portrait_type, field_dict):

    #field_dict = {"domain":"art"}
    return_list = []
    count_s = 0
    count_c = 0
    start = 0
    rank = 1

    while 1:
        search_list = []
        user_list = search_k(es, active_index, active_type, start, "user_index", 1000)
        start += 1000
        for item in user_list:
            uid = item.get('user', '0')
            search_list.append(uid) # uid list

        search_result = es_portrait.mget(index=portrait_index, doc_type=portrait_type, body={"ids": search_list}, _source=True)["docs"]
        profile_result = es_profile.mget(index="weibo_user", doc_type="user", body={"ids": search_list}, _source=True)["docs"]

        for item in search_result:
            count_s += 1
            if item['found'] and field_dict.values()[0] in item['_source'][field_dict.keys()[0]]:
                info = ['','','','','','','']
                info[0] = rank
                index = search_result.index(item)

                if profile_result[index]['found']:
                    info[1] = profile_result[index]['_source'].get('photo_url','')
                    info[3] = profile_result[index]['_source'].get('nick_name','')
                info[2] = search_result[index].get('_id','')
                info[4] = user_list[index]['user_index']
                info[5] = search_result[index]['_source'].get('activeness','')
                info[6] = search_result[index]['_source'].get('importance','')

                rank += 1
                return_list.append(info)

                if rank >= int(number)+1:
                    return return_list

        if count_s > 10000:
            return return_list
Example #27
0
def compare_user_profile(uid_list):
    results = {}

    search_results = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type,\
            body={'ids':uid_list})['docs']
    for result in search_results:
        uid = result['_id']
        results[uid] = []
        try:
            item = result['_source']
        except:
            item = {}
        try:
            photo_url = item['photo_url']
        except:
            photo_url = 'unknown'

        results[uid] = photo_url
    return results
Example #29
0
def search_follower(uid):
    results = dict()
    stat_results = dict()
    for db_num in R_DICT:
        r = R_DICT[db_num]
        br_uid_results = r.hgetall('be_retweet_'+str(uid))
        if br_uid_results:
            for br_uid in br_uid_results:
                if br_uid != uid:
                    try:
                        stat_results[br_uid] += int(br_uid_results[br_uid])
                    except:
                        stat_results[br_uid] = int(br_uid_results[br_uid])
    if not stat_results:
        return [None, 0]
    try:
        sort_stat_results = sorted(stat_results.items(), key=lambda x:x[1], reverse=True)[:20]
    except:
        return [None, 0]

    uid_list = [item[0] for item in sort_stat_results]
    es_profile_results = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list})['docs']
    es_portrait_results = es_user_portrait.mget(index='user_portrait', doc_type='user', body={'ids':uid_list})['docs']
    result_list = []
    for i in range(len(es_profile_results)):
        item = es_profile_results[i]
        uid = item['_id']
        try:
            source = item['_source']
            uname = source['nick_name']
        except:
            uname = u'未知'

        portrait_item = es_portrait_results[i]
        try:
            source = portrait_item['_source']
            in_status = 1
        except:
            in_status = 0
        result_list.append([uid,[uname, stat_results[uid], in_status]])
    return [result_list[:20], len(stat_results)]
Example #30
0
def get_origin_weibo_detail(ts, user, task_name, size, order, message_type=1):
    _id = user + '-' + task_name
    task_detail = es_user_portrait.get(index=index_sensing_task,
                                       doc_type=_id,
                                       id=ts)['_source']

    mid_value = json.loads(task_detail['mid_topic_value'])
    duplicate_dict = json.loads(task_detail['duplicate_dict'])
    tmp_duplicate_dict = dict()
    for k, v in duplicate_dict.iteritems():
        try:
            tmp_duplicate_dict[v].append(k)
        except:
            tmp_duplicate_dict[v] = [k, v]

    if message_type == 1:
        weibo_detail = json.loads(task_detail['origin_weibo_detail'])
    elif message_type == 2:
        weibo_detail = json.loads(task_detail['retweeted_weibo_detail'])
    else:
        weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])
    weibo_detail_list = []
    if weibo_detail:
        for iter_mid, item in weibo_detail.iteritems():
            tmp = []
            tmp.append(iter_mid)
            tmp.append(item[iter_mid])
            tmp.append(item['retweeted'])
            tmp.append(item['comment'])
            weibo_detail_list.append(tmp)
    mid_list = weibo_detail.keys()
    print len(mid_list)
    results = []
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "terms": {
                        "mid": mid_list
                    }
                }
            }
        },
        "size": 1000,
        "sort": {
            "timestamp": {
                "order": "desc"
            }
        }
    }

    index_list = []
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts - DAY)
    index_name = flow_text_index_name_pre + datetime
    print es_text
    exist_es = es_text.indices.exists(index_name)
    print exist_es
    if exist_es:
        index_list.append(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)
    if exist_es_1:
        index_list.append(index_name_1)

    if index_list and mid_list:
        search_results = es_text.search(index=index_list,
                                        doc_type=flow_text_index_type,
                                        body=query_body)["hits"]["hits"]
    else:
        search_results = []

    uid_list = []
    text_dict = dict()  # text content, keyed by mid
    portrait_dict = dict()  # background profile info
    sort_results = []
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
            text_dict[item['_id']] = item['_source']  # the _id is the mid
        if uid_list:
            portrait_result = es_profile.mget(
                index=profile_index_name,
                doc_type=profile_index_type,
                body={"ids": uid_list},
                fields=['nick_name', 'photo_url'])["docs"]
            for item in portrait_result:
                if item['found']:
                    portrait_dict[item['_id']] = {
                        "nick_name": item["fields"]["nick_name"][0],
                        "photo_url": item["fields"]["photo_url"][0]
                    }
                else:
                    portrait_dict[item['_id']] = {
                        "nick_name": item['_id'],
                        "photo_url": ""
                    }

        if order == "total":
            sorted_list = sorted(weibo_detail_list,
                                 key=lambda x: x[1],
                                 reverse=True)
        elif order == "retweeted":
            sorted_list = sorted(weibo_detail_list,
                                 key=lambda x: x[2],
                                 reverse=True)
        elif order == "comment":
            sorted_list = sorted(weibo_detail_list,
                                 key=lambda x: x[3],
                                 reverse=True)
        else:
            sorted_list = weibo_detail_list

        count_n = 0
        results_dict = dict()
        mid_index_dict = dict()
        for item in sorted_list:  # size
            mid = item[0]
            iter_text = text_dict.get(mid, {})
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, keywords_string, message_type
            if iter_text:
                uid = iter_text['uid']
                temp.append(uid)
                iter_portrait = portrait_dict.get(uid, {})
                if iter_portrait:
                    temp.append(iter_portrait['nick_name'])
                    temp.append(iter_portrait['photo_url'])
                else:
                    temp.extend([uid, ''])
                temp.append(iter_text["text"])
                temp.append(iter_text["sentiment"])
                temp.append(ts2date(iter_text['timestamp']))
                temp.append(iter_text['geo'])
                if message_type == 1:
                    temp.append(1)
                elif message_type == 2:
                    temp.append(3)
                else:
                    temp.append(iter_text['message_type'])
                temp.append(iter_text['keywords_string'])
                temp.append(item[2])
                temp.append(item[3])
                temp.append(iter_text.get('sensitive', 0))
                temp.append(iter_text['timestamp'])
                temp.append(mid_value[mid])
                temp.append(mid)
                results.append(temp)
            count_n += 1

        results = sorted(results,
                         key=operator.itemgetter(-4, -2, -6),
                         reverse=True)  # -4 -2 -3
        sort_results = []
        count = 0
        for item in results:
            sort_results.append([item])
            mid_index_dict[item[-1]] = count
            count += 1

        if tmp_duplicate_dict:
            remove_list = []
            value_list = tmp_duplicate_dict.values()  # [[mid, mid], ]
            for item in value_list:
                tmp = []
                for mid in item:
                    if mid in mid_index_dict:
                        tmp.append(mid_index_dict[mid])
                if len(tmp) > 1:
                    tmp_min = min(tmp)
                else:
                    continue
                tmp.remove(tmp_min)
                for iter_count in tmp:
                    sort_results[tmp_min].extend(sort_results[iter_count])
                    remove_list.append(sort_results[iter_count])
            if remove_list:
                for item in remove_list:
                    sort_results.remove(item)

    return sort_results
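# duplicate_dict maps mid -> representative mid; get_origin_weibo_detail
# inverts it into tmp_duplicate_dict (representative -> [mids]) and then folds
# every duplicate row into the highest-ranked row of its group. Illustrative
# sketch of the inversion, with made-up mids:
#
#   duplicate_dict     = {'mid_b': 'mid_a', 'mid_c': 'mid_a'}
#   tmp_duplicate_dict = {'mid_a': ['mid_b', 'mid_a', 'mid_c']}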
Example #31
0
def identify_user_portrait(user_set, filter_type):
    in_portrait_result = []
    out_portrait_result = []
    user_list = list(user_set)
    #identify the user_portrait
    iter_count = 0
    all_user_count = len(user_list)
    all_in_portrait_user = dict()
    all_out_portrait_user_list = []
    max_result = get_evaluate_max()
    while iter_count <= all_user_count:
        iter_user_list = user_list[iter_count: iter_count + SENTIMENT_ITER_USER_COUNT]
        #search  user in user_portrait
        try:
            in_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,\
                    body={'ids': iter_user_list}, _source=False, \
                    fields=['uname', 'influence', 'activeness', 'importance', 'sensitive'])['docs']
        except:
            in_portrait_result = []
        #add all hit user
        for in_portrait_item in in_portrait_result:
            if in_portrait_item['found'] == True:
                uname = in_portrait_item['fields']['uname'][0]
                influence = in_portrait_item['fields']['influence'][0]
                normal_influence = math.log(influence / max_result['influence'] * 9 + 1 , 10) * 100
                activeness = in_portrait_item['fields']['activeness'][0]
                normal_activeness = math.log(activeness / max_result['activeness'] * 9 + 1 , 10) * 100
                importance = in_portrait_item['fields']['importance'][0]
                normal_importance = math.log(importance / max_result['importance'] * 9 + 1 , 10) * 100
                try:
                    sensitive = in_portrait_item['fields']['sensitive'][0]
                    normal_sensitive = math.log(sensitive / max_result['sensitive'] * 9 + 1 , 10) * 100
                except:
                    normal_sensitive = 0
                all_in_portrait_user[in_portrait_item['_id']] = [uname, normal_influence, normal_activeness, \
                    normal_importance, normal_sensitive]
            else:
                all_out_portrait_user_list.append(int(in_portrait_item['_id']))
        iter_count += SENTIMENT_ITER_USER_COUNT
    if filter_type == 'in':
        return all_in_portrait_user
    #get out portrait user info
    iter_count = 0
    all_out_portrait_user = dict()
    all_out_user_count = len(all_out_portrait_user_list)
    while iter_count < all_out_user_count:
        iter_uid_list = all_out_portrait_user_list[iter_count: iter_count+SENTIMENT_ITER_USER_COUNT]
        try:
            profile_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type,\
                    body={'ids':iter_uid_list}, _source=False, fields=['nick_name', 'statusnum', 'friendsnum', 'fansnum'])['docs']
        except:
            profile_result = []
        for profile_item in profile_result:
            if profile_item['found'] == True:
                uname = profile_item['fields']['nick_name'][0]
                statusnum = profile_item['fields']['statusnum'][0]
                friendsnum = profile_item['fields']['friendsnum'][0]
                fansnum = profile_item['fields']['fansnum'][0]
            else:
                uname= profile_item['_id']
                statusnum = 0
                friendsnum = 0
                fansnum = 0
            all_out_portrait_user[str(profile_item['_id'])] = [uname, statusnum, friendsnum, fansnum]
        iter_count += SENTIMENT_ITER_USER_COUNT
    return all_in_portrait_user, all_out_portrait_user
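
A note on the normalization above: each portrait dimension is squashed onto a 0-100 scale with a log curve. A minimal standalone sketch of that scaling, assuming made-up maxima in place of the real get_evaluate_max() output:

import math

def normalize(value, max_value):
    # same log scaling as above: maps [0, max_value] onto roughly [0, 100]
    return math.log(value / float(max_value) * 9 + 1, 10) * 100

fake_max = {'influence': 2000.0, 'activeness': 800.0}   # toy maxima, illustration only
print(normalize(2000.0, fake_max['influence']))   # 100.0 at the maximum
print(normalize(0.0, fake_max['influence']))      # 0.0 at the minimum
print(normalize(200.0, fake_max['activeness']))   # mid-range values compress smoothly (~51)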
Example #32
0
 #step1 : get group user list by task_name
 group_index_name = 'group_result'
 group_index_type = 'group'
 try:
     group_task = es.get(index=group_index_name,
                         doc_type=group_index_type,
                         id=task_name)['_source']
 except Exception, e:
     raise e
 user_list = group_task['uid_list']
 #step2: get group user name
 profile_index_name = 'weibo_user'
 profile_index_type = 'user'
 try:
     user_name_result = es_user_profile.mget(index=profile_index_name,
                                             doc_type=profile_index_type,
                                             body={'ids':
                                                   user_list})['docs']
 except Exception, e:
     raise e
 #print 'user_name_result:', user_name_result
 #step3 : get group user weibo
 file_list = set(os.listdir(DEFAULT_LEVELDBPATH))
 count = 0
 for user in user_list:
     user_nick_name = user_name_result[count]['_source']['nick_name']
     for i in range(1, 25):
         leveldb_folder = date + str(i)
         if leveldb_folder in file_list:
             leveldb_bucket = dynamic_leveldb(leveldb_folder)
             try:
                 user_weibo = leveldb_bucket.Get(str(user))
Example #33
0
def get_final_submit_user_info(uid_list):
    final_results = []
    try:
        profile_results = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={'ids': uid_list})['docs']
    except:
        profile_results = []
    try:
        bci_history_results =es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={'ids': uid_list})['docs']
    except:
        bci_history_results = []
    #get bci_history max value
    now_time_ts = time.time()
    search_date_ts = datetime2ts(ts2datetime(now_time_ts - DAY))
    bci_key = 'bci_' + str(search_date_ts)
    query_body = {
        'query':{
             'match_all':{}
        },
        'sort': [{bci_key:{'order': 'desc'}}],
        'size': 1
    }
    #try:
    bci_max_result = es_bci_history.search(index=bci_history_index_name, doc_type=bci_history_index_type, body=query_body, _source=False, fields=[bci_key])['hits']['hits']
    #except:
    #    bci_max_result = {}
    if bci_max_result:
        bci_max_value = bci_max_result[0]['fields'][bci_key][0]
    else:
        bci_max_value = MAX_VALUE
    iter_count = 0
    for uid in uid_list:
        try:
            profile_item = profile_results[iter_count]
        except:
            profile_item = {}
        try:
            bci_history_item = bci_history_results[iter_count]
        except:
            bci_history_item = {}
        if profile_item and profile_item['found'] == True:
            uname = profile_item['_source']['nick_name']
            location = profile_item['_source']['user_location']
        else:
            uname = ''
            location = ''
        if bci_history_item and bci_history_item['found'] == True:
            fansnum = bci_history_item['_source']['user_fansnum']
            statusnum = bci_history_item['_source']['weibo_month_sum']
            try:
                bci = bci_history_item['_source'][bci_key]
                normal_bci = math.log(bci / bci_max_value * 9 + 1, 10) * 100
            except:
                normal_bci = ''
        else:
            fansnum = ''
            statusnum = ''
            normal_bci = ''
        final_results.append([uid, uname, location, fansnum, statusnum, normal_bci])
        iter_count += 1

    return final_results
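
get_final_submit_user_info relies on mget() returning one doc per requested id, in the same order, so results are aligned by position. A small self-contained sketch of that alignment, with hand-written stand-ins for the Elasticsearch response:

uid_list = ['1001', '1002', '1003']
# stand-in for es_user_profile.mget(...)['docs']; one entry per id, same order as uid_list
profile_docs = [
    {'_id': '1001', 'found': True, '_source': {'nick_name': 'user_a', 'user_location': 'beijing'}},
    {'_id': '1002', 'found': False},
    {'_id': '1003', 'found': True, '_source': {'nick_name': 'user_c', 'user_location': ''}},
]

results = []
for i, uid in enumerate(uid_list):
    doc = profile_docs[i] if i < len(profile_docs) else {}
    if doc and doc.get('found'):
        uname = doc['_source']['nick_name']
        location = doc['_source']['user_location']
    else:
        uname = ''
        location = ''
    results.append([uid, uname, location])
print(results)   # the missing '1002' falls back to empty fields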
def get_retweet_weibo_detail(ts, user, task_name, size, text_type, type_value):
    _id = user + '-' + task_name
    task_detail = es_user_portrait.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']
    origin_weibo_detail = json.loads(task_detail['origin_weibo_detail'])
    retweeted_weibo_detail = json.loads(task_detail['retweeted_weibo_detail'])

    mid_list = []
    mid_list.extend(origin_weibo_detail.keys())
    mid_list.extend(retweeted_weibo_detail.keys())

    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "bool":{
                        "must":[
                            {"range":{
                                "timestamp":{
                                    "gte": ts - time_interval,
                                    "lt": ts
                                }
                            }},
                            {"terms": {"root_mid": mid_list}}
                        ]
                    }
                }
            }
        },
        "sort": {"timestamp": {"order": "desc"}},
        "size": 100
    }

    if text_type == "message_type":
        query_body['query']['filtered']['filter']['bool']['must'].append({"term":{text_type: type_value}})
    if text_type == "sentiment":
        #if isinstance(type_value, str):
        if len(type_value) == 1:
            query_body['query']['filtered']['filter']['bool']['must'].append({"term":{text_type: type_value}})
        else:
            query_body['query']['filtered']['filter']['bool']['must'].append({"terms":{text_type: type_value}})

    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts-time_interval)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)

    # 1. query weibo
    if datetime == datetime_1 and exist_es:
        search_results = es_text.search(index=index_name, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    elif datetime != datetime_1 and exist_es_1:
        search_results = es_text.search(index=index_name_1, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []
    #print search_results
    # 2. get weibo-related info
    results = []
    uid_list = []
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
        if uid_list:
            portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list}, fields=['nick_name', 'photo_url'])["docs"]

        for i in range(len(uid_list)):
            item = search_results[i]['_source']
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, common_keywords, message_type
            temp.append(item['uid'])
            if portrait_result[i]['found']:
                temp.append(portrait_result[i]["fields"]["nick_name"][0])
                temp.append(portrait_result[i]["fields"]["photo_url"][0])
            else:
                temp.append(item['uid'])
                temp.append("")
            temp.append(item["text"])
            #print item['text']
            temp.append(item["sentiment"])
            temp.append(ts2date(item['timestamp']))
            temp.append(item['geo'])
            temp.append(item["message_type"])
            results.append(temp)

    return results
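
The query above is built by conditionally appending term/terms clauses to the bool "must" list. A minimal sketch of that pattern with dummy values (no Elasticsearch call is made):

import json

ts = 1480000000
time_interval = 3600
mid_list = ['mid_1', 'mid_2']

query_body = {
    "query": {
        "filtered": {
            "filter": {
                "bool": {
                    "must": [
                        {"range": {"timestamp": {"gte": ts - time_interval, "lt": ts}}},
                        {"terms": {"root_mid": mid_list}}
                    ]
                }
            }
        }
    },
    "sort": {"timestamp": {"order": "desc"}},
    "size": 100
}

# optional filters are appended the same way the function does it
text_type, type_value = "sentiment", ["2", "3"]
clause = {"term": {text_type: type_value}} if len(type_value) == 1 else {"terms": {text_type: type_value}}
query_body['query']['filtered']['filter']['bool']['must'].append(clause)

print(json.dumps(query_body, indent=2))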
Example #35
0
def get_sensitive_text_detail(task_name, ts, user, order):
    _id = user + '-' + task_name
    task_detail = es.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']
    weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])

    weibo_detail_list = []
    if weibo_detail:
        for iter_mid, item in weibo_detail.iteritems():
            tmp = []
            tmp.append(iter_mid)
            tmp.append(item[iter_mid])
            tmp.append(item['retweeted'])
            tmp.append(item['comment'])
            weibo_detail_list.append(tmp)
    mid_list = weibo_detail.keys()

    results = []
    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "terms":{"mid": mid_list}
                }
            }
        }
    }

    index_list = []
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts-DAY)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    if exist_es:
        index_list.append(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)
    if exist_es_1:
        index_list.append(index_name_1)

    if index_list and mid_list:
        search_results = es_text.search(index=index_list, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []

    uid_list = []
    text_dict = dict() # text info
    portrait_dict = dict() # profile info
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
            text_dict[item['_id']] = item['_source'] # _id is the mid
        if uid_list:
            portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list}, fields=['nick_name', 'photo_url'])["docs"]
            for item in portrait_result:
                if item['found']:
                    portrait_dict[item['_id']] = {"nick_name": item["fields"]["nick_name"][0], "photo_url": item["fields"]["photo_url"][0]}
                else:
                    portrait_dict[item['_id']] = {"nick_name": item['_id'], "photo_url":""}

        if order == "total":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[1], reverse=True)
        elif order == "retweeted":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[2], reverse=True)
        elif order == "comment":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[3], reverse=True)
        else:
            sorted_list = weibo_detail_list

        count_n = 0
        for item in sorted_list:
            mid = item[0]
            iter_text = text_dict.get(mid, {})
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, common_keywords, message_type
            if iter_text:
                uid = iter_text['uid']
                temp.append(uid)
                iter_portrait = portrait_dict.get(uid, {})
                if iter_portrait:
                    temp.append(iter_portrait['nick_name'])
                    temp.append(iter_portrait['photo_url'])
                else:
                    temp.extend([uid,''])
                temp.append(iter_text["text"])
                temp.append(iter_text["sentiment"])
                temp.append(ts2date(iter_text['timestamp']))
                temp.append(iter_text['geo'])
                temp.append(iter_text['message_type'])
                temp.append(item[2])
                temp.append(item[3])
                temp.append(iter_text.get('sensitive', 0))
                count_n += 1
                results.append(temp)

        if results and order == "ts":
            results = sorted(results, key=lambda x:x[5], reverse=True)

        if results and order == "sensitive":
            results = sorted(results, key=lambda x:x[-1], reverse=True)

    return results
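
The "order" argument only chooses which column of weibo_detail_list to sort on. A toy illustration with fabricated rows:

# rows are [mid, total, retweeted, comment], as built above
weibo_detail_list = [
    ['mid_a', 10, 7, 3],
    ['mid_b', 25, 5, 20],
    ['mid_c', 12, 11, 1],
]

order = "retweeted"
if order == "total":
    sorted_list = sorted(weibo_detail_list, key=lambda x: x[1], reverse=True)
elif order == "retweeted":
    sorted_list = sorted(weibo_detail_list, key=lambda x: x[2], reverse=True)
elif order == "comment":
    sorted_list = sorted(weibo_detail_list, key=lambda x: x[3], reverse=True)
else:
    sorted_list = weibo_detail_list
print(sorted_list)   # 'mid_c' comes first when sorting by retweeted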
Example #36
0
def get_user_detail(date, input_result, status, user_type="influence", auth=""):
    results = []
    if status=='show_in':
        uid_list = input_result
    if status=='show_compute':
        uid_list = input_result.keys()
    if status=='show_in_history':
        uid_list = input_result.keys()
    if date!='all':
        index_name = 'bci_' + ''.join(date.split('-'))
    else:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        index_name = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids':uid_list}, _source=True)['docs']
    user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list}, _source=True)['docs']
    max_evaluate_influ = get_evaluate_max(index_name)
    for i in range(0, len(uid_list)):
        uid = uid_list[i]
        bci_dict = user_bci_result[i]
        profile_dict = user_profile_result[i]
        try:
            bci_source = bci_dict['_source']
        except:
            bci_source = None
        if bci_source:
            influence = bci_source['user_index']
            influence = math.log(influence/max_evaluate_influ['user_index'] * 9 + 1 ,10)
            influence = influence * 100
        else:
            influence = ''
        try:
            profile_source = profile_dict['_source']
        except:
            profile_source = None
        if profile_source:
            uname = profile_source['nick_name'] 
            location = profile_source['user_location']
            fansnum = profile_source['fansnum']
            statusnum = profile_source['statusnum']
        else:
            uname = ''
            location = ''
            fansnum = ''
            statusnum = ''
        if status == 'show_in':
            if user_type == "sensitive":
                tmp_ts = datetime2ts(date) - DAY
                tmp_data = r_cluster.hget("sensitive_"+str(tmp_ts), uid)
                if tmp_data:
                    sensitive_dict = json.loads(tmp_data)
                    sensitive_words = sensitive_dict.keys()
                else:
                    sensitive_words = []
                results.append([uid, uname, location, fansnum, statusnum, influence, sensitive_words])
            else:
                results.append([uid, uname, location, fansnum, statusnum, influence])
            if auth:
                hashname_submit = "submit_recomment_" + date
                tmp_data = json.loads(r.hget(hashname_submit, uid))
                recommend_list = (tmp_data['operation']).split('&')
                admin_list = []
                admin_list.append(tmp_data['system'])
                admin_list.append(list(set(recommend_list)))
                admin_list.append(len(recommend_list))
                results[-1].extend(admin_list)
        if status == 'show_compute':
            in_date = json.loads(input_result[uid])[0]
            compute_status = json.loads(input_result[uid])[1]
            if compute_status == '1':
                compute_status = '3'
            results.append([uid, uname, location, fansnum, statusnum, influence, in_date, compute_status])
        if status == 'show_in_history':
            in_status = input_result[uid]
            if user_type == "sensitive":
                tmp_ts = datetime2ts(date) - DAY
                tmp_data = r_cluster.hget("sensitive_"+str(tmp_ts), uid)
                if tmp_data:
                    sensitive_dict = json.loads(tmp_data)
                    sensitive_words = sensitive_dict.keys()
                else:
                    sensitive_words = []
                results.append([uid, uname, location, fansnum, statusnum, influence, in_status, sensitive_words])
            else:
                results.append([uid, uname, location, fansnum, statusnum, influence, in_status])

    return results
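
The bci index name is derived from the date with the dashes stripped. A one-off sketch; ts2datetime is re-implemented here with the 'YYYY-MM-DD' format the surrounding code implies, so treat it as an assumption:

import time

def ts2datetime(ts):
    # assumed helper: timestamp -> 'YYYY-MM-DD', matching the index naming used above
    return time.strftime('%Y-%m-%d', time.localtime(ts))

date = '2016-11-27'
index_name = 'bci_' + ''.join(date.split('-'))   # -> 'bci_20161127'
print(index_name)
print('bci_' + ''.join(ts2datetime(time.time()).split('-')))   # today's index name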
def get_task_detail_2(task_name, ts, user):
    results = dict()
    index_name = task_name
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)["_source"]
    task_name = task_detail['task_name']
    social_sensors = json.loads(task_detail['social_sensors'])
    history_status = json.loads(task_detail['history_status'])
    start_time = task_detail['create_at']
    create_by = task_detail['create_by']
    stop_time = task_detail['stop_time']
    remark = task_detail['remark']
    portrait_detail = []
    count = 0 # counter

    if social_sensors:
        search_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":social_sensors}, fields=SOCIAL_SENSOR_INFO)['docs']
        for item in search_results:
            temp = []
            if item['found']:
                for iter_item in SOCIAL_SENSOR_INFO:
                    if iter_item == "topic_string":
                        temp.append(item["fields"][iter_item][0].split('&'))
                    else:
                        temp.append(item["fields"][iter_item][0])
                portrait_detail.append(temp)
        portrait_detail = sorted(portrait_detail, key=lambda x:x[5], reverse=True)

    time_series = [] # timestamps
    positive_sentiment_list = [] # sentiment lists
    neutral_sentiment_list = []
    negetive_sentiment_list = []
    all_weibo_list = []
    origin_weibo_list = [] # weibo lists
    retweeted_weibo_list = []
    retweeted_weibo_count = [] # number of retweets received from others
    comment_weibo_count = []
    total_number_count = []
    burst_time_list = [] # burst time list
    important_user_set = set() # important users
    out_portrait_users = set() # users not in the portrait library

    ts = int(ts)
    for item in history_status:
        if int(item[0]) <= ts:
            time_series.append(item[0]) # all timestamps up to now

    # get detail task information from es
    if time_series:
        #print time_series
        flow_detail = es.mget(index=index_sensing_task, doc_type=_id, body={"ids": time_series})['docs']
    else:
        flow_detail = {}
    if flow_detail:
        for item in flow_detail:
            item = item['_source']
            timestamp = item['timestamp']
            sentiment_distribution = json.loads(item["sentiment_distribution"])
            positive_sentiment_list.append(int(sentiment_distribution['1']))
            negetive_sentiment_list.append(int(sentiment_distribution['2'])+int(sentiment_distribution['3']) \
                    +int(sentiment_distribution['4'])+int(sentiment_distribution['5'])+int(sentiment_distribution['6']))
            neutral_sentiment_list.append(int(sentiment_distribution['0']))
            origin_weibo_list.append(item["origin_weibo_number"]) # real
            retweeted_weibo_list.append(item['retweeted_weibo_number']) # real
            all_weibo_list.append(item["origin_weibo_number"]+item['retweeted_weibo_number'])
            retweeted_weibo_count.append(item['retweeted_weibo_count'])
            comment_weibo_count.append(item['comment_weibo_count'])
            total_number_count.append(item['weibo_total_number'])
            temp_important_user_list = json.loads(item['important_users'])
            unfiltered_users = json.loads(item['unfilter_users'])
            temp_out_portrait_users = set(unfiltered_users) - set(temp_important_user_list) # users not in the portrait library
            important_user_set = important_user_set | set(temp_important_user_list)
            out_portrait_users = out_portrait_users | set(temp_out_portrait_users)

            burst_reason = item.get("burst_reason", "")
            if burst_reason:
                burst_time_list.append([timestamp, count, burst_reason])
            count += 1

    ####################################################################################
    # tally the burst reasons and draw the corresponding conclusions
    weibo_variation_count = 0
    weibo_variation_time = []
    sentiment_variation_count = 0
    sentiment_variation_time = []
    common_variation_count = 0
    common_variation_time = []
    if burst_time_list:
        for item in burst_time_list:
            tmp_common = 0
            x1 = 0
            x2 = 0
            if signal_count_varition in item[2]:
                weibo_variation_count += 1
                weibo_variation_time.append([ts2date_min(item[0]), total_number_count[item[1]]])
                x1 = total_number_count[item[1]]
                tmp_common += 1
            if signal_sentiment_varition in item[2]:
                tmp_common += 1
                sentiment_variation_count += 1
                x2 = negetive_sentiment_list[item[1]]
                sentiment_variation_time.append([ts2date_min(item[0]), negetive_sentiment_list[item[1]]])

            if tmp_common == 2:
                common_variation_count += 1
                common_variation_time.append([ts2date_min(item[0]), x1, x2])

    warning_conclusion = remark
    variation_distribution = []
    if weibo_variation_count:
        variation_distribution.append(weibo_variation_time)
    else:
        variation_distribution.append([])

    if sentiment_variation_count:
        variation_distribution.append(sentiment_variation_time)
    else:
        variation_distribution.append([])


    if common_variation_count:
        variation_distribution.append(common_variation_time)
    else:
        variation_distribution.append([])


    results['warning_conclusion'] = warning_conclusion
    results['variation_distribution'] = variation_distribution

    # heat of each user


    # get profile info of the important users
    top_influence = get_top_influence("influence")
    top_activeness = get_top_influence("activeness")
    top_importance = get_top_influence("importance")
    important_uid_list = list(important_user_set)
    out_portrait_users_list = list(out_portrait_users)
    user_detail_info = [] #
    out_user_detail_info = []
    if important_uid_list:
        user_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":important_uid_list}, fields=['uid', 'uname', 'domain', 'topic_string', "photo_url", 'importance', 'influence', 'activeness'])['docs']
        for item in user_results:
            if item['found']:
                temp = []
                #if int(item['fields']['importance'][0]) < IMPORTANT_USER_THRESHOULD:
                #    continue
                temp.append(item['fields']['uid'][0])
                temp.append(item['fields']['uname'][0])
                temp.append(item['fields']['photo_url'][0])
                temp.append(item['fields']['domain'][0])
                temp.append(item['fields']['topic_string'][0].split('&'))
                #hot_count = count_hot_uid(item['fields']['uid'][0], start_time, stop_time)
                #temp.append(hot_count)
                temp.append(math.ceil(item['fields']['importance'][0]/float(top_importance)*100))
                temp.append(math.ceil(item['fields']['influence'][0]/float(top_influence)*100))
                temp.append(math.ceil(item['fields']['activeness'][0]/float(top_activeness)*100))
                user_detail_info.append(temp)
    # sort
    user_detail_info = sorted(user_detail_info, key=lambda x:x[6], reverse=True)

    if out_portrait_users_list:
        profile_results = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":out_portrait_users_list})["docs"]
        bci_index = "bci_" + ts2datetime(ts-DAY).replace('-','')
        influence_results = es.mget(index=bci_index, doc_type="bci", body={"ids":out_portrait_users_list})['docs']
        top_influence = get_top_all_influence("influence", ts)
        count = 0
        if profile_results:
            for item in profile_results:
                temp = []
                if item['found']:
                    temp.append(item['_source']['uid'])
                    if item['_source']['nick_name']:
                        temp.append(item['_source']['nick_name'])
                    else:
                        temp.append(item['_source']['uid'])
                    temp.append(item['_source']['user_location'])
                    temp.append(item['_source']['fansnum'])
                else:
                    temp.append(item['_id'])
                    temp.append(item['_id'])
                    temp.extend(['',''])
                temp_influ = influence_results[count]
                if temp_influ.get('found', 0):
                    user_index = temp_influ['_source']['user_index']
                    temp.append(math.ceil(user_index/float(top_influence)*100))
                else:
                    temp.append(0)
                count += 1
                out_user_detail_info.append(temp)

    revise_time_series = []
    for item in time_series:
        revise_time_series.append(ts2date_min(item))

    results['important_user_detail'] = user_detail_info
    results['out_portrait_user_detail'] = out_user_detail_info
    results['burst_time'] = burst_time_list # burst timestamps and their reasons
    results['time_series'] = revise_time_series
    results['positive_sentiment_list'] = positive_sentiment_list
    results['negetive_sentiment_list'] = negetive_sentiment_list
    results['neutral_sentiment_list'] = neutral_sentiment_list
    results['all_weibo_list'] = all_weibo_list
    results['origin_weibo_list'] = origin_weibo_list
    results['retweeted_weibo_list'] = retweeted_weibo_list
    results['comment_weibo_count'] = comment_weibo_count
    results['retweeted_weibo_count'] = retweeted_weibo_count
    results['total_number_list'] = total_number_count
    results['social_sensors_detail'] = portrait_detail


    return results
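
The burst-statistics block above only checks which signal markers occur in each burst_reason string. A standalone sketch with toy data; signal_count_varition and signal_sentiment_varition are project constants, so placeholder strings stand in for them here:

# placeholder signal markers; the real constants come from the project config
signal_count_varition = 'count_varition'
signal_sentiment_varition = 'sentiment_varition'

total_number_count = [120, 340, 90]
negetive_sentiment_list = [10, 55, 8]
# each item is [timestamp, index into the series above, burst_reason string]
burst_time_list = [
    [1480003600, 1, 'count_varition&sentiment_varition'],
    [1480007200, 2, 'count_varition'],
]

weibo_variation_time = []
sentiment_variation_time = []
common_variation_time = []
for item in burst_time_list:
    tmp_common = 0
    x1 = x2 = 0
    if signal_count_varition in item[2]:
        x1 = total_number_count[item[1]]
        weibo_variation_time.append([item[0], x1])
        tmp_common += 1
    if signal_sentiment_varition in item[2]:
        x2 = negetive_sentiment_list[item[1]]
        sentiment_variation_time.append([item[0], x2])
        tmp_common += 1
    if tmp_common == 2:
        common_variation_time.append([item[0], x1, x2])
print(common_variation_time)   # only the first burst hits both signals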
Example #38
0
def search_fans(uid,top_count):
    results = {}
    now_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    db_number = get_db_num(now_date_ts)

    be_comment_index_name = be_comment_index_name_pre + str(db_number)
    be_retweet_index_name = be_retweet_index_name_pre + str(db_number)
    result = {}
    be_retweet_inter_dict = {}
    be_comment_inter_dict = {}
    center_uid = uid
    try:
        be_retweet_result = es_retweet.get(index = be_retweet_index_name,doc_type=be_retweet_index_type,id=uid)['_source']
    except:
        be_retweet_result = {}

    if be_retweet_result:
        be_retweet_uid_dict = json.loads(be_retweet_result['uid_be_retweet'])
    else:
        be_retweet_uid_dict = {}
    # print "be_retweet_uid_dict", be_retweet_uid_dict
    try:
        be_comment_result = es_be_comment.get(index=be_comment_index_name, doc_type=be_comment_index_type, id=uid)['_source']
    except:
        be_comment_result = {}

    if be_comment_result:
        be_comment_uid_dict = json.loads(be_comment_result['uid_be_comment'])
    else:
        be_comment_uid_dict = {}
    # print "be_comment_uid_dict", be_comment_uid_dict

    fans_result = union_dict(be_retweet_uid_dict,be_comment_uid_dict)
    fans_user_set = set(fans_result.keys())
    fans_list = list(fans_user_set)
    # print "fans_list", fans_list
    all_fans_dict = {}

    for fans_user in fans_list:
        if fans_user != center_uid:
            all_fans_dict[fans_user] = fans_result[fans_user]
    sort_all_fans_dict = sorted(all_fans_dict.items(), key=lambda x:x[1], reverse=True)
    all_fans_uid_list=[]
    all_fans_uid_list_all = [item[0] for item in sort_all_fans_dict]

    print all_fans_uid_list_all
    count = 0
    for i in all_fans_uid_list_all:
        count += 1
        all_fans_uid_list.append(i)
        if count == 1000:
            break
    print all_fans_uid_list

    out_portrait_list = all_fans_uid_list
    #use to get user information from user profile
    out_portrait_result = {}
    try:
        out_user_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={'ids':out_portrait_list})['docs']
    except:
        out_user_result = []
    #add index from bci_history
    try:
        bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={'ids': out_portrait_list}, fields=fields)['docs']
    except:
        bci_history_result = []
    iter_count = 0
    out_portrait_list = []
    for out_user_item in out_user_result:
        uid = out_user_item['_id']
        if out_user_item['found'] == True:
            source = out_user_item['_source']
            uname = source['nick_name']
            photo_url = source['photo_url']
            if uname == '':
                uname =  u'未知'
            location = source['user_location']
            friendsnum = source['friendsnum']
        else:
            uname = u'未知'
            location = ''
            friendsnum = ''
            photo_url = 'unknown'
        #add index from bci_history
        try:
            bci_history_item = bci_history_result[iter_count]
        except:
            bci_history_item = {'found': False}
        # print bci_history_item
        if bci_history_item['found'] == True:
            fansnum = bci_history_item['fields'][fields[0]][0]
            user_weibo_count = bci_history_item['fields'][fields[1]][0]
            user_friendsnum = bci_history_item['fields'][fields[2]][0]
            influence = bci_history_item['fields'][fields[3]][0]
        else:
            fansnum = ''
            user_weibo_count = ''
            user_friendsnum = ''

        fans_count = int(all_fans_dict[uid])
        out_portrait_list.append({'uid':uid,'photo_url':photo_url,'uname':uname, 'count':fans_count, 'fansnum':fansnum,'friendsnum': user_friendsnum,'weibo_count': user_weibo_count})
        iter_count += 1

    return out_portrait_list
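
union_dict() is a project helper that is not shown on this page; below is a plausible stand-in (summing counts per uid) together with the sort-and-cap step used above, offered only as an assumption about its behaviour:

def union_dict(*dicts):
    # assumed behaviour: sum counts for uids appearing in several dicts
    merged = {}
    for d in dicts:
        for uid, count in d.items():
            merged[uid] = merged.get(uid, 0) + int(count)
    return merged

be_retweet_uid_dict = {'1001': 5, '1002': 2}
be_comment_uid_dict = {'1002': 4, '1003': 1}
fans_result = union_dict(be_retweet_uid_dict, be_comment_uid_dict)

sort_all_fans = sorted(fans_result.items(), key=lambda x: x[1], reverse=True)
top_fans_uid_list = [uid for uid, _ in sort_all_fans][:1000]   # same cap as above
print(top_fans_uid_list)   # ['1002', '1001', '1003']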
Example #39
0
def search_bidirect_interaction(uid, top_count):

    now_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    db_number = get_db_num(now_date_ts)
    retweet_index_name = retweet_index_name_pre + str(db_number)
    be_retweet_index_name = be_retweet_index_name_pre + str(db_number)
    comment_index_name = comment_index_name_pre + str(db_number)
    be_comment_index_name = be_comment_index_name_pre + str(db_number)
    results = {}
    retweet_inter_dict = {}
    comment_inter_dict = {}
    center_uid = uid
    #bidirect interaction in retweet and be_retweet
    try:
        retweet_result = es_retweet.get(index=retweet_index_name, doc_type=retweet_index_type, id=uid)['_source']
    except:
        retweet_result = {}
    if retweet_result:
        retweet_uid_dict = json.loads(retweet_result['uid_retweet'])
    else:
        retweet_uid_dict = {}
    retweet_uid_list = retweet_uid_dict.keys()
    try:
        be_retweet_result = es_retweet.get(index=be_retweet_index_name, doc_type=be_retweet_index_type, id=uid)['_source']
    except:
        be_retweet_result = {}
    if be_retweet_result:
        be_retweet_uid_dict = json.loads(be_retweet_result['uid_be_retweet'])
    else:
        be_retweet_uid_dict = {}

    #bidirect interaction in comment and be_comment
    try:
        comment_result = es_comment.get(index=comment_index_name, doc_type=comment_index_type, id=uid)['_source']
    except:
        comment_result = {}
    if comment_result:
        comment_uid_dict = json.loads(comment_result['uid_comment'])
    else:
        comment_uid_dict = {}
    comment_uid_list = comment_uid_dict.keys()
    try:
        be_comment_result = es_comment.get(index=be_comment_index_name, doc_type=be_comment_index_type, id=uid)['_source']
    except:
        be_comment_result = {}
    if be_comment_result:
        be_comment_uid_dict = json.loads(be_comment_result['uid_be_comment'])
    else:
        be_comment_uid_dict = {}
    #get bidirect_interaction dict
    #all_interaction_dict = union_dict(retweet_inter_dict, comment_inter_dict)
    retweet_comment_result = union_dict(retweet_uid_dict, comment_uid_dict)
    be_retweet_comment_result = union_dict(be_retweet_uid_dict, be_comment_uid_dict)
    interaction_user_set = set(retweet_comment_result.keys()) & set(be_retweet_comment_result.keys())
    interaction_user_list = list(interaction_user_set)
    all_interaction_dict = {}
    for interaction_user in interaction_user_list:
        if interaction_user != center_uid:
            all_interaction_dict[interaction_user] = retweet_comment_result[interaction_user] + be_retweet_comment_result[interaction_user]

    sort_all_interaction_dict = sorted(all_interaction_dict.items(), key=lambda x:x[1], reverse=True)
    #get in_portrait_list, in_portrait_results and out_portrait_list
    all_interaction_uid_list = [item[0] for item in sort_all_interaction_dict]
    #print all_interaction_uid_list

    # if RUN_TYPE == 0:
        # all_interaction_dict = {'2029036025':3,'1282005885':2,'2549228714':2,'1809833450':1}
        # all_interaction_uid_list = ['2029036025', '1282005885', '2549228714', '1809833450']

    out_portrait_list = all_interaction_uid_list
    #use to get user information from user profile
    out_portrait_result = {}
    try:
        out_user_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={'ids':out_portrait_list})['docs']
    except:
        out_user_result = []
    #add index from bci_history
    try:
        bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={'ids': out_portrait_list}, fields=fields)['docs']
    except:
        bci_history_result = []
    iter_count = 0
    out_portrait_list = []
    for out_user_item in out_user_result:
        uid = out_user_item['_id']
        if out_user_item['found'] == True:
            source = out_user_item['_source']
            uname = source['nick_name']
            photo_url = source['photo_url']
            if uname == '':
                uname =  u'未知'
            location = source['user_location']
            friendsnum = source['friendsnum']
        else:
            uname = u'未知'
            location = ''
            friendsnum = ''
            photo_url = 'unknown'
        #add index from bci_history
        try:
            bci_history_item = bci_history_result[iter_count]
        except:
            bci_history_item = {'found': False}
        # print bci_history_item
        if bci_history_item['found'] == True:
            fansnum = bci_history_item['fields'][fields[0]][0]
            user_weibo_count = bci_history_item['fields'][fields[1]][0]
            user_friendsnum = bci_history_item['fields'][fields[2]][0]
            influence = bci_history_item['fields'][fields[3]][0]
        else:
            fansnum = ''
            user_weibo_count = ''
            user_friendsnum = ''

        interaction_count = int(all_interaction_dict[uid])
        out_portrait_list.append({'uid':uid,'photo_url':photo_url,'uname':uname, 'count':interaction_count, 'fansnum':fansnum,'friendsnum': user_friendsnum,'weibo_count': user_weibo_count})
        iter_count += 1

    return out_portrait_list
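
Bidirectional contacts are simply the uids that appear in both the outgoing and incoming interaction maps. A toy version of that intersection and scoring (union_dict again assumed to sum counts, as in the previous sketch):

center_uid = '9999'
retweet_comment_result = {'1001': 3, '1002': 1, '9999': 7}      # outgoing interactions
be_retweet_comment_result = {'1001': 2, '1003': 4, '9999': 5}   # incoming interactions

interaction_user_set = set(retweet_comment_result) & set(be_retweet_comment_result)
all_interaction_dict = {}
for uid in interaction_user_set:
    if uid != center_uid:
        all_interaction_dict[uid] = retweet_comment_result[uid] + be_retweet_comment_result[uid]
print(sorted(all_interaction_dict.items(), key=lambda x: x[1], reverse=True))   # [('1001', 5)]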
def get_sensitive_weibo_detail(ts, social_sensors, sensitive_words_list, message_type, size=100):
    results = []
    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "bool":{
                        "must":[
                            {"range":{
                                "timestamp":{
                                    "gte": ts - time_interval,
                                    "lt": ts
                                }
                            }},
                            {"term": {"message_type": message_type}},
                            {"terms":{"keywords_string": sensitive_words_list}}
                        ]
                    }
                }
            }
        },
        "size": size,
        "sort": {"timestamp": {"order": "desc"}}
    }

    if social_sensors:
        query_body['query']['filtered']['filter']['bool']['must'].append({"terms": {"uid": social_sensors}})

    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts-time_interval)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)

    if datetime == datetime_1 and exist_es:
        search_results = es_text.search(index=index_name, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    elif datetime != datetime_1 and exist_es_1:
        search_results = es_text.search(index=index_name_1, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []

    uid_list = []
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
        if uid_list:
            portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list}, fields=['nick_name', 'photo_url'])["docs"]

        for i in range(len(uid_list)):
            item = search_results[i]['_source']
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, common_keywords, message_type
            temp.append(item['uid'])
            if portrait_result[i]['found']:
                temp.append(portrait_result[i]["fields"]["nick_name"][0])
                temp.append(portrait_result[i]["fields"]["photo_url"][0])
            else:
                temp.append("unknown")
                temp.append("")
            temp.append(item["text"])
            #print item['text']
            temp.append(item["sentiment"])
            temp.append(ts2date(item['timestamp']))
            temp.append(item['geo'])
            keywords_set = set(item['keywords_string'].split('&'))
            common_keywords = set(sensitive_words_list) & keywords_set
            temp.append(list(common_keywords))
            temp.append(item['message_type'])
            results.append(temp)

    return results
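
The common_keywords column is just the overlap between a weibo's '&'-joined keywords_string and the sensitive word list. A tiny illustration:

sensitive_words_list = ['keyword_a', 'keyword_b']
keywords_string = 'keyword_b&keyword_c&keyword_d'   # as stored in the flow_text docs

keywords_set = set(keywords_string.split('&'))
common_keywords = set(sensitive_words_list) & keywords_set
print(list(common_keywords))   # ['keyword_b']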
def get_positive_weibo_detail(ts, social_sensors, keywords_list, size, sentiment_type=1):
    former_mid_list = query_mid_list(ts-time_interval, keywords_list, time_segment, social_sensors) # weibo mid list from the previous time window
    current_mid_list = query_mid_list(ts, keywords_list, time_interval,  social_sensors)
    mid_list = []
    mid_list.extend(former_mid_list)
    mid_list.extend(current_mid_list)

    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "bool":{
                        "must":[
                            {"range":{
                                "timestamp":{
                                    "gte": ts - time_interval,
                                    "lt": ts
                                }
                            }}
                        ],
                        "should":[
                            {"terms": {"root_mid": mid_list}},
                            {"terms": {"mid": mid_list}},
                        ]
                    }
                }
            }
        },
        "sort": {"timestamp": {"order": "desc"}},
        "size": 100
    }



    #if social_sensors and int(sentiment_type) == 1:
    #    query_body["query"]["filtered"]["filter"]["bool"]["must"].append({"terms":{"uid": social_sensors}})

    if int(sentiment_type) == 1 or int(sentiment_type) == 0:
        query_body["query"]["filtered"]["filter"]["bool"]["must"].append({"term":{"sentiment":sentiment_type}})
    else:
        query_body["query"]["filtered"]["filter"]["bool"]["must"].append([{"terms":{"sentiment": ["2", "3"]}}])

    # check whether ts and ts-time_interval fall on the same day, to decide which es index to query
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts-time_interval)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)

    # 1. aggregate the original weibo mid list
    if datetime == datetime_1 and exist_es:
        search_results = es_text.search(index=index_name, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    elif datetime != datetime_1 and exist_es_1:
        search_results = es_text.search(index=index_name_1, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []

    uid_list = []
    results = []
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
        if uid_list:
            portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list}, fields=['nick_name', 'photo_url'])["docs"]

        for i in range(len(uid_list)):
            item = search_results[i]['_source']
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, common_keywords, message_type
            temp.append(item['uid'])
            if portrait_result[i]['found']:
                temp.append(portrait_result[i]["fields"]["nick_name"][0])
                temp.append(portrait_result[i]["fields"]["photo_url"][0])
            else:
                temp.append("unknown")
                temp.append("")
            temp.append(item["text"])
            temp.append(item["sentiment"])
            temp.append(ts2date(item['timestamp']))
            temp.append(item['geo'])
            keywords_set = set(item['keywords_string'].split('&'))
            common_keywords = set(keywords_list) & keywords_set
            temp.append(list(common_keywords))
            temp.append(item['message_type'])
            results.append(temp)

    return results
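
The day-boundary check above decides whether the query window crosses midnight and therefore which daily flow_text index to hit. A minimal sketch, assuming ts2datetime returns 'YYYY-MM-DD' and that flow_text_index_name_pre is a plain prefix (both are assumptions, not taken from this page):

import time

def ts2datetime(ts):
    # assumed helper: timestamp -> 'YYYY-MM-DD'
    return time.strftime('%Y-%m-%d', time.localtime(ts))

flow_text_index_name_pre = 'flow_text_'   # assumed prefix, for illustration only
time_interval = 900
ts = time.time()

datetime_now = ts2datetime(ts)
datetime_prev = ts2datetime(ts - time_interval)
if datetime_now == datetime_prev:
    index_to_query = flow_text_index_name_pre + datetime_now    # window stays inside one day
else:
    index_to_query = flow_text_index_name_pre + datetime_prev   # window crosses midnight: use the previous day's index
print(index_to_query)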
Example #42
0
def get_retweet_weibo_detail(ts, user, task_name, size, text_type, type_value):
    _id = user + '-' + task_name
    task_detail = es_user_portrait.get(index=index_sensing_task,
                                       doc_type=_id,
                                       id=ts)['_source']
    origin_weibo_detail = json.loads(task_detail['origin_weibo_detail'])
    retweeted_weibo_detail = json.loads(task_detail['retweeted_weibo_detail'])

    mid_list = []
    mid_list.extend(origin_weibo_detail.keys())
    mid_list.extend(retweeted_weibo_detail.keys())

    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [{
                            "range": {
                                "timestamp": {
                                    "gte": ts - time_interval,
                                    "lt": ts
                                }
                            }
                        }, {
                            "terms": {
                                "root_mid": mid_list
                            }
                        }]
                    }
                }
            }
        },
        "sort": {
            "timestamp": {
                "order": "desc"
            }
        },
        "size": 100
    }

    if text_type == "message_type":
        query_body['query']['filtered']['filter']['bool']['must'].append(
            {"term": {
                text_type: type_value
            }})
    if text_type == "sentiment":
        #if isinstance(type_value, str):
        if len(type_value) == 1:
            query_body['query']['filtered']['filter']['bool']['must'].append(
                {"term": {
                    text_type: type_value
                }})
        else:
            query_body['query']['filtered']['filter']['bool']['must'].append(
                {"terms": {
                    text_type: type_value
                }})

    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts - time_interval)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)

    # 1. query weibo
    if datetime == datetime_1 and exist_es:
        search_results = es_text.search(index=index_name,
                                        doc_type=flow_text_index_type,
                                        body=query_body)["hits"]["hits"]
    elif datetime != datetime_1 and exist_es_1:
        search_results = es_text.search(index=index_name_1,
                                        doc_type=flow_text_index_type,
                                        body=query_body)["hits"]["hits"]
    else:
        search_results = []
    #print search_results
    # 2. get weibo-related info
    results = []
    uid_list = []
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
        if uid_list:
            portrait_result = es_profile.mget(
                index=profile_index_name,
                doc_type=profile_index_type,
                body={"ids": uid_list},
                fields=['nick_name', 'photo_url'])["docs"]

        for i in range(len(uid_list)):
            item = search_results[i]['_source']
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, common_keywords, message_type
            temp.append(item['uid'])
            if portrait_result[i]['found']:
                temp.append(portrait_result[i]["fields"]["nick_name"][0])
                temp.append(portrait_result[i]["fields"]["photo_url"][0])
            else:
                temp.append(item['uid'])
                temp.append("")
            temp.append(item["text"])
            #print item['text']
            temp.append(item["sentiment"])
            temp.append(ts2date(item['timestamp']))
            temp.append(item['geo'])
            temp.append(item["message_type"])
            results.append(temp)

    return results
Example #43
0
def get_recommentation(submit_user):
    if RUN_TYPE:
        now_ts = time.time()
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)

    in_portrait_set = set(r.hkeys("compute"))
    result = []
    for i in range(7):
        iter_ts = now_ts - i * DAY
        iter_date = ts2datetime(iter_ts)
        submit_user_recomment = "recomment_" + submit_user + "_" + str(
            iter_date)
        bci_date = ts2datetime(iter_ts - DAY)
        submit_user_recomment = r.hkeys(submit_user_recomment)
        bci_index_name = "bci_" + bci_date.replace('-', '')
        exist_bool = es_cluster.indices.exists(index=bci_index_name)
        if not exist_bool:
            continue
        if submit_user_recomment:
            user_bci_result = es_cluster.mget(
                index=bci_index_name,
                doc_type="bci",
                body={'ids': submit_user_recomment},
                _source=True)['docs']
            user_profile_result = es_user_profile.mget(
                index='weibo_user',
                doc_type='user',
                body={'ids': submit_user_recomment},
                _source=True)['docs']
            max_evaluate_influ = get_evaluate_max(bci_index_name)
            # use a separate loop variable so the outer day loop's i is not shadowed
            for j in range(len(submit_user_recomment)):
                uid = submit_user_recomment[j]
                bci_dict = user_bci_result[j]
                profile_dict = user_profile_result[j]
                try:
                    bci_source = bci_dict['_source']
                except:
                    bci_source = None
                if bci_source:
                    influence = bci_source['user_index']
                    influence = math.log(
                        influence / max_evaluate_influ['user_index'] * 9 + 1,
                        10)
                    influence = influence * 100
                else:
                    influence = ''
                try:
                    profile_source = profile_dict['_source']
                except:
                    profile_source = None
                if profile_source:
                    uname = profile_source['nick_name']
                    location = profile_source['user_location']
                    fansnum = profile_source['fansnum']
                    statusnum = profile_source['statusnum']
                else:
                    uname = ''
                    location = ''
                    fansnum = ''
                    statusnum = ''
                if uid in in_portrait_set:
                    in_portrait = "1"
                else:
                    in_portrait = "0"
                recomment_day = iter_date
                result.append([
                    iter_date, uid, uname, location, fansnum, statusnum,
                    influence, in_portrait
                ])

    return result
Example #44
0
def get_user_detail(date, input_result, status, user_type="influence", auth=""):
    bci_date = ts2datetime(datetime2ts(date) - DAY)
    results = []
    if status=='show_in':
        uid_list = input_result
    if status=='show_compute':
        uid_list = input_result.keys()
    if status=='show_in_history':
        uid_list = input_result.keys()
    if date!='all':
        index_name = 'bci_' + ''.join(bci_date.split('-'))
    else:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        index_name = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids':uid_list}, _source=True)['docs']
    user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list}, _source=True)['docs']
    max_evaluate_influ = get_evaluate_max(index_name)
    for i in range(0, len(uid_list)):
        uid = uid_list[i]
        bci_dict = user_bci_result[i]
        profile_dict = user_profile_result[i]
        try:
            bci_source = bci_dict['_source']
        except:
            bci_source = None
        if bci_source:
            influence = bci_source['user_index']
            influence = math.log(influence/max_evaluate_influ['user_index'] * 9 + 1 ,10)
            influence = influence * 100
        else:
            influence = ''
        try:
            profile_source = profile_dict['_source']
        except:
            profile_source = None
        if profile_source:
            uname = profile_source['nick_name'] 
            location = profile_source['user_location']
            fansnum = profile_source['fansnum']
            statusnum = profile_source['statusnum']
        else:
            uname = ''
            location = ''
            fansnum = ''
            statusnum = ''
        if status == 'show_in':
            if user_type == "sensitive":
                tmp_ts = datetime2ts(date) - DAY
                tmp_data = r_cluster.hget("sensitive_"+str(tmp_ts), uid)
                if tmp_data:
                    sensitive_dict = json.loads(tmp_data)
                    sensitive_words = sensitive_dict.keys()
                else:
                    sensitive_words = []
                results.append([uid, uname, location, fansnum, statusnum, influence, sensitive_words])
            else:
                results.append([uid, uname, location, fansnum, statusnum, influence])
            if auth:
                hashname_submit = "submit_recomment_" + date
                tmp_data = json.loads(r.hget(hashname_submit, uid))
                recommend_list = (tmp_data['operation']).split('&')
                admin_list = []
                admin_list.append(tmp_data['system'])
                admin_list.append(list(set(recommend_list)))
                admin_list.append(len(recommend_list))
                results[-1].extend(admin_list)
        if status == 'show_compute':
            in_date = json.loads(input_result[uid])[0]
            compute_status = json.loads(input_result[uid])[1]
            if compute_status == '1':
                compute_status = '3'
            results.append([uid, uname, location, fansnum, statusnum, influence, in_date, compute_status])
        if status == 'show_in_history':
            in_status = input_result[uid]
            if user_type == "sensitive":
                tmp_ts = datetime2ts(date) - DAY
                tmp_data = r_cluster.hget("sensitive_"+str(tmp_ts), uid)
                if tmp_data:
                    sensitive_dict = json.loads(tmp_data)
                    sensitive_words = sensitive_dict.keys()
                else:
                    sensitive_words = []
                results.append([uid, uname, location, fansnum, statusnum, influence, in_status, sensitive_words])
            else:
                results.append([uid, uname, location, fansnum, statusnum, influence, in_status])

    return results
Example #45
0
 query_sensitive_body = {
     "query":{
         "match_all":{}
     },
     "size":1,
     "sort":{sensitive_string:{"order":"desc"}}
 }
 try:
     top_sensitive_result = es_bci_history.search(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body=query_sensitive_body, _source=False, fields=[sensitive_string])['hits']['hits']
     top_sensitive = top_sensitive_result[0]['fields'][sensitive_string][0]
 except Exception, reason:
     print Exception, reason
     top_sensitive = 400
 index_type = 'bci'
 user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids':uid_list}, _source=True)['docs']
 user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list}, _source=True)['docs']
 bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={"ids":uid_list}, fields=['user_fansnum', 'weibo_month_sum'])['docs']
 sensitive_history_result = es_bci_history.mget(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body={'ids':uid_list}, fields=[sensitive_string], _source=False)['docs']
 max_evaluate_influ = get_evaluate_max(index_name)
 for i in range(0, len(uid_list)):
     uid = uid_list[i]
     bci_dict = user_bci_result[i]
     profile_dict = user_profile_result[i]
     bci_history_dict = bci_history_result[i]
     sensitive_history_dict = sensitive_history_result[i]
     #print sensitive_history_dict
     try:
         bci_source = bci_dict['_source']
     except:
         bci_source = None
     if bci_source:
Example #46
0
def get_temporal_rank(task_type, sort="retweeted"):
    if int(task_type) == 0: # up to now
        sort_list = r.zrange("influence_%s" %sort, 0, 100, withscores=True, desc=True)
    elif int(task_type) == 1:
        sort_list = r.zrange("influence_%s_1" %sort, 0, 100, withscores=True, desc=True)
    elif int(task_type) == 2:
        sort_list = r.zrange("influence_%s_2" %sort, 0, 100, withscores=True, desc=True)
    elif int(task_type) == 3:
        sort_list = r.zrange("influence_%s_3" %sort, 0, 100, withscores=True, desc=True)
    else:
        sort_list = r.zrange("influence_%s_4" %sort, 0, 100, withscores=True, desc=True)

    uid_list = []
    for item in sort_list:
        uid_list.append(item[0])

    if sort == "retweeted":
        other = "comment"
    else:
        other = "retweeted"

    results = []
    # look up profile info
    if uid_list:
        profile_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list})["docs"]
        for item in profile_result:
            _id = item['_id']
            index = profile_result.index(item)
            tmp = []
            if item['found']:
                item = item['_source']
                tmp.append(item['uid'])
                tmp.append(item['nick_name'])
                tmp.append(item['statusnum'])
                tmp.append(item['user_location'])
                tmp.append(item['fansnum'])
            else:
                tmp.extend([_id,'','','',''])
            count_1 = int(sort_list[index][1])
            if int(task_type) == 0:
                count_2 = int(r.zscore("influence_%s" %other, _id))
            else:
                count_2 = int(r.zscore("influence_%s_%s" %(other,task_type), _id))
            if sort == "retweeted":
                tmp.append(count_1)
                tmp.append(count_2)
            else:
                tmp.append(count_2)
                tmp.append(count_1)
            results.append(tmp)

    if uid_list:
        count = 0
        portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":uid_list})["docs"]
        for item in portrait_result:
            if item['found']:
                results[count].append("1")
            else:
                results[count].append("0")
            count += 1

    return results
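
r.zrange(..., withscores=True) returns (member, score) pairs, and the function pairs each primary count with the score looked up from the other ranking. A small offline sketch with hard-coded stand-ins for the two Redis sorted sets (no Redis connection is used):

# hard-coded stand-ins for the two Redis sorted sets
retweeted_rank = [('1001', 42.0), ('1002', 17.0)]   # r.zrange("influence_retweeted", 0, 100, withscores=True, desc=True)
comment_scores = {'1001': 9.0, '1002': 3.0}         # r.zscore("influence_comment", uid)

results = []
for uid, retweeted_score in retweeted_rank:
    results.append([uid, int(retweeted_score), int(comment_scores.get(uid, 0))])
print(results)   # [['1001', 42, 9], ['1002', 17, 3]]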
Example #47
0
def get_final_submit_user_info(uid_list):
    final_results = []
    try:
        profile_results = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={'ids': uid_list})['docs']
    except:
        profile_results = []
    try:
        bci_history_results =es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={'ids': uid_list})['docs']
    except:
        bci_history_results = []
    #get bci_history max value
    now_time_ts = time.time()
    search_date_ts = datetime2ts(ts2datetime(now_time_ts - DAY))
    bci_key = 'bci_' + str(search_date_ts)
    query_body = {
        'query':{
             'match_all':{}
        },
        'sort': [{bci_key:{'order': 'desc'}}],
        'size': 1
    }
    #try:
    bci_max_result = es_bci_history.search(index=bci_history_index_name, doc_type=bci_history_index_type, body=query_body, _source=False, fields=[bci_key])['hits']['hits']
    #except:
    #    bci_max_result = {}
    if bci_max_result:
        bci_max_value = bci_max_result[0]['fields'][bci_key][0]
    else:
        bci_max_value = MAX_VALUE
    iter_count = 0
    for uid in uid_list:
        try:
            profile_item = profile_results[iter_count]
        except:
            profile_item = {}
        try:
            bci_history_item = bci_history_results[iter_count]
        except:
            bci_history_item = {}
        if profile_item and profile_item['found'] == True:
            uname = profile_item['_source']['nick_name']
            location = profile_item['_source']['user_location']
        else:
            uname = ''
            location = ''
        if bci_history_item and bci_history_item['found'] == True:
            fansnum = bci_history_item['_source']['user_fansnum']
            statusnum = bci_history_item['_source']['weibo_month_sum']
            try:
                bci = bci_history_item['_source'][bci_key]
                normal_bci = math.log(bci / float(bci_max_value) * 9 + 1, 10) * 100
            except:
                normal_bci = ''
        else:
            fansnum = ''
            statusnum = ''
            normal_bci = ''
        final_results.append([uid, uname, location, fansnum, statusnum, normal_bci])
        iter_count += 1

    return final_results
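The normal_bci computation above is the same log scaling used throughout these examples: math.log(value / max * 9 + 1, 10) * 100 maps 0 to 0 and the current maximum to 100, compressing the long tail in between. A small standalone check of that mapping (the sample values below are made up):
import math

def normalize(value, max_value):
    # log-scale a non-negative value into [0, 100]; 0 -> 0, max_value -> 100
    return math.log(value / float(max_value) * 9 + 1, 10) * 100

bci_max = 2500.0
for bci in (0, 250, 1250, 2500):
    print('%6d -> %.1f' % (bci, normalize(bci, bci_max)))
# 0 -> 0.0, 250 -> 27.9, 1250 -> 74.0, 2500 -> 100.0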
Example #48
0
def identify_user_portrait(user_set, filter_type):
    in_portrait_result = []
    out_portrait_result = []
    user_list = list(user_set)
    #identify the user_portrait
    iter_count = 0
    all_user_count = len(user_list)
    all_in_portrait_user = dict()
    all_out_portrait_user_list = []
    max_result = get_evaluate_max()
    while iter_count <= all_user_count:
        iter_user_list = user_list[iter_count: iter_count + SENTIMENT_ITER_USER_COUNT]
        #search  user in user_portrait
        try:
            in_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,\
                    body={'ids': iter_user_list}, _source=False, \
                    fields=['uname', 'influence', 'activeness', 'importance', 'sensitive'])['docs']
        except:
            in_portrait_result = []
        #add all hit user
        for in_portrait_item in in_portrait_result:
            if in_portrait_item['found'] == True:
                uname = in_portrait_item['fields']['uname'][0]
                if uname == '' or uname == 'unknown':
                    uname = in_portrait_item['_id']
                influence = in_portrait_item['fields']['influence'][0]
                normal_influence = math.log(influence / max_result['influence'] * 9 + 1 , 10) * 100
                activeness = in_portrait_item['fields']['activeness'][0]
                normal_activeness = math.log(activeness / max_result['activeness'] * 9 + 1 , 10) * 100
                importance = in_portrait_item['fields']['importance'][0]
                normal_importance = math.log(importance / max_result['importance'] * 9 + 1 , 10) * 100
                try:
                    sensitive = in_portrait_item['fields']['sensitive'][0]
                    normal_sensitive = math.log(sensitive / max_result['sensitive'] * 9 + 1 , 10) * 100
                except:
                    normal_sensitive = 0
                all_in_portrait_user[in_portrait_item['_id']] = [uname, normal_influence, normal_activeness, \
                    normal_importance, normal_sensitive]
            else:
                all_out_portrait_user_list.append(int(in_portrait_item['_id']))
        iter_count += SENTIMENT_ITER_USER_COUNT
    if filter_type == 'in':
        return all_in_portrait_user
    #get out portrait user info
    iter_count = 0
    all_out_portrait_user = dict()
    all_out_user_count = len(all_out_portrait_user_list)
    while iter_count <= all_out_user_count:
        iter_uid_list = all_out_portrait_user_list[iter_count: iter_count+SENTIMENT_ITER_USER_COUNT]
        bci_iter_uid_list = [str(item) for item in iter_uid_list]
        try:
            profile_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type,\
                    body={'ids':iter_uid_list}, _source=False, fields=['nick_name'])['docs']
        except:
            profile_result = []
        #bci_history
        try:
            bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={'ids': bci_iter_uid_list}, _source=False, fields=['user_fansnum', 'weibo_month_sum', 'user_friendsnum'])['docs']
        except:
            bci_history_result = []
        bci_iter_count = 0
        for uid in iter_uid_list:
            try:
                profile_item = profile_result[bci_iter_count]
            except:
                profile_item = {'_id': str(uid), 'found': False}
            if profile_item['found'] == True:
                uname = profile_item['fields']['nick_name'][0]
            else:
                uname = profile_item['_id']
            try:
                bci_history_item = bci_history_result[bci_iter_count]
            except:
                bci_history_item = {'found': False}
            if bci_history_item['found'] == True:
                statusnum = bci_history_item['fields']['weibo_month_sum'][0]
                fansnum = bci_history_item['fields']['user_fansnum'][0]
                friendsnum = bci_history_item['fields']['user_friendsnum'][0]
            else:
                statusnum = 0
                fansnum = 0
                friendsnum = 0
            all_out_portrait_user[str(uid)] = [uname, statusnum, friendsnum, fansnum]
            bci_iter_count += 1
        iter_count += SENTIMENT_ITER_USER_COUNT
    return all_in_portrait_user, all_out_portrait_user
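identify_user_portrait walks the uid list in SENTIMENT_ITER_USER_COUNT-sized slices and inspects the 'found' flag of every mget doc. A generic sketch of that chunked-mget pattern, assuming an elasticsearch-py client with the same mget signature as above (the chunk_size default merely stands in for SENTIMENT_ITER_USER_COUNT):
def chunked_mget(es_client, index, doc_type, id_list, chunk_size=1000):
    """Yield (id, source_or_None) for every id, fetching chunk_size ids per mget call."""
    for start in range(0, len(id_list), chunk_size):
        chunk = id_list[start:start + chunk_size]
        try:
            docs = es_client.mget(index=index, doc_type=doc_type, body={'ids': chunk})['docs']
        except Exception:
            docs = [{'_id': uid, 'found': False} for uid in chunk]
        for doc in docs:
            yield doc['_id'], (doc['_source'] if doc.get('found') else None)
Callers can then split the ids into in-portrait (source is not None) and out-of-portrait (source is None) buckets without worrying about request-size limits.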
Example #49
0
# get group weibo by date
def get_group_weibo(task_name, date):
    group_weibo = []
    #step1 : get group user list by task_name
    group_index_name = 'group_result'
    group_index_type = 'group'
    try:
        group_task = es.get(index=group_index_name, doc_type=group_index_type, id=task_name)['_source']
    except Exception, e:
        raise e
    user_list = group_task['uid_list']
    #step2: get group user name
    profile_index_name = 'weibo_user'
    profile_index_type = 'user'
    try:
        user_name_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={'ids':user_list})['docs']
    except Exception, e:
        raise e
    #print 'user_name_result:', user_name_result
    #step3 : get group user weibo
    file_list = set(os.listdir(DEFAULT_LEVELDBPATH))
    count = 0
    for user in user_list:
        user_nick_name = user_name_result[count]['_source']['nick_name']
        for i in range(1, 25):
            leveldb_folder = date + str(i)
            if leveldb_folder in file_list:
                leveldb_bucket = dynamic_leveldb(leveldb_folder)
                try:
                    user_weibo = leveldb_bucket.Get(str(user))
                    weibo_list = json.loads(user_weibo)
Example #50
0
def get_origin_weibo_detail(ts, user, task_name, size, order, message_type=1):
    _id = user + '-' + task_name
    task_detail = es_user_portrait.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']

    mid_value = json.loads(task_detail['mid_topic_value'])
    duplicate_dict = json.loads(task_detail['duplicate_dict'])
    tmp_duplicate_dict = dict()
    for k,v in duplicate_dict.iteritems():
        try:
            tmp_duplicate_dict[v].append(k)
        except:
            tmp_duplicate_dict[v] = [k, v]

        

    if message_type == 1:
        weibo_detail = json.loads(task_detail['origin_weibo_detail'])
    elif message_type == 2:
        weibo_detail = json.loads(task_detail['retweeted_weibo_detail'])
    else:
        weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])
    weibo_detail_list = []
    if weibo_detail:
        for iter_mid, item in weibo_detail.iteritems():
            tmp = []
            tmp.append(iter_mid)
            tmp.append(item[iter_mid])
            tmp.append(item['retweeted'])
            tmp.append(item['comment'])
            weibo_detail_list.append(tmp)
    mid_list = weibo_detail.keys()

    results = []
    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "terms":{"mid": mid_list}
                }
            }
        },
        "size": 1000,
        "sort": {"timestamp": {"order": "desc"}}
    }


    index_list = []
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts-DAY)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    if exist_es:
        index_list.append(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)
    if exist_es_1:
        index_list.append(index_name_1)

    if index_list and mid_list:
        search_results = es_text.search(index=index_list, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []

    uid_list = []
    text_dict = dict() # 文本信息
    portrait_dict = dict() # 背景信息
    sort_results = []
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
            text_dict[item['_id']] = item['_source'] # _id是mid
        if uid_list:
            portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list}, fields=['nick_name', 'photo_url'])["docs"]
            for item in portrait_result:
                if item['found']:
                    portrait_dict[item['_id']] = {"nick_name": item["fields"]["nick_name"][0], "photo_url": item["fields"]["photo_url"][0]}
                else:
                    portrait_dict[item['_id']] = {"nick_name": item['_id'], "photo_url":""}

        if order == "total":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[1], reverse=True)
        elif order == "retweeted":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[2], reverse=True)
        elif order == "comment":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[3], reverse=True)
        else:
            sorted_list = weibo_detail_list

        count_n = 0
        results_dict = dict()
        mid_index_dict = dict()
        for item in sorted_list: # size
            mid = item[0]
            iter_text = text_dict.get(mid, {})
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, common_keywords, message_type
            if iter_text:
                uid = iter_text['uid']
                temp.append(uid)
                iter_portrait = portrait_dict.get(uid, {})
                if iter_portrait:
                    temp.append(iter_portrait['nick_name'])
                    temp.append(iter_portrait['photo_url'])
                else:
                    temp.extend([uid,''])
                temp.append(iter_text["text"])
                temp.append(iter_text["sentiment"])
                temp.append(ts2date(iter_text['timestamp']))
                temp.append(iter_text['geo'])
                if message_type == 1:
                    temp.append(1)
                elif message_type == 2:
                    temp.append(3)
                else:
                    temp.append(iter_text['message_type'])
                temp.append(item[2])
                temp.append(item[3])
                temp.append(iter_text.get('sensitive', 0))
                temp.append(iter_text['timestamp'])
                temp.append(mid_value[mid])
                temp.append(mid)
                results.append(temp)
            count_n += 1

        results = sorted(results, key=operator.itemgetter(-4, -2, -6), reverse=True) # sort by sensitive flag, topic value, retweet count
        sort_results = []
        count = 0
        for item in results:
            sort_results.append([item])
            mid_index_dict[item[-1]] = count
            count += 1

        
        if tmp_duplicate_dict:
            remove_list = []
            value_list = tmp_duplicate_dict.values() # [[mid, mid], ]
            for item in value_list:
                tmp = []
                for mid in item:
                    if mid in mid_index_dict:
                        tmp.append(mid_index_dict[mid])
                if len(tmp) > 1:
                    tmp_min = min(tmp)
                else:
                    continue
                tmp.remove(tmp_min)
                for iter_count in tmp:
                    sort_results[tmp_min].extend(sort_results[iter_count])
                    remove_list.append(sort_results[iter_count])
            if remove_list:
                for item in remove_list:
                    sort_results.remove(item)
        

    return sort_results
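The last step of get_origin_weibo_detail folds near-duplicate weibos into one row: duplicate_dict maps each mid to a representative mid, the clusters are rebuilt from that mapping, and every duplicate row is appended to the highest-ranked row of its cluster. A condensed sketch of that merge, assuming rows is a list of result rows whose last element is the mid, as in the function above:
def merge_duplicate_rows(rows, duplicate_dict):
    # rows: result rows, each ending with its mid; duplicate_dict: {mid: representative_mid}
    clusters = {}
    for mid, rep in duplicate_dict.items():
        clusters.setdefault(rep, set()).update([mid, rep])
    position = {row[-1]: i for i, row in enumerate(rows)}
    grouped = [[row] for row in rows]
    to_drop = set()
    for members in clusters.values():
        indexes = sorted(position[mid] for mid in members if mid in position)
        if len(indexes) < 2:
            continue
        keep = indexes[0]
        for idx in indexes[1:]:
            grouped[keep].extend(grouped[idx])
            to_drop.add(idx)
    return [g for i, g in enumerate(grouped) if i not in to_drop]
Checking mid in position rather than relying on the truthiness of the stored index keeps the row that happens to sit at position 0 from being treated as missing.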
Example #51
0
def get_task_detail_2(task_name, ts, user):
    results = dict()
    index_name = task_name
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task,
                         doc_type=task_doc_type,
                         id=_id)["_source"]
    task_name = task_detail['task_name']
    social_sensors = json.loads(task_detail['social_sensors'])
    history_status = json.loads(task_detail['history_status'])
    start_time = task_detail['create_at']
    create_by = task_detail['create_by']
    stop_time = task_detail['stop_time']
    remark = task_detail.get('remark', '')
    portrait_detail = []
    count = 0  # 计数

    top_influence = get_top_influence("influence")
    top_activeness = get_top_influence("activeness")
    top_importance = get_top_influence("importance")

    if social_sensors:
        search_results = es.mget(index=portrait_index_name,
                                 doc_type=portrait_index_type,
                                 body={"ids": social_sensors},
                                 fields=SOCIAL_SENSOR_INFO)['docs']
        for item in search_results:
            temp = []
            if item['found']:
                for iter_item in SOCIAL_SENSOR_INFO:
                    if iter_item == "topic_string":
                        temp.append(item["fields"][iter_item][0].split('&'))
                    elif iter_item == "activeness":
                        temp.append(
                            math.log(
                                item['fields']['activeness'][0] /
                                float(top_activeness) * 9 + 1, 10) * 100)
                    elif iter_item == "importance":
                        temp.append(
                            math.log(
                                item['fields']['importance'][0] /
                                float(top_importance) * 9 + 1, 10) * 100)
                    elif iter_item == "influence":
                        temp.append(
                            math.log(
                                item['fields']['influence'][0] /
                                float(top_influence) * 9 + 1, 10) * 100)
                    else:
                        temp.append(item["fields"][iter_item][0])
                portrait_detail.append(temp)
        portrait_detail = sorted(portrait_detail,
                                 key=lambda x: x[5],
                                 reverse=True)

    time_series = []  # 时间
    #positive_sentiment_list = [] # 情绪列表
    #neutral_sentiment_list = []
    #negetive_sentiment_list = []
    all_weibo_list = []
    origin_weibo_list = []  # 微博列表
    retweeted_weibo_list = []
    #retweeted_weibo_count = [] # 别人转发他的数量
    #comment_weibo_count = []
    #total_number_count = []
    #burst_time_list = [] # 爆发时间列表
    important_user_set = set()  # 重要人物列表
    out_portrait_users = set()  # 未入库

    ts = int(ts)
    time_series = history_status
    #for item in history_status:
    #    if int(item[0]) <= ts:
    #        time_series.append(item[0]) # 到目前为止的所有的时间戳

    # get detail task information from es
    if time_series:
        flow_detail = es.mget(index=index_sensing_task,
                              doc_type=_id,
                              body={"ids": time_series})['docs']
    else:
        flow_detail = {}
    if flow_detail:
        for item in flow_detail:
            item = item['_source']
            timestamp = item['timestamp']
            #sentiment_distribution = json.loads(item["sentiment_distribution"])
            #positive_sentiment_list.append(int(sentiment_distribution['1']))
            #negetive_sentiment_list.append(int(sentiment_distribution['2'])+int(sentiment_distribution['3']) \
            #        +int(sentiment_distribution['4'])+int(sentiment_distribution['5'])+int(sentiment_distribution['6']))
            #neutral_sentiment_list.append(int(sentiment_distribution['0']))
            origin_weibo_list.append(item["origin_weibo_number"])  # real
            retweeted_weibo_list.append(item['retweeted_weibo_number'])  # real
            all_weibo_list.append(item["origin_weibo_number"] +
                                  item['retweeted_weibo_number'])
            #retweeted_weibo_count.append(item['retweeted_weibo_count'])
            #comment_weibo_count.append(item['comment_weibo_count'])
            #total_number_count.append(item['weibo_total_number'])
            temp_important_user_list = json.loads(item['important_users'])
            unfiltered_users = json.loads(item['unfilter_users'])
            temp_out_portrait_users = set(unfiltered_users) - set(
                temp_important_user_list)  # 未入库
            important_user_set = important_user_set | set(
                temp_important_user_list)
            out_portrait_users = out_portrait_users | set(
                temp_out_portrait_users)

            #burst_reason = item.get("burst_reason", "")
            #if burst_reason:
            #    burst_time_list.append([timestamp, count, burst_reason])
            count += 1

    ####################################################################################
    # 统计爆发原因,下相应的结论
    """
    weibo_variation_count = 0
    weibo_variation_time = []
    sentiment_variation_count = 0
    sentiment_variation_time = []
    sensitive_variation_count = 0 # sensitive
    sensitive_variation_time = [] # sensitive
    common_variation_count = 0
    common_variation_time = []
    if burst_time_list:
        for item in burst_time_list:
            tmp_common = 0
            x1 = 0
            x2 = 0
            x3 = 0
            if signal_count_varition in item[2]:
                weibo_variation_count += 1
                weibo_variation_time.append([ts2date_min(item[0]), total_number_count[item[1]]])
                x1 = total_number_count[item[1]]
                tmp_common += 1
            if signal_sentiment_varition in item[2]:
                tmp_common += 1
                sentiment_variation_count += 1
                x2 = negetive_sentiment_list[item[1]]
                sentiment_variation_time.append([ts2date_min(item[0]), negetive_sentiment_list[item[1]]])
            if signal_sensitive_variation in item[2]:
                tmp_common += 1
                sensitive_variation_count += 1
                x3 = sensitive_total_number_list[item[1]]
                sensitive_variation_time.append([ts2date_min(item[0]), all_weibo_list[item[1]]])
            if tmp_common >= 2:
                common_variation_count += 1
                common_variation_time.append([ts2date_min(item[0]), x1, x2, x3])

    warning_conclusion = remark
    variation_distribution = []
    if weibo_variation_count:
        variation_distribution.append(weibo_variation_time)
    else:
        variation_distribution.append([])

    if sentiment_variation_count:
        variation_distribution.append(sentiment_variation_time)
    else:
        variation_distribution.append([])

    if sensitive_variation_count:
        variation_distribution.append(sensitive_variation_time)
    else:
        variation_distribution.append([])

    if common_variation_count:
        variation_distribution.append(common_variation_time)
    else:
        variation_distribution.append([])

    results['warning_conclusion'] = warning_conclusion
    results['variation_distribution'] = variation_distribution

    # 每个用户的热度
    """

    # 获取重要用户的个人信息
    important_uid_list = list(important_user_set)
    out_portrait_users_list = list(out_portrait_users)
    social_sensor_set = set(social_sensors)
    user_detail_info = []  #
    out_user_detail_info = []
    if important_uid_list:
        user_results = es.mget(index=portrait_index_name,
                               doc_type=portrait_index_type,
                               body={"ids": important_uid_list},
                               fields=[
                                   'uid', 'uname', 'domain', 'topic_string',
                                   "photo_url", 'importance', 'influence',
                                   'activeness'
                               ])['docs']
        for item in user_results:
            if item['found']:
                temp = []
                #if int(item['fields']['importance'][0]) < IMPORTANT_USER_THRESHOULD:
                #    continue
                temp.append(item['fields']['uid'][0])
                uname = item['fields']['uname'][0]
                if not uname or uname == "未知":
                    uname = item['fields']['uid'][0]
                temp.append(uname)
                temp.append(item['fields']['photo_url'][0])
                temp.append(item['fields']['domain'][0])
                temp.append(item['fields']['topic_string'][0].split('&'))
                #hot_count = count_hot_uid(item['fields']['uid'][0], start_time, stop_time)
                #temp.append(hot_count)
                temp.append(
                    math.log(
                        item['fields']['importance'][0] /
                        float(top_importance) * 9 + 1, 10) * 100)
                temp.append(
                    math.log(
                        item['fields']['influence'][0] / float(top_influence) *
                        9 + 1, 10) * 100)
                temp.append(
                    math.log(
                        item['fields']['activeness'][0] /
                        float(top_activeness) * 9 + 1, 10) * 100)
                if item['fields']['uid'][0] in social_sensor_set:
                    temp.append(1)
                else:
                    temp.append(0)
                user_detail_info.append(temp)
    # 排序
    if user_detail_info:
        user_detail_info = sorted(user_detail_info,
                                  key=lambda x: x[6],
                                  reverse=True)
    else:
        user_detail_info = []

    if out_portrait_users_list:
        profile_results = es_profile.mget(
            index=profile_index_name,
            doc_type=profile_index_type,
            body={"ids": out_portrait_users_list})["docs"]
        bci_index = "bci_" + ts2datetime(ts - DAY).replace('-', '')
        influence_results = es.mget(index=bci_index,
                                    doc_type="bci",
                                    body={"ids": out_portrait_users_list},
                                    fields=["user_index"])['docs']
        bci_results = es_profile.mget(index="bci_history",
                                      doc_type="bci",
                                      body={"ids": out_portrait_users_list},
                                      fields=['user_fansnum'])['docs']
        top_influence = get_top_all_influence("user_index", ts)
        count = 0
        if profile_results:
            for item in profile_results:
                temp = []
                if item['found']:
                    temp.append(item['_source']['uid'])
                    if item['_source']['nick_name']:
                        temp.append(item['_source']['nick_name'])
                    else:
                        temp.append(item['_source']['uid'])
                    temp.append(item['_source']['user_location'])
                    #temp.append(item['_source']['fansnum'])
                else:
                    temp.append(item['_id'])
                    temp.append(item['_id'])
                    temp.extend([''])
                try:
                    user_fansnum = bci_results[count]["fields"][
                        "user_fansnum"][0]
                except:
                    user_fansnum = 0
                temp.append(user_fansnum)
                temp_influ = influence_results[count]
                if temp_influ.get('found', 0):
                    user_index = temp_influ['fields']['user_index'][0]
                    temp.append(
                        math.log(user_index / float(top_influence) * 9 + 1, 10)
                        * 100)
                else:
                    temp.append(0)
                count += 1
                out_user_detail_info.append(temp)
    print len(out_user_detail_info)
    if len(out_user_detail_info):
        print "sort"
        out_user_detail_info = sorted(out_user_detail_info,
                                      key=lambda x: x[4],
                                      reverse=True)

    revise_time_series = []
    for item in time_series:
        revise_time_series.append(ts2date_min(item))

    results['important_user_detail'] = user_detail_info
    results['out_portrait_user_detail'] = out_user_detail_info
    #results['burst_time'] = burst_time_list # 爆发时间点,以及爆发原因
    results['time_series'] = revise_time_series
    #results['positive_sentiment_list'] = positive_sentiment_list
    #results['negetive_sentiment_list'] = negetive_sentiment_list
    #results['neutral_sentiment_list'] = neutral_sentiment_list
    results['all_weibo_list'] = all_weibo_list
    results['origin_weibo_list'] = origin_weibo_list
    results['retweeted_weibo_list'] = retweeted_weibo_list
    #results['comment_weibo_count'] = comment_weibo_count
    #results['retweeted_weibo_count'] = retweeted_weibo_count
    #results['total_number_list'] = total_number_count
    results['social_sensors_detail'] = portrait_detail

    return results
Example #52
0
def get_more_out(task_name, ts, user):
    results = dict()
    index_name = task_name
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task,
                         doc_type=task_doc_type,
                         id=_id)["_source"]
    task_name = task_detail['task_name']
    social_sensors = json.loads(task_detail['social_sensors'])
    history_status = json.loads(task_detail['history_status'])

    important_user_set = set()  # 重要人物列表
    out_portrait_users = set()  # 未入库

    top_importance = get_top_influence("importance")
    top_influence = get_top_influence("influence")
    top_activeness = get_top_influence("activeness")

    ts = int(ts)
    time_series = history_status

    if time_series:
        flow_detail = es.mget(index=index_sensing_task,
                              doc_type=_id,
                              body={"ids": time_series})['docs']
    else:
        flow_detail = {}

    if flow_detail:
        for item in flow_detail:
            item = item['_source']
            timestamp = item['timestamp']

            temp_important_user_list = json.loads(item['important_users'])
            unfiltered_users = json.loads(item['unfilter_users'])
            temp_out_portrait_users = set(unfiltered_users) - set(
                temp_important_user_list)
            important_user_set = important_user_set | set(
                temp_important_user_list)
            out_portrait_users = out_portrait_users | set(
                temp_out_portrait_users)

    out_portrait_users_list = list(out_portrait_users)
    social_sensor_set = set(social_sensors)
    out_user_detail_info = []
    if out_portrait_users_list:
        out_portrait_users_list = out_portrait_users_list[:1000]
        profile_results = es_profile.mget(
            index=profile_index_name,
            doc_type=profile_index_type,
            body={"ids": out_portrait_users_list})["docs"]
        bci_index = "bci_" + ts2datetime(ts - DAY).replace('-', '')
        influence_results = es.mget(index=bci_index,
                                    doc_type="bci",
                                    body={"ids": out_portrait_users_list},
                                    fields=["user_index"])['docs']
        bci_results = es_profile.mget(index="bci_history",
                                      doc_type="bci",
                                      body={"ids": out_portrait_users_list},
                                      fields=['user_fansnum'])['docs']
        top_influence = get_top_all_influence("user_index", ts)
        count = 0
        if profile_results:
            for item in profile_results:
                temp = []
                if item['found']:
                    temp.append(item['_source']['uid'])
                    if item['_source']['nick_name']:
                        temp.append(item['_source']['nick_name'])
                    else:
                        temp.append(item['_source']['uid'])
                    temp.append(item['_source']['user_location'])
                else:
                    temp.append(item['_id'])
                    temp.append(item['_id'])
                    temp.extend([''])
                try:
                    user_fansnum = bci_results[count]["fields"][
                        "user_fansnum"][0]
                except:
                    user_fansnum = 0
                temp.append(user_fansnum)
                temp_influ = influence_results[count]
                if temp_influ.get('found', 0):
                    user_index = temp_influ['fields']['user_index'][0]
                    temp.append(
                        math.log(user_index / float(top_influence) * 9 + 1, 10)
                        * 100)
                else:
                    temp.append(0)
                count += 1
                out_user_detail_info.append(temp)

    if len(out_user_detail_info):
        out_user_detail_info = sorted(out_user_detail_info,
                                      key=lambda x: x[4],
                                      reverse=True)

    return out_user_detail_info
Example #53
0
def get_sensitive_text_detail(task_name, ts, user, order):
    _id = user + '-' + task_name
    task_detail = es.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']
    weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])

    weibo_detail_list = []
    if weibo_detail:
        for iter_mid, item in weibo_detail.iteritems():
            tmp = []
            tmp.append(iter_mid)
            tmp.append(item[iter_mid])
            tmp.append(item['retweeted'])
            tmp.append(item['comment'])
            weibo_detail_list.append(tmp)
    mid_list = weibo_detail.keys()

    results = []
    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "terms":{"mid": mid_list}
                }
            }
        }
    }

    index_list = []
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts-DAY)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    if exist_es:
        index_list.append(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)
    if exist_es_1:
        index_list.append(index_name_1)

    if index_list and mid_list:
        search_results = es_text.search(index=index_list, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []

    uid_list = []
    text_dict = dict() # 文本信息
    portrait_dict = dict() # 背景信息
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
            text_dict[item['_id']] = item['_source'] # _id是mid
        if uid_list:
            portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list}, fields=['nick_name', 'photo_url'])["docs"]
            for item in portrait_result:
                if item['found']:
                    portrait_dict[item['_id']] = {"nick_name": item["fields"]["nick_name"][0], "photo_url": item["fields"]["photo_url"][0]}
                else:
                    portrait_dict[item['_id']] = {"nick_name": item['_id'], "photo_url":""}

        if order == "total":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[1], reverse=True)
        elif order == "retweeted":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[2], reverse=True)
        elif order == "comment":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[3], reverse=True)
        else:
            sorted_list = weibo_detail_list

        count_n = 0
        for item in sorted_list:
            mid = item[0]
            iter_text = text_dict.get(mid, {})
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, common_keywords, message_type
            if iter_text:
                uid = iter_text['uid']
                temp.append(uid)
                iter_portrait = portrait_dict.get(uid, {})
                if iter_portrait:
                    temp.append(iter_portrait['nick_name'])
                    temp.append(iter_portrait['photo_url'])
                else:
                    temp.extend([uid,''])
                temp.append(iter_text["text"])
                temp.append(iter_text["sentiment"])
                temp.append(ts2date(iter_text['timestamp']))
                temp.append(iter_text['geo'])
                temp.append(iter_text['message_type'])
                temp.append(item[2])
                temp.append(item[3])
                temp.append(iter_text.get('sensitive', 0))
                count_n += 1
                results.append(temp)

        if results and order == "ts":
            results = sorted(results, key=lambda x:x[5], reverse=True)

        if results and order == "sensitive":
            results = sorted(results, key=lambda x:x[-1], reverse=True)

    return results
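Both get_origin_weibo_detail and get_sensitive_text_detail build candidate flow-text index names from the task timestamp and only query the indexes that actually exist. A compact sketch of that selection, reusing the flow_text_index_name_pre, DAY and ts2datetime names these examples already rely on:
def existing_flow_text_indexes(es_text, ts, days_back=1):
    # collect the flow_text index for ts and the previous days_back days, keeping only existing ones
    index_list = []
    for offset in range(days_back + 1):
        index_name = flow_text_index_name_pre + ts2datetime(ts - offset * DAY)
        if es_text.indices.exists(index_name):
            index_list.append(index_name)
    return index_list
The resulting list can be handed straight to es_text.search(index=index_list, ...), exactly as the two functions above do.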