def search_mention(uid, sensitive):
    """Aggregate @-mention counts for ``uid`` over a fixed 7-day window.

    For each of the 7 days ending 2013-09-07 (hard-coded test date), reads the
    per-day mention hash for ``uid`` from redis (``at_<date>`` or
    ``sensitive_at_<date>`` depending on ``sensitive``), sums the counts per
    mentioned user, and tags each mentioned uid with '1'/'0' according to
    identify_uid_list_in membership.

    Returns:
        [top-20 (uid, [count, flag]) pairs sorted by count desc, total count],
        or [None, 0] when no mention data exists for the window.
    """
    stat_results = dict()
    results = dict()
    # NOTE(review): hard-coded evaluation date; presumably a leftover test
    # anchor — confirm before deploying against live data.
    test_ts = datetime2ts('2013-09-07')
    for day in range(0, 7):
        ts = test_ts - day * 24 * 3600
        date = ts2datetime(ts).replace('-', '')
        if not sensitive:
            at_temp = r_cluster.hget('at_' + str(date), str(uid))
        else:
            at_temp = r_cluster.hget('sensitive_at_' + str(date), str(uid))
        if not at_temp:
            continue
        result_dict = json.loads(at_temp)
        for at_uid in result_dict:
            # BUG FIX: counts were accumulated under `uid` (the caller's id)
            # instead of `at_uid`, collapsing every mention into one key.
            # Also replaces Py2-only has_key() with .get().
            stat_results[at_uid] = stat_results.get(at_uid, 0) + result_dict[at_uid]
    if not stat_results:
        return [None, 0]

    # BUG FIX: classify and report the 7-day aggregate (stat_results), not
    # result_dict, which only held the last non-empty day's raw counts.
    in_status = identify_uid_list_in(list(stat_results.keys()))
    for at_uid, count in stat_results.items():
        if at_uid in in_status:
            results[at_uid] = [count, '1']
        else:
            results[at_uid] = [count, '0']

    sorted_results = sorted(results.items(), key=lambda x: x[1][0], reverse=True)
    return [sorted_results[0:20], len(results)]
# Example #2
def get_sensitive_user_detail(uid_list, date, sensitive):
    """Build a detail row per uid: profile fields, influence score, sensitive words.

    Fetches BCI scores (``user_index``) and weibo profiles via ES mget, then for
    each uid emits
    ``[uid, uname, location, fansnum, statusnum, influence, sensitive_words]``.
    Influence is the uid's BCI log-scaled against the day's top value; it falls
    back to 0 when the BCI doc is missing or malformed. ``sensitive_words`` is
    the list of words stored in redis under ``sensitive_<ts>`` (only looked up
    when ``sensitive`` is truthy), else [].
    """
    ts = datetime2ts(date)
    results = []
    index_name = pre_influence_index + str(date).replace(
        '-', '')  # index_name:20130901
    user_bci_results = es_bci.mget(index=index_name,
                                   doc_type='bci',
                                   body={'ids': uid_list},
                                   _source=False,
                                   fields=['user_index'])['docs']
    user_profile_results = es_user_profile.mget(index="weibo_user",
                                                doc_type="user",
                                                body={"ids": uid_list},
                                                _source=True)['docs']
    top_influnce_value = get_top_value("user_index", es_bci, index_name, "bci")
    for i, uid in enumerate(uid_list):
        personal_info = [''] * 6
        personal_info[0] = uid
        personal_info[1] = uid  # fall back to uid when no nick name is found
        if user_profile_results[i]['found']:
            profile_dict = user_profile_results[i]['_source']
            uname = profile_dict['nick_name']
            if uname:
                personal_info[1] = uname
            personal_info[2] = profile_dict['user_location']
            personal_info[3] = profile_dict['fansnum']
            personal_info[4] = profile_dict['statusnum']
        personal_info[5] = 0
        if user_bci_results[i]['found']:
            try:
                tmp_bci = user_bci_results[i]['fields']['user_index'][0]
                # Normalize against the day's top BCI, log-scale to 0..100.
                personal_info[5] = math.log(
                    tmp_bci / float(top_influnce_value) * 9 + 1, 10) * 100
            except (KeyError, IndexError, TypeError, ValueError,
                    ZeroDivisionError):
                # BUG FIX: was a bare `except:` that swallowed everything
                # (including KeyboardInterrupt); narrowed to the failures the
                # lookup/arithmetic above can actually raise.
                personal_info[5] = 0
        sensitive_words = []
        if sensitive:
            raw_words = redis_cluster.hget('sensitive_' + str(ts), str(uid))
            if raw_words:
                # BUG FIX: materialize the keys — on Python 3 .keys() is a
                # view, which would leak a non-serializable object into the row.
                sensitive_words = list(json.loads(raw_words).keys())
        personal_info.append(sensitive_words)
        results.append(personal_info)
    return results
# Example #3
def search_mention(uid, sensitive):
    """Aggregate @-mention counts for ``uid`` over a fixed 7-day window.

    For each of the 7 days ending 2013-09-07 (hard-coded test date), reads the
    per-day mention hash for ``uid`` from redis (``at_<date>`` or
    ``sensitive_at_<date>`` depending on ``sensitive``), sums the counts per
    mentioned user, and tags each mentioned uid with '1'/'0' according to
    identify_uid_list_in membership.

    Returns:
        [top-20 (uid, [count, flag]) pairs sorted by count desc, total count],
        or [None, 0] when no mention data exists for the window.
    """
    stat_results = dict()
    results = dict()
    # NOTE(review): hard-coded evaluation date; presumably a leftover test
    # anchor — confirm before deploying against live data.
    test_ts = datetime2ts('2013-09-07')
    for day in range(0, 7):
        ts = test_ts - day * 24 * 3600
        date = ts2datetime(ts).replace('-', '')
        if not sensitive:
            at_temp = r_cluster.hget('at_' + str(date), str(uid))
        else:
            at_temp = r_cluster.hget('sensitive_at_' + str(date), str(uid))
        if not at_temp:
            continue
        result_dict = json.loads(at_temp)
        for at_uid in result_dict:
            # BUG FIX: counts were accumulated under `uid` (the caller's id)
            # instead of `at_uid`, collapsing every mention into one key.
            # Also replaces Py2-only has_key() with .get().
            stat_results[at_uid] = stat_results.get(at_uid, 0) + result_dict[at_uid]
    if not stat_results:
        return [None, 0]

    # BUG FIX: classify and report the 7-day aggregate (stat_results), not
    # result_dict, which only held the last non-empty day's raw counts.
    in_status = identify_uid_list_in(list(stat_results.keys()))
    for at_uid, count in stat_results.items():
        if at_uid in in_status:
            results[at_uid] = [count, '1']
        else:
            results[at_uid] = [count, '0']

    sorted_results = sorted(results.items(),
                            key=lambda x: x[1][0],
                            reverse=True)
    return [sorted_results[0:20], len(results)]
def get_sensitive_user_detail(uid_list, date, sensitive):
    """Build a detail row per uid: profile fields, influence score, sensitive words.

    Fetches BCI scores (``user_index``) and weibo profiles via ES mget, then for
    each uid emits
    ``[uid, uname, location, fansnum, statusnum, influence, sensitive_words]``.
    Influence is the uid's BCI log-scaled against the day's top value; it falls
    back to 0 when the BCI doc is missing or malformed. ``sensitive_words`` is
    the list of words stored in redis under ``sensitive_<ts>`` (only looked up
    when ``sensitive`` is truthy), else [].
    """
    ts = datetime2ts(date)
    results = []
    index_name = pre_influence_index + str(date).replace('-', '')  # index_name:20130901
    user_bci_results = es_bci.mget(index=index_name, doc_type='bci', body={'ids': uid_list}, _source=False, fields=['user_index'])['docs']
    user_profile_results = es_user_profile.mget(index="weibo_user", doc_type="user", body={"ids": uid_list}, _source=True)['docs']
    top_influnce_value = get_top_value("user_index", es_bci, index_name, "bci")
    for i, uid in enumerate(uid_list):
        personal_info = [''] * 6
        personal_info[0] = uid
        personal_info[1] = uid  # fall back to uid when no nick name is found
        if user_profile_results[i]['found']:
            profile_dict = user_profile_results[i]['_source']
            uname = profile_dict['nick_name']
            if uname:
                personal_info[1] = uname
            personal_info[2] = profile_dict['user_location']
            personal_info[3] = profile_dict['fansnum']
            personal_info[4] = profile_dict['statusnum']
        personal_info[5] = 0
        if user_bci_results[i]['found']:
            try:
                tmp_bci = user_bci_results[i]['fields']['user_index'][0]
                # Normalize against the day's top BCI, log-scale to 0..100.
                personal_info[5] = math.log(tmp_bci / float(top_influnce_value) * 9 + 1, 10) * 100
            except (KeyError, IndexError, TypeError, ValueError, ZeroDivisionError):
                # BUG FIX: was a bare `except:` that swallowed everything
                # (including KeyboardInterrupt); narrowed to the failures the
                # lookup/arithmetic above can actually raise.
                personal_info[5] = 0
        sensitive_words = []
        if sensitive:
            raw_words = redis_cluster.hget('sensitive_' + str(ts), str(uid))
            if raw_words:
                # BUG FIX: materialize the keys — on Python 3 .keys() is a
                # view, which would leak a non-serializable object into the row.
                sensitive_words = list(json.loads(raw_words).keys())
        personal_info.append(sensitive_words)
        results.append(personal_info)
    return results