Esempio n. 1
0
def show_in_history(date):
    """Return the users identified-in on ``date`` together with their status.

    Reads the Redis hashes ``identify_in_sensitive_<date>``,
    ``identify_in_influence_<date>`` and ``identify_in_manual_<date>``
    (field = uid, value = status).  For every non-empty hash the uids are
    passed to ``get_sensitive_user_detail`` for the day before ``date``
    (flag 1 for the sensitive hash, 0 for the other two) and the stored
    status is appended to each returned row.

    Returns:
        All rows from the three hashes, sorted by element 5 of each row in
        descending order.
    """
    # Parenthesised so the statement parses on both Python 2 and Python 3.
    print(date)

    # (hash name prefix, flag handed to get_sensitive_user_detail)
    sources = (
        ("identify_in_sensitive_", 1),
        ("identify_in_influence_", 0),
        ("identify_in_manual_", 0),
    )
    # All three hashes are read before any detail lookup.
    status_maps = [(r.hgetall(prefix + str(date)), flag)
                   for prefix, flag in sources]
    work_date = ts2datetime(datetime2ts(date) - DAY)

    results = []
    for status_by_uid, flag in status_maps:
        uid_list = status_by_uid.keys()
        if not uid_list:
            continue
        for item in get_sensitive_user_detail(uid_list, work_date, flag):
            item.append(status_by_uid[item[0]])
            results.append(item)

    return sorted(results, key=lambda row: row[5], reverse=True)
Esempio n. 2
0
def search_follower(uid, sensitive):
    """Rank the users found in the ``be_retweet`` hashes of ``uid``.

    Every Redis connection in ``R_DICT`` is asked for the hash
    ``sensitive_be_retweet_<uid>`` (when ``sensitive`` is truthy) or
    ``be_retweet_<uid>``; the per-user values are summed across
    connections.  The 20 users with the largest totals are then looked up
    in the ``weibo_user`` index (nick name) and in the
    ``sensitive_user_portrait`` index (presence only).

    Returns:
        ``[rows, total]``.  ``rows`` holds at most 20 entries of the form
        ``[uid, [nick_name, count, in_status]]`` in descending ``count``
        order, where ``count`` is an int and ``in_status`` is 1 when the
        portrait lookup returned a ``_source``, else 0.  ``total`` is the
        number of distinct users collected.  ``[None, 0]`` is returned
        when no hash holds any other user.
    """
    if sensitive:
        hash_name = 'sensitive_be_retweet_' + str(uid)
    else:
        hash_name = 'be_retweet_' + str(uid)
    # Redis field names are strings, so compare against the string form.
    own_uid = str(uid)

    stat_results = dict()
    for db_num in R_DICT:
        br_uid_results = R_DICT[db_num].hgetall(hash_name)
        if not br_uid_results:
            continue
        for br_uid, count in br_uid_results.items():
            if br_uid == own_uid:
                continue
            # Hash values come back from Redis as strings: convert before
            # adding, otherwise "3" + "5" becomes "35" and the ranking
            # below compares text instead of numbers.
            stat_results[br_uid] = stat_results.get(br_uid, 0) + int(count)
    if not stat_results:
        return [None, 0]

    sort_stat_results = sorted(stat_results.items(),
                               key=lambda x: x[1],
                               reverse=True)[:20]
    uid_list = [item[0] for item in sort_stat_results]
    es_profile_results = es_user_profile.mget(index='weibo_user',
                                              doc_type='user',
                                              body={'ids': uid_list})['docs']
    es_portrait_results = es.mget(index='sensitive_user_portrait',
                                  doc_type='user',
                                  body={'ids': uid_list})['docs']

    result_list = []
    # Both mget calls use the same id list, so the docs are walked in pairs.
    for profile_doc, portrait_doc in zip(es_profile_results,
                                         es_portrait_results):
        follower_uid = profile_doc['_id']
        try:
            uname = profile_doc['_source']['nick_name']
        except (KeyError, TypeError):
            # No profile document (or no nick name) for this user.
            uname = u'unknown'
        in_status = 1 if '_source' in portrait_doc else 0
        result_list.append(
            [follower_uid, [uname, stat_results[follower_uid], in_status]])
    return [result_list[:20], len(stat_results)]
def show_in_history(date):
    """Return the users identified-in on ``date`` together with their status.

    The Redis hashes ``identify_in_sensitive_<date>``,
    ``identify_in_influence_<date>`` and ``identify_in_manual_<date>`` map
    uid to status.  The uids of each non-empty hash are passed to
    ``get_sensitive_user_detail`` for the day before ``date`` (flag 1 for
    the sensitive hash, 0 otherwise) and the stored status is appended to
    every returned row.

    Returns:
        The rows of all three hashes, sorted by element 5 in descending
        order.
    """
    # Parenthesised so the statement parses on both Python 2 and Python 3.
    print(date)

    sen_iden_in_results = r.hgetall("identify_in_sensitive_" + str(date))
    inf_iden_in_results = r.hgetall("identify_in_influence_" + str(date))
    man_iden_in_results = r.hgetall("identify_in_manual_" + str(date))
    work_date = ts2datetime(datetime2ts(date) - DAY)

    results = []

    def collect(status_by_uid, flag):
        # Append the detail rows of one hash, each extended with its status.
        uid_list = status_by_uid.keys()
        if not uid_list:
            return
        for item in get_sensitive_user_detail(uid_list, work_date, flag):
            item.append(status_by_uid[item[0]])
            results.append(item)

    collect(sen_iden_in_results, 1)
    collect(inf_iden_in_results, 0)
    collect(man_iden_in_results, 0)

    return sorted(results, key=lambda x: x[5], reverse=True)
Esempio n. 4
0
def search_retweet(uid, sensitive):
    """Rank the users found in the ``retweet`` hashes of ``uid``.

    Every Redis connection in ``R_DICT`` is asked for the hash
    ``retweet_<uid>`` or, when ``sensitive`` is truthy,
    ``sensitive_retweet_<uid>``; the per-user values are summed across
    connections.  The 20 users with the largest totals are looked up in
    the ``weibo_user`` index (nick name) and in the
    ``sensitive_user_portrait`` index (``found`` flag only).

    Returns:
        ``[rows, total]``.  ``rows`` holds at most 20 entries of the form
        ``[uid, [nick_name, count, in_status]]`` in descending ``count``
        order (``count`` is an int, ``in_status`` is 1 when the portrait
        document was found, else 0).  ``total`` is the number of distinct
        users collected.  ``[None, 0]`` when nothing was collected.
    """
    if not sensitive:
        hash_name = 'retweet_' + str(uid)
    else:
        hash_name = 'sensitive_retweet_' + str(uid)  # because of sensitive weibo
    # Redis field names are strings, so compare against the string form.
    own_uid = str(uid)

    stat_results = dict()
    for db_num in R_DICT:
        ruid_results = R_DICT[db_num].hgetall(hash_name)
        if not ruid_results:
            continue
        for ruid, count in ruid_results.items():
            if ruid == own_uid:
                continue
            # Hash values are strings in Redis; add them as integers so
            # shards are summed (not concatenated) and sorted numerically.
            stat_results[ruid] = stat_results.get(ruid, 0) + int(count)

    if not stat_results:
        return [None, 0]

    sort_stat_results = sorted(stat_results.items(),
                               key=lambda x: x[1],
                               reverse=True)[:20]
    uid_list = [item[0] for item in sort_stat_results]
    es_profile_results = es_user_profile.mget(index='weibo_user',
                                              doc_type='user',
                                              body={'ids': uid_list})['docs']
    es_portrait_results = es.mget(index='sensitive_user_portrait',
                                  doc_type='user',
                                  body={'ids': uid_list})['docs']

    result_list = []
    # Both mget calls use the same id list, so the docs are walked in pairs.
    for profile_doc, portrait_doc in zip(es_profile_results,
                                         es_portrait_results):
        retweet_uid = profile_doc['_id']
        if profile_doc['found']:
            uname = profile_doc['_source']['nick_name']
        else:
            uname = u'unknown'
        in_status = 1 if portrait_doc['found'] else 0
        result_list.append(
            [retweet_uid, [uname, stat_results[retweet_uid], in_status]])

    return [result_list[:20], len(stat_results)]
Esempio n. 5
0
def recommend_in_sensitive(date):
    """Return details of the sensitive users recommended on ``date``.

    The fields of the Redis hash ``recomment_<date>_sensitive`` are the
    recommended uids; uids that are already fields of the ``compute`` hash
    are left out.  The remaining uids are passed to
    ``get_sensitive_user_detail`` for the day before ``date`` with the
    sensitive flag set to 1.

    Returns:
        The list produced by ``get_sensitive_user_detail``, or ``[]`` when
        there is no recommendation hash for that day or nothing is left
        after filtering.
    """
    recommended_uids = r.hkeys("recomment_" + str(date) + "_sensitive")  # recommended sensitive users
    stored_uids = r.hkeys("compute")  # users that are already stored
    if not recommended_uids:
        return []  # no data exists for that day

    pending_uids = list(set(recommended_uids) - set(stored_uids))
    work_date = ts2datetime(datetime2ts(date) - DAY)
    if not pending_uids:
        return []
    return get_sensitive_user_detail(pending_uids, work_date, 1)
Esempio n. 6
0
def ajax_cancel_delete():
    """Take uids back out of the ``delete_user`` entry of one date.

    Query parameters:
        uid_list: comma-separated uids to remove from the stored list.
        date: day of the entry, e.g. ``2013-09-01`` (dashes are stripped
            before it is used as the hash field).

    Returns:
        ``'1'`` after the entry was rewritten, or deleted because no uid
        was left; ``'0'`` when a parameter is missing or nothing is stored
        for that date.
    """
    uid_list = request.args.get('uid_list', '')
    date = request.args.get('date', '')
    if not uid_list or not date:
        return '0'

    cancel_uids = set(str(uid_list).split(','))
    date = str(date).replace('-', '')
    stored = r.hget('delete_user', date)
    if not stored:
        # hget returns None for an unknown field and json.loads(None)
        # would raise; there is nothing to cancel for this date.
        return '0'

    revise_list = set(json.loads(stored)) - cancel_uids
    if revise_list:
        r.hset('delete_user', date, json.dumps(list(revise_list)))
    else:
        r.hdel('delete_user', date)
    return '1'
Esempio n. 7
0
def ajax_delete_user():
    """Add uids to the ``delete_user`` entry of one date.

    Query parameters:
        date: day of the entry, e.g. ``2013-09-01`` (dashes are stripped
            before it is used as the hash field).
        uid_list: comma-separated uids, e.g. ``12345,123456,``.

    The new uids are placed in front of whatever list is already stored
    for the date and the combined list is written back as JSON.

    Returns:
        ``'1'`` when the entry was written, ``'0'`` when the date or the
        uid list is empty.
    """
    date = request.args.get('date', '')  # '2013-09-01'
    date = str(date).replace('-', '')
    uid_list = request.args.get('uid_list', '')  # uid_list, 12345,123456,
    # ''.split(',') yields [''], which is truthy: drop empty pieces so an
    # empty or trailing-comma uid_list never stores '' as a uid.
    delete_list = [uid for uid in str(uid_list).split(',') if uid]

    if not date or not delete_list:
        return '0'

    temp = r.hget('delete_user', date)
    if temp:
        delete_list.extend(json.loads(temp))
    r.hset('delete_user', date, json.dumps(delete_list))
    return '1'
def ajax_delete_user():
    """Add uids to the ``delete_user`` entry of one date.

    Query parameters:
        date: day of the entry, e.g. ``2013-09-01`` (dashes are stripped
            before it is used as the hash field).
        uid_list: comma-separated uids, e.g. ``12345,123456,``.

    The new uids are placed in front of the list already stored for the
    date (if any) and the combined list is written back as JSON.

    Returns:
        ``"1"`` when the entry was written, ``"0"`` when the date or the
        uid list is empty.
    """
    date = str(request.args.get("date", "")).replace("-", "")  # '2013-09-01'
    uid_list = request.args.get("uid_list", "")  # uid_list, 12345,123456,
    # "".split(",") is [""], a truthy list: filter the empty pieces so a
    # missing uid_list is rejected and a trailing comma adds no "" uid.
    delete_list = [uid for uid in str(uid_list).split(",") if uid]

    if not (date and delete_list):
        return "0"

    temp = r.hget("delete_user", date)
    if temp:
        delete_list.extend(json.loads(temp))
    r.hset("delete_user", date, json.dumps(delete_list))
    return "1"
def ajax_cancel_delete():
    """Take uids back out of the ``delete_user`` entry of one date.

    Query parameters:
        uid_list: comma-separated uids to remove from the stored list.
        date: day of the entry (dashes are stripped before it is used as
            the hash field).

    Returns:
        ``"1"`` after the entry was rewritten, or deleted because no uid
        was left; ``"0"`` when a parameter is missing or nothing is stored
        for that date.
    """
    uid_list = request.args.get("uid_list", "")
    date = request.args.get("date", "")
    if not uid_list or not date:
        return "0"

    cancel_uids = set(str(uid_list).split(","))
    date = str(date).replace("-", "")
    stored = r.hget("delete_user", date)
    if not stored:
        # hget gives None for an unknown field; json.loads(None) would
        # raise, and there is nothing to cancel anyway.
        return "0"

    revise_list = set(json.loads(stored)) - cancel_uids
    if revise_list:
        r.hset("delete_user", date, json.dumps(list(revise_list)))
    else:
        r.hdel("delete_user", date)
    return "1"
def recommend_in_sensitive(date):
    """Return details of the sensitive users recommended on ``date``.

    Recommended uids are the fields of the Redis hash
    ``recomment_<date>_sensitive``; those that are also fields of the
    ``compute`` hash are excluded.  What remains is handed to
    ``get_sensitive_user_detail`` for the day before ``date`` with the
    sensitive flag set to 1.

    Returns:
        The result of ``get_sensitive_user_detail``, or ``[]`` when the
        day has no recommendation hash or every uid was excluded.
    """
    re_sen_set = r.hkeys("recomment_" + str(date) + "_sensitive")  # recommended sensitive users
    iden_in_set = r.hkeys("compute")  # users that are already stored
    if not re_sen_set:
        return []  # no data exists for that day

    remaining = list(set(re_sen_set).difference(iden_in_set))
    work_date = ts2datetime(datetime2ts(date) - DAY)
    return get_sensitive_user_detail(remaining, work_date, 1) if remaining else []
Esempio n. 11
0
def search_sensitive_words(level, category):
    """List the entries of the ``sensitive_words`` hash matching the filters.

    Args:
        level: 0, 1, 2 or 3 (anything ``int()`` accepts); 0 means "do not
            filter by level".
        category: category name; an empty value means "do not filter by
            category".

    Returns:
        A list of ``[word, level, category]`` lists, where level and
        category are the two items of the JSON value stored for the word.
        Empty when the hash is empty or nothing matches.
    """
    level = int(level)
    matches = []
    words_dict = r.hgetall('sensitive_words')
    if not words_dict:
        return matches

    for word, raw_state in words_dict.items():
        word_state = json.loads(raw_state)
        # A level of 0 disables the level filter.
        if level != 0 and level != int(word_state[0]):
            continue
        # An empty category disables the category filter.
        if category and category != word_state[1]:
            continue
        matches.append([word, word_state[0], word_state[1]])
    return matches
def recommend_new_words(date_list):
    """Collect the recommended sensitive words of the given dates.

    For every date (dashes removed) the Redis hash
    ``recommend_sensitive_words_<date>`` is read.  Each field is a word and
    each value a JSON list whose first item is a list of uids; the uids
    are resolved to nick names through the ``weibo_user`` index.

    Args:
        date_list: iterable of date strings.

    Returns:
        A list of ``[word, names, value[1]]`` rows sorted by the third
        element in descending order.  ``names`` holds one nick name (or
        ``'unknown'``) per uid, or the raw uid list when the lookup fails.
    """
    results = []
    for date in date_list:
        date = date.replace('-', '')
        words_dict = r.hgetall('recommend_sensitive_words_'+date)
        if not words_dict:
            continue
        for key, value in words_dict.items():
            value = json.loads(value)
            uid_list = value[0]
            try:
                search_results = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids': uid_list})['docs']
                uname = [
                    item['_source']['nick_name'] if item['found'] else 'unknown'
                    for item in search_results
                ]
            except Exception:
                # Best effort: show the uids when the profile lookup fails.
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate.
                uname = uid_list
            results.append([key, uname, value[1]])
    return sorted(results, key=lambda x: x[2], reverse=True)
def search_sensitive_words(level, category): # level: 0, 1, 2, 3; category: '', or other category
    """List the entries of the ``sensitive_words`` hash matching the filters.

    Args:
        level: 0, 1, 2 or 3, as an int or a numeric string; 0 means "do
            not filter by level".
        category: category name; an empty value means "do not filter by
            category".

    Returns:
        A list of ``[word, level, category]`` lists, where level and
        category are the two items of the JSON value stored for the word.
        Empty when the hash is empty or nothing matches.
    """
    words_dict = r.hgetall('sensitive_words')
    if not words_dict:
        return []

    # Normalise once: a string such as '0' is truthy, so testing the raw
    # argument sent "level 0 + category" requests down the wrong branch.
    level = int(level)

    word_list = []
    for k, v in words_dict.items():
        word_state = json.loads(v)
        if level != 0 and level != int(word_state[0]):
            continue
        if category and category != word_state[1]:
            continue
        word_list.append([k, word_state[0], word_state[1]])
    return word_list
Esempio n. 14
0
def recommend_new_words(date_list):
    """Collect the recommended sensitive words of the given dates.

    For every date (dashes removed) the Redis hash
    ``recommend_sensitive_words_<date>`` is read.  Each field is a word and
    each value a JSON list whose first item is a list of uids; the uids
    are resolved to nick names through the ``weibo_user`` index.

    Args:
        date_list: iterable of date strings.

    Returns:
        A list of ``[word, names, value[1]]`` rows sorted by the third
        element in descending order.  ``names`` holds one nick name (or
        ``'unknown'``) per uid, or the raw uid list when the lookup fails.
    """
    results = []
    for date in date_list:
        date = date.replace('-', '')
        words_dict = r.hgetall('recommend_sensitive_words_' + date)
        if not words_dict:
            continue
        for key, value in words_dict.items():
            value = json.loads(value)
            uid_list = value[0]
            uname = []
            try:
                search_results = es_user_profile.mget(
                    index='weibo_user',
                    doc_type='user',
                    body={'ids': uid_list})['docs']
                for item in search_results:
                    if item['found']:
                        uname.append(item['_source']['nick_name'])
                    else:
                        uname.append('unknown')
            except Exception:
                # Best effort: fall back to the uids when the lookup fails.
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate.
                uname = uid_list
            results.append([key, uname, value[1]])
    sorted_results = sorted(results, key=lambda x: x[2], reverse=True)
    return sorted_results
Esempio n. 15
0
def recommend_in_top_influence(date):
    """Return details of the influence-based recommendations of ``date``.

    Recommended uids are the fields of the Redis hash
    ``recomment_<date>_influence``; uids that are also fields of the
    ``compute`` hash are excluded.  The rest is passed to
    ``get_sensitive_user_detail`` for the day before ``date`` with the
    sensitive flag set to 0.

    Returns:
        The result of ``get_sensitive_user_detail``, or ``[]`` when the
        day has no recommendation hash or every uid was excluded.
    """
    candidate_uids = r.hkeys("recomment_" + date + "_influence")
    stored_uids = r.hkeys("compute")  # users that are already stored
    if not candidate_uids:
        return []

    uid_list = list(set(candidate_uids) - set(stored_uids))
    work_date = ts2datetime(datetime2ts(date) - DAY)
    if not uid_list:
        return []
    return get_sensitive_user_detail(uid_list, work_date, 0)
def recommend_in_top_influence(date):
    """Return details of the influence-based recommendations of ``date``.

    The fields of the Redis hash ``recomment_<date>_influence`` are the
    recommended uids; those already present as fields of the ``compute``
    hash are dropped.  The remaining uids go to
    ``get_sensitive_user_detail`` for the day before ``date`` with the
    sensitive flag set to 0.

    Returns:
        The result of ``get_sensitive_user_detail``, or ``[]`` when there
        is no recommendation hash for the day or nothing is left.
    """
    re_inf_set = r.hkeys("recomment_" + date + "_influence")
    iden_in_set = r.hkeys("compute")  # users that are already stored
    if not re_inf_set:
        return []

    remaining = list(set(re_inf_set).difference(iden_in_set))
    work_date = ts2datetime(datetime2ts(date) - DAY)
    return get_sensitive_user_detail(remaining, work_date, 0) if remaining else []
def identify_in(data):
    """Record identify-in decisions in Redis.

    Args:
        data: iterable of rows ``(date, uid, status, source)``; ``date``
            looks like ``2015-09-22`` and ``source`` must be convertible
            to an int.

    For each row the status is stored under ``uid`` in
    ``identify_in_sensitive_<date>`` (source 1),
    ``identify_in_influence_<date>`` (source 2) or
    ``identify_in_manual_<date>`` (any other source), and
    ``[date, status]`` is stored as JSON under ``uid`` in the ``compute``
    hash.

    Returns:
        The string ``'1'``.
    """
    hash_prefix_by_source = {
        1: 'identify_in_sensitive_',
        2: 'identify_in_influence_',
    }
    for item in data:
        date = item[0]  # 2015-09-22
        uid = item[1]
        status = str(item[2])
        source = int(str(item[3]))
        # Any source other than 1 or 2 is treated as a manual entry.
        prefix = hash_prefix_by_source.get(source, 'identify_in_manual_')
        r.hset(prefix + str(date), uid, status)  # identify in user_list and compute status
        r.hset('compute', uid, json.dumps([date, status]))
    return '1'
Esempio n. 18
0
def search_follower(uid, sensitive):
    """Rank the users found in the ``be_retweet`` hashes of ``uid``.

    Every Redis connection in ``R_DICT`` is asked for the hash
    ``sensitive_be_retweet_<uid>`` (when ``sensitive`` is truthy) or
    ``be_retweet_<uid>``; the per-user values are summed across
    connections.  The 20 users with the largest totals are then looked up
    in the ``weibo_user`` index (nick name) and in the
    ``sensitive_user_portrait`` index (presence only).

    Returns:
        ``[rows, total]``.  ``rows`` holds at most 20 entries of the form
        ``[uid, [nick_name, count, in_status]]`` in descending ``count``
        order, where ``count`` is an int and ``in_status`` is 1 when the
        portrait lookup returned a ``_source``, else 0.  ``total`` is the
        number of distinct users collected.  ``[None, 0]`` is returned
        when no hash holds any other user.
    """
    prefix = "sensitive_be_retweet_" if sensitive else "be_retweet_"
    hash_name = prefix + str(uid)
    # Redis field names are strings, so compare against the string form.
    own_uid = str(uid)

    stat_results = dict()
    for db_num in R_DICT:
        br_uid_results = R_DICT[db_num].hgetall(hash_name)
        if not br_uid_results:
            continue
        for br_uid, count in br_uid_results.items():
            if br_uid == own_uid:
                continue
            # Values are strings in Redis: add them as integers so that
            # shards are summed (not concatenated) and ranked numerically.
            stat_results[br_uid] = stat_results.get(br_uid, 0) + int(count)
    if not stat_results:
        return [None, 0]

    sort_stat_results = sorted(stat_results.items(), key=lambda x: x[1], reverse=True)[:20]
    uid_list = [item[0] for item in sort_stat_results]
    es_profile_results = es_user_profile.mget(index="weibo_user", doc_type="user", body={"ids": uid_list})["docs"]
    es_portrait_results = es.mget(index="sensitive_user_portrait", doc_type="user", body={"ids": uid_list})["docs"]

    result_list = []
    # Both mget calls use the same id list, so the docs are walked in pairs.
    for profile_doc, portrait_doc in zip(es_profile_results, es_portrait_results):
        follower_uid = profile_doc["_id"]
        try:
            uname = profile_doc["_source"]["nick_name"]
        except (KeyError, TypeError):
            # No profile document (or no nick name) for this user.
            uname = u"unknown"
        in_status = 1 if "_source" in portrait_doc else 0
        result_list.append([follower_uid, [uname, stat_results[follower_uid], in_status]])
    return [result_list[:20], len(stat_results)]
Esempio n. 19
0
def identify_in(data):
    """Record identify-in decisions in Redis.

    Args:
        data: iterable of rows ``(date, uid, status, source)``; ``date``
            looks like ``2015-09-22`` and ``source`` must be convertible
            to an int.

    For each row the status is stored under ``uid`` in
    ``identify_in_sensitive_<date>`` (source 1),
    ``identify_in_influence_<date>`` (source 2) or
    ``identify_in_manual_<date>`` (any other source), and
    ``[date, status]`` is stored as JSON under ``uid`` in the ``compute``
    hash.

    Returns:
        The string ``'1'``.
    """
    for item in data:
        date = item[0]  # 2015-09-22
        uid = item[1]
        status = str(item[2])
        source = int(str(item[3]))

        if source == 1:
            target_hash = 'identify_in_sensitive_' + str(date)
        elif source == 2:
            target_hash = 'identify_in_influence_' + str(date)
        else:
            # Every other source value is recorded as a manual entry.
            target_hash = "identify_in_manual_" + str(date)

        r.hset(target_hash, uid, status)  # identify in user_list and compute status
        r.hset('compute', uid, json.dumps([date, status]))
    return '1'
def search_follower(uid, sensitive):
    """Rank the users found in the ``be_retweet`` hash of ``uid``.

    Reads ``sensitive_be_retweet_<uid>`` (when ``sensitive`` is truthy) or
    ``be_retweet_<uid>`` from ``r_cluster``.  The 20 users with the
    largest values are then looked up in the ``weibo_user`` index (nick
    name) and in the ``sensitive_user_portrait`` index (presence only).

    Returns:
        ``[rows, total]``.  ``rows`` holds at most 20 entries of the form
        ``[uid, [nick_name, count, in_status]]`` in descending ``count``
        order, where ``count`` is an int and ``in_status`` is 1 when the
        portrait lookup returned a ``_source``, else 0.  ``total`` is the
        number of distinct users collected.  ``[None, 0]`` is returned
        when the hash holds no other user.
    """
    if sensitive:
        hash_name = 'sensitive_be_retweet_'+str(uid)
    else:
        hash_name = 'be_retweet_'+str(uid)
    # Redis field names are strings, so compare against the string form.
    own_uid = str(uid)

    # Values are strings in Redis; convert them so the ranking below is
    # numeric ("10" > "9") rather than lexicographic.
    stat_results = {
        br_uid: int(count)
        for br_uid, count in r_cluster.hgetall(hash_name).items()
        if br_uid != own_uid
    }
    if not stat_results:
        return [None, 0]

    sort_stat_results = sorted(stat_results.items(), key=lambda x:x[1], reverse=True)[:20]
    uid_list = [item[0] for item in sort_stat_results]
    es_profile_results = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list})['docs']
    es_portrait_results = es.mget(index='sensitive_user_portrait', doc_type='user', body={'ids':uid_list})['docs']

    result_list = []
    # Both mget calls use the same id list, so the docs are walked in pairs.
    for profile_doc, portrait_doc in zip(es_profile_results, es_portrait_results):
        follower_uid = profile_doc['_id']
        try:
            uname = profile_doc['_source']['nick_name']
        except (KeyError, TypeError):
            # No profile document (or no nick name) for this user.
            uname = u'unknown'
        in_status = 1 if '_source' in portrait_doc else 0
        result_list.append([follower_uid,[uname, stat_results[follower_uid], in_status]])
    return [result_list[:20], len(stat_results)]
Esempio n. 21
0
def recommend_in_top_influence(date):
    """Return details of the users stored in ``recommend_influence`` for ``date``.

    The dashes are removed from ``date`` and the result is used as the
    field of the Redis hash ``recommend_influence``, whose value is a JSON
    list of uids.  The uids are passed to ``get_sensitive_user_detail``
    together with the dash-less date and the sensitive flag 0.

    Returns:
        The result of ``get_sensitive_user_detail``, or ``[]`` when
        nothing is stored for the date.
    """
    day_key = date.replace('-','')
    stored = r.hget('recommend_influence', day_key)
    if not stored:
        return []
    return get_sensitive_user_detail(json.loads(stored), day_key, 0)
Esempio n. 22
0
def recommend_in_sensitive(date):
    """Return details of the users stored in ``recommend_sensitive`` for ``date``.

    The dashes are removed from ``date`` and the result is used as the
    field of the Redis hash ``recommend_sensitive``, whose value is a JSON
    list of uids.  The uids are passed to ``get_sensitive_user_detail``
    together with the dash-less date and the sensitive flag 1.

    Returns:
        The result of ``get_sensitive_user_detail``, or ``[]`` when
        nothing is stored for the date.
    """
    date = date.replace('-', '')
    stored = r.hget('recommend_sensitive', date)
    if not stored:
        # Return an empty list rather than the raw None/'' from Redis so
        # the "no data" result is a list, like recommend_in_top_influence.
        # It is still falsy for callers that test `if not results`.
        return []
    return get_sensitive_user_detail(json.loads(stored), date, 1)
Esempio n. 23
0
def recommend_in_top_influence(date):
    """Return details of the users stored in ``recommend_influence`` for ``date``.

    ``date`` with its dashes removed is the field looked up in the Redis
    hash ``recommend_influence``; the value is a JSON list of uids.  The
    uids go to ``get_sensitive_user_detail`` with the dash-less date and
    the sensitive flag 0.

    Returns:
        The result of ``get_sensitive_user_detail``, or ``[]`` when
        nothing is stored for the date.
    """
    compact_date = date.replace('-', '')
    raw_uids = r.hget('recommend_influence', compact_date)
    if raw_uids:
        uid_list = json.loads(raw_uids)
        return get_sensitive_user_detail(uid_list, compact_date, 0)
    return []
Esempio n. 24
0
def recommend_in_sensitive(date):
    """Return details of the users stored in ``recommend_sensitive`` for ``date``.

    ``date`` with its dashes removed is the field looked up in the Redis
    hash ``recommend_sensitive``; the value is a JSON list of uids.  The
    uids go to ``get_sensitive_user_detail`` with the dash-less date and
    the sensitive flag 1.

    Returns:
        The result of ``get_sensitive_user_detail``, or ``[]`` when
        nothing is stored for the date.
    """
    date = date.replace('-','')
    results = r.hget('recommend_sensitive', date)
    if not results:
        # Hand back an empty list instead of the raw None/'' from Redis:
        # the return type is then always a list, as in
        # recommend_in_top_influence, and still falsy for callers.
        return []
    uid_list = json.loads(results)
    return get_sensitive_user_detail(uid_list, date, 1)
Esempio n. 25
0
def show_in_history(date, sensitive):
    """Return the identify-in history of one day with the stored status.

    Args:
        date: day to look up; dashes are removed before use.
        sensitive: truthy to read ``identify_in_sensitive_<date>``
            (detail flag 1), falsy to read ``identify_in_influence_<date>``
            (detail flag 0).

    Returns:
        The rows from ``get_sensitive_user_detail`` for the uids of the
        chosen hash, each with the stored status appended; ``[]`` when the
        hash is empty.
    """
    day = str(date).replace('-','')
    if sensitive: # sensitive user recommendation history
        hash_prefix, flag = 'identify_in_sensitive_', 1
    else:
        hash_prefix, flag = 'identify_in_influence_', 0

    status_by_uid = r.hgetall(hash_prefix+str(day))
    if not status_by_uid:
        return []

    rows = get_sensitive_user_detail(status_by_uid.keys(), day, flag)
    for row in rows:
        row.append(status_by_uid[row[0]])
    return rows
Esempio n. 26
0
def show_in_history(date, sensitive):
    """Return the identify-in history of one day with the stored status.

    Args:
        date: day to look up; dashes are removed before use.
        sensitive: truthy selects the hash ``identify_in_sensitive_<date>``
            and detail flag 1; falsy selects
            ``identify_in_influence_<date>`` and detail flag 0.

    Returns:
        The rows from ``get_sensitive_user_detail`` for the uids of the
        selected hash, each extended with the stored status; ``[]`` when
        the hash is empty.
    """
    date = str(date).replace('-', '')
    # sensitive user recommendation history vs. influence history
    prefix = 'identify_in_sensitive_' if sensitive else 'identify_in_influence_'
    flag = 1 if sensitive else 0

    stored = r.hgetall(prefix + str(date))
    results = []
    if stored:
        results = get_sensitive_user_detail(stored.keys(), date, flag)
        for item in results:
            item.append(stored[item[0]])
    return results
Esempio n. 27
0
def ajax_history_delete():
    """Return user info for the uids recorded in the ``delete_user`` hash.

    Query parameters:
        date: day to look up, e.g. ``2013-09-01`` (dashes are stripped).
        show_all: when non-empty, the uid lists of every date are merged
            instead of reading a single date.

    Returns:
        The JSON dump of ``get_user_info(uids)``, or ``'0'`` when nothing
        is stored for the request.
    """
    date = str(request.args.get('date', '')).replace('-', '') # '2013-09-01'
    search_all = request.args.get('show_all', '') # return all

    if search_all:
        all_temp = r.hgetall('delete_user')
        if not all_temp:
            return '0'
        uid_list = []
        for raw_uids in all_temp.values():
            uid_list.extend(json.loads(raw_uids))
        return json.dumps(get_user_info(uid_list))

    temp = r.hget('delete_user', date)
    if not temp:
        return '0'
    return json.dumps(get_user_info(json.loads(temp)))
Esempio n. 28
0
def ajax_history_delete():
    """Return user info for the uids recorded in the ``delete_user`` hash.

    Query parameters:
        date: day to look up, e.g. ``2013-09-01`` (dashes are stripped).
        show_all: when non-empty, every date's uid list is merged instead
            of reading a single date.

    Returns:
        The JSON dump of ``get_user_info(uids)``, or ``"0"`` when nothing
        is stored for the request.
    """
    raw_date = request.args.get("date", "")  # '2013-09-01'
    date = str(raw_date).replace("-", "")
    search_all = request.args.get("show_all", "")  # return all

    if not search_all:
        temp = r.hget("delete_user", date)
        uids = json.loads(temp) if temp else None
    else:
        all_temp = r.hgetall("delete_user")
        uids = None
        if all_temp:
            uids = []
            for raw_uids in all_temp.values():
                uids.extend(json.loads(raw_uids))

    # None marks "nothing stored"; an empty list is still looked up.
    if uids is None:
        return "0"
    return json.dumps(get_user_info(uids))
Esempio n. 29
0
def search_retweet(uid, sensitive):
    """Rank the users found in the ``retweet`` hashes of ``uid``.

    Every Redis connection in ``R_DICT`` is asked for the hash
    ``retweet_<uid>`` or, when ``sensitive`` is truthy,
    ``sensitive_retweet_<uid>``; the per-user values are summed across
    connections.  The 20 users with the largest totals are looked up in
    the ``weibo_user`` index (nick name) and in the
    ``sensitive_user_portrait`` index (``found`` flag only).

    Returns:
        ``[rows, total]``.  ``rows`` holds at most 20 entries of the form
        ``[uid, [nick_name, count, in_status]]`` in descending ``count``
        order (``count`` is an int, ``in_status`` is 1 when the portrait
        document was found, else 0).  ``total`` is the number of distinct
        users collected.  ``[None, 0]`` when nothing was collected.
    """
    prefix = "sensitive_retweet_" if sensitive else "retweet_"  # because of sensitive weibo
    hash_name = prefix + str(uid)
    # Redis field names are strings, so compare against the string form.
    own_uid = str(uid)

    stat_results = dict()
    for db_num in R_DICT:
        ruid_results = R_DICT[db_num].hgetall(hash_name)
        if not ruid_results:
            continue
        for ruid, count in ruid_results.items():
            if ruid == own_uid:
                continue
            # Values are strings in Redis: add them as integers so that
            # shards are summed (not concatenated) and ranked numerically.
            stat_results[ruid] = stat_results.get(ruid, 0) + int(count)

    if not stat_results:
        return [None, 0]

    sort_stat_results = sorted(stat_results.items(), key=lambda x: x[1], reverse=True)[:20]
    uid_list = [item[0] for item in sort_stat_results]
    es_profile_results = es_user_profile.mget(index="weibo_user", doc_type="user", body={"ids": uid_list})["docs"]
    es_portrait_results = es.mget(index="sensitive_user_portrait", doc_type="user", body={"ids": uid_list})["docs"]

    result_list = []
    # Both mget calls use the same id list, so the docs are walked in pairs.
    for profile_doc, portrait_doc in zip(es_profile_results, es_portrait_results):
        retweet_uid = profile_doc["_id"]
        if profile_doc["found"]:
            uname = profile_doc["_source"]["nick_name"]
        else:
            uname = u"unknown"
        in_status = 1 if portrait_doc["found"] else 0
        result_list.append([retweet_uid, [uname, stat_results[retweet_uid], in_status]])

    return [result_list[:20], len(stat_results)]
def search_retweet(uid, sensitive):
    """Rank the users found in the ``retweet`` hash of ``uid``.

    Reads ``retweet_<uid>`` or, when ``sensitive`` is truthy,
    ``sensitive_retweet_<uid>`` from ``r_cluster``.  The 20 users with the
    largest values are looked up in the ``weibo_user`` index (nick name)
    and in the ``sensitive_user_portrait`` index (``found`` flag only).

    Returns:
        ``[rows, total]``.  ``rows`` holds at most 20 entries of the form
        ``[uid, [nick_name, count, in_status]]`` in descending ``count``
        order (``count`` is an int, ``in_status`` is 1 when the portrait
        document was found, else 0).  ``total`` is the number of distinct
        users collected.  ``[None, 0]`` when nothing was collected.
    """
    if not sensitive:
        hash_name = 'retweet_'+str(uid)
    else:
        hash_name = 'sensitive_retweet_'+str(uid) # because of sensitive weibo
    # Redis field names are strings, so compare against the string form.
    own_uid = str(uid)

    # Values are strings in Redis; convert them so the ranking below is
    # numeric ("10" > "9") rather than lexicographic.
    stat_results = {
        ruid: int(count)
        for ruid, count in r_cluster.hgetall(hash_name).items()
        if ruid != own_uid
    }
    if not stat_results:
        return [None, 0]

    sort_stat_results = sorted(stat_results.items(), key=lambda x:x[1], reverse=True)[:20]
    uid_list = [item[0] for item in sort_stat_results]
    es_profile_results = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list})['docs']
    es_portrait_results = es.mget(index='sensitive_user_portrait', doc_type='user', body={'ids':uid_list})['docs']

    result_list = []
    # Both mget calls use the same id list, so the docs are walked in pairs.
    for profile_doc, portrait_doc in zip(es_profile_results, es_portrait_results):
        retweet_uid = profile_doc['_id']
        if profile_doc['found']:
            uname = profile_doc['_source']['nick_name']
        else:
            uname = u'unknown'
        in_status = 1 if portrait_doc['found'] else 0
        result_list.append([retweet_uid,[uname, stat_results[retweet_uid], in_status]])

    return [result_list[:20], len(stat_results)]
def lastest_identify_in(end_date='2013-09-08'):
    """Merge the ``history_in_<yyyymmdd>`` hashes of the 7 days before ``end_date``.

    Args:
        end_date: reference day accepted by ``datetime2ts``.  It defaults
            to the date that used to be hard-coded in the body, so calls
            without arguments behave as before.

    Returns:
        A dict mapping every field found in those hashes to its
        JSON-decoded value.  Days are visited from ``end_date`` minus 1
        day to ``end_date`` minus 7 days, so when a field appears on
        several days the value of the last visited day is kept.
    """
    results = dict()
    end_ts = datetime2ts(end_date)
    for i in range(1,8):
        ts = end_ts - i * 3600 *24
        date = ts2datetime(ts).replace('-','')
        words_dict = r.hgetall('history_in_'+date)
        for item in words_dict:
            results[item] = json.loads(words_dict[item])

    return results
Esempio n. 32
0
def lastest_identify_in():
    """Return the decoded contents of the last seven ``history_in_*`` hashes.

    The result maps each hash field to ``json.loads`` of its stored value.
    Days are visited from one to seven days before the reference timestamp,
    so later (older) days overwrite earlier ones on key collisions.
    """
    reference_ts = time.time()
    # test override: a fixed reference date replaces the current time
    reference_ts = datetime2ts('2013-09-08')

    history = dict()
    offset = 1
    while offset < 8:
        stamp = reference_ts - offset * 3600 * 24
        suffix = ts2datetime(stamp).replace('-', '')
        day_words = r.hgetall('history_in_' + suffix)
        history.update((key, json.loads(day_words[key])) for key in day_words)
        offset += 1
    return history
Esempio n. 33
0
def sort_sensitive_words(words_list):
    """Append the stored attributes of each word to a copy of its row.

    For every item in ``words_list`` the first element is UTF-8 encoded and
    looked up in the redis hash ``sensitive_words``.  The JSON-decoded value
    is appended to a copy of the item; ``[1, "politics"]`` is appended when
    the word has no stored value.  Returns the list of extended rows.
    """
    annotated = []
    for entry in words_list:
        row = list(entry)
        lookup_key = entry[0].encode("utf-8", "ignore")
        stored = r.hget("sensitive_words", lookup_key)
        # fall back to the default attributes for unknown words
        extra = json.loads(stored) if stored else [1, "politics"]
        row.extend(extra)
        annotated.append(row)
    return annotated
def sort_sensitive_words(words_list):
    """Return copies of the input rows extended with each word's stored data.

    The first element of every row is encoded as UTF-8 and used as the field
    name in the ``sensitive_words`` redis hash.  A hit contributes its
    JSON-decoded value; a miss contributes the default ``[1, 'politics']``.
    """
    def _attributes_for(raw_word):
        # Stored attributes of one word, or the default pair when absent.
        found = r.hget('sensitive_words', raw_word.encode('utf-8', 'ignore'))
        if not found:
            return [1, 'politics']
        return json.loads(found)

    extended_rows = []
    for row in words_list:
        combined = []
        combined.extend(row)
        combined.extend(_attributes_for(row[0]))
        extended_rows.append(combined)
    return extended_rows
Esempio n. 35
0
def sort_sensitive_words(words_list):
    """Extend every word row with the attributes kept in redis.

    Each row's first element (encoded to UTF-8) is looked up in the
    ``sensitive_words`` hash.  The decoded JSON value is appended to a copy
    of the row, or ``[1, 'politics']`` when nothing is stored.  The original
    rows are left untouched.
    """
    output = []
    for source_row in words_list:
        new_row = list(source_row)
        encoded_word = (source_row[0]).encode('utf-8', 'ignore')
        redis_value = r.hget('sensitive_words', encoded_word)
        if not redis_value:
            # unknown word: use the default attributes
            new_row += [1, 'politics']
        else:
            new_row += json.loads(redis_value)
        output.append(new_row)
    return output
def identify_in(date, words_list):
    """Store the given words as sensitive words and clear their recommendation.

    ``date`` is the date string on which the words were recommended and
    ``words_list`` holds ``[word, level, category]`` items.  Every word is
    written to the ``sensitive_words`` and ``history_in_<date>`` hashes with
    the JSON value ``[level, category]``; afterwards each word is removed from
    ``recommend_sensitive_words_<date>``.  Always returns ``'1'``.
    """
    print(words_list)
    accepted_words = []
    for entry in words_list:
        word = entry[0]
        payload = json.dumps([entry[1], entry[2]])
        r.hset('sensitive_words', word, payload)
        accepted_words.append(word)
        r.hset('history_in_' + date, word, payload)
    # the recommendations are only cleared once every word has been stored
    for word in accepted_words:
        r.hdel('recommend_sensitive_words_' + date, word)
    return '1'
Esempio n. 37
0
def identify_in(date, words_list):
    """Store the given words as sensitive words and clear their recommendations.

    ``date`` is the date string on which the words were recommended; it names
    the ``history_in_<date>`` hash.  ``words_list`` holds
    ``[word, level, category]`` items.  Each word is written to the
    ``sensitive_words`` and ``history_in_<date>`` hashes with the JSON value
    ``[level, category]`` and removed from the
    ``recommend_sensitive_words_<day>`` hashes of seven consecutive days
    ending at the reference day.  Always returns ``'1'``.
    """
    # test: the reference day is pinned to a fixed date instead of "now"
    ts = datetime2ts('2013-09-07')
    recent_dates = []
    for i in range(7):
        day_ts = int(ts) - i * 24 * 3600
        recent_dates.append(ts2datetime(day_ts).replace('-', ''))

    for item in words_list:
        word = item[0]
        payload = json.dumps([item[1], item[2]])
        r.hset('sensitive_words', word, payload)
        r.hset('history_in_' + date, word, payload)
        # A separate loop name keeps the ``date`` argument intact; reusing
        # ``date`` here made every later word land in the wrong history hash.
        for recent_date in recent_dates:
            r.hdel('recommend_sensitive_words_' + recent_date, word)
    return '1'
Esempio n. 38
0
def identify_in(date, words_list):
    """Accept recommended words as sensitive words.

    ``date`` is the date string on which the new words were recommended and
    ``words_list`` contains ``[word, level, category]`` items.  For each item
    the JSON value ``[level, category]`` is stored under the word in both the
    ``sensitive_words`` hash and the ``history_in_<date>`` hash, and the word
    is deleted from ``recommend_sensitive_words_<day>`` for the reference day
    and the six days before it.  Always returns ``'1'``.
    """
    # test: fixed reference date used in place of the current time
    ts = datetime2ts('2013-09-07')
    time_list = [
        ts2datetime(int(ts) - i * 24 * 3600).replace('-', '')
        for i in range(7)
    ]
    for item in words_list:
        word = item[0]
        attributes = json.dumps([item[1], item[2]])
        r.hset('sensitive_words', word, attributes)
        r.hset('history_in_' + date, word, attributes)
        # The loop variable must not be called ``date``: that overwrote the
        # argument and redirected the history writes of all following words.
        for day in time_list:
            r.hdel('recommend_sensitive_words_' + day, word)
    return '1'
Esempio n. 39
0
def get_attr(date):
    """Assemble the overview statistics of the sensitive-user portrait.

    ``date`` is kept for interface compatibility but is not used: the daily
    "bci" index queried for the top-user rankings is pinned to the test date
    ``20130907``.

    Returns a dict with the keys ``total_number``, ``sensitive_number``,
    ``influence_number``, ``recommend_in``, ``monitor_number``,
    ``new_sensitive_words``, ``sensitive_words``, ``sensitive_geo``,
    ``sensitive_hashtag``, ``psycho_status``, ``political_tendency``,
    ``domain_rank``, ``topic_rank``, ``importance``, ``sensitive``,
    ``influence``, ``activeness``, ``comment_total``, ``retweeted_total`` and
    ``top_weibo_number``.
    """
    portrait_index = 'sensitive_user_portrait'

    def _count_recommended(hash_name):
        # Sum the lengths of the JSON lists stored in every field of the hash.
        total = 0
        for raw in r.hgetall(hash_name).values():
            if raw:
                total += len(json.loads(raw))
        return total

    def _bucket_counts(field):
        # [[key, doc_count], ...] from the aggregation built for ``field``.
        buckets = es.search(
            index=portrait_index, doc_type='user',
            body=query_body_module(field))['aggregations']['all_interests']['buckets']
        return [[bucket['key'], bucket['doc_count']] for bucket in buckets]

    def _top_users_by(sort_field, bci_index):
        # [[uid, nick_name, value], ...] ordered by ``sort_field`` descending.
        body = {
            "query": {"match_all": {}},
            "sort": {sort_field: {"order": "desc"}},
        }
        hits = es.search(index=bci_index, doc_type="bci",
                         body=body)['hits']['hits']
        rows = []
        for hit in hits:
            source = hit['_source']
            uid = source['uid']
            try:
                uname = es_user_profile.get(index='weibo_user',
                                            doc_type='user',
                                            id=uid)['_source']['nick_name']
            except Exception:
                # best effort: a missing or unreadable profile is not fatal
                uname = 'unknown'
            rows.append([uid, uname, source[sort_field]])
        return rows

    results = dict()
    number = es.count(index=portrait_index, doc_type="user")['count']
    results['total_number'] = number

    type_filter = {"query": {"filtered": {"filter": {"term": {"type": 1}}}}}
    sensitive_number = es.count(index=portrait_index, doc_type="user",
                                body=type_filter)['count']
    results['sensitive_number'] = sensitive_number
    results['influence_number'] = number - sensitive_number

    recommend_in_sensitive = _count_recommended('recommend_sensitive')
    recommend_in_influence = _count_recommended('recommend_influence')
    results['recommend_in'] = recommend_in_influence + recommend_in_sensitive

    results['monitor_number'] = [4, 83]  # test
    results['new_sensitive_words'] = 5  # test

    # Each aggregation is queried once (the geo query used to run twice).
    results['sensitive_words'] = _bucket_counts('sensitive_words_string')
    results['sensitive_geo'] = _bucket_counts('sensitive_geo_string')
    results['sensitive_hashtag'] = _bucket_counts('sensitive_hashtag_string')
    results['psycho_status'] = _bucket_counts('psycho_status_string')

    # Placeholder values: the 'political_tendency' aggregation is disabled,
    # and no 'domain' distribution is produced either.
    results['political_tendency'] = [['left', 123], ['middle', 768],
                                     ['right', 1095]]

    # domain and topic
    results['domain_rank'] = get_top_user()
    results['topic_rank'] = get_topic_user()

    # rank
    results['importance'] = search_in_portrait('importance')
    results['sensitive'] = search_in_portrait('sensitive')
    results['influence'] = search_in_portrait('influence')
    results['activeness'] = search_in_portrait('activeness')

    # test: the bci index is pinned to a fixed day rather than yesterday
    bci_index = '20130907'
    results['comment_total'] = _top_users_by(
        's_origin_weibo_comment_total_number', bci_index)
    results['retweeted_total'] = _top_users_by(
        's_origin_weibo_retweeted_total_number', bci_index)
    results['top_weibo_number'] = _top_users_by(
        's_origin_weibo_number', bci_index)

    return results
Esempio n. 40
0
def self_delete(word):
    """Remove ``word`` from the sensitive words and blacklist it.

    Prints the word, deletes it from the ``sensitive_words`` hash and adds it
    to the ``black_sensitive_words`` set.  Always returns ``'1'``.
    """
    print(word)
    active_hash = 'sensitive_words'
    blacklist_set = 'black_sensitive_words'
    r.hdel(active_hash, word)
    r.sadd(blacklist_set, word)
    return '1'
def self_add_in(date, word, level, category):
    """Register ``word`` as a sensitive word.

    Stores the JSON value ``[level, category]`` under ``word`` in the
    ``sensitive_words`` hash and in the ``history_in_<date>`` hash.  Always
    returns ``'1'``.
    """
    payload = json.dumps([level, category])
    r.hset('sensitive_words', word, payload)
    history_hash = 'history_in_' + date
    r.hset(history_hash, word, payload)
    return '1'
Esempio n. 42
0
def self_add_in(date, word, level, category):
    """Write ``word`` with its ``[level, category]`` pair to redis.

    The JSON-encoded pair is stored under ``word`` in ``sensitive_words``
    first and then in ``history_in_<date>``.  Always returns ``'1'``.
    """
    encoded = json.dumps([level, category])
    r.hset('sensitive_words', word, encoded)
    r.hset('history_in_' + date, word, encoded)
    return '1'
Esempio n. 43
0
def get_attr(date):
    """Build the overview statistics dict for the sensitive-user portrait.

    ``date`` is accepted for interface compatibility only; it is never read.
    The daily "bci" index used for the top-user rankings is fixed to the test
    date ``20130907``.

    The returned dict contains the keys ``total_number``,
    ``sensitive_number``, ``influence_number``, ``recommend_in``,
    ``monitor_number``, ``new_sensitive_words``, ``sensitive_words``,
    ``sensitive_geo``, ``sensitive_hashtag``, ``psycho_status``,
    ``political_tendency``, ``domain_rank``, ``topic_rank``, ``importance``,
    ``sensitive``, ``influence``, ``activeness``, ``comment_total``,
    ``retweeted_total`` and ``top_weibo_number``.
    """
    def _recommended_total(hash_name):
        # Number of entries across all JSON lists stored in the redis hash.
        count = 0
        for value in r.hgetall(hash_name).values():
            if value:
                count += len(json.loads(value))
        return count

    def _aggregate(field):
        # Run the aggregation for ``field``; return [[key, doc_count], ...].
        query_body = query_body_module(field)
        buckets = es.search(index='sensitive_user_portrait', doc_type='user', body=query_body)['aggregations']['all_interests']['buckets']
        return [[item['key'], item['doc_count']] for item in buckets]

    def _rank_bci(sort_field, index_name):
        # Users of the bci index sorted by ``sort_field`` (descending) as
        # [[uid, nick_name, value], ...].
        query_body = {
            "query": {
                "match_all": {}
            },
            "sort": {sort_field: {"order": "desc"}}
        }
        results_list = es.search(index=index_name, doc_type="bci", body=query_body)['hits']['hits']
        detail = []
        for item in results_list:
            uid = item['_source']['uid']
            try:
                uname = es_user_profile.get(index='weibo_user', doc_type='user', id=uid)['_source']['nick_name']
            except Exception:
                # profile lookup is best effort; fall back to a placeholder
                uname = 'unknown'
            detail.append([uid, uname, item['_source'][sort_field]])
        return detail

    results = dict()
    number = es.count(index="sensitive_user_portrait", doc_type="user")['count']
    results['total_number'] = number

    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "term": {
                        "type": 1
                    }
                }
            }
        }
    }
    sensitive_number = es.count(index="sensitive_user_portrait", doc_type="user", body=query_body)['count']
    results['sensitive_number'] = sensitive_number
    results['influence_number'] = number - sensitive_number

    recommend_in_sensitive = _recommended_total('recommend_sensitive')
    recommend_in_influence = _recommended_total('recommend_influence')
    results['recommend_in'] = recommend_in_influence + recommend_in_sensitive

    results['monitor_number'] = [4, 83]  # test
    results['new_sensitive_words'] = 5  # test

    # The geo aggregation used to be requested twice; once is enough.
    results['sensitive_words'] = _aggregate('sensitive_words_string')
    results['sensitive_geo'] = _aggregate('sensitive_geo_string')
    results['sensitive_hashtag'] = _aggregate('sensitive_hashtag_string')
    results['psycho_status'] = _aggregate('psycho_status_string')

    # The 'political_tendency' aggregation is disabled; placeholder values
    # are returned instead.  No 'domain' distribution is produced.
    results['political_tendency'] = [['left', 123], ['middle', 768], ['right', 1095]]

    # domain and topic
    results['domain_rank'] = get_top_user()
    results['topic_rank'] = get_topic_user()

    # rank
    results['importance'] = search_in_portrait('importance')
    results['sensitive'] = search_in_portrait('sensitive')
    results['influence'] = search_in_portrait('influence')
    results['activeness'] = search_in_portrait('activeness')

    # test: fixed bci index instead of yesterday's date
    bci_date = '20130907'
    results['comment_total'] = _rank_bci('s_origin_weibo_comment_total_number', bci_date)
    results['retweeted_total'] = _rank_bci('s_origin_weibo_retweeted_total_number', bci_date)
    results['top_weibo_number'] = _rank_bci('s_origin_weibo_number', bci_date)

    return results
def self_delete(word):
    """Delete ``word`` from ``sensitive_words`` and add it to the blacklist.

    The word is removed from the ``sensitive_words`` hash and then added to
    the ``black_sensitive_words`` set.  Always returns ``'1'``.
    """
    words_hash = 'sensitive_words'
    black_set = 'black_sensitive_words'
    r.hdel(words_hash, word)
    r.sadd(black_set, word)
    return '1'
Esempio n. 45
0
def ajax_show_all():
    """Return the value stored under the redis key ``overview``.

    Returns ``None`` when the key is missing or its value is empty.
    """
    cached = r.get('overview')
    return cached if cached else None
Esempio n. 46
0
def identify_in(data):
    """Record identified-in users and queue them for computation.

    ``data`` is an iterable of ``[date, uid, status, source]`` items, where
    ``date`` looks like ``2015-09-22``.  ``source`` ``'1'`` writes the status
    to ``identify_in_sensitive_<date>`` and ``'2'`` to
    ``identify_in_influence_<date>``.  ``status`` ``'1'`` queues the user in
    ``compute_now`` and ``'2'`` in ``compute_appoint``.

    The recommendation and compute hashes are updated for the date of the
    LAST item only.
    NOTE(review): this presumably assumes all items share one date - confirm
    against the caller.

    Always returns ``'1'``; empty ``data`` is a no-op.
    """
    appoint_list = []
    now_list = []
    identified = {'1': set(), '2': set()}
    status_hash_prefix = {
        '1': 'identify_in_sensitive_',
        '2': 'identify_in_influence_',
    }
    date = None
    for item in data:
        date = str(item[0]).replace('-', '')  # 2015-09-22 -> 20150922
        uid = item[1]
        status = str(item[2])
        source = str(item[3])
        if source in status_hash_prefix:
            # identify in user_list and compute status
            r.hset(status_hash_prefix[source] + date, uid, status)
            identified[source].add(uid)
        if status == '1':  # now
            now_list.append([uid, source])
        elif status == '2':  # appoint
            appoint_list.append([uid, source])

    if date is None:
        # Empty input: there is no date to update (this used to raise
        # UnboundLocalError).
        return '1'

    def _drop_identified(hash_name, identified_uids):
        # Remove the identified uids from the day's recommendation list,
        # deleting the field once nothing is left.
        pending = r.hget(hash_name, date)
        if pending and pending != '0':
            revise_set = set(json.loads(pending)) - identified_uids
            if revise_set:
                r.hset(hash_name, date, json.dumps(list(revise_set)))
            else:
                r.hdel(hash_name, date)

    _drop_identified('recommend_sensitive', identified['1'])
    _drop_identified('recommend_influence', identified['2'])

    # about compute: prepend the new users to whatever is already queued
    compute_now_list = r.hget('compute_now', date)
    compute_appoint_list = r.hget('compute_appoint', date)
    if compute_now_list:
        now_list.extend(json.loads(compute_now_list))
    r.hset('compute_now', date, json.dumps(now_list))
    if compute_appoint_list:
        appoint_list.extend(json.loads(compute_appoint_list))
    r.hset('compute_appoint', date, json.dumps(appoint_list))
    return '1'
def compute_mid_result(task_name, task_submit_date):
    """Accumulate the monitoring records of ``task_name`` into per-attribute series.

    Records are read from the ``monitor_index_name`` index (doc type
    ``task_name``) in 4-hour timestamp windows, starting at
    ``date2ts(task_submit_date)``.  Counts, sentiments and the sensitive-word
    score are keyed by ``str(timestamp)``; geo and hashtag counts are summed
    per day (``ts2datetime(timestamp)``).

    NOTE(review): the visible code never returns ``result``; confirm whether
    a trailing return or further post-processing was lost.
    """
    result = {
        'count_0': {}, 'count_1': {},
        'sentiment_0_126': {}, 'sentiment_0_127': {}, 'sentiment_0_128': {},
        'sentiment_0_129': {}, 'sentiment_0_130': {},
        'sensitive_score': {},
        'geo_0': {}, 'geo_1': {},
        'hashtag_0': {}, 'hashtag_1': {},
        'sentiment_1_126': {}, 'sentiment_1_127': {}, 'sentiment_1_128': {},
        'sentiment_1_129': {}, 'sentiment_1_130': {},
    }
    # geo & hashtag: day
    # other: 15min
    search_time_segment = 3600 * 4
    start_ts = date2ts(task_submit_date)
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    # test: "now" is pinned to a fixed date
    now_ts = datetime2ts('2013-09-08')
    date_ts = datetime2ts(now_date)
    segment = int((now_ts - date_ts) / 900) + 1
    end_ts = date_ts + segment * 900

    # every search time-range: 4 hour----bulk action to search
    begin_ts = start_ts
    while begin_ts < end_ts:
        query_body = {'range': {'timestamp': {'from': begin_ts, 'to': begin_ts + search_time_segment}}}
        # Search errors propagate to the caller unchanged.
        mid_result_list = es.search(
            index=monitor_index_name, doc_type=task_name,
            body={'query': query_body, 'size': 100000,
                  'sort': [{'timestamp': {'order': 'asc'}}]})['hits']['hits']

        for mid_result_item in mid_result_list:
            result_item = mid_result_item['_source']
            timestamp = result_item['timestamp']
            ts_key = str(timestamp)

            # attr_count
            count_dict = json.loads(result_item['count'])
            for sensitive in count_dict:
                result['count_' + sensitive][ts_key] = count_dict[sensitive]

            # attr_sentiment
            sensitive_sentiment_dict = json.loads(result_item['sentiment'])
            for sensitive in sensitive_sentiment_dict:
                sentiment_dict = sensitive_sentiment_dict[sensitive]
                for sentiment in sentiment_dict:
                    sentiment_key = 'sentiment_' + sensitive + '_' + sentiment
                    result[sentiment_key][ts_key] = sentiment_dict[sentiment]

            # attr_sensitive_score
            if 'sensitive_word' in result_item:
                sensitive_word_dict = json.loads(result_item['sensitive_word'])
            else:
                sensitive_word_dict = {}
            ts_word_score = 0
            for word in sensitive_word_dict:
                search_word = word.encode('utf-8')
                try:
                    word_identify = json.loads(word_r.hget('sensitive_words', search_word))
                except Exception:
                    # best effort: a word whose entry is missing or
                    # unreadable is weighted with 2
                    word_identify = [2]
                ts_word_score += sensitive_word_dict[word] * word_identify[0]
            result['sensitive_score'][ts_key] = ts_word_score

            # attr_geo: summed per day
            timestamp_date = ts2datetime(timestamp)
            sensitive_geo_dict = json.loads(result_item['geo'])
            for sensitive in sensitive_geo_dict:
                day_geo = result['geo_' + sensitive].setdefault(timestamp_date, {})
                geo_dict = sensitive_geo_dict[sensitive]
                for geo in geo_dict:
                    day_geo[geo] = day_geo.get(geo, 0) + geo_dict[geo]

            # attr_hashtag: summed per day
            if 'hashtag' in result_item:
                sensitive_hashtag_dict = json.loads(result_item['hashtag'])
            else:
                sensitive_hashtag_dict = {}
                # Make sure the day exists without discarding hashtags that
                # earlier records of the same day already contributed.
                result['hashtag_0'].setdefault(timestamp_date, {})
                result['hashtag_1'].setdefault(timestamp_date, {})
            # A single pass over the keys: the loop used to be nested inside
            # itself, which multiplied every count by the number of keys.
            for sensitive in sensitive_hashtag_dict:
                day_hashtag = result['hashtag_' + sensitive].setdefault(timestamp_date, {})
                hashtag_dict = sensitive_hashtag_dict[sensitive]
                for hashtag in hashtag_dict:
                    day_hashtag[hashtag] = day_hashtag.get(hashtag, 0) + hashtag_dict[hashtag]

        begin_ts += search_time_segment
Esempio n. 48
0
def identify_in(data):
    """Store the identify-in decisions in ``data`` and update the compute queues.

    Every item of ``data`` is ``[date, uid, status, source]`` with ``date``
    formatted like ``2015-09-22``.  Items with ``source`` ``'1'`` are written
    to ``identify_in_sensitive_<date>``, items with ``source`` ``'2'`` to
    ``identify_in_influence_<date>``.  Users with ``status`` ``'1'`` are
    added to the ``compute_now`` list and users with ``status`` ``'2'`` to
    the ``compute_appoint`` list.

    Only the date of the last item is used for the ``recommend_*`` and
    ``compute_*`` hashes.
    NOTE(review): looks like callers are expected to pass a single date per
    call - confirm.

    Returns ``'1'``.  Empty ``data`` returns ``'1'`` without touching redis.
    """
    appoint_list = []
    now_list = []
    sensitive_list = set()
    influence_list = set()
    date = None
    for item in data:
        date = str(item[0]).replace('-', '')  # 2015-09-22 -> 20150922
        uid = item[1]
        status = str(item[2])
        source = str(item[3])
        if source == '1':
            # identify in user_list and compute status
            r.hset('identify_in_sensitive_' + date, uid, status)
            sensitive_list.add(uid)
        elif source == '2':
            r.hset('identify_in_influence_' + date, uid, status)
            influence_list.add(uid)
        if status == '1':  # now
            now_list.append([uid, source])
        elif status == '2':  # appoint
            appoint_list.append([uid, source])

    if date is None:
        # No items were supplied; without this guard the code below failed
        # on the unbound ``date``.
        return '1'

    # Take the identified uids out of the day's recommendation lists.
    for hash_name, identified in (('recommend_sensitive', sensitive_list),
                                  ('recommend_influence', influence_list)):
        recommended = r.hget(hash_name, date)
        if not recommended or recommended == '0':
            continue
        revise_set = set(json.loads(recommended)) - identified
        if revise_set:
            r.hset(hash_name, date, json.dumps(list(revise_set)))
        else:
            r.hdel(hash_name, date)

    # about compute
    compute_now_list = r.hget('compute_now', date)
    compute_appoint_list = r.hget('compute_appoint', date)
    # compute now user list: new users first, then the ones already queued
    if compute_now_list:
        now_list.extend(json.loads(compute_now_list))
    r.hset('compute_now', date, json.dumps(now_list))
    # appointed compute user list
    if compute_appoint_list:
        appoint_list.extend(json.loads(compute_appoint_list))
    r.hset('compute_appoint', date, json.dumps(appoint_list))
    return '1'