def show_in_history(date):
    """Return identified-in user detail rows for *date*, sorted by influence.

    Merges users identified through the sensitive, influence and manual
    queues, appends each user's identify-in status to its detail row, and
    sorts by column 5 (influence score) descending.

    Fixes: removed a leftover ``print date`` debug statement and a duplicated
    ``results = []`` initialization; collapsed three copy-pasted loops.
    """
    sen_iden_in_results = r.hgetall("identify_in_sensitive_" + str(date))
    inf_iden_in_results = r.hgetall("identify_in_influence_" + str(date))
    man_iden_in_results = r.hgetall("identify_in_manual_" + str(date))
    # detail rows are computed against the previous day's data
    work_date = ts2datetime(datetime2ts(date) - DAY)
    results = []
    # (status_map, sensitive_flag) per identification source
    for iden_results, sensitive in ((sen_iden_in_results, 1),
                                    (inf_iden_in_results, 0),
                                    (man_iden_in_results, 0)):
        uid_list = iden_results.keys()
        if not uid_list:
            continue
        for item in get_sensitive_user_detail(uid_list, work_date, sensitive):
            item.append(iden_results[item[0]])  # identify-in status
            results.append(item)
    return sorted(results, key=lambda x: x[5], reverse=True)
def search_follower(uid, sensitive):
    """Return the top-20 users who retweeted *uid* (be-retweet relation).

    Aggregates be-retweet counts for *uid* across every redis DB in R_DICT
    (sensitive counts only when *sensitive* is truthy), resolves nick names
    from the weibo_user profile index and flags users that already have a
    sensitive portrait. Returns [top20, total_candidate_count] or [None, 0].

    Fixes: removed the unused ``results`` dict, a pointless bare ``try``
    around ``sorted`` and the param-shadowing loop variable; narrowed the
    bare excepts used as found-checks.
    """
    stat_results = dict()
    for db_num in R_DICT:
        r = R_DICT[db_num]
        if sensitive:
            br_uid_results = r.hgetall('sensitive_be_retweet_' + str(uid))
        else:
            br_uid_results = r.hgetall('be_retweet_' + str(uid))
        if not br_uid_results:
            continue
        for br_uid, count in br_uid_results.items():
            if br_uid == uid:
                continue  # ignore self-retweets
            # NOTE(review): redis hash values may come back as strings, in
            # which case '+=' concatenates rather than sums — confirm the
            # client's decoding configuration.
            if br_uid in stat_results:
                stat_results[br_uid] += count
            else:
                stat_results[br_uid] = count
    if not stat_results:
        return [None, 0]
    sort_stat_results = sorted(stat_results.items(), key=lambda x: x[1], reverse=True)[:20]
    uid_list = [item[0] for item in sort_stat_results]
    es_profile_results = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids': uid_list})['docs']
    es_portrait_results = es.mget(index='sensitive_user_portrait', doc_type='user', body={'ids': uid_list})['docs']
    result_list = []
    for profile_item, portrait_item in zip(es_profile_results, es_portrait_results):
        fuid = profile_item['_id']
        try:
            uname = profile_item['_source']['nick_name']
        except KeyError:
            uname = u'unknown'
        # a '_source' key means the uid already has a sensitive portrait
        in_status = 1 if '_source' in portrait_item else 0
        result_list.append([fuid, [uname, stat_results[fuid], in_status]])
    return [result_list[:20], len(stat_results)]
def show_in_history(date):
    """Collect the identified-in users of *date* from the sensitive,
    influence and manual queues, tag every detail row with its stored
    identify-in status and return the rows ordered by influence
    (column 5), highest first."""
    print(date)
    sen_map = r.hgetall("identify_in_sensitive_" + str(date))
    inf_map = r.hgetall("identify_in_influence_" + str(date))
    man_map = r.hgetall("identify_in_manual_" + str(date))
    work_date = ts2datetime(datetime2ts(date) - DAY)
    merged = []
    for status_map, flag in [(sen_map, 1), (inf_map, 0), (man_map, 0)]:
        uids = status_map.keys()
        rows = get_sensitive_user_detail(uids, work_date, flag) if uids else []
        for row in rows:
            row.append(status_map[row[0]])
            merged.append(row)
    return sorted(merged, key=lambda row: row[5], reverse=True)
def search_retweet(uid, sensitive):
    """Return the top-20 users that *uid* retweeted.

    Aggregates retweet counts across every redis DB in R_DICT (sensitive
    counts when *sensitive* is truthy), resolves nick names and
    sensitive-portrait membership, and returns
    [top20, total_candidate_count] or [None, 0].

    Fixes: replaced the deprecated ``dict.has_key`` (removed in Python 3)
    with ``in``; dropped the unused ``results`` dict.
    """
    stat_results = dict()
    for db_num in R_DICT:
        r = R_DICT[db_num]
        if not sensitive:
            ruid_results = r.hgetall('retweet_' + str(uid))
        else:
            # sensitive weibo counts are kept under a separate hash
            ruid_results = r.hgetall('sensitive_retweet_' + str(uid))
        if not ruid_results:
            continue
        for ruid, count in ruid_results.items():
            if ruid == uid:
                continue  # skip self-retweets
            if ruid in stat_results:
                stat_results[ruid] += count
            else:
                stat_results[ruid] = count
    if not stat_results:
        return [None, 0]
    sort_stat_results = sorted(stat_results.items(), key=lambda x: x[1], reverse=True)[:20]
    uid_list = [item[0] for item in sort_stat_results]
    es_profile_results = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids': uid_list})['docs']
    es_portrait_results = es.mget(index='sensitive_user_portrait', doc_type='user', body={'ids': uid_list})['docs']
    result_list = []
    for profile_item, portrait_item in zip(es_profile_results, es_portrait_results):
        ruid = profile_item['_id']
        uname = profile_item['_source']['nick_name'] if profile_item['found'] else u'unknown'
        in_status = 1 if portrait_item['found'] else 0
        result_list.append([ruid, [uname, stat_results[ruid], in_status]])
    return [result_list[:20], len(stat_results)]
def recommend_in_sensitive(date):
    """Return detail rows for sensitive-recommended users of *date* that
    have not yet been identified in (i.e. absent from the 'compute' hash)."""
    recommended = r.hkeys("recomment_" + str(date) + "_sensitive")  # recommended sensitive users
    already_in = r.hkeys("compute")  # users already identified in
    if not recommended:
        return []  # no recommendation data exists for that day
    pending = list(set(recommended) - set(already_in))
    work_date = ts2datetime(datetime2ts(date) - DAY)
    if not pending:
        return []
    return get_sensitive_user_detail(pending, work_date, 1)
def ajax_cancel_delete():
    """Remove the submitted uids from the 'delete_user' record of *date*.

    Reads ``uid_list`` (comma separated) and ``date`` from the request args;
    deletes the whole hash field once no uids remain. Returns '1' on
    success, '0' on missing parameters.

    Fixes: guarded against ``r.hget`` returning None (the original passed it
    straight to ``json.loads`` and crashed when no record existed).
    """
    uid_list = request.args.get('uid_list', '')
    date = request.args.get('date', '')
    if not uid_list or not date:
        return '0'
    uid_list = str(uid_list).split(',')
    date = str(date).replace('-', '')
    stored = r.hget('delete_user', date)
    delete_list = json.loads(stored) if stored else []
    revise_list = set(delete_list) - set(uid_list)
    if revise_list:
        r.hset('delete_user', date, json.dumps(list(revise_list)))
    else:
        r.hdel('delete_user', date)
    return '1'
def ajax_delete_user():
    """Append the submitted uids to the 'delete_user' hash under *date*.

    ``date`` arrives as '2013-09-01' and is stored as '20130901'. Returns
    '1' on success, '0' when parameters are missing.

    Fixes: an empty ``uid_list`` previously split into ``['']`` and was
    stored as a bogus entry; empty fragments are now filtered out.
    """
    date = request.args.get('date', '')  # '2013-09-01'
    date = str(date).replace('-', '')
    uid_list = request.args.get('uid_list', '')  # e.g. "12345,123456"
    delete_list = [u for u in str(uid_list).split(',') if u]
    if date and delete_list:
        temp = r.hget('delete_user', date)
        if temp:
            delete_list.extend(json.loads(temp))
        r.hset('delete_user', date, json.dumps(delete_list))
        return '1'
    return '0'
def ajax_delete_user():
    """Merge the submitted uid list into the 'delete_user' hash for *date*
    and answer '1' on success or '0' when arguments are missing."""
    raw_date = request.args.get("date", "")  # '2013-09-01'
    raw_date = str(raw_date).replace("-", "")
    raw_uids = request.args.get("uid_list", "")  # uid_list, 12345,123456,
    pending = str(raw_uids).split(",")
    if not raw_date or not pending:
        return "0"
    previous = r.hget("delete_user", raw_date)
    if previous:
        pending.extend(json.loads(previous))
    r.hset("delete_user", raw_date, json.dumps(pending))
    return "1"
def ajax_cancel_delete():
    """Drop the given uids from the 'delete_user' record of *date*;
    remove the field entirely once no uids are left."""
    raw_uids = request.args.get("uid_list", "")
    raw_date = request.args.get("date", "")
    if not raw_uids or not raw_date:
        return "0"
    uids = str(raw_uids).split(",")
    day = str(raw_date).replace("-", "")
    remaining = set(json.loads(r.hget("delete_user", day))) - set(uids)
    if remaining:
        r.hset("delete_user", day, json.dumps(list(remaining)))
    else:
        r.hdel("delete_user", day)
    return "1"
def recommend_in_sensitive(date):
    """List detail rows for the sensitive-recommendation candidates of
    *date*, excluding users already present in the 'compute' hash."""
    sensitive_key = "recomment_" + str(date) + "_sensitive"
    recommended_uids = r.hkeys(sensitive_key)  # recommended sensitive users
    identified_uids = r.hkeys("compute")  # users already identified in
    if not recommended_uids:
        return []  # no data for that day
    candidates = list(set(recommended_uids) - set(identified_uids))
    previous_day = ts2datetime(datetime2ts(date) - DAY)
    return get_sensitive_user_detail(candidates, previous_day, 1) if candidates else []
def search_sensitive_words(level, category):
    """Filter the 'sensitive_words' hash by *level* and *category*.

    ``level == 0`` means "any level"; ``category == ''`` means "any
    category". Each hash value is a JSON ``[level, category]`` pair; returns
    a list of ``[word, level, category]`` triples.

    Fixes: removed the unused ``results`` dict and the redundant repeated
    ``int(level)`` conversions / ``word_list`` re-initializations.
    """
    word_list = []
    level = int(level)
    words_dict = r.hgetall('sensitive_words')
    if words_dict:
        for word, state_json in words_dict.items():
            word_state = json.loads(state_json)
            if level == 0 and not category:
                matched = True  # no filter at all
            elif level and category:
                matched = level == int(word_state[0]) and category == word_state[1]
            elif not level and category:
                matched = category == word_state[1]
            else:
                matched = level == int(word_state[0])
            if matched:
                word_list.append([word, word_state[0], word_state[1]])
    return word_list
def recommend_new_words(date_list):
    """Collect recommended new sensitive words for each date in *date_list*.

    Each entry of the 'recommend_sensitive_words_<date>' hash maps a word to
    JSON ``[uid_list, count]``. Returns ``[word, uname_list, count]`` rows
    sorted by count descending; the raw uids are kept when the profile
    lookup fails.

    Fixes: narrowed the bare ``except:`` (it also swallowed
    KeyboardInterrupt/SystemExit) to ``except Exception``.
    """
    results = []
    for date in date_list:
        date = date.replace('-', '')
        words_dict = r.hgetall('recommend_sensitive_words_' + date)
        if not words_dict:
            continue
        for word, raw_value in words_dict.items():
            value = json.loads(raw_value)
            uid_list = value[0]
            try:
                search_results = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids': uid_list})['docs']
                uname = [item['_source']['nick_name'] if item['found'] else 'unknown'
                         for item in search_results]
            except Exception:
                uname = uid_list  # best effort: fall back to raw uids
            results.append([word, uname, value[1]])
    return sorted(results, key=lambda x: x[2], reverse=True)
def search_sensitive_words(level, category):
    """Return ``[word, level, category]`` triples from the
    'sensitive_words' hash matching *level* (0 = any) and *category*
    ('' = any).

    Fixes: the category-only branch compared against the misspelled name
    ``catetory`` (a NameError at runtime); also removed the unused
    ``results`` dict.
    """
    word_list = []
    words_dict = r.hgetall('sensitive_words')
    if words_dict:
        if int(level) == 0 and not category:
            # no filter at all: return everything
            for k, v in words_dict.items():
                word_state = json.loads(v)
                word_list.append([k, word_state[0], word_state[1]])
        elif level and category:
            for k, v in words_dict.items():
                word_state = json.loads(v)
                if int(level) == int(word_state[0]) and category == word_state[1]:
                    word_list.append([k, word_state[0], word_state[1]])
        elif not level and category:
            for k, v in words_dict.items():
                word_state = json.loads(v)
                if category == word_state[1]:  # was: catetory (NameError)
                    word_list.append([k, word_state[0], word_state[1]])
        else:
            for k, v in words_dict.items():
                word_state = json.loads(v)
                if int(level) == int(word_state[0]):
                    word_list.append([k, word_state[0], word_state[1]])
    return word_list
def recommend_new_words(date_list):
    """Gather newly recommended sensitive words across *date_list* and
    return [word, unames, count] rows sorted by count descending."""
    rows = []
    for day in date_list:
        day = day.replace('-', '')
        word_map = r.hgetall('recommend_sensitive_words_' + day)
        if not word_map:
            continue
        for word, packed in word_map.items():
            payload = json.loads(packed)
            uid_list = payload[0]
            uname = []
            try:
                docs = es_user_profile.mget(
                    index='weibo_user', doc_type='user',
                    body={'ids': uid_list})['docs']
                for doc in docs:
                    uname.append(doc['_source']['nick_name'] if doc['found'] else 'unknown')
            except:
                uname = uid_list  # profile lookup failed: keep raw uids
            rows.append([word, uname, payload[1]])
    return sorted(rows, key=lambda row: row[2], reverse=True)
def recommend_in_top_influence(date):
    """Detail rows for the influence-recommendation candidates of *date*
    that are not yet present in the 'compute' hash."""
    recommended = r.hkeys("recomment_" + date + "_influence")
    identified = r.hkeys("compute")  # users already identified in
    if not recommended:
        return []
    candidates = list(set(recommended) - set(identified))
    previous_day = ts2datetime(datetime2ts(date) - DAY)
    if not candidates:
        return []
    return get_sensitive_user_detail(candidates, previous_day, 0)
def recommend_in_top_influence(date):
    """Return get_sensitive_user_detail rows for the users recommended by
    influence on *date*, minus those already identified in."""
    influence_key = "recomment_" + date + "_influence"
    re_inf_set = r.hkeys(influence_key)
    iden_in_set = r.hkeys("compute")  # already-identified users
    if not re_inf_set:
        return []
    pending = list(set(re_inf_set) - set(iden_in_set))
    work_date = ts2datetime(datetime2ts(date) - DAY)
    results = get_sensitive_user_detail(pending, work_date, 0) if pending else []
    return results
def identify_in(data):
    """Record identify-in decisions.

    Each item of *data* is ``[date, uid, status, source]`` where source 1 =
    sensitive queue, 2 = influence queue, anything else = manual. The uid
    is stored in the per-date, per-source hash and queued in 'compute' as
    JSON ``[date, status]``. Always returns '1'.

    Fixes: removed a large inert triple-quoted block of legacy pruning
    logic (it referenced the undefined name ``sensitives_results``) plus
    the unused sensitive/influence/appoint/now accumulators it relied on.
    """
    for item in data:
        date = item[0]  # e.g. '2015-09-22'
        uid = item[1]
        status = str(item[2])
        source = str(item[3])
        if int(source) == 1:
            # identify in user_list and compute status
            r.hset('identify_in_sensitive_' + str(date), uid, status)
        elif int(source) == 2:
            r.hset('identify_in_influence_' + str(date), uid, status)
        else:
            r.hset('identify_in_manual_' + str(date), uid, status)
        r.hset('compute', uid, json.dumps([date, status]))
    return '1'
def search_follower(uid, sensitive):
    """Top-20 be-retweet users of *uid* aggregated over every redis DB in
    R_DICT, annotated with profile nick names and sensitive-portrait
    membership. Returns [top20, candidate_count] or [None, 0]."""
    tally = dict()
    for db_num in R_DICT:
        r = R_DICT[db_num]
        hash_key = ("sensitive_be_retweet_" if sensitive else "be_retweet_") + str(uid)
        counts = r.hgetall(hash_key)
        if not counts:
            continue
        for candidate in counts:
            if candidate == uid:
                continue
            if candidate in tally:
                tally[candidate] += counts[candidate]
            else:
                tally[candidate] = counts[candidate]
    if not tally:
        return [None, 0]
    try:
        ranked = sorted(tally.items(), key=lambda kv: kv[1], reverse=True)[:20]
    except:
        return [None, 0]
    uid_list = [kv[0] for kv in ranked]
    profile_docs = es_user_profile.mget(index="weibo_user", doc_type="user", body={"ids": uid_list})["docs"]
    portrait_docs = es.mget(index="sensitive_user_portrait", doc_type="user", body={"ids": uid_list})["docs"]
    result_list = []
    for pos in range(len(profile_docs)):
        doc = profile_docs[pos]
        follower_uid = doc["_id"]
        try:
            uname = doc["_source"]["nick_name"]
        except:
            uname = u"unknown"
        in_status = 1 if "_source" in portrait_docs[pos] else 0
        result_list.append([follower_uid, [uname, tally[follower_uid], in_status]])
    return [result_list[:20], len(tally)]
def identify_in(data):
    """Persist identify-in decisions (``[date, uid, status, source]`` rows).

    source 1 -> sensitive hash, 2 -> influence hash, other -> manual hash;
    every uid is also pushed into 'compute' as JSON ``[date, status]``.
    Returns '1'.

    Fixes: dropped an inert triple-quoted block of legacy pruning logic
    (which read the undefined name ``sensitives_results``) and the unused
    sensitive/influence/appoint/now accumulators that only it used.
    """
    for row in data:
        date = row[0]  # e.g. '2015-09-22'
        uid = row[1]
        status = str(row[2])
        source_num = int(str(row[3]))
        if source_num == 1:
            target = 'identify_in_sensitive_' + str(date)
        elif source_num == 2:
            target = 'identify_in_influence_' + str(date)
        else:
            target = 'identify_in_manual_' + str(date)
        r.hset(target, uid, status)
        r.hset('compute', uid, json.dumps([date, status]))
    return '1'
def search_follower(uid, sensitive):
    """Top-20 be-retweet users of *uid* from the r_cluster redis, with
    profile nick names and sensitive-portrait flags.
    Returns [top20, candidate_count] or [None, 0] when no data exists."""
    stat_results = dict()
    r = r_cluster
    hash_key = ('sensitive_be_retweet_' if sensitive else 'be_retweet_') + str(uid)
    br_uid_results = r.hgetall(hash_key)
    if br_uid_results:
        for br_uid, cnt in br_uid_results.items():
            if br_uid == uid:
                continue
            try:
                stat_results[br_uid] += cnt
            except:
                stat_results[br_uid] = cnt
    if not stat_results:
        return [None, 0]
    try:
        top = sorted(stat_results.items(), key=lambda pair: pair[1], reverse=True)[:20]
    except:
        return [None, 0]
    uid_list = [pair[0] for pair in top]
    profile_docs = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids': uid_list})['docs']
    portrait_docs = es.mget(index='sensitive_user_portrait', doc_type='user', body={'ids': uid_list})['docs']
    result_list = []
    for profile_doc, portrait_doc in zip(profile_docs, portrait_docs):
        follower_uid = profile_doc['_id']
        try:
            uname = profile_doc['_source']['nick_name']
        except:
            uname = u'unknown'
        in_status = 1 if '_source' in portrait_doc else 0
        result_list.append([follower_uid, [uname, stat_results[follower_uid], in_status]])
    return [result_list[:20], len(stat_results)]
def recommend_in_top_influence(date):
    """Detail rows for the influence recommendation list stored under
    'recommend_influence' for *date* (normalized to 'YYYYMMDD')."""
    date = date.replace('-', '')
    stored = r.hget('recommend_influence', date)
    if not stored:
        return []
    return get_sensitive_user_detail(json.loads(stored), date, 0)
def recommend_in_sensitive(date):
    """Detail rows for the sensitive recommendation list stored under
    'recommend_sensitive' for *date*; the raw falsy redis value itself is
    passed through when nothing is stored."""
    date = date.replace('-', '')
    stored = r.hget('recommend_sensitive', date)
    if not stored:
        return stored  # return '0'
    return get_sensitive_user_detail(json.loads(stored), date, 1)
def recommend_in_top_influence(date):
    """Load the JSON uid list saved in 'recommend_influence' for *date*
    and expand it into non-sensitive user detail rows."""
    day = date.replace('-', '')
    raw = r.hget('recommend_influence', day)
    if raw:
        uid_list = json.loads(raw)
        return get_sensitive_user_detail(uid_list, day, 0)
    return []
def recommend_in_sensitive(date):
    """Expand the 'recommend_sensitive' uid list of *date* into sensitive
    user detail rows; the stored falsy value is returned unchanged when
    the field is missing."""
    day = date.replace('-', '')
    raw = r.hget('recommend_sensitive', day)
    if not raw:
        return raw  # return '0'
    uid_list = json.loads(raw)
    return get_sensitive_user_detail(uid_list, day, 1)
def show_in_history(date, sensitive):
    """History of identified-in users for *date*: the sensitive queue when
    *sensitive* is truthy, otherwise the influence queue. Each detail row
    gets its stored identify-in status appended."""
    results = []
    day = str(date).replace('-', '')
    if sensitive:
        # sensitive user recommendation history
        status_map = r.hgetall('identify_in_sensitive_' + str(day))
        flag = 1
    else:
        status_map = r.hgetall('identify_in_influence_' + str(day))
        flag = 0
    if status_map:
        results = get_sensitive_user_detail(status_map.keys(), day, flag)
        for row in results:
            row.append(status_map[row[0]])
    return results
def show_in_history(date, sensitive):
    """Return identify-in history rows for *date* from either the
    sensitive (*sensitive* truthy) or influence queue, each row tagged
    with its recorded status."""
    results = []
    date = str(date).replace('-', '')
    key = ('identify_in_sensitive_' + str(date)) if sensitive else ('identify_in_influence_' + str(date))
    record = r.hgetall(key)
    if record:
        results = get_sensitive_user_detail(record.keys(), date, 1 if sensitive else 0)
        for item in results:
            item.append(record[item[0]])
    return results
def ajax_history_delete():
    """Return deleted-user info as JSON: for one *date* ('2013-09-01'
    style) or, when show_all is set, across every recorded date.
    Answers '0' when nothing is stored."""
    day = str(request.args.get('date', '')).replace('-', '')
    show_all = request.args.get('show_all', '')  # return all
    if not show_all:
        stored = r.hget('delete_user', day)
        if stored:
            return json.dumps(get_user_info(json.loads(stored)))
    else:
        all_stored = r.hgetall('delete_user')
        if all_stored:
            uids = []
            for packed in all_stored.values():
                uids.extend(json.loads(packed))
            return json.dumps(get_user_info(uids))
    return '0'
def ajax_history_delete():
    """JSON dump of user info for deleted uids -- a single date by default,
    every date when the show_all arg is present; '0' if no record."""
    date = request.args.get("date", "")  # '2013-09-01'
    date = str(date).replace("-", "")
    search_all = request.args.get("show_all", "")  # return all
    if search_all:
        stored_map = r.hgetall("delete_user")
        if stored_map:
            uid_list = []
            for chunk in stored_map.values():
                uid_list.extend(json.loads(chunk))
            return json.dumps(get_user_info(uid_list))
    else:
        chunk = r.hget("delete_user", date)
        if chunk:
            return json.dumps(get_user_info(json.loads(chunk)))
    return "0"
def search_retweet(uid, sensitive):
    """Top-20 users retweeted by *uid*, aggregated over all redis DBs in
    R_DICT, with nick names and sensitive-portrait flags.
    Returns [top20, candidate_count] or [None, 0]."""
    tally = dict()
    for db_num in R_DICT:
        r = R_DICT[db_num]
        # sensitive weibo counts live under a separate hash
        key = ("sensitive_retweet_" if sensitive else "retweet_") + str(uid)
        counts = r.hgetall(key)
        if counts:
            for ruid in counts:
                if ruid == uid:
                    continue
                if ruid in tally:  # same as has_key, Py3-safe
                    tally[ruid] += counts[ruid]
                else:
                    tally[ruid] = counts[ruid]
    if not tally:
        return [None, 0]
    ranked = sorted(tally.items(), key=lambda kv: kv[1], reverse=True)[:20]
    uid_list = [kv[0] for kv in ranked]
    profile_docs = es_user_profile.mget(index="weibo_user", doc_type="user", body={"ids": uid_list})["docs"]
    portrait_docs = es.mget(index="sensitive_user_portrait", doc_type="user", body={"ids": uid_list})["docs"]
    result_list = []
    for pos in range(len(profile_docs)):
        doc = profile_docs[pos]
        target_uid = doc["_id"]
        uname = doc["_source"]["nick_name"] if doc["found"] else u"unknown"
        in_status = 1 if portrait_docs[pos]["found"] else 0
        result_list.append([target_uid, [uname, tally[target_uid], in_status]])
    return [result_list[:20], len(tally)]
def search_retweet(uid, sensitive):
    """Top-20 users retweeted by *uid* according to the r_cluster redis,
    annotated with profile names and portrait membership.
    Returns [top20, candidate_count] or [None, 0]."""
    tally = dict()
    r = r_cluster
    # sensitive weibo counts are stored separately
    hash_key = ('retweet_' + str(uid)) if not sensitive else ('sensitive_retweet_' + str(uid))
    counts = r.hgetall(hash_key)
    if counts:
        for ruid, cnt in counts.items():
            if ruid == uid:
                continue
            if ruid in tally:  # equivalent to has_key
                tally[ruid] += cnt
            else:
                tally[ruid] = cnt
    if not tally:
        return [None, 0]
    ranked = sorted(tally.items(), key=lambda kv: kv[1], reverse=True)[:20]
    uid_list = [kv[0] for kv in ranked]
    profile_docs = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids': uid_list})['docs']
    portrait_docs = es.mget(index='sensitive_user_portrait', doc_type='user', body={'ids': uid_list})['docs']
    result_list = []
    for profile_doc, portrait_doc in zip(profile_docs, portrait_docs):
        target_uid = profile_doc['_id']
        uname = profile_doc['_source']['nick_name'] if profile_doc['found'] else u'unknown'
        in_status = 1 if portrait_doc['found'] else 0
        result_list.append([target_uid, [uname, tally[target_uid], in_status]])
    return [result_list[:20], len(tally)]
def lastest_identify_in():
    """Collect the words identified-in over the 7 days before the
    (currently hard-coded) anchor date, as ``{word: [level, category]}``.

    Fixes: removed a dead ``now_ts = time.time()`` assignment that was
    immediately overwritten by the hard-coded test anchor.
    """
    results = dict()
    # TODO(review): anchor date pinned for testing; switch back to
    # time.time() for production.
    now_ts = datetime2ts('2013-09-08')
    for day_offset in range(1, 8):
        ts = now_ts - day_offset * 3600 * 24
        date = ts2datetime(ts).replace('-', '')
        words_dict = r.hgetall('history_in_' + date)
        for word in words_dict:
            results[word] = json.loads(words_dict[word])
    return results
def lastest_identify_in():
    """Map each word identified-in during the previous seven days to its
    stored JSON payload (level/category pair)."""
    collected = dict()
    now_ts = time.time()
    now_ts = datetime2ts('2013-09-08')  # hard-coded test anchor overrides the real clock
    for offset in range(1, 8):
        day = ts2datetime(now_ts - offset * 3600 * 24).replace('-', '')
        stored = r.hgetall('history_in_' + day)
        for word, payload in stored.items():
            collected[word] = json.loads(payload)
    return collected
def sort_sensitive_words(words_list):
    """Extend each row of *words_list* with its [level, category] from the
    'sensitive_words' hash, defaulting to [1, 'politics'] for unknown
    words."""
    annotated = []
    for row in words_list:
        entry = list(row)
        encoded = (row[0]).encode("utf-8", "ignore")
        stored = r.hget("sensitive_words", encoded)
        entry.extend(json.loads(stored) if stored else [1, "politics"])
        annotated.append(entry)
    return annotated
def sort_sensitive_words(words_list):
    """Attach level/category metadata from redis to every word row;
    unregistered words fall back to level 1, category 'politics'."""
    enriched = []
    for item in words_list:
        record = []
        record.extend(item)
        lookup = r.hget('sensitive_words', (item[0]).encode('utf-8', 'ignore'))
        if lookup:
            record.extend(json.loads(lookup))
        else:
            record.extend([1, 'politics'])
        enriched.append(record)
    return enriched
def sort_sensitive_words(words_list):
    """Return word rows augmented with their sensitive-word level and
    category ([1, 'politics'] when the word is not registered)."""
    out = []
    for entry in words_list:
        word_key = (entry[0]).encode('utf-8', 'ignore')
        meta_json = r.hget('sensitive_words', word_key)
        meta = json.loads(meta_json) if meta_json else [1, 'politics']
        out.append(list(entry) + list(meta))
    return out
def identify_in(date, words_list):
    """Identify-in newly confirmed sensitive words.

    *words_list* rows are ``[word, level, category]``; *date* is the day
    the words were recommended. Each word is written to 'sensitive_words'
    and 'history_in_<date>', then removed from that day's recommendation
    hash. Returns '1'.

    Fixes: removed a leftover ``print words_list`` debug statement.
    """
    new_list = []
    for item in words_list:
        payload = json.dumps([item[1], item[2]])
        r.hset('sensitive_words', item[0], payload)
        r.hset('history_in_' + date, item[0], payload)
        new_list.append(item[0])
    # drop confirmed words from the recommendation queue of that date
    for word in new_list:
        r.hdel('recommend_sensitive_words_' + date, word)
    return '1'
def identify_in(date, words_list):
    """Identify-in confirmed sensitive words and purge them from the last
    seven days of recommendation hashes.

    *words_list* rows are ``[word, level, category]``; *date* is the day
    the words were recommended. Returns '1'.

    Fixes: the cleanup loop used ``for date in time_list`` and thereby
    re-bound the *date* parameter, so every word after the first was
    written to the wrong 'history_in_' hash; also removed a dead
    ``time.time()`` call immediately overwritten by the test anchor.
    """
    # TODO(review): anchor timestamp is pinned for testing
    ts = datetime2ts('2013-09-07')
    time_list = []
    for i in range(7):
        now_ts = int(ts) - i * 24 * 3600
        time_list.append(ts2datetime(now_ts).replace('-', ''))
    for item in words_list:
        payload = json.dumps([item[1], item[2]])
        r.hset('sensitive_words', item[0], payload)
        r.hset('history_in_' + date, item[0], payload)
        for clean_date in time_list:
            r.hdel('recommend_sensitive_words_' + clean_date, item[0])
    return '1'
def identify_in(date, words_list):
    """Store confirmed sensitive words (``[word, level, category]`` rows)
    under 'sensitive_words' and 'history_in_<date>', then delete each word
    from the previous week's recommendation hashes. Returns '1'.

    Bug fixed: the cleanup loop was ``for date in time_list``, clobbering
    the *date* parameter so later words landed in the wrong history hash;
    a dead ``time.time()`` assignment was also removed.
    """
    anchor = datetime2ts('2013-09-07')  # hard-coded test anchor (was masking time.time())
    week_dates = [ts2datetime(int(anchor) - day * 24 * 3600).replace('-', '')
                  for day in range(7)]
    for item in words_list:
        word = item[0]
        state = json.dumps([item[1], item[2]])
        r.hset('sensitive_words', word, state)
        r.hset('history_in_' + date, word, state)
        for rec_date in week_dates:
            r.hdel('recommend_sensitive_words_' + rec_date, word)
    return '1'
def get_attr(date):
    """Build the overview-dashboard statistics dict.

    Aggregates portrait counts, pending recommendation counts, the
    sensitive word/geo/hashtag and psycho-status distributions, domain and
    topic ranks, and the daily comment/retweet/weibo-count top lists from
    Elasticsearch and redis.

    NOTE(review): the *date* parameter is never used — the BCI index date
    is hard-coded to '20130907' below; several values are placeholder
    test data (marked '# test').
    """
    results = dict()
    # total number of portrait users
    number = es.count(index="sensitive_user_portrait", doc_type="user")['count']
    results['total_number'] = number
    # type == 1 marks sensitive users in the portrait index
    query_body = {"query": {"filtered": {"filter": {"term": {"type": 1}}}}}
    sensitive_number = es.count(index="sensitive_user_portrait", doc_type="user", body=query_body)['count']
    results['sensitive_number'] = sensitive_number
    results['influence_number'] = number - sensitive_number
    # pending recommendations: each redis hash value is a JSON uid list
    recommend_in_sensitive = 0
    sensitive_dict = r.hgetall('recommend_sensitive')
    for k, v in sensitive_dict.items():
        if v:
            sensitive_list = json.loads(v)
            recommend_in_sensitive += len(sensitive_list)
    recommend_in_influence = 0
    influence_dict = r.hgetall('recommend_influence')
    for k, v in influence_dict.items():
        if v:
            sensitive_list = json.loads(v)
            recommend_in_influence += len(sensitive_list)
    results['recommend_in'] = recommend_in_influence + recommend_in_sensitive
    results['monitor_number'] = [4, 83]  # test
    results['new_sensitive_words'] = 5  # test
    # aggregation buckets: sensitive-word distribution
    query_body = query_body_module('sensitive_words_string')
    sw_list = es.search(
        index='sensitive_user_portrait',
        doc_type='user',
        body=query_body)['aggregations']['all_interests']['buckets']
    sensitive_words = []
    for item in sw_list:
        temp = []
        temp.append(item['key'])
        temp.append(item['doc_count'])
        sensitive_words.append(temp)
    results['sensitive_words'] = sensitive_words
    # aggregation buckets: sensitive-geo distribution
    query_body = query_body_module('sensitive_geo_string')
    sg_list = es.search(
        index='sensitive_user_portrait',
        doc_type='user',
        body=query_body)['aggregations']['all_interests']['buckets']
    sensitive_geo = []
    for item in sg_list:
        temp = []
        temp.append(item['key'])
        temp.append(item['doc_count'])
        sensitive_geo.append(temp)
    results['sensitive_geo'] = sensitive_geo
    # aggregation buckets: sensitive-hashtag distribution
    query_body = query_body_module('sensitive_hashtag_string')
    sh_list = es.search(
        index='sensitive_user_portrait',
        doc_type='user',
        body=query_body)['aggregations']['all_interests']['buckets']
    sensitive_hashtag = []
    for item in sh_list:
        temp = []
        temp.append(item['key'])
        temp.append(item['doc_count'])
        sensitive_hashtag.append(temp)
    results['sensitive_hashtag'] = sensitive_hashtag
    # NOTE(review): this sensitive_geo aggregation duplicates the one
    # above and simply overwrites results['sensitive_geo'] with the same
    # data — likely copy-paste leftover.
    query_body = query_body_module('sensitive_geo_string')
    sg_list = es.search(
        index='sensitive_user_portrait',
        doc_type='user',
        body=query_body)['aggregations']['all_interests']['buckets']
    sensitive_geo = []
    for item in sg_list:
        temp = []
        temp.append(item['key'])
        temp.append(item['doc_count'])
        sensitive_geo.append(temp)
    results['sensitive_geo'] = sensitive_geo
    # aggregation buckets: psycho-status distribution
    query_body = query_body_module('psycho_status_string')
    sp_list = es.search(
        index='sensitive_user_portrait',
        doc_type='user',
        body=query_body)['aggregations']['all_interests']['buckets']
    psycho_status = []
    for item in sp_list:
        temp = []
        temp.append(item['key'])
        temp.append(item['doc_count'])
        psycho_status.append(temp)
    results['psycho_status'] = psycho_status
    # disabled: real political-tendency aggregation (placeholder used below)
    '''
    query_body = query_body_module('political_tendency')
    st_list = es.search(index='sensitive_user_portrait', doc_type='user', body=query_body)['aggregations']['all_interests']['buckets']
    political_tendency = []
    for item in st_list:
        temp = []
        temp.append(item['key'])
        temp.append(item['doc_count'])
        political_tendency.append(temp)
    results['political_tendency'] = political_tendency
    '''
    results['political_tendency'] = [['left', 123], ['middle', 768], ['right', 1095]]
    # disabled: domain aggregation (domain ranks come from get_top_user below)
    '''
    query_body = query_body_module('domain_string')
    sd_list = es.search(index='sensitive_user_portrait', doc_type='user', body=query_body)['aggregations']['all_interests']['buckets']
    domain = []
    for item in sd_list:
        temp = []
        temp.append(item['key'])
        temp.append(item['doc_count'])
        domain.append(temp)
    results['domain'] = domain
    '''
    # tendency distribution
    # domain and topic
    domain_list = ['']  #search_important('domain', )
    domain_results = get_top_user()
    topic_results = get_topic_user()
    results['domain_rank'] = domain_results
    results['topic_rank'] = topic_results
    # rank
    important_list = search_in_portrait('importance')
    results['importance'] = important_list
    results['sensitive'] = search_in_portrait('sensitive')
    results['influence'] = search_in_portrait('influence')
    results['activeness'] = search_in_portrait('activeness')
    # daily top list: total comments on original weibo
    query_body = {
        "query": {
            "match_all": {}
        },
        "sort": {
            "s_origin_weibo_comment_total_number": {
                "order": "desc"
            }
        }
    }
    date = ts2datetime(time.time() - 24 * 3600).replace('-', '')
    date = '20130907'  # NOTE(review): hard-coded test date overrides yesterday
    results_list = es.search(index=date, doc_type="bci", body=query_body)['hits']['hits']
    comment_weibo_detail = []
    for item in results_list:
        temp = []
        uid = item['_source']['uid']
        try:
            uname = es_user_profile.get(index='weibo_user', doc_type='user', id=uid)['_source']['nick_name']
        except:
            uname = 'unknown'
        temp.append(item['_source']['uid'])
        temp.append(uname)
        temp.append(item['_source']['s_origin_weibo_comment_total_number'])
        comment_weibo_detail.append(temp)
    results['comment_total'] = comment_weibo_detail
    # daily top list: total retweets of original weibo
    query_body = {
        "query": {
            "match_all": {}
        },
        "sort": {
            "s_origin_weibo_retweeted_total_number": {
                "order": "desc"
            }
        }
    }
    date = ts2datetime(time.time() - 24 * 3600).replace('-', '')
    date = '20130907'  # NOTE(review): hard-coded test date overrides yesterday
    results_list = es.search(index=date, doc_type="bci", body=query_body)['hits']['hits']
    retweeted_weibo_detail = []
    for item in results_list:
        temp = []
        uid = item['_source']['uid']
        try:
            uname = es_user_profile.get(index='weibo_user', doc_type='user', id=uid)['_source']['nick_name']
        except:
            uname = 'unknown'
        temp.append(item['_source']['uid'])
        temp.append(uname)
        temp.append(item['_source']['s_origin_weibo_retweeted_total_number'])
        retweeted_weibo_detail.append(temp)
    results['retweeted_total'] = retweeted_weibo_detail
    # daily top list: number of original weibo posted
    query_body = {
        "query": {
            "match_all": {}
        },
        "sort": {
            "s_origin_weibo_number": {
                "order": "desc"
            }
        }
    }
    date = ts2datetime(time.time() - 24 * 3600).replace('-', '')
    date = '20130907'  # NOTE(review): hard-coded test date overrides yesterday
    results_list = es.search(index=date, doc_type="bci", body=query_body)['hits']['hits']
    weibo_detail = []
    for item in results_list:
        temp = []
        uid = item['_source']['uid']
        try:
            uname = es_user_profile.get(index='weibo_user', doc_type='user', id=uid)['_source']['nick_name']
        except:
            uname = 'unknown'
        temp.append(item['_source']['uid'])
        temp.append(uname)
        temp.append(item['_source']['s_origin_weibo_number'])
        weibo_detail.append(temp)
    results['top_weibo_number'] = weibo_detail
    return results
def self_delete(word):
    """Blacklist *word*: remove it from the active 'sensitive_words' hash
    and add it to the 'black_sensitive_words' set.

    Returns the string '1' (handler convention in this file).
    """
    # Fix: dropped the stray `print word` debug statement; the other
    # definition of this function in the file never had it.
    r.hdel('sensitive_words', word)
    r.sadd('black_sensitive_words', word)
    return '1'
def self_add_in(date, word, level, category):
    """Register *word* as an active sensitive word and log the addition
    in the per-day 'history_in_<date>' hash; returns '1'."""
    payload = json.dumps([level, category])
    r.hset('sensitive_words', word, payload)
    r.hset('history_in_' + date, word, payload)
    return '1'
def self_add_in(date, word, level, category):
    """Add *word* (with its level and category) both to the global
    sensitive-word hash and to the day's history hash; returns '1'."""
    for hash_name in ('sensitive_words', 'history_in_' + date):
        r.hset(hash_name, word, json.dumps([level, category]))
    return '1'
def _agg_buckets(field):
    # One terms-aggregation over the portrait index, flattened into
    # [key, doc_count] pairs (query shape comes from query_body_module).
    query_body = query_body_module(field)
    bucket_list = es.search(index='sensitive_user_portrait', doc_type='user',
                            body=query_body)['aggregations']['all_interests']['buckets']
    return [[item['key'], item['doc_count']] for item in bucket_list]


def _top_bci_list(sort_field):
    # Top rows from the daily 'bci' index ordered by *sort_field* (desc),
    # as [uid, uname, value]; uname falls back to 'unknown' when the
    # profile document is missing.
    query_body = {
        "query": {"match_all": {}},
        "sort": {sort_field: {"order": "desc"}}
    }
    date = ts2datetime(time.time() - 24 * 3600).replace('-', '')
    date = '20130907'  # test: hard-coded date override kept from the original
    hits = es.search(index=date, doc_type="bci", body=query_body)['hits']['hits']
    detail = []
    for item in hits:
        uid = item['_source']['uid']
        try:
            uname = es_user_profile.get(index='weibo_user', doc_type='user',
                                        id=uid)['_source']['nick_name']
        except:
            uname = 'unknown'
        detail.append([uid, uname, item['_source'][sort_field]])
    return detail


def get_attr(date):
    """Build the overview dict for the front page: user counts, pending
    recommendations, top aggregations, portrait ranks and daily top lists.

    Returns a dict keyed by section name.  (The *date* argument is kept for
    interface compatibility; the daily queries currently use a test date.)
    """
    results = dict()
    number = es.count(index="sensitive_user_portrait", doc_type="user")['count']
    results['total_number'] = number
    # count portrait users flagged sensitive (type == 1)
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "term": {"type": 1}
                }
            }
        }
    }
    sensitive_number = es.count(index="sensitive_user_portrait", doc_type="user",
                                body=query_body)['count']
    results['sensitive_number'] = sensitive_number
    results['influence_number'] = number - sensitive_number
    # pending "recommend in" entries stored as JSON lists in redis hashes
    recommend_in_sensitive = 0
    for v in r.hgetall('recommend_sensitive').values():
        if v:
            recommend_in_sensitive += len(json.loads(v))
    recommend_in_influence = 0
    for v in r.hgetall('recommend_influence').values():
        if v:
            recommend_in_influence += len(json.loads(v))
    results['recommend_in'] = recommend_in_influence + recommend_in_sensitive
    results['monitor_number'] = [4, 83]  # test
    results['new_sensitive_words'] = 5   # test
    # top terms-aggregations.  Fix: the original computed sensitive_geo twice
    # with identical queries — once is enough.
    results['sensitive_words'] = _agg_buckets('sensitive_words_string')
    results['sensitive_geo'] = _agg_buckets('sensitive_geo_string')
    results['sensitive_hashtag'] = _agg_buckets('sensitive_hashtag_string')
    results['psycho_status'] = _agg_buckets('psycho_status_string')
    results['political_tendency'] = [['left', 123], ['middle', 768], ['right', 1095]]  # test
    # domain and topic ranks
    results['domain_rank'] = get_top_user()
    results['topic_rank'] = get_topic_user()
    # portrait ranks
    results['importance'] = search_in_portrait('importance')
    results['sensitive'] = search_in_portrait('sensitive')
    results['influence'] = search_in_portrait('influence')
    results['activeness'] = search_in_portrait('activeness')
    # daily top lists from the bci index (previously three near-identical
    # copy-pasted blocks)
    results['comment_total'] = _top_bci_list('s_origin_weibo_comment_total_number')
    results['retweeted_total'] = _top_bci_list('s_origin_weibo_retweeted_total_number')
    results['top_weibo_number'] = _top_bci_list('s_origin_weibo_number')
    return results
def self_delete(word):
    """Drop *word* from 'sensitive_words' and move it onto the
    'black_sensitive_words' blacklist set; returns '1'."""
    active_hash = 'sensitive_words'
    blacklist_set = 'black_sensitive_words'
    r.hdel(active_hash, word)
    r.sadd(blacklist_set, word)
    return '1'
def ajax_show_all():
    """Return the cached 'overview' blob from redis, or None when it is
    missing or empty."""
    cached = r.get('overview')
    return cached if cached else None
def identify_in(data):
    """Accept a batch of "identify in" submissions.

    Each item of *data* is [date, uid, status, source]:
      - date:   e.g. '2015-09-22' (dashes stripped before use)
      - status: '1' = compute now, '2' = compute at an appointed time
      - source: '1' = from the sensitive recommend list, '2' = from influence
    Records each decision in the per-day identify_in_* redis hashes, prunes
    the handled uids from the recommend hashes, and merges the new uids into
    the per-day compute queues.  Returns '1'.
    """
    appoint_list = []       # [uid, source] pairs scheduled for appointed compute
    now_list = []           # [uid, source] pairs scheduled for immediate compute
    sensitive_list = set()  # uids handled from the sensitive recommend list
    influence_list = set()  # uids handled from the influence recommend list
    for item in data:
        date = item[0] # 2015-09-22
        date = str(date).replace('-', '')
        uid = item[1]
        status = str(item[2])
        source = str(item[3])
        if source == '1':
            r.hset('identify_in_sensitive_' + str(date), uid, status) # identify in user_list and compute status
            sensitive_list.add(uid)
        elif source == '2':
            r.hset('identify_in_influence_' + str(date), uid, status)
            influence_list.add(uid)
        if status == '1': # now
            now_list.append([uid, source])
        if status == '2': # appoint
            appoint_list.append([uid, source])
    # NOTE(review): `date` below carries the value from the LAST item — this
    # assumes all items in one batch share the same date; confirm with caller.
    # Remove the handled uids from the pending sensitive recommendations.
    sensitive_results = r.hget('recommend_sensitive', date)
    if sensitive_results and sensitive_results != '0':
        sensitive_results = json.loads(sensitive_results)
        revise_set = set(sensitive_results) - sensitive_list
        if revise_set:
            r.hset('recommend_sensitive', date, json.dumps(list(revise_set)))
        else:
            # nothing left pending for this date
            r.hdel('recommend_sensitive', date)
    # Same pruning for the influence recommendations.
    influence_results = r.hget('recommend_influence', date)
    if influence_results and influence_results != '0':
        influence_results = json.loads(influence_results)
        revise_set = set(influence_results) - influence_list
        if revise_set:
            r.hset('recommend_influence', date, json.dumps(list(revise_set)))
        else:
            r.hdel('recommend_influence', date)
    # about compute
    compute_now_list = r.hget('compute_now', date)
    compute_appoint_list = r.hget('compute_appoint', date)
    # compute now user list: merge with any previously queued entries
    if compute_now_list:
        now_list.extend(json.loads(compute_now_list))
        r.hset('compute_now', date, json.dumps(now_list))
    else:
        r.hset('compute_now', date, json.dumps(now_list))
    # appointted compute user list
    if compute_appoint_list:
        appoint_list.extend(json.loads(compute_appoint_list))
        r.hset('compute_appoint', date, json.dumps(appoint_list))
    else:
        r.hset('compute_appoint', date, json.dumps(appoint_list))
    # finish compute, revise 'identify_in_state' uid status
    return '1'
def compute_mid_result(task_name, task_submit_date):
    """Aggregate the monitor documents of *task_name* from *task_submit_date*
    up to now into per-attribute time series.

    Counts, sentiments and sensitive scores are keyed by 15-min timestamp;
    geo and hashtag distributions are keyed by day.

    Returns the result dict (the original built it but never returned it).
    """
    result = {'count_0': {}, 'count_1': {},
              'sentiment_0_126': {}, 'sentiment_0_127': {}, 'sentiment_0_128': {},
              'sentiment_0_129': {}, 'sentiment_0_130': {},
              'sentiment_1_126': {}, 'sentiment_1_127': {}, 'sentiment_1_128': {},
              'sentiment_1_129': {}, 'sentiment_1_130': {},
              'sensitive_score': {}, 'geo_0': {}, 'geo_1': {},
              'hashtag_0': {}, 'hashtag_1': {}}
    # geo & hashtag: day; other attributes: 15 min
    search_time_segment = 3600 * 4  # each bulk search covers a 4-hour window
    start_ts = date2ts(task_submit_date)
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    # test override kept from the original (note: now_date above still comes
    # from the real clock)
    now_ts = datetime2ts('2013-09-08')
    date_ts = datetime2ts(now_date)
    # round "now" up to the next 15-min boundary
    segment = int((now_ts - date_ts) / 900) + 1
    end_ts = date_ts + segment * 900
    begin_ts = start_ts
    while begin_ts < end_ts:
        query_body = {'range': {'timestamp': {'from': begin_ts,
                                              'to': begin_ts + search_time_segment}}}
        # Fix: dropped the pointless `except Exception, e: raise e` wrapper,
        # which only discarded the original traceback.
        mid_result_list = es.search(index=monitor_index_name, doc_type=task_name,
                                    body={'query': query_body, 'size': 100000,
                                          'sort': [{'timestamp': {'order': 'asc'}}]})['hits']['hits']
        for mid_result_item in mid_result_list:
            result_item = mid_result_item['_source']
            timestamp = result_item['timestamp']
            # attr_count: per-sensitivity weibo counts at this timestamp
            count_dict = json.loads(result_item['count'])
            for sensitive in count_dict:
                result['count_' + sensitive][str(timestamp)] = count_dict[sensitive]
            # attr_sentiment: nested {sensitive: {sentiment: value}}
            sensitive_sentiment_dict = json.loads(result_item['sentiment'])
            for sensitive in sensitive_sentiment_dict:
                sentiment_dict = sensitive_sentiment_dict[sensitive]
                for sentiment in sentiment_dict:
                    sentiment_key = 'sentiment_' + sensitive + '_' + sentiment
                    result[sentiment_key][str(timestamp)] = sentiment_dict[sentiment]
            # attr_sensitive_score: weight each word count by its level
            if 'sensitive_word' in result_item:
                sensitive_word_dict = json.loads(result_item['sensitive_word'])
            else:
                sensitive_word_dict = {}
            ts_word_score = 0
            for word in sensitive_word_dict:
                search_word = word.encode('utf-8')
                try:
                    word_identify = json.loads(word_r.hget('sensitive_words', search_word))
                except:
                    word_identify = [2]  # default level for words missing from redis
                ts_word_score += sensitive_word_dict[word] * word_identify[0]
            result['sensitive_score'][str(timestamp)] = ts_word_score
            # attr_geo: accumulate per-day geo counts
            timestamp_date = ts2datetime(timestamp)
            sensitive_geo_dict = json.loads(result_item['geo'])
            for sensitive in sensitive_geo_dict:
                if timestamp_date not in result['geo_' + sensitive]:
                    result['geo_' + sensitive][timestamp_date] = {}
                geo_dict = sensitive_geo_dict[sensitive]
                for geo in geo_dict:
                    try:
                        result['geo_' + sensitive][timestamp_date][geo] += geo_dict[geo]
                    except KeyError:
                        result['geo_' + sensitive][timestamp_date][geo] = geo_dict[geo]
            # attr_hashtag: accumulate per-day hashtag counts
            if 'hashtag' in result_item:
                sensitive_hashtag_dict = json.loads(result_item['hashtag'])
            else:
                sensitive_hashtag_dict = {}
                result['hashtag_0'][timestamp_date] = {}
                result['hashtag_1'][timestamp_date] = {}
            # Fix: the original had this `for` line duplicated, re-running the
            # accumulation once per key and multiplying every hashtag count.
            for sensitive in sensitive_hashtag_dict:
                if timestamp_date not in result['hashtag_' + sensitive]:
                    result['hashtag_' + sensitive][timestamp_date] = {}
                hashtag_dict = sensitive_hashtag_dict[sensitive]
                for hashtag in hashtag_dict:
                    try:
                        result['hashtag_' + sensitive][timestamp_date][hashtag] += hashtag_dict[hashtag]
                    except KeyError:
                        result['hashtag_' + sensitive][timestamp_date][hashtag] = hashtag_dict[hashtag]
        begin_ts += search_time_segment
    return result
def identify_in(data):
    """Batch handler for "identify in" submissions (duplicate definition;
    this later one shadows the earlier identical function).

    Each item of *data* is [date, uid, status, source]:
      - date:   e.g. '2015-09-22' (dashes stripped before use)
      - status: '1' = compute now, '2' = compute at an appointed time
      - source: '1' = sensitive recommend list, '2' = influence list
    Writes the decisions into per-day identify_in_* hashes, prunes handled
    uids from the recommend hashes, merges new uids into the per-day compute
    queues, and returns '1'.
    """
    appoint_list = []       # [uid, source] pairs for appointed compute
    now_list = []           # [uid, source] pairs for immediate compute
    sensitive_list = set()  # uids handled from the sensitive list
    influence_list = set()  # uids handled from the influence list
    for item in data:
        date = item[0] # 2015-09-22
        date = str(date).replace('-','')
        uid = item[1]
        status = str(item[2])
        source = str(item[3])
        if source == '1':
            r.hset('identify_in_sensitive_'+str(date), uid, status) # identify in user_list and compute status
            sensitive_list.add(uid)
        elif source == '2':
            r.hset('identify_in_influence_'+str(date), uid, status)
            influence_list.add(uid)
        if status == '1': # now
            now_list.append([uid, source])
        if status == '2': # appoint
            appoint_list.append([uid, source])
    # NOTE(review): `date` below is taken from the LAST item of *data*; this
    # assumes a batch covers a single date — confirm with the caller.
    # Prune handled uids from the pending sensitive recommendations.
    sensitive_results = r.hget('recommend_sensitive', date)
    if sensitive_results and sensitive_results != '0':
        sensitive_results = json.loads(sensitive_results)
        revise_set = set(sensitive_results) - sensitive_list
        if revise_set:
            r.hset('recommend_sensitive', date, json.dumps(list(revise_set)))
        else:
            # nothing left pending for this date
            r.hdel('recommend_sensitive', date)
    # Same pruning for the influence recommendations.
    influence_results = r.hget('recommend_influence', date)
    if influence_results and influence_results != '0':
        influence_results = json.loads(influence_results)
        revise_set = set(influence_results) - influence_list
        if revise_set:
            r.hset('recommend_influence', date, json.dumps(list(revise_set)))
        else:
            r.hdel('recommend_influence', date)
    # about compute
    compute_now_list = r.hget('compute_now', date)
    compute_appoint_list = r.hget('compute_appoint', date)
    # compute now user list: merge with anything already queued
    if compute_now_list:
        now_list.extend(json.loads(compute_now_list))
        r.hset('compute_now', date, json.dumps(now_list))
    else:
        r.hset('compute_now', date, json.dumps(now_list))
    # appointted compute user list
    if compute_appoint_list:
        appoint_list.extend(json.loads(compute_appoint_list))
        r.hset('compute_appoint', date, json.dumps(appoint_list))
    else:
        r.hset('compute_appoint', date, json.dumps(appoint_list))
    # finish compute, revise 'identify_in_state' uid status
    return '1'