def show_in_history(date):
    """Return detail rows for users identified-in on *date*.

    Merges three redis hashes (sensitive / influence / manual
    recommendations, each mapping uid -> identify status), fetches the
    per-user detail rows for the previous day, appends each uid's status
    to its row and returns all rows sorted descending by column 5.

    Fixes: removed stray debug ``print date``, removed the duplicated
    ``results = []`` initialisation, and collapsed three copy-pasted
    accumulation loops into one data-driven loop.
    """
    # detail rows are computed against the previous day's data
    work_date = ts2datetime(datetime2ts(date) - DAY)
    results = []
    # (redis hash name, sensitive flag passed to get_sensitive_user_detail)
    sources = [
        ('identify_in_sensitive_' + str(date), 1),
        ('identify_in_influence_' + str(date), 0),
        ('identify_in_manual_' + str(date), 0),
    ]
    for hash_name, sensitive_flag in sources:
        status_dict = r.hgetall(hash_name)
        uid_list = status_dict.keys()
        if not uid_list:
            continue
        detail_list = get_sensitive_user_detail(uid_list, work_date, sensitive_flag)
        for item in detail_list:
            # item[0] is the uid; append its identify-in status
            item.append(status_dict[item[0]])
            results.append(item)
    sorted_results = sorted(results, key=lambda x: x[5], reverse=True)
    return sorted_results
def recommend_new_words(date_list):
    """Collect recommended sensitive words over the given dates.

    For each date, reads the ``recommend_sensitive_words_<date>`` redis
    hash (word -> json [uid_list, score]) and resolves the uids to nick
    names via the profile index; falls back to the raw uid list when the
    lookup fails.  Returns [word, uname_list, score] rows sorted
    descending by score.
    """
    results = []
    for raw_date in date_list:
        day = raw_date.replace('-', '')
        words_dict = r.hgetall('recommend_sensitive_words_' + day)
        if not words_dict:
            continue
        for word, payload in words_dict.items():
            value = json.loads(payload)
            uid_list = value[0]
            uname = []
            try:
                docs = es_user_profile.mget(
                    index='weibo_user', doc_type='user',
                    body={'ids': uid_list})['docs']
                for doc in docs:
                    if doc['found']:
                        uname.append(doc['_source']['nick_name'])
                    else:
                        uname.append('unknown')
            except:
                # best-effort: show raw uids when the profile lookup fails
                uname = uid_list
            results.append([word, uname, value[1]])
    sorted_results = sorted(results, key=lambda x: x[2], reverse=True)
    return sorted_results
def search_sensitive_words(level, category):
    """Filter the ``sensitive_words`` redis hash by level and category.

    level: 0 means "all levels"; category: '' means "all categories".
    Each hash value is json ``[level, category]``.  Returns
    ``[word, level, category]`` rows.

    Fixes: NameError in the level-only-empty branch — the original
    compared against the misspelled name ``catetory``.  Also dropped the
    unused ``results`` local and redundant re-initialisations.
    """
    word_list = []
    words_dict = r.hgetall('sensitive_words')
    if words_dict:
        if int(level) == 0 and not category:
            # no filtering at all
            for k, v in words_dict.items():
                word_state = json.loads(v)
                word_list.append([k, word_state[0], word_state[1]])
        elif level and category:
            # filter by both level and category
            for k, v in words_dict.items():
                word_state = json.loads(v)
                if int(level) == int(word_state[0]) and category == word_state[1]:
                    word_list.append([k, word_state[0], word_state[1]])
        elif not level and category:
            # filter by category only (was: ``catetory`` -> NameError)
            for k, v in words_dict.items():
                word_state = json.loads(v)
                if category == word_state[1]:
                    word_list.append([k, word_state[0], word_state[1]])
        else:
            # filter by level only
            for k, v in words_dict.items():
                word_state = json.loads(v)
                if int(level) == int(word_state[0]):
                    word_list.append([k, word_state[0], word_state[1]])
    return word_list
def search_follower(uid, sensitive):
    """Top-20 users who retweeted *uid*, from the cluster redis db.

    Reads the ``(sensitive_)be_retweet_<uid>`` hash, accumulates the
    per-uid counts (hash values are strings from redis, so accumulation
    concatenates exactly as the original did), then decorates the top 20
    with nick name and whether they exist in the sensitive portrait
    index.  Returns ``[rows, total_distinct]`` or ``[None, 0]``.
    """
    stat_results = dict()
    r = r_cluster
    if sensitive:
        hash_key = 'sensitive_be_retweet_' + str(uid)
    else:
        hash_key = 'be_retweet_' + str(uid)
    br_uid_results = r.hgetall(hash_key)
    if br_uid_results:
        for br_uid in br_uid_results:
            if br_uid == uid:
                continue
            if br_uid in stat_results:
                stat_results[br_uid] += br_uid_results[br_uid]
            else:
                stat_results[br_uid] = br_uid_results[br_uid]
    if not stat_results:
        return [None, 0]
    try:
        sort_stat_results = sorted(stat_results.items(),
                                   key=lambda pair: pair[1],
                                   reverse=True)[:20]
    except:
        return [None, 0]
    uid_list = [pair[0] for pair in sort_stat_results]
    es_profile_results = es_user_profile.mget(
        index='weibo_user', doc_type='user', body={'ids': uid_list})['docs']
    es_portrait_results = es.mget(
        index='sensitive_user_portrait', doc_type='user',
        body={'ids': uid_list})['docs']
    result_list = []
    for profile_item, portrait_item in zip(es_profile_results, es_portrait_results):
        uid = profile_item['_id']
        try:
            uname = profile_item['_source']['nick_name']
        except:
            uname = u'unknown'
        # present in the portrait index iff the doc carries a _source
        in_status = 1 if '_source' in portrait_item else 0
        result_list.append([uid, [uname, stat_results[uid], in_status]])
    return [result_list[:20], len(stat_results)]
def search_follower(uid, sensitive):
    """Top-20 users who retweeted *uid*, aggregated over all R_DICT dbs.

    Same contract as the single-db variant: reads the
    ``(sensitive_)be_retweet_<uid>`` hash from every redis db in R_DICT,
    accumulates per-uid values (strings from redis — accumulation
    concatenates, as in the original), then decorates the top 20 with
    nick name and portrait-index presence.  Returns
    ``[rows, total_distinct]`` or ``[None, 0]``.
    """
    stat_results = dict()
    for db_num in R_DICT:
        r = R_DICT[db_num]
        if sensitive:
            hash_key = "sensitive_be_retweet_" + str(uid)
        else:
            hash_key = "be_retweet_" + str(uid)
        br_uid_results = r.hgetall(hash_key)
        if not br_uid_results:
            continue
        for br_uid in br_uid_results:
            if br_uid == uid:
                continue
            if br_uid in stat_results:
                stat_results[br_uid] += br_uid_results[br_uid]
            else:
                stat_results[br_uid] = br_uid_results[br_uid]
    if not stat_results:
        return [None, 0]
    try:
        sort_stat_results = sorted(stat_results.items(),
                                   key=lambda pair: pair[1],
                                   reverse=True)[:20]
    except:
        return [None, 0]
    uid_list = [pair[0] for pair in sort_stat_results]
    es_profile_results = es_user_profile.mget(
        index="weibo_user", doc_type="user", body={"ids": uid_list})["docs"]
    es_portrait_results = es.mget(
        index="sensitive_user_portrait", doc_type="user",
        body={"ids": uid_list})["docs"]
    result_list = []
    for profile_item, portrait_item in zip(es_profile_results, es_portrait_results):
        uid = profile_item["_id"]
        try:
            uname = profile_item["_source"]["nick_name"]
        except:
            uname = u"unknown"
        # present in the portrait index iff the doc carries a _source
        in_status = 1 if "_source" in portrait_item else 0
        result_list.append([uid, [uname, stat_results[uid], in_status]])
    return [result_list[:20], len(stat_results)]
def show_in_history(date, sensitive):
    """Return identify-in history rows for one day.

    Reads either the sensitive or the influence recommendation-history
    hash for *date* (dashes stripped), fetches the matching user detail
    rows and appends each uid's stored status to its row.
    """
    results = []
    day = str(date).replace('-', '')
    if sensitive:
        # sensitive user recommendation history
        status_dict = r.hgetall('identify_in_sensitive_' + str(day))
        sensitive_flag = 1
    else:
        status_dict = r.hgetall('identify_in_influence_' + str(day))
        sensitive_flag = 0
    if status_dict:
        results = get_sensitive_user_detail(status_dict.keys(), day, sensitive_flag)
        for item in results:
            # item[0] is the uid
            item.append(status_dict[item[0]])
    return results
def search_retweet(uid, sensitive):
    """Top-20 users whom *uid* retweeted, from the cluster redis db.

    Reads the ``(sensitive_)retweet_<uid>`` hash, accumulates per-uid
    values, and decorates the top 20 with nick name and whether the user
    exists in the sensitive portrait index.  Returns
    ``[rows, total_distinct]`` or ``[None, 0]``.

    Fixes: ``dict.has_key`` (removed in Python 3, deprecated since 2.6)
    replaced with the ``in`` operator; dropped the ``if 1:`` scaffold and
    the unused ``results`` local.
    """
    stat_results = dict()
    r = r_cluster
    if not sensitive:
        ruid_results = r.hgetall('retweet_' + str(uid))
    else:
        # sensitive weibo live under a separate key space
        ruid_results = r.hgetall('sensitive_retweet_' + str(uid))
    if ruid_results:
        for ruid in ruid_results:
            if ruid != uid:
                if ruid in stat_results:
                    stat_results[ruid] += ruid_results[ruid]
                else:
                    stat_results[ruid] = ruid_results[ruid]
    if stat_results:
        sort_stat_results = sorted(stat_results.items(),
                                   key=lambda x: x[1], reverse=True)[:20]
    else:
        return [None, 0]
    uid_list = [item[0] for item in sort_stat_results]
    es_profile_results = es_user_profile.mget(
        index='weibo_user', doc_type='user', body={'ids': uid_list})['docs']
    es_portrait_results = es.mget(
        index='sensitive_user_portrait', doc_type='user',
        body={'ids': uid_list})['docs']
    result_list = []
    for i in range(len(es_profile_results)):
        item = es_profile_results[i]
        uid = item['_id']
        if item['found']:
            uname = item['_source']['nick_name']
        else:
            uname = u'unknown'
        portrait_item = es_portrait_results[i]
        # in_status flags presence in the sensitive portrait index
        if portrait_item['found']:
            in_status = 1
        else:
            in_status = 0
        result_list.append([uid, [uname, stat_results[uid], in_status]])
    return [result_list[:20], len(stat_results)]
def search_retweet(uid, sensitive):
    """Top-20 users whom *uid* retweeted, aggregated over all R_DICT dbs.

    Same contract as the single-db variant but scans every redis db in
    R_DICT.  Returns ``[rows, total_distinct]`` or ``[None, 0]``.

    Fixes: ``dict.has_key`` (removed in Python 3, deprecated since 2.6)
    replaced with the ``in`` operator; dropped the unused ``results``
    local.
    """
    stat_results = dict()
    for db_num in R_DICT:
        r = R_DICT[db_num]
        if not sensitive:
            ruid_results = r.hgetall("retweet_" + str(uid))
        else:
            # sensitive weibo live under a separate key space
            ruid_results = r.hgetall("sensitive_retweet_" + str(uid))
        if ruid_results:
            for ruid in ruid_results:
                if ruid != uid:
                    if ruid in stat_results:
                        stat_results[ruid] += ruid_results[ruid]
                    else:
                        stat_results[ruid] = ruid_results[ruid]
    if stat_results:
        sort_stat_results = sorted(stat_results.items(),
                                   key=lambda x: x[1], reverse=True)[:20]
    else:
        return [None, 0]
    uid_list = [item[0] for item in sort_stat_results]
    es_profile_results = es_user_profile.mget(
        index="weibo_user", doc_type="user", body={"ids": uid_list})["docs"]
    es_portrait_results = es.mget(
        index="sensitive_user_portrait", doc_type="user",
        body={"ids": uid_list})["docs"]
    result_list = []
    for i in range(len(es_profile_results)):
        item = es_profile_results[i]
        uid = item["_id"]
        if item["found"]:
            uname = item["_source"]["nick_name"]
        else:
            uname = u"unknown"
        portrait_item = es_portrait_results[i]
        # in_status flags presence in the sensitive portrait index
        if portrait_item["found"]:
            in_status = 1
        else:
            in_status = 0
        result_list.append([uid, [uname, stat_results[uid], in_status]])
    return [result_list[:20], len(stat_results)]
def lastest_identify_in():
    """Return the identify-in history of the last 7 days as a dict.

    Walks the seven days before the reference date, merging every
    ``history_in_<yyyymmdd>`` redis hash (values are json) into one
    dict keyed by hash field.

    Fixes: removed the dead ``now_ts = time.time()`` store that was
    immediately overwritten by the hard-coded date.
    NOTE(review): the reference date is pinned to '2013-09-08' (test
    data) — confirm whether this should be the current time.
    NOTE(review): function name keeps the original misspelling
    ("lastest") because external callers reference it.
    """
    results = dict()
    now_ts = datetime2ts('2013-09-08')
    for day_offset in range(1, 8):
        ts = now_ts - day_offset * 3600 * 24
        date = ts2datetime(ts).replace('-', '')
        words_dict = r.hgetall('history_in_' + date)
        for field in words_dict:
            results[field] = json.loads(words_dict[field])
    return results
def ajax_history_delete():
    """Flask view: deleted-user history, for one date or for all dates.

    Without ``show_all``: returns the uids deleted on ``date``
    (``delete_user`` hash field, json list).  With ``show_all``: merges
    every field of the hash.  Responds with json user info, or "0" when
    nothing is found.
    """
    date = request.args.get("date", "")  # e.g. '2013-09-01'
    date = str(date).replace("-", "")
    search_all = request.args.get("show_all", "")  # truthy => return all
    if search_all:
        all_temp = r.hgetall("delete_user")
        if all_temp:
            uid_list = []
            for packed in all_temp.values():
                uid_list.extend(json.loads(packed))
            return json.dumps(get_user_info(uid_list))
    else:
        temp = r.hget("delete_user", date)
        if temp:
            return json.dumps(get_user_info(json.loads(temp)))
    return "0"
def ajax_history_delete():
    """Flask view: deleted-user history, for one date or for all dates.

    Without ``show_all``: returns the uids deleted on ``date``
    (``delete_user`` hash field, json list).  With ``show_all``: merges
    every field of the hash.  Responds with json user info, or '0' when
    nothing is found.
    """
    date = request.args.get('date', '')  # e.g. '2013-09-01'
    date = str(date).replace('-', '')
    search_all = request.args.get('show_all', '')  # truthy => return all
    if search_all:
        all_temp = r.hgetall('delete_user')
        if all_temp:
            uid_list = []
            for packed in all_temp.values():
                uid_list.extend(json.loads(packed))
            return json.dumps(get_user_info(uid_list))
    else:
        temp = r.hget('delete_user', date)
        if temp:
            return json.dumps(get_user_info(json.loads(temp)))
    return '0'
def _bucket_pairs(field):
    """Run the ``query_body_module(field)`` aggregation against the
    sensitive_user_portrait index and return ``[key, doc_count]`` pairs."""
    query_body = query_body_module(field)
    buckets = es.search(index='sensitive_user_portrait', doc_type='user',
                        body=query_body)['aggregations']['all_interests']['buckets']
    return [[b['key'], b['doc_count']] for b in buckets]


def _top_bci(sort_field):
    """Top bci docs sorted descending by *sort_field*, as
    ``[uid, uname, value]`` rows; uname falls back to 'unknown'.

    NOTE(review): the bci index date is pinned to '20130907' (test
    data), exactly as in the original which overwrote the computed
    "yesterday" — confirm whether it should be dynamic."""
    query_body = {
        "query": {"match_all": {}},
        "sort": {sort_field: {"order": "desc"}},
    }
    date = '20130907'
    hits = es.search(index=date, doc_type="bci", body=query_body)['hits']['hits']
    rows = []
    for hit in hits:
        uid = hit['_source']['uid']
        try:
            uname = es_user_profile.get(index='weibo_user', doc_type='user',
                                        id=uid)['_source']['nick_name']
        except:
            uname = 'unknown'
        rows.append([uid, uname, hit['_source'][sort_field]])
    return rows


def get_attr(date):
    """Build the overview statistics dict for the portrait dashboard.

    Fixes: the ``sensitive_geo_string`` aggregation was executed twice
    with the identical query (the second run overwrote the first with
    the same value — a wasted ES round-trip); the copy-pasted
    aggregation and top-bci loops were extracted into ``_bucket_pairs``
    and ``_top_bci``; dead ``date = ts2datetime(...)`` stores (always
    overwritten by the hard-coded test date) were removed along with
    commented-out query blocks and the unused ``domain_list`` local.
    """
    results = dict()
    number = es.count(index="sensitive_user_portrait", doc_type="user")['count']
    results['total_number'] = number
    # count of users flagged type == 1 (sensitive)
    query_body = {"query": {"filtered": {"filter": {"term": {"type": 1}}}}}
    sensitive_number = es.count(index="sensitive_user_portrait",
                                doc_type="user", body=query_body)['count']
    results['sensitive_number'] = sensitive_number
    results['influence_number'] = number - sensitive_number

    # total recommended-in users across both recommendation hashes
    recommend_in = 0
    for hash_name in ('recommend_sensitive', 'recommend_influence'):
        for v in r.hgetall(hash_name).values():
            if v:
                recommend_in += len(json.loads(v))
    results['recommend_in'] = recommend_in
    results['monitor_number'] = [4, 83]  # test
    results['new_sensitive_words'] = 5  # test

    results['sensitive_words'] = _bucket_pairs('sensitive_words_string')
    results['sensitive_geo'] = _bucket_pairs('sensitive_geo_string')
    results['sensitive_hashtag'] = _bucket_pairs('sensitive_hashtag_string')
    results['psycho_status'] = _bucket_pairs('psycho_status_string')
    results['political_tendency'] = [['left', 123], ['middle', 768], ['right', 1095]]

    # domain and topic rankings
    results['domain_rank'] = get_top_user()
    results['topic_rank'] = get_topic_user()

    # per-metric user rankings
    results['importance'] = search_in_portrait('importance')
    results['sensitive'] = search_in_portrait('sensitive')
    results['influence'] = search_in_portrait('influence')
    results['activeness'] = search_in_portrait('activeness')

    # top users by bci counters
    results['comment_total'] = _top_bci('s_origin_weibo_comment_total_number')
    results['retweeted_total'] = _top_bci('s_origin_weibo_retweeted_total_number')
    results['top_weibo_number'] = _top_bci('s_origin_weibo_number')
    return results
def _bucket_pairs(field):
    """Run the ``query_body_module(field)`` aggregation against the
    sensitive_user_portrait index and return ``[key, doc_count]`` pairs."""
    query_body = query_body_module(field)
    buckets = es.search(index='sensitive_user_portrait', doc_type='user',
                        body=query_body)['aggregations']['all_interests']['buckets']
    return [[b['key'], b['doc_count']] for b in buckets]


def _top_bci(sort_field):
    """Top bci docs sorted descending by *sort_field*, as
    ``[uid, uname, value]`` rows; uname falls back to 'unknown'.

    NOTE(review): the bci index date is pinned to '20130907' (test
    data), exactly as in the original which overwrote the computed
    "yesterday" — confirm whether it should be dynamic."""
    query_body = {
        "query": {"match_all": {}},
        "sort": {sort_field: {"order": "desc"}},
    }
    date = '20130907'
    hits = es.search(index=date, doc_type="bci", body=query_body)['hits']['hits']
    rows = []
    for hit in hits:
        uid = hit['_source']['uid']
        try:
            uname = es_user_profile.get(index='weibo_user', doc_type='user',
                                        id=uid)['_source']['nick_name']
        except:
            uname = 'unknown'
        rows.append([uid, uname, hit['_source'][sort_field]])
    return rows


def get_attr(date):
    """Build the overview statistics dict for the portrait dashboard.

    Fixes: the ``sensitive_geo_string`` aggregation was executed twice
    with the identical query (the second run overwrote the first with
    the same value — a wasted ES round-trip); the copy-pasted
    aggregation and top-bci loops were extracted into ``_bucket_pairs``
    and ``_top_bci``; dead ``date = ts2datetime(...)`` stores (always
    overwritten by the hard-coded test date) were removed along with
    commented-out query blocks and the unused ``domain_list`` local.
    """
    results = dict()
    number = es.count(index="sensitive_user_portrait", doc_type="user")['count']
    results['total_number'] = number
    # count of users flagged type == 1 (sensitive)
    query_body = {"query": {"filtered": {"filter": {"term": {"type": 1}}}}}
    sensitive_number = es.count(index="sensitive_user_portrait",
                                doc_type="user", body=query_body)['count']
    results['sensitive_number'] = sensitive_number
    results['influence_number'] = number - sensitive_number

    # total recommended-in users across both recommendation hashes
    recommend_in = 0
    for hash_name in ('recommend_sensitive', 'recommend_influence'):
        for v in r.hgetall(hash_name).values():
            if v:
                recommend_in += len(json.loads(v))
    results['recommend_in'] = recommend_in
    results['monitor_number'] = [4, 83]  # test
    results['new_sensitive_words'] = 5  # test

    results['sensitive_words'] = _bucket_pairs('sensitive_words_string')
    results['sensitive_geo'] = _bucket_pairs('sensitive_geo_string')
    results['sensitive_hashtag'] = _bucket_pairs('sensitive_hashtag_string')
    results['psycho_status'] = _bucket_pairs('psycho_status_string')
    results['political_tendency'] = [['left', 123], ['middle', 768], ['right', 1095]]

    # domain and topic rankings
    results['domain_rank'] = get_top_user()
    results['topic_rank'] = get_topic_user()

    # per-metric user rankings
    results['importance'] = search_in_portrait('importance')
    results['sensitive'] = search_in_portrait('sensitive')
    results['influence'] = search_in_portrait('influence')
    results['activeness'] = search_in_portrait('activeness')

    # top users by bci counters
    results['comment_total'] = _top_bci('s_origin_weibo_comment_total_number')
    results['retweeted_total'] = _top_bci('s_origin_weibo_retweeted_total_number')
    results['top_weibo_number'] = _top_bci('s_origin_weibo_number')
    return results