def identify_in(data):
    """Record identify-in decisions for a batch of users and update redis.

    data: iterable of [date, uid, status, source] rows, e.g.
          ['2015-09-22', 123456, '1', '1'].
          status: '1' = compute now, '2' = appointed compute.
          source: '1' = sensitive user list, '2' = influence user list.

    Returns '1' on success, '0' for an empty batch.

    NOTE(review): the recommend/compute hashes below are keyed by the LAST
    row's date — this assumes all rows in one batch share the same date;
    confirm against callers.
    """
    if not data:
        # The original raised UnboundLocalError on `date` below when the
        # batch was empty; treat it as a no-op failure instead.
        return '0'
    appoint_list = []
    now_list = []
    sensitive_list = set()
    influence_list = set()
    for item in data:
        date = str(item[0]).replace('-', '')  # '2015-09-22' -> '20150922'
        uid = item[1]
        status = str(item[2])
        source = str(item[3])
        if source == '1':
            # identify in user_list and compute status
            r.hset('identify_in_sensitive_' + date, uid, status)
            sensitive_list.add(uid)
        elif source == '2':
            r.hset('identify_in_influence_' + date, uid, status)
            influence_list.add(uid)
        if status == '1':  # compute now
            now_list.append([uid, source])
        if status == '2':  # appointed compute
            appoint_list.append([uid, source])
    # Remove the users just identified from the pending recommend hashes;
    # delete the field entirely once nothing is left.
    sensitive_results = r.hget('recommend_sensitive', date)
    if sensitive_results and sensitive_results != '0':
        revise_set = set(json.loads(sensitive_results)) - sensitive_list
        if revise_set:
            r.hset('recommend_sensitive', date, json.dumps(list(revise_set)))
        else:
            r.hdel('recommend_sensitive', date)
    influence_results = r.hget('recommend_influence', date)
    if influence_results and influence_results != '0':
        revise_set = set(json.loads(influence_results)) - influence_list
        if revise_set:
            r.hset('recommend_influence', date, json.dumps(list(revise_set)))
        else:
            r.hdel('recommend_influence', date)
    # Merge with any previously queued compute users. The original wrote the
    # same hset in both branches of an if/else, so merge-then-write once.
    compute_now_list = r.hget('compute_now', date)
    if compute_now_list:
        now_list.extend(json.loads(compute_now_list))
    r.hset('compute_now', date, json.dumps(now_list))
    compute_appoint_list = r.hget('compute_appoint', date)
    if compute_appoint_list:
        appoint_list.extend(json.loads(compute_appoint_list))
    r.hset('compute_appoint', date, json.dumps(appoint_list))
    # finish compute, revise 'identify_in_state' uid status
    return '1'
def identify_in(data):
    """Store identify-in status for a batch of users and refresh redis state.

    data: iterable of [date, uid, status, source] rows
          (date like '2015-09-22'; status '1' = now, '2' = appoint;
          source '1' = sensitive, '2' = influence).

    Returns '1' on success, '0' for an empty batch.

    NOTE(review): recommend/compute hashes are keyed by the final row's
    date — assumes a single-date batch; verify with callers.
    """
    if not data:
        # Guard: `date` below was unbound for an empty batch in the
        # original, raising UnboundLocalError.
        return '0'
    appoint_list = []
    now_list = []
    sensitive_list = set()
    influence_list = set()
    for item in data:
        date = str(item[0]).replace('-', '')  # normalize '2015-09-22'
        uid = item[1]
        status = str(item[2])
        source = str(item[3])
        if source == '1':
            # identify in user_list and compute status
            r.hset('identify_in_sensitive_' + date, uid, status)
            sensitive_list.add(uid)
        elif source == '2':
            r.hset('identify_in_influence_' + date, uid, status)
            influence_list.add(uid)
        if status == '1':  # now
            now_list.append([uid, source])
        if status == '2':  # appoint
            appoint_list.append([uid, source])
    # Drop identified users from the pending recommend hashes.
    for hash_key, identified in (('recommend_sensitive', sensitive_list),
                                 ('recommend_influence', influence_list)):
        stored = r.hget(hash_key, date)
        if stored and stored != '0':
            remaining = set(json.loads(stored)) - identified
            if remaining:
                r.hset(hash_key, date, json.dumps(list(remaining)))
            else:
                r.hdel(hash_key, date)
    # Merge with previously queued compute users; the original duplicated
    # the identical hset in both if/else branches, so write once here.
    for hash_key, batch in (('compute_now', now_list),
                            ('compute_appoint', appoint_list)):
        stored = r.hget(hash_key, date)
        if stored:
            batch.extend(json.loads(stored))
        r.hset(hash_key, date, json.dumps(batch))
    # finish compute, revise 'identify_in_state' uid status
    return '1'
def recommend_in_top_influence(date):
    """Return detail info for influence-recommended users on `date`.

    `date` is 'YYYY-MM-DD'; the redis hash field is keyed 'YYYYMMDD'.
    Returns [] when nothing is recommended for that day.
    """
    day_key = date.replace('-', '')
    payload = r.hget('recommend_influence', day_key)
    if not payload:
        return []
    sensitive = 0  # influence list is the non-sensitive branch
    return get_sensitive_user_detail(json.loads(payload), day_key, sensitive)
def recommend_in_sensitive(date):
    """Return detail info for sensitive-recommended users on `date`.

    `date` is 'YYYY-MM-DD'; the redis hash field is keyed 'YYYYMMDD'.
    When no recommendation is stored, the raw hget result (None or '0')
    is passed through unchanged, mirroring the original contract.
    """
    day_key = date.replace('-', '')
    payload = r.hget('recommend_sensitive', day_key)
    if not payload:
        return payload  # return '0'
    sensitive = 1  # sensitive-user branch
    return get_sensitive_user_detail(json.loads(payload), day_key, sensitive)
def recommend_in_top_influence(date):
    """Fetch details for the day's influence recommendations.

    Accepts a 'YYYY-MM-DD' date; redis fields use the compact 'YYYYMMDD'
    form. An absent entry yields an empty list.
    """
    compact = date.replace('-', '')
    stored = r.hget('recommend_influence', compact)
    if stored:
        uid_list = json.loads(stored)
        return get_sensitive_user_detail(uid_list, compact, 0)
    return []
def recommend_in_sensitive(date):
    """Fetch details for the day's sensitive recommendations.

    Accepts a 'YYYY-MM-DD' date; redis fields use 'YYYYMMDD'. When the
    entry is missing/falsy the raw hget value is returned as-is
    (None or '0'), preserving the original contract.
    """
    compact = date.replace('-', '')
    stored = r.hget('recommend_sensitive', compact)
    if stored:
        uid_list = json.loads(stored)
        return get_sensitive_user_detail(uid_list, compact, 1)
    return stored  # return '0'
def sort_sensitive_words(words_list):
    """Extend each word row with its stored (level, category) metadata.

    Each item of `words_list` is a sequence whose first element is the
    word. The output row is the input row plus the metadata stored in the
    'sensitive_words' redis hash, or [1, 'politics'] when unknown.
    """
    enriched = []
    for row in words_list:
        word_key = row[0].encode('utf-8', 'ignore')
        stored = r.hget('sensitive_words', word_key)
        extra = json.loads(stored) if stored else [1, 'politics']
        enriched.append(list(row) + extra)
    return enriched
def sort_sensitive_words(words_list):
    """Attach redis-stored metadata to each sensitive-word row.

    The first element of each row is the word itself; rows come back
    extended with the hash value for that word, defaulting to
    [1, 'politics'] for words not in the 'sensitive_words' hash.
    """
    out = []
    for entry in words_list:
        augmented = list(entry)
        lookup = r.hget('sensitive_words', entry[0].encode('utf-8', 'ignore'))
        if lookup:
            augmented.extend(json.loads(lookup))
        else:
            augmented.extend([1, 'politics'])
        out.append(augmented)
    return out
def sort_sensitive_words(words_list):
    """Enrich word rows with their stored sensitivity metadata.

    For every row (word first), look the word up in the
    'sensitive_words' redis hash and append the decoded metadata;
    unknown words get the default [1, 'politics'].
    """
    def _metadata(word):
        # Encoded key matches how the hash was populated elsewhere.
        raw = r.hget('sensitive_words', word.encode('utf-8', 'ignore'))
        return json.loads(raw) if raw else [1, 'politics']

    return [list(row) + _metadata(row[0]) for row in words_list]
def ajax_cancel_delete():
    """Remove uids from the day's delete_user blacklist (AJAX endpoint).

    Query args: uid_list ('1,2,3'), date ('2013-09-01').
    Returns '1' on success; '0' on missing args or when no blacklist
    exists for that day.
    """
    uid_list = request.args.get('uid_list', '')
    date = request.args.get('date', '')
    if not uid_list or not date:
        return '0'
    uids = set(str(uid_list).split(','))
    date = str(date).replace('-', '')
    stored = r.hget('delete_user', date)
    if not stored:
        # Original called json.loads(None) here (TypeError) when the day
        # had no blacklist entry; report failure instead.
        return '0'
    remaining = set(json.loads(stored)) - uids
    if remaining:
        r.hset('delete_user', date, json.dumps(list(remaining)))
    else:
        # Nothing left for this day — drop the field entirely.
        r.hdel('delete_user', date)
    return '1'
def ajax_delete_user():
    """Add uids to the day's delete_user blacklist (AJAX endpoint).

    Query args: date ('2013-09-01'), uid_list ('12345,123456,').
    Returns '1' on success, '0' on missing/empty args.
    """
    date = str(request.args.get('date', '')).replace('-', '')
    uid_list = request.args.get('uid_list', '')
    # Filter empty fragments: the original stored '' uids for trailing
    # commas, and ''.split(',') == [''] is truthy so the empty-args guard
    # never fired for uid_list.
    delete_list = [uid for uid in str(uid_list).split(',') if uid]
    if date and delete_list:
        stored = r.hget('delete_user', date)
        if stored:
            delete_list.extend(json.loads(stored))
        # Dedupe so repeated submissions don't grow the stored list;
        # consumers treat it as a set (see ajax_cancel_delete).
        r.hset('delete_user', date, json.dumps(list(set(delete_list))))
        return '1'
    else:
        return '0'
def ajax_cancel_delete():
    """Un-delete users: strip uids from a day's delete_user entry.

    Query args: uid_list ('1,2,3'), date ('2013-09-01').
    Returns '1' on success; '0' for missing args or when the day has no
    stored blacklist.
    """
    uid_list = request.args.get('uid_list', '')
    date = request.args.get('date', '')
    if not uid_list or not date:
        return '0'
    cancel_set = set(str(uid_list).split(','))
    date = str(date).replace('-', '')
    raw = r.hget('delete_user', date)
    if raw is None:
        # Guard: the original fed None straight into json.loads and
        # crashed with TypeError when no entry existed for the day.
        return '0'
    revise_list = set(json.loads(raw)) - cancel_set
    if revise_list:
        r.hset('delete_user', date, json.dumps(list(revise_list)))
    else:
        r.hdel('delete_user', date)
    return '1'
def ajax_delete_user():
    """Append uids to the per-day delete_user blacklist.

    Query args: date ('2013-09-01'), uid_list ('12345,123456,').
    Returns '1' on success, '0' when date or uid_list is missing/empty.
    """
    date = request.args.get('date', '')  # '2013-09-01'
    date = str(date).replace('-', '')
    uid_list = request.args.get('uid_list', '')  # uid_list, 12345,123456,
    # Drop empty split fragments; previously a trailing comma stored ''
    # as a uid and an empty uid_list still passed the truthiness guard.
    delete_list = [u for u in str(uid_list).split(',') if u]
    if not (date and delete_list):
        return '0'
    existing = r.hget('delete_user', date)
    if existing:
        delete_list.extend(json.loads(existing))
    # Store a deduplicated list — readers consume it as a set anyway.
    r.hset('delete_user', date, json.dumps(list(set(delete_list))))
    return '1'
def ajax_history_delete():
    """Return info for deleted users — one day, or all days with show_all.

    Query args: date ('2013-09-01'), show_all (any non-empty string).
    Returns JSON-encoded user info, or '0' when nothing is stored.
    """
    day = str(request.args.get('date', '')).replace('-', '')
    show_all = request.args.get('show_all', '')  # return all
    if show_all:
        stored = r.hgetall('delete_user')
        if stored:
            uids = []
            for blob in stored.values():
                uids.extend(json.loads(blob))
            return json.dumps(get_user_info(uids))
    else:
        blob = r.hget('delete_user', day)
        if blob:
            return json.dumps(get_user_info(json.loads(blob)))
    return '0'
def ajax_history_delete():
    """List deleted-user details for one date or, with show_all, every date.

    Query args: date ('2013-09-01'), show_all (truthy string selects all).
    Returns JSON user info, or '0' when no deletions are recorded.
    """
    date_key = str(request.args.get('date', '')).replace('-', '')
    search_all = request.args.get('show_all', '')  # return all
    if not search_all:
        entry = r.hget('delete_user', date_key)
        if entry:
            return json.dumps(get_user_info(json.loads(entry)))
        return '0'
    everything = r.hgetall('delete_user')
    if not everything:
        return '0'
    collected = []
    for serialized in everything.values():
        collected.extend(json.loads(serialized))
    return json.dumps(get_user_info(collected))
def compute_mid_result(task_name, task_submit_date):
    """Aggregate per-15-minute monitor documents for `task_name` into `result`.

    Scans the ES index `monitor_index_name` (doc_type == task_name) in
    4-hour windows from the task submit date up to the current 15-minute
    boundary, accumulating counts, sentiment, sensitive-word scores, geo
    and hashtag stats keyed by timestamp (or by date for geo/hashtag).

    NOTE(review): the function body visible here ends without a `return`;
    it likely continues (e.g. `return result`) beyond this view — confirm
    against the full file.
    """
    # Buckets keyed '<attr>_<sensitive-flag>'; flag '0' = non-sensitive,
    # '1' = sensitive (inferred from the loop bodies below — verify).
    result = {'count_0':{}, 'count_1':{}, 'sentiment_0_126':{}, 'sentiment_0_127':{}, 'sentiment_0_128':{},\
              'sentiment_0_129':{}, 'sentiment_0_130':{}, 'sensitive_score':{}, 'geo_0':{}, 'geo_1':{},\
              'hashtag_0':{}, 'hashtag_1':{}, 'sentiment_1_126':{}, 'sentiment_1_127':{}, \
              'sentiment_1_128':{}, 'sentiment_1_129':{}, 'sentiment_1_130':{}}
    #geo & hashtag: day
    #other: 15min
    search_time_segment = 3600 * 4  # query window: 4 hours per ES search
    #start_ts = datetime2ts(task_submit_date)
    start_ts = date2ts(task_submit_date)
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    #test: hard-coded clock override left in from development
    now_ts = datetime2ts('2013-09-08')
    date_ts = datetime2ts(now_date)
    # Round "now" up to the next 15-minute (900 s) boundary of today.
    segment = int((now_ts - date_ts) / 900) + 1
    end_ts = date_ts + segment * 900
    #every search time-range: 4 hour----bulk action to search
    begin_ts = start_ts
    while True:
        if begin_ts >= end_ts:
            break
        compute_ts = ts2date(begin_ts)
        #print 'compute ts:', compute_ts
        query_body = {'range':{'timestamp':{'from': begin_ts, 'to':begin_ts+search_time_segment}}}
        try:
            mid_result_list = es.search(index=monitor_index_name, doc_type=task_name, body={'query':query_body, 'size':100000, 'sort':[{'timestamp':{'order': 'asc'}}]})['hits']['hits']
        except Exception, e:
            # NOTE(review): Python-2-only syntax; `raise e` also loses the
            # original traceback — consider bare `raise`.
            raise e
        if mid_result_list:
            for mid_result_item in mid_result_list:
                result_item = mid_result_item['_source']
                timestamp = result_item['timestamp']
                #attr_count: per-sensitive-flag weibo counts at this timestamp
                count_dict = json.loads(result_item['count'])
                for sensitive in count_dict:
                    count_key = 'count_' + sensitive
                    result[count_key][str(timestamp)] = count_dict[sensitive]
                #attr_sentiment: nested {sensitive: {sentiment_code: count}}
                sensitive_sentiment_dict = json.loads(result_item['sentiment'])
                for sensitive in sensitive_sentiment_dict:
                    sentiment_dict = sensitive_sentiment_dict[sensitive]
                    for sentiment in sentiment_dict:
                        sentiment_key = 'sentiment_'+sensitive+'_'+sentiment
                        result[sentiment_key][str(timestamp)] = sentiment_dict[sentiment]
                #attr_sensitive_score: weight word counts by stored word level
                if 'sensitive_word' in result_item:
                    sensitive_word_dict = json.loads(result_item['sensitive_word'])
                else:
                    sensitive_word_dict = {}
                ts_word_score = 0
                for word in sensitive_word_dict:
                    #print 'word:', json.dumps(word.encode('utf-8')), word.encode('utf-8'), type(word.encode('utf-8'))
                    search_word = word.encode('utf-8')
                    #print 'search_word:', search_word, type(search_word)
                    try:
                        word_identify = json.loads(word_r.hget('sensitive_words', search_word))
                    except:
                        # Unknown word: fall back to weight 2.
                        word_identify = [2]
                    ts_word_score += sensitive_word_dict[word] * word_identify[0]
                result['sensitive_score'][str(timestamp)] = ts_word_score
                #attr_geo: accumulated per day, not per 15-minute slot
                timestamp_date = ts2datetime(timestamp)
                sensitive_geo_dict = json.loads(result_item['geo'])
                for sensitive in sensitive_geo_dict:
                    if timestamp_date not in result['geo_'+sensitive]:
                        result['geo_'+sensitive][timestamp_date] = {}
                    geo_dict = sensitive_geo_dict[sensitive]
                    for geo in geo_dict:
                        try:
                            result['geo_'+sensitive][timestamp_date][geo] += geo_dict[geo]
                        except:
                            result['geo_'+sensitive][timestamp_date][geo] = geo_dict[geo]
                #attr_hashtag: also per day
                if 'hashtag' in result_item:
                    sensitive_hashtag_dict = json.loads(result_item['hashtag'])
                else:
                    sensitive_hashtag_dict = {}
                # NOTE(review): these two assignments reset the day's hashtag
                # buckets on EVERY document, discarding earlier same-day
                # accumulation — looks unintentional; confirm.
                result['hashtag_0'][timestamp_date] = {}
                result['hashtag_1'][timestamp_date] = {}
                for sensitive in sensitive_hashtag_dict:
                    # NOTE(review): duplicated loop over the same dict with the
                    # same loop variable — the outer loop multiplies the inner
                    # accumulation by len(sensitive_hashtag_dict); almost
                    # certainly a copy-paste bug. Kept as-is here.
                    for sensitive in sensitive_hashtag_dict:
                        if timestamp_date not in result['hashtag_'+sensitive]:
                            result['hashtag_'+sensitive][timestamp_date] = {}
                        hashtag_dict = sensitive_hashtag_dict[sensitive]
                        for hashtag in hashtag_dict:
                            try:
                                result['hashtag_'+sensitive][timestamp_date][hashtag] += hashtag_dict[hashtag]
                            except:
                                result['hashtag_'+sensitive][timestamp_date][hashtag] = hashtag_dict[hashtag]
        begin_ts += search_time_segment