# time and json, the project helpers used below (date2ts, ts2date, ts2datetime,
# datetime2ts, uid2uname, complement_ts, detect_peaks, compute_inner_polarization,
# get_top_user_profile, compute_comment_retweet_abnormal) and the global
# clients/config (es, monitor_index_name, r_task, monitor_r, word_r) are
# assumed to be imported at the top of this module.

def add_task_record_time(task_name, submit_date):
    # record the task start timestamp in redis so the monitor can track it
    status = 0
    #start_ts = datetime2ts(submit_date)
    start_ts = date2ts(submit_date)
    r_task.hset('monitor_task_time_record', task_name, start_ts)
    status = 1
    return status
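# A minimal usage sketch of add_task_record_time, assuming r_task is a
# redis-py client and date2ts converts a 'YYYY-MM-DD' string to a unix
# timestamp; the task name and date below are made up for illustration:
#
#   status = add_task_record_time('task_example', '2013-09-01')
#   # the recorded start timestamp can later be read back for monitoring:
#   start_ts = r_task.hget('monitor_task_time_record', 'task_example')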
def get_network(task_exist):
    task_name = task_exist['task_name']
    submit_date = task_exist['submit_date']
    submit_ts = date2ts(submit_date)
    time_segment = 24*3600
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    now_date_ts = datetime2ts(now_date)
    #test
    now_date_ts = datetime2ts('2013-09-07')
    iter_date_ts = now_date_ts
    iter_count = 1
    date_list = []
    top_list_dict = {}
    # walk backwards day by day: at most 7 days, never earlier than the submit date
    while True:
        if iter_count >= 8 or iter_date_ts < submit_ts:
            break
        iter_date = ts2datetime(iter_date_ts)
        date_list.append(iter_date)
        key = 'inner_' + str(iter_date)
        try:
            task_date_result = es.get(index=monitor_index_name, doc_type=task_name, id=key)['_source']
        except:
            task_date_result = {}
        #print 'task_name, key, task_date_result:', task_name, key, task_date_result
        iter_field = ['top1', 'top2', 'top3', 'top4', 'top5']
        for field in iter_field:
            try:
                user_count_item = json.loads(task_date_result[field])
            except KeyError:
                # no stored result for this day
                continue
            uid = user_count_item[0]
            uname = uid2uname(uid)
            count = user_count_item[1]
            try:
                top_list_dict[field].append([uid, uname, count])
            except KeyError:
                top_list_dict[field] = [[uid, uname, count]]
        # move to the previous day; counting the iterations enforces the 7-day cap
        iter_count += 1
        iter_date_ts -= time_segment
    # get inner-retweet group from es---field: inner_graph
    '''
    try:
        inner_graph = json.loads(task_date_result['inner_graph'])
    except:
        inner_graph = {}
    '''
    abnormal_index = compute_inner_polarization(top_list_dict)
    return [date_list, top_list_dict, abnormal_index]
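# For reference, a sketch of the structure get_network returns (the values
# shown are illustrative, not from the project):
#
#   [date_list, top_list_dict, abnormal_index]
#   date_list     -> ['2013-09-07', '2013-09-06', ...]     # newest day first
#   top_list_dict -> {'top1': [[uid, uname, count], ...],  # one entry per day
#                     ...,
#                     'top5': [[uid, uname, count], ...]}
#
# compute_inner_polarization (imported elsewhere in the project) is assumed
# to reduce top_list_dict to a single abnormal-index score.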
def compute_mid_result(task_name, task_submit_date):
    result = {'count_0':{}, 'count_1':{}, 'sentiment_0_126':{}, 'sentiment_0_127':{}, 'sentiment_0_128':{},\
            'sentiment_0_129':{}, 'sentiment_0_130':{}, 'sensitive_score':{}, 'geo_0':{}, 'geo_1':{},\
            'hashtag_0':{}, 'hashtag_1':{}, 'sentiment_1_126':{}, 'sentiment_1_127':{},\
            'sentiment_1_128':{}, 'sentiment_1_129':{}, 'sentiment_1_130':{}}
    #geo & hashtag: day
    #other: 15min
    search_time_segment = 3600 * 4
    #start_ts = datetime2ts(task_submit_date)
    start_ts = date2ts(task_submit_date)
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    #test
    now_ts = datetime2ts('2013-09-08')
    date_ts = datetime2ts(now_date)
    segment = int((now_ts - date_ts) / 900) + 1
    end_ts = date_ts + segment * 900
    #every search time-range: 4 hour----bulk action to search
    begin_ts = start_ts
    while True:
        if begin_ts >= end_ts:
            break
        compute_ts = ts2date(begin_ts)
        #print 'compute ts:', compute_ts
        query_body = {'range':{'timestamp':{'from': begin_ts, 'to': begin_ts + search_time_segment}}}
        try:
            mid_result_list = es.search(index=monitor_index_name, doc_type=task_name,\
                    body={'query':query_body, 'size':100000, 'sort':[{'timestamp':{'order':'asc'}}]})['hits']['hits']
        except Exception, e:
            raise e
        if mid_result_list:
            for mid_result_item in mid_result_list:
                result_item = mid_result_item['_source']
                timestamp = result_item['timestamp']
                #attr_count: per-15min weibo counts, keyed by sensitive flag
                count_dict = json.loads(result_item['count'])
                for sensitive in count_dict:
                    count_key = 'count_' + sensitive
                    result[count_key][str(timestamp)] = count_dict[sensitive]
                #attr_sentiment: per-15min counts for each sentiment label
                sensitive_sentiment_dict = json.loads(result_item['sentiment'])
                for sensitive in sensitive_sentiment_dict:
                    sentiment_dict = sensitive_sentiment_dict[sensitive]
                    for sentiment in sentiment_dict:
                        sentiment_key = 'sentiment_' + sensitive + '_' + sentiment
                        result[sentiment_key][str(timestamp)] = sentiment_dict[sentiment]
                #attr_sensitive_score: weight word frequencies by the word's identify level
                if 'sensitive_word' in result_item:
                    sensitive_word_dict = json.loads(result_item['sensitive_word'])
                else:
                    sensitive_word_dict = {}
                ts_word_score = 0
                for word in sensitive_word_dict:
                    search_word = word.encode('utf-8')
                    try:
                        word_identify = json.loads(word_r.hget('sensitive_words', search_word))
                    except:
                        word_identify = [2]
                    ts_word_score += sensitive_word_dict[word] * word_identify[0]
                result['sensitive_score'][str(timestamp)] = ts_word_score
                #attr_geo: accumulated per day
                timestamp_date = ts2datetime(timestamp)
                sensitive_geo_dict = json.loads(result_item['geo'])
                for sensitive in sensitive_geo_dict:
                    if timestamp_date not in result['geo_'+sensitive]:
                        result['geo_'+sensitive][timestamp_date] = {}
                    geo_dict = sensitive_geo_dict[sensitive]
                    for geo in geo_dict:
                        try:
                            result['geo_'+sensitive][timestamp_date][geo] += geo_dict[geo]
                        except KeyError:
                            result['geo_'+sensitive][timestamp_date][geo] = geo_dict[geo]
                #attr_hashtag: accumulated per day
                if 'hashtag' in result_item:
                    sensitive_hashtag_dict = json.loads(result_item['hashtag'])
                else:
                    sensitive_hashtag_dict = {}
                    result['hashtag_0'][timestamp_date] = {}
                    result['hashtag_1'][timestamp_date] = {}
                for sensitive in sensitive_hashtag_dict:
                    if timestamp_date not in result['hashtag_'+sensitive]:
                        result['hashtag_'+sensitive][timestamp_date] = {}
                    hashtag_dict = sensitive_hashtag_dict[sensitive]
                    for hashtag in hashtag_dict:
                        try:
                            result['hashtag_'+sensitive][timestamp_date][hashtag] += hashtag_dict[hashtag]
                        except KeyError:
                            result['hashtag_'+sensitive][timestamp_date][hashtag] = hashtag_dict[hashtag]
        begin_ts += search_time_segment
    return result
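# The 15-minute alignment in compute_mid_result, as a standalone arithmetic
# sketch (the date is illustrative): `segment` counts the 900 s buckets
# elapsed since midnight, so end_ts lands on the first bucket boundary
# after now_ts.
#
#   now_ts  = datetime2ts('2013-09-08') + 1000   # 00:16:40
#   date_ts = datetime2ts('2013-09-08')          # midnight
#   segment = int((now_ts - date_ts) / 900) + 1  # = 2
#   end_ts  = date_ts + segment * 900            # = midnight + 00:30:00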
def get_user_comment_retweet(task_exist):
    result = {}  # result = {'uid1_comment':{ts:value}, 'uid1_retweet':{ts:value}, ...}
    submit_date = task_exist['submit_date']
    start_ts = date2ts(submit_date)
    task_status = task_exist['status']
    if task_status == 1:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        now_date_ts = datetime2ts(now_date)
        segment = int((now_ts - now_date_ts) / 900) + 1
        end_ts = now_date_ts + segment * 900
        #test
        end_ts = datetime2ts('2013-09-02')
    else:
        end_ts = date2ts(task_exist['end_date'])
    task_user = task_exist['uid_list']
    select_top_dict = {}  # {uid: [ave_retweet_count, ave_peak_retweet_count]}
    #select union of top5 ave_retweet_count and top5 ave_peak_retweet_count
    for user in task_user:
        result[user+'_comment'] = {}
        result[user+'_retweet'] = {}
        # redis hash fields look like 'comment_<ts>' / 'retweet_<ts>'
        comment_retweet_dict = monitor_r.hgetall(user)
        for item in comment_retweet_dict:
            item_type_ts = item.split('_')
            item_type = item_type_ts[0]
            item_ts = item_type_ts[1]
            result[user+'_'+item_type][item_ts] = int(comment_retweet_dict[item])
        # use to detect peaks: fill missing buckets, sort by timestamp, scan values
        comment_dict = result[user+'_comment']
        complement_comment_dict = complement_ts(comment_dict, start_ts, end_ts)
        sort_comment_dict = sorted(complement_comment_dict.items(), key=lambda x:int(x[0]))
        detect_peaks_comment_input = [item[1] for item in sort_comment_dict]
        #print 'detect_peaks_comment_input:', detect_peaks_comment_input
        result[user+'_comment_peak'] = detect_peaks(detect_peaks_comment_input)
        retweet_dict = result[user+'_retweet']
        complement_retweet_dict = complement_ts(retweet_dict, start_ts, end_ts)
        sort_retweet_dict = sorted(complement_retweet_dict.items(), key=lambda x:int(x[0]))
        detect_peaks_retweet_input = [item[1] for item in sort_retweet_dict]
        result[user+'_retweet_peak'] = detect_peaks(detect_peaks_retweet_input)
        ave_retweet_count = sum(detect_peaks_retweet_input) / len(detect_peaks_retweet_input)
        peak_count_list = [detect_peaks_retweet_input[peak_location] for peak_location in result[user+'_retweet_peak']]
        # guard against division by zero when no peaks were detected
        if peak_count_list:
            ave_peak_count = sum(peak_count_list) / len(peak_count_list)
        else:
            ave_peak_count = 0
        select_top_dict[user] = [ave_retweet_count, ave_peak_count]
    #select top5 by average count and top5 by average peak count,
    #then keep the users appearing in both top-5 lists
    sort_select_top_count_dict = sorted(select_top_dict.items(), key=lambda x:x[1][0], reverse=True)
    top5_count_user_list = sort_select_top_count_dict[:5]
    top5_count_user = [item[0] for item in top5_count_user_list]
    sort_select_top_peak_dict = sorted(select_top_dict.items(), key=lambda x:x[1][1], reverse=True)
    top5_peak_user_list = sort_select_top_peak_dict[:5]
    top5_peak_user = [item[0] for item in top5_peak_user_list]
    union_user = list(set(top5_count_user) & set(top5_peak_user))
    new_result = {}
    for user in union_user:
        new_result[user+'_retweet'] = result[user+'_retweet']
        new_result[user+'_retweet_peak'] = result[user+'_retweet_peak']
        new_result[user+'_comment'] = result[user+'_comment']
        new_result[user+'_comment_peak'] = result[user+'_comment_peak']
    new_result['profile'] = get_top_user_profile(union_user)
    #compute abnormal index
    new_result['abnormal_index'] = compute_comment_retweet_abnormal(new_result, union_user)
    return new_result
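# complement_ts and detect_peaks are imported from elsewhere in the project;
# the sketches below show only the behavior get_user_comment_retweet relies
# on (assumptions for illustration, not the project's actual implementations):

def _complement_ts_sketch(ts_dict, start_ts, end_ts, step=900):
    # fill every 15-minute bucket in [start_ts, end_ts) with 0 when missing,
    # so the peak detector sees an evenly spaced series
    return dict((str(ts), int(ts_dict.get(str(ts), 0)))
                for ts in range(int(start_ts), int(end_ts), step))

def _detect_peaks_sketch(series):
    # return indices of simple local maxima; the caller indexes these back
    # into the timestamp-sorted value list to read the peak heights
    return [i for i in range(1, len(series) - 1)
            if series[i - 1] < series[i] >= series[i + 1]]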