def submit_sensing(input_dict):
    """Register a sensing task in the compute ES store and the compute redis queue.

    ``input_dict['task_information']`` must carry ``task_name``, ``task_id``,
    ``uid_list``, ``state`` and ``task_type``; a missing key raises ``KeyError``.
    The dict is updated in place with ``submit_date`` (current unix time),
    ``count`` (length of ``uid_list``), ``status`` (0) and ``detect_type``
    ('sensing') before it is handed to the save helpers.

    Returns the string 'task name invalid' when the group index already holds
    a document with this ``task_id``; otherwise True when ``save_compute2es``
    and ``save_compute2redis`` both returned True, else False.
    """
    info = input_dict['task_information']
    info['task_name']  # value unused; the lookup is kept so a missing key still raises KeyError
    group_doc_id = info['task_id']

    # step1: the task id must not be present in the group index yet
    try:
        existing = es_group_result.get(index=group_index_name,
                                       doc_type=group_index_type,
                                       id=group_doc_id)
    except:
        # any failure of the lookup is treated as "no such task"
        existing = {}
    if existing != {}:
        return 'task name invalid'

    # step2: complete the task information and save it to the compute ES
    info['submit_date'] = int(time.time())
    info['count'] = len(info['uid_list'])
    info['state'] = info['state']  # no-op write; raises KeyError when 'state' is absent
    info['status'] = 0
    info['detect_type'] = 'sensing'
    info['task_type'] = info['task_type']  # no-op write; raises KeyError when absent
    es_ok = save_compute2es(input_dict)

    # step3: save to the compute redis (re-read the entry after the ES save)
    redis_ok = save_compute2redis(input_dict['task_information'])

    # the operation succeeded only if both stores reported True
    return bool(es_ok == True and redis_ok == True)
def save_detect_event_task(input_dict):
    """Store an event-detection task in ES and push it onto the detect redis queue.

    Reads ``task_name`` and ``task_id`` from ``input_dict['task_information']``
    (``KeyError`` if either is missing).

    Returns 'task name invalid' when the group index already has a document
    with that ``task_id``; otherwise True only if ``save_detect2es`` and
    ``save_detect2redis`` both returned True, else False.
    """
    meta = input_dict['task_information']
    meta['task_name']  # value unused; the lookup is kept so a missing key still raises KeyError
    doc_id = meta['task_id']

    # step1: the task must not be in the group ES already
    try:
        existing_doc = es_group_result.get(index=group_index_name,
                                           doc_type=group_index_type,
                                           id=doc_id)
    except:
        # any failure of the lookup is treated as "no such task"
        existing_doc = {}
    if existing_doc != {}:
        return 'task name invalid'

    # step2: save to es, step3: save to redis (both are always attempted)
    es_saved = save_detect2es(input_dict)
    redis_saved = save_detect2redis(input_dict)

    return bool(es_saved == True and redis_saved == True)
def submit_sensing(input_dict):
    """Submit a sensing task: validate its id, then save it to ES and redis.

    The ``task_information`` entry of ``input_dict`` is modified in place:
    ``submit_date``, ``count``, ``status`` and ``detect_type`` are written,
    while ``state`` and ``task_type`` are required to exist already
    (``KeyError`` otherwise, as for ``task_name``, ``task_id``, ``uid_list``).

    Returns 'task name invalid' if a document with the task id is found in
    the group index. Otherwise returns True when ``save_compute2es`` and
    ``save_compute2redis`` both returned True, and False in every other case.
    """
    task = input_dict['task_information']
    # the name itself is not used below; it is only required to be present
    _task_name, task_id = task['task_name'], task['task_id']

    # step1: identify that the task name is valid (not stored yet)
    try:
        lookup = es_group_result.get(
            index=group_index_name, doc_type=group_index_type, id=task_id)
    except:
        lookup = {}  # lookup errors count as "not stored"
    if lookup != {}:
        return 'task name invalid'

    # step2: save to compute es
    now = int(time.time())
    task['submit_date'] = now
    task['count'] = len(task['uid_list'])
    task['state'] = task['state']          # presence check only
    task['status'] = 0
    task['detect_type'] = 'sensing'
    task['task_type'] = task['task_type']  # presence check only
    outcomes = [save_compute2es(input_dict)]

    # step3: save to compute redis
    outcomes.append(save_compute2redis(input_dict['task_information']))

    # identify the operation status
    return all(flag == True for flag in outcomes)
def save_detect_attribute_task(input_dict):
    """Save an attribute-detection task to ES and to the detect redis queue.

    ``input_dict['task_information']`` must contain ``task_name`` and
    ``task_id``. When the group index already contains a document with that
    id the string 'task name invalid' is returned and nothing is saved.

    Otherwise both ``save_detect2es`` and ``save_detect2redis`` are called
    with ``input_dict`` and the result is True only if each returned True.
    """
    details = input_dict['task_information']
    _unused_name = details['task_name']  # required key, value not needed here
    task_key = details['task_id']

    # step1: identify that the detect task is not in the group es
    try:
        found = es_group_result.get(index=group_index_name,
                                    doc_type=group_index_type,
                                    id=task_key)
    except:
        found = {}  # lookup errors count as "not found"
    if found != {}:
        return 'task name invalid'

    # step2: save to es
    es_flag = save_detect2es(input_dict)
    # step3: save to redis
    redis_flag = save_detect2redis(input_dict)

    # identify the operation status
    if not (es_flag == True):
        return False
    if not (redis_flag == True):
        return False
    return True
def submit_task(input_data):
    """Submit a group-analysis task unless a task with the same id exists.

    The task id is ``input_data['submit_user'] + input_data['task_name']``.

    Returns 0 when the group index already has that task (nothing is done)
    and 1 when the lookup failed for any reason. With status 1 and no
    ``'uid_file'`` key in ``input_data``, the dict is completed in place
    (``status``, ``count``, ``task_type``, ``submit_user``, ``detect_type``,
    ``detect_process``), indexed into the group index and pushed as JSON
    onto the analysis queue.
    """
    name = input_data['task_name']
    owner = input_data['submit_user']
    task_id = owner + name

    try:
        es_group_result.get(index=group_index_name,
                            doc_type=group_index_type,
                            id=task_id)['_source']
    except:
        status = 1  # not found (or lookup failed): the task may be submitted
    else:
        status = 0  # already stored: it can not be submitted

    if status == 0 or 'uid_file' in input_data:
        return status

    input_data['status'] = 0  # mark the task as not computed
    input_data['count'] = len(input_data['uid_list'])
    for key, value in (('task_type', 'analysis'),
                       ('submit_user', '******'),
                       ('detect_type', ''),
                       ('detect_process', '')):
        input_data[key] = value

    es_group_result.index(index=group_index_name, doc_type=group_index_type,
                          id=task_id, body=input_data)
    r.lpush(group_analysis_queue_name, json.dumps(input_data))
    return status
def get_activity_weibo(task_name, submit_user, start_ts, time_segment=FOUR_HOUR):
    """Return the weibo posted by a group's members inside one time window.

    The group is the document ``submit_user + task_name`` of the group index.
    The window is ``[start_ts, start_ts + time_segment)`` and is searched in
    the flow-text index ``flow_text_index_name_pre + ts2datetime(start_ts)``.

    Returns:
        'task name invalid' when the group document cannot be fetched,
        'task uid list null' when it carries no uid list, otherwise a list of
        dicts with the keys ``timestamp`` (formatted by ``ts2date``), ``ip``,
        ``text`` and ``geo`` in the order returned by the timestamp-sorted
        query. A failed search yields an empty list.
    """
    # step1: get the uid list of the task
    task_id = submit_user + task_name
    try:
        group_result = es_group_result.get(index=group_index_name,
                                           doc_type=group_index_type,
                                           id=task_id, _source=False,
                                           fields=['uid_list'])
    except Exception:
        group_result = {}
    if group_result == {}:
        return 'task name invalid'

    try:
        uid_list = group_result['fields']['uid_list']
    except (KeyError, TypeError):
        uid_list = []
    if not uid_list:
        return 'task uid list null'

    # step2: search the weibo of the time segment
    end_ts = start_ts + time_segment
    flow_text_index_name = flow_text_index_name_pre + ts2datetime(start_ts)
    query = [
        {'terms': {'uid': uid_list}},
        {'range': {'timestamp': {'gte': start_ts, 'lt': end_ts}}},
    ]
    try:
        flow_text_es_result = es_flow_text.search(
            index=flow_text_index_name, doc_type=flow_text_index_type,
            body={'query': {'bool': {'must': query}},
                  'sort': 'timestamp', 'size': MAX_VALUE})['hits']['hits']
    except Exception:
        # best effort: a missing daily index or a failed query gives no weibo
        flow_text_es_result = []

    results = []
    for item in flow_text_es_result:
        source = item['_source']
        geo = source.get('geo')
        if not geo:
            geo_text = ''
        elif hasattr(geo, 'split'):
            # a plain string is taken as '&'-delimited; joining it directly
            # would put a tab between every single character
            geo_text = '\t'.join(geo.split('&'))
        else:
            geo_text = '\t'.join(geo)
        results.append({
            'timestamp': ts2date(source['timestamp']),
            'ip': source['ip'],
            'text': source['text'],
            'geo': geo_text,
        })
    return results
def show_detect_result(task_id):
    """List the members of a detected group with their normalised scores.

    The uid list is read (as JSON) from the group-index document ``task_id``
    and looked up in the portrait index in batches of ``DETECT_ITER_COUNT``.

    Returns:
        'task name is not exist' when the task document cannot be loaded,
        otherwise a list of ``[uid, uname, activeness, importance, influence]``
        rows. Each score is ``log10(value / max * 9 + 1) * 100`` with the
        maxima taken from ``get_evaluate_max()``. Users that are not found
        carry u'未知' in every column but ``uid``. Scored rows come first,
        ordered by influence descending; rows of unknown users follow.
    """
    # step1: identify that the task exists
    try:
        task_exist_result = es_group_result.get(index=group_index_name,
                                                doc_type=group_index_type,
                                                id=task_id)['_source']
    except Exception:
        task_exist_result = {}
    if not task_exist_result:
        return 'task name is not exist'

    # step2: get the uid list
    uid_list = json.loads(task_exist_result['uid_list'])

    # step3: get uid/uname/activeness/importance/influence of every user
    unknown = u'未知'
    score_fields = ('activeness', 'importance', 'influence')
    evaluate_max = None  # fetched once, on the first user that needs it
    scored_rows = []
    unknown_rows = []
    for offset in range(0, len(uid_list), DETECT_ITER_COUNT):
        iter_user_list = uid_list[offset:offset + DETECT_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.mget(
                index=portrait_index_name, doc_type=portrait_index_type,
                body={'ids': iter_user_list}, _source=True)['docs']
        except Exception:
            # best effort: users of a failed batch are left out of the result
            portrait_result = []
        for item in portrait_result:
            uid = item['_id']
            if not item.get('found'):
                unknown_rows.append([uid, unknown, unknown, unknown, unknown])
                continue
            source = item['_source']
            if evaluate_max is None:
                evaluate_max = get_evaluate_max()
            # float() keeps the ratio from being floor-divided when both
            # operands are integers under Python 2
            scores = [
                math.log(float(source[field]) / evaluate_max[field] * 9 + 1, 10) * 100
                for field in score_fields
            ]
            scored_rows.append([uid, source['uname']] + scores)

    # Only numeric influence values are compared with each other; mixing them
    # with the u'未知' placeholder has no meaningful order (and raises
    # TypeError on Python 3).
    scored_rows.sort(key=lambda row: row[4], reverse=True)
    return scored_rows + unknown_rows
def save_detect_multi_task(input_dict, extend_mark):
    """Save a multi-user task either for detection or directly for computing.

    The uid list in ``input_dict['task_information']`` is first split by
    ``identify_user_out`` and replaced in place by the users that are in the
    portrait store.

    Args:
        input_dict: task description with a ``task_information`` dict that
            holds at least ``uid_list`` and ``task_id``.
        extend_mark: '1' saves through ``save_detect2es`` /
            ``save_detect2redis``; '0' sets ``status`` and ``count`` and saves
            through ``save_compute2es`` / ``save_compute2redis``. Any other
            value saves nothing.

    Returns:
        'task name invalid' when the group index already holds the task id,
        otherwise the tuple ``(status, out_user_list)`` where ``status`` is
        True only if both save helpers returned True (False for an
        unrecognised ``extend_mark``) and ``out_user_list`` is the second
        value returned by ``identify_user_out``.
    """
    task_information = input_dict['task_information']

    # step1: split the users into those in user_portrait and those not in it
    in_user_list, out_user_list = identify_user_out(task_information['uid_list'])
    task_information['uid_list'] = in_user_list
    print('step1')

    # step2: identify that the task name is valid
    task_id = task_information['task_id']
    try:
        task_exist_result = es_group_result.get(index=group_index_name,
                                                doc_type=group_index_type,
                                                id=task_id)['_source']
    except Exception:
        task_exist_result = {}
    if task_exist_result:
        return 'task name invalid'
    print('step2')

    # step3: identify whether or not to extend (extend mark).
    # Both flags start as False so that an unexpected extend_mark reports a
    # failed save instead of hitting unbound local variables.
    es_status = False
    redis_status = False
    if extend_mark == '1':
        print('step3 save')
        es_status = save_detect2es(input_dict)
        redis_status = save_detect2redis(input_dict)  # detect redis queue
    elif extend_mark == '0':
        uid_list = task_information['uid_list']
        task_information['status'] = 0
        print('uid_list: %s %s %s' % (len(uid_list), uid_list, type(uid_list)))
        task_information['count'] = len(uid_list)
        print('step3 save')
        es_status = save_compute2es(input_dict)
        # re-read the entry after the ES save, then push to the compute queue
        redis_status = save_compute2redis(input_dict['task_information'])

    # identify the operation status
    status = bool(es_status == True and redis_status == True)
    return status, out_user_list
def detect2analysis(input_data):
    """Turn an existing detect task into an analysis task and queue it.

    Reads ``task_name``, ``submit_user`` and ``uid_list`` from ``input_data``;
    the task id is ``submit_user + task_name``.

    Returns 'task name is not exsit' when the task cannot be loaded from the
    group index. Otherwise the stored task gets the new uid list, ``status``
    0, the new ``count`` and ``task_type`` 'analysis'; a fresh task dict is
    saved with ``save_compute2es`` and the updated stored document is passed
    to ``save_compute2redis``. The result is True only if both returned True.
    """
    name = input_data['task_name']
    owner = input_data['submit_user']
    task_id = owner + name
    members = input_data['uid_list']

    # step1: identify that the task exists
    try:
        stored = es_group_result.get(index=group_index_name,
                                     doc_type=group_index_type,
                                     id=task_id)['_source']
    except:
        stored = {}  # lookup errors count as "task missing"
    if stored == {}:
        return 'task name is not exsit'

    # step2: update the uid list, step3: mark the task for analysis
    stored['uid_list'] = members
    stored['status'] = 0  # mark the compute status
    stored['count'] = len(members)
    stored['task_type'] = 'analysis'

    # build the task information that goes to the compute ES
    task_information = {
        'task_id': task_id,
        'task_name': name,
        'uid_list': members,
        'status': 0,
        'count': len(members),
        'task_type': 'analysis',
    }
    for inherited in ('submit_user', 'submit_date', 'detect_type',
                      'detect_process', 'state'):
        task_information[inherited] = stored[inherited]
    payload = {
        'task_information': task_information,
        'query_condition': stored['query_condition'],
    }
    es_ok = save_compute2es(payload)

    # step4: add the task to the analysis queue
    redis_ok = save_compute2redis(stored)

    return bool(es_ok == True and redis_ok == True)
def show_detect_result(task_id):
    """List the members of a detected group with their normalised scores.

    The uid list is read (as JSON) from the group-index document ``task_id``
    and looked up in the portrait index in batches of ``DETECT_ITER_COUNT``.

    Returns:
        'task name is not exist' when the task document cannot be loaded,
        otherwise a list of ``[uid, uname, activeness, importance, influence]``
        rows. Each score is ``log10(value / max * 9 + 1) * 100`` with the
        maxima taken from ``get_evaluate_max()``. Users that are not found
        carry u'未知' in every column but ``uid``. Scored rows come first,
        ordered by influence descending; rows of unknown users follow.
    """
    # step1: identify that the task exists
    try:
        task_exist_result = es_group_result.get(index=group_index_name,
                                                doc_type=group_index_type,
                                                id=task_id)['_source']
    except Exception:
        task_exist_result = {}
    if not task_exist_result:
        return 'task name is not exist'

    # step2: get the uid list
    uid_list = json.loads(task_exist_result['uid_list'])

    # step3: get uid/uname/activeness/importance/influence of every user
    unknown = u'未知'
    score_fields = ('activeness', 'importance', 'influence')
    evaluate_max = None  # fetched once, on the first user that needs it
    scored_rows = []
    unknown_rows = []
    for offset in range(0, len(uid_list), DETECT_ITER_COUNT):
        iter_user_list = uid_list[offset:offset + DETECT_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.mget(
                index=portrait_index_name, doc_type=portrait_index_type,
                body={'ids': iter_user_list}, _source=True)['docs']
        except Exception:
            # best effort: users of a failed batch are left out of the result
            portrait_result = []
        for item in portrait_result:
            uid = item['_id']
            if not item.get('found'):
                unknown_rows.append([uid, unknown, unknown, unknown, unknown])
                continue
            source = item['_source']
            if evaluate_max is None:
                evaluate_max = get_evaluate_max()
            # float() keeps the ratio from being floor-divided when both
            # operands are integers under Python 2
            scores = [
                math.log(float(source[field]) / evaluate_max[field] * 9 + 1, 10) * 100
                for field in score_fields
            ]
            scored_rows.append([uid, source['uname']] + scores)

    # Only numeric influence values are compared with each other; mixing them
    # with the u'未知' placeholder has no meaningful order (and raises
    # TypeError on Python 3).
    scored_rows.sort(key=lambda row: row[4], reverse=True)
    return scored_rows + unknown_rows
def save_detect_multi_task(input_dict, extend_mark):
    """Save a multi-user task either for detection or directly for computing.

    The uid list in ``input_dict['task_information']`` is first split by
    ``identify_user_out`` and replaced in place by the users that are in the
    portrait store.

    Args:
        input_dict: task description with a ``task_information`` dict that
            holds at least ``uid_list`` and ``task_id``.
        extend_mark: '1' saves through ``save_detect2es`` /
            ``save_detect2redis``; '0' sets ``status`` and ``count`` and saves
            through ``save_compute2es`` / ``save_compute2redis``. Any other
            value saves nothing.

    Returns:
        'task name invalid' when the group index already holds the task id,
        otherwise the tuple ``(status, out_user_list)`` where ``status`` is
        True only if both save helpers returned True (False for an
        unrecognised ``extend_mark``) and ``out_user_list`` is the second
        value returned by ``identify_user_out``.
    """
    task_information = input_dict['task_information']

    # step1: split the users into those in user_portrait and those not in it
    in_user_list, out_user_list = identify_user_out(task_information['uid_list'])
    task_information['uid_list'] = in_user_list
    print('step1')

    # step2: identify that the task name is valid
    task_id = task_information['task_id']
    try:
        task_exist_result = es_group_result.get(index=group_index_name,
                                                doc_type=group_index_type,
                                                id=task_id)['_source']
    except Exception:
        task_exist_result = {}
    if task_exist_result:
        return 'task name invalid'
    print('step2')

    # step3: identify whether or not to extend (extend mark).
    # Both flags start as False so that an unexpected extend_mark reports a
    # failed save instead of hitting unbound local variables.
    es_status = False
    redis_status = False
    if extend_mark == '1':
        print('step3 save')
        es_status = save_detect2es(input_dict)
        redis_status = save_detect2redis(input_dict)  # detect redis queue
    elif extend_mark == '0':
        uid_list = task_information['uid_list']
        task_information['status'] = 0
        print('uid_list: %s %s %s' % (len(uid_list), uid_list, type(uid_list)))
        task_information['count'] = len(uid_list)
        print('step3 save')
        es_status = save_compute2es(input_dict)
        # re-read the entry after the ES save, then push to the compute queue
        redis_status = save_compute2redis(input_dict['task_information'])

    # identify the operation status
    status = bool(es_status == True and redis_status == True)
    return status, out_user_list
def submit_task(input_data):
    """Index and queue a new analysis task when its id is still free.

    The id is built as ``submit_user + task_name`` from ``input_data``.

    Returns 0 if a document with that id could be read from the group index
    (the task is left untouched), else 1. In the latter case, and only when
    ``input_data`` has no ``"uid_file"`` key, ``input_data`` is filled in
    place with ``status``, ``count``, ``task_type``, ``submit_user``,
    ``detect_type`` and ``detect_process``, written to the group index and
    pushed as JSON to the analysis queue.
    """
    task_label = input_data["task_name"]
    submitter = input_data["submit_user"]
    task_id = submitter + task_label

    already_stored = True
    try:
        es_group_result.get(
            index=group_index_name, doc_type=group_index_type, id=task_id
        )["_source"]
    except:
        already_stored = False  # a failed lookup counts as "not stored"

    # 0 marks that the task can not be submitted
    status = 0 if already_stored else 1

    if not already_stored and "uid_file" not in input_data:
        input_data["status"] = 0  # mark the task as not computed
        member_count = len(input_data["uid_list"])
        input_data["count"] = member_count
        input_data["task_type"] = "analysis"
        input_data["submit_user"] = "******"
        input_data["detect_type"] = ""
        input_data["detect_process"] = ""
        es_group_result.index(
            index=group_index_name, doc_type=group_index_type,
            id=task_id, body=input_data,
        )
        r.lpush(group_analysis_queue_name, json.dumps(input_data))

    return status
def detect2analysis(input_data):
    """Re-submit a stored detect task as an analysis task.

    ``input_data`` supplies ``task_name``, ``submit_user`` and the new
    ``uid_list``. The stored task (id ``submit_user + task_name``) must be
    readable from the group index, otherwise 'task name is not exsit' is
    returned.

    The stored document is updated with the new uid list, ``status`` 0,
    ``count`` and ``task_type`` 'analysis'. ``save_compute2es`` receives a
    new dict built from it, ``save_compute2redis`` receives the updated
    stored document itself. Returns True if both helpers returned True and
    False otherwise.
    """
    task_name = input_data['task_name']
    submit_user = input_data['submit_user']
    task_id = submit_user + task_name
    new_uid_list = input_data['uid_list']

    # step1: identify that the task exists
    try:
        response = es_group_result.get(
            index=group_index_name, doc_type=group_index_type, id=task_id)
        task_doc = response['_source']
    except:
        task_doc = {}
    if task_doc == {}:
        return 'task name is not exsit'

    # step2: update the uid list of the task
    task_doc['uid_list'] = new_uid_list
    # step3: update status / count / task_type for the compute step
    task_doc['status'] = 0
    task_doc['count'] = len(new_uid_list)
    task_doc['task_type'] = 'analysis'

    # get the task information dict
    information = {}
    information['task_id'] = task_id
    information['task_name'] = task_name
    information['uid_list'] = new_uid_list
    information['status'] = 0
    information['count'] = len(new_uid_list)
    information['task_type'] = 'analysis'
    information['submit_user'] = task_doc['submit_user']
    information['submit_date'] = task_doc['submit_date']
    information['detect_type'] = task_doc['detect_type']
    information['detect_process'] = task_doc['detect_process']
    information['state'] = task_doc['state']

    compute_entry = {
        'task_information': information,
        'query_condition': task_doc['query_condition'],
    }
    es_result = save_compute2es(compute_entry)

    # step4: add the task to the analysis queue
    redis_result = save_compute2redis(task_doc)

    # identify the operation status
    if es_result == True and redis_result == True:
        return True
    return False
def get_group_member_name(task_name, submit_user):
    """Map every uid of a group task to the user's name.

    The group document has the id ``submit_user + task_name``. Returns an
    empty dict when that document or the portrait lookup cannot be fetched;
    otherwise a dict ``uid -> uname`` in which users that are not found in
    the portrait index are named 'unkown'.
    """
    task_id = submit_user + task_name
    try:
        group_doc = es_group_result.get(index=group_index_name,
                                        doc_type=group_index_type,
                                        id=task_id)['_source']
    except:
        return {}

    member_ids = group_doc['uid_list']
    try:
        portrait_docs = es_user_portrait.mget(index=portrait_index_name,
                                              doc_type=portrait_index_type,
                                              body={'ids': member_ids})['docs']
    except:
        return {}

    uid2uname = {}
    for doc in portrait_docs:
        uid = doc['_id']
        uid2uname[uid] = doc['_source']['uname'] if doc['found'] == True else 'unkown'
    return uid2uname
def get_group_member_name(task_name, submit_user):
    """Return a ``uid -> uname`` dict for the members of one group task.

    An empty dict is returned when the group document
    (id ``submit_user + task_name``) or the portrait documents cannot be
    read. Members missing from the portrait index get the name "unkown".
    """
    names = {}
    task_id = submit_user + task_name

    try:
        response = es_group_result.get(
            index=group_index_name, doc_type=group_index_type, id=task_id
        )
        members = response["_source"]
    except:
        return names

    uid_list = members["uid_list"]
    try:
        docs = es_user_portrait.mget(
            index=portrait_index_name,
            doc_type=portrait_index_type,
            body={"ids": uid_list},
        )["docs"]
    except:
        return names

    for entry in docs:
        uid = entry["_id"]
        if entry["found"] == True:
            names[uid] = entry["_source"]["uname"]
            continue
        names[uid] = "unkown"
    return names
seed_user_result = es_user_portrait.search(index=portrait_index_name, doc_type=portrait_index_type, \ body={'query':{'bool':{'must':query}}, 'size':1})['hits']['hits'] except Exception, e: raise e try: seed_user_source = seed_user_result[0]['_source'] except: return 'seed user invalid' #step2: identify the detect task name is valid----is not in group es task_information = input_dict['task_information'] task_name = task_information['task_name'] task_id = task_information['task_id'] try: task_exist_result = es_group_result.get(index=group_index_name, doc_type=group_index_type, id=task_id) except: task_exist_result = {} if task_exist_result != {}: return 'task name invalid' #step3: save to es es_status = save_detect2es(input_dict) #step4: save to redis queue redis_status = save_detect2redis(input_dict) #identify the operation status if es_status == True and redis_status == True: status = True else: status = False
def search_group_sentiment_weibo(task_name, submit_user, start_ts, sentiment):
    """Return one day of a group's weibo that carry the requested sentiment.

    The group is the document ``submit_user + task_name`` of the group index.
    The day is ``[start_ts, start_ts + DAY)`` and is searched in the flow-text
    index ``flow_text_index_name_pre + str(ts2datetime(start_ts))``.

    Args:
        sentiment: value matched against the ``sentiment`` field; the value
            "2" matches any of the values in ``SENTIMENT_SECOND`` instead.

    Returns:
        "task name invalid" when the group document cannot be fetched,
        "task uid list null" when it carries no uid list, otherwise a list
        (ascending by timestamp) of dicts with the keys ``uid``, ``uname``,
        ``ip``, ``geo``, ``text``, ``timestamp`` and ``sentiment``. ``uname``
        is "unknown" for authors whose name could not be looked up. A failed
        search yields an empty list.
    """
    # step1: get the uid list of the task
    task_id = submit_user + task_name
    try:
        group_result = es_group_result.get(
            index=group_index_name, doc_type=group_index_type, id=task_id,
            _source=False, fields=["uid_list"]
        )
    except Exception:
        group_result = {}
    if group_result == {}:
        return "task name invalid"

    try:
        uid_list = group_result["fields"]["uid_list"]
    except (KeyError, TypeError):
        uid_list = []
    if not uid_list:
        return "task uid list null"

    # step2: get uid2uname
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(
            index=portrait_index_name,
            doc_type=portrait_index_type,
            body={"ids": uid_list},
            _source=False,
            fields=["uname"],
        )["docs"]
    except Exception:
        # best effort: without the portrait data every author is "unknown"
        user_portrait_result = []
    for item in user_portrait_result:
        unames = item.get("fields", {}).get("uname") or []
        if item.get("found") and unames:
            uid2uname[item["_id"]] = unames[0]

    # step3: build the query for the day that starts at start_ts
    flow_text_index_name = flow_text_index_name_pre + str(ts2datetime(start_ts))
    if sentiment != "2":
        sentiment_clause = {"term": {"sentiment": sentiment}}
    else:
        sentiment_clause = {"terms": {"sentiment": SENTIMENT_SECOND}}
    query_body = [
        {"terms": {"uid": uid_list}},
        sentiment_clause,
        {"range": {"timestamp": {"gte": start_ts, "lt": start_ts + DAY}}},
    ]
    try:
        flow_text_result = es_flow_text.search(
            index=flow_text_index_name,
            doc_type=flow_text_index_type,
            body={
                "query": {"bool": {"must": query_body}},
                "sort": [{"timestamp": {"order": "asc"}}],
                "size": MAX_VALUE,
            },
        )["hits"]["hits"]
    except Exception:
        flow_text_result = []

    # step4: shape the hits
    weibo_list = []
    for flow_text_item in flow_text_result:
        source = flow_text_item["_source"]
        uid = source["uid"]
        try:
            geo = "\t".join(source["geo"].split("&"))
        except Exception:
            # missing or non-string geo is reported as empty
            geo = ""
        weibo_list.append({
            "uid": uid,
            # .get(): the name map is incomplete whenever the portrait
            # lookup failed or a user was not found there
            "uname": uid2uname.get(uid, "unknown"),
            "ip": source["ip"],
            "geo": geo,
            "text": source["text"],
            "timestamp": source["timestamp"],
            "sentiment": source["sentiment"],
        })
    return weibo_list
def get_activity_weibo(task_name, submit_user, start_ts, time_segment=FOUR_HOUR):
    """Return the weibo posted by a group's members inside one time window.

    The group is the document ``submit_user + task_name`` of the group index.
    The window is ``[start_ts, start_ts + time_segment)`` and is searched in
    the flow-text index ``flow_text_index_name_pre + ts2datetime(start_ts)``.

    Returns:
        "task name invalid" when the group document cannot be fetched,
        "task uid list null" when it carries no uid list, otherwise a list of
        dicts with the keys ``timestamp`` (formatted by ``ts2date``), ``ip``,
        ``text`` and ``geo`` in the order returned by the timestamp-sorted
        query. A failed search yields an empty list.
    """
    # step1: get the uid list of the task
    task_id = submit_user + task_name
    try:
        group_result = es_group_result.get(
            index=group_index_name, doc_type=group_index_type, id=task_id,
            _source=False, fields=["uid_list"]
        )
    except Exception:
        group_result = {}
    if group_result == {}:
        return "task name invalid"

    try:
        uid_list = group_result["fields"]["uid_list"]
    except (KeyError, TypeError):
        uid_list = []
    if not uid_list:
        return "task uid list null"

    # step2: search the weibo of the time segment
    end_ts = start_ts + time_segment
    flow_text_index_name = flow_text_index_name_pre + ts2datetime(start_ts)
    query = [
        {"terms": {"uid": uid_list}},
        {"range": {"timestamp": {"gte": start_ts, "lt": end_ts}}},
    ]
    try:
        flow_text_es_result = es_flow_text.search(
            index=flow_text_index_name,
            doc_type=flow_text_index_type,
            body={"query": {"bool": {"must": query}},
                  "sort": "timestamp", "size": MAX_VALUE},
        )["hits"]["hits"]
    except Exception:
        # best effort: a missing daily index or a failed query gives no weibo
        flow_text_es_result = []

    results = []
    for item in flow_text_es_result:
        source = item["_source"]
        geo = source.get("geo")
        if not geo:
            geo_text = ""
        elif hasattr(geo, "split"):
            # a plain string is taken as '&'-delimited; joining it directly
            # would put a tab between every single character
            geo_text = "\t".join(geo.split("&"))
        else:
            geo_text = "\t".join(geo)
        results.append({
            "timestamp": ts2date(source["timestamp"]),
            "ip": source["ip"],
            "text": source["text"],
            "geo": geo_text,
        })
    return results
def search_group_sentiment_weibo(task_name, submit_user, start_ts, sentiment):
    """Return one day of a group's weibo that carry the requested sentiment.

    The group is the document ``submit_user + task_name`` of the group index.
    The day is ``[start_ts, start_ts + DAY)`` and is searched in the flow-text
    index ``flow_text_index_name_pre + str(ts2datetime(start_ts))``.

    Args:
        sentiment: value matched against the ``sentiment`` field; the value
            '2' matches any of the values in ``SENTIMENT_SECOND`` instead.

    Returns:
        'task name invalid' when the group document cannot be fetched,
        'task uid list null' when it carries no uid list, otherwise a list
        (ascending by timestamp) of dicts with the keys ``uid``, ``uname``,
        ``ip``, ``geo``, ``text``, ``timestamp`` and ``sentiment``. ``uname``
        is 'unknown' for authors whose name could not be looked up. A failed
        search yields an empty list.
    """
    # step1: get the uid list of the task
    task_id = submit_user + task_name
    try:
        group_result = es_group_result.get(index=group_index_name,
                                           doc_type=group_index_type,
                                           id=task_id, _source=False,
                                           fields=['uid_list'])
    except Exception:
        group_result = {}
    if group_result == {}:
        return 'task name invalid'

    try:
        uid_list = group_result['fields']['uid_list']
    except (KeyError, TypeError):
        uid_list = []
    if not uid_list:
        return 'task uid list null'

    # step2: get uid2uname
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(
            index=portrait_index_name, doc_type=portrait_index_type,
            body={'ids': uid_list}, _source=False, fields=['uname'])['docs']
    except Exception:
        # best effort: without the portrait data every author is 'unknown'
        user_portrait_result = []
    for item in user_portrait_result:
        unames = item.get('fields', {}).get('uname') or []
        if item.get('found') and unames:
            uid2uname[item['_id']] = unames[0]

    # step3: build the query for the day that starts at start_ts
    flow_text_index_name = flow_text_index_name_pre + str(ts2datetime(start_ts))
    if sentiment != '2':
        sentiment_clause = {'term': {'sentiment': sentiment}}
    else:
        sentiment_clause = {'terms': {'sentiment': SENTIMENT_SECOND}}
    query_body = [
        {'terms': {'uid': uid_list}},
        sentiment_clause,
        {'range': {'timestamp': {'gte': start_ts, 'lt': start_ts + DAY}}},
    ]
    try:
        flow_text_result = es_flow_text.search(
            index=flow_text_index_name, doc_type=flow_text_index_type,
            body={'query': {'bool': {'must': query_body}},
                  'sort': [{'timestamp': {'order': 'asc'}}],
                  'size': MAX_VALUE})['hits']['hits']
    except Exception:
        flow_text_result = []

    # step4: shape the hits
    weibo_list = []
    for flow_text_item in flow_text_result:
        source = flow_text_item['_source']
        uid = source['uid']
        try:
            geo = '\t'.join(source['geo'].split('&'))
        except Exception:
            # missing or non-string geo is reported as empty
            geo = ''
        weibo_list.append({
            'uid': uid,
            # .get(): the name map is incomplete whenever the portrait
            # lookup failed or a user was not found there
            'uname': uid2uname.get(uid, 'unknown'),
            'ip': source['ip'],
            'geo': geo,
            'text': source['text'],
            'timestamp': source['timestamp'],
            'sentiment': source['sentiment'],
        })
    return weibo_list
try: seed_user_result = es_user_portrait.search(index=portrait_index_name, doc_type=portrait_index_type, \ body={'query':{'bool':{'must':query}}, 'size':1})['hits']['hits'] except Exception, e: raise e try: seed_user_source = seed_user_result[0]['_source'] except: return 'seed user invalid' #step2: identify the detect task name is valid----is not in group es task_information = input_dict['task_information'] task_name = task_information['task_name'] task_id = task_information['task_id'] try: task_exist_result = es_group_result.get(index=group_index_name, doc_type=group_index_type, id=task_id) except: task_exist_result = {} if task_exist_result != {}: return 'task name invalid' #step3: save to es es_status = save_detect2es(input_dict) #step4: save to redis queue redis_status = save_detect2redis(input_dict) #identify the operation status if es_status==True and redis_status==True: status = True else: status = False