# -*- coding: utf-8 -*-
# Standard-library imports used throughout this module. The es clients
# (`es`, `es_user_profile`, `es_sensitive_user_portrait`), the redis handles
# (`r`, `word_r`), the index-name constants and project helpers such as
# identify_task, ts2datetime, datetime2ts, date2ts, ts2date, uid2uname,
# query_body_module, get_top_user, get_topic_user, search_order, domain_dict
# and shift_dict are assumed to come from this project's own config/utils
# modules.
import json
import time


def search_task(task_name, submit_date, state, status):
    # Build a bool-must query from whichever filter conditions were supplied.
    query = []
    condition_num = 0
    if task_name:
        task_name_list = task_name.split(' ')
        for item in task_name_list:
            #print 'item:', item
            query.append({'wildcard': {'task_name': '*' + item + '*'}})
            condition_num += 1
    if submit_date:
        query.append({'match': {'submit_date': submit_date}})
        condition_num += 1
    if state:
        state_list = state.split(' ')
        for item in state_list:
            query.append({'wildcard': {'state': '*' + item + '*'}})
            condition_num += 1
    if status:
        query.append({'match': {'status': status}})
        condition_num += 1
    if condition_num > 0:
        source = es.search(
            index='group_result',
            doc_type='group',
            body={
                'query': {'bool': {'must': query}},
                #'sort': [{'count': {'order': 'desc'}}],
                'size': 10000
            })
    else:
        source = es.search(
            index='group_result',
            doc_type='group',
            body={
                'query': {'match_all': {}},
                #'sort': [{'count': {'order': 'desc'}}],
                'size': 10000
            })
    try:
        task_dict_list = source['hits']['hits']
    except (KeyError, TypeError):
        return None
    result = []
    #print 'len task_dict_list:', len(task_dict_list)
    for task_dict in task_dict_list:
        result.append([task_dict['_source']['task_name'],
                       task_dict['_source']['submit_date'],
                       task_dict['_source']['count'],
                       task_dict['_source']['state'],
                       task_dict['_source']['status']])
    return result

def search_portrait(condition_num, query, sort, size):
    # when filter conditions exist, `sort` is a ready-made sort clause;
    # otherwise it is a bare field name, sorted descending
    user_result = []
    index_name = 'sensitive_user_portrait'
    index_type = 'user'
    if condition_num > 0:
        result = es.search(index=index_name, doc_type=index_type,
                           body={'query': {'bool': {'must': query}},
                                 'sort': sort, 'size': size})['hits']['hits']
    else:
        result = es.search(index=index_name, doc_type=index_type,
                           body={'query': {'match_all': {}},
                                 'sort': [{sort: {'order': 'desc'}}],
                                 'size': size})['hits']['hits']
    if result:
        for item in result:
            user_dict = item['_source']
            score = item['_score']
            if not user_dict['uname']:
                user_dict['uname'] = 'unknown'
            if not user_dict['location']:
                user_dict['location'] = 'unknown'
            user_result.append([user_dict['uid'], user_dict['uname'],
                                user_dict['location'], user_dict['activeness'],
                                user_dict['importance'], user_dict['influence'],
                                score])
    return user_result

def search_track_task(task_name, submit_date, state, status):
    result = []
    query = []
    condition_num = 0
    if task_name:
        task_name_list = task_name.split(' ')
        for item in task_name_list:
            query.append({'wildcard': {'task_name': '*' + item + '*'}})
            condition_num += 1
    if submit_date:
        query.append({'term': {'submit_date': submit_date}})
        condition_num += 1
    if state:
        state_list = state.split(' ')
        for item in state_list:
            query.append({'wildcard': {'state': '*' + item + '*'}})
            condition_num += 1
    if status:
        query.append({'term': {'status': status}})
        condition_num += 1
    if condition_num > 0:
        source = es.search(index='group_result', doc_type='group',
                           body={'query': {'bool': {'must': query}},
                                 'size': 100000})
    else:
        source = es.search(index='group_result', doc_type='group',
                           body={'query': {'match_all': {}},
                                 'size': 100000})
    try:
        task_dict_list = source['hits']['hits']
    except (KeyError, TypeError):
        return None
    for task_dict in task_dict_list:
        # default the end date for tasks that are still running
        # (the original tested 'end_date' on the hit instead of its _source)
        if 'end_date' not in task_dict['_source']:
            task_dict['_source']['end_date'] = u'至今'  # "up to now"
        result.append([task_dict['_source']['task_name'],
                       task_dict['_source']['submit_date'],
                       # the original appended submit_date twice here
                       task_dict['_source']['end_date'],
                       task_dict['_source']['count'],
                       task_dict['_source']['state'],
                       task_dict['_source']['status']])
    return result

def ajax_get_hot_keywords():
    # top-20 sensitive words across all portraits, via a terms aggregation
    query_body = {
        'query': {
            'match_all': {}
        },
        'aggs': {
            'hot_words': {
                'terms': {'field': 'sensitive_words_string', 'size': 20}
            }
        }
    }
    sensitive_words = []
    search_results = es_sensitive_user_portrait.search(
        index=portrait_index_name, doc_type=portrait_index_type,
        body=query_body)['aggregations']['hot_words']['buckets']
    if search_results:
        for item in search_results:
            sensitive_words.append([item['key'], item['doc_count']])
    return json.dumps(sensitive_words)

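# Usage sketch (illustrative, not from the source): the aggregation above
# yields the 20 most frequent sensitive words as [[word, doc_count], ...],
# already JSON-encoded for an ajax caller, e.g.:
#
#   ajax_get_hot_keywords()
#   # -> '[["keyword_a", 321], ["keyword_b", 198], ...]'
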
def search_current_es(domain, date, number):
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            {'term': {'topic_string': domain}}
                        ]
                    }
                }
            }
        },
        'sort': {'influence': {'order': 'desc'}},
        'size': number
    }
    search_result = es.search(index=date, doc_type='user',
                              body=query_body)['hits']['hits']
    result_list = []
    for item in search_result:
        if not item['_source']['uname']:
            item['_source']['uname'] = 'unknown'
            item['_source']['photo_url'] = 'unknown'
        result_list.append([item['_id'], item['_source']['uname'],
                            item['_source']['photo_url'],
                            item['_source']['influence'],
                            item['_source']['sensitive'],
                            item['_source']['importance'],
                            item['_source']['activeness']])
    return result_list

def get_attribute_name():
    attribute_name_list = []
    try:
        attribute_result = es.search(index=attribute_index_name, doc_type=attribute_index_type,
                                     body={'query': {'match_all': {}}})['hits']['hits']
    except Exception as e:
        raise e
    # (reconstructed ending -- the source is truncated here; the attribute
    # name is assumed to be the document id)
    for item in attribute_result:
        attribute_name_list.append(item['_id'])
    return attribute_name_list

def get_sentiment_weibo(task_name, sentiment, timestamp):
    result = []
    #step1: get task user
    #step2: search weibo by condition: task_user, sensitive_status, sentiment, timestamp
    task_exist = identify_task(task_name)
    if not task_exist:
        return 'the task does not exist'
    task_user = task_exist['uid_list']
    query_body = []
    # multi-search: uid_list
    nest_body_list = []
    for uid in task_user:
        nest_body_list.append({'term': {'uid': uid}})
    query_body.append({'bool': {'should': nest_body_list}})
    # range search: timestamp ~ timestamp+900
    query_body.append({'range': {'timestamp': {'from': timestamp, 'to': timestamp + 900}}})
    # match search: sensitive_status
    #query_body.append({'term': {'sensitive': sensitive_status}})
    # match search: sentiment
    query_body.append({'term': {'sentiment': sentiment}})
    try:
        weibo_result = es.search(index=text_index_name, doc_type=text_index_type,
                                 body={'query': {'bool': {'must': query_body}},
                                       'sort': [{'timestamp': {'order': 'asc'}}],
                                       'size': 10000})['hits']['hits']
    except Exception as e:
        raise e
    # (reconstructed ending -- the source is truncated here) return the
    # matching weibo documents in time order
    for item in weibo_result:
        result.append(item['_source'])
    return result

def get_geo_weibo(task_name, geo, timestamp):
    result = []
    #step1: identify task exist
    #step2: search weibo from monitor_user_text by condition: task_user, geo, timestamp
    task_exist = identify_task(task_name)
    if not task_exist:
        return 'the task does not exist'
    task_user = task_exist['uid_list']
    query_body = []
    # multi-search: task user
    nest_body_list = []
    for uid in task_user:
        nest_body_list.append({'term': {'uid': uid}})
    query_body.append({'bool': {'should': nest_body_list}})
    # range-search: timestamp ~ timestamp + 24h
    query_body.append({'range': {'timestamp': {'from': timestamp, 'to': timestamp + 24 * 3600}}})
    # term-search: geo (match on the city, the last segment)
    geo_list = geo.split('\t')
    city = geo_list[-1]
    query_body.append({'wildcard': {'geo': '*' + city + '*'}})
    try:
        weibo_dict_list = es.search(index=text_index_name, doc_type=text_index_type,
                                    body={'query': {'bool': {'must': query_body}},
                                          'sort': [{'timestamp': {'order': 'asc'}}],
                                          'size': 10000})['hits']['hits']
    except Exception as e:
        raise e
    # (reconstructed ending -- the source is truncated here)
    for item in weibo_dict_list:
        result.append(item['_source'])
    return result

def search_important(category, detail):
    # top-20 most sensitive users matching one portrait field value
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'term': {category: detail}
                }
            }
        },
        'sort': {'sensitive': {'order': 'desc'}},
        'size': 20
    }
    results = es.search(index='sensitive_user_portrait', doc_type='user',
                        body=query_body)['hits']['hits']
    uid_list = []
    for item in results:
        uid_list.append([item['_source']['uid'], item['_source']['uname']])
    return uid_list

def search_domain(domain, date, order, number=100):
    # top-influence users in a domain: scan the daily influence ranking and
    # keep users whose portrait topic_string contains the domain.
    # (the original grew its window one hit at a time but built the query body
    # outside the loop, so it re-read the same hit forever; a single scan
    # window -- size 1000 here, an assumed bound -- replaces it)
    result_list = []
    date = str(date).replace('-', '')
    order = str(order)
    query_body = {
        'query': {'match_all': {}},
        'sort': {search_order[order]: {'order': 'desc'}},
        'size': 1000
    }
    search_results = es.search(index=date, doc_type='bci',
                               body=query_body)['hits']['hits']
    uid_list = [item['_id'] for item in search_results]
    if not uid_list:
        return result_list
    portrait_results = es.mget(index='sensitive_user_portrait', doc_type='user',
                               body={'ids': uid_list})['docs']
    for item in portrait_results:
        if '_source' not in item:
            continue
        #print item
        domain_list = item['_source']['topic_string'].split('&')  # attention
        if domain in set(domain_list):
            result_list.append([item['_id'], item['_source']['uname'],
                                item['_source']['photo_url'],
                                item['_source']['influence'],
                                item['_source']['sensitive'],
                                item['_source']['importance'],
                                item['_source']['activeness']])
            count_n = len(result_list)
            if count_n >= int(number):
                break
    return result_list

def test_influence_rank(domain, date, order):
    # rank the users of one domain by a chosen influence metric.
    # (the original issued a global top-100 search and paired it index-by-index
    # with the domain's uid_list, which do not line up; fetching each domain
    # user's bci record by id keeps both lists aligned)
    uid_list = domain_dict[domain]
    order = str(order)
    bci_result = es.mget(index=date, doc_type='bci',
                         body={'ids': uid_list})['docs']
    portrait_result = es.mget(index='sensitive_user_portrait', doc_type='user',
                              body={'ids': uid_list})['docs']
    results = []
    for i in range(len(uid_list)):
        detail = []
        try:
            detail.append(bci_result[i]['_source'][search_order[order]])
        except KeyError:
            #print uid_list[i]
            detail.append(0)
        try:
            temp = portrait_result[i]['_source']
        except KeyError:
            continue
        detail.extend([temp['uid'], temp['uname'], temp['photo_url'],
                       temp['activeness'], temp['importance'], temp['sensitive']])
        results.append(detail)
    sorted_list = sorted(results, key=lambda x: x[0], reverse=True)
    return sorted_list

def search_sensitive_text(uid, stype=0, sort_type="timestamp"):
    results = []
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            {'term': {'uid': uid}},
                            {'term': {'sensitive': 1}}
                        ]
                    }
                }
            }
        },
        'sort': {sort_type: {'order': 'desc'}}
    }
    # stype: 0 -- all messages; 1 -- original weibo only; 2 -- retweets only
    if int(stype) == 1:
        query_body['query']['filtered']['filter']['bool']['must'].append(
            {'term': {'message_type': 1}})
    elif int(stype) == 2:
        query_body['query']['filtered']['filter']['bool']['must'].append(
            {'term': {'message_type': 2}})
    # the original ran the query only for stype == 0, leaving search_results
    # unbound for the other branches; run it once for every branch instead
    search_results = es.search(index='sensitive_user_text', doc_type='user',
                               body=query_body)['hits']['hits']
    if search_results:
        results = search_results
    return results

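# Usage sketch (illustrative; the uid value is hypothetical): fetch a user's
# sensitive retweets, newest first.
#
#   hits = search_sensitive_text('1234567890', stype=2, sort_type='timestamp')
#   for hit in hits:
#       print hit['_source']['text']
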
def get_evaluate_max():
    max_result = {}
    index_name = 'user_portrait'
    index_type = 'user'
    evaluate_index = ['importance', 'influence']
    for evaluate in evaluate_index:
        query_body = {
            'query': {'match_all': {}},
            'size': 1,
            'sort': [{evaluate: {'order': 'desc'}}]
        }
        try:
            result = es.search(index=index_name, doc_type=index_type,
                               body=query_body)['hits']['hits']
        except Exception as e:
            raise e
        max_evaluate = result[0]['_source'][evaluate]
        max_result[evaluate] = max_evaluate
    # (reconstructed ending -- the source is truncated here)
    return max_result

def search_in_portrait(category):
    query_body = {
        'query': {'match_all': {}},
        'sort': {category: {'order': 'desc'}}
    }
    results = es.search(index='sensitive_user_portrait', doc_type='user',
                        body=query_body)['hits']['hits']
    uid_list = []
    for item in results:
        uid_list.append([item['_source']['uid'], item['_source']['uname'],
                         item['_source'][category]])
    return uid_list

def user_sentiment_trend(uid):
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'term': {'uid': uid}
                }
            }
        }
    }
    search_results = es.search(index='sensitive_user_text', doc_type='user',
                               body=query_body)['hits']['hits']
    sentiment_dict = dict()
    sentiment_results = dict()
    # bucket each weibo's sentiment payload by day
    for item in search_results:
        datetime = ts2datetime(float(item['_source']['timestamp'])).replace('-', '')
        try:
            sentiment_dict[datetime].append(json.loads(item['_source']['sentiment']))
        except KeyError:
            sentiment_dict[datetime] = [json.loads(item['_source']['sentiment'])]
    total_positive = 0
    total_negetive = 0  # note: 'negetive' spelling kept; the key is consumed elsewhere
    total_neutral = 0
    for datetime, sentiment_detail in sentiment_dict.items():
        positive_count = 0
        negetive_count = 0
        neutral_count = 0
        sentiment_results[datetime] = {}
        for item in sentiment_detail:
            if not item:
                neutral_count += 1
                total_neutral += 1
                continue
            positive_dict = item.get('126', {})
            positive = sum(positive_dict.values())
            positive_count += positive
            negetive = sum(item.get('127', {}).values()) + \
                       sum(item.get('128', {}).values()) + \
                       sum(item.get('129', {}).values())
            negetive_count += negetive
            if positive > negetive:
                total_positive += 1
            elif positive < negetive:
                total_negetive += 1
            else:
                total_neutral += 1
        sentiment_results[datetime]['neutral'] = neutral_count
        sentiment_results[datetime]['positive'] = positive_count
        sentiment_results[datetime]['negetive'] = negetive_count
    return [[total_positive, total_neutral, total_negetive], sentiment_results]

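# Note on the sentiment codes above (as used throughout this module): '126' is
# summed as the positive bucket, while '127', '128' and '129' are summed as
# negative; a weibo with an empty payload counts as neutral.
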
def get_network(task_exist):
    task_name = task_exist['task_name']
    submit_date = task_exist['submit_date']
    submit_ts = date2ts(submit_date)
    time_segment = 24 * 3600
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    now_date_ts = datetime2ts(now_date)
    iter_date_ts = now_date_ts
    iter_count = 1
    date_list = []
    top_list_dict = {}
    task_date_result = {}
    while True:
        # walk back at most 7 days, and never before the task submit date
        if iter_count >= 8 or iter_date_ts < submit_ts:
            break
        iter_date = ts2datetime(iter_date_ts)
        date_list.append(iter_date)
        key = 'inner_' + str(iter_date)
        try:
            # fetch the per-day result document (the original called es.search
            # with an id argument; get is the call that takes an id)
            task_date_result = es.get(index=monitor_index_name, doc_type=task_name,
                                      id=key)['_source']
        except Exception:
            task_date_result = {}
        iter_field = ['top1', 'top2', 'top3', 'top4', 'top5']
        for field in iter_field:
            if field not in task_date_result:
                continue  # guard: the original indexed into an empty dict here
            user_count_item = json.loads(task_date_result[field])
            uid = user_count_item[0]
            uname = uid2uname(uid)
            count = user_count_item[1]
            try:
                top_list_dict[field].append([uid, uname, count])
            except KeyError:
                top_list_dict[field] = [[uid, uname, count]]
        iter_date_ts -= time_segment
        iter_count += 1  # the original never incremented this counter
    # get inner-retweet group from es --- field: inner_graph
    # (note: this reads the last day fetched in the loop above)
    try:
        inner_graph = json.loads(task_date_result['inner_graph'])
    except Exception:
        inner_graph = {}
    return [date_list, top_list_dict, inner_graph]

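# Usage sketch (illustrative): the return value is
#   [date_list, top_list_dict, inner_graph]
# where top_list_dict maps 'top1'..'top5' to per-day [uid, uname, count] rows
# and inner_graph is the stored inner-retweet graph of the last day fetched.
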
def get_inner_top_weibo(task_name, date, uid):
    result = []
    # step1: identify the task exists
    # step2: search weibo from monitor_user_text by condition: task_user, date
    task_exist = identify_task(task_name)
    if not task_exist:
        return 'the task does not exist'
    task_user = task_exist['uid_list']
    if uid not in task_user:
        return 'the user does not exist'
    end_ts = datetime2ts(date)
    time_segment = 24 * 3600
    start_ts = end_ts - time_segment
    query_body = []
    # term search: uid (the original passed {'term': uid}, missing the field name)
    query_body.append({'term': {'uid': uid}})
    # range search: date-24*3600 ~ date
    query_body.append({'range': {'timestamp': {'from': start_ts, 'to': end_ts}}})
    try:
        weibo_result = es.search(index=text_index_name, doc_type=text_index_type,
                                 body={'query': {'bool': {'must': query_body}},
                                       'sort': [{'timestamp': {'order': 'asc'}}],
                                       'size': 10000})['hits']['hits']
    except Exception as e:
        raise e
    # (reconstructed ending -- the source is truncated here)
    for item in weibo_result:
        result.append(item['_source'])
    return result

def _bci_detail_numbers(weibo_bci, mid, sensitive, message_type):
    # Helper factored out of the four near-identical branches the original
    # repeated inside search_full_text: look up one weibo's retweet/comment
    # counters in the user's daily bci record. (The original also probed a
    # misspelled '...retweetd...' key before loading the correctly spelled
    # one; the spelling is unified here.)
    if int(message_type) == 1:
        base = 'origin_weibo'
    elif int(message_type) == 2:
        base = 'retweeted_weibo'
    else:
        return 0, 0
    if sensitive:
        base = 's_' + base
    retweeted_detail = json.loads(weibo_bci.get(base + '_retweeted_detail') or '{}')
    comment_detail = json.loads(weibo_bci.get(base + '_comment_detail') or '{}')
    return retweeted_detail.get(mid, 0), comment_detail.get(mid, 0)


def search_full_text(uid, date):
    # one day of a user's weibos from the flow text index, each row carrying
    # sensitivity, type, time, geo, text and its retweet/comment counts
    index_flow_text = flow_text_index_name_pre + date
    doctype_flow_text = flow_text_index_type
    result = []
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            {'term': {'uid': uid}}
                        ]
                    }
                }
            }
        },
        'size': 200,
        'sort': {'timestamp': {'order': 'desc'}}
    }
    search_results = es.search(index=index_flow_text, doc_type=doctype_flow_text,
                               body=query_body)['hits']['hits']
    date = date.replace('-', '')
    try:
        # the daily bci record is the same for every hit, so fetch it once
        weibo_bci = es.get(index=date, doc_type='bci', id=uid)['_source']
    except Exception:
        weibo_bci = {}
    for item in search_results:
        detail = []
        source = item['_source']
        detail.append(source.get('sensitive', 0))
        detail.append(source['message_type'])
        ts = source['timestamp']
        re_time = time.strftime('%H:%M:%S', time.localtime(float(ts)))
        detail.append(re_time)
        geo_string = source['geo']
        geo_list = geo_string.split('\t')  # the original split on '/t', a typo for tab
        if len(geo_list) >= 3:
            geo = '\t'.join(geo_list[-2:])
        else:
            geo = geo_string
        detail.append(geo)
        detail.append(source['text'])
        retweeted_number, comment_number = _bci_detail_numbers(
            weibo_bci, source['mid'], source.get('sensitive', 0),
            source['message_type'])
        detail.append(retweeted_number)
        detail.append(comment_number)
        result.append(detail)
    return result

# (truncated fragment -- the head of the enclosing function is missing from
# the source; preserved here as a comment)
#     status = True
#     return status

# use to search attribute table
def search_attribute(query_body, condition_num):
    item_list = []
    default_size = 100000
    if condition_num == 0:
        try:
            result = es.search(index=attribute_index_name, doc_type=attribute_index_type,
                               body={'query': {'match_all': {}},
                                     'size': default_size})['hits']['hits']
        except Exception as e:
            raise e
    else:
        try:
            result = es.search(index=attribute_index_name, doc_type=attribute_index_type,
                               body={'query': {'bool': {'must': query_body}},
                                     'size': default_size})['hits']['hits']
        except Exception as e:
            raise e
    if result:
        for item in result:
            #print 'item:', item
            source = item['_source']
            item_list.append(source)
    return item_list

# use to change attribute
def change_attribute(attribute_name, value, user, state):
    status = False
    # identify that the attribute_name is in ES - custom attribute
    try:
        result = es.get(index=attribute_index_name, doc_type=attribute_index_type,
                        id=attribute_name)['_source']
    except Exception:
        # (reconstructed ending -- the source breaks off inside this try block)
        return status
    # ... the rest of the function is missing from the source
    return status

def compute_mid_result(task_name, task_submit_date):
    result = {'count_0': {}, 'count_1': {}, 'sentiment_0_126': {}, 'sentiment_0_127': {},
              'sentiment_0_128': {}, 'sentiment_0_129': {}, 'sentiment_0_130': {},
              'sensitive_score': {}, 'geo_0': {}, 'geo_1': {},
              'hashtag_0': {}, 'hashtag_1': {}, 'sentiment_1_126': {}, 'sentiment_1_127': {},
              'sentiment_1_128': {}, 'sentiment_1_129': {}, 'sentiment_1_130': {}}
    #geo & hashtag: day
    #other: 15min
    search_time_segment = 3600 * 4
    #start_ts = datetime2ts(task_submit_date)
    start_ts = date2ts(task_submit_date)
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    #test
    now_ts = datetime2ts('2013-09-08')
    date_ts = datetime2ts(now_date)
    segment = int((now_ts - date_ts) / 900) + 1
    end_ts = date_ts + segment * 900
    # search in 4-hour windows -- bulk action to search
    begin_ts = start_ts
    while True:
        if begin_ts >= end_ts:
            break
        compute_ts = ts2date(begin_ts)
        #print 'compute ts:', compute_ts
        query_body = {'range': {'timestamp': {'from': begin_ts,
                                              'to': begin_ts + search_time_segment}}}
        try:
            mid_result_list = es.search(index=monitor_index_name, doc_type=task_name,
                                        body={'query': query_body, 'size': 100000,
                                              'sort': [{'timestamp': {'order': 'asc'}}]})['hits']['hits']
        except Exception as e:
            raise e
        if mid_result_list:
            for mid_result_item in mid_result_list:
                result_item = mid_result_item['_source']
                timestamp = result_item['timestamp']
                #attr_count
                count_dict = json.loads(result_item['count'])
                for sensitive in count_dict:
                    count_key = 'count_' + sensitive
                    result[count_key][str(timestamp)] = count_dict[sensitive]
                #attr_sentiment
                sensitive_sentiment_dict = json.loads(result_item['sentiment'])
                for sensitive in sensitive_sentiment_dict:
                    sentiment_dict = sensitive_sentiment_dict[sensitive]
                    for sentiment in sentiment_dict:
                        sentiment_key = 'sentiment_' + sensitive + '_' + sentiment
                        result[sentiment_key][str(timestamp)] = sentiment_dict[sentiment]
                #attr_sensitive_score
                if 'sensitive_word' in result_item:
                    sensitive_word_dict = json.loads(result_item['sensitive_word'])
                else:
                    sensitive_word_dict = {}
                ts_word_score = 0
                for word in sensitive_word_dict:
                    search_word = word.encode('utf-8')
                    try:
                        word_identify = json.loads(word_r.hget('sensitive_words', search_word))
                    except Exception:
                        word_identify = [2]
                    ts_word_score += sensitive_word_dict[word] * word_identify[0]
                result['sensitive_score'][str(timestamp)] = ts_word_score
                #attr_geo
                timestamp_date = ts2datetime(timestamp)
                sensitive_geo_dict = json.loads(result_item['geo'])
                for sensitive in sensitive_geo_dict:
                    if timestamp_date not in result['geo_' + sensitive]:
                        result['geo_' + sensitive][timestamp_date] = {}
                    geo_dict = sensitive_geo_dict[sensitive]
                    for geo in geo_dict:
                        try:
                            result['geo_' + sensitive][timestamp_date][geo] += geo_dict[geo]
                        except KeyError:
                            result['geo_' + sensitive][timestamp_date][geo] = geo_dict[geo]
                #attr_hashtag
                if 'hashtag' in result_item:
                    sensitive_hashtag_dict = json.loads(result_item['hashtag'])
                else:
                    sensitive_hashtag_dict = {}
                    result['hashtag_0'][timestamp_date] = {}
                    result['hashtag_1'][timestamp_date] = {}
                # (the original duplicated this loop header)
                for sensitive in sensitive_hashtag_dict:
                    if timestamp_date not in result['hashtag_' + sensitive]:
                        result['hashtag_' + sensitive][timestamp_date] = {}
                    hashtag_dict = sensitive_hashtag_dict[sensitive]
                    for hashtag in hashtag_dict:
                        try:
                            result['hashtag_' + sensitive][timestamp_date][hashtag] += hashtag_dict[hashtag]
                        except KeyError:
                            result['hashtag_' + sensitive][timestamp_date][hashtag] = hashtag_dict[hashtag]
        begin_ts += search_time_segment
    # (reconstructed ending -- the source breaks off after the loop)
    return result

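# Shape of the computed mid-result (from the initialisation above):
# count_*/sentiment_*/sensitive_score map 15-minute timestamps to numbers,
# while geo_* and hashtag_* map dates to {name: count} dicts, e.g. (values
# illustrative):
#
#   {'count_1': {'1378569600': 42},
#    'geo_1': {'2013-09-07': {'geo_name': 3}}}
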
def get_attr(date):
    results = dict()
    number = es.count(index='sensitive_user_portrait', doc_type='user')['count']
    results['total_number'] = number
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'term': {'type': 1}
                }
            }
        }
    }
    sensitive_number = es.count(index='sensitive_user_portrait', doc_type='user',
                                body=query_body)['count']
    results['sensitive_number'] = sensitive_number
    results['influence_number'] = number - sensitive_number

    recommend_in_sensitive = 0
    sensitive_dict = r.hgetall('recommend_sensitive')
    for k, v in sensitive_dict.items():
        if v:
            sensitive_list = json.loads(v)
            recommend_in_sensitive += len(sensitive_list)
    recommend_in_influence = 0
    influence_dict = r.hgetall('recommend_influence')
    for k, v in influence_dict.items():
        if v:
            sensitive_list = json.loads(v)
            recommend_in_influence += len(sensitive_list)
    results['recommend_in'] = recommend_in_influence + recommend_in_sensitive
    results['monitor_number'] = [4, 83]  # test
    results['new_sensitive_words'] = 5  # test

    query_body = query_body_module('sensitive_words_string')
    sw_list = es.search(index='sensitive_user_portrait', doc_type='user',
                        body=query_body)['aggregations']['all_interests']['buckets']
    sensitive_words = []
    for item in sw_list:
        sensitive_words.append([item['key'], item['doc_count']])
    results['sensitive_words'] = sensitive_words

    query_body = query_body_module('sensitive_geo_string')
    sg_list = es.search(index='sensitive_user_portrait', doc_type='user',
                        body=query_body)['aggregations']['all_interests']['buckets']
    sensitive_geo = []
    for item in sg_list:
        sensitive_geo.append([item['key'], item['doc_count']])
    results['sensitive_geo'] = sensitive_geo

    query_body = query_body_module('sensitive_hashtag_string')
    sh_list = es.search(index='sensitive_user_portrait', doc_type='user',
                        body=query_body)['aggregations']['all_interests']['buckets']
    sensitive_hashtag = []
    for item in sh_list:
        sensitive_hashtag.append([item['key'], item['doc_count']])
    results['sensitive_hashtag'] = sensitive_hashtag

    # (the original repeated the sensitive_geo aggregation verbatim here;
    # the duplicate block has been removed)

    query_body = query_body_module('psycho_status_string')
    sp_list = es.search(index='sensitive_user_portrait', doc_type='user',
                        body=query_body)['aggregations']['all_interests']['buckets']
    psycho_status = []
    for item in sp_list:
        psycho_status.append([item['key'], item['doc_count']])
    results['psycho_status'] = psycho_status

    '''
    query_body = query_body_module('political_tendency')
    st_list = es.search(index='sensitive_user_portrait', doc_type='user',
                        body=query_body)['aggregations']['all_interests']['buckets']
    political_tendency = []
    for item in st_list:
        political_tendency.append([item['key'], item['doc_count']])
    results['political_tendency'] = political_tendency
    '''
    results['political_tendency'] = [['left', 123], ['middle', 768], ['right', 1095]]

    '''
    query_body = query_body_module('domain_string')
    sd_list = es.search(index='sensitive_user_portrait', doc_type='user',
                        body=query_body)['aggregations']['all_interests']['buckets']
    domain = []
    for item in sd_list:
        domain.append([item['key'], item['doc_count']])
    results['domain'] = domain
    '''

    # tendency distribution
    # domain and topic
    domain_list = ['']  #search_important('domain', )
    domain_results = get_top_user()
    topic_results = get_topic_user()
    results['domain_rank'] = domain_results
    results['topic_rank'] = topic_results

    # rank
    important_list = search_in_portrait('importance')
    results['importance'] = important_list
    results['sensitive'] = search_in_portrait('sensitive')
    results['influence'] = search_in_portrait('influence')
    results['activeness'] = search_in_portrait('activeness')

    def _top_bci_detail(date, field):
        # helper factored out of three identical blocks in the original:
        # top users on one bci field, with nick names resolved from profiles
        query_body = {
            'query': {'match_all': {}},
            'sort': {field: {'order': 'desc'}}
        }
        results_list = es.search(index=date, doc_type='bci',
                                 body=query_body)['hits']['hits']
        detail = []
        for item in results_list:
            uid = item['_source']['uid']
            try:
                uname = es_user_profile.get(index='weibo_user', doc_type='user',
                                            id=uid)['_source']['nick_name']
            except Exception:
                uname = 'unknown'
            detail.append([uid, uname, item['_source'][field]])
        return detail

    date = ts2datetime(time.time() - 24 * 3600).replace('-', '')
    date = '20130907'  # test: hard-coded date override, as in the original
    results['comment_total'] = _top_bci_detail(date, 's_origin_weibo_comment_total_number')
    results['retweeted_total'] = _top_bci_detail(date, 's_origin_weibo_retweeted_total_number')
    results['top_weibo_number'] = _top_bci_detail(date, 's_origin_weibo_number')
    return results

def imagine(uid, query_fields_dict, index_name='sensitive_user_portrait', doctype='user'):
    """
    uid: search users related to uid
    query_fields_dict: search-field weights, plus 'field' (scoring metric)
        and 'size' (result count)
    fields: domain, topic, keywords, psycho_status, psycho_feature,
        activity_geo, hashtag
    for example: "domain": 2
    """
    personal_info = es.get(index='sensitive_user_portrait', doc_type='user',
                           id=uid, _source=True)['_source']
    keys_list = query_fields_dict.keys()
    keys_list.remove('field')
    keys_list.remove('size')
    search_dict = {}
    iter_list = []
    # keep only the fields the seed user actually has values for
    for iter_key in keys_list:
        if iter_key not in personal_info or personal_info[iter_key] == '':
            query_fields_dict.pop(iter_key)
        else:
            iter_list.append(iter_key)
            temp = personal_info[iter_key]
            search_dict[iter_key] = temp.split('&')
    if len(iter_list) == 0:
        return []
    query_body = {
        'query': {
            'function_score': {
                'query': {
                    'bool': {
                        'must': []
                    }
                },
                'field_value_factor': {}
            }
        }
    }
    # scoring metric and scale factor
    score_standard = {}
    score_standard['modifier'] = 'log1p'
    if query_fields_dict['field'] == 'activeness':
        score_standard['field'] = 'activeness'
        score_standard['factor'] = 100
    elif query_fields_dict['field'] == 'importance':
        score_standard['field'] = 'importance'
        score_standard['factor'] = 0.01
    elif query_fields_dict['field'] == 'sensitive':
        score_standard['field'] = 'sensitive'
        score_standard['factor'] = 100
    elif query_fields_dict['field'] == 'influence':
        score_standard['field'] = 'influence'
        score_standard['factor'] = 0.1
    else:
        score_standard['field'] = 'influence'
        score_standard['factor'] = 0
    query_body['query']['function_score']['boost_mode'] = 'sum'
    query_body['query']['function_score']['field_value_factor'] = score_standard
    query_fields_dict.pop('field')
    # note: counted before the similarity clauses are added below, so this is
    # the number of scored documents, not of similar users
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body['size'] = 100  # default number
    query_number = query_fields_dict['size']  # required number
    query_fields_dict.pop('size')
    for (k, v) in query_fields_dict.items():
        temp_list = []
        for iter_key in search_dict[k]:
            temp_list.append({'wildcard': {k: {'wildcard': '*' + iter_key + '*',
                                               'boost': v}}})
        query_body['query']['function_score']['query']['bool']['must'].append(
            {'bool': {'should': temp_list}})
    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence', 'sensitive']
    return_list = []
    count = 0
    for item in result:
        if uid == item['_id']:
            score = item['_score']
            continue
        info = []
        if not item['_source']['uname']:
            item['_source']['uname'] = 'unknown'
        for field in field_list:
            info.append(item['_source'][field])
        info.append(item['_score'])
        # record which values each hit shares with the seed user
        common_dict = dict()
        for iter_key in iter_list:
            iter_common_list = item['_source'][iter_key].split('&')
            search_common_list = list(set(iter_common_list) & set(search_dict[iter_key]))
            iter_key = shift_dict[iter_key]
            common_dict[iter_key] = search_common_list
        info.append(common_dict)
        return_list.append(info)
        count += 1
        if count == query_number:
            break
    return_list.append(number)
    temp_list = []
    for field in field_list:
        temp_list.append(personal_info[field])
    results = []
    results.append(temp_list)
    results.extend(return_list)
    return results

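# Usage sketch (hypothetical values): find up to 20 users similar to a uid on
# the shared 'domain' field, scored by influence. 'field' picks the
# field_value_factor metric and 'size' the result count; the remaining keys
# are portrait fields with their boost weights.
#
#   imagine('1234567890', {'field': 'influence', 'size': 20, 'domain': 2})
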
def search_full_text(uid, date):
    # variant that reads one day of a user's weibos from sensitive_user_text.
    # (note: it shares its name with the flow-text version above; whichever is
    # defined last wins on import)
    result = []
    ts = datetime2ts(date)
    next_ts = ts + 24 * 3600
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            {'term': {'uid': uid}},
                            {'range': {'timestamp': {'gte': ts, 'lt': next_ts}}}
                        ]
                    }
                }
            }
        },
        'size': 200
    }
    search_results = es.search(index='sensitive_user_text', doc_type='user',
                               body=query_body)['hits']['hits']
    date = date.replace('-', '')
    try:
        # the daily bci record is the same for every hit, so fetch it once
        weibo_bci = es.get(index=date, doc_type='bci', id=uid)['_source']
    except Exception:
        weibo_bci = {}
    for item in search_results:
        detail = []
        source = item['_source']
        detail.append(source['sensitive'])
        detail.append(source['message_type'])
        re_time = time.strftime('%H:%M:%S', time.localtime(float(source['timestamp'])))
        detail.append(re_time)
        geo_string = source['geo']
        geo_list = geo_string.split('\t')  # the original split on '/t', a typo for tab
        if len(geo_list) >= 3:
            geo = '\t'.join(geo_list[-2:])
        else:
            geo = geo_string
        detail.append(geo)
        detail.append(source['text'])
        retweeted_number, comment_number = _bci_detail_numbers(
            weibo_bci, source['mid'], source['sensitive'], source['message_type'])
        detail.append(retweeted_number)
        detail.append(comment_number)
        result.append(detail)
    return result
