def get_group_user_track(uid):
    """Build the domestic (China) activity track for one user.

    Looks up the user's ``activity_geo_dict`` field in the user-portrait
    index, derives the dominant location per day, then reduces that to the
    set of visited domestic locations and the day-to-day movement segments.

    :param uid: user id, used as the ES document id.
    :return: ``{'city': [locations], 'line': [[from_loc, to_loc], ...]}`` on
             success, or the string ``'uid is not in user_portrait'`` when
             the portrait document is missing.
    """
    results = []
    # step1: get user_portrait activity_geo_dict
    try:
        portrait_result = es.get(index=portrait_index_name, doc_type=portrait_index_type,
                                 id=uid, _source=False, fields=['activity_geo_dict'])
    except Exception:  # narrowed from a bare except; a missing doc is the expected failure
        portrait_result = {}
    if not portrait_result:
        return 'uid is not in user_portrait'
    activity_geo_dict = json.loads(portrait_result['fields']['activity_geo_dict'][0])
    now_date_ts = datetime2ts(ts2datetime(int(time.time())))
    # one entry per day, oldest first; walk forward from the oldest day
    start_ts = now_date_ts - DAY * len(activity_geo_dict)
    # step2: iter date to get month track (dominant location per day, '' when no data)
    for geo_item in activity_geo_dict:
        iter_date = ts2datetime(start_ts)
        sort_day_dict = sorted(geo_item.items(), key=lambda x: x[1], reverse=True)
        if sort_day_dict:
            results.append([iter_date, sort_day_dict[0][0]])
        else:
            results.append([iter_date, ''])
        start_ts = start_ts + DAY
    # step3: keep only domestic locations (tab-separated country\tprovince\tcity)
    # and build consecutive-day movement segments; dropped the unused
    # index_city counter and the redundant list-comprehension copy of the set
    geolist = list(set(item[1] for item in results
                       if item[1] and item[1].split('\t')[0] == u'中国'))
    line_list = []
    for x in range(len(results) - 1):
        cur, nxt = results[x][1], results[x + 1][1]
        if cur != '' and nxt != '' and cur.split('\t')[0] == u'中国' and nxt.split('\t')[0] == u'中国':
            if cur != nxt:
                line_list.append([cur, nxt])
    return {'city': geolist, 'line': line_list}
def get_group_user_track(uid):
    """Return the per-day dominant-location track for one user.

    NOTE(review): another ``get_group_user_track`` with a different return
    shape appears elsewhere in this source; if both live in one module the
    later definition shadows the earlier one -- confirm.

    :param uid: user id, used as the ES document id in the portrait index.
    :return: list of ``[date_string, dominant_location_or_empty]`` pairs,
             oldest day first, or the string ``'uid is not in user_portrait'``
             when the portrait document is missing.
    """
    results = []
    # step1: get user_portrait activity_geo_dict
    try:
        portrait_result = es.get(index=portrait_index_name, doc_type=portrait_index_type,
                                 id=uid, _source=False, fields=['activity_geo_dict'])
    except Exception:  # narrowed from a bare except; a missing doc is the expected failure
        portrait_result = {}
    if not portrait_result:
        return 'uid is not in user_portrait'
    activity_geo_dict = json.loads(
        portrait_result['fields']['activity_geo_dict'][0])
    now_date_ts = datetime2ts(ts2datetime(int(time.time())))
    # one entry per day, oldest first; walk forward from the oldest day
    start_ts = now_date_ts - DAY * len(activity_geo_dict)
    # step2: iter date to get month track (dominant location per day)
    for geo_item in activity_geo_dict:
        iter_date = ts2datetime(start_ts)
        sort_day_dict = sorted(geo_item.items(),
                               key=lambda x: x[1],
                               reverse=True)
        if sort_day_dict:
            results.append([iter_date, sort_day_dict[0][0]])
        else:
            results.append([iter_date, ''])
        start_ts = start_ts + DAY
    return results
def get_people_org_track(activity_geo_dict):
    """Compute a movement track from per-day geo activity counts.

    (Original comment: 根据用户地理位置计算轨迹 -- compute the track from the
    user's geographic activity.)

    :param activity_geo_dict: list with one ``{location: count}`` dict per
        day, oldest first; the last entry is assumed to be today. Locations
        are tab-separated ``country\\tprovince\\tcity`` strings.
    :return: ``{'city': [domestic locations visited],
                'line': [[from_loc, to_loc], ...]}`` where only entries whose
        country field equals u'中国' (China) are kept.
    """
    results = []
    now_date_ts = datetime2ts(ts2datetime(int(time.time())))
    start_ts = now_date_ts - DAY * len(activity_geo_dict)
    # step1: dominant location per day ('' when that day has no data)
    for geo_item in activity_geo_dict:
        iter_date = ts2datetime(start_ts)
        sort_day_dict = sorted(geo_item.items(), key=lambda x: x[1], reverse=True)
        if sort_day_dict:
            results.append([iter_date, sort_day_dict[0][0]])
        else:
            results.append([iter_date, ''])
        start_ts = start_ts + DAY
    # step2: deduplicated domestic location set; dropped the unused
    # index_city counter and the redundant [i for i in set(...)] copy
    geolist = list(set(item[1] for item in results
                       if item[1] and item[1].split('\t')[0] == u'中国'))
    # step3: consecutive-day movements between two distinct domestic locations
    line_list = []
    for x in range(len(results) - 1):
        cur, nxt = results[x][1], results[x + 1][1]
        if cur != '' and nxt != '' and cur.split('\t')[0] == u'中国' and nxt.split('\t')[0] == u'中国':
            if cur != nxt:
                line_list.append([cur, nxt])
    return {'city': geolist, 'line': line_list}
def get_user_detail(date, input_result):
    """Collect display details (name, location, counts, influence) for users.

    :param date: 'YYYY-MM-DD' string selecting the per-day bci index, or
        'all' to use today's index.
    :param input_result: list of uids to look up.
    :return: list of dicts with keys uid/uname/location/fansnum/statusnum/
        influence ('' influence when the user's bci document is absent).
    """
    bci_date = ts2datetime(datetime2ts(date) - DAY)
    results = []
    uid_list = input_result
    # bci index is per-day: bci_YYYYMMDD
    if date != 'all':
        index_name = 'bci_' + ''.join(bci_date.split('-'))
    else:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        index_name = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    # influence, fans, status
    user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type,
                                      body={'ids': uid_list}, _source=True)['docs']
    # nickname, registered location
    user_profile_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type,
                                               body={'ids': uid_list}, _source=True)['docs']
    max_evaluate_influ = get_evaluate_max(index_name)
    for i, uid in enumerate(uid_list):
        bci_dict = user_bci_result[i]
        profile_dict = user_profile_result[i]
        bci_source = bci_dict.get('_source')
        if bci_source:
            influence = bci_source['user_index']
            influence = math.log(influence / float(max_evaluate_influ['user_index']) * 9 + 1, 10)
            # BUG FIX: round here while the value is numeric; the original
            # called round(influence, 2) unconditionally at append time and
            # raised TypeError on the '' missing-data branch below
            influence = round(influence * 100, 2)
        else:
            influence = ''
        # fansnum/statusnum are read from the bci doc in both profile
        # branches of the original -- hoisted out of the duplication.
        # NOTE(review): mget was issued with _source=True, so a 'fields' key
        # is typically absent and these default to 0 -- confirm intent.
        try:
            fansnum = bci_dict['fields']['user_fansnum'][0]
        except Exception:
            fansnum = 0
        try:
            statusnum = bci_dict['fields']['weibo_month_sum'][0]
        except Exception:
            statusnum = 0
        profile_source = profile_dict.get('_source')
        if profile_source:
            uname = profile_source['nick_name']
            location = profile_source['user_location']
        else:
            # no profile: fall back to the uid as the display name
            uname = uid
            location = ''
        results.append({'uid': uid, 'uname': uname, 'location': location,
                        'fansnum': fansnum, 'statusnum': statusnum,
                        'influence': influence})
    return results
def get_user_detail(date, input_result, status, user_type="influence", auth=""):
    """Collect user details with sensitivity normalisation support.

    NOTE(review): this chunk is truncated -- the function body continues
    beyond the visible text (no return statement here); reconstructed from a
    whitespace-collapsed source, so confirm formatting against the original.

    :param date: 'YYYY-MM-DD' string or 'all' (selects the bci_YYYYMMDD index).
    :param input_result: a list of uids ('show_in') or a dict keyed by uid
        (the other status values -- .keys() is taken).
    :param status: one of 'show_in' / 'show_compute' / 'show_in_history'.
    :param user_type: presumably 'influence' or 'sensitive' -- confirm against callers.
    :param auth: unused in the visible portion of the body.
    """
    bci_date = ts2datetime(datetime2ts(date) - DAY)
    results = []
    # pick the uid list according to how the caller packaged input_result
    if status == 'show_in':
        uid_list = input_result
    if status == 'show_compute':
        uid_list = input_result.keys()
    if status == 'show_in_history':
        uid_list = input_result.keys()
    if date != 'all':
        index_name = 'bci_' + ''.join(bci_date.split('-'))
    else:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        index_name = 'bci_' + ''.join(now_date.split('-'))
    # yesterday's sensitive-score field name, e.g. sensitive_score_<ts>
    tmp_ts = str(datetime2ts(date) - DAY)
    sensitive_string = "sensitive_score_" + tmp_ts
    # top-1 query: highest sensitive score of the day, used as the
    # normalisation denominator further down the (truncated) body
    query_sensitive_body = {
        "query": {
            "match_all": {}
        },
        "size": 1,
        "sort": {
            sensitive_string: {
                "order": "desc"
            }
        }
    }
    try:
        top_sensitive_result = es_bci_history.search(
            index=ES_SENSITIVE_INDEX,
            doc_type=DOCTYPE_SENSITIVE_INDEX,
            body=query_sensitive_body,
            _source=False,
            fields=[sensitive_string])['hits']['hits']
        top_sensitive = top_sensitive_result[0]['fields'][sensitive_string][0]
    except Exception, reason:
        print Exception, reason
        # fallback denominator when the search fails or returns nothing
        top_sensitive = 400
def ajax_recommentation_in():
    """HTTP handler: recommend users by influence or by sensitivity.

    Query args: date ('YYYY-MM-DD'), type ('influence' or 'sensitive'),
    submit_user (who is submitting). Returns a JSON list; empty when the
    requested date is a week or more in the past.
    """
    # request parameters, with the same defaults as before
    date = request.args.get('date', '2016-11-27')
    recomment_type = request.args.get('type', 'influence')
    submit_user = request.args.get('submit_user', 'admin')
    input_ts = datetime2ts(date)
    # "now" is real time in production (RUN_TYPE == 1), a fixed test
    # timestamp otherwise
    now_ts = time.time() if RUN_TYPE == 1 else test_time
    one_week = 3600 * 24 * 7
    if input_ts <= now_ts - one_week:
        # too old to recommend for
        return json.dumps([])
    results = recommentation_in(input_ts, recomment_type, submit_user)
    return json.dumps(results)
def recommentation_in_auto(date, submit_user): results = [] #run type if RUN_TYPE == 1: now_date = search_date else: now_date = ts2datetime(datetime2ts(RUN_TEST_TIME)) recomment_hash_name = 'recomment_' + now_date + '_auto' # print recomment_hash_name,'============' recomment_influence_hash_name = 'recomment_' + now_date + '_influence' recomment_sensitive_hash_name = 'recomment_' + now_date + '_sensitive' recomment_submit_hash_name = 'recomment_' + submit_user + '_' + now_date recomment_compute_hash_name = 'compute' # #step1: get auto # auto_result = r.hget(recomment_hash_name, 'auto') # if auto_result: # auto_user_list = json.loads(auto_result) # else: # auto_user_list = [] #step2: get admin user result admin_result = r.hget(recomment_hash_name, submit_user) admin_user_list = [] if admin_result: admin_result_dict = json.loads(admin_result) else: return None final_result = [] #step3: get union user and filter compute/influence/sensitive for k, v in admin_result_dict.iteritems(): admin_user_list = v union_user_auto_set = set(admin_user_list) influence_user = set(r.hkeys(recomment_influence_hash_name)) sensitive_user = set(r.hkeys(recomment_sensitive_hash_name)) compute_user = set(r.hkeys(recomment_compute_hash_name)) been_submit_user = set(r.hkeys(recomment_submit_hash_name)) filter_union_user = union_user_auto_set - ( influence_user | sensitive_user | compute_user | been_submit_user) auto_user_list = list(filter_union_user) #step4: get user detail if auto_user_list == []: return auto_user_list results = get_user_detail(now_date, auto_user_list, 'show_in', 'auto') for detail in results: #add root re_detail = detail re_detail.append(k) final_result.append(re_detail) return final_result
# Module setup for the "construction" blueprint: imports, test clock, routes.
from knowledge.global_utils import es_event, R_RECOMMENTATION as r
from knowledge.global_utils import es_user_portrait as es, portrait_index_name, portrait_index_type
from knowledge.global_utils import es_related_docs, user_docs_name, user_docs_type, event_docs_name, event_docs_type
from knowledge.global_config import event_task_name, event_task_type
from utils import recommentation_in, recommentation_in_auto, submit_task, identify_in, submit_event, submit_event_file,\
    relation_add, search_user, search_event, search_node_time_limit, show_node_detail, edit_node,\
    deal_user_tag, create_node_or_node_rel, show_relation, update_event, submit_identify_in,\
    node_delete, delete_relation, deal_event_tag, show_weibo_list, show_wiki, show_wiki_related,show_wiki_basic,\
    wikinode_exist
from knowledge.time_utils import ts2datetime, datetime2ts, ts2datetimestr
from knowledge.parameter import RUN_TYPE, RUN_TEST_TIME, DAY
from knowledge.global_config import event_analysis_name, event_type
from knowledge.model import PeopleHistory, EventHistory
from knowledge.extensions import db

# fixed "now" used when the app runs with the test clock (RUN_TYPE != 1)
test_time = datetime2ts(RUN_TEST_TIME)
# from draw_redis import *
# from knowledge.global_utils import event_name_search

# NOTE(review): Blueprint, login_required and render_template are used below
# but not imported in this chunk -- presumably imported elsewhere in the
# file; confirm.
mod = Blueprint('construction', __name__, url_prefix='/construction')


@mod.route('/')
@login_required
def construction_main():
    # navigation page (original comment: 导航页)
    return render_template('construction/construction_main.html')


@mod.route('/graph_add/')
@login_required
def construction_graph_add():
    # graph-add page (original comment: 图谱添加)
    return render_template('construction/graph_add.html')
def get_final_submit_user_info(uid_list):
    """Fetch profile and BCI-history display details for submitted users.

    :param uid_list: list of user ids.
    :return: list of [uid, uname, location, fansnum, statusnum, normal_bci];
        '' for values whose source document is missing. normal_bci is the
        user's bci normalised by yesterday's maximum, log-scaled to 0-100.
    """
    final_results = []
    try:
        profile_results = es_user_profile.mget(index=profile_index_name,
                                               doc_type=profile_index_type,
                                               body={'ids': uid_list})['docs']
    except Exception:  # narrowed from a bare except
        profile_results = []
    try:
        bci_history_results = es_bci_history.mget(
            index=bci_history_index_name,
            doc_type=bci_history_index_type,
            body={'ids': uid_list})['docs']
    except Exception:
        bci_history_results = []
    # get bci_history max value: yesterday's per-day bci field, top-1 sort
    now_time_ts = time.time()
    search_date_ts = datetime2ts(ts2datetime(now_time_ts - DAY))
    bci_key = 'bci_' + str(search_date_ts)
    query_body = {
        'query': {
            'match_all': {}
        },
        'sort': [{
            bci_key: {
                'order': 'desc'
            }
        }],
        'size': 1
    }
    # restore the guard that was commented out in the original: without it a
    # search failure propagated even though the MAX_VALUE fallback below was
    # clearly intended to handle that case
    try:
        bci_max_result = es_bci_history.search(index=bci_history_index_name,
                                               doc_type=bci_history_index_type,
                                               body=query_body,
                                               _source=False,
                                               fields=[bci_key])['hits']['hits']
    except Exception:
        bci_max_result = {}
    if bci_max_result:
        bci_max_value = bci_max_result[0]['fields'][bci_key][0]
    else:
        bci_max_value = MAX_VALUE
    # mget preserves input order, so results line up with uid_list by index
    for iter_count, uid in enumerate(uid_list):
        try:
            profile_item = profile_results[iter_count]
        except IndexError:
            profile_item = {}
        try:
            bci_history_item = bci_history_results[iter_count]
        except IndexError:
            bci_history_item = {}
        if profile_item and profile_item.get('found'):
            uname = profile_item['_source']['nick_name']
            location = profile_item['_source']['user_location']
        else:
            uname = ''
            location = ''
        if bci_history_item and bci_history_item.get('found'):
            fansnum = bci_history_item['_source']['user_fansnum']
            statusnum = bci_history_item['_source']['weibo_month_sum']
            try:
                bci = bci_history_item['_source'][bci_key]
                # float() guards against Python 2 integer division when both
                # values happen to be ints
                normal_bci = math.log(bci / float(bci_max_value) * 9 + 1, 10) * 100
            except Exception:
                normal_bci = ''
        else:
            fansnum = ''
            statusnum = ''
            normal_bci = ''
        final_results.append(
            [uid, uname, location, fansnum, statusnum, normal_bci])
    return final_results
except: statusnum = 0 else: uname = uid location = '' try: fansnum = bci_dict['fields']['user_fansnum'][0] except: fansnum = 0 try: statusnum = bci_dict['fields']['weibo_month_sum'][0] except: statusnum = 0 if status == 'show_in': if user_type == "sensitive": tmp_ts = datetime2ts(date) - DAY tmp_data = r_cluster.hget("sensitive_" + str(tmp_ts), uid) if tmp_data: sensitive_dict = json.loads(tmp_data) sensitive_words = sensitive_dict.keys() else: sensitive_words = [] if sensitive_history_dict.get('fields', 0): #print sensitive_history_dict['fields'][sensitive_string][0] #print top_sensitive sensitive_value = math.log( sensitive_history_dict['fields'][sensitive_string][0] / float(top_sensitive) * 9 + 1, 10) * 100 #print "sensitive_value", sensitive_value else: sensitive_value = 0
def group_geo_vary(g_name, submit_user):
    """Aggregate location-change statistics for every member of a group.

    Resolves the group id from the pinyin of ``g_name``, loads the member
    uid list from the group index, then walks each member's per-day
    ``activity_geo_dict`` from their portrait to accumulate, across the
    whole group: start/end location counts, movement-pair counts, and
    per-movement member details.

    NOTE(review): recovered from a whitespace-collapsed source; the
    nesting below is reconstructed -- confirm against the original file.

    :param g_name: group display name (converted to pinyin for the doc id).
    :param submit_user: unused in the visible body.
    :return: dict with keys main_start_geo, main_end_geo, vary_detail_geo,
        activity_geo_vary, main_activity_geo, activity_geo_distribution_date.
    """
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name, doc_type=group_type, id=group_id, fields=['people'])
    # members are stored as a single '&'-joined string
    uid_list = uid_string['fields']['people'][0].split('&')
    activity_geo_vary = {}          # movement pair 'from&to' -> count
    main_start_geo = {}             # movement origin -> count
    main_end_geo = {}               # movement destination -> count
    vary_detail_geo = {}            # movement pair -> [[uid, from_ts, to_ts], ...]
    activity_geo_distribution_date = {}  # day ts -> merged {location: count}
    if RUN_TYPE == 1:
        now_ts = int(time.time())
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    try:
        iter_user_dict_list = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
                body={'ids':uid_list})['docs']
    except:
        iter_user_dict_list = []
    for user_dict in iter_user_dict_list:
        uid = user_dict['_id']
        source = user_dict['_source']
        #attr8: activity_geo_dict---distribution by date
        user_activity_geo = {}  # NOTE(review): assigned but never used below
        activity_geo_dict_list = json.loads(source['activity_geo_dict'])
        activity_geo_date_count = len(activity_geo_dict_list)
        # one list entry per day, oldest first; walk forward day by day
        iter_ts = now_date_ts - activity_geo_date_count * DAY
        user_date_main_list = []
        for i in range(0, activity_geo_date_count):
            date_item = activity_geo_dict_list[i]
            # merge this member's day counts into the group-wide per-day map
            if iter_ts in activity_geo_distribution_date:
                activity_geo_distribution_date[iter_ts] = union_dict_list(
                    [activity_geo_distribution_date[iter_ts], date_item])
            else:
                activity_geo_distribution_date[iter_ts] = date_item
            #use to get activity_geo vary
            sort_date_item = sorted(date_item.items(),
                                    key=lambda x: x[1],
                                    reverse=True)
            if date_item != {}:
                # record the day's dominant location only when it differs
                # from the previous recorded one (run-length compression)
                main_date_city = sort_date_item[0][0]
                try:
                    last_user_date_main_item = user_date_main_list[-1][0]
                except:
                    last_user_date_main_item = ''
                if main_date_city != last_user_date_main_item:
                    user_date_main_list.append([main_date_city, iter_ts])
            iter_ts += DAY
        #attr8: activity_geo_dict---location vary
        if len(user_date_main_list) > 1:
            # each adjacent pair in the compressed list is one movement
            for i in range(1, len(user_date_main_list)):
                vary_city = [
                    geo_ts_item[0]
                    for geo_ts_item in user_date_main_list[i - 1:i + 1]
                ]
                vary_ts = [
                    geo_ts_item[1]
                    for geo_ts_item in user_date_main_list[i - 1:i + 1]
                ]
                vary_item = '&'.join(vary_city)
                #vary_item = '&'.join(user_date_main_list[i-1:i+1])
                #get activity geo vary for vary table and map
                try:
                    activity_geo_vary[vary_item] += 1
                except:
                    activity_geo_vary[vary_item] = 1
                #get main start geo
                try:
                    main_start_geo[vary_city[0]] += 1
                except:
                    main_start_geo[vary_city[0]] = 1
                #get main end geo
                try:
                    main_end_geo[vary_city[1]] += 1
                except:
                    main_end_geo[vary_city[1]] = 1
                #get vary detail geo
                try:
                    vary_detail_geo[vary_item].append(
                        [uid, vary_ts[0], vary_ts[1]])
                except:
                    vary_detail_geo[vary_item] = [[
                        uid, vary_ts[0], vary_ts[1]
                    ]]
    # overall dominant location across the whole group and date range
    all_activity_geo = union_dict_list(activity_geo_distribution_date.values())
    sort_all_activity_geo = sorted(all_activity_geo.items(),
                                   key=lambda x: x[1],
                                   reverse=True)
    try:
        main_activity_geo = sort_all_activity_geo[0][0]
    except:
        main_activity_geo = ''
    return {'main_start_geo':main_start_geo, 'main_end_geo': main_end_geo, \
            'vary_detail_geo': vary_detail_geo, 'activity_geo_vary':activity_geo_vary,\
            'main_activity_geo':main_activity_geo, 'activity_geo_distribution_date':activity_geo_distribution_date}