def get_vary_detail_info(vary_detail_dict, uid_list):
    results = {}
    #get uname
    try:
        user_portrait_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
                body={'ids': uid_list})['docs']
    except:
        user_portrait_result = []
    uname_dict = {}
    for portrait_item in user_portrait_result:
        uid = portrait_item['_id']
        if portrait_item['found'] == True:
            uname_dict[uid] = portrait_item['_source']['uname']
        else:
            uname_dict[uid] = uid
    #get new vary detail information: [uid, ts1, ts2] -> [uid, uname, start_date, end_date]
    for vary_pattern in vary_detail_dict:
        user_info_list = vary_detail_dict[vary_pattern]
        new_pattern_list = []
        for user_item in user_info_list:
            uid = user_item[0]
            uname = uname_dict.get(uid, uid)  #fall back to uid when the portrait lookup failed
            start_date = ts2datetime(int(user_item[1]))
            end_date = ts2datetime(int(user_item[2]))
            new_pattern_list.append([uid, uname, start_date, end_date])
        results[vary_pattern] = new_pattern_list
    return results
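# Usage sketch (illustrative values only; assumes es and ts2datetime are the
# module-level client/helper used above). vary_detail_dict maps a
# 'city_a&city_b' pattern to [uid, start_ts, end_ts] triples; the result keeps
# the same keys but carries [uid, uname, start_date, end_date] entries:
#   detail = get_vary_detail_info(
#       {'beijing&shanghai': [['1234567890', 1480003200, 1480089600]]},
#       ['1234567890'])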
def get_theme_user_tag(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    topic_id = topic_id.lower()
    eid_string = es_event.get(index=special_event_name, doc_type=special_event_type, \
            id=topic_id, fields=['event'])
    event_list = eid_string['fields']['event'][0].split('&')
    user_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
            body={'ids': event_list}, fields=['user_results', 'name'])['docs']
    #collect every uid that took part in the theme's events
    user_list = []
    for i in user_result:
        user_dict = json.loads(i['fields']['user_results'][0])
        for k, v in user_dict.iteritems():
            user_list.append(k)
    user_list_set = list(set(user_list))
    tag_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
            body={'ids': user_list_set}, fields=['function_mark', 'keywords'])['docs']
    #accumulate keyword weights and tag counts over all portrait users
    keywords_dict = {}
    mark_dict = {}
    print len(tag_result)
    for i in tag_result:
        if not i.get('found', False):
            #skip uids that have no portrait document
            continue
        i_keywords = json.loads(i['fields']['keywords'][0])
        try:
            i_mark = i['fields']['function_mark'][0]
        except:
            i_mark = ''
        for key in i_keywords:
            try:
                keywords_dict[key[0]] += key[1]
            except:
                keywords_dict[key[0]] = key[1]
        if i_mark:
            user_mark = deal_user_tag(i_mark, submit_user)[0]
            for mark in user_mark:
                try:
                    mark_dict[mark] += 1
                except:
                    mark_dict[mark] = 1
    #keep the top 100 of each and normalize against the maximum value
    sorted_keywords_dict = sorted(keywords_dict.iteritems(), key=lambda x: x[1], reverse=True)[:100]
    sorted_mark_dict = sorted(mark_dict.iteritems(), key=lambda x: x[1], reverse=True)[:100]
    try:
        max_keywords_value = sorted_keywords_dict[0][1]
    except:
        max_keywords_value = 1.0
    normal_keywords_list = []
    for words in sorted_keywords_dict:
        normal_keywords_list.append([words[0], float(words[1]) / max_keywords_value])
    try:
        max_mark_value = sorted_mark_dict[0][1]
    except:
        max_mark_value = 1.0
    normal_mark_list = []
    for words in sorted_mark_dict:
        normal_mark_list.append([words[0], float(words[1]) / max_mark_value])
    return {'keywords': normal_keywords_list, 'mark': normal_mark_list}
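# Returned shape (a sketch; weights are normalized to each list's maximum, so
# the first entry is always 1.0):
#   get_theme_user_tag(u'some_theme', 'admin')
#   # -> {'keywords': [[u'word', 1.0], [u'word2', 0.83], ...],
#   #     'mark': [[u'tag', 1.0], ...]}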
def group_user_keyowrds(g_name, submit_user):
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name, doc_type=group_type, id=group_id, fields=['people'])
    uid_list = uid_string['fields']['people'][0].split('&')
    tag_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
            body={'ids': uid_list}, fields=['hashtag_dict', 'keywords'])['docs']
    #accumulate keyword weights and hashtag counts over the group members
    keywords_dict = {}
    hashtag_dict = {}
    print len(tag_result)
    for i in tag_result:
        if not i.get('found', False):
            #skip group members that have no portrait document
            continue
        i_keywords = json.loads(i['fields']['keywords'][0])
        i_hashtag = json.loads(i['fields']['hashtag_dict'][0])
        for hashtag, value in i_hashtag.iteritems():
            try:
                hashtag_dict[hashtag] += value
            except:
                hashtag_dict[hashtag] = value
        for key in i_keywords:
            try:
                keywords_dict[key[0]] += key[1]
            except:
                keywords_dict[key[0]] = key[1]
    #keep the top 100 of each and normalize against the maximum value
    sorted_keywords_dict = sorted(keywords_dict.iteritems(), key=lambda x: x[1], reverse=True)[:100]
    sorted_mark_dict = sorted(hashtag_dict.iteritems(), key=lambda x: x[1], reverse=True)[:100]
    try:
        max_keywords_value = sorted_keywords_dict[0][1]
    except:
        max_keywords_value = 1.0
    normal_keywords_list = []
    for words in sorted_keywords_dict:
        normal_keywords_list.append([words[0], float(words[1]) / max_keywords_value])
    try:
        max_mark_value = sorted_mark_dict[0][1]
    except:
        max_mark_value = 1.0
    normal_mark_list = []
    for words in sorted_mark_dict:
        normal_mark_list.append([words[0], float(words[1]) / max_mark_value])
    return {'keywords': normal_keywords_list, 'mark': normal_mark_list}
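# The try/except accumulation above is this module's counting idiom; for
# reference, a behaviour-equivalent sketch of the top-100-and-normalize step
# using collections.Counter (not what the repo uses, names hypothetical):
def _top100_normalized_sketch(weighted_pairs):
    from collections import Counter
    counts = Counter()
    for word, weight in weighted_pairs:
        counts[word] += weight
    top = counts.most_common(100)
    max_value = float(top[0][1]) if top else 1.0
    return [[word, value / max_value] for word, value in top]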
def get_special_labels(node1_list):
    labels = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
            body={'ids': node1_list}, fields=['keywords_string'], _source=False)['docs']
    #count how often each keyword appears across the nodes
    group_label = []
    keywords_dict = {}
    for i in labels:
        if not i.get('found', False):
            continue
        group_label.extend(i['fields']['keywords_string'][0].split('&'))
    for i in set(group_label):
        keywords_dict[i] = group_label.count(i)
    sorted_keywords = sorted(keywords_dict.iteritems(), key=lambda x: x[1], reverse=True)
    # print sorted_keywords
    #keep the 100 most frequent keywords as a '&'-joined label string
    result_label = [i[0] for i in sorted_keywords[:100]]
    result_label_string = '&'.join(result_label)
    return result_label_string
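# Usage sketch (hypothetical uids): joins the 100 most frequent portrait
# keywords across the given nodes into one string:
#   get_special_labels(['1234567890', '2234567890'])
#   # -> 'keyword1&keyword2&...'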
def get_theme_related(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    eid_string = es_event.get(index=special_event_name, doc_type=special_event_type, \
            id=topic_id, fields=['event', 'wiki_link', 'file_link'])
    event_list = eid_string['fields']['event'][0].split('&')
    try:
        file_link = eid_string['fields']['file_link'][0].split('+')
    except:
        file_link = []
    final_file = []
    for i in file_link:
        final_file.append(i.split(','))
    try:
        final_wiki = json.loads(eid_string['fields']['wiki_link'][0])
    except:
        final_wiki = []
    #map the event ids onto internal graph node ids through the legacy index
    event_graph_id = []
    for i in event_list:
        a = graph.run('start n=node:' + event_index_name + '("' + event_primary + ':' + str(i) + '") return id(n)')
        for j in a:
            event_graph_id.append(str(dict(j)['id(n)']))
    print event_graph_id
    event_id_string = ','.join(event_graph_id)
    #pull every node one hop away from the theme's events
    query = 'start d=node(' + event_id_string + ') match (d)-[r]-(e) return labels(e), e'
    result = graph.run(query)
    node_dict = {}
    for i in result:
        dict_i = dict(i)
        node_type = dict_i['labels(e)'][0]
        if node_type == people_node:
            node_dict.setdefault('user', []).append(dict_i['e']['uid'])
        elif node_type == org_node:
            node_dict.setdefault('org', []).append(dict_i['e']['org_id'])
        elif node_type == event_node:
            node_id = dict_i['e']['event_id']
            if node_id in event_graph_id:
                continue
            node_dict.setdefault('event', []).append(node_id)
    uid_list = list(set(node_dict.get('user', [])))
    org_list = list(set(node_dict.get('org', [])))
    event_list = list(set(node_dict.get('event', [])))
    user_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
            body={'ids': uid_list}, fields=['uname', 'uid'])['docs']
    org_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
            body={'ids': org_list}, fields=['uname', 'uid'])['docs']
    event_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
            body={'ids': event_list}, fields=['en_name', 'name'])['docs']
    #fall back to the raw id when a node has no portrait entry or an empty uname
    final_user = []
    for i in user_result:
        if i['found'] == True:
            if i['fields']['uname'][0] == '':
                uname_s = i['fields']['uid'][0]
            else:
                uname_s = i['fields']['uname'][0]
            final_user.append([i['fields']['uid'][0], uname_s])
        else:
            final_user.append([i['_id'], i['_id']])
    final_org = []
    for i in org_result:
        if i['found'] == True:
            if i['fields']['uname'][0] == '':
                uname_s = i['fields']['uid'][0]
            else:
                uname_s = i['fields']['uname'][0]
            final_org.append([i['fields']['uid'][0], uname_s])
        else:
            final_org.append([i['_id'], i['_id']])
    final_event = []
    for i in event_result:
        if i['found'] == True:
            final_event.append([i['fields']['en_name'][0], i['fields']['name'][0]])
        else:
            final_event.append([i['_id'], i['_id']])
    return [final_user, final_org, final_event, final_file, final_wiki]
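# The Cypher issued above, spelled out (a sketch; node ids and index/property
# names come from config, 'zhongguo_meng' is illustrative). The first query
# resolves each event through the legacy node index, the second expands one
# hop in any direction:
#   start n=node:event_index("event_id:zhongguo_meng") return id(n)
#   start d=node(1024,2048) match (d)-[r]-(e) return labels(e), e
# Each result row carries the neighbour's label list plus the node itself,
# which is what drives the user/org/event split.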
def group_geo_vary(g_name, submit_user):
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name, doc_type=group_type, id=group_id, fields=['people'])
    uid_list = uid_string['fields']['people'][0].split('&')
    activity_geo_vary = {}
    main_start_geo = {}
    main_end_geo = {}
    vary_detail_geo = {}
    activity_geo_distribution_date = {}
    if RUN_TYPE == 1:
        now_ts = int(time.time())
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    try:
        iter_user_dict_list = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
                body={'ids': uid_list})['docs']
    except:
        iter_user_dict_list = []
    for user_dict in iter_user_dict_list:
        uid = user_dict['_id']
        source = user_dict['_source']
        #attr8: activity_geo_dict---distribution by date
        activity_geo_dict_list = json.loads(source['activity_geo_dict'])
        activity_geo_date_count = len(activity_geo_dict_list)
        iter_ts = now_date_ts - activity_geo_date_count * DAY
        user_date_main_list = []
        for i in range(0, activity_geo_date_count):
            date_item = activity_geo_dict_list[i]
            if iter_ts in activity_geo_distribution_date:
                activity_geo_distribution_date[iter_ts] = union_dict_list(
                        [activity_geo_distribution_date[iter_ts], date_item])
            else:
                activity_geo_distribution_date[iter_ts] = date_item
            #use to get activity_geo vary: record the day's main city when it changes
            sort_date_item = sorted(date_item.items(), key=lambda x: x[1], reverse=True)
            if date_item != {}:
                main_date_city = sort_date_item[0][0]
                try:
                    last_user_date_main_item = user_date_main_list[-1][0]
                except:
                    last_user_date_main_item = ''
                if main_date_city != last_user_date_main_item:
                    user_date_main_list.append([main_date_city, iter_ts])
            iter_ts += DAY
        #attr8: activity_geo_dict---location vary
        if len(user_date_main_list) > 1:
            for i in range(1, len(user_date_main_list)):
                vary_city = [geo_ts_item[0] for geo_ts_item in user_date_main_list[i - 1:i + 1]]
                vary_ts = [geo_ts_item[1] for geo_ts_item in user_date_main_list[i - 1:i + 1]]
                vary_item = '&'.join(vary_city)
                #get activity geo vary for vary table and map
                try:
                    activity_geo_vary[vary_item] += 1
                except:
                    activity_geo_vary[vary_item] = 1
                #get main start geo
                try:
                    main_start_geo[vary_city[0]] += 1
                except:
                    main_start_geo[vary_city[0]] = 1
                #get main end geo
                try:
                    main_end_geo[vary_city[1]] += 1
                except:
                    main_end_geo[vary_city[1]] = 1
                #get vary detail geo
                try:
                    vary_detail_geo[vary_item].append([uid, vary_ts[0], vary_ts[1]])
                except:
                    vary_detail_geo[vary_item] = [[uid, vary_ts[0], vary_ts[1]]]
    all_activity_geo = union_dict_list(activity_geo_distribution_date.values())
    sort_all_activity_geo = sorted(all_activity_geo.items(), key=lambda x: x[1], reverse=True)
    try:
        main_activity_geo = sort_all_activity_geo[0][0]
    except:
        main_activity_geo = ''
    return {'main_start_geo': main_start_geo, 'main_end_geo': main_end_geo, \
            'vary_detail_geo': vary_detail_geo, 'activity_geo_vary': activity_geo_vary, \
            'main_activity_geo': main_activity_geo, 'activity_geo_distribution_date': activity_geo_distribution_date}
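# union_dict_list is defined elsewhere in the repo; a minimal sketch of the
# behaviour group_geo_vary relies on (merging city->count dicts by summing),
# under that assumption:
def _union_dict_list_sketch(dict_list):
    merged = {}
    for d in dict_list:
        for city, count in d.iteritems():
            merged[city] = merged.get(city, 0) + count
    return merged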
def search_related_u_card(item, submit_user, g_name):
    evaluate_max = get_evaluate_max()
    #when a group is given, exclude its existing members from the results
    if g_name:
        g_name = g_name + '_' + submit_user
        g_name_pinyin = p.get_pinyin(g_name)
        g_name_pinyin = g_name_pinyin.lower()
        user_list_string = es_group.get(index=group_name, doc_type=group_type, id=g_name_pinyin, \
                fields=['people'])
        uid_list = user_list_string['fields']['people'][0].split('&')
    else:
        uid_list = []
    #fuzzy-match the query string against keywords, uid and uname
    query_body = {
        'query': {
            'bool': {
                'should': [
                    {'wildcard': {'keywords': '*' + str(item.encode('utf-8')) + '*'}},
                    {'wildcard': {'uid': '*' + str(item.encode('utf-8')) + '*'}},
                    {'wildcard': {'uname': '*' + str(item.encode('utf-8')) + '*'}}
                ]
            }
        },
        'size': 1000
    }
    try:
        user_result = es.search(index=portrait_index_name, doc_type=portrait_index_type, \
                body=query_body, fields=['uid'])['hits']['hits']
    except:
        return 'node does not exist'
    search_uid = []
    result = []
    for i in user_result:
        search_uid.append(i['fields']['uid'][0])
    show_id = list(set(search_uid) - set(uid_list))
    if not show_id:
        return []
    fields_list = ['uid', 'uname', 'location', 'influence', 'sensitive', \
                   'activeness', 'keywords_string', 'function_mark']
    user_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
            body={'ids': show_id}, fields=fields_list)['docs']
    for i in user_result:
        user = []
        i_fields = i['fields']
        for j in fields_list:
            if j not in i_fields:
                user.append('')
                continue
            if j == 'keywords_string':
                #show only the top five keywords on the card
                keywords = i_fields[j][0].split('&')
                user.append(keywords[:5])
            elif j == 'function_mark':
                tag = deal_user_tag(i_fields[j][0], submit_user)[0]
                user.append(tag)
            elif j in ['influence', 'sensitive', 'activeness']:
                #normalize onto a 0-100 log scale against the current maximum
                user.append(math.log(i_fields[j][0] / float(evaluate_max[j]) * 9 + 1, 10) * 100)
            else:
                user.append(i_fields[j][0])
        result.append(user)
    return result
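# Example call (hypothetical names; the group document must exist in es_group
# when g_name is non-empty). Each card lists the fields in fields_list order,
# with keywords trimmed to five and the three scores rescaled to 0-100:
#   cards = search_related_u_card(u'media', 'admin', 'test_group')
#   # card: [uid, uname, location, influence, sensitive, activeness,
#   #        [kw1, ..., kw5], tags]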
def submit_identify_in_uid(input_data):
    print input_data
    in_date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    compute_status = input_data['compute_status']
    relation_string = input_data['relation_string']
    recommend_style = input_data['recommend_style']
    node_type = input_data['node_type']
    hashname_submit = 'submit_recomment_' + in_date
    hashname_influence = 'recomment_' + in_date + '_influence'
    hashname_sensitive = 'recomment_' + in_date + '_sensitive'
    compute_hash_name = 'compute'
    # submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(r.hkeys(hashname_sensitive))
    upload_data = input_data['upload_data']
    #collect uids from the upload, keeping only well-formed 10-digit ids
    uid_list = []
    invalid_uid_list = []
    if recommend_style == 'upload':
        for line in upload_data:
            uid = line.strip('\r')
            if len(str(uid)) == 10:
                uid_list.append(uid)
            else:
                invalid_uid_list.append(uid)
    if recommend_style == 'write':
        for line in upload_data:
            uid = line
            if len(str(uid)) == 10:
                uid_list.append(uid)
            else:
                invalid_uid_list.append(uid)
    if len(invalid_uid_list) != 0:
        return 0, 'invalid user info', invalid_uid_list
    #identify uids that exist in neither user_portrait nor the compute queue
    #step1: filter out uids already in user_portrait
    new_uid_list = []
    have_in_uid_list = []
    try:
        exist_portrait_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
                body={'ids': uid_list}, _source=False)['docs']
    except:
        exist_portrait_result = []
    if exist_portrait_result:
        for exist_item in exist_portrait_result:
            if exist_item['found'] == False:
                new_uid_list.append(exist_item['_id'])
            else:
                have_in_uid_list.append(exist_item['_id'])
    else:
        new_uid_list = uid_list
    #step2: filter out uids already waiting in the compute hash
    new_uid_set = set(new_uid_list)
    compute_set = set(r.hkeys(compute_hash_name))
    in_uid_set = list(new_uid_set - compute_set)
    print 'new_uid_set:', new_uid_set
    print 'in_uid_set:', in_uid_set
    if len(in_uid_set) == 0:
        return 0, 'all user in'
    #queue the remaining users for computation
    final_submit_user_list = []
    for in_item in in_uid_set:
        # if in_item in auto_recomment_set:
        #     tmp = json.loads(r.hget(hashname_submit, in_item))
        #     recommentor_list = tmp['operation'].split('&')
        #     recommentor_list.append(str(submit_user))
        #     new_list = list(set(recommentor_list))
        #     tmp['operation'] = '&'.join(new_list)
        # else:
        #     tmp = {'system':'0', 'operation':submit_user}
        if operation_type == 'submit':
            relation_list = relation_string.split(',')
            r.hset(compute_hash_name, in_item, json.dumps([in_date, compute_status, node_type, \
                    relation_list, submit_user, recommend_style]))
            # r.hset(hashname_submit, in_item, json.dumps(tmp))
            # r.hset(submit_user_recomment, in_item, '0')
            final_submit_user_list.append(in_item)
    return 1, invalid_uid_list, have_in_uid_list, final_submit_user_list
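# What lands in the 'compute' hash for each accepted uid (values illustrative;
# presumably consumed downstream by the compute pipeline):
#   r.hget('compute', '1234567890')
#   # -> '["2016-11-27", "1", "user", ["follow"], "admin", "write"]'
# i.e. [in_date, compute_status, node_type, relation_list, submit_user,
#       recommend_style]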
def group_related(g_name, submit_user):
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name, doc_type=group_type, id=group_id, \
            fields=['people', 'file_link', 'wiki_link'])
    origin_list = uid_string['fields']['people'][0].split('&')
    try:
        file_link = uid_string['fields']['file_link'][0].split('+')
    except:
        file_link = []
    final_file = []
    for i in file_link:
        final_file.append(i.split(','))
    try:
        final_wiki = json.loads(uid_string['fields']['wiki_link'][0])
    except:
        final_wiki = []
    #map group members onto internal graph node ids, people and orgs separately
    event_graph_id = []
    user_list, org_list = search_user_type(origin_list)
    for i in user_list:
        a = graph.run('start n=node:' + node_index_name + '("' + people_primary + ':' + str(i) + '") return id(n)')
        for j in a:
            event_graph_id.append(str(dict(j)['id(n)']))
    for i in org_list:
        a = graph.run('start n=node:' + org_index_name + '("' + org_primary + ':' + str(i) + '") return id(n)')
        for j in a:
            event_graph_id.append(str(dict(j)['id(n)']))
    print event_graph_id
    event_id_string = ','.join(event_graph_id)
    #pull every node one hop away from the group members
    query = 'start d=node(' + event_id_string + ') match (d)-[r]-(e) return labels(e), e'
    result = graph.run(query)
    node_dict = {}
    for i in result:
        dict_i = dict(i)
        node_type = dict_i['labels(e)'][0]
        if node_type == people_node:
            node_dict.setdefault('user', []).append(dict_i['e']['uid'])
        elif node_type == org_node:
            node_dict.setdefault('org', []).append(dict_i['e']['org_id'])
        elif node_type == event_node:
            node_id = dict_i['e']['event_id']
            if node_id in event_graph_id:
                continue
            node_dict.setdefault('event', []).append(node_id)
    try:
        uid_list = list(set(node_dict['user']))
        user_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
                body={'ids': uid_list}, fields=['uname', 'uid'])['docs']
    except:
        user_result = []
    try:
        org_list_ = list(set(node_dict['org']))
        org_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
                body={'ids': org_list_}, fields=['uname', 'uid'])['docs']
    except:
        org_result = []
    try:
        event_list = list(set(node_dict['event']))
        event_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
                body={'ids': event_list}, fields=['en_name', 'name'])['docs']
    except:
        event_result = []
    #skip the group members themselves; fall back to raw ids for missing portraits
    final_user = []
    for i in user_result:
        if i['_id'] in origin_list:
            continue
        if i['found'] == True:
            if i['fields']['uname'][0] == '':
                uname_s = i['fields']['uid'][0]
            else:
                uname_s = i['fields']['uname'][0]
            final_user.append([i['fields']['uid'][0], uname_s])
        else:
            final_user.append([i['_id'], i['_id']])
    final_org = []
    for i in org_result:
        if i['_id'] in origin_list:
            continue
        if i['found'] == True:
            if i['fields']['uname'][0] == '':
                uname_s = i['fields']['uid'][0]
            else:
                uname_s = i['fields']['uname'][0]
            final_org.append([i['fields']['uid'][0], uname_s])
        else:
            final_org.append([i['_id'], i['_id']])
    final_event = []
    for i in event_result:
        if i['found'] == True:
            final_event.append([i['fields']['en_name'][0], i['fields']['name'][0]])
        else:
            final_event.append([i['_id'], i['_id']])
    return {'final_user': final_user, 'final_org': final_org, 'final_event': final_event, \
            'final_file': final_file, 'final_wiki': final_wiki}
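# Returned shape (a sketch; all values illustrative):
#   group_related('test_group_admin', 'admin')
#   # -> {'final_user': [['1234567890', u'uname'], ...],
#   #     'final_org': [['2234567890', u'org_name'], ...],
#   #     'final_event': [['event_en_name', u'event_name'], ...],
#   #     'final_file': [['file_name', 'file_url'], ...],
#   #     'final_wiki': [...]}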