def draw_graph(relation_list):
    """Convert graph relationships into the node/edge structure used for drawing.

    Every item of ``relation_list`` is passed to ``walk``. Walked objects that
    expose ``type()`` are treated as relationships; everything else is treated
    as a node and identified through its labels.

    Args:
        relation_list: iterable of walkable graph objects.

    Returns:
        dict with three keys:
            ``result_relation``: list of ``[node1, relation_type, node2]``
                triples built from the first two node ids met on each walk
                (walks yielding fewer than two ids are skipped);
            ``node``: ``{primary_key: {primary_value: display_name}}``;
            ``map_eid``: event ids whose names were resolved (duplicates kept).
    """
    # Node label -> name of the property that holds the node's primary id.
    # NOTE(review): the 'User' value looks like a masked literal; other code
    # reads result['node']['uid'], so it was presumably 'uid' -- confirm.
    key_dict = {
        'User': '******',
        'Event': 'event_id',
        'Group': 'group',
        'SpecialEvent': 'event',
    }
    # Events that are never added to the node table or to map_eid.
    skipped_events = [u'徐玉玉事件', u'大学生失联']

    result = {}
    map_eid = []
    result_relation = []  # [[node1, relation, node2], ...]

    for relation in relation_list:
        this_relation = ['', '', '']  # [node1, relation, node2]
        only_relation = []  # node ids in walk order

        for m in walk(relation):
            try:
                relation_name = m.type()
            except AttributeError:
                pass  # no type(): this is a node, handled below
            else:
                this_relation[1] = relation_name
                continue

            labels = list(m.labels())
            if len(labels) == 1:
                label = labels[0]
                if label not in key_dict:
                    continue
                primary_key = key_dict[label]
                primary_value = m[primary_key]
                only_relation.append(primary_value)
                if label == 'User':
                    eu_name = user_name_search(m['uid'])
                elif label == 'Event':
                    # The id stays in only_relation (and so in the edge),
                    # only the node entry is skipped.
                    if m['event_id'] in skipped_events:
                        continue
                    eu_name = event_name_search(m['event_id'])
                    map_eid.append(m['event_id'])
                else:
                    eu_name = primary_value
            else:
                # Nodes with zero or several labels are handled as users.
                # NOTE(review): the node is filed under m['User'], not
                # m['uid'] -- confirm this is intended.
                primary_key = 'User'
                primary_value = m[primary_key]
                eu_name = user_name_search(m['uid'])
                only_relation.append(m['uid'])

            result.setdefault(primary_key, {})[primary_value] = eu_name

        if len(only_relation) < 2:
            continue
        this_relation[0] = only_relation[0]
        this_relation[2] = only_relation[1]
        result_relation.append(this_relation)

    return {
        'result_relation': result_relation,
        'node': result,
        'map_eid': map_eid,
    }
def group_user_rank(g_name, submit_user):
    """Measure how densely the members of a group are linked to each other.

    Args:
        g_name: group name; its lower-cased pinyin form is the id of the
            document fetched from the group index.
        submit_user: unused.

    Returns:
        dict with
            ``relation_table``: rows of ``[start_uid, start_name, relation,
                end_uid, end_name]``, one per relationship found between two
                members;
            ``relation_count``: number of distinct ``start-end`` uid pairs;
            ``relation_degree``: ``relation_count`` divided by the number of
                unordered member pairs, or 0 when there is no such pair;
            ``conclusion``: text label derived from ``relation_degree``.
    """
    group_id = p.get_pinyin(g_name).lower()
    uid_string = es_group.get(index=group_name, doc_type=group_type,
                              id=group_id, fields=['people'])
    uid_list = uid_string['fields']['people'][0].split('&')

    # Resolve every member uid to the internal id of its graph node.
    node_ids = []
    for uid in uid_list:
        rows = graph.run('start n=node:' + node_index_name + '("' +
                         people_primary + ':' + str(uid) + '") return id(n)')
        for row in rows:
            node_ids.append(str(dict(row)['id(n)']))

    exist_relation = []
    linked_pairs = set()
    # With no ids the START clause would read "node()", which Cypher rejects,
    # so the relationship query is only run when at least one node was found.
    if node_ids:
        id_string = ','.join(node_ids)
        query = ('start d=node(' + id_string + '),e=node(' + id_string +
                 ') match (d)-[r]->(e) return d,type(r),e')
        for row in graph.run(query):
            record = dict(row)
            start_id = record['d']['uid']
            end_id = record['e']['uid']
            exist_relation.append([
                start_id,
                user_name_search(start_id),
                relation_dict[record['type(r)']],
                end_id,
                user_name_search(end_id),
            ])
            linked_pairs.add(start_id + '-' + end_id)

    relation_set_count = len(linked_pairs)
    node_count = len(uid_list)
    # n * (n - 1) is always even, so floor division is exact.
    total_count = node_count * (node_count - 1) // 2
    # NOTE(review): pairs are counted per direction ('a-b' and 'b-a' are
    # distinct) while total_count counts unordered pairs, so the degree can
    # exceed 1 -- confirm this is intended.
    if total_count:
        relation_degree = float(relation_set_count) / total_count
    else:
        relation_degree = 0

    # The original author marked these thresholds/labels as "undefined".
    if relation_degree == 0:
        conclusion = u'无关联'
    elif relation_degree < 0.33:
        conclusion = u'关联度较低'
    elif relation_degree < 0.66:
        conclusion = u'关联度适中'
    else:
        conclusion = u'联系紧密'

    return {
        'relation_table': exist_relation,
        'relation_count': relation_set_count,
        'conclusion': conclusion,
        'relation_degree': relation_degree,
    }
def user_weibo_search(uid_list, sort_flag):
    """Fetch up to 200 flow-text documents written by the given users.

    Args:
        uid_list: uids matched against the ``uid`` field with a terms query.
        sort_flag: name of the field used for descending sort.

    Returns:
        list of dicts, one per hit, holding the first value of every returned
        field plus ``uname`` as resolved by ``user_name_search``.
    """
    search_body = {
        'query': {'terms': {'uid': uid_list}},
        "sort": [{sort_flag: 'desc'}],
        'size': 200,
    }
    wanted_fields = [
        'text', 'uid', 'sensitive', 'comment', 'retweeted', 'timestamp',
        'sensitive_words_string',
    ]
    response = es_flow_text.search(index=flow_text_name,
                                   doc_type=flow_text_type,
                                   body=search_body, _source=False,
                                   fields=wanted_fields)

    weibo_list = []
    for hit in response['hits']['hits']:
        hit_fields = hit['fields']
        # 'uname' goes in first so a returned field of that name overrides it.
        weibo = {'uname': user_name_search(hit_fields['uid'][0])}
        for field_name in hit_fields:
            # Field values arrive as lists; only the first element is kept.
            weibo[field_name] = hit_fields[field_name][0]
        weibo_list.append(weibo)
    return weibo_list
def user_list_group(group_name):
    """Return ``[uid, user_name]`` pairs for the User nodes linked to a group.

    Args:
        group_name: value looked up in ``group_index``; it is inserted into
            the query text as-is.

    Returns:
        list of two-item lists, in the order the graph returns the rows.
    """
    query = ('START s0 = node:group_index(group="%s") '
             'MATCH (s0)-[r]-(s:User) RETURN s.uid as uid') % (group_name)
    members = []
    for row in graph.run(query):
        member_uid = dict(row)['uid']
        members.append([member_uid, user_name_search(member_uid)])
    return members
def group_tab_graph(group_name, node_type, relation_type, layer):
    """Build the drawable graph around the members of a group.

    The relationships attached to the group node are merged with those
    returned by ``get_graph_single`` for the member uids, de-duplicated and
    passed to ``draw_graph``.

    Args:
        group_name: value looked up in ``group_index``; inserted into the
            query text as-is.
        node_type: node label filter; a non-empty value is prefixed with
            ``':'`` before being handed to ``get_graph_single``.
        relation_type: passed through to ``get_graph_single`` unchanged.
        layer: passed through to ``get_graph_single`` unchanged.

    Returns:
        The dict produced by ``draw_graph`` with an extra ``map_uid`` key: the
        distinct member uids as strings (an empty list for a group without
        members).
    """
    index_lookup = 'START s0 = node:group_index(group="' + group_name + '") '
    user_list = graph.run(index_lookup + 'MATCH (s0)-[r]-(s) RETURN s.uid as uid')
    relation_rows = graph.run(index_lookup + 'MATCH (s0)-[r]-(s) RETURN r')
    origin_relation = [dict(row)['r'] for row in relation_rows]

    if node_type != '':
        node_type = ':' + node_type

    # Names are not looked up here; draw_graph resolves them for the nodes it
    # draws.
    only_uid_no = [str(row['uid']) for row in user_list]

    relation = set(get_graph_single(only_uid_no, node_type, relation_type, layer))
    relation.update(origin_relation)
    result = draw_graph(list(relation))

    # Merge with any 'map_uid' already present so the key always exists,
    # including when the group has no members.
    map_uid = set(result.get('map_uid', []))
    map_uid.update(only_uid_no)
    result['map_uid'] = list(map_uid)
    return result
def get_theme_user_rank(theme_name, submit_user):
    """Rank the users involved in a theme's events by accumulated influence.

    Args:
        theme_name: theme name; its lower-cased pinyin form is the id of the
            document fetched from the special-event index.
        submit_user: unused.

    Returns:
        list of dicts sorted by ``influ`` (highest first), each holding
        ``id``, ``name``, ``node_type``, ``related_event`` (names of the
        events the user appears in) and ``influ``. ``influ`` is the summed
        influence divided by the highest sum; when that highest sum is 0 the
        values are only converted to float. An empty list is returned when no
        user is found.
    """
    topic_id = p.get_pinyin(theme_name).lower()
    eid_string = es_event.get(index=special_event_name,
                              doc_type=special_event_type,
                              id=topic_id, fields=['event'])
    event_list = eid_string['fields']['event'][0].split('&')
    user_result = es_event.mget(index=event_analysis_name,
                                doc_type=event_text_type,
                                body={'ids': event_list},
                                fields=['user_results', 'name'])['docs']

    user_influence = {}
    for doc in user_result:
        fields = doc.get('fields')
        if not fields:
            # presumably an id that mget could not find; nothing to aggregate
            continue
        event_name = fields['name'][0]
        user_dict = json.loads(fields['user_results'][0])
        for user_id, stats in user_dict.items():
            entry = user_influence.get(user_id)
            if entry is None:
                user_influence[user_id] = {
                    'id': user_id,
                    'name': user_name_search(user_id),
                    'node_type': search_type(user_id),
                    'related_event': [event_name],
                    'influ': stats['influ'],
                }
            else:
                entry['related_event'].append(event_name)
                entry['influ'] += stats['influ']

    sorted_user_influ = sorted(user_influence.values(),
                               key=lambda entry: entry['influ'],
                               reverse=True)
    if not sorted_user_influ:
        return []

    max_importance = sorted_user_influ[0]['influ']
    for entry in sorted_user_influ:
        influ = float(entry['influ'])
        # Dividing by a zero maximum is skipped.
        entry['influ'] = influ / max_importance if max_importance else influ
    return sorted_user_influ
def _search_way_node_entry(node, origin_idlist, uid_dict, eid_dict, middle_card):
    """Record one node met on a path and return its ``[id, name]`` pair.

    Fills ``uid_dict`` / ``eid_dict`` with the node's display name and, for
    nodes that are not one of the two endpoints, appends a detail card (or a
    bare ``{id_key: id}`` dict when no card is found) to ``middle_card``.
    Returns None for a node that has neither ``uid`` nor ``event_id``.
    """
    props = dict(node)
    if 'uid' in props:
        id_key, names = 'uid', uid_dict
        name_search, card_search, sort_key = (
            user_name_search, related_user_search, 'activeness')
    elif 'event_id' in props:
        id_key, names = 'event_id', eid_dict
        name_search, card_search, sort_key = (
            event_name_search, event_detail_search, 'start_ts')
    else:
        return None

    node_id = props[id_key]
    # Reuse a name resolved earlier in this call instead of searching again.
    if node_id not in names:
        names[node_id] = name_search(node_id)
    node_name = names[node_id]

    if node_id not in origin_idlist:
        cards = card_search([node_id], sort_key)
        if len(cards) == 0:
            middle_card.append({id_key: node_id})
        else:
            middle_card.append(cards[0])
    return [node_id, node_name]


def search_way(node1, node2, node_type1, node_type2):
    """Find the shortest paths (at most 5 hops) between two graph nodes.

    Args:
        node1: id of the start node.
        node2: id of the end node.
        node_type1: ``'User'`` or ``'Event'``; selects the index used for
            ``node1`` and the kind of card fetched for it.
        node_type2: same as ``node_type1``, for ``node2``.

    Returns:
        ``0`` as soon as a returned path has fewer than two relationships,
        which the original author documents as "the two nodes are directly
        related". Otherwise a dict with
            ``relation``: distinct lists of ``[id, name]`` pairs, one list per
                relationship on the paths;
            ``start_node_card`` / ``end_node_card``: first card returned for
                each endpoint;
            ``user_nodes`` / ``event_nodes``: ``{id: name}`` of every node met;
            ``middle_card``: cards of the non-endpoint nodes (duplicates kept);
            ``length_relation``: ``[number of paths, relationship count of the
                last path]``.

    Raises:
        KeyError: if a node type is neither ``'User'`` nor ``'Event'``.
    """
    # NOTE(review): the 'User' values look like masked literals; elsewhere
    # the user index is addressed as node_index(uid=...) -- confirm.
    index_type_dict = {'User': '******', 'Event': 'event_index'}
    primary_idct = {'User': '******', 'Event': 'event'}
    origin_idlist = [node1, node2]

    # Indexing with [0] presumably fails if no card is found -- verify callers.
    if node_type1 == 'User':
        start_node_card = related_user_search([node1], 'activeness')[0]
    else:
        start_node_card = event_detail_search([node1], 'start_ts')[0]
    if node_type2 == 'User':
        end_node_card = related_user_search([node2], 'activeness')[0]
    else:
        end_node_card = event_detail_search([node2], 'start_ts')[0]

    c_string = ('START node1 = node:' + index_type_dict[node_type1] + '(' +
                primary_idct[node_type1] + '="' + node1 + '"),')
    c_string += ('node2 = node:' + index_type_dict[node_type2] + '(' +
                 primary_idct[node_type2] + '="' + node2 + '") ')
    c_string += 'MATCH p = allShortestPaths(node1-[r*..5]-node2) return r'
    relation_all = list(graph.run(c_string))

    middle_card = []  # cards of the intermediate nodes
    uid_dict = {}  # user nodes for the graph
    eid_dict = {}  # event nodes for the graph
    relation_result = []
    length_relation = [len(relation_all), 0]

    for record in relation_all:
        path_relations = list(record['r'])
        if len(path_relations) < 2:
            return 0  # the two nodes are directly related
        length_relation[1] = len(path_relations)

        for relationship in path_relations:
            this_relation = []
            for m in walk(relationship):  # yields node1, r, node2
                try:
                    m.type()
                except AttributeError:
                    pass  # no type(): this is a node
                else:
                    continue
                entry = _search_way_node_entry(
                    m, origin_idlist, uid_dict, eid_dict, middle_card)
                if entry is None:
                    break
                this_relation.append(entry)
            if len(this_relation) > 1 and this_relation not in relation_result:
                relation_result.append(this_relation)

    return {
        'relation': relation_result,
        'start_node_card': start_node_card,
        'end_node_card': end_node_card,
        'user_nodes': uid_dict,
        'event_nodes': eid_dict,
        'middle_card': middle_card,
        'length_relation': length_relation,
    }
def _compare_group_member_uids(group_name):
    """Return the ``uid`` of every node directly linked to the group node."""
    query = ('START s0 = node:group_index(group="%s") '
             'MATCH (s0)-[r]-(s) RETURN s.uid as user_id') % group_name
    return [dict(row)['user_id'] for row in graph.run(query)]


def _compare_attach_members(graph_data, uid_list):
    """Add every uid to ``graph_data['node']['uid']`` and ``['map_uid']``."""
    uid_names = graph_data['node'].setdefault('uid', {})
    map_uid = graph_data.setdefault('map_uid', [])
    for uid in uid_list:
        uid_names[uid] = user_name_search(uid)
        map_uid.append(uid)


def compare_graph_group(group_name1, group_name2, layer, diff):
    """Build comparable drawable graphs for two groups.

    Args:
        group_name1: first group; looked up in ``group_index``.
        group_name2: second group; looked up in ``group_index``.
        layer: passed through to ``get_graph``.
        diff: comparison mode as a string:
            ``'0'`` - each group's own relationships;
            ``'1'`` - only the relationships both groups share;
            ``'2'`` - only the relationships unique to each group.

    Returns:
        ``{'u1': ..., 'u2': ...}`` where each value is a ``draw_graph`` result
        extended with the group's members in ``node['uid']`` and ``map_uid``.
        The two values are always independent objects.

    Raises:
        ValueError: if ``diff`` is not one of ``'0'``, ``'1'``, ``'2'``.
    """
    import copy

    # Reject unknown modes before any query is issued.
    if diff not in ('0', '1', '2'):
        raise ValueError("diff must be '0', '1' or '2', got %r" % (diff,))

    uid_list1 = _compare_group_member_uids(group_name1)
    uid_list2 = _compare_group_member_uids(group_name2)
    relation_1 = get_graph(uid_list1, layer)
    relation_2 = get_graph(uid_list2, layer)

    if diff == '0':
        u1 = draw_graph(relation_1)
        u2 = draw_graph(relation_2)
    elif diff == '1':
        same_relation = set(relation_1) & set(relation_2)
        u1 = draw_graph(list(same_relation))
        # Copy so that attaching each group's members below does not leak
        # one group's uids into the other group's result.
        u2 = copy.deepcopy(u1)
    else:
        same_relation = set(relation_1) & set(relation_2)
        u1 = draw_graph(set(relation_1) - same_relation)
        u2 = draw_graph(set(relation_2) - same_relation)

    _compare_attach_members(u1, uid_list1)
    _compare_attach_members(u2, uid_list2)
    return {'u1': u1, 'u2': u2}
def search_related_user(item):
    """Search users by uid/uname fragment and expand their graph neighbours.

    Up to 10 portrait documents whose ``uid`` or ``uname`` contains ``item``
    are taken as start users. For each one a single neighbour is fetched;
    user neighbours are then expanded by up to 5 outgoing User links and
    event neighbours by up to 3.

    Args:
        item: search text; must provide ``encode('utf-8')``.

    Returns:
        The string ``'node does not exist'`` when the search fails or finds
        nothing; otherwise a dict with ``total_user`` (number of start
        users), ``user_nodes`` and ``event_nodes`` (``{id: name}``),
        ``relation`` (list of ``[start_id, relation_type, end_id]``) and
        ``draw_nodes_length`` (size of ``user_nodes``).
    """
    pattern = '*' + str(item.encode('utf-8')) + '*'
    query_body = {
        "query": {
            'bool': {
                'should': [
                    {"wildcard": {'uid': pattern}},
                    {"wildcard": {'uname': pattern}},
                ]
            }
        },
        'size': 10
    }
    try:
        name_results = es_user_portrait.search(
            index=portrait_name, doc_type=portrait_type,
            body=query_body, fields=['uid', 'uname'])['hits']['hits']
    except Exception:
        # Best effort: any search failure is reported as "not found".
        return 'node does not exist'
    if len(name_results) == 0:
        return 'node does not exist'

    user_uid_list = []
    u_nodes_list = {}
    e_nodes_list = {}
    user_relation = []
    for hit in name_results:
        uid = hit['fields']['uid'][0]
        uname = hit['fields']['uname'][0]
        u_nodes_list[uid] = uname
        user_uid_list.append([uid, uname])

    def record_neighbour(start_id, relation, props):
        """Store the name and the edge for a neighbouring user and/or event."""
        if 'uid' in props:
            u_nodes_list[props['uid']] = user_name_search(props['uid'])
            user_relation.append([start_id, relation, props['uid']])
        if 'event_id' in props:
            e_nodes_list[props['event_id']] = event_name_search(props['event_id'])
            user_relation.append([start_id, relation, props['event_id']])

    # First layer: its nodes are the start points of the second layer.
    mid_uid_list = []
    mid_eid_list = []
    for uid, _uname in user_uid_list:
        # %s (rather than str()) keeps non-ASCII unicode ids intact.
        c_string = ('START s0 = node:node_index(uid="%s") '
                    'MATCH (s0)-[r1]-(s1) return s0,r1,s1 LIMIT 1') % uid
        for row in list(graph.run(c_string)):
            neighbour = dict(row['s1'])
            if 'uid' in neighbour:
                mid_uid_list.append(neighbour['uid'])
            if 'event_id' in neighbour:
                mid_eid_list.append(neighbour['event_id'])
            record_neighbour(row['s0']['uid'], row['r1'].type(), neighbour)

    # Second layer, starting from first-layer users.
    for mid_uid in mid_uid_list:
        c_string = ('START s1 = node:node_index(uid="%s") '
                    'MATCH (s1)-[r2]->(s2:User) return s1,r2,s2 LIMIT 5') % mid_uid
        for row in graph.run(c_string):
            record_neighbour(row['s1']['uid'], row['r2'].type(), dict(row['s2']))

    # Second layer, starting from first-layer events.
    for mid_eid in mid_eid_list:
        c_string = ('START s1 = node:event_index(event="%s") '
                    'MATCH (s1)-[r2]->(s2:User) return s1,r2,s2 LIMIT 3') % mid_eid
        for row in graph.run(c_string):
            record_neighbour(mid_eid, row['r2'].type(), dict(row['s2']))

    return {
        'total_user': len(user_uid_list),
        'user_nodes': u_nodes_list,
        'event_nodes': e_nodes_list,
        'relation': user_relation,
        'draw_nodes_length': len(u_nodes_list),
    }
def search_related_event_f(item):
    """Search events by keyword/name fragment and expand their neighbours.

    Up to 10 event documents whose ``keywords``, ``en_name`` or ``name``
    contains ``item`` are taken as start events. For each one up to 10
    neighbours are fetched; every first-layer neighbour of every start event
    is then expanded by up to 5 outgoing Event links.

    Args:
        item: search text; must provide ``encode('utf-8')``.

    Returns:
        The string ``'node does not exist'`` when the search fails or finds
        nothing; otherwise a dict with ``total_event`` (number of start
        events), ``user_nodes`` and ``event_nodes`` (``{id: name}``) and
        ``relation`` (list of ``[start_id, relation_type, end_id]``).
    """
    pattern = '*' + str(item.encode('utf-8')) + '*'
    query_body = {
        "query": {
            'bool': {
                'should': [
                    {"wildcard": {'keywords': pattern}},
                    {"wildcard": {'en_name': pattern}},
                    {"wildcard": {'name': pattern}},
                ]
            }
        },
        'size': 10
    }
    try:
        name_results = es_event.search(
            index=event_name, doc_type=event_type,
            body=query_body, fields=['name', 'en_name'])['hits']['hits']
    except Exception:
        # Best effort: any search failure is reported as "not found".
        return 'node does not exist'
    if len(name_results) == 0:
        return 'node does not exist'

    event_id_list = []
    u_nodes_list = {}
    e_nodes_list = {}
    event_relation = []
    for hit in name_results:
        name = hit['fields']['name'][0]
        en_name = hit['fields']['en_name'][0]
        e_nodes_list[en_name] = name
        event_id_list.append([en_name, name])

    def record_neighbour(start_id, relation, props):
        """Store the name and the edge for a neighbouring user and/or event."""
        if 'uid' in props:
            u_nodes_list[props['uid']] = user_name_search(props['uid'])
            event_relation.append([start_id, relation, props['uid']])
        if 'event_id' in props:
            e_nodes_list[props['event_id']] = event_name_search(props['event_id'])
            event_relation.append([start_id, relation, props['event_id']])

    # First layer: its nodes are the start points of the second layer. The
    # lists are shared by all start events so none of them is left out.
    mid_uid_list = []
    mid_eid_list = []
    for en_name, _name in event_id_list:
        # %s (rather than str()) keeps non-ASCII unicode ids intact.
        c_string = ('START s0 = node:event_index(event="%s") '
                    'MATCH (s0)-[r1]-(s1) return s0,r1,s1 LIMIT 10') % en_name
        for row in list(graph.run(c_string)):
            neighbour = dict(row['s1'])
            if 'uid' in neighbour:
                mid_uid_list.append(neighbour['uid'])
            if 'event_id' in neighbour:
                mid_eid_list.append(neighbour['event_id'])
            record_neighbour(row['s0']['event_id'], row['r1'].type(), neighbour)

    # Second layer, starting from first-layer users.
    for mid_uid in mid_uid_list:
        c_string = ('START s1 = node:node_index(uid="%s") '
                    'MATCH (s1)-[r2]->(s2:Event) return s1,r2,s2 LIMIT 5') % mid_uid
        for row in graph.run(c_string):
            record_neighbour(mid_uid, row['r2'].type(), dict(row['s2']))

    # Second layer, starting from first-layer events.
    for mid_eid in mid_eid_list:
        c_string = ('START s1 = node:event_index(event="%s") '
                    'MATCH (s1)-[r2]->(s2:Event) return s1,r2,s2 LIMIT 5') % mid_eid
        for row in graph.run(c_string):
            record_neighbour(mid_eid, row['r2'].type(), dict(row['s2']))

    return {
        'total_event': len(event_id_list),
        'user_nodes': u_nodes_list,
        'event_nodes': e_nodes_list,
        'relation': event_relation,
    }