Esempio n. 1
0
def draw_graph(relation_list):
    """Convert graph relationships into name lookups and relation triples.

    Every item of ``relation_list`` is expanded with ``walk``.  A walked part
    that exposes ``type()`` is treated as the relationship; any other part is
    treated as a node offering ``labels()`` and item access to its properties.

    Args:
        relation_list: iterable of walkable relationship objects.

    Returns:
        dict with three keys:
            ``result_relation`` -- list of ``[start_id, relation_type, end_id]``
                (relations with fewer than two usable end nodes are dropped);
            ``node`` -- ``{id_property: {node_id: display_name}}``;
            ``map_eid`` -- id of every Event node that was named
                (duplicates are kept).
    """
    # Property that identifies a node, per label.
    # NOTE(review): the 'User' value was a masked placeholder ('******') in the
    # reviewed source; 'uid' matches the m['uid'] reads below -- confirm.
    key_dict = {
        'User': 'uid',
        'Event': 'event_id',
        'Group': 'group',
        'SpecialEvent': 'event'
    }
    # Events that are never named nor listed in map_eid.
    hidden_events = (u'徐玉玉事件', u'大学生失联')

    result = {}
    map_eid = []
    result_relation = []  # [[node1, relation, node2], ...]
    for relation in relation_list:
        relation_type = ''
        endpoints = []  # node ids in walk order
        for part in walk(relation):
            try:
                relation_type = part.type()
            except AttributeError:
                pass  # nodes have no type(); handled below
            else:
                continue

            labels = list(part.labels())
            if len(labels) == 1:
                label = labels[0]
                if label not in key_dict:
                    continue  # unknown kind of node: ignore it entirely
                primary_key = key_dict[label]
                primary_value = part[primary_key]
                endpoints.append(primary_value)
                if label == 'User':
                    display_name = user_name_search(part['uid'])
                elif label == 'Event':
                    if part['event_id'] in hidden_events:
                        # Still an end point of the relation, but not named.
                        continue
                    display_name = event_name_search(part['event_id'])
                    map_eid.append(part['event_id'])
                else:
                    display_name = part[primary_key]
            else:
                # Zero or several labels: handled as a user node.  File it
                # under 'uid' like every other user (the previous code read
                # the non-existent 'User' property and keyed the entry wrongly).
                primary_key = 'uid'
                primary_value = part['uid']
                display_name = user_name_search(primary_value)
                endpoints.append(primary_value)
            result.setdefault(primary_key, {})[primary_value] = display_name

        if len(endpoints) < 2:
            continue
        result_relation.append([endpoints[0], relation_type, endpoints[1]])
    return {
        'result_relation': result_relation,
        'node': result,
        'map_eid': map_eid
    }
Esempio n. 2
0
def group_user_rank(g_name, submit_user):
    """Measure how densely the members of a group are related to each other.

    The group document is fetched by the lower-cased pinyin of ``g_name``; its
    ``people`` field is an ``&``-separated list of uids.  Each uid is resolved
    to a graph node id through the legacy index, then every directed
    relationship between two of those nodes is collected.

    Args:
        g_name: display name of the group.
        submit_user: unused; kept for interface compatibility.

    Returns:
        dict with keys:
            ``relation_table`` -- rows of
                ``[start_id, start_name, relation_label, end_id, end_name]``;
            ``relation_count`` -- number of distinct ``start-end`` pairs;
            ``relation_degree`` -- ``relation_count / (n * (n - 1) / 2)`` where
                ``n`` is the number of uids in the group (0 when undefined);
            ``conclusion`` -- textual rating derived from ``relation_degree``.
    """
    group_id = p.get_pinyin(g_name).lower()
    print(group_id)
    group_doc = es_group.get(index=group_name,
                             doc_type=group_type,
                             id=group_id,
                             fields=['people'])
    uid_list = group_doc['fields']['people'][0].split('&')

    node_ids = []
    for uid in uid_list:
        lookup = ('start n=node:' + node_index_name + '("' +
                  people_primary + ':' + str(uid) + '") return id(n)')
        for record in graph.run(lookup):
            node_ids.append(str(dict(record)['id(n)']))

    exist_relation = []
    distinct_pairs = set()
    # With no resolved node the START clause would read "node()", which is
    # not valid Cypher, so the relationship query is skipped altogether.
    if node_ids:
        id_csv = ','.join(node_ids)
        query = 'start d=node(' + id_csv + '),e=node(' + id_csv + ') match (d)-[r]->(e) return d,type(r),e'
        for record in graph.run(query):
            row = dict(record)
            start_id = row['d']['uid']
            start_name = user_name_search(start_id)
            end_id = row['e']['uid']
            end_name = user_name_search(end_id)
            exist_relation.append([start_id, start_name,
                                   relation_dict[row['type(r)']],
                                   end_id, end_name])
            distinct_pairs.add(start_id + '-' + end_id)

    relation_set_count = len(distinct_pairs)
    node_count = len(uid_list)
    total_count = node_count * (node_count - 1) // 2
    if total_count:
        relation_degree = float(relation_set_count) / total_count
    else:
        # Fewer than two members: no pair exists, so the degree is 0.
        relation_degree = 0

    if relation_degree == 0:
        conclusion = u'无关联'
    elif relation_degree < 0.33:
        conclusion = u'关联度较低'
    elif relation_degree < 0.66:
        conclusion = u'关联度适中'
    else:
        # The original author flagged this label as "undefined!!" --
        # TODO confirm the final wording.
        conclusion = u'联系紧密'
    return {'relation_table': exist_relation,
            'relation_count': relation_set_count,
            'conclusion': conclusion,
            'relation_degree': relation_degree}
Esempio n. 3
0
def user_weibo_search(uid_list, sort_flag):
    """Fetch up to 200 posts written by the given users.

    Args:
        uid_list: uids matched against the ``uid`` field with a terms query.
        sort_flag: name of the field the hits are sorted on, descending.

    Returns:
        One dict per hit holding the first value of every returned field plus
        ``uname``, the name resolved through ``user_name_search``.
    """
    wanted_fields = [
        'text', 'uid', 'sensitive', 'comment', 'retweeted', 'timestamp',
        'sensitive_words_string'
    ]
    search_request = {
        'query': {'terms': {'uid': uid_list}},
        "sort": [{sort_flag: 'desc'}],
        'size': 200,
    }
    hits = es_flow_text.search(index=flow_text_name,
                               doc_type=flow_text_type,
                               body=search_request,
                               _source=False,
                               fields=wanted_fields)['hits']['hits']

    posts = []
    for hit in hits:
        returned = hit['fields']
        # Every field comes back as a list; only its first value is kept.
        post = {'uname': user_name_search(returned['uid'][0])}
        post.update(
            (field, values[0]) for field, values in returned.iteritems())
        posts.append(post)
    return posts
Esempio n. 4
0
def user_list_group(group_name):
    """List the User nodes attached to a group.

    Args:
        group_name: value looked up in the ``group_index`` legacy index.

    Returns:
        list of ``[uid, user_name]`` pairs, one per row returned by the query.
    """
    cypher = ('START s0 = node:group_index(group="%s")'
              '                MATCH (s0)-[r]-(s:User) RETURN s.uid as uid'
              ) % (group_name)

    members = []
    for record in graph.run(cypher):
        member_uid = dict(record)['uid']
        members.append([member_uid, user_name_search(member_uid)])
    return members
Esempio n. 5
0
def group_tab_graph(group_name, node_type, relation_type, layer):
    """Build the graph payload for a group and the nodes linked to it.

    Args:
        group_name: value looked up in the ``group_index`` legacy index.
        node_type: label filter handed to ``get_graph_single``; a non-empty
            value is prefixed with ``':'`` first.
        relation_type: passed to ``get_graph_single`` unchanged.
        layer: passed to ``get_graph_single`` unchanged.

    Returns:
        The dict produced by ``draw_graph`` with an extra ``map_uid`` key: the
        de-duplicated ``uid`` values (as ``str``) of the group's neighbours.
    """
    s_string = 'START s0 = node:group_index(group="' + group_name + '")  \
                MATCH (s0)-[r]-(s) RETURN s.uid as uid'
    s_string2 = 'START s0 = node:group_index(group="' + group_name + '")  \
                MATCH (s0)-[r]-(s) RETURN r'

    member_uids = [str(record['uid']) for record in graph.run(s_string)]
    # Relationships between the group node itself and its neighbours.
    origin_relation = [dict(record)['r'] for record in graph.run(s_string2)]

    if node_type != '':
        node_type = ':' + node_type

    relation = get_graph_single(member_uids, node_type, relation_type, layer)
    relation.extend(origin_relation)
    result = draw_graph(list(set(relation)))

    # Merge rather than index: 'map_uid' may be absent from the draw_graph
    # result, and a group without neighbours must still yield an empty list
    # instead of raising KeyError.
    map_uid = set(result.get('map_uid', []))
    map_uid.update(member_uids)
    result['map_uid'] = list(map_uid)
    return result
Esempio n. 6
0
def get_theme_user_rank(theme_name, submit_user):
    """Rank the users involved in a theme's events by accumulated influence.

    The theme document is fetched by the lower-cased pinyin of ``theme_name``;
    its ``event`` field is an ``&``-separated list of event ids.  For each of
    those events ``user_results`` (a JSON object keyed by user id whose values
    carry an ``influ`` number) is summed per user.

    Args:
        theme_name: display name of the theme.
        submit_user: unused; kept for interface compatibility.

    Returns:
        list of dicts sorted by descending influence, each with keys ``id``,
        ``name``, ``node_type``, ``related_event`` (names of the events the
        user appears in) and ``influ`` (divided by the highest influence).
        Empty list when no user is found.
    """
    topic_id = p.get_pinyin(theme_name).lower()
    theme_doc = es_event.get(index=special_event_name,
                             doc_type=special_event_type,
                             id=topic_id,
                             fields=['event'])
    event_list = theme_doc['fields']['event'][0].split('&')
    event_docs = es_event.mget(index=event_analysis_name,
                               doc_type=event_text_type,
                               body={'ids': event_list},
                               fields=['user_results', 'name'])['docs']

    user_influence = {}
    for doc in event_docs:
        fields = doc.get('fields')
        if not fields:
            # mget returns a stub without 'fields' for ids it cannot find.
            continue
        event_name = fields['name'][0]
        user_dict = json.loads(fields['user_results'][0])
        for user_id, stats in user_dict.items():
            entry = user_influence.get(user_id)
            if entry is None:
                entry = {
                    'id': user_id,
                    'name': user_name_search(user_id),
                    'node_type': search_type(user_id),
                    'related_event': [],
                }
                user_influence[user_id] = entry
            entry['related_event'].append(event_name)
            if 'influ' in entry:
                entry['influ'] += stats['influ']
            else:
                entry['influ'] = stats['influ']

    sorted_user_influ = sorted(user_influence.values(),
                               key=lambda entry: entry['influ'],
                               reverse=True)
    if not sorted_user_influ:
        return []

    max_importance = sorted_user_influ[0]['influ']
    for entry in sorted_user_influ:
        if max_importance:
            entry['influ'] = float(entry['influ']) / max_importance
        else:
            # Top influence is 0: nothing to scale by, keep the raw value.
            entry['influ'] = float(entry['influ'])
    return sorted_user_influ
Esempio n. 7
0
def search_way(node1, node2, node_type1, node_type2):
    """Describe every shortest path (at most 5 hops) between two graph nodes.

    Args:
        node1: id of the first end node (string).
        node2: id of the second end node (string).
        node_type1: ``'User'`` or ``'Event'``; selects the index used to find
            ``node1`` and the kind of card fetched for it.
        node_type2: same as ``node_type1``, for ``node2``.

    Returns:
        ``0`` as soon as a shortest path made of fewer than two relationships
        is met (the two nodes are directly related).  Otherwise a dict with:
            ``relation`` -- distinct ``[[id, name], [id, name]]`` node pairs,
                one per relationship on the paths;
            ``start_node_card`` / ``end_node_card`` -- cards of the end nodes;
            ``user_nodes`` / ``event_nodes`` -- ``{id: name}`` of every node
                met on the paths;
            ``middle_card`` -- cards of the nodes other than the two ends
                (a bare ``{'uid': ...}`` / ``{'event_id': ...}`` when no card
                is found);
            ``length_relation`` -- ``[number of paths, number of
                relationships in the last path processed]``.
    """
    # NOTE(review): the 'User' entries were masked placeholders ('******') in
    # the reviewed source, which makes the START clause invalid.  'node_index'
    # and 'uid' are what the other user lookups in this module use
    # (node:node_index(uid="...")) -- confirm against the index configuration.
    index_type_dict = {'User': 'node_index', 'Event': 'event_index'}
    primary_idct = {'User': 'uid', 'Event': 'event'}
    origin_idlist = [node1, node2]
    print(origin_idlist)

    if node_type1 == 'User':
        start_node_card = related_user_search([node1], 'activeness')[0]
    else:
        start_node_card = event_detail_search([node1], 'start_ts')[0]

    if node_type2 == 'User':
        end_node_card = related_user_search([node2], 'activeness')[0]
    else:
        end_node_card = event_detail_search([node2], 'start_ts')[0]

    c_string = 'START node1 = node:' + index_type_dict[
        node_type1] + '(' + primary_idct[node_type1] + '="' + node1 + '"),'
    c_string += 'node2 = node:' + index_type_dict[
        node_type2] + '(' + primary_idct[node_type2] + '="' + node2 + '") '
    c_string += 'MATCH p = allShortestPaths(node1-[r*..5]-node2) return r'
    print(c_string)
    relation_all = list(graph.run(c_string))

    middle_card = []  # cards of the intermediate nodes
    uid_dict = {}  # user id -> name
    eid_dict = {}  # event id -> name
    relation_result = []
    length_relation = [len(relation_all), 0]

    for record in relation_all:
        path_relations = list(record['r'])
        if len(path_relations) < 2:
            return 0  # the two nodes are directly related
        print('%s relation' % (record,))
        length_relation[1] = len(path_relations)

        for path_relation in path_relations:
            node_pair = []  # [[id, name], [id, name]] for this relationship
            for part in walk(path_relation):
                try:
                    part.type()
                except AttributeError:
                    pass  # not a relationship: it is one of the end nodes
                else:
                    continue

                properties = dict(part)
                if 'uid' in properties:
                    id_key, known_names = 'uid', uid_dict
                    name_lookup, card_lookup = (user_name_search,
                                                related_user_search)
                    sort_field = 'activeness'
                elif 'event_id' in properties:
                    id_key, known_names = 'event_id', eid_dict
                    name_lookup, card_lookup = (event_name_search,
                                                event_detail_search)
                    sort_field = 'start_ts'
                else:
                    break  # neither a user nor an event: give up on this hop

                node_id = part[id_key]
                node_name = name_lookup(node_id)
                if node_id not in known_names:
                    known_names[node_id] = node_name
                    if node_id not in origin_idlist:
                        cards = card_lookup([node_id], sort_field)
                        if cards:
                            middle_card.append(cards[0])
                        else:
                            middle_card.append({id_key: node_id})
                node_pair.append([node_id, node_name])

            if len(node_pair) > 1 and node_pair not in relation_result:
                relation_result.append(node_pair)

    return {'relation': relation_result,
            'start_node_card': start_node_card,
            'end_node_card': end_node_card,
            'user_nodes': uid_dict,
            'event_nodes': eid_dict,
            'middle_card': middle_card,
            'length_relation': length_relation}
Esempio n. 8
0
def _group_neighbor_uids(group_name):
    """Return the ``uid`` property of every node linked to the group node."""
    query = ('START s0 = node:group_index(group="%s")'
             '                MATCH (s0)-[r]-(s) RETURN s.uid as user_id'
             ) % group_name
    return [dict(record)['user_id'] for record in graph.run(query)]


def _add_group_members(graph_data, uid_list):
    """Add every uid (with its resolved name) to a draw_graph result in place."""
    user_nodes = graph_data['node'].setdefault('uid', {})
    map_uid = graph_data.setdefault('map_uid', [])
    for uid in uid_list:
        user_nodes[uid] = user_name_search(uid)
        map_uid.append(uid)


def compare_graph_group(group_name1, group_name2, layer, diff):
    """Build the graphs of two groups for side-by-side comparison.

    Args:
        group_name1: first group, looked up in the ``group_index`` index.
        group_name2: second group, looked up the same way.
        layer: passed to ``get_graph`` unchanged.
        diff: ``'0'`` -- each group's full graph;
            ``'1'`` -- only the relations both groups share;
            ``'2'`` -- only the relations unique to each group.

    Returns:
        ``{'u1': ..., 'u2': ...}``: one ``draw_graph`` result per group, each
        extended with that group's members under ``node['uid']`` and
        ``map_uid``.

    Raises:
        ValueError: if ``diff`` is not one of ``'0'``, ``'1'``, ``'2'``.
    """
    import copy

    if diff not in ('0', '1', '2'):
        # Previously this only surfaced later as an unbound-variable error.
        raise ValueError("diff must be '0', '1' or '2', got %r" % (diff,))

    uid_list1 = _group_neighbor_uids(group_name1)
    print(len(uid_list1))
    uid_list2 = _group_neighbor_uids(group_name2)
    print(len(uid_list2))

    relation_1 = get_graph(uid_list1, layer)
    relation_2 = get_graph(uid_list2, layer)

    if diff == '0':
        u1 = draw_graph(relation_1)
        u2 = draw_graph(relation_2)
    elif diff == '1':
        same_relation = list(set(relation_1) & set(relation_2))
        u1 = draw_graph(same_relation)
        # Independent copy: sharing one dict made each group's members leak
        # into the other group's result when they are merged in below.
        u2 = copy.deepcopy(u1)
    else:
        same_relation = set(relation_1) & set(relation_2)
        u1 = draw_graph(set(relation_1) - same_relation)
        u2 = draw_graph(set(relation_2) - same_relation)

    _add_group_members(u1, uid_list1)
    _add_group_members(u2, uid_list2)
    return {'u1': u1, 'u2': u2}
Esempio n. 9
0
def _record_user_search_neighbor(source_id, relation, neighbor, user_nodes,
                                 event_nodes, relations):
    """Register one neighbour node found while expanding a user search.

    Adds the neighbour to ``user_nodes`` / ``event_nodes`` (id -> name) and
    appends ``[source_id, relation, neighbour_id]`` to ``relations``.

    Returns:
        ``(uid, event_id)``; each is ``None`` when the neighbour does not
        carry that property.
    """
    properties = dict(neighbor)
    uid = event_id = None
    # Ids are used as dict keys as-is: wrapping them in str() raises on
    # non-ASCII unicode ids under Python 2.
    if 'uid' in properties:
        uid = properties['uid']
        user_nodes[uid] = user_name_search(uid)
        relations.append([source_id, relation, uid])
    if 'event_id' in properties:
        event_id = properties['event_id']
        event_nodes[event_id] = event_name_search(event_id)
        relations.append([source_id, relation, event_id])
    return uid, event_id


def search_related_user(item):
    """Search users by uid/name fragment and expand two layers around them.

    Up to 10 users whose ``uid`` or ``uname`` contains ``item`` are taken as
    seeds.  Layer one follows a single relationship from each seed; layer two
    follows outgoing relationships to User nodes from every layer-one node
    (at most 5 per user node, 3 per event node).

    Args:
        item: unicode search fragment.

    Returns:
        The string ``'node does not exist'`` when the search fails or matches
        nothing; otherwise a dict with keys ``total_user`` (number of seeds),
        ``user_nodes`` and ``event_nodes`` (id -> name), ``relation`` (list of
        ``[start_id, relation_type, end_id]``) and ``draw_nodes_length``
        (number of entries in ``user_nodes``).
    """
    pattern = '*' + str(item.encode('utf-8')) + '*'
    query_body = {
        "query": {
            'bool': {
                'should': [
                    {"wildcard": {'uid': pattern}},
                    {"wildcard": {'uname': pattern}},
                ]
            }
        },
        'size': 10
    }

    try:
        name_results = es_user_portrait.search(
            index=portrait_name, doc_type=portrait_type,
            body=query_body, fields=['uid', 'uname'])['hits']['hits']
    except Exception:
        # Best effort: any search failure is reported as "nothing found".
        return 'node does not exist'
    if len(name_results) == 0:
        return 'node does not exist'

    user_uid_list = []
    u_nodes_list = {}
    e_nodes_list = {}
    user_relation = []
    for hit in name_results:
        uid = hit['fields']['uid'][0]
        uname = hit['fields']['uname'][0]
        u_nodes_list[uid] = uname
        user_uid_list.append([uid, uname])
    print(len(user_uid_list))

    # Layer one: these nodes become the starting points of layer two.
    mid_uid_list = []
    mid_eid_list = []
    for seed in user_uid_list:
        c_string = ('START s0 = node:node_index(uid="%s") '
                    'MATCH (s0)-[r1]-(s1) return s0,r1,s1 LIMIT 1'
                    ) % (seed[0],)
        for record in list(graph.run(c_string)):
            found_uid, found_eid = _record_user_search_neighbor(
                record['s0']['uid'], record['r1'].type(), record['s1'],
                u_nodes_list, e_nodes_list, user_relation)
            if found_uid is not None:
                mid_uid_list.append(found_uid)
            if found_eid is not None:
                mid_eid_list.append(found_eid)
    print(len(mid_uid_list))
    print('%s ++++++++++++++++' % len(mid_eid_list))

    # Layer two, starting from the users found in layer one.
    for mid_uid in mid_uid_list:
        c_string = ('START s1 = node:node_index(uid="%s") '
                    'MATCH (s1)-[r2]->(s2:User) return s1,r2,s2 LIMIT 5'
                    ) % (mid_uid,)
        for record in graph.run(c_string):
            _record_user_search_neighbor(
                record['s1']['uid'], record['r2'].type(), record['s2'],
                u_nodes_list, e_nodes_list, user_relation)

    # Layer two, starting from the events found in layer one.
    for mid_eid in mid_eid_list:
        c_string = ('START s1 = node:event_index(event="%s") '
                    'MATCH (s1)-[r2]->(s2:User) return s1,r2,s2 LIMIT 3'
                    ) % (mid_eid,)
        for record in graph.run(c_string):
            _record_user_search_neighbor(
                mid_eid, record['r2'].type(), record['s2'],
                u_nodes_list, e_nodes_list, user_relation)

    return {'total_user': len(user_uid_list),
            'user_nodes': u_nodes_list,
            'event_nodes': e_nodes_list,
            'relation': user_relation,
            'draw_nodes_length': len(u_nodes_list)}
Esempio n. 10
0
def _record_event_search_neighbor(source_id, relation, neighbor, user_nodes,
                                  event_nodes, relations):
    """Register one neighbour node found while expanding an event search.

    Adds the neighbour to ``user_nodes`` / ``event_nodes`` (id -> name) and
    appends ``[source_id, relation, neighbour_id]`` to ``relations``.

    Returns:
        ``(uid, event_id)``; each is ``None`` when the neighbour does not
        carry that property.
    """
    properties = dict(neighbor)
    uid = event_id = None
    if 'uid' in properties:
        uid = properties['uid']
        user_nodes[uid] = user_name_search(uid)
        relations.append([source_id, relation, uid])
    if 'event_id' in properties:
        event_id = properties['event_id']
        event_nodes[event_id] = event_name_search(event_id)
        relations.append([source_id, relation, event_id])
    return uid, event_id


def search_related_event_f(item):
    """Search events by keyword/name fragment and expand two layers around them.

    Up to 10 events whose ``keywords``, ``en_name`` or ``name`` contains
    ``item`` are taken as seeds.  Layer one follows up to 10 relationships
    from each seed; layer two follows up to 5 outgoing relationships to Event
    nodes from every layer-one node.

    Args:
        item: unicode search fragment.

    Returns:
        The string ``'node does not exist'`` when the search fails or matches
        nothing; otherwise a dict with keys ``total_event`` (number of seeds),
        ``user_nodes`` and ``event_nodes`` (id -> name) and ``relation`` (list
        of ``[start_id, relation_type, end_id]``).
    """
    pattern = '*' + str(item.encode('utf-8')) + '*'
    query_body = {
        "query": {
            'bool': {
                'should': [
                    {"wildcard": {'keywords': pattern}},
                    {"wildcard": {'en_name': pattern}},
                    {"wildcard": {'name': pattern}},
                ]
            }
        },
        'size': 10
    }

    try:
        name_results = es_event.search(
            index=event_name, doc_type=event_type,
            body=query_body, fields=['name', 'en_name'])['hits']['hits']
    except Exception:
        # Best effort: any search failure is reported as "nothing found".
        return 'node does not exist'
    if len(name_results) == 0:
        return 'node does not exist'
    print('%s *********************' % (name_results,))

    event_id_list = []
    u_nodes_list = {}
    e_nodes_list = {}
    event_relation = []
    for hit in name_results:
        name = hit['fields']['name'][0]
        en_name = hit['fields']['en_name'][0]
        e_nodes_list[en_name] = name
        event_id_list.append([en_name, name])

    # Layer one.  The frontier lists live outside the loop so the neighbours
    # of every seed event are expanded, not only those of the last one.
    mid_uid_list = []
    mid_eid_list = []
    for seed in event_id_list:
        c_string = ('START s0 = node:event_index(event="%s") '
                    'MATCH (s0)-[r1]-(s1) return s0,r1,s1 LIMIT 10'
                    ) % (seed[0],)
        for record in list(graph.run(c_string)):
            print(record)
            found_uid, found_eid = _record_event_search_neighbor(
                record['s0']['event_id'], record['r1'].type(), record['s1'],
                u_nodes_list, e_nodes_list, event_relation)
            if found_uid is not None:
                mid_uid_list.append(found_uid)
            if found_eid is not None:
                mid_eid_list.append(found_eid)

    # Layer two, starting from the users found in layer one.
    for mid_uid in mid_uid_list:
        c_string = ('START s1 = node:node_index(uid="%s") '
                    'MATCH (s1)-[r2]->(s2:Event) return s1,r2,s2 LIMIT 5'
                    ) % (mid_uid,)
        for record in graph.run(c_string):
            _record_event_search_neighbor(
                mid_uid, record['r2'].type(), record['s2'],
                u_nodes_list, e_nodes_list, event_relation)

    # Layer two, starting from the events found in layer one.
    for mid_eid in mid_eid_list:
        c_string = ('START s1 = node:event_index(event="%s") '
                    'MATCH (s1)-[r2]->(s2:Event) return s1,r2,s2 LIMIT 5'
                    ) % (mid_eid,)
        for record in graph.run(c_string):
            _record_event_search_neighbor(
                mid_eid, record['r2'].type(), record['s2'],
                u_nodes_list, e_nodes_list, event_relation)

    return {'total_event': len(event_id_list),
            'user_nodes': u_nodes_list,
            'event_nodes': e_nodes_list,
            'relation': event_relation}