Example #1
def compare_group_k_label(g_name1, g_name2, submit_user, flag):
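    # Compare the '&'-separated k_label tags of two groups (ES doc id = lowercased pinyin of the
    # group name); flag selects all labels, the shared ones ('same'), or each group's unique ones ('diff').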
    topic_id1 = p.get_pinyin(g_name1)
    topic_id1 = topic_id1.lower()
    eid_string1 = es_group.get(index=group_name, doc_type=group_type, id=topic_id1,  fields=['k_label'])
    label_list1 = eid_string1['fields']['k_label'][0].split('&')
    
    topic_id2 = p.get_pinyin(g_name2)
    topic_id2 = topic_id2.lower()
    eid_string2 = es_group.get(index=group_name, doc_type=group_type, id=topic_id2,  fields=['k_label'])
    label_list2 = eid_string2['fields']['k_label'][0].split('&')
    # default to empty lists so an unrecognized flag does not raise NameError at return
    new_label_list1 = []
    new_label_list2 = []
    if flag == 'all':
        new_label_list1 = [i for i in set(label_list1)]
        new_label_list2 = [i for i in set(label_list2)]

    if flag == 'same':
        same_u = set(label_list1)&set(label_list2)
        same_u = [i for i in same_u]
        new_label_list1 = same_u
        new_label_list2 = same_u

    if flag == 'diff':
        diff_u1 = set(label_list1) - (set(label_list1)&set(label_list2))
        new_label_list1 = [i for i in diff_u1]

        diff_u2 = set(label_list2) - (set(label_list1)&set(label_list2))
        new_label_list2 = [i for i in diff_u2]
    return {'detail_result1':new_label_list1,'detail_result2':new_label_list2}
Example #2
def compare_group_user(g_name1, g_name2, submit_user, flag):
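    # Compare the member ('people') lists of two groups: full member details for 'all',
    # details of the shared or differing members for 'same' / 'diff'.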
    if flag == 'all':
        detail_result1 = query_detail_group(g_name1, submit_user)
        detail_result2 = query_detail_group(g_name2, submit_user)
        return {'detail_result1':detail_result1,'detail_result2':detail_result2}
    else:
        topic_id1 = p.get_pinyin(g_name1)
        topic_id1 = topic_id1.lower()
        eid_string1 = es_group.get(index=group_name, doc_type=group_type, id=topic_id1,  fields=['people'])
        event_list1 = eid_string1['fields']['people'][0].split('&')
        topic_id2 = p.get_pinyin(g_name2)
        topic_id2 = topic_id2.lower()
        eid_string2 = es_group.get(index=group_name, doc_type=group_type, id=topic_id2,  fields=['people'])
        event_list2 = eid_string2['fields']['people'][0].split('&')
        # default to empty results so an unrecognized flag does not raise NameError at return
        detail_result1 = []
        detail_result2 = []
        if flag == 'same':
            same_e = set(event_list1)&set(event_list2)
            same_e = [i for i in same_e]
            detail_result1 = user_detail_search(same_e,submit_user)
            detail_result2 = user_detail_search(same_e,submit_user)
        if flag == 'diff':
            diff_e1 = set(event_list1) - (set(event_list1)&set(event_list2))
            diff_e1 = [i for i in diff_e1]
            diff_e2 = set(event_list2) - (set(event_list1)&set(event_list2))
            diff_e2 = [i for i in diff_e2]
            detail_result1 = user_detail_search(diff_e1,submit_user)
            detail_result2 = user_detail_search(diff_e2,submit_user)
        return {'detail_result1':detail_result1,'detail_result2':detail_result2}
Example #3
def compare_group_event(g_name1, g_name2, submit_user, flag):
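    # Compare the events linked in the graph to each group's members (users and orgs are
    # looked up through their own node indexes); flag selects all, shared, or differing events.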
    group_id1 = p.get_pinyin(g_name1)
    group_id1 = group_id1.lower()
    uid_string1 = es_group.get(index=group_name, doc_type=group_type, id=group_id1,  fields=['people'])
    uid_list1 = uid_string1['fields']['people'][0].split('&')

    group_id2 = p.get_pinyin(g_name2)
    group_id2 = group_id2.lower()
    uid_string2 = es_group.get(index=group_name, doc_type=group_type, id=group_id2,  fields=['people'])
    uid_list2 = uid_string2['fields']['people'][0].split('&')
    
    uid_list_all = [uid_list1, uid_list2]
    event_list = []
    for user_result in uid_list_all:
        event_list1 = []

        user_list, org_list = search_user_type(user_result)
        for uid in user_list:
            c_string = 'start n=node:'+node_index_name+'("'+people_primary+':'+str(uid)+'") match (n)-[r]-(e:Event) return e'
            result = graph.run(c_string)
            for event in result:
                # print event,'---------'
                # if event:
                event_dict = dict(event)
                event_id = event_dict['e']['event_id']
                event_list1.append(event_id)
        for uid in org_list:
            c_string = 'start n=node:'+org_index_name+'("'+org_primary+':'+str(uid)+'") match (n)-[r]-(e:Event) return e'
            result = graph.run(c_string)
            for event in result:
                # print event,'---------'
                # if event:
                event_dict = dict(event)
                event_id = event_dict['e']['event_id']
                event_list1.append(event_id)
        event_list.append(event_list1)
    # default to empty results so an unrecognized flag does not raise NameError at return
    detail_result1 = []
    detail_result2 = []
    if flag == 'all':
        event_list1 = [i for i in set(event_list[0])]
        event_list2 = [i for i in set(event_list[1])]
        detail_result1 = event_detail_search(event_list1,submit_user)
        detail_result2 = event_detail_search(event_list2,submit_user)

    if flag == 'same':
        same_u = set(event_list[0])&set(event_list[1])
        same_u = [i for i in same_u]
        detail_result1 = event_detail_search(same_u,submit_user)
        detail_result2 = event_detail_search(same_u,submit_user)

    if flag == 'diff':
        diff_u1 = set(event_list[0]) - (set(event_list[0])&set(event_list[1]))
        diff_u1 = [i for i in diff_u1]
        diff_u2 = set(event_list[1]) - (set(event_list[0])&set(event_list[1]))
        diff_u2 = [i for i in diff_u2]
        detail_result1 = event_detail_search(diff_u1,submit_user)
        detail_result2 = event_detail_search(diff_u2,submit_user)
    return {'detail_result1':detail_result1,'detail_result2':detail_result2}
Example #4
def compare_group_event(g_name1, g_name2, submit_user, flag):
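    # Variant of compare_group_event that looks every member up through the generic
    # node_index by uid and caps each member's events at LIMIT 50.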
    group_id1 = p.get_pinyin(g_name1)
    group_id1 = group_id1.lower()
    uid_string1 = es_group.get(index=group_name,
                               doc_type=group_type,
                               id=group_id1,
                               fields=['people'])
    uid_list1 = uid_string1['fields']['people'][0].split('&')

    group_id2 = p.get_pinyin(g_name2)
    group_id2 = group_id2.lower()
    uid_string2 = es_group.get(index=group_name,
                               doc_type=group_type,
                               id=group_id2,
                               fields=['people'])
    uid_list2 = uid_string2['fields']['people'][0].split('&')

    uid_list_all = [uid_list1, uid_list2]
    event_list = []
    for user_result in uid_list_all:
        event_list1 = []
        print user_result
        for user in user_result:
            user_value = user
            c_string = 'START s0 = node:node_index(uid="' + str(
                user_value) + '") '
            c_string += 'MATCH (s0)-[r]-(s1:Event) return s1 LIMIT 50'
            print c_string
            result = graph.run(c_string)
            for i in list(result):
                end_id = dict(i['s1'])
                event_list1.append(end_id['event_id'])
        event_list.append(event_list1)
    # default to empty results so an unrecognized flag does not raise NameError at return
    detail_result1 = []
    detail_result2 = []
    if flag == 'all':
        event_list1 = [i for i in set(event_list[0])]
        event_list2 = [i for i in set(event_list[1])]
        detail_result1 = event_detail_search(event_list1, submit_user)
        detail_result2 = event_detail_search(event_list2, submit_user)

    if flag == 'same':
        same_u = set(event_list[0]) & set(event_list[1])
        same_u = [i for i in same_u]
        detail_result1 = event_detail_search(same_u, submit_user)
        detail_result2 = event_detail_search(same_u, submit_user)

    if flag == 'diff':
        diff_u1 = set(
            event_list[0]) - (set(event_list[0]) & set(event_list[1]))
        diff_u1 = [i for i in diff_u1]
        diff_u2 = set(
            event_list[1]) - (set(event_list[0]) & set(event_list[1]))
        diff_u2 = [i for i in diff_u2]
        detail_result1 = event_detail_search(diff_u1, submit_user)
        detail_result2 = event_detail_search(diff_u2, submit_user)
    return {'detail_result1': detail_result1, 'detail_result2': detail_result2}
Example #5
def del_u_group_rel(g_name, uid):
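    # Delete the group<->user relation in the graph and drop the uid from the group's
    # 'people' field in ES; remove the group node and document entirely once it is empty.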
    en_name = p.get_pinyin(g_name)
    en_name = en_name.lower()
    s_string = 'START s0 = node:'+group_index_name+'('+group_primary+'="'+en_name+'"),'\
               +'s3 = node:'+node_index_name+'('+people_primary+'="'+uid+'") MATCH (s0)-[r:'+group_rel+']-(s3) DELETE r'

    print s_string
    graph.run(s_string)

    user_list_string = es_group.get(index=group_name,
                                    doc_type=group_type,
                                    id=en_name,
                                    fields=['people'])
    uid_list = user_list_string['fields']['people'][0].split('&')
    new_uid_list = set(uid_list) - set([uid])
    new_uid_list = [i for i in new_uid_list]
    uid_string = '&'.join(new_uid_list)
    if len(new_uid_list) == 0:
        s_string = 'START s0 = node:' + group_index_name + '(' + group_primary + '="' + en_name + '") DELETE s0'
        graph.run(s_string)
        es_group.delete(index=group_name, doc_type=group_type, id=en_name)
    else:
        es_group.update(index=group_name,doc_type=group_type,id=en_name,\
            body={'doc':{'people':uid_string, 'people_count':len(new_uid_list)}})
    return '1'
Example #6
def group_map(g_name, submit_user):
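    # Build the group's geo summary from group_geo_vary(): daily geo distribution, location
    # changes (with per-user detail), main activity city, and sorted main start/end cities.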
    result = {}
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name, doc_type=group_type, id=group_id,  fields=['people'])
    uid_list = uid_string['fields']['people'][0].split('&')
    source = group_geo_vary(g_name, submit_user)
    result['activity_geo_distribution_date'] = source['activity_geo_distribution_date']
    result['activity_geo_vary'] = source['activity_geo_vary']
    result['main_activity_geo'] = source['main_activity_geo']
    try:
        vary_detail_geo_dict = source['vary_detail_geo']
    except:
        vary_detail_geo_dict = {}
    if vary_detail_geo_dict != {}:
        result['vary_detail_geo'] = get_vary_detail_info(vary_detail_geo_dict, uid_list)
    else:
        result['vary_detail_geo'] = {}

    try:
        main_start_geo_dict = source['main_start_geo']
    except:
        main_start_geo_dict = {}
    result['main_start_geo'] = sorted(main_start_geo_dict.items(), key=lambda x:x[1], reverse=True)

    try:
        main_end_geo_dict = source['main_end_geo']
    except:
        main_end_geo_dict = {}
    result['main_end_geo'] = sorted(main_end_geo_dict.items(), key=lambda x:x[1], reverse=True)
    return result
Example #7
def create_group_relation(node_key1, node1_list, node1_index_name, rel,
                          node_key2, node2_id, node2_index_name, submit_user):
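    # Merge node1_list into the group's 'people' field in ES, then create graph relations
    # to the group node for both the user ids and the org ids among the new members.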
    node2_id_pinyin = p.get_pinyin(node2_id)
    node2_id_pinyin = node2_id_pinyin.lower()
    user_list_string = es_group.get(index=group_name, doc_type=group_type, id=node2_id_pinyin,\
                                fields=['people'])
    uid_list = []
    uid_list = user_list_string['fields']['people'][0].split('&')
    uid_list.extend(node1_list)
    uid_list = [i for i in set(uid_list)]
    eid_string = '&'.join(uid_list)
    # print eid_string
    es_group.update(index=group_name, doc_type=group_type, id=node2_id_pinyin,\
            body={'doc':{'people':eid_string, 'people_count':len(uid_list)}})
    user_org = search_user_type(uid_list)
    user_id = user_org[0]
    org_id = user_org[1]
    flag = create_rel(node_key1, user_id, node1_index_name, rel, node_key2,
                      node2_id_pinyin, node2_index_name, submit_user)
    node_key11 = org_primary
    node11_index_name = org_index_name
    flag = create_rel(node_key11, org_id, node11_index_name, rel, node_key2,
                      node2_id_pinyin, node2_index_name, submit_user)

    return flag
Example #8
def search_related_u_auto(g_name, submit_user):
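    # Find User and Org nodes connected in the graph to any group member, drop the members
    # themselves, and return their detail cards.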
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name,
                              doc_type=group_type,
                              id=group_id,
                              fields=['people'])
    uid_list = uid_string['fields']['people'][0].split('&')
    related_list = []
    for en_name in uid_list:
        s_string = 'START s0 = node:node_index(uid="%s") \
                MATCH (s0)-[r]-(s3:User) return s3' % (en_name)
        print s_string
        result = graph.run(s_string)
        for item in result:
            item_dict = dict(item)
            related_list.append(item_dict['s3']['uid'])
    for en_name in uid_list:
        s_string = 'START s0 = node:node_index(uid="%s") \
                MATCH (s0)-[r]-(s3:Org) return s3' % (en_name)
        print s_string
        result = graph.run(s_string)
        for item in result:
            item_dict = dict(item)
            related_list.append(
                item_dict['s3']['org_id'])  # print uid_list, '---------'
    related_list = set(related_list) - set(uid_list)
    related_list = [i for i in related_list]
    print related_list, '---------'
    result = user_detail_search(related_list, submit_user)
    return result
Example #9
def group_user_tag(g_name, submit_user):
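    # Aggregate keywords_list weights and work_tag counts from event documents (here a
    # hard-coded event id list) and return both, normalised to their maximum value.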
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name,
                              doc_type=group_type,
                              id=group_id,
                              fields=['people'])
    uid_list = uid_string['fields']['people'][0].split('&')
    event_list = ['te-lang-pu-1480176000']
    event_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
                body={'ids':event_list}, fields=['keywords_list', 'work_tag'])['docs']
    keywords_dict = {}
    mark_dict = {}
    print len(event_result)
    for i in event_result:
        i_keywords = json.loads(i['fields']['keywords_list'][0])
        try:
            i_mark = i['fields']['work_tag'][0]
        except:
            i_mark = ''
        for key in i_keywords:
            try:
                keywords_dict[key[0]] += key[1]
            except:
                keywords_dict[key[0]] = key[1]
        if i_mark:
            user_mark = deal_user_tag(i_mark)[0]
            for mark in user_mark:
                try:
                    mark_dict[mark] += 1
                except:
                    mark_dict[mark] = 1
    sorted_keywords_dict = sorted(keywords_dict.iteritems(),
                                  key=lambda x: x[1],
                                  reverse=True)[:100]
    sorted_mark_dict = sorted(mark_dict.iteritems(),
                              key=lambda x: x[1],
                              reverse=True)[:100]
    try:
        max_keywords_value = sorted_keywords_dict[0][1]
    except:
        max_keywords_value = 1.0
    normal_keywords_list = []
    for words in sorted_keywords_dict:
        normal_keywords_list.append(
            [words[0], float(words[1]) / max_keywords_value])

    try:
        max_mark_value = sorted_mark_dict[0][1]
    except:
        max_mark_value = 1.0
    normal_mark_list = []
    for words in sorted_mark_dict:
        normal_mark_list.append([words[0], float(words[1]) / max_mark_value])

    return {'keywords': normal_keywords_list, 'mark': normal_mark_list}
Example #10
def query_detail_group(g_name, submit_user):
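    # Return detail cards for every member of the group, or 0 if the group document is missing.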
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    try:
        uid_string = es_group.get(index=group_name, doc_type=group_type, id=group_id,  fields=['people'])
    except:
        return 0
    uid_list = uid_string['fields']['people'][0].split('&')
    # result = uid_list
    result = user_detail_search(uid_list, submit_user) # to be added later!!
    return result
Example #11
def group_user_keyowrds(g_name, submit_user):
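    # Aggregate each member's keywords and hashtag_dict counts from the portrait index and
    # return the top 100 of each, normalised to the maximum value.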
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name,
                              doc_type=group_type,
                              id=group_id,
                              fields=['people'])
    uid_list = uid_string['fields']['people'][0].split('&')

    tag_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
            body={'ids':uid_list}, fields=['hashtag_dict', 'keywords'])['docs']
    keywords_dict = {}
    hashtag_dict = {}
    print len(tag_result)
    for i in tag_result:
        i_keywords = json.loads(i['fields']['keywords'][0])
        i_hashtag = json.loads(i['fields']['hashtag_dict'][0])
        for hashtag, value in i_hashtag.iteritems():
            try:
                hashtag_dict[hashtag] += value
            except:
                hashtag_dict[hashtag] = value
        for key in i_keywords:
            try:
                keywords_dict[key[0]] += key[1]
            except:
                keywords_dict[key[0]] = key[1]
    sorted_keywords_dict = sorted(keywords_dict.iteritems(),
                                  key=lambda x: x[1],
                                  reverse=True)[:100]
    sorted_mark_dict = sorted(hashtag_dict.iteritems(),
                              key=lambda x: x[1],
                              reverse=True)[:100]

    try:
        max_keywords_value = sorted_keywords_dict[0][1]
    except:
        max_keywords_value = 1.0
    normal_keywords_list = []
    for words in sorted_keywords_dict:
        normal_keywords_list.append(
            [words[0], float(words[1]) / max_keywords_value])

    try:
        max_mark_value = sorted_mark_dict[0][1]
    except:
        max_mark_value = 1.0
    normal_mark_list = []
    for words in sorted_mark_dict:
        normal_mark_list.append([words[0], float(words[1]) / max_mark_value])

    return {'keywords': normal_keywords_list, 'mark': normal_mark_list}
Example #12
def group_user_rank(g_name, submit_user):
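    # Measure how densely the group's members are connected: list member-to-member relations
    # from the graph and turn the relation density into a textual conclusion.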
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    print group_id
    uid_string = es_group.get(index=group_name,
                              doc_type=group_type,
                              id=group_id,
                              fields=['people'])
    uid_list = uid_string['fields']['people'][0].split('&')

    indx_id_list = []
    for i in uid_list:
        a = graph.run('start n=node:' + node_index_name + '("' +
                      people_primary + ':' + str(i) + '") return id(n)')
        for j in a:
            indx_id_list.append(str(dict(j)['id(n)']))
    event_id_string = ','.join(indx_id_list)
    query = 'start d=node(' + event_id_string + '),e=node(' + event_id_string + ') match (d)-[r]->(e) return d,type(r),e'
    result = graph.run(query)
    exist_relation = []
    exist_relation_string = []
    for i in result:
        # print i
        dict_i = dict(i)
        start_id = dict_i['d']['uid']
        start_name = user_name_search(start_id)
        end_id = dict_i['e']['uid']
        end_name = user_name_search(end_id)
        exist_relation.append([start_id, start_name, relation_dict[dict_i['type(r)']], \
                    end_id, end_name])
        # print exist_relation
        relation_string = start_id + '-' + end_id
        exist_relation_string.append(relation_string)
    set_exist_relation = set(exist_relation_string)
    relation_set_count = len(list(set_exist_relation))
    node_count = len(uid_list)
    total_count = node_count * (node_count - 1) / 2
    try:
        relation_degree = float(relation_set_count) / total_count
    except:
        relation_degree = 0
    if relation_degree == 0:
        conclusion = u'无关联'  # "no connection"
    elif relation_degree < 0.33 and relation_degree > 0:
        conclusion = u'关联度较低'  # "weakly connected"
    elif relation_degree >= 0.33 and relation_degree < 0.66:
        conclusion = u'关联度适中'  # "moderately connected"
    elif relation_degree >= 0.66:
        conclusion = u'联系紧密'  # "closely connected"  ## not yet defined!!
    return {'relation_table':exist_relation, 'relation_count':relation_set_count,\
        'conclusion':conclusion, 'relation_degree':relation_degree}
Example #13
def show_file_link(g_name, submit_user):
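    # Return the group's attached file links: the '+'-separated 'file_link' field,
    # with each entry further split on ','.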
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name, doc_type=group_type, id=group_id,  fields=['people', 'file_link', 'wiki_link'])
    uid_list = uid_string['fields']['people'][0].split('&')
    origin_list = []
    try:
        file_link = uid_string['fields']['file_link'][0].split('+')
    except:
        file_link = []
    final_file = []
    for i in file_link:
        final_file.append(i.split(','))
    return final_file
Example #14
def add_group_file_link(g_name, file_name, operation):
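    # Add or remove '+'-separated file links on the group document and write the
    # deduplicated list back to ES.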
    new_label = file_name.split('+')
    en_name = p.get_pinyin(g_name)
    en_name = en_name.lower()  # lowercase to match the ES id convention used by the other group helpers
    print en_name
    group_label = es_group.get(index=group_name, doc_type=group_type, id=en_name,\
            fields=['file_link'])
    print group_label, '------------'
    try:
        group_label_list = group_label['fields']['file_link'][0].split('+')
    except:
        group_label_list = []
    if operation == 'add':
        group_label_list.extend(new_label)
    elif operation == 'del':
        group_label_list = set(group_label_list) - set(new_label)
    group_label_list = [i for i in set(group_label_list)]
    group_label_string = '+'.join(group_label_list)
    es_group.update(index=group_name,doc_type=group_type, id=en_name,\
            body={'doc':{'file_link':group_label_string}})
    return 1
Example #15
def search_related_u_auto(g_name, submit_user):
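    # Variant of search_related_u_auto that first resolves members to graph node ids and
    # fetches the related User/Org nodes in a single query.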
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name, doc_type=group_type, id=group_id, fields=['people'])
    uid_list = uid_string['fields']['people'][0].split('&')
    related_list = []
    user_list,org_list = search_user_type(uid_list)
    indx_id_list = []
    for i in user_list:
        a = graph.run('start n=node:'+node_index_name+'("'+people_primary+':'+str(i)+'") return id(n)')
        for j in a:
            indx_id_list.append(str(dict(j)['id(n)']))
    for i in org_list:
        a = graph.run('start n=node:'+org_index_name+'("'+org_primary+':'+str(i)+'") return id(n)')
        for j in a:
            indx_id_list.append(str(dict(j)['id(n)']))

    event_id_string = ','.join(indx_id_list)
    query = 'start d=node('+event_id_string+') match (d)-[r]-(e) where labels(e)[0] in '+json.dumps(['User', 'Org'])+' return e, labels(e)'
    print query
    result = graph.run(query)
    node_dict = {}
    related_list = []
    for i in result:
        dict_i = dict(i)
        print dict_i
        node_type = dict_i['labels(e)'][0]

        if node_type == people_node:
            node_id = dict_i['e']['uid']
            related_list.append(node_id)
        elif node_type == org_node:
            node_id = dict_i['e']['org_id']
            related_list.append(node_id)

    related_list = set(related_list) - set(uid_list)
    related_list = [i for i in related_list]
    print related_list,'---------'
    result = user_detail_search(related_list, submit_user)
    return result
Example #16
def group_event_rank(g_name, submit_user):
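    # Rank the events linked to the group's members by the summed influence of the
    # participating members, normalised to the top score.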
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name,
                              doc_type=group_type,
                              id=group_id,
                              fields=['people'])
    uid_list = uid_string['fields']['people'][0].split('&')
    related_event_list = []
    event_user_dict = {}
    for uid in uid_list:
        c_string = 'start n=node:' + node_index_name + '("' + people_primary + ':' + str(
            uid) + '") match (n)-[r]-(e:Event) return e'
        result = graph.run(c_string)
        for event in result:
            print event, '---------'
            # if event:
            event_dict = dict(event)
            event_id = event_dict['e']['event_id']
            related_event_list.append(event_id)
            try:
                event_user_dict[event_id].append(uid)
            except:
                event_user_dict[event_id] = []
                event_user_dict[event_id].append(uid)
    event_rank_list = []
    for k, v in event_user_dict.iteritems():
        k_dict = {}
        event_result = es_event.get(index=event_analysis_name,
                                    doc_type=event_text_type,
                                    id=k,
                                    fields=['user_results', 'name'])
        event_rank = event_result['fields']['user_results'][0]
        event_name = event_result['fields']['name'][0]
        user_results = json.loads(event_rank)
        k_dict['event_id'] = k
        k_dict['event_name'] = event_name
        k_dict['user'] = v
        k_dict['influ'] = 0
        print k
        for u in v:
            print u
            # if not user_results.has_key(u):
            #     continue
            try:
                influ_val = user_results[u]['influ']
            except:
                print u, '00000'
                influ_val = 10.0
            k_dict['influ'] += influ_val
        event_rank_list.append(k_dict)
    # print event_rank_list,'event_rank_list'
    sorted_event = sorted(event_rank_list,
                          key=lambda x: x['influ'],
                          reverse=True)
    try:
        max_value = sorted_event[0]['influ']
    except:
        return []
    final_event_rank = []
    for ii in sorted_event:
        ii['influ'] = float(ii['influ']) / max_value
        final_event_rank.append(ii)
    return final_event_rank
Example #17
def group_geo_vary(g_name, submit_user):
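    # Aggregate each member's daily activity_geo_dict from the portrait index into the group's
    # geo distribution over time, city-to-city moves, and main start/end cities.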
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name,
                              doc_type=group_type,
                              id=group_id,
                              fields=['people'])
    uid_list = uid_string['fields']['people'][0].split('&')
    activity_geo_vary = {}
    main_start_geo = {}
    main_end_geo = {}
    vary_detail_geo = {}
    activity_geo_distribution_date = {}
    if RUN_TYPE == 1:
        now_ts = int(time.time())
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    try:
        iter_user_dict_list = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
            body={'ids':uid_list})['docs']
    except:
        iter_user_dict_list = []
    for user_dict in iter_user_dict_list:
        uid = user_dict['_id']
        source = user_dict['_source']
        #attr8: activity_geo_dict---distribution by date
        user_activity_geo = {}
        activity_geo_dict_list = json.loads(source['activity_geo_dict'])
        activity_geo_date_count = len(activity_geo_dict_list)
        iter_ts = now_date_ts - activity_geo_date_count * DAY
        user_date_main_list = []
        for i in range(0, activity_geo_date_count):
            date_item = activity_geo_dict_list[i]
            if iter_ts in activity_geo_distribution_date:
                activity_geo_distribution_date[iter_ts] = union_dict_list(
                    [activity_geo_distribution_date[iter_ts], date_item])
            else:
                activity_geo_distribution_date[iter_ts] = date_item
            #use to get activity_geo vary
            sort_date_item = sorted(date_item.items(),
                                    key=lambda x: x[1],
                                    reverse=True)
            if date_item != {}:
                main_date_city = sort_date_item[0][0]
                try:
                    last_user_date_main_item = user_date_main_list[-1][0]
                except:
                    last_user_date_main_item = ''
                if main_date_city != last_user_date_main_item:
                    user_date_main_list.append([main_date_city, iter_ts])

            iter_ts += DAY
        #attr8: activity_geo_dict---location vary
        if len(user_date_main_list) > 1:
            for i in range(1, len(user_date_main_list)):
                vary_city = [
                    geo_ts_item[0]
                    for geo_ts_item in user_date_main_list[i - 1:i + 1]
                ]
                vary_ts = [
                    geo_ts_item[1]
                    for geo_ts_item in user_date_main_list[i - 1:i + 1]
                ]
                vary_item = '&'.join(vary_city)
                #vary_item = '&'.join(user_date_main_list[i-1:i+1])
                #get activity geo vary for vary table and map
                try:
                    activity_geo_vary[vary_item] += 1
                except:
                    activity_geo_vary[vary_item] = 1
                #get main start geo
                try:
                    main_start_geo[vary_city[0]] += 1
                except:
                    main_start_geo[vary_city[0]] = 1
                #get main end geo
                try:
                    main_end_geo[vary_city[1]] += 1
                except:
                    main_end_geo[vary_city[1]] = 1
                #get vary detail geo
                try:
                    vary_detail_geo[vary_item].append(
                        [uid, vary_ts[0], vary_ts[1]])
                except:
                    vary_detail_geo[vary_item] = [[
                        uid, vary_ts[0], vary_ts[1]
                    ]]
    all_activity_geo = union_dict_list(activity_geo_distribution_date.values())
    sort_all_activity_geo = sorted(all_activity_geo.items(),
                                   key=lambda x: x[1],
                                   reverse=True)
    try:
        main_activity_geo = sort_all_activity_geo[0][0]
    except:
        main_activity_geo = ''


    return  {'main_start_geo':main_start_geo, 'main_end_geo': main_end_geo, \
        'vary_detail_geo': vary_detail_geo, 'activity_geo_vary':activity_geo_vary,\
        'main_activity_geo':main_activity_geo, 'activity_geo_distribution_date':activity_geo_distribution_date}
Example #18
def group_user_tag(g_name, submit_user):
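    # Variant of group_user_tag that first collects the events actually linked to the group's
    # user and org members in the graph, then aggregates keywords and work tags from them.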
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name, doc_type=group_type, id=group_id,  fields=['people'])
    uid_list = uid_string['fields']['people'][0].split('&')
    user_list, org_list = search_user_type(uid_list)
    event_list = []
    related_event_list = []
    for uid in user_list:
        c_string = 'start n=node:'+node_index_name+'("'+people_primary+':'+str(uid)+'") match (n)-[r]-(e:Event) return e'
        result = graph.run(c_string)
        for event in result:
            # print event,'---------'
            # if event:
            event_dict = dict(event)
            event_id = event_dict['e']['event_id']
            related_event_list.append(event_id)
    for uid in org_list:
        c_string = 'start n=node:'+org_index_name+'("'+org_primary+':'+str(uid)+'") match (n)-[r]-(e:Event) return e'
        result = graph.run(c_string)
        for event in result:
            # print event,'---------'
            # if event:
            event_dict = dict(event)
            event_id = event_dict['e']['event_id']
            related_event_list.append(event_id)
    event_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
                body={'ids':related_event_list}, fields=['keywords_list', 'work_tag'])['docs']
    keywords_dict = {}
    mark_dict = {}
    print len(event_result)
    for i in event_result:
        i_keywords = json.loads(i['fields']['keywords_list'][0])
        try:
            i_mark = i['fields']['work_tag'][0]
        except:
            i_mark = ''
        print i_mark
        for key in i_keywords:
            try:
                keywords_dict[key[0]] += key[1]
            except:
                keywords_dict[key[0]] = key[1]
        if i_mark:
            print i_mark,'i_mark'
            user_mark = deal_user_tag(i_mark, submit_user)[0]
            for mark in user_mark:
                try:
                    mark_dict[mark] += 1
                except:
                    mark_dict[mark] = 1
    sorted_keywords_dict = sorted(keywords_dict.iteritems(), key=lambda x:x[1], reverse=True)[:100]
    sorted_mark_dict = sorted(mark_dict.iteritems(), key=lambda x:x[1], reverse=True)[:100]
    try:
        max_keywords_value = sorted_keywords_dict[0][1]
    except:
        max_keywords_value = 1.0
    normal_keywords_list = []
    for words in sorted_keywords_dict:
        normal_keywords_list.append([words[0], float(words[1])/max_keywords_value])

    try:
        max_mark_value = sorted_mark_dict[0][1]
    except:
        max_mark_value = 1.0
    normal_mark_list = []
    for words in sorted_mark_dict:
        normal_mark_list.append([words[0], float(words[1])/max_mark_value])

    return {'keywords':normal_keywords_list, 'mark':normal_mark_list}
Example #19
def search_related_u_card(item, submit_user, g_name):
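    # Wildcard-search the portrait index by keyword/uid/uname, drop users already in the
    # group (when g_name is given), and return cards with log-scaled evaluation scores.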
    evaluate_max = get_evaluate_max()
    if g_name:
        g_name = g_name + '_' + submit_user
        g_name_pinyin = p.get_pinyin(g_name)
        g_name_pinyin = g_name_pinyin.lower()
        user_list_string = es_group.get(index=group_name, doc_type=group_type, id=g_name_pinyin,\
                            fields=['people'])
        uid_list = []
        uid_list = user_list_string['fields']['people'][0].split('&')
        # print uid_list,'==========='
    else:
        uid_list = []

    query_body = {
        "query": {
            'bool': {
                'should': [{
                    "wildcard": {
                        'keywords': '*' + str(item.encode('utf-8')) + '*'
                    }
                }, {
                    "wildcard": {
                        'uid': '*' + str(item.encode('utf-8')) + '*'
                    }
                }, {
                    "wildcard": {
                        'uname': '*' + str(item.encode('utf-8')) + '*'
                    }
                }]
            }
        },
        'size': 1000
    }
    try:
        user_result = es.search(index=portrait_index_name, doc_type=portrait_index_type, \
                body=query_body, fields=['uid'])['hits']['hits']
    except:
        return 'node does not exist'
    # print user_result
    search_uid = []
    result = []
    for i in user_result:
        i_fields = i['fields']
        search_uid.append(i_fields['uid'][0])
    show_id_set = set(search_uid) - set(uid_list)
    show_id = [i for i in show_id_set]
    if not show_id:
        return []
    fields_list = [
        'uid', 'uname', 'location', 'influence', 'sensitive', 'activeness',
        'keywords_string', 'function_mark'
    ]
    user_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
                body={'ids':show_id}, fields=fields_list)['docs']
    for i in user_result:
        user = []
        i_fields = i['fields']
        for j in fields_list:
            if not i_fields.has_key(j):
                user.append('')
                continue
            if j == 'keywords_string':  # fields_list fetches 'keywords_string'; checking 'keywords' here never matched
                keywords = i_fields[j][0].split('&')
                keywords = keywords[:5]
                user.append(keywords)
            elif j == 'function_mark':
                tag = deal_user_tag(i_fields[j][0], submit_user)[0]
                user.append(tag)
            elif j in ['influence', 'sensitive', 'activeness']:
                user.append(
                    math.log(i_fields[j][0] /
                             (evaluate_max[j] * 9 + 1) + 1, 10) * 100)
            else:
                user.append(i_fields[j][0])
        result.append(user)
    return result
Example #20
def group_related(g_name, submit_user):
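    # Collect everything related to the group's members in the graph (users, orgs, events)
    # plus the attached file and wiki links, resolving readable names where possible.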
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name,
                              doc_type=group_type,
                              id=group_id,
                              fields=['people', 'file_link', 'wiki_link'])
    origin_list = uid_string['fields']['people'][0].split('&')
    # origin_list = []

    try:
        file_link = uid_string['fields']['file_link'][0].split('+')
    except:
        file_link = []
    final_file = []
    for i in file_link:
        final_file.append(i.split(','))
    try:
        final_wiki = json.loads(uid_string['fields']['wiki_link'][0])
    except:
        final_wiki = []
    event_graph_id = []
    for i in origin_list:
        a = graph.run('start n=node:' + node_index_name + '("' +
                      people_primary + ':' + str(i) + '") return id(n)')
        for j in a:
            event_graph_id.append(str(dict(j)['id(n)']))
    print event_graph_id
    event_id_string = ','.join(event_graph_id)
    query = 'start d=node(' + event_id_string + ') match (d)-[r]-(e) return labels(e), e'
    result = graph.run(query)
    node_dict = {}
    for i in result:
        dict_i = dict(i)
        node_type = dict_i['labels(e)'][0]

        if node_type == people_node:
            node_id = dict_i['e']['uid']
            try:
                node_dict['user'].append(node_id)
            except:
                node_dict['user'] = []
                node_dict['user'].append(node_id)
        elif node_type == org_node:
            node_id = dict_i['e']['org_id']
            try:
                node_dict['org'].append(node_id)
            except:
                node_dict['org'] = []
                node_dict['org'].append(node_id)

        elif node_type == event_node:
            node_id = dict_i['e']['event_id']
            if node_id in event_graph_id:
                continue
            try:
                node_dict['event'].append(node_id)
            except:
                node_dict['event'] = []
                node_dict['event'].append(node_id)
    try:
        uid_list = [i for i in set(node_dict['user'])]
        user_result = es.mget(index=portrait_index_name,
                              doc_type=portrait_index_type,
                              body={'ids': uid_list},
                              fields=['uname', 'uid'])['docs']
    except:
        user_result = []
    try:
        org_list_ = [i for i in set(node_dict['org'])]
        org_result = es.mget(index=portrait_index_name,
                             doc_type=portrait_index_type,
                             body={'ids': org_list_},
                             fields=['uname', 'uid'])['docs']
    except:
        org_result = []
    try:
        event_list = [i for i in set(node_dict['event'])]
        event_result = es_event.mget(index=event_analysis_name,
                                     doc_type=event_text_type,
                                     body={'ids': event_list},
                                     fields=['en_name', 'name'])['docs']
    except:
        event_result = []
    final_user = []
    for i in user_result:
        if i['_id'] in origin_list:
            continue
        if i['found'] == True:
            if i['fields']['uname'][0] == '':
                uname_s = i['fields']['uid'][0]
            else:
                uname_s = i['fields']['uname'][0]
            final_user.append([i['fields']['uid'][0], uname_s])
        else:
            final_user.append([i['_id'], i['_id']])

    final_org = []
    for i in org_result:
        if i['_id'] in origin_list:
            continue
        if i['found'] == True:
            if i['fields']['uname'][0] == '':
                uname_s = i['fields']['uid'][0]
            else:
                uname_s = i['fields']['uname'][0]
            final_org.append([i['fields']['uid'][0], uname_s])
        else:
            final_org.append([i['_id'], i['_id']])

    final_event = []
    for i in event_result:
        if i['found'] == True:
            final_event.append(
                [i['fields']['en_name'][0], i['fields']['name'][0]])
        else:
            final_event.append([i['_id'], i['_id']])
    return {'final_user':final_user, 'final_org':final_org, 'final_event':final_event, \
            'final_file':final_file, 'final_wiki':final_wiki}