Example #1
def get_theme_user_tag(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    topic_id = topic_id.lower()
    eid_string = es_event.get(index=special_event_name, doc_type=special_event_type, id=topic_id,  fields=['event'])
    event_list = eid_string['fields']['event'][0].split('&')
    user_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
                body={'ids':event_list}, fields=['user_results','name'])['docs']
    user_list =[]
    for i in user_result:
        user_dict = json.loads(i['fields']['user_results'][0])
        user_list.extend(user_dict.keys())
    user_list_set = list(set(user_list))

    tag_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
            body={'ids':user_list_set}, fields=['function_mark', 'keywords'])['docs']
    keywords_dict = {}
    mark_dict = {}
    for i in tag_result:
        i_keywords = json.loads(i['fields']['keywords'][0])
        try:
            i_mark = i['fields']['function_mark'][0]
        except:
            i_mark = ''
        for key in i_keywords:
            try:
                keywords_dict[key[0]] += key[1]
            except:
                keywords_dict[key[0]] = key[1]
        if i_mark:
            user_mark = deal_user_tag(i_mark, submit_user)[0]
            for mark in user_mark:
                try:
                    mark_dict[mark] += 1
                except:
                    mark_dict[mark] = 1
    sorted_keywords_dict = sorted(keywords_dict.iteritems(), key=lambda x:x[1], reverse=True)[:100]
    sorted_mark_dict = sorted(mark_dict.iteritems(), key=lambda x:x[1], reverse=True)[:100]
    
    try:
        max_keywords_value = sorted_keywords_dict[0][1]
    except:
        max_keywords_value = 1.0
    normal_keywords_list = []
    for words in sorted_keywords_dict:
        normal_keywords_list.append([words[0], float(words[1])/max_keywords_value])

    try:
        max_mark_value = sorted_mark_dict[0][1]
    except:
        max_mark_value = 1.0
    normal_mark_list = []
    for words in sorted_mark_dict:
        normal_mark_list.append([words[0], float(words[1])/max_mark_value])

    return {'keywords':normal_keywords_list, 'mark':normal_mark_list}
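# The tally-and-normalize pattern above (accumulate weights in a dict,
# keep the top 100, scale by the maximum weight) recurs throughout this
# file. A minimal equivalent sketch with collections.Counter;
# normalize_top is a hypothetical helper, not part of the original code:
from collections import Counter

def normalize_top(counter, top_n=100):
    top = counter.most_common(top_n)
    max_value = float(top[0][1]) if top else 1.0
    return [[key, value / max_value] for key, value in top]

# e.g. normalize_top(Counter({'a': 4, 'b': 2})) -> [['a', 1.0], ['b', 0.5]]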
Example #2
def group_user_tag(g_name, submit_user):
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name,
                              doc_type=group_type,
                              id=group_id,
                              fields=['people'])
    uid_list = uid_string['fields']['people'][0].split('&')
    # NOTE: hardcoded test event id; the fuller group_user_tag below
    # (Example #12) derives the event list from uid_list via the graph.
    event_list = ['te-lang-pu-1480176000']
    event_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
                body={'ids':event_list}, fields=['keywords_list', 'work_tag'])['docs']
    keywords_dict = {}
    mark_dict = {}
    for i in event_result:
        i_keywords = json.loads(i['fields']['keywords_list'][0])
        try:
            i_mark = i['fields']['work_tag'][0]
        except:
            i_mark = ''
        for key in i_keywords:
            try:
                keywords_dict[key[0]] += key[1]
            except:
                keywords_dict[key[0]] = key[1]
        if i_mark:
            user_mark = deal_user_tag(i_mark, submit_user)[0]  # pass submit_user as at the other call sites
            for mark in user_mark:
                try:
                    mark_dict[mark] += 1
                except:
                    mark_dict[mark] = 1
    sorted_keywords_dict = sorted(keywords_dict.iteritems(),
                                  key=lambda x: x[1],
                                  reverse=True)[:100]
    sorted_mark_dict = sorted(mark_dict.iteritems(),
                              key=lambda x: x[1],
                              reverse=True)[:100]
    try:
        max_keywords_value = sorted_keywords_dict[0][1]
    except:
        max_keywords_value = 1.0
    normal_keywords_list = []
    for words in sorted_keywords_dict:
        normal_keywords_list.append(
            [words[0], float(words[1]) / max_keywords_value])

    try:
        max_mark_value = sorted_mark_dict[0][1]
    except:
        max_mark_value = 1.0
    normal_mark_list = []
    for words in sorted_mark_dict:
        normal_mark_list.append([words[0], float(words[1]) / max_mark_value])

    return {'keywords': normal_keywords_list, 'mark': normal_mark_list}
Example #3
def get_theme_net(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    topic_id = topic_id.lower()
    eid_string = es_event.get(index=special_event_name,
                              doc_type=special_event_type,
                              id=topic_id,
                              fields=['event'])
    event_list = eid_string['fields']['event'][0].split('&')
    event_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
                body={'ids':event_list}, fields=['en_name', 'name'])['docs']
    event_name_dict = {}
    for i in event_result:
        event_en_name = i['fields']['en_name'][0]
        event_name = i['fields']['name'][0]
        event_name_dict[event_en_name] = event_name
    event_graph_id = []
    for i in event_list:
        a = graph.run('start n=node:' + event_index_name + '("' +
                      event_primary + ':' + str(i) + '") return id(n)')
        for j in a:
            event_graph_id.append(str(dict(j)['id(n)']))
    event_id_string = ','.join(event_graph_id)
    query = 'start d=node(' + event_id_string + '),e=node(' + event_id_string + ') match (d)-[r]->(e) return d,type(r),e'
    result = graph.run(query)
    exist_relation = []
    exist_relation_string = []
    for i in result:
        dict_i = dict(i)
        start_id = dict_i['d']['event_id']
        end_id = dict_i['e']['event_id']
        exist_relation.append([event_name_dict[start_id], relation_dict[dict_i['type(r)']], \
                    event_name_dict[end_id]])
        relation_string = start_id + '-' + end_id
        exist_relation_string.append(relation_string)
    relation_set_count = len(set(exist_relation_string))
    node_count = len(event_list)
    total_count = node_count * (node_count - 1) / 2
    try:
        relation_degree = float(relation_set_count) / total_count
    except:
        relation_degree = 0
    if relation_degree == 0:
        conclusion = u'无关联'  # "no relation"
    elif 0 < relation_degree < 0.33:
        conclusion = u'关联度较低'  # "weakly related"
    elif 0.33 <= relation_degree < 0.66:
        conclusion = u'关联度适中'  # "moderately related"
    else:
        conclusion = u'联系紧密'  # "tightly connected" (original note: "undefined!!")
    return {'relation_table':exist_relation, 'relation_count':relation_set_count,\
        'conclusion':conclusion, 'relation_degree':relation_degree}
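# relation_degree above is the edge density of an undirected simple
# graph: distinct relations over the n*(n-1)/2 possible event pairs.
# A self-contained sketch of the same computation (hypothetical helper,
# with the same zero-division guard):
def relation_density(edge_keys, node_count):
    possible = node_count * (node_count - 1) / 2
    if possible == 0:
        return 0.0
    return float(len(set(edge_keys))) / possible

# e.g. 4 events with 3 distinct relations -> 3 / 6 = 0.5, which falls
# in the 0.33 <= x < 0.66 band ("moderately related")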
Example #4
def draw_map(uid_list):
    uid_list = list(set(uid_list))
    black_country = [u'美国', u'其他', u'法国', u'英国', u'中国', u'局域网']  # US, other, France, UK, China, LAN
    if len(uid_list) == 0:
        return ''
    results = es_event.mget(index=event_analysis_name, doc_type=event_type, \
                body={'ids': uid_list},_source=False, fields=['geo_weibo_count'])['docs']

    geo_list = []
    for i in results:
        geo_list.extend(json.loads(i['fields']['geo_weibo_count'][0]))
    location_dict = dict()
    for geo in geo_list:
        for k, v in geo[1].iteritems():
            if k == 'total' or k == 'unknown':
                continue
            try:
                location_dict[geo[0] + ' ' + k] += v
            except:
                location_dict[geo[0] + ' ' + k] = v

    filter_location = dict()
    for k, v in location_dict.iteritems():
        tmp = k.split(' ')
        if len(tmp) == 1:
            continue  # no province part; tmp[1] below would raise IndexError
        if tmp[1] in black_country or u'国' in tmp[1]:
            continue
        # the municipality branch and the general branch of the original
        # both accumulated on tmp[1], so they collapse into one path
        try:
            filter_location[tmp[1]] += v
        except KeyError:
            filter_location[tmp[1]] = v

    # TODO: check the two-level relation between groups and topics, then
    # verify that this geo data is right (lines 189/192 were modified)
    return_results = sorted(filter_location.iteritems(),
                            key=lambda x: x[1],
                            reverse=True)
    return return_results
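# Usage sketch for draw_map (helper name and event ids are hypothetical;
# assumes the es_event connection and index settings defined elsewhere
# in this module, and a non-empty uid_list):
def print_top_provinces(eid_list, top_n=10):
    for province, weibo_count in draw_map(eid_list)[:top_n]:
        print province, weibo_count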
Example #5
def get_theme_geo(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    topic_id = topic_id.lower()
    eid_string = es_event.get(index=special_event_name,
                              doc_type=special_event_type,
                              id=topic_id,
                              fields=['event'])
    event_list = eid_string['fields']['event'][0].split('&')
    event_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
                body={'ids':event_list}, fields=['geo_results', 'name'])['docs']
    city_dict = {}
    event_city = {}
    event_name_list = []
    for i in event_result:
        if not i['found']:
            continue
        event_name = i['fields']['name'][0]
        event_city[event_name] = {}
        event_name_list.append(event_name)
        geo_event = json.loads(i['fields']['geo_results'][0])
        for k, v in geo_event.iteritems():
            for province_k, city_v in v.iteritems():
                for city_name, city_count in city_v.iteritems():
                    if city_name == 'total' or city_name == 'unknown':
                        continue
                    try:
                        city_dict[city_name] += city_count
                    except:
                        city_dict[city_name] = city_count
                    try:
                        event_city[event_name][city_name] += city_count
                    except:
                        event_city[event_name][city_name] = city_count

    sorted_city_dict = sorted(city_dict.iteritems(),
                              key=lambda x: x[1],
                              reverse=True)[:10]
    top_city = [i[0] for i in sorted_city_dict]
    final_city_count = {}
    for event_name in event_name_list:
        final_city_count[event_name] = []
        for city in top_city:
            if city in event_city[event_name]:
                final_city_count[event_name].append(event_city[event_name][city])
            else:
                final_city_count[event_name].append(0)

    return {'top_city': top_city, 'event_city': final_city_count}
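# The loop above builds, for each event, a row of city counts aligned
# to the top_city columns; an equivalent per-event one-liner (sketch):
# final_city_count[name] = [event_city[name].get(city, 0) for city in top_city]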
Example #6
def get_theme_keywords(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    topic_id = topic_id.lower()
    eid_string = es_event.get(index=special_event_name,
                              doc_type=special_event_type,
                              id=topic_id,
                              fields=['event'])
    event_list = eid_string['fields']['event'][0].split('&')
    event_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
                body={'ids':event_list}, fields=['keywords_list', 'hashtag_dict'])['docs']
    keywords_dict = {}
    hash_dict = {}
    for i in event_result:
        i_keywords = json.loads(i['fields']['keywords_list'][0])
        i_hashtag = json.loads(i['fields']['hashtag_dict'][0])
        for key in i_keywords:
            try:
                keywords_dict[key[0]] += key[1]
            except:
                keywords_dict[key[0]] = key[1]
        for k, v in i_hashtag.iteritems():
            try:
                hash_dict[k] += v
            except:
                hash_dict[k] = v
    sorted_keywords_dict = sorted(keywords_dict.iteritems(),
                                  key=lambda x: x[1],
                                  reverse=True)[:100]
    try:
        max_keywords_value = sorted_keywords_dict[0][1]
    except:
        max_keywords_value = 1.0
    normal_keywords_list = []
    for words in sorted_keywords_dict:
        normal_keywords_list.append(
            [words[0], float(words[1]) / max_keywords_value])

    sorted_hash_dict = sorted(hash_dict.iteritems(),
                              key=lambda x: x[1],
                              reverse=True)[:100]
    try:
        max_hash_value = sorted_hash_dict[0][1]
    except:
        max_hash_value = 1.0
    normal_hash_list = []
    for words in sorted_hash_dict:
        normal_hash_list.append([words[0], float(words[1]) / max_hash_value])
    return {'keywords': normal_keywords_list, 'hashtag': normal_hash_list}
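# The per-event keyword merge above (summing [word, weight] pairs
# across events) as one helper; a hypothetical sketch using the same
# data shape as keywords_list:
def merge_weighted(pair_lists):
    totals = {}
    for pairs in pair_lists:
        for word, weight in pairs:
            totals[word] = totals.get(word, 0) + weight
    return totals

# merge_weighted([[['a', 2]], [['a', 1], ['b', 3]]]) -> {'a': 3, 'b': 3}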
Example #7
def get_special_labels(node1_list):
    labels = es_event.mget(index=event_analysis_name, doc_type=event_text_type, body={'ids':node1_list},\
                                fields=['keywords'], _source=False)['docs']
    theme_label = []
    keywords_dict = {}
    for i in labels:
        theme_label.extend(i['fields']['keywords'][0].split('&'))
    for i in set(theme_label):
        keywords_dict[i] = theme_label.count(i)
    sorted_keywords = sorted(keywords_dict.iteritems(), key=lambda x:x[1], reverse=True)
    result_label = [i[0] for i in sorted_keywords[:100]]
    result_label_string = '&'.join(result_label)
    return result_label_string
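# get_special_labels counts duplicates with theme_label.count(i) per
# distinct label, which is O(n^2); the same result in one pass with
# collections.Counter (hypothetical sketch):
from collections import Counter

def top_labels(label_lists, top_n=100):
    counts = Counter()
    for labels in label_lists:
        counts.update(labels)
    return '&'.join(label for label, _ in counts.most_common(top_n))

# top_labels([['a', 'b'], ['a']]) -> 'a&b'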
Example #8
def theme_tab_map(theme_name, node_type, relation_type, layer):
    black_country = [u'美国', u'其他', u'法国', u'英国', u'中国', u'局域网']  # US, other, France, UK, China, LAN
    tab_theme_result = theme_tab_graph(theme_name, node_type, relation_type,
                                       layer)
    uid_list_origin = tab_theme_result['map_eid']
    uid_list = list(uid_list_origin)
    results = es_event.mget(index=event_analysis_name, doc_type=event_type, \
                body={'ids': uid_list},_source=False, fields=['geo_weibo_count'])['docs']

    geo_list = []
    for i in results:
        geo_list.extend(json.loads(i['fields']['geo_weibo_count'][0]))
    location_dict = dict()
    for geo in geo_list:
        for k, v in geo[1].iteritems():
            if k == 'total' or k == 'unknown':
                continue
            try:
                location_dict[geo[0] + ' ' + k] += v
            except:
                location_dict[geo[0] + ' ' + k] = v
    filter_location = dict()
    for k, v in location_dict.iteritems():
        tmp = k.split(' ')
        if len(tmp) == 1:
            continue  # no province part; tmp[1] below would raise IndexError
        if tmp[1] in black_country or u'国' in tmp[1]:
            continue
        # as in draw_map above, both original branches accumulated on
        # tmp[1], so they collapse into one path
        try:
            filter_location[tmp[1]] += v
        except KeyError:
            filter_location[tmp[1]] = v

    return_results = sorted(filter_location.iteritems(),
                            key=lambda x: x[1],
                            reverse=True)
    return return_results[:50]
Example #9
def get_theme_user_rank(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    topic_id = topic_id.lower()
    eid_string = es_event.get(index=special_event_name,
                              doc_type=special_event_type,
                              id=topic_id,
                              fields=['event'])
    event_list = eid_string['fields']['event'][0].split('&')
    user_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
                body={'ids':event_list}, fields=['user_results','name'])['docs']
    user_influence = {}
    for i in user_result:
        user_dict = json.loads(i['fields']['user_results'][0])
        for k, v in user_dict.iteritems():
            if k in user_influence:
                continue
            user_influence[k] = {}
            user_influence[k]['id'] = k
            user_influence[k]['name'] = user_name_search(k)
            user_influence[k]['node_type'] = search_type(k)

    for i in user_result:
        event_name = i['fields']['name'][0]
        user_dict = json.loads(i['fields']['user_results'][0])
        for k, v in user_dict.iteritems():
            user_influence[k].setdefault('related_event', []).append(event_name)
            try:
                user_influence[k]['influ'] += v['influ']
            except KeyError:
                user_influence[k]['influ'] = v['influ']
    user_influence_list = user_influence.values()
    sorted_user_influ = sorted(user_influence_list,
                               key=lambda x: x['influ'],
                               reverse=True)
    if not sorted_user_influ:
        return []
    max_importance = sorted_user_influ[0]['influ'] or 1.0  # guard an empty theme / zero influence
    for i in sorted_user_influ:
        i['influ'] = float(i['influ']) / max_importance
    return sorted_user_influ
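# Usage sketch (hypothetical theme name): entries come back sorted by
# normalized influence with the top user at 1.0.
# for user in get_theme_user_rank(u'some-theme', 'admin')[:5]:
#     print user['name'], user['influ'], user['related_event']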
Example #10
def get_theme_related(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    eid_string = es_event.get(index=special_event_name,
                              doc_type=special_event_type,
                              id=topic_id,
                              fields=['event', 'wiki_link', 'file_link'])
    event_list = eid_string['fields']['event'][0].split('&')
    try:
        file_link = eid_string['fields']['file_link'][0].split('+')
    except:
        file_link = []
    final_file = []
    for i in file_link:
        final_file.append(i.split(','))
    try:
        final_wiki = json.loads(eid_string['fields']['wiki_link'][0])
    except:
        final_wiki = []
    event_graph_id = []
    for i in event_list:
        a = graph.run('start n=node:' + event_index_name + '("' +
                      event_primary + ':' + str(i) + '") return id(n)')
        for j in a:
            event_graph_id.append(str(dict(j)['id(n)']))
    event_id_string = ','.join(event_graph_id)
    query = 'start d=node(' + event_id_string + ') match (d)-[r]-(e) return labels(e), e'
    result = graph.run(query)
    node_dict = {}
    for i in result:
        dict_i = dict(i)
        node_type = dict_i['labels(e)'][0]

        if node_type == people_node:
            node_dict.setdefault('user', []).append(dict_i['e']['uid'])
        elif node_type == org_node:
            node_dict.setdefault('org', []).append(dict_i['e']['org_id'])
        elif node_type == event_node:
            node_id = dict_i['e']['event_id']
            if node_id in event_list:
                # skip the theme's own events; the original compared
                # event_id against graph-internal node ids, which never match
                continue
            node_dict.setdefault('event', []).append(node_id)
    uid_list = list(set(node_dict.get('user', [])))
    org_list = list(set(node_dict.get('org', [])))
    event_list = list(set(node_dict.get('event', [])))
    user_result = es.mget(index=portrait_index_name,
                          doc_type=portrait_index_type,
                          body={'ids': uid_list},
                          fields=['uname', 'uid'])['docs']
    org_result = es.mget(index=portrait_index_name,
                         doc_type=portrait_index_type,
                         body={'ids': org_list},
                         fields=['uname', 'uid'])['docs']
    event_result = es_event.mget(index=event_analysis_name,
                                 doc_type=event_text_type,
                                 body={'ids': event_list},
                                 fields=['en_name', 'name'])['docs']
    final_user = []
    for i in user_result:
        if i['found'] == True:
            if i['fields']['uname'][0] == '':
                uname_s = i['fields']['uid'][0]
            else:
                uname_s = i['fields']['uname'][0]
            final_user.append([i['fields']['uid'][0], uname_s])
        else:
            final_user.append([i['_id'], i['_id']])

    final_org = []
    for i in org_result:
        if i['found'] == True:
            if i['fields']['uname'][0] == '':
                uname_s = i['fields']['uid'][0]
            else:
                uname_s = i['fields']['uname'][0]
            final_org.append([i['fields']['uid'][0], uname_s])
        else:
            final_org.append([i['_id'], i['_id']])

    final_event = []
    for i in event_result:
        # the original appended these to final_org, leaving final_event empty
        if i['found']:
            final_event.append(
                [i['fields']['en_name'][0], i['fields']['name'][0]])
        else:
            final_event.append([i['_id'], i['_id']])
    return [final_user, final_org, final_event, final_file, final_wiki]
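# Usage sketch (hypothetical theme name): note that, unlike
# group_related below, this returns a positional list rather than a
# dict keyed by 'final_user' etc.
# users, orgs, events, files, wikis = get_theme_related(u'some-theme', 'admin')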
Example #11
def search_related_e_card(item, submit_user, theme_name):
    if theme_name:
        theme_name = theme_name + '_' + submit_user
        theme_name_pinyin = p.get_pinyin(theme_name)
        event_list_string = es_event.get(index=special_event_name, doc_type=special_event_type, id=theme_name_pinyin,\
                            fields=['event'])
        eid_list = event_list_string['fields']['event'][0].split('&')
    else:
        eid_list = []

    pattern = '*' + item.encode('utf-8') + '*'
    query_body = {
        'query': {
            'bool': {
                'should': [
                    {'wildcard': {'keywords': pattern}},
                    {'wildcard': {'en_name': pattern}},
                    {'wildcard': {'name': pattern}},
                ]
            }
        },
        'size': 1000
    }
    fields_list = [
        'en_name', 'name', 'event_type', 'real_time', 'real_geo', 'uid_counts',
        'weibo_counts', 'keywords', 'work_tag'
    ]
    try:
        event_result = es_event.search(index=event_analysis_name, doc_type=event_text_type, \
                body=query_body, fields=['en_name'])['hits']['hits']
    except:
        return 'node does not exist'
    search_eid = []
    result = []
    for i in event_result:
        i_fields = i['fields']
        search_eid.append(i_fields['en_name'][0])
    show_id = list(set(search_eid) - set(eid_list))
    if not show_id:
        return []
    event_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
                body={'ids':show_id}, fields=fields_list)['docs']
    for i in event_result:
        event = []
        i_fields = i['fields']
        for j in fields_list:
            if j not in i_fields:
                event.append('')
                continue
            if j == 'keywords':
                keywords = i_fields[j][0].split('&')
                keywords = keywords[:5]
                event.append(keywords)
            elif j == 'work_tag':
                tag = deal_event_tag(i_fields[j][0], submit_user)[0]
                event.append(tag)
            else:
                event.append(i_fields[j][0])
        result.append(event)
    return result
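# Usage sketch: each returned card is a list ordered like fields_list,
# so card[1] is the event name and card[7] its top-5 keywords
# (helper name and search term are hypothetical; assumes the indices
# above exist):
def print_event_cards(item, submit_user, theme_name=''):
    for card in search_related_e_card(item, submit_user, theme_name):
        print card[1], card[7]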
Example #12
def group_user_tag(g_name, submit_user):
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name, doc_type=group_type, id=group_id,  fields=['people'])
    uid_list = uid_string['fields']['people'][0].split('&')
    user_list, org_list = search_user_type(uid_list)
    event_list = []
    related_event_list = []
    for uid in user_list:
        c_string = 'start n=node:'+node_index_name+'("'+people_primary+':'+str(uid)+'") match (n)-[r]-(e:Event) return e'
        result = graph.run(c_string)
        for event in result:
            event_dict = dict(event)
            event_id = event_dict['e']['event_id']
            related_event_list.append(event_id)
    for uid in org_list:
        c_string = 'start n=node:'+org_index_name+'("'+org_primary+':'+str(uid)+'") match (n)-[r]-(e:Event) return e'
        result = graph.run(c_string)
        for event in result:
            event_dict = dict(event)
            event_id = event_dict['e']['event_id']
            related_event_list.append(event_id)
    event_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
                body={'ids':related_event_list}, fields=['keywords_list', 'work_tag'])['docs']
    keywords_dict = {}
    mark_dict = {}
    for i in event_result:
        i_keywords = json.loads(i['fields']['keywords_list'][0])
        try:
            i_mark = i['fields']['work_tag'][0]
        except:
            i_mark = ''
        for key in i_keywords:
            try:
                keywords_dict[key[0]] += key[1]
            except:
                keywords_dict[key[0]] = key[1]
        if i_mark:
            user_mark = deal_user_tag(i_mark, submit_user)[0]
            for mark in user_mark:
                try:
                    mark_dict[mark] += 1
                except:
                    mark_dict[mark] = 1
    sorted_keywords_dict = sorted(keywords_dict.iteritems(), key=lambda x:x[1], reverse=True)[:100]
    sorted_mark_dict = sorted(mark_dict.iteritems(), key=lambda x:x[1], reverse=True)[:100]
    try:
        max_keywords_value = sorted_keywords_dict[0][1]
    except:
        max_keywords_value = 1.0
    normal_keywords_list = []
    for words in sorted_keywords_dict:
        normal_keywords_list.append([words[0], float(words[1])/max_keywords_value])

    try:
        max_mark_value = sorted_mark_dict[0][1]
    except:
        max_mark_value = 1.0
    normal_mark_list = []
    for words in sorted_mark_dict:
        normal_mark_list.append([words[0], float(words[1])/max_mark_value])

    return {'keywords':normal_keywords_list, 'mark':normal_mark_list}
Example #13
def group_related(g_name, submit_user):
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name, doc_type=group_type, id=group_id,  fields=['people', 'file_link', 'wiki_link'])
    origin_list = uid_string['fields']['people'][0].split('&')

    try:
        file_link = uid_string['fields']['file_link'][0].split('+')
    except:
        file_link = []
    final_file = []
    for i in file_link:
        final_file.append(i.split(','))
    try:
        final_wiki = json.loads(uid_string['fields']['wiki_link'][0])
    except:
        final_wiki = []
    event_graph_id = []
    user_list, org_list = search_user_type(origin_list)
    for i in user_list:
        a = graph.run('start n=node:'+node_index_name+'("'+people_primary+':'+str(i)+'") return id(n)')
        for j in a:
            event_graph_id.append(str(dict(j)['id(n)']))
    for i in org_list:
        a = graph.run('start n=node:'+org_index_name+'("'+org_primary+':'+str(i)+'") return id(n)')
        for j in a:
            event_graph_id.append(str(dict(j)['id(n)']))

    event_id_string = ','.join(event_graph_id)
    query = 'start d=node('+event_id_string+') match (d)-[r]-(e) return labels(e), e'
    result = graph.run(query)
    node_dict = {}
    for i in result:
        dict_i = dict(i)
        node_type = dict_i['labels(e)'][0]

        if node_type == people_node:
            node_dict.setdefault('user', []).append(dict_i['e']['uid'])
        elif node_type == org_node:
            node_dict.setdefault('org', []).append(dict_i['e']['org_id'])
        elif node_type == event_node:
            node_id = dict_i['e']['event_id']
            if node_id in event_graph_id:
                continue
            node_dict.setdefault('event', []).append(node_id)
    try:
        uid_list = [i for i in set(node_dict['user'])]
        user_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids':uid_list}, fields=['uname', 'uid'])['docs']
    except:
        user_result = []
    try:
        org_list_ = [i for i in set(node_dict['org'])]
        org_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids':org_list_}, fields=['uname', 'uid'])['docs']
    except:
        org_result = []
    try:
        event_list = [i for i in set(node_dict['event'])]
        event_result = es_event.mget(index=event_analysis_name,doc_type=event_text_type, body={'ids':event_list}, fields=['en_name', 'name'])['docs']
    except:
        event_result = []
    final_user = []
    for i in user_result:
        if i['_id'] in origin_list:
            continue
        if i['found'] == True:
            if i['fields']['uname'][0] == '':
                uname_s = i['fields']['uid'][0]
            else:
                uname_s = i['fields']['uname'][0]
            final_user.append([i['fields']['uid'][0], uname_s])
        else:
            final_user.append([i['_id'],i['_id']])

    final_org = []
    for i in org_result:
        if i['_id'] in origin_list:
            continue
        if i['found'] == True:
            if i['fields']['uname'][0] == '':
                uname_s = i['fields']['uid'][0]
            else:
                uname_s = i['fields']['uname'][0]
            final_org.append([i['fields']['uid'][0], uname_s])
        else:
            final_org.append([i['_id'],i['_id']])

    final_event = []
    for i in event_result:
        if i['found'] == True:
            final_event.append([i['fields']['en_name'][0], i['fields']['name'][0]])
        else:
            final_event.append([i['_id'],i['_id']])
    return {'final_user':final_user, 'final_org':final_org, 'final_event':final_event, \
            'final_file':final_file, 'final_wiki':final_wiki}
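# Usage sketch (hypothetical group name; needs the same ES and Neo4j
# connections as above): every value is a list of [id, display-name]
# pairs except final_wiki, which is parsed JSON.
# related = group_related(u'some-group', 'admin')
# print len(related['final_user']), len(related['final_event'])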