Exemple #1
0
def get_vary_detail_info(vary_detail_dict, uid_list):
    """Attach user names and readable dates to location-vary detail records.

    vary_detail_dict maps a vary pattern (e.g. 'cityA&cityB') to a list of
    [uid, start_ts, end_ts] items; uid_list contains the uids to resolve.
    Returns the same mapping with each item expanded to
    [uid, uname, start_date, end_date].
    """
    results = {}
    # Resolve uid -> uname from the portrait index; fall back to the uid
    # itself when the portrait document is missing or the query fails.
    try:
        user_portrait_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                                       body={'ids': uid_list})['docs']
    except Exception:
        user_portrait_result = []
    uname_dict = {}
    for portrait_item in user_portrait_result:
        uid = portrait_item['_id']
        if portrait_item['found'] == True:
            uname_dict[uid] = portrait_item['_source']['uname']
        else:
            uname_dict[uid] = uid

    # Rewrite each pattern's items with the resolved uname and dates.
    for vary_pattern in vary_detail_dict:
        new_pattern_list = []
        for user_item in vary_detail_dict[vary_pattern]:
            uid = user_item[0]
            # BUGFIX: use .get so a failed/partial mget (uname_dict missing
            # this uid) no longer raises KeyError.
            uname = uname_dict.get(uid, uid)
            start_date = ts2datetime(int(user_item[1]))
            end_date = ts2datetime(int(user_item[2]))
            new_pattern_list.append([uid, uname, start_date, end_date])
        results[vary_pattern] = new_pattern_list

    return results
Exemple #2
0
def get_theme_user_tag(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    topic_id = topic_id.lower()
    eid_string = es_event.get(index=special_event_name, doc_type=special_event_type, id=topic_id,  fields=['event'])
    event_list = eid_string['fields']['event'][0].split('&')
    user_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
                body={'ids':event_list}, fields=['user_results','name'])['docs']
    user_list =[]
    for i in user_result:
        event_name = i['fields']['name'][0]
        user_dict = json.loads(i['fields']['user_results'][0])
        for k,v in user_dict.iteritems():
            user_list.append(k)
    user_list_set = [i for i in set(user_list)]

    tag_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
            body={'ids':user_list_set}, fields=['function_mark', 'keywords'])['docs']
    keywords_dict = {}
    mark_dict = {}
    print len(tag_result)
    for i in tag_result:
        i_keywords = json.loads(i['fields']['keywords'][0])
        try:
            i_mark = i['fields']['function_mark'][0]
        except:
            i_mark = ''
        for key in i_keywords:
            try:
                keywords_dict[key[0]] += key[1]
            except:
                keywords_dict[key[0]] = key[1]
        if i_mark:
            user_mark = deal_user_tag(i_mark, submit_user)[0]
            for mark in user_mark:
                try:
                    mark_dict[mark] += 1
                except:
                    mark_dict[mark] = 1
    sorted_keywords_dict = sorted(keywords_dict.iteritems(), key=lambda x:x[1], reverse=True)[:100]
    sorted_mark_dict = sorted(mark_dict.iteritems(), key=lambda x:x[1], reverse=True)[:100]
    
    try:
        max_keywords_value = sorted_keywords_dict[0][1]
    except:
        max_keywords_value = 1.0
    normal_keywords_list = []
    for words in sorted_keywords_dict:
        normal_keywords_list.append([words[0], float(words[1])/max_keywords_value])

    try:
        max_mark_value = sorted_mark_dict[0][1]
    except:
        max_mark_value = 1.0
    normal_mark_list = []
    for words in sorted_mark_dict:
        normal_mark_list.append([words[0], float(words[1])/max_mark_value])

    return {'keywords':normal_keywords_list, 'mark':normal_mark_list}
Exemple #3
0
def group_user_keyowrds(g_name, submit_user):
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name,
                              doc_type=group_type,
                              id=group_id,
                              fields=['people'])
    uid_list = uid_string['fields']['people'][0].split('&')

    tag_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
            body={'ids':uid_list}, fields=['hashtag_dict', 'keywords'])['docs']
    keywords_dict = {}
    hashtag_dict = {}
    print len(tag_result)
    for i in tag_result:
        i_keywords = json.loads(i['fields']['keywords'][0])
        i_hashtag = json.loads(i['fields']['hashtag_dict'][0])
        for hashtag, value in i_hashtag.iteritems():
            try:
                hashtag_dict[hashtag] += value
            except:
                hashtag_dict[hashtag] = value
        for key in i_keywords:
            try:
                keywords_dict[key[0]] += key[1]
            except:
                keywords_dict[key[0]] = key[1]
    sorted_keywords_dict = sorted(keywords_dict.iteritems(),
                                  key=lambda x: x[1],
                                  reverse=True)[:100]
    sorted_mark_dict = sorted(hashtag_dict.iteritems(),
                              key=lambda x: x[1],
                              reverse=True)[:100]

    try:
        max_keywords_value = sorted_keywords_dict[0][1]
    except:
        max_keywords_value = 1.0
    normal_keywords_list = []
    for words in sorted_keywords_dict:
        normal_keywords_list.append(
            [words[0], float(words[1]) / max_keywords_value])

    try:
        max_mark_value = sorted_mark_dict[0][1]
    except:
        max_mark_value = 1.0
    normal_mark_list = []
    for words in sorted_mark_dict:
        normal_mark_list.append([words[0], float(words[1]) / max_mark_value])

    return {'keywords': normal_keywords_list, 'mark': normal_mark_list}
Exemple #4
0
def get_special_labels(node1_list):
    """Return a '&'-joined string of the 100 most frequent keywords
    across the portrait documents of the given uids."""
    labels = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids': node1_list},
                     fields=['keywords_string'], _source=False)['docs']
    # Count keyword frequency in a single pass; the original
    # set(...) + list.count(...) approach was accidentally O(n^2).
    keywords_dict = {}
    for doc in labels:
        for word in doc['fields']['keywords_string'][0].split('&'):
            keywords_dict[word] = keywords_dict.get(word, 0) + 1
    sorted_keywords = sorted(keywords_dict.iteritems(), key=lambda x: x[1], reverse=True)
    result_label = [item[0] for item in sorted_keywords[:100]]
    return '&'.join(result_label)
def get_theme_related(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    eid_string = es_event.get(index=special_event_name,
                              doc_type=special_event_type,
                              id=topic_id,
                              fields=['event', 'wiki_link', 'file_link'])
    event_list = eid_string['fields']['event'][0].split('&')
    try:
        file_link = eid_string['fields']['file_link'][0].split('+')
    except:
        file_link = []
    final_file = []
    for i in file_link:
        final_file.append(i.split(','))
    try:
        final_wiki = json.loads(eid_string['fields']['wiki_link'][0])
    except:
        final_wiki = []
    event_graph_id = []
    for i in event_list:
        a = graph.run('start n=node:' + event_index_name + '("' +
                      event_primary + ':' + str(i) + '") return id(n)')
        for j in a:
            event_graph_id.append(str(dict(j)['id(n)']))
    print event_graph_id
    event_id_string = ','.join(event_graph_id)
    query = 'start d=node(' + event_id_string + ') match (d)-[r]-(e) return labels(e), e'
    result = graph.run(query)
    node_dict = {}
    for i in result:
        dict_i = dict(i)
        node_type = dict_i['labels(e)'][0]

        if node_type == people_node:
            node_id = dict_i['e']['uid']
            try:
                node_dict['user'].append(node_id)
            except:
                node_dict['user'] = []
                node_dict['user'].append(node_id)
        elif node_type == org_node:
            node_id = dict_i['e']['org_id']
            try:
                node_dict['org'].append(node_id)
            except:
                node_dict['org'] = []
                node_dict['org'].append(node_id)

        elif node_type == event_node:
            node_id = dict_i['e']['event_id']
            if node_id in event_graph_id:
                continue
            try:
                node_dict['event'].append(node_id)
            except:
                node_dict['event'] = []
                node_dict['event'].append(node_id)
    uid_list = [i for i in set(node_dict['user'])]
    org_list = [i for i in set(node_dict['org'])]
    event_list = [i for i in set(node_dict['event'])]
    user_result = es.mget(index=portrait_index_name,
                          doc_type=portrait_index_type,
                          body={'ids': uid_list},
                          fields=['uname', 'uid'])['docs']
    org_result = es.mget(index=portrait_index_name,
                         doc_type=portrait_index_type,
                         body={'ids': org_list},
                         fields=['uname', 'uid'])['docs']
    event_result = es_event.mget(index=event_analysis_name,
                                 doc_type=event_text_type,
                                 body={'ids': event_list},
                                 fields=['en_name', 'name'])['docs']
    final_user = []
    for i in user_result:
        if i['found'] == True:
            if i['fields']['uname'][0] == '':
                uname_s = i['fields']['uid'][0]
            else:
                uname_s = i['fields']['uname'][0]
            final_user.append([i['fields']['uid'][0], uname_s])
        else:
            final_user.append([i['_id'], i['_id']])

    final_org = []
    for i in org_result:
        if i['found'] == True:
            if i['fields']['uname'][0] == '':
                uname_s = i['fields']['uid'][0]
            else:
                uname_s = i['fields']['uname'][0]
            final_org.append([i['fields']['uid'][0], uname_s])
        else:
            final_org.append([i['_id'], i['_id']])

    final_event = []
    for i in event_result:
        if i['found'] == True:
            final_org.append(
                [i['fields']['en_name'][0], i['fields']['name'][0]])
        else:
            final_org.append([i['_id'], i['_id']])
    return [final_user, final_org, final_event, final_file, final_wiki]
Exemple #6
0
def group_geo_vary(g_name, submit_user):
    """Compute activity-geo movement statistics for a user group.

    Resolves the group's member uids, then scans each member's daily
    activity_geo_dict (from the portrait index) to find changes of the
    member's dominant city over time.

    Returns a dict with:
      - main_start_geo / main_end_geo: move-origin / move-destination city counts
      - vary_detail_geo: 'cityA&cityB' -> [[uid, start_ts, end_ts], ...]
      - activity_geo_vary: 'cityA&cityB' -> number of such moves
      - main_activity_geo: overall most frequent city ('' if none)
      - activity_geo_distribution_date: day ts -> merged geo distribution
    NOTE(review): submit_user is unused here.
    """
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name,
                              doc_type=group_type,
                              id=group_id,
                              fields=['people'])
    uid_list = uid_string['fields']['people'][0].split('&')
    activity_geo_vary = {}
    main_start_geo = {}
    main_end_geo = {}
    vary_detail_geo = {}
    activity_geo_distribution_date = {}
    # Real clock in production (RUN_TYPE == 1), else the configured test date.
    if RUN_TYPE == 1:
        now_ts = int(time.time())
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)
    # Truncate to the start of the current day.
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    try:
        iter_user_dict_list = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
            body={'ids':uid_list})['docs']
    except:
        iter_user_dict_list = []
    for user_dict in iter_user_dict_list:
        uid = user_dict['_id']
        source = user_dict['_source']
        #attr8: activity_geo_dict---distribution by date
        user_activity_geo = {}
        # One geo-count dict per day, oldest first; entry i corresponds to
        # day now_date_ts - (count - i) * DAY.
        activity_geo_dict_list = json.loads(source['activity_geo_dict'])
        activity_geo_date_count = len(activity_geo_dict_list)
        iter_ts = now_date_ts - activity_geo_date_count * DAY
        user_date_main_list = []
        for i in range(0, activity_geo_date_count):
            date_item = activity_geo_dict_list[i]
            # Merge this member's daily distribution into the group total.
            if iter_ts in activity_geo_distribution_date:
                activity_geo_distribution_date[iter_ts] = union_dict_list(
                    [activity_geo_distribution_date[iter_ts], date_item])
            else:
                activity_geo_distribution_date[iter_ts] = date_item
            #use to get activity_geo vary
            sort_date_item = sorted(date_item.items(),
                                    key=lambda x: x[1],
                                    reverse=True)
            if date_item != {}:
                # Record the day's dominant city only when it differs from
                # the previously recorded one (run-length compression).
                main_date_city = sort_date_item[0][0]
                try:
                    last_user_date_main_item = user_date_main_list[-1][0]
                except:
                    last_user_date_main_item = ''
                if main_date_city != last_user_date_main_item:
                    user_date_main_list.append([main_date_city, iter_ts])

            iter_ts += DAY
        #attr8: activity_geo_dict---location vary
        # Each consecutive pair of dominant cities counts as one "move".
        if len(user_date_main_list) > 1:
            for i in range(1, len(user_date_main_list)):
                vary_city = [
                    geo_ts_item[0]
                    for geo_ts_item in user_date_main_list[i - 1:i + 1]
                ]
                vary_ts = [
                    geo_ts_item[1]
                    for geo_ts_item in user_date_main_list[i - 1:i + 1]
                ]
                vary_item = '&'.join(vary_city)
                #vary_item = '&'.join(user_date_main_list[i-1:i+1])
                #get activity geo vary for vary table and map
                try:
                    activity_geo_vary[vary_item] += 1
                except:
                    activity_geo_vary[vary_item] = 1
                #get main start geo
                try:
                    main_start_geo[vary_city[0]] += 1
                except:
                    main_start_geo[vary_city[0]] = 1
                #get main end geo
                try:
                    main_end_geo[vary_city[1]] += 1
                except:
                    main_end_geo[vary_city[1]] = 1
                #get vary detail geo
                try:
                    vary_detail_geo[vary_item].append(
                        [uid, vary_ts[0], vary_ts[1]])
                except:
                    vary_detail_geo[vary_item] = [[
                        uid, vary_ts[0], vary_ts[1]
                    ]]
    # Overall most frequent city across all days and members.
    all_activity_geo = union_dict_list(activity_geo_distribution_date.values())
    sort_all_activity_geo = sorted(all_activity_geo.items(),
                                   key=lambda x: x[1],
                                   reverse=True)
    try:
        main_activity_geo = sort_all_activity_geo[0][0]
    except:
        main_activity_geo = ''


    return  {'main_start_geo':main_start_geo, 'main_end_geo': main_end_geo, \
        'vary_detail_geo': vary_detail_geo, 'activity_geo_vary':activity_geo_vary,\
        'main_activity_geo':main_activity_geo, 'activity_geo_distribution_date':activity_geo_distribution_date}
Exemple #7
0
def search_related_u_card(item, submit_user, g_name):
    """Wildcard-search user portraits on keywords/uid/uname for *item* and
    return display cards for matches not already in group *g_name*.

    Each card lists the values of fields_list in order; influence,
    sensitive and activeness are rescaled to a 0-100 logarithmic scale
    against the current evaluation maxima.  Returns the string
    'node does not exist' when the search itself fails.
    """
    evaluate_max = get_evaluate_max()
    if g_name:
        # Group doc ids are the lower-cased pinyin of '<group>_<user>'.
        g_name = g_name + '_' + submit_user
        g_name_pinyin = p.get_pinyin(g_name)
        g_name_pinyin = g_name_pinyin.lower()
        user_list_string = es_group.get(index=group_name, doc_type=group_type, id=g_name_pinyin,\
                            fields=['people'])
        uid_list = []
        uid_list = user_list_string['fields']['people'][0].split('&')
        # print uid_list,'==========='
    else:
        uid_list = []

    # Substring match of *item* against any of the three fields.
    query_body = {
        "query": {
            'bool': {
                'should': [{
                    "wildcard": {
                        'keywords': '*' + str(item.encode('utf-8')) + '*'
                    }
                }, {
                    "wildcard": {
                        'uid': '*' + str(item.encode('utf-8')) + '*'
                    }
                }, {
                    "wildcard": {
                        'uname': '*' + str(item.encode('utf-8')) + '*'
                    }
                }]
            }
        },
        'size': 1000
    }
    try:
        user_result = es.search(index=portrait_index_name, doc_type=portrait_index_type, \
                body=query_body, fields=['uid'])['hits']['hits']
    except:
        return 'node does not exist'
    # print user_result
    search_uid = []
    result = []
    for i in user_result:
        i_fields = i['fields']
        search_uid.append(i_fields['uid'][0])
    # Exclude users already in the group.
    show_id_set = set(search_uid) - set(uid_list)
    show_id = [i for i in show_id_set]
    if not show_id:
        return []
    fields_list = [
        'uid', 'uname', 'location', 'influence', 'sensitive', 'activeness',
        'keywords_string', 'function_mark'
    ]
    user_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
                body={'ids':show_id}, fields=fields_list)['docs']
    for i in user_result:
        user = []
        i_fields = i['fields']
        for j in fields_list:
            # Missing fields become '' so every card keeps the same length.
            if not i_fields.has_key(j):
                user.append('')
                continue
            # NOTE(review): fields_list contains 'keywords_string', not
            # 'keywords', so this branch never runs and keywords_string
            # falls through to the plain else below — confirm whether the
            # field name here was meant to be 'keywords_string'.
            if j == 'keywords':
                keywords = i_fields[j][0].split('&')
                keywords = keywords[:5]
                user.append(keywords)
            elif j == 'function_mark':
                tag = deal_user_tag(i_fields[j][0], submit_user)[0]
                user.append(tag)
            elif j in ['influence', 'sensitive', 'activeness']:
                # Log rescale to 0-100 against the global maximum.
                user.append(
                    math.log(i_fields[j][0] /
                             (evaluate_max[j] * 9 + 1) + 1, 10) * 100)
            else:
                user.append(i_fields[j][0])
        result.append(user)
    return result
Exemple #8
0
def submit_identify_in_uid(input_data):
    """Validate and submit a batch of uids for portrait computation.

    Filters malformed uids (must be exactly 10 characters), drops uids
    already in the portrait index or the 'compute' queue, and — for
    operation_type 'submit' — enqueues the remainder in the compute hash.

    Returns (0, 'invalid user info', invalid_uid_list) on bad input,
    (0, 'all user in') when nothing is left to add, otherwise
    (1, invalid_uid_list, have_in_uid_list, final_submit_user_list).
    NOTE(review): callers must handle the differing tuple arities.
    """
    print input_data, '00000000000'
    in_date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    compute_status = input_data['compute_status']
    relation_string = input_data['relation_string']
    recommend_style = input_data['recommend_style']
    node_type = input_data['node_type']
    # Redis hash names for this date's recommendation bookkeeping.
    hashname_submit = 'submit_recomment_' + in_date
    hashname_influence = 'recomment_' + in_date + '_influence'
    hashname_sensitive = 'recomment_' + in_date + '_sensitive'
    compute_hash_name = 'compute'
    # submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    # uids auto-recommended today by influence or sensitivity.
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(
        r.hkeys(hashname_sensitive))
    upload_data = input_data['upload_data']
    uid_list = []
    invalid_uid_list = []
    if recommend_style == 'upload':
        line_list = upload_data
        # print line_list,'====8888===='
        for line in line_list:
            # Uploaded lines may carry a trailing '\r'.
            uid = line.strip('\r')
            # print len(str(uid)),'!!!0000000000999999999999'
            if len(str(uid)) == 10:
                uid_list.append(uid)
            else:
                invalid_uid_list.append(uid)
    if recommend_style == 'write':
        line_list = upload_data
        # print line_list,'====8888===='
        for line in line_list:
            uid = line
            if len(str(uid)) == 10:
                uid_list.append(uid)
            else:
                invalid_uid_list.append(uid)
    # Reject the whole batch if any uid is malformed.
    if len(invalid_uid_list) != 0:
        return 0, 'invalid user info', invalid_uid_list
    #identify the uid is not exist in user_portrait and compute
    #step1: filter in user_portrait
    new_uid_list = []
    have_in_uid_list = []
    try:
        exist_portrait_result = es.mget(index=portrait_index_name,
                                        doc_type=portrait_index_type,
                                        body={'ids': uid_list},
                                        _source=False)['docs']
    except:
        exist_portrait_result = []
    if exist_portrait_result:
        for exist_item in exist_portrait_result:
            if exist_item['found'] == False:
                new_uid_list.append(exist_item['_id'])
            else:
                have_in_uid_list.append(exist_item['_id'])
    else:
        # On lookup failure treat every uid as new.
        new_uid_list = uid_list
    #step2: filter in compute
    new_uid_set = set(new_uid_list)
    # NOTE(review): literal 'compute' duplicates compute_hash_name above.
    compute_set = set(r.hkeys('compute'))
    in_uid_set = list(new_uid_set - compute_set)
    print 'new_uid_set:', new_uid_set
    print 'in_uid_set:', in_uid_set
    if len(in_uid_set) == 0:
        return 0, 'all user in'
    #identify the final add user
    final_submit_user_list = []
    for in_item in in_uid_set:
        # if in_item in auto_recomment_set:
        #     tmp = json.loads(r.hget(hashname_submit, in_item))
        #     recommentor_list = tmp['operation'].split('&')
        #     recommentor_list.append(str(submit_user))
        #     new_list = list(set(recommentor_list))
        #     tmp['operation'] = '&'.join(new_list)
        # else:
        #     tmp = {'system':'0', 'operation':submit_user}
        if operation_type == 'submit':
            # Queue the uid for computation with its submission metadata.
            relation_list = relation_string.split(',')
            r.hset(
                compute_hash_name, in_item,
                json.dumps([
                    in_date, compute_status, node_type, relation_list,
                    submit_user, recommend_style
                ]))
            # r.hset(hashname_submit, in_item, json.dumps(tmp))
            # r.hset(submit_user_recomment, in_item, '0')
        final_submit_user_list.append(in_item)
    return 1, invalid_uid_list, have_in_uid_list, final_submit_user_list
Exemple #9
0
def group_related(g_name, submit_user):
    """Collect users, organizations and events related to a group's
    members via the graph, plus the group's attached files/wiki links.

    The group's own members are excluded from final_user/final_org.
    Returns {'final_user', 'final_org', 'final_event', 'final_file',
    'final_wiki'}; user/org entries are [id, display_name] pairs and
    event entries are [en_name, name] pairs.
    NOTE(review): submit_user is unused here.
    """
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name, doc_type=group_type, id=group_id,  fields=['people', 'file_link', 'wiki_link'])
    origin_list = uid_string['fields']['people'][0].split('&')
    # origin_list = []

    # Attached files are '+'-separated records of ','-separated fields.
    try:
        file_link = uid_string['fields']['file_link'][0].split('+')
    except:
        file_link = []
    final_file = []
    for i in file_link:
        final_file.append(i.split(','))
    try:
        final_wiki = json.loads(uid_string['fields']['wiki_link'][0])
    except:
        final_wiki = []
    event_graph_id = []
    # Split members into people vs organizations, then map each to its
    # internal graph node id via the relevant index.
    user_list, org_list = search_user_type(origin_list)
    for i in user_list:
        a = graph.run('start n=node:'+node_index_name+'("'+people_primary+':'+str(i)+'") return id(n)')
        for j in a:
            event_graph_id.append(str(dict(j)['id(n)']))
    for i in org_list:
        a = graph.run('start n=node:'+org_index_name+'("'+org_primary+':'+str(i)+'") return id(n)')
        for j in a:
            event_graph_id.append(str(dict(j)['id(n)']))

    # for i in origin_list:
    #     a = graph.run('start n=node:'+node_index_name+'("'+people_primary+':'+str(i)+'") return id(n)')
    #     for j in a:
    #         event_graph_id.append(str(dict(j)['id(n)']))
    print event_graph_id
    event_id_string = ','.join(event_graph_id)
    # One-hop neighbourhood of all member nodes.
    query = 'start d=node('+event_id_string+') match (d)-[r]-(e) return labels(e), e'
    result = graph.run(query)
    node_dict = {}
    # Bucket neighbouring nodes by label; buckets are created lazily via
    # the KeyError fallback.
    for i in result:
        dict_i = dict(i)
        node_type = dict_i['labels(e)'][0]

        if node_type == people_node:
            node_id = dict_i['e']['uid']
            try:
                node_dict['user'].append(node_id)
            except:
                node_dict['user'] = []
                node_dict['user'].append(node_id)
        elif node_type == org_node:
            node_id = dict_i['e']['org_id']
            try:
                node_dict['org'].append(node_id)
            except:
                node_dict['org'] = []
                node_dict['org'].append(node_id)

        elif node_type == event_node:
            node_id = dict_i['e']['event_id']
            if node_id in event_graph_id:
                continue
            try:
                node_dict['event'].append(node_id)
            except:
                node_dict['event'] = []
                node_dict['event'].append(node_id)
    # Resolve display names; each try also covers a missing/empty bucket.
    try:
        uid_list = [i for i in set(node_dict['user'])]
        user_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids':uid_list}, fields=['uname', 'uid'])['docs']
    except:
        user_result = []
    try:
        org_list_ = [i for i in set(node_dict['org'])]
        org_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids':org_list_}, fields=['uname', 'uid'])['docs']
    except:
        org_result = []
    try:
        event_list = [i for i in set(node_dict['event'])]
        event_result = es_event.mget(index=event_analysis_name,doc_type=event_text_type, body={'ids':event_list}, fields=['en_name', 'name'])['docs']
    except:
        event_result = []
    final_user = []
    for i in user_result:
        # Skip the group's own members.
        if i['_id'] in origin_list:
            continue
        if i['found'] == True:
            # Fall back to the uid when the stored uname is empty.
            if i['fields']['uname'][0] == '':
                uname_s = i['fields']['uid'][0]
            else:
                uname_s = i['fields']['uname'][0]
            final_user.append([i['fields']['uid'][0], uname_s])
        else:
            final_user.append([i['_id'],i['_id']])

    final_org = []
    for i in org_result:
        if i['_id'] in origin_list:
            continue
        if i['found'] == True:
            if i['fields']['uname'][0] == '':
                uname_s = i['fields']['uid'][0]
            else:
                uname_s = i['fields']['uname'][0]
            final_org.append([i['fields']['uid'][0], uname_s])
        else:
            final_org.append([i['_id'],i['_id']])

    final_event = []
    for i in event_result:
        if i['found'] == True:
            final_event.append([i['fields']['en_name'][0], i['fields']['name'][0]])
        else:
            final_event.append([i['_id'],i['_id']])
    return {'final_user':final_user, 'final_org':final_org, 'final_event':final_event, \
            'final_file':final_file, 'final_wiki':final_wiki}