Ejemplo n.º 1
0
def get_show_domain_group_detail_portrait(domain_name):
    """Return one portrait summary dict per member of a domain group.

    The group document is fetched from the weibo domain index under the
    pinyin form of ``domain_name``; its ``member_uids`` are then looked up
    in the user-portrait index with a single ``mget``.

    Each returned dict has the keys ``uid``, ``nick_name``, ``photo_url``,
    ``domain``, ``sensitive``, ``location``, ``fans_num``, ``friends_num``,
    ``gender``, ``home_page`` and ``influence``.  Members without a portrait
    document keep their uid and home page; every other field is ``''``.
    """
    url_head = 'http://weibo.com/'
    url_tail = '/profile?topnav=1&wvr=6&is_all=1'

    group_id = pinyin.get(domain_name, format='strip', delimiter='_')
    group_doc = es.get(
        index=weibo_domain_index_name,
        doc_type=weibo_domain_index_type,
        id=group_id,
    )['_source']

    portrait_docs = es_user_portrait.mget(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={'ids': group_doc['member_uids']},
    )['docs']

    rows = []
    for doc in portrait_docs:
        if not doc['found']:
            # No portrait stored for this member: only the id is known.
            missing_uid = doc['_id']
            rows.append({
                'uid': missing_uid,
                'nick_name': '',
                'photo_url': '',
                'domain': '',
                'sensitive': '',
                'location': '',
                'fans_num': '',
                'friends_num': '',
                'gender': '',
                'home_page': url_head + missing_uid + url_tail,
                'influence': '',
            })
            continue

        portrait = doc['_source']
        row = {
            'uid': portrait['uid'],
            'nick_name': portrait['uname'],
            'photo_url': portrait['photo_url'],
            'domain': portrait['domain'],
            'sensitive': portrait['sensitive'],
            'location': portrait['location'],
            'fans_num': portrait['fansnum'],
            'friends_num': portrait['friendsnum'],
            'gender': portrait['gender'],
            'home_page': url_head + portrait['uid'] + url_tail,
        }
        # The stored influence is passed through get_influence_relative
        # together with the uid before being exposed.
        row['influence'] = get_influence_relative(row['uid'],
                                                  portrait['influence'])
        rows.append(row)

    return rows
Ejemplo n.º 2
0
def _example_model_monitor_keywords(member_uids):
    """Return comma-joined keywords extracted from the members' flow text."""
    # In 'test' mode the reference time is the configured S_DATE rather
    # than the wall clock.
    if S_TYPE == 'test':
        current_time = datetime2ts(S_DATE)
    else:
        current_time = int(time.time())

    index_name_list = get_flow_text_index_list(current_time)

    query_body_search = {
        'query': {
            'filtered': {
                'filter': {
                    'terms': {'uid': member_uids}
                }
            }
        },
        'size': MAX_VALUE,
        '_source': ['keywords_string']
    }
    hits = es_flow_text.search(
        index=index_name_list,
        doc_type=flow_text_index_type,
        body=query_body_search,
    )['hits']['hits']

    # Each post's keyword string is prefixed with '&' and the pieces are
    # concatenated into one string for extract_keywords.
    keywords_string = ''.join(
        '&' + hit['_source']['keywords_string'] for hit in hits
    )

    # Words are encoded to UTF-8 before joining (the module is written for
    # Python 2, where this yields a plain byte string).
    return ','.join(
        keyword.word.encode('utf-8')
        for keyword in extract_keywords(keywords_string)
    )


def _example_model_fill_profile(item, member_uids):
    """Copy nick name, location, gender and description from member profiles.

    Members are visited in ``mget`` order.  Each found profile overwrites
    ``nick_name``, ``location`` and ``gender`` on ``item``; the scan stops at
    the first member whose profile has a non-empty ``description``.  If no
    profile is found at all, ``nick_name`` is left as an empty list.
    """
    profile_docs = es_user_portrait.mget(
        index=profile_index_name,
        doc_type=profile_index_type,
        body={'ids': member_uids},
    )['docs']

    item['nick_name'] = []
    for doc in profile_docs:
        if not doc['found']:
            continue

        source = doc['_source']
        item['nick_name'] = source['nick_name']
        item['location'] = source['user_location']
        item['gender'] = source['sex']
        uid = source['uid']

        # Best effort: a failed lookup or a missing 'description' field just
        # moves on to the next member.
        # NOTE(review): this re-reads the profile index by uid right after
        # the mget above; it may be redundant if _id == uid -- confirm.
        try:
            profile = es_user_portrait.get(
                index=profile_index_name,
                doc_type=profile_index_type,
                id=uid,
            )['_source']
            description = profile['description']
        except Exception:
            continue

        if description:
            item['description'] = description
            break


def get_generate_example_model(domain_name, role_name):
    """Build and persist the example model for one role of a domain.

    The role document ``<pinyin(domain_name)>_<role_en>`` is read from the
    weibo role index and rewritten in place into display form (political
    side label, top psychological features, monitor keywords, profile
    fields, fixed placeholder fields, top active-time slots, mean daily post
    count).  The result is dumped as JSON to
    ``EXAMPLE_MODEL_PATH + <task id> + '.json'`` and a small pointer document
    (``domain_name``, ``role_name``) is indexed under the same id in the
    example-model index.

    Args:
        domain_name: Domain name; converted with ``pinyin.get`` to form the
            document id.
        role_name: Role name; must be a key of ``domain_ch2en_dict``.

    Returns:
        True if both the file write and the index call succeeded, False if
        either raised.

    Raises:
        KeyError: If ``role_name`` is not in ``domain_ch2en_dict`` or the
            role document lacks an expected field.
        Exceptions from the Elasticsearch lookups and from ``json.loads``
        made while building the model are not caught here.
    """
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    role_en = domain_ch2en_dict[role_name]
    task_id = domain_pinyin + '_' + role_en

    item = es.get(
        index=weibo_role_index_name,
        doc_type=weibo_role_index_type,
        id=task_id,
    )['_source']
    # Single-argument form prints the same text under the Python 2 print
    # statement and the print() function.
    print('es_result::: %s' % (item,))

    # Political leaning: the first element of the first stored entry is the
    # label; anything other than 'mid' / 'left' is shown as right-leaning.
    political_side = json.loads(item['political_side'])[0][0]
    political_labels = {'mid': u'中立', 'left': u'左倾'}
    item['political_side'] = political_labels.get(political_side, u'右倾')

    # Psychological features: keep the first element of up to
    # TOP_PSY_FEATURE entries.  Slicing tolerates documents that store
    # fewer entries than that.
    psy_feature = json.loads(item['psy_feature'])
    item['psy_feature'] = '&'.join(
        feature[0] for feature in psy_feature[:TOP_PSY_FEATURE]
    )

    role_group_uids = json.loads(item['member_uids'])

    item['monitor_keywords'] = _example_model_monitor_keywords(role_group_uids)

    _example_model_fill_profile(item, role_group_uids)

    # Fixed values applied to every generated model.
    item['business_goal'] = u'渗透'
    item['daily_interests'] = u'旅游'
    item['age'] = 30
    item['career'] = u'自由职业'

    # Indices of the TOP_ACTIVE_TIME largest entries, largest first
    # (argsort over the negated array sorts descending).
    active_time = np.array(json.loads(item['active_time']))
    item['active_time'] = np.argsort(-active_time)[:TOP_ACTIVE_TIME].tolist()

    day_post_num = np.array(json.loads(item['day_post_num']))
    item['day_post_num'] = np.mean(day_post_num).tolist()
    item['role_name'] = role_name

    example_model_file_name = EXAMPLE_MODEL_PATH + task_id + '.json'

    try:
        with open(example_model_file_name, "w") as dump_f:
            json.dump(item, dump_f)

        item_dict = {
            'domain_name': domain_name,
            'role_name': role_name,
        }
        es.index(
            index=weibo_example_model_index_name,
            doc_type=weibo_example_model_index_type,
            body=item_dict,
            id=task_id,
        )
    except Exception:
        # Persisting is reported through the return value rather than
        # raised; KeyboardInterrupt / SystemExit still propagate.
        return False

    return True