Example #1
0
def domain_create_task(domain_name,
                       create_type,
                       create_time,
                       submitter,
                       description,
                       remark,
                       compute_status=0):
    """Register a new domain and queue it for detection.

    The document id is the pinyin transliteration of ``domain_name``.  When a
    document with that id can already be fetched from ``fb_domain_index_name``
    nothing is written.  Otherwise the task is pushed (JSON-encoded) onto the
    redis list ``fb_target_domain_detect_queue_name`` and a creation record is
    indexed under the same id.

    Returns:
        ``'domain name exists!'`` when the id is already present, ``True``
        when the task was queued and indexed, ``False`` when queueing or
        indexing raised (the exception is printed).
    """
    task_id = pinyin.get(domain_name, format='strip', delimiter='_')
    try:
        es.get(index=fb_domain_index_name,
               doc_type=fb_domain_index_type,
               id=task_id)['_source']
    except Exception:
        # NOTE(review): every lookup failure, not only "document missing",
        # is treated as "domain does not exist yet" -- kept from the original.
        pass
    else:
        return 'domain name exists!'

    try:
        domain_task_dict = dict()
        domain_task_dict['domain_pinyin'] = task_id
        domain_task_dict['domain_name'] = domain_name
        domain_task_dict['create_type'] = json.dumps(create_type)
        domain_task_dict['create_time'] = create_time
        domain_task_dict['submitter'] = submitter
        domain_task_dict['description'] = description
        domain_task_dict['remark'] = remark
        domain_task_dict['compute_status'] = compute_status
        r.lpush(fb_target_domain_detect_queue_name,
                json.dumps(domain_task_dict))

        # Store the creation record.  It mirrors the queued task, except that
        # group_size starts empty and compute_status is always stored as 0,
        # whatever value the caller passed.
        item_exist = dict(domain_task_dict)
        item_exist['group_size'] = ''
        item_exist['compute_status'] = 0
        print(es.index(index=fb_domain_index_name,
                       doc_type=fb_domain_index_type,
                       id=item_exist['domain_pinyin'],
                       body=item_exist))
        mark = True
    except Exception as e:
        print(e)
        mark = False
    return mark
Example #2
0
def addto_twitter_corpus(task_detail):
    """Enrich ``task_detail`` from the flow-text index and store it in the corpus.

    ``task_detail`` must carry ``'timestamp'`` and ``'tid'``.  The tweet is
    fetched from the flow-text index derived from the timestamp, and its
    text, engagement counters (``comment``/``share``/``favorite``) and the
    author's nickname are copied onto ``task_detail`` in place.  Enrichment is
    best-effort: if any part of it fails, the record is still indexed with
    whatever fields were filled in.

    Returns:
        ``True`` if the record was indexed into the corpus index, else ``False``.
    """
    flow_text_index_name = twitter_flow_text_index_name_pre + ts2datetime(task_detail['timestamp'])
    try:
        corpus_result = es_xnr.get(index=flow_text_index_name,
                                   doc_type=twitter_flow_text_index_type,
                                   id=task_detail['tid'])['_source']
        task_detail['text'] = corpus_result['text']

        # Look up the three engagement counters.
        tid_result = lookup_tid_attend_index(task_detail['tid'],
                                             task_detail['timestamp'],
                                             task_detail['timestamp'])
        if tid_result:
            task_detail['comment'] = tid_result['comment']
            task_detail['share'] = tid_result['share']
            task_detail['favorite'] = tid_result['favorite']
        else:
            task_detail['comment'] = 0
            task_detail['share'] = 0
            task_detail['favorite'] = 0

        # Look up the author's nickname.  The uid comes from the fetched
        # tweet; the previous code read an undefined name here, so the
        # nickname was never stored.
        task_detail['nick_name'] = get_user_nickname(corpus_result['uid'])
    except Exception:
        # Best-effort enrichment: fall through and store what we have.
        pass

    try:
        es_xnr.index(index=twitter_xnr_corpus_index_name,
                     doc_type=twitter_xnr_corpus_index_type,
                     id=task_detail['tid'],
                     body=task_detail)
        mark = True
    except Exception:
        mark = False
    return mark
Example #3
0
def get_save_step_three_1(task_detail):
    """Copy the step-three account details onto the stored fb_xnr document.

    The document whose id is ``task_detail['task_id']`` is fetched, updated
    with the account fields from ``task_detail`` and written back with
    ``create_status`` set to 2.  The update response is printed.

    Returns:
        Always ``True``; lookup, key and update errors propagate.
    """
    xnr_id = task_detail['task_id']
    stored = es.get(index=fb_xnr_index_name,
                    doc_type=fb_xnr_index_type,
                    id=xnr_id)['_source']

    # 'uid' is taken from the request's 'id' field; the remaining fields are
    # copied under their own names.
    stored['uid'] = task_detail['id']
    for field in ('nick_name', 'fb_mail_account', 'fb_phone_account',
                  'password', 'career', 'description', 'age', 'location'):
        stored[field] = task_detail[field]
    stored['create_status'] = 2  # creation finished

    # Update the fb_xnr table.
    print(es.update(index=fb_xnr_index_name,
                    doc_type=fb_xnr_index_type,
                    id=xnr_id,
                    body={'doc': stored}))
    return True
Example #4
0
def addto_warning_corpus(task_detail):
    """Copy a tweet into the warning corpus, annotated from ``task_detail``.

    The tweet ``task_detail['tid']`` is read from the flow-text index derived
    from ``task_detail['timestamp']``, extended with the reporting fields,
    the author's nickname, the engagement counters and a ``content_type`` of
    ``'friends'`` or ``'unfriends'``, then indexed into the warning corpus.

    Returns:
        ``True`` when the record was indexed; ``False`` if any step raised.
    """
    source_index = twitter_flow_text_index_name_pre + ts2datetime(task_detail['timestamp'])
    try:
        record = es_xnr_2.get(index=source_index,
                              doc_type=twitter_flow_text_index_type,
                              id=task_detail['tid'])['_source']
        for field in ('xnr_user_no', 'warning_source', 'create_time'):
            record[field] = task_detail[field]
        record['validity'] = 1
        record['nick_name'] = get_user_nickname(task_detail['uid'])

        # Engagement counters default to 0 when the lookup returns nothing.
        counters = lookup_tid_attend_index(task_detail['tid'], task_detail['timestamp'])
        for field in ('comment', 'share', 'favorite'):
            record[field] = counters[field] if counters else 0

        # Look up the friend list.
        fans = lookup_xnr_fans_followers(task_detail['xnr_user_no'], 'fans_list')
        overlap = set_intersection(task_detail['uid'], fans)
        record['content_type'] = 'friends' if overlap > 0 else 'unfriends'

        es_xnr_2.index(index=twitter_warning_corpus_index_name,
                       doc_type=twitter_warning_corpus_index_type,
                       id=task_detail['tid'],
                       body=record)
    except:
        return False
    return True
Example #5
0
def get_role_info(domain_pinyin, role_name):
    """Return the stored ``_source`` for one role of a domain.

    The document id is ``"<domain_pinyin>_<role_en>"``, where ``role_en`` is
    ``role_name`` translated through ``fb_domain_ch2en_dict``.

    Raises:
        KeyError: if ``role_name`` has no entry in ``fb_domain_ch2en_dict``.
        Errors from ``es.get`` propagate unchanged.
    """
    # The translation table is a mapping, so it is subscripted; the previous
    # call syntax raised TypeError on every invocation.
    role_en = fb_domain_ch2en_dict[role_name]
    role_info_id = domain_pinyin + '_' + role_en
    # NOTE(review): this reads from the *domain* index although the id is a
    # role id -- confirm that fb_domain_index_name is the intended index.
    role_info = es.get(index=fb_domain_index_name,
                       doc_type=fb_domain_index_type,
                       id=role_info_id)['_source']
    return role_info
Example #6
0
def lookup_xnr_uid(xnr_user_no):
    """Return the ``uid`` stored for ``xnr_user_no`` in the tw_xnr index.

    Any failure (lookup error, missing ``_source`` or ``uid``) yields ``''``.
    """
    try:
        return es_xnr_2.get(index=tw_xnr_index_name,
                            doc_type=tw_xnr_index_type,
                            id=xnr_user_no)['_source']['uid']
    except:
        return ''
Example #7
0
def lookup_xnr_fans_followers(xnr_user_no,lookup_type):
    """Return field ``lookup_type`` of the fans/followers document of ``xnr_user_no``.

    Any failure (lookup error or missing field) yields an empty list.
    """
    try:
        relations = es_xnr_2.get(index=tw_xnr_fans_followers_index_name,
                                 doc_type=tw_xnr_fans_followers_index_type,
                                 id=xnr_user_no)['_source']
        return relations[lookup_type]
    except:
        return []
Example #8
0
def lookup_xnr_concernedusers(xnr_user_no):
    """Return the ``followers_list`` stored for ``xnr_user_no``.

    Any failure (lookup error or missing field) yields an empty list.
    """
    try:
        response = es_xnr.get(index=tw_xnr_fans_followers_index_name,
                              doc_type=tw_xnr_fans_followers_index_type,
                              id=xnr_user_no)
        return response['_source']['followers_list']
    except:
        return []
Example #9
0
def get_user_nickname(uid):
    """Return the ``username`` stored for ``uid`` in the twitter user index.

    Any failure (lookup error or missing field) yields ``''``.
    """
    try:
        response = es_xnr.get(index=twitter_user_index_name,
                              doc_type=twitter_user_index_type,
                              id=uid)
        return response['_source']['username']
    except:
        return ''
Example #10
0
def get_show_domain_group_detail_portrait(domain_name):
    """Return a display record for every member of a Twitter domain group.

    The member uids are read from the domain document (id = pinyin of
    ``domain_name``) and their portraits are fetched with one ``mget``.

    Returns:
        A list with one dict per requested uid, in ``mget`` order.  Each dict
        has the keys ``uid``, ``nick_name``, ``photo_url``, ``domain``,
        ``sensitive``, ``location``, ``fans_num``, ``friends_num``,
        ``home_page`` and ``influence``.  Fields absent from the portrait,
        and every field of a portrait that was not found, are ``''``.
    """
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    es_result = es.get(index=tw_domain_index_name,
                       doc_type=tw_domain_index_type,
                       id=domain_pinyin)['_source']
    member_uids = es_result['member_uids']
    es_mget_result = es_user_portrait.mget(index=portrait_index_name,
                                           doc_type=portrait_index_type,
                                           body={'ids': member_uids})['docs']

    # (portrait field, output field) pairs copied verbatim when present.
    # 'friendsnum' is both the key tested and the key read: the earlier code
    # tested 'friends_num' and then read 'friendsnum'.
    copied_fields = (
        ('uname', 'nick_name'),
        ('photo_url', 'photo_url'),
        ('domain', 'domain'),
        ('sensitive', 'sensitive'),
        ('location', 'location'),
        ('fansnum', 'fans_num'),
        ('friendsnum', 'friends_num'),
    )

    result_all = []
    for result in es_mget_result:
        item = {
            'uid': '',
            'nick_name': '',
            'photo_url': '',
            'domain': '',
            'sensitive': '',
            'location': '',
            'fans_num': '',
            'friends_num': '',
            'home_page': "",
            'influence': '',
        }
        if result['found']:
            item['uid'] = result['_id']
            source = result['_source']
            for source_key, item_key in copied_fields:
                if source_key in source:
                    item[item_key] = source[source_key]
            if 'screenname' in source:
                item['home_page'] = "https://twitter.com/" + source['screenname']
            if 'influence' in source:
                item['influence'] = get_influence_relative(
                    item['uid'], source['influence'])
        result_all.append(item)
    return result_all
Example #11
0
def get_show_domain_role_info(domain_name, role_name):
    """Return the stored ``_source`` of one role of a Twitter domain.

    The document id is ``"<pinyin of domain_name>_<role_en>"``, with
    ``role_en`` looked up in ``tw_domain_ch2en_dict``.  Lookup errors
    propagate to the caller.
    """
    pinyin_name = pinyin.get(domain_name, format='strip', delimiter='_')
    doc_id = pinyin_name + '_' + tw_domain_ch2en_dict[role_name]
    response = es.get(index=tw_role_index_name,
                      doc_type=tw_role_index_type,
                      id=doc_id)
    return response['_source']
Example #12
0
def lookup_xnr_friends(xnr_user_no):
    """Return the ``fans_list`` stored for ``xnr_user_no`` in the fb index.

    Any failure (lookup error or missing field) yields an empty list.
    """
    try:
        relations = es_xnr.get(index=fb_xnr_fans_followers_index_name,
                               doc_type=fb_xnr_fans_followers_index_type,
                               id=xnr_user_no)['_source']
        return relations['fans_list']
    except:
        return []
Example #13
0
def get_access_level_info(account_name):
    """Return the access level recorded for ``account_name``.

    The raw lookup response is printed.  The result is a dict with
    ``account_name`` and ``access_level``; the latter is ``None`` when the
    stored document has no such field.  Lookup errors propagate.
    """
    response = es_xnr_2.get(index=access_control_index_name,
                            doc_type=access_control_index_type,
                            id=account_name)
    print(response)
    return {
        'account_name': account_name,
        'access_level': response['_source'].get('access_level'),
    }
Example #14
0
def get_role_sort_list(domain_name):
    """Return the Chinese role names of a domain in their stored order.

    ``role_distribute`` of the domain document is JSON-decoded and the first
    element of each entry is translated through ``tw_domain_en2ch_dict``.
    Any failure while fetching, decoding or translating yields ``[]``.
    """
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    try:
        domain_doc = es.get(index=tw_domain_index_name,
                            doc_type=tw_domain_index_type,
                            id=domain_pinyin)['_source']
        ranked_roles = json.loads(domain_doc['role_distribute'])
        return [tw_domain_en2ch_dict[entry[0]] for entry in ranked_roles]
    except:
        return []
Example #15
0
def get_modify_base_info(task_detail):
    """Apply the optional base-info fields of ``task_detail`` to a tw_xnr document.

    ``task_detail['xnr_user_no']`` selects the document.  Of the optional
    keys, ``active_time`` and ``monitor_keywords`` are copied as given, while
    ``day_post_average`` is split on ``'-'`` and stored JSON-encoded.

    Returns:
        ``True`` when the update succeeded, ``False`` when it raised (the
        exception is printed).  Errors from the initial fetch propagate.
    """
    xnr_user_no = task_detail['xnr_user_no']
    item_exists = es.get(index=tw_xnr_index_name,
                         doc_type=tw_xnr_index_type,
                         id=xnr_user_no)['_source']
    if 'active_time' in task_detail:
        item_exists['active_time'] = task_detail['active_time']
    if 'day_post_average' in task_detail:
        day_post_average = task_detail['day_post_average'].split('-')
        item_exists['day_post_average'] = json.dumps(day_post_average)
    if 'monitor_keywords' in task_detail:
        item_exists['monitor_keywords'] = task_detail['monitor_keywords']
    try:
        es.update(index=tw_xnr_index_name,
                  doc_type=tw_xnr_index_type,
                  body={'doc': item_exists},
                  id=xnr_user_no)
        mark = True
    except Exception as e:
        print(e)
        mark = False
    # The outcome used to be computed and then dropped, so callers always
    # received None.
    return mark
Example #16
0
def get_save_step_three_1(task_detail):
    """Copy the step-three account details onto the stored tw_xnr document.

    The document whose id is ``task_detail['task_id']`` is fetched, updated
    with the account fields from ``task_detail`` and written back with
    ``create_status`` set to 2.  The update response is printed.

    Returns:
        Always ``True``; lookup, key and update errors propagate.
    """
    xnr_id = task_detail['task_id']
    stored = es.get(index=tw_xnr_index_name,
                    doc_type=tw_xnr_index_type,
                    id=xnr_id)['_source']

    for field in ('uid', 'nick_name', 'tw_mail_account', 'tw_phone_account',
                  'password', 'description', 'age', 'location'):
        stored[field] = task_detail[field]
    stored['create_status'] = 2  # creation finished

    # Update the tw_xnr table.
    print(es.update(index=tw_xnr_index_name,
                    doc_type=tw_xnr_index_type,
                    id=xnr_id,
                    body={'doc': stored}))
    return True
Example #17
0
def get_show_domain_group_detail_portrait(domain_name):
    """Return a display record for every member of a Facebook domain group.

    The member uids are read from the domain document (id = pinyin of
    ``domain_name``) and their portraits are fetched with one ``mget``.

    Returns:
        A list with one dict per requested uid, in ``mget`` order, holding
        ``uid``, ``nick_name``, ``domain``, ``sensitive``, ``location``,
        ``home_page`` and ``influence``.  Fields absent from the portrait,
        and every field of a portrait that was not found, are ``''``.
    """
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    domain_doc = es.get(index=fb_domain_index_name,
                        doc_type=fb_domain_index_type,
                        id=domain_pinyin)['_source']
    portraits = es_user_portrait.mget(index=portrait_index_name,
                                      doc_type=portrait_index_type,
                                      body={'ids': domain_doc['member_uids']})['docs']

    output_keys = ('uid', 'nick_name', 'domain', 'sensitive', 'location',
                   'home_page', 'influence')
    # (portrait field, output field) pairs copied verbatim when present.
    optional_fields = (
        ('uname', 'nick_name'),
        ('domain', 'domain'),
        ('sensitive', 'sensitive'),
        ('location', 'location'),
    )

    profiles = []
    for doc in portraits:
        item = dict.fromkeys(output_keys, '')
        if doc['found']:
            member_id = doc['_id']
            source = doc['_source']

            item['uid'] = member_id
            item['home_page'] = "https://www.facebook.com/profile.php?id=" + str(member_id)

            for source_key, item_key in optional_fields:
                if source_key in source:
                    item[item_key] = source[source_key]
            if 'influence' in source:
                item['influence'] = get_influence_relative(item['uid'],
                                                           source['influence'])
        profiles.append(item)
    return profiles
Example #18
0
def get_role2feature_info(domain_name,role_name):
    """Return the decoded political-side and psychological features of a role.

    The role document id is ``"<pinyin of domain_name>_<role_en>"``.

    Returns:
        A dict with ``political_side`` and ``psy_feature`` (the latter ``[]``
        when it cannot be decoded), or an empty *list* when the document
        cannot be fetched or ``political_side`` cannot be decoded.
    """
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    doc_id = domain_pinyin + '_' + fb_domain_ch2en_dict[role_name]
    try:
        role_doc = es.get(index=fb_role_index_name,
                          doc_type=fb_role_index_type,
                          id=doc_id)['_source']
        features = {'political_side': json.loads(role_doc['political_side'])}
        try:
            features['psy_feature'] = json.loads(role_doc['psy_feature'])
        except:
            features['psy_feature'] = []
        return features
    except:
        return []
Example #19
0
def update_access_level_info(account_name, access_level):
    """Set ``access_level`` on the access-control document of ``account_name``.

    The document is fetched, its ``access_level`` replaced, and the whole
    document sent back as a partial update.

    Returns:
        ``{"status": "ok"}`` on success, ``{"status": "fail"}`` if fetching
        or updating raised.
    """
    try:
        record = es_xnr_2.get(index=access_control_index_name,
                              doc_type=access_control_index_type,
                              id=account_name)['_source']
        record['access_level'] = access_level
        es_xnr_2.update(index=access_control_index_name,
                        doc_type=access_control_index_type,
                        id=account_name,
                        body={'doc': record})
    except Exception:
        return {"status": "fail"}
    else:
        return {"status": "ok"}
Example #20
0
def addto_facebook_corpus(task_detail):
    """Enrich ``task_detail`` from the flow-text index and store it in the corpus.

    The post ``task_detail['fid']`` is read from the flow-text index derived
    from ``task_detail['timestamp']``; its text, engagement counters and the
    author's nickname are copied onto ``task_detail`` in place.  A failure
    during enrichment is ignored and the record is indexed regardless.

    Returns:
        ``True`` if the record was indexed into the corpus index, else ``False``.
    """
    source_index = facebook_flow_text_index_name_pre + ts2datetime(
        task_detail['timestamp'])
    try:
        post = es_xnr.get(index=source_index,
                          doc_type=facebook_flow_text_index_type,
                          id=task_detail['fid'])['_source']
        task_detail['text'] = post['text']

        # Look up the three engagement counters; default to 0 when the
        # lookup returns nothing.
        counters = lookup_fid_attend_index(task_detail['fid'],
                                           task_detail['timestamp'],
                                           task_detail['timestamp'])
        for field in ('comment', 'share', 'favorite'):
            task_detail[field] = counters[field] if counters else 0

        # Look up the author's nickname.
        task_detail['nick_name'] = get_user_nickname(post['uid'])
    except:
        # Enrichment failures do not prevent the record from being stored.
        pass

    try:
        es_xnr.index(index=facebook_xnr_corpus_index_name,
                     doc_type=facebook_xnr_corpus_index_type,
                     id=task_detail['fid'],
                     body=task_detail)
    except:
        return False
    return True
Example #21
0
def get_show_domain_description(domain_name):
    """Return the display summary of a Twitter domain with translated labels.

    Reads the domain document (id = pinyin of ``domain_name``) and returns a
    dict with ``group_size``, ``description``, ``topic_preference``,
    ``word_preference``, ``role_distribute`` and ``political_side``.  The
    three distribution fields are lists of ``[label, value]`` pairs whose
    labels are translated; ``word_preference`` is the decoded
    ``top_keywords`` field.  Lookup, decoding and translation errors
    propagate.
    """

    def side_label(code):
        # Anything that is neither 'mid' nor 'right' is labelled as left.
        if code == 'mid':
            return u'中立'
        if code == 'right':
            return u'右倾'
        return u'左倾'

    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    doc = es.get(index=tw_domain_index_name,
                 doc_type=tw_domain_index_type,
                 id=domain_pinyin)['_source']

    summary = {}
    summary['group_size'] = doc['group_size']
    summary['description'] = doc['description']
    summary['topic_preference'] = [
        [fb_tw_topic_en2ch_dict[entry[0]], entry[1]]
        for entry in json.loads(doc['topic_preference'])
    ]
    summary['word_preference'] = json.loads(doc['top_keywords'])
    summary['role_distribute'] = [
        [tw_domain_en2ch_dict[entry[0]], entry[1]]
        for entry in json.loads(doc['role_distribute'])
    ]
    summary['political_side'] = [
        [side_label(entry[0]), entry[1]]
        for entry in json.loads(doc['political_side'])
    ]
    return summary
Example #22
0
def _report_as_list(value):
    """Return ``value`` as a list, JSON-decoding it first when it is not one."""
    if isinstance(value, (list, tuple)):
        return list(value)
    return json.loads(value)


def report_warming_content(task_detail):
    """Assemble a warning report from ``task_detail`` and index it.

    For every entry of ``task_detail['tw_info']`` the matching tweet is
    looked up in the warning index selected by ``task_detail['report_type']``
    (person / speech / event / time).  When no match is found there, the raw
    tweet is taken from the flow-text index and decorated with nickname and
    engagement counters.  Every uid in ``task_detail['user_info']`` is
    resolved to a small user record, falling back to empty defaults when the
    lookup fails.  The report is indexed only when it has an id and at least
    one tweet was collected.

    Side effects: each ``tw_info`` entry has its ``'timestamp'`` cast to int
    in place, and ``twitter_report_management_mappings()`` is called when
    today's report index does not exist yet.

    Returns:
        ``True`` when the report was indexed, otherwise ``False``.
    """
    report_type = task_detail['report_type']
    report_dict = dict()
    report_dict['report_type'] = report_type
    report_dict['report_time'] = task_detail['report_time']
    report_dict['xnr_user_no'] = task_detail['xnr_user_no']
    report_dict['event_name'] = task_detail['event_name']
    report_dict['uid'] = task_detail['uid']
    report_dict['nick_name'] = get_user_nickname(task_detail['uid'])

    tw_list = []
    user_list = []
    tw_info = task_detail['tw_info']
    for item in tw_info:
        lookup_mark = False
        item['timestamp'] = int(item['timestamp'])

        if report_type == u'人物':
            index_name = twitter_user_warning_index_name_pre + ts2datetime(item['timestamp'])
            doc_id = task_detail['xnr_user_no'] + '_' + task_detail['uid']
            try:
                user_doc = es_xnr_2.get(index=index_name,
                                        doc_type=twitter_user_warning_index_type,
                                        id=doc_id)['_source']
                # The stored content must be decoded, not re-encoded, before
                # its entries can be matched by tid.
                for content in _report_as_list(user_doc['content']):
                    if content['tid'] == item['tid']:
                        lookup_mark = True
                        tw_list.append(content)
            except Exception:
                print('user_error!')

        elif report_type == u'言论':
            index_name = twitter_speech_warning_index_name_pre + ts2datetime(item['timestamp'])
            try:
                speech_doc = es_xnr_2.get(index=index_name,
                                          doc_type=twitter_speech_warning_index_type,
                                          id=task_detail['xnr_user_no'] + '_' + item['tid'])['_source']
                report_dict['uid'] = speech_doc['uid']
                lookup_mark = True
                tw_list.append(speech_doc)
            except Exception:
                print('speech_error!')

        elif report_type == u'事件':
            index_name = twitter_event_warning_index_name_pre + ts2datetime(item['timestamp'])
            doc_id = task_detail['xnr_user_no'] + '_' + task_detail['event_name']
            try:
                event_doc = es_xnr_2.get(index=index_name,
                                         doc_type=twitter_event_warning_index_type,
                                         id=doc_id)['_source']
                for event in _report_as_list(event_doc['main_twitter_info']):
                    if event['tid'] == item['tid']:
                        lookup_mark = True
                        tw_list.append(event)
            except Exception:
                print('event_error!')

        elif report_type == u'时间':
            year = ts2yeartime(item['timestamp'])
            index_name = twitter_timing_warning_index_name_pre + year + '_' + task_detail['date_time']
            try:
                # The query is passed as ``body``, and the stored fields of a
                # search hit live under '_source'.
                hits = es_xnr_2.search(index=index_name,
                                       doc_type=twitter_timing_warning_index_type,
                                       body={'query': {'match_all': {}}})['hits']['hits']
                for hit in hits:
                    for data in _report_as_list(hit['_source']['twitter_date_warming_content']):
                        if data['tid'] == item['tid']:
                            lookup_mark = True
                            tw_list.append(data)
            except Exception:
                print('time_error!')

        if not lookup_mark:
            # Fall back to the raw tweet from the flow-text index.
            flow_text_index_name = twitter_flow_text_index_name_pre + ts2datetime(item['timestamp'])
            try:
                tw_result = es_xnr_2.get(index=flow_text_index_name,
                                         doc_type=twitter_flow_text_index_type,
                                         id=item['tid'])['_source']
                tw_result['nick_name'] = get_user_nickname(tw_result['uid'])
                tid_result = lookup_tid_attend_index(item['tid'], item['timestamp'])
                if tid_result:
                    tw_result['comment'] = tid_result['comment']
                    tw_result['share'] = tid_result['share']
                    tw_result['favorite'] = tid_result['favorite']
                else:
                    tw_result['comment'] = 0
                    tw_result['share'] = 0
                    tw_result['favorite'] = 0
                tw_list.append(tw_result)
            except Exception:
                print('flow_text error!')

    user_info = task_detail['user_info']
    if user_info:
        for uid in user_info:
            # Start from the defaults so a failed lookup still yields a
            # complete record for this uid.
            user = {
                'uid': uid,
                'username': '',
                'talking_about_count': 0,
                'likes': 0,
                'category': '',
            }
            try:
                user_result = es_xnr_2.get(index=twitter_user_index_name,
                                           doc_type=twitter_user_index_type,
                                           id=uid)['_source']
                user['username'] = user_result['username']
                user['talking_about_count'] = user_result.get('talking_about_count', 0)
                user['likes'] = user_result.get('likes', 0)
                user['category'] = user_result.get('category', '')
            except Exception:
                print('user_list error!')
            user_list.append(user)

    report_content = dict()
    report_content['user_list'] = user_list
    report_content['tw_list'] = tw_list
    report_dict['report_content'] = json.dumps(report_content)

    report_id = ''
    if report_type == u'言论':
        if tw_info:
            report_id = tw_info[0]['tid']
    elif report_type == u'人物':
        report_id = task_detail['xnr_user_no'] + '_' + task_detail['uid']
    elif report_type == u'事件':
        report_id = task_detail['xnr_user_no'] + '_' + task_detail['event_name']
    elif report_type == u'时间':
        if tw_info:
            report_id = tw_info[0]['tid']
        else:
            report_id = str(task_detail['report_time'])

    # A report without any collected tweet is not stored.
    report_mark = bool(tw_list)

    now_time = int(time.time())
    twitter_report_management_index_name = twitter_report_management_index_name_pre + ts2datetime(now_time)
    if not es_xnr_2.indices.exists(index=twitter_report_management_index_name):
        twitter_report_management_mappings()

    if report_id and report_mark:
        try:
            es_xnr_2.index(index=twitter_report_management_index_name,
                           doc_type=twitter_report_management_index_type,
                           id=report_id,
                           body=report_dict)
            mark = True
        except Exception:
            mark = False
    else:
        mark = False
    return mark
Example #23
0
def get_domain_info(domain_pinyin):
    """Return the ``_source`` of the fb domain document with id ``domain_pinyin``.

    Lookup errors propagate to the caller.
    """
    response = es.get(index=fb_domain_index_name,
                      doc_type=fb_domain_index_type,
                      id=domain_pinyin)
    return response['_source']
Example #24
0
def get_xnr_info_new(xnr_user_no):
    """Return the ``_source`` of the fb_xnr document with id ``xnr_user_no``.

    Lookup errors propagate to the caller.
    """
    response = es.get(index=fb_xnr_index_name,
                      doc_type=fb_xnr_index_type,
                      id=xnr_user_no)
    return response['_source']
Example #25
0
def get_recommend_step_two(task_detail):
    """Build profile recommendations from the stored statistics of a role.

    ``task_detail`` must carry ``'domain_name'`` and ``'role_name'``; the
    role document id is ``"<pinyin of domain_name>_<role_en>"``.

    Returns:
        A dict with the keys ``role_example`` (uid -> ``[nick_name, url]``),
        ``active_time`` (indices of the most active slots),
        ``day_post_num_average``, ``nick_name``, ``sex`` (1 male, 2 female,
        ``''`` when unknown), ``user_location``, ``description``, ``age``
        and ``career``.  The multi-valued text fields are joined with ``'&'``.
    """
    domain_name = task_detail['domain_name']
    role_name = task_detail['role_name']

    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    role_name_en = fb_domain_ch2en_dict[role_name]
    _id = domain_pinyin + '_' + role_name_en

    recommend_results = dict()
    # Statistics stored for the role.
    es_result = es.get(index=fb_role_index_name,
                       doc_type=fb_role_index_type,
                       id=_id)['_source']

    # Example members of the role.
    nick_name_list = []
    user_location_top_list = []  # never filled here, so 'user_location' is ''
    description_list = []
    sex_list = []
    role_example_dict = {}
    member_uids = json.loads(es_result['member_uids'])
    member_uids_results = es_user_profile.mget(index=profile_index_name,
                                               doc_type=profile_index_type,
                                               body={'ids': member_uids})['docs']
    count = 0
    for result in member_uids_results:
        if not result['found']:
            continue
        result = result['_source']
        person_url = "https://www.facebook.com/profile.php?id=" + str(
            result['uid'])

        # Bind per member, so a profile without a name can neither raise
        # NameError nor reuse the previous member's name.
        nick_name = result.get('name', '')
        if 'name' in result:
            nick_name_list.append(nick_name)

        # Only recognised genders are counted; other values used to re-add
        # the previous member's code or raise NameError.
        gender = result.get('gender')
        if gender == 'male':
            sex_list.append(1)
        elif gender == 'female':
            sex_list.append(2)

        if 'description' in result:
            description_list.append(result['description'])

        role_example_dict[result['uid']] = [nick_name, person_url]
        count += 1
        if count > NICK_NAME_TOP:
            break
    recommend_results['role_example'] = role_example_dict

    active_time_list_np = np.array(json.loads(es_result['active_time']))
    # Negating sorts descending: the busiest slots come first.
    active_time_list_np_sort = list(
        np.argsort(-active_time_list_np)[:ACTIVE_TIME_TOP])
    recommend_results['active_time'] = active_time_list_np_sort

    day_post_num = json.loads(es_result['day_post_num'])
    # Missing (null) days count as 0 posts.
    day_post_num_new = list(pd.Series(day_post_num).fillna(0))
    if day_post_num_new:
        day_post_num_average = sum(day_post_num_new) / float(len(day_post_num_new))
    else:
        # No data at all: report 0 instead of dividing by zero.
        day_post_num_average = 0.0
    recommend_results['day_post_num_average'] = day_post_num_average

    sex_sort = ''
    if sex_list:
        sex_sort = Counter(sex_list).most_common(1)[0][0]
    recommend_results['nick_name'] = '&'.join(nick_name_list)
    recommend_results['sex'] = sex_sort
    recommend_results['user_location'] = '&'.join(user_location_top_list)
    recommend_results['description'] = '&'.join(
        description_list[:DESCRIPTION_TOP])
    recommend_results['age'] = ''
    recommend_results['career'] = ''
    return recommend_results
Example #26
0
def get_generate_example_model(domain_name, role_name, mail):
    """Build the example model for one role of a domain and persist it.

    The role document is loaded from ``tw_role_index_name``, several of its
    JSON-encoded fields are reduced to display values, keyword and profile
    information of the role's members is merged in, and the result is
    written to ``EXAMPLE_MODEL_PATH`` as ``tw_<domain>_<role>.json`` and
    registered in ``tw_example_model_index_name``.

    Args:
        domain_name: Domain name; converted to pinyin to build document ids.
        role_name: Role name; must be a key of ``tw_domain_ch2en_dict``.
        mail: Address handed to ``export_group_info`` for the group export.

    Returns:
        True when the JSON file was written and the index call succeeded,
        False when either of those two steps raised.

    Raises:
        Anything raised while loading or transforming the role document
        (lookups, JSON decoding, search calls) propagates to the caller;
        only the final write/index step is guarded.
    """
    # The group summary is exported (and mailed) before the model is built.
    export_group_info(domain_name, mail)

    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    role_en = tw_domain_ch2en_dict[role_name]
    task_id = domain_pinyin + '_' + role_en
    item = es.get(index=tw_role_index_name,
                  doc_type=tw_role_index_type,
                  id=task_id)['_source']

    # Political leaning: take the first entry's label and map it to its
    # display string; anything other than 'mid'/'left' is shown as right.
    political_side = json.loads(item['political_side'])[0][0]
    political_labels = {'mid': u'中立', 'left': u'左倾'}
    item['political_side'] = political_labels.get(political_side, u'右倾')

    # Psychological features: keep the names of the first TOP_PSY_FEATURE
    # entries. Slicing (instead of indexing by range) tolerates documents
    # that store fewer entries than TOP_PSY_FEATURE.
    psy_feature = json.loads(item['psy_feature'])
    item['psy_feature'] = '&'.join(
        [feature[0] for feature in psy_feature[:TOP_PSY_FEATURE]])

    role_group_uids = json.loads(item['member_uids'])

    if S_TYPE == 'test':
        current_time = datetime2ts(S_DATE)
    else:
        current_time = int(time.time())

    index_name_list = get_flow_text_index_list(current_time)
    query_body_search = {
        'query': {
            'filtered': {
                'filter': {
                    'terms': {
                        'uid': role_group_uids
                    }
                }
            }
        },
        'size': MAX_VALUE,
        '_source': ['keywords_string']
    }
    es_keyword_results = es_flow_text.search(
        index=index_name_list,
        doc_type=flow_text_index_type,
        body=query_body_search)['hits']['hits']

    # Every hit contributes '&' + its keywords_string (so the result keeps a
    # leading '&'); joined once instead of repeated string concatenation.
    keywords_string = ''.join(
        ['&' + hit['_source']['keywords_string']
         for hit in es_keyword_results])
    k_dict = extract_keywords(keywords_string)
    item['monitor_keywords'] = ','.join(
        [keyword.word.encode('utf-8') for keyword in k_dict])

    mget_results_user = es_user_portrait.mget(index=profile_index_name,
                                              doc_type=profile_index_type,
                                              body={'ids':
                                                    role_group_uids})['docs']
    # Stays an empty list when no member profile is found. Each found
    # profile overwrites the previous one, so the last found member wins.
    item['nick_name'] = []
    for profile_doc in mget_results_user:
        if not profile_doc['found']:
            continue
        content = profile_doc['_source']
        item['nick_name'] = content.get('username', '')
        item['location'] = content.get('location', '')
        item['description'] = content.get('description', '')

    # Fixed placeholder attributes of the example model.
    item['business_goal'] = u'渗透'
    item['age'] = 30
    item['career'] = u'自由职业'

    # Indices of the TOP_ACTIVE_TIME largest values, largest first.
    active_time_values = np.array(json.loads(item['active_time']))
    item['active_time'] = np.argsort(
        -active_time_values)[:TOP_ACTIVE_TIME].tolist()

    # Collapse the per-day post counts to their mean.
    day_post_num_values = np.array(json.loads(item['day_post_num']))
    item['day_post_num'] = np.mean(day_post_num_values).tolist()
    item['role_name'] = role_name

    task_id_new = 'tw_' + domain_pinyin + '_' + role_en
    example_model_file_name = EXAMPLE_MODEL_PATH + task_id_new + '.json'
    try:
        with open(example_model_file_name, "w") as dump_f:
            json.dump(item, dump_f)
        item_dict = {'domain_name': domain_name, 'role_name': role_name}
        es.index(index=tw_example_model_index_name,
                 doc_type=tw_example_model_index_type,
                 body=item_dict,
                 id=task_id_new)
    except Exception:
        # Best effort: report failure to the caller, but do not swallow
        # KeyboardInterrupt/SystemExit as a bare ``except`` would.
        return False
    return True
Example #27
0
def export_group_info(domain_name, mail):
    """Export a summary of a domain group to a JSON file and mail it.

    The summary combines the statistics returned by
    ``get_show_domain_description``, the creation metadata stored in
    ``tw_domain_index_name`` and per-member profile fields, and is written
    to ``EXAMPLE_MODEL_PATH`` as ``<domain_pinyin>_<timestamp>.json``.

    Args:
        domain_name: Domain name; converted to pinyin for the document id
            and the export file name.
        mail: Recipient passed to ``sendfile2mail`` together with the file.

    Returns:
        True when the export file was written, False when writing it
        raised. A failure while mailing is printed and does not change the
        result.
    """
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')

    domain_details = get_show_domain_description(domain_name)
    domain_info = es.get(index=tw_domain_index_name,
                         doc_type=tw_domain_index_type,
                         id=domain_pinyin)['_source']
    members_uid = domain_info['member_uids']

    # NOTE(review): the sibling code reads profile_index_name through
    # es_user_portrait, while this function uses es - confirm both clients
    # reach the same cluster.
    query_body = {
        "query": {
            "bool": {
                "must": [
                    {
                        "terms": {
                            "uid": members_uid,
                        }
                    },
                ]
            }
        },
        "size": 9999,
        # 'location' and 'userscreenname' are read below, so they must be
        # requested here; otherwise every location and link comes back empty.
        "fields": ["locale", "link", "uid", "gender", "username",
                   "location", "userscreenname"]
    }
    user_info = es.search(profile_index_name, profile_index_type,
                          query_body)['hits']['hits']

    members_info = {}
    # Gender statistics are currently not collected; the key is still
    # exported (as an empty mapping) to keep the file layout unchanged.
    gender_count = {}
    location_count = {}
    for user in user_info:
        # Values under 'fields' are lists; the first element is used.
        # A hit without a 'fields' key is treated as having no values.
        item = user.get('fields', {})
        uid = item.get('uid', [''])[0]
        location = item.get('location', [''])[0]
        members_info[uid] = {
            'nickname': item.get('username', [''])[0],
            'location': location,
            'link':
            'https://twitter.com/' + item.get('userscreenname', [''])[0]
        }
        if location:
            location_count[location] = location_count.get(location, 0) + 1

    res = {
        'domain_name': domain_name,
        'members_num': domain_details['group_size'],
        'create_info': {
            'submitter': domain_info['submitter'],
            'remark': domain_info['remark'],
            'create_type': domain_info['create_type'],
            'create_time': ts2datetime_full(domain_info['create_time']),
        },
        'members_uid': members_uid,
        # uid -> {'nickname': ..., 'location': ..., 'link': ...}
        'members_info': members_info,
        'count_info': {
            # location -> number of members reporting it
            'location_count': location_count,
            'role_count': domain_details['role_distribute'],
            'words_preference': domain_details['word_preference'],
            'topic_preference': domain_details['topic_preference'],
            'political_side': domain_details['political_side'],
            'gender_count': gender_count,
        }
    }

    export_filename = EXAMPLE_MODEL_PATH + domain_pinyin + '_' + ts2datetime_full(
        time.time()) + '.json'
    try:
        with open(export_filename, "w") as f:
            json.dump(res, f)
    except Exception:
        # Writing the export failed; report it through the return value.
        return False

    # Mailing is best effort: a failure is printed and the export still
    # counts as successful.
    try:
        sendfile2mail(mail, export_filename)
    except Exception as e:
        print(e)
    return True