Esempio n. 1
0
def load_xnr_info():
    """
    Collect login details and friend uids for the Facebook xnr accounts.

    Up to 999 documents are read from ``fb_xnr_index_name``.  A document is
    skipped when it has neither a mail account nor a phone account.  For each
    remaining document the relations index is searched for records whose
    ``xnr_no`` equals the document's ``xnr_user_no``; the ``uid`` of every
    such record goes into the friends list.

    :return: list of dicts with the keys ``root_uid``, ``root_nick_name``,
        ``xnr_user_no``, ``account``, ``password`` and ``friends_list``
    """
    xnr_infos = []
    xnr_hits = es.search(fb_xnr_index_name, fb_xnr_index_type,
                         {'size': 999})['hits']['hits']
    for xnr_hit in xnr_hits:
        doc = xnr_hit['_source']
        # The mail account wins over the phone account when both are set.
        account = (doc.get('fb_mail_account', '')
                   or doc.get('fb_phone_account', ''))
        if not account:
            continue

        xnr_user_no = doc.get('xnr_user_no', '')
        # Friends are read from the relations index.  The earlier approach
        # (reading 'fans_list' from fb_xnr_fans_followers_index_name) was
        # retired -- @hanmc 2019-3-25 15:10:05.
        relations_query = {
            'query': {
                'bool': {
                    'must': [{'term': {'xnr_no': xnr_user_no}}]
                }
            },
            'size': 99999
        }
        relation_hits = es.search(facebook_xnr_relations_index_name,
                                  facebook_xnr_relations_index_type,
                                  relations_query)['hits']['hits']
        friend_uids = [hit['_source']['uid'] for hit in relation_hits]

        xnr_infos.append({
            'root_uid': doc.get('uid', ''),
            'root_nick_name': doc.get('nick_name', ''),
            'xnr_user_no': xnr_user_no,
            'account': account,
            'password': doc.get('password', ''),
            'friends_list': friend_uids
        })
    return xnr_infos
Esempio n. 2
0
def get_tweets_from_flow(monitor_keywords_list, sort_item_new):
    """
    Search one Twitter flow-text index for tweets matching any keyword.

    The index name is ``twitter_flow_text_index_name_pre`` followed by
    ``ts2datetime(now - 24h)``, where "now" is ``S_DATE_TW`` in test mode and
    the current time otherwise.  If the keyword search yields nothing, a
    ``match_all`` search sorted by ``sort_item_new`` is used instead.

    :param monitor_keywords_list: keywords; each becomes a ``*keyword*``
        wildcard on ``keywords_string``
    :param sort_item_new: field used as the primary descending sort key
    :return: list of hit ``_source`` dicts, each extended with ``nick_name``
        and ``photo_url`` from ``tw_uid2nick_name_photo``
    """
    keyword_clauses = [
        {'wildcard': {'keywords_string': '*' + keyword + '*'}}
        for keyword in monitor_keywords_list
    ]
    keyword_query = {
        'query': {'bool': {'should': keyword_clauses}},
        'sort': [
            {sort_item_new: {'order': 'desc'}},
            {'timestamp': {'order': 'desc'}},
        ],
        'size': TOP_WEIBOS_LIMIT
    }

    now_ts = datetime2ts(S_DATE_TW) if S_TYPE == 'test' else int(time.time())
    index_name = twitter_flow_text_index_name_pre + ts2datetime(
        now_ts - 24 * 3600)

    hits = es.search(index=index_name,
                     doc_type=twitter_flow_text_index_type,
                     body=keyword_query)['hits']['hits']
    if not hits:
        # Nothing matched the keywords: fall back to the top documents.
        fallback_query = {
            'query': {'match_all': {}},
            'size': TOP_WEIBOS_LIMIT,
            'sort': {sort_item_new: {'order': 'desc'}}
        }
        hits = es.search(index=index_name,
                         doc_type=twitter_flow_text_index_type,
                         body=fallback_query)['hits']['hits']

    tweets = []
    for hit in hits:
        tweet = hit['_source']
        nick_name, photo_url = tw_uid2nick_name_photo(tweet['uid'])
        tweet['nick_name'] = nick_name
        tweet['photo_url'] = photo_url
        tweets.append(tweet)
    return tweets
Esempio n. 3
0
def get_submit_tweet_fb(task_detail):
    """
    Push a Facebook post task onto the queue and publish it.

    The xnr account document is loaded from ``fb_xnr_index_name``; the phone
    account is preferred as login name, with the mail account as fallback.
    The task parameters are pushed as JSON onto ``FB_TWEET_PARAMS`` via
    ``ali_re.lpush`` and the post is then published with ``fb_publish``.

    Credentials are deliberately never printed.

    :param task_detail: dict providing ``text``, ``tweet_type``, ``channel``,
        ``operate_type`` and ``xnr_user_no``
    :return: the value returned by ``fb_publish``; ``False`` when the account
        document cannot be loaded, when it holds no login name, or when
        queueing/publishing raises
    """
    print('get_submit_tweet_fb,start execute=========================')
    text = task_detail['text']
    tweet_type = task_detail['tweet_type']
    channel = task_detail['channel']
    operate_type = task_detail['operate_type']
    xnr_user_no = task_detail['xnr_user_no']

    try:
        es_xnr_result = es.get(index=fb_xnr_index_name,
                               doc_type=fb_xnr_index_type,
                               id=xnr_user_no)['_source']
    except Exception as e:
        # Without the account document there is nothing to log in with.
        print(e)
        return False

    fb_mail_account = es_xnr_result['fb_mail_account']
    fb_phone_account = es_xnr_result['fb_phone_account'].strip()
    password = str(es_xnr_result['password'].strip())

    if fb_phone_account:
        account_name = str(fb_phone_account)
    elif fb_mail_account:
        account_name = fb_mail_account
    else:
        return False

    # Stays False if anything below raises, so the return value is always
    # defined.
    mark = False
    try:
        fb_tweet_params_dict = {
            "account_name": account_name,
            "password": password,
            "text": text,
            "tweet_type": tweet_type,
            "xnr_user_no": xnr_user_no,
            "channel": channel,
            "operate": operate_type,
        }
        print('%s ===================================================fb params'
              % (FB_TWEET_PARAMS,))
        ali_re.lpush(FB_TWEET_PARAMS, json.dumps(fb_tweet_params_dict))
        mark = fb_publish(account_name, password, text, tweet_type,
                          xnr_user_no)
    except Exception as e:
        print(e)

    return mark
Esempio n. 4
0
def get_hot_recommend_tweets(xnr_user_no, topic_field, sort_item):
    """
    Return tweets of one topic field from the social-sensing index.

    ``topic_field`` is translated through ``topic_ch2en_dict`` and used as a
    term filter on ``topic_field``; hits are sorted by ``sort_item``
    descending.  If the filtered search yields nothing, a ``match_all``
    search with the same sort is used instead.

    No search is run when ``sort_item`` is ``'compute_status'``; an empty
    list is returned in that case.

    :param xnr_user_no: not used by this function
    :param topic_field: key into ``topic_ch2en_dict`` (``KeyError`` if
        unknown)
    :param sort_item: field to sort by, descending
    :return: list of hit ``_source`` dicts, each extended with ``nick_name``
        and ``photo_url`` from ``tw_uid2nick_name_photo``
    """
    topic_field_en = topic_ch2en_dict[topic_field]

    # Defined up front so the 'compute_status' case yields an empty result
    # rather than failing on an unbound name.
    es_results = []

    if sort_item != 'compute_status':
        query_body = {
            'query': {
                'bool': {
                    'must': [{
                        'filtered': {
                            'filter': {
                                'term': {
                                    'topic_field': topic_field_en
                                }
                            }
                        }
                    }]
                }
            },
            'sort': {
                sort_item: {
                    'order': 'desc'
                }
            },
            'size': TOP_WEIBOS_LIMIT
        }

        es_results = es.search(index=tw_social_sensing_index_name,
                               doc_type=tw_social_sensing_index_type,
                               body=query_body)['hits']['hits']

        if not es_results:
            # Nothing for this topic: fall back to the top documents overall.
            es_results = es.search(index=tw_social_sensing_index_name,
                                   doc_type=tw_social_sensing_index_type,
                                   body={
                                       'query': {'match_all': {}},
                                       'size': TOP_WEIBOS_LIMIT,
                                       'sort': {sort_item: {'order': 'desc'}}
                                   })['hits']['hits']

    results_all = []
    for result in es_results:
        result = result['_source']
        nick_name, photo_url = tw_uid2nick_name_photo(result['uid'])
        result['nick_name'] = nick_name
        result['photo_url'] = photo_url
        results_all.append(result)
    return results_all
Esempio n. 5
0
def update_baseinfo(uid_list=None):
    """
    Build base information for the given uids and hand it to save_data2es.

    The ``location``, ``gender``, ``name`` and ``uid`` fields are fetched
    from ``facebook_user_index_name`` for the given uids.  Gender is encoded
    as 1 for ``'male'``, 2 for ``'female'`` and 0 otherwise.  Uids with no
    hit get an entry with empty values.

    :param uid_list: uids to look up; omitted or ``None`` means an empty list
    :return: whatever ``save_data2es`` returns for the ``{uid: info}`` dict
    """
    if uid_list is None:
        uid_list = []

    gender_codes = {'male': 1, 'female': 2}
    user_baseinfo = {}
    fb_user_query_body = {
        'query': {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {
                                "terms": {
                                    "uid": uid_list
                                }
                            },
                        ]
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        "fields": ["location", "gender", "name", "uid"]
    }
    search_results = es.search(index=facebook_user_index_name,
                               doc_type=facebook_user_index_type,
                               body=fb_user_query_body)['hits']['hits']
    for item in search_results:
        # Each requested field is read as a list; its first element is used.
        content = item['fields']
        uid = content['uid'][0]

        location = ''
        if 'location' in content:
            location_dict = json.loads(content['location'][0])
            location = get_user_location(location_dict)

        gender = 0
        if 'gender' in content:
            gender = gender_codes.get(content['gender'][0], 0)

        uname = ''
        if 'name' in content:
            uname = content['name'][0]

        user_baseinfo[uid] = {
            'uid': str(uid),
            'uname': uname,
            'gender': gender,
            'location': location,
        }

    # Requested uids without a hit still get a (blank) entry.
    for uid in uid_list:
        if uid not in user_baseinfo:
            user_baseinfo[uid] = {
                'uid': str(uid),
                'uname': '',
                'gender': 0,
                'location': '',
            }
    return save_data2es(user_baseinfo)
Esempio n. 6
0
def update_domain(uid_list=None):
    """
    Load ``bio_str`` and ``category`` for Facebook users into a per-uid dict.

    When ``uid_list`` is empty or omitted the uids come from
    ``load_uid_list()``.  Only user documents that have a ``bio_str`` field
    are considered (``post_filter`` / ``exists``).  Any exception raised
    while searching or processing the hits is printed and swallowed.

    NOTE(review): in the code visible here ``user_domain_data`` is filled in
    but never returned or stored -- confirm whether a later step is missing.

    :param uid_list: uids to load; a falsy value means "use load_uid_list()"
    :return: None
    """
    if not uid_list:
        uid_list = load_uid_list()
    fb_flow_text_index_list = get_facebook_flow_text_index_list(
        load_timestamp(), TEST_MAX_FLOW_TEXT_DAYS)
    user_domain_data = {}
    # load num of text
    count_result = count_text_num(uid_list, fb_flow_text_index_list)
    # load baseinfo
    fb_user_query_body = {
        'post_filter': {
            'exists': {
                'field': 'bio_str'
            }
        },
        'query': {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {
                                "terms": {
                                    "uid": uid_list
                                }
                            },
                        ]
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        "fields": ["bio_str", "category", "uid"]
    }
    try:
        search_results = es.search(index=facebook_user_index_name,
                                   doc_type=facebook_user_index_type,
                                   body=fb_user_query_body)['hits']['hits']
        for item in search_results:
            content = item['fields']
            uid = content['uid'][0]
            if uid not in user_domain_data:
                text_num = count_result[uid]
                user_domain_data[uid] = {
                    'bio_str': '',
                    'category': '',
                    'number_of_text': text_num
                }
            # For long texts, Goslate splits the text at separators such as
            # punctuation and line breaks into sub-texts of close to 2000
            # bytes, queries them one by one and returns the concatenated
            # translations.  That is how Goslate gets around the text length
            # limit.
            category = content['category'][0] if 'category' in content else ''
            bio_str = content['bio_str'][0] if 'bio_str' in content else ''
            user_domain_data[uid]['bio_str'] = bio_str
            user_domain_data[uid]['category'] = category
    except Exception as e:
        print(e)
Esempio n. 7
0
def influence_propagate(fid, index_name):
    """
    Compute a weighted propagation score for the record with the given fid.

    The documents whose ``fid`` matches are searched in ``index_name``,
    newest ``update_time`` first, and the first hit is scored as
    ``5 * share + 3 * comment + 2 * favorite``.

    :param fid: value matched against the ``fid`` field
    :param index_name: index to search
    :return: the weighted score, or 0 when there is no matching document
    """
    count_query = {
        'query': {'term': {'fid': fid}},
        'sort': {'update_time': {'order': 'desc'}}
    }
    hits = es.search(index=index_name,
                     doc_type=facebook_count_index_type,
                     body=count_query)['hits']['hits']
    if not hits:
        return 0

    latest = hits[0]['_source']
    share = latest['share']
    comment = latest['comment']
    favorite = latest['favorite']
    return 5 * share + 3 * comment + 2 * favorite
Esempio n. 8
0
def load_twitter_relations_base(xnr_user_no, relations_type):
    """
    List the uids related to an xnr through one relation flag.

    :param xnr_user_no: matched against the ``xnr_no`` field
    :param relations_type: pingtaiguanzhu or pingtaifensi; the field of that
        name must equal 1
    :return: list with the ``uid`` of every matching relation record
    """
    must_terms = [
        {'term': {'xnr_no': xnr_user_no}},
        {'term': {relations_type: 1}},
    ]
    relations_query = {
        'query': {'bool': {'must': must_terms}},
        'size': 99999
    }
    hits = es.search(twitter_xnr_relations_index_name,
                     twitter_xnr_relations_index_type,
                     relations_query)['hits']['hits']
    return [hit['_source']['uid'] for hit in hits]
Esempio n. 9
0
def load_xnr_info():
    """
    Collect login details for the Twitter xnr accounts.

    Up to 999 documents are read from ``tw_xnr_index_name``; documents with
    neither a mail account nor a phone account are skipped.

    :return: list of dicts with the keys ``root_uid``, ``root_nick_name``,
        ``xnr_user_no``, ``account``, ``password``, ``retry_times`` (always 0)
        and ``remark`` (always empty)
    """
    xnr_infos = []
    hits = es.search(tw_xnr_index_name, tw_xnr_index_type,
                     {'size': 999})['hits']['hits']
    for hit in hits:
        doc = hit['_source']
        # The mail account wins over the phone account when both are set.
        account = (doc.get('tw_mail_account', '')
                   or doc.get('tw_phone_account', ''))
        if not account:
            continue
        xnr_infos.append({
            'root_uid': doc.get('uid', ''),
            'root_nick_name': doc.get('nick_name', ''),
            'xnr_user_no': doc.get('xnr_user_no', ''),
            'account': account,
            'password': doc.get('password', ''),
            'retry_times': 0,
            'remark': '',
        })
    return xnr_infos
Esempio n. 10
0
def get_show_retweet_timing_list(xnr_user_no, start_ts, end_ts):
    """
    List the timed-retweet records of an xnr within a time window.

    Records are matched on ``xnr_user_no`` and on
    ``start_ts <= timestamp_set < end_ts``, sorted by ``compute_status``
    ascending and then ``timestamp_set`` descending.

    :param xnr_user_no: matched against the ``xnr_user_no`` field
    :param start_ts: inclusive lower bound for ``timestamp_set``
    :param end_ts: exclusive upper bound for ``timestamp_set``
    :return: list of hit ``_source`` dicts
    """
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'term': {'xnr_user_no': xnr_user_no}},
                    {'range': {'timestamp_set': {'gte': start_ts,
                                                 'lt': end_ts}}}
                ]
            }
        },
        'size': MAX_SEARCH_SIZE,
        'sort': [
            {'compute_status': {'order': 'asc'}},
            {'timestamp_set': {'order': 'desc'}}
        ]
    }

    results = es.search(index=fb_xnr_retweet_timing_list_index_name,
                        doc_type=fb_xnr_retweet_timing_list_index_type,
                        body=query_body)['hits']['hits']

    return [result['_source'] for result in results]
Esempio n. 11
0
def load_twitter_pingtaiguanzhu_state(root_uid, uid):
    """
    Read the recorded ``pingtaiguanzhu`` flag between an xnr and a user.

    :param root_uid: matched against the ``xnr_uid`` field
    :param uid: matched against the ``uid`` field
    :return: the platform-follow relation between the xnr and the user as
        currently recorded in the table, as an int; 0 when no record exists
    """
    must_terms = [
        {'term': {'xnr_uid': root_uid}},
        {'term': {'uid': uid}},
    ]
    relation_query = {
        'query': {
            'filtered': {
                'filter': {'bool': {'must': must_terms}}
            }
        }
    }
    hits = es_xnr_2.search(index=twitter_xnr_relations_index_name,
                           doc_type=twitter_xnr_relations_index_type,
                           body=relation_query)['hits']['hits']
    if not hits:
        return 0
    print(hits)
    # Only the first hit is consulted.
    return int(hits[0]['_source']['pingtaiguanzhu'])
Esempio n. 12
0
def load_xnr_info():
    """
    Collect login details and relation lists for the Twitter xnr accounts.

    Up to 999 documents are read from ``tw_xnr_index_name``; documents with
    neither a mail account nor a phone account are skipped.  The two relation
    lists come from ``load_twitter_relations(xnr_user_no)``.

    :return: list of dicts with the keys ``root_uid``, ``root_nick_name``,
        ``xnr_user_no``, ``account``, ``password``, ``guanzhu_list`` and
        ``fensi_list``
    """
    xnr_infos = []
    hits = es.search(tw_xnr_index_name, tw_xnr_index_type,
                     {'size': 999})['hits']['hits']
    for hit in hits:
        doc = hit['_source']
        # The mail account wins over the phone account when both are set.
        account = (doc.get('tw_mail_account', '')
                   or doc.get('tw_phone_account', ''))
        if not account:
            continue

        xnr_user_no = doc.get('xnr_user_no', '')
        guanzhu_list, fensi_list = load_twitter_relations(xnr_user_no)
        xnr_infos.append({
            'root_uid': doc.get('uid', ''),
            'root_nick_name': doc.get('nick_name', ''),
            'xnr_user_no': xnr_user_no,
            'account': account,
            'password': doc.get('password', ''),
            'guanzhu_list': guanzhu_list,
            'fensi_list': fensi_list
        })
    return xnr_infos
Esempio n. 13
0
def load_xnr_info():
    """
    Collect login details and the fans list for the Facebook xnr accounts.

    Up to 999 documents are read from ``fb_xnr_index_name``; documents with
    neither a mail account nor a phone account are skipped.  The friends list
    is the ``fans_list`` of the document with id ``xnr_user_no`` in
    ``fb_xnr_fans_followers_index_name``; it is empty when that lookup fails.

    :return: list of dicts with the keys ``root_uid``, ``root_nick_name``,
        ``xnr_user_no``, ``account``, ``password``, ``friends_list``,
        ``retry_times`` (always 0) and ``remark`` (always empty)
    """
    res = []
    search_res = es.search(fb_xnr_index_name, fb_xnr_index_type,
                           {'size': 999})['hits']['hits']
    for item in search_res:
        source = item['_source']
        # The mail account wins over the phone account when both are set.
        account = (source.get('fb_mail_account', '')
                   or source.get('fb_phone_account', ''))
        if not account:
            continue

        xnr_user_no = source.get('xnr_user_no', '')
        try:
            friends_list = es.get(
                index=fb_xnr_fans_followers_index_name,
                doc_type=fb_xnr_fans_followers_index_type,
                id=xnr_user_no)['_source']['fans_list']
        except Exception:
            # Best effort: a missing document or field means "no friends".
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            friends_list = []

        res.append({
            'root_uid': source.get('uid', ''),
            'root_nick_name': source.get('nick_name', ''),
            'xnr_user_no': xnr_user_no,
            'account': account,
            'password': source.get('password', ''),
            'friends_list': friends_list,
            'retry_times': 0,
            'remark': '',
        })
    return res
Esempio n. 14
0
def get_tw_influence_relative(uid, influence):
    """
    Express an influence value relative to the day's maximum influence.

    The maximum is the ``influence`` of the top document (sorted descending)
    in the index ``tw_bci_index_name_pre`` + date, where the date is
    ``S_DATE_TW`` in test mode and ``ts2datetime(now - DAY)`` otherwise.

    :param uid: not used by this function
    :param influence: the influence value to normalise
    :return: ``influence / maximum`` as a float; 0 when the index holds no
        documents or the maximum itself is 0
    """
    if S_TYPE == 'test':
        date_str = S_DATE_TW
    else:
        date_str = ts2datetime(time.time() - DAY)
    tw_bci_index_name = tw_bci_index_name_pre + date_str

    query_body = {
        'query': {
            'match_all': {}
        },
        'sort': {
            'influence': {
                'order': 'desc'
            }
        },
        # Only the top document is needed.
        'size': 1
    }
    results = es_fb_tw.search(index=tw_bci_index_name,
                              doc_type=tw_bci_index_type,
                              body=query_body)['hits']['hits']
    if not results:
        return 0

    user_index_max = results[0]['_source']['influence']
    if not user_index_max:  # the maximum is 0, so every value is 0
        return 0
    # float() guards against Python 2 integer division truncating the ratio.
    return float(influence) / user_index_max
Esempio n. 15
0
def get_user_xnr_list(user_account):
    """
    List the xnr numbers submitted by a user whose ``create_status`` is 2.

    :param user_account: matched against the ``submitter`` field
    :return: list of ``xnr_user_no`` values (at most ``USER_XNR_NUM`` are
        requested); an empty list when the search or its processing fails
    """
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [{
                            'term': {
                                'submitter': user_account
                            }
                        }, {
                            'term': {
                                'create_status': 2
                            }
                        }]
                    }
                }
            }
        },
        'size': USER_XNR_NUM
    }
    try:
        user_result = es_xnr_2.search(index=fb_xnr_index_name,
                                      doc_type=fb_xnr_index_type,
                                      body=query_body)['hits']['hits']
        xnr_user_no_list = [item['_source']['xnr_user_no']
                            for item in user_result]
    except Exception:
        # Best effort: any failure yields an empty list, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        xnr_user_no_list = []
    return xnr_user_no_list
Esempio n. 16
0
def load_facebook_relation_uids(xnr_user_no, term_query_list):
    """
    Search the Facebook relations index for users related to an xnr.

    NOTE(review): the query sets no ``size``, so the number of hits is
    limited by the Elasticsearch default page size -- confirm this is
    intended.

    :param xnr_user_no: matched against the ``xnr_no`` field
    :param term_query_list: list of term clauses added to the ``must`` list
    :return: uid list of the people related to the xnr that the term clauses
        match
    """
    must_clauses = [{'term': {'xnr_no': xnr_user_no}}]
    must_clauses.extend(term_query_list)
    relations_query = {
        'query': {
            'filtered': {
                'filter': {'bool': {'must': must_clauses}}
            }
        }
    }
    hits = es_xnr_2.search(
        index=facebook_xnr_relations_index_name,
        doc_type=facebook_xnr_relations_index_type,
        body=relations_query)['hits']['hits']
    return [hit['_source']['uid'] for hit in hits]
Esempio n. 17
0
def get_show_retweet_timing_list_future(xnr_user_no):
    """
    List the timed-retweet records of an xnr scheduled from now on.

    Records are matched on ``xnr_user_no`` and ``timestamp_set >= now``,
    sorted by ``compute_status`` ascending and then ``timestamp_set``
    descending.

    :param xnr_user_no: matched against the ``xnr_user_no`` field
    :return: list of hit ``_source`` dicts
    """
    now_ts = int(time.time())
    must_clauses = [
        {'term': {'xnr_user_no': xnr_user_no}},
        {'range': {'timestamp_set': {'gte': now_ts}}},
    ]
    sort_spec = [
        {'compute_status': {'order': 'asc'}},
        {'timestamp_set': {'order': 'desc'}},
    ]
    timing_query = {
        'query': {'bool': {'must': must_clauses}},
        'size': MAX_SEARCH_SIZE,
        'sort': sort_spec
    }
    hits = es.search(index=weibo_xnr_retweet_timing_list_index_name,
                     doc_type=weibo_xnr_retweet_timing_list_index_type,
                     body=timing_query)['hits']['hits']
    return [hit['_source'] for hit in hits]
Esempio n. 18
0
def lookup_twitter_date_warming(keywords, today_datetime):
    """
    Find sensitive tweets for a date and enrich them with counts and names.

    Tweets with ``sensitive >= 1`` are searched in the index (or indices)
    returned by ``get_timets_set_indexset_list`` for ``today_datetime``,
    sorted by ``sensitive`` descending.  Each hit gets ``comment``, ``share``
    and ``favorite`` from ``lookup_tid_attend_index`` (0 when that returns a
    falsy value) and ``nick_name`` from ``get_user_nickname``.

    NOTE(review): the bool query has a ``must`` clause and sets no
    ``minimum_should_match``.  In that case Elasticsearch treats ``should``
    clauses as optional, so the keyword wildcards may not restrict the hits
    at all -- confirm the intent.

    :param keywords: keywords; each is UTF-8 encoded and turned into a
        ``*keyword*`` wildcard on ``text``
    :param today_datetime: passed to ``get_timets_set_indexset_list`` (as
        both range ends) and to ``lookup_tid_attend_index``
    :return: list of enriched ``_source`` dicts; an empty list when the
        search or the enrichment raises
    """
    keyword_query_list = [
        {'wildcard': {'text': '*' + keyword.encode('utf-8') + '*'}}
        for keyword in keywords
    ]

    twitter_flow_text_index_name = get_timets_set_indexset_list(
        twitter_flow_text_index_name_pre, today_datetime, today_datetime)

    query_body = {
        'query': {
            'bool': {
                'should': keyword_query_list,
                'must': {
                    'range': {
                        'sensitive': {
                            'gte': 1
                        }
                    }
                }
            }
        },
        'size': MAX_WARMING_SIZE,
        'sort': {
            'sensitive': {
                'order': 'desc'
            }
        }
    }
    try:
        temp_result = es_xnr_2.search(index=twitter_flow_text_index_name,
                                      doc_type=twitter_flow_text_index_type,
                                      body=query_body)['hits']['hits']
        date_result = []
        print('temp_result:: %s' % (temp_result,))
        for item in temp_result:
            source = item['_source']
            # Look up the three engagement counters.
            tid_result = lookup_tid_attend_index(source['tid'],
                                                 today_datetime)
            for field in ('comment', 'share', 'favorite'):
                source[field] = tid_result[field] if tid_result else 0

            # Look up the user's nickname.
            source['nick_name'] = get_user_nickname(source['uid'])

            date_result.append(source)
    except Exception:
        # Best effort: any failure discards partial results, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        date_result = []
    return date_result
Esempio n. 19
0
def get_un_trace_follow_operate(xnr_user_no, uid_string, nick_name_string):
    """
    Remove users from an xnr's ``trace_follow_list``.

    The users are given either as comma-separated uids or, when
    ``uid_string`` is empty, as comma-separated nicknames that are resolved
    to uids through ``facebook_user_index_name``.

    :param xnr_user_no: id of the document in
        ``fb_xnr_fans_followers_index_name`` to update
    :param uid_string: comma-separated uids (takes precedence)
    :param nick_name_string: comma-separated nicknames, used only when
        ``uid_string`` is empty
    :return: ``[mark, fail_uids, fail_nick_name_list]`` where ``mark`` tells
        whether the list was updated, ``fail_uids`` are requested uids that
        were not in the trace list, and ``fail_nick_name_list`` are nicknames
        that could not be resolved to a uid
    """
    mark = False
    fail_nick_name_list = []
    fail_uids = []

    if uid_string:
        uid_list = uid_string.encode('utf-8').split(',')
    elif nick_name_string:
        uid_list = []
        for nick_name in nick_name_string.encode('utf-8').split(','):
            query_body = {
                'query': {
                    'filtered': {
                        'filter': {
                            'term': {'nick_name': nick_name}
                        }
                    }
                },
                '_source': ['uid']
            }
            try:
                uid_results = es.search(index=facebook_user_index_name,
                                        doc_type=facebook_user_index_type,
                                        body=query_body)['hits']['hits']
                # The first hit decides; no hit raises IndexError below.
                uid_list.append(uid_results[0]['_source']['uid'])
            except Exception:
                fail_nick_name_list.append(nick_name)
    else:
        # Nothing was requested, so there is nothing to update.
        return [mark, fail_uids, fail_nick_name_list]

    try:
        result = es.get(index=fb_xnr_fans_followers_index_name,
                        doc_type=fb_xnr_fans_followers_index_type,
                        id=xnr_user_no)['_source']

        traced_uids = set(result['trace_follow_list'])
        requested_uids = set(uid_list)

        # Whatever is traced and was not requested stays in the list.
        trace_follow_list = list(traced_uids - requested_uids)

        es.update(index=fb_xnr_fans_followers_index_name,
                  doc_type=fb_xnr_fans_followers_index_type,
                  id=xnr_user_no,
                  body={'doc': {'trace_follow_list': trace_follow_list}})

        # Requested uids that were never traced could not be cancelled.
        fail_uids = list(requested_uids - traced_uids)
        mark = True
    except Exception:
        mark = False

    return [mark, fail_uids, fail_nick_name_list]
Esempio n. 20
0
def compute_full_keywords():
    """
    Aggregate today's keywords and store their counts as one document.

    A terms aggregation (up to 1000 buckets) on ``keywords_string`` is run
    against today's Facebook flow-text index.  The bucket keys are joined
    with ``&`` and passed to ``extract_keywords``; each extracted word is
    stored with its bucket count, or 1 when it is not a bucket key.  The
    result is indexed into ``facebook_full_keyword_index_name`` under the
    date string as id.

    :return: ``True`` when the document was indexed, ``False`` when indexing
        raised
    """
    now_time = int(time.time())
    date_time = ts2datetime(now_time)

    flow_text_index_name = facebook_flow_text_index_name_pre + date_time

    query_body = {
        'aggs': {
            'keywords': {
                'terms': {
                    'field': 'keywords_string',
                    'size': 1000
                }
            }
        }
    }

    buckets = es_xnr_2.search(
        index=flow_text_index_name,
        doc_type=facebook_flow_text_index_type,
        body=query_body)['aggregations']['keywords']['buckets']

    word_dict = {}
    for item in buckets:
        word_dict[item['key']] = item['doc_count']

    # Every key is prefixed with '&', so the string starts with '&'.
    keywords_string = ''.join('&' + item['key'] for item in buckets)

    word_dict_new = {}
    for item_item in extract_keywords(keywords_string):
        keyword = item_item.word
        # Words that are not themselves a bucket key count as 1.
        word_dict_new[keyword] = word_dict.get(keyword, 1)

    keywords_task_detail = {
        'date_time': date_time,
        'timestamp': datetime2ts(date_time),
        'keyword_value_string': json.dumps(word_dict_new),
    }
    keywords_task_id = date_time

    try:
        es_xnr_2.index(index=facebook_full_keyword_index_name,
                       doc_type=facebook_full_keyword_index_type,
                       body=keywords_task_detail,
                       id=keywords_task_id)
        mark = True
    except Exception:
        # Best effort: report failure through the return value, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        mark = False
    return mark
def search_tw_posts(uids, from_ts, to_ts):
    """
    Search all Twitter flow-text indices with a query built from the inputs.

    The query body comes from ``load_search_query(uids, from_ts, to_ts)`` and
    is printed before it is run against every index whose name starts with
    ``twitter_flow_text_index_name_pre``.

    :param uids: passed to ``load_search_query``
    :param from_ts: passed to ``load_search_query``
    :param to_ts: passed to ``load_search_query``
    :return: list of hit ``_source`` dicts
    """
    search_query = load_search_query(uids, from_ts, to_ts)
    print(search_query)
    index_pattern = twitter_flow_text_index_name_pre + '*'
    hits = es_xnr_2.search(index=index_pattern,
                           doc_type=twitter_flow_text_index_type,
                           body=search_query)['hits']['hits']
    posts = []
    for hit in hits:
        posts.append(hit['_source'])
    return posts
Esempio n. 22
0
def search_fb_posts(fb_xnr_user_no, from_ts, to_ts, extend_keywords_size=0):
    """
    Search Facebook flow-text indices with a query built from xnr keywords.

    Keywords come from ``load_fb_keywords``, the query body from
    ``load_query_body`` and the index list from ``load_index``; the index
    list is printed before the search runs.

    :param fb_xnr_user_no: passed to ``load_fb_keywords``
    :param from_ts: passed to ``load_index``
    :param to_ts: passed to ``load_index``
    :param extend_keywords_size: passed to ``load_fb_keywords``
    :return: list of hit ``_source`` dicts
    """
    keywords = load_fb_keywords(fb_xnr_user_no, extend_keywords_size)
    search_query = load_query_body(keywords)
    index_list = load_index(facebook_flow_text_index_name_pre, from_ts, to_ts)
    print('222222222222 fb_index_list')
    print(index_list)
    hits = es_xnr_2.search(index=index_list,
                           doc_type=facebook_flow_text_index_type,
                           body=search_query)['hits']['hits']
    posts = []
    for hit in hits:
        posts.append(hit['_source'])
    return posts
Esempio n. 23
0
def search_tw_posts(tw_xnr_user_no, from_ts, to_ts, extend_keywords_size=0):
    """
    Search Twitter flow-text indices with a query built from xnr keywords.

    Keywords come from ``load_tw_keywords``, the query body from
    ``load_query_body`` and the index list from ``load_index``; the index
    list is printed before the search runs.

    :param tw_xnr_user_no: passed to ``load_tw_keywords``
    :param from_ts: passed to ``load_index``
    :param to_ts: passed to ``load_index``
    :param extend_keywords_size: passed to ``load_tw_keywords``
    :return: list of hit ``_source`` dicts
    """
    keywords = load_tw_keywords(tw_xnr_user_no, extend_keywords_size)
    search_query = load_query_body(keywords)
    index_list = load_index(twitter_flow_text_index_name_pre, from_ts, to_ts)
    print('3333333333333333333 tw_index_list')
    print(index_list)
    hits = es_xnr_2.search(index=index_list,
                           doc_type=twitter_flow_text_index_type,
                           body=search_query)['hits']['hits']
    posts = []
    for hit in hits:
        posts.append(hit['_source'])
    return posts
Esempio n. 24
0
def create_speech_warning(xnr_user_no, today_datetime):
    """
    Store a speech-warning document for every sensitive tweet of the day.

    Tweets with ``sensitive >= 1`` are read from the index (or indices)
    returned by ``get_timets_set_indexset_list`` for ``today_datetime``.
    Each one is tagged ``follow``/``unfollow`` depending on whether its uid
    is in the xnr's ``followers_list``, enriched with counters and nickname,
    and indexed under the id ``<xnr_user_no>_<tid>``.

    Indexing errors are not caught and propagate to the caller.

    :param xnr_user_no: xnr whose followers list is consulted; also stored on
        every document
    :param today_datetime: passed to ``get_timets_set_indexset_list``,
        ``lookup_tid_attend_index`` and ``ts2datetime``
    :return: list holding ``True`` once per document written
    """
    # Look up the follow list; a set keeps the per-tweet membership test
    # cheap.
    lookup_type = 'followers_list'
    followers_list = lookup_xnr_fans_followers(xnr_user_no, lookup_type)
    followed_uids = set(followers_list or [])

    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {'must': {'range': {'sensitive': {'gte': 1}}}}
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        'sort': {'sensitive': {'order': 'desc'}}
    }
    twitter_flow_text_index_name = get_timets_set_indexset_list(
        twitter_flow_text_index_name_pre, today_datetime, today_datetime)
    results = es_xnr_2.search(index=twitter_flow_text_index_name,
                              doc_type=twitter_flow_text_index_type,
                              body=query_body)['hits']['hits']

    # The target index depends only on the date, so compute it once.
    today_date = ts2datetime(today_datetime)
    twitter_speech_warning_index_name = (
        twitter_speech_warning_index_name_pre + today_date)

    result = []
    for item in results:
        source = item['_source']
        if source['uid'] in followed_uids:
            source['content_type'] = 'follow'
        else:
            source['content_type'] = 'unfollow'

        source['validity'] = 0
        source['xnr_user_no'] = xnr_user_no

        # Look up the three engagement counters.
        tid_result = lookup_tid_attend_index(source['tid'], today_datetime)
        for field in ('comment', 'share', 'favorite'):
            source[field] = tid_result[field] if tid_result else 0

        # Look up the user's nickname.
        source['nick_name'] = get_user_nickname(source['uid'])

        task_id = xnr_user_no + '_' + source['tid']

        # Write to the database.
        es_xnr_2.index(index=twitter_speech_warning_index_name,
                       doc_type=twitter_speech_warning_index_type,
                       body=source,
                       id=task_id)
        result.append(True)
    return result
Esempio n. 25
0
def savedata2es(date, index_pre, index_type, data):
    """Insert or update Facebook feedback records in Elasticsearch.

    For every record in ``data`` a term filter is built from the identity
    fields configured for ``index_pre`` (a field missing from the record is
    matched against ``''``).  If the search returns at least one hit, the
    first hit is partially updated with the record; otherwise the record is
    indexed as a new document.  The Elasticsearch response of either call is
    printed.

    Args:
        date: suffix appended to ``index_pre`` for the dated feedback indices.
        index_pre: index name prefix; must be a key of the field config
            below, otherwise each record is reported as an error.
        index_type: Elasticsearch doc type used for search, update and index.
        data: iterable of dict records to store.

    Returns:
        None.  Failures are not raised: their messages are appended to the
        module-level ``EXCEPTION`` string (assumed to be defined at module
        scope -- TODO confirm).
    """
    # Without this declaration the ``+=`` in the except-handler makes
    # EXCEPTION a local name, so every caught error turned into an
    # UnboundLocalError instead of being recorded.
    global EXCEPTION

    # Fields that together identify an already-stored record, per index.
    config = {
        'facebook_feedback_like_':
        ['uid', 'root_uid', 'timestamp', 'text', 'root_text', 'root_mid'],
        'facebook_feedback_comment_': [
            'uid', 'root_uid', 'mid', 'timestamp', 'text', 'root_text',
            'root_mid', 'comment_type'
        ],
        'facebook_feedback_retweet_': [
            'uid', 'root_uid', 'mid', 'timestamp', 'text', 'root_text',
            'root_mid'
        ],
        'facebook_feedback_private_':
        ['uid', 'root_uid', 'timestamp', 'text', 'root_text', 'private_type'],
        'facebook_feedback_friends': ['uid', 'root_uid'],
        'facebook_feedback_at_':
        ['uid', 'root_uid', 'mid', 'timestamp', 'text'],
    }
    dated_prefixes = (
        'facebook_feedback_at_',
        'facebook_feedback_comment_',
        'facebook_feedback_retweet_',
        'facebook_feedback_private_',
        'facebook_feedback_like_',
    )
    if index_pre in dated_prefixes:
        # Dated indices: write to the index for ``date``, look up across all.
        index_name = index_pre + date
        search_index_name = index_pre + '*'
    else:
        index_name = index_pre
        search_index_name = index_name

    for d in data:
        try:
            # The config lookup stays inside the try so that an unknown
            # prefix is reported once per record rather than raised.
            must_clauses = [{'term': {field: d.get(field, '')}}
                            for field in config[index_pre]]
            query_body = {
                "query": {
                    "filtered": {
                        "filter": {
                            "bool": {
                                "must": must_clauses
                            }
                        }
                    }
                }
            }
            query_result = es.search(search_index_name, index_type,
                                     query_body)['hits']['hits']
            if query_result:
                # NOTE(review): the lookup spans ``search_index_name`` but
                # the update targets ``index_name``; a hit living in another
                # dated index would presumably not be found here -- confirm.
                print(es.update(index=index_name,
                                doc_type=index_type,
                                body={'doc': d},
                                id=query_result[0]['_id']))
            else:
                print(es.index(index_name, index_type, d))
        except Exception as e:
            EXCEPTION += '\n savedata2es Exception: ' + str(e)
Esempio n. 26
0
def get_hashtag(today_datetime):
    """Rank the hashtags of sensitive Facebook posts for one day.

    The flow-text indices are resolved by ``get_timets_set_indexset_list``
    with ``today_datetime`` passed as both range arguments.  Documents whose
    ``sensitive`` value is at least 1 are bucketed by ``hashtag``, and each
    bucket also carries the sum of its ``sensitive`` values.

    Returns:
        A list of dicts with the keys ``event_name``, ``event_count`` and
        ``event_sensitive``, ordered by descending
        ``(event_sensitive, event_count)``.  Buckets with an empty key are
        left out.
    """
    index_names = get_timets_set_indexset_list(
        facebook_flow_text_index_name_pre, today_datetime, today_datetime)

    sensitive_only = {'range': {'sensitive': {'gte': 1}}}
    hashtag_buckets = {
        'terms': {'field': 'hashtag'},
        'aggs': {
            'sum_sensitive': {'sum': {'field': 'sensitive'}},
        },
    }
    query_body = {
        'query': {
            'filtered': {
                'filter': {'bool': {'must': [sensitive_only]}},
            },
        },
        'aggs': {'all_hashtag': hashtag_buckets},
        'size': EVENT_OFFLINE_COUNT,
    }

    response = es_xnr_2.search(index=index_names,
                               doc_type=facebook_flow_text_index_type,
                               body=query_body)
    buckets = response['aggregations']['all_hashtag']['buckets']

    # Skip buckets whose hashtag key is empty.
    events = [
        {
            'event_name': bucket['key'],
            'event_count': bucket['doc_count'],
            'event_sensitive': bucket['sum_sensitive']['value'],
        }
        for bucket in buckets
        if bucket['key']
    ]

    def rank(event):
        return (event.get('event_sensitive', 0), event.get('event_count', 0))

    return sorted(events, key=rank, reverse=True)
Esempio n. 27
0
def load_fb_flow_text(fb_flow_text_index_list,
                      uid_list,
                      fb_flow_text_query_body=None):
    """Collect the ``text_ch`` of Facebook flow-text documents per user.

    Args:
        fb_flow_text_index_list: index names to search, one request each.
        uid_list: uids used by the default query's ``terms`` filter; ignored
            when a custom query body is supplied.
        fb_flow_text_query_body: optional Elasticsearch request body.  When
            empty or omitted, a default body is built that selects documents
            of ``uid_list`` with ``flag_ch >= -1``, newest first, returning
            only the ``text_ch`` and ``uid`` fields.

    Returns:
        dict mapping uid -> ``{'text_dict': {doc_id: text}}`` where ``text``
        is the first 1800 characters of ``text_ch`` passed through
        ``traditional2simplified``, or ``''`` when the hit has no
        ``text_ch``.  An index whose search or processing fails is reported
        via ``print`` and skipped (hits handled before the failure are kept).
    """
    if not fb_flow_text_query_body:
        fb_flow_text_query_body = {
            'query': {
                "filtered": {
                    "filter": {
                        "bool": {
                            "must": [
                                {
                                    "terms": {
                                        "uid": uid_list
                                    }
                                },
                                {
                                    'range': {
                                        'flag_ch': {
                                            'gte': -1
                                        }
                                    }
                                },
                            ]
                        }
                    }
                }
            },
            'size': MAX_SEARCH_SIZE,
            "sort": {
                "timestamp": {
                    "order": "desc"
                }
            },
            "fields": ["text_ch", "uid"]
        }
    fb_flow_text = {}
    for index_name in fb_flow_text_index_list:
        try:
            search_results = es.search(
                index=index_name,
                doc_type=facebook_flow_text_index_type,
                body=fb_flow_text_query_body)['hits']['hits']
            for item in search_results:
                # With a "fields" request every returned value is a list.
                content = item['fields']
                uid = content['uid'][0]
                if uid not in fb_flow_text:
                    fb_flow_text[uid] = {'text_dict': {}}
                if 'text_ch' in content:
                    # Cap the text at 1800 characters to avoid trouble when
                    # it is translated later.
                    text = traditional2simplified(content['text_ch'][0][:1800])
                else:
                    text = ''
                fb_flow_text[uid]['text_dict'][item['_id']] = text
        except Exception as e:
            # Best effort: a failing index must not abort the others.
            print(e)
    # The collected texts were previously built and then dropped, so callers
    # always received None.
    return fb_flow_text
Esempio n. 28
0
def xnr_keywords_compute(xnr_user_no):
    #查询好友列表
    followers_list=lookup_xnr_concernedusers(xnr_user_no)
    lookup_condition_list=[]
    print 'xnr_user_no, followers_list:', xnr_user_no, followers_list
    lookup_condition_list.append({'filtered':{'filter':{'bool':{'must':{'terms':{'uid':followers_list}}}}}})

    #根据日期确定查询表
    if S_TYPE == 'test':
        date_time = test_date
    else:
        now_time=int(time.time())
        date_time=ts2datetime(now_time)
    flow_text_index_name=twitter_flow_text_index_name_pre+date_time

    #按日期统计
    # print lookup_condition_list
    for item_condition in lookup_condition_list:
        query_body={
            'query':item_condition,
            'aggs':{
                'keywords':{
                    'terms':{
                        'field':'keywords_string',
                        'size': 1000
                    }
                }
            }
        }
        
        flow_text_exist=es_xnr_2.search(index=flow_text_index_name,doc_type=twitter_flow_text_index_type,\
               body=query_body)['aggregations']['keywords']['buckets']

        # print 'flow_text_exist:',flow_text_exist
        word_dict = dict()

        word_dict_new = dict()

        keywords_string = ''
        for item in flow_text_exist:
            word = item['key']
            count = item['doc_count']
            word_dict[word] = count

            keywords_string += '&'
            keywords_string += item['key']

        k_dict = extract_keywords(keywords_string)

        for item_item in k_dict:
            keyword = item_item.word
            # print 'keyword::',type(keyword)
            word_dict_new[keyword] = word_dict[keyword]
        

    return word_dict_new
def load_tw_uids(xnr_user_no):
    """Return the uids stored in the Twitter XNR relations index.

    The request body comes from ``load_uid_query()``.  Unless
    ``xnr_user_no`` is the string ``'ALL'``, a ``term`` clause on ``xnr_no``
    is appended to its ``must`` list so only that XNR's relations match.

    Returns:
        list of the ``uid`` values of the returned hits, in hit order.
    """
    query_body = load_uid_query()
    if xnr_user_no != 'ALL':
        must_clauses = query_body['query']['filtered']['filter']['bool']['must']
        must_clauses.append({'term': {'xnr_no': xnr_user_no}})

    response = es_xnr_2.search(index=twitter_xnr_relations_index_name,
                               doc_type=twitter_xnr_relations_index_type,
                               body=query_body)
    uids = []
    for hit in response['hits']['hits']:
        uids.append(hit['_source']['uid'])
    return uids
Esempio n. 30
0
def load_uid_list():
    uid_list = []
    uid_list_query_body = {'size': MAX_SEARCH_SIZE}
    try:
        search_results = es.search(index=twitter_user_index_name,
                                   doc_type=twitter_user_index_type,
                                   body=uid_list_query_body)['hits']['hits']
        for item in search_results:
            uid_list.append(item['_source']['uid'])
    except Exception, e:
        print e