예제 #1
0
def _rank_by_count(counts):
    """Return the ``(name, count)`` pairs of *counts*, largest count first."""
    return sorted(counts.items(), key=lambda pair: pair[1], reverse=True)


def _test_week_dates():
    """Return the seven day keys of the fixed test week, oldest first.

    The window is anchored on the hard-coded day '2013-09-08': it starts
    seven days before that timestamp and ends one day before it. Each key is
    ``ts2datetime(ts)`` with the dashes removed.
    """
    day_seconds = 24 * 3600
    first_ts = datetime2ts('2013-09-08') - 8 * day_seconds
    return [ts2datetime(first_ts + step * day_seconds).replace('-', '')
            for step in range(1, 8)]


def _load_daily_counts(raw_json, week_dates, rank_present_days=False):
    """Decode a JSON ``{day: {name: count}}`` blob and pad the test week.

    Args:
        raw_json: JSON text, or a falsy value when the field is empty.
        week_dates: day keys that must exist when fewer than 7 days are stored.
        rank_present_days: when true, week days that already exist are
            replaced by their ``(name, count)`` pairs sorted by count.

    Returns:
        The decoded dict (``{}`` for a falsy *raw_json*). Padding and ranking
        only happen when the blob holds fewer than 7 days, so values may be a
        mix of dicts and ranked pair lists.
    """
    if not raw_json:
        return {}
    per_day = json.loads(raw_json)
    if len(per_day) < 7:
        for day in week_dates:
            if day not in per_day:
                per_day[day] = {}
            elif rank_present_days:
                per_day[day] = _rank_by_count(per_day[day])
    return per_day


def sensitive_attribute(uid, date):
    """Build the sensitive-user summary dict for *uid*.

    Args:
        uid: document id looked up in the 'sensitive_user_portrait' index and
            in the per-day 'bci' index.
        date: accepted for interface compatibility but not used.
            NOTE(review): statistics are read from the hard-coded test day
            '20130907'; presumably this should become "yesterday"
            (``ts2datetime(time.time() - 24 * 3600)``) or this argument.

    Returns:
        ``{'utype': 0}`` when ``user_type(uid)`` is falsy. Otherwise a dict
        with 'utype' set to 1 plus the profile fields, weibo counters,
        sentiment trend, geo/hashtag/word distributions and the
        retweet/follow/mention search results.
    """
    results = {}
    if not user_type(uid):
        results['utype'] = 0
        return results
    results['utype'] = 1
    results['uid'] = uid

    # Fetched once and reused for every portrait field below.
    portrait = es.get(index='sensitive_user_portrait',
                      doc_type='user',
                      id=uid)['_source']
    # A stored value of 0 is reported as 'unknown'.
    results['uname'] = (
        'unknown' if portrait['uname'] == 0 else portrait['uname'])
    results['photo_url'] = (
        'unknown' if portrait['photo_url'] == 0 else portrait['photo_url'])

    stat_date = '20130907'  # test day, see NOTE(review) in the docstring

    # (field in the bci document, key of the combined total in ``results``).
    # The sensitive counterpart is stored as 's_' + field in the document and
    # reported as 'sensitive_' + field.
    counters = (
        ('origin_weibo_number', 'origin_weibo_total_number'),
        ('retweeted_weibo_number', 'retweeted_weibo_total_number'),
        ('comment_weibo_number', 'comment_weibo_total_number'),
        ('origin_weibo_retweeted_total_number',
         'origin_weibo_retweeted_total_number'),
        ('origin_weibo_comment_total_number',
         'origin_weibo_comment_total_number'),
        ('retweeted_weibo_retweeted_total_number',
         'retweeted_weibo_retweeted_total_number'),
        ('retweeted_weibo_comment_total_number',
         'retweeted_weibo_comment_total_number'),
    )
    bci = {}
    try:
        bci = es.get(index=stat_date, doc_type='bci', id=uid)['_source']
        sensitive_counts = dict(
            (field, bci.get('s_' + field, 0)) for field, _ in counters)
        sensitive_counts['comment_weibo_number'] = int(
            sensitive_counts['comment_weibo_number'])
    except Exception:
        # Best effort: a missing document or malformed counter zeroes every
        # sensitive counter; ``bci`` keeps whatever was fetched.
        sensitive_counts = dict((field, 0) for field, _ in counters)

    for field, total_key in counters:
        sensitive_value = sensitive_counts[field]
        results['sensitive_' + field] = sensitive_value
        if field == 'comment_weibo_number':
            # Only the comment counters are coerced with int().
            results[total_key] = int(bci.get(field, 0)) + int(sensitive_value)
        else:
            results[total_key] = bci.get(field, 0) + sensitive_value

    results['sensitive_text'] = sort_sensitive_text(uid)
    results['sensitive_time_distribute'] = get_user_trend(uid)[1]

    sentiment_trend = user_sentiment_trend(uid)
    emotion_number = sentiment_trend[0]
    sentiment_dict = sentiment_trend[1]
    emotion_total = emotion_number[2] + emotion_number[1] + emotion_number[0]
    if emotion_total:
        results['negetive_index'] = float(emotion_number[2]) / emotion_total
        results['negetive_influence'] = (
            float(emotion_number[1]) / emotion_total)
    else:
        # No emotion samples at all: report 0 instead of dividing by zero.
        results['negetive_index'] = 0.0
        results['negetive_influence'] = 0.0

    week_dates = _test_week_dates()

    return_sentiment = {'positive': [], 'neutral': [], 'negetive': []}
    for day in week_dates:
        day_sentiment = sentiment_dict.get(day, {})
        for mood in ('positive', 'negetive', 'neutral'):
            return_sentiment[mood].append([day_sentiment.get(mood, 0), day])
    results['sentiment_trend'] = return_sentiment

    results['politics_trend'] = portrait['politics_trend']
    results['domain'] = portrait['domain']
    results['sensitive'] = portrait['sensitive']
    raw_hashtag = portrait['sensitive_hashtag_dict']
    raw_words = portrait['sensitive_words_dict']
    raw_geo = portrait['sensitive_geo_activity']

    # Per day, keep the two locations with the highest counts.
    geo_by_day = _load_daily_counts(raw_geo, week_dates)
    results['sensitive_geo_distribute'] = [
        [day, _rank_by_count(places)[:2]]
        for day, places in sorted(geo_by_day.items(),
                                  key=lambda pair: pair[0])
    ]

    hashtag_by_day = _load_daily_counts(raw_hashtag, week_dates,
                                        rank_present_days=True)
    if raw_hashtag:
        results['sensitive_hashtag_description'] = hashtag_description(
            hashtag_by_day)
    else:
        results['sensitive_hashtag_description'] = ''

    words_by_day = _load_daily_counts(raw_words, week_dates)
    results['today_sensitive_words'] = words_by_day.get(stat_date, {})

    # A day's hashtags are either a raw {tag: count} dict or an already ranked
    # list of (tag, count) pairs; accept both so tags are never indexed
    # character by character.
    all_hashtag_dict = {}
    for day_tags in hashtag_by_day.values():
        pairs = day_tags.items() if isinstance(day_tags, dict) else day_tags
        for tag, count in pairs:
            all_hashtag_dict[tag] = all_hashtag_dict.get(tag, 0) + count

    all_sensitive_words_dict = {}
    for day_words in words_by_day.values():
        for word, count in day_words.items():
            all_sensitive_words_dict[word] = (
                all_sensitive_words_dict.get(word, 0) + count)

    results['sensitive_hashtag'] = _rank_by_count(all_hashtag_dict)
    results['sensitive_words'] = sort_sensitive_words(
        _rank_by_count(all_sensitive_words_dict))
    results['sensitive_hashtag_dict'] = sorted(hashtag_by_day.items(),
                                               key=lambda pair: pair[0])
    results['sensitive_words_dict'] = sorted(words_by_day.items(),
                                             key=lambda pair: pair[0])

    results['sensitive_retweet'] = search_retweet(uid, 1)
    results['sensitive_follow'] = search_follower(uid, 1)
    results['sensitive_at'] = search_mention(uid, 1)

    return results
def _rank_by_count(counts):
    """Return the ``(name, count)`` pairs of *counts*, largest count first."""
    return sorted(counts.items(), key=lambda pair: pair[1], reverse=True)


def _test_week_dates():
    """Return the seven day keys of the fixed test week, oldest first.

    The window is anchored on the hard-coded day '2013-09-08': it starts
    seven days before that timestamp and ends one day before it. Each key is
    ``ts2datetime(ts)`` with the dashes removed.
    """
    day_seconds = 24 * 3600
    first_ts = datetime2ts('2013-09-08') - 8 * day_seconds
    return [ts2datetime(first_ts + step * day_seconds).replace('-', '')
            for step in range(1, 8)]


def _load_daily_counts(raw_json, week_dates, rank_present_days=False):
    """Decode a JSON ``{day: {name: count}}`` blob and pad the test week.

    Args:
        raw_json: JSON text, or a falsy value when the field is empty.
        week_dates: day keys that must exist when fewer than 7 days are stored.
        rank_present_days: when true, week days that already exist are
            replaced by their ``(name, count)`` pairs sorted by count.

    Returns:
        The decoded dict (``{}`` for a falsy *raw_json*). Padding and ranking
        only happen when the blob holds fewer than 7 days, so values may be a
        mix of dicts and ranked pair lists.
    """
    if not raw_json:
        return {}
    per_day = json.loads(raw_json)
    if len(per_day) < 7:
        for day in week_dates:
            if day not in per_day:
                per_day[day] = {}
            elif rank_present_days:
                per_day[day] = _rank_by_count(per_day[day])
    return per_day


def sensitive_attribute(uid, date):
    """Build the sensitive-user summary dict for *uid*.

    Args:
        uid: document id looked up in the 'sensitive_user_portrait' index and
            in the per-day 'bci' index.
        date: accepted for interface compatibility but not used.
            NOTE(review): statistics are read from the hard-coded test day
            '20130907'; presumably this should become "yesterday"
            (``ts2datetime(time.time() - 24 * 3600)``) or this argument.

    Returns:
        ``{'utype': 0}`` when ``user_type(uid)`` is falsy. Otherwise a dict
        with 'utype' set to 1 plus the profile fields, weibo counters,
        sentiment trend, geo/hashtag/word distributions and the
        retweet/follow/mention search results.
    """
    results = {}
    if not user_type(uid):
        results['utype'] = 0
        return results
    results['utype'] = 1
    results['uid'] = uid

    # Fetched once and reused for every portrait field below.
    portrait = es.get(index='sensitive_user_portrait',
                      doc_type='user',
                      id=uid)['_source']
    # A stored value of 0 is reported as 'unknown'.
    results['uname'] = (
        'unknown' if portrait['uname'] == 0 else portrait['uname'])
    results['photo_url'] = (
        'unknown' if portrait['photo_url'] == 0 else portrait['photo_url'])

    stat_date = '20130907'  # test day, see NOTE(review) in the docstring

    # (field in the bci document, key of the combined total in ``results``).
    # The sensitive counterpart is stored as 's_' + field in the document and
    # reported as 'sensitive_' + field.
    counters = (
        ('origin_weibo_number', 'origin_weibo_total_number'),
        ('retweeted_weibo_number', 'retweeted_weibo_total_number'),
        ('comment_weibo_number', 'comment_weibo_total_number'),
        ('origin_weibo_retweeted_total_number',
         'origin_weibo_retweeted_total_number'),
        ('origin_weibo_comment_total_number',
         'origin_weibo_comment_total_number'),
        ('retweeted_weibo_retweeted_total_number',
         'retweeted_weibo_retweeted_total_number'),
        ('retweeted_weibo_comment_total_number',
         'retweeted_weibo_comment_total_number'),
    )
    bci = {}
    try:
        bci = es.get(index=stat_date, doc_type='bci', id=uid)['_source']
        sensitive_counts = dict(
            (field, bci.get('s_' + field, 0)) for field, _ in counters)
        sensitive_counts['comment_weibo_number'] = int(
            sensitive_counts['comment_weibo_number'])
    except Exception:
        # Best effort: a missing document or malformed counter zeroes every
        # sensitive counter; ``bci`` keeps whatever was fetched.
        sensitive_counts = dict((field, 0) for field, _ in counters)

    for field, total_key in counters:
        sensitive_value = sensitive_counts[field]
        results['sensitive_' + field] = sensitive_value
        if field == 'comment_weibo_number':
            # Only the comment counters are coerced with int().
            results[total_key] = int(bci.get(field, 0)) + int(sensitive_value)
        else:
            results[total_key] = bci.get(field, 0) + sensitive_value

    results['sensitive_text'] = sort_sensitive_text(uid)
    results['sensitive_time_distribute'] = get_user_trend(uid)[1]

    sentiment_trend = user_sentiment_trend(uid)
    emotion_number = sentiment_trend[0]
    sentiment_dict = sentiment_trend[1]
    emotion_total = emotion_number[2] + emotion_number[1] + emotion_number[0]
    if emotion_total:
        results['negetive_index'] = float(emotion_number[2]) / emotion_total
        results['negetive_influence'] = (
            float(emotion_number[1]) / emotion_total)
    else:
        # No emotion samples at all: report 0 instead of dividing by zero.
        results['negetive_index'] = 0.0
        results['negetive_influence'] = 0.0

    week_dates = _test_week_dates()

    return_sentiment = {'positive': [], 'neutral': [], 'negetive': []}
    for day in week_dates:
        day_sentiment = sentiment_dict.get(day, {})
        for mood in ('positive', 'negetive', 'neutral'):
            return_sentiment[mood].append([day_sentiment.get(mood, 0), day])
    results['sentiment_trend'] = return_sentiment

    results['politics_trend'] = portrait['politics_trend']
    results['domain'] = portrait['domain']
    results['sensitive'] = portrait['sensitive']
    raw_hashtag = portrait['sensitive_hashtag_dict']
    raw_words = portrait['sensitive_words_dict']
    raw_geo = portrait['sensitive_geo_activity']

    # Per day, keep the two locations with the highest counts.
    geo_by_day = _load_daily_counts(raw_geo, week_dates)
    results['sensitive_geo_distribute'] = [
        [day, _rank_by_count(places)[:2]]
        for day, places in sorted(geo_by_day.items(),
                                  key=lambda pair: pair[0])
    ]

    hashtag_by_day = _load_daily_counts(raw_hashtag, week_dates,
                                        rank_present_days=True)
    if raw_hashtag:
        results['sensitive_hashtag_description'] = hashtag_description(
            hashtag_by_day)
    else:
        results['sensitive_hashtag_description'] = ''

    words_by_day = _load_daily_counts(raw_words, week_dates)
    results['today_sensitive_words'] = words_by_day.get(stat_date, {})

    # A day's hashtags are either a raw {tag: count} dict or an already ranked
    # list of (tag, count) pairs; accept both so tags are never indexed
    # character by character.
    all_hashtag_dict = {}
    for day_tags in hashtag_by_day.values():
        pairs = day_tags.items() if isinstance(day_tags, dict) else day_tags
        for tag, count in pairs:
            all_hashtag_dict[tag] = all_hashtag_dict.get(tag, 0) + count

    all_sensitive_words_dict = {}
    for day_words in words_by_day.values():
        for word, count in day_words.items():
            all_sensitive_words_dict[word] = (
                all_sensitive_words_dict.get(word, 0) + count)

    results['sensitive_hashtag'] = _rank_by_count(all_hashtag_dict)
    results['sensitive_words'] = sort_sensitive_words(
        _rank_by_count(all_sensitive_words_dict))
    results['sensitive_hashtag_dict'] = sorted(hashtag_by_day.items(),
                                               key=lambda pair: pair[0])
    results['sensitive_words_dict'] = sorted(words_by_day.items(),
                                             key=lambda pair: pair[0])

    results['sensitive_retweet'] = search_retweet(uid, 1)
    results['sensitive_follow'] = search_follower(uid, 1)
    results['sensitive_at'] = search_mention(uid, 1)

    return results
예제 #3
0
def _top_counts(raw_json, limit=None):
    """Decode a JSON ``{name: count}`` object into pairs sorted by count.

    Args:
        raw_json: JSON text, or a falsy value when the field is empty.
        limit: keep only the first *limit* pairs; ``None`` keeps them all.

    Returns:
        A list of ``(name, count)`` pairs, largest count first (``[]`` for a
        falsy *raw_json*).
    """
    if not raw_json:
        return []
    counts = json.loads(raw_json)
    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:limit]


def _count_scored_at_least(field, score, index_name, index_type,
                           error_message):
    """Count portrait documents whose *field* lies in ``[score, 1000000]``.

    Prints *error_message* and returns 0 when no shard answered the query.
    """
    query_body = {
        'query': {
            'range': {
                field: {
                    'from': score,
                    'to': 1000000
                }
            }
        }
    }
    response = es_user_portrait.count(index=index_name, doc_type=index_type,
                                      body=query_body)
    if response['_shards']['successful'] != 0:
        return response['count']
    print(error_message)
    return 0


def search_attribute_portrait(uid):
    """Load the 'user_portrait' document of *uid* and decode it for display.

    The JSON-encoded fields of the stored document are replaced in place by
    sorted ``(name, count)`` lists, the '_'-joined fields by lists, and the
    description, rank, total-count and conclusion fields are added.

    Args:
        uid: id of the document in the 'user_portrait' index.

    Returns:
        The enriched ``_source`` dict, or ``None`` when the document cannot
        be fetched.
    """
    index_name = 'user_portrait'
    index_type = 'user'
    try:
        results = es_user_portrait.get(index=index_name, doc_type=index_type,
                                       id=uid)['_source']
    except Exception:
        # Any lookup failure (e.g. unknown uid) means "no portrait".
        return None

    results['keywords'] = _top_counts(results['keywords'])
    # The raw 'activity_geo_dict' field is left untouched.
    results['activity_geo'] = _top_counts(results['activity_geo_dict'])

    raw_hashtags = results['hashtag_dict']
    if raw_hashtags:
        # The description is built from the full dict, not only the top 5.
        hashtag_counts = json.loads(raw_hashtags)
        ranked_hashtags = sorted(hashtag_counts.items(),
                                 key=lambda pair: pair[1], reverse=True)
        results['hashtag_dict'] = ranked_hashtags[:5]
        results['hashtag_description'] = hashtag_description(hashtag_counts)
    else:
        results['hashtag_dict'] = []
        results['hashtag_description'] = ''

    emotion_result = {}
    emotion_conclusion_dict = {}
    if results['emotion_words']:
        emotion_words_dict = json.loads(results['emotion_words'])
        for word_type in emotion_mark_dict:
            try:
                word_dict = emotion_words_dict[word_type]
                # Types '126' and '127' also feed the emotion conclusion.
                if word_type in ('126', '127'):
                    emotion_conclusion_dict[word_type] = word_dict
                word_list = sorted(word_dict.items(),
                                   key=lambda pair: pair[1],
                                   reverse=True)[:5]
            except (KeyError, AttributeError, TypeError):
                # Missing or malformed entry for this word type.
                word_list = []
            emotion_result[emotion_mark_dict[word_type]] = word_list
    results['emotion_words'] = emotion_result
    results['emotion_conclusion'] = get_emotion_conclusion(
        emotion_conclusion_dict)

    results['topic'] = _top_counts(results['topic'], 5)
    results['domain'] = (
        results['domain'].split('_') if results['domain'] else [])
    results['emoticon'] = _top_counts(results['emoticon'], 5)
    results['online_pattern'] = _top_counts(results['online_pattern'], 5)
    results['psycho_status'] = _top_counts(results['psycho_status'], 5)
    results['psycho_feature'] = (
        results['psycho_feature'].split('_')
        if results['psycho_feature'] else [])

    # Description comes from the 'weibo_user' profile of the portrait's uid.
    if results['uid']:
        try:
            profile_result = es_user_profile.get(index='weibo_user',
                                                 doc_type='user',
                                                 id=results['uid'])
            results['description'] = profile_result['_source']['description']
        except Exception:
            # Profile missing or without a description: fall back to ''.
            results['description'] = ''
    else:
        results['uid'] = ''
        results['description'] = ''

    # Rank = number of portraits scoring at least as high on the same field.
    # Every rank key is always present; a falsy score yields rank 0.
    rank_fields = (
        ('importance', 'es_importance_rank error'),
        ('activeness', 'es_activess_rank error'),
        ('influence', 'es_influence_rank error'),
    )
    for field, error_message in rank_fields:
        if results[field]:
            results[field + '_rank'] = _count_scored_at_least(
                field, results[field], index_name, index_type, error_message)
        else:
            results[field + '_rank'] = 0

    # Total number of documents in user_portrait.
    query_body = {
        'query': {
            'match_all': {}
        }
    }
    all_count_results = es_user_portrait.count(index=index_name,
                                               doc_type=index_type,
                                               body=query_body)
    if all_count_results['_shards']['successful'] != 0:
        results['all_count'] = all_count_results['count']
    else:
        print('es_user_portrait error')
        results['all_count'] = 0

    results['link_conclusion'] = get_link_conclusion(results['link'])
    return results
예제 #4
0
def sensitive_attribute(uid, date):
    """Assemble the sensitive-behaviour attributes of one user into a dict.

    The function reads the user's document from the
    ``sensitive_user_portrait`` index and the ``bci`` document from a
    fixed daily index, calls several helper queries, and merges
    everything into a single result dictionary.

    NOTE: the ``date`` argument is currently ignored.  All per-day lookups
    use the hard-coded test day ``"20130907"`` and the seven days before
    ``2013-09-08`` (see ``_sensitive_test_week_dates``).

    :param uid: id of the user to look up.
    :param date: accepted for interface compatibility; not used.
    :returns: ``{"utype": 0}`` when ``user_type(uid)`` is falsy, otherwise
        a dict with ``"utype": 1`` plus the profile, counter, sentiment,
        geo, hashtag, sensitive-word and interaction entries built below.
    :raises: errors from the portrait ``es.get`` call and ``KeyError`` for
        missing portrait fields are not caught and propagate to the caller.
    """
    results = {}
    if not user_type(uid):
        results["utype"] = 0
        return results
    results["utype"] = 1
    results["uid"] = uid

    # Fetched once; the same document also supplies the trend, hashtag,
    # word and geo fields read further down.
    portrait_result = es.get(index="sensitive_user_portrait", doc_type="user", id=uid)["_source"]
    results["uname"] = portrait_result["uname"]
    if portrait_result["uname"] == 0:
        results["uname"] = "unknown"
    if portrait_result["photo_url"] == 0:
        portrait_result["photo_url"] = "unknown"
    if portrait_result["location"] == 0:
        portrait_result["location"] = "unknown"
    results["photo_url"] = portrait_result["photo_url"]

    # sensitive weibo number statistics
    # Hard-coded test day; the live value would presumably be yesterday's
    # date in YYYYMMDD form -- TODO confirm before switching.
    date = "20130907"  # test
    counter_names = (
        "origin_weibo_number",
        "retweeted_weibo_number",
        "comment_weibo_number",
        "retweeted_weibo_retweeted_total_number",
        "origin_weibo_retweeted_total_number",
        "origin_weibo_comment_total_number",
        "retweeted_weibo_comment_total_number",
    )
    bci_doc = {}
    try:
        bci_doc = es.get(index=date, doc_type="bci", id=uid)["_source"]
        sensitive_counts = dict((name, bci_doc.get("s_" + name, 0)) for name in counter_names)
        sensitive_counts["comment_weibo_number"] = int(sensitive_counts["comment_weibo_number"])
    except Exception:
        # Best effort: any failure (missing document, bad value) zeroes
        # every sensitive counter, as a group.
        sensitive_counts = dict.fromkeys(counter_names, 0)
    for name in counter_names:
        results["sensitive_" + name] = sensitive_counts[name]

    # Totals = ordinary counter + sensitive counter.  The ordinary counters
    # live in the same bci document, so it is reused instead of re-fetched;
    # bci_doc stays {} when the fetch itself failed.
    results["origin_weibo_total_number"] = (
        bci_doc.get("origin_weibo_number", 0) + results["sensitive_origin_weibo_number"]
    )
    results["retweeted_weibo_total_number"] = (
        bci_doc.get("retweeted_weibo_number", 0) + results["sensitive_retweeted_weibo_number"]
    )
    results["comment_weibo_total_number"] = int(bci_doc.get("comment_weibo_number", 0)) + int(
        results["sensitive_comment_weibo_number"]
    )
    results["origin_weibo_retweeted_total_number"] = (
        bci_doc.get("origin_weibo_retweeted_total_number", 0)
        + results["sensitive_origin_weibo_retweeted_total_number"]
    )
    results["origin_weibo_comment_total_number"] = (
        bci_doc.get("origin_weibo_comment_total_number", 0)
        + results["sensitive_origin_weibo_comment_total_number"]
    )
    results["retweeted_weibo_retweeted_total_number"] = (
        bci_doc.get("retweeted_weibo_retweeted_total_number", 0)
        + results["sensitive_retweeted_weibo_retweeted_total_number"]
    )
    results["retweeted_weibo_comment_total_number"] = (
        bci_doc.get("retweeted_weibo_comment_total_number", 0)
        + results["sensitive_retweeted_weibo_comment_total_number"]
    )

    results["sensitive_text"] = sort_sensitive_text(uid)

    # Defaults that survive when the portrait has no geo / hashtag data.
    results["sensitive_geo_distribute"] = []
    results["sensitive_time_distribute"] = get_user_trend(uid)[1]
    results["sensitive_hashtag_description"] = ""

    sentiment_trend = user_sentiment_trend(uid)
    emotion_number = sentiment_trend[0]
    emotion_total = emotion_number[2] + emotion_number[1] + emotion_number[0]
    if emotion_total:
        results["negetive_index"] = float(emotion_number[2]) / emotion_total
        results["negetive_influence"] = float(emotion_number[1]) / emotion_total
    else:
        # No counted emotions at all: report 0.0 instead of dividing by zero.
        results["negetive_index"] = 0.0
        results["negetive_influence"] = 0.0

    week_dates = _sensitive_test_week_dates()

    # One [value, date] pair per day and per sentiment label.
    sentiment_dict = sentiment_trend[1]
    return_sentiment = {"positive": [], "neutral": [], "negetive": []}
    for day_key in week_dates:
        day_sentiment = sentiment_dict.get(day_key, {})
        for label in ("positive", "negetive", "neutral"):
            return_sentiment[label].append([day_sentiment.get(label, 0), day_key])
    results["sentiment_trend"] = return_sentiment

    results["politics_trend"] = portrait_result["politics_trend"]
    results["domain"] = portrait_result["domain"]
    results["sensitive"] = portrait_result["sensitive"]
    temp_hashtag = portrait_result["sensitive_hashtag_dict"]
    temp_sensitive_words = portrait_result["sensitive_words_dict"]
    temp_sensitive_geo = portrait_result["sensitive_geo_activity"]

    if temp_sensitive_geo:
        sensitive_geo_dict = json.loads(temp_sensitive_geo)
        if len(sensitive_geo_dict) < 7:
            # Pad missing days of the week with empty entries.
            for day_key in week_dates:
                sensitive_geo_dict.setdefault(day_key, {})
        sensitive_geo_list = []
        for day_key, geo_counts in sorted(sensitive_geo_dict.items(), key=lambda x: x[0]):
            # Keep only the two highest-valued locations of each day.
            top_geo = sorted(geo_counts.items(), key=lambda x: x[1], reverse=True)[0:2]
            sensitive_geo_list.append([day_key, top_geo])
        results["sensitive_geo_distribute"] = sensitive_geo_list

    if temp_hashtag:
        hashtag_dict = json.loads(temp_hashtag)
        if len(hashtag_dict) < 7:
            for day_key in week_dates:
                if day_key in hashtag_dict:
                    # Existing days become (tag, value) lists, highest first.
                    hashtag_dict[day_key] = sorted(
                        hashtag_dict[day_key].items(), key=lambda x: x[1], reverse=True
                    )
                else:
                    hashtag_dict[day_key] = {}
        results["sensitive_hashtag_description"] = hashtag_description(hashtag_dict)
    else:
        hashtag_dict = {}

    if temp_sensitive_words:
        sensitive_words_dict = json.loads(temp_sensitive_words)
        if len(sensitive_words_dict) < 7:
            for day_key in week_dates:
                sensitive_words_dict.setdefault(day_key, {})
    else:
        sensitive_words_dict = {}

    results["today_sensitive_words"] = sensitive_words_dict.get(date, {})

    # Sum hashtag values over all days.  A day's value is either a list of
    # (tag, value) pairs (converted above) or still a plain dict (when the
    # conversion was skipped), so both shapes are accepted here; indexing
    # the keys of a dict would slice the tag string instead.
    all_hashtag_dict = {}
    for day_hashtags in hashtag_dict.values():
        if isinstance(day_hashtags, dict):
            tag_pairs = day_hashtags.items()
        else:
            tag_pairs = day_hashtags
        for tag, tag_value in tag_pairs:
            if tag in all_hashtag_dict:
                all_hashtag_dict[tag] += tag_value
            else:
                all_hashtag_dict[tag] = tag_value

    # Same aggregation for sensitive words (values are always dicts here).
    all_sensitive_words_dict = {}
    for day_words in sensitive_words_dict.values():
        for word, word_value in day_words.items():
            if word in all_sensitive_words_dict:
                all_sensitive_words_dict[word] += word_value
            else:
                all_sensitive_words_dict[word] = word_value

    sorted_words = sorted(all_sensitive_words_dict.items(), key=lambda x: x[1], reverse=True)
    results["sensitive_hashtag"] = sorted(all_hashtag_dict.items(), key=lambda x: x[1], reverse=True)
    results["sensitive_words"] = sort_sensitive_words(sorted_words)
    results["sensitive_hashtag_dict"] = sorted(hashtag_dict.items(), key=lambda x: x[0])
    results["sensitive_words_dict"] = sorted(sensitive_words_dict.items(), key=lambda x: x[0])

    results["sensitive_retweet"] = search_retweet(uid, 1)
    results["sensitive_follow"] = search_follower(uid, 1)
    results["sensitive_at"] = search_mention(uid, 1)

    return results


def _sensitive_test_week_dates():
    """Return the seven date keys for the 7 days preceding 2013-09-08.

    Each key is ``ts2datetime(...)`` with the dashes removed, oldest day
    first.  The anchor day is hard-coded test data.
    """
    one_day = 24 * 3600
    start_ts = datetime2ts("2013-09-08") - 8 * one_day
    return [ts2datetime(start_ts + offset * one_day).replace("-", "") for offset in range(1, 8)]