def _sensitive_week_dates():
    """Return the seven daily index names ('YYYYMMDD') of the fixed test week.

    The window is anchored on the hard-coded day '2013-09-08': it starts
    eight days earlier and then advances one day at a time for seven steps.
    """
    base_ts = datetime2ts('2013-09-08') - 8 * 24 * 3600
    return [ts2datetime(base_ts + step * 24 * 3600).replace('-', '')
            for step in range(1, 8)]


def _sensitive_fill_days(per_day, week):
    """Add an empty entry for each missing day of `week` when `per_day` has fewer than 7 keys."""
    if len(per_day) < 7:
        for day in week:
            if day not in per_day:
                per_day[day] = {}
    return per_day


def sensitive_attribute(uid, date):
    """Collect the sensitive-behaviour attributes of one user.

    Args:
        uid: user id; used as the document id in every Elasticsearch lookup.
        date: currently ignored. The daily statistics are always read from
            the hard-coded test index '20130907'.

    Returns:
        dict. When ``user_type(uid)`` is falsy only ``{'utype': 0}`` is
        returned; otherwise the dict carries ``'utype': 1`` plus the weibo
        counters, sentiment trend, geo / hashtag / sensitive-word
        distributions and the retweet / follow / mention search results.
    """
    results = {}
    if not user_type(uid):
        results['utype'] = 0
        return results
    results['utype'] = 1
    results['uid'] = uid

    # One portrait fetch serves both the profile fields and the per-day
    # dictionaries read further down.
    portrait = es.get(index='sensitive_user_portrait', doc_type='user',
                      id=uid)['_source']
    # The portrait stores 0 for a missing value.
    results['uname'] = 'unknown' if portrait['uname'] == 0 else portrait['uname']
    results['photo_url'] = ('unknown' if portrait['photo_url'] == 0
                            else portrait['photo_url'])

    # sensitive weibo number statistics
    date = '20130907'  # test
    counters = (
        'origin_weibo_number',
        'retweeted_weibo_number',
        'comment_weibo_number',
        'origin_weibo_retweeted_total_number',
        'origin_weibo_comment_total_number',
        'retweeted_weibo_retweeted_total_number',
        'retweeted_weibo_comment_total_number',
    )
    try:
        bci = es.get(index=date, doc_type='bci', id=uid)['_source']
    except Exception:
        # Best effort: a missing index/document means "no activity that day".
        bci = {}
    try:
        sensitive = dict((name, bci.get('s_' + name, 0)) for name in counters)
        sensitive['comment_weibo_number'] = int(sensitive['comment_weibo_number'])
    except (TypeError, ValueError):
        # Malformed counter: fall back to zeros for all sensitive counters.
        sensitive = dict((name, 0) for name in counters)
    for name in counters:
        results['sensitive_' + name] = sensitive[name]

    results['origin_weibo_total_number'] = (
        bci.get('origin_weibo_number', 0) + sensitive['origin_weibo_number'])
    results['retweeted_weibo_total_number'] = (
        bci.get('retweeted_weibo_number', 0) + sensitive['retweeted_weibo_number'])
    results['comment_weibo_total_number'] = (
        int(bci.get('comment_weibo_number', 0))
        + int(sensitive['comment_weibo_number']))
    for name in counters[3:]:
        results[name] = bci.get(name, 0) + sensitive[name]

    results['sensitive_text'] = sort_sensitive_text(uid)
    results['sensitive_geo_distribute'] = []
    results['sensitive_time_distribute'] = get_user_trend(uid)[1]
    results['sensitive_hashtag'] = []
    results['sensitive_words'] = []
    results['sensitive_hashtag_dict'] = []
    results['sensitive_words_dict'] = []
    results['sensitive_hashtag_description'] = ''

    sentiment_trend = user_sentiment_trend(uid)
    emotion_number = sentiment_trend[0]
    emotion_total = emotion_number[2] + emotion_number[1] + emotion_number[0]
    if emotion_total:
        results['negetive_index'] = float(emotion_number[2]) / emotion_total
        results['negetive_influence'] = float(emotion_number[1]) / emotion_total
    else:
        # No counted weibo at all: report 0.0 instead of dividing by zero.
        results['negetive_index'] = 0.0
        results['negetive_influence'] = 0.0

    week = _sensitive_week_dates()
    sentiment_dict = sentiment_trend[1]
    return_sentiment = {'positive': [], 'neutral': [], 'negetive': []}
    for day in week:
        day_counts = sentiment_dict.get(day, {})
        for mood in ('positive', 'negetive', 'neutral'):
            return_sentiment[mood].append([day_counts.get(mood, 0), day])
    results['sentiment_trend'] = return_sentiment

    results['politics_trend'] = portrait['politics_trend']
    results['domain'] = portrait['domain']
    results['sensitive'] = portrait['sensitive']

    raw_geo = portrait['sensitive_geo_activity']
    if raw_geo:
        geo_by_day = _sensitive_fill_days(json.loads(raw_geo), week)
        # Per day, keep the two most frequent places.
        results['sensitive_geo_distribute'] = [
            [day, sorted(places.items(), key=lambda x: x[1], reverse=True)[0:2]]
            for day, places in sorted(geo_by_day.items(), key=lambda x: x[0])
        ]

    raw_hashtag = portrait['sensitive_hashtag_dict']
    if raw_hashtag:
        hashtag_dict = json.loads(raw_hashtag)
        if len(hashtag_dict) < 7:
            for day in week:
                if day in hashtag_dict:
                    hashtag_dict[day] = sorted(hashtag_dict[day].items(),
                                               key=lambda x: x[1], reverse=True)
                else:
                    hashtag_dict[day] = {}
        # NOTE(review): the original indentation was lost; this call is
        # assumed to run whenever hashtag data exists -- confirm.
        results['sensitive_hashtag_description'] = hashtag_description(hashtag_dict)
    else:
        hashtag_dict = {}

    raw_words = portrait['sensitive_words_dict']
    if raw_words:
        sensitive_words_dict = _sensitive_fill_days(json.loads(raw_words), week)
    else:
        sensitive_words_dict = {}
    results['today_sensitive_words'] = sensitive_words_dict.get(date, {})

    all_hashtag_dict = {}
    for detail in hashtag_dict.values():
        # Days rewritten above hold (tag, count) pairs, every other day still
        # holds the raw {tag: count} dict; accept both shapes.
        pairs = detail.items() if isinstance(detail, dict) else detail
        for pair in pairs:
            all_hashtag_dict[pair[0]] = all_hashtag_dict.get(pair[0], 0) + pair[1]

    all_sensitive_words_dict = {}
    for detail in sensitive_words_dict.values():
        for word in detail:
            all_sensitive_words_dict[word] = (
                all_sensitive_words_dict.get(word, 0) + detail[word])

    sorted_words = sorted(all_sensitive_words_dict.items(),
                          key=lambda x: x[1], reverse=True)
    results['sensitive_hashtag'] = sorted(all_hashtag_dict.items(),
                                          key=lambda x: x[1], reverse=True)
    results['sensitive_words'] = sort_sensitive_words(sorted_words)
    results['sensitive_hashtag_dict'] = sorted(hashtag_dict.items(),
                                               key=lambda x: x[0])
    results['sensitive_words_dict'] = sorted(sensitive_words_dict.items(),
                                             key=lambda x: x[0])

    results['sensitive_retweet'] = search_retweet(uid, 1)
    results['sensitive_follow'] = search_follower(uid, 1)
    results['sensitive_at'] = search_mention(uid, 1)
    return results
def _portrait_week_dates():
    """Return the seven daily index names ('YYYYMMDD') of the fixed test week.

    The window is anchored on the hard-coded day '2013-09-08': it starts
    eight days earlier and then advances one day at a time for seven steps.
    """
    base_ts = datetime2ts('2013-09-08') - 8 * 24 * 3600
    return [ts2datetime(base_ts + step * 24 * 3600).replace('-', '')
            for step in range(1, 8)]


def _portrait_rank(field, value, upper_bound):
    """Count portrait documents matched by a range query on `field` from `value` to `upper_bound`.

    Returns 0 when `value` is falsy or when no shard answered successfully.
    """
    if not value:
        return 0
    query_body = {
        'query': {'range': {field: {'from': value, 'to': upper_bound}}}
    }
    counted = es.count(index='sensitive_user_portrait', doc_type='user',
                       body=query_body)
    if counted['_shards']['successful'] != 0:
        return counted['count']
    return 0


def _portrait_weibo_text(mid):
    """Return the stored text of weibo `mid`, or '' when `mid` is falsy or the lookup fails."""
    if not mid:
        return ''
    try:
        return es.get(index='sensitive_user_text', doc_type='user',
                      id=mid)['_source']['text']
    except Exception:
        return ''


def search_attribute_portrait(uid):
    """Build the portrait page data for one user.

    Args:
        uid: user id; used as the document id in every Elasticsearch lookup.

    Returns:
        dict with profile fields, keyword / topic / domain / online-pattern
        summaries, geo activity, rank counters and the seven-day influence
        trend, or None when the portrait document cannot be fetched.
    """
    return_results = {}
    try:
        search_result = es.get(index="sensitive_user_portrait",
                               doc_type="user", id=uid)
    except Exception:
        return None
    results = search_result['_source']

    return_results['user_type'] = 1 if user_type(uid) else 0

    # The portrait stores 0 for a missing value.
    if results['photo_url'] == 0:
        results['photo_url'] = 'unknown'
    if results['location'] == 0:
        results['location'] = 'unknown'
    return_results['photo_url'] = results['photo_url']
    return_results['uid'] = results['uid']
    return_results['uname'] = results['uname']
    if return_results['uname'] == 0:
        return_results['uname'] = 'unknown'
    return_results['location'] = results['location']
    return_results['fansnum'] = results['fansnum']
    return_results['friendsnum'] = results['friendsnum']
    return_results['gender'] = results['gender']
    return_results['psycho_status'] = json.loads(results['psycho_status'])

    if results['keywords']:
        keywords_dict = json.loads(results['keywords'])
        return_results['keywords'] = sorted(keywords_dict.items(),
                                            key=lambda x: x[1], reverse=True)
    else:
        return_results['keywords'] = []

    return_results['retweet'] = search_retweet(uid, 0)
    return_results['follow'] = search_follower(uid, 0)
    return_results['at'] = search_mention(uid, 0)

    if results['ip'] and results['geo_activity']:
        return_results['geo_description'] = active_geo_description(
            json.loads(results['ip']), json.loads(results['geo_activity']))
    else:
        return_results['geo_description'] = ''

    week = _portrait_week_dates()

    geo_top = []
    if results['geo_activity']:
        geo_dict = json.loads(results['geo_activity'])
        if len(geo_dict) < 7:
            for day in week:
                if day not in geo_dict:
                    geo_dict[day] = {}
        geo_totals = {}
        for day, places in sorted(geo_dict.items(), key=lambda x: x[0]):
            ranked = sorted(places.items(), key=lambda x: x[1], reverse=True)
            # Per day, keep the names of the two most frequent places.
            geo_top.append([day, [pair[0] for pair in ranked][0:2]])
            for place in places:
                geo_totals[place] = geo_totals.get(place, 0) + places[place]
        return_results['top_activity_geo'] = sorted(
            geo_totals.items(), key=lambda x: x[1], reverse=True)
    else:
        return_results['top_activity_geo'] = []
    return_results['activity_geo_distribute'] = geo_top

    return_results['hashtag'] = get_user_hashtag(uid)[0]

    # topic
    if results['topic']:
        topic_dict = json.loads(results['topic'])
        return_results['topic'] = sorted(
            topic_dict.items(), key=lambda x: x[1], reverse=True)[:5]
    else:
        return_results['topic'] = []

    # domain
    if results['domain']:
        return_results['domain'] = results['domain'].split('_')
    else:
        return_results['domain'] = []

    # on_line pattern
    if results['online_pattern']:
        online_pattern_dict = json.loads(results['online_pattern'])
        return_results['online_pattern'] = sorted(
            online_pattern_dict.items(), key=lambda x: x[1], reverse=True)[:5]
    else:
        return_results['online_pattern'] = []

    # self_state
    try:
        profile_result = es_user_profile.get(index='weibo_user',
                                             doc_type='user', id=uid)
        return_results['description'] = profile_result['_source'].get(
            'description', '')
    except Exception:
        return_results['description'] = ''

    # Rank counters; activeness uses a smaller upper bound than the others.
    return_results['importance_rank'] = _portrait_rank(
        'importance', results['importance'], 100000)
    return_results['importance'] = results['importance']
    return_results['activeness_rank'] = _portrait_rank(
        'activeness', results['activeness'], 10000)
    return_results['activeness'] = results['activeness']
    return_results['influence_rank'] = _portrait_rank(
        'influence', results['influence'], 100000)
    return_results['influence'] = results['influence']
    return_results['sensitive_rank'] = _portrait_rank(
        'sensitive', results['sensitive'], 100000)
    return_results['sensitive'] = results['sensitive']

    all_count = es.count(index='sensitive_user_portrait', doc_type='user',
                         body={'query': {"match_all": {}}})
    if all_count['_shards']['successful'] != 0:
        return_results['all_count'] = all_count['count']
    else:
        print('es_sensitive_user_portrait error')
        return_results['all_count'] = 0

    # link
    return_results['link'] = results['link']

    weibo_trend = get_user_trend(uid)[0]
    return_results['time_description'] = active_time_description(weibo_trend)
    return_results['time_trend'] = weibo_trend

    # user influence trend
    influence_detail = []
    influence_value = []
    attention_value = []
    for day in week:
        detail = [0] * 10
        try:
            item = es.get(index=day, doc_type='bci', id=uid)['_source']
            detail[0] = item.get('origin_weibo_number', 0)
            detail[1] = item.get('retweeted_weibo_number', 0)
            detail[2] = (item.get('origin_weibo_retweeted_total_number', 0)
                         + item.get('retweeted_weibo_retweeted_total_number', 0))
            detail[3] = (item.get('origin_weibo_comment_total_number', 0)
                         + item.get('retweeted_weibo_comment_total_number', 0))
            detail[4] = item.get('origin_weibo_top_retweeted_id', '0')
            detail[5] = _portrait_weibo_text(detail[4])
            detail[6] = item.get('origin_weibo_retweeted_top_number', 0)
            detail[7] = item.get('origin_weibo_top_comment_id', '0')
            detail[8] = _portrait_weibo_text(detail[7])
            detail[9] = item.get('origin_weibo_comment_top_number', 0)
            attention_number = detail[2] + detail[3]
            # Maps the retweet + comment total onto a value between 0 and 1.
            attention = 2 / (1 + math.exp(-0.005 * attention_number)) - 1
            influence_value.append([day, item['user_index']])
            influence_detail.append([day, detail])
            attention_value.append(attention)
        except Exception:
            # Best effort: a missing or incomplete daily document yields a
            # zero entry for that day.
            influence_value.append([day, 0])
            influence_detail.append([day, detail])
            attention_value.append(0)
    return_results['influence_trend'] = influence_value
    return_results['common_influence_detail'] = influence_detail
    return_results['attention_degree'] = attention_value
    return return_results
def _sensitive_week_dates():
    """Return the seven daily index names ('YYYYMMDD') of the fixed test week.

    The window is anchored on the hard-coded day '2013-09-08': it starts
    eight days earlier and then advances one day at a time for seven steps.
    """
    base_ts = datetime2ts('2013-09-08') - 8 * 24 * 3600
    return [ts2datetime(base_ts + step * 24 * 3600).replace('-', '')
            for step in range(1, 8)]


def _sensitive_fill_days(per_day, week):
    """Add an empty entry for each missing day of `week` when `per_day` has fewer than 7 keys."""
    if len(per_day) < 7:
        for day in week:
            if day not in per_day:
                per_day[day] = {}
    return per_day


def sensitive_attribute(uid, date):
    """Collect the sensitive-behaviour attributes of one user.

    Args:
        uid: user id; used as the document id in every Elasticsearch lookup.
        date: currently ignored. The daily statistics are always read from
            the hard-coded test index '20130907'.

    Returns:
        dict. When ``user_type(uid)`` is falsy only ``{'utype': 0}`` is
        returned; otherwise the dict carries ``'utype': 1`` plus the weibo
        counters, sentiment trend, geo / hashtag / sensitive-word
        distributions and the retweet / follow / mention search results.
    """
    results = {}
    if not user_type(uid):
        results['utype'] = 0
        return results
    results['utype'] = 1
    results['uid'] = uid

    # One portrait fetch serves both the profile fields and the per-day
    # dictionaries read further down.
    portrait = es.get(index='sensitive_user_portrait', doc_type='user',
                      id=uid)['_source']
    # The portrait stores 0 for a missing value.
    results['uname'] = 'unknown' if portrait['uname'] == 0 else portrait['uname']
    results['photo_url'] = ('unknown' if portrait['photo_url'] == 0
                            else portrait['photo_url'])

    # sensitive weibo number statistics
    date = '20130907'  # test
    counters = (
        'origin_weibo_number',
        'retweeted_weibo_number',
        'comment_weibo_number',
        'origin_weibo_retweeted_total_number',
        'origin_weibo_comment_total_number',
        'retweeted_weibo_retweeted_total_number',
        'retweeted_weibo_comment_total_number',
    )
    try:
        bci = es.get(index=date, doc_type='bci', id=uid)['_source']
    except Exception:
        # Best effort: a missing index/document means "no activity that day".
        bci = {}
    try:
        sensitive = dict((name, bci.get('s_' + name, 0)) for name in counters)
        sensitive['comment_weibo_number'] = int(sensitive['comment_weibo_number'])
    except (TypeError, ValueError):
        # Malformed counter: fall back to zeros for all sensitive counters.
        sensitive = dict((name, 0) for name in counters)
    for name in counters:
        results['sensitive_' + name] = sensitive[name]

    results['origin_weibo_total_number'] = (
        bci.get('origin_weibo_number', 0) + sensitive['origin_weibo_number'])
    results['retweeted_weibo_total_number'] = (
        bci.get('retweeted_weibo_number', 0) + sensitive['retweeted_weibo_number'])
    results['comment_weibo_total_number'] = (
        int(bci.get('comment_weibo_number', 0))
        + int(sensitive['comment_weibo_number']))
    for name in counters[3:]:
        results[name] = bci.get(name, 0) + sensitive[name]

    results['sensitive_text'] = sort_sensitive_text(uid)
    results['sensitive_geo_distribute'] = []
    results['sensitive_time_distribute'] = get_user_trend(uid)[1]
    results['sensitive_hashtag'] = []
    results['sensitive_words'] = []
    results['sensitive_hashtag_dict'] = []
    results['sensitive_words_dict'] = []
    results['sensitive_hashtag_description'] = ''

    sentiment_trend = user_sentiment_trend(uid)
    emotion_number = sentiment_trend[0]
    emotion_total = emotion_number[2] + emotion_number[1] + emotion_number[0]
    if emotion_total:
        results['negetive_index'] = float(emotion_number[2]) / emotion_total
        results['negetive_influence'] = float(emotion_number[1]) / emotion_total
    else:
        # No counted weibo at all: report 0.0 instead of dividing by zero.
        results['negetive_index'] = 0.0
        results['negetive_influence'] = 0.0

    week = _sensitive_week_dates()
    sentiment_dict = sentiment_trend[1]
    return_sentiment = {'positive': [], 'neutral': [], 'negetive': []}
    for day in week:
        day_counts = sentiment_dict.get(day, {})
        for mood in ('positive', 'negetive', 'neutral'):
            return_sentiment[mood].append([day_counts.get(mood, 0), day])
    results['sentiment_trend'] = return_sentiment

    results['politics_trend'] = portrait['politics_trend']
    results['domain'] = portrait['domain']
    results['sensitive'] = portrait['sensitive']

    raw_geo = portrait['sensitive_geo_activity']
    if raw_geo:
        geo_by_day = _sensitive_fill_days(json.loads(raw_geo), week)
        # Per day, keep the two most frequent places.
        results['sensitive_geo_distribute'] = [
            [day, sorted(places.items(), key=lambda x: x[1], reverse=True)[0:2]]
            for day, places in sorted(geo_by_day.items(), key=lambda x: x[0])
        ]

    raw_hashtag = portrait['sensitive_hashtag_dict']
    if raw_hashtag:
        hashtag_dict = json.loads(raw_hashtag)
        if len(hashtag_dict) < 7:
            for day in week:
                if day in hashtag_dict:
                    hashtag_dict[day] = sorted(hashtag_dict[day].items(),
                                               key=lambda x: x[1], reverse=True)
                else:
                    hashtag_dict[day] = {}
        # NOTE(review): the original indentation was lost; this call is
        # assumed to run whenever hashtag data exists -- confirm.
        results['sensitive_hashtag_description'] = hashtag_description(hashtag_dict)
    else:
        hashtag_dict = {}

    raw_words = portrait['sensitive_words_dict']
    if raw_words:
        sensitive_words_dict = _sensitive_fill_days(json.loads(raw_words), week)
    else:
        sensitive_words_dict = {}
    results['today_sensitive_words'] = sensitive_words_dict.get(date, {})

    all_hashtag_dict = {}
    for detail in hashtag_dict.values():
        # Days rewritten above hold (tag, count) pairs, every other day still
        # holds the raw {tag: count} dict; accept both shapes.
        pairs = detail.items() if isinstance(detail, dict) else detail
        for pair in pairs:
            all_hashtag_dict[pair[0]] = all_hashtag_dict.get(pair[0], 0) + pair[1]

    all_sensitive_words_dict = {}
    for detail in sensitive_words_dict.values():
        for word in detail:
            all_sensitive_words_dict[word] = (
                all_sensitive_words_dict.get(word, 0) + detail[word])

    sorted_words = sorted(all_sensitive_words_dict.items(),
                          key=lambda x: x[1], reverse=True)
    results['sensitive_hashtag'] = sorted(all_hashtag_dict.items(),
                                          key=lambda x: x[1], reverse=True)
    results['sensitive_words'] = sort_sensitive_words(sorted_words)
    results['sensitive_hashtag_dict'] = sorted(hashtag_dict.items(),
                                               key=lambda x: x[0])
    results['sensitive_words_dict'] = sorted(sensitive_words_dict.items(),
                                             key=lambda x: x[0])

    results['sensitive_retweet'] = search_retweet(uid, 1)
    results['sensitive_follow'] = search_follower(uid, 1)
    results['sensitive_at'] = search_mention(uid, 1)
    return results
def _portrait_week_dates():
    """Return the seven daily index names ('YYYYMMDD') of the fixed test week.

    The window is anchored on the hard-coded day '2013-09-08': it starts
    eight days earlier and then advances one day at a time for seven steps.
    """
    base_ts = datetime2ts('2013-09-08') - 8 * 24 * 3600
    return [ts2datetime(base_ts + step * 24 * 3600).replace('-', '')
            for step in range(1, 8)]


def _portrait_rank(field, value, upper_bound):
    """Count portrait documents matched by a range query on `field` from `value` to `upper_bound`.

    Returns 0 when `value` is falsy or when no shard answered successfully.
    """
    if not value:
        return 0
    query_body = {
        'query': {'range': {field: {'from': value, 'to': upper_bound}}}
    }
    counted = es.count(index='sensitive_user_portrait', doc_type='user',
                       body=query_body)
    if counted['_shards']['successful'] != 0:
        return counted['count']
    return 0


def _portrait_weibo_text(mid):
    """Return the stored text of weibo `mid`, or '' when `mid` is falsy or the lookup fails."""
    if not mid:
        return ''
    try:
        return es.get(index='sensitive_user_text', doc_type='user',
                      id=mid)['_source']['text']
    except Exception:
        return ''


def search_attribute_portrait(uid):
    """Build the portrait page data for one user.

    Args:
        uid: user id; used as the document id in every Elasticsearch lookup.

    Returns:
        dict with profile fields, keyword / topic / domain / online-pattern
        summaries, geo activity, rank counters and the seven-day influence
        trend, or None when the portrait document cannot be fetched.
    """
    return_results = {}
    try:
        search_result = es.get(index="sensitive_user_portrait",
                               doc_type="user", id=uid)
    except Exception:
        return None
    results = search_result['_source']

    return_results['user_type'] = 1 if user_type(uid) else 0

    # The portrait stores 0 for a missing value.
    if results['photo_url'] == 0:
        results['photo_url'] = 'unknown'
    if results['location'] == 0:
        results['location'] = 'unknown'
    return_results['photo_url'] = results['photo_url']
    return_results['uid'] = results['uid']
    return_results['uname'] = results['uname']
    if return_results['uname'] == 0:
        return_results['uname'] = 'unknown'
    return_results['location'] = results['location']
    return_results['fansnum'] = results['fansnum']
    return_results['friendsnum'] = results['friendsnum']
    return_results['gender'] = results['gender']
    return_results['psycho_status'] = json.loads(results['psycho_status'])

    if results['keywords']:
        keywords_dict = json.loads(results['keywords'])
        return_results['keywords'] = sorted(keywords_dict.items(),
                                            key=lambda x: x[1], reverse=True)
    else:
        return_results['keywords'] = []

    return_results['retweet'] = search_retweet(uid, 0)
    return_results['follow'] = search_follower(uid, 0)
    return_results['at'] = search_mention(uid, 0)

    if results['ip'] and results['geo_activity']:
        return_results['geo_description'] = active_geo_description(
            json.loads(results['ip']), json.loads(results['geo_activity']))
    else:
        return_results['geo_description'] = ''

    week = _portrait_week_dates()

    geo_top = []
    if results['geo_activity']:
        geo_dict = json.loads(results['geo_activity'])
        if len(geo_dict) < 7:
            for day in week:
                if day not in geo_dict:
                    geo_dict[day] = {}
        geo_totals = {}
        for day, places in sorted(geo_dict.items(), key=lambda x: x[0]):
            ranked = sorted(places.items(), key=lambda x: x[1], reverse=True)
            # Per day, keep the names of the two most frequent places.
            geo_top.append([day, [pair[0] for pair in ranked][0:2]])
            for place in places:
                geo_totals[place] = geo_totals.get(place, 0) + places[place]
        return_results['top_activity_geo'] = sorted(
            geo_totals.items(), key=lambda x: x[1], reverse=True)
    else:
        return_results['top_activity_geo'] = []
    return_results['activity_geo_distribute'] = geo_top

    return_results['hashtag'] = get_user_hashtag(uid)[0]

    # topic
    if results['topic']:
        topic_dict = json.loads(results['topic'])
        return_results['topic'] = sorted(
            topic_dict.items(), key=lambda x: x[1], reverse=True)[:5]
    else:
        return_results['topic'] = []

    # domain
    if results['domain']:
        return_results['domain'] = results['domain'].split('_')
    else:
        return_results['domain'] = []

    # on_line pattern
    if results['online_pattern']:
        online_pattern_dict = json.loads(results['online_pattern'])
        return_results['online_pattern'] = sorted(
            online_pattern_dict.items(), key=lambda x: x[1], reverse=True)[:5]
    else:
        return_results['online_pattern'] = []

    # self_state
    try:
        profile_result = es_user_profile.get(index='weibo_user',
                                             doc_type='user', id=uid)
        return_results['description'] = profile_result['_source'].get(
            'description', '')
    except Exception:
        return_results['description'] = ''

    # Rank counters; activeness uses a smaller upper bound than the others.
    return_results['importance_rank'] = _portrait_rank(
        'importance', results['importance'], 100000)
    return_results['importance'] = results['importance']
    return_results['activeness_rank'] = _portrait_rank(
        'activeness', results['activeness'], 10000)
    return_results['activeness'] = results['activeness']
    return_results['influence_rank'] = _portrait_rank(
        'influence', results['influence'], 100000)
    return_results['influence'] = results['influence']
    return_results['sensitive_rank'] = _portrait_rank(
        'sensitive', results['sensitive'], 100000)
    return_results['sensitive'] = results['sensitive']

    all_count = es.count(index='sensitive_user_portrait', doc_type='user',
                         body={'query': {"match_all": {}}})
    if all_count['_shards']['successful'] != 0:
        return_results['all_count'] = all_count['count']
    else:
        print('es_sensitive_user_portrait error')
        return_results['all_count'] = 0

    # link
    return_results['link'] = results['link']

    weibo_trend = get_user_trend(uid)[0]
    return_results['time_description'] = active_time_description(weibo_trend)
    return_results['time_trend'] = weibo_trend

    # user influence trend
    influence_detail = []
    influence_value = []
    attention_value = []
    for day in week:
        detail = [0] * 10
        try:
            item = es.get(index=day, doc_type='bci', id=uid)['_source']
            detail[0] = item.get('origin_weibo_number', 0)
            detail[1] = item.get('retweeted_weibo_number', 0)
            detail[2] = (item.get('origin_weibo_retweeted_total_number', 0)
                         + item.get('retweeted_weibo_retweeted_total_number', 0))
            detail[3] = (item.get('origin_weibo_comment_total_number', 0)
                         + item.get('retweeted_weibo_comment_total_number', 0))
            detail[4] = item.get('origin_weibo_top_retweeted_id', '0')
            detail[5] = _portrait_weibo_text(detail[4])
            detail[6] = item.get('origin_weibo_retweeted_top_number', 0)
            detail[7] = item.get('origin_weibo_top_comment_id', '0')
            detail[8] = _portrait_weibo_text(detail[7])
            detail[9] = item.get('origin_weibo_comment_top_number', 0)
            attention_number = detail[2] + detail[3]
            # Maps the retweet + comment total onto a value between 0 and 1.
            attention = 2 / (1 + math.exp(-0.005 * attention_number)) - 1
            influence_value.append([day, item['user_index']])
            influence_detail.append([day, detail])
            attention_value.append(attention)
        except Exception:
            # Best effort: a missing or incomplete daily document yields a
            # zero entry for that day.
            influence_value.append([day, 0])
            influence_detail.append([day, detail])
            attention_value.append(0)
    return_results['influence_trend'] = influence_value
    return_results['common_influence_detail'] = influence_detail
    return_results['attention_degree'] = attention_value
    return return_results
def sensitive_attribute(uid, date):
    """Collect the sensitive-behaviour attributes of one user.

    Args:
        uid: user id, used as the document id in every Elasticsearch lookup.
        date: accepted for interface compatibility only. The value is
            ignored: the bci index name is pinned to the fixed test day
            "20130907" and the weekly series to 2013-09-01 .. 2013-09-07.

    Returns:
        ``{"utype": 0}`` when ``user_type(uid)`` is falsy. Otherwise a dict
        with ``utype`` 1 plus profile fields, sensitive / total weibo
        counters, sentiment ratios and trend, geo / hashtag / sensitive-word
        distributions and the retweet / follower / mention lookups.

    Raises:
        Whatever ``es.get`` raises when the portrait document cannot be read
        (that lookup is deliberately not guarded), and KeyError when the
        portrait document lacks an expected field.
    """
    results = {}
    if not user_type(uid):
        results["utype"] = 0
        return results
    results["utype"] = 1
    results["uid"] = uid

    # The portrait document is read once and reused for every portrait field
    # below (the fields read later are not touched by the fix-ups here).
    portrait_result = es.get(index="sensitive_user_portrait", doc_type="user", id=uid)["_source"]
    results["uname"] = portrait_result["uname"]
    if portrait_result["uname"] == 0:
        results["uname"] = "unknown"
    if portrait_result["photo_url"] == 0:
        portrait_result["photo_url"] = "unknown"
    if portrait_result["location"] == 0:
        portrait_result["location"] = "unknown"
    results["photo_url"] = portrait_result["photo_url"]

    # NOTE: index name and weekly window are pinned to fixed test data.
    bci_index = "20130907"
    window_start = datetime2ts("2013-09-08") - 8 * 24 * 3600
    week_dates = [
        ts2datetime(window_start + offset * 24 * 3600).replace("-", "")
        for offset in range(1, 8)
    ]

    # Best effort: a missing index/document simply yields all-zero counters.
    try:
        bci_doc = es.get(index=bci_index, doc_type="bci", id=uid)["_source"]
    except Exception:
        bci_doc = {}

    # (key in results, field in the bci document)
    sensitive_counters = (
        ("sensitive_origin_weibo_number", "s_origin_weibo_number"),
        ("sensitive_retweeted_weibo_number", "s_retweeted_weibo_number"),
        ("sensitive_comment_weibo_number", "s_comment_weibo_number"),
        ("sensitive_retweeted_weibo_retweeted_total_number", "s_retweeted_weibo_retweeted_total_number"),
        ("sensitive_origin_weibo_retweeted_total_number", "s_origin_weibo_retweeted_total_number"),
        ("sensitive_origin_weibo_comment_total_number", "s_origin_weibo_comment_total_number"),
        ("sensitive_retweeted_weibo_comment_total_number", "s_retweeted_weibo_comment_total_number"),
    )
    try:
        for result_key, source_field in sensitive_counters:
            results[result_key] = bci_doc.get(source_field, 0)
        results["sensitive_comment_weibo_number"] = int(results["sensitive_comment_weibo_number"])
    except (TypeError, ValueError):
        # A malformed comment counter resets every sensitive counter to zero.
        for result_key, _ in sensitive_counters:
            results[result_key] = 0

    # Totals = ordinary counter + sensitive counter.
    # (key in results, ordinary field in the bci document, sensitive key in results)
    total_counters = (
        ("origin_weibo_total_number", "origin_weibo_number", "sensitive_origin_weibo_number"),
        ("retweeted_weibo_total_number", "retweeted_weibo_number", "sensitive_retweeted_weibo_number"),
        (
            "origin_weibo_retweeted_total_number",
            "origin_weibo_retweeted_total_number",
            "sensitive_origin_weibo_retweeted_total_number",
        ),
        (
            "origin_weibo_comment_total_number",
            "origin_weibo_comment_total_number",
            "sensitive_origin_weibo_comment_total_number",
        ),
        (
            "retweeted_weibo_retweeted_total_number",
            "retweeted_weibo_retweeted_total_number",
            "sensitive_retweeted_weibo_retweeted_total_number",
        ),
        (
            "retweeted_weibo_comment_total_number",
            "retweeted_weibo_comment_total_number",
            "sensitive_retweeted_weibo_comment_total_number",
        ),
    )
    for total_key, plain_field, sensitive_key in total_counters:
        results[total_key] = bci_doc.get(plain_field, 0) + results[sensitive_key]
    results["comment_weibo_total_number"] = int(bci_doc.get("comment_weibo_number", 0)) + int(
        results["sensitive_comment_weibo_number"]
    )

    results["sensitive_text"] = sort_sensitive_text(uid)
    # Defaults; overwritten below when the portrait document carries data.
    results["sensitive_geo_distribute"] = []
    results["sensitive_time_distribute"] = get_user_trend(uid)[1]
    results["sensitive_hashtag"] = []
    results["sensitive_words"] = []
    results["sensitive_hashtag_dict"] = []
    results["sensitive_words_dict"] = []
    results["sensitive_hashtag_description"] = ""

    sentiment_trend = user_sentiment_trend(uid)
    emotion_number = sentiment_trend[0]
    emotion_total = emotion_number[2] + emotion_number[1] + emotion_number[0]
    if emotion_total:
        results["negetive_index"] = float(emotion_number[2]) / emotion_total
        results["negetive_influence"] = float(emotion_number[1]) / emotion_total
    else:
        # No emotion counts at all: report zero ratios instead of dividing by zero.
        results["negetive_index"] = 0.0
        results["negetive_influence"] = 0.0

    sentiment_dict = sentiment_trend[1]
    return_sentiment = {"positive": [], "neutral": [], "negetive": []}
    for day in week_dates:
        day_sentiment = sentiment_dict.get(day, {})
        for polarity in ("positive", "negetive", "neutral"):
            return_sentiment[polarity].append([day_sentiment.get(polarity, 0), day])
    results["sentiment_trend"] = return_sentiment

    results["politics_trend"] = portrait_result["politics_trend"]
    results["domain"] = portrait_result["domain"]
    results["sensitive"] = portrait_result["sensitive"]
    temp_hashtag = portrait_result["sensitive_hashtag_dict"]
    temp_sensitive_words = portrait_result["sensitive_words_dict"]
    temp_sensitive_geo = portrait_result["sensitive_geo_activity"]

    if temp_sensitive_geo:
        sensitive_geo_dict = json.loads(temp_sensitive_geo)
        if len(sensitive_geo_dict) < 7:
            # Pad the days of the window that have no entry.
            for day in week_dates:
                sensitive_geo_dict.setdefault(day, {})
        sensitive_geo_list = []
        for day, places in sorted(sensitive_geo_dict.items(), key=lambda x: x[0]):
            top_places = sorted(places.items(), key=lambda x: x[1], reverse=True)[0:2]
            sensitive_geo_list.append([day, top_places])
        results["sensitive_geo_distribute"] = sensitive_geo_list

    if temp_hashtag:
        hashtag_dict = json.loads(temp_hashtag)
        if len(hashtag_dict) < 7:
            for day in week_dates:
                if day in hashtag_dict:
                    hashtag_dict[day] = sorted(hashtag_dict[day].items(), key=lambda x: x[1], reverse=True)
                else:
                    # Kept as {} (not []) so the emitted structure is unchanged.
                    hashtag_dict[day] = {}
        results["sensitive_hashtag_description"] = hashtag_description(hashtag_dict)
    else:
        hashtag_dict = {}

    if temp_sensitive_words:
        sensitive_words_dict = json.loads(temp_sensitive_words)
        if len(sensitive_words_dict) < 7:
            for day in week_dates:
                sensitive_words_dict.setdefault(day, {})
    else:
        sensitive_words_dict = {}

    results["today_sensitive_words"] = sensitive_words_dict.get(bci_index, {})

    all_hashtag_dict = {}
    for day_hashtags in hashtag_dict.values():
        # A day holds either the raw {hashtag: count} mapping or the sorted
        # (hashtag, count) pairs produced above; accept both shapes.
        if isinstance(day_hashtags, dict):
            hashtag_pairs = day_hashtags.items()
        else:
            hashtag_pairs = day_hashtags
        for hashtag, count in hashtag_pairs:
            all_hashtag_dict[hashtag] = all_hashtag_dict.get(hashtag, 0) + count

    all_sensitive_words_dict = {}
    for day_words in sensitive_words_dict.values():
        for word, count in day_words.items():
            all_sensitive_words_dict[word] = all_sensitive_words_dict.get(word, 0) + count

    sorted_hashtag = sorted(all_hashtag_dict.items(), key=lambda x: x[1], reverse=True)
    sorted_words = sorted(all_sensitive_words_dict.items(), key=lambda x: x[1], reverse=True)
    results["sensitive_hashtag"] = sorted_hashtag
    results["sensitive_words"] = sort_sensitive_words(sorted_words)
    results["sensitive_hashtag_dict"] = sorted(hashtag_dict.items(), key=lambda x: x[0])
    results["sensitive_words_dict"] = sorted(sensitive_words_dict.items(), key=lambda x: x[0])

    results["sensitive_retweet"] = search_retweet(uid, 1)
    results["sensitive_follow"] = search_follower(uid, 1)
    results["sensitive_at"] = search_mention(uid, 1)
    return results
def _portrait_rank(field, value, upper_bound):
    """Count portrait users whose ``field`` lies in the range ``value`` .. ``upper_bound``.

    Returns 0 when ``value`` is falsy or when no shard answered the count.
    """
    if not value:
        return 0
    query_body = {"query": {"range": {field: {"from": value, "to": upper_bound}}}}
    count_result = es.count(index="sensitive_user_portrait", doc_type="user", body=query_body)
    if count_result["_shards"]["successful"] != 0:
        return count_result["count"]
    return 0


def _sensitive_weibo_text(mid):
    """Return the ``text`` stored for weibo ``mid``; "" when ``mid`` is falsy or the lookup fails."""
    if not mid:
        return ""
    try:
        return es.get(index="sensitive_user_text", doc_type="user", id=mid)["_source"]["text"]
    except Exception:
        return ""


def search_attribute_portrait(uid):
    """Assemble the general portrait of one user.

    Args:
        uid: user id, used as the document id in every Elasticsearch lookup.

    Returns:
        None when the portrait document cannot be fetched. Otherwise a dict
        with profile fields, keyword / topic / domain / online-pattern lists,
        geo activity, the importance / activeness / influence / sensitive
        values with their rank counts, and a seven-day influence trend.
        The weekly window is pinned to the fixed test range
        2013-09-01 .. 2013-09-07.

    Raises:
        KeyError when the portrait document lacks an expected field, and
        whatever ``json.loads`` raises on ``psycho_status`` (that field is
        parsed without a guard).
    """
    return_results = {}
    try:
        search_result = es.get(index="sensitive_user_portrait", doc_type="user", id=uid)
    except Exception:
        return None
    results = search_result["_source"]

    return_results["user_type"] = 1 if user_type(uid) else 0

    if results["photo_url"] == 0:
        results["photo_url"] = "unknown"
    if results["location"] == 0:
        results["location"] = "unknown"
    return_results["photo_url"] = results["photo_url"]
    return_results["uid"] = results["uid"]
    return_results["uname"] = results["uname"]
    if return_results["uname"] == 0:
        return_results["uname"] = "unknown"
    return_results["location"] = results["location"]
    return_results["fansnum"] = results["fansnum"]
    return_results["friendsnum"] = results["friendsnum"]
    return_results["gender"] = results["gender"]
    return_results["psycho_status"] = json.loads(results["psycho_status"])

    if results["keywords"]:
        keywords_dict = json.loads(results["keywords"])
        return_results["keywords"] = sorted(keywords_dict.items(), key=lambda x: x[1], reverse=True)
    else:
        return_results["keywords"] = []

    return_results["retweet"] = search_retweet(uid, 0)
    return_results["follow"] = search_follower(uid, 0)
    return_results["at"] = search_mention(uid, 0)

    if results["ip"] and results["geo_activity"]:
        ip_dict = json.loads(results["ip"])
        geo_dict = json.loads(results["geo_activity"])
        return_results["geo_description"] = active_geo_description(ip_dict, geo_dict)
    else:
        return_results["geo_description"] = ""

    # NOTE: the weekly window is pinned to fixed test data.
    window_start = datetime2ts("2013-09-08") - 8 * 24 * 3600
    week_dates = [
        ts2datetime(window_start + offset * 24 * 3600).replace("-", "")
        for offset in range(1, 8)
    ]

    geo_top = []
    if results["geo_activity"]:
        # Parsed again on purpose: the dict handed to active_geo_description
        # above is not reused, so the helper cannot affect this pass.
        geo_dict = json.loads(results["geo_activity"])
        if len(geo_dict) < 7:
            # Pad the days of the window that have no entry.
            for day in week_dates:
                geo_dict.setdefault(day, {})
        geo_totals = {}
        for day, places in sorted(geo_dict.items(), key=lambda x: x[0]):
            ranked_places = sorted(places.items(), key=lambda x: x[1], reverse=True)
            geo_top.append([day, [place for place, _ in ranked_places[0:2]]])
            for place, count in places.items():
                geo_totals[place] = geo_totals.get(place, 0) + count
        return_results["top_activity_geo"] = sorted(geo_totals.items(), key=lambda x: x[1], reverse=True)
        return_results["activity_geo_distribute"] = geo_top
    else:
        return_results["top_activity_geo"] = []
        return_results["activity_geo_distribute"] = geo_top

    return_results["hashtag"] = get_user_hashtag(uid)[0]

    # topic
    if results["topic"]:
        topic_dict = json.loads(results["topic"])
        return_results["topic"] = sorted(topic_dict.items(), key=lambda x: x[1], reverse=True)[:5]
    else:
        return_results["topic"] = []

    # domain
    if results["domain"]:
        return_results["domain"] = results["domain"].split("_")
    else:
        return_results["domain"] = []

    # on_line pattern
    if results["online_pattern"]:
        online_pattern_dict = json.loads(results["online_pattern"])
        return_results["online_pattern"] = sorted(online_pattern_dict.items(), key=lambda x: x[1], reverse=True)[:5]
    else:
        return_results["online_pattern"] = []

    # self_state: best effort, a failed profile lookup yields "".
    try:
        profile_result = es_user_profile.get(index="weibo_user", doc_type="user", id=uid)
        return_results["description"] = profile_result["_source"].get("description", "")
    except Exception:
        return_results["description"] = ""

    # Rank counts; note that activeness uses a smaller upper bound than the rest.
    return_results["importance_rank"] = _portrait_rank("importance", results["importance"], 100000)
    return_results["importance"] = results["importance"]
    return_results["activeness_rank"] = _portrait_rank("activeness", results["activeness"], 10000)
    return_results["activeness"] = results["activeness"]
    return_results["influence_rank"] = _portrait_rank("influence", results["influence"], 100000)
    return_results["influence"] = results["influence"]
    return_results["sensitive_rank"] = _portrait_rank("sensitive", results["sensitive"], 100000)
    return_results["sensitive"] = results["sensitive"]

    all_count = es.count(index="sensitive_user_portrait", doc_type="user", body={"query": {"match_all": {}}})
    if all_count["_shards"]["successful"] != 0:
        return_results["all_count"] = all_count["count"]
    else:
        print("es_sensitive_user_portrait error")
        return_results["all_count"] = 0

    # link
    return_results["link"] = results["link"]

    weibo_trend = get_user_trend(uid)[0]
    return_results["time_description"] = active_time_description(weibo_trend)
    return_results["time_trend"] = weibo_trend

    # user influence trend
    influence_detail = []
    influence_value = []
    attention_value = []
    for date in week_dates:
        detail = [0] * 10
        try:
            item = es.get(index=date, doc_type="bci", id=uid)["_source"]
            detail[0] = item.get("origin_weibo_number", 0)
            detail[1] = item.get("retweeted_weibo_number", 0)
            detail[2] = item.get("origin_weibo_retweeted_total_number", 0) + item.get(
                "retweeted_weibo_retweeted_total_number", 0
            )
            detail[3] = item.get("origin_weibo_comment_total_number", 0) + item.get(
                "retweeted_weibo_comment_total_number", 0
            )
            detail[4] = item.get("origin_weibo_top_retweeted_id", "0")
            detail[5] = _sensitive_weibo_text(detail[4])
            detail[6] = item.get("origin_weibo_retweeted_top_number", 0)
            detail[7] = item.get("origin_weibo_top_comment_id", "0")
            detail[8] = _sensitive_weibo_text(detail[7])
            detail[9] = item.get("origin_weibo_comment_top_number", 0)
            attention_number = detail[2] + detail[3]
            attention = 2 / (1 + math.exp(-0.005 * attention_number)) - 1
            user_index = item["user_index"]
        except Exception:
            # Best effort per day: report zeros, keeping whatever part of
            # ``detail`` was filled before the failure.
            user_index = 0
            attention = 0
        influence_value.append([date, user_index])
        influence_detail.append([date, detail])
        attention_value.append(attention)

    return_results["influence_trend"] = influence_value
    return_results["common_influence_detail"] = influence_detail
    return_results["attention_degree"] = attention_value
    return return_results