def get_tweets_from_user_portrait(monitor_keywords_list, sort_item_new):
    """Fetch tweets for the top portrait users ranked by ``sort_item_new``.

    Pulls up to USER_POETRAIT_NUMBER user documents from the portrait
    index (sorted descending on the given field), collects their unique
    ids, and returns the matching flow-text tweets for those users.
    """
    query_body = {
        'query': {'match_all': {}},
        'sort': {sort_item_new: {'order': 'desc'}},
        'size': USER_POETRAIT_NUMBER
    }
    portrait_hits = es_tw_user_portrait.search(
        index=tw_portrait_index_name,
        doc_type=tw_portrait_index_type,
        body=query_body)['hits']['hits']
    # the portrait doc _id is the user's uid; dedupe via a set
    uid_list = list({hit['_id'] for hit in portrait_hits})
    return uid_lists2tw_from_flow_text(monitor_keywords_list, uid_list)
def update_influence(uid_list=None):
    """Collect recent influence scores per uid from the tw_bci indexes.

    :param uid_list: uids to query; when empty/None, all known uids are
        loaded via load_uid_list().
    :return: dict mapping uid -> {'influence_list': [float, ...]},
        newest scores first (results are sorted by timestamp desc).
        Previously the computed dict was silently discarded; it is now
        returned so callers can persist it.

    Errors from any single index are printed and that index is skipped
    (best-effort aggregation, matching the original behavior).
    """
    if not uid_list:
        uid_list = load_uid_list()
    tw_bci_index_list = get_tw_bci_index_list(load_timestamp())
    tw_influence_query_body = {
        'query': {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"terms": {"uid": uid_list}},
                        ]
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        "sort": {"timestamp": {"order": "desc"}},
        "fields": ["influence", "uid"]
    }
    user_influence = {}
    for index_name in tw_bci_index_list:
        try:
            search_results = es.search(index=index_name,
                                       doc_type=tw_bci_index_type,
                                       body=tw_influence_query_body)['hits']['hits']
            for item in search_results:
                content = item['fields']
                uid = content['uid'][0]
                if uid not in user_influence:
                    user_influence[uid] = {'influence_list': []}
                # stored fields come back as single-element lists
                if 'influence' in content:
                    user_influence[uid]['influence_list'].append(
                        float(content['influence'][0]))
        except Exception as e:
            # best-effort: a bad index must not abort the whole run
            print(e)
    return user_influence
def update_keywords(uid_list=None):
    """Aggregate keyword frequency dicts per uid from the flow-text indexes.

    :param uid_list: uids to query; when empty/None, all known uids are
        loaded via load_uid_list().
    :return: dict mapping uid -> {'keywords': merged_keyword_dict}.
        The 'keywords_dict' stored field is a JSON string; dicts from
        multiple tweets are combined via merge_dict(). Previously the
        computed dict was silently discarded; it is now returned.

    Errors from any single index are printed and that index is skipped.
    """
    if not uid_list:
        uid_list = load_uid_list()
    tw_flow_text_index_list = get_twitter_flow_text_index_list(load_timestamp())
    keywords_query_body = {
        'query': {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"terms": {"uid": uid_list}},
                        ]
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        "sort": {"timestamp": {"order": "desc"}},
        "fields": ["keywords_dict", "uid"]
    }
    user_keywords = {}
    for index_name in tw_flow_text_index_list:
        try:
            search_results = es.search(index=index_name,
                                       doc_type=twitter_flow_text_index_type,
                                       body=keywords_query_body)['hits']['hits']
            for item in search_results:
                content = item['fields']
                uid = content['uid'][0]
                if uid not in user_keywords:
                    user_keywords[uid] = {'keywords': {}}
                # stored fields come back as single-element lists
                if 'keywords_dict' in content:
                    user_keywords[uid]['keywords'] = merge_dict(
                        user_keywords[uid]['keywords'],
                        json.loads(content['keywords_dict'][0]))
        except Exception as e:
            # best-effort: a bad index must not abort the whole run
            print(e)
    return user_keywords
def load_uid_list():
    """Return the list of all uids stored in the twitter user index.

    :return: list of uid values; empty list when the search fails.

    Bug fix: the original built ``uid_list`` but never returned it, so
    every caller doing ``uid_list = load_uid_list()`` received None and
    subsequently built a broken ``terms`` filter.
    """
    uid_list = []
    uid_list_query_body = {'size': MAX_SEARCH_SIZE}
    try:
        search_results = es.search(index=twitter_user_index_name,
                                   doc_type=twitter_user_index_type,
                                   body=uid_list_query_body)['hits']['hits']
        for item in search_results:
            uid_list.append(item['_source']['uid'])
    except Exception as e:
        # best-effort: return whatever was collected before the failure
        print(e)
    return uid_list
def update_domain(uid_list=None):
    """Collect per-uid profile info used for domain classification.

    :param uid_list: uids to query; when empty/None, all known uids are
        loaded via load_uid_list().
    :return: dict mapping uid -> {'location', 'username', 'description',
        'number_of_text'}. Description is truncated to 1000 chars.
        Previously the computed dict was silently discarded; it is now
        returned so callers can persist it.

    Errors are printed and an empty/partial dict is returned (best-effort).
    """
    if not uid_list:
        uid_list = load_uid_list()
    tw_flow_text_index_list = get_twitter_flow_text_index_list(load_timestamp())
    user_domain_data = {}
    # number of tweets per uid (used as 'number_of_text')
    count_result = count_text_num(uid_list, tw_flow_text_index_list)
    tw_user_query_body = {
        'query': {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"terms": {"uid": uid_list}},
                        ]
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        "fields": ["location", "username", "description", "uid"]
    }
    try:
        search_results = es.search(index=twitter_user_index_name,
                                   doc_type=twitter_user_index_type,
                                   body=tw_user_query_body)['hits']['hits']
        for item in search_results:
            content = item['fields']
            uid = content['uid'][0]
            if uid not in user_domain_data:
                # guard against uids missing from the count result
                # (original indexed directly and could raise KeyError)
                user_domain_data[uid] = {
                    'location': '',
                    'username': '',
                    'description': '',
                    'number_of_text': count_result.get(uid, 0)
                }
            # stored fields come back as single-element lists
            location = content['location'][0] if 'location' in content else ''
            # cap description length to keep documents small
            description = (content['description'][0][:1000]
                           if 'description' in content else '')
            username = content['username'][0] if 'username' in content else ''
            user_domain_data[uid]['location'] = location
            user_domain_data[uid]['username'] = username
            user_domain_data[uid]['description'] = description
    except Exception as e:
        # best-effort: a failed search must not abort the whole run
        print(e)
    return user_domain_data
def update_sentiment(uid_list=None):
    """Collect recent sentiment codes per uid from the flow-text indexes.

    Sentiment codes (SENTIMENT_DICT_NEW): 0 neutral, 1 positive,
    2 angry, 3 anxious, 4 sad, 5 disgusted, 6 other-negative, 7 negative.

    :param uid_list: uids to query; when empty/None, all known uids are
        loaded via load_uid_list().
    :return: dict mapping uid -> {'sentiment_list': [int, ...]},
        newest first (results sorted by timestamp desc). Previously the
        computed dict was silently discarded; it is now returned.

    Errors from any single index are printed and that index is skipped.
    """
    if not uid_list:
        uid_list = load_uid_list()
    tw_flow_text_index_list = get_twitter_flow_text_index_list(load_timestamp())
    sentiment_query_body = {
        'query': {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"terms": {"uid": uid_list}},
                        ]
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        "sort": {"timestamp": {"order": "desc"}},
        "fields": ["sentiment", "uid"]
    }
    user_sentiment = {}
    for index_name in tw_flow_text_index_list:
        try:
            search_results = es.search(index=index_name,
                                       doc_type=twitter_flow_text_index_type,
                                       body=sentiment_query_body)['hits']['hits']
            for item in search_results:
                content = item['fields']
                uid = content['uid'][0]
                if uid not in user_sentiment:
                    user_sentiment[uid] = {'sentiment_list': []}
                # stored fields come back as single-element lists
                if 'sentiment' in content:
                    user_sentiment[uid]['sentiment_list'].append(
                        int(content['sentiment'][0]))
        except Exception as e:
            # best-effort: a bad index must not abort the whole run
            print(e)
    return user_sentiment
def update_hashtag(uid_list=None):
    """Collect recent hashtags per uid from the flow-text indexes.

    :param uid_list: uids to query; when empty/None, all known uids are
        loaded via load_uid_list().
    :return: dict mapping uid -> {'hashtag_list': [str, ...]}. The
        'hashtag' stored field is an '&'-joined string which is split
        into individual tags. Previously the computed dict was silently
        discarded; it is now returned.

    Errors from any single index are printed and that index is skipped.
    """
    if not uid_list:
        uid_list = load_uid_list()
    tw_flow_text_index_list = get_twitter_flow_text_index_list(load_timestamp())
    hashtag_query_body = {
        'query': {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"terms": {"uid": uid_list}},
                        ]
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        "sort": {"timestamp": {"order": "desc"}},
        "fields": ["hashtag", "uid"]
    }
    user_hashtag = {}
    for index_name in tw_flow_text_index_list:
        try:
            search_results = es.search(index=index_name,
                                       doc_type=twitter_flow_text_index_type,
                                       body=hashtag_query_body)['hits']['hits']
            for item in search_results:
                content = item['fields']
                uid = content['uid'][0]
                if uid not in user_hashtag:
                    user_hashtag[uid] = {'hashtag_list': []}
                # stored fields come back as single-element lists
                if 'hashtag' in content:
                    hashtag = content['hashtag'][0]
                    if hashtag:
                        # tags are stored as a single '&'-joined string
                        user_hashtag[uid]['hashtag_list'].extend(
                            hashtag.split('&'))
        except Exception as e:
            # best-effort: a bad index must not abort the whole run
            print(e)
    return user_hashtag
def _empty_baseinfo(uid):
    # default profile record written when no user document is found
    return {
        'uid': str(uid),
        'uname': '',
        'location': '',
        'verified': '',
        'statusnum': 0,
        'friendsnum': 0,
        'fansnum': 0,
        'photo_url': '',
        'screenname': ''
    }


def _field(content, key, default=''):
    # stored fields come back as single-element lists; unwrap safely
    return content[key][0] if key in content else default


def update_baseinfo(uid_list=None):
    """Load base profile info for the given uids and persist it via
    save_data2es().

    :param uid_list: uids to query (no automatic fallback to
        load_uid_list(), matching the original behavior).
    :return: whatever save_data2es() returns.

    Fixes vs. the original: the duplicated "followers_count" entry in
    the fields list is removed, and numeric fields (statusnum,
    friendsnum, fansnum) now default to 0 instead of '' when missing,
    consistent with the record template. Uids with no user document get
    an all-default record so every requested uid is represented.
    """
    if uid_list is None:
        uid_list = []
    user_baseinfo = {}
    tw_user_query_body = {
        'query': {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"terms": {"uid": uid_list}},
                        ]
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        "fields": ["location", "userscreenname", "original_profile_image_url",
                   "followers_count", "status_count", "friends_count",
                   "is_verified", "username", "uid"]
    }
    search_results = es.search(index=twitter_user_index_name,
                               doc_type=twitter_user_index_type,
                               body=tw_user_query_body)['hits']['hits']
    for item in search_results:
        content = item['fields']
        uid = content['uid'][0]
        if uid not in user_baseinfo:
            user_baseinfo[uid] = _empty_baseinfo(uid)
        record = user_baseinfo[uid]
        record['location'] = _field(content, 'location')
        record['uname'] = _field(content, 'username')
        record['photo_url'] = _field(content, 'original_profile_image_url')
        record['verified'] = str(content['is_verified'][0]) \
            if 'is_verified' in content else ''
        record['statusnum'] = _field(content, 'status_count', 0)
        record['friendsnum'] = _field(content, 'friends_count', 0)
        record['fansnum'] = _field(content, 'followers_count', 0)
        record['screenname'] = _field(content, 'userscreenname')
    # ensure every requested uid has at least a default record
    for uid in uid_list:
        if uid not in user_baseinfo:
            user_baseinfo[uid] = _empty_baseinfo(uid)
    return save_data2es(user_baseinfo)