def get_user_info(uid):
    """Return a dict of profile fields for the user with id *uid*.

    The user is looked up with ``user_search.search_by_id`` (requesting
    ``user_fields_list``).  When the lookup returns a result, the fields
    listed below are copied from it.  When it returns nothing, every field
    is filled with the unicode "unknown" placeholder instead, except
    ``profile_image_url`` which is set to ``'no'``.

    :param uid: user id; converted with ``int()`` before the lookup.
    :returns: dict with the keys ``name``, ``location``, ``friends_count``,
        ``followers_count``, ``profile_image_url``, ``created_at`` and
        ``statuses_count``.
    """
    # Single source of truth for the exported keys; each one is copied
    # exactly once.
    fields = ('name', 'location', 'friends_count', 'followers_count',
              'profile_image_url', 'created_at', 'statuses_count')

    result = user_search.search_by_id(int(uid), fields=user_fields_list)
    if result:
        return {field: result[field] for field in fields}

    user_info = {field: u'未知' for field in fields}
    # The image placeholder differs from the textual one.
    user_info['profile_image_url'] = 'no'
    return user_info
def acquire_user_by_id(uid):
    """Fetch the name and profile image URL of user *uid*.

    :param uid: user id handed unchanged to ``user_search.search_by_id``.
    :returns: ``{'name': ..., 'image': ...}`` when the lookup succeeds,
        otherwise an empty dict.
    """
    found = user_search.search_by_id(uid, fields=['name', 'profile_image_url'])
    if not found:
        return {}
    return {
        'name': found['name'],
        'image': found['profile_image_url'],
    }
def get_province(uid):
    """Return the ``province`` field of user *uid*.

    :param uid: user id handed unchanged to ``user_search.search_by_id``.
    :returns: the stored ``province`` value, or ``None`` when the lookup
        returns nothing.
    """
    record = user_search.search_by_id(uid, fields=['province'])
    return record['province'] if record else None
def getUsernameByUid(uid):
    """Return the ``name`` of user *uid*, or ``None`` if it cannot be found.

    :param uid: user id; a falsy value short-circuits to ``None`` without
        any lookup, otherwise it is converted with ``int()``.
    :returns: the ``name`` field from ``xapian_search_user.search_by_id``,
        or ``None`` when *uid* is falsy or the lookup returns nothing.
    """
    if uid:
        record = xapian_search_user.search_by_id(int(uid), fields=['name'])
        if record:
            return record['name']
    return None
def acquire_user_by_id_v2(uid):
    """Fetch name, location and follower/friend counts of user *uid*.

    :param uid: user id; converted with ``int()`` before the lookup.
    :returns: dict with ``name``, ``location``, ``count1`` (the
        ``followers_count`` field) and ``count2`` (the ``friends_count``
        field); an empty dict when the lookup returns nothing.
    """
    wanted = ['name', 'location', 'followers_count', 'friends_count']
    record = user_search.search_by_id(int(uid), fields=wanted)
    if not record:
        return {}
    return {
        'name': record['name'],
        'location': record['location'],
        'count1': record['followers_count'],
        'count2': record['friends_count'],
    }
def read_uid_weibos(topic, date, windowsize, uid):
    """Return the weibos of user *uid* for *topic*, one row per weibo.

    The search window runs from ``end_ts - Day * windowsize`` to
    ``end_ts``, where ``end_ts = datetime2ts(date)``.  The window is handed
    to ``getXapianWeiboByTopic`` to obtain a searcher, which is queried
    with ``{'user': uid}``.

    :param topic: topic passed through to ``getXapianWeiboByTopic``.
    :param date: end of the window, converted with ``datetime2ts``.
    :param windowsize: window length as a multiple of ``Day``.
    :param uid: user whose weibos are wanted.
    :returns: list of rows
        ``[wid, uid, name, location, friends_count, followers_count,
        created_at, statuses_count, profile_image_url, date, text, geo,
        source, reposts_count, comments_count, weibo_link]``
        sorted ascending by the ``date`` column (index 9, the ``ts2date``
        value of the weibo timestamp).  The ``uid`` column is the weibo's
        own ``user`` field.  The user columns fall back to placeholder
        values when the user lookup returns nothing.  An empty list is
        returned when the search reports zero hits.
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - Day * windowsize
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    count, results = xapian_search_weibo.search(query={'user': uid},
                                                fields=weibo_fields_list)
    if count == 0:
        return []

    # Look each distinct user up only once instead of once per weibo.
    user_cache = {}
    weibo_list = []
    for weibo in results():
        wid = weibo['_id']
        # Separate names so the ``uid`` / ``date`` parameters are not rebound.
        weibo_uid = weibo['user']
        if weibo_uid not in user_cache:
            user_cache[weibo_uid] = user_search.search_by_id(
                weibo_uid, fields=user_fields_list)
        user = user_cache[weibo_uid]
        if user:
            name = user['name']
            location = user['location']
            friends_count = user['friends_count']
            followers_count = user['followers_count']
            created_at = user['created_at']
            statuses_count = user['statuses_count']
            profile_image_url = user['profile_image_url']
        else:
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'

        weibo_date = ts2date(weibo['timestamp'])
        weibo_list.append([
            wid, weibo_uid, name, location, friends_count, followers_count,
            created_at, statuses_count, profile_image_url, weibo_date,
            weibo['text'], weibo['geo'], weibo['source'],
            weibo['reposts_count'], weibo['comments_count'],
            weiboinfo2url(weibo_uid, wid),
        ])

    # Index 9 is the date column.
    return sorted(weibo_list, key=lambda row: row[9])
def get_impor_media():
    """Pick the media accounts with the most followers and write them out.

    Reads uids from the first column of ``media.csv``, looks up each uid's
    ``followers_count`` with ``user_search.search_by_id`` (uids the lookup
    does not find are skipped), and passes the entries with the highest
    follower counts -- at most 500 -- to ``write_impor_media`` as a list of
    ``(uid, followers_count)`` tuples sorted ascending by follower count.

    NOTE(review): a second definition of ``get_impor_media`` is visible in
    this source; if both live in the same module the later one wins.

    :returns: ``None``; the result is handed to ``write_impor_media``.
    """
    media_dict = {}  # media_dict = {uid1: followers_count1}
    # Context manager so the handle is closed even if a lookup raises.
    # Binary mode is what the Python 2 csv module expects.
    with open('media.csv', 'rb') as csv_file:
        for line in csv.reader(csv_file):
            if not line:
                # csv.reader yields an empty list for blank lines.
                continue
            uid = line[0]
            results = user_search.search_by_id(int(uid),
                                               fields=['followers_count'])
            if results:
                media_dict[uid] = results['followers_count']

    sort_media = sorted(media_dict.items(), key=lambda item: item[1])
    # A plain [-500:] keeps every entry when fewer than 500 exist; the
    # former ``len(sort_media) - 500`` start index turned negative in that
    # case and silently discarded entries.
    topmedia = sort_media[-500:]  # [(uid1, followers1), (uid2, followers2)]
    write_impor_media(topmedia)
def get_impor_media():
    """Pick the media accounts with the most followers and write them out.

    Reads uids from the first column of ``media.csv``, looks up each uid's
    ``followers_count`` with ``user_search.search_by_id`` (uids the lookup
    does not find are skipped), and passes the entries with the highest
    follower counts -- at most 500 -- to ``write_impor_media`` as a list of
    ``(uid, followers_count)`` tuples sorted ascending by follower count.

    NOTE(review): an earlier definition of ``get_impor_media`` is visible
    in this source; if both live in the same module this one shadows it.

    :returns: ``None``; the result is handed to ``write_impor_media``.
    """
    media_dict = {}  # media_dict = {uid1: followers_count1}
    # Context manager so the handle is closed even if a lookup raises.
    # Binary mode is what the Python 2 csv module expects.
    with open('media.csv', 'rb') as csv_file:
        for line in csv.reader(csv_file):
            if not line:
                # csv.reader yields an empty list for blank lines.
                continue
            uid = line[0]
            results = user_search.search_by_id(int(uid),
                                               fields=['followers_count'])
            if results:
                media_dict[uid] = results['followers_count']

    sort_media = sorted(media_dict.items(), key=lambda item: item[1])
    # A plain [-500:] keeps every entry when fewer than 500 exist; the
    # former ``len(sort_media) - 500`` start index turned negative in that
    # case and silently discarded entries.
    topmedia = sort_media[-500:]  # [(uid1, followers1), (uid2, followers2)]
    write_impor_media(topmedia)
def get_u_info(uid_list):
    """Return ``[uid, name, profile_image_url]`` rows ordered by friends count.

    Each uid is looked up with ``user_search.search_by_id``.  Users the
    lookup does not find get placeholder name/image values and a friends
    count of ``-1`` for ordering purposes.

    :param uid_list: iterable of user ids.
    :returns: list of ``[uid, name, profile_image_url]`` lists, sorted by
        ``friends_count`` in descending order (the count itself is not
        included in the output).
    """
    entries = []
    for uid in uid_list:
        record = user_search.search_by_id(
            uid, fields=['_id', 'name', 'profile_image_url', 'friends_count'])
        if record:
            entry = [uid, record['name'], record['profile_image_url'],
                     record['friends_count']]
        else:
            entry = [uid, u'未知', u'no', -1]
        entries.append(entry)

    # Order on the friends count (index 3), then strip that column.
    entries.sort(key=lambda entry: entry[3], reverse=True)
    return [entry[:3] for entry in entries]
def community_result(community_user_list, topic, date, windowsize):
    """Collect weibos, top keywords and sentiment statistics for a community.

    The search window runs from ``end_ts - windowsize * Day`` to
    ``end_ts``, where ``end_ts = datetime2ts(date)``.  The searcher
    obtained from ``getXapianWeiboByTopic`` is queried with an ``$or``
    over every user in *community_user_list*.

    :param community_user_list: iterable of user ids; each is converted
        with ``int()`` when building the query.
    :param topic: topic passed through to ``getXapianWeiboByTopic``.
    :param date: end of the window, converted with ``datetime2ts``.
    :param windowsize: window length as a multiple of ``Day``.
    :returns: always a 4-tuple
        ``(sort_community_info, sort_top_keyword, new_sentiment_list,
        query_dict)``:

        * ``sort_community_info`` -- rows
          ``[_id, name, location, friends_count, followers_count,
          created_at, statuses_count, profile_image_url, text, date,
          reposts_count, source, geo, comments_count, sentiment_name,
          weibo_link, domain]`` sorted by ``reposts_count`` descending;
        * ``sort_top_keyword`` -- output of ``top_keywords`` sorted by its
          second element, descending;
        * ``new_sentiment_list`` -- ``[sentiment_name, num, ratio]`` per
          sentiment, with ``ratio = num / count``;
        * ``query_dict`` -- the query that was executed.

        When the search reports zero hits the first three elements are
        ``None``.
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    query_dict = {'$or': [{'user': int(uid)} for uid in community_user_list]}

    count, weibo_results = xapian_search_weibo.search(query=query_dict,
                                                      fields=weibo_fields_list)
    if count == 0:
        # Same arity as the normal return so callers can unpack both paths.
        return None, None, None, query_dict

    # Look each distinct user up only once instead of once per weibo.
    user_cache = {}
    community_info = []
    sentiment_count = {}
    for weibo in weibo_results():
        uid = weibo['user']
        _id = weibo['_id']
        if uid not in user_cache:
            user_cache[uid] = user_search.search_by_id(
                uid, fields=user_fields_list)
        user = user_cache[uid]
        if user:
            name = user['name']
            location = user['location']
            friends_count = user['friends_count']
            followers_count = user['followers_count']
            created_at = user['created_at']
            statuses_count = user['statuses_count']
            profile_image_url = user['profile_image_url']
        else:
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'

        # Separate name so the ``date`` parameter is not rebound.
        weibo_date = ts2date(weibo['timestamp'])
        sentiment = weibo['sentiment']
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)
        domain = uid2domain(uid)
        sentiment_count[sentiment] = sentiment_count.get(sentiment, 0) + 1

        community_info.append([
            _id, name, location, friends_count, followers_count, created_at,
            statuses_count, profile_image_url, weibo['text'], weibo_date,
            weibo['reposts_count'], weibo['source'], weibo['geo'],
            weibo['comments_count'], sentiment_name, weibo_link, domain,
        ])

    # Sort by repost count (index 10), highest first.
    sort_community_info = sorted(community_info, key=lambda row: row[10],
                                 reverse=True)

    mset = xapian_search_weibo.search(query=query_dict, max_offset=50,
                                      mset_direct=True)
    top_keyword = top_keywords(
        gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=50)
    sort_top_keyword = sorted(top_keyword, key=lambda item: item[1],
                              reverse=True)

    new_sentiment_list = []
    for sentiment, num in sentiment_count.items():
        sentiment_ch = emotions_kv[int(sentiment)]
        # count is non-zero here; float() avoids Python 2 integer division.
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])

    return sort_community_info, sort_top_keyword, new_sentiment_list, query_dict
def c_weibo_by_ts(topic, date, windowsize, uid, network_type, cid, rank_method):
    """Return the weibos of the community that node *uid* belongs to.

    The graph file for the topic is loaded with ``nx.read_gexf``, the
    community members are obtained from ``get_community_user(g, uid, cid)``
    and their weibos inside the window from ``end_ts - Day * windowsize``
    to ``end_ts`` (``end_ts = datetime2ts(date)``) are collected.

    :param topic: topic name, resolved with ``acquire_real_topic_id``.
    :param date: end of the window; also part of the graph file name.
    :param windowsize: window length as a multiple of ``Day``; also part
        of the graph file name.
    :param uid: node whose community is inspected.
    :param network_type: ``'source_graph'`` or ``'direct_superior_graph'``;
        selects which graph file is read.
    :param cid: community id passed to ``get_community_user``.
    :param rank_method: ``'reposts_count'`` sorts by repost count
        descending; any other value sorts by timestamp ascending.
    :returns: list of rows
        ``[_id, name, location, friends_count, followers_count, created_at,
        statuses_count, profile_image_url, text, date, reposts_count,
        source, geo, comments_count, sentiment_name, weibo_link, uid,
        timestamp]``, or ``None`` when the topic cannot be resolved or the
        search reports zero hits.
    :raises ValueError: if *network_type* is not one of the two supported
        values.
    """
    real_topic_id = acquire_real_topic_id(topic, date, windowsize)
    if not real_topic_id:
        # Same "nothing found" value as the zero-hit path below.
        return None

    # The topic exists; proceed with the computation.
    key_pre = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
    # Choose the directed graph used for the community computation.
    if network_type == 'source_graph':
        key = str(GRAPH_PATH) + key_pre + '_gg_graph.gexf'
    elif network_type == 'direct_superior_graph':
        key = str(GRAPH_PATH) + key_pre + '_ds_udg_graph.gexf'
    else:
        # Previously ``key`` stayed unbound and read_gexf failed with an
        # UnboundLocalError that hid the real cause.
        raise ValueError('unsupported network_type: %r' % (network_type,))
    g = nx.read_gexf(key)

    # List of nodes in the community that node ``uid`` belongs to.
    # (Original note: mind where the node's community attribute is stored.)
    community_user_list = get_community_user(g, uid, cid)

    end_ts = datetime2ts(date)
    start_ts = end_ts - Day * windowsize
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    query_dict = {
        '$or': [{'user': int(member)} for member in community_user_list]
    }
    count, weibo_results = xapian_search_weibo.search(query=query_dict,
                                                      fields=weibo_fields_list)
    if count == 0:
        return None

    # Look each distinct user up only once instead of once per weibo.
    user_cache = {}
    community_info = []
    for weibo in weibo_results():
        # Separate names so the ``uid`` / ``date`` parameters are not rebound.
        weibo_uid = weibo['user']
        _id = weibo['_id']
        if weibo_uid not in user_cache:
            user_cache[weibo_uid] = user_search.search_by_id(
                weibo_uid, fields=user_fields_list)
        user = user_cache[weibo_uid]
        if user:
            name = user['name']
            location = user['location']
            friends_count = user['friends_count']
            followers_count = user['followers_count']
            created_at = user['created_at']
            statuses_count = user['statuses_count']
            profile_image_url = user['profile_image_url']
        else:
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'

        timestamp = weibo['timestamp']
        weibo_date = ts2date(timestamp)
        sentiment_name = emotions_kv[weibo['sentiment']]
        weibo_link = weiboinfo2url(weibo_uid, _id)
        community_info.append([
            _id, name, location, friends_count, followers_count, created_at,
            statuses_count, profile_image_url, weibo['text'], weibo_date,
            weibo['reposts_count'], weibo['source'], weibo['geo'],
            weibo['comments_count'], sentiment_name, weibo_link, weibo_uid,
            timestamp,
        ])

    if rank_method == 'reposts_count':
        # Sort by repost count (index 10), highest first.
        return sorted(community_info, key=lambda row: row[10], reverse=True)
    # Sort by timestamp (index 17), oldest first.
    return sorted(community_info, key=lambda row: row[17])