def get_user_geo(uid): result = [] user_geo_result = {} user_ip_dict = {} user_ip_result = dict() now_ts = time.time() now_date = ts2datetime(now_ts) #run_type if RUN_TYPE == 1: ts = datetime2ts(now_date) else: ts = datetime2ts(RUN_TEST_TIME) for i in range(1, 8): ts = ts - 3600*24 results = r_cluster.hget('new_ip_'+str(ts), uid) if results: ip_dict = json.loads(results) for ip in ip_dict: ip_count = len(ip_dict[ip].split('&')) try: user_ip_result[ip] += ip_count except: user_ip_result[ip] = ip_count user_geo_dict = ip2geo(user_ip_result) user_geo_result = sorted(user_geo_dict.items(), key=lambda x:x[1], reverse=True) return user_geo_result
def get_user_geo(uid):
    """Aggregate a user's IP activity over the previous 7 days into geo counts.

    Identical logic to the r_cluster variant above, but reads the
    'new_ip_<day_ts>' hashes from r_cluster2. Returns [(geo, count), ...]
    sorted by count descending.

    Fixes: removed the unused locals `result` and `user_ip_dict`, and
    replaced the bare try/except accumulation with dict.get so unrelated
    errors are no longer swallowed.
    """
    user_ip_result = {}
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    # run_type: 1 = production (use today); otherwise replay the fixed test date
    if RUN_TYPE == 1:
        ts = datetime2ts(now_date)
    else:
        ts = datetime2ts(RUN_TEST_TIME)
    for i in range(1, 8):
        ts = ts - 3600 * 24  # step back one day per iteration
        results = r_cluster2.hget('new_ip_' + str(ts), uid)
        if results:
            ip_dict = json.loads(results)
            for ip in ip_dict:
                # stored value is a '&'-joined string of event timestamps
                ip_count = len(ip_dict[ip].split('&'))
                user_ip_result[ip] = user_ip_result.get(ip, 0) + ip_count
    user_geo_dict = ip2geo(user_ip_result)
    user_geo_result = sorted(user_geo_dict.items(), key=lambda x: x[1], reverse=True)
    return user_geo_result
def search_mention(now_ts, uid):
    """Collect who this user @-mentioned during the previous 7 days.

    Returns [top_20_sorted, total] where top_20_sorted is a list of
    (at_uid, ['', count]) pairs sorted by count descending (the first slot
    is a placeholder for a uname that is not resolved here), and total is
    the number of distinct mentioned uids. Returns [None, 0] when no
    mention data exists for the window.
    """
    day_ts = datetime2ts(ts2datetime(now_ts))
    stat_results = dict()
    results = dict()
    for _ in range(7):
        day_ts -= 24 * 3600
        try:
            raw = r_cluster.hget('at_' + str(day_ts), str(uid))
        except:
            raw = ''
        if not raw:
            continue
        day_counts = json.loads(raw)
        for at_uid, count in day_counts.items():
            if at_uid in stat_results:
                stat_results[at_uid] += count
            else:
                stat_results[at_uid] = count
    for at_uid in stat_results:
        # uname resolution is intentionally skipped; keep the empty slot
        results[at_uid] = ['', stat_results[at_uid]]
    if not results:
        return [None, 0]
    sort_results = sorted(results.items(), key=lambda x: x[1][1], reverse=True)
    return [sort_results[:20], len(results)]
def get_geo_track(uid): date_results = [] # {'2013-09-01':[(geo1, count1),(geo2, count2)], '2013-09-02'...} now_ts = time.time() now_date = ts2datetime(now_ts) #test now_date = '2013-09-08' ts = datetime2ts(now_date) city_list = [] city_set = set() for i in range(7, 0, -1): timestamp = ts - i*24*3600 #print 'timestamp:', ts2datetime(timestamp) ip_dict = dict() results = r_cluster.hget('ip_'+str(timestamp), uid) ip_dict = dict() date = ts2datetime(timestamp) date_key = '-'.join(date.split('-')[1:]) if results: ip_dict = json.loads(results) geo_dict = ip_dict2geo(ip_dict) city_list.extend(geo_dict.keys()) sort_geo_dict = sorted(geo_dict.items(), key=lambda x:x[1], reverse=True) date_results.append([date_key, sort_geo_dict[:2]]) else: date_results.append([date_key, []]) print 'results:', date_results city_set = set(city_list) geo_conclusion = get_geo_conclusion(uid, city_set) return [date_results, geo_conclusion]
def search_location(now_ts, uid):
    """Summarize a user's active locations over the previous 7 days.

    Aggregates per-IP counts from the 'ip_<day_ts>' hashes for the 7 days
    before now_ts, groups IPs by city via ip2city(), and returns
    {city: {ip: count, ...}, ..., 'description': text} — note the
    'description' key lives beside the city keys in the same dict.
    """
    day_ts = datetime2ts(ts2datetime(now_ts))
    stat_results = dict()
    results = dict()
    for _ in range(7):
        day_ts -= 24 * 3600
        raw = r_cluster.hget('ip_' + str(day_ts), str(uid))
        if not raw:
            continue
        day_counts = json.loads(raw)
        for ip, count in day_counts.items():
            if ip in stat_results:
                stat_results[ip] += count
            else:
                stat_results[ip] = count
    for ip, count in stat_results.items():
        city = ip2city(ip)
        if not city:
            continue
        if city in results:
            results[city][ip] = count
        else:
            results[city] = {ip: count}
    description = active_geo_description(results)
    results['description'] = description
    return results
def get_user_geo(uid):
    """Return [(geo, count), ...] for a user's last 7 days, sorted descending.

    Reads per-IP counts from the 'ip_<day_ts>' hashes and converts IPs to
    geo locations with ip2geo(). The window is pinned to the recorded
    2013-09-08 test dataset.
    """
    user_ip_result = dict()
    now_date = ts2datetime(time.time())
    ts = datetime2ts(now_date)
    # test: replay against the recorded 2013-09-08 dataset
    ts = datetime2ts('2013-09-08')
    for _ in range(7):
        ts -= 3600 * 24
        raw = r_cluster.hget('ip_' + str(ts), uid)
        if not raw:
            continue
        ip_dict = json.loads(raw)
        for ip, count in ip_dict.items():
            if ip in user_ip_result:
                user_ip_result[ip] += count
            else:
                user_ip_result[ip] = count
    user_geo_dict = ip2geo(user_ip_result)
    return sorted(user_geo_dict.items(), key=lambda x: x[1], reverse=True)
def new_get_user_location(uid): results = {} now_date = ts2datetime(time.time()) now_date_ts = datetime2ts(now_date) #run type if RUN_TYPE == 0: now_date_ts = datetime2ts(RUN_TEST_TIME) - DAY now_date = ts2datetime(now_date_ts) #now ip try: ip_time_string = r_cluster.hget('new_ip_'+str(now_date_ts), uid) except Exception, e: raise e
def search_activity(now_ts, uid):
    # Builds a user's weekly activity profile from the 'activity_<day_ts>'
    # hashes. Returns a dict with:
    #   'activity_trend' : [(bucket_ts, count), ...] ascending over the 7 days
    #                      before now_ts, zero-filled (6 buckets of 4h per day:
    #                      6 * 15*60*16 = 86400s covers a full day);
    #   'activity_time'  : top-2 (within-day bucket offset, count) pairs;
    #   'description'    : text from active_time_description().
    date = ts2datetime(now_ts)
    print 'date:', date
    ts = datetime2ts(date)
    timestamp = ts
    print 'date-timestamp:', ts
    activity_result = dict()
    results = dict()
    segment_result = dict()
    for i in range(1, 8):
        ts = timestamp - 24 * 3600*i
        try:
            result_string = r_cluster.hget('activity_' + str(ts), str(uid))
        except:
            result_string = ''
        if not result_string:
            continue
        result_dict = json.loads(result_string)
        for time_segment in result_dict:
            # int(time_segment)/16*15*60*16 floors a 15-minute-segment index
            # into its 4-hour bucket (Python 2 integer division); adding ts
            # anchors the bucket to the day. NOTE(review): assumes time_segment
            # is a 15-minute segment index -- confirm against the writer side.
            try:
                results[int(time_segment)/16*15*60*16+ts] += result_dict[time_segment]
            except:
                results[int(time_segment)/16*15*60*16+ts] = result_dict[time_segment]
            # segment_result keys are day-independent within-day offsets,
            # so the same 4-hour slot accumulates across all 7 days
            try:
                segment_result[int(time_segment)/16*15*60*16] += result_dict[time_segment]
            except:
                segment_result[int(time_segment)/16*15*60*16] = result_dict[time_segment]
    # zero-fill every 4-hour bucket of the window so the trend is gapless
    trend_list = []
    for i in range(1,8):
        ts = timestamp - i*24*3600
        for j in range(0, 6):
            time_seg = ts + j*15*60*16
            if time_seg in results:
                trend_list.append((time_seg, results[time_seg]))
            else:
                trend_list.append((time_seg, 0))
    sort_trend_list = sorted(trend_list, key=lambda x:x[0], reverse=False)
    activity_result['activity_trend'] = sort_trend_list
    sort_segment_list = sorted(segment_result.items(), key=lambda x:x[1], reverse=True)
    activity_result['activity_time'] = sort_segment_list[:2]
    description = active_time_description(segment_result)
    activity_result['description'] = description
    return activity_result
def get_user_hashtag(uid):
    """Return the user's hashtag usage over the past 7 days as
    [(hashtag, count), ...] sorted by count descending.

    Reads the 'hashtag_<day_ts>' hashes; the window is pinned to the
    recorded 2013-09-08 test dataset.
    """
    counts = {}
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    ts = datetime2ts(now_date)
    # test: replay against the recorded 2013-09-08 dataset
    ts = datetime2ts('2013-09-08')
    for _ in range(7):
        ts -= 3600 * 24
        raw = r_cluster.hget('hashtag_' + str(ts), uid)
        if not raw:
            continue
        for hashtag, count in json.loads(raw).items():
            counts[hashtag] = counts.get(hashtag, 0) + count
    return sorted(counts.items(), key=lambda x: x[1], reverse=True)
def get_user_trend(uid):
    """Return [x_axis, y_axis] of a user's activity over the previous 7 days.

    x_axis holds 4-hour bucket timestamps (newest first) and y_axis the
    activity counts recorded for each bucket (0 when absent). The anchor
    day is today when RUN_TYPE == 1, otherwise RUN_TEST_TIME.
    """
    now_ts = time.time()
    date = ts2datetime(now_ts)
    # run_type: 1 = production (today), otherwise replay RUN_TEST_TIME
    if RUN_TYPE == 1:
        timestamp = datetime2ts(date)
    else:
        timestamp = datetime2ts(RUN_TEST_TIME)
    results = dict()
    for day in range(1, 8):
        day_ts = timestamp - 24 * 3600 * day
        try:
            raw = r_cluster.hget('activity_' + str(day_ts), str(uid))
        except:
            raw = ''
        if not raw:
            continue
        for segment, count in json.loads(raw).items():
            # floor the 15-minute segment index into its 4-hour bucket
            bucket = int(segment) / 16 * 15 * 60 * 16 + day_ts
            if bucket in results:
                results[bucket] += count
            else:
                results[bucket] = count
    trend_list = []
    for day in range(1, 8):
        day_ts = timestamp - day * 24 * 3600
        for j in range(0, 6):
            time_seg = day_ts + j * 15 * 60 * 16
            trend_list.append((time_seg, results.get(time_seg, 0)))
    sort_trend_list = sorted(trend_list, key=lambda x: x[0], reverse=True)
    x_axis = [item[0] for item in sort_trend_list]
    y_axis = [item[1] for item in sort_trend_list]
    return [x_axis, y_axis]
def search_mention(uid):
    """Union the user's @-mention dicts over the previous 7 days.

    Reads the 'at_<day_ts>' hashes from r_cluster for each of the 7 days
    before today (or before RUN_TEST_TIME when RUN_TYPE == 0) and merges
    the per-day {mentioned: count} dicts with union_dict(). Returns {}
    when the week holds no data.

    Fixes two NameErrors: the redis key used the undefined name `ts`
    (the loop variable is `iter_ts`), and the json.loads call referenced
    `results_string` instead of `result_string`.
    """
    now_date_ts = datetime2ts(ts2datetime(time.time()))
    # run type: 0 = offline/test replay against RUN_TEST_TIME
    if RUN_TYPE == 0:
        now_date_ts = datetime2ts(RUN_TEST_TIME)
    day_result_dict_list = []
    for i in range(7, 0, -1):
        iter_ts = now_date_ts - i * DAY
        try:
            result_string = r_cluster.hget('at_' + str(iter_ts), str(uid))
        except:
            result_string = ''
        if not result_string:
            continue
        day_result_dict = json.loads(result_string)
        day_result_dict_list.append(day_result_dict)
    if day_result_dict_list:
        week_result_dict = union_dict(day_result_dict_list)
    else:
        week_result_dict = {}
    return week_result_dict
def get_user_hashtag(uid):
    """Aggregate the user's hashtag counts over the previous 7 days.

    Returns [(hashtag, count), ...] sorted by count descending. The anchor
    day is today when RUN_TYPE == 1, otherwise RUN_TEST_TIME.
    """
    user_hashtag_result = {}
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    # run_type: 1 = production (today), otherwise replay RUN_TEST_TIME
    ts = datetime2ts(now_date) if RUN_TYPE == 1 else datetime2ts(RUN_TEST_TIME)
    for _ in range(7):
        ts -= 3600 * 24
        payload = r_cluster.hget('hashtag_' + str(ts), uid)
        if not payload:
            continue
        for hashtag, count in json.loads(payload).items():
            user_hashtag_result[hashtag] = user_hashtag_result.get(hashtag, 0) + count
    return sorted(user_hashtag_result.items(), key=lambda x: x[1], reverse=True)
def weibo_count(ts='1480176000'):
    """Top-20 hashtags across all uids listed in uid.txt for one day bucket.

    `ts` is the day timestamp used in the redis key 'hashtag_<ts>'; the
    default keeps the originally hard-coded bucket so existing callers are
    unchanged. Returns a JSON string of [[hashtag, count], ...] sorted by
    count descending, truncated to 20 entries.

    Fixes: the day bucket was hard-coded (now a defaulted parameter),
    `!= None` replaced with `is not None`, and the unused `today` local
    removed.
    """
    uid_list = weibo_get_uid_list('uid.txt')
    hashtag_totals = {}
    for uid in uid_list:
        hashtag = r_cluster.hget('hashtag_' + str(ts), uid)
        if hashtag is not None:
            hashtag = hashtag.encode('utf8')
            hashtag = json.loads(hashtag)
            for k, v in hashtag.iteritems():
                hashtag_totals[k] = hashtag_totals.get(k, 0) + v
    top20 = sorted(hashtag_totals.items(), key=lambda x: x[1], reverse=True)[:20]
    return json.dumps(top20)
def get_user_trend(uid):
    """Weekly activity trend for a user.

    Returns [timestamps, counts] covering the 7 days before the anchor day
    (today when RUN_TYPE == 1, else RUN_TEST_TIME), newest 4-hour bucket
    first, zero-filled where no activity was recorded.
    """
    now_ts = time.time()
    date = ts2datetime(now_ts)
    # run_type: 1 = production (today), otherwise replay RUN_TEST_TIME
    if RUN_TYPE == 1:
        timestamp = datetime2ts(date)
    else:
        timestamp = datetime2ts(RUN_TEST_TIME)
    bucket_counts = dict()
    for offset in range(1, 8):
        day_start = timestamp - 24 * 3600 * offset
        try:
            raw = r_cluster.hget('activity_' + str(day_start), str(uid))
        except:
            raw = ''
        if not raw:
            continue
        day_counts = json.loads(raw)
        for seg in day_counts:
            # floor the 15-minute segment index into its 4-hour bucket
            key = int(seg) / 16 * 15 * 60 * 16 + day_start
            try:
                bucket_counts[key] += day_counts[seg]
            except:
                bucket_counts[key] = day_counts[seg]
    trend = []
    for offset in range(1, 8):
        day_start = timestamp - offset * 24 * 3600
        for j in range(0, 6):
            seg_ts = day_start + j * 15 * 60 * 16
            trend.append((seg_ts, bucket_counts.get(seg_ts, 0)))
    trend.sort(key=lambda x: x[0], reverse=True)
    return [[p[0] for p in trend], [p[1] for p in trend]]
def search_mention(now_ts, uid, top_count):
    # Builds the "mentioned users" portrait list for one user over the 7 days
    # before now_ts. Mention counts come from the 'at_<day_ts>' hashes (keyed
    # by uname here), profiles are looked up in ES by uname, and fan/weibo/
    # friend counts are joined in from the bci_history index. Returns a list
    # of dicts with keys: uid, uname, count, fansnum, weibo_count, friendsnum.
    # NOTE(review): `top_count` is accepted but never used -- confirm callers.
    # NOTE(review): the loop below rebinds the parameter `uid` to the profile
    # uid; the original uid is not needed afterwards, but the shadowing is
    # fragile if this function is extended.
    date = ts2datetime(now_ts)
    #evaluate_max_dict = get_evaluate_max()
    ts = datetime2ts(date)
    stat_results = dict()
    results = dict()
    uid_dict = {}
    for i in range(1,8):
        ts = ts - DAY
        try:
            result_string = r_cluster.hget('at_' + str(ts), str(uid))
        except:
            result_string = ''
        if not result_string:
            continue
        result_dict = json.loads(result_string)
        for at_uname in result_dict:
            try:
                stat_results[at_uname] += result_dict[at_uname]
            except:
                stat_results[at_uname] = result_dict[at_uname]
    sort_stat_results = sorted(stat_results.items(), key=lambda x:x[1], reverse=True)
    # print sort_stat_results
    out_portrait_list = []
    out_list = stat_results.keys()
    # use to get user information from user profile
    out_query_list = [{'match':{'uname':item}} for item in out_list]
    if len(out_query_list) != 0:
        query = [{'bool':{'should': out_query_list}}]
        try:
            out_profile_result = es_user_profile.search(index=profile_index_name, doc_type=profile_index_type, body={'query':{'bool':{'must':query}}, 'size':100})['hits']['hits']
        except:
            out_profile_result = []
    else:
        out_profile_result = []
    out_in_profile_list = []
    bci_search_id_list = []
    for out_item in out_profile_result:
        source = out_item['_source']
        uname = source['nick_name']
        uid = source['uid']
        location = source['location']
        friendsnum = source['friendsnum']
        # row layout: [uid, uname, mention_count, fansnum, location,
        #             friendsnum, weibo_count] -- blanks filled later
        out_portrait_list.append([uid, uname, stat_results[uname], '', location, friendsnum, ''])
        out_in_profile_list.append(uname)
        # use to search bci history
        bci_search_id_list.append(uid)
    # mentioned unames with no ES profile get a row with an empty uid
    out_out_profile_list = list(set(out_list) - set(out_in_profile_list))
    for out_out_item in out_out_profile_list:
        out_portrait_list.append(['', out_out_item, stat_results[out_out_item],'', '', '', ''])
    # add index from bci_history
    new_out_portrait_list = []
    try:
        bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={'ids': bci_search_id_list}, fields=['user_fansnum', 'weibo_month_sum', 'user_friendsnum'])['docs']
    except:
        bci_history_result = []
    # NOTE(review): the positional join below assumes the profile-matched rows
    # sit at the head of out_portrait_list in the same order as
    # bci_search_id_list; rows beyond len(bci_history_result) fall through to
    # the except and get blank stats -- confirm this alignment is intended.
    iter_count = 0
    for out_portrait_item in out_portrait_list:
        append_dict = {}
        try:
            bci_history_item = bci_history_result[iter_count]
        except:
            bci_history_item = {}
        new_out_portrait_item = out_portrait_item
        append_dict['uid'] = out_portrait_item[0]
        append_dict['uname'] = out_portrait_item[1]
        append_dict['count'] = out_portrait_item[2]
        if bci_history_item:
            if bci_history_item['found'] == True:
                fansnum = bci_history_item['fields']['user_fansnum'][0]
                user_weibo_count = bci_history_item['fields']['weibo_month_sum'][0]
                user_friendsnum = bci_history_item['fields']['user_friendsnum'][0]
            else:
                fansnum = ''
                user_weibo_count = ''
                user_friendsnum = ''
        else:
            fansnum = ''
            user_weibo_count = ''
            user_friendsnum = ''
        append_dict['fansnum'] = fansnum
        append_dict['weibo_count'] = user_weibo_count
        append_dict['friendsnum'] = user_friendsnum
        # new_out_portrait_item[3] = fansnum
        # new_out_portrait_item[6] = user_weibo_count
        # new_out_portrait_item[-2] = user_friendsnum
        # new_out_portrait_list.append(new_out_portrait_item)
        new_out_portrait_list.append(append_dict)
        iter_count += 1
        # print append_dict
    return new_out_portrait_list
    # fields: uid, name, mention count, fans count, registered location,
    # followees count, weibo count
def get_user_detail(date, input_result, status, user_type="influence", auth=""):
    """Assemble display rows for a list of users.

    `status` selects the uid source and the row layout:
      - 'show_in'         : input_result is a uid list; rows get
                            [uid, uname, location, fansnum, statusnum,
                             influence(, sensitive_words)], plus admin
                            columns when `auth` is set;
      - 'show_compute'    : input_result maps uid -> json [in_date, status];
      - 'show_in_history' : input_result maps uid -> in_status.
    Influence is read from the 'bci_<yyyymmdd>' index and log-normalized to
    a 0-100 scale against the day's maximum.

    Fixes: the no-data path of the 'show_in'/sensitive branch assigned the
    misspelled name `senstive_words`, leaving `sensitive_words` unbound and
    raising NameError on the following append; the 'show_in_history'
    sensitive branch had the same hole (no fallback assignment at all).
    """
    results = []
    if status == 'show_in':
        uid_list = input_result
    if status == 'show_compute':
        uid_list = input_result.keys()
    if status == 'show_in_history':
        uid_list = input_result.keys()
    if date != 'all':
        index_name = 'bci_' + ''.join(date.split('-'))
    else:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        index_name = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids': uid_list}, _source=True)['docs']
    user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids': uid_list}, _source=True)['docs']
    max_evaluate_influ = get_evaluate_max(index_name)
    for i in range(0, len(uid_list)):
        uid = uid_list[i]
        bci_dict = user_bci_result[i]
        profile_dict = user_profile_result[i]
        try:
            bci_source = bci_dict['_source']
        except:
            bci_source = None
        if bci_source:
            influence = bci_source['user_index']
            # log-normalize onto 0-100 against the day's maximum influence
            influence = math.log(influence / max_evaluate_influ['user_index'] * 9 + 1, 10)
            influence = influence * 100
        else:
            influence = ''
        try:
            profile_source = profile_dict['_source']
        except:
            profile_source = None
        if profile_source:
            uname = profile_source['nick_name']
            location = profile_source['user_location']
            fansnum = profile_source['fansnum']
            statusnum = profile_source['statusnum']
        else:
            uname = ''
            location = ''
            fansnum = ''
            statusnum = ''
        if status == 'show_in':
            if user_type == "sensitive":
                tmp_ts = datetime2ts(date) - DAY
                tmp_data = r_cluster.hget("sensitive_" + str(tmp_ts), uid)
                if tmp_data:
                    sensitive_dict = json.loads(tmp_data)
                    sensitive_words = sensitive_dict.keys()
                else:
                    sensitive_words = []  # fixed: was misspelled `senstive_words`
                results.append([uid, uname, location, fansnum, statusnum, influence, sensitive_words])
            else:
                results.append([uid, uname, location, fansnum, statusnum, influence])
            if auth:
                hashname_submit = "submit_recomment_" + date
                tmp_data = json.loads(r.hget(hashname_submit, uid))
                recommend_list = (tmp_data['operation']).split('&')
                admin_list = []
                admin_list.append(tmp_data['system'])
                admin_list.append(list(set(recommend_list)))
                admin_list.append(len(recommend_list))
                results[-1].extend(admin_list)
        if status == 'show_compute':
            in_date = json.loads(input_result[uid])[0]
            compute_status = json.loads(input_result[uid])[1]
            if compute_status == '1':
                compute_status = '3'
            results.append([uid, uname, location, fansnum, statusnum, influence, in_date, compute_status])
        if status == 'show_in_history':
            in_status = input_result[uid]
            if user_type == "sensitive":
                tmp_ts = datetime2ts(date) - DAY
                tmp_data = r_cluster.hget("sensitive_" + str(tmp_ts), uid)
                sensitive_words = []  # fixed: was unbound when tmp_data is empty
                if tmp_data:
                    sensitive_dict = json.loads(tmp_data)
                    sensitive_words = sensitive_dict.keys()
                results.append([uid, uname, location, fansnum, statusnum, influence, in_status, sensitive_words])
            else:
                results.append([uid, uname, location, fansnum, statusnum, influence, in_status])
    return results
sort_ip_timestamp = sorted(ip_max_timestamp_list, key=lambda x:int(x[1]), reverse=True) day_ip_list = [ip_item[0] for ip_item in sort_ip_timestamp] try: now_ip = sort_ip_timestamp[0][0] now_city = ip2city(now_ip) except: now_ip = '' now_city = '' results['now_ip'] = [now_ip, now_city] #main ip day_result = {} week_result = {} for i in range(7, 0, -1): timestamp = now_date_ts - i * DAY try: ip_time_string = r_cluster.hget('new_ip_'+str(timestamp), uid) except: ip_time_string = {} if ip_time_string: ip_time_dict = json.loads(ip_time_string) else: ip_time_dict = {} for ip in ip_time_dict: ip_time_list = ip_time_dict[ip].split('&') for ip_timestamp in ip_time_list: ip_timesegment = (int(ip_timestamp) - timestamp) / IP_TIME_SEGMENT if ip_timesegment not in day_result: day_result[ip_timesegment] = {} try: day_result[ip_timesegment][ip] += 1 except:
def get_user_detail(date, input_result, status, user_type="influence", auth=""):
    # Variant of get_user_detail that reads influence from the bci_ index of
    # the day BEFORE `date` (bci_date below). `status` selects the uid source
    # and row layout: 'show_in' takes a uid list, 'show_compute' and
    # 'show_in_history' take uid-keyed dicts. Influence is log-normalized to
    # a 0-100 scale against the day's maximum from get_evaluate_max().
    bci_date = ts2datetime(datetime2ts(date) - DAY)
    results = []
    if status=='show_in':
        uid_list = input_result
    if status=='show_compute':
        uid_list = input_result.keys()
    if status=='show_in_history':
        uid_list = input_result.keys()
    if date!='all':
        index_name = 'bci_' + ''.join(bci_date.split('-'))
    else:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        index_name = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids':uid_list}, _source=True)['docs']
    user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list}, _source=True)['docs']
    max_evaluate_influ = get_evaluate_max(index_name)
    for i in range(0, len(uid_list)):
        uid = uid_list[i]
        bci_dict = user_bci_result[i]
        profile_dict = user_profile_result[i]
        try:
            bci_source = bci_dict['_source']
        except:
            bci_source = None
        if bci_source:
            influence = bci_source['user_index']
            # log-normalize onto 0-100 against the day's maximum influence
            influence = math.log(influence/max_evaluate_influ['user_index'] * 9 + 1 ,10)
            influence = influence * 100
        else:
            influence = ''
        try:
            profile_source = profile_dict['_source']
        except:
            profile_source = None
        if profile_source:
            uname = profile_source['nick_name']
            location = profile_source['user_location']
            fansnum = profile_source['fansnum']
            statusnum = profile_source['statusnum']
        else:
            uname = ''
            location = ''
            fansnum = ''
            statusnum = ''
        if status == 'show_in':
            if user_type == "sensitive":
                tmp_ts = datetime2ts(date) - DAY
                tmp_data = r_cluster.hget("sensitive_"+str(tmp_ts), uid)
                if tmp_data:
                    sensitive_dict = json.loads(tmp_data)
                    sensitive_words = sensitive_dict.keys()
                else:
                    sensitive_words = []
                results.append([uid, uname, location, fansnum, statusnum, influence, sensitive_words])
            else:
                results.append([uid, uname, location, fansnum, statusnum, influence])
            if auth:
                hashname_submit = "submit_recomment_" + date
                tmp_data = json.loads(r.hget(hashname_submit, uid))
                recommend_list = (tmp_data['operation']).split('&')
                admin_list = []
                admin_list.append(tmp_data['system'])
                admin_list.append(list(set(recommend_list)))
                admin_list.append(len(recommend_list))
                results[-1].extend(admin_list)
        if status == 'show_compute':
            in_date = json.loads(input_result[uid])[0]
            compute_status = json.loads(input_result[uid])[1]
            if compute_status == '1':
                compute_status = '3'
            results.append([uid, uname, location, fansnum, statusnum, influence, in_date, compute_status])
        if status == 'show_in_history':
            in_status = input_result[uid]
            if user_type == "sensitive":
                tmp_ts = datetime2ts(date) - DAY
                tmp_data = r_cluster.hget("sensitive_"+str(tmp_ts), uid)
                if tmp_data:
                    sensitive_dict = json.loads(tmp_data)
                    sensitive_words = sensitive_dict.keys()
                # NOTE(review): when tmp_data is empty, sensitive_words is never
                # assigned in this branch, so the append below would raise
                # NameError/UnboundLocalError -- the sibling 'show_in' branch
                # sets a [] fallback; this one appears to be missing it. Confirm.
                results.append([uid, uname, location, fansnum, statusnum, influence, in_status, sensitive_words])
            else:
                results.append([uid, uname, location, fansnum, statusnum, influence, in_status])
    return results