import time
import json
import math

# Project-level helpers (ts2datetime, datetime2ts, r_cluster, redis_cluster,
# es_user_profile, es_bci, pre_influence_index, get_top_value,
# identify_uid_list_in) are expected from the surrounding package.


def search_mention(uid, sensitive):
    # Aggregate who mentioned this uid over the last 7 days and return the
    # top-20 mentioners plus the total number of distinct mentioners.
    date = ts2datetime(time.time()).replace('-', '')
    stat_results = dict()
    results = dict()
    test_ts = time.time()
    # pinned to the sample data set; production would keep the current time
    test_ts = datetime2ts('2013-09-07')
    for i in range(0, 7):
        ts = test_ts - i * 24 * 3600
        date = ts2datetime(ts).replace('-', '')
        if not sensitive:
            at_temp = r_cluster.hget('at_' + str(date), str(uid))
        else:
            at_temp = r_cluster.hget('sensitive_at_' + str(date), str(uid))
        if not at_temp:
            continue
        result_dict = json.loads(at_temp)
        for at_uid in result_dict:
            # accumulate per mentioned uid (original indexed by uid by mistake)
            if at_uid in stat_results:
                stat_results[at_uid] += result_dict[at_uid]
            else:
                stat_results[at_uid] = result_dict[at_uid]
    if not stat_results:
        return [None, 0]
    # flag which of the accumulated uids are already in the user library
    in_status = identify_uid_list_in(stat_results.keys())
    for at_uid in stat_results:
        if at_uid in in_status:
            results[at_uid] = [stat_results[at_uid], '1']
        else:
            results[at_uid] = [stat_results[at_uid], '0']
    sorted_results = sorted(results.items(), key=lambda x: x[1][0], reverse=True)
    return [sorted_results[0:20], len(results)]
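# Usage sketch (hypothetical uid; assumes the Redis cluster already holds
# 'at_<date>' / 'sensitive_at_<date>' hashes for the pinned test week):
#
#   mention_list, total = search_mention('1234567890', sensitive=False)
#   if mention_list:
#       for at_uid, (count, in_lib_flag) in mention_list:
#           # top-20 mentioning uids; '1' means the uid is already in the library
#           print('%s mentioned %s times (in library: %s)' % (at_uid, count, in_lib_flag))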
def get_sensitive_user_detail(uid_list, date, sensitive):
    # Build one row per uid: [uid, uname, location, fansnum, statusnum,
    # influence, sensitive_words].
    ts = datetime2ts(date)
    results = []
    index_name = pre_influence_index + str(date).replace('-', '')  # e.g. 20130901
    user_bci_results = es_bci.mget(index=index_name, doc_type='bci',
                                   body={'ids': uid_list}, _source=False,
                                   fields=['user_index'])['docs']
    user_profile_results = es_user_profile.mget(index='weibo_user', doc_type='user',
                                                body={'ids': uid_list},
                                                _source=True)['docs']
    top_influence_value = get_top_value('user_index', es_bci, index_name, 'bci')
    for i in range(0, len(uid_list)):
        personal_info = [''] * 6
        uid = uid_list[i]
        personal_info[0] = uid
        personal_info[1] = uid  # fall back to the uid when no nickname is found
        if user_profile_results[i]['found']:
            profile_dict = user_profile_results[i]['_source']
            uname = profile_dict['nick_name']
            if uname:
                personal_info[1] = uname
            personal_info[2] = profile_dict['user_location']
            personal_info[3] = profile_dict['fansnum']
            personal_info[4] = profile_dict['statusnum']
        if user_bci_results[i]['found']:
            try:
                tmp_bci = user_bci_results[i]['fields']['user_index'][0]
                # normalize BCI against the day's top value onto a 0-100 log scale
                influence = math.log(tmp_bci / float(top_influence_value) * 9 + 1, 10) * 100
                personal_info[5] = influence
            except Exception:
                personal_info[5] = 0
        else:
            personal_info[5] = 0
        if sensitive:
            sensitive_words = redis_cluster.hget('sensitive_' + str(ts), str(uid))
            if sensitive_words:
                sensitive_dict = json.loads(sensitive_words)
                personal_info.append(sensitive_dict.keys())
            else:
                personal_info.append([])
        else:
            personal_info.append([])
        results.append(personal_info)
    return results
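# Usage sketch (hypothetical uids and date; assumes the per-day BCI index and
# the 'weibo_user' profile index exist in the configured ES clusters):
#
#   details = get_sensitive_user_detail(['1234567890', '2345678901'],
#                                       '2013-09-07', sensitive=1)
#   for uid, uname, location, fansnum, statusnum, influence, words in details:
#       print('%s (%s) influence=%.1f sensitive_words=%s' % (uid, uname, influence, words))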