def get_influence(uid):
    """Return the 'user_index' influence score for *uid*.

    Looks up the document *uid* in the Elasticsearch index named after
    yesterday's date (formatted 'YYYYMMDD'), doc type 'bci', and returns
    its 'user_index' field.  Returns 0 when the document is missing or
    the lookup fails for any reason.

    NOTE(review): this definition is shadowed by an identical one
    defined later in the file; kept for parity.
    """
    now_ts = time.time()
    # ts2datetime presumably yields 'YYYY-MM-DD' (the split/join below
    # strips the dashes to build the index name) — confirm with caller.
    now_date = ts2datetime(now_ts - 3600 * 24)
    # test override (disabled) — was left active and forced every query
    # onto the hard-coded 2013-09-07 index:
    #now_date = '2013-09-07'
    index_time = ''.join(now_date.split('-'))
    index_type = 'bci'
    try:
        return es.get(index=index_time, id=uid,
                      doc_type=index_type)['_source']['user_index']
    except Exception:
        # Missing document / unreachable index: treat as zero influence.
        return 0
def get_influence(uid):
    """Return the 'user_index' influence score for *uid*.

    Looks up the document *uid* in the Elasticsearch index named after
    yesterday's date (formatted 'YYYYMMDD'), doc type 'bci', and returns
    its 'user_index' field.  Returns 0 when the document is missing or
    the lookup fails for any reason.
    """
    now_ts = time.time()
    # ts2datetime presumably yields 'YYYY-MM-DD' (the split/join below
    # strips the dashes to build the index name) — confirm with caller.
    now_date = ts2datetime(now_ts - 3600 * 24)
    # test override (kept disabled):
    #now_date = '2013-09-07'
    index_time = ''.join(now_date.split('-'))
    index_type = 'bci'
    try:
        return es.get(index=index_time, id=uid,
                      doc_type=index_type)['_source']['user_index']
    except Exception:
        # Missing document / unreachable index: treat as zero influence.
        return 0
def get_importance(uid, domain, topic):
    """Compute the importance score for user *uid*.

    *domain* and *topic* are space-separated strings; each recognized
    item contributes its weight from domain_weight_dict /
    topic_weight_dict (unknown items contribute nothing).  The final
    score also folds in the user's fan count and total retweeted count
    from yesterday's 'bci' Elasticsearch index, combined via
    importance_weight_dict.  Returns 0 when the user's document cannot
    be fetched.
    """
    # Sum weights of the user's known domains.  Loop variables renamed
    # so the parameters are no longer clobbered (original shadowed them).
    domain_result = 0
    for d in domain.split(' '):
        try:
            domain_result += domain_weight_dict[d]
        except KeyError:
            pass  # unknown domain: ignore
    # Sum weights of the user's known topics.
    topic_result = 0
    for t in topic.split(' '):
        try:
            topic_result += topic_weight_dict[t]
        except KeyError:
            pass  # unknown topic: ignore
    # Index for yesterday's date, named 'YYYYMMDD'.
    now_ts = time.time()
    date = ts2datetime(now_ts - 3600 * 24)
    # test override (disabled) — was left active and forced every query
    # onto the hard-coded 2013-09-07 index:
    #date = '2013-09-07'
    index_time = ''.join(date.split('-'))
    index_type = 'bci'
    try:
        es_result = es.get(index=index_time, doc_type=index_type,
                           id=uid)['_source']
        fansnum = es_result['user_fansnum']
        retweetednum = es_result['origin_weibo_retweeted_total_number'] + \
                       es_result['retweeted_weibo_retweeted_total_number']
        return importance_weight_dict['fansnum'] * fansnum + \
               importance_weight_dict['retweeted_num'] * retweetednum + \
               importance_weight_dict['domain'] * domain_result + \
               importance_weight_dict['topic'] * topic_result
    except Exception:
        # Missing document / unreachable index: treat as zero importance.
        return 0
    # NOTE(review): the statements below are the tail of a function whose
    # `def` lies above this chunk — it runs the prepared query_body against
    # the 'bci' doc type and collects the uid of every hit.
    result = es_cluster.search(index=index_name, doc_type="bci",
                               body=query_body)['hits']['hits']
    sensitive_uid = []
    for item in result:
        sensitive_uid.append(item['_source']['uid'])
    return sensitive_uid


if __name__ == '__main__':
    # One-off migration: tag each portrait in 'sensitive_user_portrait'
    # with type=1 when it has any sensitive words, else type=0.
    '''
    f = open('sensitive_uid_list.txt', 'wb')
    uid_list = search_sensitive_weibo('20130904')
    for uid in uid_list:
        f.write(str(uid) + '\n')
    f.close()
    '''
    # NOTE(review): file handle is never closed and would be cleaner as a
    # `with` block — left as-is in this documentation-only pass.
    f = open('sensitive_uid_list.txt', 'rb')
    for line in f:
        uid = line.strip()
        try:
            result = es_cluster.get(index='sensitive_user_portrait',
                                    doc_type='user', id=uid)['_source']
        except:
            # Portrait missing for this uid: report it and move on.
            print uid
            continue
        # NOTE(review): document fetched via es_cluster but updated via es —
        # confirm both clients point at the same cluster.
        if result['sensitive_words_string']:
            es.update(index='sensitive_user_portrait', doc_type='user',
                      id=uid, body={"doc": {"type": 1}})
        else:
            es.update(index='sensitive_user_portrait', doc_type='user',
                      id=uid, body={"doc": {"type": 0}})