Ejemplo n.º 1
0
def get_users(topic, begin_ts, end_ts):
    """Rank the users of an event by influence and store the top 100.

    Steps, all against Elasticsearch:

    1. Search ``event_text`` for documents whose ``en_name`` equals
       ``topic`` and whose ``timestamp`` lies in ``[begin_ts, end_ts)``,
       and collect the distinct ``uid`` values.
    2. Fetch those uids from the daily BCI index (``bci_day_pre`` + date
       string) and read each found document's ``user_index``.
    3. Keep the 100 users with the highest ``user_index``.
    4. Look up each user's profile; ``verified_type`` in ``auth_list``
       maps to ``auth_type``, anything else (including a failed lookup)
       maps to ``user_type``.
    5. Save the result as a JSON string in the ``user_results`` field of
       the event-analysis document whose id is ``topic``.

    :param topic: value matched against ``en_name``; also the id of the
        event-analysis document that is written.
    :param begin_ts: inclusive lower bound for ``timestamp``.
    :param end_ts: exclusive upper bound for ``timestamp``.
    :return: None. The result is only written to Elasticsearch.
    """
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'term': {'en_name': topic}},
                    {'range': {
                        'timestamp': {'gte': begin_ts, 'lt': end_ts}
                    }},
                ]
            }
        },
        'size': 999999999
    }
    hits = es_event.search(index=event_text, doc_type=event_text_type,
                           fields=['uid'], body=query_body)['hits']['hits']
    # With ``fields=[...]`` every requested field comes back as a list.
    uid_list = set(hit['fields']['uid'][0] for hit in hits)
    print(len(uid_list))

    if RUN_TYPE == 0:
        post = ts2datetimestr(datetime2ts(RUN_TEST_TIME))
    else:
        post = ts2datetimestr(time.time())
    bci_index_name = bci_day_pre + post

    print('%s %s %s' % (bci_index_name, bci_day_type, es_user_portrait))

    # Elasticsearch rejects a multi-get with no ids ("no documents to
    # get"), so skip the request when the search matched nothing.
    user_result = []
    if uid_list:
        user_result = es_bci.mget(index=bci_index_name,
                                  doc_type=bci_day_type,
                                  body={'ids': list(uid_list)})['docs']

    user_influence_dict = {}
    for doc in user_result:
        # Entries that failed (e.g. missing index) carry 'error' and no
        # 'found' key, hence .get() rather than indexing.
        if doc.get('found'):
            source = doc['_source']
            user_influence_dict[source['user']] = source['user_index']

    top_users = sorted(user_influence_dict.items(),
                       key=lambda pair: pair[1], reverse=True)[:100]

    user_dict = {}
    for uid, influence in top_users:
        try:
            profile = es_user_profile.get(index=profile_index_name,
                                          doc_type=profile_index_type,
                                          id=uid)
            is_auth = profile['_source']['verified_type'] in auth_list
        except Exception:
            # Best effort: a missing or malformed profile is treated as
            # an ordinary user instead of aborting the whole run.
            is_auth = False
        user_dict[uid] = {
            'user_type': auth_type if is_auth else user_type,
            'influ': influence,
        }

    payload = {'user_results': json.dumps(user_dict)}
    try:
        es_event.update(index=event_analysis_name, doc_type=event_type,
                        id=topic, body={'doc': payload})
    except Exception:
        # Presumably the document does not exist yet; create it instead.
        es_event.index(index=event_analysis_name, doc_type=event_type,
                       id=topic, body=payload)
def get_users(topic, begin_ts, end_ts, relation):
    """Rank an event's users by influence, link them in the graph, store them.

    Steps:

    1. Search the index named ``topic`` for documents whose ``en_name``
       equals ``topic`` and whose ``timestamp`` lies in
       ``[begin_ts, end_ts)``, and collect the distinct ``uid`` values.
    2. Fetch those uids from the daily BCI index (``bci_day_pre`` + date
       string) and read each found document's ``user_index``.
    3. Keep the 100 users with the highest ``user_index``.
    4. Look up each user's profile; ``verified_type`` in ``org_list``
       maps to ``auth_type``, anything else (including a failed lookup)
       maps to ``user_type``.
    5. If ``'discuss'`` is one of the ``&``-separated entries of
       ``relation``, create a node per user with ``create_person`` and
       pass the resulting ``discuss`` relations to ``nodes_rels``.
    6. Save the classification as a JSON string in the ``user_results``
       field of the event-analysis document whose id is ``topic``.

    :param topic: value matched against ``en_name``; also used as the
        searched index name and as the id of the written document.
    :param begin_ts: inclusive lower bound for ``timestamp``.
    :param end_ts: exclusive upper bound for ``timestamp``.
    :param relation: ``&``-separated string of relation names.
    :return: None. Results are only written to the external stores.
    """
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'term': {'en_name': topic}},
                    {'range': {
                        'timestamp': {'gte': begin_ts, 'lt': end_ts}
                    }},
                ]
            }
        },
        'size': 999999999
    }
    hits = es_event.search(index=topic, doc_type=event_text_type,
                           fields=['uid'], body=query_body)['hits']['hits']
    # With ``fields=[...]`` every requested field comes back as a list.
    uid_list = set(hit['fields']['uid'][0] for hit in hits)
    print(len(uid_list))

    if RUN_TYPE == 0:
        post = ts2datetimestr(datetime2ts(RUN_TEST_TIME))
    else:
        post = ts2datetimestr(time.time())
    bci_index_name = bci_day_pre + post

    print('%s %s %s' % (bci_index_name, bci_day_type, es_user_portrait))

    # Elasticsearch rejects a multi-get with no ids ("no documents to
    # get"), so skip the request when the search matched nothing.
    user_result = []
    if uid_list:
        user_result = es_bci.mget(index=bci_index_name,
                                  doc_type=bci_day_type,
                                  body={'ids': list(uid_list)})['docs']

    user_influence_dict = {}
    for doc in user_result:
        # Entries that failed (e.g. missing index) carry 'error' and no
        # 'found' key, hence .get() rather than indexing.
        if doc.get('found'):
            source = doc['_source']
            user_influence_dict[source['user']] = source['user_index']

    top_users = sorted(user_influence_dict.items(),
                       key=lambda pair: pair[1], reverse=True)[:100]

    # The return value was never used; the call is kept in case
    # event_user_portrait has side effects. NOTE(review): confirm and
    # drop the call if it is a pure lookup.
    event_user_portrait([uid for uid, _ in top_users])

    user_dict = {}
    p_list = []  # uids classified as user_type
    a_list = []  # uids classified as auth_type
    for uid, influence in top_users:
        try:
            profile = es_user_profile.get(index=profile_index_name,
                                          doc_type=profile_index_type,
                                          id=uid)
            print(profile)
            verified_type = profile['_source']['verified_type']
            print(verified_type)
            is_org = verified_type in org_list
        except Exception:
            # Best effort: a missing or malformed profile is treated as
            # an ordinary user instead of aborting the whole run.
            is_org = False

        if is_org:
            u_type = auth_type
            a_list.append(uid)
        else:
            u_type = user_type
            p_list.append(uid)
        user_dict[uid] = {'user_type': u_type, 'influ': influence}
    print('%s %s' % (len(a_list), len(p_list)))

    if 'discuss' in relation.split('&'):
        # Each relation is [[2, topic], 'discuss', [kind, uid]] with kind 1
        # for p_list users and 0 for a_list users. NOTE(review): the numeric
        # codes are interpreted by nodes_rels; their meaning is not visible
        # here.
        rel_list = []
        for uid in p_list:
            created = create_person(people_node, people_primary, uid,
                                    node_index_name)
            if created != 'Node Wrong':
                rel_list.append([[2, topic], 'discuss', [1, uid]])
        for uid in a_list:
            created = create_person(org_node, org_primary, uid,
                                    org_index_name)
            if created != 'Node Wrong':
                rel_list.append([[2, topic], 'discuss', [0, uid]])
        try:
            nodes_rels(rel_list)
        except Exception as err:
            # Still best effort (the results below must be stored either
            # way), but the failure is reported instead of vanishing.
            print('nodes_rels failed: %r' % (err,))

    payload = {'user_results': json.dumps(user_dict)}
    try:
        es_event.update(index=event_analysis_name, doc_type=event_type,
                        id=topic, body={'doc': payload})
    except Exception:
        # Presumably the document does not exist yet; create it instead.
        es_event.index(index=event_analysis_name, doc_type=event_type,
                       id=topic, body=payload)
Ejemplo n.º 3
0
    if RUN_TYPE == 1:
        now_date = ts2datetime(ts - 24 * 3600)
    else:
        now_date = RUN_TEST_TIME
    bci_index_name = bci_day_pre + ''.join(now_date.split('-'))
    #print 'bci_index_name:', bci_index_name
    try:
        fansnum_max_results = es_bci.search(index=bci_index_name,
                                            doc_type=bci_day_type,
                                            body=query_body)['hits']['hits']
    except Exception, e:
        raise e
    fansnum_max = int(fansnum_max_results[0]['_source']['user_fansnum'])
    #user_fansnum_dict
    search_result = es_bci.mget(index=bci_index_name,
                                doc_type=bci_day_type,
                                body={'ids': uid_list},
                                _source=True)['docs']
    user_fansnum_dict = dict()
    for item in search_result:
        uid = item['_id']
        try:
            user_fansnum_dict[uid] = item['_source']['user_fansnum']
        except:
            user_fansnum_dict[uid] = 0
    return fansnum_max, user_fansnum_dict


def topic_en2ch(topic_label):
    insert_topic_label_list = []
    for en_label in topic_label:
        ch_label = topic_en2ch_dict[en_label]