# -*- coding: utf-8 -*-
# Standard-library imports used by the snippets below; project-specific
# names (es_event, es_bci, es_user_profile, the index/type constants and
# helper functions) are assumed to come from the project's own modules.
import json
import re
import time


def save_ws_results_es(topic, ts, during, n_limit, province, city, weibos):

    #mappings_event_geo_province_weibos()
    #index_name = index_event_geo_province_weibos
    #index_type = type_event_geo_province_weibos

    #mappings_event_analysis_results(topic)
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results

    item = {}

    item['en_name'] = topic
    item['end_ts'] = ts
    item['range'] = during
    item['limit'] = n_limit
    item['province'] = province
    item['city'] = city
    item['weibo'] = json.dumps(weibos)
    
    id = topic + '_' + str(ts)  # str() in case ts is passed as an int timestamp

    try:
        item_exist = es_event.get(index=index_name,doc_type=index_type,id=id)['_source']
        es_event.update(index=index_name,doc_type=index_type,id=id,body={'doc':item})
    except Exception,e:
        es_event.index(index=index_name,doc_type=index_type,id=id,body=item)
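
# Usage sketch (assumes es_event and the index/type constants are configured,
# and that `weibos` is a JSON-serialisable list of weibo dicts; the argument
# values here are illustrative only):
#
#   save_ws_results_es('event_x', '1480003200', 3600, 100,
#                      u'北京', u'北京', weibo_list)
#
# The try/except above acts as an upsert: update the '<topic>_<ts>' document
# if it exists, otherwise index a new one.
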
def save_results_es(calc,
                    topic,
                    results,
                    during,
                    klimit=TOP_KEYWORDS_LIMIT,
                    wlimit=TOP_WEIBOS_LIMIT):

    if calc == 'time_results':

        id = topic

        #results = json.dumps(results)

        try:
            item_exist = es_event.get(index=event_analysis_name,
                                      doc_type=event_type,
                                      id=id)['_source']
            try:
                time_results = json.loads(item_exist['time_results'])
            except:
                time_results = []
            time_results.append(results)
            es_event.update(
                index=event_analysis_name,
                doc_type=event_type,
                id=id,
                body={'doc': {
                    'time_results': json.dumps(time_results)
                }})
        except Exception, e:
            es_event.index(index=event_analysis_name,
                           doc_type=event_type,
                           id=id,
                           body={'time_results': json.dumps(results)})


def get_users(topic, begin_ts, end_ts):
	uid_list = set()
	query_body = {
	    'query': {
	        'bool': {
	            'must': [
	                {'term': {'en_name': topic}},
	                {'range': {'timestamp': {'gte': begin_ts, 'lt': end_ts}}}
	            ]
	        }
	    },
	    # NOTE: Elasticsearch may cap or reject very large sizes via the
	    # index's max_result_window setting
	    'size': 999999999
	}
	result = es_event.search(index=event_text,doc_type=event_text_type, fields=['uid'],body=query_body)['hits']['hits']
	for i in result:
		uid_list.add(i['fields']['uid'][0])
	print len(uid_list)
	if RUN_TYPE == 0:
		post = datetime2ts(RUN_TEST_TIME) #datetimestr2ts(RUN_TEST_TIME) 
		post = ts2datetimestr(post)
	else:
		post = ts2datetimestr(time.time())
		
	print bci_day_pre + post, bci_day_type, es_user_portrait
	user_result = es_bci.mget(index=bci_day_pre + post, doc_type=bci_day_type, body={'ids': list(uid_list)})['docs']
	
	user_influence_dict = {}
	for i in user_result:
		#print i
		if i['found']:
			i = i['_source']
			user_influence_dict[i['user']] = i['user_index']
			#print i,type(i)
			
			#print i['activeness'],i['influence'],i['fansnum']

	user = sorted(user_influence_dict.iteritems(),key=lambda x:x[1],reverse=True)[:100]
	#print user
	user_dict = {}
	for i in user:
		try:
			result = es_user_profile.get(index=profile_index_name,doc_type=profile_index_type,id=i[0])
			u_type = result['_source']['verified_type']
			if u_type in auth_list:
				u_type = auth_type
			else:
				u_type = user_type
			user_dict[i[0]] = {'user_type':u_type,'influ':i[1]}
		except:
			user_dict[i[0]] = {'user_type':user_type,'influ':i[1]}


	try:
		es_event.update(index=event_analysis_name,doc_type=event_type,id=topic,body={'doc':{'user_results':json.dumps(user_dict)}})
	except Exception,e:
	    es_event.index(index=event_analysis_name,doc_type=event_type,id=topic,body={'user_results':json.dumps(user_dict)})
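
# user_results is stored as a JSON object keyed by uid:
#   {uid: {'user_type': <auth_type or user_type>, 'influ': <user_index score>}, ...}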

# Example #4

def save_rt_results_es(calc,
                       topic,
                       results,
                       during,
                       klimit=TOP_KEYWORDS_LIMIT,
                       wlimit=TOP_WEIBOS_LIMIT):

    #mappings_event_analysis_results(topic)
    index_name = event_analysis_name  #index_event_analysis_results
    index_type = event_type  #type_event_analysis_results

    if calc == 'sentiment_results':

        id = topic

        try:
            item_exist = es_event.get(index=index_name,
                                      doc_type=index_type,
                                      id=id)['_source']
            try:
                sentiment_results = json.loads(item_exist['sentiment_results'])
            except:
                sentiment_results = []
            sentiment_results.append(results)
            es_event.update(index=index_name,
                            doc_type=index_type,
                            id=id,
                            body={
                                'doc': {
                                    'sentiment_results':
                                    json.dumps(sentiment_results)
                                }
                            })
        except Exception, e:
            es_event.index(index=index_name,
                           doc_type=index_type,
                           id=id,
                           body={'sentiment_results': json.dumps(results)})
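
# The 'time_results' and 'sentiment_results' branches above share one pattern:
# load a JSON-encoded list field, append the new results, and upsert. A
# hypothetical helper (not part of the original module) factoring that out:
def _append_json_results(es, index, doc_type, doc_id, field, results):
    # Append `results` to the JSON list stored in `field`; if the document
    # does not exist yet, index the raw results (mirroring the behaviour of
    # the original except-branches above).
    try:
        source = es.get(index=index, doc_type=doc_type, id=doc_id)['_source']
        try:
            history = json.loads(source[field])
        except (KeyError, ValueError):
            history = []
        history.append(results)
        es.update(index=index, doc_type=doc_type, id=doc_id,
                  body={'doc': {field: json.dumps(history)}})
    except Exception:
        es.index(index=index, doc_type=doc_type, id=doc_id,
                 body={field: json.dumps(results)})
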
def compute_real_info(topic, begin_ts, end_ts, relation):
	info_dict = {}
	
	query_body = {
		'query': {
			'bool': {
				'must': [
					{'term': {'en_name': topic}},
					{'term': {'message_type': 1}},
					{'wildcard': {'text': '【*】*'}},
					{'range': {'timestamp': {'gte': begin_ts, 'lt': end_ts}}}
				]
			}
		},
		'size': 1,
		'sort': {'retweeted': {'order': 'desc'}}
	}
	result = es_event.search(index=topic, doc_type=event_text_type, body=query_body)['hits']['hits']
	# Extract the event's people, organizations, places and time from the
	# most-retweeted headline-style weibo
	print result[0]['_source']['text']
	basics = get_news_main(result[0]['_source']['text'])
	print basics
	info_dict['real_auth'] = basics['organization']
	info_dict['real_geo'] = basics['place']
	info_dict['real_time'] = basics['time']
	info_dict['real_person'] = basics['people']
	# Store 'join' relations (event joins organization / person)
	if('join' in relation.split('&')):
		rel_list = []
		if info_dict['real_auth'] !='NULL':
			resu = create_person(org_node,org_primary,info_dict['real_auth'],org_index_name)
			if resu != 'Node Wrong':
				rel_list.append([[2,topic],'join',[0,info_dict['real_auth']]])
		if info_dict['real_person'] !='NULL':
			resu = create_person(people_node,people_primary,info_dict['real_person'],node_index_name)
			if resu != 'Node Wrong':
				rel_list.append([[2,topic],'join',[1,info_dict['real_person']]])
		try:
			nodes_rels(rel_list)
		except:
			pass

	query_body = {
		'query': {
			'bool': {
				'must': [
					{'term': {'en_name': topic}},
					{'range': {'timestamp': {'gte': begin_ts, 'lt': end_ts}}}
				]
			}
		},
		'size': 10000
	}
	result = es_event.search(index=topic,doc_type=event_text_type,fields=['text'],body=query_body)['hits']['hits']
	text_list = []
	for i in result:
		text_list.append(i['fields']['text'][0])
	# print text_list

	# Event type: use the type stored on the task if present, else classify the texts
	try:
		event = es_event.get(index=event_task_name,doc_type=event_task_type,id=topic)['_source']
		info_dict['event_type'] = event['event_type']
	except:
		info_dict['event_type'] = cut_weibo(text_list)
	info_dict['topics'] = json.dumps(get_topic_word(text_list,10))
	
	keywords = get_keyword(''.join(text_list),2)
	info_dict['keywords'] = '&'.join([i[0] for i in keywords])
	info_dict['keywords_list'] = json.dumps(keywords)

	hashtag = get_hashtag(''.join(text_list))
	info_dict['hashtag_dict'] = json.dumps(hashtag)
	info_dict['hashtag'] = '&'.join(list(hashtag.keys()))
	

	try:
		es_event.update(index=event_analysis_name,doc_type=event_type,id=topic,body={'doc':info_dict})
	except Exception,e:
		es_event.index(index=event_analysis_name,doc_type=event_type,id=topic,body=info_dict)
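
# get_news_main() (used in compute_real_info above) is expected to return a
# dict with keys 'organization', 'place', 'time' and 'people', which are
# mapped onto the real_* fields of the analysis document.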

def get_users(topic, begin_ts, end_ts, relation):
	uid_list = set()
	query_body = {
		'query': {
			'bool': {
				'must': [
					{'term': {'en_name': topic}},
					# {'wildcard': {'text': '【*】*'}},
					{'range': {'timestamp': {'gte': begin_ts, 'lt': end_ts}}}
				]
			}
		},
		'size': 999999999
	}
	result = es_event.search(index=topic,doc_type=event_text_type, fields=['uid'],body=query_body)['hits']['hits']
	for i in result:
		uid_list.add(i['fields']['uid'][0])
	print len(uid_list)
	if RUN_TYPE == 0:
		post = datetime2ts(RUN_TEST_TIME) #datetimestr2ts(RUN_TEST_TIME) 
		post = ts2datetimestr(post)
	else:
		post = ts2datetimestr(time.time())
		
	print bci_day_pre + post, bci_day_type, es_user_portrait
	user_result = es_bci.mget(index=bci_day_pre + post, doc_type=bci_day_type, body={'ids': list(uid_list)})['docs']
	
	user_influence_dict = {}
	for i in user_result:
		# print i
		if i['found']:
			i = i['_source']
			user_influence_dict[i['user']] = i['user_index']
			#print i,type(i)
			
			#print i['activeness'],i['influence'],i['fansnum']

	user = sorted(user_influence_dict.iteritems(),key=lambda x:x[1],reverse=True)[:100]
	#print user
	not_in_user_list = event_user_portrait([i[0] for i in user])
	user_dict = {}
	p_list = []
	a_list = []
	for i in user:
		# if i[0] not in not_in_user_list:
		# print es_user_profile.get(index=profile_index_name,doc_type=profile_index_type,id=i[0])

		try:
			result = es_user_profile.get(index=profile_index_name,doc_type=profile_index_type,id=i[0])
			print result
			u_type = result['_source']['verified_type']
			print u_type
			if u_type in org_list:
				u_type = auth_type
				a_list.append(i[0])
			else:
				u_type = user_type
				p_list.append(i[0])
			user_dict[i[0]] = {'user_type':u_type,'influ':i[1]}

		except:
			user_dict[i[0]] = {'user_type':user_type,'influ':i[1]}
			p_list.append(i[0])
	print len(a_list),len(p_list)
	if('discuss' in relation.split('&')):
		rel_list = []
		for i in p_list:
			resu = create_person(people_node,people_primary,i,node_index_name)
			if resu != 'Node Wrong':
				rel_list.append([[2,topic],'discuss',[1,i]])
		for i in a_list:
			resu = create_person(org_node,org_primary,i,org_index_name)
			if resu != 'Node Wrong':
				rel_list.append([[2,topic],'discuss',[0,i]])
		try:
			nodes_rels(rel_list)
		except:
			pass


	try:
		es_event.update(index=event_analysis_name,doc_type=event_type,id=topic,body={'doc':{'user_results':json.dumps(user_dict)}})
	except Exception,e:
		es_event.index(index=event_analysis_name,doc_type=event_type,id=topic,body={'user_results':json.dumps(user_dict)})
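
# Relation triples passed to nodes_rels() take the form
#   [[node_type, name], relation, [node_type, name]]
# where, judging from the create_person calls above, the numeric node types
# appear to be 0 = organization, 1 = person, 2 = event (an inference from
# this code, not a documented constant), e.g.:
#
#   [[2, 'event_x'], 'discuss', [1, 'some_uid']]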

# Example #7

def compute_task(task):
    print task
    # task=['雾霾','type','1480003100','1480176000','1483500427743']
    task_id = task['_id']
    task = task['_source']
    topic = task['name']#task[0]#['name']
    #en_name = task['en_name']
    RUN_TYPE = 1  # local override: always take the production branch below
    if RUN_TYPE == 0:
        start_ts = 1480003200  # task['start_ts']
        begin_ts = 1480003200
        end_ts = 1480176000  # task['end_ts']
    else:
        start_ts = task['start_ts']
        begin_ts = task['start_ts']
        end_ts = task['end_ts']

    # Resume from the previous compute point when one was recorded
    try:
        start_ts = task['compute_ts']
        task['compute_ts'] = time.time()
    except:
        task['compute_ts'] = time.time()

    if end_ts > time.time():
        end_ts = time.time()
    submit_ts = task['submit_ts']  # int(task[4])
    # Optional relation computations: an '&'-joined string
    # (e.g. 'join&discuss&contain', matching the checks below)
    relation = task['relation_compute']  # task[5]

    keywords = task['keywords'].split('&')    # keywords or a mid
    #compute_status = task['status']
    # mid = task['mid']
    # task_id = 'event-'+str(start_ts)+'-'+str(end_ts)+'-'+str(submit_ts)
    en_name = task_id
    t1=time.time()
    re_mid = re.compile(r'^\d{16}$')  # a 16-digit task_id is treated as a weibo mid
    try:
        mid = re.match(re_mid,task_id).group()
    except:
        mid = ''
    exist_flag = exist(task_id)
    get_topic_weibo(topic,task_id,start_ts,end_ts,keywords,mid)
    print exist_flag
    if exist_flag:
        #start compute
        #try:

        resu = create_person(event_node,event_primary,en_name,event_index_name)
        if resu == 'Node Wrong':
            return 'Node Wrong'
        weibo_counts,uid_counts=counts(start_ts,end_ts,topic,en_name,keywords)

        es_event.update(index=event_task_name,doc_type=event_task_type,id=task_id,body={'doc':{'compute_status':-1}})

        # es_event.index(index=event_analysis_name,doc_type=event_type,id=task_id,body={'name':topic,'start_ts':start_ts,'end_ts':end_ts,'submit_ts':submit_ts,'compute_status':0,'en_name':task_id,'relation_compute':relation})
        task['compute_status']=-1
        task['weibo_counts']=weibo_counts
        task['uid_counts']=uid_counts
        try:
            flag = es_event.get(index=event_analysis_name,doc_type=event_type,id=task_id)['_source']
            w_counts = flag['weibo_counts']+weibo_counts
            u_counts = flag['uid_counts']+uid_counts
            es_event.update(index=event_analysis_name,doc_type=event_type,id=task_id,body={'doc':{'compute_status':-1,'weibo_counts':w_counts,'uid_counts':u_counts}})
        except:
            es_event.index(index=event_analysis_name,doc_type=event_type,id=task_id,body=task)
            es_event.update(index=event_analysis_name,doc_type=event_type,id=task_id,body={'doc':{'hashtag_dict':'','topics':'','geo_results':'','real_geo':'','real_auth':'','sentiment_results':'','time_results':'','hashtag':'','real_time':'','user_results':'','real_person':'','keywords_list':''}})

        print 'finish change status'
        
        if es_event.get(index=event_analysis_name,doc_type=event_type,id=task_id)['_source']['weibo_counts'] == 0:
            return 1

        #geo
        
        cityTopic(en_name, start_ts, end_ts)
        es_event.update(index=event_analysis_name,doc_type=event_type,id=task_id,body={'doc':{'compute_status':-2}})
        print 'finish geo analyze'
        #language
        compute_real_info(en_name, begin_ts, end_ts,relation,task['submit_user'],task['submit_ts'])
        es_event.update(index=event_analysis_name,doc_type=event_type,id=task_id,body={'doc':{'compute_status':-3}})
        print 'finish language analyze'
        #time
        propagateCronTopic(en_name, start_ts, end_ts)
        es_event.update(index=event_analysis_name,doc_type=event_type,id=task_id,body={'doc':{'compute_status':-4}})
        print 'finish time analyze'

        
        #sentiment
        sentimentTopic(en_name, start_ts=start_ts, over_ts=end_ts)
        print 'finish sentiment analyze'
        #finish compute

        print es_event.update(index=event_analysis_name,doc_type=event_type,id=task_id,body={'doc':{'compute_status':1,'finish_ts':int(time.time())}})
        print 'finish change status done'
        print time.time()
        
        if('contain' in relation.split('&')):
            # Compute 'contain' relations between this event and related events
            related_event_ids = event_input(keywords,en_name)
            rel_list = []
            for i in related_event_ids:
                create_person(event_node,event_primary,i,event_index_name)
                rel_list.append([[2,en_name],'contain',[2,i]])
            nodes_rels(rel_list)

        es_event.update(index=event_task_name,doc_type=event_task_type,id=task_id,body={'doc':{'compute_status':1}})


    
    t2=time.time()-t1
    print task_id,t2
            # except:
            #   raise
            #   break
            #get_attr(en_name, start_ts, end_ts)
        # else:
        #     pass
    return 1
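
# A hypothetical driver loop (the real scheduler is not part of these
# snippets); it assumes pending tasks carry compute_status == 0 in the
# task index:
#
#   pending = es_event.search(index=event_task_name, doc_type=event_task_type,
#                             body={'query': {'term': {'compute_status': 0}},
#                                   'size': 100})['hits']['hits']
#   for task in pending:
#       compute_task(task)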

# Example #8

def compute_real_info(topic, begin_ts, end_ts, relation, submit_user,
                      submit_ts):
    info_dict = {}
    query_body = {
        'query': {
            'bool': {
                'must': [{
                    'term': {
                        'en_name': topic
                    }
                }, {
                    'range': {
                        'timestamp': {
                            'gte': begin_ts,
                            'lt': end_ts
                        }
                    }
                }]
            }
        },
        'size': 10000
    }
    result = es_event.search(index=topic,
                             doc_type=event_text_type,
                             fields=['text'],
                             body=query_body)['hits']['hits']
    text_list = []
    for i in result:
        text_list.append(i['fields']['text'][0])

    # Event type: use the type stored on the task if present, else classify the texts
    try:
        event = es_event.get(index=event_task_name,
                             doc_type=event_task_type,
                             id=topic)['_source']
        info_dict['event_type'] = event['event_type']
    except:
        info_dict['event_type'] = cut_weibo(text_list)

    try:
        event = es_event.get(index=event_task_name,
                             doc_type=event_task_type,
                             id=topic)['_source']
        info_dict['real_auth'] = event['real_auth']
        info_dict['real_geo'] = event['real_geo']
        info_dict['real_time'] = event['real_time']
        info_dict['real_person'] = event['real_person']
    except:
        info_dict = get_real(info_dict, topic, begin_ts, end_ts, relation)

    info_dict['topics'] = json.dumps(get_topic_word(text_list, 10))

    keywords = get_keyword(''.join(text_list), 2)
    info_dict['keywords'] = '&'.join([i[0] for i in keywords])
    info_dict['keywords_list'] = json.dumps(keywords)

    hashtag = get_hashtag(''.join(text_list))
    info_dict['hashtag_dict'] = json.dumps(hashtag)
    info_dict['hashtag'] = '&'.join(list(hashtag.keys()))

    try:
        es_event.update(index=event_analysis_name,
                        doc_type=event_type,
                        id=topic,
                        body={'doc': info_dict})
    except Exception, e:
        es_event.index(index=event_analysis_name,
                       doc_type=event_type,
                       id=topic,
                       body=info_dict)