def get_topic_weibo(topic, en_name, start_ts, end_ts, keywords, mid):
    """Probe the ``en_name`` index, create its mappings on failure, then scan.

    A one-document ``match_all`` search (sorted on ``timestamp``) is issued
    against ``es_event``.  If that search raises, ``get_mappings(en_name)``
    is called.  Afterwards ``find_flow_texts_scan`` is invoked with the
    arguments of this function.

    Args:
        topic: Forwarded unchanged to ``find_flow_texts_scan``.
        en_name: Name of the Elasticsearch index to probe; also forwarded.
        start_ts: Forwarded unchanged (presumably the start of the time
            window -- confirm against ``find_flow_texts_scan``).
        end_ts: Forwarded unchanged (presumably the end of the time window).
        keywords: Forwarded unchanged to ``find_flow_texts_scan``.
        mid: Forwarded unchanged to ``find_flow_texts_scan``.
    """
    probe_query = {'query': {'match_all': {}}, 'sort': 'timestamp', 'size': 1}
    try:
        # Only the success/failure of the probe matters; the hits themselves
        # were never used, so the result is not kept.
        es_event.search(index=en_name, doc_type=event_type, body=probe_query)
    except Exception:
        # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.  Any search failure is still
        # treated as "index not usable yet" and answered by creating mappings.
        get_mappings(en_name)
    # NOTE(review): the original indentation was lost; this call is assumed
    # to run unconditionally after the probe -- confirm against the upstream
    # file.
    find_flow_texts_scan(start_ts, end_ts, topic, en_name, keywords, mid)
def get_topic_weibo(topic, en_name, start_ts, end_ts):
    """Probe the ``en_name`` index, create its mappings on failure, then load.

    A one-document ``match_all`` search (sorted on ``timestamp``) is issued
    against ``weibo_es``.  If that search raises, ``get_mappings(en_name)``
    is called.  Afterwards ``find_flow_texts`` is invoked with the arguments
    of this function.

    Args:
        topic: Forwarded unchanged to ``find_flow_texts``.
        en_name: Name of the Elasticsearch index to probe; also forwarded.
        start_ts: Forwarded unchanged (presumably the start of the time
            window -- confirm against ``find_flow_texts``).
        end_ts: Forwarded unchanged (presumably the end of the time window).
    """
    probe_query = {'query': {'match_all': {}}, 'sort': 'timestamp', 'size': 1}
    try:
        # Only the success/failure of the probe matters; the hits themselves
        # were never used, so the result is not kept.
        weibo_es.search(index=en_name, doc_type=topic_index_type,
                        body=probe_query)
    except Exception:
        # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.  Any search failure is still
        # treated as "index not usable yet" and answered by creating mappings.
        get_mappings(en_name)
    # NOTE(review): the original indentation was lost; this call is assumed
    # to run unconditionally after the probe -- confirm against the upstream
    # file.
    find_flow_texts(start_ts, end_ts, topic, en_name)
def get_topic_tweets(task_id, task_source, event_keywords, create_time):
    """Create the ``task_id`` index mappings if missing, then fetch flow texts.

    Existence of the index is checked through ``es_intel.indices.exists``.
    When the index is absent, the mapping helper is picked by
    ``task_source``: ``'weibo'`` uses ``get_mappings``, ``'facebook'`` uses
    ``facebook_flow_text_mappings`` (whose return value is printed), and any
    other value uses ``twitter_flow_text_mappings``.  ``find_flow_texts`` is
    then called with the source, the task id and the keywords.

    Args:
        task_id: Index name to check/create; also passed to the helpers.
        task_source: ``'weibo'``, ``'facebook'`` or anything else (handled
            as twitter).
        event_keywords: Forwarded unchanged to ``find_flow_texts``.
        create_time: Accepted but not read by this function.
    """
    index_missing = not es_intel.indices.exists(index=task_id)

    if index_missing and task_source == 'weibo':
        get_mappings(task_id, index_type='weibo')
    elif index_missing and task_source == 'facebook':
        # Single-argument print: same output under the Python 2 print
        # statement and the print function.
        print(facebook_flow_text_mappings(task_id, index_type='facebook'))
    elif index_missing:
        twitter_flow_text_mappings(task_id, index_type='twitter')

    # NOTE(review): the original indentation was lost; this call is assumed
    # to run whether or not the index already existed -- confirm against the
    # upstream file.
    find_flow_texts(task_source, task_id, event_keywords)
else: r_cluster.hset('sensitive_'+str(ts), str(uid), json.dumps(sensitive_words_dict)) #identify whether to mapping new es weibo_timestamp = item['timestamp'] should_index_name_date = ts2datetime(weibo_timestamp) if should_index_name_date != now_index_name_date: if action != [] and xdata != []: index_name = index_name_pre + now_index_name_date if bulk_action: es.bulk(bulk_action, index=index_name, doc_type=index_type, timeout=60) bulk_action = [] count = 0 now_index_name_date = should_index_name_date index_name = index_name_pre + now_index_name_date get_mappings(index_name) # save action, xdata = expand_index_action(item) bulk_action.extend([action, xdata]) count += 1 if count % 1000 == 0 and count != 0: index_name = index_name_pre + now_index_name_date if bulk_action: es.bulk(bulk_action, index=index_name, doc_type=index_type, timeout=60) bulk_action = [] count = 0 class_te = time.time() class_ts = class_te
#identify whether to mapping new es weibo_timestamp = item['timestamp'] should_index_name_date = ts2datetime(weibo_timestamp) if should_index_name_date != now_index_name_date: if action != [] and xdata != []: index_name = index_name_pre + now_index_name_date if bulk_action: es.bulk(bulk_action, index=index_name, doc_type=index_type, timeout=60) bulk_action = [] count = 0 now_index_name_date = should_index_name_date index_name = index_name_pre + now_index_name_date get_mappings(index_name) # save action, xdata = expand_index_action(item) bulk_action.extend([action, xdata]) count += 1 if count % 1000 == 0 and count != 0: index_name = index_name_pre + now_index_name_date if bulk_action: es.bulk(bulk_action, index=index_name, doc_type=index_type, timeout=60) bulk_action = [] count = 0