def create_task_list(given_ts):
    # 1. search from manage_sensing_task
    # 2. push to redis list-----task_work
    # print start info
    current_path = os.getcwd()
    file_path = os.path.join(current_path, 'task_list.py')
    now_ts = datehour2ts(ts2datehour(time.time() - 3600))
    print_log = "&".join([file_path, "start", ts2date(now_ts)])
    print print_log
    #ts = ts - 3600

    query_body = {"query": {"match_all": {}}}
    search_results = es.search(index=index_sensing, doc_type=type_sensing,
                               body=query_body)['hits']['hits']
    count = 0
    if search_results:
        for iter_item in search_results:
            _id = iter_item['_id']
            item = iter_item['_source']
            task = []
            task.append(item['task_name'])                   # task name
            task.append(json.loads(item['social_sensors']))  # social sensors
            #task.append(now_ts)
            task.append(given_ts)
            r.lpush('task_name', json.dumps(task))
            count += 1
    print count

    print_log = "&".join([file_path, "end", ts2date(time.time())])
    print print_log
def create_task_list():
    # 1. search from manage_sensing_task
    # 2. push to redis list-----task_work
    # print start info
    current_path = os.getcwd()
    file_path = os.path.join(current_path, 'task_list.py')
    if RUN_TYPE == 0:
        now_ts = 1463241600  # 1378008000
    else:
        i = int(sys.argv[1])
        now_ts = 1463241600 + 3600 * i
        #now_ts = date_hour2ts(ts2date_hour(time.time()))
    print_log = "&".join([file_path, "start", ts2date(now_ts)])
    print print_log
    #ts = ts - 3600

    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"term": {"finish": "0"}},
                            {"term": {"processing_status": "1"}}
                        ]
                    }
                }
            }
        }
    }
    search_results = es.search(index=index_name, doc_type=task_doc_type,
                               body=query_body)['hits']['hits']
    count = 0
    if search_results:
        for iter_item in search_results:
            _id = iter_item['_id']
            item = iter_item['_source']
            task = []
            task.append(item['task_name'])  # task name
            task.append(item['keywords'])   # keywords
            task.append(item['stop_time'])  # stop time
            task.append(item['create_by'])  # creator
            task.append(now_ts)
            r.lpush('task_name', json.dumps(task))
            count += 1
    print count

    print_log = "&".join([file_path, "end", ts2date(time.time())])
    print print_log
def create_task_list(ts):
    # 1. search from manage_sensing_task
    # 2. push to redis list-----task_work
    # print start info
    current_path = os.getcwd()
    file_path = os.path.join(current_path, 'task_list.py')
    now_ts = str(int(time.time()))
    print_log = "&".join([file_path, "start", now_ts])
    print print_log

    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"term": {"finish": "0"}},
                            {"term": {"processing_status": "1"}}
                        ]
                    }
                }
            }
        }
    }
    search_results = es.search(index=index_name, doc_type=task_doc_type,
                               body=query_body)['hits']['hits']
    count = 0
    if search_results:
        for iter_item in search_results:
            item = iter_item['_source']
            task = []
            task.append(item['task_name'])                    # task name
            task.append(json.loads(item['social_sensors']))   # social sensors
            task.append(json.loads(item['keywords']))         # filter keywords
            task.append(json.loads(item['sensitive_words']))  # sensitive words
            task.append(item['stop_time'])                    # stop time
            task.append(item['warning_status'])               # last step status
            task.append(item['task_type'])                    # task type
            task.append(ts)
            r.lpush('task_name', json.dumps(task))
            count += 1
    print count

    now_ts = str(int(time.time()))
    print_log = "&".join([file_path, "end", now_ts])
    print print_log
def create_task_list(ts):
    # 1. search from manage_sensing_task
    # 2. push to redis list-----task_work
    # print start info
    current_path = os.getcwd()
    file_path = os.path.join(current_path, 'task_list.py')
    now_ts = str(int(time.time()))
    print_log = "&".join([file_path, "start", now_ts])
    print print_log

    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"term": {"finish": "0"}},
                            {"term": {"processing_status": "1"}}
                        ]
                    }
                }
            }
        }
    }
    search_results = es.search(index=index_name, doc_type=task_doc_type,
                               body=query_body)['hits']['hits']
    count = 0
    if search_results:
        for iter_item in search_results:
            item = iter_item['_source']
            task = []
            task.append(item['task_name'])                    # task name
            task.append(json.loads(item['social_sensors']))   # social sensors
            task.append(json.loads(item['keywords']))         # filter keywords
            task.append(json.loads(item['sensitive_words']))  # sensitive words
            task.append(item['stop_time'])                    # stop time
            task.append(item['warning_status'])               # last step status
            task.append(item['task_type'])                    # task type
            task.append(ts)
            task.append(item['create_by'])                    # creator
            r.lpush('task_name', json.dumps(task))
            count += 1
    print count

    now_ts = str(int(time.time()))
    print_log = "&".join([file_path, "end", now_ts])
    print print_log
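# The 'task_name' payload is positional, so consumers index into it directly
# (e.g. social_sensing_task below reads task_detail[6] for the task type).
# For the 8-field variant pushed by create_task_list(ts) the layout is:
#   0 task_name, 1 social_sensors, 2 keywords, 3 sensitive_words,
#   4 stop_time, 5 warning_status, 6 task_type, 7 ts
# and the 9-field variant above additionally appends create_by at index 8.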
def social_sensing_task(ts):
    # 1. print start info
    count = 0
    current_path = os.getcwd()
    file_path = os.path.join(current_path, 'social_sensing.py')
    now_ts = str(ts)
    print_log = "&".join([file_path, "start", now_ts])
    print print_log  # print start info

    while 1:
        temp = r.rpop("task_name")
        if not temp:
            print count
            now_ts = str(int(time.time()))
            print_log = "&".join([file_path, "end", now_ts])
            print print_log  # print end info
            break  # finished all tasks in task_list
        task_detail = json.loads(temp)
        count += 1
        # dispatch by task type (index 6 of the task payload)
        if int(task_detail[6]) == 2:
            specific_keywords_burst_dection(task_detail)
        elif int(task_detail[6]) == 3:
            sensors_keywords_detection(task_detail)
        else:
            pass
def push_crawler_task_redis():
    query_body = {"query": {"match_all": {}}}
    search_results = es.search(index=index_monitor_task, doc_type=type_monitor_task,
                               body=query_body)['hits']['hits']
    count = 0
    if search_results:
        for iter_item in search_results:
            item = iter_item['_source']
            task = []
            task.append(item['task_name'])  # task name
            task.append(item['keywords'])   # keywords
            r.lpush(item['task_name'], json.dumps(task))
            count += 1
    print 'task_count_sum:', count
def social_sensing_task():
    while 1:
        temp = r.rpop("task_name")
        if not temp:
            now_date = ts2date(time.time())
            print 'All tasks Finished:', now_date
            break
        task_detail = json.loads(temp)
        social_sensing(task_detail)
        print json.loads(temp)[0], ':Finished'
def create_task_list():
    # 1. search from manage_sensing_task
    # 2. push to redis list-----task_work
    # print start info
    current_path = os.getcwd()
    file_path = os.path.join(current_path, 'task_list.py')
    if S_TYPE == 'test':
        now_ts = datetime2ts(S_DATE)
    else:
        now_ts = datehour2ts(ts2datehour(time.time() - 3600))
    print_log = " ".join([file_path, "--start:"])
    print print_log

    query_body = {"query": {"match_all": {}}}
    search_results = es.search(index=index_manage_sensing, doc_type=type_manage_sensing,
                               body=query_body)['hits']['hits']
    count = 0
    if search_results:
        for iter_item in search_results:
            _id = iter_item['_id']
            item = iter_item['_source']
            task = []
            task.append(item['task_name'])  # task name
            try:
                task.append(json.loads(item['social_sensors']))  # social sensors
            except:
                task.append(item['social_sensors'])  # already a list
            task.append(now_ts)
            r.lpush("task_name", json.dumps(task))
            count += 1
    print 'task_count_sum:', count
def push_calculate_task_redis():
    query_body = {"query": {"match_all": {}}}
    search_results = es.search(index=index_monitor_task, doc_type=type_monitor_task,
                               body=query_body)['hits']['hits']
    count = 0
    if search_results:
        for iter_item in search_results:
            item = iter_item['_source']
            task = []
            task.append(item['task_name'])          # task name
            task.append(item['event_category'])     # event category
            task.append(item['create_at'])          # creation time
            task.append(item['processing_status'])  # processing status
            r.lpush(item['task_name'], json.dumps(task))
            count += 1
    print 'task_count_sum:', count
def pop_crawler_task_redis():
    count = 0
    while 1:
        temp = r.rpop("task_name")
        if temp:
            print "current_task:", json.loads(temp)[0]
        if not temp:
            print 'the last task NO:', count
            now_date = ts2date(time.time())
            print 'All tasks Finished:', now_date
            break
        task_detail = json.loads(temp)
        count += 1
        crawler_task_start(task_detail)
        print json.loads(temp)[0], ':Finished'
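# NOTE: push_crawler_task_redis/push_calculate_task_redis above enqueue each
# task under the per-task key item['task_name'], while this consumer pops the
# fixed key "task_name"; presumably both sides are meant to use the same key.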
def social_sensing_task():
    # 1. print start info
    count = 0
    current_path = os.getcwd()
    file_path = os.path.join(current_path, 'social_sensing.py')
    now_ts = ts2date(time.time())
    print_log = "&".join([file_path, "start", now_ts])
    # print print_log  # print start info

    while 1:
        temp = r.rpop("task_name")
        if not temp:
            print count
            now_ts = str(int(time.time()))
            print_log = "&".join([file_path, "end", now_ts])
            break  # finished all tasks in task_list
        task_detail = json.loads(temp)
        count += 1
        social_sensing(task_detail)
def social_sensing_task():
    count = 0
    now_ts = ts2date(time.time())
    while 1:
        temp = r.rpop("task_name")
        if temp:
            print "current_task:", json.loads(temp)[0]
        if not temp:
            print 'the last task:', count
            now_date = ts2date(time.time())
            print 'All tasks Finished:', now_date
            break
        task_detail = json.loads(temp)
        count += 1
        social_sensing(task_detail)
        print json.loads(temp)[0], ':Finished'
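# A minimal driver sketch, assuming the no-argument create_task_list() /
# social_sensing_task() pair above runs from a single cron entry point; the
# __main__ guard itself is not shown in the original snippets.
if __name__ == '__main__':
    create_task_list()     # fill the 'task_name' Redis list from ES
    social_sensing_task()  # drain it, running social_sensing on each task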
# -*- coding:utf-8 -*-
import json
import sys
reload(sys)
sys.path.append('./../')
from global_utils import R_SOCIAL_SENSING as r

# seed word lists for sensing / sensitive-word detection
sensing_words = [
    "民主", "法治", "宪政", "维权", "上访", "强拆", "政府", "歹徒", "腐败",
    "暴恐", "爆炸", "袭击", "地震", "坠亡", "不雅", "火灾", "车祸", "中毒",
    "抢劫", "强奸", "死亡", "雾霾", "污染"
]
r.hset("sensing_words", "sensing_words", json.dumps(sensing_words))

sensitive_words = ["宪政", "暴恐", "维权", "强拆"]
r.hset("sensitive_words", "sensitive_words", json.dumps(sensitive_words))
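# A worker reads the lists back with HGET and json.loads; a minimal sketch:
print json.loads(r.hget("sensing_words", "sensing_words"))
print json.loads(r.hget("sensitive_words", "sensitive_words"))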
def find_hashtag(uid):
    # HSCAN the per-hour hashtag hash, starting from cursor 0
    re_scan = r.hscan('hashtag_1456848000', 0)
    return re_scan
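# A sketch of a full cursor-based scan, assuming the standard redis-py hscan
# signature, which returns a (next_cursor, {field: value}) pair; the helper
# name and the hash-key default are illustrative.
def scan_all_hashtags(hash_key='hashtag_1456848000'):
    cursor = 0
    results = {}
    while True:
        cursor, chunk = r.hscan(hash_key, cursor)
        results.update(chunk)
        if cursor == 0:  # HSCAN signals completion with cursor 0
            break
    return results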