Beispiel #1
0
def create_task_list(given_ts):
    # 1. search from manage_sensing_task
    # 2. push to redis list-----task_work

    # print start info
    current_path = os.getcwd()
    file_path = os.path.join(current_path, 'task_list.py')
    now_ts = datehour2ts(ts2datehour(time.time() - 3600))
    print_log = "&".join([file_path, "start", ts2date(now_ts)])
    print print_log
    #ts = ts - 3600

    query_body = {"query": {"match_all": {}}}

    search_results = es.search(index=index_sensing,
                               doc_type=type_sensing,
                               body=query_body)['hits']['hits']

    count = 0
    if search_results:
        for iter_item in search_results:
            _id = iter_item['_id']
            item = iter_item['_source']
            task = []
            task.append(item['task_name'])  # task_name
            task.append(json.loads(item['social_sensors']))  # social sensors
            #task.append(now_ts)
            task.append(given_ts)
            r.lpush('task_name', json.dumps(task))
            count += 1

    print count
    print_log = "&".join([file_path, "end", ts2date(time.time())])
    print print_log
Beispiel #2
0
def create_task_list():
    # 1. search from manage_sensing_task
    # 2. push to redis list-----task_work

    # print start info
    current_path = os.getcwd()
    file_path = os.path.join(current_path, 'task_list.py')
    if RUN_TYPE == 0:
        now_ts = 1463241600  # 1378008000
    else:
        i = int(sys.argv[1])
        now_ts = 1463241600 + 3600 * i
        #now_ts = date_hour2ts(ts2date_hour(time.time()))
    print_log = "&".join([file_path, "start", ts2date(now_ts)])
    print print_log
    #ts = ts - 3600

    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [{
                            "term": {
                                "finish": "0"
                            }
                        }, {
                            "term": {
                                "processing_status": "1"
                            }
                        }]
                    }
                }
            }
        }
    }

    search_results = es.search(index=index_name,
                               doc_type=task_doc_type,
                               body=query_body)['hits']['hits']

    count = 0
    if search_results:
        for iter_item in search_results:
            _id = iter_item['_id']
            item = iter_item['_source']
            task = []
            task.append(item['task_name'])  # task_name
            task.append(item['keywords'])  # keywords
            task.append(item['stop_time'])  # stop time
            task.append(item['create_by'])
            task.append(now_ts)
            r.lpush('task_name', json.dumps(task))
            count += 1

    print count
    print_log = "&".join([file_path, "end", ts2date(time.time())])
    print print_log
def create_task_list(ts):
    # 1. search from manage_sensing_task
    # 2. push to redis list-----task_work

    # print start info
    current_path = os.getcwd()
    file_path = os.path.join(current_path, 'task_list.py')
    now_ts = str(int(time.time()))
    print_log = "&".join([file_path, "start", now_ts])
    print print_log

    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [{
                            "term": {
                                "finish": "0"
                            }
                        }, {
                            "term": {
                                "processing_status": "1"
                            }
                        }]
                    }
                }
            }
        }
    }

    search_results = es.search(index=index_name,
                               doc_type=task_doc_type,
                               body=query_body)['hits']['hits']

    count = 0
    if search_results:
        for iter_item in search_results:
            item = iter_item['_source']
            task = []
            task.append(item['task_name'])  # task_name
            task.append(json.loads(item['social_sensors']))  # social sensors
            task.append(json.loads(item['keywords']))  # filter keywords
            task.append(json.loads(
                item['sensitive_words']))  #load sensitive_words
            task.append(item['stop_time'])  # stop time
            task.append(item['warning_status'])  # last step status
            task.append(item['task_type'])  # task type
            task.append(ts)
            r.lpush('task_name', json.dumps(task))
            count += 1

    print count
    now_ts = str(int(time.time()))
    print_log = "&".join([file_path, "end", now_ts])
    print print_log
def create_task_list():
    # 1. search from manage_sensing_task
    # 2. push to redis list-----task_work

    # print start info
    current_path = os.getcwd()
    file_path = os.path.join(current_path, 'task_list.py')
    if RUN_TYPE == 0:
        now_ts = 1463241600 # 1378008000
    else:
        i = int(sys.argv[1])
        now_ts = 1463241600 + 3600 * i
        #now_ts = date_hour2ts(ts2date_hour(time.time()))
    print_log = "&".join([file_path, "start", ts2date(now_ts)])
    print print_log
    #ts = ts - 3600

    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "bool":{
                        "must":[
                            {"term":{"finish": "0"}},
                            {"term":{"processing_status": "1"}}
                        ]
                    }
                }
            }
        }
    }

    search_results = es.search(index=index_name, doc_type=task_doc_type, body=query_body)['hits']['hits']

    count = 0
    if search_results:
        for iter_item in search_results:
            _id = iter_item['_id']
            item = iter_item['_source']
            task = []
            task.append(item['task_name']) # task_name
            task.append(item['keywords']) # keywords
            task.append(item['stop_time']) # stop time
            task.append(item['create_by'])
            task.append(now_ts)
            r.lpush('task_name', json.dumps(task))
            count += 1

    print count
    print_log = "&".join([file_path, "end", ts2date(time.time())])
    print print_log
def create_task_list(ts):
    # 1. search from manage_sensing_task
    # 2. push to redis list-----task_work

    # print start info
    current_path = os.getcwd()
    file_path = os.path.join(current_path, 'task_list.py')
    now_ts = str(int(time.time()))
    print_log = "&".join([file_path, "start", now_ts])
    print print_log

    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "bool":{
                        "must":[
                            {"term":{"finish": "0"}},
                            {"term":{"processing_status": "1"}}
                        ]
                    }
                }
            }
        }
    }

    search_results = es.search(index=index_name, doc_type=task_doc_type, body=query_body)['hits']['hits']

    count = 0
    if search_results:
        for iter_item in search_results:
            item = iter_item['_source']
            task = []
            task.append(item['task_name']) # task_name
            task.append(json.loads(item['social_sensors'])) # social sensors
            task.append(json.loads(item['keywords'])) # filter keywords
            task.append(json.loads(item['sensitive_words'])) #load sensitive_words
            task.append(item['stop_time']) # stop time
            task.append(item['warning_status']) # last step status
            task.append(item['task_type']) # task type
            task.append(ts)
            task.append(item['create_by'])
            r.lpush('task_name', json.dumps(task))
            count += 1

    print count
    now_ts = str(int(time.time()))
    print_log = "&".join([file_path, "end", now_ts])
    print print_log
Beispiel #6
0
def social_sensing_task(ts):
    # 1. print start info
    count = 0
    current_path = os.getcwd()
    file_path = os.path.join(current_path, 'social_sensing.py')
    now_ts = str(ts)
    print_log = "&".join([file_path, "start", now_ts])
    print print_log #打印开始信息

    while 1:
        temp = r.rpop("task_name")
        if not temp:
            print count
            now_ts = str(int(time.time()))
            print_log = "&".join([file_path, "end", now_ts])
            print print_log # 打印终止信息
            break  # finish all task in task_list
        task_detail = json.loads(temp)
        count += 1
#        """
        if int(task_detail[6]) == 2:
            specific_keywords_burst_dection(task_detail)
        elif int(task_detail[6]) == 3:
            sensors_keywords_detection(task_detail)
        else:
            pass
Beispiel #7
0
def push_crawler_task_redis():
    query_body = {"query": {"match_all": {}}}

    search_results = es.search(index=index_monitor_task,
                               doc_type=type_monitor_task,
                               body=query_body)['hits']['hits']

    count = 0
    if search_results:
        for iter_item in search_results:
            item = iter_item['_source']

            task = []
            task.append(item['task_name'])  # task_name
            task.append(item['keywords'])  # keywords

            r.lpush(item['task_name'], json.dumps(task))
            count += 1

    print 'task_count_sum:', count
Beispiel #8
0
def social_sensing_task():
    while 1:
        temp = r.rpop("task_name")

        if not temp:
            now_date = ts2date(time.time())
            print 'All tasks Finished:',now_date
            break  
            
        task_detail = json.loads(temp)
        social_sensing(task_detail)
        print json.loads(temp)[0],':Finished'
Beispiel #9
0
def create_task_list():
    # 1. search from manage_sensing_task
    # 2. push to redis list-----task_work

    # print start info
    current_path = os.getcwd()
    file_path = os.path.join(current_path, 'task_list.py')
    if S_TYPE == 'test':
        now_ts = datetime2ts(S_DATE)
    else:
        now_ts = datehour2ts(ts2datehour(time.time() - 3600))

    print_log = " ".join([file_path, "--start:"])
    print print_log

    query_body = {"query": {"match_all": {}}}

    search_results = es.search(index=index_manage_sensing,
                               doc_type=type_manage_sensing,
                               body=query_body)['hits']['hits']

    count = 0
    if search_results:
        for iter_item in search_results:
            _id = iter_item['_id']
            item = iter_item['_source']

            task = []
            task.append(item['task_name'])  # task_name
            try:
                task.append(json.loads(
                    item['social_sensors']))  # social sensors
            except:
                task.append(item['social_sensors'])  # social sensors
            task.append(now_ts)

            r.lpush("task_name", json.dumps(task))
            count += 1

    print 'task_count_sum:', count
Beispiel #10
0
def push_calculate_task_redis():
    query_body = {"query": {"match_all": {}}}

    search_results = es.search(index=index_monitor_task,
                               doc_type=type_monitor_task,
                               body=query_body)['hits']['hits']

    count = 0
    if search_results:
        for iter_item in search_results:
            item = iter_item['_source']

            task = []
            task.append(item['task_name'])
            task.append(item['event_category'])
            task.append(item['create_at'])
            task.append(item['processing_status'])

            r.lpush(item['task_name'], json.dumps(task))
            count += 1

    print 'task_count_sum:', count
Beispiel #11
0
def pop_crawler_task_redis():
    count = 0
    while 1:
        temp = r.rpop("task_name")
        if temp:
            print "current_task:", json.loads(temp)[0]

        if not temp:
            print 'the last task NO:', count
            now_date = ts2date(time.time())
            print 'All tasks Finished:', now_date
            break

        task_detail = json.loads(temp)
        count += 1
        crawler_task_start(task_detail)
        print json.loads(temp)[0], ':Finished'
Beispiel #12
0
def social_sensing_task():
    # 1. print start info
    count = 0
    current_path = os.getcwd()
    file_path = os.path.join(current_path, 'social_sensing.py')
    now_ts = ts2date(time.time())
    print_log = "&".join([file_path, "start", now_ts])
#    print print_log #打印开始信息

    while 1:
        temp = r.rpop("task_name")
        if not temp:
            print count
            now_ts = str(int(time.time()))
            print_log = "&".join([file_path, "end", now_ts])
            break  # finish all task in task_list
        task_detail = json.loads(temp)
        count += 1
        social_sensing(task_detail)
Beispiel #13
0
def social_sensing_task():

    count = 0
    now_ts = ts2date(time.time())

    while 1:
        temp = r.rpop("task_name")
        if temp:
            print "current_task:", json.loads(temp)[0]

        if not temp:
            print 'the last task:', count
            now_date = ts2date(time.time())
            print 'All tasks Finished:', now_date
            break

        task_detail = json.loads(temp)
        count += 1
        social_sensing(task_detail)
        print json.loads(temp)[0], ':Finished'
Beispiel #14
0
# -*- coding:utf-8 -*-

import json
import sys

reload(sys)
sys.path.append('./../')
from global_utils import R_SOCIAL_SENSING as r

# Word lists published through the R_SOCIAL_SENSING Redis connection.  Each
# list is stored JSON-encoded in a hash whose key and field both carry the
# list's own name.
sensing_words = [
    "民主", "法治", "宪政", "维权", "上访", "强拆",
    "政府", "歹徒", "腐败", "暴恐", "爆炸", "袭击",
    "地震", "坠亡", "不雅", "火灾", "车祸", "中毒",
    "抢劫", "强奸", "死亡", "雾霾", "污染",
]
sensitive_words = [
    "宪政",
    "暴恐",
    "维权",
    "强拆",
]

r.hset("sensing_words", "sensing_words", json.dumps(sensing_words))
r.hset("sensitive_words", "sensitive_words", json.dumps(sensitive_words))
Beispiel #15
0
# -*- coding:utf-8 -*-

import json
import sys
reload(sys)
sys.path.append('./../')
from global_utils import R_SOCIAL_SENSING as r

# Store the general sensing vocabulary as a JSON string in the Redis hash
# "sensing_words", under a field of the same name.
sensing_words = [
    "民主", "法治", "宪政", "维权",
    "上访", "强拆", "政府", "歹徒",
    "腐败", "暴恐", "爆炸", "袭击",
    "地震", "坠亡", "不雅", "火灾",
    "车祸", "中毒", "抢劫", "强奸",
    "死亡", "雾霾", "污染",
]
r.hset("sensing_words", "sensing_words", json.dumps(sensing_words))

# Store the shorter sensitive-word list the same way in its own hash.
sensitive_words = ["宪政", "暴恐", "维权", "强拆"]
r.hset("sensitive_words", "sensitive_words", json.dumps(sensitive_words))

def find_hashtag(uid):
    re_scan = r.hsan('hashtag_1456848000' 0)