Example #1
    def __init__(self, debug=False):
        self.debug = debug
        self.config = load_config("./config/assed_config.json")

        # Pre-trained word2vec encoder, limited to the 100k most frequent words
        self._encoder = KeyedVectors.load_word2vec_format(
            './pipelines/assed_landslide/ml/encoders/GoogleNews-vectors-negative300.bin',
            binary=True,
            unicode_errors='ignore',
            limit=100000)
        # Fallback vector for out-of-vocabulary tokens
        self.zero_v = zeros(shape=(300,))
        # Trained Keras classification model
        self.model = keras.models.load_model(
            "./pipelines/assed_landslide/ml/models/tf_model.h5")

        # Database connection used for event inserts
        self.DB_CONN = get_db_connection(self.config)
        self.cursor = self.DB_CONN.cursor()

        self.cursor_timer = time.time()

        self.cursor_refresh = 300
        self.true_counter = 0
        self.false_counter = 0
        self.total_counter = 0

        self.db_insert = 'INSERT INTO ASSED_Social_Events ( \
        social_id, cell, \
        latitude, longitude, timestamp, link, text, location, topic_name, source, valid, streamtype) \
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'

        self.stream_tracker = {}
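
The encoder and model loaded above are typically applied per message: average the word2vec vectors of the tokens and feed the result to the Keras model. A minimal sketch of that step; the classify_text method name, the model's input shape, and the 0.5 threshold are assumptions for illustration, not part of the original source:

    def classify_text(self, text):
        # Average the word2vec vectors of in-vocabulary tokens; fall back to the zero vector.
        vectors = [self._encoder[word] for word in text.split() if word in self._encoder]
        encoded = sum(vectors) / len(vectors) if vectors else self.zero_v
        # Assumed: the model takes a single 300-dimensional vector and outputs a probability.
        score = self.model.predict(encoded.reshape(1, -1))[0][0]
        return score > 0.5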
Example #2
    def __init__(self, debug=False):
        self.debug = debug
        self.time = time.time()
        # Redis connection used as the local cache/store
        pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
        self.r = redis.Redis(connection_pool=pool)
        self.timecheck = 600
        self.locations = {}
        self.update_location_store()
        # Stanford NER client for entity extraction
        self.NER = Ner(host="localhost", port=9199)
        self.counter = 0
        self.memory = {}
        config = load_config("./config/assed_config.json")
        self.APIKEY = config["APIKEYS"]["googlemaps"]
        self.stream_tracker = {}
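
The Ner client above follows the sner-style interface, which tags one sentence per call and returns (token, tag) pairs. A hedged sketch of how location tokens could be pulled out of a message; the extract_locations helper is an assumption, not part of the original class:

    def extract_locations(self, text):
        # Tag each token via the Stanford NER server; keep tokens labeled LOCATION.
        tagged = self.NER.get_entities(text)
        return [token for token, tag in tagged if tag == "LOCATION"]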
Example #3
    def __init__(self, debug=False):
        self.debug = debug
        self.config = load_config("./config/assed_config.json")
        self.DB_CONN = get_db_connection(self.config)
        self.cursor = self.DB_CONN.cursor()

        self.cursor_timer = time.time()

        self.cursor_refresh = 300
        self.MS_IN_DAYS = 86400000
        self.true_counter = 0
        self.unk = 0
        self.stream_tracker = {}
Example #4
# This initializes several things...
import pdb
import argparse
import glob
from utils import db_utils
from utils.file_utils import load_config

parser = argparse.ArgumentParser(description="Initialize sets up various parts of LITMUS")
parser.add_argument("--env",
                    choices=["mysql", "dirs"],
                    help="Environment to setup")

argums = vars(parser.parse_args())
assed_config = load_config('config/assed_config.json')


if argums['env'] == 'dirs':
    import os
    dirs = ['downloads','logfiles', 'config', 'redis', 'ml', 'ml/models', 'ml/encoders']
    for directory in dirs:
        if not os.path.exists(directory):
            os.makedirs(directory)

if argums['env'] == 'mysql':
    #set up mysql stuff (news and everything)
    db_conn = db_utils.get_db_connection(assed_config)
    for file_ in glob.glob('initialization/mysql/*.SQL'):
        db_utils.run_sql_file(file_,db_conn)
    db_conn.close()
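
Assuming the script above is saved as something like initialize.py (the filename is not shown here), it would be run once per target: python initialize.py --env dirs to create the directory layout, and python initialize.py --env mysql to execute the SQL files under initialization/mysql/.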
        
Example #5
import os
import sys
import multiprocessing
from SocialStreamerSrc.TweetProcess import TweetProcess
from SocialStreamerSrc.KeyServer import KeyServer

# Utils import
from utils.file_utils import load_config
from utils.helper_utils import dict_equal, setup_pid, readable_time, std_flush
import utils.CONSTANTS as CONSTANTS

SOCIAL_STREAMER_FIRST_FILE_CHECK = True

if __name__ == '__main__':
    pid_name = os.path.basename(sys.argv[0]).split('.')[0]
    setup_pid(pid_name)
    #Set up configOriginal dict
    configOriginal = load_config(CONSTANTS.TOPIC_CONFIG_PATH)
    StreamerManager = {}
    for _streamer_ in configOriginal["SocialStreamers"]:
        StreamerManager[_streamer_] = {}
        StreamerManager[_streamer_]["name"] = configOriginal[
            "SocialStreamers"][_streamer_]["name"]
        StreamerManager[_streamer_]["type"] = configOriginal[
            "SocialStreamers"][_streamer_]["type"]
        StreamerManager[_streamer_]["apikey_name"] = configOriginal[
            "SocialStreamers"][_streamer_]["apikey"]
        StreamerManager[_streamer_]["apimax"] = configOriginal[
            "SocialStreamers"][_streamer_]["apimax"]
        _scriptname = configOriginal["SocialStreamers"][_streamer_]["script"]
        moduleImport = __import__("SocialStreamerSrc.%s" % _scriptname,
                                  fromlist=[_scriptname])
        StreamerManager[_streamer_]["executor"] = getattr(
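
The per-streamer setup above relies on Python's dynamic import to load each streamer script by name. A self-contained sketch of that pattern; the script name, and the assumption that each module exposes an entry point with the same name, are placeholders rather than values from the real topic config:

# Import SocialStreamerSrc.<script> at runtime and fetch the streamer entry point by name.
_scriptname = "TwitterStreamer"  # placeholder; the real name comes from the topic config
moduleImport = __import__("SocialStreamerSrc.%s" % _scriptname, fromlist=[_scriptname])
executor = getattr(moduleImport, _scriptname)  # assumed: the module defines a class of this name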
Example #6
import os
import sys
import multiprocessing
from utils.file_utils import load_config
from utils.helper_utils import setup_pid, readable_time, std_flush
import utils.CONSTANTS as CONSTANTS

from SocialStreamFileProcessorSrc.StreamFilesProcessor import StreamFilesProcessor


if __name__ == "__main__":
    #set up the PID for this
    pid_name = os.path.basename(sys.argv[0]).split('.')[0]
    setup_pid(pid_name)


    #Load the keywords
    keywordConfig = load_config(CONSTANTS.TOPIC_CONFIG_PATH)
    errorQueue = multiprocessing.Queue()
    messageQueue = multiprocessing.Queue()

    keyStreamConfig = {}
    # for each keyword-lang pair type, launch a StreamFilesProcessor
    for physicalEvent in keywordConfig['topic_names'].keys():
        for language in keywordConfig['topic_names'][physicalEvent]["languages"]:
            eventLangTuple = (physicalEvent,language)
            keyStreamConfig[eventLangTuple] = {}
            keyStreamConfig[eventLangTuple]['name'] = physicalEvent
            keyStreamConfig[eventLangTuple]['lang'] = language
            keyStreamConfig[eventLangTuple]['keywords'] = keywordConfig['topic_names'][physicalEvent]["languages"][language]
            keyStreamConfig[eventLangTuple]['postpone'] = False
            std_flush(" ".join(["Deploying",str(eventLangTuple), "at", readable_time()]))
            try:
Example #7
def main():

    local_timer = 0
    refresh_timer = 7200
    sleep_timer = 300
    while True:
        if time.time() - local_timer > refresh_timer:

            local_timer = time.time()

            helper_utils.std_flush("[%s] -- Initializing EventDetection" %
                                   helper_utils.readable_time())
            cell_cache = {}

            assed_config = file_utils.load_config("./config/assed_config.json")

            helper_utils.std_flush("[%s] -- Obtained DB Connection" %
                                   helper_utils.readable_time())
            DB_CONN = db_utils.get_db_connection(assed_config)
            cursor = DB_CONN.cursor()

            available_streamers = [
                item for item in assed_config["SocialStreamers"]
            ]
            streamer_results = {}
            helper_utils.std_flush(
                "[%s] -- Available streamers: %s" %
                (helper_utils.readable_time(), str(available_streamers)))

            for _streamer_ in available_streamers:
                helper_utils.std_flush(
                    "[%s] -- Generating query for: %s" %
                    (helper_utils.readable_time(), _streamer_))
                _query_ = generate_social_query(_streamer_=_streamer_,
                                                _topic_="landslide")
                cursor.execute(_query_)
                streamer_results[_streamer_] = cursor.fetchall()
                helper_utils.std_flush(
                    "[%s] -- Obtained results for : %s" %
                    (helper_utils.readable_time(), _streamer_))

            helper_utils.std_flush("[%s] -- Generating query for: %s" %
                                   (helper_utils.readable_time(), "TRMM"))
            _query_ = generate_trmm_query()
            cursor.execute(_query_)
            trmm_results = cursor.fetchall()
            helper_utils.std_flush("[%s] -- Obtained resuts for: %s" %
                                   (helper_utils.readable_time(), "TRMM"))

            helper_utils.std_flush("[%s] -- Generating query for: %s" %
                                   (helper_utils.readable_time(), "USGS"))
            _query_ = generate_usgs_query()
            cursor.execute(_query_)
            usgs_results = cursor.fetchall()
            helper_utils.std_flush("[%s] -- Obtained resuts for: %s" %
                                   (helper_utils.readable_time(), "USGS"))

            helper_utils.std_flush("[%s] -- Generating query for: %s" %
                                   (helper_utils.readable_time(), "News"))
            _query_ = generate_news_query()
            cursor.execute(_query_)
            news_results = cursor.fetchall()
            helper_utils.std_flush("[%s] -- Obtained resuts for: %s" %
                                   (helper_utils.readable_time(), "News"))
            cursor.close()

            helper_utils.std_flush(
                "[%s] -- Generating local cache with scoring:\tSocial-ML - 0.3\tSocial-HDI - 1\tNews - 3\tUSGS - 5\tTRMM - 1"
                % helper_utils.readable_time())
            # Scoring -- Twitter-Social: 0.3    Twitter-HDI - 1     News:       3       USGS:   5       TRMM:   1
            for _streamer_ in streamer_results:
                helper_utils.std_flush(
                    "[%s] -- Local caching for %s" %
                    (helper_utils.readable_time(), _streamer_))
                for tuple_cell_ in streamer_results[_streamer_]:
                    _cell_ = tuple_cell_[0]
                    if _cell_ not in cell_cache:
                        cell_cache[_cell_] = {}
                    if int(float(tuple_cell_[1])) > 0:
                        cell_cache[_cell_][_streamer_ + "-hdi"] = (int(
                            float(tuple_cell_[1])), float(tuple_cell_[1]))
                    if int(float(tuple_cell_[2]) / 0.34) > 0:
                        cell_cache[_cell_][_streamer_ + "-ml"] = (int(
                            float(tuple_cell_[2]) / 0.34), float(
                                tuple_cell_[2]))

            helper_utils.std_flush("[%s] -- Local caching for %s" %
                                   (helper_utils.readable_time(), "TRMM"))
            for tuple_cell_ in trmm_results:
                _cell_ = tuple_cell_[0]
                if _cell_ not in cell_cache:
                    cell_cache[_cell_] = {}
                cell_cache[_cell_]["TRMM"] = (float(tuple_cell_[1]),
                                              float(tuple_cell_[1] * 1)
                                              )  # 1 <-- TRMM score

            helper_utils.std_flush("[%s] -- Local caching for %s" %
                                   (helper_utils.readable_time(), "USGS"))
            for tuple_cell_ in usgs_results:
                _cell_ = tuple_cell_[0]
                if _cell_ not in cell_cache:
                    cell_cache[_cell_] = {}
                cell_cache[_cell_]["USGS"] = (float(tuple_cell_[1]),
                                              float(tuple_cell_[1] * 5))

            helper_utils.std_flush("[%s] -- Local caching for %s" %
                                   (helper_utils.readable_time(), "News"))
            for tuple_cell_ in news_results:
                _cell_ = tuple_cell_[0]
                if _cell_ not in cell_cache:
                    cell_cache[_cell_] = {}
                cell_cache[_cell_]["News"] = (float(tuple_cell_[1]),
                                              float(tuple_cell_[1] * 3))

            helper_utils.std_flush(
                "[%s] -- Local cache score total generation" %
                helper_utils.readable_time())
            for _cell_ in cell_cache:
                cell_cache[_cell_]["total"] = sum([
                    cell_cache[_cell_][item][1] for item in cell_cache[_cell_]
                ])

            pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
            r = redis.Redis(connection_pool=pool)
            helper_utils.std_flush("[%s] -- Connected to Redis" %
                                   helper_utils.readable_time())

            # Double-buffered key scheme (v1/v2):
            # list_tracker_key tracks which version currently holds the data (v1 or v2)
            # list_push_key holds the list of cells
            # list_info_key holds the per-cell score details
            list_tracker_key = "assed:event:detection:multisource:listkey"
            list_push_key = "assed:event:detection:multisource:list"
            list_info_key = "assed:event:detection:multisource:info"
            key_version = r.get(list_tracker_key)
            if key_version is None:
                key_version = "v2"
            else:
                key_version = key_version.decode()
            push_key = 'v1'
            if key_version == 'v1':
                helper_utils.std_flush(
                    "[%s] -- v1 key already in effect. Pushing to v2" %
                    helper_utils.readable_time())
                push_key = 'v2'
            else:
                helper_utils.std_flush(
                    "[%s] -- v2 key already in effect. Pushing to v1" %
                    helper_utils.readable_time())

            cell_list = [item for item in cell_cache]
            true_list_push_key = list_push_key + ":" + push_key
            helper_utils.std_flush(
                "[%s] -- Deleting existing %s, if any" %
                (helper_utils.readable_time(), true_list_push_key))
            r.delete(true_list_push_key)

            r.lpush(true_list_push_key, *cell_list)
            helper_utils.std_flush(
                "[%s] -- Pushed cell list to %s" %
                (helper_utils.readable_time(), true_list_push_key))

            helper_utils.std_flush("[%s] -- Pushing individual cell results" %
                                   helper_utils.readable_time())
            cell_counter = 0
            for _cell_ in cell_cache:
                cell_push_contents = json.dumps(cell_cache[_cell_])
                cell_specific_suffix = ":".join(_cell_.split("_"))
                cell_push_key = ":".join(
                    [list_info_key, cell_specific_suffix, push_key])
                r.set(cell_push_key, cell_push_contents)
                if cell_counter == 0:
                    helper_utils.std_flush("[%s] -- First push: %s --- %s" %
                                           (helper_utils.readable_time(),
                                            cell_push_key, cell_push_contents))
                cell_counter += 1

            helper_utils.std_flush(
                "[%s] -- Completed individual cell pushes with %s cells" %
                (helper_utils.readable_time(), str(cell_counter)))

            r.set(list_tracker_key, push_key)
            helper_utils.std_flush(
                "[%s] -- Setting versioning in %s to %s" %
                (helper_utils.readable_time(), list_tracker_key, push_key))

            helper_utils.std_flush("--------   COMPLETE AT  %s ----------\n" %
                                   helper_utils.readable_time())
        else:
            #helper_utils.std_flush("Sleeping for %s"%sleep_timer)
            time.sleep(sleep_timer)
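
Because the loop above alternates between v1 and v2 key suffixes, a consumer has to read the tracker key first and then fetch the matching list and per-cell entries. A minimal reader-side sketch, assuming the consumer runs against the same Redis instance; the reader loop itself and the fallback version are not part of the original source:

import json
import redis

# Key names copied from the producer above.
list_tracker_key = "assed:event:detection:multisource:listkey"
list_push_key = "assed:event:detection:multisource:list"
list_info_key = "assed:event:detection:multisource:info"

r = redis.Redis(host='localhost', port=6379, db=0)
# Read whichever version the producer marked as live, then fetch that cell list.
version = (r.get(list_tracker_key) or b"v1").decode()  # assumed default when nothing has been pushed yet
cells = [c.decode() for c in r.lrange(list_push_key + ":" + version, 0, -1)]
for cell in cells:
    info_key = ":".join([list_info_key, ":".join(cell.split("_")), version])
    scores = json.loads(r.get(info_key))  # per-cell score dict written by the producer loop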
Example #8
import sys, time, os, json, codecs, traceback
import pdb
from datetime import datetime

import multiprocessing
from utils.file_utils import load_config
from utils.helper_utils import dict_equal, setup_pid, readable_time, std_flush
import utils.CONSTANTS as CONSTANTS

if __name__ == '__main__':
    pid_name = os.path.basename(sys.argv[0]).split('.')[0]
    setup_pid(pid_name)

    assed_config = load_config(CONSTANTS.ASSED_CONFIG)

    configOriginal = load_config(CONSTANTS.HIGH_CONFIDENCE_CONFIG_PATH)

    HCS_configuration = {}
    errorQueue = multiprocessing.Queue()
    messageQueue = multiprocessing.Queue()

    for hcs_type in configOriginal:
        _cfg = configOriginal[hcs_type]
        kwargs = {}
        HCS_configuration[hcs_type] = {}
        HCS_configuration[hcs_type]["name"] = _cfg["name"]
        HCS_configuration[hcs_type]["db_name"] = _cfg["db_name"]
        HCS_configuration[hcs_type]["source_file"] = _cfg["source_file"]
        HCS_configuration[hcs_type]["type"] = _cfg["type"]
        if HCS_configuration[hcs_type]["type"] == "scheduled":
            HCS_configuration[hcs_type]["schedule"] = _cfg["schedule"]