def __init__(self, debug=False):
    self.debug = debug
    self.config = load_config("./config/assed_config.json")
    self._encoder = KeyedVectors.load_word2vec_format(
        './pipelines/assed_landslide/ml/encoders/GoogleNews-vectors-negative300.bin',
        binary=True, unicode_errors='ignore', limit=100000)
    self.zero_v = zeros(shape=(300,))
    self.model = keras.models.load_model(
        "./pipelines/assed_landslide/ml/models/tf_model.h5")
    self.DB_CONN = get_db_connection(self.config)
    self.cursor = self.DB_CONN.cursor()
    self.cursor_timer = time.time()
    self.cursor_refresh = 300
    self.true_counter = 0
    self.false_counter = 0
    self.total_counter = 0
    self.db_insert = 'INSERT INTO ASSED_Social_Events ( \
        social_id, cell, \
        latitude, longitude, timestamp, link, text, location, topic_name, source, valid, streamtype) \
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
    self.stream_tracker = {}
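# Usage sketch (not from the source): assuming the Keras model above scores a single
# 300-d mean word2vec embedding, a message could be encoded and classified roughly as
# below. `encode_text` and the 0.5 threshold are hypothetical illustrations, not the
# project's actual preprocessing.
import numpy as np

def encode_text(encoder, zero_v, text):
    # Average the word2vec vectors of in-vocabulary tokens; fall back to the zero vector.
    vectors = [encoder[w] for w in text.split() if w in encoder]
    return np.mean(vectors, axis=0) if vectors else zero_v

# Hypothetical call inside the classifier:
# score = self.model.predict(encode_text(self._encoder, self.zero_v, tweet_text).reshape(1, -1))
# is_landslide = score[0][0] > 0.5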
def __init__(self, assed_config, root_name, errorQueue, messageQueue, **kwargs):
    multiprocessing.Process.__init__(self)
    # Set up DB connections
    self.DB_CONN = get_db_connection(assed_config)
    self.root_name = root_name
    self.errorQueue = errorQueue
    self.messageQueue = messageQueue
    self.cached_list = self.getCachedList()
def __init__(self, debug=False):
    self.debug = debug
    self.config = load_config("./config/assed_config.json")
    self.DB_CONN = get_db_connection(self.config)
    self.cursor = self.DB_CONN.cursor()
    self.cursor_timer = time.time()
    self.cursor_refresh = 300
    self.MS_IN_DAYS = 86400000
    self.true_counter = 0
    self.unk = 0
    self.stream_tracker = {}
def __init__(self, assed_config, root_name, errorQueue, messageQueue, **kwargs):
    multiprocessing.Process.__init__(self)
    # Set up DB connections
    self.DB_CONN = get_db_connection(assed_config)
    self.client = NewsApiClient(api_key="f715251d799140f793e63a1aec194920")
    self.root_name = root_name
    self.errorQueue = errorQueue
    self.messageQueue = messageQueue
    # No cached list because we are getting new stuff every day...
    self.config = kwargs["config"]
    self.NER = Ner(host='localhost', port=9199)
    pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
    self.r = redis.Redis(connection_pool=pool)
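# Usage sketch (not from the source): how a worker like this might combine the NewsAPI
# client with the Stanford NER tagger to pull location candidates from headlines. The
# query string and the LOCATION filter are illustrative assumptions.
# articles = self.client.get_everything(q="landslide", language="en", sort_by="publishedAt")
# for article in articles.get("articles", []):
#     tokens = self.NER.get_entities(article["title"])
#     locations = [tok for tok, tag in tokens if tag == "LOCATION"]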
def generate_top_slow_trend(case_ids):
    sql_script = """SELECT start_time, case_id, duration
                    FROM aa_test_stats
                    WHERE case_id in %s"""
    conn = get_db_connection()
    cursor = conn.cursor()
    cursor.execute(sql_script, (tuple(case_ids),))
    df = DataFrame(cursor.fetchall(), columns=['start_time', 'case_id', 'duration'])
    fig, ax = plt.subplots()
    labels = []
    for key, grp in df.groupby(['case_id']):
        color = get_random_color()
        ax = grp.plot(ax=ax, kind='line', x='start_time', y='duration', c=color)
        labels.append(key)
    lines, _ = ax.get_legend_handles_labels()
    ax.legend(lines, labels, loc=2, prop={'size': 5})
    plt.savefig(TREND_CHART)
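# Usage sketch (assumes get_db_connection(), get_random_color(), and TREND_CHART are
# defined elsewhere in this module, as the function above implies). The case IDs are
# hypothetical examples:
# generate_top_slow_trend(['login_smoke', 'checkout_regression', 'search_api'])
# One duration-over-time line is drawn per case_id and the chart is saved to TREND_CHART.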
# This initializes several things...
import pdb
import argparse
import glob
from utils import db_utils
from utils.file_utils import load_config

parser = argparse.ArgumentParser(description="Initialize sets up various parts of LITMUS")
parser.add_argument("--env", choices=["mysql", "dirs"], help="Environment to set up")
argums = vars(parser.parse_args())

assed_config = load_config('config/assed_config.json')

if argums['env'] == 'dirs':
    import os
    dirs = ['downloads', 'logfiles', 'config', 'redis', 'ml', 'ml/models', 'ml/encoders']
    for directory in dirs:
        if not os.path.exists(directory):
            os.makedirs(directory)

if argums['env'] == 'mysql':
    # Set up mysql stuff (news and everything)
    db_conn = db_utils.get_db_connection(assed_config)
    for file_ in glob.glob('initialization/mysql/*.SQL'):
        db_utils.run_sql_file(file_, db_conn)
    db_conn.close()
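# Invocation sketch (assuming this script is saved as, e.g., initialize.py at the
# repository root; the filename is an assumption):
#   python initialize.py --env dirs    # create the working directories
#   python initialize.py --env mysql   # run the SQL files in initialization/mysql/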
def load_data(self, response, tp_accessorial):
    acy_table = response[response['Product_Mode'] == 'ACY']
    prod_table = response[response['Product_Mode'] != 'ACY']
    query = ""
    sql = ""
    last = int(self.bid_number[-1])
    if self.get_randomizer(last):
        # Check available accessorials
        if not acy_table.empty:
            acy_table = acy_table.drop('SVC_GRP_NR', axis=1)  # remove due to blank causing issues
            acy_table = acy_table.merge(tp_accessorial, how='inner', on=[
                'MVM_DRC_CD', 'SVC_FEA_TYP_CD', 'SVM_TYP_CD', 'ASY_SVC_TYP_CD',
                'PKG_CHA_TYP_CD', 'PKG_ACQ_MTH_TYP_CD'
            ])
        # Add regular products
        try:
            connection = get_db_connection()
            cursor = connection.cursor()
            # Remove accessorials from the list
            acc_svcs = tp_accessorial.SVC_GRP_NR.unique()
            prod_table = prod_table[~prod_table['SVC_GRP_NR'].isin(acc_svcs)]
            # Only update once per service group
            prod_table = prod_table[[
                'SVC_GRP_NR', 'Incentive_Freight', 'Target_Low', 'Target_High'
            ]].drop_duplicates()
            for index, row in prod_table.iterrows():
                # Rounding to 3 decimal places
                svc_grp_num = row["SVC_GRP_NR"]
                incentive = round(row["Incentive_Freight"], 3)
                min_inc = round(row["Target_Low"], 3)
                max_inc = round(row["Target_High"], 3)
                query = "UPDATE TNCVCEL " \
                        "SET RCM_NCV_QY = " + str(incentive) + ", NCV_MIN_QY = " + str(min_inc) + \
                        ", NCV_MAX_QY = " + str(max_inc) + \
                        " WHERE NVP_BID_NR = '" + str(self.bid_number) + \
                        "' AND SVC_GRP_NR = '" + str(svc_grp_num) + \
                        "' AND NCV_DTR_DAT_TYP_CD = 'P'"
                cursor.execute(query)
            cursor.execute('COMMIT')
            cursor.close()
            connection.close()
            # logger.info(str(prod_table.shape[0]) + " rows available to update to TNCVCEL")
        except Exception as e:
            raise RuntimeError(
                'Error 4.1: Oracle DB cannot be updated with query: \n' + query +
                ' with error: ' + str(e))
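# Design note (sketch, not the project's code): the UPDATE above is built by string
# concatenation. If the driver is cx_Oracle, the same statement could instead use named
# bind variables, which avoids quoting and injection issues:
# update_sql = ("UPDATE TNCVCEL SET RCM_NCV_QY = :inc, NCV_MIN_QY = :min_inc, NCV_MAX_QY = :max_inc "
#               "WHERE NVP_BID_NR = :bid AND SVC_GRP_NR = :svc AND NCV_DTR_DAT_TYP_CD = 'P'")
# cursor.execute(update_sql, {"inc": incentive, "min_inc": min_inc, "max_inc": max_inc,
#                             "bid": str(self.bid_number), "svc": str(svc_grp_num)})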
def main():
    local_timer = 0
    refresh_timer = 7200
    sleep_timer = 300
    while True:
        if time.time() - local_timer > refresh_timer:
            local_timer = time.time()
            helper_utils.std_flush("[%s] -- Initializing EventDetection" % helper_utils.readable_time())
            cell_cache = {}

            assed_config = file_utils.load_config("./config/assed_config.json")
            helper_utils.std_flush("[%s] -- Obtained DB Connection" % helper_utils.readable_time())
            DB_CONN = db_utils.get_db_connection(assed_config)
            cursor = DB_CONN.cursor()

            available_streamers = [item for item in assed_config["SocialStreamers"]]
            streamer_results = {}
            helper_utils.std_flush("[%s] -- Available streamers: %s" % (helper_utils.readable_time(), str(available_streamers)))

            for _streamer_ in available_streamers:
                helper_utils.std_flush("[%s] -- Generating query for: %s" % (helper_utils.readable_time(), _streamer_))
                _query_ = generate_social_query(_streamer_=_streamer_, _topic_="landslide")
                cursor.execute(_query_)
                streamer_results[_streamer_] = cursor.fetchall()
                helper_utils.std_flush("[%s] -- Obtained results for: %s" % (helper_utils.readable_time(), _streamer_))

            helper_utils.std_flush("[%s] -- Generating query for: %s" % (helper_utils.readable_time(), "TRMM"))
            _query_ = generate_trmm_query()
            cursor.execute(_query_)
            trmm_results = cursor.fetchall()
            helper_utils.std_flush("[%s] -- Obtained results for: %s" % (helper_utils.readable_time(), "TRMM"))

            helper_utils.std_flush("[%s] -- Generating query for: %s" % (helper_utils.readable_time(), "USGS"))
            _query_ = generate_usgs_query()
            cursor.execute(_query_)
            usgs_results = cursor.fetchall()
            helper_utils.std_flush("[%s] -- Obtained results for: %s" % (helper_utils.readable_time(), "USGS"))

            helper_utils.std_flush("[%s] -- Generating query for: %s" % (helper_utils.readable_time(), "News"))
            _query_ = generate_news_query()
            cursor.execute(_query_)
            news_results = cursor.fetchall()
            helper_utils.std_flush("[%s] -- Obtained results for: %s" % (helper_utils.readable_time(), "News"))

            cursor.close()

            helper_utils.std_flush("[%s] -- Generating local cache with scoring:\tSocial-ML - 0.3\tSocial-HDI - 1\tNews - 3\tUSGS - 5\tTRMM - 1" % helper_utils.readable_time())
            # Scoring -- Twitter-Social: 0.3   Twitter-HDI: 1   News: 3   USGS: 5   TRMM: 1
            for _streamer_ in streamer_results:
                helper_utils.std_flush("[%s] -- Local caching for %s" % (helper_utils.readable_time(), _streamer_))
                for tuple_cell_ in streamer_results[_streamer_]:
                    _cell_ = tuple_cell_[0]
                    if _cell_ not in cell_cache:
                        cell_cache[_cell_] = {}
                    if int(float(tuple_cell_[1])) > 0:
                        cell_cache[_cell_][_streamer_ + "-hdi"] = (int(float(tuple_cell_[1])), float(tuple_cell_[1]))
                    if int(float(tuple_cell_[2]) / 0.34) > 0:
                        cell_cache[_cell_][_streamer_ + "-ml"] = (int(float(tuple_cell_[2]) / 0.34), float(tuple_cell_[2]))

            helper_utils.std_flush("[%s] -- Local caching for %s" % (helper_utils.readable_time(), "TRMM"))
            for tuple_cell_ in trmm_results:
                _cell_ = tuple_cell_[0]
                if _cell_ not in cell_cache:
                    cell_cache[_cell_] = {}
                cell_cache[_cell_]["TRMM"] = (float(tuple_cell_[1]), float(tuple_cell_[1] * 1))  # 1 <-- TRMM score

            helper_utils.std_flush("[%s] -- Local caching for %s" % (helper_utils.readable_time(), "USGS"))
            for tuple_cell_ in usgs_results:
                _cell_ = tuple_cell_[0]
                if _cell_ not in cell_cache:
                    cell_cache[_cell_] = {}
                cell_cache[_cell_]["USGS"] = (float(tuple_cell_[1]), float(tuple_cell_[1] * 5))

            helper_utils.std_flush("[%s] -- Local caching for %s" % (helper_utils.readable_time(), "News"))
            for tuple_cell_ in news_results:
                _cell_ = tuple_cell_[0]
                if _cell_ not in cell_cache:
                    cell_cache[_cell_] = {}
                cell_cache[_cell_]["News"] = (float(tuple_cell_[1]), float(tuple_cell_[1] * 3))

            helper_utils.std_flush("[%s] -- Local cache score total generation" % helper_utils.readable_time())
            for _cell_ in cell_cache:
                cell_cache[_cell_]["total"] = sum([cell_cache[_cell_][item][1] for item in cell_cache[_cell_]])

            pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
            r = redis.Redis(connection_pool=pool)
            helper_utils.std_flush("[%s] -- Connected to Redis" % helper_utils.readable_time())

            # Key push alternates between v1 and v2. Actual keys:
            #   list_tracker_key tracks where the data is (either v1 or v2)
            #   list_push_key contains the list of cells
            list_tracker_key = "assed:event:detection:multisource:listkey"
            list_push_key = "assed:event:detection:multisource:list"
            list_info_key = "assed:event:detection:multisource:info"

            key_version = r.get(list_tracker_key)
            if key_version is None:
                key_version = "v2"
            else:
                key_version = key_version.decode()
            push_key = 'v1'
            if key_version == 'v1':
                helper_utils.std_flush("[%s] -- v1 key already in effect. Pushing to v2" % helper_utils.readable_time())
                push_key = 'v2'
            else:
                helper_utils.std_flush("[%s] -- v2 key already in effect. Pushing to v1" % helper_utils.readable_time())

            cell_list = [item for item in cell_cache]
            true_list_push_key = list_push_key + ":" + push_key
            helper_utils.std_flush("[%s] -- Deleting existing %s, if any" % (helper_utils.readable_time(), true_list_push_key))
            r.delete(true_list_push_key)
            r.lpush(true_list_push_key, *cell_list)
            helper_utils.std_flush("[%s] -- Pushed cell list to %s" % (helper_utils.readable_time(), true_list_push_key))

            helper_utils.std_flush("[%s] -- Pushing individual cell results" % helper_utils.readable_time())
            cell_counter = 0
            for _cell_ in cell_cache:
                cell_push_contents = json.dumps(cell_cache[_cell_])
                cell_specific_suffix = ":".join(_cell_.split("_"))
                cell_push_key = ":".join([list_info_key, cell_specific_suffix, push_key])
                r.set(cell_push_key, cell_push_contents)
                if cell_counter == 0:
                    helper_utils.std_flush("[%s] -- First push: %s --- %s" % (helper_utils.readable_time(), cell_push_key, cell_push_contents))
                cell_counter += 1
            helper_utils.std_flush("[%s] -- Completed individual cell pushes with %s cells" % (helper_utils.readable_time(), str(cell_counter)))

            r.set(list_tracker_key, push_key)
            helper_utils.std_flush("[%s] -- Setting versioning in %s to %s" % (helper_utils.readable_time(), list_tracker_key, push_key))
            helper_utils.std_flush("-------- COMPLETE AT %s ----------\n" % helper_utils.readable_time())
        else:
            # helper_utils.std_flush("Sleeping for %s" % sleep_timer)
            time.sleep(sleep_timer)
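# Consumer sketch (an assumption, not from the source): a reader of the keys pushed above
# would first resolve the active version from the tracker key, then read the cell list and
# per-cell score blobs under that version. The function name is hypothetical.
import json
import redis

def read_event_cells(host='localhost', port=6379, db=0):
    r = redis.Redis(connection_pool=redis.ConnectionPool(host=host, port=port, db=db))
    # Resolve which buffer (v1 or v2) currently holds the data.
    version = r.get("assed:event:detection:multisource:listkey")
    version = version.decode() if version is not None else "v1"
    # Read the cell list, then the per-cell info keys, mirroring the producer's key layout.
    cells = [c.decode() for c in r.lrange("assed:event:detection:multisource:list:" + version, 0, -1)]
    results = {}
    for cell in cells:
        info_key = ":".join(["assed:event:detection:multisource:info", ":".join(cell.split("_")), version])
        payload = r.get(info_key)
        if payload is not None:
            results[cell] = json.loads(payload)
    return results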
def fetch_data(self):
    connection = get_db_connection()
    if connection:
        # Load the views in order.
        # 1. tp_bid
        q1 = "SELECT * FROM V_TP20_BID where NVP_BID_NR = '%s' " % self.bid_number
        df_tp20_bid = pd.read_sql(q1, con=connection)
        # 2. tp_bid_shpr
        q2 = "SELECT * FROM V_TP20_BID_SHPR_INFO WHERE NVP_BID_NR = '%s' " % self.bid_number
        df_tp20_bid_shpr = pd.read_sql(q2, con=connection).sort_values(
            by=['NVP_BID_NR', 'SHR_AC_NR'], ascending=[1, 0])
        # 3. tp_svc_grp
        q3 = "SELECT * FROM V_TP20_SERVICE_GROUP WHERE NVP_BID_NR = '%s' " % self.bid_number
        df_tp20_svc_grp = pd.read_sql(q3, con=connection)
        # 4. tp_ceiling_svc
        q4 = "SELECT * FROM V_TP20_CEILING_SERVICES WHERE NVP_BID_NR = '%s' " % self.bid_number
        df_tp20_ceiling_svc = pd.read_sql(q4, con=connection)
        # 5. tp20_shpr_svc
        q5 = "SELECT * FROM V_TP20_SHPR_SVC_VOL_REV WHERE NVP_BID_NR = '%s'" % self.bid_number
        df_tp20_shpr_svc = pd.read_sql(q5, con=connection)
        # TODO: Filter out the columns not required.
        # 6. ttpsvgp
        q6 = "SELECT NVP_BID_NR, SVC_GRP_NR, PND_STS_CD, CPE_ETM_RPP_A, SVC_GRP_TRG_PSE_A, SVC_TRG_LOW_RNG_A, " \
             "SVC_TRG_HI_RNG_A, TRG_PSE_FCR_NR FROM TTPSVGP WHERE NVP_BID_NR = '%s' " % self.bid_number
        df_ttpsvgp = pd.read_sql(q6, con=connection)
        # 7. zone_weight
        q7 = "SELECT NVP_BID_NR, SVC_GRP_NR, SVC_GRP_SUF_NR, DEL_ZN_NR, WGT_MS_UNT_TYP_CD, " \
             "WGT_CGY_WGY_QY, PKGBOL, " \
             "(CASE WGT_MS_UNT_TYP_CD WHEN 'OZ' " \
             "THEN cast(WGT_CGY_WGY_QY as DECIMAL(9,2)) / 16.0 " \
             "ELSE cast(WGT_CGY_WGY_QY as DECIMAL(9,2)) END) " \
             "as WEIGHT FROM V_TP20_ZONE_WGT_VOL_DIST WHERE NVP_BID_NR = '%s' AND DEL_ZN_NR != 'ALL'" % self.bid_number
        df_zone_weight = pd.read_sql(q7, con=connection)
        # 8. tncvcel
        q8 = "SELECT DISTINCT C.*, D.MVM_DRC_CD, D.SVC_TYP_CD, D.SVC_FEA_TYP_CD FROM " \
             "(SELECT A.* FROM " \
             "(SELECT NVP_BID_NR, SVC_GRP_NR, RCM_NCV_QY, NCV_MIN_QY, NCV_MAX_QY FROM TNCVCEL " \
             "WHERE NVP_BID_NR = '%s') A " \
             "INNER JOIN V_TP20_CEILING_SERVICES B " \
             "ON A.NVP_BID_NR = B.NVP_BID_NR AND A.SVC_GRP_NR = B.SVC_GRP_NR) C " \
             "INNER JOIN V_TP20_SERVICE_GROUP D ON C.NVP_BID_NR = D.NVP_BID_NR AND C.SVC_GRP_NR = D.SVC_GRP_NR" % self.bid_number
        df_tncvcel = pd.read_sql(q8, con=connection)
        # 9. tp_accessorial
        q9 = "SELECT * FROM V_TP20_ACCESSORIAL WHERE NVP_BID_NR = '%s' " % self.bid_number
        df_tp_accessorial = pd.read_sql(q9, con=connection)

        import joblib
        import tempfile
        import os
        outfile = os.path.join(tempfile.gettempdir(), '%s.data' % self.bid_number)
        joblib.dump([
            df_tp20_bid, df_tp20_bid_shpr, df_tp20_svc_grp, df_tp20_ceiling_svc,
            df_tp20_shpr_svc, df_ttpsvgp, df_zone_weight, df_tp_accessorial
        ], outfile, compress=5)

        if self.validate_data(df_tp20_bid, df_tp20_bid_shpr, df_tp20_svc_grp,
                              df_tp20_ceiling_svc, df_tp20_shpr_svc, df_ttpsvgp,
                              df_zone_weight, df_tp_accessorial):
            return (df_tp20_bid, df_tp20_bid_shpr, df_tp20_svc_grp, df_tp20_ceiling_svc,
                    df_tp20_shpr_svc, df_ttpsvgp, df_zone_weight, df_tp_accessorial)
        else:
            self.log.critical('Data validation for the bid number %s failed.' % self.bid_number)
    else:
        self.log.critical('Unable to access the database.')
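# Usage sketch (assumption): the frames dumped above can be reloaded from the same
# temp-file path for offline inspection of a given bid number.
# import joblib, os, tempfile
# frames = joblib.load(os.path.join(tempfile.gettempdir(), '%s.data' % bid_number))
# (df_tp20_bid, df_tp20_bid_shpr, df_tp20_svc_grp, df_tp20_ceiling_svc,
#  df_tp20_shpr_svc, df_ttpsvgp, df_zone_weight, df_tp_accessorial) = frames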