def __init__(
    self,
    doc_loader,
    n_words,
    classify_tweets,
    minimum_gram_length,
    max_distance_entities_doc,
    doc_score_types,
):
    """Set up database connections, the base class, and the doc loader.

    Connects to Elasticsearch and PostgreSQL, checks the toponym index,
    initializes the superclass with both live connections, and — when
    ``classify_tweets`` is ``'bert'`` — spins up the text classifier.

    NOTE(review): the ``doc_loader`` parameter is accepted but never
    used; a ``DocLoaderES`` is constructed below instead. Confirm with
    the call sites whether the parameter is dead.
    """
    self.n_words = n_words
    self.classify_tweets = classify_tweets

    # Elasticsearch holds the documents; PostgreSQL ('gfm') the geodata.
    self.es = Elastic(host=ELASTIC_HOST)
    self.check_toponym_index()
    self.pg = PostgreSQL('gfm')

    # Superclass needs both connections plus the scoring configuration.
    super().__init__(self.pg, self.es, doc_score_types,
                     max_distance_entities_doc)

    if self.classify_tweets == 'bert':
        self.text_classifier = TextClassifier()

    self.docs = {}

    # Deferred import, as in the original code.
    from doc_loader import DocLoaderES
    self.doc_loader = DocLoaderES(doc_score_types, n_words,
                                  minimum_gram_length)
# Fill the raster band with one unique sequential value per cell
# (row-major order), georeference it, and stamp the projection.
array = np.arange(ysize * xsize).reshape((ysize, xsize))
ds.SetGeoTransform(gt)
ds.GetRasterBand(1).WriteArray(array)
source = osr.SpatialReference()
source.ImportFromEPSG(EPSG)
ds.SetProjection(source.ExportToWkt())
# Dropping the reference closes the GDAL dataset and flushes it to disk.
ds = None

# Vectorize the raster into a shapefile (one polygon per cell value)
# unless the shapefile already exists.
# NOTE(review): hard-coded local Anaconda path plus shell=True is
# machine-specific and fragile — consider invoking gdal_polygonize via
# the GDAL Python bindings instead.
if not os.path.exists(shp_file):
    subprocess.call(
        r"python C:\Users\jadeb\Anaconda3\Scripts\gdal_polygonize.py"
        + f" {tif_file} {shp_file}",
        shell=True)

pg = PostgreSQL('classification')
# Only (re)build the raster table if it is not already in PostgreSQL.
if not pg.table_exists(f'{RAINFALL_TYPE.lower()}_raster'):
    gdf = gpd.GeoDataFrame.from_file(shp_file)
    print('finished reading file')

    # Generators yielding the column index (x) and row index (y) of each
    # polygon, in the same row-major order the raster was numbered above.
    # NOTE(review): indentation reconstructed from a collapsed source —
    # these defs are assumed to sit inside the table_exists guard; the
    # guard's body appears to continue beyond this excerpt.
    def x():
        for _ in range(ysize):
            for j in range(xsize):
                yield j

    def y():
        for i in range(ysize):
            for _ in range(xsize):
                yield i
import csv import psycopg2 from psycopg2.extensions import AsIs import pandas as pd from db.postgresql import PostgreSQL from db.elastic import Elastic from config import LEVEL_2_COUNTRIES, PG_DB, DOCUMENT_INDEX, POSTGRESQL_USER pd.options.mode.chained_assignment = None TOWN_CODES = set([ 'PPL', 'PPLA', 'PPLA2', 'PPLA3', 'PPLA4', 'PPLC', 'PPLG', 'PPLR', 'PPLS', 'PPLX', 'STLMT' ]) pg = PostgreSQL('gfm') es = Elastic() class Preprocess(): def __init__(self): self.level_0_codes = self._load_level_0_codes() def _load_level_0_codes(self): gdf = gpd.GeoDataFrame.from_file( os.path.join('input', 'maps', 'level0.json')) return set(['g-' + geonameid for geonameid in gdf['geoNameId']]) def get_location_type(self, country_code, feature_code,
def load_docs(self, docs_queue, n_docs_to_unload, start, analysis_length,
              timestep_length, event_1, event_2, timestep_end_str,
              is_real_time, datetime=datetime):
    """Producer loop: stream documents from Elasticsearch into docs_queue.

    Loads a spin-up window first, then replays historical timesteps of
    ``timestep_length`` until caught up with the current UTC time, then
    switches to real-time mode polling roughly every 3 minutes. After
    each loaded timestep it publishes the encoded timestep end through
    ``timestep_end_str`` and hand-shakes with the consumer via
    ``event_1``/``event_2``.

    NOTE(review): ``datetime=datetime`` rebinds the module as a default
    argument — presumably to keep it reachable inside a child process or
    at interpreter shutdown; confirm the intent.
    """
    try:
        # Fresh connections: this looks like it runs in its own process
        # (Queue/Event/Value arguments), so it cannot reuse the parent's
        # clients — TODO confirm against the spawning code.
        es = Elastic(host=ELASTIC_HOST)
        pg = PostgreSQL('gfm')
        doc_analyzer = DocAnalyzer(es, pg, self.doc_score_types,
                                   self.n_words, self.minimum_gram_length)

        # Spin-up: preload history covering the analysis window up to `start`.
        spinup_start = start - analysis_length + timestep_length
        self.load_timestep_es(es, doc_analyzer, docs_queue,
                              n_docs_to_unload, spinup_start, start)

        # Catch-up phase: replay one timestep at a time until "now".
        timestep = 1
        timestep_end = start + timestep * timestep_length
        while timestep_end < datetime.utcnow():
            query_start = timestep_end - timestep_length
            self.load_timestep_es(es, doc_analyzer, docs_queue,
                                  n_docs_to_unload, query_start,
                                  timestep_end)
            timestep_end_str.value = self.encode_dt(timestep_end)
            timestep += 1
            timestep_end = start + timestep * timestep_length
            # Per-timestep handshake: wake the consumer, wait until it
            # signals back. NOTE(review): loop membership of these three
            # lines was reconstructed from a collapsed source — verify.
            event_2.clear()
            event_1.set()
            event_2.wait()

        # Real-time phase: poll at ~3-minute intervals from here on.
        last_timestep_end = timestep_end - timestep_length
        is_real_time.value = True
        while True:
            timestep_end = datetime.utcnow()
            # Sleep away whatever remains of the 3-minute cadence.
            sleep = (timedelta(minutes=3)
                     - (timestep_end - last_timestep_end)).total_seconds()
            if sleep > 0:
                time.sleep(sleep)
                timestep_end = datetime.utcnow()
            self.load_timestep_es(es, doc_analyzer, docs_queue,
                                  n_docs_to_unload, last_timestep_end,
                                  timestep_end)
            last_timestep_end = timestep_end
            timestep_end_str.value = self.encode_dt(timestep_end)
            event_2.clear()
            event_1.set()
            event_2.wait()
    except Exception as e:
        # NOTE(review): `e` is unused and the bare re-raise makes this
        # handler a no-op — likely a leftover debugging hook.
        raise
# Name of the toponym resolution table TOPONYM_RESOLUTION_TABLE = 'toponym_resolution_table' # Refresh time of the realtime geotagging module REAL_TIME_TAGGER_REFRESH_TIME = 300 # sec # Name of the Elasticsearch index with tweets TWEETS_INDEX = 'taggs' # Name of the Elasticsearch index with toponyms TOPONYM_INDEX = 'toponyms' # Update tweets in the database with their locations (flag for testing purposes) UPDATE = False # Connect to databases es_tweets = Elastic() es_toponyms = es_tweets pg_Geotag = PostgreSQL(POSTGRESQL_DB) pg = PostgreSQL(POSTGRESQL_DB) # The functions below are meant to connect to your database. class TweetAnalyzerCustom: # ID = ID of the tweet as str # tweet = { # 'date': '%a %b %d %H:%M:%S +0000 %Y', # 'user': { # 'id': user ID, # 'location': user location, # 'time zone': user time zone, # }, # 'text': text in utf-8 - retweeted_status if retweet, otherwise text # 'retweet': Boolean: True or False,
def __init__(self):
    """Open the PostgreSQL connection and initialize PostGIS on it."""
    # Connect to PostgreSQL via an explicit unbound base-class call
    # (the class's MRO is not visible here, so super() is not assumed).
    PostgreSQL.__init__(self, POSTGRESQL_DB)
    # presumably enables/verifies the PostGIS extension on this
    # connection — confirm in db.postgresql.
    PostgreSQL.initialize_postgis(self)