def geoi_assemble( db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views', lookup_schema='geoimputation', lookup_table='ged', month_start=400, month_end=400): engine = create_engine(db_engine_name) with engine.connect() as con: trans = con.begin() query = alchemy_text("DROP TABLE IF EXISTS " + lookup_schema + ".geoi_out_all;") trans.commit() con.execute(query) query = alchemy_text(''' CREATE TABLE ''' + lookup_schema + '''.geoi_out_all AS SELECT * FROM ''' + lookup_schema + '''.geoi_out_4 WHERE "1"<>1 UNION SELECT * FROM ''' + lookup_schema + '''.geoi_out_6 WHERE "1"<>1 UNION SELECT id, priogrid_gid as "1", priogrid_gid as "2", priogrid_gid as "3", priogrid_gid as "4", priogrid_gid as "5", priogrid_gid as "6", priogrid_gid as "7", priogrid_gid as "8", priogrid_gid as "9", priogrid_gid as "10", priogrid_gid as "11", priogrid_gid as "12", priogrid_gid as "13", priogrid_gid as "14", priogrid_gid as "15" FROM ''' + lookup_schema + '''.''' + lookup_table + ''' WHERE month_id_end BETWEEN :m1 AND :m2; ''') con.execute(query, m1=month_start, m2=month_end) query = alchemy_text( ''' ALTER TABLE ''' + lookup_schema + '''.geoi_out_all ADD COLUMN month_start BIGINT; ALTER TABLE ''' + lookup_schema + '''.geoi_out_all ADD COLUMN month_end BIGINT; ALTER TABLE ''' + lookup_schema + '''.geoi_out_all ADD COLUMN type_of_violence BIGINT; ''') con.execute(query) query = alchemy_text(''' UPDATE ''' + lookup_schema + '''.geoi_out_all SET month_start=c.month_id_start, month_end=c.month_id_end, type_of_violence=c.type_of_violence FROM ''' + lookup_schema + '''.gedfull c WHERE ''' + lookup_schema + '''.geoi_out_all.id = c.id; ''') con.execute(query)
def report_pgm ( month_start, month_end, include_previous_months=0, db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views', ): month_start -= include_previous_months query=alchemy_text(""" with a as ( SELECT year_id, sum(to_dummy(ged_count_sb)) AS sb_count_pg, sum(to_dummy(ged_count_ns)) AS ns_count_pg, sum(to_dummy(ged_count_os)) AS os_count_pg, sum(ged_count_sb) AS sb_count_events, sum(ged_count_ns) AS ns_count_events, sum(ged_count_os) AS os_count_events, sum(ged_best_sb) AS sb_sum_deaths, sum(ged_best_ns) AS ns_sum_deaths, sum(ged_best_os) AS os_sum_deaths, month_id FROM preflight.flight_pgm WHERE month_id BETWEEN :s AND :e GROUP BY month_id, year_id ) SELECT m.month, a.* FROM a, staging.month as m WHERE a.month_id=m.id; """) query = query.bindparams(s=month_start, e=month_end) engine = create_engine(db_engine_name) with engine.connect() as con: report = pd.read_sql(sql=query,con=con) return (report)
def _check_ged(month_start: int, api_version: str) -> None: """ Check that GED has data for month_start """ # Imputation needs 12 months, check that we have that in the API? month_check = month_start - 11 log.debug(f"Checking GED has data for {month_check}") engine = create_engine(CONNECTSTRING) with engine.connect() as con: query = alchemy_text( "SELECT month, year_id FROM staging.month WHERE id=:sd") result = con.execute(query, sd=month_check).fetchone() iso_start_check = "{1:04d}-{0:02d}-01".format(*result) iso_end_check = "{1:04d}-{0:02d}-25".format(*result) next_page_url = ("http://ucdpapi.pcr.uu.se/api/gedevents/" + api_version + "?pagesize=1&StartDate=" + iso_start_check + "&EndDate=" + iso_end_check) next_page_url, ged_slice, total_pages = _get_ged_slice( next_page_url=next_page_url) if total_pages > 0: log.debug(f"GED data exists for {month_check}") else: raise RuntimeError( f"No data in api version {api_version} for month {month_check}" )
def geteventdensity( priogrids_df, month, conflict_id, db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views', schema='geoimputation', table='ged'): """ :param priogrids_df: :param month: :param db_engine_name: :param db_table_name: :return: """ engine = create_engine(db_engine_name) with engine.connect() as con: query = alchemy_text( "SELECT priogrid_gid, count(*)*100 as density FROM " + schema + "." + table + " WHERE conflict_new_id = :conflict AND month_id_end=:date GROUP BY priogrid_gid" ) query = query.bindparams(conflict=conflict_id, date=month) density_df = pd.read_sql(query, con) #print(density_df) df_merge = priogrids_df.merge(density_df, left_on='gid', right_on='priogrid_gid', how='left') df_merge = df_merge.drop('priogrid_gid', axis=1) df_merge = df_merge.fillna(1) #print (df_merge) #print ("**************************") return df_merge
def _geteventdensity( priogrids_df: pd.DataFrame, month: int, conflict_id: int, schema: str, table: str, ): engine = create_engine(CONNECTSTRING) with engine.connect() as con: query = alchemy_text(f""" SELECT priogrid_gid, count(*)*100 as density FROM {schema}.{table} WHERE conflict_new_id = :conflict AND month_id_end=:date GROUP BY priogrid_gid """) query = query.bindparams(conflict=conflict_id, date=month) density_df = pd.read_sql(query, con) df_merge = priogrids_df.merge(density_df, left_on="gid", right_on="priogrid_gid", how="left") df_merge = df_merge.drop("priogrid_gid", axis=1) df_merge = df_merge.fillna(1) return df_merge
def checkGED(db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views', version_number='5.9.99', month_start=400, imputation=True): print(month_start) if imputation: month_start = month_start - 11 engine = create_engine(db_engine_name) with engine.connect() as con: query = alchemy_text( "SELECT month, year_id FROM staging.month WHERE id=:sd") go = False try: result = con.execute(query, sd=month_start).fetchone() iso_start_check = '{1:04d}-{0:02d}-01'.format(*result) iso_end_check = '{1:04d}-{0:02d}-25'.format(*result) next_page_url = 'http://ucdpapi.pcr.uu.se/api/gedevents/' + version_number + '?pagesize=1&StartDate=' + iso_start_check + '&EndDate=' + iso_end_check print(next_page_url) next_page_url, ged_slice, total_pages = _get_ged_slice( next_page_url=next_page_url, token='48dda3460c347f3b') # print(iso_start_check, iso_end_check, total_pages) if total_pages > 0: go = True except: go = False return go
def geoi_run(count=15, db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views', lookup_schema='geoimputation', lookup_table='ged', adm1=True): engine = create_engine(db_engine_name) out_df = pd.DataFrame() if adm1: poly_table = 'gadm1' table_id = '4' else: poly_table = 'country' table_id = '6' with engine.connect() as con: query = alchemy_text("SELECT count(*) FROM " + lookup_schema + '.toimp' + table_id) row_count = con.execute(query).fetchone() i = 0 tools.printProgressBar(0, row_count[0], prefix='Progress:', suffix='Complete', bar_length=50) query = alchemy_text( "SELECT id,geom_wkt,month_id_end as month_id,conflict_new_id as conflict_id " "FROM " + lookup_schema + '.toimp' + table_id) for row in con.execute(query): #print(row) i += 1 tools.printProgressBar(i, row_count[0], prefix='Progress:', suffix='Complete', bar_length=50) out_df = out_df.append( geom_inputation(db_engine_name, lookup_schema, lookup_table, poly_table, count, *row)) out_df = out_df.rename(columns={0: 'id'}) out_df.to_sql(con=con, name='geoi_out_' + table_id, schema=lookup_schema, if_exists='replace', index=False)
def monthid_to_ymd(month_id, db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views'): '''given month_id, return Y-M-01 in ISO format''' engine = create_engine(db_engine_name) with engine.connect() as con: query = alchemy_text("SELECT month, year_id FROM staging.month WHERE id=:id") try: result = con.execute(query,id=int(month_id)).fetchone() result = '{1:04d}-{0:02d}-01'.format(*result) except: result = None return result
def geoi_prepare(db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views', lookup_schema='geoimputation', lookup_table='ged', month_start=400, month_end=400): """Copies the current attached GED for the lookup to work""" engine = create_engine(db_engine_name) with engine.connect() as con: con.execute('DROP TABLE IF EXISTS ' + lookup_schema + '.' + lookup_table) con.execute('DROP TABLE IF EXISTS ' + lookup_schema + '.gedfull') con.execute('DROP TABLE IF EXISTS ' + lookup_schema + '.toimp4') con.execute('DROP TABLE IF EXISTS ' + lookup_schema + '.toimp6') trans = con.begin() con.execute('CREATE TABLE ' + lookup_schema + '.' + lookup_table + ' AS SELECT id,priogrid_gid,conflict_new_id,month_id_end,' 'month_id_start,type_of_violence, geom, best ' 'FROM preflight.ged_attached') con.execute( 'CREATE TABLE ' + lookup_schema + '.gedfull AS SELECT id,priogrid_gid,conflict_new_id,month_id_end,' 'month_id_start,type_of_violence, geom, best ' 'FROM preflight.ged_attached_full') con.execute('CREATE INDEX lookup_idx ON ' + lookup_schema + '.' + lookup_table + ' (conflict_new_id,month_id_end)') con.execute('CREATE INDEX lookup2_idx ON ' + lookup_schema + '.' + lookup_table + ' (priogrid_gid)') con.execute(alchemy_text( 'CREATE TABLE ' + lookup_schema + '.toimp4 AS SELECT * FROM preflight.ged_attached_full where where_prec=4' ' AND month_id_end between :m1 and :m2'), m1=month_start, m2=month_end) con.execute(alchemy_text( 'CREATE TABLE ' + lookup_schema + '.toimp6 AS SELECT * FROM preflight.ged_attached_full where where_prec=6' ' AND month_id_end between :m1 and :m2 AND geom IS NOT NULL'), m1=month_start, m2=month_end) trans.commit()
def _geoi_run(adm1: bool) -> None: log.debug("Started _geoi_run()") count = 15 lookup_table = "ged" lookup_schema = "left_imputation" engine = create_engine(CONNECTSTRING) out_df = pd.DataFrame() if adm1: poly_table = "gadm1" table_id = "4" else: poly_table = "country" table_id = "6" with engine.connect() as con: query = alchemy_text("SELECT count(*) FROM " + lookup_schema + ".toimp" + table_id) row_count = con.execute(query).fetchone() query = alchemy_text( "SELECT id,geom_wkt,month_id_end as month_id,conflict_new_id as conflict_id " "FROM " + lookup_schema + ".toimp" + table_id) for row in con.execute(query): out_df = out_df.append( _geom_imputation(lookup_schema, lookup_table, poly_table, count, *row)) out_df = out_df.rename(columns={0: "id"}) out_df.to_sql( con=con, name="geoi_out_" + table_id, schema=lookup_schema, if_exists="replace", index=False, ) log.debug("Finished _geoi_run()")
def backupOldTable (db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views', schema_name='dataprep', table_name='ged'): ''' creates a backup copy of a table with the name TABLEnnnnnnn where n is unix timestamp :param db_engine_name engine URI :param schema_name schema :param table_name ''' ot = schema_name+'.'+table_name nt= schema_name+'.'+table_name+'_'+str(int(time())) query = alchemy_text('CREATE TABLE '+nt+' AS SELECT * FROM '+ot) engine = create_engine(db_engine_name) with engine.connect() as con: con.execute(query)
def validate (month_start, month_end, db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views', schema='preflight', table='flight_pgm'): engine = create_engine(db_engine_name) with engine.connect() as con: query = alchemy_text("""SELECT min(month_id) as min, max(month_id) as max FROM """+schema+"."+table+""" WHERE ged_count_sb IS NOT NULL AND ged_count_sb > 0 AND month_id BETWEEN :m1 and :m2 """) try: result = con.execute(query, m1=month_start, m2=month_end).fetchone() except: result = None, None if result[0] is None: return False if int(result[0]) == int(month_start) and int(result[1]) == int(month_end): return True return False
def getMonthID(year, month, db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views'): """ converts a year and month into a Views MonthID :param year: Year as YYYY :param month: :param db_engine_name: Location of the VIEWS urI :return: """ engine = create_engine(db_engine_name) with engine.connect() as con: query = alchemy_text(""" SELECT id FROM staging.month WHERE year_id=:y AND month=:m""") try: month_id = con.execute(query, y=year, m=month).fetchone()[0] except TypeError: raise ValueError( "Supplied year and month do not fit the 1980-2030 range for values:", year, month) return month_id
def preflightRUN(db_engine_name, level='pgm'): """ :param db_engine_name: the Views DB URI :param level: the Views level to which you want to denormalize. CM and PGM are implemented :return: 0 on success, error code on failure. Runs a normal preflight run for Views, i.e. denormalizes the Views Staging tables to a flat format suitable for model running. Current implemented levels are cm and pgm. """ script_path = os.path.join( os.path.dirname(os.path.realpath('__file__')), '../SQLFuelingPlans/flight_' + level.lower() + '.sql') try: preflight_file = open(script_path, 'r') except: return 1 preflight_query = alchemy_text(preflight_file.read()) if db_engine_name is None: print(preflight_query) return 2 else: engine = create_engine(db_engine_name) with engine.connect() as con: trans = con.begin() con.execute("DROP TABLE IF EXISTS preflight.flight_" + level.lower()) trans.commit() trans = con.begin() print(preflight_query) con.execute(preflight_query) trans.commit() return 0
def date_to_monthid(*args,**kwargs): '''if called with date parse ISO string to Y-M-D and return month_id. if called with year,month,day obtain return month_id and if called with only an actual month_id, guess what, pass through!''' db_engine_name = 'postgresql://VIEWSADMIN@VIEWSHOST:5432/views' if 'db_engine_name' in kwargs: db_engine_name = kwargs['db_engine_name'] if len(args) not in (1,3): return None if len(args)==1: if '-' not in args[0]: try: passthrough = int(args[0]) except: passthrough = None return passthrough ymd=args[0].split('-') if len(ymd) != 3: return None try: y=int(ymd[0]) m=int(ymd[1]) except ValueError: return None if len(args)==3: try: y=int(args[0]) m=int(args[1]) except ValueError: return None engine = create_engine(db_engine_name) with engine.connect() as con: query = alchemy_text("SELECT id FROM staging.month WHERE year_id=:y and month=:m") try: result = con.execute(query,y=y, m=m).fetchone() return result[0] except: return None
def _geoi_prepare(month_start: int, month_end: int) -> None: """Copies the current attached GED for the lookup to work""" log.debug("Starting _geoi_prepare()") engine = create_engine(CONNECTSTRING) db.drop_table("left_imputation.ged") db.drop_table("left_imputation.gedfull") db.drop_table("left_imputation.toimp4") db.drop_table("left_imputation.toimp6") with engine.connect() as con: trans = con.begin() log.debug("Creating left_imputation.ged") con.execute(""" CREATE TABLE left_imputation.ged AS SELECT id, priogrid_gid, conflict_new_id, month_id_end, month_id_start, type_of_violence, geom, best FROM preflight.ged_attached """) log.debug("Creating left_imputation.gedfull") con.execute(""" CREATE TABLE left_imputation.gedfull AS SELECT id, priogrid_gid, conflict_new_id, month_id_end, month_id_start, type_of_violence, geom, best FROM preflight.ged_attached_full """) con.execute( "CREATE INDEX lookup_idx ON left_imputation.ged (conflict_new_id, month_id_end)" ) con.execute( "CREATE INDEX lookup2_idx ON left_imputation.ged (priogrid_gid)") log.debug("Creating left_imputation.toimp4") con.execute( alchemy_text(""" CREATE TABLE left_imputation.toimp4 AS SELECT * FROM preflight.ged_attached_full where where_prec=4 AND month_id_end between :m1 and :m2 """), m1=month_start, m2=month_end, ) log.debug("Creating left_imputation.toimp6") con.execute( alchemy_text(""" CREATE TABLE left_imputation.toimp6 AS SELECT * FROM preflight.ged_attached_full WHERE where_prec=6 AND month_id_end between :m1 and :m2 AND geom IS NOT NULL """), m1=month_start, m2=month_end, ) trans.commit() log.debug("Finished _geoi_prepare()")
def stageGED2CM(month_start=445, month_end=446, db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views'): if month_start > month_end: month_start, month_end = month_end, month_start engine = create_engine(db_engine_name) with engine.connect() as con: limits = con.execute( "SELECT min(month_id_end) AS int, max(month_id_end) AS int FROM preflight.ged_attached" ).fetchone() if limits[0] > month_start: month_start = limits[0] if limits[1] <= month_end: month_end = limits[1] print("start/end:", month_start, month_end) print("Staging Country-Month with base GED variables and spatial lags...") query = alchemy_text(""" UPDATE staging.country_month SET ged_best_sb = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,0,1), ged_best_ns = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,0,2), ged_best_os = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,0,3), ged_count_sb = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,0,1), ged_count_ns = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,0,2), ged_count_os = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,0,3), ged_best_sb_lag1 = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,1,1), ged_best_ns_lag1 = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,1,2), ged_best_os_lag1 = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,1,3), ged_count_sb_lag1 = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,1,1), ged_count_ns_lag1 = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,1,2), ged_count_os_lag1 = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,1,3) WHERE month_id BETWEEN :m1 AND :m2 """) with engine.connect() as con: con.execute(query, m1=month_start, m2=month_end) print("Staging Country-Month t-lags...") query = alchemy_text(""" SELECT public.make_country_month_temporal_lags(1,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(2,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(3,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(4,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(5,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(6,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(7,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(8,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(9,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(10,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(11,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(12,FALSE,109,:m2); """) with engine.connect() as con: con.execute(query, m2=month_end) print("Staging Country-Month distances to nearest event...") query = alchemy_text(""" UPDATE staging.country_month SET ged_months_since_last_sb = public.cm_months_since_last_event('ged_count_sb', country_id, month_id), ged_months_since_last_ns = public.cm_months_since_last_event('ged_count_ns', country_id, month_id), ged_months_since_last_os = public.cm_months_since_last_event('ged_count_os', country_id, month_id), ged_months_since_last_sb_lag1 = public.cm_months_since_last_event('ged_count_sb_lag1', country_id, month_id), ged_months_since_last_ns_lag1 = public.cm_months_since_last_event('ged_count_ns_lag1', country_id, month_id), ged_months_since_last_os_lag1 = public.cm_months_since_last_event('ged_count_os_lag1', country_id, month_id) WHERE month_id BETWEEN :m1 AND :m2""") with engine.connect() as con: con.execute(query, m1=month_start, m2=month_end) with engine.connect() as con: print("Updating base onset variables for CM...") trans = con.begin() query = alchemy_text( """with a as (SELECT * FROM onset_months_table('staging','country_month','ged_best_sb','country_id')) UPDATE staging.country_month SET onset_months_sb = a.onset_distance FROM a WHERE a.id=staging.country_month.id; with a as (SELECT * FROM onset_months_table('staging','country_month','ged_best_ns','country_id')) UPDATE staging.country_month SET onset_months_ns = a.onset_distance FROM a WHERE a.id=staging.country_month.id; with a as (SELECT * FROM onset_months_table('staging','country_month','ged_best_os','country_id')) UPDATE staging.country_month SET onset_months_os = a.onset_distance FROM a WHERE a.id=staging.country_month.id;""") con.execute(query) trans.commit()
def geoi_pgm_dummy_update( imputation_id=1, db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views', lookup_schema='geoimputation', month_start=400, month_end=400): if not isinstance(imputation_id, (int, long)): print("ERROR") return None imputation_id = str(imputation_id) engine = create_engine(db_engine_name) with engine.connect() as con: trans = con.begin() query = alchemy_text('''UPDATE ''' + lookup_schema + '''.pgm SET ged_sb_dummy_''' + imputation_id + '''=0, ged_ns_dummy_''' + imputation_id + '''=0, ged_os_dummy_''' + imputation_id + '''=0 WHERE month_id BETWEEN :m1 AND :m2''') con.execute(query, m1=month_start, m2=month_end) trans.commit() trans = con.begin() query = alchemy_text(''' with a as ( SELECT "''' + imputation_id + '''" as priogrid_gid, type_of_violence, random_series_int(month_start :: INT, month_end :: INT + 1) AS month_id FROM ''' + lookup_schema + '''.geoi_out_all ) UPDATE ''' + lookup_schema + '''.pgm as i SET ged_ns_dummy_''' + imputation_id + '''=1 FROM a WHERE a.type_of_violence=2 AND a.priogrid_gid=i.priogrid_gid AND a.month_id=i.month_id''' ) con.execute(query) query = alchemy_text(''' with a as ( SELECT "''' + imputation_id + '''" as priogrid_gid, type_of_violence, random_series_int(month_start :: INT, month_end :: INT + 1) AS month_id FROM ''' + lookup_schema + '''.geoi_out_all ) UPDATE ''' + lookup_schema + '''.pgm as i SET ged_os_dummy_''' + imputation_id + '''=1 FROM a WHERE a.type_of_violence=3 AND a.priogrid_gid=i.priogrid_gid AND a.month_id=i.month_id''' ) con.execute(query) query = alchemy_text(''' with a as ( SELECT "''' + imputation_id + '''" as priogrid_gid, type_of_violence, random_series_int(month_start :: INT, month_end :: INT + 1) AS month_id FROM ''' + lookup_schema + '''.geoi_out_all ) UPDATE ''' + lookup_schema + '''.pgm as i SET ged_sb_dummy_''' + imputation_id + '''=1 FROM a WHERE a.type_of_violence=3 AND a.priogrid_gid=i.priogrid_gid AND a.month_id=i.month_id''' ) #print (query) con.execute(query) trans.commit()
def _stage_ged_2_cm(month_start: int, month_end: int) -> None: """ Update staging.country_month """ log.debug("Starting _stage_ged_2_cm with " f"month_start: {month_start} month_end: {month_end}") engine = create_engine(CONNECTSTRING) log.debug("Finding limits ") with engine.connect() as con: limits = con.execute( "SELECT min(month_id_end) AS int, max(month_id_end) AS int FROM preflight.ged_attached" ).fetchone() if limits[0] > month_start: month_start = limits[0] if limits[1] <= month_end: month_end = limits[1] log.debug(f"Limits of preflight.ged_attached are {limits}") log.debug("Staging CM with base GED variables and spatial lags.") query = alchemy_text(""" UPDATE staging.country_month SET ged_best_sb = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,0,1), ged_best_ns = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,0,2), ged_best_os = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,0,3), ged_count_sb = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,0,1), ged_count_ns = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,0,2), ged_count_os = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,0,3), ged_best_sb_lag1 = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,1,1), ged_best_ns_lag1 = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,1,2), ged_best_os_lag1 = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,1,3), ged_count_sb_lag1 = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,1,1), ged_count_ns_lag1 = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,1,2), ged_count_os_lag1 = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,1,3) WHERE month_id BETWEEN :m1 AND :m2 """) with engine.connect() as con: con.execute(query, m1=month_start, m2=month_end) log.debug("Staging Country-Month t-lags.") query = alchemy_text(""" SELECT public.make_country_month_temporal_lags(1,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(2,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(3,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(4,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(5,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(6,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(7,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(8,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(9,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(10,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(11,FALSE,109,:m2); SELECT public.make_country_month_temporal_lags(12,FALSE,109,:m2); """) with engine.connect() as con: con.execute(query, m2=month_end) log.debug("Staging Country-Month distances to nearest event...") query = alchemy_text(""" UPDATE staging.country_month SET ged_months_since_last_sb = public.cm_months_since_last_event('ged_count_sb', country_id, month_id), ged_months_since_last_ns = public.cm_months_since_last_event('ged_count_ns', country_id, month_id), ged_months_since_last_os = public.cm_months_since_last_event('ged_count_os', country_id, month_id), ged_months_since_last_sb_lag1 = public.cm_months_since_last_event('ged_count_sb_lag1', country_id, month_id), ged_months_since_last_ns_lag1 = public.cm_months_since_last_event('ged_count_ns_lag1', country_id, month_id), ged_months_since_last_os_lag1 = public.cm_months_since_last_event('ged_count_os_lag1', country_id, month_id) WHERE month_id BETWEEN :m1 AND :m2 """) with engine.connect() as con: con.execute(query, m1=month_start, m2=month_end) log.debug("Computing months_since_last") query = alchemy_text(""" UPDATE staging.country_month SET ged_months_since_last_sb_tx5 = public.cm_months_since_last_event_threshold('ged_best_sb', country_id, month_id,5), ged_months_since_last_ns_tx5 = public.cm_months_since_last_event_threshold('ged_best_ns', country_id, month_id,5), ged_months_since_last_os_tx5 = public.cm_months_since_last_event_threshold('ged_best_os', country_id, month_id,5), ged_months_since_last_sb_lag1_tx5 = public.cm_months_since_last_event_threshold('ged_best_sb_lag1', country_id, month_id,5), ged_months_since_last_ns_lag1_tx5 = public.cm_months_since_last_event_threshold('ged_best_ns_lag1', country_id, month_id,5), ged_months_since_last_os_lag1_tx5 = public.cm_months_since_last_event_threshold('ged_best_os_lag1', country_id, month_id,5), ged_months_since_last_sb_tx25 = public.cm_months_since_last_event_threshold('ged_best_sb', country_id, month_id,25), ged_months_since_last_ns_tx25 = public.cm_months_since_last_event_threshold('ged_best_ns', country_id, month_id,25), ged_months_since_last_os_tx25 = public.cm_months_since_last_event_threshold('ged_best_os', country_id, month_id,25), ged_months_since_last_sb_lag1_tx25 = public.cm_months_since_last_event_threshold('ged_best_sb_lag1', country_id, month_id,25), ged_months_since_last_ns_lag1_tx25 = public.cm_months_since_last_event_threshold('ged_best_ns_lag1', country_id, month_id,25), ged_months_since_last_os_lag1_tx25 = public.cm_months_since_last_event_threshold('ged_best_os_lag1', country_id, month_id,25), ged_months_since_last_sb_tx100 = public.cm_months_since_last_event_threshold('ged_best_sb', country_id, month_id,100), ged_months_since_last_ns_tx100 = public.cm_months_since_last_event_threshold('ged_best_ns', country_id, month_id,100), ged_months_since_last_os_tx100 = public.cm_months_since_last_event_threshold('ged_best_os', country_id, month_id,100), ged_months_since_last_sb_lag1_tx100 = public.cm_months_since_last_event_threshold('ged_best_sb_lag1', country_id, month_id,100), ged_months_since_last_ns_lag1_tx100 = public.cm_months_since_last_event_threshold('ged_best_ns_lag1', country_id, month_id,100), ged_months_since_last_os_lag1_tx100 = public.cm_months_since_last_event_threshold('ged_best_os_lag1', country_id, month_id,100), ged_months_since_last_sb_tx500 = public.cm_months_since_last_event_threshold('ged_best_sb', country_id, month_id,500), ged_months_since_last_ns_tx500 = public.cm_months_since_last_event_threshold('ged_best_ns', country_id, month_id,500), ged_months_since_last_os_tx500 = public.cm_months_since_last_event_threshold('ged_best_os', country_id, month_id,500), ged_months_since_last_sb_lag1_tx500 = public.cm_months_since_last_event_threshold('ged_best_sb_lag1', country_id, month_id,500), ged_months_since_last_ns_lag1_tx500 = public.cm_months_since_last_event_threshold('ged_best_ns_lag1', country_id, month_id,500), ged_months_since_last_os_lag1_tx500 = public.cm_months_since_last_event_threshold('ged_best_os_lag1', country_id, month_id,500) WHERE month_id BETWEEN :m1 AND :m2 """) with engine.connect() as con: con.execute(query, m1=month_start, m2=month_end) log.debug("Updating base onset variables for CM.") query = alchemy_text(""" WITH a AS (SELECT * FROM onset_months_table('staging', 'country_month', 'ged_best_sb', 'country_id')) UPDATE staging.country_month SET onset_months_sb = a.onset_distance FROM a WHERE a.id = staging.country_month.id; WITH a AS (SELECT * FROM onset_months_table('staging', 'country_month', 'ged_best_ns', 'country_id')) UPDATE staging.country_month SET onset_months_ns = a.onset_distance FROM a WHERE a.id = staging.country_month.id; WITH a AS (SELECT * FROM onset_months_table('staging', 'country_month', 'ged_best_os', 'country_id')) UPDATE staging.country_month SET onset_months_os = a.onset_distance FROM a WHERE a.id = staging.country_month.id; """) with engine.connect() as con: trans = con.begin() con.execute(query) trans.commit() log.debug("Updating windowed onsets at cm level.") query = alchemy_text(""" WITH a AS ( SELECT id, max(ged_best_sb) OVER (PARTITION BY country_id ORDER BY month_id ROWS BETWEEN 24 PRECEDING AND 1 PRECEDING) AS sb_x1, max(ged_best_os) OVER (PARTITION BY country_id ORDER BY month_id ROWS BETWEEN 24 PRECEDING AND 1 PRECEDING) AS os_x1, max(ged_best_ns) OVER (PARTITION BY country_id ORDER BY month_id ROWS BETWEEN 24 PRECEDING AND 1 PRECEDING) AS ns_x1 FROM staging.country_month WHERE month_id BETWEEN :m1 - 48 AND :m2 ) UPDATE staging.country_month SET max24_best_sb = a.sb_x1, max24_best_ns = a.ns_x1, max24_best_os = a.os_x1 FROM a WHERE a.id = staging.country_month.id AND month_id BETWEEN :m1 AND :m2 """) with engine.connect() as con: trans = con.begin() con.execute(query, m1=month_start, m2=month_end) trans.commit()
def _stage_ged_2_pgm(month_start: int, month_end: int) -> None: """ Update staging.priogrid_month """ log.debug("Starting _stage_ged_2_pgm with " f"month_start: {month_start} month_end: {month_end}") engine = create_engine(CONNECTSTRING) log.debug("Finding limits ") with engine.connect() as con: limits = con.execute( "SELECT min(month_id_end) AS int, max(month_id_end) AS int FROM preflight.ged_attached" ).fetchone() if limits[0] > month_start: month_start = limits[0] if limits[1] <= month_end: month_end = limits[1] log.debug(f"Limits of preflight.ged_attached are {limits}") log.debug(f"Updating staging.priogrid_month sums.") query = alchemy_text(""" UPDATE staging.priogrid_month SET ged_best_sb = aggregate_deaths_on_date_end(priogrid_gid, month_id, FALSE, FALSE, 0, 1), ged_best_ns = aggregate_deaths_on_date_end(priogrid_gid, month_id, FALSE, FALSE, 0, 2), ged_best_os = aggregate_deaths_on_date_end(priogrid_gid, month_id, FALSE, FALSE, 0, 3), ged_count_sb = aggregate_deaths_on_date_end(priogrid_gid, month_id, TRUE, FALSE, 0, 1), ged_count_ns = aggregate_deaths_on_date_end(priogrid_gid, month_id, TRUE, FALSE, 0, 2), ged_count_os = aggregate_deaths_on_date_end(priogrid_gid, month_id, TRUE, FALSE, 0, 3), ged_best_sb_start = aggregate_deaths_on_date_start(priogrid_gid, month_id, FALSE, FALSE, 0, 1), ged_best_ns_start = aggregate_deaths_on_date_start(priogrid_gid, month_id, FALSE, FALSE, 0, 2), ged_best_os_start = aggregate_deaths_on_date_start(priogrid_gid, month_id, FALSE, FALSE, 0, 3), ged_count_sb_start = aggregate_deaths_on_date_start(priogrid_gid, month_id, TRUE, FALSE, 0, 1), ged_count_ns_start = aggregate_deaths_on_date_start(priogrid_gid, month_id, TRUE, FALSE, 0, 2), ged_count_os_start = aggregate_deaths_on_date_start(priogrid_gid, month_id, TRUE, FALSE, 0, 3), ged_best_sb_lag1 = aggregate_deaths_on_date_end(priogrid_gid, month_id, FALSE, FALSE, 1, 1), ged_best_ns_lag1 = aggregate_deaths_on_date_end(priogrid_gid, month_id, FALSE, FALSE, 1, 2), ged_best_os_lag1 = aggregate_deaths_on_date_end(priogrid_gid, month_id, FALSE, FALSE, 1, 3), ged_count_sb_lag1 = aggregate_deaths_on_date_end(priogrid_gid, month_id, TRUE, FALSE, 1, 1), ged_count_ns_lag1 = aggregate_deaths_on_date_end(priogrid_gid, month_id, TRUE, FALSE, 1, 2), ged_count_os_lag1 = aggregate_deaths_on_date_end(priogrid_gid, month_id, TRUE, FALSE, 1, 3), ged_best_sb_lag2 = aggregate_deaths_on_date_end(priogrid_gid, month_id, FALSE, FALSE, 2, 1), ged_best_ns_lag2 = aggregate_deaths_on_date_end(priogrid_gid, month_id, FALSE, FALSE, 2, 2), ged_best_os_lag2 = aggregate_deaths_on_date_end(priogrid_gid, month_id, FALSE, FALSE, 2, 3), ged_count_sb_lag2 = aggregate_deaths_on_date_end(priogrid_gid, month_id, TRUE, FALSE, 2, 1), ged_count_ns_lag2 = aggregate_deaths_on_date_end(priogrid_gid, month_id, TRUE, FALSE, 2, 2), ged_count_os_lag2 = aggregate_deaths_on_date_end(priogrid_gid, month_id, TRUE, FALSE, 2, 3) WHERE month_id >= :m1 AND month_id <= :m2 """) with engine.connect() as con: con.execute(query, m1=month_start, m2=month_end) log.debug("Creating temporal lags.") query = alchemy_text(""" SELECT public.make_priogrid_month_temporal_lags(1,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(2,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(3,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(4,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(5,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(6,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(7,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(8,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(9,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(10,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(11,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(12,FALSE,:m1,:m2); """) with engine.connect() as con: con.execute(query, m1=month_start, m2=month_end) log.debug("Updating count variables.") query = alchemy_text(""" UPDATE staging.priogrid_month SET ged_months_since_last_sb = public.months_since_last_event('ged_count_sb', priogrid_gid, month_id), ged_months_since_last_ns = public.months_since_last_event('ged_count_ns', priogrid_gid, month_id), ged_months_since_last_os = public.months_since_last_event('ged_count_os', priogrid_gid, month_id), ged_months_since_last_sb_lag1 = public.months_since_last_event('ged_count_sb_lag1', priogrid_gid, month_id), ged_months_since_last_ns_lag1 = public.months_since_last_event('ged_count_ns_lag1', priogrid_gid, month_id), ged_months_since_last_os_lag1 = public.months_since_last_event('ged_count_os_lag1', priogrid_gid, month_id), ged_months_since_last_sb_lag2 = public.months_since_last_event('ged_count_sb_lag2', priogrid_gid, month_id), ged_months_since_last_ns_lag2 = public.months_since_last_event('ged_count_ns_lag2', priogrid_gid, month_id), ged_months_since_last_os_lag2 = public.months_since_last_event('ged_count_os_lag2', priogrid_gid, month_id) WHERE month_id BETWEEN :m1 AND :m2 """) with engine.connect() as con: con.execute(query, m1=month_start, m2=month_end) log.debug("Updating monthly counts for thresholds.") query = alchemy_text(""" UPDATE staging.priogrid_month SET ged_months_since_last_sb_tx5 = public.months_since_last_event_threshold('ged_best_sb', priogrid_gid, month_id,5), ged_months_since_last_ns_tx5 = public.months_since_last_event_threshold('ged_best_ns', priogrid_gid, month_id,5), ged_months_since_last_os_tx5 = public.months_since_last_event_threshold('ged_best_os', priogrid_gid, month_id,5), ged_months_since_last_sb_lag1_tx5 = public.months_since_last_event_threshold('ged_best_sb_lag1', priogrid_gid, month_id,5), ged_months_since_last_ns_lag1_tx5 = public.months_since_last_event_threshold('ged_best_ns_lag1', priogrid_gid, month_id,5), ged_months_since_last_os_lag1_tx5 = public.months_since_last_event_threshold('ged_best_os_lag1', priogrid_gid, month_id,5), ged_months_since_last_sb_tx25 = public.months_since_last_event_threshold('ged_best_sb', priogrid_gid, month_id,25), ged_months_since_last_ns_tx25 = public.months_since_last_event_threshold('ged_best_ns', priogrid_gid, month_id,25), ged_months_since_last_os_tx25 = public.months_since_last_event_threshold('ged_best_os', priogrid_gid, month_id,25), ged_months_since_last_sb_lag1_tx25 = public.months_since_last_event_threshold('ged_best_sb_lag1', priogrid_gid, month_id,25), ged_months_since_last_ns_lag1_tx25 = public.months_since_last_event_threshold('ged_best_ns_lag1', priogrid_gid, month_id,25), ged_months_since_last_os_lag1_tx25 = public.months_since_last_event_threshold('ged_best_os_lag1', priogrid_gid, month_id,25), ged_months_since_last_sb_tx100 = public.months_since_last_event_threshold('ged_best_sb', priogrid_gid, month_id,100), ged_months_since_last_ns_tx100 = public.months_since_last_event_threshold('ged_best_ns', priogrid_gid, month_id,100), ged_months_since_last_os_tx100 = public.months_since_last_event_threshold('ged_best_os', priogrid_gid, month_id,100), ged_months_since_last_sb_lag1_tx100 = public.months_since_last_event_threshold('ged_best_sb_lag1', priogrid_gid, month_id,100), ged_months_since_last_ns_lag1_tx100 = public.months_since_last_event_threshold('ged_best_ns_lag1', priogrid_gid, month_id,100), ged_months_since_last_os_lag1_tx100 = public.months_since_last_event_threshold('ged_best_os_lag1', priogrid_gid, month_id,100), ged_months_since_last_sb_tx500 = public.months_since_last_event_threshold('ged_best_sb', priogrid_gid, month_id,500), ged_months_since_last_ns_tx500 = public.months_since_last_event_threshold('ged_best_ns', priogrid_gid, month_id,500), ged_months_since_last_os_tx500 = public.months_since_last_event_threshold('ged_best_os', priogrid_gid, month_id,500), ged_months_since_last_sb_lag1_tx500 = public.months_since_last_event_threshold('ged_best_sb_lag1', priogrid_gid, month_id,500), ged_months_since_last_ns_lag1_tx500 = public.months_since_last_event_threshold('ged_best_ns_lag1', priogrid_gid, month_id,500), ged_months_since_last_os_lag1_tx500 = public.months_since_last_event_threshold('ged_best_os_lag1', priogrid_gid, month_id,500) WHERE month_id BETWEEN :m1 AND :m2 """) with engine.connect() as con: con.execute(query, m1=month_start, m2=month_end) log.debug("Compute spatial distances.") query = alchemy_text(""" UPDATE staging.priogrid_month SET dist_ged_sb_event=public.distance_to_nearest_ged('preflight','ged_attached',priogrid_gid,month_id,1), dist_ged_ns_event=public.distance_to_nearest_ged('preflight','ged_attached',priogrid_gid,month_id,2), dist_ged_os_event=public.distance_to_nearest_ged('preflight','ged_attached',priogrid_gid,month_id,3) WHERE month_id BETWEEN :m1 AND :m2 """) with engine.connect() as con: con.execute(query, m1=month_start, m2=month_end) log.debug("Updating base onset variables for PGM.") query = alchemy_text(""" WITH a AS (SELECT * FROM onset_months_table('staging', 'priogrid_month', 'ged_best_sb')) UPDATE staging.priogrid_month SET onset_months_sb = a.onset_distance FROM a WHERE a.id = staging.priogrid_month.id; WITH a AS (SELECT * FROM onset_months_table('staging', 'priogrid_month', 'ged_best_ns')) UPDATE staging.priogrid_month SET onset_months_ns = a.onset_distance FROM a WHERE a.id = staging.priogrid_month.id; WITH a AS (SELECT * FROM onset_months_table('staging', 'priogrid_month', 'ged_best_os')) UPDATE staging.priogrid_month SET onset_months_os = a.onset_distance FROM a WHERE a.id = staging.priogrid_month.id; """) with engine.connect() as con: trans = con.begin() con.execute(query) trans.commit() log.debug("Updating onsets based on spatial lags at PGM.") query = alchemy_text(""" UPDATE staging.priogrid_month SET onset_month_sb_lag1 = onset_lags( priogrid := priogrid_gid, month_id := month_id, lags := 1, schema_name := 'staging'::varchar, table_name := 'priogrid_month'::varchar, column_name := 'ged_best_sb'::varchar), onset_month_sb_lag2 = onset_lags( priogrid := priogrid_gid, month_id := month_id, lags := 2, schema_name := 'staging'::varchar, table_name := 'priogrid_month'::varchar, column_name := 'ged_best_sb'::varchar) WHERE onset_months_sb > 0 AND month_id BETWEEN :m1 AND :m2; UPDATE staging.priogrid_month SET onset_month_ns_lag1 = onset_lags( priogrid := priogrid_gid, month_id := month_id, lags := 1, schema_name := 'staging'::varchar, table_name := 'priogrid_month'::varchar, column_name := 'ged_best_ns'::varchar), onset_month_ns_lag2 = onset_lags( priogrid := priogrid_gid, month_id := month_id, lags := 2, schema_name := 'staging'::varchar, table_name := 'priogrid_month'::varchar, column_name := 'ged_best_ns'::varchar) WHERE onset_months_ns > 0 AND month_id BETWEEN :m1 AND :m2; UPDATE staging.priogrid_month SET onset_month_os_lag1 = onset_lags( priogrid := priogrid_gid, month_id := month_id, lags := 1, schema_name := 'staging'::varchar, table_name := 'priogrid_month'::varchar, column_name := 'ged_best_os'::varchar), onset_month_os_lag2 = onset_lags( priogrid := priogrid_gid, month_id := month_id, lags := 2, schema_name := 'staging'::varchar, table_name := 'priogrid_month'::varchar, column_name := 'ged_best_os'::varchar) WHERE onset_months_os > 0 AND month_id BETWEEN :m1 AND :m2; """) with engine.connect() as con: trans = con.begin() con.execute(query, m1=month_start, m2=month_end) trans.commit() log.debug("Updating windowed onsets.") query = alchemy_text(""" WITH a AS ( SELECT id, max(ged_best_sb) OVER (PARTITION BY priogrid_gid ORDER BY month_id ROWS BETWEEN 24 PRECEDING AND 1 PRECEDING) AS sb_x1, max(ged_best_os) OVER (PARTITION BY priogrid_gid ORDER BY month_id ROWS BETWEEN 24 PRECEDING AND 1 PRECEDING) AS os_x1, max(ged_best_ns) OVER (PARTITION BY priogrid_gid ORDER BY month_id ROWS BETWEEN 24 PRECEDING AND 1 PRECEDING) AS ns_x1 FROM staging.priogrid_month WHERE month_id BETWEEN :m1 - 48 AND :m2 ) UPDATE staging.priogrid_month SET max24_best_sb = a.sb_x1, max24_best_ns = a.ns_x1, max24_best_os = a.os_x1 FROM a WHERE a.id = staging.priogrid_month.id AND month_id BETWEEN :m1 AND :m2 """) with engine.connect() as con: trans = con.begin() con.execute(query, m1=month_start, m2=month_end) trans.commit() log.info("Finished _stage_ged_2_pgm()")
def _geoi_pgm_dummy_update( imputation_id: int, month_start: int, month_end: int, ): log.debug("Started _geoi_pgm_dummy_update().") if not isinstance(imputation_id, numbers.Integral): raise RuntimeError(f"{imputation_id} isn't a numbers.Integral") lookup_schema = "left_imputation" # imputation_id = str(imputation_id) engine = create_engine(CONNECTSTRING) with engine.connect() as con: trans = con.begin() query = alchemy_text(f""" UPDATE {lookup_schema}.pgm SET ged_sb_dummy_{imputation_id} = 0, ged_ns_dummy_{imputation_id} = 0, ged_os_dummy_{imputation_id} = 0 WHERE month_id BETWEEN :m1 AND :m2 """) con.execute(query, m1=month_start, m2=month_end) trans.commit() trans = con.begin() query = alchemy_text(f""" WITH a AS ( SELECT "{imputation_id}" as priogrid_gid, type_of_violence, random_series_int(month_start :: INT, month_end :: INT + 1) AS month_id FROM {lookup_schema}.geoi_out_all ) UPDATE {lookup_schema}.pgm as i SET ged_ns_dummy_{imputation_id}=1 FROM a WHERE a.type_of_violence=2 AND a.priogrid_gid=i.priogrid_gid AND a.month_id=i.month_id """) con.execute(query) query = alchemy_text(f""" WITH a AS ( SELECT "{imputation_id}" as priogrid_gid, type_of_violence, random_series_int(month_start :: INT, month_end :: INT + 1) AS month_id FROM {lookup_schema}.geoi_out_all ) UPDATE {lookup_schema}.pgm as i SET ged_os_dummy_{imputation_id}=1 FROM a WHERE a.type_of_violence=3 AND a.priogrid_gid=i.priogrid_gid AND a.month_id=i.month_id """) con.execute(query) query = alchemy_text(f""" WITH a AS ( SELECT "{imputation_id}" as priogrid_gid, type_of_violence, random_series_int(month_start :: INT, month_end :: INT + 1) AS month_id FROM {lookup_schema}.geoi_out_all ) UPDATE {lookup_schema}.pgm as i SET ged_sb_dummy_{imputation_id}=1 FROM a WHERE a.type_of_violence=1 AND a.priogrid_gid=i.priogrid_gid AND a.month_id=i.month_id """) con.execute(query) trans.commit() log.debug("Finished _geoi_pgm_dummy_update().")
def stageGED2PGM( month_start=445, month_end=446, db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views'): if month_start > month_end: month_start, month_end = month_end, month_start engine = create_engine(db_engine_name) with engine.connect() as con: limits = con.execute( "SELECT min(month_id_end) AS int, max(month_id_end) AS int FROM preflight.ged_attached" ).fetchone() if limits[0] > month_start: month_start = limits[0] if limits[1] <= month_end: month_end = limits[1] print(month_start, month_end) print("Updating sums...") query = alchemy_text("""UPDATE staging.priogrid_month SET ged_best_sb = aggregate_deaths_on_date_end(priogrid_gid,month_id, FALSE, FALSE, 0, 1), ged_best_ns = aggregate_deaths_on_date_end(priogrid_gid,month_id, FALSE, FALSE, 0, 2), ged_best_os = aggregate_deaths_on_date_end(priogrid_gid,month_id, FALSE, FALSE, 0, 3), ged_count_sb = aggregate_deaths_on_date_end(priogrid_gid,month_id, TRUE, FALSE, 0, 1), ged_count_ns = aggregate_deaths_on_date_end(priogrid_gid,month_id, TRUE, FALSE, 0, 2), ged_count_os = aggregate_deaths_on_date_end(priogrid_gid,month_id, TRUE, FALSE, 0, 3), ged_best_sb_start = aggregate_deaths_on_date_start(priogrid_gid,month_id, FALSE, FALSE, 0, 1), ged_best_ns_start = aggregate_deaths_on_date_start(priogrid_gid,month_id, FALSE, FALSE, 0, 2), ged_best_os_start = aggregate_deaths_on_date_start(priogrid_gid,month_id, FALSE, FALSE, 0, 3), ged_count_sb_start = aggregate_deaths_on_date_start(priogrid_gid,month_id, TRUE, FALSE, 0, 1), ged_count_ns_start = aggregate_deaths_on_date_start(priogrid_gid,month_id, TRUE, FALSE, 0, 2), ged_count_os_start = aggregate_deaths_on_date_start(priogrid_gid,month_id, TRUE, FALSE, 0, 3), ged_best_sb_lag1 = aggregate_deaths_on_date_end(priogrid_gid,month_id, FALSE, FALSE, 1, 1), ged_best_ns_lag1 = aggregate_deaths_on_date_end(priogrid_gid,month_id, FALSE, FALSE, 1, 2), ged_best_os_lag1 = aggregate_deaths_on_date_end(priogrid_gid,month_id, FALSE, FALSE, 1, 3), ged_count_sb_lag1 = aggregate_deaths_on_date_end(priogrid_gid,month_id, TRUE, FALSE, 1, 1), ged_count_ns_lag1 = aggregate_deaths_on_date_end(priogrid_gid,month_id, TRUE, FALSE, 1, 2), ged_count_os_lag1 = aggregate_deaths_on_date_end(priogrid_gid,month_id, TRUE, FALSE, 1, 3), ged_best_sb_lag2 = aggregate_deaths_on_date_end(priogrid_gid,month_id, FALSE, FALSE, 2, 1), ged_best_ns_lag2 = aggregate_deaths_on_date_end(priogrid_gid,month_id, FALSE, FALSE, 2, 2), ged_best_os_lag2 = aggregate_deaths_on_date_end(priogrid_gid,month_id, FALSE, FALSE, 2, 3), ged_count_sb_lag2 = aggregate_deaths_on_date_end(priogrid_gid,month_id, TRUE, FALSE, 2, 1), ged_count_ns_lag2 = aggregate_deaths_on_date_end(priogrid_gid,month_id, TRUE, FALSE, 2, 2), ged_count_os_lag2 = aggregate_deaths_on_date_end(priogrid_gid,month_id, TRUE, FALSE, 2, 3) WHERE month_id >= :m1 AND month_id <= :m2 """) with engine.connect() as con: con.execute(query, m1=month_start, m2=month_end) print("Creating temporal lags...") #This will compute temporal lags. #This must be run AFTER the above query has commited query = alchemy_text(''' SELECT public.make_priogrid_month_temporal_lags(1,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(2,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(3,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(4,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(5,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(6,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(7,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(8,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(9,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(10,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(11,FALSE,:m1,:m2); SELECT public.make_priogrid_month_temporal_lags(12,FALSE,:m1,:m2); ''') with engine.connect() as con: con.execute(query, m1=month_start, m2=month_end) print("Updating count variables...") query = alchemy_text(''' UPDATE staging.priogrid_month SET ged_months_since_last_sb = public.months_since_last_event('ged_count_sb', priogrid_gid, month_id), ged_months_since_last_ns = public.months_since_last_event('ged_count_ns', priogrid_gid, month_id), ged_months_since_last_os = public.months_since_last_event('ged_count_os', priogrid_gid, month_id), ged_months_since_last_sb_lag1 = public.months_since_last_event('ged_count_sb_lag1', priogrid_gid, month_id), ged_months_since_last_ns_lag1 = public.months_since_last_event('ged_count_ns_lag1', priogrid_gid, month_id), ged_months_since_last_os_lag1 = public.months_since_last_event('ged_count_os_lag1', priogrid_gid, month_id), ged_months_since_last_sb_lag2 = public.months_since_last_event('ged_count_sb_lag2', priogrid_gid, month_id), ged_months_since_last_ns_lag2 = public.months_since_last_event('ged_count_ns_lag2', priogrid_gid, month_id), ged_months_since_last_os_lag2 = public.months_since_last_event('ged_count_os_lag2', priogrid_gid, month_id) WHERE month_id BETWEEN :m1 AND :m2''') with engine.connect() as con: con.execute(query, m1=month_start, m2=month_end) print("Computing Spatial Distances...") with engine.connect() as con: query = alchemy_text(""" UPDATE staging.priogrid_month SET dist_ged_sb_event=public.distance_to_nearest_ged('preflight','ged_attached',priogrid_gid,month_id,1), dist_ged_ns_event=public.distance_to_nearest_ged('preflight','ged_attached',priogrid_gid,month_id,2), dist_ged_os_event=public.distance_to_nearest_ged('preflight','ged_attached',priogrid_gid,month_id,3) WHERE month_id BETWEEN :m1 AND :m2 """) con.execute(query, m1=month_start, m2=month_end) print("Updating base onset variables for PGM...") with engine.connect() as con: trans = con.begin() query = alchemy_text(""" with a as (SELECT * FROM onset_months_table('staging','priogrid_month','ged_best_sb')) UPDATE staging.priogrid_month SET onset_months_sb = a.onset_distance FROM a WHERE a.id=staging.priogrid_month.id; with a as (SELECT * FROM onset_months_table('staging','priogrid_month','ged_best_ns')) UPDATE staging.priogrid_month SET onset_months_ns = a.onset_distance FROM a WHERE a.id=staging.priogrid_month.id; with a as (SELECT * FROM onset_months_table('staging','priogrid_month','ged_best_os')) UPDATE staging.priogrid_month SET onset_months_os = a.onset_distance FROM a WHERE a.id=staging.priogrid_month.id; """) con.execute(query) trans.commit() print("Updating onsets based on spatial lags at PGM...") with engine.connect() as con: trans = con.begin() query = alchemy_text("""UPDATE staging.priogrid_month SET onset_month_sb_lag1 = onset_lags( priogrid := priogrid_gid, month_id := month_id, lags := 1, schema_name := 'staging'::varchar, table_name := 'priogrid_month'::varchar, column_name := 'ged_best_sb'::varchar), onset_month_sb_lag2 = onset_lags( priogrid := priogrid_gid, month_id := month_id, lags := 2, schema_name := 'staging'::varchar, table_name := 'priogrid_month'::varchar, column_name := 'ged_best_sb'::varchar) WHERE onset_months_sb > 0 AND month_id BETWEEN :m1 AND :m2; UPDATE staging.priogrid_month SET onset_month_ns_lag1 = onset_lags( priogrid := priogrid_gid, month_id := month_id, lags := 1, schema_name := 'staging'::varchar, table_name := 'priogrid_month'::varchar, column_name := 'ged_best_ns'::varchar), onset_month_ns_lag2 = onset_lags( priogrid := priogrid_gid, month_id := month_id, lags := 2, schema_name := 'staging'::varchar, table_name := 'priogrid_month'::varchar, column_name := 'ged_best_ns'::varchar) WHERE onset_months_ns > 0 AND month_id BETWEEN :m1 AND :m2; UPDATE staging.priogrid_month SET onset_month_os_lag1 = onset_lags( priogrid := priogrid_gid, month_id := month_id, lags := 1, schema_name := 'staging'::varchar, table_name := 'priogrid_month'::varchar, column_name := 'ged_best_os'::varchar), onset_month_os_lag2 = onset_lags( priogrid := priogrid_gid, month_id := month_id, lags := 2, schema_name := 'staging'::varchar, table_name := 'priogrid_month'::varchar, column_name := 'ged_best_os'::varchar) WHERE onset_months_os > 0 AND month_id BETWEEN :m1 AND :m2; """) con.execute(query, m1=month_start, m2=month_end) trans.commit() print("Update complete!")
def prepareACLED(db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views', schema_name='dataprep', table_name='acled'): engine = create_engine(db_engine_name) print("Dropping old attached ACLED...") with engine.connect() as con: trans = con.begin() con.execute("DROP TABLE IF EXISTS preflight.acled_full") con.execute("DROP TABLE IF EXISTS preflight.acled") trans.commit() print("Reattaching ACLED...") with engine.connect() as con: trans = con.begin() con.execute(""" CREATE TABLE preflight.acled_full AS WITH month_acled AS ( SELECT *, EXTRACT(MONTH FROM event_date :: DATE) AS month, public.priogrid(latitude::float4,longitude::float4) AS priogrid_gid FROM """ + schema_name + '.' + table_name + """ WHERE latitude::float BETWEEN -180 AND 180 AND longitude::float BETWEEN -90 AND 90 ), month_acled2 AS ( SELECT month_acled.*, staging.month.id AS month_id FROM month_acled, staging.month WHERE (month_acled.year :: INT = staging.month.year_id AND month_acled.month = staging.month.month) ) SELECT * FROM month_acled2; """) trans.commit() trans = con.begin() con.execute( "ALTER TABLE preflight.acled_full ADD COLUMN type_of_violence INT") con.execute( "ALTER TABLE preflight.acled_full ADD COLUMN type_of_protest VARCHAR(10)" ) trans.commit() trans = con.begin() print('Attaching ViEWS categories to attached ACLED... State-based...', end='') #1. We are emulating UCDP/ViEWS StateBased category using ACLED data. #i.e. Military Forces vs. others/other Military Forces, only "battles" and "remote violence" #no civilians involved. #TODO: shelling and remote violence may need to be treated differently con.execute(''' UPDATE preflight.acled_full SET type_of_violence = 1 WHERE (event_type ilike '%%battle%%' OR event_type ilike '%%remote%%') AND actor1||actor2 ilike '%%military forces%%' AND actor1||actor2 NOT ilike '%%civilians%%' ''') #2. We are emulating UCDP/ViEWS StateBased category using ACLED data. #i.e. no military forces, no civilians, only "battles" and "remote violence" #UCDP's artificial organizational criteria are not included and cannot for now be included print('Non-state...', end='') con.execute(''' UPDATE preflight.acled_full SET type_of_violence = 2 WHERE (event_type ilike '%%battle%%' OR event_type ilike '%%remote%%') AND actor1||actor2 not ilike '%%military forces%%' AND actor1||actor2 NOT ilike '%%civilians%%' ''') print('One-Sided...', end='') #3: Emulate UCDP/Views OneSided category. # Remote violence, battle and violence against civilians # TODO: This may be improved using a better division of "Remote Violence" con.execute(''' UPDATE preflight.acled_full SET type_of_violence = 3 WHERE (event_type ilike '%%battle%%' OR event_type ilike '%%remote%%' OR event_type ilike '%%civi%%') AND actor1||actor2 ilike '%%civilians%%' ''') print('Protest') #4: Protests #The entire protest category, as is con.execute(''' UPDATE preflight.acled_full SET type_of_violence = 4 WHERE event_type ilike '%%protest%%' ''') trans.commit() trans = con.begin() query = alchemy_text(""" UPDATE preflight.acled_full SET type_of_protest = 'p' WHERE type_of_violence=4 AND (inter1::int=6 OR inter2::int=6); """) con.execute(query) query = alchemy_text(""" UPDATE preflight.acled_full SET type_of_protest = coalesce(type_of_protest,'') || 'r' WHERE type_of_violence=4 AND (inter1::int=5 OR inter2::int=5); """) con.execute(query) query = alchemy_text(""" UPDATE preflight.acled_full SET type_of_protest = COALESCE(type_of_protest,'') || 'x' WHERE event_type ilike '%violence against civi%' AND interaction::int IN (15,16,25,26,35,36,45,46); """) con.execute(query) query = alchemy_text(""" UPDATE preflight.acled_full SET type_of_protest = COALESCE(type_of_protest,'') || 'y' WHERE event_type ilike '%violence against civi%' AND interaction::int IN (15,16); """) con.execute(query) trans.commit() # We are only using events precise enough to have locations within PGM cells # Thus, we exclude geo_precision 3 which indicates "larger area" # (unclear what that means but during testing, it was nearly always ADM1 or higher. trans = con.begin() print("Espen's categroies...") con.execute('DROP TABLE IF EXISTS preflight.acled') trans.commit() trans = con.begin() con.execute( 'CREATE TABLE preflight.acled AS SELECT * FROM preflight.acled_full WHERE geo_precision::int<3' ) trans.commit() trans = con.begin() print("Indexing...") con.execute(''' ALTER TABLE preflight.acled ADD PRIMARY KEY(index); ALTER TABLE preflight.acled_full ADD PRIMARY KEY (index); CREATE INDEX acled_idx ON preflight.acled(priogrid_gid, month_id, type_of_violence); CREATE INDEX acled_full_idx ON preflight.acled_full(priogrid_gid, month_id, type_of_violence); CREATE INDEX acled2_idx ON preflight.acled(priogrid_gid, month_id, type_of_violence,type_of_protest); CREATE INDEX acled2_full_idx ON preflight.acled_full(priogrid_gid, month_id, type_of_violence,type_of_protest) ''') trans.commit()
def stageACLED(month_start=448, month_end=449, db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views'): # swap if month start > month end if month_start > month_end: month_start, month_end = month_end, month_start engine = create_engine(db_engine_name) with engine.connect() as con: # Check we don't want to update some column that doesn't exist in the downloaded ACLED # Last month in ACLED is always incomplete, thus use month-1 for end limit. limits = con.execute( "SELECT min(month_id) AS int, max(month_id)-1 AS int FROM preflight.acled" ).fetchone() if limits[0] > month_start: month_start = limits[0] if limits[1] <= month_end: month_end = limits[1] print("Updating PGM for months between ", month_start, " and ", month_end, "with sum/counts and geographic lags...") #This will compute event counts for priogrid-month observations as well as for 1st and 2nd order lags query = alchemy_text(''' UPDATE staging.priogrid_month SET acled_count_sb = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,0,1), acled_count_ns = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,0,2), acled_count_os = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,0,3), acled_count_pr = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,0,4), acled_count_prp= public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,0,'p'), acled_count_prr= public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,0,'r'), acled_count_prx= public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,0,'x'), acled_count_pry= public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,0,'y'), acled_fat_sb = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,0,1), acled_fat_ns = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,0,2), acled_fat_os = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,0,3), acled_fat_pr = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,0,4), acled_fat_prp= public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,0,'p'), acled_fat_prr= public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,0,'r'), acled_fat_prx= public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,0,'x'), acled_fat_pry= public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,0,'y'), acled_count_sb_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,1,1), acled_count_ns_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,1,2), acled_count_os_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,1,3), acled_count_pr_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,1,4), acled_count_prp_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,1,'p'), acled_count_prr_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,1,'r'), acled_count_prx_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,1,'x'), acled_count_pry_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,1,'y'), acled_fat_sb_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,1,1), acled_fat_ns_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,1,2), acled_fat_os_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,1,3), acled_fat_pr_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,1,4), acled_fat_prp_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,1,'p'), acled_fat_prr_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,1,'r'), acled_fat_prx_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,1,'x'), acled_fat_pry_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,1,'y'), acled_count_sb_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,2,1), acled_count_ns_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,2,2), acled_count_os_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,2,3), acled_count_pr_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,2,4), acled_count_prp_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,2,'p'), acled_count_prr_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,2,'r'), acled_count_prx_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,2,'x'), acled_count_pry_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,2,'y'), acled_fat_sb_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,2,1), acled_fat_ns_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,2,2), acled_fat_os_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,2,3), acled_fat_pr_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,2,4), acled_fat_prp_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,2,'p'), acled_fat_prr_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,2,'r'), acled_fat_prx_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,2,'x'), acled_fat_pry_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,2,'y') WHERE month_id BETWEEN :m1 AND :m2 AND priogrid_gid IN (SELECT gid FROM staging.priogrid WHERE in_africa) ''') with engine.connect() as con: con.execute(query, m1=month_start, m2=month_end) print("Creating temporal lags...") #This will compute temporal lags. #This must be run AFTER the above query has commited query = alchemy_text(''' SELECT public.make_priogrid_month_acled_temporal_lags(1,FALSE,:m1,:m2); SELECT public.make_priogrid_month_acled_temporal_lags(2,FALSE,:m1,:m2); SELECT public.make_priogrid_month_acled_temporal_lags(3,FALSE,:m1,:m2); SELECT public.make_priogrid_month_acled_temporal_lags(4,FALSE,:m1,:m2); SELECT public.make_priogrid_month_acled_temporal_lags(5,FALSE,:m1,:m2); SELECT public.make_priogrid_month_acled_temporal_lags(6,FALSE,:m1,:m2); SELECT public.make_priogrid_month_acled_temporal_lags(7,FALSE,:m1,:m2); SELECT public.make_priogrid_month_acled_temporal_lags(8,FALSE,:m1,:m2); SELECT public.make_priogrid_month_acled_temporal_lags(9,FALSE,:m1,:m2); ''') with engine.connect() as con: con.execute(query, m1=month_start, m2=month_end) print("Computing time since last event...") #This will compute time since last event. #This must be run AFTER the above query has commited query = alchemy_text(''' UPDATE staging.priogrid_month SET acled_months_since_last_sb = public.months_since_last_event('acled_count_sb', priogrid_gid, month_id), acled_months_since_last_ns = public.months_since_last_event('acled_count_ns', priogrid_gid, month_id), acled_months_since_last_os = public.months_since_last_event('acled_count_os', priogrid_gid, month_id), acled_months_since_last_pr = public.months_since_last_event('acled_count_pr', priogrid_gid, month_id), acled_months_since_last_prp = public.months_since_last_event('acled_count_prp', priogrid_gid, month_id), acled_months_since_last_prr = public.months_since_last_event('acled_count_prr', priogrid_gid, month_id), acled_months_since_last_prx = public.months_since_last_event('acled_count_prx', priogrid_gid, month_id), acled_months_since_last_pry = public.months_since_last_event('acled_count_pry', priogrid_gid, month_id), acled_months_since_last_sb_lag1 = public.months_since_last_event('acled_count_sb_lag1', priogrid_gid, month_id), acled_months_since_last_ns_lag1 = public.months_since_last_event('acled_count_ns_lag1', priogrid_gid, month_id), acled_months_since_last_os_lag1 = public.months_since_last_event('acled_count_os_lag1', priogrid_gid, month_id), acled_months_since_last_pr_lag1 = public.months_since_last_event('acled_count_pr_lag1', priogrid_gid, month_id), acled_months_since_last_prp_lag1 = public.months_since_last_event('acled_count_prp_lag1', priogrid_gid, month_id), acled_months_since_last_prr_lag1 = public.months_since_last_event('acled_count_prr_lag1', priogrid_gid, month_id), acled_months_since_last_prx_lag1 = public.months_since_last_event('acled_count_prx_lag1', priogrid_gid, month_id), acled_months_since_last_pry_lag1 = public.months_since_last_event('acled_count_pry_lag1', priogrid_gid, month_id), acled_months_since_last_sb_lag2 = public.months_since_last_event('acled_count_sb_lag2', priogrid_gid, month_id), acled_months_since_last_ns_lag2 = public.months_since_last_event('acled_count_ns_lag2', priogrid_gid, month_id), acled_months_since_last_os_lag2 = public.months_since_last_event('acled_count_os_lag2', priogrid_gid, month_id), acled_months_since_last_pr_lag2 = public.months_since_last_event('acled_count_pr_lag2', priogrid_gid, month_id), acled_months_since_last_prp_lag2 = public.months_since_last_event('acled_count_prp_lag2', priogrid_gid, month_id), acled_months_since_last_prr_lag2 = public.months_since_last_event('acled_count_prr_lag2', priogrid_gid, month_id), acled_months_since_last_prx_lag2 = public.months_since_last_event('acled_count_prx_lag2', priogrid_gid, month_id), acled_months_since_last_pry_lag2 = public.months_since_last_event('acled_count_pry_lag2', priogrid_gid, month_id) WHERE month_id BETWEEN :m1 AND :m2; ''') with engine.connect() as con: con.execute(query, m1=month_start, m2=month_end) print("Preparing ACLED for CM...") with engine.connect() as con: trans = con.begin() query = "ALTER TABLE preflight.acled_full ADD COLUMN gwno INT" con.execute(query) trans.commit() trans = con.begin() query = "UPDATE preflight.acled_full SET gwno=isonum_to_gwcode(iso::int)" con.execute(query) trans.commit() trans = con.begin() try: con.execute( "ALTER TABLE preflight.acled_full ADD COLUMN country_month_id INT" ) except: pass trans.commit() trans = con.begin() query = alchemy_text(""" with a as (SELECT cm.*, c.gwcode FROM staging.country_month cm left join staging.country c on (cm.country_id=c.id)) UPDATE preflight.acled_full SET country_month_id=a.id FROM a WHERE (a.gwcode::int = acled_full.gwno::int AND a.month_id = acled_full.month_id); """) con.execute(query) con.execute( "CREATE INDEX acled_full_cm_idx ON preflight.acled_full(country_month_id, type_of_violence)" ) trans.commit() print("Updating CM aggregates for ACLED...") trans = con.begin() query = alchemy_text("""UPDATE staging.country_month SET acled_count_sb = public.aggregate_cm_acled(id,TRUE,0,1), acled_count_ns = public.aggregate_cm_acled(id,TRUE,0,2), acled_count_os = public.aggregate_cm_acled(id,TRUE,0,3), acled_count_pr = public.aggregate_cm_acled(id,TRUE,0,4), acled_count_sb_lag1 = public.aggregate_cm_acled(id,TRUE,1,1), acled_count_ns_lag1 = public.aggregate_cm_acled(id,TRUE,1,2), acled_count_os_lag1 = public.aggregate_cm_acled(id,TRUE,1,3), acled_count_pr_lag1 = public.aggregate_cm_acled(id,TRUE,1,4) WHERE month_id BETWEEN :m1 AND :m2""") con.execute(query, m1=month_start, m2=month_end) trans.commit() print("Updating CM months since...") trans = con.begin() query = alchemy_text("""UPDATE staging.country_month SET acled_months_since_last_sb = public.cm_months_since_last_event('acled_count_sb', country_id, month_id), acled_months_since_last_ns = public.cm_months_since_last_event('acled_count_ns', country_id, month_id), acled_months_since_last_os = public.cm_months_since_last_event('acled_count_os', country_id, month_id), acled_months_since_last_pr = public.cm_months_since_last_event('acled_count_pr', country_id, month_id), acled_months_since_last_sb_lag1 = public.cm_months_since_last_event('acled_count_sb_lag1', country_id, month_id), acled_months_since_last_ns_lag1 = public.cm_months_since_last_event('acled_count_ns_lag1', country_id, month_id), acled_months_since_last_os_lag1 = public.cm_months_since_last_event('acled_count_os_lag1', country_id, month_id), acled_months_since_last_pr_lag1 = public.cm_months_since_last_event('acled_count_pr_lag1', country_id, month_id) WHERE month_id BETWEEN :m1 AND :m2""") con.execute(query, m1=month_start - 12, m2=month_end + 12) trans.commit() print("Updating CM temporal lags...") trans = con.begin() query = alchemy_text(""" SELECT make_country_month_acled_temporal_lags(1, FALSE, :m1, :m2); SELECT make_country_month_acled_temporal_lags(2, FALSE, :m1, :m2); SELECT make_country_month_acled_temporal_lags(3, FALSE, :m1, :m2); SELECT make_country_month_acled_temporal_lags(4, FALSE, :m1, :m2); SELECT make_country_month_acled_temporal_lags(5, FALSE, :m1, :m2); SELECT make_country_month_acled_temporal_lags(6, FALSE, :m1, :m2); SELECT make_country_month_acled_temporal_lags(7, FALSE, :m1, :m2); SELECT make_country_month_acled_temporal_lags(8, FALSE, :m1, :m2); SELECT make_country_month_acled_temporal_lags(9, FALSE, :m1, :m2); SELECT make_country_month_acled_temporal_lags(10, FALSE, :m1, :m2); SELECT make_country_month_acled_temporal_lags(11, FALSE, :m1, :m2); SELECT make_country_month_acled_temporal_lags(12, FALSE, :m1, :m2); """) con.execute(query, m1=month_start, m2=month_end) trans.commit() print("Update complete!")
def prepareGED(db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views', schema_name='dataprep', table_name='ged'): engine = create_engine(db_engine_name) print("Dropping old attached GED...") with engine.connect() as con: con.execute("DROP TABLE IF EXISTS preflight.ged_attached_full") con.execute("DROP TABLE IF EXISTS preflight.ged_attached") print("Attaching new GED...") query = alchemy_text(""" CREATE TABLE preflight.ged_attached AS ( WITH month_ged AS ( SELECT *, EXTRACT(MONTH FROM date_start :: DATE) AS month_start, EXTRACT(MONTH FROM date_end :: DATE) AS month_end FROM """ + schema_name + '.' + table_name + """ ), month_ged_start AS ( SELECT month_ged.*, staging.month.id AS month_id_start FROM month_ged, staging.month WHERE (month_ged.year :: INT = staging.month.year_id AND month_ged.month_start = staging.month.month) ), month_ged_full AS ( SELECT month_ged_start.*, staging.month.id AS month_id_end FROM month_ged_start, staging.month WHERE (month_ged_start.year :: INT = staging.month.year_id AND month_ged_start.month_end = staging.month.month) ) SELECT * FROM month_ged_full ); """) print("Geometrifying...") with engine.connect() as con: con.execute(query) con.execute('ALTER TABLE preflight.ged_attached ADD PRIMARY KEY (id)') con.execute( 'ALTER TABLE preflight.ged_attached ADD COLUMN country_month_id_end bigint' ) con.execute( 'ALTER TABLE preflight.ged_attached ADD COLUMN country_month_id_start bigint' ) con.execute( 'ALTER TABLE preflight.ged_attached DROP COLUMN IF EXISTS geom') con.execute( 'ALTER TABLE preflight.ged_attached ADD COLUMN geom geometry (point,4326)' ) con.execute( "UPDATE preflight.ged_attached SET geom=st_setsrid(st_geometryfromtext(geom_wkt),4326) WHERE geom_wkt<>''" ) print("Attaching country information...") with engine.connect() as con: trans = con.begin() con.execute( 'CREATE TABLE preflight.ged_attached_full AS SELECT * FROM preflight.ged_attached' ) trans.commit() print("Indexing...") with engine.connect() as con: con.execute( 'DELETE FROM preflight.ged_attached WHERE where_prec IN (4,6,7)') con.execute( 'ALTER TABLE preflight.ged_attached_full ADD PRIMARY KEY (id)') con.execute( 'CREATE INDEX ged_attached_gidx ON preflight.ged_attached USING GIST(geom)' ) con.execute( 'CREATE INDEX ged_attached_idx ON preflight.ged_attached (priogrid_gid,month_id_end, type_of_violence)' ) con.execute( 'CREATE INDEX ged_attached_s_idx ON preflight.ged_attached (priogrid_gid,month_id_start, type_of_violence)' ) con.execute( 'CREATE INDEX ged_attached_full_gidx ON preflight.ged_attached_full USING GIST(geom)' ) con.execute( 'CREATE INDEX ged_attached_fullx_s_idx ON preflight.ged_attached_full (priogrid_gid,month_id_end, type_of_violence)' ) con.execute( 'CREATE INDEX ged_attached_fullx_gidx ON preflight.ged_attached_full (priogrid_gid,month_id_start, type_of_violence)' ) with engine.connect() as con: trans = con.begin() con.execute("""with a as (SELECT cm.*, c.gwcode FROM staging.country_month cm left join staging.country c on (cm.country_id=c.id)) UPDATE preflight.ged_attached_full SET country_month_id_end=a.id FROM a WHERE (a.gwcode = ged_attached_full.country_id AND a.month_id = ged_attached_full.month_id_end); """) trans.commit() with engine.connect() as con: trans = con.begin() con.execute("""with a as (SELECT cm.*, c.gwcode FROM staging.country_month cm left join staging.country c on (cm.country_id=c.id)) UPDATE preflight.ged_attached_full SET country_month_id_start=a.id FROM a WHERE (a.gwcode = ged_attached_full.country_id AND a.month_id = ged_attached_full.month_id_start); """) trans.commit()
def _geoi_assemble(month_start: int, month_end: int): log.debug("Started _geoi_assemble") lookup_schema = "left_imputation" lookup_table = "ged" engine = create_engine(CONNECTSTRING) db.drop_table(fqtable=f"{lookup_schema}.geoi_out_all") query = alchemy_text(f""" CREATE TABLE {lookup_schema}.geoi_out_all AS SELECT * FROM {lookup_schema}.geoi_out_4 WHERE "1"<>1 UNION SELECT * FROM {lookup_schema}.geoi_out_6 WHERE "1"<>1 UNION SELECT id, priogrid_gid AS "1", priogrid_gid AS "2", priogrid_gid AS "3", priogrid_gid AS "4", priogrid_gid AS "5", priogrid_gid AS "6", priogrid_gid AS "7", priogrid_gid AS "8", priogrid_gid AS "9", priogrid_gid AS "10", priogrid_gid AS "11", priogrid_gid AS "12", priogrid_gid AS "13", priogrid_gid AS "14", priogrid_gid AS "15" FROM {lookup_schema}.{lookup_table} WHERE month_id_end BETWEEN :m1 AND :m2; """) with engine.connect() as con: con.execute(query, m1=month_start, m2=month_end) query = alchemy_text(f""" ALTER TABLE {lookup_schema}.geoi_out_all ADD COLUMN month_start BIGINT; ALTER TABLE {lookup_schema}.geoi_out_all ADD COLUMN month_end BIGINT; ALTER TABLE {lookup_schema}.geoi_out_all ADD COLUMN type_of_violence BIGINT; """) with engine.connect() as con: con.execute(query) query = alchemy_text(f""" UPDATE {lookup_schema}.geoi_out_all SET month_start=c.month_id_start, month_end=c.month_id_end, type_of_violence=c.type_of_violence FROM {lookup_schema}.gedfull c WHERE {lookup_schema}.geoi_out_all.id = c.id; """) with engine.connect() as con: con.execute(query) log.debug("Finished _geoi_assemble")