uname=uname) # -- prepare pgm data -- # get pgm and subset to 1994-2014 columns = [ 'month_id', 'pg_id', 'ged_dummy_sb', 'ged_dummy_ns', 'ged_dummy_os', 'l12_ged_dummy_sb', 'l12_ged_dummy_ns', 'l12_ged_dummy_os', 'ln_bdist3', 'ln_ttime', 'ln_capdist', 'ln_pop', 'gcp_li_mer', 'imr_mean', 'mountains_mean', 'urban_ih_li', 'excluded_dummy_li', 'decay_12_cw_ged_dummy_sb_0', 'decay_12_cw_ged_dummy_ns_0', 'decay_12_cw_ged_dummy_os_0', 'q_1_1_l1_ged_dummy_sb', 'q_1_1_l1_ged_dummy_ns', 'q_1_1_l1_ged_dummy_os' ] df = dbutils.db_to_df(connectstring, schema="launched", table="transforms_pgm_imp_1", columns=columns) limit = (df['month_id'] >= 169) & (df['month_id'] <= 420) df = df[limit] # add log imr df['ln_imr_mean'] = np.log1p(df['imr_mean']) # add dummy variables for border areas (+2 here due to small ds error) df['border'] = np.where(df['ln_bdist3'] < np.log(25 + 2), 1, 0) # set index df.set_index(['pg_id', 'month_id'], inplace=True) # print finish
import sys

# dbutils is imported from two directories up, so extend the search path first.
sys.path.append("../..")

import dbutils

# --- user settings: replace the placeholders before running ---
uname = "YOUR USERNAME"
schema = "SCHEMA"
table = "TABLE"

# Example of what a column selection looks like.
columns_example = ["pg_id", "month_id", "ged_dummy_sb"]

# Leave empty to get all cols from table or replace with your list
columns = []

# --- fetch the table ---
connectstring = dbutils.make_connectstring(
    db="views",
    hostname="VIEWSHOST",
    port="5432",
    prefix="postgres",
    uname=uname,
)
df = dbutils.db_to_df(connectstring, schema, table, columns)

# --- write the frame out in both formats, into the working directory ---
df.to_csv("my_table.csv")
df.to_stata("my_table.dta")
# Schema the ensemble tables are read from, and the if_exists setting.
schema = "landed"
if_exists = "replace"

# set index on
id_dict = {
    "cm": ["month_id", "country_id"],
    "pgm": ["month_id", "pg_id"],
}

# inserts
tables = [
    "ensemble_cm_fcast_test",
    "ensemble_cm_eval_test",
    "ensemble_pgm_fcast_test",
    "ensemble_pgm_eval_test",
]

# get startmonth: smallest month_id in the cm eval table
table = tables[1]
df_current = dbutils.db_to_df(connectstring, schema, table)
df_current.set_index(id_dict["cm"], inplace=True)
startmonth = int(df_current.index.get_level_values('month_id').min())

# get endmonth: smallest month_id in the cm forecast table
# NOTE(review): this uses .min() exactly like startmonth above. Confirm that
# the first forecast month is the intended end of the range and that this is
# not a copy-paste of .min() where .max() was meant.
table = tables[0]
df_current = dbutils.db_to_df(connectstring, schema, table)
df_current.set_index(id_dict["cm"], inplace=True)
endmonth = int(df_current.index.get_level_values('month_id').min())

print(f"Getting deltas for decay between {startmonth} and {endmonth}.")

# loop over ensemble tables
for table in tables:
    # set table name for the deltas
    tablename = f"deltapred_{table}"
    # get data from previous and landed
gid = lat_component + lon_component + 1 return int(gid) # set up connectstring uname = "VIEWSADMIN" connectstring = dbutils.make_connectstring(db="views", hostname="VIEWSHOST", port="5432", prefix="postgres", uname=uname) try: df = pd.read_hdf("geopko_cached.hdf5") except: df = dbutils.db_to_df(connectstring, schema="dataprep", table="geo_pko") df = df.drop(columns=["index", "month", "year"]) groupvar = "mission_location" timevar = "month_id" # Mission location is the groupvar to use as ID df[groupvar] = df['mission'] + "__" + df['location'] df.set_index([timevar, groupvar], inplace=True) df.sort_index(inplace=True) # To keep our extended group dfs in dfs = [] # level=1 is mission_location for key, group in df.groupby(groupvar):
sys.path.append("..")

import dbutils

# set up connectstring
uname = "VIEWSADMIN"
connectstring = dbutils.make_connectstring(
    db="views",
    hostname="VIEWSHOST",
    port="5432",
    prefix="postgres",
    uname=uname,
)

## preparation of cm_actuals_preds
# indexes for cm_evalpreds
timevar = "month_id"
groupvar = "country_id"


def _fetch_cm(schema, table, **extra):
    # Fetch one country-month table, passing ids=[timevar, groupvar].
    # Presumably db_to_df uses `ids` as the frame index; the index-based
    # merges below rely on that. TODO confirm.
    return dbutils.db_to_df(connectstring,
                            schema=schema,
                            table=table,
                            ids=[timevar, groupvar],
                            **extra)


# get cm evalpreds from db (change to SQL query)
cm_preds = _fetch_cm("landed", "calibrated_cm_eval_test")
cm_ensemble = _fetch_cm("landed", "ensemble_cm_eval_test")
cm_actuals = _fetch_cm("launched", "transforms_cm_imp_1",
                       columns=["ged_dummy_sb", "ged_dummy_ns", "ged_dummy_os"])

# Join actuals with calibrated predictions, then with the ensemble, matching
# rows on the index (pandas' default inner join).
df_cm_actualspreds = (
    cm_actuals
    .merge(cm_preds, left_index=True, right_index=True)
    .merge(cm_ensemble, left_index=True, right_index=True)
)

# get country information
df_c = dbutils.db_to_df(connectstring,
                        schema="staging",
                        table="country",
                        columns=["id", "name"])