def main():
    """Compute ensemble predictions and publish them for every level/runtype pair."""
    # Connection details for the VIEWS database.
    db_user = "VIEWSADMIN"
    db_prefix = "postgresql"
    db_name = "views"
    db_port = "5432"
    db_host = "VIEWSHOST"
    conn = dbutils.make_connectstring(db_prefix, db_name, db_user, db_host, db_port)

    outcomes = ["sb", "ns", "os"]

    # Ensemble specifications, one template per level of analysis.
    with open("ensembles_cm.json", 'r') as f:
        spec_cm = json.load(f)
    with open("ensembles_pgm.json", 'r') as f:
        spec_pgm = json.load(f)
    specs = {'pgm': spec_pgm, 'cm': spec_cm}

    for loa in ["cm", "pgm"]:
        for run in ["eval", "fcast"]:
            df = get_data(conn, loa, run)
            df_ens = compute_ensembles(df, specs, loa, run, outcomes)
            print(df_ens)
            publish_results(conn, df_ens, loa, run)
def main():
    """Calibrate test-partition predictions and publish them per level/runtype."""
    # Connection details for the VIEWS database.
    db_user = "VIEWSADMIN"
    db_prefix = "postgresql"
    db_name = "views"
    db_port = "5432"
    db_host = "VIEWSHOST"
    conn = dbutils.make_connectstring(db_prefix, db_name, db_user, db_host, db_port)

    for loa in ["cm", "pgm"]:
        for run in ["eval", "fcast"]:
            # get_data yields actuals plus calibration- and test-partition frames.
            df_actuals, df_calib, df_test = get_data(conn, loa, run)
            df_calibrated = calibrate_models(df_actuals, df_calib, df_test)
            publish_results(conn, df_calibrated, loa, run)
def main():
    """Compute cross-level predictions and publish them for every runtype/period pair."""
    # Connection details for the VIEWS database.
    db_user = "VIEWSADMIN"
    db_prefix = "postgresql"
    db_name = "views"
    db_port = "5432"
    db_host = "VIEWSHOST"
    conn = dbutils.make_connectstring(db_prefix, db_name, db_user, db_host, db_port)

    outcomes = ["sb", "ns", "os"]

    # A single cross-level specification is shared across all runs.
    with open("crosslevels.json", 'r') as f:
        spec = json.load(f)

    for run in ["eval", "fcast"]:
        for period in ["calib", "test"]:
            df = get_data(conn, run, period)
            df_cl = compute_crosslevels(df, spec, run, period, outcomes)
            publish_results(conn, df_cl, run, period)
from sklearn.preprocessing import StandardScaler from sklearn.ensemble import RandomForestClassifier sys.path.insert(0, "../../../../") import views_utils.dbutils as dbutils sys.path.insert(0, "../../../osa") from osa.wrapper_sm import SMLogit import osa.utils as osa uname = "VIEWSADMIN" prefix = "postgresql" db = "views" port = "5432" hostname = "VIEWSHOST" connectstring = dbutils.make_connectstring(prefix, db, uname, hostname, port) rf_500 = RandomForestClassifier(n_estimators = 500, n_jobs=10) scaler = StandardScaler() pipe_rf_500 = Pipeline([ ('scaler', scaler), ('rf', rf_500)]) output_schema = "landed_test" output_table = "osa_pgm_pronly_wcm_eval_calib_pr" models = [ { "dir_pickles": "$SNIC_TMP/osa/pickles/osa_pgm_pronly_wcm_eval_calib_pr/pgm_pronly_wcm_eval_calib_logit_fullsample_pr", "estimator": SMLogit(), "features": [
# CLI: only the db username and the input .csv path are configurable.
parser.add_argument("--uname", type=str, required=True, help="DB username")
parser.add_argument("--path_input", type=str, required=True, help="path to .csv to import")
args = parser.parse_args()

path_input = args.path_input
db_uname = args.uname

# Fixed connection details for the VIEWS database.
db_prefix = "postgresql"
db_db = "views"
db_port = "5432"
db_hostname = "VIEWSHOST"  # Janus
db_connectstring = dbutils.make_connectstring(db_prefix, db_db, db_uname, db_hostname, db_port)

# Staging destination for the FVP table.
db_schema = "dataprep_test"
db_table = "fvp"

# Variable selection and renaming rules for the FVP import.
path_varlist = "../varlists/fvp/rawnames.txt"
path_renames = "../varlists/fvp/renames.txt"
varlist = datautils.read_varlist(path_varlist)
renames = datautils.read_renames(path_renames)

# Column names are lowercased before matching against the varlist.
df = dbutils.file_to_df(path_input)
df.columns = df.columns.str.lower()
available = list(df.columns)

# Partition the wanted columns by whether they occur in the input data.
wanted_not_in_data = [col for col in varlist if col not in available]
wanted_in_data = [col for col in varlist if col in available]
""" Gets REIGN data from git repository and stages it on db. Currently restages full dataset with the latest month's release. """ import sys sys.path.append("../..") import urllib import pandas as pd import numpy as np from views_utils import dbutils from bs4 import BeautifulSoup connectstring = dbutils.make_connectstring(prefix="postgresql", db="views", uname="VIEWSADMIN", hostname="VIEWSHOST", port="5432") def getREIGN(schema="dataprep", table="reign", if_exists="replace"): """ Fetches most recent full REIGN dataset. """ #find current download link url = 'https://oefdatascience.github.io/REIGN.github.io/menu/reign_current.html' html_doc = urllib.request.urlopen(url).read() soup = BeautifulSoup(html_doc, 'lxml') container = soup.find("div", {"class": "post-container"}) link = container.find("a", href=True)['href'] print("downloading data from {}".format(link))
# Load the imputed DS result files and push them to the db.
# NOTE(review): the actual write is commented out below, so this currently
# only prints the schema/table and input path for each level of analysis
# and imputation round (a dry run).
import sys

import pandas as pd

sys.path.insert(0, "../..")
from views_utils.dbutils import make_connectstring, df_to_db

# Connection details for the VIEWS database.
# BUG FIX: the dialect prefix was "postgres"; every other script in this
# project passes "postgresql", and SQLAlchemy only recognises "postgresql"
# (the "postgres" alias was removed in SQLAlchemy 1.4).
prefix = "postgresql"
db = "views"
uname = "VIEWSADMIN"
hostname = "VIEWSHOST"
port = "5432"
connectstring = make_connectstring(prefix, db, uname, hostname, port)

loas = ["pgm", "cm"]
schema = "launched"
if_exists = "replace"

for loa in loas:
    # Five multiple-imputation rounds per level of analysis.
    for imp in range(1, 6):
        table = "{}_imp_ds_{}".format(loa, imp)
        path_input = (
            "/storage/runs/current/ds/results/"
            "{loa}_transforms/{loa}_imp_{imp}.hdf5".format(loa=loa, imp=imp)
        )
        print(schema, table)
        print(path_input)
        # TODO(review): enable the write once the dry run is verified.
        #df = pd.read_hdf(path_input)
        #df_to_db(connectstring, df, schema, table, if_exists, write_index=True)