Example no. 1
def main():
    """ Ensemble main """
    uname = "VIEWSADMIN"
    prefix = "postgresql"
    db = "views"
    port = "5432"
    hostname = "VIEWSHOST"
    connectstring = dbutils.make_connectstring(prefix, db, uname, hostname,
                                               port)

    levels = ["cm", "pgm"]
    runtypes = ["eval", "fcast"]
    outcomes = ["sb", "ns", "os"]

    with open("ensembles_cm.json", 'r') as f:
        template_cm = json.load(f)
    with open("ensembles_pgm.json", 'r') as f:
        template_pgm = json.load(f)
    templates = {'pgm': template_pgm, 'cm': template_cm}

    for level in levels:
        for runtype in runtypes:
            df = get_data(connectstring, level, runtype)
            df_ens = compute_ensembles(df, templates, level, runtype, outcomes)
            print(df_ens)
            publish_results(connectstring, df_ens, level, runtype)
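The helpers get_data, compute_ensembles, and publish_results are defined elsewhere in the project and are not shown in this excerpt. Purely as a hypothetical illustration of the publishing step, publish_results could wrap dbutils.df_to_db, whose call pattern appears in Example no. 7; the schema and table names below are placeholders, not the project's actual naming scheme.

def publish_results(connectstring, df_ens, level, runtype):
    """ Hypothetical sketch of the publishing step (placeholder names). """
    schema = "landed_test"                            # placeholder schema
    table = "ensemble_{}_{}".format(level, runtype)   # placeholder table name
    # positional call pattern follows the df_to_db usage in Example no. 7
    dbutils.df_to_db(connectstring, df_ens, schema, table, "replace",
                     write_index=True)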
Example no. 2
def main():
    """ Run calibration """
    uname = "VIEWSADMIN"
    prefix = "postgresql"
    db = "views"
    port = "5432"
    hostname = "VIEWSHOST"
    connectstring = dbutils.make_connectstring(prefix, db, uname,
                                               hostname, port)

    levels = ["cm", "pgm"]
    runtypes = ["eval", "fcast"]

    for level in levels:
        for runtype in runtypes:
            df_actuals, df_calib, df_test = get_data(connectstring,
                                                     level,
                                                     runtype)
            df_test_calibrated = calibrate_models(df_actuals, df_calib, df_test)

            publish_results(connectstring, df_test_calibrated, level, runtype)
Example no. 3
def main():
    """ crosslevel main """
    uname = "VIEWSADMIN"
    prefix = "postgresql"
    db = "views"
    port = "5432"
    hostname = "VIEWSHOST"
    connectstring = dbutils.make_connectstring(prefix, db, uname, hostname,
                                               port)

    runtypes = ["eval", "fcast"]
    periods = ["calib", "test"]
    outcomes = ["sb", "ns", "os"]

    with open("crosslevels.json", 'r') as f:
        template = json.load(f)

    for runtype in runtypes:
        for period in periods:
            df = get_data(connectstring, runtype, period)
            df_cl = compute_crosslevels(df, template, runtype, period,
                                        outcomes)
            publish_results(connectstring, df_cl, runtype, period)
Example no. 4

import sys

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

sys.path.insert(0, "../../../../")
import views_utils.dbutils as dbutils
sys.path.insert(0, "../../../osa")
from osa.wrapper_sm import SMLogit

import osa.utils as osa

uname    = "VIEWSADMIN"
prefix   = "postgresql"
db       = "views"
port     = "5432"
hostname = "VIEWSHOST"
connectstring = dbutils.make_connectstring(prefix, db, uname, hostname, port)

rf_500 = RandomForestClassifier(n_estimators=500, n_jobs=10)
scaler = StandardScaler()
pipe_rf_500 = Pipeline([
    ('scaler', scaler),
    ('rf', rf_500)])

output_schema   = "landed_test"
output_table    = "osa_pgm_pronly_wcm_eval_calib_pr"

models = [
  {
    "dir_pickles": "$SNIC_TMP/osa/pickles/osa_pgm_pronly_wcm_eval_calib_pr/pgm_pronly_wcm_eval_calib_logit_fullsample_pr",
    "estimator": SMLogit(),
    "features": [
Example no. 5
parser.add_argument("--uname", type=str, required=True, help="DB username")
parser.add_argument("--path_input",
                    type=str,
                    required=True,
                    help="path to .csv to import")

args = parser.parse_args()

path_input = args.path_input
db_uname = args.uname

db_prefix = "postgresql"
db_db = "views"
db_port = "5432"
db_hostname = "VIEWSHOST"  #Janus
db_connectstring = dbutils.make_connectstring(db_prefix, db_db, db_uname,
                                              db_hostname, db_port)

db_schema = "dataprep_test"
db_table = "fvp"

path_varlist = "../varlists/fvp/rawnames.txt"
path_renames = "../varlists/fvp/renames.txt"
varlist = datautils.read_varlist(path_varlist)
renames = datautils.read_renames(path_renames)

df = dbutils.file_to_df(path_input)
df.columns = df.columns.str.lower()

available = list(df.columns)
wanted_not_in_data = [col for col in varlist if col not in available]
wanted_in_data = [col for col in varlist if col in available]
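The original script is cut off at this point. A hypothetical continuation might warn about missing columns, subset to the available ones, apply the renames, and stage the result with dbutils.df_to_db (call pattern as in Example no. 7); this assumes renames is an {old_name: new_name} dict usable with DataFrame.rename.

if wanted_not_in_data:
    print("Columns in varlist but missing from input:", wanted_not_in_data)

df = df[wanted_in_data]
df = df.rename(columns=renames)
# plain CSV import, so the index is assumed not to be meaningful
dbutils.df_to_db(db_connectstring, df, db_schema, db_table, "replace",
                 write_index=False)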
Example no. 6
"""
Gets REIGN data from git repository and stages it on db. 
Currently restages full dataset with the latest month's release.
"""

import sys
sys.path.append("../..")
import urllib.request
import pandas as pd
import numpy as np
from views_utils import dbutils
from bs4 import BeautifulSoup

connectstring = dbutils.make_connectstring(prefix="postgresql",
                                           db="views",
                                           uname="VIEWSADMIN",
                                           hostname="VIEWSHOST",
                                           port="5432")


def getREIGN(schema="dataprep", table="reign", if_exists="replace"):
    """
    Fetches most recent full REIGN dataset.
    """
    # find the current download link
    url = 'https://oefdatascience.github.io/REIGN.github.io/menu/reign_current.html'
    html_doc = urllib.request.urlopen(url).read()
    soup = BeautifulSoup(html_doc, 'lxml')
    container = soup.find("div", {"class": "post-container"})
    link = container.find("a", href=True)['href']
    print("downloading data from {}".format(link))
Example no. 7
import sys

import pandas as pd

sys.path.insert(0, "../..")

from views_utils.dbutils import make_connectstring, df_to_db

prefix = "postgres"
db = "views"
uname = "VIEWSADMIN"
hostname = "VIEWSHOST"
port = "5432"
connectstring = make_connectstring(prefix, db, uname, hostname, port)

loas = ["pgm", "cm"]

schema = "launched"
if_exists = "replace"
for loa in loas:
    for imp in range(1, 6):
        table = loa + "_imp_ds_" + str(imp)
        path_input = "/storage/runs/current/ds/results/"
        path_input += loa + "_transforms/" + loa + "_imp_" + str(imp) + ".hdf5"
        print(schema, table)
        print(path_input)
        #df = pd.read_hdf(path_input)
        #df_to_db(connectstring, df, schema, table, if_exists, write_index=True)