def __init__(self, start_date, end_date, fake_today):
    """Open a database connection pinned to the configured schema and
    store the labelling-window boundaries on the instance.

    Args:
        start_date: start of the labelling window (stored unchanged).
        end_date: end of the labelling window (stored unchanged).
        fake_today: reference "today" used by downstream feature code
            (stored unchanged).
    """
    engine, config = setup_environment.get_database()
    self.con = engine.raw_connection()
    # Fix: close the cursor once SET SCHEMA has run instead of leaking it.
    # Schema names cannot be bound as query parameters, so the value is
    # interpolated; it comes from trusted local config, not user input.
    cursor = self.con.cursor()
    try:
        cursor.execute("SET SCHEMA '{}'".format(config['schema']))
    finally:
        cursor.close()
    self.start_date = start_date
    self.end_date = end_date
    self.fake_today = fake_today
    self.tables = config  # Dict of tables
    self.schema = config['schema']
import numpy as np
import pdb  # NOTE(review): debugger import — presumably a development leftover; confirm before removing
import pandas as pd
import yaml
import logging
import sys
import datetime

from eis import setup_environment
from eis.features import class_map

log = logging.getLogger(__name__)

# Module-level connection: every query in this module shares one raw
# connection pinned to the schema named in the project config.
engine, config = setup_environment.get_database()
con = engine.raw_connection()
con.cursor().execute("SET SCHEMA '{}'".format(config["schema"]))


def get_baseline(ids, start_date, end_date):
    """
    Gets EIS baseline - whether or not an officer is flagged by the EIS
    at any point in the labelling window.

    Returns dataframe with ids and boolean value corresponding to if an
    officer was flagged or not.
    """
    # Officers with at least one EIS record created inside
    # [start_date, end_date], inclusive on both ends.
    # NOTE(review): the table name and both dates are interpolated into the
    # SQL string rather than bound as parameters — acceptable for trusted
    # config/date inputs, but confirm callers never pass user-supplied
    # strings here.
    flagged_officers = (
        "select distinct newid from {} "
        "WHERE datecreated >= '{}'::date "
        "AND datecreated <='{}'::date"
    ).format(config["eis_table"], start_date, end_date)

    labels = pd.read_sql(flagged_officers, con=con)
    # NOTE(review): the function body appears to continue beyond this chunk
    # (`labels` is assigned but not yet returned at this point) — the `ids`
    # parameter is presumably joined against `labels` further down.
Code to take top performing recent models and put them in the evaluation
webapp for further examination.

Examples:
--------
python prepare.py '2016-08-03' 'auc'
python prepare.py '2016-08-03' 'recall@' -p '0.01'
python prepare.py '2016-08-03' 'precision@' -p '10.0' -n 10
python prepare.py '2016-08-03' 'precision@' -p '10.0' -n 10 -d 'example_directory/'
"""

# NOTE(review): other modules in this project unpack
# `setup_environment.get_database()` as `(engine, config)` — confirm the
# return shape here; if it is a tuple, `engine.raw_connection()` below
# would fail and be silently swallowed.
engine = setup_environment.get_database()
try:
    con = engine.raw_connection()
    # Pin the connection to the hard-coded 'models' schema where run
    # results are stored.
    con.cursor().execute("SET SCHEMA '{}'".format('models'))
except:
    # NOTE(review): bare `except: pass` swallows every failure of the
    # connection/schema setup (including SystemExit/KeyboardInterrupt);
    # later queries would then fail on an unset `con`. Narrow this to the
    # driver's exception type and at minimum log the error.
    pass


def get_metric_best_models(timestamp, metric, parameter=None, number=25):
    """
    --------------------------------------------------------
    Get the EVALUATION METRIC VALUE of the best models
    by the specified timestamp and given metric
    --------------------------------------------------------

    ARGUMENTS:
    timestamp: models run on or after given timestamp