Example #1
0
    def __init__(self, start_date, end_date, fake_today):
        """Open a database connection and record the run configuration.

        Side effect: switches the session search schema to
        config['schema'] on the new connection.

        Args:
            start_date: start of the window of interest.
            end_date: end of the window of interest.
            fake_today: date treated as "today" for the run.
        """
        engine, config = setup_environment.get_database()

        connection = engine.raw_connection()
        connection.cursor().execute("SET SCHEMA '{}'".format(config['schema']))

        self.con = connection
        self.schema = config['schema']
        self.tables = config  # Dict of tables
        self.start_date = start_date
        self.end_date = end_date
        self.fake_today = fake_today
Example #2
0
import numpy as np
import pdb
import pandas as pd
import yaml
import logging
import sys
import datetime

from eis import setup_environment
from eis.features import class_map

# Module-level setup: one shared logger, engine, and raw DB-API connection.
# NOTE(review): this runs at import time — importing the module opens a
# database connection and mutates the session schema.
log = logging.getLogger(__name__)
engine, config = setup_environment.get_database()
con = engine.raw_connection()
# Point the shared session at the configured schema so later queries can
# use unqualified table names.
con.cursor().execute("SET SCHEMA '{}'".format(config["schema"]))


def get_baseline(ids, start_date, end_date):
    """
    Gets EIS baseline - whether or not an officer is flagged
    by the EIS at any point in the labelling window.

    Returns dataframe with ids and boolean value corresponding to if
    an officer was flagged or not.

    Args:
        ids: officer ids of interest.
            NOTE(review): `ids` is never used in the visible body — confirm
            whether filtering the query by id was intended.
        start_date: inclusive lower bound on datecreated (cast to date).
        end_date: inclusive upper bound on datecreated (cast to date).
    """
    # Pass the date bounds as query parameters instead of interpolating
    # them into the SQL text — avoids quoting/injection problems. The
    # table name comes from trusted config, so format() is kept for it
    # (identifiers cannot be parameterized).
    flagged_officers = (
        "select distinct newid from {} "
        "WHERE datecreated >= %s::date "
        "AND datecreated <= %s::date"
    ).format(config["eis_table"])

    labels = pd.read_sql(flagged_officers, con=con, params=(start_date, end_date))
    # NOTE(review): no return statement is visible here — the result is
    # bound to `labels` and dropped; confirm a `return labels` isn't missing.
Example #3
0
Code to take top performing recent models and
put them in the evaluation webapp for further
examination.

Examples:
--------

python prepare.py '2016-08-03' 'auc'
python prepare.py '2016-08-03' 'recall@' -p '0.01'
python prepare.py '2016-08-03' 'precision@' -p '10.0' -n 10
python prepare.py '2016-08-03' 'precision@' -p '10.0' -n 10 -d 'example_directory/'


"""

# Module-level setup: connect and point the session at the 'models' schema.
engine = setup_environment.get_database()
try:
    con = engine.raw_connection()
    con.cursor().execute("SET SCHEMA '{}'".format('models'))
except Exception:
    # Deliberate best-effort: continue without the schema switch if the
    # database is unreachable. Narrowed from a bare `except:` so that
    # SystemExit and KeyboardInterrupt are no longer swallowed.
    pass


def get_metric_best_models(timestamp, metric, parameter=None, number=25):
    """
    --------------------------------------------------------
    Get the EVALUATION METRIC VALUE of the best models
    by the specified timestamp and given metric
    --------------------------------------------------------
    ARGUMENTS:
        timestamp:  models run on or after given timestamp