from lib_cinci.config import main
from lib_cinci.config import load
from lib_cinci.folders import (path_to_predictions, path_to_pickled_models,
                               path_to_pickled_scalers, path_to_pickled_imputers)
import os
import logging
import logging.config
# BUG FIX: MongoClient is used below but was never imported, which made this
# script fail with NameError on startup. pymongo is the driver this script
# already depends on (it connects to the Mongo instance from the logger config).
from pymongo import MongoClient

'''
Using the --pickle option in model.py will dump the model, scaler and
imputer objects. Use this to only keep the top_n models from each
experiment and delete the rest.
'''

# logger config
logging.config.dictConfig(load('logger_config.yaml'))
logger = logging.getLogger()

# Directories to check for files: predictions plus the pickled model,
# scaler and imputer dumps produced by model.py --pickle
directories = [path_to_predictions, path_to_pickled_models,
               path_to_pickled_scalers, path_to_pickled_imputers]

# db connection -- model run metadata lives in the 'cincinnati' collection
# of the 'models' database; the URI comes from the logger section of the
# main config
client = MongoClient(main['logger']['uri'])
db = client['models']
collection = db['cincinnati']

# Top n models to keep from each experiment
n = 20
# Chunk of a script that builds feature crosstabs for a validation schema
# and writes them to $OUTPUT_FOLDER/feature_crosstabs.csv.
# Usage: <script> <validation_schema>
# NOTE(review): the rest of this file is not visible here; module-level
# names defined below (all_tables, engine, path_to_output, ...) are
# presumably consumed further down.
import pandas as pd
from sqlalchemy import create_engine
import os
import yaml
from lib_cinci.config import load
import sys

# ROOT_FOLDER and OUTPUT_FOLDER must be set in the environment
# (KeyError here means the environment is not configured)
folder = os.environ['ROOT_FOLDER']
output_folder = os.environ['OUTPUT_FOLDER']
path_to_output = os.path.join(output_folder, 'feature_crosstabs.csv')

# Database connection parameters come from the 'db' section of config.yaml
connparams = load('config.yaml')['db']
uri = '{dialect}://{user}:{password}@{host}:{port}/{database}'.format(**connparams)
engine = create_engine(uri)

# Schema whose feature tables we inspect (first CLI argument)
validation_schema = sys.argv[1]

# get all tables from feature schema, excluding
# 'insp2' tables (they are lookups, not features),
# named_entities and parcels_inspections
query = '''
    SELECT DISTINCT table_name FROM information_schema.tables
    WHERE table_schema = '{schema}'
    AND SUBSTRING(table_name FROM 1 FOR 5) != 'insp2'
    AND table_name NOT IN ('parc_year', 'parcels_inspections', 'named_entities');
'''.format(schema=validation_schema)

# One-column DataFrame of qualifying table names in the schema
all_tables = pd.read_sql(query, engine)
#!/usr/bin/env python import logging import logging.config import numpy as np import matplotlib.pyplot as plt import pandas as pd from sqlalchemy import create_engine import datetime from lib_cinci.config import load logging.config.dictConfig(load('logger_config.yaml')) logger = logging.getLogger() years = [ '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015' ] tax_dfs = { '2007': 'taxes07', '2008': 'taxes08', '2009': 'taxes09', '2010': 'taxes10', '2011': 'taxes11', '2012': 'taxes12', '2013': 'taxes13', '2014': 'taxes14', '2015': 'taxes15' } def format_parcels_list(parcels):
# Fire-feature generation: builds per-inspection features from fire
# incident records within a time window and distance of each parcel.
import logging
import logging.config

from feature_utils import make_inspections_address_nmonths_table, compute_frequency_features
from feature_utils import format_column_names, group_and_count_from_db
from lib_cinci.config import load
from lib_cinci.features import check_date_boundaries

# Config logger
logging.config.dictConfig(load("logger_config.yaml"))
logger = logging.getLogger()

def make_fire_features(con, n_months, max_dist):
    """
    Make Fire features.

    Input:
        con: connection to postgres database. "set schema ..." must have
            been called on this connection to select the correct schema
            from which to load inspections.
        n_months: number of months in the aggregation window passed to
            check_date_boundaries and the events-table builder
            (presumably the look-back window per inspection -- confirm
            in feature_utils).
        max_dist: maximum distance for associating fire incidents with a
            parcel (units defined by the events table -- confirm in
            feature_utils).

    Output:
        A pandas dataframe, with one row per inspection and one column
        per feature.
    """
    # Source table/column the window computation is based on
    dataset = "fire"
    date_column = "incident_date"
    # Get the time window for which you can generate features
    min_insp, max_insp = check_date_boundaries(con, n_months, dataset, date_column)
    # Build (or reuse) the table joining inspections to nearby fire
    # incidents within n_months (call continues past this chunk)
    make_inspections_address_nmonths_table(