Example #1
0
def get_all_preds():
    """Fit three death-prediction ensembles on county data and return them.

    Loads the county-level data from ``../data/``, keeps only the rows whose
    ``cases`` series has a positive maximum, and then runs
    ``fit_and_predict.fit_and_predict_ensemble`` three times with different
    model mixes (all four models; everything but the plain exponential;
    shared exponential + linear only). Each run targets ``NUM_DAYS_LIST``
    and the ``deaths`` outcome in ``predict_future`` mode.

    Returns:
        A DataFrame holding ``countyFIPS``, ``CountyNamew/StateAbbrev`` and
        every column whose name contained ``'predicted'`` right after the
        ensembles were fitted.

    NOTE(review): the per-day columns built near the end are written to the
    working frame but are not part of the returned selection -- confirm
    whether callers expect them.
    """
    # County covariates handed to the demographics-aware model.
    covariates = [
        'PopulationDensityperSqMile2010',
        #                        'MedicareEnrollment,AgedTot2017',
        'PopulationEstimate2018',
        '#ICU_beds',
        'MedianAge2010',
        'Smokers_Percentage',
        'DiabetesPercentage',
        'HeartDiseaseMortality',
        'Respiratory Mortality',
        '#Hospitals'
    ]

    # Model specifications; the same dict objects are shared by the ensembles.
    exp_spec = {'model_type': 'exponential'}
    shared_spec = {'model_type': 'shared_exponential'}
    demo_spec = {
        'model_type': 'shared_exponential',
        'demographic_vars': covariates
    }
    lin_spec = {'model_type': 'linear'}

    # (output column, models in the ensemble), fitted in this order.
    ensembles = (
        ('predicted_deaths_ensemble_all',
         [exp_spec, shared_spec, demo_spec, lin_spec]),
        ('predicted_deaths_ensemble_no_exponential_all',
         [shared_spec, demo_spec, lin_spec]),
        ('predicted_deaths_ensemble_shared_linear_all',
         [shared_spec, lin_spec]),
    )

    df = load_data.load_county_level(data_dir='../data/')
    df['max_cases'] = [max(series) for series in df['cases']]
    df = df[df['max_cases'] > 0]

    for out_key, specs in ensembles:
        df = fit_and_predict.fit_and_predict_ensemble(
            df,
            target_day=NUM_DAYS_LIST,
            mode='predict_future',
            outcome='deaths',
            methods=specs,
            output_key=out_key)

    # Snapshot the prediction columns before adding the per-day ones.
    ensemble_cols = [col for col in df if 'predicted' in col]
    for col in ensemble_cols:
        # Drop the last three characters (the 'all' of '..._all') so the
        # day number can be appended in their place.
        stem = col[:-3]
        for day in range(1, 8):
            df[stem + str(day)] = np.array(
                [preds[day - 1] for preds in df[col].values])

    id_cols = ['countyFIPS', 'CountyNamew/StateAbbrev']
    return df[id_cols + ensemble_cols]
Example #2
0
def data_loader(cache_path="county_data.pkl",
                data_dir="covid19-severity-prediction/data"):
    """Return the county-level data, backed by a local pickle cache.

    If ``cache_path`` exists, its pickled content is returned. Otherwise the
    data is loaded with ``load_data.load_county_level(data_dir)``, written to
    ``cache_path`` and returned.

    Args:
        cache_path: Pickle file used as the cache. The default is a relative
            path, so it is resolved against the current working directory.
        data_dir: Directory passed to ``load_data.load_county_level`` when
            the cache has to be (re)built.

    Returns:
        The object produced by ``load_data.load_county_level`` (freshly
        loaded, or as previously pickled).
    """
    if os.path.exists(cache_path):
        # NOTE: unpickling can execute arbitrary code -- only point
        # ``cache_path`` at files this function (or a trusted source) wrote.
        with open(cache_path, 'rb') as f:
            return pickle.load(f)

    df = load_data.load_county_level(data_dir)
    with open(cache_path, 'wb') as f:
        pickle.dump(df, f)
    return df
            cached_fname = oj(cached_dir,
                              f'preds_{d.month}_{d.day}_cached.pkl')
            if os.path.exists(cached_fname):
                date2.append(d + timedelta(days=6))
                add_predictions_7day(pd.read_pickle(cached_fname), df_tab)
            else:
                k += 1
                if k > 1:
                    break
    return df_tab, date2


if __name__ == '__main__':
    print('loading data...')
    NUM_DAYS_LIST = [1, 2, 3, 4, 5, 6, 7]
    df_county = load_data.load_county_level(
        data_dir=oj(parentdir, 'data')).fillna(0)
    df_county = add_preds(
        df_county,
        NUM_DAYS_LIST=NUM_DAYS_LIST,
        cached_dir=oj(parentdir,
                      'data'))  # adds keys like "Predicted Deaths 1-day"

    ## organize predictions as arrays

    add_pre(df_county, 'Predicted Cases ', 'pred_cases', 'pred_new_cases')
    add_pre(df_county, 'Predicted Deaths ', 'pred_deaths', 'pred_new_deaths')

    ## add new cases/death to dataframe
    add_new(df_county)
    ## Add new cases/deaths predictions and their intervals
    df_county = add_new_pre(df_county, 'Predicted Cases ', 'tot_cases',
import numpy as np
import pandas as pd
from os.path import join as oj
import pygsheets
import pandas as pd
import sys
sys.path.append('../modeling')
sys.path.append('..')
import load_data
from fit_and_predict import add_preds
from functions import merge_data

# Script entry point: runs the loading and prediction steps end to end and
# prints a message once every step has completed without raising.
if __name__ == '__main__':
    # Only a single entry is requested from add_preds.
    NUM_DAYS_LIST = [1]
    df_county = load_data.load_county_level(data_dir='../data')
    # NOTE(review): df_hospital is not used again in the visible code;
    # presumably it is loaded only to check that loading works -- confirm.
    df_hospital = load_data.load_hospital_level(
        data_dir='../data_hospital_level')
    df_county = add_preds(
        df_county,
        NUM_DAYS_LIST=NUM_DAYS_LIST)  # adds keys like "Predicted Deaths 1-day"
    print('succesfully ran pipeline!')
Example #5
0
# Script-level configuration for an analysis of cached death predictions.
# NOTE(review): `date`, `copy`, `np`, `pd` and `load_data` are expected to be
# imported above this chunk -- not visible here.
linear = {'model_type': 'linear'}
# Set to True to run the NY correction branch below.
corrected = False
#df_county = pd.read_pickle("df_county_6_21.pkl")
df_county = pd.read_pickle("all_deaths_preds_6_21.pkl")
# Starts empty; presumably filled further down the script -- confirm.
linear_weights_by_day = {}
today = date(2020, 6, 21)
earliest_day = date(2020, 3, 7)
# Number of days between earliest_day and today.
ndays = (today - earliest_day).days
outcome = 'deaths'
# NOTE(review): presumably the forecast horizon in days -- confirm where used.
horizon = 21

if corrected:
    """
    correcting the uptick on 4/14
    """
    # Start from a fresh copy of the raw county data; the cached predictions
    # are read separately and only used to compute the correction.
    df_county_orig = load_data.load_county_level(data_dir='../data/')
    df_county_predictions = pd.read_pickle("all_deaths_preds_6_21.pkl")
    df_county = copy.deepcopy(df_county_orig)
    # today = date(2020, 6, 8)
    uptick_date = date(2020, 4, 14)
    # Offset (from the end of each deaths series) of the uptick date.
    days_to_correct = (today - uptick_date).days
    # NOTE(review): rows of df_county and df_county_predictions are matched
    # by position i -- assumes both frames have the same row order; confirm.
    for i in range(len(df_county)):
        r = df_county.iloc[i]
        # Only rows whose StateName is 'NY' are corrected.
        if r['StateName'] == 'NY':
            # First element of the stored prediction for this row.
            pred = df_county_predictions[
                f'all_deaths_pred_4_14_ensemble_21'].values[i][0]
            # Recorded deaths value days_to_correct entries from the end.
            actual = r['deaths'][-days_to_correct]
            correction = actual - pred
            # Values below `actual` are kept; all others are shifted down by
            # `correction` and truncated to int.
            df_county['deaths'].values[i] = np.array([
                x if x < actual else int(x - correction) for x in r['deaths']
            ])
import load_data
from Project_Models import RNN_Model
import torch
import torch.nn as nn


def normalize(x):
    """Scale ``x`` by its maximum along dimension 0.

    Maxima that are exactly zero are replaced by ``1e-4`` before dividing,
    which avoids a division by zero.

    Returns:
        A pair ``(x_normed, maxtensor)``: the scaled tensor, and the
        (zero-patched) maxima with dimension 0 kept at size 1.
    """
    maxtensor = torch.max(x, dim=0, keepdim=True).values
    zero_mask = maxtensor == 0
    maxtensor[zero_mask] = 1e-4
    return x / maxtensor, maxtensor


# Script entry point: builds normalized input/label tensors from the county
# data and instantiates the RNN model on the selected device.
if __name__ == '__main__':
    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    df = load_data.load_county_level()

    # form_input_tensor is defined elsewhere; it receives the frame and the
    # list of feature column names to use.
    x = form_input_tensor(df, [
        '#Hospitals', '#ICU_beds', 'MedicareEnrollment,AgedTot2017',
        'DiabetesPercentage'
    ]).to(device)
    # Keep the maxima so the scaling can be undone later.
    x, xmaxtensor = normalize(x)
    # Report how many NaN entries the normalized inputs contain.
    print(f"#x nan: {(torch.sum(torch.isnan(x)))}")
    print("x:", x)

    # form_labels_tensor is defined elsewhere.
    y = form_labels_tensor(df).to(device)
    y, ymaxtensor = normalize(y)
    print(f"#y nan: {torch.sum(torch.isnan(y))}")
    # Multiplying by ymaxtensor prints the labels rescaled to original units.
    print("y:", y * ymaxtensor)

    # x is indexed at dimension 2, so it has at least three dimensions.
    # NOTE(review): presumably the arguments are input size, hidden size and
    # a layer/output count -- confirm against RNN_Model.RNN.
    model = RNN_Model.RNN(x.shape[2], 128, 2).to(device)
    os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.append(parentdir)
sys.path.append(parentdir + '/modeling')
sys.path.append(parentdir + '/viz')

from fit_and_predict import add_preds
from viz_interactive import plot_counties_slider
import load_data
import numpy as np
import pandas as pd

if __name__ == "__main__":
    data_dir = oj(parentdir, 'data')
    # load in county data
    df = load_data.load_county_level(data_dir=oj(parentdir, 'data'))
    # add lat and lon to the dataframe
    county_lat_lon = pd.read_csv(oj(data_dir, 'county_pop_centers.csv'),
                                 dtype={
                                     'STATEFP': str,
                                     'COUNTYFP': str
                                 })
    county_lat_lon['fips'] = (county_lat_lon['STATEFP'] +
                              county_lat_lon['COUNTYFP']).astype(np.int64)
    # add predictions
    df = add_preds(df, NUM_DAYS_LIST=[1, 2, 3, 4, 5], cached_dir=data_dir)
    # join lat / lon to df
    df = df.join(county_lat_lon.set_index('fips'), on='countyFIPS',
                 how='left').rename(columns={
                     'LATITUDE': 'lat',
                     'LONGITUDE': 'lon'
        'Sonoma'
    ]
    dd = df[df.StateName == 'CA']
    dd = dd[dd.CountyName.isin(bay_area)]

    plt.subplot(R, C, 3)
    viz_static.plot_forecasts(dd.head(5), death_thresh=20)
    plt.title('Bay area counties')
    plt.tight_layout()
    plt.savefig(oj(parentdir, 'results', 'forecasts.svg'))


if __name__ == '__main__':
    print('loading data...')
    NUM_DAYS_LIST = [1, 2, 3, 4, 5, 6, 7]
    df_county = load_data.load_county_level(
        data_dir=oj(parentdir, 'data')).fillna(0)
    df_county_dis = load_data.load_county_level(
        data_dir=oj(parentdir, 'data'),
        discard=True).fillna(0)  # discard one day in time series

    num_days_in_past = 3
    output_key = f'Predicted Deaths {num_days_in_past}-day Lagged'
    df_county = add_preds(
        df_county,
        NUM_DAYS_LIST=NUM_DAYS_LIST,
        cached_dir=oj(parentdir, 'data'),
        discard=False)  # adds keys like "Predicted Deaths 1-day"
    df_county_old = add_preds(
        df_county_dis,
        NUM_DAYS_LIST=NUM_DAYS_LIST,
        cached_dir=oj(parentdir, 'data'),
sys.path.append(rootdir + '/modeling')
sys.path.append(rootdir + '/functions')

from fit_and_predict import add_preds
from viz import viz_map
import update_severity_index as severity_index
import load_data
import merge_data

# Module-level setup for the Flask app: creates the server and loads the
# county and hospital data at import time.
server = flask.Flask('app')
# Read the secret key from the 'secret_key' environment variable.
# NOTE(review): the hard-coded 'secret' fallback is predictable; make sure
# the environment variable is set in any real deployment.
server.secret_key = os.environ.get('secret_key', 'secret')

data_dir = oj(rootdir, 'data')

# load in county data
df_county = load_data.load_county_level(data_dir=oj(rootdir, 'data'))
# add lat and lon to the dataframe
# (the visible code only loads the table and builds its 'fips' key; the
# join onto df_county presumably happens further down -- confirm)
county_lat_lon = pd.read_csv(
    oj(rootdir, 'data/county_level/raw/county_ids/county_popcenters.csv'),
    dtype={'STATEFP': str, 'COUNTYFP': str}
)
# Both parts are read as str, so '+' concatenates them into a string key.
county_lat_lon['fips'] = (county_lat_lon['STATEFP'] + county_lat_lon['COUNTYFP'])

# add predictions
NUM_DAYS_LIST = [1, 2, 3, 4, 5]
df_county = add_preds(df_county, NUM_DAYS_LIST=NUM_DAYS_LIST, cached_dir=data_dir)

# load in hospital data and merge
# (read from 'covid-19-private-data' in the parent directory of rootdir;
# no merge is visible in this chunk)
df_hospital = load_data.load_hospital_level(
    data_dir=oj(os.path.dirname(rootdir), 'covid-19-private-data')
)