Example 1
def upload_forecast(forecast_name):
    path = get_forecast_info(forecast_name)
    db = read_validation_db()

    metadata = metadata_dict_for_file(
        list(Path(path).parent.glob('metadata-*.txt'))[0])
    if f"{metadata['team_abbr']}-{metadata['model_abbr']}" not in [
            m.abbreviation for m in models
    ]:
        create_model(path, metadata)

    time_zero_date = '-'.join(forecast_name.split('-')[:3])

    if time_zero_date not in [
            timezero.timezero_date for timezero in project_obj.timezeros
    ]:
        create_timezero(time_zero_date)

    # print(forecast_name, metadata, time_zero_date)
    if path is not None:
        errors_from_validation = validate_quantile_csv_file(path)
        if errors_from_validation != "no errors":
            print(errors_from_validation)
            return errors_from_validation, True
        with open(path) as fp:
            print('uploading %s' % path)
            checksum = hashlib.md5(fp.read().encode('utf-8')).hexdigest()
            fp.seek(0)
            quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                fp, COVID_TARGETS, covid19_row_validator, COVID_ADDL_REQ_COLS)

            if len(error_from_transformation) > 0:
                return error_from_transformation, True

            try:
                fr = util.upload_forecast(
                    conn, quantile_json, forecast_name, project_name,
                    f"{metadata['team_abbr']}-{metadata['model_abbr']}",
                    time_zero_date)
                db[forecast_name] = checksum
                write_db(db)
                return None, fr
            except Exception as e:
                return e, True
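
Example 1 relies on module-level state (conn, project_obj, models, project_name) and helpers (create_model, create_timezero, get_forecast_info, read_validation_db, write_db) that are not shown. A minimal sketch of the setup it appears to assume, with names borrowed from the later examples (the project title comes from Example 4; treat the rest as assumptions):

# Illustrative setup only; Example 1 expects these globals to exist already.
import hashlib
from pathlib import Path

from zoltpy import util

project_name = 'COVID-19 Forecasts'  # project title hard-coded in Example 4
conn = util.authenticate()           # Zoltar credentials from the environment
project_obj = [p for p in conn.projects if p.name == project_name][0]
models = project_obj.models          # backs the team-model membership check
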
Example 2
def upload_covid_all_forecasts(path_to_processed_model_forecasts, dir_name):
    global models
    global model_names

    # Get all forecasts in the directory of this model
    forecasts = os.listdir(path_to_processed_model_forecasts)
    conn.re_authenticate_if_necessary()
    # Get model name or create a new model if it's not in the current Zoltar project
    try:
        metadata = metadata_dict_for_file(path_to_processed_model_forecasts +
                                          'metadata-' + dir_name + '.txt')
    except Exception as ex:
        return ex
    model_name = metadata['model_name']
    if model_name not in model_names:
        model_config = {
            'name': metadata['model_name'],
            'abbreviation': metadata['team_abbr'] + '-' + metadata['model_abbr'],
            'team_name': metadata['team_name'],
            'description': metadata['methods'],
            'home_url': metadata['website_url'] if metadata.get('website_url') is not None else url + dir_name,
            'aux_data_url': 'NA',
        }
        try:
            print('Create model %s' % model_name)
            project_obj.create_model(model_config)
            models = project_obj.models
            model_names = [model.name for model in models]
        except Exception as ex:
            return ex
    print('Time: %s \t Model: %s' % (datetime.now(), model_name))
    model = [model for model in models if model.name == model_name][0]

    # Get timezeros of existing forecasts to avoid re-uploading
    existing_time_zeros = [
        forecast.timezero.timezero_date for forecast in model.forecasts
    ]

    # Batch upload
    json_io_dict_batch = []
    forecast_filename_batch = []
    timezero_date_batch = []

    for forecast in forecasts:

        # Default config
        over_write = False
        checksum = 0
        time_zero_date = forecast.split(dir_name)[0][:-1]

        # Check if forecast is already on zoltar
        with open(path_to_processed_model_forecasts + forecast, "rb") as f:
            # Get the current hash of a processed file
            checksum = hashlib.md5(f.read()).hexdigest()

            # Check this hash against the previously stored hash
            # if db.get(forecast, None) != checksum:
            #     print(forecast, db.get(forecast, None))
            #     if time_zero_date in existing_time_zeros:
            #         over_write = True
            # else:
            #     continue

            # If the timezero already exists, don't write it again
            if time_zero_date in existing_time_zeros:
                # update checksum
                # db[forecast] = checksum
                continue

        # Skip metadata text file
        if '.txt' in forecast:
            continue

        with open(path_to_processed_model_forecasts + forecast) as fp:
            # Create the timezero on Zoltar if it doesn't exist
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    return ex

            # Validate covid19 file
            errors_from_validation = validate_quantile_csv_file(
                path_to_processed_model_forecasts + forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, COVID_TARGETS, covid19_row_validator,
                    COVID_ADDL_REQ_COLS)
                if len(error_from_transformation) > 0:
                    return error_from_transformation
                else:
                    try:
                        print('Upload forecast for model: %s \t|\t File: %s' %
                              (model_name, forecast))
                        print()
                        util.upload_forecast(conn,
                                             quantile_json,
                                             forecast,
                                             project_name,
                                             model_name,
                                             time_zero_date,
                                             overwrite=over_write)
                        db[forecast] = checksum
                    except Exception as ex:
                        print(ex)
                        return ex
                    json_io_dict_batch.append(quantile_json)
                    timezero_date_batch.append(time_zero_date)
                    forecast_filename_batch.append(forecast)
            else:
                return errors_from_validation

    # # Batch upload for better performance
    # if len(json_io_dict_batch) > 0:
    #     try:
    #         util.upload_forecast_batch(conn, json_io_dict_batch, forecast_filename_batch, project_name, model_name, timezero_date_batch, overwrite = over_write)
    #     except Exception as ex:
    #         return ex
    return "Pass"
Example 3
def upload_covid_all_forecasts(path_to_processed_model_forecasts, dir_name):
    global models
    global model_names

    # Get all forecasts in the directory of this model
    forecasts = os.listdir(path_to_processed_model_forecasts)

    # Get model name or create a new model if it's not in the current Zoltar project
    try:
        metadata = metadata_dict_for_file(path_to_processed_model_forecasts+'metadata-'+dir_name+'.txt')
    except Exception as ex:
        return ex 
    model_name = metadata['model_name']
    if model_name not in model_names:
        model_config = {
            'name': metadata['model_name'],
            'abbreviation': metadata['team_abbr'] + '-' + metadata['model_abbr'],
            'team_name': metadata['team_name'],
            'description': metadata['methods'],
            'home_url': url + dir_name,
            'aux_data_url': 'NA',
        }
        try:
            project_obj.create_model(model_config)
            models = project_obj.models
            model_names = [model.name for model in models]
        except Exception as ex:
            return ex  
    model = [model for model in models if model.name == model_name][0]

    # Get names of existing forecasts to avoid re-upload
    existing_forecasts = [forecast.source for forecast in model.forecasts]

    # Batch upload
    json_io_dict_batch = []
    forecast_filename_batch = []
    timezero_date_batch = []

    for forecast in forecasts:
        over_write = False
        checksum = 0
        # Check if forecast is already on zoltar
        with open(path_to_processed_model_forecasts+forecast, "rb") as f:
            # Get the current hash of a processed file
            checksum = hashlib.md5(f.read()).hexdigest()

            # Check this hash against the previously stored hash
            if db.get(forecast, None) != checksum:
                print(forecast)
                if forecast in existing_forecasts:
                    over_write = True
            else:
                continue

        # Skip metadata text file
        if '.txt' in forecast:
            continue

        with open(path_to_processed_model_forecasts+forecast) as fp:

            # Get the timezero and create it on Zoltar if it doesn't exist
            time_zero_date = forecast.split(dir_name)[0][:-1]
            # if time_zero_date != "2020-05-25":
            #     continue
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    return ex

            # Validate covid19 file
            errors_from_validation = validate_quantile_csv_file(path_to_processed_model_forecasts+forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(fp, VALID_TARGET_NAMES, covid19_row_validator)
                if len(error_from_transformation) > 0:
                    return error_from_transformation
                else:
                    try:
                        util.upload_forecast(conn, quantile_json, forecast,
                                             project_name, model_name, time_zero_date,
                                             overwrite=over_write)
                        db[forecast] = checksum
                    except Exception as ex:
                        print(ex)
                        return ex
                    json_io_dict_batch.append(quantile_json)
                    timezero_date_batch.append(time_zero_date)
                    forecast_filename_batch.append(forecast)
            else:
                return errors_from_validation
    
    # # Batch upload for better performance
    # if len(json_io_dict_batch) > 0:
    #     try:
    #         util.upload_forecast_batch(conn, json_io_dict_batch, forecast_filename_batch, project_name, model_name, timezero_date_batch, overwrite = over_write)
    #     except Exception as ex:
    #         return ex
    return "Pass"
Example 4
def upload_covid_all_forecasts(path_to_processed_model_forecasts):
    # meta info
    project_name = 'COVID-19 Forecasts'
    project_obj = None
    project_timezeros = []
    forecasts = os.listdir(path_to_processed_model_forecasts)
    conn = util.authenticate()

    # Get all existing timezeros in the project
    for project in conn.projects:
        if project.name == project_name:
            project_obj = project
            for timezero in project.timezeros:
                project_timezeros.append(timezero.timezero_date)
            break

    # Get model name
    separator = '-'
    dir_name = separator.join(
        forecasts[0].split(separator)[3:]).split('.csv')[0]
    metadata = metadata_dict_for_file(path_to_processed_model_forecasts +
                                      'metadata-' + dir_name + '.txt')
    model_name = metadata['model_name']
    model = [
        model for model in project_obj.models if model.name == model_name
    ][0]

    # Get names of existing forecasts to avoid re-upload
    existing_forecasts = [forecast.source for forecast in model.forecasts]

    for forecast in forecasts:

        # Skip if forecast is already on zoltar
        if forecast in existing_forecasts:
            continue

        # Skip metadata text file
        if '.txt' in forecast:
            continue

        with open(path_to_processed_model_forecasts + forecast) as fp:

            # Get the timezero and create it on Zoltar if it doesn't exist
            time_zero_date = forecast.split(dir_name)[0][:-1]
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                except Exception as ex:
                    print(ex)

            # Validate covid19 file
            errors_from_validation = validate_quantile_csv_file(
                path_to_processed_model_forecasts + forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, COVID19_TARGET_NAMES, covid19_row_validator)
                if len(error_from_transformation) > 0:
                    print(error_from_transformation)
                else:
                    try:
                        util.upload_forecast(conn,
                                             quantile_json,
                                             forecast,
                                             project_name,
                                             model_name,
                                             time_zero_date,
                                             overwrite=False)
                    except Exception as ex:
                        print(ex)
            else:
                print(errors_from_validation)
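
Example 4 is the simplest variant: it takes only the directory path, derives dir_name from the first forecast file it finds, and never overwrites. A hypothetical invocation (the path is illustrative; the trailing slash matters because paths are joined with '+'):

# Hypothetical call; the directory must contain the processed CSVs plus a
# metadata-<team>-<model>.txt file.
upload_covid_all_forecasts('./data-processed/TeamX-ModelY/')
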
Example 5
def upload_covid_all_forecasts(path_to_processed_model_forecasts, dir_name):
    global models
    global model_abbrs

    # Get all forecasts in the directory of this model
    forecasts = os.listdir(path_to_processed_model_forecasts)

    # Get model name or create a new model if it's not in the current Zoltar project
    try:
        metadata = metadata_dict_for_file(path_to_processed_model_forecasts +
                                          'metadata-' + dir_name + '.txt')
    except Exception as ex:
        return ex
    model_abbreviation = metadata['model_abbr']

    # get the corresponding model_config for the metadata file
    model_config = zoltar_config_from_metadata(metadata)

    if model_abbreviation not in model_abbrs:
        pprint.pprint('%s not in models' % model_abbreviation)
        if 'home_url' not in model_config:
            model_config['home_url'] = url + dir_name

        try:
            logger.info(f"Creating model {model_config}")
            models.append(project_obj.create_model(model_config))
            model_abbrs = [model.abbreviation for model in models]
        except Exception as ex:
            return ex

    # fetch model based on model_abbr
    model = [
        model for model in models if model.abbreviation == model_abbreviation
    ][0]

    if has_changed(metadata, model):
        # model metadata has changed, call the edit function in zoltpy to update metadata
        print(
            f"{metadata['model_abbr']!r} model has changed metadata contents. Updating on Zoltar..."
        )
        model.edit(model_config)

    # Get timezeros of existing forecasts to avoid re-uploading
    existing_time_zeros = [
        forecast.timezero.timezero_date for forecast in model.forecasts
    ]

    # Batch upload
    json_io_dict_batch = []
    forecast_filename_batch = []
    timezero_date_batch = []

    for forecast in forecasts:

        # Skip anything that isn't a forecast CSV (e.g., the metadata text file)
        if not forecast.endswith('.csv'):
            continue

        # Default config
        over_write = False
        checksum = 0
        time_zero_date = forecast.split(dir_name)[0][:-1]

        # Check if forecast is already on zoltar
        with open(path_to_processed_model_forecasts + forecast, "rb") as f:
            # Get the current hash of a processed file
            checksum = hashlib.md5(f.read()).hexdigest()

            # Check this hash against the previously stored hash
            if db.get(forecast, None) != checksum:
                print(forecast, db.get(forecast, None))
                if time_zero_date in existing_time_zeros:
                    over_write = True
            else:
                continue

        with open(path_to_processed_model_forecasts + forecast) as fp:
            # Create the timezero on Zoltar if it doesn't exist
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    print(ex)
                    return ex

            # Validate covid19 file
            print(f"Validating {forecast}")
            errors_from_validation = validate_quantile_csv_file(
                path_to_processed_model_forecasts + forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, COVID_TARGETS, covid19_row_validator,
                    COVID_ADDL_REQ_COLS)
                if len(error_from_transformation) > 0:
                    return error_from_transformation
                else:
                    try:
                        logger.debug(
                            'Upload forecast for model: %s \t|\t File: %s\n' %
                            (metadata['model_abbr'], forecast))
                        util.upload_forecast(conn,
                                             quantile_json,
                                             forecast,
                                             project_name,
                                             metadata['model_abbr'],
                                             time_zero_date,
                                             overwrite=over_write)
                        db[forecast] = checksum
                    except Exception as ex:
                        logger.error(ex)
                        return ex
                    json_io_dict_batch.append(quantile_json)
                    timezero_date_batch.append(time_zero_date)
                    forecast_filename_batch.append(forecast)
            else:
                return errors_from_validation

    # # Batch upload for better performance
    # if len(json_io_dict_batch) > 0:
    #     try:
    #         util.upload_forecast_batch(conn, json_io_dict_batch, forecast_filename_batch, project_name, model_name, timezero_date_batch, overwrite = over_write)
    #     except Exception as ex:
    #         return ex
    return "Pass"
Example 6
def upload_to_zoltar_fte(conn, proj_config):
    project, existing_timezeros = get_project_and_timezeros(conn, proj_config)

    pres_nat = pd.read_csv(DOWNLOAD_PATHS[FTE]['pres-nat'])
    pres_state = pd.read_csv(DOWNLOAD_PATHS[FTE]['pres-state'])

    # Calculate the Dem share of the two-party popular vote:
    # Dem share = (avg Dem share / (avg Dem share + avg Rep share)) * 100
    # (multiplied by 100 for a percentage; in 2020 the 'chal' columns are the
    # Democratic challenger and 'inc' the Republican incumbent; subject to revision)
    pres_nat['voteshare_dem_twoparty'] = \
        (pres_nat['national_voteshare_chal'] / (pres_nat['national_voteshare_inc'] +
                                                pres_nat['national_voteshare_chal'])) * 100

    pres_state['voteshare_dem_twoparty'] = \
        (pres_state['voteshare_chal'] / (pres_state['voteshare_inc'] +
                                         pres_state['voteshare_chal'])) * 100

    forecasts_by_timezero = {}
    jobs = []

    # Presidential, national
    for i in range(len(pres_nat)):
        row = pres_nat.iloc[i]

        month, day, year = row.at['modeldate'].split('/')
        forecast = check_and_make_timezero_and_get_forecast_object(year, month, day, project,
                                                                   existing_timezeros, forecasts_by_timezero)

        # each row in raw 538 csv has multiple targets for us
        for target in proj_config['targets']:
            target_name = target['name']
            if target_name == 'popvote_win_dem':
                forecast['predictions'].append({
                    'unit': 'US-pres',
                    'target': target_name,
                    'class': 'bin',
                    'prediction': {
                        'cat': [True, False],
                        'prob': list(row.loc[['popwin_chal', 'popwin_inc']])
                    }
                })
            elif target_name == 'ec_win_dem':
                forecast['predictions'].append({
                    'unit': 'US-pres',
                    'target': target_name,
                    'class': 'bin',
                    'prediction': {
                        'cat': ['Dem win', 'Rep win', 'tie'],
                        'prob': list(row.loc[['ecwin_chal', 'ecwin_inc', 'ec_nomajority']])
                    }
                })
            elif target_name == 'ev_won_dem':
                forecast['predictions'].extend([
                    {
                        'unit': 'US-pres',
                        'target': target_name,
                        'class': 'point',
                        'prediction': {
                            'value': row.at['ev_chal']
                        }
                    },
                    {
                        'unit': 'US-pres',
                        'target': target_name,
                        'class': 'quantile',
                        'prediction': {
                            'quantile': [0.1, 0.9],
                            'value': list(row.loc[['ev_chal_lo', 'ev_chal_hi']].astype('float64'))
                        }
                    }
                ])
            elif target_name == 'voteshare_dem_twoparty':
                forecast['predictions'].extend([
                    {
                        'unit': 'US-pres',
                        'target': target_name,
                        'class': 'point',
                        'prediction': {
                            'value': row.at['voteshare_dem_twoparty']
                        }
                    }
                ])

    # Presidential, state-level
    for i in range(len(pres_state)):
        row = pres_state.iloc[i]

        month, day, year = row.at['modeldate'].split('/')
        forecast = check_and_make_timezero_and_get_forecast_object(year, month, day, project,
                                                                   existing_timezeros, forecasts_by_timezero)

        for target in proj_config['targets']:
            target_name = target['name']
            if target_name == 'popvote_win_dem':
                forecast['predictions'].append({
                    'unit': f'{LOCATION_CODES_REVERSE[row.at["state"]]}-pres',
                    'target': target_name,
                    'class': 'bin',
                    'prediction': {
                        'cat': [True, False],
                        'prob': list(row.loc[['winstate_chal', 'winstate_inc']])
                    }
                })

            elif target_name == 'voteshare_dem_twoparty':
                forecast['predictions'].append({
                    'unit': f'{LOCATION_CODES_REVERSE[row.at["state"]]}-pres',
                    'target': target_name,
                    'class': 'point',
                    'prediction': {
                        'value': row.at['voteshare_dem_twoparty']
                    }
                })

    # The 538_pp model ends here; senatorial forecasts come from three different models
    for timezero in forecasts_by_timezero:
        util.upload_forecast(
            conn=conn,
            json_io_dict=forecasts_by_timezero[timezero],
            forecast_filename='538-agg.csv',
            project_name=proj_config['name'],
            model_abbr='538_pp',
            timezero_date=timezero.isoformat(),
            overwrite=True
        )

    # senatorial elections
    senate_nat = pd.read_csv(DOWNLOAD_PATHS[FTE]['senate-nat'])
    senate_state = pd.read_csv(DOWNLOAD_PATHS[FTE]['senate-state'])

    total_dem_voteshare = senate_state.loc[:, ['voteshare_mean_D1', 'voteshare_mean_D2',
                                               'voteshare_mean_D3', 'voteshare_mean_D4']].sum(axis=1)
    total_rep_voteshare = senate_state.loc[:, ['voteshare_mean_R1', 'voteshare_mean_R2',
                                               'voteshare_mean_R3', 'voteshare_mean_R4']].sum(axis=1)

    senate_state['voteshare_dem_twoparty'] = total_dem_voteshare / (total_dem_voteshare + total_rep_voteshare)

    forecasts_by_timezero = {'_lite': {}, '_classic': {}, '_deluxe': {}}

    # Senate, national
    for i in range(len(senate_nat)):
        row = senate_nat.iloc[i]
        model = row.at['expression']

        month, day, year = row.at['forecastdate'].split('/')
        forecast = check_and_make_timezero_and_get_forecast_object('20' + year, month, day, project, existing_timezeros,
                                                                   forecasts_by_timezero, model=model)

        forecast['predictions'].extend([
            {
                'unit': 'US-sen',
                'target': 'senate_win_dem',
                'class': 'bin',
                'prediction': {
                    'cat': [True, False],
                    'prob': list(row.loc[['chamber_Dparty', 'chamber_Rparty']])
                }
            },
            {
                'unit': 'US-sen',
                'target': 'senate_seats_won_dem',
                'class': 'quantile',
                'prediction': {
                    'quantile': [0.1, 0.5, 0.9],
                    'value': list(row.loc[['p10_seats_Dparty',
                                           'median_seats_Dparty',
                                           'p90_seats_Dparty'
                                           ]].astype('int32'))
                }
            }
        ])

    # Senate, state-level
    for i in range(len(senate_state)):
        row = senate_state.iloc[i]
        model = row.at['expression']
        seat = row.at['district'].split('-')
        if seat[0] == 'GA':
            if seat[1] == 'S3':
                unit = 'GA-sen-sp'
            else:
                unit = 'GA-sen'
        else:
            unit = f'{seat[0]}-sen'

        month, day, year = row.at['forecastdate'].split('/')
        forecast = check_and_make_timezero_and_get_forecast_object('20' + year, month, day, project, existing_timezeros,
                                                                   forecasts_by_timezero, model=model)

        forecast['predictions'].extend([
            {
                'unit': unit,
                'target': 'popvote_win_dem',
                'class': 'bin',
                'prediction': {
                    'cat': [True, False],
                    'prob': list(row.loc[['winner_Dparty', 'winner_Rparty']])
                }
            },
            {
                'unit': unit,
                'target': 'voteshare_dem_twoparty',
                'class': 'point',
                'prediction': {
                    'value': row.at['voteshare_dem_twoparty']
                }
            }
        ])

    for model in forecasts_by_timezero:
        for timezero in forecasts_by_timezero[model]:
            util.upload_forecast(
                conn=conn,
                json_io_dict=forecasts_by_timezero[model][timezero],
                forecast_filename='538-agg.csv',
                project_name=proj_config['name'],
                model_abbr=f'538{model}',
                timezero_date=timezero.isoformat(),
                overwrite=True
            )
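
The helper check_and_make_timezero_and_get_forecast_object used throughout Example 6 is not shown. A minimal sketch of what it plausibly does, assuming existing_timezeros holds ISO date strings and that the json_io_dict skeleton only needs a 'predictions' list (both assumptions):

from datetime import date

def check_and_make_timezero_and_get_forecast_object(year, month, day, project,
                                                    existing_timezeros,
                                                    forecasts_by_timezero,
                                                    model=None):
    # Register the timezero with Zoltar if it has not been seen yet, then
    # return (creating if needed) the in-progress forecast dict for it.
    timezero = date(int(year), int(month), int(day))
    if timezero.isoformat() not in existing_timezeros:
        project.create_timezero(timezero.isoformat())
        existing_timezeros.append(timezero.isoformat())
    # Senate forecasts are bucketed per model suffix ('_lite', ...); the
    # presidential ones use the flat dict directly.
    bucket = forecasts_by_timezero if model is None else forecasts_by_timezero[model]
    if timezero not in bucket:
        bucket[timezero] = {'predictions': []}  # assumed json_io_dict skeleton
    return bucket[timezero]
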
Example 7
def upload_to_zoltar_economist(conn, proj_config):
    project, existing_timezeros = get_project_and_timezeros(conn, proj_config)

    ec_sims = pd.read_csv(f'{DOWNLOAD_PATHS[ECONOMIST]["root"]}electoral_college_simulations.csv')
    ec_prob = pd.read_csv(f'{DOWNLOAD_PATHS[ECONOMIST]["root"]}electoral_college_probability_over_time.csv')
    ec_votes = pd.read_csv(f'{DOWNLOAD_PATHS[ECONOMIST]["root"]}electoral_college_votes_over_time.csv')
    pres_nat = pd.read_csv(f'{DOWNLOAD_PATHS[ECONOMIST]["root"]}projected_eday_vote_over_time.csv')
    pres_nat_topline = pd.read_csv(f'{DOWNLOAD_PATHS[ECONOMIST]["root"]}national_ec_popvote_topline.csv')
    pres_state_topline = pd.read_csv(f'{DOWNLOAD_PATHS[ECONOMIST]["root"]}state_averages_and_predictions_topline.csv')

    for year, month, day in map(
        lambda date_str: date_str.split('-'),
        pres_nat['model_run_date']
    ):
        check_and_make_timezero(year, month, day, project, existing_timezeros)

    with open(f'{DOWNLOAD_PATHS[ECONOMIST]["root"]}timestamp.json', 'rb') as timestamp_file:
        model_most_recent_date = date.fromtimestamp(json.load(timestamp_file)['timestamp'])

    forecasts = {}

    # process electoral college simulations
    # 1. sample 1000 out of 40000 simulations provided
    # 2. convert to Zoltar format
    # 3. save for upload
    ec_sims_sampled_1000 = ec_sims.sample(n=1000, random_state=SEED)
    forecast = get_forecast_object_of_timezero(model_most_recent_date, forecasts)
    ec_win_dem_samples, ev_won_dem_samples, popvote_win_dem_samples, voteshare_dem_twoparty_samples = [], [], [], []

    state_list = list(LOCATION_CODES.keys())[1:52]
    state_won_dem_samples, state_voteshare_samples = {k: [] for k in state_list}, {k: [] for k in state_list}
    for sample in ec_sims_sampled_1000.itertuples(name='Sample'):
        dem_ev = int(sample.dem_ev)
        if dem_ev > 270:
            ec_result = 'Dem win'
        elif dem_ev < 270:
            ec_result = 'Rep win'
        else:  # dem_ev == 270
            ec_result = 'tie'

        # Exactly 0.5 is extremely unlikely (floating point, and in reality)
        voteshare_dem_twoparty = sample.natl_pop_vote
        popvote_win_dem = voteshare_dem_twoparty >= 0.5

        ec_win_dem_samples.append(ec_result)
        ev_won_dem_samples.append(dem_ev)
        popvote_win_dem_samples.append(popvote_win_dem)
        voteshare_dem_twoparty_samples.append(voteshare_dem_twoparty)

        # Get the state popular voteshares (state_list was computed above)
        for state, state_voteshare in zip(state_list, sample[4:]):
            state_won_dem = state_voteshare >= 0.5
            state_won_dem_samples[state].append(state_won_dem)
            state_voteshare_samples[state].append(state_voteshare)

    # converting to Zoltar format
    forecast['predictions'].extend([
        {
            'unit': 'US-pres',
            'target': 'ec_win_dem',
            'class': 'sample',
            'prediction': {
                'sample': ec_win_dem_samples
            }
        },
        {
            'unit': 'US-pres',
            'target': 'ev_won_dem',
            'class': 'sample',
            'prediction': {
                'sample': ev_won_dem_samples
            }
        },
        {
            'unit': 'US-pres',
            'target': 'popvote_win_dem',
            'class': 'sample',
            'prediction': {
                'sample': popvote_win_dem_samples
            }
        },
        {
            'unit': 'US-pres',
            'target': 'voteshare_dem_twoparty',
            'class': 'sample',
            'prediction': {
                'sample': voteshare_dem_twoparty_samples
            }
        },
    ])

    for state in state_list:
        forecast['predictions'].extend([
            {
                'unit': f'{state}-pres',
                'target': 'popvote_win_dem',
                'class': 'sample',
                'prediction': {
                    'sample': state_won_dem_samples[state]
                }
            },
            {
                'unit': f'{state}-pres',
                'target': 'voteshare_dem_twoparty',
                'class': 'sample',
                'prediction': {
                    'sample': state_voteshare_samples[state]
                }
            }
        ])

    # process electoral college win probabilities over time
    ec_prob_dem = ec_prob[ec_prob['party'] == 'Democratic']
    ec_prob_rep = ec_prob[ec_prob['party'] == 'Republican']
    for row_dem, row_rep in zip(ec_prob_dem.itertuples(name='Row'), ec_prob_rep.itertuples(name='Row')):
        year, month, day = row_dem.date.split('-')
        timezero = date(int(year), int(month), int(day))
        forecast = get_forecast_object_of_timezero(timezero, forecasts)
        tie_prob = 1 - row_dem.win_prob - row_rep.win_prob

        forecast['predictions'].append({
            'unit': 'US-pres',
            'target': 'ec_win_dem',
            'class': 'bin',
            'prediction': {
                'cat': ['Dem win', 'Rep win', 'tie'],
                'prob': [row_dem.win_prob, row_rep.win_prob, tie_prob]
            }
        })

    # process electoral college votes over time
    ec_votes_dem = ec_votes[ec_votes['party'] == 'Democratic']
    for row in ec_votes_dem.itertuples(name='Row'):
        year, month, day = row.date.split('-')
        timezero = date(int(year), int(month), int(day))
        forecast = get_forecast_object_of_timezero(timezero, forecasts)

        forecast['predictions'].append({
            'unit': 'US-pres',
            'target': 'ev_won_dem',
            'class': 'quantile',
            'prediction': {
                'quantile': [0.05, 0.4, 0.5, 0.6, 0.95],
                'value': [float(ev) for ev in row[2:]]
            }
        })

    # process national popular voteshare over time
    for row in pres_nat.itertuples(name='Row'):
        year, month, day = row.model_run_date.split('-')
        timezero = date(int(year), int(month), int(day))
        forecast = get_forecast_object_of_timezero(timezero, forecasts)

        forecast['predictions'].extend([
            {
                'unit': 'US-pres',
                'target': 'voteshare_dem_twoparty',
                'class': 'quantile',
                'prediction': {
                    'quantile': [0.05, 0.5, 0.95],
                    'value': [row.lower_95_dem_vote, row.mean_dem_vote, row.upper_95_dem_vote]
                }
            },
        ])

    # process national popular vote topline (for popular vote win probability)
    for row in pres_nat_topline.itertuples(name='Row'):
        year, month, day = row.date.split('-')
        timezero = date(int(year), int(month), int(day))
        forecast = get_forecast_object_of_timezero(timezero, forecasts)

        forecast['predictions'].append({
            'unit': 'US-pres',
            'target': 'popvote_win_dem',
            'class': 'bin',
            'prediction': {
                'cat': [True, False],
                'prob': [row.dem_vote_win_prob, row.rep_vote_win_prob]
            }
        })

    # process state popular vote topline:
    for row in pres_state_topline.itertuples(name='Row'):
        year, month, day = row.date.split('-')
        timezero = date(int(year), int(month), int(day))
        forecast = get_forecast_object_of_timezero(timezero, forecasts)

        forecast['predictions'].extend([
            {
                'unit': f'{row.state}-pres',
                'target': 'popvote_win_dem',
                'class': 'bin',
                'prediction': {
                    'cat': [True, False],
                    'prob': [row.projected_win_prob, 1 - row.projected_win_prob]
                }
            },
            {
                'unit': f'{row.state}-pres',
                'target': 'voteshare_dem_twoparty',
                'class': 'quantile',
                'prediction': {
                    'quantile': [0.05, 0.5, 0.95],
                    'value': [row.projected_vote_low, row.projected_vote_mean, row.projected_vote_high]
                }
            }
        ])

    # upload to Zoltar
    for timezero in forecasts:
        util.upload_forecast(
            conn=conn,
            json_io_dict=forecasts[timezero],
            forecast_filename='economist-agg.csv',
            project_name=proj_config['name'],
            model_abbr='economist',
            timezero_date=timezero.isoformat(),
            overwrite=True
        )
    EW2submitDates = sortDateOfSubmissions()
    EW2submissionFile = relateEW2SubmissionFiles()

    # Hard-coded override of the computed dates: only these six EWs are uploaded.
    EW2submitDates = {
        'EW42': '20191014',
        'EW43': '20191021',
        'EW44': '20191028',
        'EW45': '20191104',
        'EW46': '20191111',
        'EW47': '20191118'
    }

    for EW in EW2submitDates:
        try:
            timezero_date = EW2submitDates[EW]
            forecast_file_path = EW2submissionFile[EW]
            predx_json, forecast_filename = util.convert_cdc_csv_to_json_io_dict(
                forecast_file_path)
            conn = util.authenticate()
            util.upload_forecast(conn,
                                 predx_json,
                                 forecast_filename,
                                 project_name,
                                 model_name,
                                 timezero_date,
                                 overwrite=True)
            print('uploaded {:s}'.format(forecast_file_path))
        except Exception as e:
            print('error uploading {:s}'.format(forecast_file_path))
            print('{:s}'.format(str(e)))
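
A driver for these two election uploaders would look roughly like the sketch below; proj_config only needs the keys the functions actually index ('name', plus 'targets' for the FTE variant), and the project title shown is an assumption:

# Hypothetical entry point.
conn = util.authenticate()
proj_config = {
    'name': '2020 U.S. Election Forecasts',   # assumed Zoltar project title
    'targets': [{'name': 'popvote_win_dem'},  # only each target's 'name' is read
                {'name': 'voteshare_dem_twoparty'}],
}
upload_to_zoltar_fte(conn, proj_config)
upload_to_zoltar_economist(conn, proj_config)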