Esempio n. 1
0
def get_state_projections_df(input_dir, initial_intervention_type,
                             state_interventions_df):
    """
    Build the projections DataFrame for every state in US_STATE_ABBREV.

    For each state value in US_STATE_ABBREV this resolves the intervention
    type, derives the path of the result file under ``input_dir`` and runs
    ``_calculate_projection_data`` on it (row-wise, via ``parallel_apply``).

    Args:
        input_dir: directory holding the per-state result files
            (generated via run.py, per the original note).
        initial_intervention_type: forwarded to ``_get_intervention_type``.
        state_interventions_df: forwarded to ``_get_intervention_type``.

    Returns:
        Whatever ``parallel_apply`` yields for the per-row results. The
        original note says columns=CALCULATED_PROJECTION_HEADERS_STATES;
        that is not verifiable from this function alone -- TODO confirm.
    """

    # Materialise the dict view so the DataFrame constructor always receives
    # a plain list (same approach as _get_abbrev_df).
    states_df = pd.DataFrame(list(US_STATE_ABBREV.values()), columns=["state"])
    states_df.loc[:, "intervention_type"] = states_df.state.apply(
        lambda x: _get_intervention_type(initial_intervention_type, x,
                                         state_interventions_df))
    states_df.loc[:, "path"] = states_df.apply(
        lambda x: get_file_path(
            input_dir, x.state, x.intervention_type, fips=None),
        axis=1,
    ).values
    # NOTE(review): parallel_apply is not a stock pandas method; presumably
    # it is registered by pandarallel elsewhere in the project -- confirm.
    new_df = states_df.parallel_apply(
        lambda x: _calculate_projection_data(x.state, x.path, fips=None),
        axis=1)
    # Rows with a non-null "State" value are the ones that produced data.
    num_processed_states = new_df.notnull().sum()["State"]

    # The count is of processed rows, so the message must not say "Missing".
    print(
        f"Processed {num_processed_states} states found in input_dir: "
        f"{input_dir}"
    )
    return new_df
Esempio n. 2
0
def _raise_error_if_not_data_from_all_states(key, df, expected_missing):
    """
    Raise DataExportException unless df covers exactly the expected states.

    The expected states are the keys of US_STATE_ABBREV; the observed ones
    are the unique values of the 'Province/State' column of ``df``. States
    listed in ``expected_missing`` may be absent without triggering an error.
    Any other absent state, or any observed value that is not a key of
    US_STATE_ABBREV, causes the exception (built from ``key`` and a message
    listing both sets).
    """
    expected_states = set(US_STATE_ABBREV)
    observed_states = set(df['Province/State'].unique())

    extra_states_in_df = observed_states - expected_states
    missing_states_in_df = (expected_states - observed_states) - expected_missing

    # Nothing missing and nothing unexpected: the data is complete.
    if not (missing_states_in_df or extra_states_in_df):
        return

    raise DataExportException(
        key,
        f"Missing Data from states: {missing_states_in_df}. Have extra states in df {extra_states_in_df}"
    )
def _raise_error_if_not_data_from_all_states(key, df, expected_missing):
    """
    Raise DataExportException unless df covers exactly the expected states.

    The expected states are the keys of US_STATE_ABBREV; the observed ones
    are the unique values of the CommonFields.STATE_FULL_NAME column of
    ``df``. States in ``expected_missing`` are allowed to be absent. Any
    other absent state, or any observed value that is not a key of
    US_STATE_ABBREV, causes the exception (built from ``key`` and a message
    listing both sets).
    """
    expected_states = set(US_STATE_ABBREV)
    observed_states = set(df[CommonFields.STATE_FULL_NAME].unique())

    extra_states_in_df = observed_states - expected_states
    missing_states_in_df = (expected_states - observed_states) - expected_missing

    # Nothing missing and nothing unexpected: the data is complete.
    if not (missing_states_in_df or extra_states_in_df):
        return

    raise DataExportException(
        key,
        f"Missing Data from states: {missing_states_in_df}. Have extra "
        f"states in df {extra_states_in_df}",
    )
def get_state_projections_df(input_dir, initial_intervention_type,
                             state_interventions_df):
    """
    Assemble the per-state projections table.

    For every state value in US_STATE_ABBREV, the intervention type is
    resolved, the file ``<state>.<intervention_type>.json`` under
    ``input_dir`` is handed to ``_calculate_projection_data``, and each
    truthy result becomes one row prefixed with the state. The rows are
    returned as a DataFrame with columns CALCULATED_PROJECTION_HEADERS_STATES.
    """
    rows = []
    for state in US_STATE_ABBREV.values():
        intervention_type = _get_intervention_type(
            initial_intervention_type, state, state_interventions_df)
        path = os.path.join(input_dir, f"{state}.{intervention_type}.json")

        projection_data = _calculate_projection_data(path)
        # A falsy result means there is nothing to record for this state
        # (presumably the file was absent -- confirm in
        # _calculate_projection_data).
        if not projection_data:
            continue
        rows.append([state] + projection_data)

    return pd.DataFrame(rows, columns=CALCULATED_PROJECTION_HEADERS_STATES)
Esempio n. 5
0
def validate_results(result_dir: str) -> None:
    """
    Verify that every expected result file exists and has content.

    One file named ``<state>.<i>.json`` is expected in ``result_dir`` for
    each state value in US_STATE_ABBREV (skipping UNSUPPORTED_REGIONS) and
    each index ``i`` below the number of interventions reported by
    ``build_params.get_interventions()``.

    Raises:
        RuntimeError: if any expected file is missing or zero bytes long;
            the message lists all offending paths.
    """
    intervention_count = len(build_params.get_interventions())
    missing_or_empty = []

    supported_states = (
        state for state in US_STATE_ABBREV.values()
        if state not in UNSUPPORTED_REGIONS
    )
    for state in supported_states:
        for index in range(intervention_count):
            candidate = os.path.join(result_dir, f"{state}.{index}.json")
            try:
                unusable = os.stat(candidate).st_size == 0
            except FileNotFoundError:
                # A file that is not there counts the same as an empty one.
                unusable = True
            if unusable:
                missing_or_empty.append(candidate)

    if missing_or_empty:
        raise RuntimeError(
            f'Missing or empty expected files: {", ".join(missing_or_empty)}')
Esempio n. 6
0
def parse_state(state):
    """
    Normalise a state string through US_STATE_ABBREV.

    Null values (per ``pd.isnull``) are returned untouched. Otherwise the
    input is split on commas and each piece stripped; when the split yields
    exactly two pieces the second one is used, in every other case the
    first. The chosen piece is looked up in US_STATE_ABBREV and returned
    unchanged when it is not a key there.
    """
    if pd.isnull(state):
        return state

    pieces = [piece.strip() for piece in state.split(",")]
    if len(pieces) == 2:
        candidate = pieces[1]
    else:
        candidate = pieces[0]
    return US_STATE_ABBREV.get(candidate, candidate)
Esempio n. 7
0
def _get_abbrev_df():
    """
    Return US_STATE_ABBREV as a two-column DataFrame.

    Each (key, value) item of the mapping becomes one row, with the key in
    the "state" column and the value in the "abbreviation" column.
    """
    # TODO: read this from a dataset class
    state_pairs = list(US_STATE_ABBREV.items())
    return pd.DataFrame(state_pairs, columns=["state", "abbreviation"])