def get_state_projections_df(input_dir, initial_intervention_type, state_interventions_df):
    """Build per-state projection rows from the results generated by run.py.

    For each state abbreviation, resolve which intervention type applies,
    locate the corresponding result file in ``input_dir``, and calculate the
    projection columns (columns=CALCULATED_PROJECTION_HEADERS_STATES).

    Args:
        input_dir: directory containing the per-state result files.
        initial_intervention_type: intervention used when a state has no
            specific override in ``state_interventions_df``.
        state_interventions_df: per-state intervention overrides.

    Returns:
        DataFrame with one row per state; rows whose input file could not be
        processed contain nulls.
    """
    states_df = pd.DataFrame(US_STATE_ABBREV.values(), columns=["state"])
    states_df.loc[:, "intervention_type"] = states_df.state.apply(
        lambda x: _get_intervention_type(initial_intervention_type, x, state_interventions_df)
    )
    states_df.loc[:, "path"] = states_df.apply(
        lambda x: get_file_path(input_dir, x.state, x.intervention_type, fips=None),
        axis=1,
    ).values
    # parallel_apply: pandarallel fan-out of the per-state calculation.
    new_df = states_df.parallel_apply(
        lambda x: _calculate_projection_data(x.state, x.path, fips=None), axis=1
    )
    # BUG FIX: the previous code reported the count of *processed* states
    # (non-null "State" entries) as the number "Missing". Report the states
    # that actually failed to produce data instead.
    num_processed_states = new_df.notnull().sum()["State"]
    num_missing_states = len(new_df) - num_processed_states
    print(
        f"Missing {num_missing_states} states were in input_dir: {input_dir}"
    )
    return new_df
def _raise_error_if_not_data_from_all_states(key, df, expected_missing):
    """Raise DataExportException unless df covers exactly the expected states.

    Compares the 'Province/State' values in ``df`` against US_STATE_ABBREV,
    tolerating the states listed in ``expected_missing``.
    """
    expected = set(US_STATE_ABBREV.keys())
    present = set(df['Province/State'].unique())
    missing = expected - present - expected_missing
    extra = present - expected
    if not (missing or extra):
        return
    raise DataExportException(
        key,
        f"Missing Data from states: {missing}. Have extra states in df {extra}"
    )
def _raise_error_if_not_data_from_all_states(key, df, expected_missing):
    """Raise DataExportException unless df covers exactly the expected states.

    Uses the CommonFields.STATE_FULL_NAME column, tolerating the states
    listed in ``expected_missing``.
    """
    expected = set(US_STATE_ABBREV.keys())
    present = set(df[CommonFields.STATE_FULL_NAME].unique())
    missing = expected - present - expected_missing
    extra = present - expected
    if not (missing or extra):
        return
    raise DataExportException(
        key,
        f"Missing Data from states: {missing}. Have extra "
        f"states in df {extra}",
    )
def get_state_projections_df(input_dir, initial_intervention_type, state_interventions_df):
    """Create per-state projection rows from the run.py results in input_dir.

    For every state abbreviation, pick the applicable intervention type,
    read ``{state}.{intervention}.json`` from ``input_dir`` if present, and
    collect the calculated projection data.

    Returns:
        DataFrame with columns CALCULATED_PROJECTION_HEADERS_STATES; states
        whose file yielded no data are omitted.
    """
    rows = []
    for abbrev in US_STATE_ABBREV.values():
        intervention = _get_intervention_type(
            initial_intervention_type, abbrev, state_interventions_df)
        result_path = os.path.join(input_dir, f"{abbrev}.{intervention}.json")
        # _calculate_projection_data returns a falsy value when the file is
        # absent or unusable; such states are skipped.
        projection = _calculate_projection_data(result_path)
        if projection:
            rows.append([abbrev] + projection)
    return pd.DataFrame(rows, columns=CALCULATED_PROJECTION_HEADERS_STATES)
def validate_results(result_dir: str) -> None:
    """Check every supported state has a non-empty file per intervention.

    For each state (excluding UNSUPPORTED_REGIONS) and each intervention
    index, the file ``{state}.{index}.json`` must exist in ``result_dir``
    and be non-empty.

    Raises:
        RuntimeError: listing every missing or zero-byte file.
    """
    interventions_per_state = len(build_params.get_interventions())
    problems = []
    for abbrev in US_STATE_ABBREV.values():
        if abbrev in UNSUPPORTED_REGIONS:
            continue
        for index in range(interventions_per_state):
            path = os.path.join(result_dir, ".".join([abbrev, str(index), "json"]))
            try:
                if os.stat(path).st_size == 0:
                    problems.append(path)
            except FileNotFoundError:
                problems.append(path)
    if problems:
        raise RuntimeError(
            f'Missing or empty expected files: {", ".join(problems)}')
def parse_state(state):
    """Normalize a state string (possibly "City, State") to its abbreviation.

    Null-ish input is returned unchanged. For comma-separated values of the
    form "X, Y", the second piece is treated as the state name. Names not
    found in US_STATE_ABBREV pass through as-is.
    """
    if pd.isnull(state):
        return state
    pieces = [part.strip() for part in state.split(",")]
    name = pieces[1] if len(pieces) == 2 else pieces[0]
    return US_STATE_ABBREV.get(name, name)
def _get_abbrev_df():
    """Return a DataFrame mapping each state name to its abbreviation."""
    # TODO: read this from a dataset class
    pairs = list(US_STATE_ABBREV.items())
    return pd.DataFrame(pairs, columns=["state", "abbreviation"])