def _intercept_shift(acause, y_star, years, measure, gbd_round_id,
                     draws=100, no_arima=False, past_version="best"):
    """Incorporate past uncertainty by intercept-shifting future draws.

    Every draw of ``y_star`` is shifted by a draw-level residual computed
    from the last year of the past. When an ARIMA was run (``no_arima`` is
    False) the residual is the last-past-year data draw minus the
    last-past-year data mean; otherwise it is the residual between the
    modeled past draws and the past data draws.

    :param str acause: name of the acause for which the intercept shift is
        desired.
    :param xarray.DataArray y_star: the draw-level arima-ed estimates for
        the log of the measure and acause in question.
    :param years: year container with ``past_start`` / ``forecast_start`` /
        ``forecast_end`` attributes (e.g. fbd_core.argparse.YearRange).
    :param str measure: measure whose past data is read.
    :param int gbd_round_id: GBD round of the past data.
    :param int draws: number of draws to request from the past data.
    :param bool no_arima: shift against the modeled past draws instead of
        the past-data mean.
    :param str past_version: version label of the past data.
    :return xarray.DataArray: ``y_star`` with the intercept shift applied.
    """
    data_draws = _get_y_past(
        acause, years, measure, gbd_round_id, draws=draws,
        draw_level=True, last_year_only=True, past_version=past_version)
    last_past_year = years.forecast_start - 1
    fit_draws = y_star.loc[
        {"year_id": last_past_year, "scenario": 0}].drop("scenario")
    # Everything is resampled to the draw count of the modeled estimates so
    # the subtraction below aligns draw-for-draw.
    target_draws = len(fit_draws.draw)
    if no_arima:
        # No ARIMA ran: shift by the draw-level residual of the modeled
        # past against the past data.
        fit_resampled = resample(fit_draws, target_draws)
        data_resampled = resample(data_draws, target_draws)
        data_resampled = data_resampled.transpose(
            *fit_resampled.coords.dims)
        offset = (fit_resampled - data_resampled).drop("year_id")
    else:
        # ARIMA ran: shift by the residual of the past-data draws to the
        # past-data mean (data only, not modeled draws or means).
        data_mean = data_draws.mean("draw")
        data_resampled = resample(data_draws, target_draws)
        offset = -1. * (data_resampled - data_mean).drop("year_id")
    return y_star - offset
def intercept_shift_at_draw(preds, acause, past_version, gbd_round_id, years,
                            draws):
    """Intercept shift at the draw level for GK results used in mortality.

    :param xarray.DataArray preds: modeled predictions to shift.
    :param str acause: cause whose past file ("{acause}_hat.nc") is read.
    :param past_version: version label of the past data (presumably encoded
        in FILEPATH — confirm against FILEPATH's definition).
    :param gbd_round_id: GBD round id (not referenced directly in this body).
    :param years: year container with ``past_end`` / ``forecast_end``.
    :param int draws: number of draws to resample both inputs to.
    :return: the shifted predictions from ``shift_draws``.
    """
    past_file = FILEPATH / f"{acause}_hat.nc"
    last_past = xr.open_dataset(str(past_file)).sel(
        year_id=years.past_end)["value"]
    last_past = resample(last_past, draws)
    preds = resample(preds, draws)
    # Reference-scenario draws at the two ends of the modeled window.
    ref_last = preds.sel(
        year_id=years.forecast_end, scenario=0).drop("scenario")
    ref_first = preds.sel(
        year_id=years.past_end, scenario=0).drop("scenario")
    return shift_draws(preds, ref_last, ref_first, last_past)
def _get_y_star(y_hat, epsilon_hat, years):
    """Returns draws of mortality or yld rates with estimated uncertainty.

    :param xarray.DataArray y_hat: expected value of mortality or yld rates.
    :param xarray.DataArray epsilon_hat: expected value of error.
    :param fbd_core.argparse.YearRange years: a container for the three
        years which define our forecast.
    :return xarray.DataArray: draws of mortality or yld rates with
        estimated uncertainty.
    """
    logger.info("Creating y_star by adding y_hat with epsilon_hat.")
    logger.debug("Make sure y_hat has the right number of draws.")
    n_draws = len(epsilon_hat.coords["draw"])
    resampled_hat = resample(y_hat, n_draws)
    # Align dimension order with epsilon_hat (plus the trailing scenario
    # dim) so the addition below broadcasts correctly.
    dim_order = list(epsilon_hat.coords.dims) + ["scenario"]
    resampled_hat = resampled_hat.transpose(*dim_order)
    # Correlate the time-series draws with the modeled estimates so the
    # combined uncertainty is coherent.
    correlated_eps = ar1_utils.correlate_draws(
        epsilon_hat.copy(), resampled_hat.copy(), years)
    return resampled_hat + correlated_eps
def _get_y_past(acause, years, measure, gbd_round_id, draw_level=False,
                draws=None, last_year_only=False, past_version=PAST_VERSION):
    """Gets past cause specific mortality or yld rates.

    Past data is saved in normal rate space.

    NOTE(review): an earlier docstring claimed the result is returned in
    log rate space, but no log transform happens in this function —
    confirm the expected space against the callers.

    :param str acause: name of the target acause to aggregate to.
    :param fbd_core.argparse.YearRange years: a container for the three
        years which define our forecast.
    :param str measure: the measure (e.g. mortality or yld) being read.
    :param int gbd_round_id: GBD round of the past data.
    :param bool draw_level: whether the past should be retrieved at the
        draw level (default is mean).
    :param int draws: number of draws to resample to; only used when
        ``draw_level`` is True.
    :param bool last_year_only: whether to only get data from the last
        past year.
    :param str past_version: version label of the past data.
    :return xarray.DataArray: past cause specific mortality or yld rates,
        draw-level (resampled to ``draws``) or averaged over draws.
    """
    logger.info("Getting past data from {} for years {}-{}.".format(
        FILEPATH, years.past_start, years.forecast_start - 1))
    y_past = xr.open_dataarray(str(FILEPATH))
    # Select the years of interest: just the last past year, or all of them.
    if last_year_only:
        past_years = years.forecast_start - 1
    else:
        past_years = years.past_years
    if draw_level:
        return resample(y_past.loc[dict(year_id=past_years)], draws)
    else:
        return y_past.loc[dict(year_id=past_years)].mean("draw")
def read_xarray_sev(risk, date):
    """
    Read SEV in an xarray format.

    Args:
        risk (str): risk name.
        date (str): date str indicating the folder where data comes from.

    Returns:
        ds (xarray.Dataset): contains sev values, indexed by demography
            dims; resampled to NUMBER_OF_DRAWS if the file has a different
            draw count.
    """
    inpath = os.path.join(INDIR_SEV.format(d=date), '{}.nc'.format(risk))
    # We need to use open_dataset if there are more than one variable, like
    # summary data (mean, median, lower, upper).
    ds = xr.open_dataset(inpath)
    num_of_draws_in = len(ds.coords["draw"])
    if num_of_draws_in != NUMBER_OF_DRAWS:
        # BUG FIX: in Python 3, ``dict.keys()`` returns a view that cannot
        # be indexed, so ``ds.data_vars.keys()[0]`` raised TypeError. Take
        # the first data variable via an iterator instead.
        da_name = next(iter(ds.data_vars))
        da = resample(ds[da_name], NUMBER_OF_DRAWS)
        ds = da.to_dataset()
    return ds
def _get_modeled_y_hat(acause, version, measure, period, gbd_round_id, draws):
    """Gets mortality or yld data for a modeled acause.

    For modeled causes, if the data is split by sex, then it is assumed
    that it is in log rate space. If the data is not split by sex, then it
    is assumed that it is in normal rate space.

    :param str acause: acause for a modeled acause.
    :param str version: name of the mortality or yld version which modeled
        this acause.
    :param str measure: measure being read (presumably encoded in FILEPATH
        — confirm against FILEPATH's definition).
    :param str period: "past" reads the past-data file; anything else reads
        the modeled (future) results.
    :param int gbd_round_id: GBD round id (not referenced directly here).
    :param int draws: number of draws to resample to.
    :return: the mortality or yld data for acause, in log rate space.
        NOTE(review): the one-sex branch returns a Dataset (via
        ``to_dataset``) while the others return a DataArray — confirm
        callers handle both.
    """
    if period == "past":
        input_file = FILEPATH / "{}.nc".format(acause)
        # Past data is in normal rate space; floor then log it.
        y_hat_exp = xr.open_dataset(str(input_file))["value"] + FLOOR
        y_hat_exp = resample(y_hat_exp, draws)
        y_hat = xr.ufuncs.log(y_hat_exp)
        y_hat.coords["acause"] = acause
    else:
        try:
            logger.info(
                "No children. y_hat is from mort/yld file {}".format(
                    FILEPATH))
            # Because the data is modeled and not split by sex, it is saved
            # in normal rate space. Log it.
            y_hat_exp = xr.open_dataarray(str(FILEPATH))
            y_hat_exp = resample(y_hat_exp, draws)
            y_hat = xr.ufuncs.log(y_hat_exp + FLOOR)
            # some of the yld files are missing acause, so add that info
            y_hat.coords["acause"] = acause
        except IOError:
            # Modeled data is split by sex.
            input_files = [
                FILES for FILES in POTENTIAL_FILES if FILES.exists()
            ]
            logger.info("Input results are split by sex. Files are {}".format(
                input_files))
            if len(input_files) == 1:
                logger.info("This is a sex specific cause. Gotta give it a "
                            "real coordinate on sex.")
                # Infer the sex from the filename: 2 = female, 1 = male.
                if "female" in input_files[0].as_posix():
                    sex_id = 2
                else:
                    sex_id = 1
                dataarray_one_sex = xr.open_dataarray(
                    str(input_files[0]), drop_variables=["measure", "cov"])
                dataarray_one_sex = resample(dataarray_one_sex, draws)
                # Prepend a length-1 sex_id dimension so the single-sex
                # data matches the two-sex layout.
                new_vals = np.expand_dims(dataarray_one_sex.values, 0)
                new_dims = ["sex_id"] + list(dataarray_one_sex.dims)
                logger.info("New dimensions: {}".format(new_dims))
                new_coords = ([[sex_id]] + [
                    coord.values for coord in list(
                        dataarray_one_sex.coords.indexes.values())
                ])
                y_hat = xr.DataArray(
                    new_vals, dims=new_dims,
                    coords=new_coords).to_dataset(name="value")
                y_hat.coords["acause"] = acause
            elif len(input_files) == 2:
                y_hat = xr.open_mfdataset(
                    [str(input_file) for input_file in input_files],
                    concat_dim="sex_id",
                    drop_variables=["measure", "cov"])
                y_hat = resample(
                    y_hat[list(y_hat.data_vars.keys())[0]], draws)
            else:
                logger.error((
                    "{} has no modeled mortality/ylds for version {}. "
                    "ruh-roh."
                ).format(acause, version))
                raise Exception("Modeled acause has no saved results.")
            # if data are split by sex, they are in log space. convert back
            # to regular space to add the floor
            # NOTE(review): the collapsed original is ambiguous about
            # whether this conversion sat inside the except block or at
            # function level; the comment above implies the split-by-sex
            # (except) path, which is what is reconstructed here — confirm.
            y_hat = xr.ufuncs.log(xr.ufuncs.exp(y_hat) + FLOOR)
    return y_hat