Example n. 1
0
def _intercept_shift(acause,
                     y_star,
                     years,
                     measure,
                     gbd_round_id,
                     draws=100,
                     no_arima=False,
                     past_version="best"):
    """Fold past uncertainty into future draws via an intercept shift.

    Every future draw is offset by a draw-level residual computed at the last
    past year. For non-ntd causes the residual compares the last-past-year
    data draws with the last-past-year mean (data only, not modeled draws or
    means); for ntds it is the draw-level residual between the modeled past
    (draws) and the past data (draws).

    :param str acause: name of the acause for which the intercept shift is
        desired
    :param xarray.DataArray y_star: the draw-level arima-ed estimates for the
        log of the measure and acause in question
    :param list[int] years: [past_start, forecast_start, forecast_end]
    """
    last_past_year = years.forecast_start - 1
    past_draws = _get_y_past(acause,
                             years,
                             measure,
                             gbd_round_id,
                             draws=draws,
                             draw_level=True,
                             last_year_only=True,
                             past_version=past_version)
    modeled_draws = y_star.loc[{
        "year_id": last_past_year,
        "scenario": 0
    }].drop("scenario")

    # resample everything to the draw count of the modeled estimates
    num_draws = len(modeled_draws.draw)

    # the shift comes from the past data when there was an arima, and from
    # the modeled past when there wasn't
    if no_arima:
        modeled_resampled = resample(modeled_draws, num_draws)
        past_resampled = resample(past_draws, num_draws).transpose(
            *modeled_resampled.coords.dims)
        shift = (modeled_resampled - past_resampled).drop("year_id")
    else:
        past_resampled = resample(past_draws, num_draws)
        shift = -1. * (past_resampled -
                       past_draws.mean("draw")).drop("year_id")

    # apply the residual shift to every future draw
    return y_star - shift
def intercept_shift_at_draw(preds, acause, past_version, gbd_round_id, years,
                            draws):
    """Apply a draw-level intercept shift to GK results used in mortality.

    Loads the modeled past for ``acause``, resamples both past and predicted
    data to ``draws`` draws, and delegates the actual shifting to
    ``shift_draws``.
    """
    past_file = FILEPATH / f"{acause}_hat.nc"
    past = xr.open_dataset(str(past_file)).sel(
        year_id=years.past_end)["value"]
    past = resample(past, draws)
    preds = resample(preds, draws)
    last_modeled = preds.sel(year_id=years.forecast_end,
                             scenario=0).drop("scenario")
    first_modeled = preds.sel(year_id=years.past_end,
                              scenario=0).drop("scenario")
    return shift_draws(preds, last_modeled, first_modeled, past)
Example n. 3
0
def _get_y_star(y_hat, epsilon_hat, years):
    """Returns draws of mortality or yld rates with estimated uncertainty.

    :param xarray.DataArray y_hat: expected value of mortality or yld rates.
    :param xarray.DataArray epsilon_hat: expected value of error.
    :param fbd_core.argparse.YearRange years: a container for the three years
        which define our forecast.
    :return xarray.DataArray: draws of mortality or yld rates with estimated
        uncertainty.
    """
    logger.info("Creating y_star by adding y_hat with epsilon_hat.")
    logger.debug("Make sure y_hat has the right number of draws.")
    num_draws = len(epsilon_hat.coords["draw"])
    resampled_y_hat = resample(y_hat, num_draws)
    # order the dimensions the same way as epsilon_hat, scenario last
    target_dims = list(epsilon_hat.coords.dims) + ["scenario"]
    resampled_y_hat = resampled_y_hat.transpose(*target_dims)
    # correlate the time series draws with modeled estimates for uncertainty
    correlated_epsilon = ar1_utils.correlate_draws(epsilon_hat.copy(),
                                                   resampled_y_hat.copy(),
                                                   years)
    return resampled_y_hat + correlated_epsilon
Example n. 4
0
def _get_y_past(acause,
                years,
                measure,
                gbd_round_id,
                draw_level=False,
                draws=None,
                last_year_only=False,
                past_version=PAST_VERSION):
    """Gets past cause specific mortality or yld rates.

    NOTE(review): the previous docstring claimed the data is returned in log
    rate space, but no log transform is applied here -- the data is returned
    in whatever space it is stored in at FILEPATH. Confirm against the
    producer of that file.

    :param str acause: name of the target acause to aggregate to.
    :param fbd_core.argparse.YearRange years: a container for the three years
        which define our forecast; the past is ``years.past_start`` through
        ``years.forecast_start - 1``, inclusive.
    :param str measure: measure to read (not referenced in the visible body;
        presumably used to build FILEPATH -- TODO confirm).
    :param int gbd_round_id: GBD round of the data (presumably used to build
        FILEPATH -- TODO confirm).
    :param bool draw_level: whether the past should be retrieved at the draw
        level (default is mean).
    :param int draws: number of draws to resample to when ``draw_level`` is
        True; ignored otherwise.
    :param bool last_year_only: whether to only get data from the last past
        year.
    :param str past_version: version of the past data to read (presumably
        used to build FILEPATH -- TODO confirm).
    :return xarray.DataArray: the cause specific mortality or yld rate --
        resampled draws if ``draw_level``, otherwise the mean over draws.
    """
    logger.info("Getting past data from {} for years {}-{}.".format(
        FILEPATH, years.past_start, years.forecast_start - 1))
    y_past = xr.open_dataarray(str(FILEPATH))

    # select the years of interest
    if last_year_only:
        past_years = years.forecast_start - 1
    else:
        past_years = years.past_years
    if draw_level:
        return resample(y_past.loc[dict(year_id=past_years)], draws)
    else:
        return y_past.loc[dict(year_id=past_years)].mean("draw")
Example n. 5
0
def read_xarray_sev(risk, date):
    """
    Read SEV in an xarray format.

    If the stored draw count differs from NUMBER_OF_DRAWS, the (single)
    data variable is resampled to NUMBER_OF_DRAWS draws.

    Args:
        risk (str): risk name.
        date (str): date str indicating the folder where data comes from.

    Returns:
        ds (xarray.Dataset): contains sev values, indexed by demography dims.
    """
    inpath = os.path.join(INDIR_SEV.format(d=date), '{}.nc'.format(risk))
    # We need to use open_dataset if there are more than one variable, like
    # summary data (mean, median, lower, upper).
    ds = xr.open_dataset(inpath)

    num_of_draws_in = len(ds.coords["draw"])
    if num_of_draws_in != NUMBER_OF_DRAWS:
        # Dict views are not subscriptable in Python 3 -- the original
        # ds.data_vars.keys()[0] raised TypeError. Materialize the first
        # variable name instead.
        da_name = list(ds.data_vars)[0]
        da = resample(ds[da_name], NUMBER_OF_DRAWS)
        ds = da.to_dataset()

    return ds
Example n. 6
0
def _get_modeled_y_hat(acause, version, measure, period, gbd_round_id, draws):
    """Gets mortality or yld data for a modeled acause, in log rate space.

    For modeled causes, if the data is split by sex, then it is assumed that
    it is in log rate space. If the data is not split by sex, then it is
    assumed that it is in normal rate space.

    :param str acause: acause for a modeled acause.
    :param str version: name of the mortality or yld version which modeled
        this acause (only used in the error message here).
    :param str measure: measure being read -- not referenced in the visible
        body; presumably used to build FILEPATH (TODO confirm).
    :param str period: "past" reads past estimates; any other value reads
        forecast results.
    :param int gbd_round_id: GBD round id -- not referenced in the visible
        body; presumably used to build FILEPATH (TODO confirm).
    :param int draws: number of draws to resample the data to.
    :return: the mortality or yld data for acause in log rate space.
        NOTE(review): the single-sex branch returns a Dataset (via
        ``to_dataset(name="value")``) while the other branches return a
        DataArray -- confirm callers handle both.
    :raises Exception: if the forecast data is split by sex but neither
        sex-specific file exists.
    """
    if period == "past":
        input_file = FILEPATH / "{}.nc".format(acause)
        # Past data is in normal rate space; floor it, then log it.
        y_hat_exp = xr.open_dataset(str(input_file))["value"] + FLOOR
        y_hat_exp = resample(y_hat_exp, draws)
        y_hat = xr.ufuncs.log(y_hat_exp)
        y_hat.coords["acause"] = acause
    else:
        try:
            logger.info(
                "No children. y_hat is from mort/yld file {}".format(FILEPATH))
            # Because the data is modeled and not split by sex, it is saved in
            # normal rate space. Log it.
            y_hat_exp = xr.open_dataarray(str(FILEPATH))
            y_hat_exp = resample(y_hat_exp, draws)
            y_hat = xr.ufuncs.log(y_hat_exp + FLOOR)
            # some of the yld files are missing acause, so add that info
            y_hat.coords["acause"] = acause

        except IOError:  # Modeled data is split by sex.
            # Keep only the sex-specific files that actually exist on disk.
            input_files = [
                FILES for FILES in POTENTIAL_FILES if FILES.exists()
            ]
            logger.info("Input results are split by sex. Files are {}".format(
                input_files))

            if len(input_files) == 1:
                logger.info("This is a sex specific cause. Gotta give it a "
                            "real coordinate on sex.")
                # Infer the sex from the file path (2=female, 1=male).
                if "female" in input_files[0].as_posix():
                    sex_id = 2
                else:
                    sex_id = 1
                dataarray_one_sex = xr.open_dataarray(
                    str(input_files[0]), drop_variables=["measure", "cov"])
                dataarray_one_sex = resample(dataarray_one_sex, draws)
                # Prepend a length-1 sex_id dimension so the result has the
                # same dims as the two-sex case.
                new_vals = np.expand_dims(dataarray_one_sex.values, 0)
                new_dims = ["sex_id"] + list(dataarray_one_sex.dims)
                logger.info("New dimensions: {}".format(new_dims))
                new_coords = ([[sex_id]] + [
                    coord.values for coord in list(
                        dataarray_one_sex.coords.indexes.values())
                ])
                y_hat = xr.DataArray(
                    new_vals, dims=new_dims,
                    coords=new_coords).to_dataset(name="value")
                y_hat.coords["acause"] = acause

            elif len(input_files) == 2:
                # Both sexes present: concatenate the two files along sex_id
                # and take the first (only) data variable.
                y_hat = xr.open_mfdataset(
                    [str(input_file) for input_file in input_files],
                    concat_dim="sex_id",
                    drop_variables=["measure", "cov"])
                y_hat = resample(y_hat[list(y_hat.data_vars.keys())[0]], draws)

            else:
                logger.error((
                    "{} has no modeled mortality/ylds for version {}. ruh-roh."
                ).format(acause, version))
                raise Exception("Modeled acause has no saved results.")
            # if data are split by sex, they are in log space. convert back to
            # regular space to add the floor
            y_hat = xr.ufuncs.log(xr.ufuncs.exp(y_hat) + FLOOR)
    return y_hat