Exemple #1
0
def _get_thresholds(stratifications: List[Tuple], means: pd.DataFrame,
                    sds: pd.DataFrame, weights_df: pd.DataFrame,
                    draw: int) -> pd.Series:
    col = f'draw_{draw}'
    thresholds = pd.Series(0, index=means.index, name=col)

    ts = time.time()
    print(f'Start: {ts}')

    for i, stratification in enumerate(stratifications):
        mu = means.loc[stratification, col]
        sigma = sds.loc[stratification, col]
        threshold = 0
        if mu and sigma:
            weights = weights_df.loc[stratification].reset_index()
            weights = (weights[weights['parameter'] != 'glnorm'].
                       loc[:, ['parameter', 'value']].set_index(
                           'parameter').to_dict()['value'])
            weights = {k: [v] for k, v in weights.items()}
            ens_dist = EnsembleDistribution(weights=weights, mean=mu, sd=sigma)
            threshold = minimize(lambda x: (ens_dist.ppf(x) - 7)**2, [0.5],
                                 bounds=Bounds(0, 1.0),
                                 method='Nelder-Mead').x[0]

        print(f'mu: {mu}, sigma: {sigma}, threshold: {threshold}')
        thresholds.loc[stratification] = threshold

    tf = time.time()
    print(f'End: {tf}')
    print(f'Duration: {tf - ts}')

    return thresholds
Exemple #2
0
def calc_hypertensive(location, draw):
    """Compute the proportion above ``HYPERTENSION_THRESHOLD`` for one draw.

    Loads the systolic blood pressure exposure mean and standard deviation
    for ``draw`` from the location's artifact, builds an
    ``EnsembleDistribution`` from them and the prepared weights, and
    evaluates ``1 - cdf(threshold)``.

    Args:
        location: Folder name under ``HYPERTENSION_DATA_FOLDER`` that holds
            ``data.hdf``.
        draw: Draw number; used both in the artifact filter term and in the
            ``draw_{draw}`` column name.

    Returns:
        The proportions, named ``draw_{draw}``, carrying the exposure's
        original index with the ``'parameter'`` level dropped and missing
        values replaced by 0.
    """
    draw_col = f'draw_{draw}'
    artifact_file = HYPERTENSION_DATA_FOLDER / f'{location}/data.hdf'
    art = Artifact(str(artifact_file), filter_terms=[f'draw=={draw}'])

    # Indices can be dropped: the original author relies on the data being
    # sorted in vivarium_inputs, which keeps means/sds/weights aligned.
    exposure = art.load('risk_factor.high_systolic_blood_pressure.exposure')
    # Kept aside so the proportions can be re-labelled at the end.
    demographic_index = exposure.index
    exposure = exposure.reset_index(drop=True)
    exposure_sd = art.load(
        'risk_factor.high_systolic_blood_pressure.exposure_standard_deviation')
    exposure_sd = exposure_sd.reset_index(drop=True)

    # The weights are the same for all draws.
    weights = prep_weights(art)
    cutoff = pd.Series(HYPERTENSION_THRESHOLD, index=exposure.index)

    dist = EnsembleDistribution(weights=weights,
                                mean=exposure[draw_col],
                                sd=exposure_sd[draw_col])
    # The cdf gives the share at or below the cutoff; we want the share above.
    share_below = dist.cdf(cutoff)
    props = (1 - share_below).fillna(0)

    props.index = demographic_index
    props.name = draw_col
    return props.droplevel('parameter').fillna(0)
 def ppf(self, q):
     """Evaluate the ensemble percent point function for the quantiles ``q``.

     Args:
         q: pandas object of quantiles; its index is passed to
             ``self.weights`` and to every callable in ``self.parameters``
             to obtain the weights and parameters for those rows.

     Returns:
         The values returned by ``EnsembleDistribution(...).ppf`` with
         null entries replaced by 0, or an empty float series when ``q``
         is empty.
     """
     if q.empty:
         # Explicit dtype: a bare ``pd.Series([])`` falls back to a
         # version-dependent default dtype (object in pandas 2).
         return pd.Series([], dtype=float)

     # ``clip`` is defined outside this block; presumably it keeps the
     # quantiles away from 0 and 1 -- TODO confirm.
     q = clip(q)
     weights = self.weights(q.index)
     parameters = {
         name: parameter(q.index)
         for name, parameter in self.parameters.items()
     }
     x = EnsembleDistribution(weights, parameters).ppf(q)
     x[x.isnull()] = 0
     return x
    def find_rr(weights,
                mean,
                standard_dev,
                attributable_fraction,
                sample_size=10000):
        """Search for the base whose mean ``base ** y`` matches the target.

        The target is ``1 / (1 - attributable_fraction)``.  Exposures are
        drawn by evaluating the ensemble's ``ppf`` at ``sample_size``
        random quantiles in ``[0.01, 0.99)``; each exposure is turned into
        an exponent ``max(x - tmrel, 0) / scale``.  ``tmrel`` and ``scale``
        are read from the enclosing scope, which is not part of this block.

        Args:
            weights: Ensemble weights passed to ``EnsembleDistribution``.
            mean: Mean passed to ``EnsembleDistribution``.
            standard_dev: Standard deviation passed as ``sd``.
            attributable_fraction: Fraction defining the target; a value
                of 1 makes the target undefined (division by zero).
            sample_size: Number of random quantiles to sample.

        Returns:
            Whatever ``optimize.minimize`` returns for the squared-error
            loss started at 2 (presumably a scipy ``OptimizeResult`` --
            confirm which ``optimize`` module is in scope).
        """
        target = 1 / (1 - attributable_fraction)

        dist = EnsembleDistribution(weights, mean=mean, sd=standard_dev)
        q = .98 * np.random.random(sample_size) + 0.01
        x_ = dist.ppf(q)
        # Independent of the guess, so computed once rather than on every
        # loss evaluation.
        y = np.maximum(x_ - tmrel, 0) / scale

        def loss(guess):
            mean_rr = 1 / sample_size * np.sum(guess**y)
            return (mean_rr - target)**2

        return optimize.minimize(loss, 2)
 def _get_parameters(self, weights, mean, sd):
     """Compute ensemble weights and parameters from flat input tables.

     The three inputs are indexed by the demographic columns (``mean``
     and ``sd`` are reduced to their ``'value'`` column) before being
     handed to ``EnsembleDistribution.get_parameters``.

     Returns:
         A tuple ``(weights, parameters)`` where ``weights`` is the
         returned weights with its index reset, and ``parameters`` maps
         each name to the corresponding returned object with its index
         reset.
     """
     demographic_cols = [
         'sex', 'age_start', 'age_end', 'year_start', 'year_end'
     ]
     indexed_weights = weights.set_index(demographic_cols)
     mean_values = mean.set_index(demographic_cols)['value']
     sd_values = sd.set_index(demographic_cols)['value']

     fitted_weights, fitted_parameters = EnsembleDistribution.get_parameters(
         indexed_weights, mean=mean_values, sd=sd_values)

     # Move the demographic index back into ordinary columns.
     flat_weights = fitted_weights.reset_index()
     flat_parameters = {}
     for name, frame in fitted_parameters.items():
         flat_parameters[name] = frame.reset_index()
     return flat_weights, flat_parameters
def get_dist(dist_params, stratification, draw):
    """Build the distribution for one stratification and draw.

    Args:
        dist_params: Either a DataFrame, in which case the cell at
            ``(stratification, draw)`` is wrapped in an ``IKFDist``, or an
            object exposing ``mean``, ``sd`` and ``weights`` frames.
        stratification: Row key used with ``.loc``.
        draw: Column key used with ``.loc``.

    Returns:
        An ``IKFDist`` for DataFrame input; otherwise an
        ``EnsembleDistribution`` built from the stratification's weights
        (the ``'glnorm'`` parameter is excluded), or ``None`` when the mean
        or the standard deviation is falsy.
    """
    if isinstance(dist_params, pd.DataFrame):
        return IKFDist(dist_params.loc[stratification, draw])

    mu = dist_params.mean.loc[stratification, draw]
    sigma = dist_params.sd.loc[stratification, draw]
    if not mu or not sigma:
        return None

    weight_rows = dist_params.weights.loc[stratification].reset_index()
    weight_rows = weight_rows[weight_rows['parameter'] != 'glnorm']
    by_parameter = (weight_rows.loc[:, ['parameter', 'value']]
                    .set_index('parameter'))
    weight_map = by_parameter.to_dict()['value']
    # One-element lists per parameter, as passed to the ensemble.
    ensemble_weights = {name: [value] for name, value in weight_map.items()}
    return EnsembleDistribution(weights=ensemble_weights, mean=mu, sd=sigma)