Exemple #1
0
def _get_thresholds(stratifications: List[Tuple], means: pd.DataFrame,
                    sds: pd.DataFrame, weights_df: pd.DataFrame,
                    draw: int) -> pd.Series:
    """Compute one threshold per stratification for a single draw.

    For each stratification whose mean and standard deviation are both
    truthy (non-zero), an ``EnsembleDistribution`` is built from that
    stratification's weights and the quantile ``x`` minimizing
    ``(ens_dist.ppf(x) - 7) ** 2`` is searched for, starting from 0.5 with
    bounds ``[0, 1]``. Stratifications with a zero mean or zero standard
    deviation get a threshold of 0.

    Progress and timing information is printed to stdout.

    Args:
        stratifications: Index keys used with ``.loc`` on ``means``, ``sds``
            and ``weights_df``.
        means: Frame holding a ``draw_{draw}`` column of means.
        sds: Frame holding a ``draw_{draw}`` column of standard deviations.
        weights_df: Frame which, once selected by stratification and reset,
            exposes ``parameter`` and ``value`` columns.
        draw: Draw number; selects the ``draw_{draw}`` column.

    Returns:
        A float Series named ``draw_{draw}`` indexed like ``means``. Entries
        not listed in ``stratifications`` remain 0.
    """
    col = f'draw_{draw}'
    # Start from a float Series: optimizer results are floats, and writing a
    # float into an int64 Series is a deprecated upcast in recent pandas.
    thresholds = pd.Series(0.0, index=means.index, name=col)

    ts = time.time()
    print(f'Start: {ts}')

    for stratification in stratifications:
        mu = means.loc[stratification, col]
        sigma = sds.loc[stratification, col]
        threshold = 0
        if mu and sigma:
            weights = weights_df.loc[stratification].reset_index()
            # The 'glnorm' row is dropped before the ensemble is built.
            weights = (weights[weights['parameter'] != 'glnorm'].
                       loc[:, ['parameter', 'value']].set_index(
                           'parameter').to_dict()['value'])
            # Each weight is handed over wrapped in a one-element list.
            weights = {k: [v] for k, v in weights.items()}
            ens_dist = EnsembleDistribution(weights=weights, mean=mu, sd=sigma)
            # Find the quantile whose ppf value is closest to 7.
            threshold = minimize(lambda x: (ens_dist.ppf(x) - 7)**2, [0.5],
                                 bounds=Bounds(0, 1.0),
                                 method='Nelder-Mead').x[0]

        print(f'mu: {mu}, sigma: {sigma}, threshold: {threshold}')
        thresholds.loc[stratification] = threshold

    tf = time.time()
    print(f'End: {tf}')
    print(f'Duration: {tf - ts}')

    return thresholds
    def find_rr(weights,
                mean,
                standard_dev,
                attributable_fraction,
                sample_size=10000):
        """Fit the base ``guess`` so that the sample mean of ``guess ** y``
        matches ``1 / (1 - attributable_fraction)``.

        ``sample_size`` values are drawn from an ``EnsembleDistribution``
        (built from ``weights``, ``mean`` and ``standard_dev``) at random
        quantiles in ``[0.01, 0.99)``. The exponent ``y`` is the part of each
        sampled value above ``tmrel``, floored at 0 and divided by ``scale``.

        NOTE(review): ``tmrel`` and ``scale`` are not defined in this
        function; presumably they come from an enclosing scope -- confirm.

        Returns whatever ``optimize.minimize`` returns for the squared-error
        loss, started from an initial guess of 2.
        """
        goal = 1 / (1 - attributable_fraction)

        ensemble = EnsembleDistribution(weights, mean=mean, sd=standard_dev)
        # Rescale uniform [0, 1) draws so the extreme tails are never sampled.
        quantiles = .98 * np.random.random(sample_size) + 0.01
        samples = ensemble.ppf(quantiles)

        def loss(guess):
            # Amount by which each sample exceeds tmrel, in units of scale.
            excess = np.maximum(samples - tmrel, 0) / scale
            powered = guess**excess
            average = 1 / sample_size * np.sum(powered)
            miss = average - goal
            return miss**2

        return optimize.minimize(loss, 2)