Ejemplo n.º 1
0
    def _get_weights_ec(self, estimand):
        """Build per-unit weights via empirical calibration for ``estimand``.

        Covariates (requested without the propensity score) and the treatment
        indicator are read from ``self._dataset``.  Every unit starts with a
        weight of 1.0; depending on ``estimand`` one or both treatment arms
        are then re-weighted with ``ec.maybe_exact_calibrate`` (called with
        ``autoscale=True``):

        * ``'ATT'``: control units are calibrated towards the treated
          covariates.
        * ``'ATC'``: treated units are calibrated towards the control
          covariates.
        * ``'ATE'``: each arm is calibrated towards the covariates of the
          full sample.

        Any other value leaves all weights at 1.0.

        Args:
            estimand: ``'ATT'``, ``'ATC'`` or ``'ATE'``.

        Returns:
            A float numpy array with the same shape as the treatment
            indicator.
        """
        covariates = self._dataset.get_covariates(add_pscore=False)
        treatment = self._dataset.get_treatment()
        is_treated = treatment == 1
        is_control = treatment == 0
        treated_covariates = covariates[is_treated]
        control_covariates = covariates[is_control]

        # Each entry: (mask of the arm to re-weight, that arm's covariates,
        # covariates the arm is calibrated towards).
        if estimand == 'ATT':
            plan = [(is_control, control_covariates, treated_covariates)]
        elif estimand == 'ATC':
            plan = [(is_treated, treated_covariates, control_covariates)]
        elif estimand == 'ATE':
            plan = [(is_treated, treated_covariates, covariates),
                    (is_control, control_covariates, covariates)]
        else:
            plan = []

        result = np.ones(treatment.shape, dtype=float)
        for arm_mask, arm_covariates, target in plan:
            # The second return value is not needed here.
            arm_weights, _ = ec.maybe_exact_calibrate(
                covariates=arm_covariates,
                target_covariates=target,
                autoscale=True)
            result[arm_mask] = arm_weights

        return result
Ejemplo n.º 2
0
def g_ec(wh, xmat, targets, options):
    """Return g, the ratio of calibrated weights to the initial weights.

    Thin wrapper around ``ec.maybe_exact_calibrate``.  The targets are
    converted to means by dividing by the sum of the (zero-adjusted) weights,
    the calibration is run with ``wh`` as baseline weights, and the returned
    weights are scaled back by that sum before dividing by ``wh``.  The
    intent is that ``wh * g`` reproduces ``targets``.

    Args:
        wh: Initial weights.  Entries equal to 0 are replaced by
            ``SMALL_POSITIVE`` so the final division is defined.
        xmat: Covariates, forwarded as ``covariates``.
        targets: Target totals; divided by the weight sum to obtain the
            target means.
        options: Mapping providing the keys ``'target_weights'``,
            ``'autoscale'``, ``'objective'`` and ``'increment'``, each
            forwarded to ``ec.maybe_exact_calibrate`` under the same name.

    Returns:
        A 1-D float numpy array ``g``.
    """
    # Guard against division by zero further down.
    base_weights = np.where(wh == 0, SMALL_POSITIVE, wh)

    total_weight = base_weights.sum()
    target_means = (targets / total_weight).reshape((1, -1))

    calibration_kwargs = {
        'covariates': xmat,
        'target_covariates': target_means,
        'baseline_weights': base_weights,
        'target_weights': options['target_weights'],
        'autoscale': options['autoscale'],
        'objective': options['objective'],
        'increment': options['increment'],
    }
    # The second return value (an L2 norm) is not used by this wrapper.
    calibrated, _ = ec.maybe_exact_calibrate(**calibration_kwargs)

    # Scale the calibrated weights by the weight sum, then express them
    # relative to the initial weights.
    ratio = calibrated * total_weight / base_weights
    return np.array(ratio, dtype=float).reshape(-1)
Ejemplo n.º 3
0
def gec(wh,
        xmat,
        targets,
        target_weights: np.ndarray = None,
        objective: ec.Objective = ec.Objective.ENTROPY,
        increment: float = 0.001):
    """Calibrate weights with empirical calibration and return ``(l2_norm, g)``.

    The targets are converted to means by dividing by the sum of the
    (zero-adjusted) weights; ``ec.maybe_exact_calibrate`` is then run with
    ``wh`` as baseline weights and ``autoscale=True``.  The returned weights
    are scaled back by the weight sum and divided by ``wh`` to give ``g``,
    with the intent that ``wh * g`` reproduces ``targets``.

    Args:
        wh: Initial weights.  Entries equal to 0 are replaced by
            ``SMALL_POSITIVE`` so the final division is defined.
        xmat: Covariates, forwarded as ``covariates``.
        targets: Target totals.
        target_weights: Accepted for interface compatibility but currently
            NOT forwarded to ``ec.maybe_exact_calibrate``.
        objective: Forwarded as ``objective`` (``ec.Objective.ENTROPY`` or
            ``ec.Objective.QUADRATIC``).
        increment: Forwarded as ``increment``.

    Returns:
        Tuple ``(l2_norm, g)``: the second value returned by
        ``ec.maybe_exact_calibrate`` and a 1-D float numpy array ``g``.
    """
    # Guard against division by zero further down.
    base_weights = np.where(wh == 0, SMALL_POSITIVE, wh)

    total_weight = base_weights.sum()
    target_means = (targets / total_weight).reshape((1, -1))

    calibrated, l2_norm = ec.maybe_exact_calibrate(
        covariates=xmat,
        target_covariates=target_means,
        baseline_weights=base_weights,
        autoscale=True,
        objective=objective,
        increment=increment,
    )

    # Scale the calibrated weights by the weight sum, then express them
    # relative to the initial weights.
    ratio = calibrated * total_weight / base_weights
    g = np.array(ratio, dtype=float).reshape(-1)

    return l2_norm, g
Ejemplo n.º 4
0
def gec(wh, xmat, targets, options=None):
    """Calibrate weights with empirical calibration and return a result tuple.

    User options are layered over ``options_defaults``.  An ``'objective'``
    given as the string ``'ENTROPY'`` or ``'QUADRATIC'`` is replaced by the
    module-level ``ENTROPY`` / ``QUADRATIC`` constant; any other value is
    passed through unchanged.

    Note that, after zero weights are replaced by ``SMALL_POSITIVE``, every
    weight is overwritten with the mean weight, and no baseline weights are
    passed to ``ec.maybe_exact_calibrate``.  The returned ``g`` and
    ``wh_opt`` are therefore relative to that uniform weight vector rather
    than to the ``wh`` values supplied by the caller.

    Args:
        wh: Initial weights (numpy array).
        xmat: Covariates, forwarded as ``covariates``; also used to compute
            ``targets_opt`` as ``xmat.T @ wh_opt``.
        targets: Target totals; divided by the weight sum to obtain the
            target means.
        options: Optional mapping of overrides for ``options_defaults``.
            The keys ``'autoscale'``, ``'objective'`` and ``'increment'``
            are read.

    Returns:
        A ``Result`` named tuple with fields ``elapsed_seconds``, ``wh_opt``,
        ``targets_opt``, ``g``, ``opts`` and ``l2_norm``.
    """
    started = timer()

    # Layer any user-supplied options over a copy of the defaults.
    options_all = options_defaults.copy()
    if options is not None:
        options_all.update(options)

    # Allow the objective to be requested by name.
    requested_objective = options_all['objective']
    if requested_objective == 'ENTROPY':
        options_all['objective'] = ENTROPY
    elif requested_objective == 'QUADRATIC':
        options_all['objective'] = QUADRATIC

    # Named tuple gives attribute-style access to the options.
    opts = ut.dict_nt(options_all)

    # Replace zeros, then flatten all weights to their common mean.
    nonzero_wh = np.where(wh == 0, SMALL_POSITIVE, wh)
    uniform_wh = np.full(nonzero_wh.shape, nonzero_wh.mean())

    pop = uniform_wh.sum()
    target_means = (targets / pop).reshape((1, -1))

    # ompw: optimal means-producing weights.
    ompw, l2_norm = ec.maybe_exact_calibrate(
        covariates=xmat,
        target_covariates=target_means,
        autoscale=opts.autoscale,
        objective=opts.objective,
        increment=opts.increment,
    )

    # g is the ratio of the rescaled calibrated weights to the uniform
    # weights, so g * uniform_wh gives the optimised weights.
    g = np.array(ompw * pop / uniform_wh, dtype=float).reshape(-1)
    wh_opt = g * uniform_wh
    targets_opt = np.dot(xmat.T, wh_opt)
    finished = timer()

    result_fields = ('elapsed_seconds', 'wh_opt', 'targets_opt', 'g', 'opts',
                     'l2_norm')
    Result = namedtuple('Result', result_fields,
                        defaults=(None, ) * len(result_fields))

    return Result(finished - started, wh_opt, targets_opt, g, opts, l2_norm)
Ejemplo n.º 5
0
 def test_target_weights(self):
     """Duplicating target rows must match weighting them twice.

     Appending a second copy of the first 10 rows of
     ``self.target_covariates`` should give the same calibration as keeping
     the rows once and passing ``target_weights`` of 2 for those 10 rows and
     1 for every other row.
     """
     n_targets = len(self.target_covariates)

     # First 10 rows once more, followed by every row.
     row_index = [*range(10), *range(n_targets)]
     row_weights = [2] * 10
     row_weights.extend([1] * (n_targets - 10))

     duplicated_weights, duplicated_l2 = ec.maybe_exact_calibrate(
         covariates=self.covariates,
         target_covariates=self.target_covariates[row_index])
     weighted_weights, weighted_l2 = ec.maybe_exact_calibrate(
         covariates=self.covariates,
         target_covariates=self.target_covariates,
         target_weights=row_weights)

     self.assertAlmostEqual(duplicated_l2, weighted_l2)
     weight_gap = np.linalg.norm(duplicated_weights - weighted_weights)
     self.assertAlmostEqual(0.0, weight_gap)
Ejemplo n.º 6
0
 def test_maybe_exact_calibrate(self, min_feasible_l2_norm,
                                mock_maybe_exact_calibrate):  # pylint: disable=unused-argument
     """Second return value must equal the configured minimum feasible norm.

     ``min_feasible_l2_norm`` is stored on the module-level
     ``_mock_calibrate`` object (presumably the stand-in for the underlying
     calibration routine -- confirm against the test fixtures), then
     ``ec.maybe_exact_calibrate`` is called with placeholder ``None``
     arguments and ``increment=0.01``.
     """
     _mock_calibrate.min_feasible_l2_norm = min_feasible_l2_norm

     call_kwargs = dict(covariates=None,
                        target_covariates=None,
                        target_weights=None,
                        autoscale=None,
                        objective=None,
                        max_weight=None,
                        increment=0.01)
     outcome = ec.maybe_exact_calibrate(**call_kwargs)

     # Index 1 is the L2 norm reported by the calibration.
     self.assertEqual(outcome[1], min_feasible_l2_norm)
Ejemplo n.º 7
0
    def test_from_formula(self, objective, target_weights):
        """The matrix API and the formula API must agree.

        ``ec.maybe_exact_calibrate`` is run on the prepared design matrices
        and ``ec.from_formula`` on the data frames with the formula
        ``~ x + y``; both receive the same ``target_weights`` and
        ``objective``.  Weights must match to 3 decimals and the L2 norms to
        2 places.
        """
        shared_kwargs = dict(target_weights=target_weights,
                             objective=objective)

        # Original empirical_calibration API working on design matrices.
        matrix_weights, matrix_l2_norm = ec.maybe_exact_calibrate(
            covariates=self.dmatrix,
            target_covariates=self.target_dmatrix,
            **shared_kwargs)

        # Formula API working on data frames.
        formula_weights, formula_l2_norm = ec.from_formula(
            formula="~ x + y",
            df=self.df,
            target_df=self.target_df,
            **shared_kwargs)

        np.testing.assert_almost_equal(matrix_weights, formula_weights,
                                       decimal=3)
        self.assertAlmostEqual(matrix_l2_norm, formula_l2_norm, places=2)
Ejemplo n.º 8
0

# %% package example
# Exploratory cell: calibrate sample weights so that the weighted sample
# means of the selected columns match the population means. The bare
# expressions below only display values when run interactively.

# The two .rda files must be present in the working directory; they can be
# fetched with:
# !wget -q https://github.com/anqif/CVXR/raw/master/data/dspop.rda
# !wget -q https://github.com/anqif/CVXR/raw/master/data/dssamp.rda
dspop = rdata.conversion.convert(rdata.parser.parse_file('dspop.rda'))['dspop']
dssamp = rdata.conversion.convert(rdata.parser.parse_file('dssamp.rda'))['dssamp']

type(dspop) # pandas
dssamp

# Columns whose means are to be matched.
cols = ['sex', 'age']
weights, l2_norm = ec.maybe_exact_calibrate(
    covariates=dssamp[cols], # 100 rows
    target_covariates=dspop[cols],  # 1000 rows
    objective=ec.Objective.ENTROPY
)
l2_norm
# weights is an array, length 100, sum is 1
weights.sum()
# Multiply each sample row by its weight; the column sums are then the
# weighted sample means (given that the weights sum to 1).
check = np.multiply(dssamp[cols], weights.reshape(weights.size, 1))
check.sum(axis=0) # ok, this hits the means
dspop[cols].mean()

# So this yields weights that make the weighted sample means equal to the
# population means.

# Therefore, for sums, we have:
dspop[cols].sum()
# NOTE(review): `tmeans` is not defined in this cell -- presumably it is left
# over from an earlier cell in the session; confirm before running standalone.
tmeans * np.size(dspop, 0)
dspop[cols].sum() / np.size(dspop, 0) # this is what we should use as target