Esempio n. 1
0
def psi_tmle_cont_outcome(q_t0, q_t1, g, t, y, eps_hat=None, truncate_level=0.05):
    """
    One-step TMLE-style estimate of the average effect for a continuous outcome.

    The initial outcome predictions are shifted along the direction
    ``h = t / g - (1 - t) / (1 - g)`` by a scalar ``eps_hat``; the estimate is
    the mean difference of the shifted predictions under t=1 and t=0.

    :param q_t0: outcome predictions under t=0
    :param q_t1: outcome predictions under t=1
    :param g: propensity scores (used as P(t=1 | x) in the weights)
    :param t: observed treatment indicator
    :param y: observed outcomes
    :param eps_hat: perturbation size; when None it is fit in closed form as
        the least-squares coefficient of the residual ``y - q`` on ``h``
    :param truncate_level: forwarded to ``truncate_all_by_g`` (defined
        elsewhere; presumably trims units with extreme propensities -- verify)
    :return: tuple ``(psi_tmle, psi_tmle_std, eps_hat, initial_loss,
        final_loss, g_loss)``
    """
    q_t0, q_t1, g, t, y = truncate_all_by_g(q_t0, q_t1, g, t, y, truncate_level)

    def _direction(treatment):
        # perturbation direction evaluated at an arbitrary treatment vector
        return treatment * (1.0 / g) - (1.0 - treatment) / (1.0 - g)

    def _unperturbed(treatment):
        # predictions from the unperturbed model for the given treatment vector
        return (1.0 - treatment) * q_t0 + treatment * q_t1

    # `mse` is defined elsewhere; presumably the mean squared error of g against t
    propensity_loss = mse(g, t)

    h_observed = _direction(t)
    q_observed = _unperturbed(t)

    if eps_hat is None:
        numerator = np.sum(h_observed * (y - q_observed))
        eps_hat = numerator / np.sum(np.square(h_observed))

    def _perturbed(treatment):
        return _unperturbed(treatment) + eps_hat * _direction(treatment)

    unit_effects = _perturbed(np.ones_like(t)) - _perturbed(np.zeros_like(t))
    estimate = np.mean(unit_effects)

    # perturbed predictions at the observed treatment, shared by the influence
    # curve and the final risk below
    q_updated = _perturbed(t)

    # standard deviation computation relies on asymptotic expansion of
    # non-parametric estimator, see van der Laan and Rose p 96
    influence = h_observed * (y - q_updated) + unit_effects - estimate
    estimate_std = np.std(influence) / np.sqrt(t.shape[0])

    risk_before = np.mean(np.square(q_observed - y))
    risk_after = np.mean(np.square(q_updated - y))

    return estimate, estimate_std, eps_hat, risk_before, risk_after, propensity_loss
Esempio n. 2
0
def psi_aiptw(q_t0, q_t1, g, t, y, truncate_level=0.05):
    """
    Augmented inverse-propensity-weighted estimate of the average effect.

    :param q_t0: outcome predictions under t=0
    :param q_t1: outcome predictions under t=1
    :param g: propensity scores (used as P(t=1 | x) in the weights)
    :param t: observed treatment indicator
    :param y: observed outcomes
    :param truncate_level: forwarded to ``truncate_all_by_g`` (defined
        elsewhere; presumably trims units with extreme propensities -- verify)
    :return: tuple ``(estimate, standard_error)`` where the standard error is
        the std of the per-unit scores divided by sqrt(number of units)
    """
    q_t0, q_t1, g, t, y = truncate_all_by_g(q_t0, q_t1, g, t, y, truncate_level)

    # prediction for the treatment arm each unit actually received
    observed_arm_q = q_t0 * (1 - t) + q_t1 * t
    # signed inverse-propensity weight: +1/g for treated, -1/(1-g) for controls
    signed_weight = t * (1.0 / g) - (1.0 - t) / (1.0 - g)
    # per-unit score: weighted residual correction plus the plug-in contrast
    scores = signed_weight * (y - observed_arm_q) + q_t1 - q_t0

    estimate = np.mean(scores)
    standard_error = np.std(scores) / np.sqrt(scores.shape[0])
    return estimate, standard_error
Esempio n. 3
0
def psi_aiptw(q_t0, q_t1, g, t, y, prob_t, truncate_level=0.05):
    """
    Robust ATT estimator described in eqn 3.9 of
    https://www.econstor.eu/bitstream/10419/149795/1/869216953.pdf

    :param q_t0: outcome predictions under t=0
    :param q_t1: outcome predictions under t=1 (only passed through the
        truncation step; not used in the estimate itself)
    :param g: propensity scores
    :param t: observed treatment indicator
    :param y: observed outcomes
    :param prob_t: normaliser dividing the averaged score (presumably the
        marginal probability of treatment -- confirm with callers)
    :param truncate_level: forwarded to ``truncate_all_by_g`` (defined elsewhere)
    :return: the scalar estimate
    """
    q_t0, q_t1, g, t, y = truncate_all_by_g(q_t0, q_t1, g, t, y, truncate_level)

    control_residual = y - q_t0
    # controls are reweighted by the propensity odds g / (1 - g)
    propensity_odds = g / (1 - g)

    treated_part = t * control_residual
    control_part = (1 - t) * propensity_odds * control_residual

    return (treated_part - control_part).mean() / prob_t
Esempio n. 4
0
def psi_tmle_bin_outcome(q_t0, q_t1, g, t, y, truncate_level=0.05):
    """
    TMLE-style effect estimate for a binary outcome.

    A scalar perturbation is fit by minimising the cross entropy between ``y``
    and the perturbed outcome model (Nelder-Mead, started at 0); the estimate
    is the mean difference of the perturbed model under t=1 and t=0.

    ``cross_entropy`` and ``_perturbed_model_bin_outcome`` are defined
    elsewhere in the file.

    :param q_t0: outcome predictions under t=0
    :param q_t1: outcome predictions under t=1
    :param g: propensity scores
    :param t: observed treatment indicator
    :param y: observed outcomes
    :param truncate_level: forwarded to ``truncate_all_by_g`` (defined elsewhere)
    :return: the scalar estimate
    """
    # TODO: make me useable

    q_t0, q_t1, g, t, y = truncate_all_by_g(q_t0, q_t1, g, t, y, truncate_level)

    # solve the perturbation problem
    def _perturbation_loss(eps):
        return cross_entropy(y, _perturbed_model_bin_outcome(q_t0, q_t1, g, t, eps))

    fit = minimize(_perturbation_loss, 0., method='Nelder-Mead')
    eps_hat = fit.x[0]

    # counterfactual predictions from the perturbed model
    under_treatment = _perturbed_model_bin_outcome(q_t0, q_t1, g, np.ones_like(t), eps_hat)
    under_control = _perturbed_model_bin_outcome(q_t0, q_t1, g, np.zeros_like(t), eps_hat)

    return np.mean(under_treatment - under_control)
Esempio n. 5
0
def psi_tmle(q_t0, q_t1, g, t, y, prob_t, truncate_level=0.05):
    """
    Near canonical van der Laan TMLE, except we use a
    1 dimension epsilon shared between the Q and g update models.

    Epsilon is fit by minimising the sum of the squared-error loss of the
    perturbed outcome model and the cross entropy of the perturbed propensity
    model. ``_perturbed_model`` and ``cross_entropy`` are defined elsewhere
    in the file.

    :param q_t0: outcome predictions under t=0
    :param q_t1: outcome predictions under t=1
    :param g: propensity scores
    :param t: observed treatment indicator
    :param y: observed outcomes
    :param prob_t: normaliser dividing the treated-weighted mean (presumably
        the marginal probability of treatment -- confirm with callers)
    :param truncate_level: forwarded to ``truncate_all_by_g`` (defined elsewhere)
    :return: the scalar estimate
    """
    q_t0, q_t1, g, t, y = truncate_all_by_g(q_t0, q_t1, g, t, y, truncate_level)

    def _joint_loss(eps):
        q_eps, g_eps = _perturbed_model(q_t0, q_t1, g, t, prob_t, eps)
        outcome_term = np.square(y - q_eps).mean()
        return outcome_term + cross_entropy(t, g_eps)

    eps_hat = minimize(_joint_loss, 0.).x[0]

    def _shifted_q(t_cf):
        direction = t_cf * (1.0 / g) - (1.0 - t_cf) / (1.0 - g)
        baseline = (1.0 - t_cf) * q_t0 + t_cf * q_t1  # predictions from unperturbed model
        return baseline - eps_hat * direction

    contrast = _shifted_q(np.ones_like(t)) - _shifted_q(np.zeros_like(t))
    # only treated units (t == 1) contribute to the numerator
    return np.mean(t * contrast) / prob_t
Esempio n. 6
0
    def tmle(q_t0, q_t1, g, t, y, truncate_level=0.05, deps=deps_default):
        """
        Computes the tmle for the ATT (equivalently: direct effect)

        Iteratively perturbs the outcome and propensity models in steps of
        size ``deps`` until the loss stops improving, then returns the
        estimate from the last accepted (pre-increase) models.

        ``prob_t``, ``deps_default``, ``_psi``, ``_loss`` and
        ``_perturb_g_and_q`` are taken from the enclosing scope, which is not
        part of this block.

        :param q_t0: outcome predictions under t=0
        :param q_t1: outcome predictions under t=1
        :param g: propensity scores
        :param t: observed treatment indicator
        :param y: observed outcomes
        :param truncate_level: forwarded to ``truncate_all_by_g``
        :param deps: step size for epsilon; its sign is flipped when the
            initial derivative estimate is positive
        :return: tuple ``(psi, eps)`` -- the estimate computed by ``_psi`` on
            the last accepted models, and the accumulated epsilon
        """
        q_t0, q_t1, g, t, y = truncate_all_by_g(q_t0, q_t1, g, t, y, truncate_level)

        eps = 0.0

        q0_old = q_t0
        q1_old = q_t1
        g_old = g

        # determine whether epsilon should go up or down
        # translated blindly from line 299 of https://github.com/cran/tmle/blob/master/R/tmle.R
        h1 = t / prob_t - ((1 - t) * g) / (prob_t * (1 - g))
        full_q = (1.0 - t) * q_t0 + t * q_t1
        deriv = np.mean(prob_t*h1*(y-full_q) + t*(q_t1 - q_t0 - _psi(q_t0, q_t1, g)))
        if deriv > 0:
            deps = -deps

        # run until loss starts going up
        # (Rose's implementation starts from old_loss = np.inf instead)
        old_loss = _loss(full_q, g, y, t)

        while True:
            perturbed_q0, perturbed_q1, perturbed_q, perturbed_g = _perturb_g_and_q(q0_old, q1_old, g_old, t, deps=deps)

            new_loss = _loss(perturbed_q, perturbed_g, y, t)

            # Converged when the loss goes up. A NaN loss is also treated as
            # "no improvement": every comparison against NaN is False, so
            # accepting a NaN step would make this loop spin forever.
            if new_loss > old_loss or np.isnan(new_loss):
                if eps == 0.:
                    print("Warning: no update occurred (is deps too big?)")
                return _psi(q0_old, q1_old, g_old), eps

            # accept the step and continue from the perturbed models
            eps += deps

            q0_old = perturbed_q0
            q1_old = perturbed_q1
            g_old = perturbed_g

            old_loss = new_loss
Esempio n. 7
0
def psi_plugin(q_t0, q_t1, g, t, y, prob_t, truncate_level=0.05):
    """
    Plug-in estimate: mean of the predicted contrast ``q_t1 - q_t0`` weighted
    by ``g / prob_t``.

    :param q_t0: outcome predictions under t=0
    :param q_t1: outcome predictions under t=1
    :param g: propensity scores, used as per-unit weights
    :param t: observed treatment indicator (only passed through truncation)
    :param y: observed outcomes (only passed through truncation)
    :param prob_t: normaliser for the weights (presumably the marginal
        probability of treatment -- confirm with callers)
    :param truncate_level: forwarded to ``truncate_all_by_g`` (defined elsewhere)
    :return: the scalar estimate
    """
    q_t0, q_t1, g, t, y = truncate_all_by_g(q_t0, q_t1, g, t, y, truncate_level)

    predicted_contrast = q_t1 - q_t0
    weighted_contrast = g * predicted_contrast / prob_t
    return weighted_contrast.mean()
Esempio n. 8
0
def psi_q_only(q_t0, q_t1, g, t, y, prob_t, truncate_level=0.05):
    """
    Outcome-model-only estimate: mean predicted contrast ``q_t1 - q_t0`` over
    the units with ``t == 1``.

    :param q_t0: outcome predictions under t=0
    :param q_t1: outcome predictions under t=1
    :param g: propensity scores (only passed through truncation)
    :param t: observed treatment indicator; selects the units averaged over
    :param y: observed outcomes (only passed through truncation)
    :param prob_t: accepted but not used by this function
    :param truncate_level: forwarded to ``truncate_all_by_g`` (defined elsewhere)
    :return: the scalar estimate
    """
    q_t0, q_t1, g, t, y = truncate_all_by_g(q_t0, q_t1, g, t, y, truncate_level)

    predicted_contrast = q_t1 - q_t0
    is_treated = t == 1
    return predicted_contrast[is_treated].mean()