def test_conversions(self):
        """Check that the probability/odds converters are inverses of each other.

        Both directions are exercised: probability -> odds -> probability and
        odds -> probability -> odds. Each round trip must reproduce its input
        to a relative tolerance of 1e-12.
        """
        # Round trip starting from probabilities
        probabilities = np.array([0.7, 0.5, 0.1, 0.01, 0.99])
        recovered_probabilities = odds_to_probability(
            probability_to_odds(probabilities))
        npt.assert_allclose(probabilities,
                            recovered_probabilities,
                            rtol=1e-12)

        # Round trip starting from odds (values below, at, and above 1)
        odds = np.array([2, 1, 0.1, 9, 12, 0.3])
        recovered_odds = probability_to_odds(odds_to_probability(odds))
        npt.assert_allclose(odds, recovered_odds, rtol=1e-12)
    def targeting_step(y, q_init, iptw, verbose):
        """Fit the intercept-only targeting model and return its coefficient.

        A binomial GLM containing only a constant column is fit to ``y``, with
        the log-odds of ``q_init`` as the offset and ``iptw`` as frequency
        weights. The single fitted coefficient is the epsilon term.

        Parameters
        ----------
        y : array-like
            Outcome (dependent variable). Must expose ``.shape``.
        q_init : array-like
            Initial outcome predictions; converted to log-odds for the offset.
        iptw : array-like
            Weights passed to the GLM as ``freq_weights``.
        verbose : bool
            If True, print the fitted model summary.

        Returns
        -------
        float
            The estimated intercept (epsilon).

        Notes
        -----
        NOTE(review): the signature has no ``self``/``cls``; presumably this is
        decorated with ``@staticmethod`` outside the visible lines - confirm.
        """
        f = sm.families.family.Binomial()
        targeting_model = sm.GLM(
            y,  # Outcome / dependent variable
            np.repeat(1, y.shape[0]),  # Generating intercept only model
            offset=np.log(probability_to_odds(
                q_init)),  # Offset by g-formula predictions
            freq_weights=iptw,  # Weighted by calculated IPW
            family=f).fit()

        if verbose:  # Optional argument to print each intermediary result
            print(
                '=============================================================================='
            )
            print('Targeting Model')
            print(targeting_model.summary())

        # `params` may be a NumPy array or a label-indexed pandas Series,
        # depending on the input types. Integer keys on a label-indexed Series
        # are deprecated in pandas, so select the first entry positionally.
        return np.asarray(targeting_model.params)[0]
Exemple #3
0
# Normalise the flag to a real boolean (int() first, so values such as "0"
# or "1" are handled; a non-empty string passed straight to bool() would
# always be True).
shift = bool(int(shift))

if not shift:
    # Policies expressed directly as proportions treated: 0.05 to 0.95
    prop_treated = [
        0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5,
        0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95,
    ]
else:
    # Policies expressed as shift values from -2.5 to 2.5 (0 is excluded).
    # NOTE(review): presumably these are applied on the log-odds scale, given
    # `log_odds` is computed below - confirm against the code that uses them.
    prop_treated = [-2.5, -2.0, -1.5, -1.0, -0.5, 0.5, 1.0, 1.5, 2.0, 2.5]

    # Generating probabilities (true) to assign
    data = network_to_df(G)
    adj_matrix = nx.adjacency_matrix(G, weight=None)

    # Summaries of 'O' and 'G' over the adjacency matrix (measure='mean')
    data['O_mean'] = fast_exp_map(
        adj_matrix, np.array(data['O']), measure='mean')
    data['G_mean'] = fast_exp_map(
        adj_matrix, np.array(data['G']), measure='mean')

    # Logistic model for the probability, then converted to log-odds
    prob = logistic.cdf(-1.3
                        - 1.5 * data['P']
                        + 1.5 * data['P'] * data['G']
                        + 0.95 * data['O_mean']
                        + 0.95 * data['G_mean'])
    log_odds = np.log(probability_to_odds(prob))

# Reference ("true") values for this network / data-generating mechanism
truth = truth_values(
    network=network,
    dgm=exposure,
    restricted_degree=restrict,
    shift=shift,
)

# Banner identifying the running simulation script
print("#############################################")
print("Sim Script:", script_name)
print("=============================================")
    def fit(self, p, conditional=None, samples=100, seed=None):
        """Calculate the mean from the predicted exposure probabilities and predicted outcome values using the TMLE
        procedure. Confidence intervals are calculated using influence curves.

        Parameters
        ----------
        p : float, list, tuple
            Proportion that correspond to the number of persons treated (all values must be between 0.0 and 1.0). If
            conditional is specified, p must be a list/tuple of floats of the same length
        conditional : None, list, tuple, optional
            Strings, one per entry of `p`, each a Python expression that is evaluated with `eval()` and used as a
            row mask. The expressions refer to the data set by the name `df` (a copy of `self.df`). Rows matching
            the i-th expression are treated with probability `p[i]`. The expressions are validated by
            `stochastic_check_conditional()` before use. Default is None, which applies the single probability `p`
            to every row
        samples : int, optional
            Number of samples to use for the Monte Carlo integration procedure
        seed : None, int, optional
            Seed for the Monte Carlo integration procedure (passed to `np.random.seed`, i.e. it sets NumPy's
            global random state)

        Note
        ----
        Exposure and outcome models must be specified prior to `fit()`

        Raises
        ------
        ValueError
            If the exposure or outcome model has not been specified, if any value of `p` lies outside [0, 1], or
            if `conditional` is given and `p` is not a sequence of the same length

        Returns
        -------
        None. The instance gains `epsilon`, `marginals_vector`, `marginal_outcome`, `marginal_se`, `marginal_ci`,
        `conditional_se` and `conditional_ci`
        """
        if self._denominator_ is None:
            raise ValueError(
                "The exposure_model() function must be specified before the fit() function"
            )
        if self._Qinit_ is None:
            raise ValueError(
                "The outcome_model() function must be specified before the fit() function"
            )

        if seed is not None:
            np.random.seed(seed)

        p = np.array(p)
        if np.any(p > 1) or np.any(p < 0):
            raise ValueError(
                "All specified treatment probabilities must be between 0 and 1"
            )
        if conditional is not None:
            # A scalar `p` has no length; report it as a length mismatch rather than letting len() fail
            if p.ndim == 0 or len(p) != len(conditional):
                raise ValueError(
                    "'p' and 'conditional' must be the same length")

        n_obs = self.df.shape[0]

        # Step 1) Calculating clever covariate (HAW)
        strata_masks = []  # One row mask per condition; reused when sampling in Step 4
        if conditional is None:
            numerator = np.where(self.df[self.exposure] == 1, p, 1 - p)
        else:
            # The condition strings refer to the data by the local name `df`, so this name must not change
            df = self.df.copy()
            stochastic_check_conditional(df=self.df, conditional=conditional)
            numerator = np.full(n_obs, np.nan)  # NaN marks rows no condition has claimed yet
            for c, prop in zip(conditional, p):
                # NOTE(security): eval() executes arbitrary code; `conditional` must only come from trusted input.
                # It is called in this explicit loop (not a comprehension) so the expression can see `df`.
                mask = eval(c)
                strata_masks.append(mask)
                numerator = np.where(
                    mask, np.where(df[self.exposure] == 1, prop, 1 - prop),
                    numerator)

        haw = np.array(numerator / self._denominator_).astype(float)

        # Step 2) Estimate from Q-model
        # process completed in outcome_model() function and stored in self._Qinit_

        # Step 3) Target parameter TMLE
        self.epsilon = self.targeting_step(y=self.df[self.outcome],
                                           q_init=self._Qinit_,
                                           iptw=haw,
                                           verbose=self._verbose_)

        # Step 4) Monte-Carlo Integration procedure
        q_star_list = []
        q_i_star_list = []
        self._resamples_ = samples
        for _ in range(samples):
            # Applying treatment plan
            df = self.df.copy()
            if conditional is None:
                df[self.exposure] = np.random.binomial(n=1,
                                                       p=p,
                                                       size=n_obs)
            else:
                # Draw a full vector per stratum, but keep each draw only for the rows of its own stratum
                exposure_star = np.full(n_obs, np.nan)
                for mask, prop in zip(strata_masks, p):
                    draws = np.random.binomial(n=1, p=prop, size=n_obs)
                    exposure_star = np.where(mask, draws, exposure_star)
                df[self.exposure] = exposure_star

            # Outcome model under treatment plan
            if self._out_model_custom:
                # Design matrix is built from `df` (sampled exposure), not the observed data
                _, data_star = patsy.dmatrices(self._q_model + ' - 1', df)
                y_star = stochastic_outcome_predict(
                    xdata=data_star,
                    fit_ml_model=self._outcome_model,
                    continuous=self._continuous_outcome)
            else:
                y_star = self._outcome_model.predict(df)

            if self._continuous_outcome:  # Ensures all predicted values are bounded
                y_star = np.where(y_star < self._q_min_bound,
                                  self._q_min_bound, y_star)
                y_star = np.where(y_star > self._q_max_bound,
                                  self._q_max_bound, y_star)

            # Targeted Estimate
            logit_qstar = np.log(
                probability_to_odds(y_star)) + self.epsilon  # logit(Y^*) + e
            q_star = odds_to_probability(np.exp(logit_qstar))  # Y^*
            q_i_star_list.append(q_star)  # Saving Y_i^* for marginal variance
            q_star_list.append(np.mean(q_star))  # Saving E[Y^*]

        if self._continuous_outcome:
            # Map bounded values back to the original outcome scale
            self.marginals_vector = _tmle_unit_unbound_(
                np.array(q_star_list),
                mini=self._continuous_min,
                maxi=self._continuous_max)
            y_ = np.array(
                _tmle_unit_unbound_(self.df[self.outcome],
                                    mini=self._continuous_min,
                                    maxi=self._continuous_max))
            yq0_ = _tmle_unit_unbound_(self._Qinit_,
                                       mini=self._continuous_min,
                                       maxi=self._continuous_max)
            yqstar_ = _tmle_unit_unbound_(np.array(q_i_star_list),
                                          mini=self._continuous_min,
                                          maxi=self._continuous_max)

        else:
            self.marginals_vector = q_star_list
            y_ = np.array(self.df[self.outcome])
            yq0_ = self._Qinit_
            yqstar_ = np.array(q_i_star_list)

        self.marginal_outcome = np.mean(self.marginals_vector)

        # Step 5) Estimating Var(psi)
        zalpha = norm.ppf(1 - self.alpha / 2, loc=0, scale=1)

        # Marginal variance estimator
        variance_marginal = self.est_marginal_variance(
            haw=haw,
            y_obs=y_,
            y_pred=yq0_,
            y_pred_targeted=np.mean(yqstar_, axis=0),  # Per-observation mean across the resamples
            psi=self.marginal_outcome)
        self.marginal_se = np.sqrt(variance_marginal) / np.sqrt(n_obs)
        self.marginal_ci = [
            self.marginal_outcome - zalpha * self.marginal_se,
            self.marginal_outcome + zalpha * self.marginal_se
        ]

        # Conditional on W variance estimator (not generally recommended but I need it for other work)
        variance_conditional = self.est_conditional_variance(haw=haw,
                                                             y_obs=y_,
                                                             y_pred=yq0_)
        self.conditional_se = np.sqrt(variance_conditional) / np.sqrt(n_obs)
        self.conditional_ci = [
            self.marginal_outcome - zalpha * self.conditional_se,
            self.marginal_outcome + zalpha * self.conditional_se
        ]