Example #1
0
    def _generate_recoverable_errors(a_X,
                                     X,
                                     a_W=None,
                                     W=None,
                                     featurizer=None):
        """Return error vectors e_t and e_y such that OLS can recover the true coefficients from both stages.

        Parameters
        ----------
        a_X : array-like
            Coefficient vector multiplied against ``cross_product(ϕ(X), e_t)``.
        X : array-like
            Feature matrix; it is stacked next to ``W`` and passed through ``featurizer``.
        a_W : array-like, optional
            Coefficient vector multiplied against ``cross_product(W, e_t)``.
            Defaults to a zero vector with one entry per column of ``W``.
        W : array-like, optional
            Control matrix. Defaults to an array with ``shape(X)[0]`` rows and no columns.
        featurizer : transformer, optional
            Object whose ``fit_transform(X)`` yields ϕ(X). When omitted, a fresh
            ``FunctionTransformer()`` is created for this call, so no fitted
            transformer instance is shared between calls.

        Returns
        -------
        (e_t, e_y) : tuple
            The two error vectors produced by ``rand_sol``.
        """
        if featurizer is None:
            # Build the default per call: a default created in the signature would be a
            # single object that every call re-fits.
            featurizer = FunctionTransformer()
        if W is None:
            W = np.empty((shape(X)[0], 0))
        if a_W is None:
            a_W = np.zeros((shape(W)[1], ))
        # to correctly recover coefficients for T via OLS, we need e_t to be orthogonal to [W;X]
        # (presumably rand_sol(A, b) returns a random solution of A @ e = b -- confirm against its definition)
        WX = hstack([W, X])
        e_t = rand_sol(WX.T, np.zeros((shape(WX)[1], )))

        # to correctly recover coefficients for Y via OLS, we need ([X; W]⊗[1; ϕ(X); W])⁺ e_y =
        #                                                          -([X; W]⊗[1; ϕ(X); W])⁺ ((ϕ(X)⊗e_t)a_X+(W⊗e_t)a_W)
        # then, to correctly recover a in the third stage, we additionally need (ϕ(X)⊗e_t)ᵀ e_y = 0

        ϕ = featurizer.fit_transform(X)

        v_X = cross_product(ϕ, e_t)
        v_W = cross_product(W, e_t)

        M = np.linalg.pinv(
            cross_product(WX, hstack([np.ones((shape(WX)[0], 1)), ϕ, W])))
        # Both requirements on e_y are stacked into one linear system.
        e_y = rand_sol(
            vstack([M, v_X.T]),
            vstack([-M @ (v_X @ a_X + v_W @ a_W),
                    np.zeros((shape(v_X)[1], ))]))

        return e_t, e_y
Example #2
0
    def create_instance(self,
                        s_x,
                        sigma_x,
                        sigma_y,
                        conf_str,
                        hetero_strength=0,
                        hetero_inds=None,
                        autoreg=.5,
                        state_effect=.5,
                        random_seed=123):
        """Seed NumPy's global RNG and populate the model parameters and true effects.

        The scalar settings are stored on the instance. ``Alpha`` is a constant
        matrix scaled by ``state_effect`` (rows in ``hetero_inds`` zeroed),
        ``Beta`` is ``autoreg`` times the identity, ``epsilon`` is drawn
        uniformly from [-1, 1), and the first ``s_x`` entries of ``zeta`` share
        ``conf_str`` equally. ``true_effect`` and ``true_hetero_effect`` hold
        one row per period, built from these quantities.

        Reads ``self.n_x``, ``self.n_treatments`` and ``self.n_periods``.
        Returns ``self``.
        """
        np.random.seed(random_seed)

        self.s_x = s_x
        self.conf_str = conf_str
        self.sigma_x = sigma_x
        self.sigma_y = sigma_y
        self.hetero_inds = None if hetero_inds is None else hetero_inds.astype(int)
        # All state indices that are not listed in hetero_inds.
        every_state = np.arange(self.n_x)
        self.endo_inds = np.setdiff1d(every_state, hetero_inds).astype(int)
        has_hetero = self.hetero_inds is not None

        # The first s_x state variables are confounders. The final s_x variables are exogenous and can create
        # heterogeneity
        self.Alpha = np.ones((self.n_x, self.n_treatments)) * state_effect
        if has_hetero:
            self.Alpha[self.hetero_inds] = 0

        self.Beta = np.eye(self.n_x) * autoreg

        self.epsilon = np.random.uniform(-1, 1, size=self.n_treatments)
        self.zeta = np.zeros(self.n_x)
        self.zeta[:self.s_x] = self.conf_str / self.s_x

        self.y_hetero_effect = np.zeros(self.n_x)
        self.x_hetero_effect = np.zeros(self.n_x)
        if has_hetero:
            low, high = .5 * hetero_strength, 1.5 * hetero_strength
            # Draw order (y first, then x) is part of the seeded behaviour.
            self.y_hetero_effect[self.hetero_inds] = \
                np.random.uniform(low, high) / len(self.hetero_inds)
            self.x_hetero_effect[self.hetero_inds] = \
                np.random.uniform(low, high) / len(self.hetero_inds)

        zeta_row = self.zeta.reshape(1, -1)
        y_design = add_constant(self.y_hetero_effect.reshape(1, -1),
                                has_constant='add')
        x_design = add_constant(self.x_hetero_effect.reshape(1, -1),
                                has_constant='add')

        self.true_effect = np.zeros((self.n_periods, self.n_treatments))
        self.true_hetero_effect = np.zeros(
            (self.n_periods, (self.n_x + 1) * self.n_treatments))

        # Period 0 is driven directly by epsilon.
        self.true_effect[0] = self.epsilon
        self.true_hetero_effect[0, :] = cross_product(
            y_design, self.epsilon.reshape(1, -1))

        # Later periods propagate through Beta^(t-1) and Alpha.
        for t in range(1, self.n_periods):
            lagged = zeta_row @ np.linalg.matrix_power(self.Beta, t - 1) @ self.Alpha
            self.true_effect[t, :] = lagged
            self.true_hetero_effect[t, :] = cross_product(x_design, lagged)

        return self
Example #3
0
    def create_instance(self,
                        s_x,
                        sigma_x,
                        sigma_y,
                        conf_str,
                        hetero_strength=0,
                        hetero_inds=None,
                        autoreg=.5,
                        state_effect=.5,
                        random_seed=123):
        """Seed NumPy's global RNG and populate the model parameters and true effects.

        The ``hetero_inds`` argument is not used: ``self.hetero_inds`` is
        always set to the last ``n_treatments`` state indices. ``Alpha`` is a
        constant matrix (``state_effect / n_treatments``) whose bottom-right
        ``n_treatments`` x ``n_treatments`` corner is replaced by
        ``state_effect`` times the identity. The heterogeneity vectors are
        deterministic (``hetero_strength`` split evenly across
        ``self.hetero_inds``).

        Reads ``self.n_x``, ``self.n_treatments`` and ``self.n_periods``.
        Returns ``self``.
        """
        np.random.seed(random_seed)

        self.s_x = s_x
        self.conf_str = conf_str
        self.sigma_x = sigma_x
        self.sigma_y = sigma_y
        self.hetero_inds = np.arange(self.n_x - self.n_treatments, self.n_x)

        self.Alpha = np.ones(
            (self.n_x, self.n_treatments)) * state_effect / self.n_treatments
        corner = state_effect * np.eye(self.n_treatments)
        self.Alpha[-self.n_treatments:, -self.n_treatments:] = corner

        self.Beta = np.eye(self.n_x) * autoreg

        self.epsilon = np.random.uniform(-1, 1, size=self.n_treatments)
        self.zeta = np.zeros(self.n_x)
        self.zeta[:self.s_x] = self.conf_str / self.s_x

        # hetero_inds is always an index array here, so the effects are always filled in.
        self.y_hetero_effect = np.zeros(self.n_x)
        self.x_hetero_effect = np.zeros(self.n_x)
        self.y_hetero_effect[self.hetero_inds] = \
            hetero_strength / len(self.hetero_inds)
        self.x_hetero_effect[self.hetero_inds] = \
            hetero_strength / len(self.hetero_inds)

        zeta_row = self.zeta.reshape(1, -1)
        y_design = add_constant(self.y_hetero_effect.reshape(1, -1),
                                has_constant='add')
        x_design = add_constant(self.x_hetero_effect.reshape(1, -1),
                                has_constant='add')

        self.true_effect = np.zeros((self.n_periods, self.n_treatments))
        self.true_hetero_effect = np.zeros(
            (self.n_periods, (self.n_x + 1) * self.n_treatments))

        # Period 0 is driven directly by epsilon.
        self.true_effect[0] = self.epsilon
        self.true_hetero_effect[0, :] = cross_product(
            y_design, self.epsilon.reshape(1, -1))

        # Later periods propagate through Beta^(t-1) and Alpha.
        for t in range(1, self.n_periods):
            lagged = zeta_row @ np.linalg.matrix_power(self.Beta, t - 1) @ self.Alpha
            self.true_effect[t, :] = lagged
            self.true_hetero_effect[t, :] = cross_product(x_design, lagged)

        return self
Example #4
0
    def test_cross_product(self):
        """Check the column ordering of ``cross_product`` and that 1-D inputs are accepted."""
        X = np.array([[1, 2], [3, 4]])
        Y = np.array([[1, 2, 3], [4, 5, 6]])
        Z = np.array([1, 1])

        # Earlier arguments vary fastest along the output columns, later ones
        # slowest; Z is a plain vector of ones, so it must be accepted and must
        # not change the values.
        # assert_array_equal also compares shapes, so a result that only
        # matches through broadcasting cannot pass.
        np.testing.assert_array_equal(
            cross_product(Z, Y, X),
            np.array([[1, 2, 3, 2, 4, 6],
                      [12, 15, 18, 16, 20, 24]]))

        np.testing.assert_array_equal(
            cross_product(X, Z, Y),
            np.array([[1, 2, 2, 4, 3, 6],
                      [12, 16, 15, 20, 18, 24]]))
Example #5
0
    def fit_final(self, Y, T, X, groups, resT, resY, n_periods, hetero_inds):
        """Fit one final lag-effect model per lag and store them.

        ``X`` is reshaped to (units, periods, features). For each pair
        ``kappa <= tau`` the treatment residual ``resT[kappa][tau]`` is crossed
        with a constant plus the ``hetero_inds`` columns of period ``tau``.
        Models are then fitted from lag 0 upwards; before fitting lag
        ``kappa``, the predictions of the already-fitted lags are subtracted
        from the outcome residual of the corresponding period.

        ``Y``, ``T`` and ``groups`` are not used here. Sets
        ``self.final_models``, calls ``self._fit_cov_matrix`` and returns
        ``self``.
        """
        panelX = X.reshape((X.shape[0] // n_periods, n_periods, -1))

        # resTX[kappa][tau]: design matrix for residual (kappa, tau).
        resTX = {
            kappa: {
                tau: cross_product(
                    add_constant(panelX[:, tau, hetero_inds],
                                 has_constant='add'),
                    resT[kappa][tau].reshape(-1, self._n_treatments))
                for tau in np.arange(kappa, n_periods)
            }
            for kappa in np.arange(n_periods)
        }

        models = {}
        last = n_periods - 1
        for kappa in np.arange(n_periods):
            period = last - kappa
            Y_cal = resY[period].copy()
            if kappa > 0:
                # Remove what the already-fitted (shorter) lags explain.
                explained = [
                    models[tau].predict(resTX[period][last - tau])
                    for tau in np.arange(kappa)
                ]
                Y_cal -= np.sum(explained, axis=0)

            final_model = self._model_final_gen()
            models[kappa] = final_model.fit(resTX[period][period], Y_cal)

        self._fit_cov_matrix(resTX, resY, models)
        self.final_models = models
        return self
Example #6
0
 def test_min_var_leaf(self):
     """Check the ``min_eig_leaf`` constraint for both criteria on a depth-1 tree.

     Treatment is 0 for the lower half of X and 1 for the upper half. For
     'het' both children of the root must have mean squared treatment above
     ``min_eig_leaf``; for 'mse' the tree's feature array must equal ``[-2]``.
     """
     n_samples_train = 10
     midpoint = n_samples_train // 2
     for criterion in ['het', 'mse']:
         config = self._get_base_config(n_samples_train=n_samples_train,
                                        n_t=1,
                                        n_features=1)
         overrides = (('max_depth', 1),
                      ('min_samples_leaf', 1),
                      ('min_eig_leaf', .2),
                      ('criterion', criterion))
         for key, value in overrides:
             config[key] = value

         X = np.arange(n_samples_train).reshape(-1, 1)
         # The draw is overwritten below but still advances the global RNG.
         T = np.random.binomial(1, .5, size=(n_samples_train, 1))
         lower_half = X[:, 0] < midpoint
         T[lower_half] = 0
         T[X[:, 0] >= midpoint] = 1

         intercept_col = np.ones((T.shape[0], 1))
         Taug = np.hstack([T, intercept_col])
         y = np.zeros((n_samples_train, 1))
         yaug = np.hstack([y, y * Taug, cross_product(Taug, Taug)])
         tree = self._train_tree(config, X, yaug)

         if criterion != 'het':
             np.testing.assert_array_equal(tree.feature, np.array([-2]))
             continue

         np.testing.assert_array_less(
             config['min_eig_leaf'],
             np.mean(T[X[:, 0] > tree.threshold[0]]**2))
         np.testing.assert_array_less(
             config['min_eig_leaf'],
             np.mean(T[X[:, 0] <= tree.threshold[0]]**2))
Example #7
0
    def _get_true_quantities(self, X, T, y, mask, criterion, fit_intercept, sample_weight=None):
        """Compute the jacobian, preconditioner, parameter and impurity for the samples selected by ``mask``.

        Parameters
        ----------
        X, T, y : arrays indexed along their first axis by ``mask``.
            NOTE(review): ``y * T`` relies on broadcasting, so ``y`` is presumably
            a column of shape (n, 1) -- confirm against callers.
        mask : index or boolean mask selecting the samples to use.
        criterion : ``'het'`` selects the heterogeneity impurity; any other value
            selects the squared-outcome based impurity.
        fit_intercept : when truthy, a column of ones is appended to ``T``.
        sample_weight : optional per-sample weights; defaults to all ones.

        Returns
        -------
        (jac, precond, param, impurity) : ``jac`` and ``precond`` are the weighted
            averages of ``cross_product(T, T)`` and ``y * T``; ``param`` is
            ``invJ @ precond`` computed from the weighted sums.
        """
        if sample_weight is None:
            sample_weight = np.ones(X.shape[0])
        # Restrict every input to the selected samples.
        X, T, y, sample_weight = X[mask], T[mask], y[mask], sample_weight[mask]
        # Number of treatment columns before the optional intercept column is added.
        n_relevant_outputs = T.shape[1]
        if fit_intercept:
            T = np.hstack([T, np.ones((T.shape[0], 1))])
        alpha = y * T
        pointJ = cross_product(T, T)
        node_weight = np.sum(sample_weight)
        # Weighted sums (total weight times weighted average) of the per-sample quantities.
        jac = node_weight * np.average(pointJ, axis=0, weights=sample_weight)
        precond = node_weight * np.average(alpha, axis=0, weights=sample_weight)

        # jac is flat here; invert it as a 1x1, as a 2x2 in closed form, or generically.
        if jac.shape[0] == 1:
            invJ = np.array([[1 / jac[0]]])
        elif jac.shape[0] == 4:
            det = jac[0] * jac[3] - jac[1] * jac[2]
            # A near-zero determinant (of either sign) is replaced by +1e-6.
            if abs(det) < 1e-6:
                det = 1e-6
            invJ = np.array([[jac[3], -jac[1]], [-jac[2], jac[0]]]) / det
        else:
            # General case: add 1e-6 to the diagonal before inverting.
            invJ = np.linalg.inv(jac.reshape((alpha.shape[1], alpha.shape[1])) + 1e-6 * np.eye(T.shape[1]))

        param = invJ @ precond
        # Only after param is computed are jac and precond rescaled from sums to weighted averages.
        jac = jac / node_weight
        precond = precond / node_weight
        if criterion == 'het':
            # Per-sample moment at the fitted parameter, mapped through invJ and
            # truncated to the first n_relevant_outputs columns.
            moment = alpha - pointJ.reshape((-1, alpha.shape[1], alpha.shape[1])) @ param
            rho = ((invJ @ moment.T).T)[:, :n_relevant_outputs] * node_weight
            # Weighted mean of rho**2 minus squared weighted mean of rho, averaged over columns.
            impurity = np.mean(np.average(rho**2, axis=0, weights=sample_weight))
            impurity -= np.mean(np.average(rho, axis=0, weights=sample_weight)**2)
        else:
            # Weighted mean of y**2 minus the quadratic form param' J param.
            impurity = np.mean(np.average(y**2, axis=0, weights=sample_weight))
            impurity -= (param.reshape(1, -1) @ jac.reshape((alpha.shape[1], alpha.shape[1])) @ param)[0]
        return jac, precond, param, impurity
Example #8
0
def _coverage_profile(est, X_test, alpha, true_coef, true_effect):
    """Collect estimates, intervals and coverage indicators for coefficients and effects.

    Coefficient entries are flattened arrays with the intercept placed before
    the coefficients along the last axis. Effect entries compare treatment
    all-zeros against all-ones at every row of ``X_test``. ``true_effect`` is
    a callable evaluated as ``true_effect(X_test, ones)``.

    Returns a dict of flattened arrays (``'x_test'`` is ``X_test`` with each
    row repeated ``d_y`` times).
    """
    d_t = true_coef.shape[1] // (X_test.shape[1] + 1)
    d_y = true_coef.shape[0]
    treat_shape = (X_test.shape[0], d_t)

    def _prepend_intercept(intercept, coef):
        # Put the intercept in front of the coefficients on the last axis, then flatten.
        return np.concatenate((intercept[..., np.newaxis], coef), axis=-1).flatten()

    coef_interval = est.coef__interval(alpha=alpha)
    intercept_interval = est.intercept__interval(alpha=alpha)
    true_coef = true_coef.flatten()
    est_coef = _prepend_intercept(est.intercept_, est.coef_)
    est_coef_lb = _prepend_intercept(intercept_interval[0], coef_interval[0])
    est_coef_ub = _prepend_intercept(intercept_interval[1], coef_interval[1])

    cov = {
        'coef': est_coef,
        'coef_lower': est_coef_lb,
        'coef_upper': est_coef_ub,
        'true_coef': true_coef,
        'coef_stderr': est.model_final.coef_stderr_.flatten(),
        'coef_sqerror': (est_coef - true_coef)**2,
        'coef_cov': ((true_coef >= est_coef_lb) & (true_coef <= est_coef_ub)),
        'coef_length': est_coef_ub - est_coef_lb,
    }

    effect_interval = est.effect_interval(X_test,
                                          T0=np.zeros(treat_shape),
                                          T1=np.ones(treat_shape),
                                          alpha=alpha)
    effect_lb, effect_ub = effect_interval[0], effect_interval[1]
    true_eff = true_effect(X_test, np.ones(treat_shape)).reshape(effect_lb.shape)
    est_effect = est.effect(X_test,
                            T0=np.zeros(treat_shape),
                            T1=np.ones(treat_shape))

    cov['x_test'] = np.repeat(X_test, d_y, axis=0)
    cov['effect'] = est_effect.flatten()
    cov['effect_lower'] = effect_lb.flatten()
    cov['effect_upper'] = effect_ub.flatten()
    cov['true_effect'] = true_eff.flatten()
    cov['effect_sqerror'] = ((est_effect - true_eff)**2).flatten()
    unit_treatment_design = cross_product(add_constant(X_test),
                                          np.ones(treat_shape))
    cov['effect_stderr'] = est.model_final.prediction_stderr(
        unit_treatment_design).flatten()
    cov['effect_cov'] = ((true_eff >= effect_lb) & (true_eff <= effect_ub)).flatten()
    cov['effect_length'] = (effect_ub - effect_lb).flatten()
    return cov
Example #9
0
 def _get_continuous_data(self, config):
     """Generate a duplicated synthetic dataset from ``config``.

     Features are standard normal, treatments are independent Bernoulli(0.5)
     columns, and the outcome is ``((X[:, 0] > 0) + .5) * sum(T) + .5``.
     The sample is stacked on top of itself before being returned.

     Returns ``(X, yaug, truth)`` where ``yaug`` packs ``y``, ``y * Taug`` and
     ``cross_product(Taug, Taug)`` side by side and ``truth`` has two
     identical columns holding ``(X[:, 0] > 0) + .5``.
     """
     rng = np.random.RandomState(config['random_state'])
     n_samples = config['n_samples_train']
     X = rng.normal(size=(n_samples, config['n_features']))

     T = np.zeros((n_samples, config['n_relevant_outputs']))
     # Column-by-column draws keep the random stream order fixed.
     for col in range(T.shape[1]):
         T[:, col] = rng.binomial(1, .5, size=(T.shape[0], ))
     Taug = np.hstack([T, np.ones((T.shape[0], 1))])

     total_treatment = np.sum(T, axis=1, keepdims=True)
     y = ((X[:, [0]] > 0.0) + .5) * total_treatment + .5
     yaug = np.hstack([y, y * Taug, cross_product(Taug, Taug)])

     # Duplicate every sample.
     X = np.vstack([X, X])
     yaug = np.vstack([yaug, yaug])
     slope = (X[:, [0]] > 0.0) + .5
     return X, yaug, np.hstack([slope, slope])
Example #10
0
    def adaptive_policy_effect(self, X, groups, policy_gen, alpha=.05):
        """Estimate the effect of an adaptive policy with a normal-based interval.

        Assumes that the policy is adaptive only on exogenous states that
        are not affected by the treatment.

        ``policy_gen(previous_treatment, x, period)`` is called once per unit
        and period; the previous treatment is all zeros in period 0.

        Returns ``(point, (lower, upper), std)``. When the standard error is 0
        the interval collapses to the point estimate.

        Raises ``AttributeError`` when the groups do not all have exactly
        ``self._n_train_periods`` rows.
        """
        u_periods = np.unique(np.bincount(groups.astype(int)))
        lengths_ok = len(u_periods) <= 1 and u_periods[0] == self._n_train_periods
        if not lengths_ok:
            raise AttributeError("Invalid period lengths.")
        n_periods = u_periods[0]

        panelX = X.reshape((X.shape[0] // n_periods, n_periods, -1))
        n_units = panelX.shape[0]

        # Roll the policy forward: each period sees the previous period's treatment.
        tau = np.zeros((n_units, n_periods, self._n_treatments))
        tau_pre = np.zeros((n_units, self._n_treatments))
        for period in range(n_periods):
            decisions = [
                policy_gen(t_pre, x, period)
                for t_pre, x in zip(tau_pre, panelX[:, period, :])
            ]
            tau[:, period, :] = np.array(decisions)
            tau_pre = tau[:, period, :]

        n_cols = (len(self.hetero_inds) + 1) * self._n_treatments
        resTX = np.zeros((n_periods, n_cols))
        for kappa in np.arange(n_periods):
            # Row kappa corresponds to the period that is kappa steps from the end.
            period = n_periods - 1 - kappa
            design = add_constant(panelX[:, period, self.hetero_inds],
                                  has_constant='add')
            resTX[kappa] = np.mean(cross_product(design, tau[:, period, :]),
                                   axis=0)

        flat = resTX.flatten()
        point = np.dot(self.param, flat)
        std = self._policy_effect_stderr(resTX.flatten())
        if std == 0:
            return point, (point, point), 0
        lower = scipy.stats.norm.ppf(alpha / 2, loc=point, scale=std)
        upper = scipy.stats.norm.ppf(1 - alpha / 2, loc=point, scale=std)
        return point, (lower, upper), std
Example #11
0
    def policy_effect(self, tau, subX, groups, alpha=0.05):
        """Estimate the effect of a fixed treatment sequence with a normal-based interval.

        ``tau[p]`` is the treatment vector applied to every unit in period
        ``p``; it is crossed with a constant plus all columns of ``subX`` for
        that period and averaged over units.

        Returns ``(point, (lower, upper), std)``. When the standard error is 0
        the interval collapses to the point estimate.

        Raises ``AttributeError`` when the groups do not all have exactly
        ``self._n_train_periods`` rows.
        """
        u_periods = np.unique(np.bincount(groups.astype(int)))
        lengths_ok = len(u_periods) <= 1 and u_periods[0] == self._n_train_periods
        if not lengths_ok:
            raise AttributeError("Invalid period lengths.")
        n_periods = u_periods[0]

        panelX = subX.reshape((subX.shape[0] // n_periods, n_periods, -1))
        n_units = panelX.shape[0]
        resTX = np.zeros((n_periods, (subX.shape[1] + 1) * self._n_treatments))
        for kappa in np.arange(n_periods):
            # Row kappa corresponds to the period that is kappa steps from the end.
            period = n_periods - 1 - kappa
            design = add_constant(panelX[:, period, :], has_constant='add')
            # Same treatment vector for every unit.
            treatment = np.tile(tau[period].reshape(1, self._n_treatments),
                                (n_units, 1))
            resTX[kappa] = np.mean(cross_product(design, treatment), axis=0)

        flat = resTX.flatten()
        point = np.dot(self.param, flat)
        std = self._policy_effect_stderr(resTX.flatten())
        if std == 0:
            return point, (point, point), 0
        lower = scipy.stats.norm.ppf(alpha / 2, loc=point, scale=std)
        upper = scipy.stats.norm.ppf(1 - alpha / 2, loc=point, scale=std)
        return point, (lower, upper), std
 def true_effect(x, t):
     """Return ``cross_product([1, x[:, :d_x]], t) @ true_coef.T``.

     ``d_x`` and ``true_coef`` come from the enclosing scope.
     """
     intercept = np.ones((x.shape[0], 1))
     features = np.hstack([intercept, x[:, :d_x]])
     return cross_product(features, t) @ true_coef.T
Example #13
0
    def create_instance(self,
                        s_x,
                        sigma_x,
                        sigma_y,
                        conf_str,
                        epsilon,
                        Alpha_unnormalized,
                        hetero_strength=0,
                        hetero_inds=None,
                        autoreg=.5,
                        state_effect=.5,
                        random_seed=123):
        """Populate the model parameters and true effects from explicit inputs.

        Parameters
        ----------
        s_x : number of leading state variables that receive a non-zero ``zeta``.
        sigma_x, sigma_y : stored on the instance unchanged.
        conf_str : total confounding strength, split evenly over the first ``s_x`` entries of ``zeta``.
        epsilon : period-0 treatment effect, stored as ``self.epsilon``.
        Alpha_unnormalized : array-like of shape (n_x, n_treatments). It is copied as
            float, each row is divided by its L1 norm and scaled by ``state_effect``.
            The caller's array is left untouched.
        hetero_strength : scale of the uniformly drawn heterogeneity effects.
        hetero_inds : optional integer-like array of heterogeneity-inducing state indices.
        autoreg : scale applied to every row of ``Beta``.
        state_effect : scale applied to the normalised ``Alpha``.
        random_seed : seed for the private ``RandomState`` used for all draws.

        Reads ``self.n_x``, ``self.n_treatments`` and ``self.n_periods``.
        Returns ``self``.
        """
        random_state = np.random.RandomState(random_seed)
        self.s_x = s_x
        self.conf_str = conf_str
        self.sigma_x = sigma_x
        self.sigma_y = sigma_y
        self.hetero_inds = hetero_inds.astype(
            int) if hetero_inds is not None else hetero_inds
        self.hetero_strength = hetero_strength
        self.autoreg = autoreg
        self.state_effect = state_effect
        self.random_seed = random_seed
        self.endo_inds = np.setdiff1d(np.arange(self.n_x),
                                      hetero_inds).astype(int)
        # The first s_x state variables are confounders. The final s_x variables are exogenous and can create
        # heterogeneity
        # Work on a float copy: the in-place operations below would otherwise
        # rewrite the caller's array, and would raise on integer input.
        self.Alpha = np.array(Alpha_unnormalized, dtype=float)
        self.Alpha /= np.linalg.norm(self.Alpha, axis=1, ord=1, keepdims=True)
        self.Alpha *= state_effect
        if self.hetero_inds is not None:
            self.Alpha[self.hetero_inds] = 0

        # Each row of Beta is a fresh uniform draw, cyclically shifted by the row
        # index. The bounds are loop-invariant, so they are built once.
        low = 4.0**(-np.arange(0, self.n_x))
        high = 4.0**(-np.arange(1, self.n_x + 1))
        self.Beta = np.zeros((self.n_x, self.n_x))
        for t in range(self.n_x):
            self.Beta[t, :] = autoreg * np.roll(
                random_state.uniform(low=low, high=high), t)
        if self.hetero_inds is not None:
            # Remove all cross terms between the endogenous and heterogeneity states.
            self.Beta[np.ix_(self.endo_inds, self.hetero_inds)] = 0
            self.Beta[np.ix_(self.hetero_inds, self.endo_inds)] = 0

        self.epsilon = epsilon
        self.zeta = np.zeros(self.n_x)
        self.zeta[:self.s_x] = self.conf_str / self.s_x

        self.y_hetero_effect = np.zeros(self.n_x)
        self.x_hetero_effect = np.zeros(self.n_x)
        if self.hetero_inds is not None:
            # One scalar draw per vector (y first, then x), shared by all hetero indices.
            self.y_hetero_effect[self.hetero_inds] = random_state.uniform(.5 * hetero_strength,
                                                                          1.5 * hetero_strength) /\
                len(self.hetero_inds)
            self.x_hetero_effect[self.hetero_inds] = random_state.uniform(.5 * hetero_strength,
                                                                          1.5 * hetero_strength) / \
                len(self.hetero_inds)

        # Period 0 is driven by epsilon; later periods by zeta' Beta^(t-1) Alpha.
        self.true_effect = np.zeros((self.n_periods, self.n_treatments))
        self.true_effect[0] = self.epsilon
        for t in np.arange(1, self.n_periods):
            self.true_effect[t, :] = (self.zeta.reshape(
                1, -1) @ np.linalg.matrix_power(self.Beta, t - 1) @ self.Alpha)

        self.true_hetero_effect = np.zeros(
            (self.n_periods, (self.n_x + 1) * self.n_treatments))
        self.true_hetero_effect[0, :] = cross_product(
            add_constant(self.y_hetero_effect.reshape(1, -1),
                         has_constant='add'), self.epsilon.reshape(1, -1))
        for t in np.arange(1, self.n_periods):
            self.true_hetero_effect[t, :] = cross_product(
                add_constant(self.x_hetero_effect.reshape(1, -1),
                             has_constant='add'),
                self.zeta.reshape(1, -1) @ np.linalg.matrix_power(
                    self.Beta, t - 1) @ self.Alpha)

        return self