def effect(self, X=None, T0=0, T1=1):
    """
    Calculate the heterogeneous treatment effect τ(·,·,·).

    The effect is calculated between the two treatment points conditional on a
    vector of features on a set of m test samples {T0ᵢ, T1ᵢ, Xᵢ}.

    Parameters
    ----------
    T0: (m × dₜ) matrix
        Base treatments for each sample
    T1: (m × dₜ) matrix
        Target treatments for each sample
    X: optional (m × dₓ) matrix
        Features for each sample

    Returns
    -------
    τ: (m × d_y) matrix
        Heterogeneous treatment effects on each outcome for each sample.
        Note that when Y is a vector rather than a 2-dimensional array, the
        corresponding singleton dimension will be collapsed (so this method
        will return a vector)
    """
    X, T0, T1 = check_input_arrays(X, T0, T1)
    if np.ndim(T0) == 0:
        T0 = np.repeat(T0, 1 if X is None else np.shape(X)[0])
    if np.ndim(T1) == 0:
        T1 = np.repeat(T1, 1 if X is None else np.shape(X)[0])
    if X is None:
        X = np.empty((np.shape(T0)[0], 0))
    return (self._effect_model.predict([T1, X]) -
            self._effect_model.predict([T0, X])).reshape((-1,) + self._d_y)
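
# Usage sketch (illustrative only, not part of the class): how `effect` is
# typically called once the estimator has been fitted. `est` and `X_test` are
# hypothetical names for a fitted estimator and an (m × d_x) feature matrix.
def _example_effect_usage(est, X_test):
    # scalar T0/T1 are broadcast across all m samples in X_test
    tau = est.effect(X_test, T0=0, T1=1)
    # per-sample treatment pairs are also accepted
    m = np.shape(X_test)[0]
    tau_pairs = est.effect(X_test, T0=np.zeros(m), T1=np.ones(m))
    return tau, tau_pairs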
def predict(self, T, X):
    """Predict outcomes given treatment assignments and features.

    Parameters
    ----------
    T: (m × dₜ) matrix
        Base treatments for each sample
    X: (m × dₓ) matrix
        Features for each sample

    Returns
    -------
    Y: (m × d_y) matrix
        Outcomes for each sample.
        Note that when Y is a vector rather than a 2-dimensional array, the
        corresponding singleton dimension will be collapsed (so this method
        will return a vector)
    """
    T, X = check_input_arrays(T, X)
    return self._effect_model.predict([T, X]).reshape((-1,) + self._d_y)
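
# Usage sketch (illustrative only): `predict` evaluates the fitted response
# surface h(T, X) directly rather than a difference of treatment points.
# `est`, `T_test`, and `X_test` are hypothetical names.
def _example_predict_usage(est, T_test, X_test):
    # shape (m,) or (m, d_y), matching the shape of Y passed at fit time
    y_hat = est.predict(T_test, X_test)
    return y_hat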
def marginal_effect(self, T, X=None):
    """
    Calculate the marginal effect ∂τ(·, ·) around a base treatment point conditional on features.

    Parameters
    ----------
    T: (m × dₜ) matrix
        Base treatments for each sample
    X: optional (m × dₓ) matrix
        Features for each sample

    Returns
    -------
    grad_tau: (m × d_y × dₜ) array
        Heterogeneous marginal effects on each outcome for each sample.
        Note that when Y or T is a vector rather than a 2-dimensional array,
        the corresponding singleton dimensions in the output will be collapsed
        (e.g. if both are vectors, then the output of this method will also be
        a vector)
    """
    T, X = check_input_arrays(T, X)
    # TODO: any way to get this to work on batches of arbitrary size?
    return self._marginal_effect_model.predict([T, X], batch_size=1).reshape((-1,) + self._d_y + self._d_t)
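
# Usage sketch (illustrative only): `marginal_effect` returns the gradient of
# the fitted response surface with respect to treatment at each base treatment
# point. Names are hypothetical; note that the method predicts with
# batch_size=1 internally, so it can be slow on large test sets.
def _example_marginal_effect_usage(est, T_test, X_test):
    # (m, d_y, d_t) in general; singleton dimensions collapse when Y or T
    # were passed as vectors at fit time
    grad_tau = est.marginal_effect(T_test, X_test)
    return grad_tau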
def fit(self, Y, T, X, Z, *, inference=None):
    """Estimate the counterfactual model from data.

    That is, estimate the functions τ(·, ·, ·) and ∂τ(·, ·).

    Parameters
    ----------
    Y: (n × d_y) matrix or vector of length n
        Outcomes for each sample
    T: (n × dₜ) matrix or vector of length n
        Treatments for each sample
    X: (n × dₓ) matrix
        Features for each sample
    Z: (n × d_z) matrix
        Instruments for each sample
    inference: string, :class:`.Inference` instance, or None
        Method for performing inference.  This estimator supports 'bootstrap'
        (or an instance of :class:`.BootstrapInference`)

    Returns
    -------
    self
    """
    Y, T, X, Z = check_input_arrays(Y, T, X, Z)
    assert 1 <= np.ndim(X) <= 2
    assert 1 <= np.ndim(Z) <= 2
    assert 1 <= np.ndim(T) <= 2
    assert 1 <= np.ndim(Y) <= 2
    assert np.shape(X)[0] == np.shape(Y)[0] == np.shape(T)[0] == np.shape(Z)[0]

    # in case vectors were passed for Y or T, keep track of trailing dims for reshaping effect output
    d_x, d_y, d_z, d_t = [np.shape(a)[1] if np.ndim(a) > 1 else 1 for a in [X, Y, Z, T]]
    x_in, y_in, z_in, t_in = [L.Input((d,)) for d in [d_x, d_y, d_z, d_t]]
    n_components = self._n_components

    treatment_network = self._m(z_in, x_in)

    # the dimensionality of the output of the network
    # TODO: is there a more robust way to do this?
    d_n = K.int_shape(treatment_network)[-1]

    pi, mu, sig = mog_model(n_components, d_n, d_t)([treatment_network])

    ll = mog_loss_model(n_components, d_t)([pi, mu, sig, t_in])

    model = Model([z_in, x_in, t_in], [ll])
    model.add_loss(L.Lambda(K.mean)(ll))
    model.compile(self._optimizer)
    # TODO: do we need to give the user more control over other arguments to fit?
    model.fit([Z, X, T], [], **self._first_stage_options)

    lm = response_loss_model(lambda t, x: self._h(t, x),
                             lambda z, x: Model([z_in, x_in],
                                                # subtle point: we need to build a new model each time,
                                                # because each model encapsulates its randomness
                                                [mog_sample_model(n_components, d_t)([pi, mu, sig])])([z, x]),
                             d_z, d_x, d_y,
                             self._n_samples, self._use_upper_bound_loss, self._n_gradient_samples)

    rl = lm([z_in, x_in, y_in])
    response_model = Model([z_in, x_in, y_in], [rl])
    response_model.add_loss(L.Lambda(K.mean)(rl))
    response_model.compile(self._optimizer)
    # TODO: do we need to give the user more control over other arguments to fit?
    response_model.fit([Z, X, Y], [], **self._second_stage_options)

    self._effect_model = Model([t_in, x_in], [self._h(t_in, x_in)])

    # TODO: it seems like we need to sum over the batch because we can only apply gradients to a scalar,
    #       not a general tensor (because of how backprop works in every framework)
    #       (alternatively, we could iterate through the batch in addition to iterating through the
    #       output, but this seems annoying...)
    #       Therefore, it's important that we use a batch size of 1 when we call predict with this model
    def calc_grad(t, x):
        h = self._h(t, x)
        all_grads = K.concatenate([g
                                   for i in range(d_y)
                                   for g in K.gradients(K.sum(h[:, i]), [t])])
        return K.reshape(all_grads, (-1, d_y, d_t))

    self._marginal_effect_model = Model([t_in, x_in],
                                        L.Lambda(lambda tx: calc_grad(*tx))([t_in, x_in]))
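
# Construction sketch (assumptions flagged): `fit` above applies `self._m(z, x)`
# and `self._h(t, x)` to Keras Input tensors, so the user-supplied first- and
# second-stage networks are callables mapping tensors to tensors. A minimal
# pair, assuming `L` is the Keras layers module imported in this file; the
# layer widths and depths here are arbitrary placeholders.
def _example_networks():
    def m(z, x):
        # first-stage treatment network: maps (Z, X) tensors to a hidden representation
        hidden = L.Dense(64, activation='relu')(L.Concatenate()([z, x]))
        return L.Dense(64, activation='relu')(hidden)

    def h(t, x):
        # second-stage response network: maps (T, X) tensors to a scalar outcome
        hidden = L.Dense(64, activation='relu')(L.Concatenate()([t, x]))
        return L.Dense(1)(hidden)

    return m, h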
def fit(self, Y, T, X=None, W=None, Z=None, *, outcome_names=None, treatment_names=None,
        feature_names=None, confounder_names=None, instrument_names=None, graph=None,
        estimand_type="nonparametric-ate", proceed_when_unidentifiable=True,
        missing_nodes_as_confounders=False, control_value=0, treatment_value=1,
        target_units="ate", **kwargs):
    """
    Estimate the counterfactual model from data through the dowhy package.

    Parameters
    ----------
    Y: vector of length n
        Outcomes for each sample
    T: vector of length n
        Treatments for each sample
    X: optional (n, d_x) matrix (Default=None)
        Features for each sample
    W: optional (n, d_w) matrix (Default=None)
        Controls for each sample
    Z: optional (n, d_z) matrix (Default=None)
        Instruments for each sample
    outcome_names: optional list (Default=None)
        Name of the outcome
    treatment_names: optional list (Default=None)
        Name of the treatment
    feature_names: optional list (Default=None)
        Name of the features
    confounder_names: optional list (Default=None)
        Name of the confounders
    instrument_names: optional list (Default=None)
        Name of the instruments
    graph: optional
        Path to a DOT file containing a DAG, or a string containing a DAG
        specification in DOT format
    estimand_type: optional string
        Type of estimand requested (currently only "nonparametric-ate" is supported).
        In the future, may support other specific parametric forms of identification
    proceed_when_unidentifiable: optional bool (Default=True)
        Whether the identification should proceed by ignoring potential unobserved confounders
    missing_nodes_as_confounders: optional bool (Default=False)
        Whether variables in the dataframe that are not included in the causal graph
        should be automatically included as confounder nodes
    control_value: optional scalar (Default=0)
        Value of the treatment in the control group, for effect estimation
    treatment_value: optional scalar (Default=1)
        Value of the treatment in the treated group, for effect estimation
    target_units: optional (Default="ate")
        The units for which the treatment effect should be estimated. This can be of three types:

        1. A string for common specifications of target units (namely "ate", "att" and "atc"),
        2. A lambda function that can be used as an index for the data (pandas DataFrame),
        3. A new DataFrame that contains values of the effect_modifiers; the effect will be
           estimated only for this new data

    kwargs: optional
        Other keyword arguments passed through to the fit method of the CATE estimator

    Returns
    -------
    self
    """
    Y, T, X, W, Z = check_input_arrays(Y, T, X, W, Z)

    # create dataframe
    n_obs = Y.shape[0]
    Y, T, X, W, Z = reshape_arrays_2dim(n_obs, Y, T, X, W, Z)

    # currently dowhy only supports a single outcome and a single treatment
    assert Y.shape[1] == 1, "Can only accept single dimensional outcome."
    assert T.shape[1] == 1, "Can only accept single dimensional treatment."
    # column names
    if outcome_names is None:
        outcome_names = [f"Y{i}" for i in range(Y.shape[1])]
    if treatment_names is None:
        treatment_names = [f"T{i}" for i in range(T.shape[1])]
    if feature_names is None:
        feature_names = [f"X{i}" for i in range(X.shape[1])]
    if confounder_names is None:
        confounder_names = [f"W{i}" for i in range(W.shape[1])]
    if instrument_names is None:
        instrument_names = [f"Z{i}" for i in range(Z.shape[1])]
    column_names = outcome_names + treatment_names + feature_names + confounder_names + instrument_names
    df = pd.DataFrame(np.hstack((Y, T, X, W, Z)), columns=column_names)

    self.dowhy_ = CausalModel(
        data=df,
        treatment=treatment_names,
        outcome=outcome_names,
        graph=graph,
        common_causes=feature_names + confounder_names if X.shape[1] > 0 or W.shape[1] > 0 else None,
        instruments=instrument_names if Z.shape[1] > 0 else None,
        effect_modifiers=feature_names if X.shape[1] > 0 else None,
        estimand_type=estimand_type,
        proceed_when_unidentifiable=proceed_when_unidentifiable,
        missing_nodes_as_confounders=missing_nodes_as_confounders
    )
    # pass the user's flag through instead of hard-coding True
    self.identified_estimand_ = self.dowhy_.identify_effect(
        proceed_when_unidentifiable=proceed_when_unidentifiable)
    method_name = "backdoor." + self._cate_estimator.__module__ + "." + \
        self._cate_estimator.__class__.__name__
    init_params = {}
    for p in self._get_params():
        init_params[p] = getattr(self._cate_estimator, p)
    self.estimate_ = self.dowhy_.estimate_effect(self.identified_estimand_,
                                                 method_name=method_name,
                                                 control_value=control_value,
                                                 treatment_value=treatment_value,
                                                 target_units=target_units,
                                                 method_params={
                                                     "init_params": init_params,
                                                     "fit_params": kwargs,
                                                 })
    return self
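
# Usage sketch (illustrative only): fitting through the dowhy wrapper and
# inspecting the artifacts it stores. `wrapper`, `Y`, `T`, `X`, and `W` are
# hypothetical names; `wrapper` is assumed to be an instance of the class
# containing the `fit` method above.
def _example_dowhy_fit(wrapper, Y, T, X, W):
    wrapper.fit(Y, T, X=X, W=W,
                outcome_names=["outcome"], treatment_names=["treatment"])
    print(wrapper.identified_estimand_)   # dowhy's identification result
    print(wrapper.estimate_)              # dowhy's effect estimate
    return wrapper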