def counterfactuals(args, config):
    """Fit CAREFL on the 4-variable SEM and plot counterfactual predictions.

    A single observation is built from a fixed noise vector.  Two panels are
    then drawn, each comparing the SEM's own counterfactual value with the
    value returned by ``predict_counterfactual``:

    * left:  X_4 (column 3) when X_1 (column 0) is set to ``alpha``;
    * right: X_3 (column 2) when X_2 (column 1) is set to ``alpha``.

    Args:
        args: run arguments; only ``args.run`` (output directory) is read.
        config: configuration object handed to ``CAREFL``.

    Returns:
        None.  The figure is written to ``<args.run>/counterfactuals_4d.pdf``.
    """
    # Same SEM as in the intervention example.  intervention_sem returns
    # (X, coeffs, dag) -- the DAG is an adjacency matrix -- but only the data
    # matrix X is used here.
    X, _, _ = intervention_sem(n_obs=2500, seed=0, random=False)

    # Rescale the last two columns to unit standard deviation, in place.
    _, _, std_x3, std_x4 = X.std(axis=0)
    X /= np.array([1, 1, std_x3, std_x4])

    # Fit CAREFL to the rescaled data.
    flow_model = CAREFL(config)
    flow_model.fit_to_sem(X)

    def sem_forward(noise):
        """Map a (1, 4) noise row to the matching (1, 4) observation."""
        x0 = noise[0, 0]
        x1 = noise[0, 1]
        # Divide by the data's standard deviations so that the generated
        # point lives on the same scale as the rescaled training data.
        x2 = (x0 + .5 * x1 * x1 * x1 + noise[0, 2]) / std_x3
        x3 = (-x1 + .5 * x0 * x0 + noise[0, 3]) / std_x4
        return np.array([x0, x1, x2, x3]).reshape((1, 4))

    # Noise realisation behind the observation we condition on.
    noise_obs = np.array([2, 1.5, 1.4, -1]).reshape((1, 4))
    x_observed = sem_forward(noise_obs)  # should be (2.00, 1.50, 0.81, −0.28)

    def noise_with(col, value):
        """Return a copy of the observed noise with entry ``col`` replaced."""
        altered = noise_obs.copy()
        altered[0, col] = value
        return altered

    # ---- plotting -------------------------------------------------------
    alphas = np.arange(-3, 3, .1)
    sns.set_style("whitegrid")
    sns.set_palette(sns.color_palette("muted", 8))
    fig, (ax_left, ax_right) = plt.subplots(1, 2, figsize=(10, 4))
    curve_style = dict(linewidth=3, linestyle='-.')

    # Left panel: quality of counterfactual predictions for X_4.
    truth_x4 = [sem_forward(noise_with(0, alpha))[0, 3] for alpha in alphas]
    pred_x4 = [
        flow_model.predict_counterfactual(
            x_obs=x_observed, cf_val=alpha, iidx=0)[0, 3]
        for alpha in alphas
    ]
    ax_left.plot(
        alphas, truth_x4,
        label=r'True $\mathbb{E} [{X_4}_{X_1 \leftarrow \alpha} (n) ] $',
        **curve_style)
    ax_left.plot(
        alphas, pred_x4,
        label=r'Predicted $\mathbb{E} [{X_4}_{X_1 \leftarrow \alpha} (n) ] $',
        **curve_style)
    ax_left.legend(loc=1, fontsize=15)
    ax_left.set_xlabel(
        r'Value of counterfactual variable, $X_1=\alpha$', fontsize=18)
    ax_left.set_ylabel(r'Predicted value of $X_4$', fontsize=18)

    # Right panel: quality of counterfactual predictions for X_3.
    truth_x3 = [sem_forward(noise_with(1, alpha))[0, 2] for alpha in alphas]
    pred_x3 = [
        flow_model.predict_counterfactual(
            x_obs=x_observed, cf_val=alpha, iidx=1)[0, 2]
        for alpha in alphas
    ]
    ax_right.plot(
        alphas, truth_x3,
        label=r'True $\mathbb{E} [{X_3}_{X_2 \leftarrow \alpha} (n) ] $',
        **curve_style)
    ax_right.plot(
        alphas, pred_x3,
        label=r'Predicted $\mathbb{E} [{X_3}_{X_2 \leftarrow \alpha} (n) ] $',
        **curve_style)
    ax_right.legend(loc='best', fontsize=15)
    ax_right.set_xlabel(
        r'Value of counterfactual variable, $X_2=\alpha$', fontsize=18)
    ax_right.set_ylabel(r'Predicted value of $X_3$', fontsize=18)

    plt.tight_layout()
    plt.subplots_adjust(top=0.925)
    out_path = os.path.join(args.run, 'counterfactuals_4d.pdf')
    plt.savefig(out_path, dpi=300)
def true_observation(self, n_samples=100):
    """Generate observations from the synthetic SEM.

    Calls ``intervention_sem`` with a fixed keyword configuration
    (``dim=3, seed=0, noise_dist='laplace', random=True, shuffle=False,
    nonlin='poly', multiplicative=False``).

    Args:
        n_samples: number of observations requested from
            ``intervention_sem``.  The default (100) matches the value that
            used to be hard-coded in the call.

    Returns:
        The value returned by ``intervention_sem``, unchanged.
        NOTE(review): other call sites unpack that value as
        ``(data, coeffs, dag)`` -- confirm callers expect the full tuple
        rather than only the data matrix.
    """
    # The data come from the SEM itself, so no draw from the flow's prior is
    # needed; the requested sample count is forwarded instead of a constant.
    return intervention_sem(
        n_samples,
        dim=3,
        seed=0,
        noise_dist='laplace',
        random=True,
        shuffle=False,
        nonlin='poly',
        multiplicative=False,
    )
def run_interventions(args, config):
    """Fit a model to synthetic SEM data and plot CAREFL samples against it.

    Data are generated with ``intervention_sem``, a model is fitted, and --
    for CAREFL only -- generated samples are plotted against the SEM data,
    first observationally and then under an intervention.

    Args:
        args: run arguments; only forwarded to ``plot_true_generated``.
        config: configuration object.  Reads ``config.algorithm`` and, under
            ``config.data``: ``n_points``, ``graphdim``, ``seed``, ``random``,
            ``multiplicative``, ``nonlin`` and ``iidx``.

    Returns:
        None.
    """
    n_obs = config.data.n_points
    model = config.algorithm.lower()
    # Single source of truth for the algorithm check: comparing the raw
    # ``config.algorithm`` later on would disagree with the lower-cased
    # ``model`` used to pick the class (e.g. for 'CAREFL').
    is_carefl = model == 'carefl'
    print("** {} observations **".format(n_obs))

    # Synthetic data generated from the SEM.  intervention_sem returns
    # (X, coeffs, dag); the coefficients and DAG are not needed here.
    data, _, _ = intervention_sem(
        n_obs,
        dim=config.data.graphdim,
        seed=config.data.seed,
        random=config.data.random,
        multiplicative=config.data.multiplicative,
        nonlin=config.data.nonlin,
    )

    print("fitting a {} model".format(model))
    # Affine autoregressive flow (CAREFL), or an ANM with the named method.
    mod = CAREFL(config) if is_carefl else ANM(method=model)
    mod.fit_to_sem(data)

    if is_carefl:
        x_obs_dist = mod.generate_observation(n_samples=n_obs)
        plot_true_generated(args, data, x_obs_dist, title='observational')

    # Values assigned to the intervened variable (index config.data.iidx).
    avals = [-1]
    for a in avals:
        res = mod.predict_intervention(
            a, n_samples=n_obs, iidx=config.data.iidx)
        if not is_carefl:
            # Nothing below is used unless the comparison plot is drawn.
            continue

        # Third element of the result is plotted as the generated sample.
        x_int_dist = res[2]
        # Ground truth: the same SEM with the intervention applied.
        data_int = intervention_sem(
            n_obs,
            dim=config.data.graphdim,
            seed=config.data.seed,
            random=config.data.random,
            multiplicative=config.data.multiplicative,
            nonlin=config.data.nonlin,
            iidx=config.data.iidx,
            value=a,
        )
        plot_true_generated(
            args, data_int[0], x_int_dist, title='interventional')
def run_interventions(args, config):
    """Fit a model to the 4-variable SEM and score interventional predictions.

    For each ``a`` in ``np.arange(-3, 3, .1)`` the fitted model predicts the
    effect of intervening on variable 0 with value ``a``.  The column means
    of the first two elements of each prediction are compared, by mean
    squared error, with the reference curves ``a`` (column 2) and
    ``coeffs[1] * a**2`` (column 3).

    Args:
        args: run arguments; ``args.output`` is the directory for the result
            file.
        config: configuration object.  Reads ``config.algorithm`` and, under
            ``config.data``: ``n_points``, ``seed``, ``random`` and
            ``multiplicative``.  Also passed to ``CAREFL`` and
            ``res_save_name``.

    Returns:
        None.  A dict with keys ``"x3"``, ``"x4"``, ``"x3e"``, ``"x4e"`` is
        pickled to ``os.path.join(args.output, res_save_name(config, model))``.
    """
    n_obs = config.data.n_points
    model = config.algorithm.lower()
    print("** {} observations **".format(n_obs))

    # Generate coefficients for equation (12), and data from that SEM.
    data, coeffs, dag = intervention_sem(
        n_obs,
        dim=4,
        seed=config.data.seed,
        random=config.data.random,
        multiplicative=config.data.multiplicative,
    )

    print("fitting a {} model".format(model))
    # Affine autoregressive flow (CAREFL), or an ANM with gp/linear functions.
    mod = CAREFL(config) if model == 'carefl' else ANM(method=model)
    mod.fit_to_sem(data, dag)

    # Intervene on X_1 and summarise {x | do(X_1=a)} for a in [-3, 3).
    avals = np.arange(-3, 3, .1)
    x_int_sample = []
    x_int_exp = []
    for a in avals:
        res = mod.predict_intervention(a, n_samples=20, iidx=0)
        x_int_sample.append(res[0].mean(axis=0))
        x_int_exp.append(res[1].mean(axis=0))
    x_int_sample = np.array(x_int_sample)
    x_int_exp = np.array(x_int_exp)

    # Reference curves: E[x_3 | x_1=a] = a and E[x_4 | x_1=a] = c_1 * a^2.
    true_x3 = avals
    true_x4 = coeffs[1] * avals * avals

    # MSE of each summary against its reference curve; key order is kept as
    # x3, x4, x3e, x4e so the pickled dict is unchanged.
    results = {
        "x3": np.mean((x_int_sample[:, 2] - true_x3)**2),
        "x4": np.mean((x_int_sample[:, 3] - true_x4)**2),
        "x3e": np.mean((x_int_exp[:, 2] - true_x3)**2),
        "x4e": np.mean((x_int_exp[:, 3] - true_x4)**2),
    }

    # Store results; the context manager guarantees the file is flushed and
    # closed even if pickling fails.
    out_path = os.path.join(args.output, res_save_name(config, model))
    with open(out_path, 'wb') as fh:
        pickle.dump(results, fh)