コード例 #1
0
def counterfactuals(args, config):
    """Plot true versus CAREFL-predicted counterfactuals for one observation.

    Samples are drawn with ``intervention_sem`` (the same SEM as in the
    intervention example), the last two columns are divided by their standard
    deviation, and a ``CAREFL`` model is fitted to the result. For one fixed
    noise vector, a counterfactual value ``alpha`` is swept over
    ``np.arange(-3, 3, .1)``:

    * left panel: the first variable is set to ``alpha`` and the fourth
      variable is reported;
    * right panel: the second variable is set to ``alpha`` and the third
      variable is reported.

    The figure is written to ``counterfactuals_4d.pdf`` inside ``args.run``.

    Args:
        args: object whose ``run`` attribute is the output directory.
        config: configuration object handed to ``CAREFL``.
    """
    # intervention_sem returns (X, coeffs, dag); only the samples are used here
    samples, _, _ = intervention_sem(n_obs=2500, seed=0, random=False)
    (_, _, std_x3, std_x4) = samples.std(axis=0)
    samples /= np.array([1, 1, std_x3, std_x4])

    # fit CAReFl to the rescaled data
    flow = CAREFL(config)
    flow.fit_to_sem(samples)

    def observe(noise):
        # Structural equations applied to a (1, 4) noise row, using the same
        # rescaling of the last two variables as the training data.
        x0 = noise[0, 0]
        x1 = noise[0, 1]
        x2 = (x0 + .5 * x1 * x1 * x1 + noise[0, 2]) / std_x3
        x3 = (-x1 + .5 * x0 * x0 + noise[0, 3]) / std_x4
        return np.array([x0, x1, x2, x3]).reshape((1, 4))

    # Fixed noise realisation behind the observation we reason about.
    base_noise = np.array([2, 1.5, 1.4, -1]).reshape((1, 4))
    x_obs = observe(base_noise)  # original note: should be (2.00, 1.50, 0.81, −0.28)

    def true_counterfactual(alpha, cause, effect):
        # Replace the noise entry of the intervened variable by alpha and
        # push the modified noise through the structural equations.
        swapped = np.hstack((base_noise[0, :cause], alpha, base_noise[0, cause + 1:]))
        return observe(swapped.reshape((1, 4)))[0, effect]

    grid = np.arange(-3, 3, .1)
    sns.set_style("whitegrid")
    sns.set_palette(sns.color_palette("muted", 8))
    _, (left, right) = plt.subplots(1, 2, figsize=(10, 4))

    # One entry per panel: which variable is intervened on, which one is read
    # off, and the exact texts shown on the axes.
    panels = (
        {
            'ax': left,
            'cause': 0,
            'effect': 3,
            'true_label': r'True $\mathbb{E} [{X_4}_{X_1 \leftarrow \alpha} (n) ] $',
            'pred_label': r'Predicted $\mathbb{E} [{X_4}_{X_1 \leftarrow \alpha} (n) ] $',
            'legend_loc': 1,
            'xlabel': r'Value of counterfactual variable, $X_1=\alpha$',
            'ylabel': r'Predicted value of $X_4$',
        },
        {
            'ax': right,
            'cause': 1,
            'effect': 2,
            'true_label': r'True $\mathbb{E} [{X_3}_{X_2 \leftarrow \alpha} (n) ] $',
            'pred_label': r'Predicted $\mathbb{E} [{X_3}_{X_2 \leftarrow \alpha} (n) ] $',
            'legend_loc': 'best',
            'xlabel': r'Value of counterfactual variable, $X_2=\alpha$',
            'ylabel': r'Predicted value of $X_3$',
        },
    )

    for panel in panels:
        ax = panel['ax']
        cause = panel['cause']
        effect = panel['effect']

        truth = [true_counterfactual(alpha, cause, effect) for alpha in grid]
        predicted = [
            flow.predict_counterfactual(x_obs=x_obs, cf_val=alpha, iidx=cause)[0, effect]
            for alpha in grid
        ]

        ax.plot(grid, truth, label=panel['true_label'], linewidth=3, linestyle='-.')
        ax.plot(grid, predicted, label=panel['pred_label'], linewidth=3, linestyle='-.')
        ax.legend(loc=panel['legend_loc'], fontsize=15)
        ax.set_xlabel(panel['xlabel'], fontsize=18)
        ax.set_ylabel(panel['ylabel'], fontsize=18)

    plt.tight_layout()
    plt.subplots_adjust(top=0.925)
    plt.savefig(os.path.join(args.run, 'counterfactuals_4d.pdf'), dpi=300)
コード例 #2
0
def true_observation(self, n_samples=100):
    """Generate reference data from the ground-truth SEM.

    The data come straight from ``intervention_sem`` (3 variables, Laplace
    noise, polynomial non-linearity, additive noise, fixed seed 0); the fitted
    flow is not involved, so nothing is sampled from its prior.

    Args:
        n_samples: number of observations requested from the SEM. It is
            forwarded to ``intervention_sem`` instead of a hard-coded 100, so
            the argument is honoured; the default keeps the previous size.

    Returns:
        Whatever ``intervention_sem`` returns, unchanged.
        NOTE(review): presumably a ``(data, coeffs, dag)`` tuple rather than a
        bare array -- confirm against ``intervention_sem``.
    """
    return intervention_sem(n_samples, dim=3, seed=0, noise_dist='laplace',
                            random=True, shuffle=False, nonlin='poly', multiplicative=False)
コード例 #3
0
def run_interventions(args, config):
    """Fit a causal model to synthetic SEM data and plot interventional samples.

    Steps:
      1. draw ``config.data.n_points`` observations with ``intervention_sem``;
      2. fit ``CAREFL`` (algorithm name ``carefl``) or ``ANM`` (any other name);
      3. for CAREFL, plot generated observational samples against the data;
      4. for each intervention value, predict under the intervention on
         variable ``config.data.iidx``, draw matching ground-truth data from
         the intervened SEM and, for CAREFL, plot the two against each other.

    The algorithm name is compared case-insensitively everywhere, so e.g.
    ``'CAREFL'`` both selects the flow model and enables the plots.

    Args:
        args: run arguments, forwarded to ``plot_true_generated``.
        config: configuration providing ``algorithm`` and the ``data.*``
            settings read below (``n_points``, ``graphdim``, ``seed``,
            ``random``, ``multiplicative``, ``nonlin``, ``iidx``).
    """
    n_obs = config.data.n_points
    model = config.algorithm.lower()
    # Single normalised flag: the same test picks the model and gates plotting.
    is_carefl = model == 'carefl'
    print("** {} observations **".format(n_obs))

    # generate coefficients for equation (12), and data from that SEM
    # intervention_sem returns (X, coeffs, dag); only the data are used here
    data, _, _ = intervention_sem(n_obs, dim=config.data.graphdim, seed=config.data.seed,
                                  random=config.data.random,
                                  multiplicative=config.data.multiplicative,
                                  nonlin=config.data.nonlin)

    print("fitting a {} model".format(model))
    # fit to an affine autoregressive flow or ANM with gp/linear functions
    mod = CAREFL(config) if is_carefl else ANM(method=model)

    # the DAG is deliberately not passed to the fit
    mod.fit_to_sem(data)

    if is_carefl:
        x_obs_dist = mod.generate_observation(n_samples=n_obs)
        plot_true_generated(args, data, x_obs_dist, title='observational')

    # intervene on the variable at config.data.iidx and sample {x | do(X=a)}
    avals = [-1]  # a single intervention value for now
    # Per-value summaries of the prediction; nothing in this function reads
    # them afterwards.
    x_int_sample = []
    x_int_exp = []
    for a in avals:
        res = mod.predict_intervention(a, n_samples=n_obs, iidx=config.data.iidx)
        x_int_sample.append(res[0].mean(axis=0))
        x_int_exp.append(res[1].mean(axis=0))

        # ground-truth data from the SEM under the same intervention
        data_int = intervention_sem(n_obs, dim=config.data.graphdim, seed=config.data.seed,
                                    random=config.data.random,
                                    multiplicative=config.data.multiplicative,
                                    nonlin=config.data.nonlin,
                                    iidx=config.data.iidx, value=a)

        if is_carefl:
            # res[2] is only read here, where it is plotted, so a model whose
            # result lacks a third entry is not indexed out of range.
            plot_true_generated(args, data_int[0], res[2], title='interventional')
コード例 #4
0
def run_interventions(args, config):
    """Evaluate interventional predictions on a 4-variable SEM and store the MSEs.

    Data are drawn with ``intervention_sem`` (``dim=4``), a ``CAREFL`` model
    (algorithm name ``carefl``) or an ``ANM`` model (any other name) is fitted,
    and interventions ``do(X_1 = a)`` are predicted for ``a`` in
    ``np.arange(-3, 3, .1)``. The mean squared errors of the predicted third
    and fourth variables against their known expectations are pickled to
    ``args.output`` under the file name given by ``res_save_name``.

    Stored keys: ``"x3"`` / ``"x4"`` use the mean of the first element of the
    prediction result, ``"x3e"`` / ``"x4e"`` the mean of its second element.

    Args:
        args: object whose ``output`` attribute is the results directory.
        config: configuration providing ``algorithm`` and ``data.n_points``,
            ``data.seed``, ``data.random`` and ``data.multiplicative``.
    """
    n_obs = config.data.n_points
    model = config.algorithm.lower()
    print("** {} observations **".format(n_obs))

    # generate coefficients for equation (12), and data from that SEM
    data, coeffs, dag = intervention_sem(
        n_obs,
        dim=4,
        seed=config.data.seed,
        random=config.data.random,
        multiplicative=config.data.multiplicative)

    print("fitting a {} model".format(model))
    # fit to an affine autoregressive flow or ANM with gp/linear functions
    mod = CAREFL(config) if model == 'carefl' else ANM(method=model)
    mod.fit_to_sem(data, dag)

    # intervene on X_1 and get a sample of {x | do(X_1=a)} for a in [-3, 3)
    # (np.arange excludes the upper end), in steps of 0.1
    avals = np.arange(-3, 3, .1)
    x_int_sample = []
    x_int_exp = []
    for a in avals:
        res = mod.predict_intervention(a, n_samples=20, iidx=0)
        x_int_sample.append(res[0].mean(axis=0))
        x_int_exp.append(res[1].mean(axis=0))
    x_int_sample = np.array(x_int_sample)
    x_int_exp = np.array(x_int_exp)

    # compute the MSE between the true E[x_3|x_1=a] and the empirical
    # expectation from the sample; we know that the true E[x_3|x_1=a] = a
    mse_x3 = np.mean((x_int_sample[:, 2] - avals) ** 2)
    mse_x3e = np.mean((x_int_exp[:, 2] - avals) ** 2)
    # do the same for x_4; true E[x_4|x_1=a] = c_1*a^2
    x4_expected = coeffs[1] * avals * avals
    mse_x4 = np.mean((x_int_sample[:, 3] - x4_expected) ** 2)
    mse_x4e = np.mean((x_int_exp[:, 3] - x4_expected) ** 2)

    # store results (key order kept as before so the pickle is unchanged)
    results = {
        "x3": mse_x3,
        "x4": mse_x4,
        "x3e": mse_x3e,
        "x4e": mse_x4e,
    }
    out_path = os.path.join(args.output, res_save_name(config, model))
    # The context manager closes (and flushes) the file even if dumping fails;
    # previously the handle was left for the garbage collector.
    with open(out_path, 'wb') as fh:
        pickle.dump(results, fh)