Example #1
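All five snippets appear to assume a common set of imports, roughly as sketched
below (the econml module path is an assumption and has changed across releases;
Example #5 additionally relies on an AbstractBaseline class from its own codebase):

import time
import warnings

import keras
import numpy as np
from sklearn.preprocessing import OneHotEncoder

from econml.deepiv import DeepIVEstimator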
    def test_deepiv_shape(self):
        """Make sure that arbitrary sizes for t, z, x, and y don't break the basic operations."""
        for _ in range(5):
            d_t = np.random.choice(range(1, 4))  # number of treatments
            d_z = np.random.choice(range(1, 4))  # number of instruments
            d_x = np.random.choice(range(1, 4))  # number of features
            d_y = np.random.choice(range(1, 4))  # number of responses
            n = 500
            # simple DGP only for illustration
            x = np.random.uniform(size=(n, d_x))
            z = np.random.uniform(size=(n, d_z))
            p_x_t = np.random.uniform(size=(d_x, d_t))
            p_z_t = np.random.uniform(size=(d_z, d_t))
            t = x @ p_x_t + z @ p_z_t
            p_xt_y = np.random.uniform(size=(d_x * d_t, d_y))
            y = (x.reshape(n, -1, 1) * t.reshape(n, 1, -1)).reshape(n, -1) @ p_xt_y

            # Define the treatment model neural network architecture
            # This will take the concatenation of one-dimensional values z and x as input,
            # so the input shape is (d_z + d_x,)
            # The exact shape of the final layer is not critical because the Deep IV framework will
            # add extra layers on top for the mixture density network
            treatment_model = keras.Sequential([keras.layers.Dense(128, activation='relu', input_shape=(d_z + d_x,)),
                                                keras.layers.Dropout(0.17),
                                                keras.layers.Dense(64, activation='relu'),
                                                keras.layers.Dropout(0.17),
                                                keras.layers.Dense(32, activation='relu'),
                                                keras.layers.Dropout(0.17)])

            # Define the response model neural network architecture
            # This will take the concatenation of one-dimensional values t and x as input,
            # so the input shape is (d_t + d_x,)
            # The output should match the shape of y, so it must have shape (d_y,) in this case
            # NOTE: For the response model, it is important to define the model *outside*
            #       of the lambda passed to the DeepIvEstimator, as we do here,
            #       so that the same weights will be reused in each instantiation
            response_model = keras.Sequential([keras.layers.Dense(128, activation='relu', input_shape=(d_t + d_x,)),
                                               keras.layers.Dropout(0.17),
                                               keras.layers.Dense(64, activation='relu'),
                                               keras.layers.Dropout(0.17),
                                               keras.layers.Dense(32, activation='relu'),
                                               keras.layers.Dropout(0.17),
                                               keras.layers.Dense(d_y)])

            deepIv = DeepIVEstimator(n_components=10,  # number of Gaussians in our mixture density network
                                     m=lambda z, x: treatment_model(
                                         keras.layers.concatenate([z, x])),  # treatment model
                                     h=lambda t, x: response_model(keras.layers.concatenate([t, x])),  # response model
                                     n_samples=1,  # number of samples to use to estimate the response
                                     use_upper_bound_loss=False,  # whether to use an approximation to the true loss
                                     # number of samples to use in second estimate of the response
                                     # (to make loss estimate unbiased)
                                     n_gradient_samples=1,
                                     # Keras optimizer to use for training - see https://keras.io/optimizers/
                                     optimizer='adam')

            deepIv.fit(Y=y, T=t, X=x, Z=z)
            # do something with predictions...
            deepIv.predict(T=t, X=x)
            deepIv.effect(x, np.zeros_like(t), t)
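            # Not in the original test: effect returns the per-unit effect of
            # moving from zero treatment to the observed treatment; averaging
            # over rows gives a simple ATE-style summary.
            ate = deepIv.effect(x, np.zeros_like(t), t).mean(axis=0)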
Example #2
    def test_deepiv_models(self):
        n = 2000
        s1 = 2
        s2 = 2
        e = np.random.uniform(low=-0.5, high=0.5, size=(n, 1))  # confounder (enters both x and y)
        z = np.random.uniform(size=(n, 1))                      # instrument
        x = np.random.uniform(size=(n, 1)) + e                  # observed feature
        p = x + z * e + np.random.uniform(size=(n, 1))          # treatment (price)
        y = p * x + e                                           # response

        losses = []
        marg_effs = []

        z_fresh = np.random.uniform(size=(n, 1))
        e_fresh = np.random.uniform(low=-0.5, high=0.5, size=(n, 1))
        x_fresh = np.random.uniform(size=(n, 1)) + e_fresh
        p_fresh = x_fresh + z_fresh * e_fresh + np.random.uniform(size=(n, 1))
        y_fresh = p_fresh * x_fresh + e_fresh

        # each tuple is (n_samples, use_upper_bound_loss, n_gradient_samples)
        for (n1, u, n2) in [(2, False, None), (2, True, None), (1, False, 1)]:
            treatment_model = keras.Sequential([
                keras.layers.Dense(10, activation='relu', input_shape=(2, )),
                keras.layers.Dense(10, activation='relu'),
                keras.layers.Dense(10, activation='relu')
            ])

            hmodel = keras.Sequential([
                keras.layers.Dense(10, activation='relu', input_shape=(2, )),
                keras.layers.Dense(10, activation='relu'),
                keras.layers.Dense(1)
            ])

            deepIv = DeepIVEstimator(
                10,
                lambda z, x: treatment_model(keras.layers.concatenate([z, x])),
                lambda t, x: hmodel(keras.layers.concatenate([t, x])),
                n_samples=n1,
                use_upper_bound_loss=u,
                n_gradient_samples=n2,
                s1=s1,
                s2=s2)
            deepIv.fit(y, p, x, z)

            losses.append(
                np.mean(np.square(y_fresh - deepIv.predict(p_fresh, x_fresh))))
            marg_effs.append(
                deepIv.marginal_effect(np.array([[0.3], [0.5], [0.7]]),
                                       np.array([[0.4], [0.6], [0.2]])))
        print("losses: {}".format(losses))
        print("marg_effs: {}".format(marg_effs))
Example #3
    def test_deepiv_arbitrary_covariance(self):
        d = 5
        n = 5000
        # to generate a random symmetric positive semidefinite covariance matrix, we can use A*A^T
        A1 = np.random.normal(size=(d, d))
        cov1 = np.matmul(A1, np.transpose(A1))
        # convex combinations of semidefinite covariance matrices are themselves semidefinite
        A2 = np.random.normal(size=(d, d))
        cov2 = np.matmul(A2, np.transpose(A2))
        m1 = np.random.normal(size=(d,))
        m2 = np.random.normal(size=(d,))
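        # Illustrative check (not in the original test): a convex combination of
        # the two PSD matrices should itself be PSD, i.e. its eigenvalues should
        # be nonnegative up to numerical error.
        assert (np.linalg.eigvalsh(0.5 * cov1 + 0.5 * cov2) >= -1e-8).all()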
        x = np.random.uniform(size=(n, 1))
        z = np.random.uniform(size=(n, 1))
        alpha = (x * x + z * z) / 2  # in range [0,1]
        t = np.array([np.random.multivariate_normal(m1 + alpha[i] * (m2 - m1),
                                                    cov1 + alpha[i] * (cov2 - cov1)) for i in range(n)])
        y = np.expand_dims(np.einsum('nx,nx->n', t, t), -1) + x  # y_i = t_i . t_i + x_i
        results = []
        s = 6
        for (n1, u, n2) in [(2, False, None), (2, True, None), (1, False, 1)]:
            treatment_model = keras.Sequential([keras.layers.Dense(90, activation='relu', input_shape=(2,)),
                                                keras.layers.Dropout(0.2),
                                                keras.layers.Dense(60, activation='relu'),
                                                keras.layers.Dropout(0.2),
                                                keras.layers.Dense(30, activation='relu')])

            hmodel = keras.Sequential([keras.layers.Dense(90, activation='relu', input_shape=(d + 1,)),
                                       keras.layers.Dropout(0.2),
                                       keras.layers.Dense(60, activation='relu'),
                                       keras.layers.Dropout(0.2),
                                       keras.layers.Dense(30, activation='relu'),
                                       keras.layers.Dropout(0.2),
                                       keras.layers.Dense(1)])

            deepIv = DeepIVEstimator(s,
                                     lambda z, x: treatment_model(keras.layers.concatenate([z, x])),
                                     lambda t, x: hmodel(keras.layers.concatenate([t, x])),
                                     n_samples=n1, use_upper_bound_loss=u, n_gradient_samples=n2,
                                     first_stage_options={'epochs': 20}, second_stage_options={'epochs': 20})
            deepIv.fit(y[:n // 2], t[:n // 2], x[:n // 2], z[:n // 2])

            results.append({'s': s, 'n1': n1, 'u': u, 'n2': n2,
                            'loss': np.mean(np.square(y[n // 2:] - deepIv.predict(t[n // 2:], x[n // 2:]))),
                            'marg': deepIv.marginal_effect(np.array([[0.5] * d]), np.array([[1.0]]))})
        print(results)
Example #4
    def test_deepiv_models_paper2(self):
        def monte_carlo_error(g_hat, data_fn, ntest=5000, has_latent=False, debug=False):
            seed = np.random.randint(1e9)
            try:
                # test = True ensures we draw test set images
                x, z, t, y, g_true = data_fn(ntest, seed, test=True)
            except ValueError:
                warnings.warn("Too few images, reducing test set size")
                ntest = int(ntest * 0.7)
                # test = True ensures we draw test set images
                x, z, t, y, g_true = data_fn(ntest, seed, test=True)

            # re-draw to get new independent treatment and implied response
            t = np.linspace(np.percentile(t, 2.5), np.percentile(t, 97.5), ntest).reshape(-1, 1)
            # we need to make sure z _never_ does anything in these g functions (fitted and true);
            # the re-draw above is necessary so that the reduced-form model doesn't win
            if has_latent:
                x_latent, _, _, _, _ = data_fn(ntest, seed, images=False)
                y = g_true(x_latent, z, t)
            else:
                y = g_true(x, z, t)
            y_true = y.flatten()
            y_hat = g_hat(x, z, t).flatten()
            return ((y_hat - y_true)**2).mean()

        def one_hot(col, **kwargs):
            # note: n_values= (used by the caller) requires scikit-learn < 0.22,
            # and sparse= was renamed to sparse_output= in scikit-learn 1.2
            z = col.reshape(-1, 1)
            enc = OneHotEncoder(sparse=False, **kwargs)
            return enc.fit_transform(z)

        def sensf(x):
            return 2.0 * ((x - 5)**4 / 600 + np.exp(-((x - 5) / 0.5)**2) + x / 10. - 2)

        def emocoef(emo):
            emoc = (emo * np.array([1., 2., 3., 4., 5., 6., 7.])[None, :]).sum(axis=1)
            return emoc

        psd = 3.7
        pmu = 17.779
        ysd = 158.  # 292.
        ymu = -292.1

        def storeg(x, price):
            emoc = emocoef(x[:, 1:])
            time = x[:, 0]
            g = sensf(time) * emoc * 10. + (6 * emoc * sensf(time) - 2.0) * (psd * price.flatten() + pmu)
            y = (g - ymu) / ysd
            return y.reshape(-1, 1)

        def demand(n, seed=1, ynoise=1., pnoise=1., ypcor=0.8, use_images=False, test=False):
            # demand simulation in the style of the Deep IV paper (Hartford et al., 2017)
            rng = np.random.RandomState(seed)

            # covariates: time and emotion
            time = rng.rand(n) * 10
            emotion_id = rng.randint(0, 7, size=n)
            emotion = one_hot(emotion_id, n_values=7)
            emotion_feature = emotion

            # random instrument
            z = rng.randn(n)

            # z -> price
            v = rng.randn(n) * pnoise
            price = sensf(time) * (z + 3) + 25.
            price = price + v
            price = (price - pmu) / psd

            # true observable demand function
            x = np.concatenate([time.reshape((-1, 1)), emotion_feature], axis=1)
            x_latent = np.concatenate([time.reshape((-1, 1)), emotion], axis=1)

            def g(x, z, p):
                return storeg(x, p)  # doesn't use z

            # errors
            e = (ypcor * ynoise / pnoise) * v + rng.randn(n) * ynoise * np.sqrt(1 - ypcor**2)
            e = e.reshape(-1, 1)

            # response
            y = g(x_latent, None, price) + e

            return (x,
                    z.reshape((-1, 1)),
                    price.reshape((-1, 1)),
                    y.reshape((-1, 1)),
                    g)

        def datafunction(n, s, images=False, test=False):
            return demand(n=n, seed=s, ypcor=0.5, use_images=images, test=test)

        n = 1000
        epochs = 20

        x, z, t, y, g_true = datafunction(n, 1)

        print("Data shapes:\n\
Features:{x},\n\
Instruments:{z},\n\
Treament:{t},\n\
Response:{y}".format(**{'x': x.shape, 'z': z.shape,
                        't': t.shape, 'y': y.shape}))

        losses = []

        for (n1, u, n2) in [(2, False, None), (2, True, None), (1, False, 1)]:
            treatment_model = keras.Sequential([keras.layers.Dense(50, activation='relu', input_shape=(9,)),
                                                keras.layers.Dense(25, activation='relu'),
                                                keras.layers.Dense(25, activation='relu')])

            hmodel = keras.Sequential([keras.layers.Dense(50, activation='relu', input_shape=(9,)),
                                       keras.layers.Dense(25, activation='relu'),
                                       keras.layers.Dense(25, activation='relu'),
                                       keras.layers.Dense(1)])

            deepIv = DeepIVEstimator(10,
                                     lambda z, x: treatment_model(keras.layers.concatenate([z, x])),
                                     lambda t, x: hmodel(keras.layers.concatenate([t, x])),
                                     n_samples=n1, use_upper_bound_loss=u, n_gradient_samples=n2,
                                     first_stage_options={'epochs': epochs}, second_stage_options={'epochs': epochs})
            deepIv.fit(y, t, x, z)

            losses.append(monte_carlo_error(lambda x, z, t: deepIv.predict(
                t, x), datafunction, has_latent=False, debug=False))
        print("losses: {}".format(losses))
Example #5
class DeepIV(AbstractBaseline):
    def __init__(self, treatment_model=None):
        # treatment_model acts as a flag here: any non-None value selects the
        # convolutional (28x28 image) treatment model in the else branch below
        if treatment_model is None:
            print("Using standard treatment model...")
            self._treatment_model = lambda input_shape: keras.models.Sequential(
                [
                    keras.layers.Dense(
                        128, activation='relu', input_shape=input_shape),
                    keras.layers.Dropout(0.17),
                    keras.layers.Dense(64, activation='relu'),
                    keras.layers.Dropout(0.17),
                    keras.layers.Dense(32, activation='relu'),
                    keras.layers.Dropout(0.17)
                ])

        else:
            if keras.backend.image_data_format() == "channels_first":
                image_shape = (1, 28, 28)
            else:
                image_shape = (28, 28, 1)

            self._treatment_model = lambda input_shape: keras.models.Sequential(
                [
                    keras.layers.Reshape(image_shape, input_shape=input_shape),
                    keras.layers.Conv2D(
                        16, kernel_size=(3, 3), activation='relu'),
                    keras.layers.Conv2D(32, (3, 3), activation='relu'),
                    keras.layers.MaxPooling2D(pool_size=(2, 2)),
                    keras.layers.Dropout(0.1),
                    keras.layers.Flatten(),
                    keras.layers.Dense(128, activation='relu'),
                    keras.layers.Dropout(0.1)
                ])

    def _fit(self, x, y, z, context=None):
        # x is the (endogenous) treatment, y the response, z the instrument, and
        # context the exogenous features; cf. DeepIVEstimator.fit(Y, T, X, Z) below
        if context is None:
            context = np.empty((x.shape[0], 0))

        x_dim = x.shape[1]
        z_dim = z.shape[1]
        context_dim = context.shape[1]

        treatment_model = self._treatment_model((context_dim + z_dim, ))

        response_model = keras.models.Sequential([
            keras.layers.Dense(128,
                               activation='relu',
                               input_shape=(context_dim + x_dim, )),
            keras.layers.Dropout(0.17),
            keras.layers.Dense(64, activation='relu'),
            keras.layers.Dropout(0.17),
            keras.layers.Dense(32, activation='relu'),
            keras.layers.Dropout(0.17),
            keras.layers.Dense(1)
        ])

        self._model = DeepIVEstimator(
            n_components=10,  # number of Gaussians in the mixture density network
            m=lambda _z, _context: treatment_model(
                keras.layers.concatenate([_z, _context])),  # treatment model
            h=lambda _t, _context: response_model(
                keras.layers.concatenate([_t, _context])),  # response model
            n_samples=1)
        t0 = time.time()
        self._model.fit(y, x, context, z)
        return time.time() - t0

    def _predict(self, x, context):
        if context is None:
            context = np.empty((x.shape[0], 0))

        return self._model.predict(x, context)
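A minimal usage sketch for this baseline, assuming AbstractBaseline simply drives
the underscored hooks (the variable names here are illustrative):

baseline = DeepIV()  # dense treatment model; any non-None argument selects the image CNN
fit_seconds = baseline._fit(x, y, z, context=context)
y_hat = baseline._predict(x, context)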