def test_deepiv(self):
    X = TestPandasIntegration.df[TestPandasIntegration.features]
    Y = TestPandasIntegration.df[TestPandasIntegration.outcome]
    T = TestPandasIntegration.df[TestPandasIntegration.cont_treat]
    Z = TestPandasIntegration.df[TestPandasIntegration.instrument]
    # Test DeepIV
    treatment_model = keras.Sequential([keras.layers.Dense(128, activation='relu', input_shape=(3,)),
                                        keras.layers.Dropout(0.17),
                                        keras.layers.Dense(64, activation='relu'),
                                        keras.layers.Dropout(0.17),
                                        keras.layers.Dense(32, activation='relu'),
                                        keras.layers.Dropout(0.17)])
    response_model = keras.Sequential([keras.layers.Dense(128, activation='relu', input_shape=(3,)),
                                       keras.layers.Dropout(0.17),
                                       keras.layers.Dense(64, activation='relu'),
                                       keras.layers.Dropout(0.17),
                                       keras.layers.Dense(32, activation='relu'),
                                       keras.layers.Dropout(0.17),
                                       keras.layers.Dense(1)])
    est = DeepIVEstimator(n_components=10,  # Number of gaussians in the mixture density network
                          m=lambda z, x: treatment_model(keras.layers.concatenate([z, x])),  # Treatment model
                          h=lambda t, x: response_model(keras.layers.concatenate([t, x])),  # Response model
                          n_samples=1  # Number of samples used to estimate the response
                          )
    est.fit(Y, T, X=X, Z=Z)
    treatment_effects = est.effect(X)
def test_deepiv_shape(self):
    """Make sure that arbitrary sizes for t, z, x, and y don't break the basic operations."""
    for _ in range(5):
        d_t = np.random.choice(range(1, 4))  # number of treatments
        d_z = np.random.choice(range(1, 4))  # number of instruments
        d_x = np.random.choice(range(1, 4))  # number of features
        d_y = np.random.choice(range(1, 4))  # number of responses
        n = 500
        # simple DGP only for illustration
        x = np.random.uniform(size=(n, d_x))
        z = np.random.uniform(size=(n, d_z))
        p_x_t = np.random.uniform(size=(d_x, d_t))
        p_z_t = np.random.uniform(size=(d_z, d_t))
        t = x @ p_x_t + z @ p_z_t
        p_xt_y = np.random.uniform(size=(d_x * d_t, d_y))
        y = (x.reshape(n, -1, 1) * t.reshape(n, 1, -1)).reshape(n, -1) @ p_xt_y

        # Define the treatment model neural network architecture
        # This will take the concatenation of one-dimensional values z and x as input,
        # so the input shape is (d_z + d_x,)
        # The exact shape of the final layer is not critical because the Deep IV framework will
        # add extra layers on top for the mixture density network
        treatment_model = keras.Sequential([keras.layers.Dense(128, activation='relu', input_shape=(d_z + d_x,)),
                                            keras.layers.Dropout(0.17),
                                            keras.layers.Dense(64, activation='relu'),
                                            keras.layers.Dropout(0.17),
                                            keras.layers.Dense(32, activation='relu'),
                                            keras.layers.Dropout(0.17)])

        # Define the response model neural network architecture
        # This will take the concatenation of one-dimensional values t and x as input,
        # so the input shape is (d_t + d_x,)
        # The output should match the shape of y, so it must have shape (d_y,) in this case
        # NOTE: For the response model, it is important to define the model *outside*
        #       of the lambda passed to the DeepIVEstimator, as we do here,
        #       so that the same weights will be reused in each instantiation
        response_model = keras.Sequential([keras.layers.Dense(128, activation='relu', input_shape=(d_t + d_x,)),
                                           keras.layers.Dropout(0.17),
                                           keras.layers.Dense(64, activation='relu'),
                                           keras.layers.Dropout(0.17),
                                           keras.layers.Dense(32, activation='relu'),
                                           keras.layers.Dropout(0.17),
                                           keras.layers.Dense(d_y)])

        deepIv = DeepIVEstimator(n_components=10,  # number of gaussians in our mixture density network
                                 m=lambda z, x: treatment_model(keras.layers.concatenate([z, x])),  # treatment model
                                 h=lambda t, x: response_model(keras.layers.concatenate([t, x])),  # response model
                                 n_samples=1,  # number of samples to use to estimate the response
                                 use_upper_bound_loss=False,  # whether to use an approximation to the true loss
                                 # number of samples to use in second estimate of the response
                                 # (to make loss estimate unbiased)
                                 n_gradient_samples=1,
                                 # Keras optimizer to use for training - see https://keras.io/optimizers/
                                 optimizer='adam')
        deepIv.fit(Y=y, T=t, X=x, Z=z)
        # do something with predictions...
        deepIv.predict(T=t, X=x)
        deepIv.effect(x, np.zeros_like(t), t)
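# A minimal standalone sketch (illustrative, not part of the tests above) of the
# weight-sharing NOTE in test_deepiv_shape: keras.Sequential(...) creates fresh,
# randomly initialized weights every time it is evaluated, so the response model
# must be built once, outside the lambda handed to DeepIVEstimator, for its
# weights to be shared across the estimator's internal calls.
import keras

shared_net = keras.Sequential([keras.layers.Dense(1, input_shape=(2,))])

# Correct: one set of weights, reused on every call.
h_shared = lambda t, x: shared_net(keras.layers.concatenate([t, x]))

# Broken: a brand-new network on each call, so second-stage training never
# accumulates in a single model.
h_broken = lambda t, x: keras.Sequential(
    [keras.layers.Dense(1, input_shape=(2,))])(keras.layers.concatenate([t, x]))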
def test_deepiv_models(self):
    n = 2000
    s1 = 2
    s2 = 2
    e = np.random.uniform(low=-0.5, high=0.5, size=(n, 1))
    z = np.random.uniform(size=(n, 1))
    x = np.random.uniform(size=(n, 1)) + e
    p = x + z * e + np.random.uniform(size=(n, 1))
    y = p * x + e

    losses = []
    marg_effs = []

    z_fresh = np.random.uniform(size=(n, 1))
    e_fresh = np.random.uniform(low=-0.5, high=0.5, size=(n, 1))
    x_fresh = np.random.uniform(size=(n, 1)) + e_fresh
    p_fresh = x_fresh + z_fresh * e_fresh + np.random.uniform(size=(n, 1))
    y_fresh = p_fresh * x_fresh + e_fresh

    for (n1, u, n2) in [(2, False, None), (2, True, None), (1, False, 1)]:
        treatment_model = keras.Sequential([keras.layers.Dense(10, activation='relu', input_shape=(2,)),
                                            keras.layers.Dense(10, activation='relu'),
                                            keras.layers.Dense(10, activation='relu')])
        hmodel = keras.Sequential([keras.layers.Dense(10, activation='relu', input_shape=(2,)),
                                   keras.layers.Dense(10, activation='relu'),
                                   keras.layers.Dense(1)])
        deepIv = DeepIVEstimator(10,
                                 lambda z, x: treatment_model(keras.layers.concatenate([z, x])),
                                 lambda t, x: hmodel(keras.layers.concatenate([t, x])),
                                 n_samples=n1, use_upper_bound_loss=u, n_gradient_samples=n2,
                                 s1=s1, s2=s2)
        deepIv.fit(y, p, x, z)

        losses.append(np.mean(np.square(y_fresh - deepIv.predict(p_fresh, x_fresh))))
        marg_effs.append(deepIv.marginal_effect(np.array([[0.3], [0.5], [0.7]]),
                                                np.array([[0.4], [0.6], [0.2]])))
    print("losses: {}".format(losses))
    print("marg_effs: {}".format(marg_effs))
def test_deepiv_arbitrary_covariance(self):
    d = 5
    n = 5000
    # to generate a random symmetric positive semidefinite covariance matrix, we can use A*A^T
    A1 = np.random.normal(size=(d, d))
    cov1 = np.matmul(A1, np.transpose(A1))
    # convex combinations of semidefinite covariance matrices are themselves semidefinite
    A2 = np.random.normal(size=(d, d))
    cov2 = np.matmul(A2, np.transpose(A2))
    m1 = np.random.normal(size=(d,))
    m2 = np.random.normal(size=(d,))
    x = np.random.uniform(size=(n, 1))
    z = np.random.uniform(size=(n, 1))
    alpha = (x * x + z * z) / 2  # in range [0,1]
    t = np.array([np.random.multivariate_normal(m1 + alpha[i] * (m2 - m1),
                                                cov1 + alpha[i] * (cov2 - cov1))
                  for i in range(n)])
    y = np.expand_dims(np.einsum('nx,nx->n', t, t), -1) + x
    results = []
    s = 6
    for (n1, u, n2) in [(2, False, None), (2, True, None), (1, False, 1)]:
        treatment_model = keras.Sequential([keras.layers.Dense(90, activation='relu', input_shape=(2,)),
                                            keras.layers.Dropout(0.2),
                                            keras.layers.Dense(60, activation='relu'),
                                            keras.layers.Dropout(0.2),
                                            keras.layers.Dense(30, activation='relu')])
        hmodel = keras.Sequential([keras.layers.Dense(90, activation='relu', input_shape=(d + 1,)),
                                   keras.layers.Dropout(0.2),
                                   keras.layers.Dense(60, activation='relu'),
                                   keras.layers.Dropout(0.2),
                                   keras.layers.Dense(30, activation='relu'),
                                   keras.layers.Dropout(0.2),
                                   keras.layers.Dense(1)])
        deepIv = DeepIVEstimator(s,
                                 lambda z, x: treatment_model(keras.layers.concatenate([z, x])),
                                 lambda t, x: hmodel(keras.layers.concatenate([t, x])),
                                 n_samples=n1, use_upper_bound_loss=u, n_gradient_samples=n2,
                                 first_stage_options={'epochs': 20}, second_stage_options={'epochs': 20})
        deepIv.fit(y[:n // 2], t[:n // 2], x[:n // 2], z[:n // 2])
        results.append({'s': s, 'n1': n1, 'u': u, 'n2': n2,
                        'loss': np.mean(np.square(y[n // 2:] - deepIv.predict(t[n // 2:], x[n // 2:]))),
                        'marg': deepIv.marginal_effect(np.array([[0.5] * d]), np.array([[1.0]]))})
    print(results)
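# Quick standalone sanity check (illustrative) of the two covariance facts the
# test above relies on: A @ A.T is symmetric positive semidefinite, and a convex
# combination of PSD matrices is itself PSD, so every eigenvalue along the
# interpolation path is (numerically) nonnegative.
import numpy as np

d = 5
A1, A2 = np.random.normal(size=(d, d)), np.random.normal(size=(d, d))
cov1, cov2 = A1 @ A1.T, A2 @ A2.T
for alpha in (0.0, 0.25, 0.5, 0.75, 1.0):
    eigvals = np.linalg.eigvalsh(cov1 + alpha * (cov2 - cov1))
    assert eigvals.min() > -1e-8  # PSD up to floating-point error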
def test_deepiv_models_paper2(self):
    def monte_carlo_error(g_hat, data_fn, ntest=5000, has_latent=False, debug=False):
        seed = np.random.randint(1e9)
        try:
            # test = True ensures we draw test set images
            x, z, t, y, g_true = data_fn(ntest, seed, test=True)
        except ValueError:
            warnings.warn("Too few images, reducing test set size")
            ntest = int(ntest * 0.7)
            # test = True ensures we draw test set images
            x, z, t, y, g_true = data_fn(ntest, seed, test=True)

        # re-draw to get new independent treatment and implied response
        t = np.linspace(np.percentile(t, 2.5), np.percentile(t, 97.5), ntest).reshape(-1, 1)

        # we need to make sure z _never_ does anything in these g functions (fitted and true)
        # above is necessary so that reduced form doesn't win
        if has_latent:
            x_latent, _, _, _, _ = data_fn(ntest, seed, images=False)
            y = g_true(x_latent, z, t)
        else:
            y = g_true(x, z, t)
        y_true = y.flatten()
        y_hat = g_hat(x, z, t).flatten()
        return ((y_hat - y_true)**2).mean()

    def one_hot(col, **kwargs):
        z = col.reshape(-1, 1)
        enc = OneHotEncoder(sparse=False, **kwargs)
        return enc.fit_transform(z)

    def sensf(x):
        return 2.0 * ((x - 5)**4 / 600 + np.exp(-((x - 5) / 0.5)**2) + x / 10. - 2)

    def emocoef(emo):
        emoc = (emo * np.array([1., 2., 3., 4., 5., 6., 7.])[None, :]).sum(axis=1)
        return emoc

    psd = 3.7
    pmu = 17.779
    ysd = 158.  # 292.
    ymu = -292.1

    def storeg(x, price):
        emoc = emocoef(x[:, 1:])
        time = x[:, 0]
        g = sensf(time) * emoc * 10. + (6 * emoc * sensf(time) - 2.0) * (psd * price.flatten() + pmu)
        y = (g - ymu) / ysd
        return y.reshape(-1, 1)

    def demand(n, seed=1, ynoise=1., pnoise=1., ypcor=0.8, use_images=False, test=False):
        rng = np.random.RandomState(seed)

        # covariates: time and emotion
        time = rng.rand(n) * 10
        emotion_id = rng.randint(0, 7, size=n)
        emotion = one_hot(emotion_id, n_values=7)
        emotion_feature = emotion

        # random instrument
        z = rng.randn(n)

        # z -> price
        v = rng.randn(n) * pnoise
        price = sensf(time) * (z + 3) + 25.
        price = price + v
        price = (price - pmu) / psd

        # true observable demand function
        x = np.concatenate([time.reshape((-1, 1)), emotion_feature], axis=1)
        x_latent = np.concatenate([time.reshape((-1, 1)), emotion], axis=1)

        def g(x, z, p):
            return storeg(x, p)  # doesn't use z

        # errors
        e = (ypcor * ynoise / pnoise) * v + rng.randn(n) * ynoise * np.sqrt(1 - ypcor**2)
        e = e.reshape(-1, 1)

        # response
        y = g(x_latent, None, price) + e

        return (x, z.reshape((-1, 1)), price.reshape((-1, 1)), y.reshape((-1, 1)), g)

    def datafunction(n, s, images=False, test=False):
        return demand(n=n, seed=s, ypcor=0.5, use_images=images, test=test)

    n = 1000
    epochs = 20

    x, z, t, y, g_true = datafunction(n, 1)

    print("Data shapes:\n"
          "Features:{x},\n"
          "Instruments:{z},\n"
          "Treatment:{t},\n"
          "Response:{y}".format(**{'x': x.shape, 'z': z.shape,
                                   't': t.shape, 'y': y.shape}))

    losses = []
    for (n1, u, n2) in [(2, False, None), (2, True, None), (1, False, 1)]:
        treatment_model = keras.Sequential([keras.layers.Dense(50, activation='relu', input_shape=(9,)),
                                            keras.layers.Dense(25, activation='relu'),
                                            keras.layers.Dense(25, activation='relu')])
        hmodel = keras.Sequential([keras.layers.Dense(50, activation='relu', input_shape=(9,)),
                                   keras.layers.Dense(25, activation='relu'),
                                   keras.layers.Dense(25, activation='relu'),
                                   keras.layers.Dense(1)])
        deepIv = DeepIVEstimator(10,
                                 lambda z, x: treatment_model(keras.layers.concatenate([z, x])),
                                 lambda t, x: hmodel(keras.layers.concatenate([t, x])),
                                 n_samples=n1, use_upper_bound_loss=u, n_gradient_samples=n2,
                                 first_stage_options={'epochs': epochs}, second_stage_options={'epochs': epochs})
        deepIv.fit(y, t, x, z)
        losses.append(monte_carlo_error(lambda x, z, t: deepIv.predict(t, x),
                                        datafunction, has_latent=False, debug=False))
    print("losses: {}".format(losses))
class DeepIV(AbstractBaseline):
    def __init__(self, treatment_model=None):
        if treatment_model is None:
            print("Using standard treatment model...")
            self._treatment_model = lambda input_shape: keras.models.Sequential([
                keras.layers.Dense(128, activation='relu', input_shape=input_shape),
                keras.layers.Dropout(0.17),
                keras.layers.Dense(64, activation='relu'),
                keras.layers.Dropout(0.17),
                keras.layers.Dense(32, activation='relu'),
                keras.layers.Dropout(0.17)
            ])
        else:
            # Any non-None treatment_model selects the convolutional architecture
            # for 28x28 single-channel image instruments; the argument's value
            # itself is not used.
            if keras.backend.image_data_format() == "channels_first":
                image_shape = (1, 28, 28)
            else:
                image_shape = (28, 28, 1)
            self._treatment_model = lambda input_shape: keras.models.Sequential([
                keras.layers.Reshape(image_shape, input_shape=input_shape),
                keras.layers.Conv2D(16, kernel_size=(3, 3), activation='relu'),
                keras.layers.Conv2D(32, (3, 3), activation='relu'),
                keras.layers.MaxPooling2D(pool_size=(2, 2)),
                keras.layers.Dropout(0.1),
                keras.layers.Flatten(),
                keras.layers.Dense(128, activation='relu'),
                keras.layers.Dropout(0.1)
            ])

    def _fit(self, x, y, z, context=None):
        if context is None:
            context = np.empty((x.shape[0], 0))
        x_dim = x.shape[1]
        z_dim = z.shape[1]
        context_dim = context.shape[1]

        treatment_model = self._treatment_model((context_dim + z_dim,))
        response_model = keras.models.Sequential([
            keras.layers.Dense(128, activation='relu', input_shape=(context_dim + x_dim,)),
            keras.layers.Dropout(0.17),
            keras.layers.Dense(64, activation='relu'),
            keras.layers.Dropout(0.17),
            keras.layers.Dense(32, activation='relu'),
            keras.layers.Dropout(0.17),
            keras.layers.Dense(1)
        ])

        self._model = DeepIVEstimator(
            n_components=10,  # Number of gaussians in the mixture density network
            m=lambda _z, _context: treatment_model(
                keras.layers.concatenate([_z, _context])),  # Treatment model
            h=lambda _t, _context: response_model(
                keras.layers.concatenate([_t, _context])),  # Response model
            n_samples=1)

        t0 = time.time()
        # DeepIVEstimator.fit expects (Y, T, X, Z): in this baseline's naming,
        # x is the treatment and context plays the role of the features X.
        self._model.fit(y, x, context, z)
        return time.time() - t0

    def _predict(self, x, context):
        if context is None:
            context = np.empty((x.shape[0], 0))
        return self._model.predict(x, context)
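# A hedged usage sketch of the baseline above. The synthetic data is purely
# illustrative, and we call the underscored helpers directly only because
# AbstractBaseline's public interface is not shown here; the snippet assumes
# nothing beyond the _fit/_predict signatures defined in this class
# (x = treatment, y = outcome, z = instrument, context = optional features).
import numpy as np

n = 1000
z = np.random.uniform(size=(n, 1))                      # instrument
x = z + np.random.normal(scale=0.1, size=(n, 1))        # treatment driven by z
y = 2 * x + np.random.normal(scale=0.1, size=(n, 1))    # outcome

baseline = DeepIV()                 # None -> dense treatment model
fit_seconds = baseline._fit(x, y, z)  # returns wall-clock training time
y_hat = baseline._predict(x, None)    # None context -> zero-width feature matrix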