def test_deepiv_models(self):
    n = 2000
    epochs = 2
    e = np.random.uniform(low=-0.5, high=0.5, size=(n, 1))
    z = np.random.uniform(size=(n, 1))
    x = np.random.uniform(size=(n, 1)) + e
    p = x + z * e + np.random.uniform(size=(n, 1))
    y = p * x + e
    losses = []
    marg_effs = []

    # draw a fresh sample from the same DGP for out-of-sample evaluation
    z_fresh = np.random.uniform(size=(n, 1))
    e_fresh = np.random.uniform(low=-0.5, high=0.5, size=(n, 1))
    x_fresh = np.random.uniform(size=(n, 1)) + e_fresh
    p_fresh = x_fresh + z_fresh * e_fresh + np.random.uniform(size=(n, 1))
    y_fresh = p_fresh * x_fresh + e_fresh

    for (n1, u, n2) in [(2, False, None), (2, True, None), (1, False, 1)]:
        treatment_model = keras.Sequential([keras.layers.Dense(10, activation='relu', input_shape=(2,)),
                                            keras.layers.Dense(10, activation='relu'),
                                            keras.layers.Dense(10, activation='relu')])

        hmodel = keras.Sequential([keras.layers.Dense(10, activation='relu', input_shape=(2,)),
                                   keras.layers.Dense(10, activation='relu'),
                                   keras.layers.Dense(1)])

        deepIv = DeepIV(n_components=10,
                        m=lambda z, x: treatment_model(keras.layers.concatenate([z, x])),
                        h=lambda t, x: hmodel(keras.layers.concatenate([t, x])),
                        n_samples=n1,
                        use_upper_bound_loss=u,
                        n_gradient_samples=n2,
                        first_stage_options={'epochs': epochs},
                        second_stage_options={'epochs': epochs})
        deepIv.fit(y, p, X=x, Z=z)

        losses.append(np.mean(np.square(y_fresh - deepIv.predict(p_fresh, x_fresh))))
        marg_effs.append(deepIv.marginal_effect(np.array([[0.3], [0.5], [0.7]]),
                                                np.array([[0.4], [0.6], [0.2]])))
    print("losses: {}".format(losses))
    print("marg_effs: {}".format(marg_effs))

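# Illustrative sketch (not part of the test suite): in the DGP of test_deepiv_models above,
# the treatment p depends on the unobserved error e (directly through x and through z * e),
# so p is endogenous and a direct regression of y on (p, x) would be biased; that is why the
# instrument z is needed. Assumes the module-level numpy import (np) already used by the
# tests; the helper name is ours.
def _endogeneity_sketch(n=2000):
    e = np.random.uniform(low=-0.5, high=0.5, size=(n, 1))
    z = np.random.uniform(size=(n, 1))
    x = np.random.uniform(size=(n, 1)) + e
    p = x + z * e + np.random.uniform(size=(n, 1))
    # correlation between treatment and error is noticeably positive
    return np.corrcoef(p.flatten(), e.flatten())[0, 1]
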
def test_deepiv_arbitrary_covariance(self):
    d = 5
    n = 5000
    # to generate a random symmetric positive semidefinite covariance matrix, we can use A * A^T
    A1 = np.random.normal(size=(d, d))
    cov1 = np.matmul(A1, np.transpose(A1))
    # convex combinations of positive semidefinite covariance matrices are themselves positive semidefinite
    A2 = np.random.normal(size=(d, d))
    cov2 = np.matmul(A2, np.transpose(A2))
    m1 = np.random.normal(size=(d,))
    m2 = np.random.normal(size=(d,))
    x = np.random.uniform(size=(n, 1))
    z = np.random.uniform(size=(n, 1))
    alpha = (x * x + z * z) / 2  # in range [0, 1]
    t = np.array([np.random.multivariate_normal(m1 + alpha[i] * (m2 - m1),
                                                cov1 + alpha[i] * (cov2 - cov1)) for i in range(n)])
    y = np.expand_dims(np.einsum('nx,nx->n', t, t), -1) + x
    results = []
    s = 6
    for (n1, u, n2) in [(2, False, None), (2, True, None), (1, False, 1)]:
        treatment_model = keras.Sequential([keras.layers.Dense(90, activation='relu', input_shape=(2,)),
                                            keras.layers.Dropout(0.2),
                                            keras.layers.Dense(60, activation='relu'),
                                            keras.layers.Dropout(0.2),
                                            keras.layers.Dense(30, activation='relu')])

        hmodel = keras.Sequential([keras.layers.Dense(90, activation='relu', input_shape=(d + 1,)),
                                   keras.layers.Dropout(0.2),
                                   keras.layers.Dense(60, activation='relu'),
                                   keras.layers.Dropout(0.2),
                                   keras.layers.Dense(30, activation='relu'),
                                   keras.layers.Dropout(0.2),
                                   keras.layers.Dense(1)])

        deepIv = DeepIV(n_components=s,
                        m=lambda z, x: treatment_model(keras.layers.concatenate([z, x])),
                        h=lambda t, x: hmodel(keras.layers.concatenate([t, x])),
                        n_samples=n1,
                        use_upper_bound_loss=u,
                        n_gradient_samples=n2,
                        first_stage_options={'epochs': 20},
                        second_stage_options={'epochs': 20})
        # train on the first half of the data, evaluate on the second half
        deepIv.fit(y[:n // 2], t[:n // 2], X=x[:n // 2], Z=z[:n // 2])
        results.append({'s': s, 'n1': n1, 'u': u, 'n2': n2,
                        'loss': np.mean(np.square(y[n // 2:] - deepIv.predict(t[n // 2:], x[n // 2:]))),
                        'marg': deepIv.marginal_effect(np.array([[0.5] * d]), np.array([[1.0]]))})
    print(results)

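# Illustrative sketch (not part of the test suite): numerically confirms the comment above that
# a convex combination of two positive semidefinite covariance matrices is itself positive
# semidefinite, so every cov1 + alpha * (cov2 - cov1) drawn in the test is a valid covariance.
# Assumes the module-level numpy import (np); the helper name is ours.
def _psd_mixture_sketch(d=5, alpha=0.3):
    A1 = np.random.normal(size=(d, d))
    A2 = np.random.normal(size=(d, d))
    cov1 = A1 @ A1.T  # A @ A.T is always positive semidefinite
    cov2 = A2 @ A2.T
    mix = cov1 + alpha * (cov2 - cov1)  # = (1 - alpha) * cov1 + alpha * cov2
    # smallest eigenvalue is nonnegative up to floating-point round-off
    return np.linalg.eigvalsh(mix).min() >= -1e-8
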
def test_deepiv_shape(self):
    """Make sure that arbitrary sizes for t, z, x, and y don't break the basic operations."""
    fit_opts = {"epochs": 2}

    for _ in range(5):
        d_t = np.random.choice(range(1, 4))  # number of treatments
        d_z = np.random.choice(range(1, 4))  # number of instruments
        d_x = np.random.choice(range(1, 4))  # number of features
        d_y = np.random.choice(range(1, 4))  # number of responses
        n = 500
        # simple DGP only for illustration
        x = np.random.uniform(size=(n, d_x))
        z = np.random.uniform(size=(n, d_z))
        p_x_t = np.random.uniform(size=(d_x, d_t))
        p_z_t = np.random.uniform(size=(d_z, d_t))
        t = x @ p_x_t + z @ p_z_t
        p_xt_y = np.random.uniform(size=(d_x * d_t, d_y))
        y = (x.reshape(n, -1, 1) * t.reshape(n, 1, -1)).reshape(n, -1) @ p_xt_y

        # Define the treatment model neural network architecture
        # This will take the concatenation of the d_z-dimensional z and d_x-dimensional x as input,
        # so the input shape is (d_z + d_x,)
        # The exact shape of the final layer is not critical because the Deep IV framework will
        # add extra layers on top for the mixture density network
        treatment_model = keras.Sequential([keras.layers.Dense(128, activation='relu', input_shape=(d_z + d_x,)),
                                            keras.layers.Dropout(0.17),
                                            keras.layers.Dense(64, activation='relu'),
                                            keras.layers.Dropout(0.17),
                                            keras.layers.Dense(32, activation='relu'),
                                            keras.layers.Dropout(0.17)])

        # Define the response model neural network architecture
        # This will take the concatenation of the d_t-dimensional t and d_x-dimensional x as input,
        # so the input shape is (d_t + d_x,)
        # The output should match the shape of y, so it must have shape (d_y,) in this case
        # NOTE: For the response model, it is important to define the model *outside*
        # of the lambda passed to DeepIV, as we do here,
        # so that the same weights will be reused in each instantiation
        response_model = keras.Sequential([keras.layers.Dense(128, activation='relu', input_shape=(d_t + d_x,)),
                                           keras.layers.Dropout(0.17),
                                           keras.layers.Dense(64, activation='relu'),
                                           keras.layers.Dropout(0.17),
                                           keras.layers.Dense(32, activation='relu'),
                                           keras.layers.Dropout(0.17),
                                           keras.layers.Dense(d_y)])

        deepIv = DeepIV(n_components=10,  # number of gaussians in our mixture density network
                        m=lambda z, x: treatment_model(keras.layers.concatenate([z, x])),  # treatment model
                        h=lambda t, x: response_model(keras.layers.concatenate([t, x])),  # response model
                        n_samples=1,  # number of samples to use to estimate the response
                        use_upper_bound_loss=False,  # whether to use an approximation to the true loss
                        # number of samples to use in second estimate of the response
                        # (to make loss estimate unbiased)
                        n_gradient_samples=1,
                        # Keras optimizer to use for training - see https://keras.io/optimizers/
                        optimizer='adam',
                        first_stage_options=fit_opts,
                        second_stage_options=fit_opts)

        deepIv.fit(Y=y, T=t, X=x, Z=z)
        # do something with predictions...
        deepIv.predict(T=t, X=x)
        deepIv.effect(x, np.zeros_like(t), t)

    # also test vector t and y
    for _ in range(3):
        d_z = np.random.choice(range(1, 4))  # number of instruments
        d_x = np.random.choice(range(1, 4))  # number of features
        n = 500
        # simple DGP only for illustration
        x = np.random.uniform(size=(n, d_x))
        z = np.random.uniform(size=(n, d_z))
        p_x_t = np.random.uniform(size=(d_x,))
        p_z_t = np.random.uniform(size=(d_z,))
        t = x @ p_x_t + z @ p_z_t
        p_xt_y = np.random.uniform(size=(d_x,))
        y = (x * t.reshape(n, 1)) @ p_xt_y

        # Define the treatment model neural network architecture
        # This will take the concatenation of the d_z-dimensional z and d_x-dimensional x as input,
        # so the input shape is (d_z + d_x,)
        # The exact shape of the final layer is not critical because the Deep IV framework will
        # add extra layers on top for the mixture density network
        treatment_model = keras.Sequential([keras.layers.Dense(128, activation='relu', input_shape=(d_z + d_x,)),
                                            keras.layers.Dropout(0.17),
                                            keras.layers.Dense(64, activation='relu'),
                                            keras.layers.Dropout(0.17),
                                            keras.layers.Dense(32, activation='relu'),
                                            keras.layers.Dropout(0.17)])

        # Define the response model neural network architecture
        # This will take the concatenation of the scalar treatment t and the d_x-dimensional x as input,
        # so the input shape is (1 + d_x,)
        # The output should match the shape of y, so it must have shape (1,) in this case
        # NOTE: For the response model, it is important to define the model *outside*
        # of the lambda passed to DeepIV, as we do here,
        # so that the same weights will be reused in each instantiation
        response_model = keras.Sequential([keras.layers.Dense(128, activation='relu', input_shape=(1 + d_x,)),
                                           keras.layers.Dropout(0.17),
                                           keras.layers.Dense(64, activation='relu'),
                                           keras.layers.Dropout(0.17),
                                           keras.layers.Dense(32, activation='relu'),
                                           keras.layers.Dropout(0.17),
                                           keras.layers.Dense(1)])

        deepIv = DeepIV(n_components=10,  # number of gaussians in our mixture density network
                        m=lambda z, x: treatment_model(keras.layers.concatenate([z, x])),  # treatment model
                        h=lambda t, x: response_model(keras.layers.concatenate([t, x])),  # response model
                        n_samples=1,  # number of samples to use to estimate the response
                        use_upper_bound_loss=False,  # whether to use an approximation to the true loss
                        # number of samples to use in second estimate of the response
                        # (to make loss estimate unbiased)
                        n_gradient_samples=1,
                        # Keras optimizer to use for training - see https://keras.io/optimizers/
                        optimizer='adam',
                        first_stage_options=fit_opts,
                        second_stage_options=fit_opts)

        deepIv.fit(Y=y, T=t, X=x, Z=z)
        # do something with predictions...
        deepIv.predict(T=t, X=x)
        assert deepIv.effect(x).shape == (n,)

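# Illustrative sketch (not part of the test suite) of the NOTE above: the response model must be
# built once, outside the lambda passed to DeepIV, so that fitting and prediction share the same
# weights. A model constructed inside the lambda would get fresh, untrained weights on every call.
# Assumes the module-level keras import; the names here are ours.
def _weight_sharing_sketch():
    shared = keras.Sequential([keras.layers.Dense(1, input_shape=(2,))])
    # Good: every call reuses the same layer weights.
    h_good = lambda t, x: shared(keras.layers.concatenate([t, x]))
    # Bad: each call builds a brand-new model with brand-new weights.
    h_bad = lambda t, x: keras.Sequential([keras.layers.Dense(1, input_shape=(2,))])(
        keras.layers.concatenate([t, x]))
    return h_good, h_bad
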
def test_deepiv_models_paper2(self):
    def monte_carlo_error(g_hat, data_fn, ntest=5000, has_latent=False, debug=False):
        seed = np.random.randint(1e9)
        try:
            # test = True ensures we draw test set images
            x, z, t, y, g_true = data_fn(ntest, seed, test=True)
        except ValueError:
            warnings.warn("Too few images, reducing test set size")
            ntest = int(ntest * 0.7)
            # test = True ensures we draw test set images
            x, z, t, y, g_true = data_fn(ntest, seed, test=True)

        # re-draw to get new independent treatment and implied response
        t = np.linspace(np.percentile(t, 2.5), np.percentile(t, 97.5), ntest).reshape(-1, 1)

        # we need to make sure z _never_ does anything in these g functions (fitted and true);
        # the re-draw above is necessary so that the reduced form doesn't win
        if has_latent:
            x_latent, _, _, _, _ = data_fn(ntest, seed, images=False)
            y = g_true(x_latent, z, t)
        else:
            y = g_true(x, z, t)
        y_true = y.flatten()
        y_hat = g_hat(x, z, t).flatten()
        return ((y_hat - y_true) ** 2).mean()

    def one_hot(col, **kwargs):
        z = col.reshape(-1, 1)
        enc = OneHotEncoder(sparse=False, **kwargs)
        return enc.fit_transform(z)

    def sensf(x):
        return 2.0 * ((x - 5) ** 4 / 600 + np.exp(-((x - 5) / 0.5) ** 2) + x / 10. - 2)

    def emocoef(emo):
        emoc = (emo * np.array([1., 2., 3., 4., 5., 6., 7.])[None, :]).sum(axis=1)
        return emoc

    psd = 3.7
    pmu = 17.779
    ysd = 158.  # 292.
    ymu = -292.1

    def storeg(x, price):
        emoc = emocoef(x[:, 1:])
        time = x[:, 0]
        g = sensf(time) * emoc * 10. + (6 * emoc * sensf(time) - 2.0) * (psd * price.flatten() + pmu)
        y = (g - ymu) / ysd
        return y.reshape(-1, 1)

    def demand(n, seed=1, ynoise=1., pnoise=1., ypcor=0.8, use_images=False, test=False):
        rng = np.random.RandomState(seed)

        # covariates: time and emotion
        time = rng.rand(n) * 10
        emotion_id = rng.randint(0, 7, size=n)
        emotion = one_hot(emotion_id, categories=[np.arange(7)])
        emotion_feature = emotion

        # random instrument
        z = rng.randn(n)

        # z -> price
        v = rng.randn(n) * pnoise
        price = sensf(time) * (z + 3) + 25.
        price = price + v
        price = (price - pmu) / psd

        # true observable demand function
        x = np.concatenate([time.reshape((-1, 1)), emotion_feature], axis=1)
        x_latent = np.concatenate([time.reshape((-1, 1)), emotion], axis=1)

        def g(x, z, p):
            return storeg(x, p)  # doesn't use z

        # errors
        e = (ypcor * ynoise / pnoise) * v + rng.randn(n) * ynoise * np.sqrt(1 - ypcor ** 2)
        e = e.reshape(-1, 1)

        # response
        y = g(x_latent, None, price) + e

        return (x, z.reshape((-1, 1)), price.reshape((-1, 1)), y.reshape((-1, 1)), g)

    def datafunction(n, s, images=False, test=False):
        return demand(n=n, seed=s, ypcor=0.5, use_images=images, test=test)

    n = 1000
    epochs = 20

    x, z, t, y, g_true = datafunction(n, 1)

    print("Data shapes:\n"
          "Features: {x},\n"
          "Instruments: {z},\n"
          "Treatment: {t},\n"
          "Response: {y}".format(**{'x': x.shape, 'z': z.shape, 't': t.shape, 'y': y.shape}))

    losses = []
    for (n1, u, n2) in [(2, False, None), (2, True, None), (1, False, 1)]:
        treatment_model = keras.Sequential([keras.layers.Dense(50, activation='relu', input_shape=(9,)),
                                            keras.layers.Dense(25, activation='relu'),
                                            keras.layers.Dense(25, activation='relu')])

        hmodel = keras.Sequential([keras.layers.Dense(50, activation='relu', input_shape=(9,)),
                                   keras.layers.Dense(25, activation='relu'),
                                   keras.layers.Dense(25, activation='relu'),
                                   keras.layers.Dense(1)])

        deepIv = DeepIV(n_components=10,
                        m=lambda z, x: treatment_model(keras.layers.concatenate([z, x])),
                        h=lambda t, x: hmodel(keras.layers.concatenate([t, x])),
                        n_samples=n1,
                        use_upper_bound_loss=u,
                        n_gradient_samples=n2,
                        first_stage_options={'epochs': epochs},
                        second_stage_options={'epochs': epochs})
        deepIv.fit(y, t, X=x, Z=z)
        losses.append(monte_carlo_error(lambda x, z, t: deepIv.predict(t, x),
                                        datafunction, has_latent=False, debug=False))
    print("losses: {}".format(losses))

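# Illustrative sketch (not part of the test suite): in the demand() DGP above, the instrument z
# enters only the price equation (the first stage), not the response g, which is the exclusion
# restriction DeepIV relies on. This sketch re-derives that first stage to show the instrument is
# also relevant, i.e. correlated with price. Assumes the module-level numpy import (np); the
# helper name is ours.
def _first_stage_relevance_sketch(n=2000):
    rng = np.random.RandomState(0)
    time = rng.rand(n) * 10
    z = rng.randn(n)  # instrument
    v = rng.randn(n)  # unobserved price noise, correlated with the response error
    sens = 2.0 * ((time - 5) ** 4 / 600 + np.exp(-((time - 5) / 0.5) ** 2) + time / 10. - 2)
    price = sens * (z + 3) + 25. + v  # same first stage as demand() above
    return np.corrcoef(z, price)[0, 1]  # noticeably nonzero: the instrument shifts price
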