def test_smoke(num_data, feature_dim, outcome_dist):
    """Smoke-test CEVAE: a short fit followed by an ITE query.

    Generates a dataset, fits a model for two epochs and checks that the
    individual treatment effect estimate has one entry per data point.

    Args:
        num_data: Number of samples requested from ``generate_data``.
        feature_dim: Feature dimension passed to ``generate_data`` and ``CEVAE``.
        outcome_dist: Outcome distribution name passed to ``CEVAE``.
    """
    features, treatments, outcomes = generate_data(num_data, feature_dim)

    # Outcomes are clamped in place to a tiny positive floor for the
    # "exponential" case (presumably to keep them inside that
    # distribution's support -- confirm against generate_data).
    uses_exponential = outcome_dist == "exponential"
    if uses_exponential:
        outcomes.clamp_(min=1e-20)

    model = CEVAE(feature_dim, outcome_dist)
    model.fit(features, treatments, outcomes, num_epochs=2)

    effects = model.ite(features)
    expected_shape = (num_data,)
    assert effects.shape == expected_shape
def fit(self, X, treatment, y, p=None):
    """
    Fits CEVAE.

    Builds a fresh ``CEVAEModel`` from the hyper-parameters stored on the
    instance, stores it on ``self.cevae`` and trains it on the given data.

    Args:
        X (np.matrix or np.array or pd.Dataframe): a feature matrix
        treatment (np.array or pd.Series): a treatment vector
        y (np.array or pd.Series): an outcome vector
        p: not read by this method
    """
    X, treatment, y = convert_pd_to_np(X, treatment, y)

    # The feature dimension is taken from the last axis of the feature matrix.
    model = CEVAEModel(
        outcome_dist=self.outcome_dist,
        feature_dim=X.shape[-1],
        latent_dim=self.latent_dim,
        hidden_dim=self.hidden_dim,
        num_layers=self.num_layers,
    )
    self.cevae = model

    # All three inputs are handed to the model as float tensors.
    features = torch.tensor(X, dtype=torch.float)
    treatments = torch.tensor(treatment, dtype=torch.float)
    outcomes = torch.tensor(y, dtype=torch.float)

    model.fit(
        x=features,
        t=treatments,
        y=outcomes,
        num_epochs=self.num_epochs,
        batch_size=self.batch_size,
        learning_rate=self.learning_rate,
        learning_rate_decay=self.learning_rate_decay,
        weight_decay=self.weight_decay,
    )
def main(args):
    """Train a CEVAE on synthetic data and print ATE estimates.

    Prints three numbers: the true average treatment effect of a freshly
    generated test set, the naive difference of treated/control outcome
    means, and the effect estimated by the trained model.

    Args:
        args: Parsed options. Reads ``cuda``, ``seed``, ``jit``, the model
            sizes (``feature_dim``, ``latent_dim``, ``hidden_dim``,
            ``num_layers``) and the training settings (``num_epochs``,
            ``batch_size``, ``learning_rate``, ``learning_rate_decay``,
            ``weight_decay``); also forwarded whole to ``generate_data``.
    """
    pyro.enable_validation(__debug__)
    if args.cuda:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

    # Synthetic training data, generated under the requested seed.
    pyro.set_rng_seed(args.seed)
    x_train, t_train, y_train, _ = generate_data(args)

    # Training: reseed and start from an empty parameter store.
    pyro.set_rng_seed(args.seed)
    pyro.clear_param_store()
    model_options = dict(
        feature_dim=args.feature_dim,
        latent_dim=args.latent_dim,
        hidden_dim=args.hidden_dim,
        num_layers=args.num_layers,
        num_samples=10,
    )
    training_options = dict(
        num_epochs=args.num_epochs,
        batch_size=args.batch_size,
        learning_rate=args.learning_rate,
        learning_rate_decay=args.learning_rate_decay,
        weight_decay=args.weight_decay,
    )
    cevae = CEVAE(**model_options)
    cevae.fit(x_train, t_train, y_train, **training_options)

    # Evaluation on a second draw from the generator (no reseed here).
    x_test, t_test, y_test, true_ite = generate_data(args)
    true_ate = true_ite.mean()
    print("true ATE = {:0.3g}".format(true_ate.item()))

    treated_mean = y_test[t_test == 1].mean()
    control_mean = y_test[t_test == 0].mean()
    naive_ate = treated_mean - control_mean
    print("naive ATE = {:0.3g}".format(naive_ate))

    # Optionally evaluate through the script-module form of the model.
    estimator = cevae.to_script_module() if args.jit else cevae
    est_ite = estimator.ite(x_test)
    est_ate = est_ite.mean()
    print("estimated ATE = {:0.3g}".format(est_ate.item()))
def test_serialization(jit, feature_dim, outcome_dist):
    """Check that a fitted CEVAE gives close ITE estimates after a save/load round trip.

    Args:
        jit: If true, round-trip the script module via ``torch.jit.save`` /
            ``torch.jit.load``; otherwise round-trip the model itself via
            ``torch.save`` / ``torch.load``.
        feature_dim: Feature dimension for the generated data and the model.
        outcome_dist: Outcome distribution name passed to ``CEVAE``.
    """
    x, t, y = generate_data(num_data=32, feature_dim=feature_dim)
    if outcome_dist == "exponential":
        # In-place clamp to a tiny positive floor for exponential outcomes.
        y.clamp_(min=1e-20)

    model = CEVAE(
        feature_dim,
        outcome_dist=outcome_dist,
        num_samples=1000,
        hidden_dim=32,
    )
    model.fit(x, t, y, num_epochs=4, batch_size=8)

    # Reference estimate, computed under a fixed seed.
    pyro.set_rng_seed(0)
    expected_ite = model.ite(x)

    # Round-trip through an in-memory buffer.
    buffer = io.BytesIO()
    if jit:
        scripted = model.to_script_module()
        torch.jit.save(scripted, buffer)
        buffer.seek(0)
        restored = torch.jit.load(buffer)
    else:
        # UserWarnings raised while saving are silenced.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=UserWarning)
            torch.save(model, buffer)
        buffer.seek(0)
        restored = torch.load(buffer)

    # Same seed as the reference so the two estimates are comparable.
    pyro.set_rng_seed(0)
    actual_ite = restored.ite(x)
    assert_close(actual_ite, expected_ite, atol=0.1)
class CEVAE:
    """Wrapper that fits a ``CEVAEModel`` and predicts treatment effects.

    Hyper-parameters are stored at construction time; the underlying model
    is created on each call to ``fit`` and kept on ``self.cevae``.
    """

    def __init__(self, outcome_dist="studentt", latent_dim=20, hidden_dim=200,
                 num_epochs=50, num_layers=3, batch_size=100,
                 learning_rate=1e-3, learning_rate_decay=0.1,
                 num_samples=1000, weight_decay=1e-4):
        """
        Initializes CEVAE.

        Args:
            outcome_dist (str): Outcome distribution as one of: "bernoulli",
                "exponential", "laplace", "normal", and "studentt"
            latent_dim (int): Dimension of the latent variable
            hidden_dim (int): Dimension of hidden layers of fully connected
                networks
            num_epochs (int): Number of training epochs
            num_layers (int): Number of hidden layers in fully connected
                networks
            batch_size (int): Batch size
            learning_rate (float): Learning rate
            learning_rate_decay (float/int): Learning rate decay over all
                epochs; the per-step decay rate will depend on batch size and
                number of epochs such that the initial learning rate will be
                learning_rate and the final learning rate will be
                learning_rate * learning_rate_decay
            num_samples (int): Number of samples to calculate ITE
            weight_decay (float): Weight decay
        """
        self.outcome_dist = outcome_dist
        self.latent_dim = latent_dim
        self.hidden_dim = hidden_dim
        self.num_epochs = num_epochs
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.learning_rate_decay = learning_rate_decay
        self.num_samples = num_samples
        self.weight_decay = weight_decay

    def fit(self, X, treatment, y, p=None):
        """
        Fits CEVAE.

        Args:
            X (np.matrix or np.array or pd.Dataframe): a feature matrix
            treatment (np.array or pd.Series): a treatment vector
            y (np.array or pd.Series): an outcome vector
            p: not read by this method; kept in the signature for callers
                that pass it
        """
        X, treatment, y = convert_pd_to_np(X, treatment, y)

        # A new model is built on every fit; the feature dimension comes
        # from the last axis of the feature matrix.
        self.cevae = CEVAEModel(outcome_dist=self.outcome_dist,
                                feature_dim=X.shape[-1],
                                latent_dim=self.latent_dim,
                                hidden_dim=self.hidden_dim,
                                num_layers=self.num_layers)

        self.cevae.fit(x=torch.tensor(X, dtype=torch.float),
                       t=torch.tensor(treatment, dtype=torch.float),
                       y=torch.tensor(y, dtype=torch.float),
                       num_epochs=self.num_epochs,
                       batch_size=self.batch_size,
                       learning_rate=self.learning_rate,
                       learning_rate_decay=self.learning_rate_decay,
                       weight_decay=self.weight_decay)

    def predict(self, X, treatment=None, y=None, p=None):
        """
        Calls predict on fitted CEVAE.

        Args:
            X (np.matrix or np.array or pd.Dataframe): a feature matrix
            treatment: not read by this method
            y: not read by this method
            p: not read by this method

        Returns:
            (np.ndarray): Predictions of treatment effects.

        Raises:
            AttributeError: if called before ``fit`` (``self.cevae`` is only
                created there).
        """
        # fit() accepts pandas input, so predict() must as well; a DataFrame
        # cannot be handed to torch.tensor directly, so unwrap any
        # pandas-like object to its NumPy values first.
        if hasattr(X, "to_numpy"):
            X = X.to_numpy()

        features = torch.tensor(X, dtype=torch.float)
        effects = self.cevae.ite(features,
                                 num_samples=self.num_samples,
                                 batch_size=self.batch_size)
        # Move to host memory before converting to a NumPy array.
        return effects.cpu().numpy()

    def fit_predict(self, X, treatment, y, p=None):
        """
        Fits the CEVAE model and then predicts.

        Args:
            X (np.matrix or np.array or pd.Dataframe): a feature matrix
            treatment (np.array or pd.Series): a treatment vector
            y (np.array or pd.Series): an outcome vector
            p: not read by this method

        Returns:
            (np.ndarray): Predictions of treatment effects.
        """
        self.fit(X, treatment, y)
        return self.predict(X)