class CausalRelation:
    """Fit a regression model on the result of a SQL query and probe it.

    The last column of the query result is used as the outcome ``y`` and all
    preceding columns as the covariates ``x``.  After fitting, ``get_nace``
    measures how the model's predictions change when one covariate column is
    overwritten with a fixed treatment value.

    Attributes set by the constructor:
        model_name: the ``model`` argument as given.
        model: the fitted estimator (network or scikit-learn style regressor).
        x, y: covariates and outcome (torch tensors for ``'neural'``,
            whatever ``SqlConnection.execute_sql`` returns otherwise).
        is_mediation: whether a mediator adjustment is applied in ``get_nace``.
        mediaton_variable: column index of the mediator.  The attribute keeps
            its historical spelling because external code may read it.
        mediation_model: only when ``is_mediation`` is true; a linear
            regression fitted from the mediator column to ``y``.
    """

    # Model identifiers accepted by the constructor.
    _SUPPORTED_MODELS = ('neural', 'random_forest', 'linear', 'gradient_boost')

    def __init__(self,
                 sql_query: str,
                 model: str = 'neural',
                 is_mediation: bool = False,
                 mediation_variable: int = -1) -> None:
        """Load the data selected by *sql_query* and fit the chosen model.

        Args:
            sql_query: query whose result holds covariates followed by the
                outcome in the last column.
            model: one of ``'neural'``, ``'random_forest'``, ``'linear'`` or
                ``'gradient_boost'``.
            is_mediation: if true, additionally fit a linear model on the
                mediator column for use in ``get_nace``.
            mediation_variable: column index of the mediator inside ``x``.

        Raises:
            ValueError: if *model* is not a supported model name.
        """
        if model not in self._SUPPORTED_MODELS:
            # The message now lists every accepted name (it used to omit
            # 'gradient_boost').
            raise ValueError(
                'Model name should be either neural, random_forest, linear '
                'or gradient_boost')
        self.model_name = model

        if model == 'neural':
            self._fit_neural(sql_query)
        else:
            self._fit_tabular(sql_query, model)

        self.is_mediation = is_mediation
        self.mediaton_variable = mediation_variable
        if self.is_mediation:
            self.mediation_model = LinearRegression()
            self.mediation_model.fit(
                self.x[:, self.mediaton_variable].reshape(-1, 1), self.y)

    def _load_from_database(self, sql_query: str) -> None:
        """Run *sql_query* and split the result into ``self.x`` / ``self.y``."""
        # NOTE(review): connection settings, including credentials, are
        # hard-coded here; they should come from configuration or the
        # environment rather than from source control.
        connection = SqlConnection(
            host='database-1.cftlk3722yhu.us-east-1.rds.amazonaws.com',
            user='******',
            port=3306,
            db='project_db',
            password='******')
        res = connection.execute_sql(sql=sql_query)
        self.x = res[:, :-1]
        self.y = res[:, -1]

    def _fit_neural(self, sql_query: str) -> None:
        """Train the neural network and keep the data as float32 tensors."""
        dataset = SqlDataset(sql_query=sql_query)
        # Input width is taken from the first sample's feature tensor.
        n_col = dataset[0][0].size()[0]
        self.model = CausalRelationNetwork(num_dimensions=n_col,
                                           num_hidden=128)
        print('Started training the neural network.')
        self.model = self.model.train()
        train_neural_network(model=self.model, dataset=dataset)
        self.model = self.model.eval()
        self.x = torch.tensor(dataset.data[:, :-1], dtype=torch.float32)
        self.y = torch.tensor(dataset.data[:, -1], dtype=torch.float32)

    def _fit_tabular(self, sql_query: str, model: str) -> None:
        """Load the data through ``SqlConnection`` and fit a regressor."""
        self._load_from_database(sql_query)
        if model == 'random_forest':
            self.model = RandomForestRegressor(n_estimators=1000)
        elif model == 'linear':
            self.model = LinearRegression()
        else:  # 'gradient_boost'; other names were rejected in __init__
            self.model = xgb.XGBRegressor(objective='reg:squarederror')
        print('Fitting {} model to the given dataset.'.format(model))
        self.model.fit(self.x, self.y)

    def get_nace(self,
                 treatment_dim: int,
                 treatment_value: float,
                 n_resamples: int = 100,
                 resample_size: int = 20) -> float:
        """Estimate the prediction shift caused by fixing one covariate.

        A copy of ``self.x`` gets column *treatment_dim* overwritten with
        *treatment_value* (and, when mediation is enabled, the mediator
        column overwritten with the mediation model's prediction for
        *treatment_value*).  The per-row difference between the model's
        predictions on the modified and the original data is then resampled
        with replacement and averaged.

        Args:
            treatment_dim: column index of the treatment inside ``x``.
            treatment_value: value assigned to the treatment column.
            n_resamples: number of random resamples to average over.
            resample_size: number of rows drawn (with replacement) per
                resample.

        Returns:
            The mean of the resample means.  The result is random unless
            NumPy's global random state is seeded.
        """
        # numpy arrays expose ``copy``; torch tensors expose ``clone``.
        try:
            X_treatment = self.x.copy()
        except AttributeError:
            X_treatment = self.x.clone()
        X_treatment[:, treatment_dim] = treatment_value

        if self.is_mediation:
            adjusted_mediator_score = self.mediation_model.predict(
                np.array([treatment_value]).reshape(-1, 1))[0]
            # A plain Python float broadcasts over the whole column for both
            # numpy arrays and torch tensors, so no type probing is needed.
            X_treatment[:, self.mediaton_variable] = float(
                adjusted_mediator_score)

        nace = self.model.predict(X_treatment) - self.model.predict(self.x)
        if self.model_name == 'neural':
            # The network returns a tensor; move it to a flat numpy array.
            effects = nace.data.numpy().flatten()
        else:
            effects = np.asarray(nace).ravel()

        mean_nace = [
            np.random.choice(effects, size=resample_size).mean()
            for _ in range(n_resamples)
        ]
        return np.array(mean_nace).mean()
# Training/validation loop: for each of `n_epochs` epochs, run one
# optimisation pass over `train_dl`, then one gradient-free pass over
# `valid_dl`, and record/print the accumulated losses.
for epoch in range(n_epochs):
    # --- training pass ---
    model.train()
    training_loss = 0
    for X_batch, y_batch in train_dl:
        optimizer.zero_grad()
        # `hidden_0` is handed to the model as a second argument on every
        # batch; its meaning is defined outside this fragment.
        y_pred = model(X_batch, hidden_0)
        # NOTE(review): the target is passed un-squeezed here but squeezed in
        # the validation pass below. If `y_batch` carries a trailing unit
        # dimension, the two losses are computed on differently shaped
        # targets. Confirm against the dataset.
        loss = loss_func(y_pred.squeeze(), y_batch)
        training_loss += loss.item()
        loss.backward()
        optimizer.step()
    # --- validation pass (no gradient tracking) ---
    model.eval()
    valid_loss = 0
    with no_grad():
        for X_batch, y_batch in valid_dl:
            y_pred = model(X_batch, hidden_0)
            loss = loss_func(y_pred.squeeze(), y_batch.squeeze())
            valid_loss += loss.item()
    # The losses are sums over batches (not divided by the batch count),
    # scaled by 100 before being stored and printed.
    training_loss_epoch = training_loss * 100
    valid_loss_epoch = valid_loss * 100
    training_losses[epoch] = training_loss_epoch
    valid_losses[epoch] = valid_loss_epoch
    print('Epoch {}: train loss: {:.4} valid loss: {:.4}'.format(
        epoch, training_loss_epoch, valid_loss_epoch))