Ejemplo n.º 1
0
class CausalRelation:
    """Fit a regression model on SQL query results and probe it with interventions.

    The last column of the data is used as the target ``y``; every other
    column is a feature in ``x``. After fitting, :meth:`get_nace` measures how
    the model's predictions shift when one feature column is overwritten with
    a fixed value.

    Attributes set by the constructor: ``model_name``, ``model``, ``x``, ``y``,
    ``is_mediation``, ``mediaton_variable`` and, when ``is_mediation`` is true,
    ``mediation_model``.
    """

    # Model names accepted by the constructor.
    _SUPPORTED_MODELS = ('neural', 'random_forest', 'linear', 'gradient_boost')

    def __init__(self,
                 sql_query: str,
                 model: str = 'neural',
                 is_mediation: bool = False,
                 mediation_variable: int = -1) -> None:
        """Load the data selected by ``sql_query`` and fit the chosen model.

        Args:
            sql_query: Query whose result supplies the features and, in its
                last column, the target.
            model: One of ``'neural'``, ``'random_forest'``, ``'linear'`` or
                ``'gradient_boost'``.
            is_mediation: When true, an extra linear model is fitted from the
                mediator column to the target.
            mediation_variable: Column index of the mediator inside ``x``.
                The default ``-1`` selects the last feature column.

        Raises:
            ValueError: If ``model`` is not one of the supported names.
        """
        if model not in self._SUPPORTED_MODELS:
            # The message is built from the accepted names so it cannot drift
            # out of sync with the check above.
            raise ValueError('Model name should be one of {}'.format(
                ', '.join(self._SUPPORTED_MODELS)))

        self.model_name = model

        if model == 'neural':
            dataset = SqlDataset(sql_query=sql_query)
            # Size of the first element of the first sample gives the input width.
            n_col = dataset[0][0].size()[0]
            self.model = CausalRelationNetwork(num_dimensions=n_col,
                                               num_hidden=128)
            print('Started training the neural network.')
            self.model = self.model.train()
            train_neural_network(model=self.model, dataset=dataset)
            self.model = self.model.eval()
            self.x = torch.tensor(dataset.data[:, :-1], dtype=torch.float32)
            self.y = torch.tensor(dataset.data[:, -1], dtype=torch.float32)
        else:
            # NOTE(review): connection settings are hard-coded here; consider
            # loading them from configuration or the environment instead.
            host = 'database-1.cftlk3722yhu.us-east-1.rds.amazonaws.com'
            user = '******'
            port = 3306
            db = 'project_db'
            password = '******'
            connection = SqlConnection(host=host,
                                       user=user,
                                       port=port,
                                       db=db,
                                       password=password)
            res = connection.execute_sql(sql=sql_query)

            self.x = res[:, :-1]
            self.y = res[:, -1]

            print('Fitting {} model to the given dataset.'.format(model))
            if model == 'random_forest':
                self.model = RandomForestRegressor(n_estimators=1000)
            elif model == 'linear':
                self.model = LinearRegression()
            else:  # 'gradient_boost' (the only remaining validated name)
                self.model = xgb.XGBRegressor(objective='reg:squarederror')
            self.model.fit(self.x, self.y)

        self.is_mediation = is_mediation
        # The attribute name is misspelled ("mediaton"); it is kept as-is
        # because code outside this class may already read it.
        self.mediaton_variable = mediation_variable
        if self.is_mediation:
            # Linear model from the single mediator column to the target.
            self.mediation_model = LinearRegression()
            self.mediation_model.fit(
                self.x[:, self.mediaton_variable].reshape(-1, 1), self.y)

    def get_nace(self,
                 treatment_dim: int,
                 treatment_value: float,
                 *,
                 n_bootstrap: int = 100,
                 sample_size: int = 20) -> float:
        """Estimate the mean prediction shift caused by fixing one feature.

        Column ``treatment_dim`` of a copy of ``x`` is overwritten with
        ``treatment_value``; the per-row difference between the model's
        predictions on the modified and the original data is then resampled
        with ``numpy.random.choice`` and the mean of the resample means is
        returned.

        Args:
            treatment_dim: Index of the feature column to overwrite.
            treatment_value: Value written into that column for every row.
            n_bootstrap: Number of random resamples to draw (default 100).
            sample_size: Number of differences drawn per resample (default 20).

        Returns:
            The mean over all resample means.
        """
        # NumPy arrays expose copy(); torch tensors raise AttributeError there
        # and provide clone() instead.
        try:
            X_treatment = self.x.copy()
        except AttributeError:
            X_treatment = self.x.clone()
        X_treatment[:, treatment_dim] = treatment_value

        if self.is_mediation:
            # NOTE(review): mediation_model was fitted on the mediator column
            # against y, yet it is queried with the treatment value and its
            # output is written into the mediator column -- confirm this is
            # the intended adjustment.
            adjusted_mediator_score = self.mediation_model.predict(
                np.array([treatment_value]).reshape(-1, 1))[0]
            # A scalar assignment broadcasts down the column for both NumPy
            # arrays and torch tensors, so no type-specific branch is needed.
            X_treatment[:, self.mediaton_variable] = float(
                adjusted_mediator_score)

        # Assumes the neural model also exposes predict() -- its class is
        # defined outside this block.
        nace = self.model.predict(X_treatment) - self.model.predict(self.x)
        if self.model_name == 'neural':
            # Convert the network output to a flat NumPy array for resampling.
            nace = nace.data.numpy().flatten()

        mean_nace = [
            np.random.choice(nace, size=sample_size).mean()
            for _ in range(n_bootstrap)
        ]
        return (np.array(mean_nace).mean())
Ejemplo n.º 2
0
# One training pass followed by one validation pass per epoch; the summed
# batch losses of each pass are scaled by 100, stored, and printed.
for epoch in range(n_epochs):
    # ---- training pass: parameters are updated after every batch ----
    model.train()
    training_loss = 0
    for X_batch, y_batch in train_dl:
        optimizer.zero_grad()

        # The same hidden_0 is passed to the model for every batch.
        y_pred = model(X_batch, hidden_0)
        loss = loss_func(y_pred.squeeze(), y_batch)

        loss.backward()
        optimizer.step()

        # The loss value itself is not modified by the optimizer step, so it
        # can be accumulated afterwards.
        training_loss += loss.item()

    # ---- validation pass: evaluated inside no_grad(), no optimizer step ----
    model.eval()
    valid_loss = 0
    with no_grad():
        for X_batch, y_batch in valid_dl:
            y_pred = model(X_batch, hidden_0)
            loss = loss_func(y_pred.squeeze(), y_batch.squeeze())
            valid_loss += loss.item()

    # Losses are sums over batches (not averages), scaled by 100.
    training_loss_epoch, valid_loss_epoch = training_loss * 100, valid_loss * 100
    training_losses[epoch], valid_losses[epoch] = (training_loss_epoch,
                                                   valid_loss_epoch)

    print('Epoch {}: train loss: {:.4} valid loss: {:.4}'.format(
        epoch, training_loss_epoch, valid_loss_epoch))