Beispiel #1
0
 def test_data_columns(self):
     """Check that the column names reported after loading match the expected list."""
     expected_columns = [
         'POZO',
         'FECHA PRUEBAS',
         'mes',
         'año',
         'BBPD',
         'BNPD',
         '% AGUA',
         'E_FLUJO',
         'NU_COORD_UTM ESTE',
         'NU_COORD_UTM NORTE',
         '°API',
     ]

     engine = DataEngineering()
     engine.load_data(self.csv_path)

     # Compared with ==, so both the names and their order have to agree.
     assert engine.get_data_columns() == expected_columns
Beispiel #2
0
    def test_set_get_label(self):
        """Check that get_label is None at first and then returns the value set."""
        engine = DataEngineering()

        # Nothing has been configured yet, so no label should be reported.
        assert engine.get_label() is None

        engine.load_data(self.csv_path)
        target = "BNPD"
        engine.set_label(target)
        assert engine.get_label() == target
Beispiel #3
0
    def test_set_get_features(self):
        """Check that get_features is None at first and then returns the list set."""
        engine = DataEngineering()

        # Nothing has been configured yet, so no features should be reported.
        assert engine.get_features() is None

        engine.load_data(self.csv_path)
        wanted = ["POZO", "mes", "BBPD"]
        engine.set_features(wanted)
        assert engine.get_features() == wanted
Beispiel #4
0
 def test_load_data(self):
     """Check that get_data returns something after loading from self.csv_path."""
     engine = DataEngineering()
     engine.load_data(self.csv_path)

     loaded = engine.get_data()
     self.assertIsNotNone(loaded)
Beispiel #5
0
    def test_split_data(self):
        """Check that split_data fills in all four train/test attributes.

        Two derived columns ("antiguedad" and "flujo") are added before the
        split because the feature list refers to them.
        """
        engine = DataEngineering()
        engine.load_data(self.csv_path)
        frame = engine.get_data()

        # "antiguedad": largest "año" value minus each row's "año".
        years = frame["año"]
        engine.add_column("antiguedad", years.max() - years)

        # "flujo": category codes derived from the "E_FLUJO" column.
        flow_codes = frame["E_FLUJO"].copy().astype("category").cat.codes
        engine.add_column("flujo", flow_codes)

        engine.set_label("BBPD")
        engine.set_features([
            "flujo",
            "NU_COORD_UTM ESTE",
            "NU_COORD_UTM NORTE",
            "°API",
            "antiguedad",
        ])
        engine.split_data()

        # Every split attribute must have been populated.
        for split_name in ("x_train", "x_test", "y_train", "y_test"):
            assert getattr(engine, split_name) is not None
Beispiel #6
0
 def test_load_data_empty(self):
     """Check that get_data returns None when nothing has been loaded."""
     engine = DataEngineering()

     not_loaded = engine.get_data()
     self.assertIsNone(not_loaded)
Beispiel #7
0
from model.utils.data_engineering import DataEngineering
from model.prediction_model.regression import Regression

# Load the CSV into a DataEngineering helper and clean it.
csv_path = "data/area_01.csv"
data_e = DataEngineering()
data_e.load_data(csv_path)
data_e.clean_data()

# Derived feature "age": largest "año" value minus each row's "año".
_years = data_e.get_data()["año"]
max_date = _years.max()
age = max_date - _years
data_e.add_column("age", age)

# Derived feature "flow": category codes of the "E_FLUJO" column.
_flow_categories = data_e.get_data()["E_FLUJO"].copy().astype("category")
flow_data = _flow_categories.cat.codes
data_e.add_column("flow", flow_data)

# Pick the target column and the model inputs.
label = "BBPD"
features = [
    "flow",
    "NU_COORD_UTM ESTE",
    "NU_COORD_UTM NORTE",
    "°API",
    "age",
]
data_e.set_label(label)
data_e.set_features(features)

# Produce the train/test partitions.
data_e.split_data()

# Build the regression model on top of the prepared data.
model = Regression(data_e)