Beispiel #1
0
    def test_split_data(self):
        """Check that split_data() leaves all four train/test attributes set.

        Loads the CSV at ``self.csv_path``, adds the derived "antiguedad"
        and "flujo" columns, registers the label and the feature list, runs
        the split and finally asserts that ``x_train``, ``x_test``,
        ``y_train`` and ``y_test`` are not ``None``.
        """
        engineer = DataEngineering()
        engineer.load_data(self.csv_path)
        frame = engineer.get_data()

        # "antiguedad": largest "año" value minus each row's own "año".
        years = frame["año"]
        newest_year = years.max()
        engineer.add_column("antiguedad", newest_year - years)

        # "flujo": category codes of the "E_FLUJO" column.
        flow_as_category = frame["E_FLUJO"].copy().astype("category")
        engineer.add_column("flujo", flow_as_category.cat.codes)

        target = "BBPD"
        predictors = [
            "flujo",
            "NU_COORD_UTM ESTE",
            "NU_COORD_UTM NORTE",
            "°API",
            "antiguedad",
        ]
        engineer.set_label(target)
        engineer.set_features(predictors)
        engineer.split_data()

        # Every partition produced by the split must be present.
        for partition in ("x_train", "x_test", "y_train", "y_test"):
            assert getattr(engineer, partition) is not None
Beispiel #2
0
from model.utils.data_engineering import DataEngineering
from model.prediction_model.regression import Regression

# Build the DataEngineering helper, load the area 01 CSV into it and call
# clean_data() before any feature is derived.
csv_path = "data/area_01.csv"
data_e = DataEngineering()
data_e.load_data(csv_path)
data_e.clean_data()

# Derived feature "age": the largest "año" value in the data minus each
# row's own "año" value.
max_date = data_e.get_data()["año"].max()
year_column = data_e.get_data()["año"]
age = max_date - year_column
data_e.add_column("age", age)

# Derived feature "flow": the "E_FLUJO" column cast to the "category"
# dtype and represented by its category codes.
flow_source = data_e.get_data()["E_FLUJO"].copy()
flow_data = flow_source.astype("category").cat.codes
data_e.add_column("flow", flow_data)

# Register the model inputs (features) and the target column (label).
label = "BBPD"
features = [
    "flow",
    "NU_COORD_UTM ESTE",
    "NU_COORD_UTM NORTE",
    "°API",
    "age",
]
data_e.set_features(features)
data_e.set_label(label)

# Partition the prepared data into train and test sets.
data_e.split_data()

# Hand the prepared DataEngineering instance to the regression model.
model = Regression(data_e)