from regression_model.config import config
from regression_model.processing.validation import validate_inputs
from regression_model.config.logging_config import get_logger
from regression_model import __version__ as _version

import logging
import typing as t

# NOTE(review): this module contained unresolved merge-conflict markers.
# They were resolved in favour of the incoming change (the project's
# ``get_logger`` helper) rather than HEAD's ``logging.getLogger(__name__)``;
# confirm that this is the intended side of the merge.
_logger = get_logger(logger_name=__name__)

# File name of the persisted pipeline: the configured prefix, immediately
# followed by the package version and a ``.pkl`` suffix.
pipeline_file_name = f'{config.PIPELINE_SAVE_FILE}{_version}.pkl'

# The pipeline is loaded once, when this module is imported.
# NOTE(review): ``load_pipeline`` is not imported in the visible part of this
# file -- presumably it is imported elsewhere; verify.
_price_pipe = load_pipeline(file_name=pipeline_file_name)


def make_prediction(*, input_data: t.Union[pd.DataFrame, dict],
                    ) -> dict:
    """Make a prediction using a saved model pipeline.

    Args:
        input_data: Array of model prediction inputs.

    Returns:
        Predictions for each input row, as well as the model version.
import numpy as np
from sklearn.model_selection import train_test_split

from regression_model import pipeline
from regression_model.processing.data_management import load_dataset, save_pipeline
from regression_model.config import config
from regression_model.config import logging_config
from regression_model import __version__ as _version

# Module-level logger, obtained from the project's ``logging_config`` helper
# and named after this module.
_logger = logging_config.get_logger(__name__)


def run_training() -> None:
    """Train the price pipeline and persist the fitted result.

    Loads the dataset named by ``config.TRAINING_DATA_FILE``, holds out 10%
    of the rows using a fixed seed, fits ``pipeline.price_pipe`` on the
    remaining features against the natural log of the target, then saves the
    fitted pipeline with ``save_pipeline``.
    """
    # read the training data
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # divide train and test; the held-out portion is not used in this
    # function, so it is discarded at unpacking time
    X_train, _, y_train, _ = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.1,
        random_state=0)  # fixed seed keeps the split reproducible

    # transform the target: the pipeline is fit on log(target)
    y_train = np.log(y_train)

    pipeline.price_pipe.fit(X_train[config.FEATURES], y_train)

    _logger.info(f"saving model version: {_version}")
    # Persist the fitted pipeline that the log line above announces.
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
# Example #3
# 0
from regression_model import pipeline
from regression_model.processing.data_management import (load_dataset,
                                                         save_pipeline)
from regression_model.config import config
import numpy as np
from sklearn.model_selection import train_test_split
from regression_model import __version__ as _version
from regression_model.config import logging_config

# Module-level logger, obtained from the project's ``logging_config`` helper.
# NOTE(review): no logger name is passed here -- confirm that the helper's
# default is what is wanted for this module.
_logger = logging_config.get_logger()


def run_training() -> None:
    """Fit the price pipeline on the training data and save it.

    The dataset named by ``config.TRAINING_DATA_FILE`` is loaded and split
    90/10 with ``random_state=0``. Only the training portion is used:
    ``pipeline.price_pipe`` is fit on the training features against the
    natural log of the training target, and the fitted pipeline is then
    handed to ``save_pipeline``.
    """
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # divide train and test; the test portion is never read in this
    # function, so it is dropped when unpacking
    X_train, _, y_train, _ = train_test_split(data[config.FEATURES],
                                              data[config.TARGET],
                                              test_size=0.1,
                                              random_state=0)

    # transforming the target: the pipeline is fit on log(target)
    y_train = np.log(y_train)

    pipeline.price_pipe.fit(X_train[config.FEATURES], y_train)
    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)