from regression_model.config import config from regression_model.processing.validation import validate_inputs <<<<<<< HEAD ======= from regression_model.config.logging_config import get_logger >>>>>>> 6162c318b58b225e0061fccd6c64cd67fe205c1b from regression_model import __version__ as _version import logging import typing as t <<<<<<< HEAD _logger = logging.getLogger(__name__) ======= _logger = get_logger(logger_name=__name__) >>>>>>> 6162c318b58b225e0061fccd6c64cd67fe205c1b pipeline_file_name = f'{config.PIPELINE_SAVE_FILE}{_version}.pkl' _price_pipe = load_pipeline(file_name=pipeline_file_name) def make_prediction(*, input_data: t.Union[pd.DataFrame, dict], ) -> dict: """Make a prediction using a saved model pipeline. Args: input_data: Array of model prediction inputs. Returns: Predictions for each input row, as well as the model version.
import numpy as np
from sklearn.model_selection import train_test_split

from regression_model import pipeline
from regression_model import __version__ as _version
from regression_model.config import config
from regression_model.config import logging_config
from regression_model.processing.data_management import load_dataset, save_pipeline

# Module-level logger named after this module so records show their origin.
_logger = logging_config.get_logger(__name__)


def run_training() -> None:
    """Train the model and persist the fitted pipeline."""

    # read the training data
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # divide train and test
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.1,
        random_state=0)  # we are setting the seed here

    # transform the target — the pipeline is fit against log-prices
    y_train = np.log(y_train)

    pipeline.price_pipe.fit(X_train[config.FEATURES], y_train)

    _logger.info(f"saving model version: {_version}")
    # BUG FIX: the fitted pipeline was never persisted, even though
    # ``save_pipeline`` is imported and the log line above announces a save.
    # The sibling training module performs exactly this call.
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
import numpy as np
from sklearn.model_selection import train_test_split

from regression_model import pipeline
from regression_model import __version__ as _version
from regression_model.config import config
from regression_model.config import logging_config
from regression_model.processing.data_management import (load_dataset,
                                                         save_pipeline)

# CONSISTENCY: the sibling training module passes the module name to
# ``get_logger``; calling it with no argument produced a logger with the
# helper's default name — confirm ``get_logger``'s default behaviour.
_logger = logging_config.get_logger(logger_name=__name__)


def run_training() -> None:
    """Train the model and persist the fitted pipeline."""

    # read the training data
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # divide train and test; fixed seed keeps the split reproducible
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.1,
        random_state=0)

    # transforming the target — the pipeline is fit against log-prices
    y_train = np.log(y_train)
    # NOTE(review): y_test is log-transformed but never used below; kept as-is
    # to preserve failure behaviour on non-positive targets — confirm intent.
    y_test = np.log(y_test)

    pipeline.price_pipe.fit(X_train[config.FEATURES], y_train)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)