# Tests for the prediction entry point. Imports are inferred from the
# regression_model package layout used elsewhere in this repo.
from regression_model import predict
from regression_model.data_management import load_dataset


def test_multiple_prediction():
    # Given: the full test set, serialised as JSON records
    test_data = load_dataset(file_name='test.csv')
    multiple_test_input = test_data.to_json(orient='records')

    # When
    subject = predict.make_prediction(multiple_test_input)

    # Then: one prediction per row of test.csv
    assert subject is not None
    assert len(subject) == 482


def test_make_single_prediction():
    # Given: a single row of the test set, serialised as JSON records
    test_data = load_dataset(file_name='test.csv')
    single_test_input = test_data[0:1].to_json(orient='records')

    # When
    subject = predict.make_prediction(single_test_input)

    # Then: a single float prediction
    assert subject is not None
    assert isinstance(subject[0], float)
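# The tests above assume make_prediction() accepts JSON records and returns
# an indexable collection of floats. Below is a minimal sketch of such a
# function, under the assumption that the trained pipeline is persisted with
# joblib at a placeholder path; this is NOT the repo's actual predict module.
import joblib
import pandas as pd

_PIPELINE_PATH = 'trained_models/regression_pipeline.pkl'  # assumed location


def make_prediction(input_data: str) -> list:
    """Score JSON-records input with a persisted scikit-learn pipeline."""
    data = pd.read_json(input_data, orient='records')
    trained_pipeline = joblib.load(_PIPELINE_PATH)
    predictions = trained_pipeline.predict(data)
    return [float(p) for p in predictions]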
import logging

# Imports inferred from the regression_model package layout used elsewhere
# in this repo; the pipeline module path is assumed.
from regression_model import data_management
from regression_model import pipeline
from regression_model.config import configuracion

_logger = logging.getLogger(__name__)


def run_training() -> None:
    """Train the model."""
    # read training data
    data = data_management.load_dataset(
        file_name=configuracion.TRAINING_DATA_FILE)
    y = data[configuracion.TARGET]

    # fit the preprocessing pipeline on the training data and persist it
    model = pipeline.preprocessor_pipe.fit(data, y)
    data_management.save_pipeline(model_to_persist=model)

    _logger.debug(f'Training model version: {configuracion._version}')
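# Hypothetical entry point (not confirmed by the original source): how
# run_training() might be invoked when this module is executed directly.
if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    run_training()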
from azureml.core import Workspace
from azureml.core import Experiment
from regression_model.data_management import load_dataset
from regression_model.config import configuracion
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Attach to the Azure ML workspace defined by the local config.json and
# create the experiment under which training runs are tracked.
ws = Workspace.from_config()
experiment = Experiment(workspace=ws, name="finalexp2")

# Load the training data and split off a hold-out set.
data = load_dataset(file_name=configuracion.TRAINING_DATA_FILE)
y = data[configuracion.TARGET]
X_train, X_test, y_train, y_test = train_test_split(
    data, y, test_size=0.2, random_state=66)

# from data_management import load_pipeline
# import configuracion
import joblib  # sklearn.externals.joblib is removed in recent scikit-learn
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.base import BaseEstimator, TransformerMixin


class ImputeNa(BaseEstimator, TransformerMixin):
    """Replace NaN values with 'missing'."""

    def __init__(self, variables=None) -> None:
        # accept either a single column name or a list of column names
        if not isinstance(variables, list):
            self.variables = [variables]
        else:
            self.variables = variables
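    # A hedged sketch of how fit/transform might complete this transformer,
    # per its docstring ("replace NaN values with 'missing'"). The original
    # excerpt is cut off above, so this is an assumption, not the repo's
    # actual implementation.
    def fit(self, X, y=None):
        # constant-value imputation: nothing to learn from the data
        return self

    def transform(self, X):
        X = X.copy()
        for var in self.variables:
            X[var] = X[var].fillna('missing')
        return X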