def run_training() -> None:
    """Fit the marathon pipeline, persist it, and print a smoke-test
    prediction for the first row of the held-out test file."""
    _logger.info(f"training the pipeline with version: {_version}")

    # Load the full training dataset.
    frame = load_dataset(file_name=config.DATA_FILE)

    # Hold out 10% for testing; the fixed seed keeps the split reproducible.
    x_tr, x_te, y_tr, y_te = train_test_split(
        frame[config.FEATURES],
        frame[config.TARGET],
        test_size=0.1,
        random_state=0,
    )

    pipeline.marathon_pipeline.fit(x_tr[config.FEATURES], y_tr)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.marathon_pipeline)

    # Smoke test: run a prediction on the first record of the test file.
    print("########################################")
    print("Test prediction: ")
    holdout = load_dataset(file_name='test.csv')
    first_record_json = holdout[0:1].to_json(orient='records')
    prediction = make_prediction(input_data=first_record_json)
    print(prediction)
def run_training() -> None:
    """Fit the end-to-end pipeline, report held-out metrics, and persist it."""
    # Load the training dataset.
    frame = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # Reproducible 90/10 train/test split.
    x_tr, x_te, y_tr, y_te = train_test_split(
        frame[config.FEATURES],
        frame[config.TARGET],
        test_size=0.1,
        random_state=0,
    )

    pipeline.end_to_end_pipeline.fit(x_tr[config.FEATURES], y_tr)
    predictions = pipeline.end_to_end_pipeline.predict(x_te)

    # Report held-out error (mse/rmse truncated to int) and R^2, plus the
    # fitted linear-model coefficients.
    mse = mean_squared_error(y_te, predictions)
    print("test mse: {}".format(int(mse)))
    print("test rmse: {}".format(int(np.sqrt(mse))))
    print("test r2: {}".format(r2_score(y_te, predictions)))
    print(pipeline.end_to_end_pipeline.named_steps["Linear_model"].coef_)

    _version = "0.0.1"
    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.end_to_end_pipeline)
def run_training():
    """Fit the price pipeline on log-transformed targets and persist it."""
    frame = load_dataset(file_name=TRAINING_DATA_FILE)

    # 80/20 split with a fixed seed for reproducibility.
    x_tr, x_te, y_tr, y_te = train_test_split(
        frame[FEATURES], frame[TARGET], test_size=0.2, random_state=0
    )

    # The model is trained on the log of the target.
    y_tr = np.log(y_tr)
    y_te = np.log(y_te)

    price_pipe.fit(x_tr[FEATURES], y_tr)

    _logger.info(f"saving model version:{_version}")
    save_pipeline(pipeline_to_save=price_pipe)
def run_training() -> None:
    """Fit the pipeline on the full dataset and persist it."""
    # Load the training data; no hold-out split is taken in this variant.
    frame = load_dataset(file_name=config.TRAINING_DATA_FILE)
    features = frame[config.FEATURES]
    target = frame[config.TARGET]

    pipeline.fit(features, target)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline)
def run_training() -> None:
    """Fit the energy pipeline on the training split and persist it."""
    # NOTE: this project's loader takes `filename`, not `file_name`.
    frame = load_dataset(filename=config.TRAINING_DATA_FILE)

    # Split size and seed come from configuration.
    x_tr, x_te, y_tr, y_te = train_test_split(
        frame[config.FEATURES],
        frame[config.TARGET],
        test_size=config.TEST_SIZE,
        random_state=config.RANDOM_STATE,
    )

    pipeline.energy_pipe.fit(x_tr[config.FEATURES], y_tr)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_save=pipeline.energy_pipe)
def run_training() -> None:
    """Fit the price pipeline on log-transformed targets and persist it."""
    frame = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # Reproducible 90/10 train/test split.
    x_tr, x_te, y_tr, y_te = train_test_split(
        frame[config.FEATURES], frame[config.TARGET], test_size=0.1, random_state=0
    )

    # The regression is fitted against the log of the target.
    y_tr, y_te = np.log(y_tr), np.log(y_te)

    pipeline.price_pipe.fit(x_tr[config.FEATURES], y_tr)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
def run_training() -> None:
    """Fit the price pipeline on the training split and persist it."""
    frame = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # Fixed seed keeps the 90/10 split reproducible.
    x_tr, x_te, y_tr, y_te = train_test_split(
        frame[config.FEATURES], frame[config.TARGET], test_size=0.1, random_state=0
    )

    pipeline.price_pipe.fit(x_tr[config.FEATURES], y_tr)

    _logger.info(f'saving model version: {_version}')
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
def run_training() -> None:
    """Fit the titanic pipeline on the training split and persist it."""
    frame = load_dataset(file_name=config.DATASET_FILE)

    # Split on every non-target column (80/20, fixed seed); the pipeline
    # itself is fitted only on the configured feature subset.
    predictors = frame.drop(config.TARGET, axis=1)
    x_tr, x_te, y_tr, y_te = train_test_split(
        predictors, frame[config.TARGET], test_size=0.2, random_state=0
    )

    pipeline.titanic_pipe.fit(x_tr[config.FEATURES], y_tr)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.titanic_pipe)
def run_training() -> None:
    """Fit the price pipeline on the log-transformed training target and
    persist it."""
    frame = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # Reproducible 90/10 train/test split.
    x_tr, x_te, y_tr, y_te = train_test_split(
        frame[config.FEATURES], frame[config.TARGET], test_size=0.1, random_state=0
    )

    # Only the training target is log-transformed; the test split is unused here.
    y_tr = np.log(y_tr)

    pipeline.price_pipe.fit(x_tr[config.FEATURES], y_tr)

    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
def run_training() -> None:
    """Train the model and persist the fitted pipeline.

    Reads the training data, takes a reproducible 90/10 train/test split,
    fits the price pipeline on the log-transformed target, and saves the
    fitted pipeline to disk.
    """
    # Read training data. (The original used bare triple-quoted strings as
    # "comments" — those are expression statements, not comments; only the
    # first string in a function body is a docstring.)
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # Divide the dataset into training and testing; fixed seed for
    # reproducibility.
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.1,
        random_state=0,
    )

    # Transform the target: the model is fitted on log-prices.
    y_train = np.log(y_train)

    pipeline.price_pipe.fit(X_train[config.FEATURES], y_train)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
def run_training() -> None:
    """Fit the pipeline on a binarized target and persist it."""
    frame = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # Split with the library's default test size; only the seed is pinned.
    x_tr, x_te, y_tr, y_te = train_test_split(
        frame[config.FEATURES], frame[config.TARGET], random_state=0
    )

    # Encode the target: "M" -> 1, anything else -> 0.
    y_tr = y_tr.apply(lambda label: 1 if label == "M" else 0)

    pipeline.price_pipe.fit(x_tr[config.FEATURES], y_tr)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
def run_training() -> None:
    """Fit the price pipeline on log-transformed targets and persist it."""
    frame = load_dataset(file_path_name=config.TRAINING_DATA_FILE)

    # All columns except the last are predictors; 90/10 split, fixed seed.
    x_tr, x_te, y_tr, y_te = train_test_split(
        frame.iloc[:, :-1], frame[config.TARGET], test_size=0.1, random_state=0
    )

    # Train against the log of the target.
    y_tr = np.log(y_tr)

    # Note: the pipeline is fitted on every predictor column, not a
    # configured feature subset.
    pipeline.price_pipe.fit(x_tr, y_tr)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
def run_training() -> None:
    """Train the model: fit the price pipeline on the log-transformed
    target and persist it."""
    # Read the training data.
    frame = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # Split into train and test sets; the seed is fixed for reproducibility.
    x_tr, x_te, y_tr, y_te = train_test_split(
        frame[config.FEATURES], frame[config.TARGET], test_size=0.1, random_state=0
    )

    # Transform the target variable to log scale.
    y_tr = np.log(y_tr)

    pipeline.price_pipe.fit(x_tr[config.FEATURES], y_tr)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
def run_training() -> None:
    """Fit the price pipeline on log-transformed targets and persist it."""
    frame = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # Reproducible 90/10 train/test split.
    x_tr, x_te, y_tr, y_te = train_test_split(
        frame[config.FEATURES], frame[config.TARGET], test_size=0.1, random_state=0
    )

    # Both targets are log-transformed; only the training target is used here.
    y_tr = np.log(y_tr)
    y_te = np.log(y_te)

    pipeline.price_pipe.fit(x_tr[config.FEATURES], y_tr)

    # NOTE: this project's saver takes `pipeline`, not `pipeline_to_persist`.
    save_pipeline(pipeline=pipeline.price_pipe)
def run_training() -> None:
    """Train the model: fit the price pipeline on log-transformed targets
    and persist it."""
    # Load the training data.
    frame = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # Split into train and test sets; the seed is fixed here.
    x_tr, x_te, y_tr, y_te = train_test_split(
        frame[config.FEATURES], frame[config.TARGET], test_size=0.1, random_state=0
    )

    # Transform the targets to log scale.
    y_tr = np.log(y_tr)
    y_te = np.log(y_te)

    pipeline.price_pipe.fit(x_tr[config.FEATURES], y_tr)

    _logger.info(f'saving model version: {_version}')
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
def run_training() -> None:
    """Fit the price pipeline on log-transformed targets and persist it."""
    frame = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # Reproducible 90/10 train/test split.
    x_tr, x_te, y_tr, y_te = train_test_split(
        frame[config.FEATURES], frame[config.TARGET], test_size=0.1, random_state=0
    )

    # Log-transform the target, as expected by the sklearn pipeline.
    y_tr = np.log(y_tr)
    y_te = np.log(y_te)

    pipeline.price_pipe.fit(x_tr[config.FEATURES], y_tr)

    # Persist the fitted pipeline.
    _logger.info(f'saving model version: {_version}')
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
def run_training() -> None:
    """Fit the price pipeline on the log-transformed training target and
    persist it."""
    # Echo the working directory and dataset location to help debug paths.
    print(f'Current directory: {os.getcwd()}')
    print(f"Train path: {config.DATASET_DIR}")

    frame = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # Reproducible 90/10 train/test split.
    x_tr, x_te, y_tr, y_te = train_test_split(
        frame[config.FEATURES], frame[config.TARGET], test_size=0.1, random_state=0
    )

    # Train against the log of the target.
    y_tr = np.log(y_tr)

    pipeline.price_pipe.fit(x_tr[config.FEATURES], y_tr)

    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
def run_training() -> None:
    """Fit the price pipeline on log-transformed targets and persist it."""
    frame = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # Reproducible 90/10 train/test split.
    x_tr, x_te, y_tr, y_te = train_test_split(
        frame[config.FEATURES], frame[config.TARGET], test_size=0.1, random_state=0
    )

    # Log-transform both targets; only the training target is used here.
    y_tr = np.log(y_tr)
    y_te = np.log(y_te)

    # Fitting runs every step of the sklearn pipeline defined in pipeline.py.
    pipeline.price_pipe.fit(x_tr[config.FEATURES], y_tr)

    _logger.info(f'saving model version: {_version}')
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)