def test_fit(self, example_dataset):
    """Tests fitting the model.

    Fits a fresh ``TitanicModel`` on the example dataset and checks that the
    fitted model produces at least one prediction for the same inputs.

    Args:
        example_dataset: Fixture that unpacks into an ``(X, y)`` pair.
    """
    X, y = example_dataset

    model = TitanicModel()
    model.fit(X, y)

    # ``y_pred`` has to be produced explicitly: asserting on it without this
    # call raises NameError and the test never checks the fit at all.
    y_pred = model.predict(X)
    assert len(y_pred) > 0
def test_fit(self, example_dataset):
    """Check that fitting leaves the model with a non-``None`` estimator.

    Args:
        example_dataset: Fixture that unpacks into a ``(features, target)``
            pair.
    """
    features, target = example_dataset

    fitted = TitanicModel()
    fitted.fit(features, target)

    # The check below inspects a private attribute on purpose.
    # pylint: disable=protected-access
    assert fitted._estimator is not None
def test_predict(self, example_dataset):
    """Check that a fitted model returns a non-empty set of predictions.

    Args:
        example_dataset: Fixture that unpacks into a ``(features, target)``
            pair.
    """
    features, target = example_dataset

    classifier = TitanicModel()
    classifier.fit(features, target)

    # Predict on the same inputs the model was fitted on.
    predictions = classifier.predict(features)
    assert len(predictions) > 0
def main(input_dir, input_file, model_dir, model_file, n_trees):
    """Train a ``TitanicModel`` on a CSV dataset and save it.

    Args:
        input_dir: Directory holding the dataset; joined to ``input_file``
            with ``/``, so it must support that operator (e.g. a
            ``pathlib.Path``).
        input_file: Name of the CSV file inside ``input_dir``.
        model_dir: Output directory; created, including missing parents, if
            it does not exist yet.
        model_file: File name the model is saved under inside ``model_dir``.
        n_trees: Forwarded to ``TitanicModel`` as its ``n_trees`` argument.
    """
    # Load the raw table and split off the "Survived" target column.
    frame = pd.read_csv(input_dir / input_file)
    target = frame["Survived"]
    features = frame.drop("Survived", axis=1)

    # Fit the model on the remaining columns.
    classifier = TitanicModel(n_trees=n_trees)
    classifier.fit(features, y=target)

    # Make sure the destination exists before writing the model.
    model_dir.mkdir(parents=True, exist_ok=True)
    destination = model_dir / model_file
    classifier.save(destination)
def train(input_path, model_path, n_estimators):
    """Fit a ``TitanicModel`` on a CSV dataset and dump it with joblib.

    Args:
        input_path: Location of the CSV file to read the dataset from.
        model_path: Location the fitted model is written to; its parent
            directory is created (with missing parents) when absent.
        n_estimators: Forwarded to ``TitanicModel`` as ``n_estimators``.
    """
    log = logging.getLogger(__name__)

    log.info("Loading input dataset")
    frame = pd.read_csv(input_path)
    # Split the table into the "Survived" target and the remaining columns.
    target = frame["Survived"]
    features = frame.drop(["Survived"], axis=1)

    log.info(f"Training model with n_estimators = {n_estimators}")
    classifier = TitanicModel(n_estimators=n_estimators)
    classifier.fit(features, y=target)

    log.info(f"Writing output to {model_path}")
    # The output directory may not exist yet; create it before dumping.
    Path(model_path).parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(classifier, model_path)