def test_training():
    """Run a one-iteration dummy training and check that results are reproducible."""
    # Load the preprocessed dataset from disk.
    training_set, validation_set, class_labels = load_processed("data/processed/")
    # Train for a single iteration into a throwaway output directory.
    train(training_set, validation_set, class_labels, output_dir="test/models/", n_iter=1)
    # TODO: Compare files hash (depends on object stability)
def run(cfg=None):
    """Execute the full pipeline: acquire data, build features, split, train, report.

    Args:
        cfg: optional configuration object passed through to each stage.
    """
    # Acquire the raw dataset.
    raw = data_acq(cfg)
    # Build features from the raw data.
    features = data_prep(raw, cfg)
    # Split into train/test partitions.
    X_train, X_test, y_train, y_test = split_data(features)
    # Hyperparameter optimization / model fitting.
    opt = train(X_train, y_train, cfg)
    # Produce the evaluation report on the held-out split.
    generate_report(opt, X_test, y_test)
def start(self):
    """Run the data-cleaning, feature-building, and training stages in order.

    Stores the cross-validation scores returned by training on the instance.
    """
    clean_data.clean()
    build_features.build_features()
    # Keep the CV scores so callers can inspect them after the run.
    self.cv_scores = train_model.train()
import pandas as pd

from src.data import make_dataset
from src.features import build_features
from src.models import train_model

if __name__ == "__main__":
    print("\n---- TEST FUNCTIONS -----\n")

    # Resolve the dataset path via the project helper and load it.
    csv_path = make_dataset.debug_path()
    frame = pd.read_csv(csv_path, sep=',', low_memory=False)
    print(f"Dataset: {frame.head()}")

    # Feature engineering: derive feature matrix and target vector.
    features, target = build_features.preprocessing(frame)

    # Split the dataset and train the model.
    splits = train_model.split_dataset(features, target)
    max_features = 'auto'
    model = train_model.train(splits, max_features)
import os

import pandas as pd

from src.features import preprocessing
from src.models import train_model

if __name__ == "__main__":
    # Build the absolute path to the input CSV under the data directory.
    data_dir = os.path.abspath("data")
    csv_path = os.path.join(data_dir, "heart.csv")
    frame = pd.read_csv(csv_path, sep=',', low_memory=False)

    # Feature Engineering
    features, target = preprocessing.preprocessing(frame)

    # Split dataset and train model
    splits = train_model.split_dataset(features, target)
    train_model.train(splits)