Example #1
def test_training():
    '''Run a dummy training and check that the results are reproducible'''
    # Load preprocessed dataset
    train_data, valid_data, classes = load_processed("data/processed/")
    # Launch model training
    train(train_data, valid_data, classes, output_dir="test/models/", n_iter=1)
    # TODO: Compare files hash (depends on object stability)
    pass
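The TODO above suggests comparing the hashes of the files produced by two training runs. A minimal, self-contained sketch of such a comparison is shown below; the helper names and the flat output-directory layout are assumptions for illustration, not part of the original test.

import hashlib
from pathlib import Path

def file_sha256(path):
    # Stream the file in chunks and return its SHA-256 hex digest.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            digest.update(chunk)
    return digest.hexdigest()

def assert_same_artifacts(dir_a, dir_b):
    # Compare identically named files from two output directories by hash.
    for path_a in sorted(Path(dir_a).glob("*")):
        if not path_a.is_file():
            continue
        path_b = Path(dir_b) / path_a.name
        assert file_sha256(path_a) == file_sha256(path_b), f"{path_a.name} differs between runs"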
Example #2
def run(cfg=None):
    '''Run the full pipeline: data acquisition, preparation, training, and reporting'''
    # Acquire the raw dataset
    raw_data = data_acq(cfg)
    # Build features from the raw data
    data = data_prep(raw_data, cfg)
    # Split the data into train and test sets
    X_train, X_test, y_train, y_test = split_data(data)
    # Train the model with hyperparameter optimization
    opt = train(X_train, y_train, cfg)
    # Generate the evaluation report
    generate_report(opt, X_test, y_test)
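A hedged usage sketch for run(), assuming the settings live in a YAML file loaded with PyYAML into a plain dict; the file name and the use of YAML are assumptions, not shown in the original snippet.

import yaml

if __name__ == "__main__":
    # Load pipeline settings from a YAML config file (path is a placeholder).
    with open("config.yml") as f:
        cfg = yaml.safe_load(f)
    run(cfg)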
Example #3

def start(self):
    clean_data.clean()
    build_features.build_features()
    cv_scores = train_model.train()
    self.cv_scores = cv_scores
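Since start() takes self and stores cv_scores on the instance, it is a method cut out of its class. A minimal sketch of a possible surrounding class follows; the class name and the src.data import are assumptions, modelled on the src.* layout seen in the other examples on this page.

from src.data import clean_data
from src.features import build_features
from src.models import train_model

class TrainingPipeline:
    # Hypothetical wrapper: runs cleaning, feature building, and training in order.
    def __init__(self):
        self.cv_scores = None

    def start(self):
        clean_data.clean()
        build_features.build_features()
        self.cv_scores = train_model.train()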
Example #4
File: test.py  Project: FedeOr/ai-eng
import pandas as pd

from src.data import make_dataset
from src.features import build_features
from src.models import train_model

if __name__ == "__main__":
    print("\n---- TEST FUNCTIONS -----\n")

    # Get the dataset path
    dataset_path = make_dataset.debug_path()

    df = pd.read_csv(dataset_path, sep=',', low_memory=False)
    print(f"Dataset: {df.head()}")

    # Feature Engineering
    X, y = build_features.preprocessing(df)
    
    # Split the dataset and train the model
    dataset_splitted = train_model.split_dataset(X, y)
    
    max_features = 'auto'
    model = train_model.train(dataset_splitted, max_features)
Example #5

import os
import pandas as pd

from src.features import preprocessing
from src.models import train_model

if __name__ == "__main__":

    BASEPATH = os.path.abspath("data")
    DATASET = "heart.csv"

    dataset_path = os.path.join(BASEPATH, DATASET)

    df = pd.read_csv(dataset_path, sep=',', low_memory=False)

    # Feature Engineering
    X, y = preprocessing.preprocessing(df)

    # Split dataset and train model
    dataset_splitted = train_model.split_dataset(X, y)
    train_model.train(dataset_splitted)
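Both snippets above rely on train_model.split_dataset, whose implementation is not shown here. A minimal sketch, under the assumption that it wraps scikit-learn's train_test_split and returns the four partitions as a tuple (the test size and random seed are placeholders):

from sklearn.model_selection import train_test_split

def split_dataset(X, y, test_size=0.2, random_state=42):
    # Hypothetical helper: split features and target into train/test partitions,
    # stratifying on the target to keep class proportions comparable.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )
    return X_train, X_test, y_train, y_test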