예제 #1
0
def run_training():
    """Fit the Titanic pipeline on the training data and persist it."""

    # load the full training dataset from the configured CSV
    dataset = pd.read_csv(config.TRAINING_DATA_FILE)

    # separate predictors from the target column
    predictors = dataset.drop(config.TARGET, axis=1)
    target = dataset[config.TARGET]

    # hold out 20% as a test partition; seed fixed for reproducibility
    X_train, X_test, y_train, y_test = train_test_split(
        predictors,
        target,
        test_size=0.2,
        random_state=0,
    )

    # train the pipeline on the training partition
    titanic_pipe.fit(X_train, y_train)

    # persist the fitted pipeline to disk
    joblib.dump(titanic_pipe, config.PIPELINE_NAME)
예제 #2
0
def run_training():
    """Train the Titanic pipeline and save the fitted artifact."""

    # read the raw training data
    frame = pd.read_csv(config.TRAINING_DATA_FILE)

    # 80/20 train/test split with a fixed seed so runs are reproducible
    training_data, testing_data, training_target, testing_target = \
        train_test_split(frame.drop(config.TARGET, axis=1),
                         frame[config.TARGET],
                         test_size=0.2,
                         random_state=0)

    # fit on the training partition, then persist the pipeline
    titanic_pipe.fit(training_data, training_target)
    joblib.dump(titanic_pipe, config.PIPELINE_NAME)
예제 #3
0
def run_training():
    """Train the model and persist the fitted pipeline.

    Reads the training CSV, holds out 20% as a test set, fits the
    Titanic pipeline on the remainder, and saves it to disk.
    """

    # read training data
    data = pd.read_csv(config.TRAINING_DATA_FILE)

    # divide train and test; use config.TARGET rather than the hard-coded
    # 'survived' string so the target column stays in sync with the
    # project configuration (every sibling variant already does this)
    X_train, X_test, y_train, y_test = train_test_split(
        data.drop(config.TARGET, axis=1),  # predictors
        data[config.TARGET],  # target
        test_size=0.2,  # percentage of obs in test set
        random_state=0)  # seed to ensure reproducibility

    # fit pipeline
    titanic_pipe.fit(X_train, y_train)

    # save pipeline
    joblib.dump(titanic_pipe, config.PIPELINE_NAME)
예제 #4
0
def run_training():
    """Train the model and persist the fitted pipeline.

    Restricts the data to the configured numerical + categorical feature
    columns, splits off a 20% test set, fits the pipeline, and saves it.
    """

    # read training data
    X = pd.read_csv(config.TRAINING_DATA_FILE)

    # restrict to the configured feature set before splitting
    features = config.NUMERICAL_VARS + config.CATEGORICAL_VARS
    X_train, X_test, y_train, y_test = train_test_split(X[features],
                                                        X[config.TARGET],
                                                        test_size=0.2,
                                                        random_state=0)

    # fit pipeline; X_train already contains exactly `features` (it was
    # split from X[features]), so the redundant X_train[features]
    # re-selection has been dropped
    titanic_pipe.fit(X_train, y_train)

    # save pipeline
    joblib.dump(titanic_pipe, config.PIPELINE_NAME)
예제 #5
0
def run_training():
    """Train the model and persist the fitted pipeline.

    Uses the configured feature list as predictors, holds out 10% as a
    test set, fits the Titanic pipeline, and saves it to disk.
    """

    # read training data
    data = pd.read_csv(config.TRAINING_DATA_FILE)

    # divide train and test
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.1,
        random_state=0)  # we are setting the seed here

    # fit pipeline; X_train was split from data[config.FEATURES], so it
    # already holds exactly those columns — the redundant
    # X_train[config.FEATURES] re-selection has been dropped
    titanic_pipe.fit(X_train, y_train)

    # save pipeline
    joblib.dump(titanic_pipe, config.PIPELINE_NAME)
예제 #6
0
def run_training() -> None:
    """Train the model and persist the fitted pipeline.

    Reads the cleaned dataset, splits off a 20% test partition using the
    configured seed, fits the Titanic ML pipeline, and saves it via
    save_pipeline(). Progress is reported through the module logger.
    """

    # read training data
    data = pd.read_csv(config.CLEANED_DATA)

    # divide train and test
    X_train, X_test, y_train, y_test = train_test_split(
        data.drop(config.TARGET, axis=1),
        data[config.TARGET],
        test_size=0.2,
        random_state=config.SEED)  # we are setting the seed here

    # Fit ML pipeline — log *before* fitting so the "started" message
    # actually precedes the training work (it was previously emitted only
    # after fit() had already completed)
    logger.info("**Training ML Pipeline started")
    titanic_pipe.fit(X_train, y_train)

    # save pipeline
    save_pipeline(pipeline_to_persist=titanic_pipe)
    logger.info("**Saved trained ML pipeline")
def run_training():
    """Train the model using settings loaded from config.yaml.

    Loads paths and feature-group settings from the YAML config, reads
    the data, splits off a 20% test set, fits the Titanic pipeline, and
    dumps it to the configured output path.
    """

    # read training configuration; hoist the two sections we use so the
    # nested cfg[i][section] lookup isn't repeated for every key, and
    # name the local `cfg` so it cannot shadow a module-level `config`
    cfg = ut.read_config_file('config.yaml')
    paths = cfg[0]['Paths']
    feature_groups = cfg[2]['Feature_Groups']

    data = ut.load_data(path=paths.get('directory'),
                        filename=paths.get('data_filename'),
                        extension=paths.get('data_extension'),
                        cols=feature_groups.get('data_columns'))

    # divide train and test
    target = feature_groups.get('target')
    data[target] = data[target].astype(int)  # ensure an integer label column
    X_train, X_test, y_train, y_test = train_test_split(
        data.drop(target, axis=1),
        data[target],
        test_size=0.2,
        random_state=0)

    # fit pipeline
    titanic_pipe.fit(X_train, y_train)

    # save pipeline
    joblib.dump(titanic_pipe, paths.get('output_model_path'))