예제 #1
0
def main(input_data, output_model):
    """Tune a model on the training set and save the result.

    The training set is loaded through ``DataSet(train_dir=input_data)``,
    split into features and labels, and used to tune the estimator stored
    at index 4 of the ``models`` registry with the grid at ``params[4]``.

    Args:
        input_data: Location handed to ``DataSet`` as ``train_dir``.
        output_model: Prefix for the saved model. The model's ``name`` is
            appended by plain string concatenation, so a directory must
            include its trailing path separator.
    """
    logging.getLogger(__name__).info('training model')

    dataset = DataSet(train_dir=input_data)
    train_set = dataset.get_train_set()
    features = dataset.get_features(train_set)
    labels = dataset.get_label(train_set)

    # Estimator and hyper-parameter grid share the same registry index.
    tuned = Model.tune(models[4], features, labels, params[4])

    destination = output_model + tuned.name
    tuned.save(destination)
예제 #2
0
def main(input_train, input_test, input_model, output_prediction):
    """Predict outcomes for the test set and write them to a CSV file.

    The test set is loaded through ``DataSet``, a previously saved model
    is loaded from ``input_model + 'XGBClassifier'``, and its predictions
    are written next to each ``PassengerId`` in a submission file.

    Args:
        input_train: Location handed to ``DataSet`` as ``train_dir``.
        input_test: Location handed to ``DataSet`` as ``test_dir``.
        input_model: Prefix of the saved model; the model name is appended
            by plain string concatenation, so a directory must include
            its trailing path separator.
        output_prediction: Prefix of the output file; the file name
            ``submission_<model name>.csv`` is appended the same way.
    """
    logging.getLogger(__name__).info('predicting outcomes')

    dataset = DataSet(train_dir=input_train, test_dir=input_test)
    test_set = dataset.get_test_set()
    features = dataset.get_features(test_set)

    classifier = Model.load(input_model + 'XGBClassifier')
    predictions = classifier.predict(features)

    columns = {
        'PassengerId': test_set['PassengerId'],
        'Survived': predictions,
    }
    submission = pd.DataFrame(columns)

    file_name = 'submission_{}.csv'.format(classifier.name)
    # index=False keeps the DataFrame's row index out of the CSV.
    submission.to_csv(output_prediction + file_name, index=False)