def main(input_data, output_model):
    """Tune a classifier on the training set and save the resulting model.

    Builds a ``DataSet`` from ``input_data``, extracts the training
    features and label from it, tunes the estimator at index 4 of
    ``models`` against the grid at index 4 of ``params`` via
    ``Model.tune``, and saves the tuned model.

    Args:
        input_data: Passed to ``DataSet`` as ``train_dir``.
        output_model: Prefix for the save target. The model name is
            appended by plain string concatenation, so no path separator
            is inserted; presumably callers pass a value ending in one --
            TODO confirm against the call site.
    """
    logging.getLogger(__name__).info('training model')

    dataset = DataSet(train_dir=input_data)
    train_frame = dataset.get_train_set()
    features = dataset.get_features(train_frame)
    labels = dataset.get_label(train_frame)

    # NOTE(review): index 4 is hard-coded for both registries; which
    # estimator it selects is not visible from this function.
    estimator = models[4]
    search_space = params[4]

    tuned = Model.tune(estimator, features, labels, search_space)
    destination = output_model + tuned.name
    tuned.save(destination)
def main(input_train, input_test, input_model, output_prediction):
    """Predict outcomes for the test set and write them to a CSV file.

    Builds a ``DataSet`` from the train and test inputs, extracts the
    test features, loads a saved model through ``Model.load``, and writes
    a two-column table (``PassengerId``, ``Survived``) without the index.

    Args:
        input_train: Passed to ``DataSet`` as ``train_dir``.
        input_test: Passed to ``DataSet`` as ``test_dir``.
        input_model: Prefix for the model to load. The fixed name
            ``'XGBClassifier'`` is appended by plain string concatenation.
        output_prediction: Prefix for the output file.
            ``submission_<model name>.csv`` is appended by plain string
            concatenation.

    Neither prefix gets a path separator inserted; presumably callers
    pass values ending in one -- TODO confirm against the call site.
    """
    logging.getLogger(__name__).info('predicting outcomes')

    dataset = DataSet(train_dir=input_train, test_dir=input_test)
    test_frame = dataset.get_test_set()
    features = dataset.get_features(test_frame)

    # The model to load is fixed by name rather than passed in.
    model_location = input_model + 'XGBClassifier'
    fitted = Model.load(model_location)
    predictions = fitted.predict(features)

    submission = pd.DataFrame({
        'PassengerId': test_frame['PassengerId'],
        'Survived': predictions,
    })
    file_name = 'submission_{}.csv'.format(fitted.name)
    submission.to_csv(output_prediction + file_name, index=False)