def main(input_filepath, model_filepath, output_filepath, config_file):
    """Score the saved model on the train and test splits and plot the results.

    Steps, in order:

    1. Parse ``config_file`` with ``parse_config``.
    2. Read ``X_train.csv``, ``y_train.csv``, ``X_test.csv`` and
       ``y_test.csv`` from ``input_filepath``; the target files are
       flattened to 1-D arrays.
    3. Load the model from ``model_filepath`` concatenated with
       ``config['predicting']['model_name']``.
    4. Predict on both splits and compute the RMSE of each.
    5. Pass the formatted RMSE labels, predictions and targets to
       ``plot_predictions`` and save the result as ``pred_plots.png``
       inside ``output_filepath``.

    Args:
        input_filepath: Directory holding the four split CSV files
            (joined with a leading ``/`` on each file name).
        model_filepath: Prefix that the configured model name is appended
            to as-is -- no separator is inserted here.
        output_filepath: Directory the prediction plot is written to.
        config_file: Configuration source handed to ``parse_config``.
    """
    logging.getLogger(__name__).info(
        'Loading training set, test set and model and predicting.')

    settings = parse_config(config_file)

    def _read_split(suffix):
        # Every split lives directly under the input directory.
        return pd.read_csv(input_filepath + suffix)

    def _rmse(actual, predicted):
        # Root of the mean squared error, so the score is in target units.
        return np.sqrt(mean_squared_error(actual, predicted))

    # Data: features stay as DataFrames, targets become flat arrays.
    features_train = _read_split('/X_train.csv')
    target_train = _read_split('/y_train.csv').values.ravel()
    features_test = _read_split('/X_test.csv')
    target_test = _read_split('/y_test.csv').values.ravel()

    # Model
    estimator = Model.load(model_filepath + settings['predicting']['model_name'])

    # Predictions
    fitted_train = estimator.predict(features_train)
    fitted_test = estimator.predict(features_test)

    # Evaluation
    rmse_train = _rmse(target_train, fitted_train)
    rmse_test = _rmse(target_test, fitted_test)

    # Plot: one label per split, train first, matching the positional
    # order that plot_predictions receives below.
    label_template = r'$RMSE={:,.0f}$' + ' EUR'
    score_labels = tuple(
        label_template.format(value) for value in (rmse_train, rmse_test)
    )
    figure = plot_predictions(
        score_labels, fitted_train, fitted_test, target_train, target_test)
    figure.savefig(output_filepath + '/pred_plots.png')
def predict(model_filepath, config, input_data):
    """Return a single integer prediction for user-supplied input.

    The model is loaded from ``model_filepath`` concatenated with
    ``config['predicting']['model_name']`` (no separator is inserted).
    Its predictions for ``input_data`` are rounded to the nearest thousand
    (``decimals=-3``) and only the first one is returned, cast to ``int``.

    Args:
        model_filepath: Prefix the configured model name is appended to.
        config: Mapping with a ``'predicting'`` section containing
            ``'model_name'``.
        input_data: Features forwarded unchanged to ``model.predict``.

    Returns:
        The first rounded prediction as an ``int``.
    """
    model_location = model_filepath + config['predicting']['model_name']
    estimator = Model.load(model_location)

    # Round every prediction to thousands, then keep just the first entry.
    rounded = np.round(estimator.predict(input_data), -3)
    return int(rounded[0])
def main(input_train, input_test, input_model, output_prediction):
    """Predict on the test set and write a submission CSV.

    Builds a ``DataSet`` from the train and test locations, extracts the
    test set and its features, loads the model stored at
    ``input_model + 'XGBClassifier'`` and predicts on those features.
    The result is written to
    ``output_prediction + 'submission_<model.name>.csv'`` with two
    columns, ``PassengerId`` (taken from the test set) and ``Survived``
    (the predictions), without the DataFrame index.

    Args:
        input_train: Passed to ``DataSet`` as ``train_dir``.
        input_test: Passed to ``DataSet`` as ``test_dir``.
        input_model: Prefix the model name ``'XGBClassifier'`` is appended
            to as-is -- no separator is inserted here.
        output_prediction: Prefix the submission file name is appended to
            as-is.
    """
    logging.getLogger(__name__).info('predicting outcomes')

    dataset = DataSet(train_dir=input_train, test_dir=input_test)
    test_frame = dataset.get_test_set()
    test_features = dataset.get_features(test_frame)

    classifier = Model.load(input_model + 'XGBClassifier')
    predicted = classifier.predict(test_features)

    submission = pd.DataFrame({
        'PassengerId': test_frame['PassengerId'],
        'Survived': predicted,
    })
    # File name embeds the loaded model's own name attribute.
    destination = output_prediction + 'submission_{}.csv'.format(classifier.name)
    submission.to_csv(destination, index=False)
num_workers=8, drop_last=True, shuffle=True) val_dataset = InpaintingDataset(config, val_list, fix_mask_path=val_fix_mask, training=False) val_loader = DataLoader(dataset=val_dataset, batch_size=config.batch_size, num_workers=2, drop_last=False, shuffle=False) sample_iterator = val_dataset.create_iterator(config.sample_size) model = Model(config, logger=logger) model.load(is_test=False) steps_per_epoch = len(train_dataset) // config.batch_size iteration = model.iteration epoch = model.iteration // steps_per_epoch logger.info('Start from epoch:{}, iteration:{}'.format(epoch, iteration)) model.train() keep_training = True best_score = {} while (keep_training): epoch += 1 stateful_metrics = ['epoch', 'iter', 'g_lr'] progbar = Progbar(len(train_dataset), max_iters=steps_per_epoch, width=20,