def main(input_filepath, model_filepath, output_filepath, config_file):
    """Score a saved model on the train and test sets and plot the result.

    Reads the feature and target CSV files found under ``input_filepath``,
    loads the model named by ``config['predicting']['model_name']``,
    predicts on both splits, computes the root of ``mean_squared_error``
    for each split and saves the figure returned by ``plot_predictions``
    as ``pred_plots.png`` under ``output_filepath``.

    Args:
        input_filepath: Path prefix holding ``X_train.csv``, ``y_train.csv``,
            ``X_test.csv`` and ``y_test.csv``.
        model_filepath: Prefix the configured model name is appended to
            as-is (no path separator is inserted here).
        output_filepath: Path prefix the plot is written under.
        config_file: Handed to ``parse_config``; the result must provide
            ``['predicting']['model_name']``.
    """
    log = logging.getLogger(__name__)
    log.info('Loading training set, test set and model and predicting.')

    settings = parse_config(config_file)

    def read_csv(name):
        # Every input file lives directly under the same path prefix.
        return pd.read_csv(input_filepath + name)

    # Targets are flattened to 1-D arrays after loading.
    X_train = read_csv('/X_train.csv')
    y_train = read_csv('/y_train.csv').values.ravel()
    X_test = read_csv('/X_test.csv')
    y_test = read_csv('/y_test.csv').values.ravel()

    model_name = settings['predicting']['model_name']
    estimator = Model.load(model_filepath + model_name)

    train_pred = estimator.predict(X_train)
    test_pred = estimator.predict(X_test)

    # One RMSE caption per split, train first, then test.
    rmse_label = r'$RMSE={:,.0f}$' + ' EUR'
    scores = tuple(
        rmse_label.format(np.sqrt(mean_squared_error(actual, predicted)))
        for actual, predicted in ((y_train, train_pred), (y_test, test_pred))
    )

    figure = plot_predictions(scores, train_pred, test_pred, y_train, y_test)
    figure.savefig(output_filepath + '/pred_plots.png')
def predict(model_filepath, config, input_data):
    """Return a single rounded prediction for user-supplied input.

    Loads the model named by ``config['predicting']['model_name']``
    (appended to ``model_filepath`` as-is), predicts on ``input_data``,
    rounds the predictions to the nearest thousand and returns the first
    one as an ``int``.
    """
    model_name = config['predicting']['model_name']
    estimator = Model.load(model_filepath + model_name)

    # decimals=-3 rounds to three places left of the decimal point.
    rounded = np.round(estimator.predict(input_data), -3)
    return int(rounded[0])
# Example no. 3
# 0
def main(input_train, input_test, input_model, output_prediction):
    """Predict outcomes for the test set and write them to a CSV file.

    Builds a ``DataSet`` from the train and test locations, extracts the
    test features, loads the model stored at ``input_model`` +
    ``'XGBClassifier'`` and predicts. The result is written, without the
    index, to ``output_prediction`` + ``'submission_<model.name>.csv'``
    with the columns ``PassengerId`` and ``Survived``.

    Both ``input_model`` and ``output_prediction`` are used as plain
    string prefixes; no path separator is inserted here.
    """
    log = logging.getLogger(__name__)
    log.info('predicting outcomes')

    dataset = DataSet(train_dir=input_train, test_dir=input_test)
    test_frame = dataset.get_test_set()
    features = dataset.get_features(test_frame)

    classifier = Model.load(input_model + 'XGBClassifier')
    predictions = classifier.predict(features)

    submission = pd.DataFrame({
        'PassengerId': test_frame['PassengerId'],
        'Survived': predictions,
    })
    # Only the file name is formatted; the prefix is concatenated verbatim.
    file_name = 'submission_{}.csv'.format(classifier.name)
    submission.to_csv(output_prediction + file_name, index=False)
                              num_workers=8,
                              drop_last=True,
                              shuffle=True)
    val_dataset = InpaintingDataset(config,
                                    val_list,
                                    fix_mask_path=val_fix_mask,
                                    training=False)
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=config.batch_size,
                            num_workers=2,
                            drop_last=False,
                            shuffle=False)
    sample_iterator = val_dataset.create_iterator(config.sample_size)

    model = Model(config, logger=logger)
    model.load(is_test=False)
    steps_per_epoch = len(train_dataset) // config.batch_size
    iteration = model.iteration
    epoch = model.iteration // steps_per_epoch
    logger.info('Start from epoch:{}, iteration:{}'.format(epoch, iteration))

    model.train()
    keep_training = True
    best_score = {}
    while (keep_training):
        epoch += 1

        stateful_metrics = ['epoch', 'iter', 'g_lr']
        progbar = Progbar(len(train_dataset),
                          max_iters=steps_per_epoch,
                          width=20,