Example #1
0
def algo_diagnostics(dataset_config, yatsm_config, X, y,
                     row, col, algo, n_fold, make_plots=True):
    """ Log, and optionally plot, diagnostics for a classifier on X and y

    The out-of-bag score is logged when `algo` exposes `oob_score_`. Scores
    from three crossvalidation schemes (KFold, StratifiedKFold and a
    shuffled SpatialKFold) are then logged and stacked into one summary
    array. Finally, feature importances are logged when `algo` exposes
    `feature_importances_`.

    Args:
      dataset_config (dict): dict of dataset configuration options
      yatsm_config (dict): dict of YATSM algorithm options
      X (np.ndarray): X feature input used in classification
      y (np.ndarray): y labeled examples
      row (np.ndarray): row pixel locations of `y`
      col (np.ndarray): column pixel locations of `y`
      algo (sklearn classifier): classifier used from scikit-learn
      n_fold (int): number of folds for crossvalidation
      make_plots (bool, optional): show diagnostic plots (default: True)

    """
    divider = '<----------------------->'

    # One entry per crossvalidation scheme: (plot label, log header, fold
    # builder). The builders are deferred so each splitter is only
    # constructed right before it is scored, after its header is logged.
    fold_schemes = (
        ('KFold',
         'KFold crossvalidation scores:',
         lambda: KFold(y.size, n_folds=n_fold)),
        ('Stratified KFold',
         'Stratified KFold crossvalidation scores:',
         lambda: StratifiedKFold(y, n_folds=n_fold)),
        ('Spatial KFold (shuffle)',
         'Spatialized shuffled KFold crossvalidation scores:',
         lambda: diagnostics.SpatialKFold(y, row, col,
                                          n_folds=n_fold, shuffle=True)),
    )

    # Diagnostics that do not need crossvalidation
    logger.info('<----- DIAGNOSTICS ----->')
    if hasattr(algo, 'oob_score_'):
        oob_message = 'Out of Bag score: {p}'.format(p=algo.oob_score_)
        logger.info(oob_message)

    # Start from an empty two-column array and append each scheme's scores
    kfold_summary = np.zeros((0, 2))
    for _, header, build_folds in fold_schemes:
        logger.info(divider)
        logger.info(header)
        folds = build_folds()
        scores = diagnostics.kfold_scores(X, y, algo, folds)
        kfold_summary = np.vstack((kfold_summary, scores))

    if make_plots:
        test_names = [label for label, _, _ in fold_schemes]
        plots.plot_crossvalidation_scores(kfold_summary, test_names)

    logger.info(divider)
    if not hasattr(algo, 'feature_importances_'):
        return

    logger.info('Feature importance:')
    logger.info(algo.feature_importances_)
    if make_plots:
        plots.plot_feature_importance(algo, dataset_config, yatsm_config)
Example #2
0
def algo_diagnostics(cfg, X, y, row, col, algo, n_fold, make_plots=True):
    """ Display algorithm diagnostics for a given X and y

    Logs the out-of-bag score when `algo` exposes `oob_score_`, runs three
    crossvalidation schemes (KFold, StratifiedKFold and a shuffled
    SpatialKFold), and logs feature importances when `algo` exposes
    `feature_importances_`.

    Crossvalidation scoring is best-effort: a scheme whose scoring raises is
    logged as a warning and left out of the summary (and of the plot labels)
    so the remaining diagnostics still run.

    Args:
        cfg (dict): YATSM configuration dictionary
        X (np.ndarray): X feature input used in classification
        y (np.ndarray): y labeled examples
        row (np.ndarray): row pixel locations of `y`
        col (np.ndarray): column pixel locations of `y`
        algo (sklearn classifier): classifier used from scikit-learn
        n_fold (int): number of folds for crossvalidation
        make_plots (bool, optional): show diagnostic plots (default: True)

    """
    # Print algorithm diagnostics without crossvalidation
    logger.info("<----- DIAGNOSTICS ----->")
    if hasattr(algo, "oob_score_"):
        logger.info("Out of Bag score: %f" % algo.oob_score_)

    # Scores and their plot labels are collected in lockstep so a failed
    # scheme can never leave the two out of sync.
    summaries = []
    test_names = []

    def report(kf, label):
        """ Score one crossvalidation scheme, recording it only on success
        """
        kf_name = kf.__class__.__name__
        logger.info("<----------------------->")
        logger.info("%s crossvalidation scores:" % kf_name)
        try:
            scores = diagnostics.kfold_scores(X, y, algo, kf)
        except Exception as e:
            # Deliberately broad: one failing scheme should not abort the
            # rest of the diagnostics. The exception is formatted with %s
            # because `e.message` does not exist on Python 3 exceptions.
            logger.warning("Could not perform %s cross-validation: %s"
                           % (kf_name, e))
        else:
            summaries.append(scores)
            test_names.append(label)

    report(KFold(y.size, n_folds=n_fold), "KFold")
    report(StratifiedKFold(y, n_folds=n_fold), "Stratified KFold")
    report(diagnostics.SpatialKFold(y, row, col,
                                    n_folds=n_fold, shuffle=True),
           "Spatial KFold (shuffle)")

    # Seed with an empty two-column array so the stack is well-defined even
    # when every scheme failed.
    kfold_summary = np.vstack([np.zeros((0, 2))] + summaries)

    # Nothing to plot if no crossvalidation scheme produced scores
    if make_plots and test_names:
        plots.plot_crossvalidation_scores(kfold_summary, test_names)

    logger.info("<----------------------->")
    if hasattr(algo, "feature_importances_"):
        logger.info("Feature importance:")
        logger.info(algo.feature_importances_)
        if make_plots:
            plots.plot_feature_importance(algo, cfg)
Example #3
0
def algo_diagnostics(X, y, row, col, algo):
    """ Log, and optionally plot, diagnostics for a classifier on X and y

    The out-of-bag score is logged when `algo` exposes `oob_score_`. Scores
    from KFold, StratifiedKFold and shuffled SpatialKFold crossvalidation
    are then logged and stacked into one summary array. Finally, feature
    importances are logged when `algo` exposes `feature_importances_`.

    Besides its arguments, this function reads `n_fold`, `make_plots`,
    `dataset_config` and `yatsm_config`, none of which are parameters.
    NOTE(review): they are presumably module-level globals set elsewhere in
    this file -- confirm, otherwise these lookups raise NameError.

    Args:
      X (np.ndarray): X feature input used in classification
      y (np.ndarray): y labeled examples
      row (np.ndarray): row pixel locations of `y`
      col (np.ndarray): column pixel locations of `y`
      algo (sklearn classifier): classifier used from scikit-learn

    """
    rule = '<----------------------->'

    def announce(heading):
        """ Log a separator line followed by a section heading """
        logger.info(rule)
        logger.info(heading)

    def stack_scores(summary, folds):
        """ Return `summary` with the scores for `folds` appended """
        fold_scores = diagnostics.kfold_scores(X, y, algo, folds)
        return np.vstack((summary, fold_scores))

    # Diagnostics that do not need crossvalidation
    logger.info('<----- DIAGNOSTICS ----->')
    if hasattr(algo, 'oob_score_'):
        oob_text = 'Out of Bag score: {p}'.format(p=algo.oob_score_)
        logger.info(oob_text)

    # Empty two-column array that each scheme's scores are appended to
    kfold_summary = np.zeros((0, 2))

    announce('KFold crossvalidation scores:')
    plain_folds = KFold(y.size, n_folds=n_fold)
    kfold_summary = stack_scores(kfold_summary, plain_folds)

    announce('Stratified KFold crossvalidation scores:')
    stratified_folds = StratifiedKFold(y, n_folds=n_fold)
    kfold_summary = stack_scores(kfold_summary, stratified_folds)

    announce('Spatialized shuffled KFold crossvalidation scores:')
    spatial_folds = diagnostics.SpatialKFold(
        y, row, col, n_folds=n_fold, shuffle=True)
    kfold_summary = stack_scores(kfold_summary, spatial_folds)

    if make_plots:
        plots.plot_crossvalidation_scores(
            kfold_summary,
            ['KFold', 'Stratified KFold', 'Spatial KFold (shuffle)'])

    logger.info(rule)
    if not hasattr(algo, 'feature_importances_'):
        return

    logger.info('Feature importance:')
    logger.info(algo.feature_importances_)
    if make_plots:
        plots.plot_feature_importance(algo, dataset_config, yatsm_config)