# Imports assumed by the scorers below. The `utils` module providing
# log_likelihood_glm, BIC, AIC, and AICc is package-internal; its exact
# import path depends on the surrounding package layout.
import numpy as np
from sklearn.metrics import accuracy_score, log_loss, r2_score


def score_predictions(self, metric, fitter, X, y, support):
    """Score, according to some metric, predictions provided by a model.

    The resulting score will be negated if an information criterion is
    specified.

    Parameters
    ----------
    metric : string
        The type of score to run on the prediction. Valid options include
        'log' (log-likelihood), 'BIC' (Bayesian information criterion),
        'AIC' (Akaike information criterion), and 'AICc' (corrected AIC).
    fitter : Poisson object
        The Poisson object that has been fit to the data with the
        respective hyperparameters.
    X : nd-array
        The design matrix.
    y : nd-array
        The response vector.
    support : array-like
        The indices of the non-zero features.

    Returns
    -------
    score : float
        The score.
    """
    # for Poisson, use predict_mean to calculate the "predicted" values
    y_pred = fitter.predict_mean(X[:, support])
    # calculate the log-likelihood
    ll = utils.log_likelihood_glm(model='poisson', y_true=y, y_pred=y_pred)

    if metric == 'log':
        score = ll
    # information criteria
    else:
        n_features = np.count_nonzero(support)
        # a fitted intercept counts as an additional parameter
        if fitter.intercept_ != 0:
            n_features += 1
        n_samples = y.size
        if metric == 'BIC':
            score = utils.BIC(ll, n_features, n_samples)
        elif metric == 'AIC':
            score = utils.AIC(ll, n_features)
        elif metric == 'AICc':
            score = utils.AICc(ll, n_features, n_samples)
        else:
            raise ValueError(metric + ' is not a valid metric.')
        # negate the score since lower information criterion is
        # preferable
        score = -score
    return score
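# A minimal sketch of the information criteria used above. The
# utils.BIC/AIC/AICc call signatures are taken from score_predictions;
# the formulas below are the standard textbook definitions and are an
# assumption about what the utils helpers compute, not their verbatim
# implementation.
def _bic(ll, n_features, n_samples):
    # BIC = k * ln(n) - 2 * ln(L); lower is better
    return n_features * np.log(n_samples) - 2 * ll


def _aic(ll, n_features):
    # AIC = 2 * k - 2 * ln(L); lower is better
    return 2 * n_features - 2 * ll


def _aicc(ll, n_features, n_samples):
    # AICc adds a small-sample correction term to AIC
    return _aic(ll, n_features) + (
        2 * n_features * (n_features + 1) / (n_samples - n_features - 1))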

def score_predictions(metric, fitter, X, y, support):
    """Score, according to some metric, predictions provided by a model.

    The resulting score will be negated if an information criterion is
    specified.

    Parameters
    ----------
    metric : string
        The type of score to run on the prediction. Valid options include
        'acc' (accuracy), 'log' (log-likelihood), 'BIC' (Bayesian
        information criterion), 'AIC' (Akaike information criterion), and
        'AICc' (corrected AIC).
    fitter : object
        The classifier that has been fit to the data; must provide
        .predict and .predict_proba methods.
    X : array-like
        The design matrix.
    y : array-like
        The response vector.
    support : array-like
        The indices of the non-zero features.

    Returns
    -------
    score : float
        The score.
    """
    if metric == 'acc':
        y_pred = fitter.predict(X[:, support])
        score = accuracy_score(y, y_pred)
    else:
        y_pred = fitter.predict_proba(X[:, support])
        ll = -log_loss(y, y_pred)
        if metric == 'log':
            score = ll
        else:
            n_features = np.count_nonzero(support)
            n_samples = y.size
            if metric == 'BIC':
                score = utils.BIC(ll, n_features, n_samples)
            elif metric == 'AIC':
                score = utils.AIC(ll, n_features)
            elif metric == 'AICc':
                score = utils.AICc(ll, n_features, n_samples)
            else:
                raise ValueError(metric + ' is not a valid metric.')
            # negate the score since lower information criterion is
            # preferable
            score = -score
    return score
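# Minimal usage sketch for the classification scorer above, using a
# scikit-learn LogisticRegression as the fitter. The toy data and the
# chosen support are illustrative assumptions; the sketch also relies on
# the module-level imports used by the scorer (np, accuracy_score,
# log_loss).
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
X_demo = rng.normal(size=(100, 5))
y_demo = (X_demo[:, 0] + X_demo[:, 2] > 0).astype(int)
support_demo = np.array([0, 2])  # hypothetical support: features 0 and 2

# fit on the supported features only, matching how the scorer slices X
clf = LogisticRegression().fit(X_demo[:, support_demo], y_demo)
acc = score_predictions('acc', clf, X_demo, y_demo, support_demo)
ll = score_predictions('log', clf, X_demo, y_demo, support_demo)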

def score_predictions(metric, y_true, y_pred, supports):
    """Score, according to some metric, predictions provided by a model.

    The resulting score will be negated if an information criterion is
    specified.

    Parameters
    ----------
    metric : string
        The type of score to run on the prediction. Valid options include
        'r2' (explained variance), 'BIC' (Bayesian information criterion),
        'AIC' (Akaike information criterion), and 'AICc' (corrected AIC).
    y_true : array-like
        The true response variables.
    y_pred : array-like
        The predicted response variables.
    supports : array-like
        The value of the supports for the model that was used to generate
        *y_pred*.

    Returns
    -------
    score : float
        The score.
    """
    if metric == 'r2':
        score = r2_score(y_true, y_pred)
    else:
        n_features = np.count_nonzero(supports)
        if metric == 'BIC':
            score = utils.BIC(y_true, y_pred, n_features)
        elif metric == 'AIC':
            score = utils.AIC(y_true, y_pred, n_features)
        elif metric == 'AICc':
            score = utils.AICc(y_true, y_pred, n_features)
        else:
            raise ValueError(metric + ' is not a valid option.')
        # negate the score since lower information criterion is preferable
        score = -score
    return score
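# Minimal usage sketch for the r2 path of the scorer above; the arrays
# and support mask are toy assumptions. Note that in this version the
# utils.BIC/AIC/AICc helpers take (y_true, y_pred, n_features) directly,
# unlike the log-likelihood-based variants elsewhere in this file, so
# only the 'r2' branch (which needs just sklearn's r2_score) is shown.
y_true_demo = np.array([1.0, 2.0, 3.0, 4.0])
y_pred_demo = np.array([1.1, 1.9, 3.2, 3.8])
supports_demo = np.array([True, False, True])  # two active features

r2 = score_predictions('r2', y_true_demo, y_pred_demo, supports_demo)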

def _score_predictions(self, metric, fitter, X, y, support, boot_idxs):
    """Score, according to some metric, predictions provided by a model.

    The resulting score will be negated if an information criterion is
    specified.

    Parameters
    ----------
    metric : string
        The type of score to run on the prediction. Valid options include
        'acc' (accuracy), 'log' (log-likelihood), 'BIC' (Bayesian
        information criterion), 'AIC' (Akaike information criterion), and
        'AICc' (corrected AIC).
    fitter : object
        Must contain .predict and .predict_proba methods.
    X : array-like
        The design matrix.
    y : array-like
        Response vector.
    support : array-like
        The value of the supports for the model.
    boot_idxs : 2-tuple of array-like objects
        Tuple of (train_idxs, test_idxs) generated from a bootstrap
        sample. If this is specified, then the appropriate set of data
        will be used for evaluating scores: test data for accuracy, and
        training data for information criteria.

    Returns
    -------
    score : float
        The score.
    """
    # Select the data relevant for the estimation_score
    X = X[boot_idxs[self._estimation_target]]
    y = y[boot_idxs[self._estimation_target]]

    if metric == 'acc':
        # with shared support, the fitter was trained on the supported
        # features only; otherwise it was trained on the full matrix
        if self.shared_support:
            y_pred = fitter.predict(X[:, support])
        else:
            y_pred = fitter.predict(X)
        score = accuracy_score(y, y_pred)
    else:
        if self.shared_support:
            y_pred = fitter.predict_proba(X[:, support])
        else:
            y_pred = fitter.predict_proba(X)
        ll = -log_loss(y, y_pred, labels=self.classes_)
        if metric == 'log':
            score = ll
        else:
            n_features = np.count_nonzero(support)
            n_samples = X.shape[0]
            # log_loss averages over samples, so scale by n_samples to
            # recover the total log-likelihood for the criteria
            if metric == 'BIC':
                score = utils.BIC(n_samples * ll, n_features, n_samples)
            elif metric == 'AIC':
                score = utils.AIC(n_samples * ll, n_features)
            elif metric == 'AICc':
                score = utils.AICc(n_samples * ll, n_features, n_samples)
            else:
                raise ValueError(metric + ' is not a valid metric.')
            # negate the score since lower information criterion is
            # preferable
            score = -score
    return score

def _score_predictions(self, metric, fitter, X, y, support, boot_idxs):
    """Score, according to some metric, predictions provided by a model.

    The resulting score will be negated if an information criterion is
    specified.

    Parameters
    ----------
    metric : string
        The type of score to run on the prediction. Valid options include
        'r2' (explained variance), 'BIC' (Bayesian information criterion),
        'AIC' (Akaike information criterion), and 'AICc' (corrected AIC).
    fitter : object
        Must contain .predict and .predict_proba methods.
    X : array-like
        The design matrix.
    y : array-like
        Response vector.
    support : array-like
        The value of the supports for the model.
    boot_idxs : 2-tuple of array-like objects
        Tuple of (train_idxs, test_idxs) generated from a bootstrap
        sample. If this is specified, then the appropriate set of data
        will be used for evaluating scores: test data for r^2, and
        training data for information criteria.

    Returns
    -------
    score : float
        The score.
    """
    # Select the data relevant for the estimation_score
    X = X[boot_idxs[self._estimation_target]]
    y = y[boot_idxs[self._estimation_target]]

    if y.ndim == 2:
        if y.shape[1] > 1:
            raise ValueError('y should either have shape ' +
                             '(n_samples, ) or (n_samples, 1).')
        y = np.squeeze(y)
    elif y.ndim > 2:
        raise ValueError('y should either have shape ' +
                         '(n_samples, ) or (n_samples, 1).')

    y_pred = fitter.predict(X[:, support])
    if y.shape != y_pred.shape:
        raise ValueError('Targets and predictions are not the same shape.')

    if metric == 'r2':
        score = r2_score(y, y_pred)
    else:
        ll = utils.log_likelihood_glm(model='normal', y_true=y,
                                      y_pred=y_pred)
        n_features = np.count_nonzero(support)
        n_samples = X.shape[0]
        if metric == 'BIC':
            score = utils.BIC(ll, n_features, n_samples)
        elif metric == 'AIC':
            score = utils.AIC(ll, n_features)
        elif metric == 'AICc':
            score = utils.AICc(ll, n_features, n_samples)
        else:
            raise ValueError(metric + ' is not a valid option.')
        # negate the score since lower information criterion is preferable
        score = -score
    return score
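# Sketch of how the boot_idxs argument above might be constructed. The
# convention that self._estimation_target indexes this tuple (0 = train,
# 1 = test) is inferred from the docstring; the names here are
# illustrative, not part of the original code.
rng = np.random.default_rng(0)
n_samples = 50
# bootstrap: draw training indices with replacement, test on the rest
train_idxs = rng.choice(n_samples, size=n_samples, replace=True)
test_idxs = np.setdiff1d(np.arange(n_samples), train_idxs)
boot_idxs = (train_idxs, test_idxs)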

def _score_predictions(self, metric, fitter, X, y, support, boot_idxs=None):
    """Score, according to some metric, predictions provided by a model.

    The resulting score will be negated if an information criterion is
    specified.

    Parameters
    ----------
    metric : string
        The type of score to run on the prediction. Valid options include
        'log' (log-likelihood), 'BIC' (Bayesian information criterion),
        'AIC' (Akaike information criterion), and 'AICc' (corrected AIC).
    fitter : Poisson object
        The Poisson object that has been fit to the data with the
        respective hyperparameters.
    X : ndarray, shape (n_samples, n_features)
        The design matrix.
    y : ndarray, shape (n_samples,)
        The response vector.
    support : ndarray
        The indices of the non-zero features.
    boot_idxs : 2-tuple of array-like objects, optional
        Tuple of (train_idxs, test_idxs) generated from a bootstrap
        sample. If this is specified, then the appropriate set of data
        will be used for evaluating scores: test data for the
        log-likelihood, and training data for information criteria.

    Returns
    -------
    score : float
        The score.
    """
    # Select the relevant data split, if bootstrap indices were given
    if boot_idxs is not None:
        X = X[boot_idxs[self._estimation_target]]
        y = y[boot_idxs[self._estimation_target]]

    # for Poisson, use predict_mean to calculate the "predicted" values
    y_pred = fitter.predict_mean(X[:, support])
    # calculate the log-likelihood
    ll = utils.log_likelihood_glm(model='poisson', y_true=y, y_pred=y_pred)

    if metric == 'log':
        score = ll
    # information criteria
    else:
        n_features = np.count_nonzero(support)
        # a fitted intercept counts as an additional parameter
        if fitter.intercept_ != 0:
            n_features += 1
        n_samples = X.shape[0]
        if metric == 'BIC':
            score = utils.BIC(n_samples * ll, n_features, n_samples)
        elif metric == 'AIC':
            score = utils.AIC(n_samples * ll, n_features)
        elif metric == 'AICc':
            score = utils.AICc(n_samples * ll, n_features, n_samples)
        else:
            raise ValueError(metric + ' is not a valid metric.')
        # negate the score since lower information criterion is preferable
        score = -score
    return score
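# Stub of the Poisson fitter interface assumed by both Poisson scorers
# in this file: only predict_mean and a fitted intercept_ are required.
# The log-link mean and the placeholder coefficients are assumptions for
# illustration, not the library's actual Poisson implementation.
class _PoissonStub:
    def __init__(self, coef, intercept=0.0):
        self.coef_ = np.asarray(coef)
        self.intercept_ = intercept

    def predict_mean(self, X):
        # conditional mean of a Poisson GLM under a log link:
        # E[y | X] = exp(X @ coef + intercept)
        return np.exp(X @ self.coef_ + self.intercept_)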