def score_samples(self, X, y=None):
    """Compute log-likelihood (or log(det(Jacobian))) for each sample.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        New data, where n_samples is the number of samples and n_features
        is the number of features.

    y : None, default=None
        Not used but kept for compatibility.

    Returns
    -------
    log_likelihood : array, shape (n_samples,)
        Log likelihood of each data point in X.
    """
    self._check_is_fitted()
    components = self._get_component_densities()
    component_log_likelihood = np.array([
        comp.score_samples(X) for comp in components
    ])  # (n_components, n_samples)
    weighted_log_likelihood = (np.log(self.weights_).reshape(-1, 1)
                               + component_log_likelihood)
    return scipy_logsumexp(weighted_log_likelihood, axis=0)
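The per-sample mixture log-likelihood is log p(x) = logsumexp_k(log w_k + log p_k(x)); doing the sum with logsumexp keeps it finite even when every component log-density is hugely negative. Below is a minimal standalone sketch of that same computation with made-up component log-densities; the array shapes mirror the method above, but nothing here is part of the class's API.

# Standalone sketch (hypothetical data, not this estimator's API):
# mixture log p(x) = logsumexp_k(log w_k + log p_k(x)).
import numpy as np
from scipy.special import logsumexp as scipy_logsumexp

rng = np.random.default_rng(0)
n_components, n_samples = 3, 5
weights = np.array([0.5, 0.3, 0.2])
# Pretend per-component log-densities, shape (n_components, n_samples)
component_log_likelihood = rng.normal(loc=-10.0, size=(n_components, n_samples))

weighted = np.log(weights).reshape(-1, 1) + component_log_likelihood
log_likelihood = scipy_logsumexp(weighted, axis=0)  # shape (n_samples,)
print(log_likelihood)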
def calc_mean_binary_cross_entropy_from_scores(ytrue_N, scores_N):
    ''' Compute average cross entropy for given binary classifier's predictions

    Consumes "scores", real values between (-np.inf, np.inf), and conceptually
    produces probabilities by feeding them into a sigmoid, and then into the
    BCE function to get the result.

    In practice, we compute BCE directly from the scores using an
    implementation that avoids the numerical issues of the sigmoid
    (saturation: if the sigmoid underflows to zero, taking its log would give
    negative infinity). This clever implementation uses the "logsumexp" trick.

    Computing BCE uses *base-2* logarithms, so the resulting number is a valid
    upper bound of the zero-one loss (aka error rate) when we threshold at 0.5.

    Args
    ----
    ytrue_N : 1D array, shape (n_examples,) = (N,)
        All values must be either 0 or 1. Will be cast to int dtype.
        Each entry represents the binary 'true' label of one example.
        One entry per example in current dataset.
    scores_N : 1D array, shape (n_examples,) = (N,)
        One entry per example in current dataset.
        Each entry is a real value, could be between -infinity and +infinity.
        Large negative values indicate strong probability of label y=0.
        Zero values indicate probability of 0.5.
        Large positive values indicate strong probability of label y=1.
        Needs to be same size as ytrue_N.

    Returns
    -------
    bce : float
        Binary cross entropy, averaged over all N provided examples.

    Examples
    --------
    >>> N = 8
    >>> ytrue_N = np.asarray([0., 0., 0., 0., 1., 1., 1., 1.])

    # Try some good scores for these 8 points.
    >>> good_scores_N = np.asarray([-4., -3., -2., -1., 1., 2., 3., 4.])
    >>> good_bce = calc_mean_binary_cross_entropy_from_scores(
    ...     ytrue_N, good_scores_N)
    >>> print("%.6f" % (good_bce))
    0.182835

    # Try some near-perfect scores for these 8 points.
    >>> perfect_scores_N = np.asarray([-9., -9., -9., -9., 9., 9., 9., 9.])
    >>> perfect_bce = calc_mean_binary_cross_entropy_from_scores(
    ...     ytrue_N, perfect_scores_N)
    >>> print("%.6f" % (perfect_bce))
    0.000178

    # Check same computation with "probas". Should match exactly.
    >>> perfect_bce2 = calc_mean_binary_cross_entropy_from_probas(
    ...     ytrue_N, sigmoid(perfect_scores_N))
    >>> print("%.6f" % (perfect_bce2))
    0.000178

    # Try some extreme scores (should get worse BCE with more extreme scores)
    >>> ans = calc_mean_binary_cross_entropy_from_scores([0], [+99.])
    >>> print("%.6f" % (ans))
    142.826809
    >>> ans = calc_mean_binary_cross_entropy_from_scores([0], [+999.])
    >>> print("%.6f" % (ans))
    1441.252346
    >>> ans = calc_mean_binary_cross_entropy_from_scores([0], [+9999.])
    >>> print("%.6f" % (ans))
    14425.507714

    # Try with "probas": the sigmoid saturates, so using scores is *better*
    >>> a = calc_mean_binary_cross_entropy_from_probas([0], sigmoid([+999.]))
    >>> print("%.6f" % (a))
    46.508147
    >>> a = calc_mean_binary_cross_entropy_from_probas([0], sigmoid([+9999.]))
    >>> print("%.6f" % (a))
    46.508147

    # Try empty
    >>> empty_bce = calc_mean_binary_cross_entropy_from_scores([], [])
    >>> np.allclose(0.0, empty_bce)
    True
    '''
    # Cast labels to integer just to be sure we're getting what's expected
    ytrue_N = np.asarray(ytrue_N, dtype=np.int32)
    N = int(ytrue_N.size)
    if N == 0:
        return 0.0
    # Convert binary y values so 0 becomes +1 and 1 becomes -1
    # See HW2 instructions on website for the math
    yflippedsign_N = -1 * np.sign(ytrue_N - 0.001)
    # Cast logit scores to float
    scores_N = np.asarray(scores_N, dtype=np.float64)
    # Use provided math to obtain numerically stable expression
    flipped_scores_N = yflippedsign_N * scores_N
    scores_and_zeros_N2 = np.hstack(
        [flipped_scores_N.reshape((N, 1)), np.zeros((N, 1))])
    bce_N = scipy_logsumexp(scores_and_zeros_N2, axis=1)
    return np.mean(bce_N) / np.log(2.0)
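The doctest values above illustrate why the score-based path matters: for a single example with true label y=0, the per-example loss is logsumexp([0, score]) / log(2), which keeps growing with the score, while the sigmoid-then-log route underflows. Here is a small standalone sketch of that contrast (it does not reuse the assignment's helper functions):

# Standalone sketch: naive sigmoid-then-log BCE vs. the logsumexp form above,
# for one example with true label y=0 and a very large positive score.
import numpy as np
from scipy.special import logsumexp as scipy_logsumexp

score = 999.0  # confident, wrong prediction for y=0

# Naive route: proba = sigmoid(score), loss = -log2(1 - proba).
# 1 - sigmoid(999) underflows to exactly 0.0, so the log blows up unless the
# proba is clipped, and clipping caps the reported loss (the 46.5 in the
# doctests) no matter how wrong the score is.
proba = 1.0 / (1.0 + np.exp(-score))
print(1.0 - proba)  # 0.0 -> -log2(0) would be +inf without clipping

# Stable route: loss = logsumexp([0, score]) / log(2) for y=0, which keeps
# growing with the score instead of saturating.
loss_bits = scipy_logsumexp([0.0, score]) / np.log(2.0)
print(loss_bits)  # ~1441.25, matching the doctest above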
def test_logsumexp():
    x = np.arange(5)
    # Compare against SciPy's reference implementation; use isclose rather
    # than exact float equality.
    assert np.isclose(logsumexp(x), scipy_logsumexp(x))
def test_logsumexp(values, axis, keepdims):
    npt.assert_almost_equal(
        logsumexp(values, axis=axis, keepdims=keepdims).eval(),
        scipy_logsumexp(values, axis=axis, keepdims=keepdims),
    )
from scipy.special import logsumexp as scipy_logsumexp


def logsumexp(arr, axis=None):
    # Thin wrapper that forwards directly to SciPy's implementation.
    return scipy_logsumexp(arr, axis=axis)
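For reference, the identity this wrapper (and every snippet here) relies on is logsumexp(x) = m + log(sum(exp(x - m))) with m = max(x), which prevents exp from overflowing. A hand-rolled illustration follows; the `naive_logsumexp` name is hypothetical and the SciPy call is the one to use in practice.

# Illustrative only: the max-shift trick behind logsumexp.
import numpy as np
from scipy.special import logsumexp as scipy_logsumexp

def naive_logsumexp(arr, axis=None):
    arr = np.asarray(arr, dtype=float)
    m = np.max(arr, axis=axis, keepdims=True)
    out = m + np.log(np.sum(np.exp(arr - m), axis=axis, keepdims=True))
    return np.squeeze(out) if axis is None else np.squeeze(out, axis=axis)

x = np.array([1000.0, 1000.5, 999.0])
print(naive_logsumexp(x))         # finite, ~1001.10
print(scipy_logsumexp(x))         # matches
print(np.log(np.sum(np.exp(x))))  # overflows to inf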
def conditional_densities(self, X, cond_idx, not_cond_idx):
    """Compute conditional densities.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Data to condition on based on `cond_idx`.

    cond_idx : array-like of int
        Indices to condition on.

    not_cond_idx : array-like of int
        Indices not to condition on.

    Returns
    -------
    conditional_densities : array-like of estimators
        Either a single density if all the same or a list of Gaussian
        densities with conditional variances and means.
    """
    if len(cond_idx) + len(not_cond_idx) != X.shape[1]:
        raise ValueError(
            '`cond_idx` and `not_cond_idx` should be complements that '
            'together have X.shape[1] values.')
    # Handle trivial case
    if len(cond_idx) == 0:
        return self
    self._check_is_fitted()

    # Retrieve components
    components = self._get_component_densities()  # (n_components,)
    n_samples, n_features = X.shape

    # Get new weights
    X_cond = X[:, cond_idx]
    marginal_log_likelihood = np.array([
        component.marginal_density(cond_idx).score_samples(X_cond)
        for component in components
    ]).transpose()  # (n_samples, n_components)

    # Numerically safe way to get new weights even when marginal_log_likelihood
    # is very low, e.g. -1000, where np.exp(-1000) would underflow to 0
    log_cond_weights = np.log(self.weights_) + marginal_log_likelihood
    log_cond_weights -= scipy_logsumexp(log_cond_weights, axis=1).reshape(-1, 1)
    cond_weights = np.exp(log_cond_weights)
    cond_weights = np.maximum(1e-100, cond_weights)  # Just to avoid complete zeros

    # Condition each Gaussian
    cond_components = np.array([
        self._process_conditionals(
            component.conditional_densities(X, cond_idx, not_cond_idx),
            n_samples)
        for component in components
    ]).transpose()  # (n_samples, n_components)

    # Create final conditional densities
    conditional_densities = np.array([
        self._set_fit_params(clone(self), cond_w, cond_c_arr)
        for cond_w, cond_c_arr in zip(cond_weights, cond_components)
    ])
    return conditional_densities
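The key numerical step above is the reweighting: the new component weights are posterior responsibilities, normalized in log space with logsumexp so they stay well defined even when every marginal log-likelihood is hugely negative. A standalone sketch of just that step, with made-up numbers rather than this estimator's API:

# Standalone sketch of log-space weight renormalization.
import numpy as np
from scipy.special import logsumexp as scipy_logsumexp

weights = np.array([0.6, 0.3, 0.1])
# Pretend marginal log-likelihoods for 2 samples x 3 components, all tiny.
marginal_log_likelihood = np.array([[-1000.0, -1002.0, -1010.0],
                                    [-2000.0, -1995.0, -1990.0]])

log_cond_weights = np.log(weights) + marginal_log_likelihood
log_cond_weights -= scipy_logsumexp(log_cond_weights, axis=1, keepdims=True)
cond_weights = np.exp(log_cond_weights)
print(cond_weights.sum(axis=1))  # each row sums to 1
print(cond_weights)
# Normalizing np.exp(marginal_log_likelihood) directly would divide 0 by 0.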
import time

import numpy as np
import matplotlib.pyplot as plt

from fitr.utils import logsumexp as fitr_logsumexp
from scipy.special import logsumexp as scipy_logsumexp

n = 10000
X = np.empty((n, 2))
for i in range(n):
    x = np.ones(i + 2)

    ft0 = time.time()
    yf = fitr_logsumexp(x)
    ft = time.time() - ft0

    fs0 = time.time()
    ys = scipy_logsumexp(x)
    fs = time.time() - fs0

    X[i] = np.array([fs, ft])

fig, ax = plt.subplots(figsize=(7, 3))
ax.set_title('Log-sum-exp Speed')
ax.set_xlabel(r'Dimensionality of $\mathbf{x}$')
ax.set_ylabel(r'Time (s)')
ax.semilogy(np.arange(n) + 2, X[:, 1], c='k', ls='-', label='fitr')
ax.semilogy(np.arange(n) + 2, X[:, 0], c='k', ls='--', label='SciPy')
ax.legend()
plt.show()