def test_dot_2args():
    import numpy as np
    from numpy.core import dot
    from numpy.testing import assert_allclose

    a = np.array([[1, 2], [3, 4]], dtype=float)
    b = np.array([[1, 0], [1, 1]], dtype=float)
    c = np.array([[3, 2], [7, 4]], dtype=float)

    d = dot(a, b)
    assert_allclose(c, d)
def pinv(a, rcond=1e-15):
    """
    Compute the (Moore-Penrose) pseudo-inverse of a matrix.

    Calculate the generalized inverse of a matrix using its
    singular-value decomposition (SVD) and including all
    `large` singular values.

    Parameters
    ----------
    a : array_like (M, N)
      Matrix to be pseudo-inverted.
    rcond : float
      Cutoff for `small` singular values.
      Singular values smaller than rcond*largest_singular_value
      are considered zero.

    Returns
    -------
    B : ndarray (N, M)
      The pseudo-inverse of `a`. If `a` is an np.matrix instance, then so
      is `B`.

    Raises
    ------
    LinAlgError
      In case SVD computation does not converge.

    Examples
    --------
    >>> a = np.random.randn(9, 6)
    >>> B = np.linalg.pinv(a)
    >>> np.allclose(a, np.dot(a, np.dot(B, a)))
    True
    >>> np.allclose(B, np.dot(B, np.dot(a, B)))
    True

    """
    a, wrap = _makearray(a)
    _assertNonEmpty(a)
    a = a.conjugate()
    u, s, vt = svd(a, 0)
    m = u.shape[0]
    n = vt.shape[1]
    cutoff = rcond*maximum.reduce(s)
    for i in range(min(n, m)):
        if s[i] > cutoff:
            s[i] = 1./s[i]
        else:
            s[i] = 0.
    res = dot(transpose(vt), multiply(s[:, newaxis], transpose(u)))
    return wrap(res)
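# A minimal, self-contained sketch of the same SVD-based construction used by
# pinv above, written against the public numpy API instead of the module
# internals (_makearray, svd, maximum.reduce, ...). Illustrative only.
import numpy as np

def pinv_sketch(a, rcond=1e-15):
    u, s, vt = np.linalg.svd(a, full_matrices=False)
    cutoff = rcond * s.max()
    s_inv = np.where(s > cutoff, 1. / s, 0.)  # invert only the 'large' singular values
    return np.dot(vt.T.conj(), s_inv[:, np.newaxis] * u.T.conj())

a = np.random.randn(9, 6)
B = pinv_sketch(a)
assert np.allclose(a, np.dot(a, np.dot(B, a)))  # Moore-Penrose: a B a == a
assert np.allclose(B, np.dot(B, np.dot(a, B)))  # Moore-Penrose: B a B == B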
def projectArray(homography, points):
    from numpy.core import dot
    from numpy.lib.function_base import append

    if points.shape[0] != 2:
        raise Exception('expected points of dimension 2, got {0}x{1}'.format(points.shape[0], points.shape[1]))

    if (homography is not None) and homography.size > 0:
        # augment to homogeneous coordinates, project, then de-homogenize
        augmentedPoints = append(points, [[1]*points.shape[1]], 0)
        prod = dot(homography, augmentedPoints)
        return prod[0:2]/prod[2]
    else:
        return points
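# Hypothetical usage sketch for projectArray above: project 2xN image points
# with a 3x3 homography (the identity homography must leave the points
# unchanged). Assumes a numpy version in which the internal imports above
# still resolve.
import numpy as np

points = np.array([[10., 20., 30.],    # x-coordinates
                   [5., 15., 25.]])    # y-coordinates; shape (2, N)
H = np.eye(3)                          # identity homography as a trivial check
projected = projectArray(H, points)
assert np.allclose(projected, points)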
def pinv(a, rcond=1e-15):
    """Compute the (Moore-Penrose) pseudo-inverse of a matrix.

    Calculate a generalized inverse of a matrix using its
    singular-value decomposition and including all 'large' singular
    values.

    Parameters
    ----------
    a : array-like, shape (M, N)
        Matrix to be pseudo-inverted
    rcond : float
        Cutoff for 'small' singular values.
        Singular values smaller than rcond*largest_singular_value are
        considered zero.

    Returns
    -------
    B : array, shape (N, M)
        If a is a matrix, then so is B.

    Raises LinAlgError if SVD computation does not converge

    Examples
    --------
    >>> from numpy import *
    >>> a = random.randn(9, 6)
    >>> B = linalg.pinv(a)
    >>> allclose(a, dot(a, dot(B, a)))
    True
    >>> allclose(B, dot(B, dot(a, B)))
    True

    """
    a, wrap = _makearray(a)
    _assertNonEmpty(a)
    a = a.conjugate()
    u, s, vt = svd(a, 0)
    m = u.shape[0]
    n = vt.shape[1]
    cutoff = rcond*maximum.reduce(s)
    for i in range(min(n, m)):
        if s[i] > cutoff:
            s[i] = 1./s[i]
        else:
            s[i] = 0.
    res = dot(transpose(vt), multiply(s[:, newaxis], transpose(u)))
    return wrap(res)
def pinv(a, rcond=1e-15):
    """Return the (Moore-Penrose) pseudo-inverse of a 2-d array.

    This method computes the generalized inverse using the
    singular-value decomposition and all singular values larger than
    rcond of the largest.
    """
    a, wrap = _makearray(a)
    a = a.conjugate()
    u, s, vt = svd(a, 0)
    m = u.shape[0]
    n = vt.shape[1]
    cutoff = rcond*maximum.reduce(s)
    for i in range(min(n, m)):
        if s[i] > cutoff:
            s[i] = 1./s[i]
        else:
            s[i] = 0.
    return wrap(dot(transpose(vt), multiply(s[:, newaxis], transpose(u))))
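# Sketch of what the rcond cutoff does on a rank-deficient input: one singular
# value is exactly zero here, so it is zeroed instead of inverted and the
# result is still a valid pseudo-inverse. Uses the public np.linalg.pinv as a
# stand-in for the variants above.
import numpy as np

a = np.array([[1., 2.],
              [2., 4.]])                         # rank 1: row 2 = 2 * row 1
print(np.linalg.svd(a, compute_uv=False))        # singular values ~[5, 0]
B = np.linalg.pinv(a, rcond=1e-15)
assert np.allclose(a, np.dot(a, np.dot(B, a)))   # pseudo-inverse property holds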
def _inv(a, cf, rcond, epsilon):
    """Modified pseudo-inverse."""
    def _assertNoEmpty2d(*arrays):
        for a in arrays:
            if a.size == 0 and product(a.shape[-2:]) == 0:
                raise RuntimeError("Arrays cannot be empty")

    def _makearray(a):
        new = asarray(a)
        wrap = getattr(a, "__array_prepare__", new.__array_wrap__)
        return new, wrap

    a, wrap = _makearray(a)
    _assertNoEmpty2d(a)
    if epsilon is not None:
        # add a ridge term to the diagonal before decomposition
        epsilon = numpy.repeat(epsilon, a.shape[0])
        epsilon = numpy.diag(epsilon)
        a = a + epsilon
    a = a.conjugate()

    # WARNING: the singular values in "s" might not equal the eigenvalues from eigh
    u, s, vt = numpy.linalg.svd(a, 0)
    m = u.shape[0]
    n = vt.shape[1]
    eigen = numpy.copy(s)

    # cutoff = rcond*maximum.reduce(s)
    cutoff = cf(s, rcond)
    for i in range(min(n, m)):
        # The first singular value is always selected because we want at
        # least one component, and the first is the largest.
        if s[i] >= cutoff or i == 0:
            s[i] = 1. / s[i]
        else:
            s[i] = 0.
    n_indep = numpy.count_nonzero(s)

    res = dot(transpose(vt), multiply(s[:, newaxis], transpose(u)))
    return wrap(res), n_indep, eigen
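# Hypothetical usage sketch for _inv above. `cf` is a caller-supplied cutoff
# function over the singular values (the lambda below reproduces the
# commented-out rcond*max(s) rule) and `epsilon` is a ridge term added to the
# diagonal before decomposition. The from-numpy imports supply the
# module-level names (asarray, dot, ...) that _inv expects to find in scope.
import numpy
from numpy import asarray, dot, transpose, multiply, newaxis, product

a = numpy.array([[2., 1.],
                 [1., 2.]])
cf = lambda s, rcond: rcond * s.max()      # classic relative cutoff
res, n_indep, eigen = _inv(a, cf, rcond=1e-15, epsilon=1e-6)
print(n_indep)                             # number of singular values kept (2)
print(numpy.dot(a, res))                   # approximately the identity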
def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
    """
    Least squares polynomial fit.

    Fit a polynomial ``p(x) = p[0] * x**deg + ... + p[deg]`` of degree `deg`
    to points `(x, y)`. Returns a vector of coefficients `p` that minimizes
    the squared error.

    Parameters
    ----------
    x : array_like, shape (M,)
        x-coordinates of the M sample points ``(x[i], y[i])``.
    y : array_like, shape (M,) or (M, K)
        y-coordinates of the sample points. Several data sets of sample
        points sharing the same x-coordinates can be fitted at once by
        passing in a 2D-array that contains one dataset per column.
    deg : int
        Degree of the fitting polynomial.
    rcond : float, optional
        Relative condition number of the fit. Singular values smaller than
        this relative to the largest singular value will be ignored. The
        default value is len(x)*eps, where eps is the relative precision of
        the float type, about 2e-16 in most cases.
    full : bool, optional
        Switch determining nature of return value. When it is False (the
        default) just the coefficients are returned, when True diagnostic
        information from the singular value decomposition is also returned.
    w : array_like, shape (M,), optional
        Weights to apply to the y-coordinates of the sample points.
    cov : bool, optional
        Return the estimate and the covariance matrix of the estimate.
        If full is True, then cov is not returned.

    Returns
    -------
    p : ndarray, shape (M,) or (M, K)
        Polynomial coefficients, highest power first. If `y` was 2-D, the
        coefficients for `k`-th data set are in ``p[:,k]``.

    residuals, rank, singular_values, rcond :
        Present only if `full` = True. Residuals of the least-squares fit,
        the effective rank of the scaled Vandermonde coefficient matrix,
        its singular values, and the specified value of `rcond`. For more
        details, see `linalg.lstsq`.

    V : ndarray, shape (M,M) or (M,M,K)
        Present only if `full` = False and `cov` = True. The covariance
        matrix of the polynomial coefficient estimates. The diagonal of
        this matrix are the variance estimates for each coefficient. If y
        is a 2-D array, then the covariance matrix for the `k`-th data set
        are in ``V[:,:,k]``.

    Warns
    -----
    RankWarning
        The rank of the coefficient matrix in the least-squares fit is
        deficient. The warning is only raised if `full` = False.

        The warnings can be turned off by

        >>> import warnings
        >>> warnings.simplefilter('ignore', np.RankWarning)

    See Also
    --------
    polyval : Computes polynomial values.
    linalg.lstsq : Computes a least-squares fit.
    scipy.interpolate.UnivariateSpline : Computes spline fits.

    Notes
    -----
    The solution minimizes the squared error

    .. math ::
        E = \\sum_{j=0}^k |p(x_j) - y_j|^2

    in the equations::

        x[0]**n * p[n] + ... + x[0] * p[1] + p[0] = y[0]
        x[1]**n * p[n] + ... + x[1] * p[1] + p[0] = y[1]
        ...
        x[k]**n * p[n] + ... + x[k] * p[1] + p[0] = y[k]

    The coefficient matrix of the coefficients `p` is a Vandermonde matrix.

    `polyfit` issues a `RankWarning` when the least-squares fit is badly
    conditioned. This implies that the best fit is not well-defined due
    to numerical error. The results may be improved by lowering the
    polynomial degree or by replacing `x` by `x` - `x`.mean(). The `rcond`
    parameter can also be set to a value smaller than its default, but the
    resulting fit may be spurious: including contributions from the small
    singular values can add numerical noise to the result.

    Note that fitting polynomial coefficients is inherently badly
    conditioned when the degree of the polynomial is large or the interval
    of sample points is badly centered. The quality of the fit should
    always be checked in these cases. When polynomial fits are not
    satisfactory, splines may be a good alternative.

    References
    ----------
    .. [1] Wikipedia, "Curve fitting",
           http://en.wikipedia.org/wiki/Curve_fitting
    .. [2] Wikipedia, "Polynomial interpolation",
           http://en.wikipedia.org/wiki/Polynomial_interpolation

    Examples
    --------
    >>> x = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0])
    >>> y = np.array([0.0, 0.8, 0.9, 0.1, -0.8, -1.0])
    >>> z = np.polyfit(x, y, 3)
    >>> z
    array([ 0.08703704, -0.81349206,  1.69312169, -0.03968254])

    It is convenient to use `poly1d` objects for dealing with polynomials:

    >>> p = np.poly1d(z)
    >>> p(0.5)
    0.6143849206349179
    >>> p(3.5)
    -0.34732142857143039
    >>> p(10)
    22.579365079365115

    High-order polynomials may oscillate wildly:

    >>> p30 = np.poly1d(np.polyfit(x, y, 30))
    /... RankWarning: Polyfit may be poorly conditioned...
    >>> p30(4)
    -0.80000000000000204
    >>> p30(5)
    -0.99999999999999445
    >>> p30(4.5)
    -0.10547061179440398

    Illustration:

    >>> import matplotlib.pyplot as plt
    >>> xp = np.linspace(-2, 6, 100)
    >>> plt.plot(x, y, '.', xp, p(xp), '-', xp, p30(xp), '--')
    [<matplotlib.lines.Line2D object at 0x...>, <matplotlib.lines.Line2D object at 0x...>, <matplotlib.lines.Line2D object at 0x...>]
    >>> plt.ylim(-2,2)
    (-2, 2)
    >>> plt.show()

    """
    order = int(deg) + 1
    x = NX.asarray(x) + 0.0
    y = NX.asarray(y) + 0.0

    # check arguments.
    if deg < 0:
        raise ValueError("expected deg >= 0")
    if x.ndim != 1:
        raise TypeError("expected 1D vector for x")
    if x.size == 0:
        raise TypeError("expected non-empty vector for x")
    if y.ndim < 1 or y.ndim > 2:
        raise TypeError("expected 1D or 2D array for y")
    if x.shape[0] != y.shape[0]:
        raise TypeError("expected x and y to have same length")

    # set rcond
    if rcond is None:
        rcond = len(x)*finfo(x.dtype).eps

    # set up least squares equation for powers of x
    lhs = vander(x, order)
    rhs = y

    # apply weighting
    if w is not None:
        w = NX.asarray(w) + 0.0
        if w.ndim != 1:
            raise TypeError("expected a 1-d array for weights")
        if w.shape[0] != y.shape[0]:
            raise TypeError("expected w and y to have the same length")
        lhs *= w[:, NX.newaxis]
        if rhs.ndim == 2:
            rhs *= w[:, NX.newaxis]
        else:
            rhs *= w

    # scale lhs to improve condition number and solve
    scale = NX.sqrt((lhs*lhs).sum(axis=0))
    lhs /= scale
    c, resids, rank, s = lstsq(lhs, rhs, rcond)
    c = (c.T/scale).T  # broadcast scale coefficients

    # warn on rank reduction, which indicates an ill conditioned matrix
    if rank != order and not full:
        msg = "Polyfit may be poorly conditioned"
        warnings.warn(msg, RankWarning)

    if full:
        return c, resids, rank, s, rcond
    elif cov:
        Vbase = inv(dot(lhs.T, lhs))
        Vbase /= NX.outer(scale, scale)
        # Some literature ignores the extra -2.0 factor in the denominator,
        # but it is included here because the covariance of Multivariate
        # Student-T (which is implied by a Bayesian uncertainty analysis)
        # includes it. Plus, it gives a slightly more conservative estimate
        # of uncertainty.
        fac = resids / (len(x) - order - 2.0)
        if y.ndim == 1:
            return c, Vbase * fac
        else:
            return c, Vbase[:, :, NX.newaxis] * fac
    else:
        return c
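# The docstring examples above cover plain fits; this sketch exercises the
# `w` and `cov` arguments, which they do not. Shown with the public
# np.polyfit, which shares this signature.
import numpy as np

x = np.linspace(0., 5., 20)
y = 3.0*x - 1.0 + np.random.normal(scale=0.1, size=x.shape)
w = np.ones_like(x)
w[:5] = 10.0                            # weight the first few points heavily
c, V = np.polyfit(x, y, 1, w=w, cov=True)
print(c)                                # coefficients, highest power first
print(np.sqrt(np.diag(V)))              # one-sigma uncertainty per coefficient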
def norm(x, ord=None, axis=None):
    """
    Norm of a sparse matrix

    This function is able to return one of seven different matrix norms,
    depending on the value of the ``ord`` parameter.

    Parameters
    ----------
    x : a sparse matrix
        Input sparse matrix. If `axis` is None, `x` must be 1-D or 2-D
        sparse matrix.
    ord : {non-zero int, inf, -inf, 'fro'}, optional
        Order of the norm (see table under ``Notes``). inf means numpy's
        `inf` object.
    axis : {int, None}, optional
        If `axis` is an integer, it specifies the axis of `x` along which
        to compute the vector norms.

    Returns
    -------
    n : float or matrix

    Notes
    -----
    Some of the ord are not implemented because some associated functions
    like, _multi_svd_norm, are not yet available for sparse matrix.

    This docstring is modified based on numpy.linalg.norm.
    https://github.com/numpy/numpy/blob/master/numpy/linalg/linalg.py

    The following norms can be calculated:

    =====  ============================
    ord    norm for sparse matrices
    =====  ============================
    None   Frobenius norm
    'fro'  Frobenius norm
    inf    max(sum(abs(x), axis=1))
    -inf   min(sum(abs(x), axis=1))
    0      abs(x).sum(axis=axis)
    1      max(sum(abs(x), axis=0))
    -1     min(sum(abs(x), axis=0))
    2      Not implemented
    -2     Not implemented
    other  Not implemented
    =====  ============================

    The Frobenius norm is given by [1]_:

        :math:`||A||_F = [\\sum_{i,j} abs(a_{i,j})^2]^{1/2}`

    References
    ----------
    .. [1] G. H. Golub and C. F. Van Loan, *Matrix Computations*,
           Baltimore, MD, Johns Hopkins University Press, 1985, pg. 15

    Examples
    --------
    >>> from scipy.sparse import *
    >>> import numpy as np
    >>> from scipy.sparse.linalg import norm
    >>> a = np.arange(9) - 4
    >>> a
    array([-4, -3, -2, -1, 0, 1, 2, 3, 4])
    >>> b = a.reshape((3, 3))
    >>> b
    array([[-4, -3, -2],
           [-1,  0,  1],
           [ 2,  3,  4]])
    >>> b = csr_matrix(b)

    >>> norm(b)
    7.745966692414834
    >>> norm(b, 'fro')
    7.745966692414834
    >>> norm(b, np.inf)
    9
    >>> norm(b, -np.inf)
    2
    >>> norm(b, 1)
    7
    >>> norm(b, -1)
    6

    Using the `axis` argument to compute vector norms:

    >>> c = np.array([[ 1, 2, 3],
    ...               [-1, 1, 4]])
    >>> c = csr_matrix(c)
    >>> norm(c, axis=0)
    matrix([[ 1.41421356,  2.23606798,  5.        ]])
    >>> norm(c, axis=1)
    matrix([[ 3.74165739],
            [ 4.24264069]])
    >>> norm(c, ord=1, axis=1)
    matrix([[6],
            [6]])

    """
    if not issparse(x):
        raise TypeError("input is not sparse. use numpy.linalg.norm")

    # Check the default case first and handle it immediately.
    if ord in [None, 'fro', 'f'] and axis is None:
        if isComplexType(x.dtype.type):
            sqnorm = dot(x.real, x.real) + dot(x.imag, x.imag)
        else:
            sqnorm = x.power(2).sum()
        return sqrt(sqnorm)

    # Normalize the `axis` argument to a tuple.
    nd = x.ndim
    if axis is None:
        axis = tuple(range(nd))

    if np.isscalar(axis):
        if ord == Inf:
            return max(abs(x).sum(axis=axis))
        elif ord == -Inf:
            return min(abs(x).sum(axis=axis))
        elif ord == 0:
            # Zero norm
            return (x != 0).sum(axis=axis)
        elif ord == 1:
            # special case for speedup
            return abs(x).sum(axis=axis)
        elif ord == -1:
            return min(abs(x).sum(axis=axis))
        elif ord is None:
            return sqrt(x.power(2).sum(axis=axis))
        else:
            raise NotImplementedError
    elif len(axis) == 2:
        row_axis, col_axis = axis
        if not (-nd <= row_axis < nd and -nd <= col_axis < nd):
            raise ValueError('Invalid axis %r for an array with shape %r' %
                             (axis, x.shape))
        if row_axis % nd == col_axis % nd:
            raise ValueError('Duplicate axes given.')
        if ord == 2:
            raise NotImplementedError
            # return _multi_svd_norm(x, row_axis, col_axis, amax)
        elif ord == -2:
            raise NotImplementedError
            # return _multi_svd_norm(x, row_axis, col_axis, amin)
        elif ord == 1:
            return abs(x).sum(axis=row_axis).max(axis=col_axis)[0, 0]
        elif ord == Inf:
            return abs(x).sum(axis=col_axis).max(axis=row_axis)[0, 0]
        elif ord == -1:
            return abs(x).sum(axis=row_axis).min(axis=col_axis)[0, 0]
        elif ord == -Inf:
            return abs(x).sum(axis=col_axis).min(axis=row_axis)[0, 0]
        elif ord in [None, 'fro', 'f']:
            return sqrt(x.power(2).sum(axis=axis))
        else:
            raise ValueError("Invalid norm order for matrices.")
    else:
        raise ValueError("Improper number of dimensions to norm.")
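# Quick consistency sketch: for the implemented orders, the sparse norm should
# agree with numpy.linalg.norm on the densified matrix. Assumes scipy's
# public scipy.sparse.linalg.norm is available.
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import norm as sparse_norm

d = np.array([[1., -2.],
              [3., 4.]])
s = csr_matrix(d)
assert np.isclose(sparse_norm(s), np.linalg.norm(d))                   # Frobenius
assert np.isclose(sparse_norm(s, 1), np.linalg.norm(d, 1))             # max column sum
assert np.isclose(sparse_norm(s, np.inf), np.linalg.norm(d, np.inf))   # max row sum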
def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
    import warnings
    import numpy.core.numeric as NX
    from numpy.core import isscalar, abs, dot
    from numpy.lib.twodim_base import diag, vander
    from numpy.linalg import eigvals, lstsq, inv
    from numpy import RankWarning
    try:
        from numpy.core import finfo  # 1.7
    except ImportError:
        from numpy.lib.getlimits import finfo  # 1.3 support for cluster

    order = int(deg) + 1
    x = NX.asarray(x) + 0.0
    y = NX.asarray(y) + 0.0

    # check arguments.
    if deg < 0:
        raise ValueError("expected deg >= 0")
    if x.ndim != 1:
        raise TypeError("expected 1D vector for x")
    if x.size == 0:
        raise TypeError("expected non-empty vector for x")
    if y.ndim < 1 or y.ndim > 2:
        raise TypeError("expected 1D or 2D array for y")
    if x.shape[0] != y.shape[0]:
        raise TypeError("expected x and y to have same length")

    # set rcond
    if rcond is None:
        rcond = len(x)*finfo(x.dtype).eps

    # set up least squares equation for powers of x
    lhs = vander(x, order)
    rhs = y

    # apply weighting
    if w is not None:
        w = NX.asarray(w) + 0.0
        if w.ndim != 1:
            raise TypeError("expected a 1-d array for weights")
        if w.shape[0] != y.shape[0]:
            raise TypeError("expected w and y to have the same length")
        lhs *= w[:, NX.newaxis]
        if rhs.ndim == 2:
            rhs *= w[:, NX.newaxis]
        else:
            rhs *= w

    # scale lhs to improve condition number and solve
    scale = NX.sqrt((lhs*lhs).sum(axis=0))
    lhs /= scale
    c, resids, rank, s = lstsq(lhs, rhs, rcond)
    c = (c.T/scale).T  # broadcast scale coefficients

    # warn on rank reduction, which indicates an ill conditioned matrix
    if rank != order and not full:
        msg = "Polyfit may be poorly conditioned"
        warnings.warn(msg, RankWarning)

    if full:
        return c, resids, rank, s, rcond
    elif cov:
        Vbase = inv(dot(lhs.T, lhs))
        Vbase /= NX.outer(scale, scale)
        # Some literature ignores the extra -2.0 factor in the denominator,
        # but it is included here because the covariance of Multivariate
        # Student-T (which is implied by a Bayesian uncertainty analysis)
        # includes it. Plus, it gives a slightly more conservative estimate
        # of uncertainty.
        fac = resids / (len(x) - order - 2.0)
        if y.ndim == 1:
            return c, Vbase * fac
        else:
            return c, Vbase[:, :, NX.newaxis] * fac
    else:
        return c
def joint_analysis(context, gene):
    g, g_n, pvalue, n, n_indep, p_i_best, t_i_best, p_i_worst, t_i_worst, \
        eigen_max, eigen_min, eigen_min_kept, z_min, z_max, z_mean, z_sd, tmi, status = \
        None, None, None, None, None, None, None, None, None, \
        None, None, None, None, None, None, None, None, CalculationStatus.NO_DATA

    g = gene.split(".")[0] if context.get_trimmed_ensemble_id() else gene
    g_n = context.get_gene_name(g)

    ####################################################################################################################
    zscores, tissue_labels = context.get_metaxcan_zscores(gene)
    if not zscores or len(zscores) == 0:
        status = CalculationStatus.NO_METAXCAN_RESULTS
        return g, g_n, pvalue, n, n_indep, p_i_best, t_i_best, p_i_worst, t_i_worst, eigen_max, eigen_min, eigen_min_kept, z_min, z_max, z_mean, z_sd, tmi, status
    n = len(zscores)
    z_min = numpy.min(zscores)
    z_max = numpy.max(zscores)
    z_mean = numpy.mean(zscores)
    if len(zscores) > 1:
        z_sd = numpy.std(zscores, ddof=1)

    ####################################################################################################################
    labels, matrix = context.get_model_matrix(gene, tissue_labels)
    if not labels or len(labels) == 0:
        status = CalculationStatus.NO_PRODUCT
        return g, g_n, pvalue, n, n_indep, p_i_best, t_i_best, p_i_worst, t_i_worst, eigen_max, eigen_min, eigen_min_kept, z_min, z_max, z_mean, z_sd, tmi, status

    # Also check that the matrix actually makes sense. We are currently
    # returning it just in case, but matrices with complex covariance are
    # suspicious.
    e, v = numpy.linalg.eigh(matrix)
    if numpy.imag(e).any():
        status = CalculationStatus.COMPLEX_COVARIANCE
        e = numpy.real(e)
        eigen_max, eigen_min = numpy.max(e), numpy.min(e)
        return g, g_n, pvalue, n, n_indep, p_i_best, t_i_best, p_i_worst, t_i_worst, eigen_max, eigen_min, eigen_min_kept, z_min, z_max, z_mean, z_sd, tmi, status

    # If no eigenvalue satisfies our cutoff criteria, at least the first component will be used
    # Note there is a slight numerical mismatch between the resolution in eigh and the svd
    cutoff = context.get_cutoff(matrix)

    _d = {tissue_labels[i]: zscores[i] for i in range(len(tissue_labels))}
    zscores = array([_d[l] for l in labels])

    inv, n_indep, eigen = Math.capinv(matrix, cutoff, context.epsilon)

    eigen_max, eigen_min = numpy.max(eigen), numpy.min(eigen)
    eigen_min_kept = numpy.min([x for x in eigen[0:n_indep]])

    _absz = numpy.abs(zscores)

    _maxzi = numpy.argmax(_absz)
    max_z = _absz[_maxzi]
    p_i_best = 2*stats.norm.sf(max_z)
    t_i_best = labels[_maxzi]

    _minzi = numpy.argmin(_absz)
    min_z = _absz[_minzi]
    p_i_worst = 2*stats.norm.sf(min_z)
    t_i_worst = labels[_minzi]

    # TODO: implement a better heuristic
    try:
        eigen_w, eigen_v = numpy.linalg.eigh(inv)
    except Exception:
        # WTCCC 'ENSG00000204560.5'
        logging.log(8, "Problems with inverse for %s, skipping", gene)
        status = CalculationStatus.INVERSE_ERROR
        return g, g_n, pvalue, n, n_indep, p_i_best, t_i_best, p_i_worst, t_i_worst, eigen_max, eigen_min, eigen_min_kept, z_min, z_max, z_mean, z_sd, tmi, status

    ####################################################################################################################
    w = float(dot(dot(zscores, inv), zscores))
    chi2_p = stats.chi2.sf(w, n_indep)

    tmi = numpy.trace(numpy.dot(matrix, inv))

    # If we got to this point, we are ok-ish. The chi-squared distribution
    # might still have been unable to resolve the p-value because it is too
    # small.
    if chi2_p == 0:
        status = CalculationStatus.INSUFFICIENT_NUMERICAL_RESOLUTION
    else:
        status = CalculationStatus.OK

    pvalue = chi2_p

    return g, g_n, pvalue, n, n_indep, p_i_best, t_i_best, p_i_worst, t_i_worst, eigen_max, eigen_min, eigen_min_kept, z_min, z_max, z_mean, z_sd, tmi, status
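# Minimal numeric sketch of the statistic computed in joint_analysis: combine
# correlated z-scores as w = z' * pinv(Sigma) * z, which is compared against a
# chi-squared distribution with as many degrees of freedom as components kept.
# np.linalg.pinv and matrix_rank stand in for Math.capinv, which is not shown.
import numpy
from scipy import stats

zscores = numpy.array([2.0, 2.5, 1.5])
matrix = numpy.array([[1.0, 0.8, 0.5],
                      [0.8, 1.0, 0.6],
                      [0.5, 0.6, 1.0]])      # tissue-tissue covariance
inv_ = numpy.linalg.pinv(matrix)
n_indep = numpy.linalg.matrix_rank(matrix)   # components kept (3: full rank)
w = float(numpy.dot(numpy.dot(zscores, inv_), zscores))
pvalue = stats.chi2.sf(w, n_indep)
print(w, n_indep, pvalue)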