def text_apr(data, d1data=None, d2data=None):
    # Fit each row of `data` in the polynomial basis returned by the project's
    # dcheb helper (C, with derivative bases D and DD) and print the relative
    # squared error of the approximation and of its first two derivatives.
    N = 10
    T = data.shape[-1]
    C, D, DD = dcheb(T, N)
    K = fblas.dgemm(alpha=1., a=data.T, b=C.T, trans_a=True)
    apr = fblas.dgemm(alpha=1., a=C.T, b=K, trans_a=False, trans_b=True).T
    d1apr = fblas.dgemm(alpha=1., a=D.T, b=K, trans_a=False, trans_b=True).T
    d2apr = fblas.dgemm(alpha=1., a=DD.T, b=K, trans_a=False, trans_b=True).T
    print ((data - apr) ** 2).sum() / (data ** 2).sum()
    if d1data is not None:
        print ((d1data - d1apr) ** 2).sum() / (d1data ** 2).sum()
    if d2data is not None:
        print ((d2data - d2apr) ** 2).sum() / (d2data ** 2).sum()
def mygemm(alpha, A, B, dtype=None, **kwargs):
    '''my gemm function that uses scipy fblas functions.'''
    from scipy.linalg.fblas import dgemm, sgemm
    if dtype is None:
        dtype = A.dtype
    if dtype != np.float32 and dtype != np.float64:
        print 'Error: this function cannot deal with such dtype.'
        exit()
    if not (A.flags['F_CONTIGUOUS'] or A.flags['C_CONTIGUOUS']) \
            or not (B.flags['F_CONTIGUOUS'] or B.flags['C_CONTIGUOUS']):
        print 'Matrices should either be C or F contiguous.'
        exit()
    if A.dtype != dtype:
        A = np.asarray(A, dtype=dtype)
    if B.dtype != dtype:
        B = np.asarray(B, dtype=dtype)
    # fblas expects Fortran-ordered input; for a C-ordered matrix we pass its
    # transpose (a view, no copy) and set the corresponding trans flag instead.
    if A.flags['F_CONTIGUOUS']:
        trans_a = 0
    else:
        A = A.T
        trans_a = 1
    if B.flags['F_CONTIGUOUS']:
        trans_b = 0
    else:
        B = B.T
        trans_b = 1
    if dtype == np.float32:
        return sgemm(alpha, A, B, trans_a=trans_a, trans_b=trans_b, **kwargs)
    else:
        return dgemm(alpha, A, B, trans_a=trans_a, trans_b=trans_b, **kwargs)
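# A minimal usage sketch for mygemm, not part of the original source. It assumes
# mygemm is in scope, `import numpy as np`, and that SciPy still ships the
# scipy.linalg.fblas module (newer SciPy exposes the same routines under
# scipy.linalg.blas). The result should agree with np.dot for either layout.
def _example_mygemm():
    import numpy as np
    A = np.random.rand(4, 3)                       # C-contiguous
    B = np.asfortranarray(np.random.rand(3, 5))    # F-contiguous
    assert np.allclose(mygemm(1.0, A, B), np.dot(A, B))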
def mean_cov(X):
    n, p = X.shape
    m = X.mean(axis=0)
    # covariance matrix with correction for rounding error
    # S = (cx'*cx - (scx'*scx/n))/(n-1)
    # Am Stat 1983, vol 37: 242-247.
    cx = X - m
    cxT_a = zeros((p - 1, n), float64)
    cxT_b = zeros((p - 1, n), float64)
    cxT_a[:, :] = (cx.T)[0:p - 1, :]
    cxT_b[:, :] = (cx.T)[1:p, :]
    scx = cx.sum(axis=0)
    scx_op = dger(-1.0 / n, scx, scx)
    S = dgemm(1.0, cx.T, cx.T, beta=1.0,
              c=scx_op, trans_a=0, trans_b=1, overwrite_c=1)
    #S = dgemm(1.0, cxT_a, cxT_b, beta=1.0,
    #          c=scx_op, trans_a=0, trans_b=1, overwrite_c=1)
    S[:] *= 1.0 / (n - 1)
    return m, S.T
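# A quick sanity check for mean_cov, not part of the original source: with the
# (n-1) normalisation used above, the result should agree with numpy's own
# mean and covariance. Assumes mean_cov and its dgemm/dger imports are in scope.
def _check_mean_cov():
    import numpy as np
    X = np.random.rand(100, 5)
    m, S = mean_cov(X)
    assert np.allclose(m, X.mean(axis=0))
    assert np.allclose(S, np.cov(X, rowvar=False))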
def __setJac2(self):
    '''Calculates :math:`J^T J` and :math:`J^T e` for the training data.
    Used for Levenberg-Marquardt method.'''
    self.jac2.fill(0)
    self.jacDiff.fill(0)
    for i in range(self.tTD.nBlocks):
        data = self.tTD.getDataBlock(i)
        vals, valOut, hiddenOut = self.__processDataBlock(data)
        diffs = numexpr.evaluate('valOut - vals')
        jac = np.empty((data.shape[0],
                        (self.nHid) * (self.nIn + 1) + self.nHid + 1))
        d0 = numexpr.evaluate('-vals * (1 - vals)')
        ot = (np.outer(d0, self.l2))
        dj = numexpr.evaluate('hiddenOut * (1 - hiddenOut) * ot')
        I = np.tile(np.arange(data.shape[0]), (self.nHid + 1, 1)).flatten('F')
        J = np.arange(data.shape[0] * (self.nHid + 1))
        Q = ss.csc_matrix((dj.flatten(), np.vstack((J, I))),
                          (data.shape[0] * (self.nHid + 1), data.shape[0]))
        jac[:, 0:self.nHid + 1] = ss.spdiags(d0, 0, data.shape[0],
                                             data.shape[0]).dot(hiddenOut)
        Q2 = np.reshape(Q.dot(data),
                        (data.shape[0], (self.nIn + 1) * (self.nHid + 1)))
        jac[:, self.nHid + 1:jac.shape[1]] = Q2[:, 0:Q2.shape[1] - (self.nIn + 1)]
        if hasfblas:
            self.jac2 += fblas.dgemm(1.0, a=jac.T, b=jac.T, trans_b=True)
            self.jacDiff += fblas.dgemv(1.0, a=jac.T, x=diffs)
        else:
            self.jac2 += np.dot(jac.T, jac)
            self.jacDiff += np.dot(jac.T, diffs)
def calc_argmax(data):
    T = data.shape[-1]
    N = T // 5
    t0 = time()
    C, D, DD = dcheb(T, N)
    K = fblas.dgemm(alpha=1., b=data.T, a=C.T).T
    apr = fblas.dgemm(alpha=1., a=C.T, b=K.T, trans_a=True).T
    argmaxs = apr.argmax(axis=-1)
    # Ds = D[argmaxs]    # makes copies
    # DDs = DD[argmaxs]
    # pers = inner1d(K, Ds) / inner1d(K, DDs)
    pers = np.empty_like(argmaxs, dtype=float)
    for i, ax in enumerate(argmaxs):
        # does not make copies. slower.
        pers[i] = fblas.ddot(K[i], D[ax]) / fblas.ddot(K[i], DD[ax])
    pers[pers > 1] = 0
    return argmaxs - pers
def linear_least_squares(a, b, residuals=False):
    """
    Return the least-squares solution to a linear matrix equation.

    Solves the equation `a x = b` by computing a vector `x` that minimizes
    the Euclidean 2-norm `|| b - a x ||^2` via the normal equations, so `a`
    is expected to have full column rank. If `a` is square and of full rank,
    then `x` (but for round-off error) is the "exact" solution of the
    equation.

    Parameters
    ----------
    a : (M, N) array_like
        "Coefficient" matrix.
    b : (M,) array_like
        Ordinate or "dependent variable" values.
    residuals : bool
        Compute the residuals associated with the least-squares solution.

    Returns
    -------
    x : (N,) ndarray
        Least-squares solution.
    residuals : float (optional)
        Euclidean 2-norm of ``b - a x``, returned only if `residuals` is True.
    """
    if type(a) != np.ndarray or not a.flags['C_CONTIGUOUS']:
        warn('Matrix a is not a C-contiguous numpy array. The solver will '
             'create a copy, which will result in increased memory usage.')
        a = np.asarray(a, order='c')
    # Solve the normal equations (a^T a) x = a^T b, forming both products with dgemm.
    i = dgemm(alpha=1.0, a=a.T, b=a.T, trans_b=True)
    x = np.linalg.solve(i, dgemm(alpha=1.0, a=a.T, b=b)).flatten()
    if residuals:
        return x, np.linalg.norm(np.dot(a, x) - b)
    else:
        return x
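# A small check for linear_least_squares, not part of the original source: for a
# well-conditioned, full-column-rank system the normal-equations solution above
# should match numpy.linalg.lstsq. Assumes the function and its dgemm/warn
# imports are in scope.
def _check_linear_least_squares():
    import numpy as np
    rng = np.random.RandomState(0)
    a = rng.rand(50, 4)
    b = rng.rand(50)
    x_ref = np.linalg.lstsq(a, b)[0]
    x, r = linear_least_squares(a, b, residuals=True)
    assert np.allclose(x, x_ref)
    assert np.isclose(r, np.linalg.norm(a.dot(x) - b))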
def verify_ga_gemm(ta, tb, num_m, num_n, num_k, alpha, g_a, g_b, beta, g_c):
    tmpa = np.ndarray((num_m, num_k), dtype=np.float64)
    tmpb = np.ndarray((num_k, num_n), dtype=np.float64)
    tmpc = np.ndarray((num_m, num_n), dtype=np.float64)
    tmpa = ga.get(g_a, buffer=tmpa)
    tmpb = ga.get(g_b, buffer=tmpb)
    tmpc = ga.get(g_c, buffer=tmpc)
    if not ta and not tb:
        result = dgemm(alpha, tmpa, tmpb, beta=beta, trans_a=ta, trans_b=tb)
    elif ta and not tb:
        result = dgemm(alpha, tmpa, tmpb, beta=beta, trans_a=ta, trans_b=tb)
    elif not ta and tb:
        result = dgemm(alpha, tmpa, tmpb, beta=beta, trans_a=ta, trans_b=tb)
    elif ta and tb:
        result = dgemm(alpha, tmpa, tmpb, beta=beta, trans_a=ta, trans_b=tb)
    else:
        raise ValueError, "shouldn't get here"
    abs_value = np.abs(tmpc - result)
    if np.any(abs_value > 1):
        ga.error('verify ga.gemm failed')
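# A self-contained illustration (not from the original source) of the property
# verify_ga_gemm relies on: dgemm's trans_a / trans_b flags cover all four
# transpose combinations with a single call signature. This sketch uses the
# modern scipy.linalg.blas entry point, which may differ from the fblas import
# used elsewhere in these examples.
def _check_dgemm_trans_flags():
    import numpy as np
    from scipy.linalg.blas import dgemm
    m, k, n = 3, 4, 5
    a = np.random.rand(m, k)
    b = np.random.rand(k, n)
    assert np.allclose(dgemm(1.0, a, b), a.dot(b))
    assert np.allclose(dgemm(1.0, a.T, b, trans_a=True), a.dot(b))
    assert np.allclose(dgemm(1.0, a, b.T, trans_b=True), a.dot(b))
    assert np.allclose(dgemm(1.0, a.T, b.T, trans_a=True, trans_b=True), a.dot(b))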
def gemm(alpha, A, B, dtype=None, **kwargs):
    '''A gemm function that uses scipy fblas functions, avoiding matrix copy
    when the input is transposed. The returned matrix is designed to be
    C_CONTIGUOUS.
    '''
    from scipy.linalg.fblas import dgemm, sgemm
    if A.ndim != 2 or B.ndim != 2:
        raise TypeError, 'mygemm only deals with 2-D matrices.'
    if dtype is None:
        dtype = A.dtype
    if dtype != np.float32 and dtype != np.float64:
        raise TypeError, 'Error: this function cannot deal with dtype {}.'.format(dtype)
    if not (A.flags['F_CONTIGUOUS'] or A.flags['C_CONTIGUOUS']) \
            or not (B.flags['F_CONTIGUOUS'] or B.flags['C_CONTIGUOUS']):
        raise TypeError, 'Matrices should either be C or F contiguous.'
    if A.dtype != dtype:
        A = np.asarray(A, dtype=dtype)
    if B.dtype != dtype:
        B = np.asarray(B, dtype=dtype)
    # In fact, what we are doing here is (1) compute (A*B)^T as op(B)*op(A), and
    # (2) transpose the result. The reason is that fblas returns F_CONTIGUOUS
    # matrices, so doing this enables us to get a final output that is
    # C_CONTIGUOUS.
    if not B.flags['F_CONTIGUOUS']:
        B = B.T
        trans_b = 0
    else:
        trans_b = 1
    if not A.flags['F_CONTIGUOUS']:
        A = A.T
        trans_a = 0
    else:
        trans_a = 1
    if dtype == np.float32:
        return sgemm(alpha, B, A, trans_a=trans_b, trans_b=trans_a, **kwargs).T
    else:
        return dgemm(alpha, B, A, trans_a=trans_b, trans_b=trans_a, **kwargs).T
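# The point of the transpose trick in gemm above is that the BLAS wrappers return
# Fortran-ordered arrays; computing (A*B)^T and transposing the result therefore
# yields a C-contiguous output without an extra copy. A small check, not part of
# the original source, assuming gemm and its fblas imports are in scope:
def _check_gemm_layout():
    import numpy as np
    A = np.random.rand(6, 4)   # C-contiguous
    B = np.random.rand(4, 3)   # C-contiguous
    out = gemm(1.0, A, B)
    assert np.allclose(out, np.dot(A, B))
    assert out.flags['C_CONTIGUOUS']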
def admmDP(C, a=1.0, props={}, warmX=None, warmU=None):
    n = C.shape[0]
    logger = logging.getLogger("search.admm.dp")
    beta = props.get('priorWeight', 0.01)
    maxIter = props.get("maxParamaterOptIters", 1000)
    epsilon = props.get("epsilon", 1e-6)
    returnIntermediate = props.get("returnIntermediate", False)
    regDiagonal = props.get("regDiagonal", False)
    degreePriorADMM = props.get("degreePrior", True)
    adaptiveMu = props.get("adaptiveMu", True)
    props['dualDecompEpsilon'] = props.get('dualDecompEpsilon', 1e-10)
    sTime = time.time()
    logger.info("Starting ADMM learning. n=%d beta=%1.4f", n, beta)
    mu = props.get("mu", 0.1)  # 10.0
    muChange = props.get("muChange", 0.1)

    if props.get("normalizeForADMM", True):
        logger.info("-- NORMALIZING C --")
        covNormalizer = sqrt(diagonal(C))
        C = C / outer(covNormalizer, covNormalizer)
    else:
        logger.info("NOT NORMALIZING C")

    # Rescale to make beta's range a better fit
    maxOffDiag = numpy.max(numpy.abs(tril(C, -1)))
    C = array(C / maxOffDiag)

    if warmX is not None:
        X = warmX
    else:
        X = eye(n)
    if warmU is not None:
        U = warmU
    else:
        U = eye(n)

    Z = copy(X)
    ll = inf
    Xs = []
    gs = []
    ds = []

    if degreePriorADMM:
        adelta = -a[:n] + a[1:]
        adeltaMat = outer(ones(n), adelta)
        beta = beta / 2
        logger.error("adelta: %s", adelta[:6])
        warmV = zeros((n, n))

    if a is None:
        a = 1.0

    for i in range(maxIter):
        #####################################################################
        ##### Eigenvalue update to X
        logger.debug("Performing eigenvalue decomposition")
        for retry in range(6):
            try:
                A = mu * (Z - U) - C
                (lamb, Q) = linalg.eigh(A)
                logger.debug("Decomposition finished")
                break
            except numpy.linalg.linalg.LinAlgError as err:
                # If A is not in the PSD cone, we reduce the step size mu
                logger.error("Failed eigendecomposition with mu=%2.2e", mu)
                mu *= 0.5
                U /= 0.5
                logger.error("Retry %d, halving mu to: %2.5f", retry, mu)

        newEigs = (lamb + sqrt(lamb * lamb + 4 * mu)) / (2 * mu)
        X = FB.dgemm(alpha=1.0, a=(Q * newEigs), b=Q, trans_b=True)

        #### Soft thresholding update Z
        logger.debug("Starting Proximal step")
        Zpreshrink = X + U
        Zlast = copy(Z)

        if degreePriorADMM:
            Z = proxSubDualDecomp(Zpreshrink, beta / mu, adelta, adeltaMat,
                                  props, warmV)
        else:
            Z = shrink(Zpreshrink, beta * a / mu, regDiagonal)

        if props.get("nonpositivePrecision", False):
            Z = Z * (Z < 0) + diag(diag(Z))

        ### Update U ( U is the sum of residuals so far )
        logger.debug("Updating U")
        U += X - Z

        #####################################################################
        dualResidual = linalg.norm(Z - Zlast)
        residual = linalg.norm(X - Z)

        if adaptiveMu:
            # if the two residuals differ by more than this factor, adjust mu (p20)
            differenceMargin = 10
            if residual > dualResidual * differenceMargin:
                mu *= 1.0 + muChange
                U /= 1.0 + muChange
                logger.debug("*** Increasing mu to %2.6f", mu)
            elif dualResidual > residual * differenceMargin:
                mu *= 1.0 - muChange
                U /= 1.0 - muChange
                logger.debug("*** Decreasing mu to %2.6f", mu)

        # Ensure that the dual decomp procedure is run with enough accuracy
        ddeps = props['dualDecompEpsilon']
        margin = 50.0
        if residual < margin * ddeps or dualResidual < margin * ddeps:
            props['dualDecompEpsilon'] = min(residual, dualResidual) / margin

        if returnIntermediate:
            ds.append(dualResidual)
            gs.append(residual)

        if residual < epsilon and dualResidual < epsilon:
            logger.info("Converged to %2.3e in %i iters", residual, i + 1)
            break

        edges = (count_nonzero(Z) - n) / 2
        logger.info("Iter %d, res: %2.2e, dual res: %2.2e, mu=%1.1e, %d edges free",
                    i + 1, residual, dualResidual, mu, edges)

    eTime = time.time()
    timeTaken = eTime - sTime
    logger.info("Time taken(s): %5.7f", timeTaken)

    if residual > epsilon or dualResidual > epsilon:
        logger.error("NONCONVERGENCE!!, res: %2.2e, dres: %2.2e, iters: %d",
                     residual, dualResidual, i)

    edges = (count_nonzero(Z) - n) / 2
    logger.info("regDiagonal: %s, beta: %2.4f", regDiagonal, beta)
    logger.info("Edges %d out of %d | eps=%1.1e", edges, (n * n - n) / 2, epsilon)
    logger.info("Final residual=%2.2e, dual res=%2.2e", residual, dualResidual)

    return {'X': Z, 'U': U, 'obj': ll, 'iteration': i + 1, 'Xs': Xs, 'gs': gs,
            'ds': ds, 'timeTaken': timeTaken, 'edges': edges,
            'Zpreshrink': Zpreshrink, 'bm': beta / mu}
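# A small illustration (not from the original source) of the eigenvalue update
# used in admmDP: FB.dgemm(alpha=1.0, a=(Q * newEigs), b=Q, trans_b=True) is the
# BLAS form of (Q * newEigs) @ Q.T, i.e. Q diag(newEigs) Q^T. This sketch uses
# the modern scipy.linalg.blas dgemm; FB above refers to the older fblas module.
def _check_eig_reconstruction():
    import numpy as np
    from scipy.linalg.blas import dgemm
    rng = np.random.RandomState(1)
    A = rng.rand(5, 5)
    A = (A + A.T) / 2                                     # symmetric test matrix
    lamb, Q = np.linalg.eigh(A)
    newEigs = (lamb + np.sqrt(lamb * lamb + 4.0)) / 2.0   # mu = 1 for illustration
    X_blas = dgemm(alpha=1.0, a=(Q * newEigs), b=Q, trans_b=True)
    X_ref = (Q * newEigs).dot(Q.T)
    assert np.allclose(X_blas, X_ref)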