def compute_HRP_weights(covariances, res_order):
    weights = pd.Series(1, index=res_order)
    clustered_alphas = [res_order]

    while len(clustered_alphas) > 0:
        # Recursive bisection: split every cluster in half, dropping singletons
        clustered_alphas = [cluster[start:end]
                            for cluster in clustered_alphas
                            for start, end in ((0, len(cluster) // 2),
                                               (len(cluster) // 2, len(cluster)))
                            if len(cluster) > 1]

        for subcluster in range(0, len(clustered_alphas), 2):
            left_cluster = clustered_alphas[subcluster]
            right_cluster = clustered_alphas[subcluster + 1]

            # Naive inverse-variance (risk parity) weights within each half
            left_subcovar = covariances[left_cluster, :][:, left_cluster]
            inv_diag = 1 / cupy.diag(left_subcovar)
            parity_w = inv_diag * (1 / cupy.sum(inv_diag))
            left_cluster_var = cupy.dot(parity_w, cupy.dot(left_subcovar, parity_w))

            right_subcovar = covariances[right_cluster, :][:, right_cluster]
            inv_diag = 1 / cupy.diag(right_subcovar)
            parity_w = inv_diag * (1 / cupy.sum(inv_diag))
            right_cluster_var = cupy.dot(parity_w, cupy.dot(right_subcovar, parity_w))

            # Allocate inversely to each half's variance
            alloc_factor = 1 - left_cluster_var / \
                (left_cluster_var + right_cluster_var)

            weights[left_cluster] *= alloc_factor.item()
            weights[right_cluster] *= 1 - alloc_factor.item()

    return weights
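def _demo_hrp_weights():
    # Hedged usage sketch (hypothetical helper, not from the original source).
    # It assumes `pd` (pandas) and `cupy` are already imported at module level,
    # as compute_HRP_weights above expects, and it stands in a plain range(8)
    # for the quasi-diagonalised leaf order that a hierarchical-clustering
    # step would normally provide.
    import numpy as np
    returns = np.random.default_rng(0).standard_normal((250, 8))
    covariances = cupy.asarray(np.cov(returns.T))  # 8 x 8 covariance on GPU
    res_order = list(range(8))
    w = compute_HRP_weights(covariances, res_order)
    assert abs(w.sum() - 1.0) < 1e-6  # recursive bisection preserves total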
def _mutate(self):
    # Perturb roughly 20% of the existing weights with small Gaussian noise,
    # restricted to the strict upper triangle (feed-forward connections only).
    weight_mask = cp.random.normal(
        size=self.weights.shape, dtype='single') * .05 * cp.tile(
            cp.triu(cp.ones(shape=(self.total, self.total), dtype='bool_'), 1),
            (self.pop_size, 1, 1)) * (cp.random.uniform(
                size=self.weights.shape, dtype='single') < .2) * (self.weights != 0)
    # Grow new connections at currently-zero positions with probability 5%.
    new_weights_mask = cp.random.normal(
        size=self.weights.shape, dtype='single') * .05 * cp.tile(
            cp.triu(cp.ones(shape=(self.total, self.total), dtype='bool_'), 1),
            (self.pop_size, 1, 1)) * (cp.random.uniform(
                size=self.weights.shape, dtype='single') < .05) * (self.weights == 0)
    self.weights += weight_mask + new_weights_mask
    # Pruning gate: `< 1` is always true for uniform draws in [0, 1), so this
    # mask keeps every weight, i.e. pruning is effectively disabled here.
    self.weights *= (cp.random.uniform(size=self.weights.shape,
                                       dtype='single') < 1)
    # No input-to-input connections; the output block stays a fixed identity.
    self.weights[:, :self.inputs, :self.inputs] = 0
    self.weights[:, -self.outputs:, -self.outputs:] = cp.tile(
        cp.diag(cp.diag(cp.ones(shape=(self.outputs, self.outputs),
                                dtype='bool_'))),
        (self.pop_size, 1, 1))
    # Note: this gate samples a normal (not uniform) draw, so roughly 58% of
    # the biases are perturbed, unlike the uniform gates above.
    self.biases += cp.random.normal(
        size=self.biases.shape, dtype='single') * .05 * (cp.random.normal(
            size=self.biases.shape, dtype='single') < .2)
def Vpi(self, X, Y, a, b, OT_plan):
    """Return the second-order matrix of the displacements:
    sum_ij (OT_plan)_ij (X_i - Y_j)(X_i - Y_j)^T."""
    A = X.T.dot(OT_plan).dot(Y)
    if self.use_gpu:
        return X.T.dot(cp.diag(a)).dot(X) + Y.T.dot(cp.diag(b)).dot(Y) - A - A.T
    else:
        return X.T.dot(np.diag(a)).dot(X) + Y.T.dot(np.diag(b)).dot(Y) - A - A.T
def initialize(self, a, b, X, Y, Omega, k):
    """Initialize Omega with the projection onto the subspace spanned by the
    top-k eigenvectors of V_pi*, where pi* (= OT_plan) is the (classical)
    optimal transport plan."""
    if self.verbose:
        print('Initializing')
    n = X.shape[0]
    m = Y.shape[0]

    # Compute the squared-Euclidean cost matrix
    if self.use_gpu:
        ones = cp.ones((n, m))
        C = cp.diag(cp.diag(X.dot(X.T))).dot(ones) \
            + ones.dot(cp.diag(cp.diag(Y.dot(Y.T)))) - 2 * X.dot(Y.T)
    else:
        ones = np.ones((n, m))
        C = np.diag(np.diag(X.dot(X.T))).dot(ones) \
            + ones.dot(np.diag(np.diag(Y.dot(Y.T)))) - 2 * X.dot(Y.T)

    # Compute the OT plan
    _, OT_plan = self.OT(a, b, C)
    V = self.Vpi(X, Y, a, b, OT_plan)

    # Eigendecompose V
    d = V.shape[0]
    if self.use_gpu:
        _, eigenvectors = cp.linalg.eigh(V)
        eigenvectors = eigenvectors[:, -k:]
    else:
        # `eigvals` was renamed `subset_by_index` in newer SciPy
        _, eigenvectors = sp.linalg.eigh(V, eigvals=(d - k, d - 1))

    # Return the projection
    Omega = eigenvectors.dot(eigenvectors.T)
    return Omega
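def _demo_projection_invariant():
    # Sketch of the invariant `initialize` relies on (hypothetical helper,
    # plain NumPy, outside the class): stacking the top-k eigenvectors E and
    # forming Omega = E E^T gives an orthogonal projection, so
    # Omega @ Omega == Omega and trace(Omega) == k.
    import numpy as np
    rng = np.random.default_rng(0)
    M = rng.standard_normal((5, 5))
    V = M + M.T                      # symmetric stand-in for V_pi*
    _, eigvecs = np.linalg.eigh(V)
    E = eigvecs[:, -2:]              # top-2 eigenvectors
    Omega = E @ E.T
    assert np.allclose(Omega @ Omega, Omega)
    assert np.isclose(np.trace(Omega), 2.0)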
def evol(s, B, U, chi, d):
    for i_bond in [0, 1]:
        ia = np.mod(i_bond - 1, 2)
        ib = np.mod(i_bond, 2)
        ic = np.mod(i_bond + 1, 2)
        chia = B[ib].shape[1]
        chic = B[ic].shape[2]

        # Construct theta matrix and time evolution #
        theta = cp.tensordot(B[ib], B[ic], axes=(2, 1))  # i a j b
        theta = cp.tensordot(U, theta, axes=([2, 3], [0, 2]))  # ip jp a b
        theta = cp.tensordot(cp.diag(s[ia]), theta, axes=(1, 2))  # a ip jp b
        theta = cp.reshape(cp.transpose(theta, (1, 0, 2, 3)),
                           (d * chia, d * chic))  # ip a jp b

        # Schmidt decomposition #
        X, Y, Z = cp.linalg.svd(theta, full_matrices=0)
        chi2 = np.min([cp.sum(Y > 10.**(-10)).get(), chi])

        piv = cp.zeros(len(Y), cp.bool_)
        piv[(cp.argsort(Y)[::-1])[:chi2]] = True

        Y = Y[piv]
        invsq = cp.sqrt(cp.sum(Y**2))

        X = X[:, piv]
        Z = Z[piv, :]

        # Obtain the new values for B and s #
        s[ib] = Y / invsq

        X = cp.reshape(X, (d, chia, chi2))
        X = cp.transpose(cp.tensordot(cp.diag(s[ia]**(-1)), X, axes=(1, 1)),
                         (1, 0, 2))
        B[ib] = cp.tensordot(X, cp.diag(s[ib]), axes=(2, 0))
        B[ic] = cp.transpose(cp.reshape(Z, (chi2, d, chic)), (1, 0, 2))
    return s, B
def xx(x):
    RL = [1.0, ]
    iRL = [1.0, ]
    S = cp.matmul(x.T, x).reshape(32, 32, 32, 32)
    D = cp.zeros((34, 34), dtype=cp.float32)
    # print("before", S[:][0][0][0])
    # print("before", D)
    conv3check(conv_blocks, conv_threads, (S, S, D))
    # print("after", S[:][0][0][0])
    # print("after", D)
    T = cp.zeros((32, 32, 32, 32), dtype=cp.float32)
    if not fix:
        T += S
    for i in range(1, d - 1):
        # cupy.diag extracts the main diagonal (length 1024 here);
        # cupy.sqrt is an elementwise square root
        L = cp.sqrt(cp.diag(S.reshape(1024, 1024)).reshape(32, 32))
        iL = 1.0 / L
        RL.append(L)
        iRL.append(iL)
        trans(trans_blocks, trans_threads, (S, T, L, L, iL, iL))
        conv3(conv_blocks, conv_threads, (S, S))
        conv3(conv_blocks, conv_threads, (T, T))
    L = cp.sqrt(cp.diag(S.reshape(1024, 1024)).reshape(32, 32))
    iL = 1.0 / L
    RL.append(L)
    iRL.append(iL)
    trans(trans_blocks, trans_threads, (S, T, L, L, iL, iL))
    if fix:
        T -= S
    return RL, iRL
def xx(x):
    RL = [1.0, ]
    iRL = [1.0, ]
    S = cp.matmul(x.T, x).reshape(32, 32, 32, 32)
    conv3(conv_blocks, conv_threads, (S, S))
    T = cp.zeros((32, 32, 32, 32), dtype=cp.float32)
    if not fix:
        T += S
    for i in range(1, d - 1):
        L = cp.sqrt(cp.diag(S.reshape(1024, 1024)).reshape(32, 32))
        iL = 1.0 / L
        RL.append(L)
        iRL.append(iL)
        trans(trans_blocks, trans_threads, (S, T, L, L, iL, iL))
        conv3(conv_blocks, conv_threads, (S, S))
        conv3(conv_blocks, conv_threads, (T, T))
    L = cp.sqrt(cp.diag(S.reshape(1024, 1024)).reshape(32, 32))
    iL = 1.0 / L
    RL.append(L)
    iRL.append(iL)
    trans(trans_blocks, trans_threads, (S, T, L, L, iL, iL))
    if fix:
        T -= S
    return RL, iRL
def test_dgmm_out(self, dtype):
    self._setup(dtype)
    if self.side == 'L':
        ref = cupy.diag(self.x) @ self.a
    elif self.side == 'R':
        ref = self.a @ cupy.diag(self.x)
    c = cupy.empty(self.shape, order=self.orderc, dtype=dtype)
    cublas.dgmm(self.side, self.a, self.x, out=c)
    cupy.testing.assert_allclose(c, ref, rtol=self.tol, atol=self.tol)
def test_dgmm_incx_minus_one(self, dtype):
    if self.orderc != 'F':
        raise unittest.SkipTest()
    self._setup(dtype)
    if self.side == 'L':
        ref = cupy.diag(self.x[::-1]) @ self.a
    elif self.side == 'R':
        ref = self.a @ cupy.diag(self.x[::-1])
    c = cublas.dgmm(self.side, self.a, self.x, incx=-1)
    cupy.testing.assert_allclose(c, ref, rtol=self.tol, atol=self.tol)
def test_dgmm_inplace(self, dtype):
    if self.orderc != 'F':
        pytest.skip()
    self._setup(dtype)
    if self.side == 'L':
        ref = cupy.diag(self.x) @ self.a
    elif self.side == 'R':
        ref = self.a @ cupy.diag(self.x)
    cublas.dgmm(self.side, self.a, self.x, out=self.a)
    cupy.testing.assert_allclose(self.a, ref, rtol=self.tol, atol=self.tol)
def test_dgmm(self, dtype):
    if self.orderc != 'F':
        raise unittest.SkipTest()
    self._setup(dtype)
    if self.side == 'L':
        ref = cupy.diag(self.x) @ self.a
    elif self.side == 'R':
        ref = self.a @ cupy.diag(self.x)
    c = cublas.dgmm(self.side, self.a, self.x)
    cupy.testing.assert_allclose(c, ref, rtol=self.tol, atol=self.tol)
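def _demo_dgmm():
    # Illustrative sketch of the behaviour these tests pin down (hypothetical
    # helper; assumes a CuPy build where cupy.cublas is available): side='L'
    # computes diag(x) @ a, i.e. row scaling, and side='R' computes
    # a @ diag(x), i.e. column scaling.
    import cupy
    from cupy import cublas
    a = cupy.asfortranarray(cupy.arange(6, dtype=cupy.float64).reshape(2, 3))
    rows = cupy.asarray([2.0, 3.0])
    cols = cupy.asarray([1.0, 10.0, 100.0])
    assert cupy.allclose(cublas.dgmm('L', a, rows), cupy.diag(rows) @ a)
    assert cupy.allclose(cublas.dgmm('R', a, cols), a @ cupy.diag(cols))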
def Mahalanobis(self, X, Y, Omega):
    """Return the matrix of Mahalanobis costs."""
    n = X.shape[0]
    m = Y.shape[0]
    if self.use_gpu:
        ones = cp.ones((n, m))
        return cp.diag(cp.diag(X.dot(Omega).dot(X.T))).dot(ones) \
            + ones.dot(cp.diag(cp.diag(Y.dot(Omega).dot(Y.T)))) \
            - 2 * X.dot(Omega).dot(Y.T)
    else:
        ones = np.ones((n, m))
        return np.diag(np.diag(X.dot(Omega).dot(X.T))).dot(ones) \
            + ones.dot(np.diag(np.diag(Y.dot(Omega).dot(Y.T)))) \
            - 2 * X.dot(Omega).dot(Y.T)
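def _demo_mahalanobis_identity():
    # Sanity sketch (hypothetical helper, plain NumPy, outside the class):
    # entry (i, j) of the vectorized cost matrix above should equal the
    # squared Mahalanobis distance (x_i - y_j)^T Omega (x_i - y_j).
    import numpy as np
    rng = np.random.default_rng(0)
    X, Y = rng.standard_normal((4, 3)), rng.standard_normal((5, 3))
    Omega = np.eye(3)
    ones = np.ones((4, 5))
    C = (np.diag(np.diag(X @ Omega @ X.T)) @ ones
         + ones @ np.diag(np.diag(Y @ Omega @ Y.T))
         - 2 * X @ Omega @ Y.T)
    for i in range(4):
        for j in range(5):
            d = X[i] - Y[j]
            assert np.isclose(C[i, j], d @ Omega @ d)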
def magnetization(s, B, d):
    sz = cp.diag([Sz(conf, 0) for conf in range(0, d)])
    # sz = cp.array([[0, 1], [1, 0]])
    mag = cp.array(0., dtype=np.float32)
    for i_bond in range(2):
        sB = cp.tensordot(cp.diag(s[np.mod(i_bond - 1, 2)]), B[i_bond],
                          axes=(1, 1))
        C = cp.tensordot(sB, cp.conj(sB), axes=([0, 2], [0, 2]))
        mag += cp.real(cp.tensordot(C, sz, axes=([0, 1], [0, 1])).get())
    return mag * 0.5
def test_dgmm_x_matrix(self, dtype):
    if self.orderc != 'F':
        pytest.skip()
    self._setup(dtype, xdim=2)
    if self.side == 'L':
        ref = cupy.diag(cupy.diag(self.x)) @ self.a
        incx = self.shape[0] + 1
    elif self.side == 'R':
        ref = self.a @ cupy.diag(cupy.diag(self.x))
        incx = self.shape[1] + 1
    c = cublas.dgmm(self.side, self.a, self.x, incx=incx)
    cupy.testing.assert_allclose(c, ref, rtol=self.tol, atol=self.tol)
def test_dgmm_incx_minus_one(self, dtype):
    if self.orderc != 'F':
        pytest.skip()
    if cupy.cuda.runtime.is_hip:
        if self._check_dgmm_incx_minus_one_hip_skip_condition():
            pytest.xfail('HIP dgmm may have a bug')
    self._setup(dtype)
    if self.side == 'L':
        ref = cupy.diag(self.x[::-1]) @ self.a
    elif self.side == 'R':
        ref = self.a @ cupy.diag(self.x[::-1])
    c = cublas.dgmm(self.side, self.a, self.x, incx=-1)
    cupy.testing.assert_allclose(c, ref, rtol=self.tol, atol=self.tol)
def itkrm(data, K, S, maxitr, startD=np.array([1])):
    M, N = data.shape
    if startD.all() == 1:
        # no start dictionary given: draw a random M x K one
        D_init = np.random.randn(M, K)
    else:
        D_init = startD

    # Algorithm
    GPU_D_old = cp.asarray(D_init)
    GPU_Y = cp.asarray(data)
    GPU_M = int(cp.asarray(M))
    GPU_N = int(cp.asarray(N))
    GPU_S = int(cp.asarray(S))
    GPU_maxitr = int(cp.asarray(maxitr))
    GPU_I_D = cp.zeros((S, N), dtype=cp.int32)

    for i in range(GPU_maxitr):
        start_time = N_timer.cont_timer(0, 0)
        N_timer.Timer(i, maxitr)
        for n in range(GPU_N):
            GPU_I_D[:, n] = max_atoms(GPU_D_old, GPU_Y[:, n], GPU_S)
        GPU_D_new = cp.zeros((M, K))
        GPU_DtD = GPU_D_old.T @ GPU_D_old
        for n in range(GPU_N):
            GPU_DtY = GPU_D_old[:, GPU_I_D[:, n]].T @ GPU_Y[:, n]
            GPU_matproj = cp.repeat(
                (GPU_D_old[:, GPU_I_D[:, n]]
                 @ cp.linalg.inv(GPU_DtD[GPU_I_D[:, n, None], GPU_I_D[:, n]])
                 @ GPU_DtY)[:, None], GPU_S, axis=1)
            GPU_vecproj = GPU_D_old[:, GPU_I_D[:, n]] @ cp.diag(
                cp.diag(GPU_DtD[GPU_I_D[:, n, None], GPU_I_D[:, n]])**-1
                * GPU_DtY)
            GPU_signer = cp.sign(GPU_DtY)
            GPU_D_new[:, GPU_I_D[:, n]] = GPU_D_new[:, GPU_I_D[:, n]] + (
                cp.repeat(GPU_Y[:, n, None], S, axis=1)
                - GPU_matproj + GPU_vecproj) * GPU_signer

        # replace atoms that have collapsed to (near) zero norm
        GPU_scale = cp.sum(GPU_D_new * GPU_D_new, axis=0)
        GPU_iszero = cp.where(GPU_scale < 0.00001)[0]
        # GPU_D_new[:, GPU_iszero] = np.random.randn(GPU_M, len(GPU_iszero))  # generate random with GPU
        GPU_D_new[:, GPU_iszero] = cp.asarray(
            np.random.randn(M, len(GPU_iszero)))  # generate random with CPU
        # end hugget

        GPU_D_new = normalize_mat_col(GPU_D_new)
        GPU_D_old = 1 * GPU_D_new
    return cp.asnumpy(GPU_D_old)
def triageTemplates2(params, iW, C2C, W, U, dWU, mu, nsp, ndrop):
    # This function checks whether some templates should be dropped, either
    # because they are very similar to another template, or because they are
    # not catching any spikes (low mean firing rate).
    # It takes as inputs almost all the information that determines templates,
    # and outputs the same variables back after removing some clusters.

    # this is the firing rate threshold
    m0 = params.minFR * params.NT / params.fs
    idrop = nsp < m0  # drop any templates with firing rate below this
    # remove those templates everywhere
    W = W[:, ~idrop, :]
    U = U[:, ~idrop, :]
    dWU = dWU[:, :, ~idrop]
    mu = mu[~idrop]
    nsp = nsp[~idrop]
    # keep track of how many templates have been removed this way
    ndrop[0] = .9 * ndrop[0] + .1 * idrop.sum()

    # compute pairwise correlations between templates
    cc = getMeWtW2(W, U, None)
    cc = cc - cp.diag(cp.diag(cc))  # exclude the diagonal
    sd = sqrt(10)  # this is hard-coded here
    # compute a score for the separation of the means
    r0 = 4 * sd / cp.abs(mu[:, np.newaxis] - mu[np.newaxis, :])
    # determine which template has more spikes (that one survives)
    rdir = (nsp[:, np.newaxis] - nsp[np.newaxis, :]) < 0
    # for each pair of templates, score their similarity by their template
    # correlation and amplitude separation
    ipair = (cc > 0.9) & (r0 > 1) & rdir
    # for each template, find its most similar other template
    amax = cp.max(ipair, axis=1)
    # if this score is 1, then all the criteria have been met for dropping
    # this template
    idrop = amax > 0

    # remove these templates everywhere, like before
    W = W[:, ~idrop, :]
    U = U[:, ~idrop, :]
    dWU = dWU[:, :, ~idrop]
    mu = mu[~idrop]
    nsp = nsp[~idrop]
    # keep track of how many templates have been removed this way
    ndrop[1] = .9 * ndrop[1] + .1 * idrop.sum()

    return W, U, dWU, mu, nsp, ndrop
def svdecon(X, nPC0=None):
    """
    Input:
        X : m x n matrix
    Output:
        X = U*S*V'
    Description:
        Does the equivalent of svd(X, 'econ') but faster

        Vipin Vijayan (2014)
    """
    m, n = X.shape
    nPC = nPC0 or min(m, n)

    if m <= n:
        C = cp.dot(X, X.T)
        D, U = cp.linalg.eigh(C, 'U')

        ix = cp.argsort(cp.abs(D))[::-1]
        d = D[ix]
        U = U[:, ix]
        d = d[:nPC]
        U = U[:, :nPC]

        V = cp.dot(X.T, U)
        s = cp.sqrt(d)
        V = V / s.T
        S = cp.diag(s)
    else:
        C = cp.dot(X.T, X)
        D, V = cp.linalg.eigh(C)

        ix = cp.argsort(cp.abs(D))[::-1]
        d = D[ix]
        V = V[:, ix]

        # convert evecs from X'*X to X*X'. The evals are the same.
        U = cp.dot(X, V)
        s = cp.sqrt(d)
        U = U / s.T
        S = cp.diag(s)

    return U, S, V
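def _demo_svdecon():
    # Hedged usage sketch (hypothetical helper; assumes `cp` is the
    # module-level CuPy alias svdecon uses): the factors should satisfy
    # X ~= U @ S @ V.T, matching MATLAB's svd(X, 'econ').
    X = cp.random.randn(20, 5)
    U, S, V = svdecon(X)
    assert cp.allclose(U @ S @ V.T, X, atol=1e-6)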
def diagflat(v, k=0):
    """Creates a diagonal array from the flattened input.

    Args:
        v (array-like): Array or array-like object.
        k (int): Index of diagonals. See :func:`cupy.diag` for detail.

    Returns:
        cupy.ndarray: A 2-D diagonal array with the diagonal copied from ``v``.

    """
    if isinstance(v, cupy.ndarray):
        return cupy.diag(v.ravel(), k)
    else:
        # numpy.asarray (not numpy.ndarray, which would allocate an
        # uninitialized array of shape `v`) converts the array-like input
        return cupy.diag(numpy.asarray(v).ravel(), k)
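def _demo_diagflat():
    # Minimal check (hypothetical helper): four flattened elements placed on
    # the first super-diagonal give a 5 x 5 result, mirroring numpy.diagflat.
    out = diagflat(cupy.asarray([[1, 2], [3, 4]]), k=1)
    assert out.shape == (5, 5)
    assert int(out[0, 1]) == 1 and int(out[3, 4]) == 4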
def decompose(
    self, lhs: List[int], rhs: List[int], mergeV: bool = True,
    cutoff: float = 1e-12, maxdim: int = 2147483648
) -> Tuple["Tensor", "Tensor", xp.array, int]:
    lhs_size = reduce(lambda x, y: x * y,
                      [self._indices[i].size for i in lhs])
    rhs_size = reduce(lambda x, y: x * y,
                      [self._indices[i].size for i in rhs])
    self.transpose(lhs + rhs)
    u, s, v = xp.linalg.svd(self._data.reshape([lhs_size, rhs_size]),
                            full_matrices=False, compute_uv=True)
    s_norm = xp.linalg.norm(s)
    s_cutoff = (1 - cutoff) * s_norm * s_norm
    s_squared_cumsum = xp.cumsum(xp.power(s, 2))
    # dim = 0
    # for i in range(s.size):
    #     dim += 1
    #     if s_squared_cumsum[i] >= s_cutoff or (dim + 1) > maxdim:
    #         break
    dim = int(xp.searchsorted(s_squared_cumsum[:maxdim], s_cutoff)) + 1
    dim = min(dim, s.size, maxdim)
    u = u[:, :dim]
    s = xp.clip(s[:dim] * s_norm / xp.sqrt(s_squared_cumsum[dim - 1]),
                a_min=1e-32, a_max=None)
    v = v[:dim, :]
    # absorb the singular values into one of the two factors
    if mergeV:
        v = xp.diag(s) @ v
    else:
        u = u @ xp.diag(s)
    a = Index(dim)
    lhs_indices = self._indices[:len(lhs)] + [a]
    rhs_indices = [a] + self._indices[len(lhs):]
    lhs_tensor = Tensor(lhs_indices,
                        u.reshape([idx.size for idx in lhs_indices]))
    rhs_tensor = Tensor(rhs_indices,
                        v.reshape([idx.size for idx in rhs_indices]))
    return lhs_tensor, rhs_tensor, s, dim
def getRandomHermitianMatrix(M):
    # random real diagonal, conjugate-symmetric complex off-diagonal entries
    ret = xp.diag(0j + randn(M))
    for y in range(0, M - 1):
        for x in range(y + 1, M):
            ret[y, x] = randn_c()
            ret[x, y] = xp.conj(ret[y, x])
    return ret
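def _demo_hermitian():
    # Hedged check (hypothetical helper; assumes the module-level `xp`,
    # `randn`, and `randn_c` helpers this function relies on): a Hermitian
    # matrix equals its conjugate transpose, which is what the construction
    # above guarantees.
    H = getRandomHermitianMatrix(4)
    assert xp.allclose(H, xp.conj(H).T)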
def setUp(self):
    self.dtype = numpy.dtype(self.dtype)
    if self.dtype.char in 'fF':
        self.r_dtype = numpy.float32
    else:
        self.r_dtype = numpy.float64
    n = self.n
    nrhs = 1 if self.nrhs is None else self.nrhs
    # A diagonally dominant matrix is used as it is stable
    alpha = 2.0 / n
    a = self._make_matrix((n, n), alpha, -alpha / 2)
    diag = cupy.diag(cupy.ones((n,), dtype=self.r_dtype))
    a[diag > 0] = 0
    a += diag
    x = self._make_matrix((n, nrhs), 0.2, 0.9)
    b = cupy.matmul(a, x)
    b_shape = [n]
    if self.nrhs is not None:
        b_shape.append(nrhs)
    self.a = a
    self.b = b.reshape(b_shape)
    self.x_ref = x.reshape(b_shape)
    if self.r_dtype == numpy.float32:
        self.tol = self._tol['f']
    elif self.r_dtype == numpy.float64:
        self.tol = self._tol['d']
def lanczos(op: LinearOp, x: Tensor, krylov_size: int, num_restarts: int,
            smallest: bool = True) -> Tuple[float, Tensor]:
    v_next = x._data.copy()
    beta = xp.zeros(krylov_size + 1)
    alpha = xp.zeros(krylov_size)
    for _ in range(num_restarts):
        beta[0] = 0.0
        v_prev = xp.zeros(x._data.shape)
        v_next /= xp.linalg.norm(v_next)
        V = xp.zeros([x.size, krylov_size])
        for i in range(0, krylov_size):
            w = op(v_next)
            alpha[i] = xp.dot(w.reshape(x.size), v_next.reshape(x.size))
            w -= (alpha[i] * v_next + beta[i] * v_prev)
            beta[i + 1] = xp.linalg.norm(w)
            v_prev = v_next.copy()
            v_next = w / beta[i + 1]
            V[:, i] = v_prev.reshape(x.size)
        # assemble the symmetric tridiagonal Lanczos matrix; only the lower
        # triangle is filled, matching UPLO="L" below
        tridiag = xp.diag(alpha)
        for i in range(0, krylov_size - 1):
            tridiag[i + 1, i] = beta[i + 1]
        d, v = xp.linalg.eigh(tridiag, UPLO="L")
        if smallest:
            ev = d[0]
            v_next = (V @ v[:, 0]).reshape(x._data.shape)
        else:
            ev = d[-1]
            v_next = (V @ v[:, -1]).reshape(x._data.shape)
    x._data = v_next
    return ev, x
def _make_matrix(self, dtype):
    if not cusparse.check_availability('csrilu02'):
        pytest.skip('csrilu02 is not available')
    a = testing.shaped_random(
        (self.n, self.n), cupy, dtype=dtype, scale=0.9) + 0.1
    a = a + cupy.diag(cupy.ones((self.n,), dtype=dtype.char.lower()))
    return a
def admittanceMatrixD(self):
    all_el_nodes_coords = self.pts[:(self.ne * self.n_per_el)].reshape(
        (self.ne, self.n_per_el, 2))
    lengths = cp.linalg.norm(
        (all_el_nodes_coords[:, 0]
         - all_el_nodes_coords[:, (self.n_per_el - 1)]), axis=1)
    admittanceMatrixD = cp.diag(lengths / self.z)
    return admittanceMatrixD
def _eigsh_solve_ritz(alpha, beta, beta_k, k, which):
    t = cupy.diag(alpha)
    t = t + cupy.diag(beta[:-1], k=1)
    t = t + cupy.diag(beta[:-1], k=-1)
    if beta_k is not None:
        t[k, :k] = beta_k
        t[:k, k] = beta_k
    w, s = cupy.linalg.eigh(t)

    # Pick up the k Ritz values and Ritz vectors
    if which == 'LA':
        idx = cupy.argsort(w)
    elif which == 'LM':
        idx = cupy.argsort(cupy.absolute(w))
    wk = w[idx[-k:]]
    sk = s[:, idx[-k:]]
    return wk, sk
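def _demo_tridiag_assembly():
    # Illustrative sketch (hypothetical helper) of the tridiagonal assembly
    # above: cupy.diag with k=+1/-1 places `beta` on the super- and
    # sub-diagonals. For tridiag(1, 2, 1) of size 3 the eigenvalues are
    # known in closed form: 2 - sqrt(2), 2, 2 + sqrt(2).
    import cupy
    alpha = cupy.asarray([2.0, 2.0, 2.0])
    beta = cupy.asarray([1.0, 1.0, 0.0])  # last entry unused (beta[:-1])
    t = cupy.diag(alpha) + cupy.diag(beta[:-1], k=1) + cupy.diag(beta[:-1], k=-1)
    w, s = cupy.linalg.eigh(t)  # ascending order
    assert cupy.allclose(w, cupy.asarray([2 - 2 ** 0.5, 2.0, 2 + 2 ** 0.5]))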
def _slogdet_one(a):
    util._assert_rank2(a)
    util._assert_nd_squareness(a)
    dtype = a.dtype

    handle = device.get_cusolver_handle()
    m = len(a)
    ipiv = cupy.empty(m, 'i')
    info = cupy.empty((), 'i')

    # Need to make a copy because getrf works inplace
    a_copy = a.copy(order='F')

    if dtype == 'f':
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrf = cusolver.sgetrf  # <-- MODIFIED
    elif dtype == 'd':
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrf = cusolver.dgetrf
    elif dtype == 'F':
        getrf_bufferSize = cusolver.cgetrf_bufferSize
        getrf = cusolver.cgetrf
    else:
        getrf_bufferSize = cusolver.zgetrf_bufferSize
        getrf = cusolver.zgetrf  # <-- MODIFIED

    buffersize = getrf_bufferSize(handle, m, m, a_copy.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    getrf(handle, m, m, a_copy.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, info.data.ptr)

    if info[()] == 0:
        diag = cupy.diag(a_copy)
        # ipiv is 1-origin
        non_zero = (cupy.count_nonzero(ipiv != cupy.arange(1, m + 1)) +
                    cupy.count_nonzero(diag < 0))
        # Note: sign == -1 ** (non_zero % 2)
        sign = (non_zero % 2) * -2 + 1
        logdet = cupy.log(abs(diag)).sum()
    else:
        sign = cupy.array(0.0, dtype=dtype)
        # ORIGINAL
        # logdet = cupy.array(float('-inf'), dtype)
        # <-- MODIFIED: use the corresponding real dtype for complex inputs
        if dtype in ['f', 'd']:
            logdet = cupy.array(float('-inf'), dtype)
        elif dtype == 'F':
            logdet = cupy.array(float('-inf'), cupy.float32)
        else:
            logdet = cupy.array(float('-inf'), cupy.float64)
        # <-- MODIFIED

    return sign, logdet
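def _demo_slogdet_one():
    # Hedged check (hypothetical helper; assumes the cupy/cusolver module
    # context this function lives in): the sign and log-determinant should
    # agree with numpy.linalg.slogdet for a well-conditioned real matrix.
    import numpy
    a = cupy.asarray(numpy.random.rand(4, 4) + 4 * numpy.eye(4))
    sign, logdet = _slogdet_one(a)
    ref_sign, ref_logdet = numpy.linalg.slogdet(cupy.asnumpy(a))
    assert float(sign) == ref_sign
    assert abs(float(logdet) - ref_logdet) < 1e-6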
def _initialize_nets(self):
    # Random upper-triangular (feed-forward) weights at ~50% connection density
    self.weights = cp.random.normal(
        size=(self.pop_size, self.total, self.total),
        dtype='single') * cp.tile(
            cp.triu(cp.ones(shape=(self.total, self.total), dtype='bool_'), 1),
            (self.pop_size, 1, 1)) * (cp.random.uniform(
                size=(self.pop_size, self.total, self.total),
                dtype='single') < .5)
    # Fan-in style scaling of the non-input columns
    self.weights[:, :, self.inputs:] *= cp.sqrt(4 / cp.minimum(
        cp.arange(self.inputs, self.total), self.inputs + self.hidden))
    # No input-to-input connections; the output block is a fixed identity
    self.weights[:, :self.inputs, :self.inputs] = 0
    self.weights[:, -self.outputs:, -self.outputs:] = cp.tile(
        cp.diag(cp.diag(cp.ones(shape=(self.outputs, self.outputs),
                                dtype='bool_'))),
        (self.pop_size, 1, 1))
    self.biases = cp.random.normal(
        size=(self.pop_size, 1, self.hidden + self.outputs),
        dtype='single') * .5
def __estimate_one_step(self, threshold: int):
    """
    Estimate the solution for a given threshold, keeping only the leading
    singular values. Values smaller than numerical zero are always discarded.

    :param threshold: Number of the largest singular values (sorted in
        descending order) to keep. All smaller singular values are discarded.
    :type threshold: int
    """
    self.current = cp.matmul(
        self.__V.T[:, :threshold],
        cp.matmul(cp.diag(cp.divide(1, self.__D[:threshold])),
                  cp.matmul(self.__U[:, :threshold].T, self.q_estimator)))
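def _demo_truncated_svd_estimate():
    # Standalone sketch (hypothetical helper, outside the class; assumes `cp`
    # is CuPy) of the same operation: apply the rank-k truncated
    # pseudo-inverse V_k diag(1/D_k) U_k^T to an estimator vector.
    A = cp.random.randn(6, 4)
    U, D, Vt = cp.linalg.svd(A, full_matrices=False)
    q = cp.random.randn(6)
    k = 2
    x = cp.matmul(Vt.T[:, :k],
                  cp.matmul(cp.diag(1.0 / D[:k]), cp.matmul(U[:, :k].T, q)))
    assert x.shape == (4,)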
def test_csrilu02(self, dtype):
    dtype = numpy.dtype(dtype)
    a_ref = self._make_matrix(dtype)
    a = sparse.csr_matrix(a_ref)
    cusparse.csrilu02(a, level_info=self.level_info)
    a = a.todense()
    al = cupy.tril(a, k=-1)
    al = al + cupy.diag(cupy.ones((self.n,), dtype=dtype.char.lower()))
    au = cupy.triu(a)
    a = al @ au
    tol = self._tol[dtype.char.lower()]
    cupy.testing.assert_allclose(a, a_ref, atol=tol, rtol=tol)