def backward_cuda(self, grad):
    """Back-propagate ``grad`` through the layer using the cached columns.

    Stores the weight gradient (reshaped to ``self.kernel_shape``) and the
    bias gradient in ``self.grads_cuda`` under ``"w"`` and ``"b"``, then
    scatters the per-position patch gradients back onto an input-shaped
    array.

    Parameters
    ----------
    grad : array
        Upstream gradient; indexed below as ``[batch, row, col, channel]``
        with ``self.output_ch`` entries on the last axis.

    Returns
    -------
    array
        Gradient w.r.t. the layer input, shape
        ``(batch, *self.input_shape)``, dtype float32.
    """
    assert self.is_compiled
    n_batch = grad.shape[0]

    # Parameter gradients: one row of `grad_rows` per output position.
    grad_rows = grad.reshape((-1, self.output_ch))
    weight_grad = cp.matmul(cp.transpose(self.last_input_col_cuda), grad_rows)
    self.grads_cuda["w"] = weight_grad.reshape(self.kernel_shape)
    self.grads_cuda["b"] = cp.sum(grad_rows, axis=0)

    # Gradient of every flattened input patch, one per output position.
    patch_grads = cp.matmul(grad, cp.transpose(self.W_cuda))
    d_in = cp.zeros(shape=(n_batch, *self.input_shape), dtype=np.float32)

    k = self.kernel_sz
    patch_shape = (n_batch, k, k, self.input_ch)
    # Overlapping patches accumulate, hence the explicit += per position.
    # if this part could be re-written, it would be better
    for row in range(self.output_sz):
        for col in range(self.output_sz):
            d_in[:, row:row + k, col:col + k, :] += (
                patch_grads[:, row, col, :].reshape(patch_shape))
    return d_in
def calc_inv(ir):
    """Return ``ir.T @ inv(ir @ ir.T).T`` as a nested list.

    The result is also printed, followed by its shape (debug output kept
    from the original implementation).

    Parameters
    ----------
    ir : array-like
        2-D matrix; ``ir @ ir.T`` must be invertible.

    Returns
    -------
    list
        The computed matrix converted with ``ndarray.tolist()``.
    """
    columns = np.transpose(ir)
    gram_inverse = np.linalg.inv(columns.T @ columns)
    hh = columns @ gram_inverse.T
    print(hh)
    print(hh.shape)
    return hh.tolist()
def mvdr(x, sv):
    """
    Minimum variance distortionless response (MVDR) beamformer weights

    Parameters
    ----------
    x : ndarray
        Received signal, assume 2D array with size [num_sensors, num_samples]

    sv: ndarray
        Steering vector, assume 1D array with size [num_sensors, 1]

    Returns
    -------
    w : ndarray
        Weights ``R^-1 sv / (sv^H R^-1 sv)`` where ``R = cov(x)``.

    Raises
    ------
    ValueError
        If ``x`` has more rows than columns, or if the leading dimensions
        of ``x`` and ``sv`` differ.

    Note: Unlike MATLAB where input matrix x is of size MxN where N represents
    the number of array elements, we assume row-major formatted data where each
    row is assumed to be complex-valued data from a given sensor (i.e. NxM)
    """
    if x.shape[0] > x.shape[1]:
        # Adjacent literals instead of a backslash continuation inside the
        # string, which leaked the continuation (and indentation) into the
        # message shown to the user.
        raise ValueError(
            'Matrix has more sensors than samples. Consider '
            'transposing and remember cuSignal is row-major, unlike MATLAB')

    if x.shape[0] != sv.shape[0]:
        raise ValueError('Steering Vector and input data do not align')

    R = cp.cov(x)
    R_inv = cp.linalg.inv(R)
    svh = cp.transpose(cp.conj(sv))

    wB = cp.matmul(R_inv, sv)
    # wA is a 1x1 scalar
    wA = cp.matmul(svh, wB)
    w = wB / wA

    return w
def check_usv(self, shape, dtype):
    """Compare ``cupy.linalg.svd`` with ``numpy.linalg.svd`` on random input.

    Checks shape/dtype parity of all three factors, agreement of the
    singular values, that ``U * S @ Vh`` reconstructs the input, and that
    ``U`` and ``Vh`` are unitary (all with ``atol=1e-4``). Uses
    ``self.seed`` and ``self.full_matrices``.
    """
    base = testing.shaped_random(shape, numpy, dtype=dtype, seed=self.seed)
    host_in = numpy.asarray(base, dtype=dtype)
    dev_in = cupy.asarray(base, dtype=dtype)

    host_out = numpy.linalg.svd(host_in, full_matrices=self.full_matrices)
    dev_out = cupy.linalg.svd(dev_in, full_matrices=self.full_matrices)

    # Check if the input matrix is not broken
    cupy.testing.assert_allclose(dev_in, host_in)

    assert len(dev_out) == 3
    for got, expected in zip(dev_out, host_out):
        assert got.shape == expected.shape
        assert got.dtype == expected.dtype

    _, s_cpu, _ = host_out
    u_gpu, s_gpu, vh_gpu = dev_out
    cupy.testing.assert_allclose(s_gpu, s_cpu, atol=1e-4)

    # reconstruct the matrix; with full matrices only the first k
    # columns/rows take part in the product
    k, = s_cpu.shape
    if self.full_matrices:
        left, right = u_gpu[:, :k], vh_gpu[:k, :]
    else:
        left, right = u_gpu, vh_gpu
    rebuilt = cupy.dot(left * s_gpu, right)
    cupy.testing.assert_allclose(dev_in, rebuilt, atol=1e-4)

    # assert unitary
    cupy.testing.assert_allclose(
        cupy.matmul(u_gpu.T.conj(), u_gpu),
        numpy.eye(u_gpu.shape[1]),
        atol=1e-4)
    cupy.testing.assert_allclose(
        cupy.matmul(vh_gpu, vh_gpu.T.conj()),
        numpy.eye(vh_gpu.shape[0]),
        atol=1e-4)
def _b_orthonormalize(B, blockVectorV, blockVectorBV=None, retInvR=False):
    """B-orthonormalize the given block vector using Cholesky.

    ``B`` is either ``None`` or a callable applied to the block vector.
    Columns are first scaled by their maximum plus machine epsilon. If the
    factorisation raises ``numpy.linalg.LinAlgError`` every returned block
    (and the inverse factor) is ``None``.

    Returns ``(blockVectorV, blockVectorBV)``, or with ``retInvR=True``
    ``(blockVectorV, blockVectorBV, VBV, normalization)``.
    """
    eps = cupy.finfo(blockVectorV.dtype).eps
    normalization = blockVectorV.max(axis=0) + eps
    blockVectorV = blockVectorV / normalization

    if blockVectorBV is not None:
        blockVectorBV = blockVectorBV / normalization
    elif B is not None:
        blockVectorBV = B(blockVectorV)
    else:
        blockVectorBV = blockVectorV

    VBV = cupy.matmul(blockVectorV.T.conj(), blockVectorBV)
    try:
        # VBV is a Cholesky factor
        VBV = _cholesky(VBV)
        VBV = linalg.inv(VBV.T)
        blockVectorV = cupy.matmul(blockVectorV, VBV)
        blockVectorBV = (
            cupy.matmul(blockVectorBV, VBV) if B is not None else None)
    except numpy.linalg.LinAlgError:
        # LinAlg Error: cholesky transformation might fail in rare cases
        # raise ValueError("cholesky has failed")
        blockVectorV = blockVectorBV = VBV = None

    if not retInvR:
        return blockVectorV, blockVectorBV
    return blockVectorV, blockVectorBV, VBV, normalization
def wint(n, t):
    """Build a length-``len(t)`` weight vector from sample positions ``t``.

    Following the inline notes of the original: an inverse Vandermonde
    matrix on ``n`` nodes in (0, 1] yields per-interval integration
    weights (an ``x*pn(x)`` and a ``const*pn(x)`` term), which are
    accumulated over sliding windows of ``n`` samples and compensated for
    window overlap. The last 40 entries are then overwritten with a linear
    ramp scaled by ``w[-40]``.

    Parameters
    ----------
    n : int
        Number of samples per window.
    t : array
        Sample positions; must hold at least 40 entries.

    Returns
    -------
    array
        Weight vector of length ``len(t)``.
    """
    N = len(t)
    log_nodes = cp.log(cp.linspace(1e-40, 1, n))

    # Inverse vandermonde matrix
    powers = cp.arange(n)
    orders = cp.arange(1, n + 2)
    iv = cp.linalg.inv(cp.exp(cp.outer(powers, log_nodes)))
    # integration over short intervals
    u = cp.diff(
        cp.exp(cp.outer(orders, log_nodes))
        * cp.tile(1.0 / orders[..., cp.newaxis], [1, n]))

    W1 = cp.matmul(iv, u[1:n + 1, :])  # x*pn(x) term
    W2 = cp.matmul(iv, u[0:n, :])  # const*pn(x) term

    # Compensate for overlapping short intervals
    ramp_up = cp.arange(1, n)
    plateau = (n - 1) * cp.ones((N - 2 * (n - 1) - 1))
    ramp_down = cp.arange(n - 1, 0, -1)
    p = 1 / cp.concatenate((ramp_up, plateau, ramp_down))

    w = cp.zeros(N)
    for j in range(N - n + 1):
        # Change coordinates, and constant and linear parts
        span = t[j + n - 1] - t[j]
        W = (span**2) * W1 + span * t[j] * W2

        for k in range(n - 1):
            w[j:j + n] = w[j:j + n] + p[j + k] * W[:, k]

    # The right-hand side reads w[-40] before the slice is overwritten.
    w[-40:] = (w[-40]) / (N - 40) * cp.arange(N - 40, N)
    return w
def backpropagate(node_array, output_array, weights_matrix, layer_n_list):
    '''
    Backpropagation step for a sigmoid network with a leading bias column.

    Inputs -
    node_array - list of arrays, one per layer (first element is the input
    layer, last element is the output layer), one row per training
    observation. Column 0 of every non-output layer is skipped when the
    error is propagated (treated as the bias unit).
    output_array - array holding the y values of each observation.
    weights_matrix - list of two dimensional weight arrays, one per layer
    transition.
    layer_n_list - the vector corresponding to the neural network
    structure. Example - [10,5,3] corresponds to some unknown amount of
    inputs, 10 nodes in layer 1, 5 in layer 2, and 3 output nodes.

    Outputs -
    grad_adjustment_vector - list of arrays (ordered from the first layer
    to the last) containing the amounts by which the weights should be
    adjusted in this iteration.
    output error - ``node_array[-1] - output_array``, also used when
    adjusting the weights.
    '''
    initial_error = node_array[-1] - output_array
    delta = initial_error
    grad_adjustment_vector = [cp.matmul(delta.T, node_array[-2])]

    # Plain Python range: iterating a device array would create one 0-d
    # array per step and convert it back to int for every index.
    for depth in range(len(layer_n_list) - 1):
        activations = node_array[-(depth + 2)][:, 1:]
        delta = (cp.matmul(delta, weights_matrix[-(depth + 1)][:, 1:])
                 * (activations * (1 - activations)))
        grad_adjustment_vector.append(
            cp.matmul(delta.T, node_array[-(depth + 3)]))

    grad_adjustment_vector.reverse()
    return grad_adjustment_vector, initial_error
def MWU_game_algorithm(payoff_mat, phi=1 / 2, steps_number=10000):
    """Iterate multiplicative weight updates on a payoff matrix.

    Each step computes the column payoffs of the current row distribution,
    takes the column with the largest payoff, and rescales the row weights
    by ``1 - phi * payoff`` for that column. The row distribution whose
    largest column payoff was smallest is remembered.

    Parameters
    ----------
    payoff_mat : array-like
        2-D payoff matrix (rows: row player's actions).
    phi : float
        Step size of the multiplicative update.
    steps_number : int
        Number of iterations.

    Returns
    -------
    tuple
        ``(p_best, j_distribution, -game_value, game_value)`` where
        ``p_best`` has shape ``(1, rows)``, ``j_distribution`` is the
        empirical distribution of chosen columns with shape ``(cols, 1)``
        and ``game_value = p_best @ payoff_mat @ j_distribution``.
    """
    payoff_mat = np.array(payoff_mat)
    rows_number = payoff_mat.shape[0]
    cols_number = payoff_mat.shape[1]

    # p_t is only ever rebound, never mutated, so sharing the initial
    # array with p_best is safe.
    p_t = np.ones((1, rows_number)) / rows_number
    p_best = p_t
    j_sumed = np.zeros((cols_number, 1))
    smallest_column_payoff = 1

    for _ in range(steps_number):
        payoffs = np.matmul(p_t, payoff_mat)
        j_best_response = np.argmax(payoffs)
        if payoffs[0, j_best_response] < smallest_column_payoff:
            smallest_column_payoff = payoffs[0, j_best_response]
            p_best = p_t
        j_sumed[j_best_response] += 1

        m_t = payoff_mat[:, j_best_response]
        # Rows that are neither significant nor facing a negative payoff
        # are zeroed out by the update.
        to_update = np.logical_or(m_t < 0, (p_t > SIGNIFICANCE_CONST)[0])
        m_t_updating = np.where(to_update, m_t, 0)
        p_t_updating = np.where(to_update, p_t, 0)
        p_t = np.multiply((1 - phi * m_t_updating), p_t_updating)
        p_t = p_t / p_t.sum()

    j_distribution = j_sumed / j_sumed.sum()
    game_value = np.matmul(
        np.matmul(p_best, payoff_mat), j_distribution)[0][0]
    return p_best, j_distribution, -game_value, game_value
def __world2image__(self, qwxyz, t, K, point3d):
    """Project 3-D points into the image of a camera with the given pose.

    A 4x4 pose matrix is assembled from the rotation of quaternion
    ``qwxyz`` and translation ``t``; the points are transformed by its
    inverse, multiplied by ``K`` and passed through
    ``self.__hnormalized__``. NumPy is used when ``self.pose_cupy is
    False``, CuPy otherwise (results are copied back to host arrays).

    Returns
    -------
    tuple
        ``(image_pixel, Rm_inv)``: the projected points and the inverse of
        the 3x3 rotation matrix, both as NumPy arrays.
    """
    if self.pose_cupy is False:
        pose = np.eye(4)
        pose[:3, 3] = t
        rotation = Quaternion(qwxyz).rotation_matrix
        Rm_inv = np.linalg.inv(rotation)
        pose[:3, :3] = rotation
        world_to_cam = np.linalg.inv(pose)
        homogeneous = self.__homogeneousCoord__(point3d).T
        local_pts = np.matmul(world_to_cam, homogeneous)[0:3, :]
        image_pixel = self.__hnormalized__(np.matmul(K, local_pts).T)
        return image_pixel, Rm_inv

    pose = cp.eye(4)
    pose[:3, 3] = cp.asarray(t)
    rotation = cp.asarray(Quaternion(qwxyz).rotation_matrix)
    # The rotation inverse is handed back to the caller on the host.
    Rm_inv = cp.asnumpy(cp.linalg.inv(rotation))
    pose[:3, :3] = cp.asarray(rotation)
    world_to_cam = cp.linalg.inv(pose)
    homogeneous = self.__homogeneousCoord__(cp.asarray(point3d)).T
    local_pts = cp.matmul(world_to_cam, homogeneous)[0:3, :]
    projected = self.__hnormalized__(
        cp.matmul(cp.asarray(K), local_pts).T)
    image_pixel = cp.asnumpy(projected)
    return image_pixel, Rm_inv
def update_render(self) -> QPixmap:
    """Rasterise the vertices of ``self.models`` into a ``QPixmap``.

    For ``PROJECTION_PERSPECTIVE`` every vertex is extended to homogeneous
    coordinates, multiplied by ``self.em`` and then ``self.im``, divided by
    its third coordinate and drawn as a single white pixel when it falls
    inside the ``self.shader_resolution`` (width, height) canvas. Any
    other render method yields a black image.

    Returns
    -------
    QPixmap
        RGB888 image of size ``shader_resolution``.
    """
    # TODO
    width, height = self.shader_resolution[0], self.shader_resolution[1]
    img = cp.zeros((height, width, 3), cp.uint8)
    v_color = cp.array([255, 255, 255], cp.uint8)

    if self.render_method == self.PROJECTION_PERSPECTIVE:
        for model in self.models:
            v_extend = cp.concatenate(
                (model.v, cp.ones((model.v_size, 1), cp.float32)), axis=1)
            v_im = cp.transpose(
                cp.matmul(self.im,
                          cp.matmul(self.em, cp.transpose(v_extend))))
            v_im = v_im / v_im[:, 2:]
            v_im = v_im[:, :2].astype(cp.int32)

            cols, rows = v_im[:, 0], v_im[:, 1]
            # Valid indices are 0 .. size-1; the previous inclusive upper
            # bound let col == width / row == height through, which is
            # outside the image.
            visible = ((cols >= 0) & (cols < width)
                       & (rows >= 0) & (rows < height))
            # One masked assignment instead of a per-vertex Python loop.
            img[rows[visible], cols[visible]] = v_color

    img = cp.asnumpy(img)
    # Pass the row stride explicitly: without it Qt assumes 32-bit aligned
    # scanlines, which a 3-byte-per-pixel row does not guarantee.
    pixmap = QImage(img.data, img.shape[1], img.shape[0], img.strides[0],
                    QImage.Format_RGB888)
    pixmap = QPixmap(pixmap)
    return pixmap
def predict(self, X, returndict=0, returnclass=0):
    """Run the network forward and return softmax outputs.

    Hidden layers apply ``w @ h + b`` followed by
    ``NeuralNet.activate(..., self.activation)``; the last layer applies
    ``w @ h + b`` followed by a softmax over axis 0.

    Parameters
    ----------
    X : array
        Input; multiplied from the left by ``self.params["w1"]``.
    returndict : int
        ``0`` returns the softmax output; any other value returns a dict
        of intermediates: ``"h0"`` (input), ``"a<i>"`` (pre-activations)
        and ``"h<i>"`` (activations) for every layer ``i``.
    returnclass : int
        ``1`` returns ``argmax`` over axis 0 of the last pre-activation
        instead (takes precedence over ``returndict``).

    Returns
    -------
    array or dict
        See ``returndict`` / ``returnclass``.
    """
    keep = returndict != 0
    values = {"h0": X} if keep else None
    predictions = X
    layers = len(self.structure) - 1

    for i in range(1, layers):
        predictions = cp.matmul(
            self.params["w" + str(i)], predictions
        ) + self.params["b" + str(i)]
        if keep:
            values["a" + str(i)] = predictions
        predictions = NeuralNet.activate(predictions, self.activation)
        if keep:
            values["h" + str(i)] = predictions

    predictions = cp.matmul(
        self.params["w" + str(layers)], predictions
    ) + self.params["b" + str(layers)]
    if keep:
        values["a" + str(layers)] = predictions
    if returnclass == 1:
        return cp.argmax(predictions, axis=0)

    # Numerically stable softmax. The earlier `cp.clip(...)` call discarded
    # its result, so large logits overflowed in exp() and produced NaN.
    # Subtracting the per-column maximum leaves the softmax unchanged
    # mathematically and keeps every exponent <= 0.
    shifted = predictions - cp.max(predictions, axis=0, keepdims=True)
    exponentials = cp.exp(shifted)
    predictions = exponentials / cp.sum(exponentials, axis=0)

    if keep:
        values["h" + str(layers)] = predictions
        return values
    return predictions
def propagate(input_array, weights_matrix, layer_n_list):
    '''
    Forward propagation algorithm.

    Inputs -
    input_array - two dimensional array corresponding to x data.
    weights_matrix - list of two dimensional weight arrays; layer ``i`` is
    computed as ``sigmoid(nodes[i] @ weights_matrix[i].T)``.
    layer_n_list - the vector corresponding to the neural network
    structure. Example - [10,5,3] corresponds to some unknown amount of
    inputs, 10 nodes in layer 1, 5 in layer 2, and 3 output nodes.

    Output -
    node_array - list of arrays, one per layer, one row per training
    observation. The first element is the input layer, the last element is
    the output layer. Every hidden layer gets a column of ones prepended;
    the output layer does not.
    '''
    node_array = [input_array]
    # range() instead of iterating a device array, which would allocate a
    # 0-d array per step only to convert it back to int.
    for layer in range(len(layer_n_list) - 1):
        hidden = sigmoid(
            cp.matmul(node_array[layer], weights_matrix[layer].T))
        # Integer ones keep the dtype promotion of the former list-built
        # column, and (unlike it) also work for zero rows.
        bias = cp.ones((hidden.shape[0], 1), dtype=int)
        node_array.append(cp.concatenate((bias, hidden), axis=1))

    output = sigmoid(cp.matmul(node_array[-1], weights_matrix[-1].T))
    node_array.append(output)
    return node_array
def v_grad(self, i, j, neg_i, neg_j, vi, vj, neg_vi, neg_vj, v_i_s, v_n_i_s, inv_v_i_s, inv_v_n_i_s, mid_ij, mid_n_ij, inv_ij, inv_n_ij, mask):
    """Return three masked gradient terms built from ``wb.batch_log*``.

    The first term combines the positive and negative-sample products for
    ``self.vars`` and is summed over ``self.window_size``; the second and
    third are the products for the positive and negative entries of
    ``self.c_vars``. ``mask`` is broadcast over the trailing two matrix
    axes. The precomputed roots/inverses are forwarded to ``wb`` as-is.

    Returns
    -------
    tuple
        ``(grad_i, grad_j, grad_neg_j)``.
    """
    iters = self.num_sqrt_iters
    n_neg, dim = self.num_neg, self.n_dim

    lvi, lvj = self.vars[i], self.c_vars[j]
    lneg_vi, lneg_vj = self.vars[neg_i], self.c_vars[neg_j]

    pos_i = wb.batch_log2(vi, vj, mid=mid_ij, inv_sU=inv_v_i_s,
                          numIters=iters, prod=True)
    neg_i_ = wb.batch_log2(neg_vi, neg_vj, mid=mid_n_ij,
                           inv_sU=inv_v_n_i_s, numIters=iters,
                           prod=True) / self.num_neg
    pos_j = wb.batch_log(vj, vi, sV=v_i_s, inv=inv_ij,
                         numIters=iters, prod=True)
    neg_j_ = wb.batch_log(neg_vj, neg_vi, sV=v_n_i_s, inv=inv_n_ij,
                          numIters=iters, prod=True)

    pos_term = -(cp.matmul(pos_i, lvi))
    neg_term = cp.matmul(neg_i_, lneg_vi).reshape(
        -1, n_neg, dim, dim).sum(axis=1)
    grad_i = ((pos_term + neg_term) * mask.reshape(-1, 1, 1)).reshape(
        -1, self.window_size, dim, dim).sum(axis=1)

    grad_j = -(cp.matmul(pos_j, lvj)) * mask.reshape(-1, 1, 1)

    # + 2 * self.var_reg * lneg_vj  (regulariser left disabled upstream)
    neg_mask = mask.repeat(n_neg).reshape(-1, 1, 1)
    grad_neg_j = cp.matmul(neg_j_, lneg_vj) * neg_mask / n_neg

    return grad_i, grad_j, grad_neg_j
def predict(self, Xtest, use_gpu=False):
    r"""Predict using the linear model

    Let :math:`B^k` be the basis vectors of class :math:`k`, and :math:`x` be
    the RCDT space feature vector of an input, the NS method performs
    classification by

    .. math::
        arg\min_k \| B^k (B^k)^T x - x\|^2

    Parameters
    ----------
    Xtest : array-like, shape (n_samples, n_rows, n_columns)
        Image data for testing.
    use_gpu: boolean flag; IF TRUE, use gpu for calculations
        default = False.

    Returns
    -------
    ndarray of shape (n_samples,)
       Predicted target values per element in Xtest.

    """
    # The docstring is a raw string: "\m" and "\|" are invalid escape
    # sequences in a normal literal and trigger a warning at compile time.

    # calculate the RCDT using parallel CPUs
    print('\nCalculating RCDTs for testing images ...')
    Xrcdt = self.rcdt_parallel(Xtest)

    # vectorize RCDT matrix
    X = Xrcdt.reshape([Xrcdt.shape[0], -1])

    # import cupy for using GPU
    if use_gpu:
        import cupy as cp
        X = cp.array(X)

    # find nearest subspace for each test sample
    print('Finding nearest subspace for each test sample ...')
    D = []
    for class_idx in range(self.num_classes):
        basis = self.subspaces[class_idx][:self.len_subspace, :]

        if use_gpu:
            # Upload the basis once per class instead of once per use.
            basis = cp.array(basis)
            residual = cp.matmul(cp.matmul(X, basis.T), basis) - X
            D.append(cp.linalg.norm(residual, axis=1))
        else:
            proj = X @ basis.T  # (n_samples, n_basis)
            projR = proj @ basis  # (n_samples, n_features)
            D.append(LA.norm(projR - X, axis=1))

    if use_gpu:
        preds = cp.argmin(cp.stack(D, axis=0), axis=0)
        return cp.asnumpy(preds)

    return np.argmin(np.stack(D, axis=0), axis=0)
def batch_exp(U, V):
    """
    Exponential map at N(U) in the direction of V.

    Computes ``(V + I) @ U @ (V + I)`` for every matrix pair of the batch,
    with one identity matrix per entry of ``U``'s leading axis.
    """
    batchsize = U.shape[0]
    n = V.shape[1]
    identities = cp.tile(cp.eye(n), (batchsize, 1, 1))
    shifted = V + identities
    inner = cp.matmul(U, shifted)
    return cp.matmul(shifted, inner)
def batch_Tuv2(U, V, mid=None, inv_sU=None, numIters = 2):
    """
    Returns the transportation matrix from N(U) to N(V), computed as
    ``inv_sU @ mid @ inv_sU``.

    ``sU, inv_sU`` come from ``batch_sqrtm(U)`` and ``mid`` is the first
    output of ``batch_sqrtm(sU @ V @ sU)``. Assuming ``batch_sqrtm``
    returns (root, inverse root), this is
    U^{-1/2}[U^{1/2}VU^{1/2}]^{1/2}U^{-1/2}.

    Precomputed ``mid`` / ``inv_sU`` are used only when both are given; if
    either is missing, ``inv_sU`` is recomputed from ``U``.
    """
    mid_missing = mid is None
    if mid_missing or inv_sU is None:
        sU, inv_sU = batch_sqrtm(U, numIters=numIters)
    if mid_missing:
        sandwich = cp.matmul(cp.matmul(sU, V), sU)
        mid, _ = batch_sqrtm(sandwich, numIters=numIters)
    return cp.matmul(inv_sU, cp.matmul(mid, inv_sU))
def batch_Tuv(U, V, inv=None, sV=None, numIters = 2):
    """
    Returns the transportation matrix from N(U) to N(V):
    V^{1/2}[V^{1/2}UV^{1/2}]^{-1/2}V^{1/2}

    ``sV`` (first output of ``batch_sqrtm(V)``) and ``inv`` (second output
    of ``batch_sqrtm(sV @ U @ sV)``) are computed only when not supplied.
    """
    if sV is None:
        sV = batch_sqrtm(V, numIters=numIters)[0]
    if inv is None:
        sandwich = cp.matmul(cp.matmul(sV, U), sV)
        inv = batch_sqrtm(sandwich, numIters=numIters)[1]
    return cp.matmul(sV, cp.matmul(inv, sV))
def matmul(a, b, l=False, exception=False):
    """
    Matrix-multiply ``a`` and ``b`` after adjusting their dimensions.

    Default path: drops the last axis of ``a`` and axis 1 of ``b``
    (index 0 each), multiplies, and appends a trailing axis to the result.

    ``exception == "h_in * W_rnn_latency"``: interactive debugging path
    for the latency weights, which have more dims than usual. It prepends
    an axis to ``a``, prints shapes and sizes, pauses on ``input()``
    between stages, and returns ``cp.matmul(a, b)``.

    ``l`` (flag for latency weight matrices) is accepted but not used.
    """
    if exception != "h_in * W_rnn_latency":
        lhs = a[..., 0]
        rhs = b[:, 0, :, :]
        return cp.matmul(lhs, rhs)[..., np.newaxis]

    # Exception hardcoded for W_rnn_latency because it has more dims than
    # usual
    print("STARTING MATMUL()")
    print("a.shape")
    print(a.shape)
    a = a[cp.newaxis, ...]
    print("new a.shape")
    print(a.shape)
    print("b.shape")
    print(b.shape)
    input()

    print("a.nbytes")
    print(a.nbytes)
    print("b.nbytes")
    print(b.nbytes)
    input()

    print(
        "cp.sum(a[...,cp.newaxis]*b[:,cp.newaxis,...], axis=-2, dtype=cp.float16)"
    )
    print(cp.sum(a * b, axis=-2, dtype=cp.float16))
    print("a*b.nbytes")
    print((a * b).nbytes)
    input()

    print("a*b.shape")
    print((a * b).shape)
    input()

    print("cp.matmul(a, b).shape")
    print(cp.matmul(a, b).shape)
    input()
    return cp.matmul(a, b)
def update_one(P, flt, r, e, J, delta=0):
    """One recursive-least-squares style update of ``P`` and ``J``.

    ``delta`` is a discount factor for weighted RLS (0 gives the plain
    update). ``r`` is gated element-wise by ``flt`` before use.

    Parameters
    ----------
    P : array, (N, N)
    flt : array with N entries (reshaped to a column)
    r : array, (N, 1)
    e : scalar error
    J : array, (N,) -- updated IN PLACE
    delta : float

    Returns
    -------
    tuple
        ``(P_new, J)`` where ``J`` is the same object that was passed in.
    """
    gated = cp.multiply(flt.reshape(-1, 1), r)
    k = cp.matmul(P, gated)  # (N,1)
    rPr = cp.matmul(gated.T, k)  # scalar
    ratio = (1 + delta) / (1 - delta)
    c = 1.0 / (1.0 + ratio * rPr)  # scalar

    correction = cp.matmul(k, (k.T * c))
    P = P / (1 - delta) - (1 + delta) / (1 - delta)**2 * correction

    dw = -e * k * c * (1 + delta) / (1 - delta)
    J += dw.reshape(-1, )
    return P, J
def step(u):
    """Advance a randomly initialised state by one Euler step and read it out.

    The state ``x`` is drawn uniformly in [0, 1) as float32, batched with
    shape ``(u.shape[0], number_neurons, 1)`` when ``u`` is 3-D and
    ``(number_neurons,)`` otherwise. It is updated in place by
    ``0.05 * (W @ x + V @ u)`` and ``T @ x`` is returned. ``W``, ``V``,
    ``T`` and ``number_neurons`` are module-level names.
    """
    if u.ndim == 3:
        state_shape = (u.shape[0], number_neurons, 1)
    else:
        state_shape = (number_neurons,)
    x = np.random.rand(*state_shape).astype(np.float32)
    # In-place add keeps x as float32 even if the increment is float64.
    x += 0.05 * (np.matmul(W, x) + np.matmul(V, u))
    return np.matmul(T, x)
def forward(self, x, y_prev, c_prev):
    """Compute one gated recurrent step and cache the results on ``self``.

    The four pre-activations are ``x @ self.w + y_prev @ self.v + b``
    (``self.b`` reshaped to ``(4, 1, -1)``). Gates 0, 1 and 3 use
    ``sigmoid``, gate 2 uses ``tanh``. Sets ``self.gates`` (stack of the
    four activations), ``self.c = g0 * c_prev + g1 * g2`` and
    ``self.y = g3 * tanh(self.c)``. Returns nothing.
    """
    bias = self.b.reshape(4, 1, -1)
    pre = np.matmul(x, self.w) + np.matmul(y_prev, self.v) + bias

    keep_gate = sigmoid(pre[0])
    write_gate = sigmoid(pre[1])
    candidate = np.tanh(pre[2])
    read_gate = sigmoid(pre[3])

    self.gates = np.stack((keep_gate, write_gate, candidate, read_gate))
    self.c = keep_gate * c_prev + write_gate * candidate
    self.y = read_gate * np.tanh(self.c)
def __estimate_one_step(self, threshold: int):
    """
    Estimate the solution from the leading ``threshold`` singular
    triplets and store it in ``self.current``.

    The estimate is ``V.T[:, :threshold] @ diag(1 / D[:threshold]) @
    U[:, :threshold].T @ q_estimator``, using the stored factors
    ``__U``, ``__D`` and ``__V``.

    :param threshold: Number of leading entries of ``D`` (and matching
        columns of ``U`` / ``V.T``) to keep; the rest are discarded.
    :type threshold: int
    """
    inverse_values = cp.diag(cp.divide(1, self.__D[:threshold]))
    coefficients = cp.matmul(self.__U[:, :threshold].T, self.q_estimator)
    scaled = cp.matmul(inverse_values, coefficients)
    self.current = cp.matmul(self.__V.T[:, :threshold], scaled)
def _finalize(self):
    """Fold epsilon transitions into every transition matrix.

    If ``self.transitions`` has an ``'epsilon'`` entry, the identity is
    added to it and the result is squared ``ceil(log2(n))`` times. Every
    transition matrix is then left-multiplied by that matrix and
    ``'epsilon'`` is replaced by it. Without an ``'epsilon'`` entry
    nothing changes.

    Returns
    -------
    self
    """
    closure = self.transitions.get('epsilon', None)
    if closure is None:
        return self

    closure = closure + cupy.identity(closure.shape[0], dtype=bool)
    squarings = math.ceil(math.log(closure.shape[0], 2))
    for _ in range(squarings):
        closure = cupy.matmul(closure, closure)

    # Values are replaced for existing keys only, so iterating the dict
    # while assigning is safe; 'epsilon' itself is reset right after.
    for key in self.transitions:
        self.transitions[key] = cupy.matmul(closure, self.transitions[key])
    self.transitions['epsilon'] = closure
    return self
def test_accuracy(W1, b1, W2, b2, W3, b3, images, labels):
    """Evaluate a three-layer ReLU network with biases.

    Samples are columns: ``labels.shape[1]`` is the sample count and the
    class is the ``argmax`` over axis 0.

    Returns
    -------
    tuple
        ``(accuracy, cost)``: the fraction of samples classified correctly
        and ``common.cross_entropy_with_softmax(labels, logits)`` divided
        by the sample count.
    """
    nums = labels.shape[1]

    hidden1 = common.relu(np.matmul(W1, images) + b1)
    hidden2 = common.relu(np.matmul(W2, hidden1) + b2)
    logits = np.matmul(W3, hidden2) + b3

    cost = common.cross_entropy_with_softmax(labels, logits) / nums

    true_classes = np.argmax(labels, axis=0)
    predicted_classes = np.argmax(logits, axis=0)
    hits = np.sum(np.equal(true_classes, predicted_classes))
    return (hits / nums, cost)
def test_accuracy(W1, W2, W3, images, labels):
    """Evaluate a bias-free three-layer ReLU network.

    Samples are columns: ``labels.shape[1]`` is the sample count and the
    class is the ``argmax`` over axis 0.

    Returns
    -------
    tuple
        ``(accuracy_percent, cost)``: the percentage of samples classified
        correctly and ``para_func.cross_entropy_with_softmax(labels,
        logits)`` divided by the sample count.
    """
    nums = labels.shape[1]

    hidden1 = para_func.relu(np.matmul(W1, images))
    hidden2 = para_func.relu(np.matmul(W2, hidden1))
    logits = np.matmul(W3, hidden2)

    cost = para_func.cross_entropy_with_softmax(labels, logits) / nums

    true_classes = np.argmax(labels, axis=0)
    predicted_classes = np.argmax(logits, axis=0)
    hits = np.sum(np.equal(true_classes, predicted_classes))
    return (100.0 * hits / nums, cost)
def free_run(self, dt, simulation_time):
    """Simulate the network from its current state and record ``x``.

    Starting from ``self.x`` and ``self.r`` (neither is modified), each
    step applies ``x <- (1 - dt) * x + M @ (r * dt) + Jgz @ (z * dt)``,
    ``r <- tanh(x)``, ``z <- Jz.T @ r``. One step is taken per element of
    ``arange(0, simulation_time, dt)``.

    Returns
    -------
    array, shape (self.N, n_steps)
        Column ``i`` holds the first column of ``x`` after step ``i``.
    """
    x, r = self.x, self.r
    z = cp.matmul(self.Jz.T, r)

    n_steps = len(cp.arange(0, simulation_time, dt))
    states_T = cp.zeros((self.N, n_steps))

    for step_idx in range(n_steps):
        recurrent = cp.matmul(self.M, r * dt)
        feedback = cp.matmul(self.Jgz, z * dt)
        x = (1.0 - dt) * x + recurrent + feedback
        r = cp.tanh(x)  # (N,1)
        z = cp.matmul(self.Jz.T, r)
        states_T[:, step_idx] = x[:, 0]
    return states_T
def batch_bures(U, V, numIters = 20, U_stride=None, sU = None, inv_sU = None, prod = False):
    """Batched trace term built from ``batch_sqrtm(sU @ V @ sU)``.

    ``sU`` / ``inv_sU`` are the outputs of ``batch_sqrtm(U)`` and are
    computed only when ``sU`` is not supplied. With ``U_stride`` set, the
    roots are computed for every ``U_stride``-th matrix and repeated
    ``U_stride`` times along axis 0.

    Returns
    -------
    tuple
        ``(value, inv, sU, inv_sU, cross)`` where ``cross, inv`` are the
        outputs of ``batch_sqrtm(sU @ V @ sU)`` and ``value`` is
        ``trace(cross)`` if ``prod`` else ``trace(U + V - 2 * cross)``,
        taken over the last two axes.
    """
    # Avoid recomputing roots if not necessary
    if sU is None:
        if U_stride is None:
            sU, inv_sU = batch_sqrtm(U, numIters=numIters)
        else:
            strided_root, strided_inv = batch_sqrtm(
                U[::U_stride], numIters=numIters)
            sU = strided_root.repeat(U_stride, axis=0)
            inv_sU = strided_inv.repeat(U_stride, axis=0)

    sandwich = cp.matmul(sU, cp.matmul(V, sU))
    cross, inv = batch_sqrtm(sandwich, numIters=numIters)

    traced = cross if prod else U + V - 2 * cross
    return cp.trace(traced, axis1=1, axis2=2), inv, sU, inv_sU, cross
def test_stream_capture_failure_cublas(self):
    """Check that a matmul issued during stream capture is rejected.

    ``cupy.matmul`` inside ``begin_capture`` / ``end_capture`` must raise
    ``NotImplementedError`` mentioning cuBLAS, and the stream must be
    usable again afterwards.
    """
    s = cupy.cuda.Stream(non_blocking=True)
    a = cupy.random.random((3, 4))
    b = cupy.random.random((4, 5))

    with s:
        s.begin_capture()
        with pytest.raises(NotImplementedError) as e:
            cupy.matmul(a, b)
        assert 'cuBLAS' in str(e.value)
        # End the capture even though the captured call failed.
        s.end_capture()

    # check s left the capture mode and permits normal usage
    assert not s.is_capturing()
    s.synchronize()
def setUp(self):
    """Create a linear system ``a @ x = b`` with a known solution.

    Reads ``self.dtype``, ``self.n``, ``self.bs`` (batch size or None),
    ``self.nrhs`` (number of right-hand sides or None) and ``self._tol``.
    Sets ``self.dtype`` (normalised to a ``numpy.dtype``),
    ``self.r_dtype``, ``self.a``, ``self.b``, ``self.x_ref`` and
    ``self.tol``. Batch and right-hand-side axes appear in the stored
    arrays only when the corresponding parameter is not None.
    """
    self.dtype = numpy.dtype(self.dtype)
    single_precision = self.dtype.char in 'fF'
    self.r_dtype = numpy.float32 if single_precision else numpy.float64

    n = self.n
    batched = self.bs is not None
    multi_rhs = self.nrhs is not None
    bs = self.bs if batched else 1
    nrhs = self.nrhs if multi_rhs else 1

    # Always build fully-dimensioned operands, then squeeze by reshape.
    a = self._make_well_conditioned_matrices((bs, n, n))
    x = self._make_random_matrices((bs, n, nrhs), cupy)
    b = cupy.matmul(a, x)

    a_shape = (bs, n, n) if batched else (n, n)
    b_shape = ([bs] if batched else []) + [n] + ([nrhs] if multi_rhs else [])

    self.a = a.reshape(a_shape)
    self.b = b.reshape(b_shape)
    self.x_ref = x.reshape(b_shape)

    tol_key = 'f' if self.r_dtype == numpy.float32 else 'd'
    self.tol = self._tol[tol_key]
def _eigh(A, B=None):
    """
    Solve ``A x = lambda B x`` by reduction to a standard problem.

    Without ``B`` this is a plain ``linalg.eigh(A)``. Otherwise the
    factor ``R = _cholesky(B)`` is used to form
    ``F = inv(R) @ A @ inv(R.T)``; the eigenvalues of ``F`` are returned
    together with the eigenvectors mapped back through ``inv(R.T)``.

    Returns
    -------
    tuple
        ``(eigenvalues, eigenvectors)``.
    """
    if B is None:
        # use cupy's eigh in standard case
        vals, vecs = linalg.eigh(A)
        return vals, vecs

    factor = _cholesky(B)
    factor_inv = linalg.inv(factor)
    factor_t_inv = linalg.inv(factor.T)

    reduced = cupy.matmul(factor_inv, cupy.matmul(A, factor_t_inv))
    vals, reduced_vecs = linalg.eigh(reduced)
    return vals, cupy.matmul(factor_t_inv, reduced_vecs)