def leslie(f, s):
    """Build the Leslie matrix for fecundity and survival coefficients.

    The result is an ``n x n`` array (``n = f.size``) that is zero everywhere
    except for its first row, which holds ``f``, and its first sub-diagonal,
    which holds ``s``.

    Args:
        f (cupy.ndarray): 1-D array of the ``n`` "fecundity" coefficients.
        s (cupy.ndarray): 1-D array of the ``n - 1`` "survival" coefficients.
            It must contain at least one element.

    Returns:
        cupy.ndarray: The Leslie matrix. Its data-type is
        ``cupy.result_type(f, s)``.

    Raises:
        ValueError: If ``f`` or ``s`` is not 1-D, if ``s`` is not exactly one
            element shorter than ``f``, or if ``s`` is empty.

    .. seealso:: :func:`scipy.linalg.leslie`
    """
    if f.ndim != 1:
        raise ValueError('Incorrect shape for f. f must be 1D')
    if s.ndim != 1:
        raise ValueError('Incorrect shape for s. s must be 1D')

    order = f.size
    n_survival = s.size
    if order != n_survival + 1:
        raise ValueError('Length of s must be one less than length of f')
    if n_survival == 0:
        raise ValueError('The length of s must be at least 1.')

    out = cupy.zeros((order, order), dtype=cupy.result_type(f, s))
    out[0] = f
    # Dropping the first row turns the sub-diagonal into the main diagonal of
    # the remaining (order - 1, order) view.
    cupy.fill_diagonal(out[1:], s)
    return out
def companion(a):
    """Build the companion matrix of a polynomial.

    Args:
        a (cupy.ndarray): 1-D array of polynomial coefficients. It must hold
            at least two entries and ``a[0]`` must be non-zero (this is not
            verified, see the note in the body).

    Returns:
        (cupy.ndarray): Square array of order ``a.size - 1`` whose first row
        is ``-a[1:]/a[0]`` and whose first sub-diagonal is all ones. The
        data-type is that of ``-a[1:]/a[0]``.

    Raises:
        ValueError: If ``a`` is not 1-D or has fewer than two elements.

    .. seealso:: :func:`cupyx.scipy.linalg.fiedler_companion`
    .. seealso:: :func:`scipy.linalg.companion`
    """
    if a.ndim != 1:
        raise ValueError('`a` must be one-dimensional.')
    n_coeffs = a.size
    if n_coeffs < 2:
        raise ValueError('The length of `a` must be at least 2.')

    # A zero leading coefficient is deliberately not rejected: testing
    # ``a[0] == 0`` would require a device-to-host synchronization.
    top_row = -a[1:] / a[0]

    order = n_coeffs - 1
    mat = cupy.zeros((order, order), dtype=top_row.dtype)
    mat[0] = top_row
    # The main diagonal of ``mat[1:]`` is the first sub-diagonal of ``mat``.
    cupy.fill_diagonal(mat[1:], 1)
    return mat
def test_lu_factor_reconstruction(self, dtype):
    """Check that ``L @ U`` rebuilt from ``lu_factor`` equals the pivoted input."""
    n_rows, n_cols = self.shape
    A = testing.shaped_random(self.shape, cupy, dtype=dtype)
    lu, piv = cupyx.scipy.linalg.lu_factor(A)

    # Split the packed factorization: the strict lower triangle (plus a unit
    # diagonal) is ``L``, the upper triangle is ``U``.
    lower = cupy.tril(lu, k=-1)
    cupy.fill_diagonal(lower, 1.)
    lower = lower[:, :n_rows]
    upper = cupy.triu(lu)[:n_cols, :]

    # Output shapes.
    assert lu.shape == (n_rows, n_cols)
    assert lower.shape == (n_rows, min(n_rows, n_cols))
    assert upper.shape == (min(n_rows, n_cols), n_cols)
    assert piv.shape == (min(n_rows, n_cols),)

    # Replay the row interchanges on the host (slaswp is not available in
    # cupy). The swaps must be applied in order, one pivot at a time.
    piv = cupy.asnumpy(piv)
    row_order = numpy.arange(n_rows)
    for i, row in enumerate(piv):
        if i != row:
            row_order[i], row_order[row] = row_order[row], row_order[i]
    permuted = A[row_order]

    # The product of the factors must match the row-permuted input.
    rebuilt = lower.dot(upper)
    cupy.testing.assert_allclose(rebuilt, permuted, atol=1e-5)
def __lioness_loop(self):
    """
    Description:
        Run the leave-one-out LIONESS loop over ``self.indexes``.

        For every sample ``i`` a coexpression network is computed from all
        other samples, normalized, optionally passed through
        ``self.panda_loop`` (when ``self.motif_matrix`` is set), and combined
        with ``self.network`` into the sample-specific network, which is
        written to ``self.save_dir`` and appended as one column of
        ``self.total_lioness_network``.

    Outputs:
        self.total_lioness_network: An edge-by-sample matrix containing sample-specific networks.
    """
    for position, i in enumerate(self.indexes):
        print("Running LIONESS for sample %d:" % (i+1))
        idx = [x for x in range(self.n_conditions) if x != i]  # all samples except i

        with Timer("Computing coexpression network:"):
            if self.computing == 'gpu':
                import cupy as cp
                correlation_matrix = cp.corrcoef(self.expression_matrix[:, idx])
                if cp.isnan(correlation_matrix).any():
                    cp.fill_diagonal(correlation_matrix, 1)
                    correlation_matrix = cp.nan_to_num(correlation_matrix)
                # Everything downstream of this branch works on host arrays.
                correlation_matrix = cp.asnumpy(correlation_matrix)
            else:
                correlation_matrix = np.corrcoef(self.expression_matrix[:, idx])
                if np.isnan(correlation_matrix).any():
                    np.fill_diagonal(correlation_matrix, 1)
                    correlation_matrix = np.nan_to_num(correlation_matrix)

        with Timer("Normalizing networks:"):
            correlation_matrix_orig = correlation_matrix  # save matrix before normalization
            correlation_matrix = self._normalize_network(correlation_matrix)

        with Timer("Inferring LIONESS network:"):
            if self.motif_matrix is not None:
                del correlation_matrix_orig
                subset_panda_network = self.panda_loop(
                    correlation_matrix,
                    np.copy(self.motif_matrix),
                    np.copy(self.ppi_matrix),
                    self.computing)
            else:
                # Without a motif prior the un-normalized coexpression
                # matrix itself is used as the leave-one-out network.
                del correlation_matrix
                subset_panda_network = correlation_matrix_orig

        lioness_network = self.n_conditions * (self.network - subset_panda_network) + subset_panda_network

        with Timer("Saving LIONESS network %d to %s using %s format:" % (i+1, self.save_dir, self.save_fmt)):
            path = os.path.join(self.save_dir, "lioness.%d.%s" % (i+1, self.save_fmt))
            if self.save_fmt == 'txt':
                np.savetxt(path, lioness_network)
            elif self.save_fmt == 'npy':
                np.save(path, lioness_network)
            elif self.save_fmt == 'mat':
                from scipy.io import savemat
                savemat(path, {'PredNet': lioness_network})
            else:
                print("Unknown format %s! Use npy format instead." % self.save_fmt)
                np.save(path, lioness_network)

        # Flatten the transposed network into one column. ``flatten`` yields
        # the same C-order copy as the former
        # ``np.fromstring(arr.tostring(), dtype=arr.dtype)`` round trip, which
        # relied on deprecated NumPy APIs.
        network_column = np.transpose(lioness_network).flatten()
        # Key the "first column" decision on the loop position rather than on
        # ``i == 0`` so that runs whose ``self.indexes`` does not start at
        # sample 0 still initialize the accumulator.
        if position == 0:
            self.total_lioness_network = network_column
        else:
            self.total_lioness_network = np.column_stack(
                (self.total_lioness_network, network_column))

    return self.total_lioness_network
def test_run_spearman_rho(pca_approved_drugs_csv, fingerprint_approved_drugs_csv, cluster_column, n_dims_eucl_data, top_k):
    """Validate the GPU Spearman rho scoring against the row-wise CPU reference.

    Two comparisons are made: one over all data and one restricted to the
    ``top_k`` closest entries per row. Both must agree with the CPU result to
    within ``atol=0.005`` (NaNs compare equal).
    """
    # Load PCA data to use as Euclidean distances
    pca_data = pd.read_csv(pca_approved_drugs_csv).set_index('molregno').drop(
        cluster_column, axis=1)
    float_data = pca_data[pca_data.columns[:n_dims_eucl_data]]
    euclidean_dist = pairwise_distances(cupy.array(float_data))

    # Load fingerprints and calculate tanimoto distance
    fp_data = pd.read_csv(fingerprint_approved_drugs_csv).set_index('molregno')
    tanimoto_dist = tanimoto_calculate(cupy.array(fp_data), calc_distance=True)

    # Check all data compared to the CPU version
    all_data_gpu = spearmanr(tanimoto_dist, euclidean_dist)

    euclidean_dist_cpu = cupy.asnumpy(euclidean_dist)
    tanimoto_dist_cpu = cupy.asnumpy(tanimoto_dist)
    all_data_cpu = _rowwise_numpy_corr(tanimoto_dist_cpu, euclidean_dist_cpu, spearmanr_cpu)

    # The comparison result has to be asserted; a bare ``allclose`` call
    # discards its return value and the test could never fail.
    assert cupy.allclose(cupy.array(all_data_cpu), all_data_gpu, atol=0.005, equal_nan=True)

    # Check using top k calculation compared to the CPU version
    top_k_data_gpu = spearmanr(tanimoto_dist, euclidean_dist, top_k=top_k, axis=1)

    # Reproduce the top-k selection for the CPU reference: blank the diagonal
    # and everything beyond the k-th unique value of each row.
    cupy.fill_diagonal(tanimoto_dist, cupy.nan)
    kth_lim = get_kth_unique_value(tanimoto_dist, top_k, axis=1)
    mask = tanimoto_dist > kth_lim
    tanimoto_dist[mask] = cupy.nan
    euclidean_dist[mask] = cupy.nan

    euclidean_dist_cpu = cupy.asnumpy(euclidean_dist)
    tanimoto_dist_cpu = cupy.asnumpy(tanimoto_dist)
    top_k_data_cpu = _rowwise_numpy_corr(tanimoto_dist_cpu, euclidean_dist_cpu, spearmanr_cpu)

    assert cupy.allclose(cupy.array(top_k_data_cpu), top_k_data_gpu, atol=0.005, equal_nan=True)
def calc_neighbourhood_stats(self, distances, radius):
    """Summarize, per row of ``distances``, the entries closer than ``radius``.

    An entry ``j`` counts as a neighbour of ``i`` when
    ``distances[i, j] < radius`` and ``i != j``.

    Args:
        distances: 2-D array of pairwise distances.
        radius: Exclusive upper bound on the neighbour distance.

    Returns:
        tuple: ``(has_neighbours, neighbours_num, neighbours_states_sum)``.
        ``has_neighbours`` is a boolean vector with one entry per row. The
        other two are restricted to the rows that do have neighbours:
        ``neighbours_num`` is a column vector of neighbour counts and
        ``neighbours_states_sum`` holds the sum of the ``self.state`` rows of
        the neighbours.
    """
    adjacency = distances < radius
    cp.fill_diagonal(adjacency, False)  # nothing is its own neighbour

    counts = adjacency.sum(axis=1)
    # Broadcast state (1, n, k) against adjacency (n, n, 1) and reduce over
    # the neighbour axis.
    masked_states = self.state[cp.newaxis, :, :] * adjacency[:, :, cp.newaxis]
    state_totals = masked_states.sum(axis=1)

    has_neighbours = counts > 0
    occupied_rows = cp.where(has_neighbours)[0]
    return (
        has_neighbours,
        counts[occupied_rows, cp.newaxis],
        state_totals[occupied_rows, :],
    )
def make_W(n=4):
    """Build a random symmetric weight matrix for an undirected graph.

    Each unordered pair of distinct nodes is connected with probability 0.5.
    Connected pairs get an Exponential(scale=1) weight, unconnected pairs get
    ``myInf``, and the diagonal is zero.

    Args:
        n (int): Number of nodes.

    Returns:
        cupy.ndarray: The ``(n, n)`` symmetric weight matrix.
    """
    # Random undirected graph: draw on the host, keep the strict upper
    # triangle and mirror it.
    upper = np.triu(np.random.binomial(1, 0.5, [n, n]), 1)
    A = cp.array(upper + np.transpose(upper))

    # Random weights, kept only on the edges of the graph.
    W = cp.array(np.random.exponential(scale=1, size=(n, n)))
    # Take the strict upper triangle on the device; copying to the host and
    # back just for ``triu`` is not needed.
    W = cp.triu(cp.multiply(A, W), 1)
    # Symmetrize out of place so the result does not depend on how an
    # in-place kernel handles the overlapping transposed view of ``W``.
    W = W + cp.transpose(W)

    # Missing edges get the "infinite" weight; the diagonal stays at zero.
    W += (cp.ones([n, n]) - A) * myInf
    cp.fill_diagonal(W, 0)
    return W
def fiedler_companion(a):
    """Return a Fiedler companion matrix.

    For the polynomial with coefficients ``a`` this builds a pentadiagonal
    matrix with a special structure whose eigenvalues coincide with the roots
    of the polynomial.

    Args:
        a (cupy.ndarray): 1-D array of polynomial coefficients in descending
            order with a nonzero leading coefficient. For ``N < 2``, an empty
            array is returned.

    Returns:
        cupy.ndarray: Resulting companion matrix

    Raises:
        ValueError: If ``a`` is not 1-D.

    Notes:
        As with ``companion``, the leading coefficient should be nonzero.
        When it is not 1, the remaining coefficients are rescaled before the
        matrix is built. To avoid numerical issues it is best to pass a monic
        polynomial.

    .. seealso:: :func:`cupyx.scipy.linalg.companion`
    .. seealso:: :func:`scipy.linalg.fiedler_companion`
    """
    if a.ndim != 1:
        raise ValueError('Input `a` must be a 1-D array.')

    n_coeffs = a.size
    if n_coeffs < 2:
        return cupy.zeros((0, ), a.dtype)
    if n_coeffs == 2:
        return (-a[1] / a[0])[None, None]

    # A zero leading coefficient is deliberately not rejected: testing
    # ``a[0] == 0`` would require a device-to-host synchronization.
    monic = a / a[0]
    order = n_coeffs - 1
    mat = cupy.zeros((order, order), dtype=monic.dtype)

    # first column
    mat[0, 0] = -monic[1]
    mat[1, 0] = 1

    # superdiagonals (even rows)
    cupy.fill_diagonal(mat[::2, 1::2], -monic[2::2])
    cupy.fill_diagonal(mat[::2, 2::2], 1)

    # subdiagonals (rows 2, 4, ... and 3, 5, ...)
    cupy.fill_diagonal(mat[2::2, 1::2], -monic[3::2])
    cupy.fill_diagonal(mat[3::2, 1::2], 1)

    return mat
def cdist(a):
    """Return the pairwise distance matrix of ``a`` against itself.

    The result is ``sqrt(A_ext.dot(B_ext))`` with a zero diagonal, where
    ``A_ext, B_ext = ext_arrs(a, a)``.

    NOTE(review): ``ext_arrs`` is defined elsewhere; this assumes the product
    of its outputs is the matrix of squared distances between the rows of
    ``a`` -- confirm against that helper.

    Args:
        a: Input array forwarded (twice) to ``ext_arrs``.

    Returns:
        The element-wise square root of the product, with every entry that
        rounding pushed below zero treated as zero.
    """
    A_ext, B_ext = ext_arrs(a, a)
    squared = A_ext.dot(B_ext)
    cp.fill_diagonal(squared, 0)
    # Rounding can leave tiny negative values off the diagonal as well (e.g.
    # for near-duplicate rows); clamp them so ``sqrt`` does not return NaN.
    # Genuine NaNs in the product are still propagated by ``maximum``.
    cp.maximum(squared, 0, out=squared)
    return cp.sqrt(squared)
# Look up the internal index of each branch end. ``bus_int`` is addressed
# with 1-based bus numbers. The builtin ``int`` is used as dtype because the
# ``cp.int`` alias of it is no longer provided by recent releases.
from_int = bus_int[from_bus - 1].astype(int)
to_int = bus_int[to_bus - 1].astype(int)
#print(to_int)

# Complex tap for every branch with a positive ratio; the phase shift is
# converted from degrees to radians.
has_ratio = liney_ratio > 0
tap[has_ratio] = cp.exp((-jay * phase_shift[has_ratio]) * cp.pi / 180) / liney_ratio[has_ratio]

# Shift to 0-based indices.
from_int = from_int - 1
to_int = to_int - 1
#print(from_int)

# Line impedance
# Determine connection matrices including tap changers and phase shifters
# sparse matrix formulation
c_from[from_int, a] = tap[a]
c_to[to_int, a] = 1
c_line[from_int, a] = c_from[from_int, a] - c_to[from_int, a]
c_line[to_int, a] = c_from[to_int, a] - c_to[to_int, a]

# Form Y matrix from primitive line ys and connection matrices
cp.fill_diagonal(chrgfull, chrg)
cp.fill_diagonal(yyfull, yy)
#print(c_from)

# Charging contribution seen from the "from" and the "to" ends.
Y_dummy = cp.matmul(chrgfull, c_from.T)
Y = cp.matmul(c_from, Y_dummy)
Y_dummy = cp.matmul(chrgfull, c_to.T)
Y = cp.matmul(c_to, Y_dummy) + Y
Y_2 = cp.copy(Y)  # snapshot taken before the yy contribution is added

# Contribution of yy through the line connection matrix.
Y_dummy = cp.matmul(yyfull, c_line.T)
Y = cp.matmul(c_line, Y_dummy) + Y
#print(Y)

# For buses of type 3, subtract b_pg / b_qg from Pl / Ql.
is_type3 = b_type == 3
Pl[is_type3] = Pl[is_type3] - b_pg[is_type3]
Ql[is_type3] = Ql[is_type3] - b_qg[is_type3]

yl = (Pl - jay * Ql) / (V * V)
ra = g_r * basmva / g_m
def sgd_subset(train_X, train_Y, iterations, alpha, regularization, weight_matrix):
    """Train ``weight_matrix`` with mini-batch Adagrad updates.

    Every iteration draws a fresh permutation of the ``N`` samples and
    processes it in three consecutive splits; the last split also takes the
    samples left over by the integer division.

    Args:
        train_X: Array with one sample per row.
        train_Y: Targets; indexed with the same sample indices as ``train_X``.
            (presumably shaped ``(N, 1)`` so that it lines up with the
            per-sample predictions -- TODO confirm.)
        iterations (int): Number of passes over the data.
        alpha: Step size of the Adagrad update.
        regularization: Factor applied to the squared weights that are added
            to the update step.
        weight_matrix: Weights with one row per feature. **Updated in place.**

    Returns:
        The same ``weight_matrix`` object, after training.
    """
    N = train_X.shape[0]
    # The feature count is read from the ``train_X`` argument; the previous
    # code referred to an undefined name ``trainX`` here.
    n_features = train_X.shape[1]
    M = weight_matrix.shape[1]

    tensor_of_x_features = cupy.zeros((N, 1, n_features))
    tensor_of_x_squared = cupy.zeros((N, n_features, n_features))
    matrix_set_diag_to_zero = cupy.ones((n_features, n_features))
    cupy.fill_diagonal(matrix_set_diag_to_zero, 0.0)
    for i in range(N):
        tensor_of_x_features[i] = train_X[i]
        # NOTE(review): for a 1-D row this ``dot`` is the scalar squared norm
        # broadcast over the whole (n_features, n_features) slot, not the
        # outer product of the row with itself -- confirm which is intended.
        tensor_of_x_squared[i] = train_X[i].dot(train_X[i])
    tensor_of_x_squared = tensor_of_x_squared * matrix_set_diag_to_zero
    tensor_of_x_features_squared = tensor_of_x_features * tensor_of_x_features

    historical_gradient = cupy.zeros(weight_matrix.shape)
    vector_of_sum = cupy.ones((M, 1))

    splits = 3  # 9*2 #720
    taker = N // splits  # samples per split

    for iteration in range(iterations):
        # NOTE(review): this seeds CuPy's generator, but the permutation below
        # is drawn from NumPy's global generator, so it is not affected by
        # the seed -- confirm whether reproducible shuffling was intended.
        cupy.random.seed(iteration + 1)
        random_idx_list = cupy.array(numpy.random.permutation(N))

        for j in range(splits):
            init = j * taker
            # The slice end is exclusive: the last split has to run to N
            # (not N - 1) or one sample is dropped in every iteration.
            ending = N if j == splits - 1 else (j + 1) * taker
            idxs = random_idx_list[init:ending]

            # Flush near-zero weights to exactly zero.
            weight_matrix[cupy.abs(weight_matrix) < 0.0000001] = 0
            weight_matrix_square = weight_matrix * weight_matrix

            # prediction = 0.5 * sum_f ((x . v_f)^2 - (x^2 . v_f^2))
            tensor_of_proto_vx = cupy.tensordot(
                tensor_of_x_features[idxs], weight_matrix, axes=1)
            tensor_of_proto_square = cupy.tensordot(
                tensor_of_x_features_squared[idxs], weight_matrix_square, axes=1)
            vector_of_prediction = cupy.tensordot(
                (tensor_of_proto_vx * tensor_of_proto_vx) - tensor_of_proto_square,
                vector_of_sum, axes=1).sum(axis=1) * 0.5

            b = train_Y[idxs] - vector_of_prediction
            vector_of_gradient = -2 * b
            vrau = cupy.tensordot(tensor_of_x_squared[idxs], weight_matrix, axes=1)
            update_step = ((vector_of_gradient.T * vrau.T).T).sum(axis=0) \
                + weight_matrix_square * regularization

            # ADAGRAD UPDATE
            # NOTE(review): no epsilon in the denominator; an entry whose
            # accumulated squared gradient is still zero yields 0/0.
            historical_gradient += update_step * update_step
            weight_matrix -= alpha / (cupy.sqrt(historical_gradient)) * update_step

    return weight_matrix
def optimize(self, training_features, training_targets, weight_matrix):
    """Train ``weight_matrix`` with mini-batch Adagrad and early stopping.

    Every iteration shuffles the samples and processes
    ``floor(N / self.batch_size)`` full batches (left-over samples are skipped
    for that iteration). Training stops early once the change in the absolute
    per-iteration error stayed below ``self.iteration_patience_threshold`` for
    ``self.iteration_patience`` consecutive iterations.

    Reads ``self.batch_size``, ``self.iterations``, ``self.alpha``,
    ``self.regularization``, ``self.iteration_patience`` and
    ``self.iteration_patience_threshold``.

    Args:
        training_features: Array-like with one sample per row; copied to the
            device.
        training_targets: Array-like of targets; copied to the device.
            (presumably shaped ``(N, 1)`` so that it lines up with the
            per-sample predictions -- TODO confirm.)
        weight_matrix: Weights with one row per feature. **Updated in place.**

    Returns:
        tuple: ``(weight_matrix, mean_error, error_iter_array)``.
        ``error_iter_array`` is a host ``float32`` array of length
        ``self.iterations`` with the mean signed residual of each iteration;
        iterations that were not run because of early stopping are ``NaN``.
        ``mean_error`` is the mean over the iterations that were run.
    """
    training_features = cupy.array(training_features)
    training_targets = cupy.array(training_targets)
    N = training_features.shape[0]
    n_features = training_features.shape[1]
    M = weight_matrix.shape[1]

    tensor_of_x_features = cupy.zeros((N, 1, n_features))
    tensor_of_x_squared = cupy.zeros((N, n_features, n_features))
    matrix_set_diag_to_zero = cupy.ones((n_features, n_features))
    cupy.fill_diagonal(matrix_set_diag_to_zero, 0.0)
    for i in range(N):
        tensor_of_x_features[i] = training_features[i]
        # NOTE(review): for a 1-D row this ``dot`` is the scalar squared norm
        # broadcast over the whole (n_features, n_features) slot, not the
        # outer product of the row with itself -- confirm which is intended.
        tensor_of_x_squared[i] = training_features[i].dot(training_features[i])
    tensor_of_x_squared = tensor_of_x_squared * matrix_set_diag_to_zero
    tensor_of_x_features_squared = tensor_of_x_features * tensor_of_x_features

    historical_gradient = cupy.zeros(weight_matrix.shape)
    vector_of_sum = cupy.ones((M, 1))

    batch_count = numpy.floor(N / self.batch_size).astype(numpy.int32)

    patience_counter = 0
    last_iteration_error = 0
    # Pre-filled with NaN (rather than left uninitialized) so that slots of
    # iterations skipped by early stopping never hold arbitrary memory.
    error_iter_array = numpy.full(self.iterations, numpy.nan, dtype=numpy.float32)
    completed = 0

    for i in range(self.iterations):
        # NOTE(review): this seeds CuPy's generator, but the permutation below
        # is drawn from NumPy's global generator, so it is not affected by
        # the seed -- confirm whether reproducible shuffling was intended.
        cupy.random.seed(i + 1)
        random_idx_list = cupy.array(numpy.random.permutation(N))

        error_sum = 0
        for j in range(batch_count):
            init = j * self.batch_size
            ending = (j + 1) * self.batch_size
            idxs = random_idx_list[init:ending]

            # Flush near-zero weights to exactly zero.
            weight_matrix[cupy.abs(weight_matrix) < 0.0000001] = 0
            weight_matrix_square = weight_matrix * weight_matrix

            # prediction = 0.5 * sum_f ((x . v_f)^2 - (x^2 . v_f^2))
            tensor_of_proto_vx = cupy.tensordot(
                tensor_of_x_features[idxs], weight_matrix, axes=1)
            tensor_of_proto_square = cupy.tensordot(
                tensor_of_x_features_squared[idxs], weight_matrix_square, axes=1)
            vector_of_prediction = cupy.tensordot(
                (tensor_of_proto_vx * tensor_of_proto_vx) - tensor_of_proto_square,
                vector_of_sum, axes=1).sum(axis=1) * 0.5

            b = training_targets[idxs] - vector_of_prediction
            error_sum = error_sum + cupy.mean(b)
            vector_of_gradient = -2 * b
            vrau = cupy.tensordot(tensor_of_x_squared[idxs], weight_matrix, axes=1)
            update_step = ((vector_of_gradient.T * vrau.T).T).sum(axis=0) \
                + weight_matrix_square * self.regularization

            # ADAGRAD UPDATE
            # NOTE(review): no epsilon in the denominator; an entry whose
            # accumulated squared gradient is still zero yields 0/0.
            historical_gradient += update_step * update_step
            weight_matrix -= self.alpha / (cupy.sqrt(historical_gradient)) * update_step

        # ``error_sum`` lives on the device; convert it explicitly before it
        # is stored in the host-side error history.
        error_iter_array[i] = float(error_sum) / batch_count
        completed = i + 1

        current_error = abs(error_iter_array[i])
        if abs(current_error - last_iteration_error) < self.iteration_patience_threshold:
            patience_counter = patience_counter + 1
        else:
            patience_counter = 0  # RESET
        if patience_counter == self.iteration_patience:
            break
        last_iteration_error = current_error

    # Average only over the iterations that actually ran.
    return weight_matrix, error_iter_array[:completed].mean(), error_iter_array
def spearmanr(x, y, axis=1, top_k=None):
    """GPU implementation of Spearman R correlation coefficient for paired data with NaN support

    The inputs are copied, their diagonals are replaced by NaN so that the
    diagonal entries are ignored, both arrays are ranked along ``axis`` and
    the ranks are correlated pairwise. When ``top_k`` is given, every entry
    whose rank in ``x`` lies above the ``top_k``-th unique rank is dropped
    from both arrays first.

    Parameters
    ----------
    x : array_like
        The baseline array of values. Objects exposing ``.values`` (e.g. data
        frames) are unwrapped first.
    y : array_like
        The comparison array of values; must have the same shape as ``x``.
    axis : {None, int}, optional
        Axis along which to perform the ranking. Default is 1 -- samples in rows, observations in columns.
        Any value other than 0 is treated as the row-wise layout when the
        input shape is interpreted.
    top_k : {int}
        kth unique value to be found. Ignored when it exceeds the number of
        observations (diagonal excluded).

    Returns
    -------
    spearmanr_array : cupy ndarray
        Array of spearmanr rank correlation values
    """
    if hasattr(x, 'values'):
        x = x.values
    x = cupy.array(x, copy=True)  # copy: the diagonal is overwritten below

    if hasattr(y, 'values'):
        y = y.values
    y = cupy.array(y, copy=True)

    assert x.ndim <= 2
    assert x.shape == y.shape

    # Promote 1-D input to a single sample laid out along ``axis``.
    if x.ndim < 2:
        if axis == 0:
            x = x[:, None]
            y = y[:, None]
        else:
            x = x[None, :]
            y = y[None, :]

    if axis == 0:
        n_obs, n_samples = x.shape
    else:
        n_samples, n_obs = x.shape

    n_obs -= 1  # the diagonal entry is not an observation
    assert n_obs > 2

    msg = 'Calculating Spearman correlation coefficient on {} molecules'.format(
        n_samples)
    if top_k is not None:
        msg += ' with selection of top {} molecules'.format(top_k)
    logger.info(msg + ' ...')

    # Force diagonal to be last in ranking so it can be ignored.
    # ``cupy.nan`` is the canonical spelling; the ``NaN`` alias mirrors a
    # NumPy name that newer releases no longer provide.
    cupy.fill_diagonal(x, cupy.nan)
    cupy.fill_diagonal(y, cupy.nan)

    ranks_x = rankdata(x, axis=axis, method='average', na_option='keep')
    ranks_y = rankdata(y, axis=axis, method='average', na_option='keep')

    # cudf does not currently preserve the NaNs, even with na_option='keep' so add them back
    cupy.fill_diagonal(ranks_x, cupy.nan)
    cupy.fill_diagonal(ranks_y, cupy.nan)

    # Filter out values above top k
    if top_k is not None and top_k <= n_obs:
        top_k_values = get_kth_unique_value(ranks_x, top_k, axis=axis)
        mask = ranks_x > top_k_values
        ranks_x[mask] = cupy.nan
        ranks_y[mask] = cupy.nan

    spearmanr_array = corr_pairwise(ranks_x, ranks_y, return_pearson=True).squeeze()
    return spearmanr_array