def setdiag(self, values, k=0):
    """Set diagonal or off-diagonal elements of the array.

    Existing stored entries that lie on the part of the diagonal being
    written are dropped and the new entries are appended, so the matrix is
    no longer in canonical format afterwards.

    Args:
        values (ndarray or scalar): New values of the diagonal elements.
            Values may have any length. If the diagonal is longer than
            values, then the remaining diagonal entries will not be set.
            If values are longer than the diagonal, then the remaining
            values are ignored. If a scalar value is given, all of the
            diagonal is set to it. Anything that is not already an array
            (Python scalars, sequences) is converted with
            ``cupy.asarray``.
        k (int, optional): Which off-diagonal to set, corresponding to
            elements a[i,i+k]. Default: 0 (the main diagonal).

    Raises:
        ValueError: If ``k`` points outside of the matrix.
    """
    M, N = self.shape
    if (k > 0 and k >= N) or (k < 0 and -k >= M):
        raise ValueError("k exceeds matrix dimensions")

    # The code below relies on ``values.ndim``; a plain Python scalar or
    # list has no such attribute, so coerce to an array first.
    values = cupy.asarray(values)
    if values.ndim and not len(values):
        # Empty sequence: nothing to write.
        return

    idx_dtype = self.row.dtype

    # Determine which triples to keep and where to put the new ones.
    full_keep = self.col - self.row != k
    if k < 0:
        max_index = min(M + k, N)
        if values.ndim:
            max_index = min(max_index, len(values))
        # Entries on the diagonal but beyond the written range survive.
        keep = cupy.logical_or(full_keep, self.col >= max_index)
        new_row = cupy.arange(-k, -k + max_index, dtype=idx_dtype)
        new_col = cupy.arange(max_index, dtype=idx_dtype)
    else:
        max_index = min(M, N - k)
        if values.ndim:
            max_index = min(max_index, len(values))
        keep = cupy.logical_or(full_keep, self.row >= max_index)
        new_row = cupy.arange(max_index, dtype=idx_dtype)
        new_col = cupy.arange(k, k + max_index, dtype=idx_dtype)

    # Define the array of data consisting of the entries to be added.
    if values.ndim:
        new_data = values[:max_index]
    else:
        new_data = cupy.empty(max_index, dtype=self.dtype)
        new_data[:] = values

    # Update the internal structure.
    self.row = cupy.concatenate((self.row[keep], new_row))
    self.col = cupy.concatenate((self.col[keep], new_col))
    self.data = cupy.concatenate((self.data[keep], new_data))
    self.has_canonical_format = False
def MWU_game_algorithm(payoff_mat, phi=1 / 2, steps_number=10000):
    """Run a multiplicative-weights-update loop on a payoff matrix.

    The row player keeps a mixed strategy ``p_t`` (a ``1 x rows`` row
    vector, initially uniform). At every step the column with the largest
    expected payoff ``p_t @ payoff_mat`` is chosen as the best response,
    counted, and used to reweight ``p_t`` by ``1 - phi * m_t`` where ``m_t``
    is that column of the payoff matrix.

    Args:
        payoff_mat: 2-D array-like payoff matrix.
        phi: Step size used in the multiplicative update.
        steps_number: Number of update steps to perform.

    Returns:
        tuple: ``(p_best, j_distribution, -game_value, game_value)`` where
        ``p_best`` is the row strategy whose best-response payoff was the
        smallest seen, ``j_distribution`` is the normalised histogram of
        best-response columns (``cols x 1``) and ``game_value`` is
        ``p_best @ payoff_mat @ j_distribution``.
    """
    payoff_mat = np.array(payoff_mat)
    rows_number = payoff_mat.shape[0]
    cols_number = payoff_mat.shape[1]

    # Uniform starting strategy for the row player.
    p_t = np.ones((1, rows_number)) / rows_number
    p_best = p_t
    j_sumed = np.zeros((cols_number, 1))
    # Only strategies whose best-response payoff drops below 1 replace the
    # initial one.
    smallest_column_payoff = 1

    for _ in range(steps_number):
        payoffs = np.matmul(p_t, payoff_mat)
        j_best_response = np.argmax(payoffs)
        best_payoff = payoffs[0, j_best_response]
        if best_payoff < smallest_column_payoff:
            smallest_column_payoff = best_payoff
            p_best = p_t
        j_sumed[j_best_response] += 1

        m_t = payoff_mat[:, j_best_response]
        # A weight is updated when its payoff entry is negative or when the
        # weight itself is still above SIGNIFICANCE_CONST; all other
        # weights are reset to zero.
        to_update = np.logical_or(m_t < 0, (p_t > SIGNIFICANCE_CONST)[0])
        m_t_updating = np.where(to_update, m_t, 0)
        p_t_updating = np.where(to_update, p_t, 0)
        p_t = np.multiply((1 - phi * m_t_updating), p_t_updating)
        p_t = p_t / p_t.sum()

    j_distribution = j_sumed / j_sumed.sum()
    game_value = np.matmul(np.matmul(p_best, payoff_mat), j_distribution)[0][0]
    return p_best, j_distribution, -game_value, game_value
def _masked_column_median(arr, masked_value):
    """Return the per-column median of 2D ``arr``, skipping ``masked_value``.

    Entries equal to ``masked_value`` (as reported by ``_get_mask``) do not
    take part in the median. A column yields nan when nothing is left after
    masking, or when it contains nan while nan is not the masked value.
    """
    missing = _get_mask(arr, masked_value)
    if arr.size == 0:
        return cp.full(arr.shape[1], cp.nan)

    work = arr.copy()
    if cp.isnan(masked_value):
        # nan itself is the missing marker, so no column is poisoned by it.
        has_nan = cp.full(arr.shape[1], False)
    else:
        # If nan is not the missing value, any column with nans should
        # have a median of nan.
        has_nan = cp.any(cp.isnan(arr), axis=0)
        work[missing] = cp.nan

    # nans are always sorted to the end of the array, which pushes every
    # masked entry below the valid ones.
    work = cp.sort(work, axis=0)

    # Only the valid entries decide where the "halfway" position is.
    valid_counts = arr.shape[0] - missing.sum(axis=0)

    # Columns with no remaining element have an undefined median.
    undefined = cp.logical_or(has_nan, valid_counts <= 0)

    columns = cp.arange(work.shape[1])
    lower_mid = work[cp.floor_divide(valid_counts - 1, 2), columns]
    upper_mid = work[cp.floor_divide(valid_counts, 2), columns]
    median = (lower_mid + upper_mid) / 2
    median[undefined] = cp.nan
    return median
def forward(self, input_encodings, output_units,
            input_masks=None, output_masks=None):
    """Apply masked self-attention, cross-attention and feed-forward.

    Each of the three sub-layers is followed by dropout with ratio
    ``self.p_drop``, a residual addition and a layer normalisation.

    Args:
        input_encodings: Values attended to by ``self.mha`` (used as both
            its second and third argument).
        output_units: Input of this layer; also fixes the shape of the
            self-attention mask.
        input_masks: Optional mask forwarded to ``self.mha`` after an axis
            is inserted at position -2.
        output_masks: Optional mask; its ``.array`` is OR-ed into the
            strictly-upper-triangular self-attention mask after an axis is
            inserted at position -2.

    Returns:
        Output of the last layer normalisation (``self.lnorm3``).
    """
    if input_masks is not None:
        input_masks = F.expand_dims(input_masks, -2)

    # Square the last two axes, then flag everything strictly above the
    # diagonal. ``xp.bool_`` is used because the plain ``bool`` alias was
    # removed from NumPy 1.24.
    # NOTE(review): presumably True marks positions that must not be
    # attended to - confirm against self.mmha.
    mask_shape = list(output_units.shape)
    mask_shape[-1] = mask_shape[-2]
    mask = xp.triu(xp.ones(mask_shape, dtype=xp.bool_), k=1)
    if output_masks is not None:
        output_masks = F.expand_dims(output_masks, -2)
        mask = xp.logical_or(mask, output_masks.array)

    # Sub-layer 1: masked self-attention.
    x1 = F.dropout(
        self.mmha(output_units, output_units, output_units, mask=mask),
        self.p_drop)
    x2 = self.lnorm1(output_units + x1)

    # Sub-layer 2: attention over the input encodings.
    x3 = F.dropout(
        self.mha(x2, input_encodings, input_encodings, mask=input_masks),
        self.p_drop)
    x4 = self.lnorm2(x2 + x3)

    # Sub-layer 3: feed-forward.
    x5 = F.dropout(self.ff(x4), self.p_drop)
    x6 = self.lnorm3(x4 + x5)
    return x6
def FSITM(HDR, LDR, alpha=None):
    """Compute the fraction of pixels whose phase maps agree in sign.

    Phase maps are obtained from ``phasecong100`` for the HDR image, its
    log-compressed version and the LDR image. The result is the share of
    pixels at which the blended HDR and LDR phase maps are both positive or
    both non-positive.

    Args:
        HDR: High dynamic range image. Must contain at least one strictly
            positive value (needed for the logarithm).
        LDR: Low dynamic range image; ``LDR.size`` is used as the pixel
            count.
        alpha: Blend weight between the phase map of the raw image and the
            phase map computed with the ``(2, 2, 2, 2)`` settings. When
            ``None`` it is derived from the pixel count as ``1 - 1/r`` with
            ``r = floor(NumPixels / 2**18)``, or ``0`` when ``r <= 1``.

    Returns:
        The agreement ratio ``Q`` (count of agreeing pixels divided by
        ``LDR.size``).
    """
    NumPixels = LDR.size

    if alpha is None:
        # Pure integer arithmetic: NumPixels is a host-side int, so there
        # is no need to create a device array just to branch on it.
        r = NumPixels // 2 ** 18
        alpha = 1. - (1. / r) if r > 1 else 0.

    # Clamp non-positive values to the smallest positive one before the log.
    minNonzero = cp.min(HDR[HDR > 0])
    LogH = cp.log(cp.maximum(HDR, minNonzero))

    # Rescale to 0..255; float is needed for further calculation.
    # (cp.float64 replaces the removed ``float`` alias.)
    log_min = LogH.min()
    log_max = LogH.max()
    LogH = cp.around((LogH - log_min) * 255. / (log_max - log_min)).astype(cp.float64)

    if alpha > 0.:
        PhaseHDR_CH = phasecong100(HDR, 2, 2, 8, 8)
        PhaseLDR_CH8 = phasecong100(LDR, 2, 2, 8, 8)
    else:
        # alpha == 0 means these terms get zero weight, so skip computing
        # them (this is the small-image case when alpha is derived above).
        PhaseHDR_CH = 0
        PhaseLDR_CH8 = 0

    PhaseLogH = phasecong100(LogH, 2, 2, 2, 2)
    PhaseH = alpha * PhaseHDR_CH + (1 - alpha) * PhaseLogH

    PhaseLDR_CH2 = phasecong100(LDR, 2, 2, 2, 2)
    PhaseL = alpha * PhaseLDR_CH8 + (1 - alpha) * PhaseLDR_CH2

    # Pixels agree when both phases are <= 0 or both are > 0.
    same_sign = cp.logical_or(
        cp.logical_and(PhaseL <= 0, PhaseH <= 0),
        cp.logical_and(PhaseL > 0, PhaseH > 0))
    Q = cp.sum(same_sign) / NumPixels
    return Q
def updateSublattice(self, sublattice):
    """Flip the selected sites of ``self.system`` and refresh the energies.

    A site is flipped when it belongs to ``sublattice`` and either its
    interaction energy exceeds ``self.ground`` or the factor
    ``exp(2 * E / (k * t))`` exceeds a fresh uniform random draw.
    ``self.updateEnergies()`` is called afterwards.
    """
    energies = self.interactionEnergies
    acceptance = np.exp(2 * energies / (self.k * self.t))
    draws = np.random.uniform(0, 1, size=self.spec)

    above_ground = np.greater(energies, self.ground)
    accepted = np.greater(acceptance, draws)
    flip = np.logical_and(sublattice, np.logical_or(above_ground, accepted))

    self.system = np.where(flip, -self.system, self.system)
    self.updateEnergies()
def logical_or(x1: Array, x2: Array, /) -> Array:
    """
    Array API compatible wrapper for :py:func:`np.logical_or <numpy.logical_or>`.

    Both operands must have a boolean dtype, otherwise ``TypeError`` is
    raised. See the NumPy docstring for more information.
    """
    both_boolean = x1.dtype in _boolean_dtypes and x2.dtype in _boolean_dtypes
    if not both_boolean:
        raise TypeError("Only boolean dtypes are allowed in logical_or")

    # The result is discarded: the call is made only so that disallowed
    # type combinations raise.
    _result_type(x1.dtype, x2.dtype)

    lhs, rhs = Array._normalize_two_args(x1, x2)
    combined = np.logical_or(lhs._array, rhs._array)
    return Array._new(combined)
def MWU_game_algorithm_experiment(payoff_mat, phi=1/2, steps_number=10000):
    """Run the multiplicative-weights loop and record metrics at checkpoints.

    The update loop is the same as in ``MWU_game_algorithm``. At steps
    125, 250, 500, ... (doubling each time) the elapsed time and three
    values derived from ``epsilon_value`` are recorded. The time spent
    computing those metrics is excluded from the recorded elapsed times.

    Args:
        payoff_mat: 2-D array-like payoff matrix.
        phi: Step size used in the multiplicative update.
        steps_number: Number of update steps to perform.

    Returns:
        tuple: ``(times, row_row, col_col, row_col)`` - four lists with one
        entry per checkpoint: elapsed seconds, and the maximum of
        ``epsilon_value`` for the (row, row), (column, column) and
        (row, column) strategy pairs.
    """
    payoff_mat = np.array(payoff_mat)
    rows_number = payoff_mat.shape[0]
    cols_number = payoff_mat.shape[1]

    # Uniform starting strategy for the row player.
    p_t = np.ones((1, rows_number)) / rows_number
    p_best = p_t
    j_sumed = np.zeros((cols_number, 1))
    smallest_column_payoff = 1

    start = time.time()
    row_row = []
    col_col = []
    row_col = []
    times = []
    curr_index = 125  # first checkpoint; doubled after every measurement

    for i in range(1, steps_number + 1):
        payoffs = np.matmul(p_t, payoff_mat)
        j_best_response = np.argmax(payoffs)
        best_payoff = payoffs[0, j_best_response]
        if best_payoff < smallest_column_payoff:
            smallest_column_payoff = best_payoff
            p_best = p_t
        j_sumed[j_best_response] += 1

        m_t = payoff_mat[:, j_best_response]
        # Update weights with a negative payoff entry or that are still
        # above SIGNIFICANCE_CONST; the rest are reset to zero.
        to_update = np.logical_or(m_t < 0, (p_t > SIGNIFICANCE_CONST)[0])
        m_t_updating = np.where(to_update, m_t, 0)
        p_t_updating = np.where(to_update, p_t, 0)
        p_t = np.multiply((1 - phi * m_t_updating), p_t_updating)
        p_t = p_t / p_t.sum()

        if i == curr_index:
            j_distribution = j_sumed / j_sumed.sum()
            print(i)
            now = time.time()
            times.append(now - start)
            row_row.append(max(epsilon_value(p_best, np.transpose(p_best), payoff_mat)))
            col_col.append(max(epsilon_value(np.transpose(j_distribution), j_distribution, payoff_mat)))
            row_col.append(max(epsilon_value(p_best, j_distribution, payoff_mat)))
            # Move the reference point forward by the time spent on the
            # metrics above so it is not counted at later checkpoints.
            # (Subtracting here would count that time twice.)
            start += time.time() - now
            curr_index *= 2

    return times, row_row, col_col, row_col
def createWedge(self, subunit_num=0):
    """Build a 0/1 volume marking an angular wedge in every z-slice.

    For each entry of ``self.zline`` a centre angle is derived from
    ``self.com``, the helical parameters and ``subunit_num``. Pixels of
    ``self.radmatrix`` whose value falls inside the window of half-width
    ``360 / (2 * self.num_pfs)`` degrees around that angle are set to 1 in
    the matching slice.

    Returns:
        numpy.ndarray of shape ``self.vol_dim`` holding 0 and 1.
    """
    two_pi = 2*np.pi

    # Starting angle and height of the protofilament to remove.
    start_angle = np.arctan2(self.com[0], self.com[1]) + \
        np.deg2rad(subunit_num*self.twist_per_subunit)
    start_z = (self.com[2] + self.rise_per_subunit*subunit_num)/self.pixel_size

    # Position along the protofilament, measured in subunits.
    subunit_pos = (self.zline.copy() - start_z)*self.pixel_size/self.dimer_repeat_dist

    # Centre angle of the protofilament for every slice.
    centre_angles = np.deg2rad((-self.helical_twist)*subunit_pos) + start_angle

    wedge = np.zeros(self.vol_dim.tolist())

    # Half-width of the wedge.
    half_width = np.deg2rad(360.0/(self.num_pfs*2))

    for slice_index, centre in enumerate(centre_angles):
        # Both window edges wrapped into [-2*pi, 0).
        edge_a = np.remainder(centre - half_width + two_pi, two_pi) - two_pi
        edge_b = np.remainder(centre + half_width + two_pi, two_pi) - two_pi
        low = min([edge_a, edge_b])
        high = max([edge_a, edge_b])

        if high - low > 2*half_width + .2:
            # The window straddles the wrap-around point: select the two
            # outer pieces instead of the span in between.
            selected = np.logical_or(self.radmatrix > high, self.radmatrix < low)
        else:
            selected = np.logical_and(self.radmatrix > low, self.radmatrix < high)
        wedge[slice_index, :, :][selected] = 1

    return wedge
def _zdivide(x, y): """Patched version of :func:`sporco.linalg.zdivide`.""" div = x / y div[cp.logical_or(cp.isnan(div), cp.isinf(div))] = 0 return div
def ravel_multi_index(multi_index, dims, mode='wrap', order='C'):
    """
    Converts a tuple of index arrays into an array of flat indices, applying
    boundary modes to the multi-index.

    Args:
        multi_index (tuple of cupy.ndarray) : A tuple of integer arrays, one
            array for each dimension.
        dims (tuple of ints): The shape of array into which the indices from
            ``multi_index`` apply.
        mode ('raise', 'wrap' or 'clip'), optional: Specifies how
            out-of-bounds indices are handled.  Can specify either one mode
            or a tuple of modes, one mode per index:

            - *'raise'* -- raise an error
            - *'wrap'* -- wrap around (default)
            - *'clip'* -- clip to the range

            In 'clip' mode, a negative index which would normally wrap will
            clip to 0 instead.
        order ('C' or 'F'), optional: Determines whether the multi-index
            should be viewed as indexing in row-major (C-style) or
            column-major (Fortran-style) order. ``None`` is treated as 'C'.

    Returns:
        raveled_indices (cupy.ndarray): An array of indices into the
        flattened version of an array of dimensions ``dims``.

    .. warning::

        This function may synchronize the device when ``mode == 'raise'``.

    Notes
    -----
    Note that the default `mode` (``'wrap'``) is different than in NumPy.
    This is done to avoid potential device synchronization.

    Examples
    --------
    >>> cupy.ravel_multi_index(cupy.asarray([[3,6,6],[4,5,1]]), (7,6))
    array([22, 41, 37])
    >>> cupy.ravel_multi_index(cupy.asarray([[3,6,6],[4,5,1]]), (7,6),
    ...                        order='F')
    array([31, 41, 13])
    >>> cupy.ravel_multi_index(cupy.asarray([[3,6,6],[4,5,1]]), (4,6),
    ...                        mode='clip')
    array([22, 23, 19])
    >>> cupy.ravel_multi_index(cupy.asarray([[3,6,6],[4,5,1]]), (4,4),
    ...                        mode=('clip', 'wrap'))
    array([12, 13, 13])
    >>> cupy.ravel_multi_index(cupy.asarray((3,1,4,1)), (6,7,8,9))
    array(1621)

    .. seealso:: :func:`numpy.ravel_multi_index`, :func:`unravel_index`
    """
    ndim = len(dims)
    if len(multi_index) != ndim:
        raise ValueError(
            "parameter multi_index must be a sequence of "
            "length {}".format(ndim))

    for extent in dims:
        if not isinstance(extent, numbers.Integral):
            raise TypeError(
                "{} object cannot be interpreted as an integer".format(
                    type(extent)))

    if isinstance(mode, str):
        # A single mode applies to every axis.
        mode = (mode, ) * ndim

    if functools.reduce(operator.mul, dims) > cupy.iinfo(cupy.int64).max:
        raise ValueError("invalid dims: array size defined by dims is larger "
                         "than the maximum possible size")

    # Pick the axis order in which the stride grows: last-to-first for
    # row-major, first-to-last for column-major.
    if order is None:
        order = "C"
    if order == "C":
        axis_order = range(ndim - 1, -1, -1)
    elif order == "F":
        axis_order = range(ndim)
    else:
        raise TypeError("order not understood")

    ravel_strides = [1] * ndim
    running = 1
    for axis in axis_order:
        ravel_strides[axis] = running
        running = running * dims[axis]

    multi_index = cupy.broadcast_arrays(*multi_index)
    raveled_indices = cupy.zeros(multi_index[0].shape, dtype=cupy.int64)
    for extent, stride, idx, axis_mode in zip(
            dims, ravel_strides, multi_index, mode):
        if not isinstance(idx, cupy.ndarray):
            raise TypeError("elements of multi_index must be cupy arrays")
        if not cupy.can_cast(idx, cupy.int64, 'same_kind'):
            raise TypeError(
                'multi_index entries could not be cast from dtype(\'{}\') to '
                'dtype(\'{}\') according to the rule \'same_kind\''.format(
                    idx.dtype, cupy.int64().dtype))
        idx = idx.astype(cupy.int64, copy=False)

        if axis_mode == "raise":
            # Evaluating the condition needs the result on the host.
            if cupy.any(cupy.logical_or(idx >= extent, idx < 0)):
                raise ValueError("invalid entry in coordinates array")
        elif axis_mode == "clip":
            idx = cupy.clip(idx, 0, extent - 1)
        elif axis_mode == 'wrap':
            idx = idx % extent
        else:
            raise TypeError("Unrecognized mode: {}".format(axis_mode))
        raveled_indices += stride * idx
    return raveled_indices
def evaluate_chunks(
        results: [cp.ndarray, cp.ndarray, cp.ndarray],  # closest triangle, distance, projection
        all_pts: cp.ndarray = None,
        vertices: cp.ndarray = None,
        edges: cp.ndarray = None,
        edge_norms: cp.ndarray = None,
        edge_normssq: cp.ndarray = None,
        normals: cp.ndarray = None,
        norms: cp.ndarray = None,
        normssq: cp.ndarray = None,
        zero_tensor: cp.ndarray = None,
        one_tensor: cp.ndarray = None,
        tris: cp.ndarray = None,
        vertex_normals: cp.ndarray = None,
        bounding_box: dict = None,
        chunk_size: int = None,
        num_verts: int = None) -> None:
    """Fill ``results`` with signed point-to-mesh distances, chunk by chunk.

    Query points are processed ``chunk_size`` at a time. For every chunk
    the closest point on each triangle is computed (interior projection, or
    projection onto one of the three edges when a barycentric coordinate is
    negative), the nearest triangle is selected, and the distance is
    negated for points classified as inside.

    Only ``all_pts.shape[0] // chunk_size`` full chunks are processed; a
    trailing partial chunk is left untouched in ``results``.

    Args:
        results: Three pre-allocated arrays written in place:
            ``results[0]`` closest triangle index, ``results[1]`` signed
            distance, ``results[2]`` projection onto the closest triangle.
        all_pts: Query points, one per row.
        vertices, edges, normals: Per-triangle geometry indexed as
            ``[triangle, vertex/edge, querypoint, coordinate]``.
            NOTE(review): the indexing implies the caller has already tiled
            these along the query-point axis to ``chunk_size`` - confirm.
        edge_normssq: Squared edge lengths, indexed ``[:, edge]``.
        normssq: Divisor applied to the un-normalised barycentric
            coordinates.
        zero_tensor, one_tensor: Bounds compared against the barycentric
            coordinates.
        tris: Index array used to gather ``vertex_normals`` per triangle.
        vertex_normals: Optional per-vertex normals; when given, the sign
            test uses them weighted by ``xi`` instead of the face normal.
        bounding_box: Optional dict with ``'rotation_matrix'``,
            ``'translation_vector'`` and ``'size'``; points outside of it
            are never marked as inside.
        chunk_size: Number of query points per chunk.
        num_verts: Repeat count used for the first axis when tiling the
            query points. NOTE(review): that axis is labelled
            triangle_index in the comments below - confirm the naming.
        edge_norms, norms: Unused by this function.
    """
    #
    # Expand vertex normals if non empty
    if vertex_normals is not None:
        vertex_normals = vertex_normals[tris]
        vertex_normals = cp.tile(cp.expand_dims(vertex_normals, axis=2),
                                 (1, 1, chunk_size, 1))

    #
    # Load and extend the batch
    num_chunks = all_pts.shape[0] // chunk_size

    # Query-point positions inside a chunk; loop-invariant and kept on the
    # device so that indexing below does not mix host and device arrays.
    query_index = cp.arange(chunk_size)

    for i in range(num_chunks):
        #
        # Get subset of the query points
        start_index = i * chunk_size
        end_index = (i + 1) * chunk_size
        pts = all_pts[start_index:end_index, :]

        #
        # Match the dimensions to those assumed above.
        #    REPEATED      REPEATED
        # [triangle_index, vert_index, querypoint_index, coordinates]
        pts = cp.tile(cp.expand_dims(pts, axis=(0, 1)), (num_verts, 3, 1, 1))

        #
        # Compute the differences between
        # vertices on each triangle and the
        # points of interest
        #
        # [triangle_index, vert_index, querypoint_index, coordinates]
        # ===================
        # [:,0,:,:] = p - p1
        # [:,1,:,:] = p - p2
        # [:,2,:,:] = p - p3
        diff_vectors = pts - vertices

        #
        # Compute alpha, beta, gamma
        barycentric = cp.empty(diff_vectors.shape)

        #
        # gamma = u x (p - p1)
        barycentric[:, 2, :, :] = cp.cross(edges[:, 0, :, :], diff_vectors[:, 0, :, :])
        # beta = (p - p1) x v
        barycentric[:, 1, :, :] = cp.cross(diff_vectors[:, 0, :, :], edges[:, 1, :, :])
        # alpha = w x (p - p2)
        barycentric[:, 0, :, :] = cp.cross(edges[:, 2, :, :], diff_vectors[:, 1, :, :])
        barycentric = cp.divide(
            cp.sum(cp.multiply(barycentric, normals), axis=3), normssq)

        #
        # Test conditions
        less_than_one = cp.less_equal(barycentric, one_tensor)
        more_than_zero = cp.greater_equal(barycentric, zero_tensor)

        #
        #     if 0 <= gamma and gamma <= 1
        #    and 0 <= beta and beta <= 1
        #    and 0 <= alpha and alpha <= 1:
        cond1 = cp.logical_and(less_than_one, more_than_zero)

        #
        #     if gamma <= 0:
        cond2 = cp.logical_not(more_than_zero[:, 2, :])
        cond2 = cp.tile(cp.expand_dims(cond2, axis=1), (1, 3, 1))

        #
        #     if beta <= 0:
        cond3 = cp.logical_not(more_than_zero[:, 1, :])
        cond3 = cp.tile(cp.expand_dims(cond3, axis=1), (1, 3, 1))

        #
        #     if alpha <= 0:
        cond4 = cp.logical_not(more_than_zero[:, 0, :])
        cond4 = cp.tile(cp.expand_dims(cond4, axis=1), (1, 3, 1))

        #
        # Get the projections for each case
        xi = cp.empty(barycentric.shape)
        barycentric_ext = cp.tile(cp.expand_dims(barycentric, axis=3), (1, 1, 1, 3))
        proj = cp.sum(cp.multiply(barycentric_ext, vertices), axis=1)
        #
        #     if 0 <= gamma and gamma <= 1
        #    and 0 <= beta and beta <= 1
        #    and 0 <= alpha and alpha <= 1:
        xi[cond1] = barycentric[cond1]

        #
        # if gamma <= 0:
        #  x = p - p1
        #  u = p2 - p1
        #  a = p1
        #  b = p2
        t2 = cp.divide(
            #
            # u.dot(x)
            cp.sum(cp.multiply(edges[:, 0, :, :], diff_vectors[:, 0, :, :]), axis=2),
            edge_normssq[:, 0])
        xi2 = cp.zeros((t2.shape[0], 3, t2.shape[1]))
        xi2[:, 0, :] = -t2 + 1
        xi2[:, 1, :] = t2
        # Repeat the edge parameter over the coordinate axis, then clamp
        # the projection to the two end points of the edge.
        t2 = cp.tile(cp.expand_dims(t2, axis=2), (1, 1, 3))
        lz = cp.less(t2, cp.zeros(t2.shape))
        go = cp.greater(t2, cp.ones(t2.shape))
        proj2 = vertices[:, 0, :, :] + cp.multiply(t2, edges[:, 0, :, :])
        proj2[lz] = vertices[:, 0, :, :][lz]
        proj2[go] = vertices[:, 1, :, :][go]
        #
        xi[cond2] = xi2[cond2]
        proj[cp.swapaxes(cond2, 1, 2)] = proj2[cp.swapaxes(cond2, 1, 2)]

        #
        # if beta <= 0:
        #  x = p - p1
        #  v = p3 - p1
        #  a = p1
        #  b = p3
        t3 = cp.divide(
            #
            # v.dot(x)
            cp.sum(cp.multiply(edges[:, 1, :, :], diff_vectors[:, 0, :, :]), axis=2),
            edge_normssq[:, 1])
        xi3 = cp.zeros((t3.shape[0], 3, t3.shape[1]))
        xi3[:, 0, :] = -t3 + 1
        xi3[:, 2, :] = t3
        #
        t3 = cp.tile(cp.expand_dims(t3, axis=2), (1, 1, 3))
        lz = cp.less(t3, cp.zeros(t3.shape))
        go = cp.greater(t3, cp.ones(t3.shape))
        proj3 = vertices[:, 0, :, :] + cp.multiply(t3, edges[:, 1, :, :])
        proj3[lz] = vertices[:, 0, :, :][lz]
        proj3[go] = vertices[:, 2, :, :][go]
        #
        xi[cond3] = xi3[cond3]
        proj[cp.swapaxes(cond3, 1, 2)] = proj3[cp.swapaxes(cond3, 1, 2)]

        #
        #     if alpha <= 0:
        #  y = p - p2
        #  w = p3 - p2
        #  a = p2
        #  b = p3
        t4 = cp.divide(
            #
            # w.dot(y)
            cp.sum(cp.multiply(edges[:, 2, :, :], diff_vectors[:, 1, :, :]), axis=2),
            edge_normssq[:, 2])
        xi4 = cp.zeros((t4.shape[0], 3, t4.shape[1]))
        xi4[:, 1, :] = -t4 + 1
        xi4[:, 2, :] = t4
        #
        t4 = cp.tile(cp.expand_dims(t4, axis=2), (1, 1, 3))
        lz = cp.less(t4, cp.zeros(t4.shape))
        go = cp.greater(t4, cp.ones(t4.shape))
        proj4 = vertices[:, 1, :, :] + cp.multiply(t4, edges[:, 2, :, :])
        proj4[lz] = vertices[:, 1, :, :][lz]
        proj4[go] = vertices[:, 2, :, :][go]
        #
        xi[cond4] = xi4[cond4]
        proj[cp.swapaxes(cond4, 1, 2)] = proj4[cp.swapaxes(cond4, 1, 2)]

        vec_to_point = pts[:, 0, :, :] - proj
        distances = cp.linalg.norm(vec_to_point, axis=2)

        min_distances = cp.min(distances, axis=0)
        closest_triangles = cp.argmin(distances, axis=0)
        projections = proj[closest_triangles, query_index, :]

        #
        # Distinguish close triangles
        is_close = cp.isclose(distances, min_distances)

        #
        # Determine sign
        signed_normal = normals[:, 0, :, :]
        if vertex_normals is not None:
            signed_normal = cp.sum(vertex_normals.transpose() * xi.transpose(),
                                   axis=2).transpose()

        is_negative = cp.less_equal(
            cp.sum(cp.multiply(vec_to_point, signed_normal), axis=2), 0.)

        #
        # Combine
        is_close_and_negative = cp.logical_and(is_close, is_negative)

        #
        # Determine if inside: every triangle that ties for the minimum
        # distance must see the point on its negative side.
        is_inside = cp.all(cp.logical_or(is_close_and_negative,
                                         cp.logical_not(is_close)), axis=0)

        #
        # Overwrite the signs of points
        # that are outside of the box
        if bounding_box is not None:
            #
            # Extract
            rotation_matrix = cp.asarray(bounding_box['rotation_matrix'])
            translation_vector = cp.asarray(bounding_box['translation_vector'])
            size = cp.asarray(bounding_box['size'])
            #
            # Transform
            transformed_pts = cp.dot(
                all_pts[start_index:end_index, :] - translation_vector,
                rotation_matrix)

            #
            # Determine if outside bbox
            inside_bbox = cp.all(cp.logical_and(
                cp.less_equal(0., transformed_pts),
                cp.less_equal(transformed_pts, size)), axis=1)

            #
            # Treat points outside bbox as
            # being outside of lumen
            is_inside = cp.logical_and(is_inside, inside_bbox)

        #
        # Apply sign to indicate whether the distance is
        # inside or outside the mesh.
        min_distances[is_inside] = -1 * min_distances[is_inside]

        #
        # Emplace results
        # [triangle_index, vert_index, querypoint_index, coordinates]
        results[0][start_index:end_index] = closest_triangles
        results[1][start_index:end_index] = min_distances
        results[2][start_index:end_index, :] = projections
def _zdivide(x, y): """Patched version of :func:`sporco.linalg.zdivide`.""" div = x / y div[cp.logical_or(cp.isnan(div), cp.isinf(div))] = 0 return div
# Fragment of a larger routine: i, j, ql, qlk, qli, qlj, dimk, dimi, dimj and
# labels are defined outside of this span. For each of 11 steps the i-index
# array is shifted, the values of ql along the resulting index triple are
# sampled, and labels[i, j, num] is filled from that sample.
for num in cp.arange(0, 11, 1):
    # Shift the sampling indices forward along the i axis.
    # NOTE(review): the original comment said "6 entries forward in
    # y-direction", but the shift added here is num and accumulates over
    # iterations (0, 1, 3, 6, ...) - confirm the intended step.
    qli += num
    # make dem labels
    try:
        ql_ray = ql[qlk, qli, qlj]
    # if any entry in qlk, qli, qlj outside respective
    # array dimension sizes, get rid of those entries.
    # NOTE(review): this bare except catches every exception, not only
    # index errors. CuPy documents that out-of-range integer-array indices
    # do not raise like NumPy - confirm this handler can actually fire.
    except:
        # NOTE(review): if dimk/dimi/dimj are axis lengths, an index equal
        # to the length is also out of range (">" vs ">=") - confirm.
        getbad_k = qlk > dimk
        getbad_i = qli > dimi
        getbad_j = qlj > dimj
        bad_ki = cp.logical_or(getbad_k, getbad_i)
        bad_kij = cp.logical_or(bad_ki, getbad_j)
        qlk = qlk[~bad_kij]
        qli = qli[~bad_kij]
        qlj = qlj[~bad_kij]
        # Then make labels
        ql_ray = ql[qlk, qli, qlj]
    if ql_ray.size == 0:
        # Nothing left to sample along this ray.
        labels[i, j, num] = cp.nan
    elif cp.all(~ql_ray) == True:
        # Every sampled entry is falsy.
        labels[i, j, num] = 1
    else:
        # Position of the first maximal entry along the ray; the code that
        # consumes idx lies outside of this span.
        idx = cp.argmax(ql_ray)
def _cuda_bccg(f: typing.Callable, b: typing.Sequence, tol: float, max_it: int, x0: typing.Sequence,
               min_pressure: float = 0.0, max_pressure: typing.Union[float, typing.Sequence] = cp.inf,
               k_inn=1) -> typing.Tuple[cp.ndarray, bool]:
    """
    The Bound-Constrained Conjugate Gradient Method for Non-negative Matrices
    CUDA implementation

    Parameters
    ----------
    f: Callable
        A function equivalent to multiplication by a non negative n by n matrix must work with cupy arrays.
        Typically this function will be generated by slippy.contact.plan_convolve, this will guarantee
        compatibility with different versions of this function (FFTW and CUDA).
    b: array
        1 by n array of displacements
    tol: float
        The tolerance on the result
    max_it: int
        The maximum number of iterations used
    x0: array
        An initial guess of the solution
    min_pressure: float, optional (0)
        The minimum allowable pressure at each node, defaults to 0. In this implementation it is only used
        to clip the initial guess; the lower bound enforced during the iterations is 0.
    max_pressure: float, optional (inf)
        The maximum allowable pressure at each node, defaults to inf, for purely elastic contacts. May be
        a single value or one value per node.
    k_inn: int, optional (1)
        Number of inner iterations between projections onto the feasible domain. An iteration is treated
        as an outer iteration (projection and bound release are performed) once this many inner
        iterations have run, or as soon as the relative update falls below tol.

    Returns
    -------
    x: cp.array
        The solution to the system f(x)-b = 0 with the constraints applied.
    failed: bool
        True if the loop stopped because no free nodes remained or max_it was exceeded, False if the
        convergence criterion was met.

    Notes
    -----
    This function uses the method described in the reference below, with some modification.
    Firstly, this method allows both a minimum and maximum force to be set simulating quasi plastic regimes.
    The code has also been optimised in several places and importantly this version has also been modified
    to run on a GPU through cupy.

    If you do not have a CUDA compatible GPU, slippy can be imported while falling back to the fftw version
    by first importing slippy then patching the CUDA variable to False:

    >>> import slippy
    >>> slippy.CUDA = False
    >>> import slippy.contact
    >>> ...

    Though this should happen automatically if you don't have cupy installed.

    References
    ----------
    Vollebregt, E.A.H. The Bound-Constrained Conjugate Gradient Method for Non-negative Matrices. J Optim
    Theory Appl 162, 931–953 (2014). https://doi.org/10.1007/s10957-013-0499-x
    """
    # if you use np or most built ins in this function at all it will slow it down a lot!
    # A scalar upper bound converts to float; a per-node bound raises
    # TypeError and is moved into a cupy array instead.
    try:
        float(max_pressure)
        max_is_float = True
    except TypeError:
        max_is_float = False
        max_pressure = cp.array(max_pressure)

    # initialize
    b = cp.asarray(b)
    x = cp.clip(cp.asarray(x0), min_pressure, max_pressure)
    g = f(x) - b
    # Nodes sitting on a bound whose gradient pushes further into it are
    # held fixed ("bound"); everything else is free.
    msk_bnd_0 = cp.logical_and(x <= 0, g >= 0)
    msk_bnd_max = cp.logical_and(x >= max_pressure, g <= 0)
    n_bound = cp.sum(msk_bnd_0) + cp.sum(msk_bnd_max)
    n = b.size
    n_free = n - n_bound
    small = 1e-14
    it = 0
    it_inn = 0
    rho_prev = cp.nan
    rho = 0.0
    r, p, r_prev = 0, 0, 0
    failed = False

    while True:
        it += 1
        it_inn += 1
        x_prev = x
        if it > 1:
            r_prev = r
            rho_prev = rho
        # Residual restricted to the free nodes.
        r = -g
        r[msk_bnd_0] = 0
        r[msk_bnd_max] = 0
        rho = cp.dot(r, r)
        if it > 1:
            # Search-direction update; a negative beta is replaced by 0.
            beta_pr = (rho - cp.dot(r, r_prev)) / rho_prev
            p = r + max([beta_pr, 0])*p
        else:
            p = r
        p[msk_bnd_0] = 0
        p[msk_bnd_max] = 0
        # compute tildex optimisation ignoring the bounds
        q = f(p)
        if it_inn < k_inn:
            q[msk_bnd_0] = cp.nan
            q[msk_bnd_max] = cp.nan
        alpha = cp.dot(r, p) / cp.dot(p, q)
        x = x + alpha * p

        # Relative size of this update, used as the convergence measure.
        rms_xk = cp.linalg.norm(x) / cp.sqrt(n_free)
        rms_upd = cp.linalg.norm(x - x_prev) / cp.sqrt(n_free)
        upd = rms_upd / rms_xk

        # project onto feasible domain
        changed = False
        outer_it = it_inn >= k_inn or upd < tol

        if outer_it:
            msk_prj_0 = x < -small
            if cp.any(msk_prj_0):
                x[msk_prj_0] = 0
                msk_bnd_0[msk_prj_0] = True
                changed = True
            msk_prj_max = x >= max_pressure * (1 + small)
            if cp.any(msk_prj_max):
                if max_is_float:
                    x[msk_prj_max] = max_pressure
                else:
                    x[msk_prj_max] = max_pressure[msk_prj_max]
                msk_bnd_max[msk_prj_max] = True
                changed = True

        # Recompute the gradient from scratch after a projection (or on
        # every outer iteration when k_inn > 1); otherwise update it
        # incrementally.
        if changed or (outer_it and k_inn > 1):
            g = f(x) - b
        else:
            g = g + alpha * q

        check_grad = outer_it

        if check_grad:
            # Release bound nodes whose gradient now points back into the
            # feasible region.
            msk_rel = cp.logical_or(cp.logical_and(msk_bnd_0, g < -small),
                                    cp.logical_and(msk_bnd_max, g > small))
            if cp.any(msk_rel):
                msk_bnd_0[msk_rel] = False
                msk_bnd_max[msk_rel] = False
                changed = True

        if changed:
            n_free = n - cp.sum(msk_bnd_0) - cp.sum(msk_bnd_max)

        if not n_free:
            print("No free nodes")
            warnings.warn("No free nodes for BCCG iterations")
            failed = True
            break

        if outer_it:
            it_inn = 0

        if it > max_it:
            print("Max iterations")
            warnings.warn("Bound constrained conjugate gradient iterations failed to converge")
            failed = True
            break

        # Converged: an outer iteration that changed no bound set and whose
        # relative update is below the tolerance.
        if outer_it and (not changed) and upd < tol:
            break

    return x, bool(failed)