def partial_trace_cupy(rho: cupy.ndarray, retain_qubits) -> cupy.ndarray:
    """
    Compute the partial trace of rho.

    Args:
        rho: input density matrix.
        retain_qubits: the qubits to keep after the partial trace.
    """
    if len(retain_qubits) == 0:
        return trace(rho)

    total_qb = int(math.log2(rho.shape[0]))
    assert min(retain_qubits) >= 0 and max(retain_qubits) < total_qb
    if total_qb == 1 or len(retain_qubits) == total_qb:
        return rho

    all_qbs = list(range(total_qb))
    qbs_to_remove = list(filter(lambda x: x not in retain_qubits, all_qbs))
    rho = rho.reshape([2] * (2 * total_qb))
    for qid in reversed(qbs_to_remove):
        rho = trace(rho, axis1=qid, axis2=qid + total_qb)
        total_qb -= 1

    # reshape back to a normal density matrix
    newshape = 2 ** total_qb
    return rho.reshape(newshape, newshape)
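# Hedged usage sketch (not part of the original module): assuming `cupy` is
# available and `trace` is cupy.trace, this traces qubit 1 out of a two-qubit
# Bell state and should recover the maximally mixed single-qubit state.
import cupy

bell = cupy.zeros(4, dtype=cupy.complex128)
bell[0] = bell[3] = 2 ** -0.5                 # (|00> + |11>) / sqrt(2)
rho_bell = cupy.outer(bell, bell.conj())      # full 4x4 density matrix
reduced = partial_trace_cupy(rho_bell, retain_qubits=[0])
print(reduced)                                # expected: diag(0.5, 0.5)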
def partial_trace_wf_cupy(iwf: cupy.ndarray, retain_qubits):
    nqb = int(math_log2(iwf.shape[0]))
    if len(retain_qubits) == nqb:
        return outer(iwf, iwf.conj())

    iwf = iwf.reshape([2] * nqb, order="C")
    retain_qubits = sorted(retain_qubits)
    # Move the retained qubit axes to the front of the tensor, then flatten
    # and delegate to the keep-first kernel.
    for idx in range(len(retain_qubits)):
        r = retain_qubits[idx]
        if idx != r:
            iwf = iwf.swapaxes(idx, r)
    iwf = iwf.reshape((2 ** nqb,))
    return partial_trace_wf_keep_first_cupy(iwf, len(retain_qubits))
def fast_forward_one(
        prev_x: cp.ndarray,
        prev_l: cp.ndarray,
        hidden: cp.ndarray,
        x_embedder_W: cp.ndarray,
        gru_xw: cp.ndarray,
        gru_hw: cp.ndarray,
        gru_xb: cp.ndarray,
        gru_hb: cp.ndarray,
        O1_W: cp.ndarray,
        O1_b: cp.ndarray,
        O2_W: cp.ndarray,
        O2_b: cp.ndarray,
        w_gru_x: cp.ndarray,
        w_gru_h: cp.ndarray,
        w_out_x1: cp.ndarray,
        w_out_x2: cp.ndarray,
):
    prev_xl = cp.concatenate((x_embedder_W[prev_x], prev_l), axis=1)  # (batch_size, ?)

    # gru_x = prev_xl.dot(gru_xw) + gru_xb
    gru_x = w_gru_x
    prev_xl.dot(gru_xw, gru_x)
    gru_x += gru_xb

    # gru_h = hidden.dot(gru_hw) + gru_hb
    gru_h = w_gru_h
    hidden.dot(gru_hw, gru_h)
    gru_h += gru_hb

    size = gru_x.shape[1] // 3
    W_r_x, W_z_x, W_x = gru_x[:, :size], gru_x[:, size:size * 2], gru_x[:, size * 2:]
    U_r_h, U_z_h, U_x = gru_h[:, :size], gru_h[:, size:size * 2], gru_h[:, size * 2:]
    new_hidden = gru_element_wise(hidden, W_r_x, W_z_x, W_x, U_r_h, U_z_h, U_x)

    # out_x = new_hidden.dot(O1_W) + O1_b
    out_x1 = w_out_x1
    new_hidden.dot(O1_W, out_x1)
    out_x1 += O1_b
    cp.maximum(out_x1, 0.0, out_x1)

    # out_x = out_x.dot(O2_W) + O2_b
    out_x2 = w_out_x2
    out_x1.dot(O2_W, out_x2)
    out_x2 += O2_b
    return out_x2, new_hidden
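# Hedged sketch of the preallocated-workspace pattern used above (assuming
# CuPy's ndarray.dot accepts an output array): writing the matmul result into
# a reused buffer avoids reallocating activations on every decoding step.
# All names below are illustrative, not part of the original module.
import cupy as cp

x = cp.random.rand(4, 8).astype(cp.float32)
W = cp.random.rand(8, 16).astype(cp.float32)
b = cp.random.rand(16).astype(cp.float32)
workspace = cp.empty((4, 16), dtype=cp.float32)

x.dot(W, workspace)                    # product is written into `workspace`
workspace += b                         # in-place bias add
cp.maximum(workspace, 0.0, workspace)  # in-place ReLU, as in fast_forward_one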
def _run_cupy(data: cupy.ndarray) -> cupy.ndarray:
    data = data.astype(cupy.float32)
    griddim, blockdim = cuda_args(data.shape)
    out = cupy.empty(data.shape, dtype='f4')
    out[:] = cupy.nan
    _run_gpu[griddim, blockdim](data, out)
    return out
def partial_trace_wf_keep_first_cupy(iwf: cupy.ndarray, n):
    # TODO: improve the cuda version of partial_trace_wf.
    # For example, cleverly adjust the blockDim to deal with the other case
    assert iwf.flags.c_contiguous
    assert iwf.dtype == default_dtype
    iwf_conj = iwf.conj()
    nqb = int(math_log2(iwf.shape[0]))
    m = nqb - n
    m_idx = 2 ** m
    n_idx = 2 ** n
    rho = zeros(shape=(n_idx, n_idx), dtype=default_dtype, order="C")

    # Here we simply use the threadDim for i, j in the cuda code.
    threads_per_block = 32
    threadDim = (threads_per_block, threads_per_block)
    x = (n_idx + (threads_per_block - 1)) // threads_per_block
    blockDim = (x, x)
    partial_trace_wf_keep_first_cuda(
        grid=blockDim,
        block=threadDim,
        args=(
            iwf,
            iwf_conj,
            rho,
            m,
            m_idx,
            n_idx,
        ),
    )
    return rho
def partial_trace_1d_cupy(rho: cupy.ndarray, retain_qubit: int):
    """
    Compute the partial trace of rho.

    Returns the reduced density matrix on the Hilbert space of the
    `retain_qubit`-th qubit.
    """
    total_qb = int(math.log2(rho.shape[0]))
    if retain_qubit >= total_qb or retain_qubit < 0:
        raise ValueError(retain_qubit)
    if total_qb == 1:
        return rho

    all_qbs = list(range(total_qb))
    qbs_to_remove = list(filter(lambda x: x != retain_qubit, all_qbs))
    assert qbs_to_remove == list(sorted(qbs_to_remove))
    rho = rho.reshape([2] * (2 * total_qb))

    # ret = np.empty(shape=(2,2), dtype=complex)
    ret = None
    for qid in reversed(qbs_to_remove):
        # Remove the qubit with the higher index first; this is crucial,
        # otherwise we would run into indexing problems.
        if ret is None:
            ret = trace(rho, axis1=qid, axis2=qid + total_qb)
        else:
            ret = trace(ret, axis1=qid, axis2=qid + total_qb)
        total_qb -= 1  # removed one already

    assert ret.shape == (2, 2)
    return ret
def remove_small_objects_gpu(mask: cupy.ndarray, min_size: int) -> None:
    """
    See scikit-image remove_small_objects().

    N.B. The input array must be a labeled mask. This is an in-place operation.
    """
    component_sizes = cupy.bincount(mask.ravel())
    too_small = component_sizes < min_size
    too_small_mask = too_small[mask]
    mask[too_small_mask] = 0
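# Hedged usage sketch (names below are illustrative): label a binary mask with
# cupyx.scipy.ndimage.label, then drop components smaller than min_size in place.
import cupy
from cupyx.scipy import ndimage

binary = cupy.asarray([[1, 1, 0, 0],
                       [1, 1, 0, 1],
                       [0, 0, 0, 0]], dtype=cupy.int32)
labels, _ = ndimage.label(binary)              # connected-component labels
remove_small_objects_gpu(labels, min_size=2)   # the isolated pixel is zeroed
print(labels)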
def _run_cupy(data: cupy.ndarray,
              cellsize_x: Union[int, float],
              cellsize_y: Union[int, float]) -> cupy.ndarray:
    cellsize_x_arr = cupy.array([float(cellsize_x)], dtype='f4')
    cellsize_y_arr = cupy.array([float(cellsize_y)], dtype='f4')
    data = data.astype(cupy.float32)
    griddim, blockdim = cuda_args(data.shape)
    out = cupy.empty(data.shape, dtype='f4')
    out[:] = cupy.nan
    _run_gpu[griddim, blockdim](data, cellsize_x_arr, cellsize_y_arr, out)
    return out
def _run_cupy(data: cupy.ndarray, cellsize: Union[int, float]) -> cupy.ndarray:
    data = data.astype(cupy.float32)
    cellsize_arr = cupy.array([float(cellsize)], dtype='f4')
    # TODO: add padding
    griddim, blockdim = cuda_args(data.shape)
    out = cupy.empty(data.shape, dtype='f4')
    out[:] = cupy.nan
    _run_gpu[griddim, blockdim](data, cellsize_arr, out)
    return out
def individual_mutation(individual: cupy.ndarray,
                        mutation_probability: float) -> cupy.ndarray:
    """
    Applies a mutation to the input individual if a random value (drawn from a
    uniform distribution) falls at or below the mutation probability threshold.
    The literal to be mutated is also selected randomly in the case of a
    mutation. If the threshold is not met, the function returns the input
    individual unchanged.

    :param mutation_probability: Between 0 and 1. The probability that an
        individual mutates.
    :param individual: The individual to mutate.
    :return: Either the input individual if the threshold is not met, or the
        mutated individual otherwise.
    """
    if cupy.random.uniform() <= mutation_probability:
        mutated_individual = individual.copy()
        element_to_mutate = cupy.random.randint(
            low=0, high=mutated_individual.shape[0])
        mutated_individual[element_to_mutate] = \
            1 - mutated_individual[element_to_mutate]
        return mutated_individual
    return individual
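# Hedged usage sketch: with probability ~0.3 the returned individual is a copy
# with exactly one bit flipped; otherwise the original array is returned as-is.
import cupy

parent = cupy.array([0, 1, 1, 0, 1])
child = individual_mutation(parent, mutation_probability=0.3)
print(child)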
def evaluate_chunks(
        results: [cp.ndarray, cp.ndarray, cp.ndarray],  # closest triangle, distance, projection
        all_pts: cp.ndarray = None,
        vertices: cp.ndarray = None,
        edges: cp.ndarray = None,
        edge_norms: cp.ndarray = None,
        edge_normssq: cp.ndarray = None,
        normals: cp.ndarray = None,
        norms: cp.ndarray = None,
        normssq: cp.ndarray = None,
        zero_tensor: cp.ndarray = None,
        one_tensor: cp.ndarray = None,
        tris: cp.ndarray = None,
        vertex_normals: cp.ndarray = None,
        bounding_box: dict = None,
        chunk_size: int = None,
        num_verts: int = None) -> None:
    #
    # Expand vertex normals if non empty
    if vertex_normals is not None:
        vertex_normals = vertex_normals[tris]
        vertex_normals = cp.tile(
            cp.expand_dims(vertex_normals, axis=2), (1, 1, chunk_size, 1))

    # begin = time.time()
    #
    # Load and extend the batch
    num_chunks = all_pts.shape[0] // chunk_size
    for i in range(num_chunks):
        #
        # Get subset of the query points
        start_index = i * chunk_size
        end_index = (i + 1) * chunk_size
        pts = all_pts[start_index:end_index, :]

        #
        # Match the dimensions to those assumed above.
        #     REPEATED      REPEATED
        # [triangle_index, vert_index, querypoint_index, coordinates]
        pts = cp.tile(cp.expand_dims(pts, axis=(0, 1)), (num_verts, 3, 1, 1))

        #
        # Compute the differences between
        # vertices on each triangle and the
        # points of interest
        #
        # [triangle_index, vert_index, querypoint_index, coordinates]
        # ===================
        # [:,0,:,:] = p - p1
        # [:,1,:,:] = p - p2
        # [:,2,:,:] = p - p3
        diff_vectors = pts - vertices

        #
        # Compute alpha, beta, gamma
        barycentric = cp.empty(diff_vectors.shape)

        #
        # gamma = u x (p - p1)
        barycentric[:, 2, :, :] = cp.cross(edges[:, 0, :, :],
                                           diff_vectors[:, 0, :, :])
        # beta = (p - p1) x v
        barycentric[:, 1, :, :] = cp.cross(diff_vectors[:, 0, :, :],
                                           edges[:, 1, :, :])
        # alpha = w x (p - p2)
        barycentric[:, 0, :, :] = cp.cross(edges[:, 2, :, :],
                                           diff_vectors[:, 1, :, :])
        barycentric = cp.divide(
            cp.sum(cp.multiply(barycentric, normals), axis=3), normssq)

        #
        # Test conditions
        less_than_one = cp.less_equal(barycentric, one_tensor)
        more_than_zero = cp.greater_equal(barycentric, zero_tensor)

        #
        #     if 0 <= gamma and gamma <= 1
        #    and 0 <= beta and beta <= 1
        #    and 0 <= alpha and alpha <= 1:
        cond1 = cp.logical_and(less_than_one, more_than_zero)

        #
        #     if gamma <= 0:
        cond2 = cp.logical_not(more_than_zero[:, 2, :])
        cond2 = cp.tile(cp.expand_dims(cond2, axis=1), (1, 3, 1))

        #
        #     if beta <= 0:
        cond3 = cp.logical_not(more_than_zero[:, 1, :])
        cond3 = cp.tile(cp.expand_dims(cond3, axis=1), (1, 3, 1))

        #
        #     if alpha <= 0:
        cond4 = cp.logical_not(more_than_zero[:, 0, :])
        cond4 = cp.tile(cp.expand_dims(cond4, axis=1), (1, 3, 1))

        #
        # Get the projections for each case
        xi = cp.empty(barycentric.shape)
        barycentric_ext = cp.tile(cp.expand_dims(barycentric, axis=3),
                                  (1, 1, 1, 3))
        proj = cp.sum(cp.multiply(barycentric_ext, vertices), axis=1)
        #
        #     if 0 <= gamma and gamma <= 1
        #    and 0 <= beta and beta <= 1
        #    and 0 <= alpha and alpha <= 1:
        xi[cond1] = barycentric[cond1]

        #
        # if gamma <= 0:
        #  x = p - p1
        #  u = p2 - p1
        #  a = p1
        #  b = p2
        t2 = cp.divide(
            #
            # u.dot(x)
            cp.sum(cp.multiply(edges[:, 0, :, :], diff_vectors[:, 0, :, :]),
                   axis=2),
            edge_normssq[:, 0])
        xi2 = cp.zeros((t2.shape[0], 3, t2.shape[1]))
        xi2[:, 0, :] = -t2 + 1
        xi2[:, 1, :] = t2
        #
        t2 = cp.tile(cp.expand_dims(t2, axis=2), (1, 1, 3))
        lz = cp.less(t2, cp.zeros(t2.shape))
        go = cp.greater(t2, cp.ones(t2.shape))
        proj2 = vertices[:, 0, :, :] + cp.multiply(t2, edges[:, 0, :, :])
        proj2[lz] = vertices[:, 0, :, :][lz]
        proj2[go] = vertices[:, 1, :, :][go]
        #
        xi[cond2] = xi2[cond2]
        proj[cp.swapaxes(cond2, 1, 2)] = proj2[cp.swapaxes(cond2, 1, 2)]

        #
        # if beta <= 0:
        #  x = p - p1
        #  v = p3 - p1
        #  a = p1
        #  b = p3
        t3 = cp.divide(
            #
            # v.dot(x)
            cp.sum(cp.multiply(edges[:, 1, :, :], diff_vectors[:, 0, :, :]),
                   axis=2),
            edge_normssq[:, 1])
        xi3 = cp.zeros((t3.shape[0], 3, t3.shape[1]))
        xi3[:, 0, :] = -t3 + 1
        xi3[:, 2, :] = t3
        #
        t3 = cp.tile(cp.expand_dims(t3, axis=2), (1, 1, 3))
        lz = cp.less(t3, cp.zeros(t3.shape))
        go = cp.greater(t3, cp.ones(t3.shape))
        proj3 = vertices[:, 0, :, :] + cp.multiply(t3, edges[:, 1, :, :])
        proj3[lz] = vertices[:, 0, :, :][lz]
        proj3[go] = vertices[:, 2, :, :][go]
        #
        xi[cond3] = xi3[cond3]
        proj[cp.swapaxes(cond3, 1, 2)] = proj3[cp.swapaxes(cond3, 1, 2)]

        #
        # if alpha <= 0:
        #  y = p - p2
        #  w = p3 - p2
        #  a = p2
        #  b = p3
        t4 = cp.divide(
            #
            # w.dot(y)
            cp.sum(cp.multiply(edges[:, 2, :, :], diff_vectors[:, 1, :, :]),
                   axis=2),
            edge_normssq[:, 2])
        xi4 = cp.zeros((t4.shape[0], 3, t4.shape[1]))
        xi4[:, 1, :] = -t4 + 1
        xi4[:, 2, :] = t4
        #
        t4 = cp.tile(cp.expand_dims(t4, axis=2), (1, 1, 3))
        lz = cp.less(t4, cp.zeros(t4.shape))
        go = cp.greater(t4, cp.ones(t4.shape))
        proj4 = vertices[:, 1, :, :] + cp.multiply(t4, edges[:, 2, :, :])
        proj4[lz] = vertices[:, 1, :, :][lz]
        proj4[go] = vertices[:, 2, :, :][go]
        #
        xi[cond4] = xi4[cond4]
        proj[cp.swapaxes(cond4, 1, 2)] = proj4[cp.swapaxes(cond4, 1, 2)]

        vec_to_point = pts[:, 0, :, :] - proj
        distances = cp.linalg.norm(vec_to_point, axis=2)

        # n = "\n"
        # print(f"{pts[:,0,:,:]=}")
        # print(f"{proj=}")
        # print(f"{pts[:,0,:,:] - proj=}")
        # print(f"{distances=}")

        min_distances = cp.min(distances, axis=0)
        closest_triangles = cp.argmin(distances, axis=0)
        projections = proj[closest_triangles, np.arange(chunk_size), :]

        #
        # Distinguish close triangles
        is_close = cp.isclose(distances, min_distances)

        #
        # Determine sign
        signed_normal = normals[:, 0, :, :]
        if vertex_normals is not None:
            signed_normal = cp.sum(vertex_normals.transpose() *
                                   xi.transpose(), axis=2).transpose()

        is_negative = cp.less_equal(
            cp.sum(cp.multiply(vec_to_point, signed_normal), axis=2), 0.)

        #
        # Combine
        is_close_and_negative = cp.logical_and(is_close, is_negative)

        #
        # Determine if inside
        is_inside = cp.all(cp.logical_or(is_close_and_negative,
                                         cp.logical_not(is_close)),
                           axis=0)

        #
        # Overwrite the signs of points
        # that are outside of the box
        if bounding_box is not None:
            #
            # Extract
            rotation_matrix = cp.asarray(bounding_box['rotation_matrix'])
            translation_vector = cp.asarray(bounding_box['translation_vector'])
            size = cp.asarray(bounding_box['size'])
            #
            # Transform
            transformed_pts = cp.dot(
                all_pts[start_index:end_index, :] - translation_vector,
                rotation_matrix)

            #
            # Determine if outside bbox
            inside_bbox = cp.all(cp.logical_and(
                cp.less_equal(0., transformed_pts),
                cp.less_equal(transformed_pts, size)),
                axis=1)

            #
            # Treat points outside bbox as
            # being outside of lumen
            print(f"{inside_bbox=}")
            is_inside = cp.logical_and(is_inside, inside_bbox)

        #
        # Apply sign to indicate whether the distance is
        # inside or outside the mesh.
        min_distances[is_inside] = -1 * min_distances[is_inside]

        #
        # Emplace results
        # [triangle_index, vert_index, querypoint_index, coordinates]
        results[0][start_index:end_index] = closest_triangles
        results[1][start_index:end_index] = min_distances
        results[2][start_index:end_index, :] = projections
def KHK(self, KHK: cp.ndarray):
    self.__KHK = KHK.astype(cp.float64)
def solution(self, solution: cp.ndarray):
    self.__solution = solution.astype(cp.float64)
def normalize(cparray: cp.ndarray, axis=-1) -> cp.ndarray:
    return cparray / cparray.sum(axis=axis, keepdims=True)
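# Hedged usage sketch: normalizing along the last axis turns rows of
# non-negative scores into probability distributions.
import cupy as cp

scores = cp.array([[1.0, 3.0], [2.0, 2.0]])
print(normalize(scores))   # [[0.25, 0.75], [0.5, 0.5]]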
def _cupy2torch(array: cp.ndarray) -> Tensor:
    return from_dlpack(array.toDlpack())
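# Hedged usage sketch (assumes a CUDA-enabled PyTorch build and that
# `from_dlpack` is torch.utils.dlpack.from_dlpack): DLPack hands the buffer to
# torch without a copy, so writes through the tensor are visible to CuPy.
import cupy as cp

gpu_array = cp.arange(6, dtype=cp.float32).reshape(2, 3)
tensor = _cupy2torch(gpu_array)
tensor[0, 0] = 42.0
print(gpu_array[0, 0])     # 42.0, since the memory is shared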