def _apply_swaps(matrix, indices, detected_pairs, seed=0, greedy=True, inplace=False):
    """Apply a conflict-free subset of the candidate swaps to ``matrix``/``indices``.

    Candidates are first ordered (by column 2, descending, when ``greedy``;
    randomly otherwise) and then scanned once: a row is kept only if neither
    of its two positions (columns 0 and 1) has been used by a previously kept
    row.  Every kept row is handed to ``swap_inplace``.

    Args:
        matrix: Array passed to ``swap_inplace``; copied first unless
            ``inplace`` is true.
        indices: Companion array passed to ``swap_inplace``; copied first
            unless ``inplace`` is true.
        detected_pairs: 2-D array with at least three columns.  Columns 0 and
            1 are the two positions to swap; column 2 is the sort key used in
            greedy mode (presumably a gain/score -- confirm against the
            detector).  This array is never modified.
        seed: Seed for the random ordering.  Only has an effect when
            ``greedy`` is false; use several seeds to explore several
            different outcomes.
        greedy: If true, prefer rows with the largest column-2 value.
        inplace: If true, operate directly on ``matrix`` and ``indices``.

    Returns:
        Tuple ``(matrix, indices, n_swaps)`` where ``n_swaps`` is the number
        of swaps that were applied.
    """
    if not inplace:
        matrix = matrix.copy()
        indices = indices.copy()

    if greedy:
        # Fancy indexing builds a new array, so the caller's data is untouched.
        detected_pairs = detected_pairs[np.argsort(detected_pairs[:, 2])][::-1]
    else:
        rng = np.random.default_rng(seed=seed)
        # Generator.shuffle works in place; shuffle a private copy so the
        # caller's candidate array keeps its original order.
        detected_pairs = detected_pairs.copy()
        rng.shuffle(detected_pairs, axis=0)

    # Keep a row only when both of its positions are still unused.
    used = set()
    selected = []
    for row in range(detected_pairs.shape[0]):
        first, second = detected_pairs[row, 0], detected_pairs[row, 1]
        if first in used or second in used:
            continue
        selected.append(row)
        used.add(first)
        used.add(second)

    swap_pairs = detected_pairs[selected, :]
    for row in range(swap_pairs.shape[0]):
        swap_inplace(matrix, indices, swap_pairs[row, 0], swap_pairs[row, 1])
    return matrix, indices, swap_pairs.shape[0]
def detect_and_swap_gpu(matrix, indices, seed, threads_per_block=128, blocks=128):
    """Run the random GPU detector and apply a conflict-free subset of its swaps.

    One result row per GPU thread is allocated, ``_detect_gpu`` is launched
    with ``blocks`` blocks of ``threads_per_block`` threads, and the rows it
    leaves non-zero are treated as candidates.  Candidates are ordered by
    column 2 (descending) and scanned once; a row is kept only if neither
    column-0 nor column-1 value was used by an earlier kept row.  Each kept
    row is passed to ``swap_inplace``.

    Args:
        matrix: Array copied to the device for detection and then passed to
            ``swap_inplace`` on the host.
        indices: Companion array passed to ``swap_inplace``.
        seed: Seed for the per-thread xoroshiro128+ generator states.
        threads_per_block: Threads per CUDA block.
        blocks: Number of CUDA blocks.

    Returns:
        Tuple ``(matrix, indices, vec_detected, vec_swapped)``: the two input
        objects, the number of non-zero candidate rows, and the number of
        swaps actually applied.
    """
    n_threads = threads_per_block * blocks
    rng_states = create_xoroshiro128p_states(n_threads, seed=seed)
    candidates = np.zeros((n_threads, 3), dtype=int)

    d_matrix = cuda.to_device(matrix)
    d_candidates = cuda.to_device(candidates)
    _detect_gpu[blocks, threads_per_block](d_matrix, d_candidates, rng_states)
    candidates = d_candidates.copy_to_host()

    # Rows still entirely zero are treated as "nothing found" and dropped.
    has_entry = np.any(candidates != 0, axis=1)
    candidates = candidates[has_entry]

    # Largest column-2 value first.  TODO: greedy?
    descending = np.argsort(candidates[:, 2])[::-1]
    candidates = candidates[descending]
    vec_detected = len(candidates)

    # Keep a row only when both of its positions are still unused.
    taken = set()
    keep = []
    for row, pair in enumerate(candidates):
        first, second = pair[0], pair[1]
        if first in taken or second in taken:
            continue
        keep.append(row)
        taken.add(first)
        taken.add(second)

    chosen = candidates[keep, :]
    for pair in chosen:
        swap_inplace(matrix, indices, pair[0], pair[1])

    vec_swapped = len(keep)
    return matrix, indices, vec_detected, vec_swapped
def detect_and_swap_gpu(matrix, indices, seed, threads_per_block=128, blocks=128, mode='random'):
    """Detect candidate swaps on the GPU and apply a conflict-free subset of them.

    Two detection kernels are supported:

    * ``mode='random'`` launches ``_detect_gpu`` with ``blocks`` blocks of
      ``threads_per_block`` threads, one result row and one RNG state per
      thread.
    * ``mode='all'`` launches ``_detect_all_gpu`` on a 2-D grid of 32x32
      blocks that covers ``matrix.shape``, writing into a fixed 4096-row
      result buffer through a one-element device counter.

    Rows left entirely zero are discarded.  The remaining candidates are
    ordered by column 2 (descending) and scanned once; a row is kept only if
    neither its column-0 nor its column-1 value was used by an earlier kept
    row.  Each kept row is passed to ``swap_inplace``.

    Args:
        matrix: Array copied to the device for detection and then passed to
            ``swap_inplace`` on the host.
        indices: Companion array passed to ``swap_inplace``.
        seed: Seed for the per-thread RNG states; only used in 'random' mode.
        threads_per_block: Threads per CUDA block; only used in 'random' mode.
        blocks: Number of CUDA blocks; only used in 'random' mode.
        mode: ``'random'`` or ``'all'``.

    Returns:
        Tuple ``(matrix, indices, vec_detected, vec_swapped)``: the two input
        objects, the number of non-zero candidate rows, and the number of
        swaps actually applied.

    Raises:
        ValueError: If ``mode`` is neither ``'random'`` nor ``'all'``.
    """
    # Fail early and clearly instead of hitting an unbound local further down.
    if mode not in ('random', 'all'):
        raise ValueError(
            "unknown mode {!r}; expected 'random' or 'all'".format(mode))

    if mode == 'random':
        rng_states = create_xoroshiro128p_states(threads_per_block * blocks, seed=seed)
        vec = np.zeros([threads_per_block * blocks, 3]).astype(int)
    else:
        # NOTE(review): the result buffer is capped at 4096 rows; what happens
        # when more pairs are found depends on _detect_all_gpu -- confirm.
        vec = np.zeros([4096, 3]).astype(int)
        index = np.zeros([1]).astype(int)
        d_index = cuda.to_device(index)
        threadsperblock = (32, 32)
        # Ceiling division so the grid covers the whole matrix.
        blockspergrid_x = (matrix.shape[0] + threadsperblock[0] - 1) // threadsperblock[0]
        blockspergrid_y = (matrix.shape[1] + threadsperblock[1] - 1) // threadsperblock[1]
        blockspergrid = (blockspergrid_x, blockspergrid_y)

    d_matrix = cuda.to_device(matrix)
    d_vec = cuda.to_device(vec)
    if mode == 'random':
        _detect_gpu[blocks, threads_per_block](d_matrix, d_vec, rng_states)
    else:
        _detect_all_gpu[blockspergrid, threadsperblock](d_matrix, d_index, d_vec)
    vec = d_vec.copy_to_host()

    vec = vec[~np.all(vec == 0, axis=1)]  # select non-zero rows
    vec = vec[np.argsort(vec[:, 2])[::-1]]  # TODO: greedy?
    vec_detected = vec.shape[0]

    # Keep a row only when both of its positions are still unused.
    visited = set()
    selected = []
    for i in range(vec.shape[0]):
        first, second = vec[i, 0], vec[i, 1]
        if first in visited or second in visited:
            continue
        selected.append(i)
        visited.add(first)
        visited.add(second)
    vec = vec[selected, :]

    for i in range(vec.shape[0]):
        swap_inplace(matrix, indices, vec[i, 0], vec[i, 1])
    vec_swapped = vec.shape[0]
    return matrix, indices, vec_detected, vec_swapped