def __init__(self, volume, template, mask, wedge, stdV, gpu=True):
    """Upload the search volume and allocate every device buffer derived from it.

    Args:
        volume: host array uploaded with ``gu.to_gpu``; its shape and dtype
            size the padded buffers and both FFT plans.
        template: wrapped in ``Volume`` and kept as ``self.template``.
        mask: wrapped in ``Volume``; ``mask.shape`` is recorded as ``self.sOrg``.
        wedge: uploaded with ``gu.to_gpu`` as ``self.wedge``.
        stdV: uploaded with ``gu.to_gpu`` as ``self.stdV``.
        gpu: not read anywhere in this constructor.
    """
    real_t, cplx_t = np.float32, np.complex64

    def _blank(dtype):
        # Zero-filled device buffer with the same shape as the uploaded volume.
        return gu.zeros_like(self.volume, dtype=dtype)

    def _minus_1000():
        # float32 device buffer with every element set to -1000.
        return gu.ones_like(self.volume, dtype=real_t) * -1000

    self.volume = gu.to_gpu(volume)
    full_shape = volume.shape

    self.template = Volume(template)
    # templatePadded is only allocated here; nothing in this constructor
    # writes the template into it.
    self.templatePadded = _blank(real_t)

    self.mask = Volume(mask)
    self.maskPadded = _blank(real_t)
    self.sOrg, self.sPad = mask.shape, full_shape
    print(self.sPad, self.sOrg)
    # Zero-angle rotate with maskPadded as the output argument
    # (presumably places the mask inside the padded buffer -- confirm
    # against `rotate`).
    rotate(self.mask, [0, 0, 0], self.maskPadded, self.sPad, self.sOrg)

    print(full_shape, stdV.shape, wedge.shape)
    self.wedge = gu.to_gpu(wedge)
    self.stdV = gu.to_gpu(stdV)

    # One plan per direction: volume dtype -> complex64 and back.
    self.fwd_plan = Plan(full_shape, volume.dtype, cplx_t)
    self.inv_plan = Plan(full_shape, cplx_t, volume.dtype)

    self.volume_fft = _blank(cplx_t)
    self.template_fft = _blank(cplx_t)
    self.ccc_map = _blank(real_t)
    self.norm_volume = np.prod(full_shape)

    self.scores = _minus_1000()
    self.angles = _minus_1000()

    # NOTE(review): `sum` is whatever name is in scope in this module
    # (builtin or a GPU reduction imported elsewhere) -- confirm.
    self.p = sum(self.mask.d_data)
def make_sample_data(set_: int):
    """Build one synthetic data set, hash it on the GPU and save it as parquet.

    Steps, in order:
      1. seed NumPy's global RNG with ``set_ * 4347`` and generate the data
         (1: uniform in [0, 1), 2: 3 Gaussian clusters, 3: 10 Gaussian clusters);
      2. shuffle the rows and store them in the ``vec`` column;
      3. store nearest-neighbour indices for 10, 15, ..., 50 neighbours in
         ``known_neighbours_<n>`` columns;
      4. compute the LSH bits on the GPU and store the grouped integers
         in ``hash_<LSH_NUM_BITS>_bits``;
      5. write the frame to
         ``<config.CUDA_neighbour_search_df_dir>df-<set_>.parquet``.

    Args:
        set_: which data set to generate; must be 1, 2 or 3.

    Raises:
        ValueError: if ``set_`` is not one of the supported set numbers.
    """
    # Lazily evaluated so that nothing is generated before validation and
    # before the RNG has been seeded.
    generators = {
        1: lambda: np.random.uniform(0, 1, size=(samples, num_features)),  # uniform
        2: lambda: multi_gauss_clusters(n_clusters=3),    # 3 Gaussian clusters
        3: lambda: multi_gauss_clusters(n_clusters=10),   # 10 Gaussian clusters
    }
    if set_ not in generators:
        # Previously an unsupported value left `data` unbound and failed
        # further down with an UnboundLocalError.
        raise ValueError(
            f"unsupported sample set {set_!r}; expected one of {sorted(generators)}")

    np.random.seed(set_ * 4347)
    data = generators[set_]()

    df = pd.DataFrame()
    np.random.shuffle(data)
    df['vec'] = data.tolist()

    # find nearest neighbours
    from sklearn.neighbors import NearestNeighbors
    nbrs = NearestNeighbors(n_neighbors=51, algorithm='ball_tree', leaf_size=30).fit(data)
    _, nbrs_indices = nbrs.kneighbors(data)
    for n_nbr in range(10, 51, 5):
        # Index 0 of every row is skipped (presumably the query point itself,
        # since the model is queried with its own training data).
        df[f"known_neighbours_{n_nbr}"] = [x[1:(n_nbr + 1)] for x in nbrs_indices]

    # hash using random hyperplane LSH
    import pycuda.gpuarray as gpuarray
    import skcuda.linalg as linalg
    import pycuda.autoinit  # noqa: F401  (imported for its side effect)
    linalg.init()
    # NOTE(review): this is set after the CUDA libraries were imported and
    # initialised -- confirm it still has the intended effect.
    os.environ['CUDA_HOME'] = "/opt/cuda/"

    vec_np = np.array(df['vec'].values.tolist(), dtype=np.float32)
    LSH = LSHBias(feature_dim=num_features, bits=LSH_NUM_BITS)
    W = np.array(LSH.W, dtype=np.float32)
    b_gpu = gpuarray.to_gpu(W)

    # Append a column of ones so the last row of W acts as the bias term.
    ones = np.ones(shape=(vec_np.shape[0], 1), dtype=np.float32)
    X = np.concatenate((vec_np, ones), axis=1)

    # do the matrix multiplication
    a_gpu = gpuarray.to_gpu(X)
    mul = linalg.mdot(a_gpu, b_gpu)

    # get binary: 1 if value >= 0, else 0
    res = gpuarray.if_positive(
        mul >= gpuarray.zeros(mul.shape, dtype=np.float32),
        then_=gpuarray.ones_like(mul),
        else_=gpuarray.zeros_like(mul))
    res = np.array(res.get(), dtype=np.uint32)

    # convert grouped bits to integers
    res = np_array_binary_to_grouped_integers(res)
    df[f"hash_{LSH_NUM_BITS}_bits"] = list(res)

    df.to_parquet(f"{config.CUDA_neighbour_search_df_dir}df-{set_}.parquet", index=False)
    print("created test-data")
def ones_like(tensor):
    """Return a new ``Tensor`` of ones built from ``tensor.data``.

    The result lives on ``tensor.device``.  A non-scalar CUDA tensor is
    filled through ``gpuarray.ones_like``; a CUDA tensor whose shape is
    ``()`` and every non-CUDA tensor go through ``np.ones_like``.

    NOTE(review): ``dtype`` is not a parameter of this function; it is
    looked up in the enclosing scope at call time -- confirm it is defined
    there.  The scalar CUDA branch does not pass ``dtype`` at all.
    """
    on_gpu = tensor.device == 'cuda'

    if on_gpu and tensor.shape != ():
        payload = gpuarray.ones_like(tensor.data, dtype=dtype)
    elif on_gpu:
        # Scalar on the GPU: built with NumPy, without an explicit dtype.
        payload = np.ones_like(tensor.data)
    else:
        payload = np.ones_like(tensor.data, dtype=dtype)

    return Tensor(data=payload, device=tensor.device)
def __init__(self, photons, ncopies=1, copy_flags=True, copy_triangles=True, copy_weights=True):
    """Load ``photons`` onto the GPU, replicating as requested.

    Args:
        - photons: chroma.Event.Photons
            Photon state information to load onto GPU
        - ncopies: int, *optional*
            Number of times to replicate the photons
            on the GPU.  This is used if you want
            to propagate the same event many times,
            for example in a likelihood calculation.

            The amount of GPU storage will be proportionally
            larger if ncopies > 1, so be careful.
        - copy_flags: bool, *optional*
            If False, ``photons.flags`` is not uploaded and the flags
            array is zero-initialised instead.
        - copy_triangles: bool, *optional*
            If False, ``photons.last_hit_triangles`` is not uploaded and
            every entry is set to -1 instead.
        - copy_weights: bool, *optional*
            If False, ``photons.weights`` is not uploaded and every
            weight is set to 1 instead.
    """
    nphotons = len(photons)
    ntotal = nphotons * ncopies

    self.pos = ga.empty(shape=ntotal, dtype=ga.vec.float3)
    self.dir = ga.empty(shape=ntotal, dtype=ga.vec.float3)
    self.pol = ga.empty(shape=ntotal, dtype=ga.vec.float3)
    self.wavelengths = ga.empty(shape=ntotal, dtype=np.float32)
    self.t = ga.empty(shape=ntotal, dtype=np.float32)

    self.last_hit_triangles = ga.empty(shape=ntotal, dtype=np.int32)
    if not copy_triangles:
        self.last_hit_triangles.fill(-1)

    if not copy_flags:
        self.flags = ga.zeros(shape=ntotal, dtype=np.uint32)
    else:
        self.flags = ga.empty(shape=ntotal, dtype=np.uint32)

    if not copy_weights:
        self.weights = ga.ones_like(self.last_hit_triangles, dtype=np.float32)
    else:
        self.weights = ga.empty(shape=ntotal, dtype=np.float32)

    # Sized like every other per-photon array.  It used to hold only
    # `nphotons` entries even though it is handed to the duplication
    # kernel together with the `nphotons * ncopies` arrays below.
    self.evidx = ga.empty(shape=ntotal, dtype=np.uint32)

    # Assign the provided photons to the beginning (possibly
    # the entire array if ncopies is 1)
    self.pos[:nphotons].set(to_float3(photons.pos))
    self.dir[:nphotons].set(to_float3(photons.dir))
    self.pol[:nphotons].set(to_float3(photons.pol))
    self.wavelengths[:nphotons].set(photons.wavelengths.astype(np.float32))
    self.t[:nphotons].set(photons.t.astype(np.float32))
    if copy_triangles:
        self.last_hit_triangles[:nphotons].set(
            photons.last_hit_triangles.astype(np.int32))
    if copy_flags:
        self.flags[:nphotons].set(photons.flags.astype(np.uint32))
    if copy_weights:
        self.weights[:nphotons].set(photons.weights.astype(np.float32))
    self.evidx[:nphotons].set(photons.evidx.astype(np.uint32))

    module = get_cu_module('propagate.cu', options=cuda_options)
    self.gpu_funcs = GPUFuncs(module)

    # Replicate the photons to the rest of the slots if needed
    if ncopies > 1:
        max_blocks = 1024
        nthreads_per_block = 64
        for first_photon, photons_this_round, blocks in \
                chunk_iterator(nphotons, nthreads_per_block, max_blocks):
            self.gpu_funcs.photon_duplicate(
                np.int32(first_photon), np.int32(photons_this_round),
                self.pos, self.dir, self.wavelengths, self.pol, self.t,
                self.flags, self.last_hit_triangles, self.weights,
                self.evidx,
                np.int32(ncopies - 1),
                np.int32(nphotons),
                block=(nthreads_per_block, 1, 1), grid=(blocks, 1))

    # Save the duplication information for the iterate_copies() method
    self.true_nphotons = nphotons
    self.ncopies = ncopies
# Hash every ORC part file under `basepath` on the GPU and write one parquet
# file per part, named after the running `count`.
# `count`, `b_gpu`, `basepath`, `LSH_NUM_BITS` and `config` are defined outside
# this fragment.
for filename in tqdm(glob(basepath + "part-*.orc")):
    df = pd.read_orc(filename)
    df = df.rename(columns={"FeatureVector_all_features": "vec"})
    count += 1  # incremented before use, so it names this part's output file
    vec_np = np.array(df['vec'].values.tolist(), dtype=np.float32)
    # add bias term (a trailing column of ones)
    ones = np.ones(shape=(vec_np.shape[0], 1), dtype=np.float32)
    X = np.concatenate((vec_np, ones), axis=1)
    # do the matrix multiplication
    a_gpu = gpuarray.to_gpu(X)
    mul = linalg.mdot(a_gpu, b_gpu)
    # get binary: 1 if value >= 0, else 0
    res = gpuarray.if_positive(
        mul >= gpuarray.zeros(mul.shape, dtype=np.float32),
        then_=gpuarray.ones_like(mul),
        else_=gpuarray.zeros_like(mul))
    # bring the bits back to the host as uint32
    res = np.array(res.get(), dtype=np.uint32)
    # convert grouped bits to integers
    res = np_array_binary_to_grouped_integers(res)
    df[f"hash_{LSH_NUM_BITS}_bits"] = [x for x in res]
    # keep only the record id and its hash
    df = df[["rec_MBID", f"hash_{LSH_NUM_BITS}_bits"]]
    df.to_parquet(f"{config.ABz_GPU_hashed_output_dir}{count}.parquet", index=False)

# save as a single parquet file
# NOTE: the builder chain below continues past the end of this fragment.
spark = SparkSession \
    .builder \
    .appName("hashed file coalesce") \
cv.namedWindow("Moving Detecting!") cap = cv.VideoCapture(0) W = 320 H = 240 cap.set(cv.CAP_PROP_FRAME_HEIGHT, H) cap.set(cv.CAP_PROP_FRAME_WIDTH, W) ret, frame = cap.read() gray_a = cv.cvtColor(frame, cv.COLOR_RGB2GRAY) img_ori_gpu = gpuarray.to_gpu(gray_a.astype(np.float32)) img_buf_gpu = gpuarray.empty_like(img_ori_gpu) img_sub = gpuarray.ones_like(img_ori_gpu) img_sub = 25 * img_sub img_bgm = gpuarray.zeros_like(img_sub) while True: ret, frame = cap.read() gray_buff = cv.cvtColor(frame, cv.COLOR_BGR2GRAY) img_res_gpu = gpuarray.to_gpu(gray_buff.astype(np.float32)) img_buf_gpu = cmath.fabs(img_ori_gpu - img_res_gpu) img_buf_gpu = img_buf_gpu - img_sub img_ori_gpu = img_res_gpu.copy() img_res_gpu = gpuarray.if_positive(img_buf_gpu, img_bgm, img_res_gpu) gray_buff = img_res_gpu.get() gray_buff = gray_buff.astype(np.uint8) frame = cv.cvtColor(gray_buff, cv.COLOR_GRAY2RGB) cv.imshow("Moving Detecting!", frame) if cv.waitKey(1) & 0xFF == ord('q'):
# Demonstration of GPUArray construction, element-wise selection and
# reductions.  `a`, `h_array`, `stream` and `dtype` are defined before this
# fragment.

# Upload of the host array issued on `stream`; printed after copying back.
b = gpuarray.to_gpu_async(h_array, stream=stream)
print('b:\n{0}\nshape={1}\n'.format(b.get(), b.shape))

# 100x100 array created with `empty`.
c = gpuarray.empty((100, 100), dtype=dtype)
print('c:\n{0}\nshape={1}\n'.format(c, c.shape))

# 100x100 array created with `zeros`.
d = gpuarray.zeros((100, 100), dtype=dtype)
print('d:\n{0}\nshape={1}\n'.format(d, d.shape))

# Range from 0.0 up to 100.0 in steps of 1.0.
e = gpuarray.arange(0.0, 100.0, 1.0, dtype=dtype)
print('e:\n{0}\nshape={1}\n'.format(e, e.shape))

# Element-wise select driven by `e < 50`: `e - 100` or `e + 100`.
f = gpuarray.if_positive(e < 50, e - 100, e + 100)
print('f:\n{0}\nshape={1}\n'.format(f, f.shape))

# Same criterion, selecting between ones and zeros.
g = gpuarray.if_positive(e < 50, gpuarray.ones_like(e), gpuarray.zeros_like(e))
print('g:\n{0}\nshape={1}\n'.format(g, g.shape))

# Element-wise maximum / minimum of `e` and `f`.
h = gpuarray.maximum(e, f)
print('h:\n{0}\nshape={1}\n'.format(h, h.shape))
i = gpuarray.minimum(e, f)
print('i:\n{0}\nshape={1}\n'.format(i, i.shape))

# Reductions over `a`.
# NOTE(review): `g` is rebound here, discarding the array assigned to `g`
# above; the surrounding names suggest `j` may have been intended -- confirm.
g = gpuarray.sum(a)
print(g, type(g))
k = gpuarray.max(a)
print(k, type(k))
l = gpuarray.min(a)