def compute(self, floatimage, histogram, k):
    """Run the ``IIF`` OpenCL kernel on an image and its histogram volume.

    Parameters
    ----------
    floatimage : array_like
        Image holding ``width * height`` values; it is flattened and cast
        to float32 before upload.
    histogram : array_like
        Three-dimensional array whose first two dimensions are taken as
        ``width`` and ``height``; it is flattened and cast to float32.
    k : float
        Scalar passed to the kernel as a float32 argument.

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(width, height)`` read back from the device.
    """
    width, height, _ = np.shape(histogram)
    numpixels = width * height

    image_linear = np.reshape(floatimage, (numpixels,)).astype(np.float32)
    histogram_linear = np.reshape(
        histogram, (np.size(histogram),)).astype(np.float32)
    transform = np.zeros_like(image_linear).astype(np.float32)

    mf = cl.mem_flags
    self.buf_image = cl.Buffer(self.context,
                               mf.READ_ONLY | mf.COPY_HOST_PTR,
                               hostbuf=image_linear)
    self.buf_histogram = cl.Buffer(self.context,
                                   mf.READ_ONLY | mf.COPY_HOST_PTR,
                                   hostbuf=histogram_linear)
    self.output_buf = cl.Buffer(self.context, mf.READ_WRITE, transform.nbytes)

    kernel = self.program.IIF
    kernel.set_scalar_arg_dtypes([np.uintc, np.uintc, np.float32] + [None] * 3)
    kernel.set_arg(0, np.uintc(width))
    kernel.set_arg(1, np.uintc(height))
    kernel.set_arg(2, np.float32(k))
    kernel.set_arg(3, self.buf_image)
    kernel.set_arg(4, self.buf_histogram)
    kernel.set_arg(5, self.output_buf)

    # One work-item per pixel; let the runtime choose the work-group size.
    cl.enqueue_nd_range_kernel(self.queue, kernel,
                               image_linear.shape, None).wait()
    # enqueue_copy is the documented replacement for the legacy
    # enqueue_read_buffer call.
    cl.enqueue_copy(self.queue, transform, self.output_buf).wait()

    # np.float was only an alias of the builtin float (float64) and has been
    # removed from NumPy; name the dtype explicitly.
    return np.reshape(transform, (width, height)).astype(np.float64)
def nodeInSimplex(self, nodeInd, simplexInd):
    """Ask the native library whether a node belongs to a simplex.

    The mesh is stored in the library first if no internal ID exists yet.
    When the first query reports a non-zero status the mesh is stored again
    and the query is repeated once.

    :param nodeInd: node index, converted to an unsigned C int
    :param simplexInd: simplex index, converted to an unsigned C int
    :return: the boolean written by the library
    :raises Exception: if the library still returns a non-zero status
    """
    if self._internalID is None:
        self._storeMeshInternally()

    # Enforce the unsigned-int format expected by the C interface.
    node_arg = np.uintc(nodeInd)
    simplex_arg = np.uintc(simplexInd)
    result = ctypes.c_bool(False)

    def query():
        return self._libInstance.implicitMesh_nodeInSimplex(
            self._internalID,
            ctypes.c_uint(node_arg),
            ctypes.c_uint(simplex_arg),
            ctypes.byref(result))

    status = query()
    if status != 0:
        # The stored mesh may be stale: store it again and retry once.
        self._storeMeshInternally()
        status = query()

    if status != 0:
        raise Exception("Uknown error occured! Error code " + str(status) + " from implicitMesh_nodeInSimplex()")

    return result.value
def compute(self, image, num_bins):
    """Run the ``iif_binid`` OpenCL kernel on a two-dimensional image.

    Parameters
    ----------
    image : array_like
        Two-dimensional image; it is flattened and cast to float32.
    num_bins : int
        Number of bins; passed to the kernel as an unsigned byte, so it must
        fit in 0..255.

    Returns
    -------
    numpy.ndarray
        float32 array of shape ``(width, height, num_bins)`` read back from
        the device.
    """
    width, height = np.shape(image)
    numpixels = width * height

    image = np.reshape(image, (numpixels,)).astype(np.float32)
    result = np.zeros((numpixels * num_bins,), dtype=np.float32)

    mf = cl.mem_flags
    self.buf_image = cl.Buffer(self.context,
                               mf.READ_ONLY | mf.COPY_HOST_PTR,
                               hostbuf=image)
    self.output_buf = cl.Buffer(self.context, mf.READ_WRITE, result.nbytes)

    kernel = self.program.iif_binid
    kernel.set_scalar_arg_dtypes([np.uintc, np.uintc, np.ubyte] + [None] * 2)
    kernel.set_arg(0, np.uintc(width))
    kernel.set_arg(1, np.uintc(height))
    kernel.set_arg(2, np.ubyte(num_bins))
    kernel.set_arg(3, self.buf_image)
    kernel.set_arg(4, self.output_buf)

    # One work-item per pixel; let the runtime choose the work-group size.
    cl.enqueue_nd_range_kernel(self.queue, kernel, image.shape, None).wait()
    # enqueue_copy is the documented replacement for the legacy
    # enqueue_read_buffer call.
    cl.enqueue_copy(self.queue, result, self.output_buf).wait()

    return np.reshape(result, (width, height, num_bins)).astype(np.float32)
def component_step1_shortcutting_p2(d_v, d_prevD, d_D, d_Q, length, s):
    """
    Launch the ``componentStepOne_ShortCuttingP2`` CUDA kernel.

    For every index ``tid < length`` where ``d_D[tid] != d_prevD[tid]`` the
    kernel stores ``s`` in ``Q[d_D[tid]]``. The four arrays are uploaded, the
    kernel is run and the device contents are copied back into the host
    arrays that were passed in.

    :param d_v: host array uploaded as the kernel's ``Vertex *`` argument
    :param d_prevD: host array with the previous D values
    :param d_D: host array with the current D values
    :param d_Q: host array that receives ``s`` at the changed positions
    :param length: number of elements to process
    :param s: value written into ``d_Q``
    :return: ``d_Q`` after the device results were copied back
    """
    import eulercuda.pyencode as enc
    logger = logging.getLogger('eulercuda.pycomponent.component_step1_shortcutting_p2')
    logger.info("started.")

    kernel_source = """
    typedef struct Vertex
    {
        unsigned int vid;
        unsigned int n1;
        unsigned int n2;
    } Vertex;

    __global__ void componentStepOne_ShortCuttingP2(Vertex * v, unsigned int * prevD, unsigned int * curD, unsigned int * Q, unsigned int length, int s)
    {
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        if( tid <length)
        {
            if(curD[tid]!=prevD[tid])
            {
                Q[curD[tid]]=s;
            }
        }
    }
    """
    mod = SourceModule(kernel_source)

    block_dim, grid_dim = enc.getOptimalLaunchConfiguration(length, 512)
    logger.info('block_dim = %s, grid_dim = %s', block_dim, grid_dim)

    # Upload every array so the results can be copied back afterwards.
    v_gpu = gpuarray.to_gpu(d_v)
    cur_gpu = gpuarray.to_gpu(d_D)
    prev_gpu = gpuarray.to_gpu(d_prevD)
    q_gpu = gpuarray.to_gpu(d_Q)

    kernel = mod.get_function('componentStepOne_ShortCuttingP2')
    kernel(
        v_gpu,
        prev_gpu,
        cur_gpu,
        q_gpu,
        np.uintc(length),
        np.uintc(s),
        block=block_dim,
        grid=grid_dim,
    )

    # Copy the device buffers back into the caller's arrays.
    v_gpu.get(d_v)
    prev_gpu.get(d_prevD)
    cur_gpu.get(d_D)
    q_gpu.get(d_Q)

    devdata = pycuda.tools.DeviceData()
    orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[0])
    logger.info("Occupancy = %s", orec.occupancy * 100)
    logger.info("Finished. Leaving.")
    return d_Q
def phase1_device(d_keys, d_offset, d_length, count, bucketCount):
    """Hash every key to a bucket and record its slot inside that bucket.

    The ``phase1`` kernel computes a bucket for each key, atomically
    increments that bucket's counter and stores the counter's previous value
    as the key's offset.

    :param d_keys: sequence of keys; converted to unsigned 64-bit integers
    :param d_offset: ignored on input; kept for interface compatibility
    :param d_length: number of keys to process
    :param count: host array of per-bucket counters, uploaded and updated
    :param bucketCount: number of buckets used by the hash function
    :return: tuple ``(offsets, count)`` read back from the device
    """
    logger = logging.getLogger('eulercuda.pygpuhash.phase1_device')
    logger.info("started.")
    mod = SourceModule("""
    //#include <stdio.h>
    typedef unsigned long long KEY_T ;
    typedef KEY_T *KEY_PTR;
    typedef unsigned int VALUE_T;
    typedef VALUE_T *VALUE_PTR;
    #define C0 0x01010101
    #define C1 0x12345678
    #define LARGE_PRIME 1900813
    #define MAX_INT 0xffffffff

    __forceinline__ __device__ unsigned int hash_h(KEY_T key, unsigned int bucketCount)
    {
        return ((C0 + C1 * key) % LARGE_PRIME ) % bucketCount;
    }

    __global__ void phase1( KEY_PTR keys, unsigned int * offset, unsigned int length, unsigned int* count, unsigned int bucketCount){
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        if(tid<length)
        {
            KEY_T key=keys[tid];
            unsigned int bucket=hash_h(key,bucketCount);
            offset[tid]=atomicInc (count+bucket,MAX_INT);
        }
        __syncthreads();
    }
    """, options=['--compiler-options', '-Wall'])

    np_d_keys = np.array(d_keys).astype('Q')
    keys_gpu = gpuarray.to_gpu(np_d_keys)
    offset_gpu = gpuarray.zeros(len(d_keys), dtype='I')
    count_gpu = gpuarray.to_gpu(count)

    threads_per_block = 1024
    block_dim = (threads_per_block, 1, 1)
    # Round up: floor division left the trailing ``d_length % 1024`` keys
    # unprocessed. Surplus threads are masked by the kernel's tid<length test.
    num_blocks = max(1, (int(d_length) + threads_per_block - 1) // threads_per_block)
    grid_dim = (num_blocks, 1, 1)

    phase1 = mod.get_function("phase1")
    logger.info('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))
    phase1(keys_gpu, offset_gpu, np.uintc(d_length), count_gpu,
           np.uintc(bucketCount), grid=grid_dim, block=block_dim)

    d_offset = offset_gpu.get()
    count = count_gpu.get()
    logger.info('Finished. Leaving.')
    return d_offset, count
def _storeMeshInternally(self):
    """Create the hyper-rectangular extension mesh inside the native library.

    Pointers to the base mesh arrays and to the optional extension arrays
    are handed to ``hyperRectExtension_createMesh``. On success the returned
    mesh ID and the new node and simplex counts are stored on ``self``.

    :raises Exception: if the library returns a non-zero status
    """
    def pointer_or_none(array, pointer_type):
        # Optional arrays are passed to the library as NULL when absent.
        if array is None:
            return None
        return array.ctypes.data_as(pointer_type)

    base = self._mesh._mesh
    double_p = self.c_double_p
    uint_p = self.c_uint_p

    # Pointers to the underlying mesh.
    nodes_p = base.nodes.ctypes.data_as(double_p)
    simplices_p = base.triangles.ctypes.data_as(uint_p)
    neighs_p = pointer_or_none(self._mesh._neighs, uint_p)
    offset_p = pointer_or_none(self._mesh._offset, double_p)
    numPerDimension_p = pointer_or_none(self._mesh._numPerDimension, uint_p)

    # Pointers describing the hyper-rectangular extension.
    offsetHyper_p = pointer_or_none(self._offset, double_p)
    stepLengths_p = pointer_or_none(self._stepLengths, double_p)
    numSteps_p = pointer_or_none(self._numSteps, uint_p)

    # The extension dimension is only non-zero when all three arrays exist.
    extension_complete = not (
        offsetHyper_p is None or stepLengths_p is None or numSteps_p is None)
    hyperDim = np.uintc(self._offset.size) if extension_complete else np.uintc(0)

    # Preallocate output
    meshId = ctypes.c_uint(0)
    newNumNodes = ctypes.c_uint(0)
    newNumSimplices = ctypes.c_uint(0)

    # Create implicit mesh
    status = self._mesh._libInstance.hyperRectExtension_createMesh(
        nodes_p, ctypes.c_uint(base.nodes.shape[0]), ctypes.c_uint(base.embD),
        simplices_p, ctypes.c_uint(base.triangles.shape[0]), ctypes.c_uint(base.topD),
        offset_p, numPerDimension_p,
        offsetHyper_p, stepLengths_p, numSteps_p, ctypes.c_uint(hyperDim),
        ctypes.byref(meshId), ctypes.byref(newNumNodes), ctypes.byref(newNumSimplices),
        neighs_p)
    if status != 0:
        raise Exception("Uknown error occured! Error code " + str(status) + " from hyperRectExtension_createMesh()")

    # Store mesh internally
    self._internalID = meshId.value
    self.N = newNumNodes.value
    self.NT = newNumSimplices.value
def calculate_circuit_graph_vertex_data_device(d_D, d_C, length):
    """
    Launch the ``calculateCircuitGraphVertexData`` CUDA kernel.

    For every index ``tid < length`` the kernel reads ``c = D[tid]`` and
    atomically sets ``C[c]`` to 1. Both arrays are uploaded and afterwards
    copied back into the host arrays that were passed in.

    :param d_D: host array of unsigned ints indexed by thread id
    :param d_C: host array of unsigned ints that receives the 1 markers
    :param length: number of entries of ``d_D`` to process
    :return: tuple ``(d_D, d_C)``
    """
    logger = logging.getLogger('eulercuda.pyeulertour.calculate_circuit_graph_vertex_data_device')
    logger.info("started.")

    kernel_source = """
    __global__ void calculateCircuitGraphVertexData( unsigned int * D,unsigned int * C,unsigned int ecount){
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        if( tid <ecount)
        {
            unsigned int c=D[tid];
            atomicExch(C+c,1);
        }
    }
    """
    mod = SourceModule(kernel_source)
    kernel = mod.get_function('calculateCircuitGraphVertexData')

    block_dim, grid_dim = getOptimalLaunchConfiguration(length, 512)
    logger.info('block_dim = %s, grid_dim = %s', block_dim, grid_dim)

    labels_gpu = gpuarray.to_gpu(d_D)
    marks_gpu = gpuarray.to_gpu(d_C)
    kernel(
        labels_gpu,
        marks_gpu,
        np.uintc(length),
        block=block_dim,
        grid=grid_dim,
    )

    # Copy the device buffers back into the caller's arrays.
    labels_gpu.get(d_D)
    marks_gpu.get(d_C)

    logger.info("Finished. Leaving.")
    return d_D, d_C
def test_numpy(self):
    """NumPy objects get serialized to readable JSON."""
    scalars = [
        np.float32(12.5),
        np.float64(2.0),
        np.float16(0.5),
        # np.bool was only an alias of the builtin bool and has been removed
        # from NumPy, so the plain Python booleans are used directly.
        True,
        False,
        np.bool_(True),
        # np.unicode_ was an alias of np.str_ and was removed in NumPy 2.0.
        np.str_("hello"),
        np.byte(12),
        np.short(12),
        np.intc(-13),
        np.int_(0),
        np.longlong(100),
        np.intp(7),
        np.ubyte(12),
        np.ushort(12),
        np.uintc(13),
        np.ulonglong(100),
        np.uintp(7),
        np.int8(1),
        np.int16(3),
        np.int32(4),
        np.int64(5),
        np.uint8(1),
        np.uint16(3),
        np.uint32(4),
        np.uint64(5),
    ]
    nested = [scalars, np.array([1, 2, 3])]
    roundtripped = loads(dumps(nested, cls=EliotJSONEncoder))
    self.assertEqual([scalars, [1, 2, 3]], roundtripped)
def what_is_uint():
    '''
    Print what the "np.uint" and "np.uintc" aliases resolve to.

    - "np.uint" and "np.uintc" are aliases for real underlying NumPy scalar
      types, and the type behind each alias depends on the platform
    - On the original author's system "np.uint" created "numpy.uint64"
      objects and "np.uintc" created "numpy.uint32" objects
    - "np.uintc" presumably matches the C "unsigned int"; which C type
      "np.uint" matches is not shown here (verify against the NumPy docs)
    - For any other size, use a class with an explicit size, e.g. np.uint8
    '''
    # Both comparisons printed True on the original author's system.
    print(np.uint is np.uint64)
    print(np.uintc is np.uint32)

    # No error because 1 certainly fits within the size of the type.
    # np.array(10**50, dtype=np.uint) would instead raise an OverflowError
    # ("Python int too large to convert to C long").
    one = np.array(1, dtype=np.uint)
    print(one.dtype)  # uint64 on the original author's system

    # The alias itself is an ordinary class.
    print(type(np.uint))  # <class 'type'>

    # Calling a constructor yields an instance of the underlying scalar type.
    samples = (
        (np.uint, 10),    # <class 'numpy.uint64'> on the author's system
        (np.uint32, 10),  # <class 'numpy.uint32'>
        (np.uintc, 10),   # <class 'numpy.uint32'> on the author's system
        (np.uint8, 4),    # <class 'numpy.uint8'>
    )
    for constructor, value in samples:
        print(type(constructor(value)))
def compute_kmer_device(lmers, pkmers, skmers, kmerBitMask, readLength, readCount):
    """Split every l-mer into its prefix and suffix k-mer on the GPU.

    For each index ``tid < readCount`` the kernel writes
    ``(lmer & (mask << 2)) >> 2`` to ``pkmers[tid]`` and ``lmer & mask`` to
    ``skmers[tid]``. The kernel is only launched when all three arrays are
    NumPy arrays; otherwise a warning is logged and the inputs are returned
    unchanged.

    :param lmers: array of 64-bit l-mers (read only)
    :param pkmers: array that receives the prefix k-mers
    :param skmers: array that receives the suffix k-mers
    :param kmerBitMask: bit mask selecting the k-mer bits
    :param readLength: passed to ``getOptimalLaunchConfiguration`` as its
        second argument
    :param readCount: number of l-mers to process
    :return: tuple ``(pkmers, skmers)``
    """
    module_logger.info("started compute_kmer_device.")
    mod = SourceModule("""
    typedef unsigned long long KEY_T ;
    typedef KEY_T * KEY_PTR ;
    #define LMER_PREFIX(lmer,bitMask) ((lmer & (bitMask<<2))>>2)
    #define LMER_SUFFIX(lmer,bitMask) ((lmer & bitMask))

    __global__ void computeKmerDevice(
        KEY_PTR lmers,
        KEY_PTR pkmers,
        KEY_PTR skmers,
        KEY_T validBitMask,
        unsigned int readCount
    )
    {
        const unsigned int tid = (blockDim.x * blockDim.y * gridDim.x * blockIdx.y) + (blockDim.x * blockDim.y * blockIdx.x) + (blockDim.x * threadIdx.y) + threadIdx.x;
        if (tid < readCount)
        {
            KEY_T lmer;
            //fetch lmer
            lmer = lmers[tid];
            //find prefix
            pkmers[tid] = LMER_PREFIX(lmer,validBitMask);
            //find suffix
            skmers[tid] = LMER_SUFFIX(lmer,validBitMask);
            // __syncthreads();
        }
    }
    """, options=['--compiler-options', '-Wall'])
    compute_kmer = mod.get_function("computeKmerDevice")

    block_dim, grid_dim = getOptimalLaunchConfiguration(readCount, readLength)

    if isinstance(lmers, np.ndarray) and isinstance(pkmers, np.ndarray) and isinstance(skmers, np.ndarray):
        # Upload only after the type check: uploading first made the warning
        # branch unreachable for non-array input because to_gpu failed first.
        np_pkmers = gpuarray.to_gpu(pkmers)
        np_skmers = gpuarray.to_gpu(skmers)
        module_logger.info("Going to GPU.")
        compute_kmer(
            drv.In(lmers),
            np_pkmers,
            np_skmers,
            np.ulonglong(kmerBitMask),
            np.uintc(readCount),
            block=block_dim, grid=grid_dim
        )
        np_pkmers.get(pkmers)
        np_skmers.get(skmers)
    else:
        # Logger.warn is a deprecated alias of Logger.warning.
        module_logger.warning("PROBLEM WITH GPU.")

    devdata = pycuda.tools.DeviceData()
    orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[0])
    module_logger.debug("Occupancy = %s" % (orec.occupancy * 100))
    module_logger.info("leaving compute_kmer_device.")
    return pkmers, skmers
def getFullNeighs(self):
    """Retrieve the complete neighborhood structure from the native library.

    :return: ``None`` when no neighborhood information is stored; otherwise
        an unsigned-int array of shape ``(NT, topD + 1)`` filled by the
        library
    :raises Exception: if the library returns a non-zero status
    """
    if self._neighs is None:
        return None

    num_simplices = self.NT
    top_dim = self._mesh.topD

    # Output buffer the library writes the neighbor indices into.
    full_neighs = np.zeros((num_simplices, top_dim + 1), dtype=np.uintc)
    buffer_p = full_neighs.ctypes.data_as(self._mesh.c_uint_p)

    retrieve = self._libInstance.implicitMesh_retrieveFullNeighsFromImplicitMesh
    status = retrieve(
        self._internalID,
        buffer_p,
        ctypes.c_uint(np.uintc(num_simplices)),
        ctypes.c_uint(np.uintc(top_dim)))

    if status == 0:
        return full_neighs

    raise Exception(
        "Uknown error occured! Error code " + str(status) + " from implicitMesh_retrieveFullNeighsFromImplicitMesh()")
def find_component_device(d_v, d_D, length):
    """
    Run the iterative component-labelling steps until no label changes.

    Each pass runs steps one to four and then step five, which sets a flag
    when any entry of ``d_Q`` equals the current pass number ``s``. The loop
    continues while that flag keeps ``sp`` in step with ``s``.

    :param d_v: host array of vertices passed to every step
    :param d_D: host array of component labels
    :param length: number of vertices
    :return: the final ``d_D`` array
    """
    import eulercuda.pyencode as enc
    logger = logging.getLogger('eulercuda.pycomponent.find_component_device')
    logger.info("started.")

    d_prevD = np.zeros(length, dtype=np.uintc)
    d_Q = np.zeros_like(d_prevD)
    d_t1 = np.zeros_like(d_prevD)
    d_t2 = np.zeros_like(d_prevD)
    d_val1 = np.zeros_like(d_prevD)
    d_val2 = np.zeros_like(d_prevD)

    d_D, d_Q = component_step_init(d_v, d_D, d_Q, length)

    s, sp = 1, 1
    # One-element flag that step five sets to 1 when Q still contains s.
    sptemp = np.zeros(1, dtype=np.uintc)

    while s == sp:
        d_D, d_prevD = d_prevD, d_D

        d_D = component_step1_shortcutting_p1(d_v, d_prevD, d_D, d_Q, length, s)
        d_Q = component_step1_shortcutting_p2(d_v, d_prevD, d_D, d_Q, length, s)
        d_t1, d_t2, d_val1, d_val2 = component_Step2_P1(d_v, d_prevD, d_D, d_Q, d_t1, d_val1, d_t2, d_val2, length, s)
        d_D, d_Q = component_Step2_P2(d_v, d_prevD, d_D, d_Q, d_t1, d_val1, d_t2, d_val2, length, s)
        d_t1, d_t2, d_val1, d_val2 = component_Step3_P1(d_v, d_prevD, d_D, d_Q, d_t1, d_val1, d_t2, d_val2, length, s)
        d_D = component_Step3_P2(d_v, d_prevD, d_D, d_Q, d_t1, d_val1, d_t2, d_val2, length, s)
        d_val1 = component_step4_P1(d_v, d_D, d_val1, length)
        d_D = component_step4_P2(d_v, d_D, d_val1, length)

        sptemp[0] = 0
        # The original line built a tuple here instead of calling step five,
        # so the flag was never updated and the loop always ended after one
        # pass. component_step5 uploads the flag, runs the kernel and copies
        # the result back into the same host array.
        sptemp = component_step5(d_Q, length, sptemp, s)
        sp += int(sptemp[0])
        s += 1

    logger.info("Finished. Leaving.")
    return d_D
def __init__(self, spatial_extent=15, timesteps=4, batchnorm=True,
             channel_sym=True, return_sequences=False, rand_seed=None,
             **kwargs):
    """Store the layer configuration and forward ``kwargs`` to the parent.

    :param spatial_extent: stored as ``self.spatial_extent``
    :param timesteps: stored as ``self.timesteps``
    :param batchnorm: stored as ``self.batchnorm``
    :param channel_sym: stored as ``self.channel_sym``
    :param return_sequences: stored as ``self.return_sequences``
    :param rand_seed: seed to store; when ``None`` a random unsigned 32-bit
        seed is generated
    :param kwargs: forwarded unchanged to the parent ``__init__``
    """
    self.spatial_extent = spatial_extent
    self.timesteps = timesteps
    self.batchnorm = batchnorm
    self.channel_sym = channel_sym
    self.return_sequences = return_sequences
    if rand_seed is None:
        # hash() of a float is usually far larger than 32 bits; mask it
        # explicitly because recent NumPy raises OverflowError for
        # out-of-range Python ints instead of wrapping silently.
        rand_seed = np.uintc(hash(random.random()) & 0xFFFFFFFF)
    # Testing ``is None`` (not truthiness) keeps an explicit seed of 0.
    self.rand_seed = rand_seed
    super(hGRU, self).__init__(**kwargs)
def pointInSimplex(self, point, simplexInd, embTol=0, centerOfCurvature=None):
    """Ask the native library whether a point lies inside a simplex.

    The mesh is stored in the library first if no internal ID exists yet.
    When the first query reports a non-zero status the mesh is stored again
    and the identical query is repeated once.

    :param point: NumPy array with ``self._mesh.embD`` elements
    :param simplexInd: simplex index, converted to an unsigned C int
    :param embTol: tolerance forwarded to the library as a C double
    :param centerOfCurvature: optional NumPy array forwarded as a double
        pointer; any value that is not a NumPy array is passed as NULL
    :return: the boolean written by the library
    :raises Exception: on a dimension mismatch or if the library still
        returns a non-zero status after the retry
    """
    if point.size != self._mesh.embD:
        raise Exception("Wrong dimensionality of point in input")

    # If not saved mesh internally, do that
    if self._internalID is None:
        self._storeMeshInternally()

    # Enforce formatting. dtypes are compared by value; identity comparison
    # is not guaranteed to hold for equal dtypes.
    if point.dtype != np.dtype("float64"):
        point = point.astype(np.float64)
    point_p = point.ctypes.data_as(self.c_double_p)
    simplexInd = np.uintc(simplexInd)
    embTol = np.float64(embTol)

    centerOfCurvature_p = None
    if centerOfCurvature is not None and isinstance(centerOfCurvature, np.ndarray):
        if centerOfCurvature.dtype != np.dtype("float64"):
            centerOfCurvature = centerOfCurvature.astype(np.float64)
        centerOfCurvature_p = centerOfCurvature.ctypes.data_as(self.c_double_p)

    out = ctypes.c_bool(False)

    def query():
        # Integers are wrapped in c_uint so the call does not depend on
        # ctypes knowing how to convert NumPy scalars.
        return self._libInstance.implicitMesh_pointInSimplex(
            self._internalID,
            point_p, ctypes.c_uint(point.size), ctypes.c_uint(simplexInd),
            ctypes.byref(out),
            ctypes.c_double(embTol), centerOfCurvature_p)

    status = query()
    if status != 0:
        # Try to save internally again, then retry with the SAME arguments;
        # the retry previously dropped the tolerance and curvature center.
        self._storeMeshInternally()
        status = query()

    if status != 0:
        raise Exception("Uknown error occured! Error code " + str(status) + " from implicitMesh_pointInSimplex()")

    return out.value
def __init__(self, spatial_extent, timesteps, batchnorm, channel_sym,
             rand_seed=None, **kwargs):
    """Store the cell configuration and forward ``kwargs`` to the parent.

    :param spatial_extent: stored as ``self.spatial_extent``
    :param timesteps: stored as ``self.timesteps``
    :param batchnorm: stored as ``self.batchnorm``
    :param channel_sym: stored as ``self.channel_sym``
    :param rand_seed: seed to store; when ``None`` a random unsigned 32-bit
        seed is generated
    :param kwargs: forwarded unchanged to the parent ``__init__``
    """
    self.spatial_extent = spatial_extent
    self.timesteps = timesteps
    self.batchnorm = batchnorm
    self.channel_sym = channel_sym
    if rand_seed is None:
        # hash() of a float is usually far larger than 32 bits; mask it
        # explicitly because recent NumPy raises OverflowError for
        # out-of-range Python ints instead of wrapping silently.
        rand_seed = np.uintc(hash(random.random()) & 0xFFFFFFFF)
    # Testing ``is None`` (not truthiness) keeps an explicit seed of 0.
    self.rand_seed = rand_seed
    super(hGRUCell, self).__init__(**kwargs)
def _test_collision_robustness_3d(aspect, y, z, step):
    """Check that points along a line in a unit cube mesh always collide.

    A ``UnitCubeMesh`` with 10 x ``int(aspect * 10)`` x 10 cells is built and
    points ``(x, y, z)`` with ``x`` stepping from 0 to 1 are tested against
    its bounding box tree.

    :param aspect: ratio of cells in y direction to cells in x direction
    :param y: fixed y coordinate of the probe points
    :param z: fixed z coordinate of the probe points
    :param step: increment applied to x between probes
    """
    nx = nz = 10
    ny = int(aspect * nx)
    mesh = UnitCubeMesh(nx, ny, nz)
    bb = mesh.bounding_box_tree()

    # Largest unsigned C int. np.uintc(-1) relied on silent wrap-around,
    # which recent NumPy rejects with an OverflowError.
    upper_bound = np.iinfo(np.uintc).max

    x = 0.0
    while x <= 1.0:
        c = bb.compute_first_entity_collision(Point(x, y, z))
        assert c < upper_bound
        x += step
def _test_collision_robustness_3d(aspect, y, z, step):
    """Check that points along a line in a unit cube mesh always collide.

    A ``UnitCubeMesh`` with 10 x ``int(aspect * 10)`` x 10 cells is built and
    points ``(x, y, z)`` with ``x`` stepping from 0 to 1 are tested against
    its bounding box tree.

    :param aspect: ratio of cells in y direction to cells in x direction
    :param y: fixed y coordinate of the probe points
    :param z: fixed z coordinate of the probe points
    :param step: increment applied to x between probes
    """
    nx = nz = 10
    ny = int(aspect*nx)
    mesh = UnitCubeMesh(nx, ny, nz)
    bb = mesh.bounding_box_tree()

    # Largest unsigned C int. np.uintc(-1) relied on silent wrap-around,
    # which recent NumPy rejects with an OverflowError.
    upper_bound = np.iinfo(np.uintc).max

    x = 0.0
    while x <= 1.0:
        c = bb.compute_first_entity_collision(Point(x, y, z))
        assert c < upper_bound
        x += step
def component_step5(d_Q,length,d_sptemp,s):
    """
    Launch the ``componentStepFive`` CUDA kernel.

    The kernel atomically writes 1 to the first element of the flag buffer
    whenever some ``Q[tid]`` with ``tid < length`` equals ``s``. The flag
    buffer is uploaded, the kernel is run and the buffer is copied back into
    ``d_sptemp``.

    :param d_Q: host array of unsigned ints, uploaded read-only
    :param length: number of entries of ``d_Q`` to inspect
    :param d_sptemp: host flag buffer that is uploaded and copied back
    :param s: value searched for in ``d_Q``
    :return: ``d_sptemp`` after the device result was copied back
    """
    import eulercuda.pyencode as enc
    logger = logging.getLogger('eulercuda.pycomponent.component_Step5')
    logger.info("started.")

    kernel_source = """
    __global__ void componentStepFive(unsigned int * Q,unsigned int length,unsigned int * sprimtemp,unsigned int s){
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        if(tid <length)
        {
            if(Q[tid]==s){
                atomicExch(sprimtemp,1);
                //*sprime=*sprimtemp+1;
            }
        }
    }
    """
    mod = SourceModule(kernel_source)

    block_dim, grid_dim = enc.getOptimalLaunchConfiguration(length, 512)
    logger.info('block_dim = %s, grid_dim = %s', block_dim, grid_dim)

    flag_gpu = gpuarray.to_gpu(d_sptemp)
    kernel = mod.get_function('componentStepFive')
    kernel(
        drv.In(d_Q),
        np.uintc(length),
        flag_gpu,
        np.uintc(s),
        block=block_dim,
        grid=grid_dim,
    )
    # Copy the flag back into the caller's buffer.
    flag_gpu.get(d_sptemp)

    devdata = pycuda.tools.DeviceData()
    orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[0])
    logger.info("Occupancy = %s", orec.occupancy * 100)
    logger.info("Finished. Leaving.")
    return d_sptemp
def _test_collision_robustness_2d(aspect, y, step):
    """Check that points along a line in a unit square mesh always collide.

    A crossed ``UnitSquareMesh`` with 10 x ``int(aspect * 10)`` cells is
    built and points ``(x, y)`` with ``x`` stepping from 0 to 1 are tested
    against its bounding box tree.

    :param aspect: ratio of cells in y direction to cells in x direction
    :param y: fixed y coordinate of the probe points
    :param step: increment applied to x between probes
    """
    nx = 10
    ny = int(aspect * nx)
    mesh = UnitSquareMesh(nx, ny, 'crossed')
    bb = mesh.bounding_box_tree()

    # Largest unsigned C int. np.uintc(-1) relied on silent wrap-around,
    # which recent NumPy rejects with an OverflowError.
    upper_bound = np.iinfo(np.uintc).max

    x = 0.0
    while x <= 1.0:
        c = bb.compute_first_entity_collision(Point(x, y))
        assert c < upper_bound
        x += step
def _test_collision_robustness_2d(aspect, y, step):
    """Check that points along a line in a unit square mesh always collide.

    A crossed ``UnitSquareMesh`` with 10 x ``int(aspect * 10)`` cells is
    built and points ``(x, y)`` with ``x`` stepping from 0 to 1 are tested
    against its bounding box tree.

    :param aspect: ratio of cells in y direction to cells in x direction
    :param y: fixed y coordinate of the probe points
    :param step: increment applied to x between probes
    """
    nx = 10
    ny = int(aspect*nx)
    mesh = UnitSquareMesh(nx, ny, 'crossed')
    bb = mesh.bounding_box_tree()

    # Largest unsigned C int. np.uintc(-1) relied on silent wrap-around,
    # which recent NumPy rejects with an OverflowError.
    upper_bound = np.iinfo(np.uintc).max

    x = 0.0
    while x <= 1.0:
        c = bb.compute_first_entity_collision(Point(x, y))
        assert c < upper_bound
        x += step
def construct_successor_graphP2_device(d_ee, d_v, ecount):
    """
    Launch the ``constructSuccessorGraphP2`` CUDA kernel.

    For every index ``tid < ecount`` whose ``v[tid].n1`` is below ``ecount``
    the kernel stores ``v[tid].vid`` in ``v[v[tid].n1].n2``. The vertex array
    is uploaded and afterwards copied back into ``d_v``.

    :param d_ee: host array of Euler edges, uploaded read-only
    :param d_v: host array of vertices, updated in place
    :param ecount: number of entries to process
    :return: ``d_v`` after the device result was copied back
    """
    # NOTE(review): the logger name says "P1" although this is the P2 step;
    # left unchanged because logging configuration may depend on it.
    logger = logging.getLogger('eulercuda.pyeulertour.construct_successor_graphP1_device')
    logger.info("started.")

    kernel_source = """
    #include <stdio.h>
    typedef unsigned long long KEY_T ;
    typedef KEY_T *KEY_PTR;
    typedef unsigned int VALUE_T;
    typedef VALUE_T *VALUE_PTR;
    typedef struct EulerEdge{
        KEY_T eid;
        unsigned int v1;
        unsigned int v2;
        unsigned int s;
        unsigned int pad;
    }EulerEdge;

    typedef struct Vertex{
        unsigned int vid;
        unsigned int n1;
        unsigned int n2;
    } Vertex;

    __global__ void constructSuccessorGraphP2(EulerEdge* e, Vertex * v, unsigned int ecount)
    {
        unsigned int tid = (blockDim.x * blockDim.y * gridDim.x * blockIdx.y) + (blockDim.x*blockDim.y * blockIdx.x) + (blockDim.x * threadIdx.y) + threadIdx.x;
        if(tid<ecount)
        {
            if(v[tid].n1 <ecount )
            {
                v[v[tid].n1].n2=v[tid].vid;
            }
        }
    }
    """
    mod = SourceModule(kernel_source)
    kernel = mod.get_function("constructSuccessorGraphP2")

    block_dim, grid_dim = getOptimalLaunchConfiguration(ecount, 512)
    logger.info('block_dim = %s, grid_dim = %s', block_dim, grid_dim)

    vertices_gpu = gpuarray.to_gpu(d_v)
    kernel(
        drv.In(d_ee),
        vertices_gpu,
        np.uintc(ecount),
        block=block_dim,
        grid=grid_dim,
    )
    # Copy the updated vertices back into the caller's array.
    vertices_gpu.get(d_v)

    logger.info("Finished. Leaving.")
    return d_v
def griddedTemplateMatching(self, img1, img2, templateRadius, searchRadius,
        estimateInds=None, templateSkip=0, searchSkip=0, templateStart=0, searchStart=0):
    """Function for performing template matching between two sets of bitmapped images.

    Both images are converted to C-contiguous double arrays and passed to
    the library routine ``misc_localMaxCrossCorr2D`` together with the
    radii, skips and starts.

    :param img1: first image (two-dimensional)
    :param img2: second image
    :param templateRadius: template radius forwarded as an unsigned int
    :param searchRadius: search radius forwarded as an unsigned int
    :param estimateInds: optional boolean array forwarded as a bool pointer;
        NULL is passed when it is ``None``
    :param templateSkip: forwarded as an unsigned int
    :param searchSkip: forwarded as an unsigned int
    :param templateStart: forwarded as an unsigned int
    :param searchStart: forwarded as an unsigned int
    :return: tuple ``(mapIndex, maxCrossCorr)``; both have the shape of
        ``img1``, the former unsigned ints initialised to 0 and the latter
        doubles initialised to NaN before the library fills them
    :raises Exception: if the library returns a non-zero status
    """
    # Convert input
    img1 = np.ascontiguousarray(img1, dtype=np.double)
    img2 = np.ascontiguousarray(img2, dtype=np.double)
    if estimateInds is not None:
        # np.bool was an alias of the builtin bool and has been removed from
        # NumPy; np.bool_ is the boolean scalar type in every version.
        estimateInds = np.ascontiguousarray(estimateInds, dtype=np.bool_)

    # Preallocate output
    mapIndex = np.zeros(img1.shape, dtype=np.uintc, order='C')
    maxCrossCorr = np.full(img1.shape, np.nan, dtype=np.double, order='C')

    # Acquire pointers
    img1_p = img1.ctypes.data_as(self.c_double_p)
    img2_p = img2.ctypes.data_as(self.c_double_p)
    mapIndex_p = mapIndex.ctypes.data_as(self.c_uint_p)
    if estimateInds is not None:
        estimateInds_p = estimateInds.ctypes.data_as(self.c_bool_p)
    else:
        estimateInds_p = None
    maxCrossCorr_p = maxCrossCorr.ctypes.data_as(self.c_double_p)

    # Perform template matching (the column count is passed before the rows)
    status = self._libInstance.misc_localMaxCrossCorr2D(
        img1_p, img2_p,
        ctypes.c_uint(img1.shape[1]), ctypes.c_uint(img1.shape[0]),
        ctypes.c_uint(templateRadius), ctypes.c_uint(searchRadius),
        ctypes.c_uint(np.uintc(templateSkip)), ctypes.c_uint(np.uintc(searchSkip)),
        ctypes.c_uint(np.uintc(templateStart)), ctypes.c_uint(np.uintc(searchStart)),
        mapIndex_p, estimateInds_p, maxCrossCorr_p)
    if status != 0:
        raise Exception("Uknown error occured! Error status: " + str(status))

    # Return index
    return (mapIndex, maxCrossCorr)
def toFullMesh(self):
    """Return a full (explicit) mesh corresponding to the implicit mesh.

    The mesh is stored in the library first if no internal ID exists yet.
    When retrieval reports a non-zero status the mesh is stored again and
    retrieval is repeated once with freshly allocated buffers.

    :return: a ``Mesh`` built from the retrieved simplices and nodes
    :raises Exception: if the library still returns a non-zero status
    """
    # If not saved mesh internally, do that
    if self._internalID is None:
        self._storeMeshInternally()

    def retrieve():
        # Buffers are allocated per attempt because re-storing the mesh may
        # refresh self.N / self.NT, which determine the sizes the library
        # writes. All integers are marshalled the same way on both attempts.
        nodes = np.zeros((self.N, self._mesh.embD), dtype=np.float64)
        simplices = np.zeros((self.NT, self._mesh.topD + 1), dtype=np.uintc)
        code = self._libInstance.implicitMesh_retrieveFullMeshFromImplicitMesh(
            self._internalID,
            nodes.ctypes.data_as(self._mesh.c_double_p),
            ctypes.c_uint(np.uintc(self.N)),
            ctypes.c_uint(np.uintc(self._mesh.embD)),
            simplices.ctypes.data_as(self._mesh.c_uint_p),
            ctypes.c_uint(np.uintc(self.NT)),
            ctypes.c_uint(np.uintc(self._mesh.topD)))
        return code, nodes, simplices

    status, newNodes, newSimplices = retrieve()
    if status != 0:
        # Try to save internally again, then retrieve once more
        self._storeMeshInternally()
        status, newNodes, newSimplices = retrieve()

    if status != 0:
        raise Exception(
            "Uknown error occured! Error code " + str(status) + " from implicitMesh_retrieveFullMeshFromImplicitMesh()")

    return Mesh(newSimplices, newNodes, libPath=self._mesh._libPath)
def _storeMeshInternally(self):
    """Create the combined mesh-and-metric object inside the native library.

    Nothing is done when ``checkInternal()`` reports that the object is
    already stored. Otherwise the underlying mesh is stored first and its
    internal ID is passed to ``meshAndMetric_create`` together with the
    metric tensors. On success the new ID is stored on ``self`` and the node
    and simplex counts on ``self._mesh``.

    :raises Exception: if the underlying mesh has no internal ID or the
        library returns a non-zero status
    """
    # If is stored internally already
    if self.checkInternal():
        return

    self._mesh._storeMeshInternally()
    if self._mesh._internalID is None:
        raise Exception("No internal ID was found!")

    numElementsPerTensor = np.uintc(np.prod(self._metricTensors.shape[1:]))
    numTensors = np.uintc(self._metricTensors.shape[0])
    # np.int was only an alias of the builtin int and has been removed from
    # NumPy.
    tensorMode = int(self._tensorMode)
    metricTensors_p = self._metricTensors.ctypes.data_as(self.c_double_p)

    # No sector information is passed (NULL pointers).
    sectors_p = None
    numSectorDimensions_p = None

    # Preallocate output
    ID = ctypes.c_uint(0)
    numNodes = ctypes.c_uint(0)
    numSimplices = ctypes.c_uint(0)

    # Create implicit mesh
    status = self._mesh._libInstance.meshAndMetric_create(
        ctypes.c_uint(self._mesh._internalID), ctypes.byref(ID),
        metricTensors_p, ctypes.c_uint(numElementsPerTensor), ctypes.c_uint(numTensors),
        ctypes.c_int(tensorMode),
        ctypes.byref(numNodes), ctypes.byref(numSimplices),
        sectors_p, numSectorDimensions_p)
    if status != 0:
        raise Exception("Uknown error occured! Error code " + str(status) + " from meshAndMetric_create()")

    # Store mesh internally
    self._internalID = ID.value
    self._mesh.N = numNodes.value
    self._mesh.NT = numSimplices.value
def component_step_init(d_v, d_D, d_Q, length):
    """
    Launch the ``componentStepInit`` CUDA kernel.

    For every index ``tid < length`` the kernel sets ``D[tid] = tid`` and
    ``Q[tid] = 0``. Both arrays are uploaded and afterwards copied back into
    the host arrays that were passed in.

    :param d_v: host array of vertices, uploaded read-only
    :param d_D: host array that receives the initial labels
    :param d_Q: host array that is cleared
    :param length: number of entries to initialise
    :return: tuple ``(d_D, d_Q)``
    """
    import eulercuda.pyencode as enc
    logger = logging.getLogger('eulercuda.pycomponent.component_step_init')
    logger.info("started.")

    kernel_source = """
    typedef struct Vertex
    {
        unsigned int vid;
        unsigned int n1;
        unsigned int n2;
    } Vertex;

    __global__ void componentStepInit(Vertex * v, unsigned int * D, unsigned int* Q, unsigned int length)
    {
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        if( tid <length)
        {
            //v[tid].vid;
            D[tid]=tid;
            Q[tid]=0;
        }
    }
    """
    mod = SourceModule(kernel_source)
    kernel = mod.get_function('componentStepInit')

    block_dim, grid_dim = enc.getOptimalLaunchConfiguration(length, 512)

    labels_gpu = gpuarray.to_gpu(d_D)
    queue_gpu = gpuarray.to_gpu(d_Q)
    logger.info('block_dim = %s, grid_dim = %s', block_dim, grid_dim)

    kernel(
        drv.In(d_v),
        labels_gpu,
        queue_gpu,
        np.uintc(length),
        block=block_dim,
        grid=grid_dim,
    )

    # Copy the device buffers back into the caller's arrays.
    labels_gpu.get(d_D)
    queue_gpu.get(d_Q)

    devdata = pycuda.tools.DeviceData()
    orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[0])
    logger.info("Occupancy = %s", orec.occupancy * 100)
    logger.info("Finished. Leaving.")
    return d_D, d_Q
def fromSectorAndExplicit2Node(self, sector, explicitInd):
    """Return the implicit node index for a sector and explicit node index.

    The mesh is stored in the library first if no internal ID exists yet.
    When the first lookup reports a non-zero status the mesh is stored again
    and the lookup is repeated once.

    :param sector: sector index, converted to an unsigned C int
    :param explicitInd: explicit node index, converted to an unsigned C int
    :return: the node index written by the library
    :raises Exception: if the library still returns a non-zero status
    """
    # Enforce the unsigned-int format expected by the C interface.
    sector_arg = np.uintc(sector)
    explicit_arg = np.uintc(explicitInd)

    if self._internalID is None:
        self._storeMeshInternally()

    node_index = ctypes.c_uint(0)

    def lookup():
        return self._libInstance.implicitMesh_nodeSectorAndExplicit2Ind(
            self._internalID,
            ctypes.c_uint(sector_arg),
            ctypes.c_uint(explicit_arg),
            ctypes.byref(node_index))

    status = lookup()
    if status != 0:
        # The stored mesh may be stale: store it again and retry once.
        self._storeMeshInternally()
        status = lookup()

    if status != 0:
        raise Exception("Uknown error occured! Error code " + str(status) + " from implicitMesh_nodeSectorAndExplicit2Ind()")

    return node_index.value
def component_step4_P2(d_v, d_D, d_val1, length):
    """
    Launch the ``componentStepFourP2`` CUDA kernel.

    For every index ``tid < length`` the kernel copies ``val1[tid]`` into
    ``curD[tid]``. Both arrays are uploaded and afterwards copied back into
    the host arrays that were passed in.

    :param d_v: host array of vertices, uploaded read-only
    :param d_D: host array that receives the values
    :param d_val1: host array holding the values to copy
    :param length: number of entries to copy
    :return: ``d_D`` after the device result was copied back
    """
    # Imported locally like the sibling step functions do; without it the
    # ``enc`` name used below is only defined if the module imports it.
    import eulercuda.pyencode as enc
    logger = logging.getLogger('eulercuda.pycomponent.component_Step4_P2')
    logger.info("started.")
    mod = SourceModule("""
    typedef struct Vertex
    {
        unsigned int vid;
        unsigned int n1;
        unsigned int n2;
    } Vertex;

    __global__ void componentStepFourP2(Vertex * v, unsigned int * curD,unsigned int * val1,unsigned int length){
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        if( tid < length){
            curD[tid]= val1[tid];
        }
    }
    """)
    block_dim, grid_dim = enc.getOptimalLaunchConfiguration(length, 512)
    logger.info('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))

    np_d_val1 = gpuarray.to_gpu(d_val1)
    np_d_D = gpuarray.to_gpu(d_D)
    step4_P2 = mod.get_function('componentStepFourP2')
    step4_P2(
        drv.In(d_v),
        np_d_D,
        np_d_val1,
        np.uintc(length),
        block=block_dim, grid=grid_dim
    )
    # Copy the device buffers back into the caller's arrays.
    np_d_D.get(d_D)
    np_d_val1.get(d_val1)

    devdata = pycuda.tools.DeviceData()
    orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[0])
    logger.info("Occupancy = %s" % (orec.occupancy * 100))
    logger.info("Finished. Leaving.")
    return d_D
def identify_contig_start(d_ee, d_contigStart, ecount):
    """
    Launch the ``identifyContigStart`` CUDA kernel.

    For every edge ``tid < ecount`` whose ``s`` field is below ``ecount``
    the kernel clears ``contigStart[e[tid].s]``. The flag array is uploaded
    and afterwards copied back into ``d_contigStart``.

    :param d_ee: host array of Euler edges, uploaded read-only
    :param d_contigStart: host array of byte flags, updated in place
    :param ecount: number of edges; must provide ``.item()``
    :return: ``d_contigStart`` after the device result was copied back
    """
    logger = logging.getLogger('pyeulertour.identify_contig_start')
    logger.info("started.")

    kernel_source = """
    typedef unsigned long long KEY_T ;
    typedef struct EulerEdge{
        KEY_T eid;
        unsigned int v1;
        unsigned int v2;
        unsigned int s;
        unsigned int pad;
    }EulerEdge;

    __global__ void identifyContigStart( EulerEdge * e ,unsigned char * contigStart,unsigned int ecount){
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        if(tid<ecount){
            if(e[tid].s < ecount){
                contigStart[e[tid].s]=0;
                //atomicExch(contigStart+e[tid].s,0);
            }
        }
    }
    """
    mod = SourceModule(kernel_source)

    block_dim, grid_dim = getOptimalLaunchConfiguration(ecount.item(), 512)

    flags_gpu = gpuarray.to_gpu(d_contigStart)
    kernel = mod.get_function('identifyContigStart')
    kernel(
        drv.In(d_ee),
        flags_gpu,
        np.uintc(ecount),
        block=block_dim,
        grid=grid_dim,
    )
    # Copy the flags back into the caller's array.
    flags_gpu.get(d_contigStart)

    logger.info('Finished.')
    return d_contigStart
def __init__(self):
    """Build ``self.values``: a list of NumPy and plain Python sample values.

    The list holds one instance of each NumPy scalar kind, a set of plain
    Python objects, datetimes and timedeltas for every precision, and bytes
    and string values of increasing size.
    """
    NT = namedtuple('NT', tuple('abc'))
    # The aliases np.float_, np.longfloat, np.complex_, np.clongfloat,
    # np.unicode_ and np.object have been removed from NumPy; each entry
    # below uses the type that the removed alias referred to.
    self.values = [
        np.longlong(-1),
        np.int_(-1),
        np.intc(-1),
        np.short(-1),
        np.byte(-1),
        np.ubyte(1),
        np.ushort(1),
        np.uintc(1),
        np.uint(1),
        np.ulonglong(1),
        np.half(1.0),
        np.single(1.0),
        np.float64(1.0),
        np.longdouble(1.0),
        np.csingle(1.0j),
        np.complex128(1.0j),
        np.clongdouble(1.0j),
        np.bool_(0),
        np.str_('1'),
        np.str_('1'),
        np.void(1),
        object(),
        np.datetime64('NaT'),
        np.timedelta64('NaT'),
        np.nan,
        12,
        12.0,
        True,
        None,
        float('NaN'),
        object(),
        (1, 2, 3),
        NT(1, 2, 3),
        datetime.date(2020, 12, 31),
        datetime.timedelta(14),
    ]

    # Datetime & Timedelta
    for precision in ['ns', 'us', 'ms', 's', 'm', 'h', 'D', 'M', 'Y']:
        for kind, ctor in (('m', np.timedelta64), ('M', np.datetime64)):
            self.values.append(ctor(12, precision))

    for size in (1, 8, 16, 32, 64, 128, 256, 512):
        self.values.append(bytes(size))
        self.values.append('x' * size)
def construct_circuit_Graph_vertex(d_C, d_cg_offset, ecount, d_cv):
    """
    Launch the ``constructCircuitGraphVertex`` CUDA kernel.

    For every index ``tid < ecount`` with a non-zero ``C[tid]`` the kernel
    stores ``tid`` in ``cv[offset[tid]]``. The output array is uploaded and
    afterwards copied back into ``d_cv``.

    :param d_C: host array of unsigned ints, uploaded read-only
    :param d_cg_offset: host array of output positions, uploaded read-only
    :param ecount: number of entries to process
    :param d_cv: host array that receives the indices
    :return: ``d_cv`` after the device result was copied back
    """
    logger = logging.getLogger('eulercuda.pyeulertour.construct_circuit_Graph_vertex')
    logger.info("started.")

    kernel_source = """
    __global__ void constructCircuitGraphVertex(unsigned int * C,unsigned int * offset,unsigned int ecount, unsigned int * cv)
    {
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        if(tid < ecount){
            if(C[tid] != 0){
                cv[offset[tid]] = tid;
            }
        }
    }
    """
    mod = SourceModule(kernel_source)

    out_gpu = gpuarray.to_gpu(d_cv)
    kernel = mod.get_function('constructCircuitGraphVertex')
    block_dim, grid_dim = getOptimalLaunchConfiguration(ecount, 512)
    kernel(
        drv.In(d_C),
        drv.In(d_cg_offset),
        np.uintc(ecount),
        out_gpu,
        block=block_dim,
        grid=grid_dim,
    )
    # Copy the result back into the caller's array.
    out_gpu.get(d_cv)
    return d_cv
class TestNumpyJSONEncoder(unittest.TestCase):
    """Check that ``NumpyEncoder`` serialises NumPy values like plain Python."""

    @parameterized.expand(
        [(numpy.bool_(1), True),
         (numpy.byte(1), 1),
         (numpy.int8(1), 1),
         (numpy.ubyte(1), 1),
         (numpy.uint8(1), 1),
         (numpy.short(1), 1),
         (numpy.int16(1), 1),
         (numpy.ushort(1), 1),
         (numpy.uint16(1), 1),
         (numpy.intc(1), 1),
         (numpy.int32(1), 1),
         (numpy.uintc(1), 1),
         (numpy.uint32(1), 1),
         (numpy.int_(1), 1),
         (numpy.int32(1), 1),
         (numpy.uint(1), 1),
         (numpy.uint32(1), 1),
         (numpy.longlong(1), 1),
         (numpy.int64(1), 1),
         (numpy.ulonglong(1), 1),
         (numpy.uint64(1), 1),
         (numpy.half(1.0), 1.0),
         (numpy.float16(1.0), 1.0),
         (numpy.single(1.0), 1.0),
         (numpy.float32(1.0), 1.0),
         (numpy.double(1.0), 1.0),
         (numpy.float64(1.0), 1.0),
         (numpy.longdouble(1.0), 1.0)] + ([
             (numpy.bool8(1), True)  # alias removed in NumPy 2.0
         ] if hasattr(numpy, 'bool8') else []) + ([
             (numpy.float128(1.0), 1.0)  # unavailable on windows
         ] if hasattr(numpy, 'float128') else []))
    def test_numpy_primary_type_encode(self, np_val, py_val):
        """A NumPy scalar encodes to the same JSON as its Python value."""
        self.assertEqual(json.dumps(py_val),
                         json.dumps(np_val, cls=NumpyEncoder))

    @parameterized.expand([
        # numpy.int was an alias of the builtin int and has been removed.
        (numpy.array([1, 2, 3], dtype=int), [1, 2, 3]),
        (numpy.array([[1], [2], [3]], dtype=numpy.double), [[1.0], [2.0], [3.0]]),
        (numpy.zeros((2, 2), dtype=numpy.bool_), [[False, False], [False, False]]),
        (numpy.array([('Rex', 9, 81.0), ('Fido', 3, 27.0)],
                     dtype=[('name', 'U10'), ('age', 'i4'), ('weight', 'f4')]),
         [['Rex', 9, 81.0], ['Fido', 3, 27.0]]),
        (numpy.rec.array([(1, 2., 'Hello'), (2, 3., "World")],
                         dtype=[('foo', 'i4'), ('bar', 'f4'), ('baz', 'U10')]),
         [[1, 2.0, "Hello"], [2, 3.0, "World"]])
    ])
    def test_numpy_array_encode(self, np_val, py_val):
        """A NumPy array encodes to the same JSON as the nested list."""
        self.assertEqual(json.dumps(py_val),
                         json.dumps(np_val, cls=NumpyEncoder))
def assign_successor_device(d_ev, d_l, d_e, vcount, d_ee, ecount):
    """
    Fill in the successor field ``s`` of the Euler edges on the GPU.

    One thread handles one vertex.  For each position ``k`` below both the
    vertex's ``ecount`` and ``lcount`` the kernel sets
    ``ee[e[ep + k]].s = l[lp + k]``, skipping any position whose indices are
    not below ``ecount``.

    :param d_ev: host array of ``EulerVertex`` records; round-tripped through
        the device and copied back in place
    :param d_l: host array indexed through ``lp`` (read only on the device)
    :param d_e: host array indexed through ``ep`` (read only on the device)
    :param vcount: number of vertices, i.e. number of worker threads needed
    :param d_ee: host array of ``EulerEdge`` records; updated in place
    :param ecount: bound used for the kernel's index checks
    :return: tuple ``(d_ev, d_ee)``
    """
    module_logger.info("started assign_successor_device.")

    kernel_source = """
    #include <stdio.h>
    typedef unsigned long long KEY_T ;
    typedef KEY_T *KEY_PTR;
    typedef unsigned int VALUE_T;
    typedef VALUE_T *VALUE_PTR;
    typedef struct EulerEdge{
        KEY_T eid;
        unsigned int v1;
        unsigned int v2;
        unsigned int s;
        unsigned int pad;
    }EulerEdge;

    typedef struct EulerVertex{
        KEY_T vid;
        unsigned int ep;
        unsigned int ecount;
        unsigned int lp;
        unsigned int lcount;
    }EulerVertex;

    __global__ void assignSuccessor(
        EulerVertex * ev,
        unsigned int * l,
        unsigned int * e,
        unsigned vcount,
        EulerEdge * ee ,
        unsigned int ecount)
    {
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        unsigned int eidx = 0;
        if(tid < vcount)
        {
            while(eidx < ev[tid].ecount && eidx < ev[tid].lcount)
            {
                unsigned int eindex, lindex, eeindex;
                eindex = ev[tid].ep + eidx;
                lindex = ev[tid].lp + eidx;
                if (eindex < ecount)
                {
                    eeindex = e[ev[tid].ep + eidx];
                    if (eindex < ecount && lindex < ecount && eeindex < ecount)
                    {
                        // printf(" e = %u, l = %u, ee = %u ", eindex, lindex, eeindex);
                        ee[e[ev[tid].ep + eidx]].s = l[ev[tid].lp + eidx] ;
                    }
                }
                eidx++;
            }
        }
    }
    """
    module = SourceModule(kernel_source)

    # The memory figures are queried but not used further.
    _free_bytes, _total_bytes = drv.mem_get_info()

    block_dim, grid_dim = getOptimalLaunchConfiguration(vcount, 256)
    module_logger.info('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))

    vertices_on_device = gpuarray.to_gpu(d_ev)
    edges_on_device = gpuarray.to_gpu(d_ee)

    kernel = module.get_function("assignSuccessor")
    # Scalars are wrapped in np.uintc so they are passed as C unsigned ints.
    launch_args = (
        vertices_on_device,
        drv.In(d_l),
        drv.In(d_e),
        np.uintc(vcount),
        edges_on_device,
        np.uintc(ecount),
    )
    kernel(*launch_args, block=block_dim, grid=grid_dim)

    # Copy the device arrays back into the caller's host buffers.
    vertices_on_device.get(d_ev)
    edges_on_device.get(d_ee)

    module_logger.info("Finished. Leaving.")
    return d_ev, d_ee
def component_Step3_P2(d_v, d_prevD, d_D, d_Q, d_t1, d_val1, d_t2, d_val2, length, s):
    """
    Apply the per-edge ``atomicMin`` updates of component step 3 (part 2).

    For each ``i < length`` the kernel performs
    ``curD[t1[i]] = min(curD[t1[i]], val1[i])`` when ``t1[i] < length`` and
    the same with ``t2``/``val2``.  ``d_v``, ``d_prevD``, ``d_Q`` and ``s``
    are passed to the kernel but not referenced in its body.

    :param d_v: host array of ``Vertex`` records (input only)
    :param d_prevD: host array (input only)
    :param d_D: host array updated in place with the minima
    :param d_Q: host array (input only)
    :param d_t1: target indices for the first update; copied back unchanged
    :param d_val1: candidate values for the first update; copied back unchanged
    :param d_t2: target indices for the second update; copied back unchanged
    :param d_val2: candidate values for the second update; copied back unchanged
    :param length: number of entries to process and upper bound for targets
    :param s: scalar forwarded to the kernel
    :return: ``d_D``
    """
    import eulercuda.pyencode as enc

    log = logging.getLogger('eulercuda.pycomponent.component_Step3_P2')
    log.info("started.")

    kernel_source = """
    typedef struct Vertex
    {
        unsigned int vid;
        unsigned int n1;
        unsigned int n2;
    } Vertex;

    __global__ void componentStepThreeP2(Vertex * v, unsigned int * prevD,unsigned int * curD,unsigned int * Q,unsigned int * t1,unsigned int *val1 ,unsigned int * t2,unsigned int * val2,unsigned int length,unsigned int s){
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        int a;
        int val;
        if( tid< length) {
            //it will be done for each edge 1
            if(t1[tid]<length){
                a=t1[tid];
                val= val1[tid];
                atomicMin(curD+a,val);
            }
            //it will be done for each edge 2
            if(t2[tid]<length){
                a=t2[tid];
                val= val2[tid];
                atomicMin(curD+a,val);
            }
        }
    }
    """
    module = SourceModule(kernel_source)

    block_dim, grid_dim = enc.getOptimalLaunchConfiguration(length, 512)
    log.info('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))

    # Arrays that are round-tripped through explicit device buffers, in the
    # order they are uploaded and later downloaded.
    host_buffers = (d_D, d_t1, d_t2, d_val1, d_val2)
    device_buffers = [gpuarray.to_gpu(host_buffer) for host_buffer in host_buffers]
    gpu_D, gpu_t1, gpu_t2, gpu_val1, gpu_val2 = device_buffers

    kernel = module.get_function('componentStepThreeP2')
    kernel(
        drv.In(d_v),
        drv.In(d_prevD),
        gpu_D,
        drv.In(d_Q),
        gpu_t1,
        gpu_val1,
        gpu_t2,
        gpu_val2,
        np.uintc(length),
        np.uintc(s),
        block=block_dim,
        grid=grid_dim,
    )

    for device_buffer, host_buffer in zip(device_buffers, host_buffers):
        device_buffer.get(host_buffer)

    device_info = pycuda.tools.DeviceData()
    occupancy_record = pycuda.tools.OccupancyRecord(device_info, block_dim[0] * grid_dim[0])
    log.info("Occupancy = %s" % (occupancy_record.occupancy * 100))
    log.info("Finished. Leaving.")
    return d_D
reveal_type(np.bytes0()) # E: numpy.bytes_ reveal_type(np.string_()) # E: numpy.bytes_ reveal_type(np.object0()) # E: numpy.object_ reveal_type(np.void0(0)) # E: numpy.void reveal_type(np.byte()) # E: {byte} reveal_type(np.short()) # E: {short} reveal_type(np.intc()) # E: {intc} reveal_type(np.intp()) # E: {intp} reveal_type(np.int0()) # E: {intp} reveal_type(np.int_()) # E: {int_} reveal_type(np.longlong()) # E: {longlong} reveal_type(np.ubyte()) # E: {ubyte} reveal_type(np.ushort()) # E: {ushort} reveal_type(np.uintc()) # E: {uintc} reveal_type(np.uintp()) # E: {uintp} reveal_type(np.uint0()) # E: {uintp} reveal_type(np.uint()) # E: {uint} reveal_type(np.ulonglong()) # E: {ulonglong} reveal_type(np.half()) # E: {half} reveal_type(np.single()) # E: {single} reveal_type(np.double()) # E: {double} reveal_type(np.float_()) # E: {double} reveal_type(np.longdouble()) # E: {longdouble} reveal_type(np.longfloat()) # E: {longdouble} reveal_type(np.csingle()) # E: {csingle} reveal_type(np.singlecomplex()) # E: {csingle} reveal_type(np.cdouble()) # E: {cdouble}
def fit(self, X, y):
    """Fit the model according to the given training data.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training vector, where n_samples in the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples]
        Target vector relative to X

    Returns
    -------
    self : object
        Returns self.

    Raises
    ------
    ValueError
        If fewer than two classes are found, or if X and y do not contain
        the same number of samples.
    """
    import sys

    self._enc = LabelEncoder()
    y = self._enc.fit_transform(y)
    if len(self.classes_) < 2:
        raise ValueError("The number of classes has to be greater than"
                         " one.")

    X = atleast2d_or_csr(X, dtype=np.float64, order="C")
    y = np.asarray(y, dtype=np.float64).ravel()

    self.class_weight_, self.class_weight_label_ = \
        _get_class_weight(self.class_weight, y)

    if X.shape[0] != y.shape[0]:
        raise ValueError("X and y have incompatible shapes.\n" +
                         "X has %s samples, but y has %s." %
                         (X.shape[0], y.shape[0]))

    liblinear.set_verbosity_wrap(self.verbose)

    # Sparse and dense inputs go through different liblinear entry points.
    if sp.isspmatrix(X):
        train = liblinear.csr_train_wrap
    else:
        train = liblinear.train_wrap

    rnd = check_random_state(self.random_state)
    if self.verbose:
        # Prefix for liblinear's own output; written without a newline and
        # in a form that is valid on both Python 2 and Python 3.
        sys.stdout.write('[LibLinear]')

    # Exclusive upper bound for the seed: UINT_MAX + 1.  Taken from iinfo
    # rather than np.uintc(-1), which depends on unsigned wrap-around of an
    # out-of-range value and is rejected by recent NumPy releases.
    seed_bound = np.iinfo(np.uintc).max + 1

    self.raw_coef_ = train(X, y, self._get_solver_type(), self.tol,
                           self._get_bias(), self.C,
                           self.class_weight_label_, self.class_weight_,
                           # seed for srand in range [0..UINT_MAX]
                           rnd.randint(seed_bound))

    if self.fit_intercept:
        # The last column of the raw coefficients holds the bias term.
        self.coef_ = self.raw_coef_[:, :-1]
        self.intercept_ = self.intercept_scaling * self.raw_coef_[:, -1]
    else:
        self.coef_ = self.raw_coef_
        self.intercept_ = 0.

    return self
def test_table_typing_numpy():
    """Check that typed ``wandb.Table`` columns accept NumPy scalars and arrays.

    Every NumPy numeric scalar type (and an array of each) is added to a
    number-typed column, booleans to a boolean-typed column, and a nested
    array to an untyped column.  The test passes when no ``add_data`` call
    raises.
    """
    # Pulled from https://numpy.org/devdocs/user/basics.types.html
    numeric_type_names = [
        "short", "ushort", "intc", "uintc", "int_", "uint",
        "longlong", "ulonglong",
        "half", "float16", "single", "double", "longdouble",
        "csingle", "cdouble", "clongdouble",
        "int8", "int16", "int32", "int64",
        "uint8", "uint16", "uint32", "uint64",
        "intp", "uintp",
        "float32", "float64", "float_",
        "complex64", "complex128", "complex_",
    ]
    # Some aliases (``float_``, ``complex_``) no longer exist in NumPy 2.0.
    # Resolve by name and skip the missing ones instead of failing with
    # AttributeError; the types they aliased are still covered by
    # ``float64`` / ``complex128``.
    numeric_types = [
        getattr(np, type_name)
        for type_name in numeric_type_names
        if hasattr(np, type_name)
    ]

    # Numerics
    table = wandb.Table(columns=["A"], dtype=[NumberType])
    table.add_data(None)
    table.add_data(42)
    table.add_data(np.byte(1))
    for numeric_type in numeric_types:
        table.add_data(numeric_type(42))

    # Booleans
    table = wandb.Table(columns=["A"], dtype=[BooleanType])
    table.add_data(None)
    table.add_data(True)
    table.add_data(False)
    table.add_data(np.bool_(True))

    # Array of Numerics
    table = wandb.Table(columns=["A"], dtype=[[NumberType]])
    table.add_data(None)
    table.add_data([42])
    table.add_data(np.array([1, 0], dtype=np.byte))
    for numeric_type in numeric_types:
        table.add_data(np.array([42, 42], dtype=numeric_type))

    # Array of Booleans
    table = wandb.Table(columns=["A"], dtype=[[BooleanType]])
    table.add_data(None)
    table.add_data([True])
    table.add_data([False])
    table.add_data(np.array([True, False], dtype=np.bool_))

    # Nested arrays
    table = wandb.Table(columns=["A"])
    table.add_data([[[[1, 2, 3]]]])
    table.add_data(np.array([[[[1, 2, 3]]]]))
def run_simulation(self, weights, lengths, params_matrix, speeds, logger, args, n_nodes, n_work_items, n_params, nstep, n_inner_steps, buf_len, states, dt, min_speed):
    """Run ``nstep`` kernel launches and collect the ``tavg`` buffer after each.

    Host-side ``tavg`` and ``state`` arrays are created zero-filled (float32),
    uploaded together with ``weights``, ``lengths`` and the transposed
    ``params_matrix`` through ``self.make_gpu_data``, and the kernel built by
    ``self.make_kernel`` is launched once per step.

    Parameters
    ----------
    weights, lengths : arrays uploaded unchanged under the keys
        ``'weights'`` and ``'lengths'``.
    params_matrix : array; its transpose is uploaded as ``'params'``.
    speeds : not used by this method.
    logger : object with an ``info`` method used for progress messages.
    args : namespace providing ``filename``, ``n_coupling``, ``n_speed``,
        ``blockszx``, ``blockszy`` and ``lineinfo``.
    n_nodes, n_work_items, buf_len, states : sizes that determine the shapes
        ``tavg = (n_nodes, n_work_items)`` and
        ``state = (buf_len, states * n_nodes, n_work_items)``.
    n_params, n_inner_steps, dt, min_speed : scalars forwarded to the kernel.
    nstep : number of kernel launches.

    Returns
    -------
    numpy.ndarray
        Stack of the ``nstep`` snapshots appended during the loop.
    """
    # setup data#{{{
    data = { 'weights': weights, 'lengths': lengths, 'params': params_matrix.T }
    # One trailing axis of length n_work_items is appended to every buffer.
    base_shape = n_work_items,
    for name, shape in dict(
        tavg=(n_nodes,),
        state=(buf_len, states * n_nodes),
        ).items():
        data[name] = np.zeros(shape + base_shape, 'f')

    gpu_data = self.make_gpu_data(data)#{{{
    # logger.info('history shape %r', data['state'].shape)
    logger.info('on device mem: %.3f MiB' % (self.nbytes(data) / 1024 / 1024, ))#}}}

    # setup CUDA stuff#{{{
    step_fn = self.make_kernel(
        source_file=args.filename,
        warp_size=32,
        block_dim_x=args.n_coupling,
        # ext_options=preproccesor_defines,
        # caching=args.caching,
        args=args,
        lineinfo=args.lineinfo,
        nh=buf_len,
        # model=args.model,
        )#}}}

    # setup simulation#{{{
    # NOTE: tic, streams and events are only referenced by commented-out
    # code below; they are created but otherwise unused.
    tic = time.time()
    # logger.info('nstep %i', nstep)
    streams = [drv.Stream() for i in range(32)]
    events = [drv.Event() for i in range(32)]
    tavg_unpinned = []
    # Page-locked host buffer that receives the device tavg after each step.
    tavg = drv.pagelocked_zeros(data['tavg'].shape, dtype=np.float32)
    # logger.info('data[tavg].shape %s', data['tavg'].shape)
    #}}}

    # Integer division: any remainder of n_coupling / n_speed beyond a whole
    # number of blocks is not covered by the launch grid.
    gridx = args.n_coupling // args.blockszx
    gridy = args.n_speed // args.blockszy
    final_block_dim = args.blockszx, args.blockszy, 1
    final_grid_dim = gridx, gridy

    # logger.info('final block dim %r', final_block_dim)
    logger.info('final grid dim %r', final_grid_dim)
    # assert n_coupling_per_block * n_coupling_blocks == args.n_coupling #}}}

    # logger.info('gpu_data[lengts] %s', gpu_data['lengths'].shape)
    # logger.info('nnodes %r', n_nodes)
    # logger.info('gpu_data[lengths] %r', gpu_data['lengths'])

    # run simulation#{{{
    # logger.info('submitting work')
    import tqdm
    for i in tqdm.trange(nstep):

        # event = events[i % 32]
        # stream = streams[i % 32]

        # stream.wait_for_event(events[(i - 1) % 32])

        step_fn(np.uintc(i * n_inner_steps), np.uintc(n_nodes), np.uintc(buf_len), np.uintc(n_inner_steps),
                np.uintc(n_params), np.float32(dt), np.float32(min_speed),
                gpu_data['weights'], gpu_data['lengths'], gpu_data['params'], gpu_data['state'],
                gpu_data['tavg'],
                block=final_block_dim, grid=final_grid_dim)

        # event.record(streams[i % 32])
        # NOTE(review): the snapshot is appended BEFORE the device buffer is
        # copied into `tavg`, so entry k holds the result fetched after step
        # k-1 (entry 0 is all zeros) and the result of the final step is
        # never appended.  Looks like a leftover from the commented-out
        # stream/event pipeline - confirm this lag is intended.
        tavg_unpinned.append(tavg.copy())
        drv.memcpy_dtoh(
            tavg,
            gpu_data['tavg'].ptr)

    # logger.info('kernel finish..')
    # release pinned memory
    tavg = np.array(tavg_unpinned)
    return tavg
def execute_swipe(d_ev, d_e, vcount, d_ee, d_mark, ecount):
    """
    Launch the ``executeSwipe`` kernel over all vertices.

    ``d_ee`` and ``d_mark`` are uploaded to the device, handed to the kernel
    and copied back into the same host arrays.  The successor-swapping logic
    inside the kernel is currently commented out, so the kernel only walks
    each vertex's edge range without writing anything.

    :param d_ev: host array of ``EulerVertex`` records (input only)
    :param d_e: host array of edge indices (input only)
    :param vcount: number of vertices; must provide ``.item()`` (e.g. a NumPy
        scalar) because that is how it is passed to the launch configuration
    :param d_ee: host array of ``EulerEdge`` records; round-tripped in place
    :param d_mark: host array of edge marks; round-tripped in place
    :param ecount: bound the kernel compares edge ids against
    :return: tuple ``(d_ee, d_mark)``
    """
    logger = logging.getLogger('eulercuda.pyeulertour.execute_swipe')
    logger.info("started.")
    mod = SourceModule("""
    typedef unsigned long long KEY_T ;
    typedef struct EulerVertex
    {
        KEY_T vid;
        unsigned int ep;
        unsigned int ecount;
        unsigned int lp;
        unsigned int lcount;
    } EulerVertex;

    typedef struct EulerEdge
    {
        KEY_T eid;
        unsigned int v1;
        unsigned int v2;
        unsigned int s;
        unsigned int pad;
    }EulerEdge;

    __global__ void executeSwipe(
        EulerVertex * ev,
        unsigned int * e,
        unsigned int vcount ,
        EulerEdge * ee,
        unsigned int * mark,
        unsigned int ecount)
    {
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        unsigned int t;
        unsigned int index=0;
        unsigned int maxIndex;
        unsigned int s;
        if (tid < vcount)
        {
            index = ev[tid].ep;
            maxIndex = index + ev[tid].ecount - 1;
            while (index < maxIndex && ee[e[index]].eid < ecount)
            {
                /*
                if (mark[ee[e[index]].eid] == 1)
                {
                    t = index;
                    s = ee[e[index]].s;
                    while (mark[ee[e[index]].eid] == 1 && index < maxIndex)
                    {
                        ee[e[index]].s = ee[e[index+1]].s;
                        index = index + 1;
                    }
                    if(t != index)
                    {
                        ee[e[index]].s = s;
                    }
                }
                */
                index++;
            }
        }
    }
    """)
    block_dim, grid_dim = getOptimalLaunchConfiguration(vcount.item(), 512)
    np_d_mark = gpuarray.to_gpu(d_mark)
    np_d_ee = gpuarray.to_gpu(d_ee)
    swipe = mod.get_function('executeSwipe')
    # The argument list must mirror the kernel signature exactly: six
    # parameters.  A seventh value (the size of d_ee) used to be passed here
    # although the kernel declares no parameter to receive it.
    swipe(
        drv.In(d_ev),
        drv.In(d_e),
        np.uintc(vcount),
        np_d_ee,
        np_d_mark,
        np.uintc(ecount),
        block=block_dim, grid=grid_dim
    )
    np_d_ee.get(d_ee)
    np_d_mark.get(d_mark)
    logger.info('Finished.')
    return d_ee, d_mark
def test_ndarray_uintc(self): self.run_test( 'def ndarray_uintc(a): import numpy as np; return np.uintc(a), np.array([a, a], dtype=np.uintc)', numpy.uintc(5), ndarray_uintc=[numpy.uintc])
# Construct every NumPy scalar alias once; each call only has to be accepted
# (the results are discarded).
np.bytes0()
np.string_()
np.object0()
np.void0(0)
np.byte()
np.short()
np.intc()
np.intp()
np.int0()
np.int_()
np.longlong()
np.ubyte()
np.ushort()
np.uintc()
np.uintp()
np.uint0()
np.uint()
np.ulonglong()
np.half()
np.single()
np.double()
np.float_()
np.longdouble()
np.longfloat()
np.csingle()
np.singlecomplex()
np.cdouble()
def calculate_circuit_graph_edge_data(d_ev, d_e, vcount, d_D, d_cg_offset, ecount, d_cedgeCount):
    """
    Count, per circuit, the circuit-graph edges found between adjacent edges.

    One thread handles one vertex with at least one edge.  For each pair of
    consecutive entries ``e[i]``, ``e[i + 1]`` in the vertex's edge range it
    looks up ``c1 = map[D[e[i]]]`` and ``c2 = map[D[e[i + 1]]]`` and, when
    they differ, atomically increments ``cedgeCount[min(c1, c2)]``.

    :param d_ev: host array of ``EulerVertex`` records (input only)
    :param d_e: host array of edge indices (input only)
    :param vcount: number of vertices
    :param d_D: host array indexed by edge index (input only)
    :param d_cg_offset: host array passed to the kernel as ``map`` (input only)
    :param ecount: bound used for the kernel's index checks and as the
        wrap-around limit of ``atomicInc``
    :param d_cedgeCount: host array of counters; updated in place
    :return: ``d_cedgeCount``
    """
    log = logging.getLogger('eulercuda.pyeulertour.calculate_circuit_graph_edge_data')
    log.info("started.")

    kernel_source = """
    #include <stdio.h>
    typedef unsigned long long KEY_T;
    typedef struct EulerVertex{
        KEY_T vid;
        unsigned int ep;
        unsigned int ecount;
        unsigned int lp;
        unsigned int lcount;
    }EulerVertex;

    __global__ void calculateCircuitGraphEdgeData(
        EulerVertex* v,
        unsigned int * e,
        unsigned vCount,
        unsigned int * D,
        unsigned int * map,
        unsigned int ecount,
        unsigned int * cedgeCount)
    {
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        unsigned int index = 0;
        unsigned int maxIndex = 0;
        unsigned int c1;
        unsigned int c2;
        index = 0;
        maxIndex = 0;
        if(tid < vCount && v[tid].ecount > 0 )
        {
            index = v[tid].ep;
            maxIndex = index + v[tid].ecount - 1;
            // printf(" index = %u, max = %u ", index, maxIndex);
            while (index < maxIndex && index < ecount )
            {
                if (e[index] < ecount && e[index + 1] < ecount)
                {
                    // printf(" map = %u, D = %u ", map[D[e[index]]], D[e[index]]);
                    c1 = map[D[e[index]]];
                    c2 = map[D[e[index + 1]]];
                    if( c1 != c2)
                    {
                        unsigned int c = min(c1, c2);
                        atomicInc(cedgeCount + c, ecount);
                    }
                }
                index++;
            }
        }
    }
    """
    module = SourceModule(kernel_source)
    kernel = module.get_function('calculateCircuitGraphEdgeData')

    # Only the counters are written by the kernel, so only they need a
    # device buffer that is fetched back afterwards.
    counts_on_device = gpuarray.to_gpu(d_cedgeCount)
    block_dim, grid_dim = getOptimalLaunchConfiguration(vcount, 512)

    launch_args = (
        drv.In(d_ev),
        drv.In(d_e),
        np.uintc(vcount),
        drv.In(d_D),
        drv.In(d_cg_offset),
        np.uintc(ecount),
        counts_on_device,
    )
    kernel(*launch_args, block=block_dim, grid=grid_dim)

    counts_on_device.get(d_cedgeCount)
    return d_cedgeCount
def __init__(self, triangles, nodes, minGraphDiam, maxNumGraphNodes=10, minNumTrianglesInGraph=1, libPath=None):
    """Build a mesh graph through the C library and copy out its contents.

    The graph is created in the native library, its node boundaries and
    per-node triangle lists are copied into ``self.boundaries`` and
    ``self.triangleList``, and the native graph is then freed again.

    Parameters
    ----------
    triangles : numpy.ndarray
        2-D simplex array handed to the library by raw pointer; assumes a
        C-contiguous array of C ``unsigned int`` - TODO confirm with callers.
    nodes : numpy.ndarray
        2-D point array handed to the library by raw pointer; assumes a
        C-contiguous array of C ``double`` - TODO confirm with callers.
    minGraphDiam : float
        Minimum graph node diameter.
    maxNumGraphNodes : int, optional
        Maximum number of graph nodes.
    minNumTrianglesInGraph : int, optional
        Minimum number of triangles in each graph node.
    libPath : str, optional
        Path of the shared library; when omitted the existing
        ``self._libPath`` is used.

    Raises
    ------
    Exception
        If any library call returns a non-zero status.
    """
    if libPath is not None:
        self._libPath = libPath

    # Instantiate C library
    self._libInstance = ctypes.CDLL(self._libPath)
    lib = self._libInstance

    # Maximum number of graph nodes
    maxNumGraphNodes = ctypes.c_uint(maxNumGraphNodes)
    # Minimum graph node diameter
    minGraphDiam = ctypes.c_double(minGraphDiam)
    # Minimum number of triangles in each graph node
    minNumTrianglesInGraph = ctypes.c_uint(minNumTrianglesInGraph)

    # Represent the triangles
    triangles_p = triangles.ctypes.data_as(self.c_uint_p)
    # Represent the nodes
    points_p = nodes.ctypes.data_as(self.c_double_p)

    # Outputs of the create call: number of graph nodes and graph index
    numGraphNodes = ctypes.c_uint(0)
    idx = ctypes.c_uint(0)

    # Declare the signatures of every library function used below
    lib.MeshGraph_createGraph.restype = ctypes.c_int
    lib.MeshGraph_createGraph.argtypes = [
        self.c_double_p, ctypes.c_uint, ctypes.c_uint,
        self.c_uint_p, ctypes.c_uint, ctypes.c_uint,
        ctypes.c_uint, ctypes.c_double, ctypes.c_uint,
        self.c_uint_p, self.c_uint_p]
    lib.MeshGraph_getNodeBoundaries.restype = ctypes.c_int
    lib.MeshGraph_getNodeBoundaries.argtypes = [
        ctypes.c_uint, self.c_double_p, ctypes.c_uint, ctypes.c_uint]
    lib.MeshGraph_getNodeNumTriangles.restype = ctypes.c_int
    lib.MeshGraph_getNodeNumTriangles.argtypes = [
        ctypes.c_uint, ctypes.c_uint, self.c_uint_p]
    lib.MeshGraph_getNodeTriangles.restype = ctypes.c_int
    lib.MeshGraph_getNodeTriangles.argtypes = [
        ctypes.c_uint, ctypes.c_uint, ctypes.c_uint,
        ctypes.c_uint, self.c_uint_p]
    lib.MeshGraph_freeGraph.restype = ctypes.c_int
    lib.MeshGraph_freeGraph.argtypes = [ctypes.c_uint]

    # Create graph
    status = lib.MeshGraph_createGraph(
        points_p, ctypes.c_uint(nodes.shape[0]), ctypes.c_uint(nodes.shape[1]),
        triangles_p, ctypes.c_uint(triangles.shape[0]), ctypes.c_uint(triangles.shape[1] - 1),
        maxNumGraphNodes, minGraphDiam, minNumTrianglesInGraph,
        ctypes.byref(numGraphNodes), ctypes.byref(idx))
    if status != 0:
        raise Exception("Uknown error occured!")

    # From here on the native graph exists; make sure it is released even
    # when one of the queries below fails.
    try:
        numNodes = numGraphNodes.value

        # Preallocate boundaries
        self.boundaries = np.nan * np.ones(
            (numNodes, nodes.shape[1], 2), dtype=np.float64)
        boundaries_p = self.boundaries.ctypes.data_as(self.c_double_p)

        # Get node boundaries
        status = lib.MeshGraph_getNodeBoundaries(
            idx, boundaries_p, numGraphNodes, ctypes.c_uint(nodes.shape[1]))
        if status != 0:
            raise Exception("Uknown error occured!")

        # Create a list of list of triangles (one for each graph node)
        self.triangleList = [None] * numNodes
        numTriangles = ctypes.c_uint(0)

        # Loop through all nodes and populate
        for iterNodes in range(numNodes):
            # Get number of triangles
            status = lib.MeshGraph_getNodeNumTriangles(
                idx, ctypes.c_uint(iterNodes), ctypes.byref(numTriangles))
            if status != 0:
                raise Exception("Uknown error occured!")

            # Preallocate space for the triangles
            self.triangleList[iterNodes] = np.zeros(numTriangles.value, dtype=np.uintc)
            triangles_p = self.triangleList[iterNodes].ctypes.data_as(self.c_uint_p)

            # Acquire the triangles
            status = lib.MeshGraph_getNodeTriangles(
                idx, ctypes.c_uint(iterNodes), ctypes.c_uint(nodes.shape[1]),
                numTriangles, triangles_p)
            if status != 0:
                raise Exception("Uknown error occured!")
    finally:
        # Free graph
        status = lib.MeshGraph_freeGraph(idx)

    if status != 0:
        raise Exception("Uknown error occured!")
def bucket_sort_device(d_bufferK, d_bufferV, d_start, d_bucketSize, bucketCount, d_TK, d_TV):
    """
    Sort the keys of every bucket on the GPU by rank counting.

    One thread block of 32 threads handles one bucket.  The bucket's keys are
    loaded into shared memory; each key's rank is the number of keys in the
    bucket that are smaller than it, and the key/value pair is written to
    slot ``bucket * 520 + rank`` of ``d_TK`` / ``d_TV``.

    :param d_bufferK: host array of keys grouped by bucket (input only)
    :param d_bufferV: host array of values matching ``d_bufferK`` (input only)
    :param d_start: host array with the first index of each bucket (input only)
    :param d_bucketSize: host array with the item count of each bucket
        (input only)
    :param bucketCount: number of buckets, i.e. number of thread blocks
    :param d_TK: host array receiving the sorted keys; updated in place
    :param d_TV: host array receiving the matching values; updated in place
    :return: tuple ``(d_TK, d_TV)``
    """
    log = logging.getLogger('eulercuda.pygpuhash.bucket_sort_device')
    log.info("started.")

    kernel_source = """
    typedef unsigned long long KEY_T ;
    typedef KEY_T *KEY_PTR;
    typedef unsigned int VALUE_T;
    typedef VALUE_T *VALUE_PTR;
    #define MAX_BUCKET_ITEM (520)
    #define GET_KEY_INDEX(blockIdx,itemIdx) ((blockIdx) * MAX_BUCKET_ITEM + (itemIdx))
    #define GET_VALUE_INDEX(blockIdx,itemIdx) ((blockIdx) * MAX_BUCKET_ITEM + (itemIdx))

    __global__ void bucketSort(
        KEY_PTR bufferK,
        VALUE_PTR bufferV,
        unsigned int *start,
        unsigned int *bucketSize,
        unsigned int bucketCount,
        KEY_PTR TK,
        VALUE_PTR TV)
    {
        __shared__ KEY_T keys[MAX_BUCKET_ITEM];
        unsigned int keyCount[MAX_BUCKET_ITEM / 32];
        unsigned int blockOffset = start[blockIdx.x];
        unsigned int size = bucketSize[blockIdx.x];
        unsigned int chunks = size >> 5;
        chunks = (chunks << 5 == size) ? chunks : chunks + 1;
        for(unsigned int j = 0; j < chunks; j++)
        {
            if ((j << 5) + threadIdx.x < size)
                keys[(j << 5) + threadIdx.x] = bufferK[blockOffset + (j << 5) + threadIdx.x];
        }
        __syncthreads();
        for (unsigned int j = 0; j < chunks; j++)
        {
            if ((j << 5) + threadIdx.x < size)
            {
                keyCount[j] = 0;
                for(int i=0; i < size; i++)
                {
                    keyCount[j] = ( keys[(j << 5) + threadIdx.x] > keys[i] ) ? keyCount[j] + 1 : keyCount[j];
                }
            }
        }
        __syncthreads();
        for (unsigned int j = 0; j < chunks; j++)
        {
            if ((j << 5) + threadIdx.x < size)
            {
                TK[GET_KEY_INDEX(blockIdx.x, keyCount[j])] = keys[(j << 5) + threadIdx.x];
                TV[GET_VALUE_INDEX(blockIdx.x, keyCount[j])] = bufferV[blockOffset + (j << 5) + threadIdx.x];
            }
        }
    }
    """
    module = SourceModule(kernel_source)
    kernel = module.get_function('bucketSort')

    # One block per bucket; the kernel's chunking assumes 32 threads per block.
    block_dim = (32, 1, 1)
    grid_dim = (bucketCount, 1, 1)
    log.info('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))

    sorted_keys_on_device = gpuarray.to_gpu(d_TK)
    sorted_values_on_device = gpuarray.to_gpu(d_TV)

    kernel(
        drv.In(d_bufferK),
        drv.In(d_bufferV),
        drv.In(d_start),
        drv.In(d_bucketSize),
        np.uintc(bucketCount),
        sorted_keys_on_device,
        sorted_values_on_device,
        grid=grid_dim,
        block=block_dim,
    )

    sorted_keys_on_device.get(d_TK)
    sorted_values_on_device.get(d_TV)

    # Device data is still queried although the occupancy report that used
    # it is disabled.
    device_info = pycuda.tools.DeviceData()
    log.info("Finished. Leaving.")
    return d_TK, d_TV
def copy_to_bucket_device(d_keys, d_values, d_offset, d_length, d_start, bucketCount, d_bufferK, d_bufferV):
    """
    Scatter key/value pairs into their hash buckets on the GPU.

    For every ``i < d_length`` the kernel hashes ``d_keys[i]`` to a bucket
    and stores the pair at index ``d_start[bucket] + d_offset[i]`` of
    ``d_bufferK`` / ``d_bufferV``.

    :param d_keys: sequence of keys; converted to an unsigned 64-bit array
    :param d_values: sequence of values; converted to a C ``unsigned int`` array
    :param d_offset: host array with each item's offset inside its bucket
    :param d_length: number of items to process
    :param d_start: host array with the first index of each bucket (input only)
    :param bucketCount: number of buckets used by the hash
    :param d_bufferK: host array receiving the keys; updated in place
    :param d_bufferV: host array receiving the values; updated in place
    :return: tuple ``(d_bufferK, d_bufferV)``
    """
    logger = logging.getLogger('eulercuda.pygpuhash.copy_to_bucket_device')
    logger.info("started.")
    mod = SourceModule("""
    #define C0 0x01010101
    #define C1 0x12345678
    #define LARGE_PRIME 1900813
    #define MAX_INT 0xffffffff

    __forceinline__ __device__ unsigned int hash_h(unsigned long long key, unsigned int bucketCount)
    {
        return ((C0 + C1 * key) % LARGE_PRIME ) % bucketCount;
    }

    __global__ void copyToBucket(
        unsigned long long *keys,
        unsigned int *values,
        unsigned int * offset,
        unsigned int length,
        unsigned int* start,
        unsigned int bucketCount,
        unsigned long long * bufferK,
        unsigned int *bufferV)
    {
        unsigned tid = (blockDim.x * blockDim.y * gridDim.x * blockIdx.y) + (blockDim.x * blockDim.y * blockIdx.x) + (blockDim.x * threadIdx.y) + threadIdx.x;
        if (tid < length)
        {
            unsigned long long key = keys[tid];
            unsigned int bucket = hash_h(key,bucketCount);
            unsigned int value = values[tid];
            unsigned int index = start[bucket] + offset[tid];
            bufferK[index] = key;
            bufferV[index] = value;
        }
    }
    """)
    copy_to_bucket = mod.get_function("copyToBucket")

    # Force the element types the kernel signature expects.
    np_d_keys = np.array(d_keys).astype(np.ulonglong)
    np_d_values = np.array(d_values).astype(np.uintc)
    keys_gpu = gpuarray.to_gpu(np_d_keys)
    values_gpu = gpuarray.to_gpu(np_d_values)
    offset_gpu = gpuarray.to_gpu(d_offset)

    np_d_bufferK = gpuarray.to_gpu(d_bufferK)
    np_d_bufferV = gpuarray.to_gpu(d_bufferV)

    threads_per_block = 1024
    block_dim = (threads_per_block, 1, 1)
    # Round the block count UP so the trailing partial block is launched as
    # well; floor division silently skipped the last d_length % 1024 items.
    # Surplus threads are harmless because the kernel checks tid < length.
    # At least one block is always launched, as before.
    num_blocks = max(1, (int(d_length) + threads_per_block - 1) // threads_per_block)
    grid_dim = (num_blocks, 1, 1)
    logger.info('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))

    copy_to_bucket(
        keys_gpu,
        values_gpu,
        offset_gpu,
        np.uintc(d_length),
        drv.In(d_start),
        np.uintc(bucketCount),
        np_d_bufferK,
        np_d_bufferV,
        grid=grid_dim, block=block_dim
    )

    # Copy the filled buckets back into the caller's host buffers.
    np_d_bufferK.get(d_bufferK)
    np_d_bufferV.get(d_bufferV)
    logger.info('Finished. Leaving.')
    return d_bufferK, d_bufferV
# Aliases reveal_type(np.unicode_()) # E: numpy.str_ reveal_type(np.str0()) # E: numpy.str_ reveal_type(np.byte()) # E: numpy.signedinteger[numpy.typing._ reveal_type(np.short()) # E: numpy.signedinteger[numpy.typing._ reveal_type(np.intc()) # E: numpy.signedinteger[numpy.typing._ reveal_type(np.intp()) # E: numpy.signedinteger[numpy.typing._ reveal_type(np.int0()) # E: numpy.signedinteger[numpy.typing._ reveal_type(np.int_()) # E: numpy.signedinteger[numpy.typing._ reveal_type(np.longlong()) # E: numpy.signedinteger[numpy.typing._ reveal_type(np.ubyte()) # E: numpy.unsignedinteger[numpy.typing._ reveal_type(np.ushort()) # E: numpy.unsignedinteger[numpy.typing._ reveal_type(np.uintc()) # E: numpy.unsignedinteger[numpy.typing._ reveal_type(np.uintp()) # E: numpy.unsignedinteger[numpy.typing._ reveal_type(np.uint0()) # E: numpy.unsignedinteger[numpy.typing._ reveal_type(np.uint()) # E: numpy.unsignedinteger[numpy.typing._ reveal_type(np.ulonglong()) # E: numpy.unsignedinteger[numpy.typing._ reveal_type(np.half()) # E: numpy.floating[numpy.typing._ reveal_type(np.single()) # E: numpy.floating[numpy.typing._ reveal_type(np.double()) # E: numpy.floating[numpy.typing._ reveal_type(np.float_()) # E: numpy.floating[numpy.typing._ reveal_type(np.longdouble()) # E: numpy.floating[numpy.typing._ reveal_type(np.longfloat()) # E: numpy.floating[numpy.typing._ reveal_type(np.csingle()) # E: numpy.complexfloating[numpy.typing._ reveal_type(np.singlecomplex()) # E: numpy.complexfloating[numpy.typing._ reveal_type(np.cdouble()) # E: numpy.complexfloating[numpy.typing._
from time import time limit = 10000 # limit = int(input('Limit: ')) start_time = time() with open('primes1.txt') as f: primes = np.fromiter(map(int, f.read().strip().split(',')), dtype=np.uint32) antiprimes = [] most = 0 for x in np.arange(2, limit + 1): fac = {} x_temp = x for prime in primes: more = np.mod(x_temp, prime) == np.uintc(0) if more: fac[prime] = 0 while more: fac[prime] += 1 x_temp = np.floor_divide(x_temp, prime) more = np.mod(x_temp, prime) == np.uintc(0) if x_temp <= np.uintc(1): break if not fac: continue num_primes = len(fac) if not np.array_equal(np.fromiter(fac, np.uint32), primes[:num_primes]): continue ''' iter_fac = iter(fac)
limit = 100 # limit = int(input('Limit: ')) start_time = time() with open('primes1.txt') as f: primes = np.fromiter(map(int, f.read().strip().split(',')), dtype=np.uint32) p_gpu = gpuarray.to_gpu(primes) antiprimes = [] most = 0 for x in gpuarray.arange(2, limit + 1, dtype=np.uint32): print(x) fac = [[], []] x_temp = x.copy() for prime in p_gpu: more = cumath.fmod(x_temp, prime).get() == np.uintc(0) if more: power = 0 while more: power += 1 x_temp = x_temp / prime more = cumath.fmod(x_temp, prime).get() == np.uintc(0) if more: fac[0].append(prime) fac[1].append(power) if x_temp.get() <= np.uintc(1): break if not fac: continue num_primes = len(fac[0]) eq_gpu = gpuarray.to_gpu(np.empty(num_primes, dtype=np.int32))
def Seed2Key(self, recievedSeedBytes):
    """Derive the 3-byte security-access key from a 3-byte seed.

    The seed bytes are combined with fixed challenge bytes into two 32-bit
    words.  A 24-bit register ``A`` is then stepped 64 times: on each step
    the lowest bit of ``A`` is XORed with the lowest challenge bit, fed back
    into bit 23, and XORed into bits 20, 15, 12, 5 and 3.  Finally three
    response bytes are assembled from nibbles of ``A``.

    Parameters
    ----------
    recievedSeedBytes : sequence of three byte values, most significant
        first (index 0 ends up in bits 16-23 of the seed).

    Returns
    -------
    ``self.decodedKeyBytes`` with indices 0..2 overwritten by the key bytes
    R1, R2, R3 (index 0 holds R1).  The container must already exist and
    hold at least three items.

    Side effects: prints the seed bytes and the computed key to stdout.
    """
    SECURITY_MASK = np.uint32(0xEF6FD7) # Mask for position C. Bytes C21, C16, C13, C6 and C4. (111011110110111111010111)
    SECURITY_POSITION_A_CONSTANT = np.uint32(0xC541A9) # Position A:3 bytes fixed constants in the specification
    # The five fixed challenge bytes.  Only referenced by the commented-out
    # CB_H derivation below; the live code hard-codes 0x7A03DB35 and 0x71.
    SECURITY_FIXEDBYTES = np.uint64(0x7A03DB3571)
    retVal = np.uint32(0x0) # return value 00,R1,R2,R3
    seed = np.uint32(0x0)
    # convert the recievedSeedBytes [0x00,0x00,0x00] into np uint32 type
    for i in range(3):
        seed = np.left_shift(seed,8)
        seed = np.bitwise_or(seed,np.uint8(recievedSeedBytes[i]))
        #print("recievedSeedBytes[i]: ", str(hex(recievedSeedBytes[i])))
    #print("seed Value: ", str(hex(seed)))
    # np.int_() , c type, long.
    # np.uintc() , c type, unsigned int.
    # The declarations below only pre-create the names; every one of them is
    # reassigned before it is read.
    R1, R2, R3, R_LHS, R_RHS = np.int_(),np.int_(),np.int_(),np.int_(),np.int_() # final response bytes
    CB_H, CB_L = np.int_(),np.int_() # challenge bytes
    A = np.int_() # A initial
    B24, B21, B16, B13, B6, B4 = np.int_(),np.int_(),np.int_(),np.int_(),np.int_(),np.int_() # single bits in lower block
    C21, C16, C13, C6, C4 = np.int_(),np.int_(),np.int_(),np.int_(),np.int_() # single bits in upper block
    i = np.uintc()
    CB_32 = np.uint32(0x0)
    S1, S2, S3 =np.uint8(0x0),np.uint8(0x0),np.uint8(0x0)
    # np.right_shift(x1, x2) , Shift the bits of an integer to the right.
    # Split the seed back into its bytes: S1 is the most significant one.
    S1 = np.bitwise_and(np.right_shift(seed,16),0xFF)
    S2 = np.bitwise_and(np.right_shift(seed,8),0xFF)
    S3 = np.bitwise_and(seed,0xFF)
    print("current seed byte1 : ", str(hex(S1)),"current seed byte2 : ", str(hex(S2)),"current seed byte3 : ", str(hex(S3)))
    # Calculate last 4 bytes of the challenge number (F5 F4 F3 F2)
    # CB_H = np.right_shift(SECURITY_FIXEDBYTES,8)
    CB_H = np.uint32(0x7A03DB35)
    # Calculate first 4 bytes of the challenge number (F1 S3 S2 S1)
    CB_L = (np.bitwise_and(0x71 ,0xFF) * 256) + S3;
    CB_L = (CB_L * 256) + S2;
    CB_L = (CB_L * 256) + S1; #First 4 bytes of the challenge number (F1 S3 S2 S1)
    A = SECURITY_POSITION_A_CONSTANT; # 3 bytes initial value, These are fixed constants in the Ford specification
    i = 0
    CB_32 = CB_L
    ##########################################
    # 64 steps: the first 32 consume the bits of CB_L, the last 32 those of CB_H.
    while (i < 64):
        i=i+1
        if (i == 33):
            # Low challenge word is used up; continue with the high word.
            CB_32 = CB_H
        # Feedback bit: lowest bit of A XOR lowest remaining challenge bit.
        B24 = np.bitwise_xor(np.bitwise_and(A ,0x01) ,np.bitwise_and(CB_32 ,0x01))
        A = np.right_shift(A,1)
        # printf("\nA first time %X\n", A);
        A = np.left_shift(B24 ,23) + A
        # printf("\nA second time %X B24 second time %X\n", A, B24);
        # Position A: extract the tap bits (bit numbers are 0-based shifts).
        B21 = np.right_shift(A , 20)
        B21 = np.bitwise_and(B21 , 0x01)
        B16 = np.right_shift(A ,15)
        B16 = np.bitwise_and(B16 , 0x01)
        B13 = np.right_shift(A , 12)
        B13 = np.bitwise_and(B13 , 0x01)
        B6 = np.right_shift(A , 5)
        B6 = np.bitwise_and(B6 , 0x01)
        B4 = np.right_shift(A , 3)
        B4 = np.bitwise_and(B4 , 0x01)
        # Position B: XOR every tap bit with the feedback bit.
        C21 = np.bitwise_xor(B24 , B21)
        C21 = np.bitwise_and(C21 , 0x01)
        C16 = np.bitwise_xor(B24 , B16)
        C16 = np.bitwise_and(C16 , 0x01)
        C13 = np.bitwise_xor(B24 , B13)
        C13 = np.bitwise_and(C13 , 0x01)
        C6 = np.bitwise_xor(B24 , B6)
        C6 = np.bitwise_and(C6 , 0x01)
        C4 = np.bitwise_xor(B24 , B4)
        C4 = np.bitwise_and(C4 , 0x01)
        # Clear the tap positions, then write the XORed bits back into them.
        A = np.bitwise_and(A , SECURITY_MASK)
        # Position C
        A = np.left_shift(C21 , 20) + A
        A = np.left_shift(C16 , 15) + A
        A = np.left_shift(C13 , 12) + A
        A = np.left_shift(C6 , 5) + A
        A = np.left_shift(C4 , 3) + A
        CB_32 = np.right_shift(CB_32 , 1)
    # Calculate R1: bits 4..11 of A
    R1 = np.bitwise_and(A , 0xFFF)
    R1 = np.right_shift(R1 , 4)
    # Calculate R2: high nibble = bits 12..15 of A, low nibble = bits 20..23
    R_RHS = np.right_shift(A , 20)
    R_RHS = np.bitwise_and(R_RHS , 0xF)
    R_LHS = np.right_shift(A , 12)
    R_LHS = np.bitwise_and(R_LHS , 0xF)
    R_LHS = np.left_shift(R_LHS , 4)
    R2 = R_LHS + R_RHS;
    # Calculate R3: high nibble = bits 0..3 of A, low nibble = bits 16..19
    R_LHS = np.bitwise_and(A , 0xF)
    R_LHS = np.left_shift(R_LHS , 4)
    R_RHS = np.right_shift(A , 16)
    R_RHS = np.bitwise_and(R_RHS , 0xF)
    R3 = R_LHS + R_RHS;
    #print("A Value: ", str(hex(A)))
    #print("current R1 : ", str(hex(R1)),"current R2 : ", str(hex(R2)),"current R3 : ", str(hex(R3)))
    # Pack the response as 0x00 R1 R2 R3.
    retVal = np.uintc( (np.left_shift(np.bitwise_and(R1 , 0x000000FF) , 16) | np.left_shift(np.bitwise_and(R2 , 0x000000FF) , 8) | np.bitwise_and(R3 , 0x000000FF)) )
    print("current retVal : ", str(hex(retVal)))
    ##########################################
    # retVal into the decodedKeyBytes[] and return it back
    # highest bytes in the retVal set into 1st position in the list
    for i in range(2,-1,-1):
        # Peel off the lowest byte each round, filling index 2 first.
        self.decodedKeyBytes[i] = np.bitwise_and(retVal,0xFF)
        retVal = np.right_shift(retVal,8)
        #print("current decodedKeyBytes[ ", i,"]hex value: ", str(hex(self.decodedKeyBytes[i])))
    return self.decodedKeyBytes
def assign_circuit_graph_edge_data(d_ev, d_e, vcount, d_D, d_cg_offset, ecount, d_cg_edge_start, d_cedgeCount,
                                   circuitVertexSize, d_cg_edge, circuitGraphEdgeCount):
    """
    Run the ``assignCircuitGraphEdgeData`` CUDA kernel and copy the circuit edges back.

    The arguments are forwarded positionally, so each one corresponds to the
    kernel parameter named in parentheses below.

    :param d_ev: Euler vertex records (kernel ``v``); wrapped in ``drv.In``.
    :param d_e: edge index array (kernel ``e``); wrapped in ``drv.In``.
    :param vcount: bound on the vertex indices processed (kernel ``vCount``); also used to size the launch.
    :param d_D: lookup array indexed by the values stored in ``d_e`` (kernel ``D``); wrapped in ``drv.In``.
    :param d_cg_offset: lookup array indexed by the values of ``d_D`` (kernel ``map``); wrapped in ``drv.In``.
    :param ecount: bound applied to edge indices and wrap value of the atomic decrement (kernel ``ecount``).
    :param d_cg_edge_start: per-circuit start offset into ``d_cg_edge`` (kernel ``cedgeOffset``).
    :param d_cedgeCount: per-circuit counters that the kernel decrements atomically (kernel ``cedgeCount``);
        wrapped in ``drv.In``.
    :param circuitVertexSize: forwarded as kernel ``cvCount``; the kernel body does not read it.
    :param d_cg_edge: circuit edge records (kernel ``cedge``); uploaded, written by the kernel and
        downloaded back into this same array.
    :param circuitGraphEdgeCount: forwarded as kernel ``cecount``; the kernel body does not read it.
    :return: ``d_cg_edge`` after the device-to-host copy.
    """
    log = logging.getLogger('eulercuda.pyeulertour.assign_circuit_graph_edge_data')
    log.info("started.")

    # For every vertex the kernel walks consecutive pairs of its edges; when the
    # two edges map to different circuits it records a CircuitEdge under the
    # smaller circuit id, in a slot reserved through atomicDec.
    kernel_source = """
    typedef unsigned long long KEY_T ;
    typedef struct EulerVertex{
        KEY_T vid;
        unsigned int ep;
        unsigned int ecount;
        unsigned int lp;
        unsigned int lcount;
    }EulerVertex;

    typedef struct CircuitEdge{
        unsigned int ceid;
        unsigned e1;
        unsigned e2;
        unsigned c1;
        unsigned c2;
    }CircuitEdge;

    __global__ void assignCircuitGraphEdgeData(EulerVertex* v,
                                               unsigned int * e,
                                               unsigned vCount,
                                               unsigned int * D,
                                               unsigned int * map,
                                               unsigned int ecount,
                                               unsigned int * cedgeOffset,
                                               unsigned int * cedgeCount,
                                               unsigned int cvCount,
                                               CircuitEdge * cedge,
                                               unsigned int cecount)
    {
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+ (blockDim.x*threadIdx.y)+threadIdx.x;
        unsigned int index=0;
        unsigned int maxIndex=0;
        if(tid<vCount && v[tid].ecount>0){
            index=v[tid].ep;
            maxIndex=index+v[tid].ecount-1;
            while (index<maxIndex && index < ecount )
            {
                if (e[index] < ecount && e[index + 1] < ecount)
                {
                    unsigned int c1=map[D[e[index]]];
                    unsigned int c2=map[D[e[index+1]]];
                    if( c1 != c2)
                    {
                        unsigned int c=min(c1,c2);
                        unsigned int t=max(c1,c2);
                        unsigned int i=atomicDec(cedgeCount+c,ecount);
                        i=i-1;
                        cedge[cedgeOffset[c]+i].c1=c;
                        cedge[cedgeOffset[c]+i].c2=t;
                        cedge[cedgeOffset[c]+i].e1=e[index];
                        cedge[cedgeOffset[c]+i].e2=e[index+1];
                    }
                }
                index++;
            }
        }
    }
    """
    module = SourceModule(kernel_source)

    block_dim, grid_dim = getOptimalLaunchConfiguration(vcount, 512)
    cg_edge_gpu = gpuarray.to_gpu(d_cg_edge)
    kernel = module.get_function('assignCircuitGraphEdgeData')

    # Positional order must match the kernel signature above.
    kernel_args = (
        drv.In(d_ev),
        drv.In(d_e),
        np.uintc(vcount),
        drv.In(d_D),
        drv.In(d_cg_offset),
        np.uintc(ecount),
        drv.In(d_cg_edge_start),
        drv.In(d_cedgeCount),
        np.uintc(circuitVertexSize),
        cg_edge_gpu,
        np.uintc(circuitGraphEdgeCount),
    )
    kernel(*kernel_args, block=block_dim, grid=grid_dim)

    # Download the device result into the caller's array.
    cg_edge_gpu.get(d_cg_edge)

    # Occupancy reporting via pycuda.tools.OccupancyRecord is disabled here.
    log.info('Finished.')
    return d_cg_edge
def encode_lmer_device(buffer, readCount, d_lmers, readLength, lmerLength):
    """
    Encode l-mers on the GPU with the ``encodeLmerDevice`` kernel.

    Each kernel thread reads 32 consecutive bytes starting at its own index,
    maps every byte to a 2-bit code through ``codeF`` (indexed by the low three
    bits of the byte), packs them into one 64-bit value, and then keeps the
    leading ``lmerLength`` codes by shifting and masking with ``lmerMask``.

    :param buffer: host array holding the read bytes; passed to the kernel via ``drv.In``.
    :param readCount: used together with ``lmerLength`` to choose the launch configuration.
    :param d_lmers: host array that receives the encoded values; overwritten in place.
    :param readLength: not used by this function.
    :param lmerLength: number of 2-bit codes kept per encoded value.
    :return: ``d_lmers`` (unchanged when either input is not a ``numpy.ndarray``).
    """
    module_logger.info("started encode_lmer_device.")

    kernel_source = """
    #include <stdio.h>
    typedef unsigned long long KEY_T ;
    typedef KEY_T * KEY_PTR ;
    __device__ __constant__ KEY_T lmerMask[] ={
        0x0000000000000003, 0x000000000000000F, 0x000000000000003F, 0x00000000000000FF, // 0 1 2 3
        0x00000000000003FF, 0x0000000000000FFF, 0x0000000000003FFF, 0x000000000000FFFF, // 4 5 6 7
        0x000000000003FFFF, 0x00000000000FFFFF, 0x00000000003FFFFF, 0x0000000000FFFFFF, // 8 9 10 11
        0x0000000003FFFFFF, 0x000000000FFFFFFF, 0x000000003FFFFFFF, 0x00000000FFFFFFFF, // 12 13 14 15
        0x00000003FFFFFFFF, 0x0000000FFFFFFFFF, 0x0000003FFFFFFFFF, 0x000000FFFFFFFFFF, // 16 17 18 19
        0x000003FFFFFFFFFF, 0x00000FFFFFFFFFFF, 0x00003FFFFFFFFFFF, 0x0000FFFFFFFFFFFF, // 20 21 22 23
        0x0003FFFFFFFFFFFF, 0x000FFFFFFFFFFFFF, 0x003FFFFFFFFFFFFF, 0x00FFFFFFFFFFFFFF, // 24 25 26 27
        0x03FFFFFFFFFFFFFF, 0x0FFFFFFFFFFFFFFF, 0x3FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF // 28 29 30 31
    };

    __device__ __constant__ unsigned char shifter[4] [4]=
    {
        {0,0,0,0},
        {1,4,16,64},
        {2,8,32,128},
        {3,12,48,192},
    };
    __device__ __constant__ char codeF[]={0,0,0,1,3,0,0,2};
    __device__ __constant__ char codeR[]={0,3,0,2,0,0,0,1};

    __global__ void encodeLmerDevice( char * read,
                                      // const unsigned int buffSize,
                                      // const unsigned int readLength,
                                      KEY_PTR lmers,
                                      const unsigned int lmerLength
                                      )
    {
        // extern __shared__ char read[];
        // const unsigned int tid=threadIdx.x;
        const unsigned int rOffset=(blockDim.x*blockDim.y*gridDim.x*blockIdx.y) +(blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y);
        const unsigned int tid = rOffset + threadIdx.x;
        KEY_T lmer=0;

        // read[tid] = buffer[rOffset + tid];
        __syncthreads();

        for (unsigned int i = 0; i < 8; i++) //calculate lmer
        {
            lmer = (lmer<< 8) | ((KEY_T)(shifter[codeF[read[tid + i * 4]& 0x07]][3] |
                                 shifter[codeF[read[tid + i * 4 + 1]& 0x07]][2] |
                                 shifter[codeF[read[tid + i * 4 + 2]& 0x07]][1] |
                                 codeF[read[tid + i * 4 + 3] & 0x07]) ) ;
        }
        lmer = (lmer >> ((32 - lmerLength) << 1)) & lmerMask[lmerLength-1];
        // printf(" offset = %u, lmer = %llu ", (tid + rOffset),lmer);
        //lmers[rOffset + tid] = lmer;
        lmers[tid] = lmer;
    }
    """
    kernel = SourceModule(kernel_source).get_function("encodeLmerDevice")

    block_dim, grid_dim = getOptimalLaunchConfiguration(readCount, lmerLength)
    module_logger.debug("block_dim = %s, grid_dim = %s" % (block_dim, grid_dim))

    # The kernel is only launched when both host buffers are real ndarrays;
    # otherwise the two check results are printed and d_lmers is left untouched.
    buffer_is_array = isinstance(buffer, np.ndarray)
    lmers_is_array = isinstance(d_lmers, np.ndarray)
    if not (buffer_is_array and lmers_is_array):
        print(buffer_is_array, lmers_is_array)
    else:
        module_logger.info("Going to GPU.")
        lmers_gpu = gpuarray.to_gpu(d_lmers)
        kernel(
            drv.In(buffer),
            lmers_gpu,
            np.uintc(lmerLength),
            block=block_dim,
            grid=grid_dim,
        )
        # Download the encoded values into the caller's array.
        lmers_gpu.get(d_lmers)

    module_logger.debug("Generated %s lmers." % len(d_lmers))

    # Occupancy is computed and logged regardless of whether the kernel ran.
    device_data = pycuda.tools.DeviceData()
    occupancy = pycuda.tools.OccupancyRecord(device_data, block_dim[0] * grid_dim[0])
    module_logger.debug("Occupancy = %s" % (occupancy.occupancy * 100))
    module_logger.info("finished encode_lmer_device.")
    return d_lmers
def mark_spanning_euler_edges(d_ee, d_mark, ecount, d_cg_edge, cg_edgeCount, d_tree, treeCount):
    """
    Flag Euler edges selected by the spanning tree using the ``markSpanningEulerEdges`` kernel.

    For every index ``t < treeCount`` the kernel looks up circuit edge
    ``d_cg_edge[d_tree[t]]`` and atomically sets ``mark[min(e1, e2)]`` to 1.

    :param d_ee: Euler edge records (kernel ``ee``); wrapped in ``drv.In``, not read by the kernel body.
    :param d_mark: mark array (kernel ``mark``); uploaded, updated by the kernel and
        downloaded back into this same array.
    :param ecount: forwarded as kernel ``ecount``; the kernel body does not read it.
    :param d_cg_edge: circuit edge records (kernel ``cg_edge``); wrapped in ``drv.In``.
    :param cg_edgeCount: forwarded as kernel ``cg_edgeCount``; the kernel body does not read it.
    :param d_tree: indices into ``d_cg_edge`` (kernel ``tree``); wrapped in ``drv.In``.
    :param treeCount: number of tree entries processed; also used to size the launch.
    :return: ``d_mark`` after the device-to-host copy.
    """
    log = logging.getLogger(__name__)
    log.info("started.")

    kernel_source = """
    typedef unsigned long long KEY_T ;
    typedef struct EulerVertex{
        KEY_T vid;
        unsigned int ep;
        unsigned int ecount;
        unsigned int lp;
        unsigned int lcount;
    }EulerVertex;

    typedef struct CircuitEdge{
        unsigned int ceid;
        unsigned e1;
        unsigned e2;
        unsigned c1;
        unsigned c2;
    }CircuitEdge;

    typedef struct EulerEdge{
        KEY_T eid;
        unsigned int v1;
        unsigned int v2;
        unsigned int s;
        unsigned int pad;
    }EulerEdge;

    __global__ void markSpanningEulerEdges( EulerEdge * ee,
                                            unsigned int * mark ,
                                            unsigned int ecount,
                                            CircuitEdge * cg_edge,
                                            unsigned int cg_edgeCount,
                                            unsigned int * tree,
                                            unsigned int treeCount)
    {
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        if(tid < treeCount) {
            /*if(tree[tid]==1)*/{
                atomicExch(mark+min(cg_edge[tree[tid]].e1,cg_edge[tree[tid]].e2),1); // important: assumption if(mark[i]=1) means mark[i]and mark[i+1] are swipe
                //atomicExch(mark+cg_edge[tree[tid]].e2,1);
            }
        }
    }
    """
    module = SourceModule(kernel_source)

    block_dim, grid_dim = getOptimalLaunchConfiguration(treeCount, 512)
    kernel = module.get_function('markSpanningEulerEdges')
    mark_gpu = gpuarray.to_gpu(d_mark)

    # Positional order must match the kernel signature above.
    kernel_args = (
        drv.In(d_ee),
        mark_gpu,
        np.uintc(ecount),
        drv.In(d_cg_edge),
        np.uintc(cg_edgeCount),
        drv.In(d_tree),
        np.uintc(treeCount),
    )
    kernel(*kernel_args, block=block_dim, grid=grid_dim)

    # Download the updated marks into the caller's array.
    mark_gpu.get(d_mark)

    # Occupancy reporting via pycuda.tools.OccupancyRecord is disabled here.
    log.info('Finished.')
    return d_mark
def compute_lmer_complement_device(buffer, readCount, d_lmers, readLength, lmerLength):
    """
    Encode complement l-mers on the GPU with the ``encodeLmerComplementDevice`` kernel.

    Each kernel thread with index below ``readCount`` replaces its own byte of
    the read buffer by the ``codeR`` code (indexed by the low three bits of the
    byte) and then packs ``lmerLength`` consecutive codes, two bits each, into
    one 64-bit value.

    :param buffer: host array holding the read bytes; passed to the kernel via ``drv.In``.
    :param readCount: number of threads allowed to do work; also used to size the launch.
    :param d_lmers: host array that receives the encoded values; overwritten in place.
    :param readLength: used together with ``readCount`` to choose the launch configuration.
    :param lmerLength: number of 2-bit codes packed per encoded value.
    :return: ``d_lmers`` (unchanged when either input is not a ``numpy.ndarray``).
    """
    # logger = logging.getLogger('eulercuda.pyencode.compute_lmer_complement_device')
    module_logger.info("started compute_lmer_complement_device.")

    # NOTE(review): in the kernel below ``row`` and ``tid`` hold the same value,
    # so the store goes to lmers[2 * tid] -- confirm this is intended.
    # NOTE(review): __syncthreads() is called inside ``if (tid < readCount)``;
    # confirm every thread of a block always takes the same branch.
    mod = SourceModule("""
    __device__ __constant__ char codeF[]={0,0,0,1,3,0,0,2};
    __device__ __constant__ char codeR[]={0,3,0,2,0,0,0,1};
    typedef unsigned long long KEY_T ;
    typedef KEY_T * KEY_PTR ;

    __global__ void encodeLmerComplementDevice(
            char * dnaRead,
            KEY_PTR lmers,
            const unsigned int lmerLength,
            const unsigned int readCount
            )
    {
        const unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
        const unsigned int row = blockIdx.x * blockDim.x + threadIdx.x;
        // const unsigned int col = blockIdx.y + blockDim.y + threadIdx.y;
        if (tid < readCount)
        {
            // extern __shared__ char dnaRead[];
            //unsigned int lmerLength = 0;
            KEY_T lmer = 0;
            KEY_T temp = 0;

            // lmerLength = d_lmerLength[tid];
            // dnaRead[tid] = buffer[row + tid];
            __syncthreads();
            dnaRead[tid] = codeR[dnaRead[tid] & 0x07];
            __syncthreads();
            for (unsigned int i = 0; i < lmerLength; i++)
            {
                temp = ((KEY_T)dnaRead[(tid + i) % blockDim.x]);
                lmer = (temp << (i << 1)) | lmer;
            }
            lmers[row + tid] = lmer;
            __syncthreads();
        }
    }
    """)
    encode_lmer_complement = mod.get_function("encodeLmerComplementDevice")

    block_dim, grid_dim = getOptimalLaunchConfiguration(readCount, readLength)
    module_logger.debug('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))

    # The kernel is only launched when both host buffers are real ndarrays.
    if isinstance(buffer, np.ndarray) and isinstance(d_lmers, np.ndarray):
        np_lmerLength = np.uintc(lmerLength)
        np_d_lmers = gpuarray.to_gpu(d_lmers)
        module_logger.info("Going to GPU.")
        encode_lmer_complement(
            drv.In(buffer),
            np_d_lmers,
            np_lmerLength,
            np.uintc(readCount),
            block=block_dim,
            grid=grid_dim
        )
        # Download the encoded values into the caller's array.
        np_d_lmers.get(d_lmers)
    else:
        print("Problem with data to GPU")
        # Logger.warn is a deprecated alias; Logger.warning is the supported name.
        module_logger.warning("problem with data to GPU.")

    # Occupancy is computed and logged regardless of whether the kernel ran.
    devdata = pycuda.tools.DeviceData()
    orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[0])
    module_logger.info("Occupancy = %s" % (orec.occupancy * 100))
    module_logger.info("Finished compute_lmer_complement_device.")
    return d_lmers
def run_simulation(self):
    """
    Run ``self.args.n_time`` kernel steps on the GPU and return the collected time averages.

    Host arrays are allocated and uploaded with ``self.make_gpu_data``, the
    kernel is built with ``self.make_kernel``, and a launch grid is derived
    from ``self.n_work_items`` and the configured block size. After each
    kernel step one ``tavg`` buffer is copied asynchronously into a ring of
    pinned host slots (one slot per stream); a slot is copied out to ordinary
    memory before it is reused, and the remaining slots are drained after the
    loop.

    :return: ``numpy.ndarray`` stacking one copied ``tavg`` slot per step; each
        slot has the shape of ``data['tavg0']``, i.e.
        ``(self.exposures, self.args.n_regions, self.n_work_items)``.

    The process is terminated with ``sys.exit(1)`` after logging when host or
    device memory allocation fails or when the CUDA driver raises
    ``LaunchError``, ``LogicError`` or ``RuntimeError``.
    """
    # setup data#{{{
    data = {
        'weights': self.weights,
        'lengths': self.lengths,
        'params': self.params.T,
    }
    base_shape = self.n_work_items,
    for name, shape in dict(
            tavg0=(self.exposures, self.args.n_regions,),
            tavg1=(self.exposures, self.args.n_regions,),
            state=(self.buf_len, self.states * self.args.n_regions),
    ).items():
        # memory error exception for compute device
        try:
            data[name] = np.zeros(shape + base_shape, 'f')
        except MemoryError as e:
            self.logger.error('%s.\n\t Please check the parameter dimensions %d x %d, they are to large '
                              'for this compute device',
                              e, self.args.n_sweep_arg0, self.args.n_sweep_arg1)
            sys.exit(1)

    gpu_data = self.make_gpu_data(data)  # {{{

    # setup CUDA stuff#{{{
    step_fn = self.make_kernel(
        source_file=self.args.filename,
        warp_size=32,
        # block_dim_x=self.args.n_sweep_arg0,
        # ext_options=preproccesor_defines,
        # caching=args.caching,
        args=self.args,
        lineinfo=self.args.lineinfo,
        nh=self.buf_len,
    )  # }}}

    # setup simulation#{{{
    n_streams = 32
    streams = [drv.Stream() for i in range(n_streams)]
    events = [drv.Event() for i in range(n_streams)]
    tavg_unpinned = []

    # One pinned host slot per stream, each shaped like a single tavg buffer.
    try:
        tavg = drv.pagelocked_zeros((n_streams,) + data['tavg0'].shape, dtype=np.float32)
    except drv.MemoryError as e:
        self.logger.error(
            '%s.\n\t Please check the parameter dimensions, %d parameters are too large for this GPU',
            e, self.params.size)
        sys.exit(1)

    # determine optimal grid recursively
    def dog(fgd):
        # Shrinks the larger grid dimension in place for as long as the grid
        # still covers all work items; callers rely on the mutation, not on
        # the return value.
        maxgd, mingd = max(fgd), min(fgd)
        maxpos = fgd.index(max(fgd))
        if (maxgd - 1) * mingd * bx * by >= nwi:
            fgd[maxpos] = fgd[maxpos] - 1
            dog(fgd)
        else:
            return fgd

    # n_sweep_arg0 scales griddim.x, n_sweep_arg1 scales griddim.y
    # form an optimal grid recursively
    bx, by = self.args.blockszx, self.args.blockszy
    nwi = self.n_work_items
    rootnwi = int(np.ceil(np.sqrt(nwi)))
    gridx = int(np.ceil(rootnwi / bx))
    gridy = int(np.ceil(rootnwi / by))

    final_block_dim = bx, by, 1

    fgd = [gridx, gridy]
    dog(fgd)
    final_grid_dim = fgd[0], fgd[1]

    assert gridx * gridy * bx * by >= nwi

    self.logger.info('history shape %r', gpu_data['state'].shape)
    self.logger.info('gpu_data %s', gpu_data['tavg0'].shape)
    self.logger.info('on device mem: %.3f MiB' % (self.nbytes(data) / 1024 / 1024, ))
    self.logger.info('final block dim %r', final_block_dim)
    self.logger.info('final grid dim %r', final_grid_dim)

    # run simulation#{{{
    nstep = self.args.n_time

    if self.args.verbose:
        self.gpu_mem_info()

    try:
        for i in tqdm.trange(nstep, file=sys.stdout):

            try:
                event = events[i % n_streams]
                stream = streams[i % n_streams]

                if i > 0:
                    stream.wait_for_event(events[(i - 1) % n_streams])

                step_fn(np.uintc(i * self.n_inner_steps), np.uintc(self.args.n_regions), np.uintc(self.buf_len),
                        np.uintc(self.n_inner_steps), np.uintc(self.n_work_items), np.float32(self.dt),
                        gpu_data['weights'], gpu_data['lengths'], gpu_data['params'], gpu_data['state'],
                        gpu_data['tavg%d' % (i % 2,)],
                        block=final_block_dim, grid=final_grid_dim)

                event.record(stream)
            except drv.LaunchError as e:
                self.logger.error('%s', e)
                sys.exit(1)

            # The buffer copied out is the one NOT written by this step.
            tavgk = 'tavg%d' % ((i + 1) % 2,)

            # async wrt. other streams & host, but not this stream.
            if i >= n_streams:
                # The slot is about to be reused: wait for its previous copy
                # and save it before overwriting.
                stream.synchronize()
                tavg_unpinned.append(tavg[i % n_streams].copy())

            drv.memcpy_dtoh_async(tavg[i % n_streams], gpu_data[tavgk].ptr, stream=stream)

        # recover uncopied data from pinned buffer
        # ``>=`` rather than ``>``: when nstep == n_streams every slot is
        # filled exactly once and nothing was drained inside the loop, so a
        # strict comparison would return no data at all.
        if nstep >= n_streams:
            for i in range(nstep % n_streams, n_streams):
                # Wait on the stream that owns slot i, not on whichever stream
                # happened to be used last.
                streams[i].synchronize()
                tavg_unpinned.append(tavg[i].copy())

        for i in range(nstep % n_streams):
            streams[i].synchronize()
            tavg_unpinned.append(tavg[i].copy())

    except drv.LogicError as e:
        self.logger.error('%s. Check the number of states of the model or '
                          'GPU block shape settings blockdim.x/y %r, griddim %r.',
                          e, final_block_dim, final_grid_dim)
        sys.exit(1)
    except drv.RuntimeError as e:
        self.logger.error('%s', e)
        sys.exit(1)

    # self.logger.info('kernel finish..')
    # release pinned memory
    tavg = np.array(tavg_unpinned)

    # also release gpu_data
    self.release_gpumem(gpu_data)

    self.logger.info('kernel finished')
    return tavg
class TestNumpy:
    """Tests for the ``Numpy`` helper: optional import, type detection and encoding."""

    @staticmethod
    def test_get_numpy() -> None:
        """
        Test get_numpy when module is present
        """
        # Arrange

        # Act
        result = Numpy.get_numpy()

        # Assert
        assert result is np

    @staticmethod
    def test_get_numpy_missing(mocker: MockFixture) -> None:
        """
        Test get_numpy when module is missing
        """
        # Arrange
        # A ``None`` entry in sys.modules makes ``import numpy`` raise ImportError.
        mocker.patch.dict("sys.modules", {"numpy": None})

        # Act
        result = Numpy.get_numpy()

        # Assert
        assert result is None

    @staticmethod
    def test_get_numpy_missing_error(mocker: MockFixture) -> None:
        """
        Test get_numpy when module is missing raises error
        """
        # Arrange
        mocker.patch.dict("sys.modules", {"numpy": None})

        # Act / assert
        with pytest.raises(ImportError, match="foo"):
            Numpy.get_numpy(raise_error=True, custom_error_message="foo")

    @staticmethod
    @pytest.mark.parametrize("value, expected", [(np.array([1, 2, 3]), True), ([1, 2, 3], False)])
    def test_is_numpy_object(value, expected) -> None:
        """
        Test is_numpy_object
        """
        # Arrange

        # Act
        result = Numpy.is_numpy_object(value)

        # Assert
        assert result == expected

    @staticmethod
    def test_get_numpy_primatives() -> None:
        """
        Test _get_numpy_primatives
        """
        # Arrange

        # Act
        result = Numpy._get_numpy_primatives(np)

        # Assert
        assert len(result) == 33  # Expected number of types
        for thing in result:
            assert "numpy" in getattr(thing, "__module__", "").split(".")  # Check that type is from numpy
            assert type(thing) is type  # Check that each type is a type

    @staticmethod
    def test_encode_numpy_error() -> None:
        """
        Test that the encode_numpy raises an error if no encoding is defined.
        """
        # Arrange
        value = "not a numpy"

        # Act & Assert
        with pytest.raises(NotImplementedError):
            Numpy.encode_numpy(value)

    @staticmethod
    @pytest.mark.parametrize(
        "value, expected",
        [
            # fmt: off
            (np.array([['balloons'], ['are'], ['awesome']]), [['balloons'], ['are'], ['awesome']]),
            (np.bool_(1), True),
            (np.byte(4), 4),
            (np.ubyte(4), 4),
            (np.short(4), 4),
            (np.ushort(4), 4),
            (np.intc(4), 4),
            (np.uintc(4), 4),
            (np.int_(4), 4),
            (np.uint(4), 4),
            (np.longlong(4), 4),
            (np.ulonglong(4), 4),
            (np.float16(4), 4),
            (np.single(4), 4),
            (np.double(4), 4),
            (np.longdouble(4), 4),
            (np.csingle(4), 4),
            (np.cdouble(4), 4),
            (np.clongdouble(4), 4),
            (np.int8(4), 4),
            (np.int16(4), 4),
            (np.int32(4), 4),
            (np.int64(4), 4),
            (np.uint8(4), 4),
            (np.uint16(4), 4),
            (np.uint32(4), 4),
            (np.uint64(4), 4),
            (np.intp(4), 4),
            (np.uintp(4), 4),
            (np.float32(4), 4),
            (np.float64(4), 4),
            (np.complex64(4), 4 + 0j),
            (np.complex128(4), 4 + 0j),
            # ``np.complex_`` was removed in NumPy 2.0. This list is evaluated at
            # class-definition time, so a missing alias would break collection of
            # the whole class; fall back to the type it used to alias.
            (getattr(np, "complex_", np.complex128)(4), 4 + 0j),
            # fmt: on
        ],
    )
    def test_encode_numpy(value, expected) -> None:
        """
        Test encode_numpy
        """
        # Arrange

        # Act
        result = Numpy.encode_numpy(value)

        # Assert
        assert result == expected