def __init__(self, p, collength=16, verbose=False):
    """
    Create a MAP object for the MAP file located at ``p``.

    ``data_format`` describes how the file is laid out for reading and
    writing HDF5 tables. Each key is a column name; each value is a
    ``(index, column_type)`` tuple where ``index`` is the position of the
    field once a line has been split and ``column_type`` is the datatype
    used for that column within the table.

    :param p: The full file path
    :param collength: Size handed to ``StringCol`` for every column
    :param verbose: Passed through unchanged to ``DataType.__init__``
    """
    column_names = ("chromosome", "identifier", "distance", "position")
    # One fresh StringCol per column, indexed by its position in the line.
    self.data_format = {
        name: (index, StringCol(collength))
        for index, name in enumerate(column_names)
    }
    DataType.__init__(self, p, "map", verbose)
def __init__(self, p, collength=16, verbose=False):
    """
    Build the MAP object that corresponds to the MAP file at ``p``.

    The ``data_format`` dictionary records the file layout used when
    reading and writing HDF5 tables: the key is the column name, the first
    element of the value tuple is where the field sits after the line is
    split, and the second element is the datatype used in the table.

    :param p: The full file path
    :param collength: Size given to each ``StringCol`` column
    :param verbose: Forwarded as-is to ``DataType.__init__``
    """
    layout = {}
    for position, column in enumerate(
            ["chromosome", "identifier", "distance", "position"]):
        layout[column] = (position, StringCol(collength))
    self.data_format = layout
    DataType.__init__(self, p, "map", verbose)
def view(self, dtype=None, type_=None):
    """
    Return a view of this array reinterpreted as ``dtype`` or ``type_``.

    :param dtype: Either an ``np.ndarray`` subclass (the view is returned
        as an instance of that class) or any value ``DataType`` accepts
        (the view reinterprets the element type).
    :param type_: When not None, used in place of ``dtype``.
    :return: The new view. For a data-type view, ``bf.dtype`` on the
        result is set to the requested ``DataType``.
    """
    if type_ is not None:
        # Use the caller's argument; the builtin ``type`` was assigned here
        # by mistake, which made ``type_`` a no-op that then failed in
        # DataType().
        dtype = type_
    if isinstance(dtype, type) and issubclass(dtype, np.ndarray):
        # A class was requested: defer to numpy's own view-as-type.
        return super(ndarray, self).view(dtype)
    # TODO: Endianness changes are not supported here
    #         Consider building byteorder into DataType
    dtype_bf = DataType(dtype)
    dtype_np = np.dtype(dtype_bf.as_numpy_dtype())
    v = super(ndarray, self).view(dtype_np)
    # Keep the metadata in step with the reinterpreted element type.
    v.bf.dtype = dtype_bf
    v._update_BFarray()
    return v
def __init__(self, p, collength=16, writing=False, verbose=False):
    """
    Create a PED object for the PED file located at ``p``.

    See MAP.py for data_format explanation. Six fixed columns are followed
    by a ``genotype`` column at index 6 whose width depends on ``writing``.

    :param p: The full file path.
    :param collength: Size handed to ``StringCol`` for the six fixed columns.
    :param writing: When true the genotype column is ``StringCol(4)``;
        otherwise its size is whatever ``get_geno_length()`` returns, which
        is also stored on ``self.geno_length``.
    :param verbose: Passed through unchanged to ``DataType.__init__``.
    """
    self.input = p
    fixed_columns = ("family", "sample", "paternal",
                     "maternal", "sex", "affection")
    self.data_format = {
        name: (index, StringCol(collength))
        for index, name in enumerate(fixed_columns)
    }
    if writing:
        self.data_format["genotype"] = (6, StringCol(4))
    else:
        self.geno_length = self.get_geno_length()
        self.data_format["genotype"] = (6, StringCol(self.geno_length))
    DataType.__init__(self, p, "ped", verbose)
def tensor(self):  # TODO: This shouldn't be public
    """
    Return the tensor description derived from ``self.header['_tensor']``.

    The result is computed on first use and cached in ``self._tensor``.

    :return: dict with keys ``dtype`` (a ``DataType``), ``ringlet_shape``,
        ``nringlet``, ``frame_shape``, ``frame_nbyte`` and ``dtype_nbyte``.
    """
    if self._tensor is not None:
        return self._tensor
    tensor_hdr = self.header['_tensor']
    ringlet_shape, frame_shape = split_shape(tensor_hdr['shape'])
    # Element counts are the products of the respective shape entries.
    nringlet = 1
    for extent in ringlet_shape:
        nringlet = nringlet * extent
    frame_nelement = 1
    for extent in frame_shape:
        frame_nelement = frame_nelement * extent
    dtype = tensor_hdr['dtype']
    nbit = DataType(dtype).itemsize_bits
    # Byte sizes below are only meaningful for whole-byte element types.
    assert(nbit % 8 == 0)
    self._tensor = {
        'dtype': DataType(dtype),
        'ringlet_shape': ringlet_shape,
        'nringlet': nringlet,
        'frame_shape': frame_shape,
        'frame_nbyte': frame_nelement * nbit // 8,
        'dtype_nbyte': nbit // 8,
    }
    return self._tensor
def __init__(self, p, collength=16, writing=False, verbose=False):
    """
    Build the PED object that corresponds to the PED file at ``p``.

    See MAP.py for data_format explanation.

    :param p: The full file path.
    :param collength: Size given to each of the six fixed ``StringCol``
        columns.
    :param writing: Selects the genotype column size: ``StringCol(4)`` when
        true, otherwise the value returned by ``get_geno_length()`` (also
        kept on ``self.geno_length``).
    :param verbose: Forwarded as-is to ``DataType.__init__``.
    """
    self.input = p
    layout = {}
    for position, column in enumerate(
            ["family", "sample", "paternal",
             "maternal", "sex", "affection"]):
        layout[column] = (position, StringCol(collength))
    self.data_format = layout
    if writing:
        self.data_format["genotype"] = (6, StringCol(4))
    else:
        self.geno_length = self.get_geno_length()
        self.data_format["genotype"] = (6, StringCol(self.geno_length))
    DataType.__init__(self, p, "ped", verbose)
def data_view(self, dtype=np.uint8, shape=-1):
    """
    Wrap the memory at ``self._data_ptr`` in an ``ndarray`` of ``dtype``.

    The array has shape ``(nringlet, size // itemsize)`` before any
    reshape. With more than one ringlet the rows are ``self.stride`` bytes
    apart; otherwise default strides are used.

    :param dtype: Element type, anything ``DataType`` accepts.
    :param shape: Optional shape to reshape the result to; ``-1`` (the
        default) leaves the two-dimensional shape untouched.
    :return: The array, flagged read-only when ``self.writeable`` is false.
    """
    itemsize = DataType(dtype).itemsize
    assert(self.size % itemsize == 0)
    assert(self.stride % itemsize == 0)
    base_ptr = self._data_ptr
    nbyte_span = self.size
    ringlet_stride = self.stride
    ringlet_count = self.nringlet
    # TODO: We should really map the actual ring memory space and index
    #         it with offset rather than mapping from the current pointer.
    # NOTE(review): this ctypes buffer is never passed to the array below
    #   (which receives the raw pointer). It is kept because dereferencing
    #   ``.contents`` raises ValueError for a NULL pointer, so dropping it
    #   could change behaviour.
    whole_span_type = ctypes.c_byte * (ringlet_count * ringlet_stride)
    span_ptr = ctypes.cast(base_ptr, ctypes.POINTER(whole_span_type))
    span_buffer = span_ptr.contents  # noqa: F841
    view_shape = (ringlet_count, nbyte_span // itemsize)
    if ringlet_count > 1:
        view_strides = (self.stride, itemsize)
    else:
        view_strides = None
    mem_space = self.ring.space
    data_array = ndarray(shape=view_shape, strides=view_strides,
                         buffer=base_ptr, dtype=dtype, space=mem_space)
    if not self.writeable:
        data_array.flags['WRITEABLE'] = False
    if shape != -1:
        # TODO: Check that this still wraps the same memory
        data_array = data_array.reshape(shape)
    return data_array
def __array_finalize__(self, obj):
    """
    Attach ``bf`` metadata when this array is created from ``obj``.

    :param obj: ``None`` when the array comes from ``__new__`` (metadata
        already set there), otherwise the array this one was derived from.
    """
    if obj is None:
        # Already initialized self.bf in __new__
        return
    # Initialize as view of existing array
    if not isinstance(obj, ndarray):
        # Plain np.ndarray source: derive the metadata from the array itself.
        space = str(Space(raw_get_space(obj.ctypes.data)))
        # **TODO: Decide on bf.dtype being DataType vs. string (and same for space)
        dtype = DataType(obj.dtype)
        self.bf = BFArrayInfo(space, dtype, obj.dtype.isnative, False)
    else:
        # bf.ndarray source: carry its metadata over.
        src = obj.bf
        self.bf = BFArrayInfo(src.space, src.dtype,
                              src.native, src.conjugated)
    self._update_BFarray()
def __new__(cls, base=None, space=None, shape=None, dtype=None,
            buffer=None, offset=0, strides=None,
            native=None, conjugated=None):
    """
    Construct an array as a view/copy of ``base`` or over new/existing memory.

    With ``base`` given: a pycuda ``GPUArray`` is wrapped in place; if
    neither ``space`` nor ``dtype`` is given the result is a view of
    ``base``; otherwise ``base`` is copied into a newly allocated array.
    Without ``base``: an array of ``shape``/``dtype`` is created over
    ``buffer``, or over freshly allocated memory when ``buffer`` is None.

    :param base: Existing array(-like) to view or copy.
    :param space: Memory space name. For a new allocation it defaults to
        ``'system'``; for a supplied ``buffer`` it is queried from it.
    :param shape: int or sequence of ints; required when ``base`` is None.
    :param dtype: Anything ``DataType`` accepts; defaults to ``'f32'`` for
        new arrays.
    :param buffer: Address of existing memory to wrap.
    :param offset: Forwarded to ``np.ndarray.__new__``.
    :param strides: Byte strides; computed as row-major contiguous when
        None.
    :param native: Whether the data is in native byte order (default True).
    :param conjugated: Conjugation flag stored in the metadata (default
        False, or inherited from ``base``).
    :raises ValueError: If ``base`` is combined with shape/buffer/offset/
        strides/native, if ``base`` and ``shape`` are both None, or if a
        sub-byte dtype cannot be packed into the last dimension.
    :raises TypeError: If ``base`` cannot be converted to ``dtype``.
    """
    if isinstance(shape, int):
        shape = [shape]
    ownbuffer = None
    if base is not None:
        if (shape is not None or
                # dtype is not None or
                buffer is not None or
                offset != 0 or
                strides is not None or
                native is not None):
            raise ValueError('Invalid combination of arguments when base '
                             'is specified')
        if 'pycuda' in sys.modules:
            from pycuda.gpuarray import GPUArray as pycuda_GPUArray
            if isinstance(base, pycuda_GPUArray):
                # Wrap the GPU allocation directly instead of copying it.
                return ndarray.__new__(cls, space='cuda',
                                       buffer=int(base.gpudata),
                                       shape=base.shape,
                                       dtype=base.dtype,
                                       strides=base.strides,
                                       native=np.dtype(
                                           base.dtype).isnative)
        if dtype is not None:
            dtype = DataType(dtype)
        if space is None and dtype is None:
            if not isinstance(base, np.ndarray):
                base = np.asarray(base)  # TODO: This may not be a good idea
            # Create view of base array
            obj = base.view(cls)  # Note: This calls obj.__array_finalize__
            # Allow conjugated to be redefined
            if conjugated is not None:
                obj.bf.conjugated = conjugated
                obj._update_BFarray()
        else:
            if not isinstance(base, np.ndarray):
                # Convert base to np.ndarray
                if dtype is not None:
                    base = np.array(base,
                                    dtype=DataType(dtype).as_numpy_dtype())
                else:
                    base = np.array(base)
            if not isinstance(base, ndarray) and dtype is not None:
                base = base.astype(dtype.as_numpy_dtype())
            base = ndarray(base)  # View base as bf.ndarray
            if dtype is not None and base.bf.dtype != dtype:
                raise TypeError('Unable to convert type %s to %s during '
                                'array construction' %
                                (base.bf.dtype, dtype))
            if conjugated is None:
                conjugated = base.bf.conjugated
            # Create copy of base array
            obj = ndarray.__new__(cls,
                                  space=space,
                                  shape=base.shape,
                                  dtype=base.bf.dtype,
                                  native=base.bf.native,
                                  conjugated=conjugated)
            copy_array(obj, base)
    else:
        # Create new array
        if shape is None:
            # Checked up front: every step below needs the shape, and the
            # original check sat after the first len(shape) call, so it
            # could never fire (a TypeError was raised instead).
            raise ValueError('Either buffer or shape must be '
                             'specified')
        if dtype is None:
            dtype = 'f32'  # Default dtype
        dtype = DataType(dtype)
        if native is None:
            native = True  # Default byteorder
        if conjugated is None:
            conjugated = False  # Default unconjugated
        itemsize_bits = dtype.itemsize_bits
        # Sub-byte types occupy one byte per packed element. Computed
        # outside the strides branch so that it is always defined for the
        # nbyte calculation below.
        itemsize = max(itemsize_bits // 8, 1)
        if strides is None:
            # HACK to support 'packed' arrays, by folding the last
            #        dimension of the shape into the dtype.
            # TODO: Consider using bit strides when dtype < 8 bits
            #         It's hacky, but it may be worth it
            if itemsize_bits < 8:
                pack_factor = 8 // itemsize_bits
                # Test for an empty shape first so that shape[-1] cannot
                # raise IndexError ahead of the intended ValueError.
                if not len(shape) or shape[-1] % pack_factor != 0:
                    raise ValueError("Array cannot be packed")
                shape = list(shape)
                shape[-1] //= pack_factor
            if len(shape):
                # This magic came from http://stackoverflow.com/a/32874295
                strides = (itemsize *
                           np.r_[1, np.cumprod(shape[::-1][:-1],
                                               dtype=np.int64)][::-1])
                strides = tuple(strides)
            else:
                strides = tuple()
        nbyte = strides[0] * shape[0] if len(shape) else itemsize
        if buffer is None:
            # Allocate new buffer
            if space is None:
                space = 'system'  # Default space
            ownbuffer = raw_malloc(nbyte, space)
            buffer = ownbuffer
        else:
            if space is None:
                # TODO: raw_get_space should probably return string, and needs a better name
                space = str(Space(raw_get_space(buffer)))
        # TODO: Should move np.dtype() into as_numpy_dtype?
        dtype_np = np.dtype(dtype.as_numpy_dtype())
        if not native:
            dtype_np = dtype_np.newbyteorder()
        data_buffer = _address_as_buffer(buffer, nbyte)
        obj = np.ndarray.__new__(cls, shape, dtype_np,
                                 data_buffer, offset, strides)
        obj.bf = BFArrayInfo(space, dtype, native, conjugated,
                             ownbuffer)
        obj._update_BFarray()
    return obj