def test_bit_count2(self):
    """Check count_and/count_or/count_xor against counting the combined array.

    Runs for every length in 0..49 plus one random length drawn from
    [1000, 2000], with two independently generated random bitarrays.
    """
    lengths = list(range(50))
    lengths.append(randint(1000, 2000))
    for n in lengths:
        operands = []
        for _ in range(2):
            x = bitarray()
            x.frombytes(os.urandom(bits2bytes(n)))
            # whole bytes were appended; cut back to exactly n bits
            del x[n:]
            operands.append(x)
        a, b = operands
        self.assertEqual(count_and(a, b), (a & b).count())
        self.assertEqual(count_or(a, b), (a | b).count())
        self.assertEqual(count_xor(a, b), (a ^ b).count())
def make_endian(a, endian):
    """make_endian(bitarray, endian, /) -> bitarray

Return a bitarray that has endianness `endian` and compares equal to the
given one.  If the given bitarray already has that endianness, the very same
object is returned.  Otherwise a new bitarray is built; its binary
representation differs from the original, but its elements do not.
"""
    if not isinstance(a, _bitarray):
        raise TypeError("bitarray expected")

    if a.endian() == endian:
        return a

    result = bitarray(a, endian)
    nbits = a.length()
    if nbits:
        result.bytereverse()
        if nbits % 8:
            # the trailing partial byte is copied element by element
            # from the original instead of relying on bytereverse()
            tail = 8 * (bits2bytes(nbits) - 1)
            result[tail:] = a[tail:]
    return result
def int2ba(i, length=None, endian=None, signed=False):
    """int2ba(int, /, length=None, endian=None, signed=False) -> bitarray

Convert an integer to a bitarray of the given endianness.  Without `length`
the result carries no leading (big-endian) / trailing (little-endian) zeros.
With `length` the result has exactly that many bits, and `OverflowError` is
raised when the integer does not fit.  `signed` selects two's complement
and requires `length`.  A negative integer with `signed=False` raises
`OverflowError`.
"""
    int_types = (int, long) if _is_py2 else int
    if not isinstance(i, int_types):
        raise TypeError("int expected, got '%s'" % type(i).__name__)

    if length is not None:
        if not isinstance(length, int):
            raise TypeError("int expected for length")
        if length <= 0:
            raise ValueError("integer larger than 0 expected for length")

    if signed and length is None:
        raise TypeError("signed requires length")

    if i == 0:
        # zero is handled up front; the conversion below assumes i != 0
        return zeros(length or 1, endian)

    if signed:
        half = 1 << (length - 1)
        if i >= half or i < -half:
            raise OverflowError("signed integer out of range")
        if i < 0:
            # map negative values onto their two's complement bit pattern
            i += 1 << length
    elif i < 0 or (length and i >= 1 << length):
        raise OverflowError("unsigned integer out of range")

    a = bitarray(0, get_default_endian() if endian is None else endian)
    big_endian = bool(a.endian() == 'big')

    if _is_py2:
        # build the byte string by hand, least significant byte first
        c = bytearray()
        while i:
            i, low = divmod(i, 256)
            c.append(low)
        if big_endian:
            c.reverse()
        raw = bytes(c)
    else:  # py3
        raw = i.to_bytes(bits2bytes(i.bit_length()), byteorder=a.endian())

    a.frombytes(raw)
    if length is None:
        return strip(a, 'left' if big_endian else 'right')

    nbits = len(a)
    if nbits > length:
        # drop surplus bits from the most significant end
        if big_endian:
            a = a[-length:]
        else:
            a = a[:length]
    if nbits < length:
        # pad with zeros on the most significant end
        pad = zeros(length - nbits, endian)
        if big_endian:
            a = pad + a
        else:
            a = a + pad
    assert len(a) == length
    return a
def urandom(length, endian=None):
    """urandom(length, /, endian=None) -> bitarray

Return a bitarray holding `length` random bits taken from `os.urandom`.
"""
    if endian is None:
        endian = get_default_endian()
    result = bitarray(0, endian)
    nbytes = bits2bytes(length)
    result.frombytes(os.urandom(nbytes))
    # whole bytes were appended; trim back to the requested bit count
    del result[length:]
    return result
def test_zeros_and_ones(self):
    """Round-trip all-zero and all-one bitarrays through serialize/deserialize.

    Covers both endiannesses and every length from 0 to 99.  For the all-zero
    case it also checks that everything after the first serialized byte is
    zero bytes.
    """
    for endian in ('little', 'big'):
        for nbits in range(100):
            a = zeros(nbits, endian)
            blob = serialize(a)
            expected_payload = b'\0' * bits2bytes(nbits)
            self.assertEqual(blob[1:], expected_payload)
            self.assertEQUAL(a, deserialize(blob))

            a.setall(1)
            restored = deserialize(serialize(a))
            self.assertEQUAL(a, restored)
def compress(f, size, quality=50, grey_level=False, subsampling_mode=1):
    """Compress the raw image read from `f` and return bits plus a header.

    The file is read as uint8 samples and reshaped to `size` (grey level) or
    `(*size, 3)` (RGB).  Each layer is zero-padded to multiples of 8, sliced
    into 8x8 blocks, passed through `dct2d` and `quantize`, rounded, and then
    entropy-encoded.

    Raises:
        ValueError: if `quality` is not within (0, 95].

    Returns:
        dict with key 'data' (the encoded bitarray) and key 'header' (size,
        grey_level, quality, subsampling_mode, remaining_bits_length and
        data_slice_lengths).
    """
    started = time.perf_counter()
    log = logging.getLogger(__name__)
    log.info('Original file size: '
             f'{os.fstat(f.fileno()).st_size} Bytes')

    if quality <= 0 or quality > 95:
        raise ValueError('Quality should within (0, 95].')

    def padding(extent):
        # number of rows/columns needed to reach the next multiple of 8
        return (extent // 8 + 1) * 8 - extent if extent % 8 else 0

    samples = np.fromfile(f, dtype=np.uint8)
    img_arr = samples.reshape(size if grey_level else (*size, 3))

    if grey_level:
        data = {Y: img_arr.astype(float)}
    else:
        data = rgbtoycbcr(img_arr[:, :, 0], img_arr[:, :, 1], img_arr[:, :, 2])
        for chroma in (CB, CR):
            data[chroma] = downsample(data[chroma], subsampling_mode)

    # level offset on the luminance layer
    data[Y] = data[Y] - 128

    for key, layer in data.items():
        nrows, ncols = layer.shape
        data[key] = np.pad(
            layer,
            ((0, padding(nrows)), (0, padding(ncols))),
            mode='constant')
        blocks = block_slice(data[key], 8, 8)
        data[key] = blocks
        for idx, block in enumerate(blocks):
            # 2D DCT, stored back before quantization reads it again
            blocks[idx] = dct2d(block)
            blocks[idx] = quantize(blocks[idx], key, quality=quality)
        data[key] = np.rint(data[key]).astype(int)

    if grey_level:
        encoded = Encoder(data[Y], LUMINANCE).encode()
        order = (encoded[DC], encoded[AC])
    else:
        luma = Encoder(data[Y], LUMINANCE).encode()
        chroma = Encoder(np.vstack((data[CB], data[CR])), CHROMINANCE).encode()
        encoded = {LUMINANCE: luma, CHROMINANCE: chroma}
        order = (encoded[LUMINANCE][DC], encoded[LUMINANCE][AC],
                 encoded[CHROMINANCE][DC], encoded[CHROMINANCE][AC])

    bits = bitarray(''.join(order))
    nbits = len(bits)

    log.info('Time elapsed: %.4f seconds' % (time.perf_counter() - started))

    header = {
        'size': size,
        'grey_level': grey_level,
        'quality': quality,
        'subsampling_mode': subsampling_mode,
        # bits needed to fill the data up to a whole number of bytes
        'remaining_bits_length': bits2bytes(nbits) * 8 - nbits,
        'data_slice_lengths': tuple(len(d) for d in order),
    }
    return {'data': bits, 'header': header}
def _get_message(self):
    """Fetch one message via self._get_data() and dispatch on its id byte.

    The first byte of the message is the message id; the handlers below
    update peer state, the bitfield and the torrent downloader accordingly.
    A bitfield message of the wrong length closes the connection.
    """
    data = self._get_data()
    msgid = ord(data[0])

    if msgid == 0:  # choke
        if not self.peer_choking:
            self.torrent_downloader.update_choking_status(False)
        self.peer_choking = True
        self.torrent_downloader.ui.update_log('choked')
        self.n_requests_in_flight = 0
    elif msgid == 1:  # unchoke
        if self.peer_choking:
            self.torrent_downloader.update_choking_status(True)
        self.peer_choking = False
        self.torrent_downloader.ui.update_log('unchoked')
    elif msgid == 2:  # interested
        self.peer_interested = True
        self.torrent_downloader.interest_state(self)
        self.torrent_downloader.ui.update_log('interested')
    elif msgid == 3:  # not interested
        self.peer_interested = False
        self.torrent_downloader.interest_state(self)
        self.torrent_downloader.ui.update_log('uninterested')
    elif msgid == 4:  # have
        (index,) = struct.unpack('>L', data[1:])
        self.bitfield[index] = True
        have_count = self.bitfield.count()
        self.torrent_downloader.ui.update_log(
            'have ' + str(index) + ' for ' + str(have_count))
    elif msgid == 5:  # bitfield
        payload = data[1:]
        if len(payload) != bits2bytes(len(self.bitfield)):
            # wrong length
            self.close_when_done()
            return
        fresh = bitarray('')
        self.bitfield = fresh
        fresh.frombytes(payload)
        # drop the padding bits beyond the number of pieces
        self.bitfield = self.bitfield[:len(self.fileinfo.pieces)]
        self.torrent_downloader.ui.update_log(
            'bitfield, has ' + str(self.bitfield.count()))
    elif msgid == 6:  # request
        if not self.am_choking:
            req = struct.unpack('>LLL', data[1:13])
            self.torrent_downloader.ui.update_log(
                'request for piece' + repr(req))
            self.torrent_downloader.got_request(self, req)
    elif msgid == 7:  # piece
        block = data[9:]
        index, begin = struct.unpack('>LL', data[1:9])
        self.n_requests_in_flight -= 1
        self.torrent_downloader.got_piece(index, begin, block)
    elif msgid == 8:  # cancel
        cancelled = struct.unpack('>LLL', data[1:13])
        self.torrent_downloader.ui.update_log('cancel ' + repr(cancelled))
        # TODO: pending requests are not actually removed from the queue
    elif msgid == 9:  # dht port
        pass
def tobytes(self):
    """Render this element as a bitarray: tag, length field(s), then data.

    The data is the concatenation of every attribute's ``tobytes()``, every
    child's ``tobytes()`` and, when present, ``self.cdata.tobytes()``.

    Returns:
        bitarray holding the 8-bit tag, the element length and the data.

    Raises:
        ValueError: if the data length in bytes does not fit into the
            24-bit extended length field.
        Exception: whatever an attribute or child raises while rendering
            is logged and re-raised unchanged.
    """
    logger.debug('rendering element: %s', self)

    data = bitarray()
    for attribute in self.attributes:
        try:
            data += attribute.tobytes()
        except Exception:
            logger.exception('error rendering attribute %s of %s', attribute, self)
            raise
    for child in self.children:
        try:
            data += child.tobytes()
        except Exception:
            logger.exception('error rendering child %s of %s', child, self)
            raise
    if self.cdata is not None:
        data += self.cdata.tobytes()

    # b0-b7: element tag
    bits = int_to_bitarray(self.tag, 8)

    # b8-b15: element data length (0-253 bytes), or an escape marker:
    #   0xFE -> a 16-bit extended length follows (values up to 65535)
    #   0xFF -> a 24-bit extended length follows (values up to 16777215)
    # The upper bounds are exclusive powers of two because the length is
    # written verbatim into a field of that many bits.
    datalength = bits2bytes(data.length())
    if datalength <= 253:
        bits += int_to_bitarray(datalength, 8)
    elif datalength < 1 << 16:
        marker = bitarray()
        marker.frombytes(b'\xfe')
        bits += marker
        bits += int_to_bitarray(datalength, 16)
    elif datalength < 1 << 24:
        marker = bitarray()
        marker.frombytes(b'\xff')
        bits += marker
        bits += int_to_bitarray(datalength, 24)
    else:
        raise ValueError(
            'element data length exceeds the maximum allowed by the extended element length (24bits): %s > %s'
            % (datalength, (1 << 24) - 1))
    bits += data
    return bits
def abre_archivo(nombre_archivo):
    """Read the file `nombre_archivo` and return its parsed contents.

    On success the returned list is
    ``[bx, by, items, cadena01, bn]`` where ``bx``, ``by`` are the first two
    4-byte integers, ``items`` is a list of ``[simbolo, tam_codigo, codigo]``
    entries (one per table entry, ``codigo`` being a '0'/'1' string of
    ``tam_codigo`` characters), ``cadena01`` is the rest of the file as a
    '0'/'1' string and ``bn`` is the single byte read after the header
    integers.

    If the file cannot be opened a message is printed and an empty list is
    returned.  If reading fails part-way, a message is printed and whatever
    was collected so far is returned.
    """
    meta_datos = []
    items = []

    try:
        archivo = open(nombre_archivo, "rb")
    except IOError:
        print('No se pudo abrir: ' + nombre_archivo + "!")
        # nothing to read; previously this path crashed on an unbound flag
        return meta_datos

    try:
        bx = convierte_a_int(archivo.read(4))
        by = convierte_a_int(archivo.read(4))
        be = convierte_a_int(archivo.read(4))  # number of table entries
        bn = ord(archivo.read(1))
        meta_datos.append(bx)
        meta_datos.append(by)

        for _ in range(be):
            simbolo = ord(archivo.read(1))
            tam_codigo = ord(archivo.read(1))
            bit_array = bitarray(endian='big')
            bit_array.frombytes(archivo.read(bits2bytes(tam_codigo)))
            # the code is stored byte-padded; keep only its real bits
            codigo = bit_array.to01()[:tam_codigo]
            items.append([simbolo, tam_codigo, codigo])
        meta_datos.append(items)

        bit_array = bitarray(endian='big')
        bit_array.frombytes(archivo.read())
        meta_datos.append(bit_array.to01())
        meta_datos.append(bn)
    except Exception:
        # best effort: report and return what has been parsed so far
        print("Error al leer " + nombre_archivo)
    finally:
        archivo.close()

    return meta_datos
def abre_archivo(nombre_archivo):
    """Read the file `nombre_archivo` and return its parsed contents.

    On success the returned list is
    ``[bx, by, items, cadena01, bn]`` where ``bx``, ``by`` are the first two
    4-byte integers, ``items`` is a list of ``[simbolo, tam_codigo, codigo]``
    entries (one per table entry, ``codigo`` being a '0'/'1' string of
    ``tam_codigo`` characters), ``cadena01`` is the rest of the file as a
    '0'/'1' string and ``bn`` is the single byte read after the header
    integers.

    If the file cannot be opened a message is printed and an empty list is
    returned.  If reading fails part-way, a message is printed and whatever
    was collected so far is returned.
    """
    meta_datos = []
    items = []

    try:
        archivo = open(nombre_archivo, "rb")
    except IOError:
        print('No se pudo abrir: ' + nombre_archivo + "!")
        # nothing to read; previously this path crashed on an unbound flag
        return meta_datos

    try:
        bx = convierte_a_int(archivo.read(4))
        by = convierte_a_int(archivo.read(4))
        be = convierte_a_int(archivo.read(4))  # number of table entries
        bn = ord(archivo.read(1))
        meta_datos.append(bx)
        meta_datos.append(by)

        for _ in range(be):
            simbolo = ord(archivo.read(1))
            tam_codigo = ord(archivo.read(1))
            bit_array = bitarray(endian='big')
            bit_array.frombytes(archivo.read(bits2bytes(tam_codigo)))
            # the code is stored byte-padded; keep only its real bits
            codigo = bit_array.to01()[:tam_codigo]
            items.append([simbolo, tam_codigo, codigo])
        meta_datos.append(items)

        bit_array = bitarray(endian='big')
        bit_array.frombytes(archivo.read())
        meta_datos.append(bit_array.to01())
        meta_datos.append(bn)
    except Exception:
        # best effort: report and return what has been parsed so far
        print("Error al leer " + nombre_archivo)
    finally:
        archivo.close()

    return meta_datos
def encode(self):
    """Encode self.byte_seq into a bitarray using the tree in self.tree.

    For every symbol the code returned by ``self.tree.search`` is emitted;
    on a first appearance the raw symbol byte follows it.  The result is
    prefixed with 3 bits giving the number of bits needed to fill the whole
    output (prefix included) up to a byte boundary.
    """
    payload = bitarray()
    for symbol in tqdm(self.byte_seq, desc="COMPRESSING", colour='green', unit='bytes'):
        found = self.tree.search(symbol)
        is_new = found['first_appearance']
        payload.extend(found['code'])
        if is_new:
            # unseen symbol: send the raw byte after the emitted code
            payload.frombytes(bytes([symbol]))
        self.update(symbol, is_new)

    total_bits = len(payload) + 3
    remaining_length = bits2bytes(total_bits) * 8 - total_bits
    prefix = bitarray([ch == '1' for ch in '{:03b}'.format(remaining_length)])
    return prefix + payload
def update(self):
    """Recompute the row and total sizes (in bytes and bits) from self.size.

    Each row is rounded up to a whole number of bytes via ``bits2bytes``.
    """
    width, height = self.size
    row_bytes = bits2bytes(width)
    self.bytes_per_row = row_bytes
    self.bits_per_row = 8 * row_bytes
    total_bytes = row_bytes * height
    self.bytes = total_bytes
    self.bits = 8 * total_bytes
def compress(file_object, size, quality=50, grey_level=False,
             subsampling_mode=1):  # pylint: disable=too-many-locals
    """Compress the raw image read from `file_object`.

    The file is read as uint8 samples and reshaped to `size` (grey level) or
    `(*size, 3)` (RGB).  Each layer is zero-padded to multiples of 8, sliced
    into 8x8 blocks, passed through `dct2d` and `quantize`, rounded, and then
    entropy-encoded.

    Raises:
        ValueError: if `quality` is not within (0, 95].

    Returns:
        dict with key 'data' (the encoded bitarray) and key 'header' (size,
        grey_level, quality, subsampling_mode, remaining_bits_length and
        data_slice_lengths).
    """
    started = time.perf_counter()
    log = logging.getLogger(__name__)
    log.info('Original file size: %d Bytes',
             os.fstat(file_object.fileno()).st_size)

    if quality <= 0 or quality > 95:
        raise ValueError('Quality should within (0, 95].')

    def padding(extent):
        # rows/columns needed to reach the next multiple of 8
        return (extent // 8 + 1) * 8 - extent if extent % 8 else 0

    samples = np.fromfile(file_object, dtype=np.uint8)
    img_arr = samples.reshape(size if grey_level else (*size, 3))

    if grey_level:
        data = {Y: img_arr.astype(float)}
    else:
        # colour space conversion (without level offset), then subsampling
        data = rgb2ycbcr(img_arr[:, :, 0], img_arr[:, :, 1], img_arr[:, :, 2])
        for chroma in (CB, CR):
            data[chroma] = downsample(data[chroma], subsampling_mode)

    # level offset
    data[Y] = data[Y] - 128

    for key, layer in data.items():
        nrows, ncols = layer.shape

        # pad the layer to 8N x 8N, then slice it into 8x8 blocks
        data[key] = np.pad(
            layer,
            ((0, padding(nrows)), (0, padding(ncols))),
            mode='constant'
        )
        blocks = block_slice(data[key], 8, 8)
        data[key] = blocks

        for idx, block in enumerate(blocks):
            # 2D DCT, stored back before quantization reads it again
            blocks[idx] = dct2d(block)
            blocks[idx] = quantize(blocks[idx], key, quality=quality)

        # rounding
        data[key] = np.rint(data[key]).astype(int)

    if grey_level:
        encoded = Encoder(data[Y], LUMINANCE).encode()
        # binary order: DC, AC
        order = (encoded[DC], encoded[AC])
    else:
        luma = Encoder(data[Y], LUMINANCE).encode()
        chroma = Encoder(
            np.vstack((data[CB], data[CR])), CHROMINANCE
        ).encode()
        encoded = {LUMINANCE: luma, CHROMINANCE: chroma}
        # binary order:
        # LUMINANCE.DC, LUMINANCE.AC, CHROMINANCE.DC, CHROMINANCE.AC
        order = (encoded[LUMINANCE][DC], encoded[LUMINANCE][AC],
                 encoded[CHROMINANCE][DC], encoded[CHROMINANCE][AC])

    bits = bitarray(''.join(order))
    nbits = len(bits)

    log.info('Time elapsed: %.4f seconds', (time.perf_counter() - started))

    header = {
        'size': size,
        'grey_level': grey_level,
        'quality': quality,
        'subsampling_mode': subsampling_mode,
        # number of filler bits needed to complete the last byte
        'remaining_bits_length': bits2bytes(nbits) * 8 - nbits,
        'data_slice_lengths': tuple(len(d) for d in order),
    }
    return {'data': bits, 'header': header}
def tobytes(self):
    """Render this attribute as a bitarray: tag, length field(s), then data.

    The data is produced by the attribute's encoding function:
    ``self.f(self.value, *self.args, **self.kwargs)``.

    Returns:
        bitarray holding the 8-bit tag, the data length and the data.

    Raises:
        ValueError: if no encoding function is set, or if the data length
            in bytes does not fit into the 24-bit extended length field.
    """
    if not self.f:
        raise ValueError("cant encode this attribute without an encoding function")

    # encode data
    logger.debug("encoding attribute %s with function %s", self, self.f)
    data = self.f(self.value, *self.args, **self.kwargs)
    # NOTE(review): the data is not padded here (no data.fill()); the length
    # below is rounded up to whole bytes -- confirm the encoders byte-align.

    # b0-b7: tag
    bits = encode_number(self.tag, 8)

    # b8-b15: data length (0-253 bytes), or an escape marker:
    #   0xFE -> a 16-bit extended length follows (values up to 65535)
    #   0xFF -> a 24-bit extended length follows (values up to 16777215)
    # The upper bounds are exclusive powers of two because the length is
    # written verbatim into a field of that many bits.
    datalength = bits2bytes(data.length())
    if datalength <= 253:
        bits += encode_number(datalength, 8)
    elif datalength < 1 << 16:
        marker = bitarray()
        marker.fromstring("\xfe")
        bits += marker
        bits += encode_number(datalength, 16)
    elif datalength < 1 << 24:
        marker = bitarray()
        marker.fromstring("\xff")
        bits += marker
        bits += encode_number(datalength, 24)
    else:
        raise ValueError(
            "element data length exceeds the maximum allowed by the extended element length (24bits): %s > %s"
            % (datalength, (1 << 24) - 1)
        )
    bits += data
    return bits
def int2ba(i, length=None, endian='big'):
    """int2ba(int, /, length=None, endian='big') -> bitarray

Convert a non-negative integer into a bitarray of the given endianness.
Without `length` the result carries no leading (big-endian) / trailing
(little-endian) zeros.  With `length` the result has exactly that many bits,
and `OverflowError` is raised when the integer needs more bits than that.
"""
    int_types = (int, long) if _is_py2 else int
    if not isinstance(i, int_types):
        raise TypeError("integer expected")
    if i < 0:
        raise ValueError("non-negative integer expected")

    if length is not None:
        if not isinstance(length, int):
            raise TypeError("integer expected for length")
        if length <= 0:
            raise ValueError("integer larger than 0 expected for length")

    if not isinstance(endian, str):
        raise TypeError("string expected for endian")
    if endian not in ('big', 'little'):
        raise ValueError("endian can only be 'big' or 'little'")

    if i == 0:
        # zero is handled up front; the conversion below assumes i != 0
        return zeros(length or 1, endian=endian)

    big_endian = bool(endian == 'big')
    if _is_py2:
        # build the byte string by hand, least significant byte first
        c = bytearray()
        while i:
            i, low = divmod(i, 256)
            c.append(low)
        if big_endian:
            c.reverse()
        raw = bytes(c)
    else:  # py3
        raw = i.to_bytes(bits2bytes(i.bit_length()), byteorder=endian)

    a = bitarray(endian=endian)
    a.frombytes(raw)
    nbits = a.length()

    if nbits == length:
        return a
    if length is None:
        return strip(a, 'left' if big_endian else 'right')

    if nbits > length:
        # number of bits actually needed, ignoring the zero padding
        if big_endian:
            size = nbits - a.index(1)
        else:
            size = rindex(a) + 1
        if size > length:
            raise OverflowError("cannot represent %d bit integer in %d bits"
                                % (size, length))
        # drop the surplus zero bits from the most significant end
        if big_endian:
            a = a[nbits - length:]
        else:
            a = a[:length - nbits]
    else:
        # nbits < length here: pad with zeros on the most significant end
        if big_endian:
            a = zeros(length - nbits, 'big') + a
        else:
            a += zeros(length - nbits, 'little')

    assert a.length() == length
    return a
def tobytes(self):
    """Render this attribute as a bitarray: tag, length field(s), then data.

    The encoding of ``self.value`` is chosen by its type (integer, timedelta,
    Crid, Genre, datetime, str, Bearer, ContentId).  The encoded data is
    padded with ``fill()`` before the length is computed.

    Returns:
        bitarray holding the 8-bit tag, the data length and the data.

    Raises:
        ValueError: if an integer value has no ``bitlength``, if the value's
            type is not supported, or if the data length in bytes does not
            fit into the 24-bit extended length field.
    """
    value = self.value

    # encode data
    if isinstance(value, (int, long)):  # integer
        if self.bitlength is None:
            raise ValueError(
                'attribute with int value has no bitlength specification: %s' % self)
        logger.debug('encoding attribute %s as int with %d bits', self, self.bitlength)
        data = int_to_bitarray(value, self.bitlength)
    elif isinstance(value, datetime.timedelta):  # duration
        # NOTE(review): only the .seconds component is encoded; any .days
        # part of the timedelta is ignored -- confirm this is intended.
        data = int_to_bitarray(value.seconds, 16)
        logger.debug('encoding attribute %s as duration', self)
    elif isinstance(value, Crid):  # CRID
        data = bitarray()
        data.fromstring(str(value))
        logger.debug('encoding attribute %s as CRID', self)
    elif isinstance(value, Genre):  # genre
        data = encode_genre(value)
        logger.debug('encoding attribute %s as genre', self)
    elif isinstance(value, datetime.datetime):  # time
        data = encode_timepoint(value)
        logger.debug('encoding attribute %s as timepoint', self)
    elif isinstance(value, str):  # string
        data = bitarray()
        data.fromstring(value)
        logger.debug('encoding attribute %s as string', self)
    elif isinstance(value, Bearer):
        data = encode_contentid(value.id)
        logger.debug('encoding attribute %s as content ID from bearer', self)
    elif isinstance(value, ContentId):
        data = encode_contentid(value)
        logger.debug('encoding attribute %s as content ID', self)
    else:
        raise ValueError('dont know how to encode this type: %s = %s'
                         % (value.__class__.__name__, str(value)))
    data.fill()

    # b0-b7: tag
    bits = int_to_bitarray(self.tag, 8)

    # b8-b15: data length (0-253 bytes), or an escape marker:
    #   0xFE -> a 16-bit extended length follows (values up to 65535)
    #   0xFF -> a 24-bit extended length follows (values up to 16777215)
    # The upper bounds are exclusive powers of two because the length is
    # written verbatim into a field of that many bits.
    datalength = bits2bytes(data.length())
    if datalength <= 253:
        bits += int_to_bitarray(datalength, 8)
    elif datalength < 1 << 16:
        marker = bitarray()
        marker.fromstring('\xfe')
        bits += marker
        bits += int_to_bitarray(datalength, 16)
    elif datalength < 1 << 24:
        marker = bitarray()
        marker.fromstring('\xff')
        bits += marker
        bits += int_to_bitarray(datalength, 24)
    else:
        raise ValueError(
            'element data length exceeds the maximum allowed by the extended element length (24bits): %s > %s'
            % (datalength, (1 << 24) - 1))
    bits += data
    return bits
def read_to_bases(self, filetype, filename, fn, exp_chr = None, header = False):
    """Read `filename` and merge its contents into self.bases.

    What is read, and how it is merged, depends on `filetype`:

    * self.binarybedfile: the file is read as unsigned bytes for the single
      chromosome `exp_chr`; `fn` is applied to self.bases at every position
      whose byte equals 1.
    * self.binarybedfilegenome: the file is read as a bitarray preceded by a
      code that must match myBedTools.binarybedfilegenome_code; the bits are
      combined with self.bases according to `fn`.
    * self.binaryseqfilegenome: like the above (optionally gzip when the
      name ends in 'z') but self.bases is replaced by the bits read.
    * self.binaryseqfilegenomechr: prints a message and exits immediately.
    * otherwise the file is opened with myBedTools.open_file and parsed line
      by line with self.parse_line.

    filetype -- one of the self.* file type markers compared against below
    filename -- path of the file to read
    fn       -- function applied to individual entries of self.bases, or one
                of the myBedTools.set_to_one / set_to_zero / bitfn_and
                markers in the binarybedfilegenome branch
    exp_chr  -- chromosome name; required for self.binarybedfile
    header   -- passed to myBedTools.open_file as discard_header

    Format or length mismatches print a diagnostic and call sys.exit.
    """
    self.filename = os.path.basename(filename)
    sys.stderr.write('Reading file... ' + self.filename + ' ')
    if filetype == self.binarybedfile:
        if exp_chr == None:
            print "must send exp_chr for bb files!"
            sys.exit()
            pass
        ar = array('B')
        try:
            # ask for more items than expected; hitting EOF early is normal
            # NOTE(review): the file object opened here is never closed explicitly
            ar.fromfile(open(filename, 'rb'), self.genome_len+1)
        except EOFError:
            pass
        # the array must match the chromosome length (chrM may be one longer)
        if len(ar) != self.chr_lens[exp_chr] and not (exp_chr == 'chrM' and len(ar) == self.chr_lens[exp_chr]+1):
            print "expected length of array to match given start and end!"
            print filename
            print exp_chr
            print len(ar), self.chr_lens[exp_chr]
            sys.exit(-1)
            pass
        # apply fn at every flagged position, offset into the genome-wide array
        for i in xrange(self.chr_lens[exp_chr]):
            if ar[i] == 1:
                self.bases[self.chr_offset[exp_chr]+i] = fn(self.bases[self.chr_offset[exp_chr]+i])
                pass
            pass
    elif filetype == self.binarybedfilegenome:
        ar = bitarray()
        code = bitarray()
        # NOTE(review): opened in mode 'r' although the content is read as
        # raw bits, and never closed explicitly -- confirm 'rb' is not needed
        file = open(filename, 'r')
        #code.fromfile(file)
        # leading code identifying the file format
        code.fromfile(file, 8)
        if code.to01() != myBedTools.binarybedfilegenome_code:
            print "unexpected code for binary bed file genome!"
            print "code: ", code
            print "expected: ", myBedTools.binarybedfilegenome_code
            sys.exit(-1)
        try:
            #ar.fromfile(file, self.genome_len + 16)
            ar.fromfile(file)
        except EOFError:
            pass
        # the payload must be genome_len bits rounded up to whole bytes
        if ar.length() != bitarray_m.bits2bytes(self.genome_len) * 8:
            print "expected length of array to match given start and end!"
            print filename
            print ar.length(), bitarray_m.bits2bytes(self.genome_len), self.genome_len
            sys.exit(-1)
            pass
        # shorten ar to genome_len
        for i in xrange(ar.length() - self.genome_len):
            #print self.genome_len, ar.length(), 'popping'
            ar.pop()
            pass
        #### COULD CHECK FNS FOR BITARRAY STUFF (AND, OR ETC)
        if not self.initialize:
            # first file read: self.bases is taken directly from the file
            self.initialize = True
            if fn == myBedTools.set_to_one:
                self.bases = ar
            elif fn == myBedTools.set_to_zero:
                self.bases = ~ar
            else:
                print "not initialized, and using fn other than set_to_one!"
                print fn
                print len(self.bases)
                sys.exit(-1)
                return
            pass
        elif fn == myBedTools.set_to_one:
            self.bases |= ar
        elif fn == myBedTools.set_to_zero:
            self.bases &= ~ar
        elif fn == myBedTools.bitfn_and:
            self.bases &= ar
        else:
            # arbitrary fn: fall back to applying it position by position
            for i in xrange(self.end):
                if ar[i] == 1:
                    self.bases[i] = fn(self.bases[i])
                    pass
                pass
            pass
    elif filetype == self.binaryseqfilegenome:
        ar = bitarray()
        code = bitarray()
        # names ending in 'z' are read through gzip
        if filename.endswith('z'):
            file = gzip.open(filename, 'r')
        else:
            file = open(filename, 'r')
            pass
        # leading code identifying the file format
        code.fromfile(file, 8)
        if code.to01() != myBedTools.binaryseqfilegenome_code:
            print "unexpected code for binary seq file genome!"
            print "code: ", code
            print "expected: ", myBedTools.binaryseqfilegenome_code
            sys.exit(-1)
        try:
            #ar.fromfile(file, self.genome_len * self.factor + 16)
            ar.fromfile(file)
        except EOFError:
            pass
        # the payload must be genome_len * factor bits rounded up to whole bytes
        if ar.length() != bitarray_m.bits2bytes(self.genome_len * self.factor) * 8:
            print "expected length of array to match given start and end!"
            print filename
            print ar.length(), bitarray_m.bits2bytes(self.genome_len * self.factor), self.genome_len * self.factor
            sys.exit(-1)
            pass
        # shorten ar to genome_len
        for i in xrange(ar.length() - self.genome_len * self.factor):
            #print self.genome_len, ar.length(), 'popping'
            ar.pop()
            pass
        self.bases = ar
        pass
    elif filetype == self.binaryseqfilegenomechr:
        # this branch exits right away; everything after sys.exit is unreachable
        print "THIS DOESN'T WORK"
        sys.exit(-1)
        ar = bitarray()
        code = bitarray()
        file = open(filename, 'r')
        code.fromfile(file, 8)
        if code.to01() != myBedTools.binaryseqfilegenome_code:
            print "unexpected code for binary seq file genome!"
            print "code: ", code
            print "expected: ", myBedTools.binaryseqfilegenome_code
            sys.exit(-1)
        try:
            chr_byte_start = self.chr_offset[exp_chr] * self.factor // 8
            chr_partial_byte_start = (self.chr_offset[exp_chr] * self.factor) % 8
            print 'starting at', chr_byte_start, 'bytes, with', chr_partial_byte_start, 'offset'
            file.seek(chr_byte_start, 0)
            ar.fromfile(file, self.chr_lens[exp_chr] * self.factor + 16)
            print ar[0:100]
        except EOFError:
            pass
        # NOTE(review): the check uses chr_lens[exp_chr] but the diagnostic
        # and the trimming loop below use genome_len -- looks inconsistent
        if ar.length() != bitarray_m.bits2bytes(self.chr_lens[exp_chr] * self.factor) * 8:
            print "expected length of array to match given start and end!"
            print filename
            print ar.length(), bitarray_m.bits2bytes(self.genome_len * self.factor), self.genome_len * self.factor
            sys.exit(-1)
            pass
        # shorten ar to genome_len
        for i in xrange(ar.length() - self.genome_len * self.factor):
            #print self.genome_len, ar.length(), 'popping'
            ar.pop()
            pass
        self.bases = ar
        pass
    elif self.output_type == myBedTools.binaryseqfilegenome:
        # text input, one site per line, written into the encoded sequence
        infile = myBedTools.open_file(filename, discard_header = header)
        for line in infile:
            if line.strip().startswith('#'):
                continue
            #print line
            [l_chr, l_start, l_end, l_base] = self.parse_line(filetype, line, return_base = True)
            # only single-character bases are stored
            if len(l_base) != 1:
                continue
            if self.debug:
                print 'bed', l_chr, l_start, l_end, l_base
            # each site occupies self.factor consecutive bits
            site = self.chr_offset[l_chr] * self.factor + l_start * self.factor
            self.bases[site : site + self.factor] = self.binaryseq_decode[l_base.upper()]
            pass
        pass
    else:
        # text input: apply fn to every position of each parsed interval
        infile = myBedTools.open_file(filename, discard_header = header)
        for line in infile:
            [l_chr, l_start, l_end] = self.parse_line(filetype, line)
            if self.debug:
                print 'bed', l_chr, l_start, l_end
            for i in xrange(l_start, l_end):
                self.bases[self.chr_offset[l_chr]+i] = fn(self.bases[self.chr_offset[l_chr]+i])
                pass
            pass
        pass
    gc.collect()
    sys.stderr.write(' done\n')
    return
def tobytes(self):
    """Render this attribute as a bitarray: tag, length field(s), then data.

    The data is produced by the attribute's encoding function:
    ``self.f(self.value, *self.args, **self.kwargs)``.

    Returns:
        bitarray holding the 8-bit tag, the data length and the data.

    Raises:
        ValueError: if no encoding function is set, or if the data length
            in bytes does not fit into the 24-bit extended length field.
    """
    if not self.f:
        raise ValueError('cant encode this attribute without an encoding function')

    # encode data
    logger.debug('encoding attribute %s with function %s', self, self.f)
    data = self.f(self.value, *self.args, **self.kwargs)
    # NOTE(review): the data is not padded here (no data.fill()); the length
    # below is rounded up to whole bytes -- confirm the encoders byte-align.

    # b0-b7: tag
    bits = encode_number(self.tag, 8)

    # b8-b15: data length (0-253 bytes), or an escape marker:
    #   0xFE -> a 16-bit extended length follows (values up to 65535)
    #   0xFF -> a 24-bit extended length follows (values up to 16777215)
    # The upper bounds are exclusive powers of two because the length is
    # written verbatim into a field of that many bits.
    datalength = bits2bytes(data.length())
    if datalength <= 253:
        bits += encode_number(datalength, 8)
    elif datalength < 1 << 16:
        marker = bitarray()
        marker.fromstring('\xfe')
        bits += marker
        bits += encode_number(datalength, 16)
    elif datalength < 1 << 24:
        marker = bitarray()
        marker.fromstring('\xff')
        bits += marker
        bits += encode_number(datalength, 24)
    else:
        raise ValueError(
            'element data length exceeds the maximum allowed by the extended element length (24bits): %s > %s'
            % (datalength, (1 << 24) - 1))
    bits += data
    return bits
def __init__(self, data):
    """Initialise the base model from `data`, then extract self.bots.

    Everything in `data` past the first ``bits2bytes(self.size**3 + 8) * 8``
    items is packed into a big-endian bitarray, reinterpreted as int8 values
    and stored in self.bots as rows of four.
    """
    super(ExtendedModel, self).__init__(data)
    # size of the leading section, rounded up to a whole number of bytes
    skip = bits2bytes(self.size**3 + 8) * 8
    bots_data = bitarray(data[skip:], endian="big")
    raw = bots_data.tobytes()
    flat = numpy.frombuffer(raw, dtype=numpy.int8)
    self.bots = flat.reshape((-1, 4))
def encode(self):
    """Compress the target byte sequence with adaptive Huffman coding.

    Returns:
        bitarray: The compressed bits. Use `bitarray.tofile()` to save them
            to a file.
    """

    def encode_fixed_code(dec):
        """Translate a symbol into its fixed code.

        Arguments:
            dec {int} -- The symbol to translate.

        Returns:
            list of bool -- The fixed code of the symbol.
        """
        alphabet_idx = dec - (self._alphabet_first_num - 1)
        if alphabet_idx <= 2 * self.rem:
            value, width = alphabet_idx - 1, self.exp + 1
        else:
            value, width = alphabet_idx - self.rem - 1, self.exp
        return bin_str2bool_list('{:0{padding}b}'.format(value, padding=width))

    progressbar = ShadyBar('encoding', max=len(self.byte_seq),
                           suffix='%(percent).1f%% - %(elapsed_td)ss')

    if self.dpcm:
        self.byte_seq = tuple(encode_dpcm(self.byte_seq))

    logging.getLogger(__name__).info('entropy: %f', entropy(self.byte_seq))

    code = []
    for symbol in self.byte_seq:
        fixed_code = encode_fixed_code(symbol)
        found = self.tree.search(fixed_code)
        first_seen = found['first_appearance']

        # Known symbol: the code is the path from the root to its node.
        # New symbol: the code is that of NYT, followed by the fixed code.
        code.extend(found['code'])
        if first_seen:
            code.extend(fixed_code)

        self.update(fixed_code, first_seen)
        progressbar.next()

    # The output starts with 3 bits (big endian) giving how many filler bits
    # complete the last byte, so the decoder does not mistake them for data.
    total_bits = len(code) + 3
    remaining_bits_length = bits2bytes(total_bits) * 8 - total_bits
    header = bin_str2bool_list('{:03b}'.format(remaining_bits_length))

    progressbar.finish()
    return bitarray(header + code)