Beispiel #1
0
 def test_bit_count2(self):
     """Check count_and / count_or / count_xor against explicit bitwise ops.

     Pairs of random bitarrays are built for every length below 50 plus one
     random length drawn with ``randint(1000, 2000)``; each counting helper
     must agree with ``.count()`` of the corresponding combined bitarray.
     """
     def random_bits(nbits):
         # Fill from whole random bytes, then cut off the surplus bits.
         bits = bitarray()
         bits.frombytes(os.urandom(bits2bytes(nbits)))
         del bits[nbits:]
         return bits

     lengths = list(range(50))
     lengths.append(randint(1000, 2000))
     for nbits in lengths:
         x = random_bits(nbits)
         y = random_bits(nbits)
         self.assertEqual(count_and(x, y), (x & y).count())
         self.assertEqual(count_or(x, y), (x | y).count())
         self.assertEqual(count_xor(x, y), (x ^ y).count())
Beispiel #2
0
def make_endian(a, endian):
    """make_endian(bitarray, endian, /) -> bitarray

When the endianness of the given bitarray is different from `endian`,
return a new bitarray, with endianness `endian` and the same elements
as the original bitarray, i.e. even though the binary representation of the
new bitarray will be different, the returned bitarray will equal the original
one.
Otherwise (endianness is already `endian`) the original bitarray is returned
unchanged.

Raises `TypeError` when the first argument is not a bitarray.
"""
    if not isinstance(a, _bitarray):
        raise TypeError("bitarray expected")

    if a.endian() == endian:
        return a

    b = bitarray(a, endian)
    # len() instead of the deprecated .length() method, matching int2ba().
    la = len(a)
    if la == 0:
        return b

    b.bytereverse()
    if la % 8:
        # The last byte is only partially used: copy its bits item by item
        # from the original instead of relying on the byte reversal.
        p = 8 * (bits2bytes(la) - 1)
        b[p:] = a[p:]
    return b
def int2ba(i, length=None, endian=None, signed=False):
    """int2ba(int, /, length=None, endian=None, signed=False) -> bitarray

Return the bitarray representation of the given integer, using the given
endianness.  Without `length`, the result carries no leading (big-endian) /
trailing (little-endian) zeros.  With `length`, the result has exactly that
many bits and an `OverflowError` is raised when the integer does not fit.
`signed` selects two's complement representation and requires `length`.
A negative integer with `signed=False` raises an `OverflowError`.
"""
    int_types = (int, long) if _is_py2 else int
    if not isinstance(i, int_types):
        raise TypeError("int expected, got '%s'" % type(i).__name__)

    if length is None:
        if signed:
            raise TypeError("signed requires length")
    elif not isinstance(length, int):
        raise TypeError("int expected for length")
    elif length <= 0:
        raise ValueError("integer larger than 0 expected for length")

    if i == 0:
        # zero is handled up front, the conversion below assumes i != 0
        return zeros(length or 1, endian)

    if signed:
        half = 1 << (length - 1)
        if i >= half or i < -half:
            raise OverflowError("signed integer out of range")
        if i < 0:
            # two's complement: map the negative value into [half, 2 * half)
            i += 1 << length
    elif i < 0 or (length and i >= 1 << length):
        raise OverflowError("unsigned integer out of range")

    a = bitarray(0, get_default_endian() if endian is None else endian)
    big_endian = a.endian() == 'big'

    if _is_py2:
        # no int.to_bytes() here: collect base-256 digits, lowest first
        digits = bytearray()
        while i:
            i, low = divmod(i, 256)
            digits.append(low)
        if big_endian:
            digits.reverse()
        b = bytes(digits)
    else:  # py3
        nbytes = bits2bytes(i.bit_length())
        b = i.to_bytes(nbytes, byteorder=a.endian())
    a.frombytes(b)

    if length is None:
        if big_endian:
            return strip(a, 'left')
        return strip(a, 'right')

    la = len(a)
    if la > length:
        # drop the surplus bits on the most significant side
        if big_endian:
            a = a[-length:]
        else:
            a = a[:length]
    elif la < length:
        # pad with zeros on the most significant side
        pad = zeros(length - la, endian)
        if big_endian:
            a = pad + a
        else:
            a = a + pad
    assert len(a) == length
    return a
def urandom(length, endian=None):
    """urandom(length, /, endian=None) -> bitarray

Return a bitarray holding `length` random bits, taken from `os.urandom`.
When `endian` is None, the default endianness is used.
"""
    if endian is None:
        result = bitarray(0, get_default_endian())
    else:
        result = bitarray(0, endian)
    # whole random bytes are appended first, the surplus bits removed after
    nbytes = bits2bytes(length)
    result.frombytes(os.urandom(nbytes))
    del result[length:]
    return result
Beispiel #5
0
 def test_zeros_and_ones(self):
     """Round-trip all-zero and all-one bitarrays through (de)serialize.

     For both endiannesses and every length below 100, everything after the
     first byte of the serialized zeros must be zero bytes, and
     deserializing must give back the original bitarray.
     """
     for endian in ('little', 'big'):
         for nbits in range(100):
             bits = zeros(nbits, endian)
             blob = serialize(bits)
             expected_payload = b'\0' * bits2bytes(nbits)
             self.assertEqual(blob[1:], expected_payload)
             self.assertEQUAL(bits, deserialize(blob))
             bits.setall(1)
             restored = deserialize(serialize(bits))
             self.assertEQUAL(bits, restored)
Beispiel #6
0
def compress(f, size, quality=50, grey_level=False, subsampling_mode=1):
    """Compress a raw 8-bit image file into an entropy-coded bit stream.

    Args:
        f: Open file object holding the raw samples; it is read with
            ``np.fromfile`` and must expose ``fileno()``.
        size: Shape the samples are reshaped to; for colour input a trailing
            axis of length 3 is appended.
        quality: Quantization quality, must lie in (0, 95].
        grey_level: True for single-channel input, False for RGB input.
        subsampling_mode: Passed through to ``downsample`` for the chroma
            layers (colour input only).

    Returns:
        A dict with the encoded ``bitarray`` under ``'data'`` and, under
        ``'header'``, the parameters needed to decode it (size, grey_level,
        quality, subsampling_mode, number of padding bits needed to reach a
        whole byte, and the length of every encoded slice).

    Raises:
        ValueError: If ``quality`` is outside (0, 95].
    """
    logger = logging.getLogger(__name__)
    start_time = time.perf_counter()
    # Lazy %-style arguments: the message is only built if INFO is enabled.
    logger.info('Original file size: %d Bytes', os.fstat(f.fileno()).st_size)

    if quality <= 0 or quality > 95:
        raise ValueError('Quality should within (0, 95].')

    img_arr = np.fromfile(
        f, dtype=np.uint8).reshape(size if grey_level else (*size, 3))

    if grey_level:
        data = {Y: img_arr.astype(float)}
    else:  # RGB
        # Colour space conversion, then chroma subsampling
        data = rgbtoycbcr(*(img_arr[:, :, idx] for idx in range(3)))
        data[CB] = downsample(data[CB], subsampling_mode)
        data[CR] = downsample(data[CR], subsampling_mode)

    # Level offset
    data[Y] = data[Y] - 128

    for key, layer in data.items():
        nrows, ncols = layer.shape

        # Pad each layer up to the next multiple of 8 in both directions;
        # -n % 8 is the distance from n to that multiple (0 if aligned).
        data[key] = np.pad(
            layer, ((0, -nrows % 8), (0, -ncols % 8)), mode='constant')

        # Block slicing
        data[key] = block_slice(data[key], 8, 8)

        for idx, block in enumerate(data[key]):
            # 2D DCT
            data[key][idx] = dct2d(block)
            # Quantization
            data[key][idx] = quantize(data[key][idx], key, quality=quality)

        # Rounding
        data[key] = np.rint(data[key]).astype(int)

    if grey_level:
        # Entropy encoder; slices are ordered DC, AC
        encoded = Encoder(data[Y], LUMINANCE).encode()
        order = (encoded[DC], encoded[AC])
    else:  # RGB
        # Entropy encoder; slices are ordered
        #   LUMINANCE.DC, LUMINANCE.AC, CHROMINANCE.DC, CHROMINANCE.AC
        encoded = {
            LUMINANCE:
            Encoder(data[Y], LUMINANCE).encode(),
            CHROMINANCE:
            Encoder(np.vstack((data[CB], data[CR])), CHROMINANCE).encode()
        }
        order = (encoded[LUMINANCE][DC], encoded[LUMINANCE][AC],
                 encoded[CHROMINANCE][DC], encoded[CHROMINANCE][AC])

    bits = bitarray(''.join(order))
    logger.info('Time elapsed: %.4f seconds',
                time.perf_counter() - start_time)
    return {
        'data': bits,
        'header': {
            'size': size,
            'grey_level': grey_level,
            'quality': quality,
            'subsampling_mode': subsampling_mode,
            # Number of filler bits needed to complete the last byte.
            'remaining_bits_length': bits2bytes(len(bits)) * 8 - len(bits),
            'data_slice_lengths': tuple(len(d) for d in order)
        }
    }
Beispiel #7
0
 def _get_message(self):
     """Fetch one peer message through ``_get_data`` and act on its id.

     The first byte of the message selects the handling:
     0 choke, 1 unchoke, 2 interested, 3 uninterested, 4 have, 5 bitfield,
     6 request, 7 piece, 8 cancel, 9 DHT port (ignored).

     A bitfield of the wrong size, or a 'have' whose payload is not exactly
     one in-range piece index, is treated as a protocol violation: the
     connection is scheduled for closing via ``close_when_done`` and the
     message is dropped.
     """
     data = self._get_data()
     msgid = ord(data[0])
     if msgid == 0:  # choke
         if not self.peer_choking:
             self.torrent_downloader.update_choking_status(False)
         self.peer_choking = True
         self.torrent_downloader.ui.update_log('choked')
         self.n_requests_in_flight = 0
     elif msgid == 1:  # unchoke
         if self.peer_choking:
             self.torrent_downloader.update_choking_status(True)
         self.peer_choking = False
         self.torrent_downloader.ui.update_log( 'unchoked')
     elif msgid == 2:  # interested
         self.peer_interested = True
         self.torrent_downloader.interest_state(self)
         self.torrent_downloader.ui.update_log( 'interested')
     elif msgid == 3:  # uninterested
         self.peer_interested = False
         self.torrent_downloader.interest_state(self)
         self.torrent_downloader.ui.update_log( 'uninterested')
     elif msgid == 4:  # have
         # The payload must be exactly one big-endian 32-bit piece index;
         # anything else would make struct.unpack raise struct.error.
         if len(data) != 5:
             self.close_when_done()
             return
         index, = struct.unpack('>L', data[1:])
         # The index comes from the peer: reject values outside the
         # bitfield instead of failing with IndexError on assignment.
         if index >= len(self.bitfield):
             self.close_when_done()
             return
         self.bitfield[index] = True
         self.torrent_downloader.ui.update_log('have ' + str(index) + ' for ' + str(self.bitfield.count()))
     elif msgid == 5:  # bitfield
         if len(data[1:]) != bits2bytes(len(self.bitfield)):  # wrong length
             self.close_when_done()
             return
         self.bitfield = bitarray('')
         self.bitfield.frombytes(data[1:])
         # frombytes appends whole bytes; cut back to the number of pieces
         self.bitfield = self.bitfield[:len(self.fileinfo.pieces)]
         self.torrent_downloader.ui.update_log( 'bitfield, has ' + str(self.bitfield.count()))
     elif msgid == 6:  # request
         # NOTE(review): a payload shorter than 12 bytes still raises
         # struct.error here (same for piece and cancel below).
         if not self.am_choking:
             req = struct.unpack('>LLL', data[1:13])
             self.torrent_downloader.ui.update_log( 'request for piece' + repr(req))
             self.torrent_downloader.got_request(self, req)
     elif msgid == 7:  # piece
         block = data[9:]
         index, begin = struct.unpack('>LL', data[1:9])
         self.n_requests_in_flight -= 1
         self.torrent_downloader.got_piece(index,begin,block)
     elif msgid == 8:  # cancel
         self.torrent_downloader.ui.update_log( 'cancel ' + repr(struct.unpack('>LLL', data[1:13])))
         # TODO: queued requests are not actually removed on cancel yet
     elif msgid == 9:  # dht port
         pass
Beispiel #8
0
    def tobytes(self):
        """Render this element (tag, length field, then content) as bits.

        The content is the concatenation of ``tobytes()`` of every attribute,
        every child and, if present, ``self.cdata``.  It is preceded by the
        8-bit element tag and a length field counting the content in bytes:

        * up to 253 bytes: one length byte
        * 254 to 65535 bytes: marker byte 0xFE plus a 16-bit length
        * 65536 to 16777215 bytes: marker byte 0xFF plus a 24-bit length

        Raises:
            ValueError: If the content is too long for the 24-bit length.
        """
        logger.debug('rendering element: %s', self)
        data = bitarray()
        for attribute in self.attributes:
            try:
                data += attribute.tobytes()
            except Exception:
                # log which attribute failed, then let the error propagate
                logger.exception('error rendering attribute %s of %s',
                                 attribute, self)
                raise
        for child in self.children:
            try:
                data += child.tobytes()
            except Exception:
                logger.exception('error rendering child %s of %s', child, self)
                raise
        if self.cdata is not None:
            data += self.cdata.tobytes()

        # b0-b7: element tag
        bits = int_to_bitarray(self.tag, 8)

        # b8-15: element data length (0-253 bytes)
        # b16-31: extended element length (254-65535 bytes)
        # b16-39: extended element length (65536-16777215 bytes)
        datalength = bits2bytes(len(data))
        if datalength <= 253:
            bits += int_to_bitarray(datalength, 8)
        elif datalength < 1 << 16:
            # strict bound: 1 << 16 itself does not fit into 16 bits
            marker = bitarray()
            marker.frombytes(b'\xfe')
            bits += marker
            bits += int_to_bitarray(datalength, 16)
        elif datalength < 1 << 24:
            # strict bound: 1 << 24 itself does not fit into 24 bits
            marker = bitarray()
            marker.frombytes(b'\xff')
            bits += marker
            bits += int_to_bitarray(datalength, 24)
        else:
            raise ValueError(
                'element data length exceeds the maximum allowed by the '
                'extended element length (24bits): %d > %d'
                % (datalength, (1 << 24) - 1))

        bits += data
        return bits
Beispiel #9
0
def abre_archivo(nombre_archivo):
    """Read an encoded file and return its parsed contents as a list.

    The file is consumed in this order: three 4-byte fields decoded with
    ``convierte_a_int`` (bx, by, be), one single byte (bn), then ``be`` code
    table entries (symbol byte, code length byte, and the code bits stored
    in whole bytes), and finally the remaining payload.

    Returns:
        ``[bx, by, items, cadena01, bn]`` where ``items`` is a list of
        ``[simbolo, tam_codigo, codigo]`` entries (``codigo`` being a string
        of '0'/'1') and ``cadena01`` is the payload as a '0'/'1' string.
        If the file cannot be opened an empty list is returned; if reading
        fails part-way, the list holds whatever was parsed up to that point.
    """
    meta_datos = []
    try:
        archivo = open(nombre_archivo, "rb")
    except IOError:
        print('No se pudo abrir: ' + nombre_archivo + "!")
        return meta_datos

    # The context manager closes the file even when parsing fails.
    with archivo:
        try:
            bx = convierte_a_int(archivo.read(4))
            by = convierte_a_int(archivo.read(4))
            be = convierte_a_int(archivo.read(4))
            bn = ord(archivo.read(1))
            meta_datos.append(bx)
            meta_datos.append(by)

            items = []
            for _ in range(be):
                simbolo = ord(archivo.read(1))
                tam_codigo = ord(archivo.read(1))
                bit_array = bitarray(endian='big')
                bit_array.frombytes(archivo.read(bits2bytes(tam_codigo)))
                # the code is stored in whole bytes: keep only its real bits
                codigo = bit_array.to01()[:tam_codigo]
                items.append([simbolo, tam_codigo, codigo])
            meta_datos.append(items)

            bit_array = bitarray(endian='big')
            bit_array.frombytes(archivo.read())
            meta_datos.append(bit_array.to01())
            meta_datos.append(bn)
        except Exception:
            print("Error al leer " + nombre_archivo)

    return meta_datos
Beispiel #10
0
def abre_archivo(nombre_archivo):
    """Parse an encoded file into ``[bx, by, items, cadena01, bn]``.

    Layout read from the file: three 4-byte fields decoded with
    ``convierte_a_int`` (bx, by, be), one byte (bn), ``be`` code table
    entries (symbol byte, code length byte, code bits stored in whole
    bytes), then the rest of the file as payload.

    Returns:
        A list with bx, by, the code table (a list of
        ``[simbolo, tam_codigo, codigo]`` with ``codigo`` as a '0'/'1'
        string), the payload as a '0'/'1' string, and bn.  An empty list is
        returned when the file cannot be opened; a partially filled list
        when reading fails part-way.
    """
    meta_datos = []
    try:
        archivo = open(nombre_archivo, "rb")
    except IOError:
        print('No se pudo abrir: ' + nombre_archivo + "!")
        return meta_datos

    # Using the file as a context manager guarantees it gets closed.
    with archivo:
        try:
            bx = convierte_a_int(archivo.read(4))
            by = convierte_a_int(archivo.read(4))
            be = convierte_a_int(archivo.read(4))
            bn = ord(archivo.read(1))
            meta_datos.append(bx)
            meta_datos.append(by)

            items = []
            for _ in range(be):
                simbolo = ord(archivo.read(1))
                tam_codigo = ord(archivo.read(1))
                bit_array = bitarray(endian='big')
                bit_array.frombytes(archivo.read(bits2bytes(tam_codigo)))
                # drop the padding bits of the last code byte
                codigo = bit_array.to01()[:tam_codigo]
                items.append([simbolo, tam_codigo, codigo])
            meta_datos.append(items)

            bit_array = bitarray(endian='big')
            bit_array.frombytes(archivo.read())
            meta_datos.append(bit_array.to01())
            meta_datos.append(bn)
        except Exception:
            print("Error al leer " + nombre_archivo)

    return meta_datos
Beispiel #11
0
    def encode(self):
        """Encode ``self.byte_seq`` and return the resulting bitarray.

        For every symbol the code found by ``self.tree.search`` is appended;
        on a symbol's first appearance the raw symbol byte follows it.  The
        result is prefixed with a 3-bit field holding the number of bits
        missing to complete the last byte (the 3 header bits included).
        """
        code = bitarray()
        progress = tqdm(self.byte_seq,desc="COMPRESSING",colour='green',unit='bytes')
        for symbol in progress:
            found = self.tree.search(symbol)
            is_new = found['first_appearance']
            code.extend(found['code'])
            if is_new:
                # first occurrence: the raw byte follows the code
                code.frombytes(bytes([symbol]))
            self.update(symbol, is_new)

        total_bits = len(code) + 3
        remaining_length = bits2bytes(total_bits) * 8 - total_bits
        header_text = '{:03b}'.format(remaining_length)
        header = bitarray([digit == '1' for digit in header_text])
        return header + code
Beispiel #12
0
 def update(self):
     """Recompute the row and total sizes (bytes and bits) from self.size.

     Each row occupies a whole number of bytes, as given by bits2bytes().
     """
     width, height = self.size
     row_bytes = bits2bytes(width)
     total_bytes = row_bytes * height
     self.bytes_per_row = row_bytes
     self.bits_per_row = 8 * row_bytes
     self.bytes = total_bytes
     self.bits = 8 * total_bytes
Beispiel #13
0
def compress(file_object, size, quality=50, grey_level=False, subsampling_mode=1):  # pylint: disable=too-many-locals
    """Compress a raw 8-bit image file into an entropy-coded bit stream.

    The samples read from ``file_object`` are reshaped to ``size`` (with a
    trailing axis of 3 for colour input), converted and subsampled when in
    colour, level-shifted, padded to multiples of 8, transformed block by
    block (DCT, quantization, rounding) and finally entropy encoded.

    Returns a dict with the encoded bitarray under ``'data'`` and the
    decoding parameters under ``'header'``.  Raises ``ValueError`` when
    ``quality`` is outside (0, 95].
    """
    started = time.perf_counter()
    log = logging.getLogger(__name__)
    log.info(
        'Original file size: %d Bytes', os.fstat(file_object.fileno()).st_size
    )

    if quality <= 0 or quality > 95:
        raise ValueError('Quality should within (0, 95].')

    raw = np.fromfile(file_object, dtype=np.uint8)
    if grey_level:
        img_arr = raw.reshape(size)
        data = {Y: img_arr.astype(float)}
    else:  # RGB
        img_arr = raw.reshape((*size, 3))

        # Colour space conversion (level offset is applied further down)
        red, green, blue = (img_arr[:, :, channel] for channel in range(3))
        data = rgb2ycbcr(red, green, blue)

        # Only the chroma layers are subsampled
        for chroma_key in (CB, CR):
            data[chroma_key] = downsample(data[chroma_key], subsampling_mode)

    # Level offset
    data[Y] = data[Y] - 128

    for key in list(data):
        nrows, ncols = data[key].shape

        # Extend each axis to the next multiple of 8 (no-op when aligned)
        padded = np.pad(
            data[key],
            ((0, -nrows % 8), (0, -ncols % 8)),
            mode='constant'
        )

        blocks = block_slice(padded, 8, 8)
        for idx, block in enumerate(blocks):
            # 2D DCT, stored back before being quantized in place
            blocks[idx] = dct2d(block)
            blocks[idx] = quantize(blocks[idx], key, quality=quality)

        data[key] = np.rint(blocks).astype(int)

    if grey_level:
        # Slices in the order: DC, AC
        encoded = Encoder(data[Y], LUMINANCE).encode()
        order = (encoded[DC], encoded[AC])
    else:  # RGB
        # Slices in the order:
        #   LUMINANCE.DC, LUMINANCE.AC, CHROMINANCE.DC, CHROMINANCE.AC
        luma = Encoder(data[Y], LUMINANCE).encode()
        chroma = Encoder(
            np.vstack((data[CB], data[CR])),
            CHROMINANCE
        ).encode()
        order = (luma[DC], luma[AC], chroma[DC], chroma[AC])

    bits = bitarray(''.join(order))

    log.info(
        'Time elapsed: %.4f seconds', (time.perf_counter() - started)
    )

    # Filler bits that would be needed to complete the last byte.
    filler_bits = bits2bytes(len(bits)) * 8 - len(bits)
    slice_lengths = tuple(len(part) for part in order)
    header = {
        'size': size,
        'grey_level': grey_level,
        'quality': quality,
        'subsampling_mode': subsampling_mode,
        'remaining_bits_length': filler_bits,
        'data_slice_lengths': slice_lengths
    }
    return {'data': bits, 'header': header}
    def tobytes(self):
        """Encode this attribute as tag, length field and encoded value.

        The value is encoded by calling
        ``self.f(self.value, *self.args, **self.kwargs)``.  The result is
        preceded by the 8-bit tag and a length field counting the encoded
        value in bytes (rounded up by ``bits2bytes``):

        * up to 253 bytes: one length byte
        * 254 to 65535 bytes: marker byte 0xFE plus a 16-bit length
        * 65536 to 16777215 bytes: marker byte 0xFF plus a 24-bit length

        Raises:
            ValueError: If no encoding function is set, or the encoded value
                is too long for the 24-bit length.
        """
        if not self.f:
            raise ValueError("cant encode this attribute without an encoding function")

        # encode data
        logger.debug("encoding attribute %s with function %s", self, self.f)
        data = self.f(self.value, *self.args, **self.kwargs)

        # b0-b7: tag
        bits = encode_number(self.tag, 8)

        # b8-15: element data length (0-253 bytes)
        # b16-31: extended element length (254-65535 bytes)
        # b16-39: extended element length (65536-16777215 bytes)
        datalength = bits2bytes(len(data))
        if datalength <= 253:
            bits += encode_number(datalength, 8)
        elif datalength < 1 << 16:
            # strict bound: 1 << 16 itself does not fit into 16 bits
            marker = bitarray()
            marker.frombytes(b"\xfe")
            bits += marker
            bits += encode_number(datalength, 16)
        elif datalength < 1 << 24:
            # strict bound: 1 << 24 itself does not fit into 24 bits
            marker = bitarray()
            marker.frombytes(b"\xff")
            bits += marker
            bits += encode_number(datalength, 24)
        else:
            raise ValueError(
                "element data length exceeds the maximum allowed by the "
                "extended element length (24bits): %d > %d"
                % (datalength, (1 << 24) - 1)
            )

        bits += data
        return bits
def int2ba(i, length=None, endian='big'):
    """int2ba(int, /, length=None, endian='big') -> bitarray

Convert the given integer into a bitarray (with given endianness,
and no leading (big-endian) / trailing (little-endian) zeros).
If length is provided, the result will be of this length, and an
`OverflowError` will be raised, if the integer cannot be represented
within length bits.
A `TypeError` is raised for arguments of the wrong type, and a `ValueError`
for a negative integer, a non-positive length or an unknown endianness.
"""
    if not isinstance(i, (int, long) if _is_py2 else int):
        raise TypeError("integer expected")
    if i < 0:
        raise ValueError("non-negative integer expected")
    if length is not None:
        if not isinstance(length, int):
            raise TypeError("integer expected for length")
        if length <= 0:
            raise ValueError("integer larger than 0 expected for length")
    if not isinstance(endian, str):
        raise TypeError("string expected for endian")
    if endian not in ('big', 'little'):
        raise ValueError("endian can only be 'big' or 'little'")

    if i == 0:
        # there are special cases for 0 which we'd rather not deal with below
        return zeros(length or 1, endian=endian)

    big_endian = bool(endian == 'big')
    if _is_py2:
        # no int.to_bytes() on Python 2: collect base-256 digits by hand
        c = bytearray()
        while i:
            i, r = divmod(i, 256)
            c.append(r)
        if big_endian:
            c.reverse()
        b = bytes(c)
    else:  # py3
        b = i.to_bytes(bits2bytes(i.bit_length()), byteorder=endian)

    a = bitarray(endian=endian)
    a.frombytes(b)
    # len() instead of the deprecated .length() method
    la = len(a)
    if la == length:
        return a

    if length is None:
        return strip(a, 'left' if big_endian else 'right')

    if la > length:
        # number of significant bits, i.e. without the zero padding that
        # comes from converting whole bytes
        size = (la - a.index(1)) if big_endian else (rindex(a) + 1)
        if size > length:
            raise OverflowError("cannot represent %d bit integer in "
                                "%d bits" % (size, length))
        # drop the surplus zeros on the most significant side
        a = a[la - length:] if big_endian else a[:length - la]

    if la < length:
        # pad with zeros on the most significant side
        if big_endian:
            a = zeros(length - la, 'big') + a
        else:
            a += zeros(length - la, 'little')

    assert len(a) == length
    return a
Beispiel #16
0
    def tobytes(self):
        """Encode this attribute as tag, length field and encoded value.

        The encoder is chosen from the type of ``self.value`` (integer,
        timedelta, Crid, Genre, datetime, str, Bearer or ContentId); the
        encoded value is filled up to a whole number of bytes.  It is
        preceded by the 8-bit tag and a length field counting the value in
        bytes:

        * up to 253 bytes: one length byte
        * 254 to 65535 bytes: marker byte 0xFE plus a 16-bit length
        * 65536 to 16777215 bytes: marker byte 0xFF plus a 24-bit length

        Raises:
            ValueError: If an integer value has no ``bitlength``, the value
                type is not supported, or the encoded value is too long for
                the 24-bit length.
        """
        # encode data
        data = None
        if isinstance(self.value, int) or isinstance(self.value,
                                                     long):  # integer
            if self.bitlength is None:
                raise ValueError(
                    'attribute with int value has no bitlength specification: %s'
                    % self)
            logger.debug('encoding attribute %s as int with %d bits', self,
                         self.bitlength)
            data = int_to_bitarray(self.value, self.bitlength)
        elif isinstance(self.value, datetime.timedelta):  # duration
            data = int_to_bitarray(self.value.seconds, 16)
            logger.debug('encoding attribute %s as duration', self)
        elif isinstance(self.value, Crid):  # CRID
            data = bitarray()
            data.fromstring(str(self.value))
            logger.debug('encoding attribute %s as CRID', self)
        elif isinstance(self.value, Genre):  # genre
            data = encode_genre(self.value)
            logger.debug('encoding attribute %s as genre', self)
        elif isinstance(self.value, datetime.datetime):  # time
            data = encode_timepoint(self.value)
            logger.debug('encoding attribute %s as timepoint', self)
        elif isinstance(self.value, str):  # string
            data = bitarray()
            data.fromstring(self.value)
            logger.debug('encoding attribute %s as string', self)
        elif isinstance(self.value, Bearer):
            data = encode_contentid(self.value.id)
            logger.debug('encoding attribute %s as content ID from bearer',
                         self)
        elif isinstance(self.value, ContentId):
            data = encode_contentid(self.value)
            logger.debug('encoding attribute %s as content ID', self)
        else:
            raise ValueError('dont know how to encode this type: %s = %s' %
                             (self.value.__class__.__name__, str(self.value)))
        # pad the encoded value up to a whole number of bytes
        data.fill()

        # b0-b7: tag
        bits = int_to_bitarray(self.tag, 8)

        # b8-15: element data length (0-253 bytes)
        # b16-31: extended element length (254-65535 bytes)
        # b16-39: extended element length (65536-16777215 bytes)
        datalength = bits2bytes(len(data))
        if datalength <= 253:
            bits += int_to_bitarray(datalength, 8)
        elif datalength < 1 << 16:
            # strict bound: 1 << 16 itself does not fit into 16 bits
            marker = bitarray()
            marker.frombytes(b'\xfe')
            bits += marker
            bits += int_to_bitarray(datalength, 16)
        elif datalength < 1 << 24:
            # strict bound: 1 << 24 itself does not fit into 24 bits
            marker = bitarray()
            marker.frombytes(b'\xff')
            bits += marker
            bits += int_to_bitarray(datalength, 24)
        else:
            raise ValueError(
                'element data length exceeds the maximum allowed by the '
                'extended element length (24bits): %d > %d'
                % (datalength, (1 << 24) - 1))

        bits += data
        return bits
Beispiel #17
0
 def update(self):
     """Derive the per-row and total byte/bit counts from self.size.

     The row width is rounded up to whole bytes with bits2bytes().
     """
     width, height = self.size
     bytes_per_row = bits2bytes(width)
     self.bytes_per_row = bytes_per_row
     self.bits_per_row = bytes_per_row * 8
     byte_count = bytes_per_row * height
     self.bytes = byte_count
     self.bits = byte_count * 8
Beispiel #18
0
    def read_to_bases(self, filetype, filename, fn, exp_chr = None, header = False):
        """Read `filename` and fold its contents into ``self.bases``.

        How the file is interpreted depends on `filetype`:

        * ``self.binarybedfile``: the file is read as an array of unsigned
          bytes for the single chromosome `exp_chr` (required); for every
          position whose byte equals 1, the matching entry of ``self.bases``
          is replaced by ``fn(current value)``.
        * ``self.binarybedfilegenome``: after a header check against
          ``myBedTools.binarybedfilegenome_code``, the rest of the file is
          loaded as a bitarray, shortened to ``self.genome_len`` bits and
          combined with ``self.bases`` (see the comments in that branch).
        * ``self.binaryseqfilegenome``: after a header check against
          ``myBedTools.binaryseqfilegenome_code``, the rest of the file
          (opened through gzip when the name ends with 'z') is shortened to
          ``self.genome_len * self.factor`` bits and replaces ``self.bases``.
        * ``self.binaryseqfilegenomechr``: prints "THIS DOESN'T WORK" and
          exits.
        * anything else: the file is opened with ``myBedTools.open_file`` and
          parsed line by line with ``self.parse_line``.

        `header` is passed to ``myBedTools.open_file`` as ``discard_header``.
        Failed validations print a diagnostic and end the process through
        ``sys.exit``.  Progress is written to stderr.  Returns None.
        """
        self.filename = os.path.basename(filename)
        sys.stderr.write('Reading file... ' + self.filename + ' ')
        if filetype == self.binarybedfile:
            if exp_chr == None:
                print "must send exp_chr for bb files!"
                sys.exit()
                pass
            ar = array('B')
            # array.fromfile raises EOFError when fewer items are available
            # than requested, but keeps the items it did read; the error is
            # therefore ignored here and the length is validated below.
            # NOTE(review): the file object opened here is never closed explicitly.
            try:
                ar.fromfile(open(filename, 'rb'), self.genome_len+1)
            except EOFError:
                pass
            # chrM is additionally accepted with one extra entry.
            if len(ar) != self.chr_lens[exp_chr] and not (exp_chr == 'chrM' and len(ar) == self.chr_lens[exp_chr]+1):
                print "expected length of array to match given start and end!"
                print filename
                print exp_chr
                print len(ar), self.chr_lens[exp_chr]
                sys.exit(-1)
                pass
            for i in xrange(self.chr_lens[exp_chr]):
                if ar[i] == 1: self.bases[self.chr_offset[exp_chr]+i] = fn(self.bases[self.chr_offset[exp_chr]+i])
                pass
            pass
        
        elif filetype == self.binarybedfilegenome:
            ar = bitarray()
            code = bitarray()
            # NOTE(review): opened in text mode ('r') although the content is
            # read as binary, and the file is never closed explicitly.
            file = open(filename, 'r')
            #code.fromfile(file)
            # The header is read first and compared with the expected code.
            code.fromfile(file, 8)
            if code.to01() != myBedTools.binarybedfilegenome_code:
                print "unexpected code for binary bed file genome!"
                print "code:     ", code
                print "expected: ", myBedTools.binarybedfilegenome_code
                sys.exit(-1)
            try:
                #ar.fromfile(file, self.genome_len + 16)
                ar.fromfile(file)
            except EOFError:
                pass
            # The data was read in whole bytes, so the expected bit count is
            # genome_len rounded up to a multiple of 8.
            if ar.length() != bitarray_m.bits2bytes(self.genome_len) * 8:
                print "expected length of array to match given start and end!"
                print filename
                print ar.length(), bitarray_m.bits2bytes(self.genome_len), self.genome_len
                sys.exit(-1)
                pass
            # shorten ar to genome_len
            for i in xrange(ar.length() - self.genome_len):
                #print self.genome_len, ar.length(), 'popping'
                ar.pop()
                pass
            #### COULD CHECK FNS FOR BITARRAY STUFF (AND, OR ETC)
            # First load: self.bases is taken directly from the file (or its
            # complement); any fn other than set_to_one / set_to_zero is an
            # error at this point.
            if not self.initialize:
                self.initialize = True
                if fn == myBedTools.set_to_one:
                    self.bases = ar
                elif fn == myBedTools.set_to_zero:
                    self.bases = ~ar
                else:
                    print "not initialized, and using fn other than set_to_one!"
                    print fn
                    print len(self.bases)
                    sys.exit(-1)
                    return
                pass
            # Later loads: the known functions map to whole-array bit
            # operations; any other fn is applied position by position.
            elif fn == myBedTools.set_to_one:
                self.bases |= ar
            elif fn == myBedTools.set_to_zero:
                self.bases &= ~ar
            elif fn == myBedTools.bitfn_and:
                self.bases &= ar
            else:
                for i in xrange(self.end):
                    if ar[i] == 1: self.bases[i] = fn(self.bases[i])
                    pass
                pass
            pass

        elif filetype == self.binaryseqfilegenome:
            ar = bitarray()
            code = bitarray()
            # A file name ending in 'z' is opened through gzip.
            # NOTE(review): neither file object is closed explicitly.
            if filename.endswith('z'):
                file = gzip.open(filename, 'r')
            else:
                file = open(filename, 'r')
                pass
            code.fromfile(file, 8)
            if code.to01() != myBedTools.binaryseqfilegenome_code:
                print "unexpected code for binary seq file genome!"
                print "code:     ", code
                print "expected: ", myBedTools.binaryseqfilegenome_code
                sys.exit(-1)
            try:
                #ar.fromfile(file, self.genome_len * self.factor + 16)
                ar.fromfile(file)
            except EOFError:
                pass
            # Here every base takes self.factor bits.
            if ar.length() != bitarray_m.bits2bytes(self.genome_len * self.factor) * 8:
                print "expected length of array to match given start and end!"
                print filename
                print ar.length(), bitarray_m.bits2bytes(self.genome_len * self.factor), self.genome_len * self.factor
                sys.exit(-1)
                pass
            # shorten ar to genome_len
            for i in xrange(ar.length() - self.genome_len * self.factor):
                #print self.genome_len, ar.length(), 'popping'
                ar.pop()
                pass

            self.bases = ar
            pass

        elif filetype == self.binaryseqfilegenomechr:
            # This file type is rejected outright: sys.exit raises
            # SystemExit, so the remainder of this branch never runs.
            print "THIS DOESN'T WORK"
            sys.exit(-1)
            ar = bitarray()
            code = bitarray()
            file = open(filename, 'r')
            code.fromfile(file, 8)
            if code.to01() != myBedTools.binaryseqfilegenome_code:
                print "unexpected code for binary seq file genome!"
                print "code:     ", code
                print "expected: ", myBedTools.binaryseqfilegenome_code
                sys.exit(-1)
            try:
                chr_byte_start = self.chr_offset[exp_chr] * self.factor // 8
                chr_partial_byte_start = (self.chr_offset[exp_chr] * self.factor) % 8
                print 'starting at', chr_byte_start, 'bytes, with', chr_partial_byte_start, 'offset'
                file.seek(chr_byte_start, 0)
                ar.fromfile(file, self.chr_lens[exp_chr] * self.factor + 16)
                print ar[0:100]
            except EOFError:
                pass
            if ar.length() != bitarray_m.bits2bytes(self.chr_lens[exp_chr] * self.factor) * 8:
                print "expected length of array to match given start and end!"
                print filename
                print ar.length(), bitarray_m.bits2bytes(self.genome_len * self.factor), self.genome_len * self.factor
                sys.exit(-1)
                pass
            # shorten ar to genome_len
            for i in xrange(ar.length() - self.genome_len * self.factor):
                #print self.genome_len, ar.length(), 'popping'
                ar.pop()
                pass

            self.bases = ar
            pass

        # Note that this branch tests self.output_type, not filetype: a text
        # input is written into a sequence-encoded self.bases.
        elif self.output_type == myBedTools.binaryseqfilegenome:
            infile = myBedTools.open_file(filename, discard_header = header)
            
            for line in infile:
                if line.strip().startswith('#'): continue
                #print line
                [l_chr, l_start, l_end, l_base] = self.parse_line(filetype, line, return_base = True)

                # Only single-character bases are stored; other lines are skipped.
                if len(l_base) != 1: continue
                
                if self.debug: print 'bed', l_chr, l_start, l_end, l_base
                site = self.chr_offset[l_chr] * self.factor + l_start * self.factor
                self.bases[site : site + self.factor] = self.binaryseq_decode[l_base.upper()]
                pass
            pass
        else:
            infile = myBedTools.open_file(filename, discard_header = header)
            
            # Default: apply fn to every position in [l_start, l_end) of
            # each parsed line.
            for line in infile:
                [l_chr, l_start, l_end] = self.parse_line(filetype, line)
                
                if self.debug: print 'bed', l_chr, l_start, l_end
                for i in xrange(l_start, l_end):
                    self.bases[self.chr_offset[l_chr]+i] = fn(self.bases[self.chr_offset[l_chr]+i])
                    pass
                pass
            pass
        gc.collect()
        sys.stderr.write(' done\n')
        return
Beispiel #19
0
    def tobytes(self):
        """Encode this attribute as tag, length field and encoded value.

        The value is encoded by calling
        ``self.f(self.value, *self.args, **self.kwargs)``.  The result is
        prefixed with the 8-bit tag and a length field that gives the size of
        the encoded data in whole bytes:

        * 0-253 bytes: one length byte.
        * 254-65535 bytes: marker byte 0xFE followed by a 16-bit length.
        * 65536-16777215 bytes: marker byte 0xFF followed by a 24-bit length.

        Returns:
            The concatenated bits (tag, length field, data).  Despite the
            method name this is the bit container produced by
            ``encode_number`` (presumably a bitarray), not a byte string.

        Raises:
            ValueError: if no encoding function is set, or if the encoded
                data is too long for the 24-bit extended length field.
        """
        if not self.f: raise ValueError('cant encode this attribute without an encoding function')

        # encode data: all type-specific encoding is delegated to self.f
        logger.debug('encoding attribute %s with function %s', self, self.f) 
        data = self.f(self.value, *self.args, **self.kwargs)

        # Largest values representable in the 16 and 24 bit length fields.
        max_length_16 = (1 << 16) - 1
        max_length_24 = (1 << 24) - 1

        # b0-b7: tag
        bits = encode_number(self.tag, 8)

        # b8-15: element data length (0-253 bytes)
        # b16-31: extended element length (254-65535 bytes)
        # b16-39: extended element length (65536-16777215 bytes)
        datalength = bits2bytes(data.length())
        if datalength <= 253:
            bits += encode_number(datalength, 8)
        else:
            if datalength <= max_length_16:
                marker, width = '\xfe', 16
            elif datalength <= max_length_24:
                marker, width = '\xff', 24
            else:
                raise ValueError('element data length exceeds the maximum allowed by the extended element length (24bits): %s > %s' % (datalength, max_length_24))
            # The marker byte announces which extended length width follows.
            tmp = bitarray()
            tmp.fromstring(marker)
            bits += tmp
            bits += encode_number(datalength, width)

        bits += data
        return bits
Beispiel #20
0
 def __init__(self, data):
     """Initialise the base model, then decode the trailing bot records.

     Everything in `data` from position ``bits2bytes(self.size**3 + 8) * 8``
     onwards is reinterpreted as signed 8-bit integers and stored in
     ``self.bots`` as an array with four columns per row.
     """
     super(ExtendedModel, self).__init__(data)

     # Start of the trailing section, rounded up to a whole number of bytes.
     # NOTE(review): presumably this skips an 8-bit header plus size**3
     # model bits -- confirm against the base class.
     tail_start = bits2bytes(self.size**3 + 8) * 8
     tail_bits = bitarray(data[tail_start:], endian="big")

     raw_values = numpy.frombuffer(tail_bits.tobytes(), dtype=numpy.int8)
     self.bots = raw_values.reshape((-1, 4))
Beispiel #21
0
    def encode(self):
        """Compress ``self.byte_seq`` using adaptive Huffman coding.

        If ``self.dpcm`` is set, ``self.byte_seq`` is first replaced by its
        DPCM-encoded form.  The produced bit sequence starts with a 3-bit
        big-endian field holding the number of padding bits needed to fill
        the final byte, followed by the actual code bits.

        Returns:
            bitarray: The compressed bits. `bitarray.tofile()` can be used
                to write them to a file.
        """
        def encode_fixed_code(dec):
            """Return the fixed code of a symbol as a list of bools.

            Arguments:
                dec {int} -- The symbol (alphabet member) to convert.

            Returns:
                list of bool -- Fixed code of the symbol.
            """
            position = dec - (self._alphabet_first_num - 1)
            if position > 2 * self.rem:
                # later part of the alphabet: `exp` bits
                number, width = position - self.rem - 1, self.exp
            else:
                # first 2 * rem symbols: `exp + 1` bits
                number, width = position - 1, self.exp + 1
            return bin_str2bool_list('{:0{padding}b}'.format(number,
                                                             padding=width))

        progressbar = ShadyBar('encoding',
                               max=len(self.byte_seq),
                               suffix='%(percent).1f%% - %(elapsed_td)ss')

        if self.dpcm:
            self.byte_seq = tuple(encode_dpcm(self.byte_seq))

        log = logging.getLogger(__name__)
        log.info('entropy: %f', entropy(self.byte_seq))

        code = []
        for symbol in self.byte_seq:
            fixed_code = encode_fixed_code(symbol)
            result = self.tree.search(fixed_code)
            is_new_symbol = result['first_appearance']
            # Known symbol: path from the root to its node.
            # New symbol: code of NYT, followed by the symbol's fixed code.
            code.extend(result['code'])
            if is_new_symbol:
                code.extend(fixed_code)
            self.update(fixed_code, result['first_appearance'])
            progressbar.next()

        # The decoder must not mistake the padding of the last byte for real
        # data, so the number of padding bits is put in front of the code.
        # It takes 3 bits and is written as a big endian binary string.
        total_bits = len(code) + 3
        remaining_bits_length = bits2bytes(total_bits) * 8 - total_bits
        header = bin_str2bool_list('{:03b}'.format(remaining_bits_length))
        code = header + code

        progressbar.finish()
        return bitarray(code)