Beispiel #1
0
 def to_dense(self):
     """Convert a sparse-encoded array to the dense encoding, in place.

     Does nothing when the array is not sparse. Otherwise a fresh bit array
     is seeded with a single zero byte, every position reported by
     ``_bit_1_indexes()`` is set to 1 in it, the result replaces
     ``self.bitstring`` and the sparse flag ``self.meta[0]`` is cleared.
     """
     if not self.is_sparse():
         return
     dense = BitArray()
     dense.frombytes(b'\x00')
     for position in self._bit_1_indexes():
         # Keep the array that setbit hands back instead of relying on
         # in-place mutation, so no bit is lost if setbit ever returns a
         # different (e.g. grown) object.
         dense = setbit(dense, position, 1)
     self.bitstring = dense
     self.meta[0] = False
Beispiel #2
0
 def get_row(self, index):
     """Return the row stored under ``index`` as a BitArray.

     A missing row is read as empty bytes (the ``b''`` default passed to
     ``self.get``); no warning is logged in that case.
     """
     raw_row = self.get(index, b'')
     row = BitArray()
     row.frombytes(raw_row)
     return row
Beispiel #3
0
 def __init__(self, byte_array=None, meta=b'\x00', bitstring=b'\x00'):
     """Build the header and payload bit arrays.

     When ``byte_array`` is given, its first byte becomes the header and
     the remaining bytes the payload; ``meta`` and ``bitstring`` are then
     ignored. Otherwise ``meta`` and ``bitstring`` are used as given.
     """
     if byte_array is not None:
         meta, bitstring = byte_array[0:1], byte_array[1:]
     self.meta = BitArray()
     self.bitstring = BitArray()
     self.meta.frombytes(meta)
     self.bitstring.frombytes(bitstring)
Beispiel #4
0
    def to_sparse(self):
        """Convert a dense array to the sparse encoding, in place.

        Does nothing unless the array is dense. The set positions are read
        first, then the sparse flag ``meta[0]`` is raised, the byte width
        chosen by ``choose_int_encoding`` is stored in the header, and the
        payload is replaced by the positions written as big-endian integers
        of that width, back to back.
        """
        if not self.is_dense():
            return
        set_positions = self.indexes()
        self.meta[0] = True
        self._set_sparse_byte_length(choose_int_encoding(set_positions))
        packed = b''.join(
            int(position).to_bytes(self.sparse_byte_length, byteorder='big')
            for position in set_positions)
        self.bitstring = BitArray()
        self.bitstring.frombytes(packed)
Beispiel #5
0
    def _setbit_sparse(self, pos, i):

        if i == 0:
            self.to_dense()
            self._setbit_dense(pos, i)
            self.to_sparse()
        else:
            if not pos in self.colours():

                if choose_int_encoding([pos]) > self.sparse_byte_length:
                    # lazy option
                    self.to_dense()
                    self._setbit_dense(pos, i)
                    self.to_sparse()

                else:
                    _append_bytes = int(pos).to_bytes(self.sparse_byte_length,
                                                      byteorder='big')
                    b = b''.join([self.bitstring.tobytes(), _append_bytes])
                    self.bitstring = BitArray()
                    self.bitstring.frombytes(b)
Beispiel #6
0
def _batch_insert_prob_redis(conn, names, all_hashes, colour, count=0):
    """Set bit ``colour`` in a batch of Redis hash fields in one transaction.

    The keys in ``names`` are watched, their current field values are read
    through ``get_vals``, and for each ``(name, field)`` pair the stored
    bytes (empty when the field is missing) get bit ``colour`` set and are
    queued for writing back with HSET. If a watched key changes before the
    transaction executes, the whole batch is retried.

    Args:
        conn: Redis connection used for both the reads and the pipeline.
        names: Hash keys, aligned with ``all_hashes``.
        all_hashes: For each name, the hash fields to update.
        colour: Bit position to set to 1 in every field value.
        count: Number of retries already performed; callers normally leave
            this at its default.

    Returns:
        None. After more than five failed retries a warning is logged and
        the batch is dropped.
    """
    with conn.pipeline() as pipe:
        try:
            # NOTE(review): redis-py documents watch(*names); confirm that
            # passing the list as a single argument is intended here.
            pipe.watch(names)
            vals = get_vals(conn, names, all_hashes)
            pipe.multi()
            for name, values, hs in zip(names, vals, all_hashes):
                for val, h in zip(values, hs):
                    ba = BitArray()
                    ba.frombytes(b'' if val is None else val)
                    ba.setbit(colour, 1)
                    pipe.hset(name, h, ba.tobytes())
            pipe.execute()
        except redis.WatchError:
            # Log the full key list: the loop variable may be unbound here.
            logger.warning("Retrying %s %s " % (conn, names))
            if count < 5:
                # Retry this same function with the same batch; the earlier
                # call to self._batch_insert(conn, hk, ...) referenced names
                # that do not exist in this module-level function.
                _batch_insert_prob_redis(
                    conn, names, all_hashes, colour, count=count + 1)
            else:
                logger.warning(
                    "Failed %s %s. Too many retries. Contining regardless." %
                    (conn, names))
Beispiel #7
0
 def get_column(self, colour):
     """Return column ``colour`` as a BitArray with one bit per row.

     Bit ``i`` of the result is ``self._getbit(i, colour)`` for every row
     index ``i`` in ``range(self.size)``.
     """
     column = BitArray()
     column.extend([self._getbit(row, colour) for row in range(self.size)])
     return column
Beispiel #8
0
class ByteArray(object):
    """Set of bit positions with a header and two payload encodings.

    ``meta`` holds the header bits: ``meta[0]`` is the sparse flag and
    ``meta[1:3]`` select the per-position byte width of the sparse encoding
    (translated through ``BITS_TO_BYTE_LENGTH`` / ``BYTE_LENGTH_TO_BITS``).
    ``bitstring`` holds the payload: one bit per position in dense mode, or
    the set positions as fixed-width big-endian integers, concatenated, in
    sparse mode.
    """

    def __init__(self, byte_array=None, meta=b'\x00', bitstring=b'\x00'):
        """Build the header and payload bit arrays.

        When ``byte_array`` is given, its first byte is the header and the
        rest is the payload; ``meta`` and ``bitstring`` are then ignored.
        """
        self.meta = BitArray()
        self.bitstring = BitArray()
        if byte_array is None:
            self.meta.frombytes(meta)
            self.bitstring.frombytes(bitstring)
        else:
            self.meta.frombytes(byte_array[0:1])
            self.bitstring.frombytes(byte_array[1:])

    def intersect(self, ba):
        """Return a new ByteArray holding the positions set in both arrays."""
        shared = set(self.colours()) & set(ba.colours())
        result = ByteArray()
        for colour in shared:
            result.setbit(colour, 1)
        return result

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        return self.bin

    def is_sparse(self):
        """Return the sparse flag stored in the first header bit."""
        return self.meta[0]

    def is_dense(self):
        """Return True when the sparse flag is not set."""
        return not self.is_sparse()

    def colours(self):
        """Return the list of set positions, whichever encoding is active."""
        if self.is_sparse():
            return self._bit_1_indexes()
        return self.indexes()

    @property
    def sparse_byte_bit_encoding(self):
        """Header bits 1 and 2 rendered as a two-character '0'/'1' string."""
        return "".join([str(int(bit)) for bit in self.meta[1:3]])

    @property
    def sparse_byte_length(self):
        """Bytes used per position in sparse mode, looked up from the header."""
        return BITS_TO_BYTE_LENGTH[self.sparse_byte_bit_encoding]

    def _set_sparse_byte_length(self, l):
        """Store the header bits that ``BYTE_LENGTH_TO_BITS`` maps ``l`` to."""
        bits = BYTE_LENGTH_TO_BITS[l]
        self.meta[1] = bool(int(bits[0]))
        self.meta[2] = bool(int(bits[1]))

    def to_sparse(self):
        """Convert a dense array to the sparse encoding, in place.

        Does nothing unless the array is dense.
        """
        if not self.is_dense():
            return
        # Read the positions before raising the flag: indexes() only
        # reports positions while the array is still dense.
        indexes = self.indexes()
        self.meta[0] = True

        self._set_sparse_byte_length(choose_int_encoding(indexes))
        _bytes = b''.join([
            int(i).to_bytes(self.sparse_byte_length, byteorder='big')
            for i in indexes
        ])
        self.bitstring = BitArray()
        self.bitstring.frombytes(_bytes)

    def indexes(self):
        """Return the positions of set bits in dense mode.

        Returns an empty list when the array is sparse.
        """
        indexes = []
        i = 0
        if self.is_dense():
            while True:
                try:
                    # index() raises ValueError once no set bit remains.
                    i = self.bitstring.index(True, i)
                    indexes.append(i)
                    i += 1
                except ValueError:
                    break
        return indexes

    def to_dense(self):
        """Convert a sparse array to the dense encoding, in place.

        Does nothing unless the array is sparse.
        """
        if not self.is_sparse():
            return
        new_bitstring = BitArray()
        new_bitstring.frombytes(b'\x00')
        for i in self._bit_1_indexes():
            # Keep the array setbit returns, as _setbit_dense does, rather
            # than relying on in-place mutation.
            new_bitstring = setbit(new_bitstring, i, 1)
        self.bitstring = new_bitstring
        self.meta[0] = False

    def _bit_1_indexes(self):
        """Decode the sparse payload into the list of set positions.

        Must only be called while the array is sparse. Trailing bytes that
        do not fill a whole position are ignored.
        """
        assert self.is_sparse()
        s = self.sparse_byte_length
        _bytes = self.bitstring.tobytes()
        # Integer division keeps the count exact; no float round-trip.
        return [
            int.from_bytes(_bytes[i * s:(i + 1) * s], byteorder='big')
            for i in range(len(_bytes) // s)
        ]

    def setbit(self, pos, i):
        """Set bit ``pos`` to ``i`` using the active encoding."""
        if self.is_sparse():
            self._setbit_sparse(pos, i)
        else:
            self._setbit_dense(pos, i)

    def _setbit_dense(self, pos, i):
        """Set bit ``pos`` to ``i`` in the dense payload via ``setbit``."""
        self.bitstring = setbit(self.bitstring, pos, i)

    def _setbit_sparse(self, pos, i):
        """Set bit ``pos`` to ``i`` while the array is sparse.

        Setting an already-present position is a no-op. A new position that
        fits the current byte width is appended to the payload. Clearing a
        bit, or adding a position that needs a wider encoding, goes through
        a dense round-trip.
        """
        if i != 0:
            if pos in self.colours():
                return
            if choose_int_encoding([pos]) <= self.sparse_byte_length:
                _append_bytes = int(pos).to_bytes(self.sparse_byte_length,
                                                  byteorder='big')
                b = b''.join([self.bitstring.tobytes(), _append_bytes])
                self.bitstring = BitArray()
                self.bitstring.frombytes(b)
                return
        # Lazy option: convert to dense, set the bit, convert back.
        self.to_dense()
        self._setbit_dense(pos, i)
        self.to_sparse()

    def getbit(self, pos):
        """Return 1 if bit ``pos`` is set, else 0.

        In dense mode a position past the end of the payload reads as 0.
        """
        if self.is_sparse():
            return int(pos in self._bit_1_indexes())
        try:
            return int(self.bitstring[pos])
        except IndexError:
            return 0

    @property
    def bytes(self):
        """Header bytes followed by payload bytes."""
        return b''.join([self.meta.tobytes(), self.bitstring.tobytes()])

    @property
    def bin(self):
        """Header bits followed by payload bits, as a '0'/'1' string."""
        return ''.join([self.meta.to01(), self.bitstring.to01()])

    def choose_optimal_encoding(self, colour=None):
        """Switch to whichever encoding is estimated to be smaller.

        The sparse size is estimated as (bytes per position) x (number of
        set positions) and the dense size as the highest set position
        divided by 8. With no set positions the array is made sparse.

        Args:
            colour: Optional position; when truthy, the bytes-per-position
                estimate is taken from this position alone instead of from
                all set positions.
        """
        colours = self.colours()
        if not colours:
            self.to_sparse()
            return
        # NOTE(review): a colour of 0 is falsy and is therefore handled
        # like None here; confirm whether that is intended.
        if colour:
            byte_order = choose_int_encoding([colour])
        else:
            byte_order = choose_int_encoding(colours)
        sparse_byte_length = byte_order * len(colours)
        dense_byte_length = max(colours) / 8
        if dense_byte_length < sparse_byte_length:
            self.to_dense()
        else:
            self.to_sparse()
Beispiel #9
0
 def get_row(self, index):
     """Return the row stored under ``index`` as a BitArray.

     A missing row is read as empty bytes, the ``b''`` default passed to
     ``self.get``.
     """
     row_bits = BitArray()
     row_bytes = self.get(index, b'')
     row_bits.frombytes(row_bytes)
     return row_bits