Ejemplo n.º 1
0
    def test_reset(self):
        """Resetting one bit of an array created with initial value 1 clears it."""
        size = 10
        bits = BitArray(size, 1)
        # Every bit must read as set before anything is reset.
        for position in xrange(size):
            self.assertTrue(bits.get_bit(position))

        target = 6
        bits.reset_bit(target)
        self.assertFalse(bits.get_bit(target))
def encode(string, symbol_to_encoding_dict):
    """Encode *string* with the given per-symbol codes.

    The output starts with a run of '1' bits closed by a single '0'; the
    run length is chosen so that prefix plus payload fill whole bytes.
    The '0' therefore marks where the payload begins.

    Args:
        string: iterable of symbols, each a key of the code table.
        symbol_to_encoding_dict: maps each symbol to its code word.

    Returns:
        Whatever ``tobytes()`` of the assembled bit array yields.

    Raises:
        KeyError: if a symbol has no entry in the code table.
    """
    payload_bits = sum(len(symbol_to_encoding_dict[ch]) for ch in string)
    # Always in 1..8, so the '0' marker exists even for an aligned payload.
    prefix_len = 8 - payload_bits % 8
    encoded = BitArray('1' * (prefix_len - 1) + '0')
    for ch in string:
        encoded += BitArray(symbol_to_encoding_dict[ch])
    return encoded.tobytes()
def decode(binary_data, symbol_to_encoding_dict):
    """Decode *binary_data* back into a string using the code table.

    Leading set bits are skipped up to and including the first unset bit;
    everything after that bit is decoded with *symbol_to_encoding_dict* and
    the resulting symbols are concatenated.

    Args:
        binary_data: bytes loaded into a bit array via ``frombytes``.
        symbol_to_encoding_dict: code table handed to the bit array's
            ``decode`` method.

    Returns:
        The decoded symbols joined into one ``str``.
    """
    bits = BitArray()
    bits.frombytes(binary_data)

    # Index of the first unset bit, i.e. the end-of-prefix marker.
    marker = 0
    while bits[marker]:
        marker += 1

    symbols = bits[marker + 1:].decode(symbol_to_encoding_dict)
    return ''.join(symbols)
Ejemplo n.º 4
0
 def __init__(self, buf, unitsize):
     """Wrap *buf* as ``count`` units of ``unitsize`` bytes each.

     Args:
         buf: object exposing ``nbytes``; stored as ``self.buf``.
         unitsize: size of one unit in bytes.

     Raises:
         InvalidArgument: if ``buf.nbytes`` is not a multiple of
             ``unitsize``.
     """
     unit_count, leftover = divmod(buf.nbytes, unitsize)
     if leftover != 0:
         raise InvalidArgument(
             "Buffer provided is not a multiple of unit size")

     bits_per_unit = unitsize * 8
     self.buf = buf
     self.count = unit_count
     # Presumably (buffer, offset, shape, strides): one row of
     # bits_per_unit bits per unit -- confirm against BitArray's signature.
     self.bits = BitArray(self.buf, 0, (self.count, bits_per_unit),
                          (bits_per_unit, 1))
Ejemplo n.º 5
0
	def __init__(self, peer_id, protocol, torrent):
		"""Store the peer's identity and collaborators and reset all state flags.

		``self.pieces`` is created as a ``BitArray`` sized by
		``torrent.piece_num``.
		"""
		# Identity and collaborators.
		self.peer_id = peer_id
		self.protocol = protocol
		self.torrent = torrent

		# Initial state: not running, both choke flags on, both interest
		# flags off.
		self.running = False
		self.is_choked, self.is_interested = True, False
		self.is_chocked_you, self.is_interested_you = True, False

		self.pieces = BitArray(self.torrent.piece_num)
Ejemplo n.º 6
0
    def test_one_bit(self):
        """Setting a single bit leaves both of its neighbours unset."""
        width = 32
        bits = BitArray(width)
        # A new array created without an initial value reads as all unset.
        for position in xrange(width):
            self.assertFalse(bits.get_bit(position))

        bits.set_bit(5)
        self.assertTrue(bits.get_bit(5))
        for neighbour in (4, 6):
            self.assertFalse(bits.get_bit(neighbour))
Ejemplo n.º 7
0
    def test_big_number(self):
        """A bit at a large index of a 2**32-bit array can be set in isolation."""
        capacity = 2 ** 32
        index = 2 ** 20
        bits = BitArray(capacity)
        self.assertFalse(bits.get_bit(index))

        bits.set_bit(index)
        self.assertTrue(bits.get_bit(index))
        # Neighbours on both sides must be unaffected.
        for neighbour in (index + 1, index - 1):
            self.assertFalse(bits.get_bit(neighbour))
Ejemplo n.º 8
0
def sieve(MAX):
    # Sieve of Eratosthenes over the indices below MAX: a bit that is still
    # set after the loops marks an index that was not crossed off.
    #
    # NOTE(review): the remainder of this function is not visible here, so
    # how the bit array is consumed (and whether 0, 1 and 2 are special-cased
    # there) could not be checked. As written, bits 0 and 1 stay set and the
    # even pass below also clears bit 2 -- confirm the tail accounts for that.
    ba = BitArray(MAX, initialize=1)
    for n in xrange(2, MAX, 2):
        ba.reset_bit(n)

    # Largest factor that must be tried. It has to be *included* in the loop
    # below: with an exclusive bound, squares of primes such as 25 or 49 are
    # never crossed off when floor(sqrt(MAX)) is that prime.
    limit = int(math.floor(math.sqrt(MAX)))

    for n in xrange(3, limit + 1, 2):
        if ba.get_bit(n):
            for i in xrange(n * 2, MAX, n):
                ba.reset_bit(i)

    """
def derive_encoding(symbol_to_frequency_dict):
    """Build a Huffman code table from symbol frequencies.

    The two lowest-ranked nodes are merged repeatedly; on every merge each
    symbol below the left child gets a '0' prepended to its code and each
    symbol below the right child a '1'.

    Args:
        symbol_to_frequency_dict: non-empty mapping of symbol -> frequency.

    Returns:
        A new dict mapping every symbol to a ``BitArray`` built from its
        '0'/'1' code string.

    Raises:
        AssertionError: if the mapping is empty.
    """
    assert len(symbol_to_frequency_dict) > 0
    codes = {symbol: '' for symbol in symbol_to_frequency_dict}

    heap = [Node().init_leaf(symbol, frequency)
            for symbol, frequency in symbol_to_frequency_dict.items()]
    heapq.heapify(heap)
    while len(heap) > 1:
        left_child = heapq.heappop(heap)
        right_child = heapq.heappop(heap)
        heapq.heappush(heap, Node().init_parent(left_child, right_child))

        # Codes grow from the leaves upwards, hence prepending.
        for symbol in left_child.symbols_in_subtree():
            codes[symbol] = '0' + codes[symbol]
        for symbol in right_child.symbols_in_subtree():
            codes[symbol] = '1' + codes[symbol]

    # With a single symbol the loop above never runs and the code would stay
    # empty, which cannot be decoded; give that symbol a one-bit code.
    if len(codes) == 1:
        for symbol in codes:
            codes[symbol] = '0'

    return {symbol: BitArray(bits) for symbol, bits in codes.items()}
Ejemplo n.º 10
0
    def __init__(self, x1, z1, x2, z2, heightmapTypes=None):
        """**Initialise WorldSlice with region and heightmaps**.

        Requests the chunks covering the area through ``getChunks``, parses
        the reply as NBT and fills ``self.heightmaps`` and ``self.sections``.

        Args:
            x1, z1: first corner of the area; becomes the origin of
                ``self.rect``.
            x2, z2: opposite corner (exclusive); ``x2 - x1`` and ``z2 - z1``
                become the size stored in ``self.rect``.
            heightmapTypes: names looked up in every chunk's ``Heightmaps``
                tag. ``None`` selects "MOTION_BLOCKING",
                "MOTION_BLOCKING_NO_LEAVES", "OCEAN_FLOOR" and
                "WORLD_SURFACE".
        """
        # A fresh list per call: a list literal as the default value would be
        # one object shared by every instance that relies on the default.
        if heightmapTypes is None:
            heightmapTypes = ["MOTION_BLOCKING",
                              "MOTION_BLOCKING_NO_LEAVES",
                              "OCEAN_FLOOR",
                              "WORLD_SURFACE"]

        self.rect = x1, z1, x2 - x1, z2 - z1
        # (first chunk x, first chunk z, chunk count x, chunk count z);
        # ">> 4" converts a block coordinate to a chunk coordinate.
        self.chunkRect = (self.rect[0] >> 4, self.rect[1] >> 4,
                          ((self.rect[0] + self.rect[2] - 1) >> 4)
                          - (self.rect[0] >> 4) + 1,
                          ((self.rect[1] + self.rect[3] - 1) >> 4)
                          - (self.rect[1] >> 4) + 1)
        self.heightmapTypes = heightmapTypes

        showPerf = False

        t0 = time.perf_counter()
        chunkBytes = getChunks(*self.chunkRect, rtype='bytes')
        if showPerf:
            print(f"took {time.perf_counter() - t0}s")
        t0 = time.perf_counter()

        file_like = BytesIO(chunkBytes)

        print("parsing NBT")
        self.nbtfile = nbt.nbt.NBTFile(buffer=file_like)
        if showPerf:
            print(f"took {time.perf_counter() - t0}s")
        t0 = time.perf_counter()

        # Position of the rect's origin inside its first chunk.
        rectOffset = [self.rect[0] % 16, self.rect[1] % 16]

        # heightmaps
        # dtype=int is what the removed ``np.int`` alias used to mean.
        self.heightmaps = {}
        for hmName in self.heightmapTypes:
            self.heightmaps[hmName] = np.zeros(
                (self.rect[2], self.rect[3]), dtype=int)

        # Sections are in x,z,y order!!! (reverse minecraft order :p)
        self.sections = [[[None for i in range(16)] for z in range(
            self.chunkRect[3])] for x in range(self.chunkRect[2])]

        # heightmaps
        print("extracting heightmaps")

        for x in range(self.chunkRect[2]):
            for z in range(self.chunkRect[3]):
                chunkID = x + z * self.chunkRect[2]

                hms = self.nbtfile['Chunks'][chunkID]['Level']['Heightmaps']
                for hmName in self.heightmapTypes:
                    hmRaw = hms[hmName]
                    heightmapBitArray = BitArray(9, 16 * 16, hmRaw)
                    heightmap = self.heightmaps[hmName]
                    for cz in range(16):
                        pz = z * 16 + cz - rectOffset[1]
                        for cx in range(16):
                            px = x * 16 + cx - rectOffset[0]
                            # Columns before the rect's origin would give
                            # negative indices, which NumPy wraps around to
                            # the far end instead of rejecting.
                            if px < 0 or pz < 0:
                                continue
                            try:
                                heightmap[px, pz] \
                                    = heightmapBitArray.getAt(cz * 16 + cx)
                            except IndexError:
                                # Column lies past the far edge of the rect.
                                pass

        if showPerf:
            print(f"took {time.perf_counter() - t0}s")
        t0 = time.perf_counter()

        # sections
        print("extracting chunk sections")

        for x in range(self.chunkRect[2]):
            for z in range(self.chunkRect[3]):
                chunkID = x + z * self.chunkRect[2]
                chunk = self.nbtfile['Chunks'][chunkID]
                chunkSections = chunk['Level']['Sections']

                for section in chunkSections:
                    y = section['Y'].value

                    # Sections without block data are left as None.
                    if (not ('BlockStates' in section)
                            or len(section['BlockStates']) == 0):
                        continue

                    palette = section['Palette']
                    rawBlockStates = section['BlockStates']
                    # Enough bits to index the palette, but never below 4.
                    bitsPerEntry = max(4, ceil(log2(len(palette))))
                    blockStatesBitArray = BitArray(
                        bitsPerEntry, 16 * 16 * 16, rawBlockStates)

                    self.sections[x][z][y] = CachedSection(
                        palette, blockStatesBitArray)

        if showPerf:
            print(f"took {time.perf_counter() - t0}s")
        print("done")
Ejemplo n.º 11
0
 def __init__(self, iterable=(), population=56, probes=6):
     """Create the bit storage and add every element of *iterable*.

     Args:
         iterable: initial items, each passed to ``self.add``.
         population: size of the ``BitArray`` and of the ``xrange`` kept
             in ``self.population``.
         probes: stored unchanged as ``self.probes``.
     """
     self.probes = probes
     self.population = xrange(population)
     self.data = BitArray(population)
     for item in iterable:
         self.add(item)
Ejemplo n.º 12
0
 def test_initialize(self):
     """An array created with initial value 1 reports every bit as set."""
     length = 10
     bits = BitArray(length, 1)
     for position in xrange(length):
         self.assertTrue(bits.get_bit(position))
Ejemplo n.º 13
0
from bitarray import BitArray
import numpy as np

# --- BitArray demo: view a 10-byte buffer as rows of bits -----------------
buf = np.arange(10, dtype=np.uint8)
offset = 0
shape = (10, 8)
strides = (8, 1)

b = BitArray(buf, offset, shape, strides)

print(b[1, 0])
# NOTE(review): ``shape`` gives the second axis a length of 8, so the slice
# 8:16 starts at its end -- confirm this is the intended demonstration.
print(b[0, 8:16])
print(b[:])
# --- Chunk dump: fetch chunks, parse them as NBT and print the contents ----
# NOTE(review): ``bytes`` shadows the builtin for the rest of the module.
bytes = getChunks(0, 0, 2, 2, rtype='bytes')
print(len(bytes))

print(bytes)
# print(getChunks(0, 0, 2, 2, rtype='text'))
print("")
file_like = BytesIO(bytes)

nbtfile = nbt.nbt.NBTFile(buffer=file_like)

print(nbtfile['Chunks'])
print(nbtfile['Chunks'][0]['Level']['Sections'])
# Sections of the first chunk; iterated by the loop further down.
sections = nbtfile['Chunks'][0]['Level']['Sections']

def sectionIsEmpty(section):
    """Return True when *section* has no 'BlockStates' entry or an empty one."""
    if 'BlockStates' not in section:
        return True
    return len(section['BlockStates']) == 0

# Print the palette entry of every block state in each non-empty section.
for section in sections:
    if sectionIsEmpty(section):
        continue

    palette = section['Palette']
    blockStates = section['BlockStates']
    # Enough bits to index the palette, but never fewer than 4.
    bitsPerEntry = max(4, ceil(log2(len(palette))))
    bitarray = BitArray(bitsPerEntry, 16 * 16 * 16, blockStates)

    def printBlock(blockStateID):
        # Looks up ``palette`` when called, i.e. the current section's one.
        print(palette[blockStateID])

    bitarray.getAll(printBlock)
Ejemplo n.º 15
0
class Connection():
	"""State and message plumbing for one peer of a torrent.

	Methods in the "Event Interface" section send a message through
	``self.protocol``; methods ending in ``By`` in the "Event Handler"
	section react to a message received from the peer.

	Flags:
		is_choked / is_interested: changed only by the ``*By`` handlers,
			i.e. by what the peer told us.
		is_chocked_you / is_interested_you: changed only by the sending
			methods, i.e. by what we told the peer.
	"""

	def __init__(self, peer_id, protocol, torrent):
		"""Store the collaborators and start with both choke flags on and both interest flags off."""
		self.running = False

		self.is_choked = True
		self.is_interested = False
		self.is_chocked_you = True
		self.is_interested_you = False

		self.peer_id = peer_id
		self.torrent = torrent
		self.protocol = protocol
		# One bit per piece of the torrent: which pieces the peer has.
		self.pieces = BitArray(self.torrent.piece_num)

	def lost(self):
		"""Log the loss of this peer and report it to the torrent."""
		print('%s lost' % (self.peer_id,))
		self.torrent.lostConnection(self)

	##############################
	# Event Interface
	##############################
	def choke(self):
		"""Send a choke unless the peer is already choked by us."""
		if not self.is_chocked_you:
			self.is_chocked_you = True
			self.protocol.sendChoke()

	def unchoke(self):
		"""Send an unchoke if the peer is currently choked by us."""
		if self.is_chocked_you:
			self.is_chocked_you = False
			self.protocol.sendUnchoke()

	def interested(self):
		"""Send "interested" unless that was already announced."""
		if not self.is_interested_you:
			self.is_interested_you = True
			self.protocol.sendInterested()

	def uninterested(self):
		"""Send "uninterested" if interest is currently announced."""
		if self.is_interested_you:
			self.is_interested_you = False
			self.protocol.sendUninterested()

	def have(self, piece_index):
		"""Announce a piece to the peer, then re-evaluate our interest."""
		self.protocol.sendHave(piece_index)
		self.checkInterested()

	def bitfield(self, pieces):
		"""Send our bitfield to the peer."""
		print('send bitfield: %s' % (len(pieces),))
		self.protocol.sendBitfield(pieces)

	def request(self, block_info):
		"""Request a block (index, offset, length); dropped while the peer chokes us."""
		if self.is_choked:
			return
		self.protocol.sendRequest(block_info[0], block_info[1], block_info[2])

	def piece(self, block_info, block_data):
		"""Send block data; dropped while we choke the peer."""
		if self.is_chocked_you:
			return
		self.protocol.sendPiece(block_info[0], block_info[1], block_data)

	def cancel(self, block_info):
		"""Cancel a previously requested block (index, offset, length)."""
		self.protocol.sendCancel(block_info[0], block_info[1], block_info[2])

	##############################
	# Event Handler
	##############################
	def chokeBy(self):
		self.is_choked = True

	def unchokeBy(self):
		self.is_choked = False
		self.torrent.onUnchoked(self)

	def interestedBy(self):
		self.is_interested = True

	def uninterestedBy(self):
		self.is_interested = False

	def haveBy(self, piece_index):
		"""Record that the peer has a piece and re-evaluate our interest."""
		self.pieces.set(piece_index, 1)
		self.checkInterested()

	def bitfieldBy(self, bitfield):
		"""Replace the peer's piece map and re-evaluate our interest."""
		self.pieces.set_complete_str(bitfield)
		self.checkInterested()

	def requestBy(self, piece_index, block_offset, block_length):
		"""Forward a valid request to the torrent; refuse it if the peer is not interested or is choked by us."""
		block_info = (piece_index, block_offset, block_length)
		if not self.is_interested or self.is_chocked_you:
			print('%s can not request me!' % (self.peer_id,))
		elif not self.torrent.checkBlockInfo(block_info):
			print('%s invalid block info!' % (self.peer_id,))
		else:
			self.torrent.onRequest(self, block_info)

	def pieceBy(self, piece_index, block_offset, block_data):
		"""Forward received block data to the torrent if its block info is valid."""
		block_info = (piece_index, block_offset, len(block_data))
		if not self.torrent.checkBlockInfo(block_info):
			print('%s invalid block info!' % (self.peer_id,))
		else:
			self.torrent.onPiece(self, block_info, block_data)

	def cancelBy(self, piece_index, block_offset, block_length):
		"""Forward a cancel to the torrent if its block info is valid."""
		block_info = (piece_index, block_offset, block_length)
		if not self.torrent.checkBlockInfo(block_info):
			print('%s invalid block info!' % (self.peer_id,))
		else:
			self.torrent.onCancel(self, block_info)

	##############################
	# Utils
	##############################
	def hasPiece(self, piece_index):
		"""Return True if the peer is known to have the piece."""
		return self.pieces.get(piece_index) > 0

	def checkInterested(self):
		"""Announce interest exactly when the peer has a piece we lack.

		Only pieces the peer actually has count: lacking a piece the peer
		cannot provide is no reason to be interested in this peer.
		"""
		interested = any(
			self.hasPiece(piece_index)
			and not self.torrent.hasPiece(piece_index)
			for piece_index in range(self.pieces.length))
		# Both calls are no-ops when the announced state already matches.
		if interested:
			self.interested()
		else:
			self.uninterested()

	def getDownloadRate(self):
		return self.protocol.getDownloadRate()

	def getUploadRate(self):
		return self.protocol.getUploadRate()

	def resetMeasurement(self):
		self.protocol.resetMeasurement()
0
    def test_small_array(self):
        """A bit of a 10-bit array reads unset before set_bit and set afterwards."""
        position = 6
        bits = BitArray(10)
        self.assertFalse(bits.get_bit(position))

        bits.set_bit(position)
        self.assertTrue(bits.get_bit(position))
    def __init__(self, rect, heightmapTypes=None):
        """Load the chunks covering *rect* and extract heightmaps and sections.

        Requests the chunks through ``getChunks``, parses the reply as NBT
        and fills ``self.heightmaps`` and ``self.sections``.

        Args:
            rect: ``(x, z, size_x, size_z)`` in block coordinates.
            heightmapTypes: names looked up in every chunk's ``Heightmaps``
                tag. ``None`` selects "MOTION_BLOCKING",
                "MOTION_BLOCKING_NO_LEAVES", "OCEAN_FLOOR" and
                "WORLD_SURFACE".
        """
        # A fresh list per call: a list literal as the default value would be
        # one object shared by every instance that relies on the default.
        if heightmapTypes is None:
            heightmapTypes = [
                "MOTION_BLOCKING", "MOTION_BLOCKING_NO_LEAVES",
                "OCEAN_FLOOR", "WORLD_SURFACE"
            ]

        self.rect = rect
        # (first chunk x, first chunk z, chunk count x, chunk count z);
        # ">> 4" converts a block coordinate to a chunk coordinate.
        self.chunkRect = (rect[0] >> 4, rect[1] >> 4,
                          ((rect[0] + rect[2] - 1) >> 4) - (rect[0] >> 4) + 1,
                          ((rect[1] + rect[3] - 1) >> 4) - (rect[1] >> 4) + 1)
        self.heightmapTypes = heightmapTypes

        chunkBytes = getChunks(*self.chunkRect, rtype='bytes')
        file_like = BytesIO(chunkBytes)

        print("parsing NBT")
        self.nbtfile = nbt.nbt.NBTFile(buffer=file_like)

        # Position of the rect's origin inside its first chunk.
        rectOffset = [rect[0] % 16, rect[1] % 16]

        # heightmaps
        # dtype=int is what the removed ``np.int`` alias used to mean.
        self.heightmaps = {}
        for hmName in self.heightmapTypes:
            self.heightmaps[hmName] = np.zeros((rect[2], rect[3]),
                                               dtype=int)

        # Sections are in x,z,y order!!! (reverse minecraft order :p)
        self.sections = [[[None for i in range(16)]
                          for z in range(self.chunkRect[3])]
                         for x in range(self.chunkRect[2])]

        # heightmaps
        print("extracting heightmaps")

        for x in range(self.chunkRect[2]):
            for z in range(self.chunkRect[3]):
                chunkID = x + z * self.chunkRect[2]

                hms = self.nbtfile['Chunks'][chunkID]['Level']['Heightmaps']
                for hmName in self.heightmapTypes:
                    hmRaw = hms[hmName]
                    heightmapBitArray = BitArray(9, 16 * 16, hmRaw)
                    heightmap = self.heightmaps[hmName]
                    for cz in range(16):
                        pz = z * 16 + cz - rectOffset[1]
                        for cx in range(16):
                            px = x * 16 + cx - rectOffset[0]
                            # Columns before the rect's origin would give
                            # negative indices, which NumPy wraps around to
                            # the far end instead of rejecting.
                            if px < 0 or pz < 0:
                                continue
                            try:
                                heightmap[px, pz] = heightmapBitArray.getAt(
                                    cz * 16 + cx)
                            except IndexError:
                                # Column lies past the far edge of the rect.
                                pass

        # sections
        print("extracting chunk sections")

        for x in range(self.chunkRect[2]):
            for z in range(self.chunkRect[3]):
                chunkID = x + z * self.chunkRect[2]
                chunkSections = self.nbtfile['Chunks'][chunkID]['Level'][
                    'Sections']

                for section in chunkSections:
                    y = section['Y'].value

                    # Sections without block data are left as None.
                    if not ('BlockStates' in section) or len(
                            section['BlockStates']) == 0:
                        continue

                    palette = section['Palette']
                    rawBlockStates = section['BlockStates']
                    # Enough bits to index the palette, but never below 4.
                    bitsPerEntry = max(4, ceil(log2(len(palette))))
                    blockStatesBitArray = BitArray(bitsPerEntry, 16 * 16 * 16,
                                                   rawBlockStates)

                    self.sections[x][z][y] = CachedSection(
                        palette, blockStatesBitArray)

        print("done")
    def huffman_compression(self, generate_encoding=False):
        """Huffman-compress ``index.csv`` line by line.

        Writes three files into ``self.directory``:
        ``symbol_to_encoding_dict.pickle`` (the code table),
        ``compressed_index`` (the encoded lines back to back) and
        ``compressed_seek_list.dawg`` (term -> (offset, length) of its
        encoded line). ``self.compressed_seek_list`` ends up holding the
        ``RecordDAWG``.

        Args:
            generate_encoding: when true, derive the code table from the
                character counts of ``index.csv`` (newlines excluded);
                otherwise use the built-in table for digits, ',' and '\\a'.
        """
        if generate_encoding:
            # count all occurring UTF-8 characters
            symbol_to_frequency_dict = Counter()
            with self.report.measure('counting utf8 characters'):
                with open(f'{self.directory}/index.csv') as index_file:
                    chunk_size = 100000
                    # read() returns '' at end of file, which ends the loop.
                    chunks = iter(lambda: index_file.read(chunk_size), '')
                    for i, chunk in enumerate(chunks, 1):
                        symbol_to_frequency_dict.update(chunk)
                        self.report.progress(
                            i, f' chunks counted ({chunk_size} characters '
                            'each)', 100)
                # Newlines get no code word.
                symbol_to_frequency_dict.pop('\n', None)

            # derive huffman encoding from character counts
            with self.report.measure('deriving huffman encoding'):
                symbol_to_encoding_dict = Huffman.derive_encoding(
                    symbol_to_frequency_dict)
            for key, value in symbol_to_encoding_dict.items():
                assert (len(key) == 1)
                # NOTE(review): ``symbol_to_encoding_list`` is not defined in
                # this method and is not read again here; unless it is a
                # module-level list this line raises NameError -- confirm.
                symbol_to_encoding_list[ord(key[0])] = value
        else:
            # optimal encoding for guardian
            # character distribution should be similar for all datasets
            symbol_to_encoding_dict = {
                '\a': BitArray('1111'),
                ',': BitArray('001'),
                '0': BitArray('1000'),
                '1': BitArray('011'),
                '2': BitArray('010'),
                '3': BitArray('000'),
                '4': BitArray('1110'),
                '5': BitArray('1101'),
                '6': BitArray('1100'),
                '7': BitArray('1011'),
                '8': BitArray('1010'),
                '9': BitArray('1001')
            }

        # Written once for both branches; a second identical dump inside the
        # generate_encoding branch would only repeat this write.
        with open(f'{self.directory}/symbol_to_encoding_dict.pickle',
                  mode='wb') as f:
            pickle.dump(symbol_to_encoding_dict, f, pickle.HIGHEST_PROTOCOL)

        # save compressed index and corresponding seek_list
        with self.report.measure('saving compressed files'):
            self.compressed_seek_list = []
            with open(f'{self.directory}/compressed_index', mode='wb') \
                    as compressed_index_file:
                offset = 0
                for i, orig_line in enumerate(
                        binary_read_line_generator_path(
                            f'{self.directory}/index.csv'), 1):
                    term = next(
                        csv.reader(io.StringIO(orig_line),
                                   delimiter=posting_list_separator))[0]
                    # NOTE(review): the extra 3 characters presumably cover
                    # quoting plus the separator after the term -- confirm
                    # against the index.csv writer.
                    line_without_term = orig_line[len(term) + 3:]
                    encoded_line = Huffman.encode(line_without_term,
                                                  symbol_to_encoding_dict)
                    compressed_index_file.write(encoded_line)

                    self.compressed_seek_list.append(
                        (term, (offset, len(encoded_line))))

                    self.report.progress(i, ' index lines compressed', 100000)

                    offset += len(encoded_line)
            self.compressed_seek_list = \
                RecordDAWG('>QQ', self.compressed_seek_list)
            self.compressed_seek_list.save(
                f'{self.directory}/compressed_seek_list.dawg')