def test_reset(self):
    """Check that reset_bit clears a bit in an array created with all bits set.

    The array is built as ``BitArray(10, 1)``; every index must read back as
    set before bit 6 is reset, and bit 6 must read back as clear afterwards.
    """
    size = 10
    bits = BitArray(size, 1)
    for index in xrange(size):
        self.assertTrue(bits.get_bit(index))

    target = 6
    bits.reset_bit(target)
    self.assertFalse(bits.get_bit(target))
def encode(string, symbol_to_encoding_dict):
    """Encode ``string`` symbol by symbol and return the packed bytes.

    The output starts with a padding header of ``padding - 1`` one-bits
    followed by a single zero-bit, where ``padding`` is chosen in 1..8 so
    that header plus payload is a whole number of bytes.  The payload is the
    concatenation of ``symbol_to_encoding_dict[symbol]`` for every symbol.

    Raises KeyError if a symbol has no entry in ``symbol_to_encoding_dict``.
    """
    # First pass: total payload length in bits.
    payload_bits = sum(
        len(symbol_to_encoding_dict[symbol]) for symbol in string)

    # A full byte of header is used when the payload is already aligned,
    # so the terminating zero-bit is always present.
    padding = 8 - (payload_bits % 8)
    header = '1' * (padding - 1) + '0'

    # Second pass: append each symbol's code after the header.
    encoded = BitArray(header)
    for symbol in string:
        encoded += BitArray(symbol_to_encoding_dict[symbol])
    return encoded.tobytes()
def decode(binary_data, symbol_to_encoding_dict):
    """Decode bytes into a string using ``symbol_to_encoding_dict``.

    The leading run of one-bits and the first zero-bit after it are treated
    as a padding header and skipped; the remaining bits are decoded with the
    given encoding and the resulting symbols are joined into one string.
    """
    bits = BitArray()
    bits.frombytes(binary_data)

    # Index of the first zero-bit, i.e. the end of the padding header.
    terminator = 0
    while bool(bits[terminator]):
        terminator += 1

    payload = bits[terminator + 1:]
    symbols = payload.decode(symbol_to_encoding_dict)
    return ''.join(symbols)
def __init__(self, buf, unitsize):
    """Wrap ``buf`` as ``count`` units of ``unitsize`` bytes each.

    Stores the buffer, the number of whole units it holds, and a BitArray
    over the buffer shaped ``(count, unitsize * 8)`` with strides
    ``(unitsize * 8, 1)``.

    Raises InvalidArgument if ``buf.nbytes`` is not a multiple of
    ``unitsize``.
    """
    leftover = buf.nbytes % unitsize
    if leftover:
        raise InvalidArgument(
            "Buffer provided is not a multiple of unit size")

    bits_per_unit = unitsize * 8
    self.buf = buf
    self.count = buf.nbytes // unitsize
    self.bits = BitArray(self.buf, 0,
                         (self.count, bits_per_unit),
                         (bits_per_unit, 1))
def __init__(self, peer_id, protocol, torrent):
    """Set up the initial state for one peer.

    Both choke flags start as True and both interest flags start as False;
    the connection is not running.  ``pieces`` is a BitArray sized by
    ``torrent.piece_num``.
    """
    # Collaborators and identity.
    self.peer_id = peer_id
    self.protocol = protocol
    self.torrent = torrent

    # Connection state flags.
    self.running = False
    self.is_choked = self.is_chocked_you = True
    self.is_interested = self.is_interested_you = False

    # One entry per piece of the torrent.
    self.pieces = BitArray(self.torrent.piece_num)
def test_one_bit(self):
    """Check that set_bit sets exactly one bit in a 32-bit array.

    A fresh ``BitArray(32)`` must report every bit clear; after setting
    bit 5, bit 5 reads as set while its neighbours 4 and 6 stay clear.
    """
    size = 32
    bits = BitArray(size)
    for index in xrange(size):
        self.assertFalse(bits.get_bit(index))

    target = 5
    bits.set_bit(target)
    self.assertTrue(bits.get_bit(target))
    for neighbour in (target - 1, target + 1):
        self.assertFalse(bits.get_bit(neighbour))
def test_big_number(self):
    """Check set_bit/get_bit at a large index of a very large array.

    Uses ``BitArray(2 ** 32)`` and index ``2 ** 20``: the bit starts clear,
    reads as set after set_bit, and the bits directly after and before it
    remain clear.
    """
    bits = BitArray(2 ** 32)
    index = 2 ** 20

    self.assertFalse(bits.get_bit(index))
    bits.set_bit(index)
    self.assertTrue(bits.get_bit(index))

    for neighbour in (index + 1, index - 1):
        self.assertFalse(bits.get_bit(neighbour))
def sieve(MAX):
    """Run a sieve of Eratosthenes over a bit array of ``MAX`` entries.

    All bits start set.  Every even index from 2 upwards is cleared, then
    for each odd ``n`` up to and including ``floor(sqrt(MAX))`` whose bit
    is still set, every multiple of ``n`` from ``2 * n`` below ``MAX`` is
    cleared.

    NOTE(review): nothing is returned in the part of the function that is
    visible, and the bit array is a local; presumably the original function
    continued past this point - confirm against the full source.
    """
    ba = BitArray(MAX, initialize=1)

    # NOTE(review): this clears index 2 as well, and indices 0 and 1 are
    # left set; presumably the consumer special-cases them - confirm.
    for n in xrange(2, MAX, 2):
        ba.reset_bit(n)

    limit = int(math.floor(math.sqrt(MAX)))

    # The upper bound must include ``limit`` itself: with an exclusive bound
    # an odd composite such as 25 (for MAX = 26, limit = 5) is never cleared
    # because its only odd prime factor equals ``limit``.
    for n in xrange(3, limit + 1, 2):
        if ba.get_bit(n):
            for i in xrange(n * 2, MAX, n):
                ba.reset_bit(i)
def derive_encoding(symbol_to_frequency_dict):
    """Build a Huffman code from symbol frequencies.

    A heap of leaf nodes is repeatedly reduced by popping the two smallest
    nodes and pushing their parent; every symbol under the first popped
    node gets a ``'0'`` prepended to its code and every symbol under the
    second a ``'1'``.

    Args:
        symbol_to_frequency_dict: non-empty mapping of symbol to frequency.

    Returns:
        A dict mapping each symbol to a ``BitArray`` built from its code
        string.  When there is only one symbol its code is the single bit
        ``'0'``.

    Raises:
        AssertionError: if ``symbol_to_frequency_dict`` is empty.
    """
    assert len(symbol_to_frequency_dict) > 0

    codes = {symbol: '' for symbol in symbol_to_frequency_dict}

    heap = [Node().init_leaf(symbol, frequency)
            for symbol, frequency in symbol_to_frequency_dict.items()]
    heapq.heapify(heap)

    while len(heap) > 1:
        left_child = heapq.heappop(heap)
        right_child = heapq.heappop(heap)
        heapq.heappush(heap, Node().init_parent(left_child, right_child))
        # Codes are built from the leaves upwards, hence the prepending.
        for symbol in left_child.symbols_in_subtree():
            codes[symbol] = '0' + codes[symbol]
        for symbol in right_child.symbols_in_subtree():
            codes[symbol] = '1' + codes[symbol]

    if len(codes) == 1:
        # With a single symbol the merge loop never runs and the code would
        # stay empty, so encoded text would carry no bits at all and could
        # not be decoded.  Give the lone symbol a one-bit code instead.
        (only_symbol,) = codes
        codes[only_symbol] = '0'

    return {symbol: BitArray(code) for symbol, code in codes.items()}
def __init__(self, x1, z1, x2, z2,
             heightmapTypes=["MOTION_BLOCKING",
                             "MOTION_BLOCKING_NO_LEAVES",
                             "OCEAN_FLOOR",
                             "WORLD_SURFACE"]):
    """**Initialise WorldSlice with region and heightmaps**.

    Fetches the chunks covering the rectangle from ``(x1, z1)`` to
    ``(x2, z2)`` via ``getChunks``, parses them as NBT, fills one 2-D
    integer array per requested heightmap type and caches every non-empty
    chunk section.

    Args:
        x1, z1: first corner of the region; becomes the rect origin.
        x2, z2: opposite corner; the rect size is ``(x2 - x1, z2 - z1)``.
        heightmapTypes: names of the heightmaps to extract from each chunk.
    """
    self.rect = x1, z1, x2 - x1, z2 - z1
    self.chunkRect = (self.rect[0] >> 4, self.rect[1] >> 4,
                      ((self.rect[0] + self.rect[2] - 1) >> 4)
                      - (self.rect[0] >> 4) + 1,
                      ((self.rect[1] + self.rect[3] - 1) >> 4)
                      - (self.rect[1] >> 4) + 1)
    # Copy so instances never share (or mutate) the default list object.
    self.heightmapTypes = list(heightmapTypes)

    showPerf = False

    t0 = time.perf_counter()
    chunkBytes = getChunks(*self.chunkRect, rtype='bytes')
    if showPerf:
        print(f"took {time.perf_counter() - t0}s")

    t0 = time.perf_counter()
    file_like = BytesIO(chunkBytes)
    print("parsing NBT")
    self.nbtfile = nbt.nbt.NBTFile(buffer=file_like)
    if showPerf:
        print(f"took {time.perf_counter() - t0}s")

    t0 = time.perf_counter()
    rectOffset = [self.rect[0] % 16, self.rect[1] % 16]
    width, depth = self.rect[2], self.rect[3]
    chunksX, chunksZ = self.chunkRect[2], self.chunkRect[3]

    # heightmaps
    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy, so the builtin is used directly.
    self.heightmaps = {}
    for hmName in self.heightmapTypes:
        self.heightmaps[hmName] = np.zeros((width, depth), dtype=int)

    # Sections are in x,z,y order!!! (reverse minecraft order :p)
    self.sections = [[[None for i in range(16)]
                      for z in range(chunksZ)]
                     for x in range(chunksX)]

    # heightmaps
    print("extracting heightmaps")
    for x in range(chunksX):
        for z in range(chunksZ):
            chunkID = x + z * chunksX
            hms = self.nbtfile['Chunks'][chunkID]['Level']['Heightmaps']
            for hmName in self.heightmapTypes:
                hmRaw = hms[hmName]
                heightmapBitArray = BitArray(9, 16 * 16, hmRaw)
                heightmap = self.heightmaps[hmName]
                for cz in range(16):
                    hz = z * 16 + cz - rectOffset[1]
                    if not 0 <= hz < depth:
                        continue
                    for cx in range(16):
                        hx = x * 16 + cx - rectOffset[0]
                        # Chunk columns outside the requested rect are
                        # skipped explicitly.  Relying on IndexError let
                        # negative indices wrap around and write to the
                        # wrong cell, and also hid any real decoding error.
                        if 0 <= hx < width:
                            heightmap[hx, hz] = \
                                heightmapBitArray.getAt(cz * 16 + cx)
    if showPerf:
        print(f"took {time.perf_counter() - t0}s")

    t0 = time.perf_counter()
    # sections
    print("extracting chunk sections")
    for x in range(chunksX):
        for z in range(chunksZ):
            chunkID = x + z * chunksX
            chunk = self.nbtfile['Chunks'][chunkID]
            chunkSections = chunk['Level']['Sections']
            for section in chunkSections:
                y = section['Y'].value
                # Sections without block data stay None in the cache.
                if ('BlockStates' not in section
                        or len(section['BlockStates']) == 0):
                    continue
                palette = section['Palette']
                rawBlockStates = section['BlockStates']
                # At least 4 bits are used per palette index.
                bitsPerEntry = max(4, ceil(log2(len(palette))))
                blockStatesBitArray = BitArray(
                    bitsPerEntry, 16 * 16 * 16, rawBlockStates)
                self.sections[x][z][y] = CachedSection(
                    palette, blockStatesBitArray)
    if showPerf:
        print(f"took {time.perf_counter() - t0}s")

    print("done")
def __init__(self, iterable=(), population=56, probes=6):
    """Create the filter state and add every item of ``iterable``.

    ``population`` sizes the backing BitArray and is also kept as
    ``xrange(population)``; ``probes`` is stored unchanged.  Each element
    of ``iterable`` is passed to ``self.add``.
    """
    self.data = BitArray(population)
    self.population = xrange(population)
    self.probes = probes

    for item in iterable:
        self.add(item)
def test_initialize(self):
    """Check that ``BitArray(10, 1)`` starts with all ten bits set."""
    size = 10
    bits = BitArray(size, 1)
    for index in xrange(size):
        self.assertTrue(bits.get_bit(index))
import numpy as np
from bitarray import BitArray

# Ten bytes holding the values 0..9, viewed as a 10 x 8 grid of bits:
# moving one row advances 8 bits, moving one column advances 1 bit.
buf = np.arange(10, dtype=np.uint8)
offset, shape, strides = 0, (10, 8), (8, 1)
b = BitArray(buf, offset, shape, strides)

# A single element.
print(b[1, 0])
# NOTE(review): columns 8:16 lie past the declared row width of 8;
# presumably this probes slicing beyond the shape - confirm.
print(b[0, 8:16])
# The whole array.
print(b[:])
# Fetch the chunks for the arguments (0, 0, 2, 2) as raw bytes and dump them.
bytes = getChunks(0, 0, 2, 2, rtype='bytes')
print(len(bytes))
print(bytes)
# print(getChunks(0, 0, 2, 2, rtype='text'))
print("")

# Parse the raw bytes as NBT and show the first chunk's sections.
file_like = BytesIO(bytes)
nbtfile = nbt.nbt.NBTFile(buffer=file_like)
print(nbtfile['Chunks'])
print(nbtfile['Chunks'][0]['Level']['Sections'])
sections = nbtfile['Chunks'][0]['Level']['Sections']


def sectionIsEmpty(section):
    """Return True when the section has no (or zero-length) BlockStates."""
    if 'BlockStates' not in section:
        return True
    return len(section['BlockStates']) == 0


# Print the palette entry of every block in each non-empty section.
for section in sections:
    if sectionIsEmpty(section):
        continue

    palette = section['Palette']
    blockStates = section['BlockStates']
    # At least 4 bits are used per palette index.
    bitsPerEntry = max(4, ceil(log2(len(palette))))
    bitarray = BitArray(bitsPerEntry, 16*16*16, blockStates)

    def printBlock(blockStateID):
        print(palette[blockStateID])

    bitarray.getAll(printBlock)
class Connection(): def __init__(self, peer_id, protocol, torrent): self.running = False self.is_choked = True self.is_interested = False self.is_chocked_you = True self.is_interested_you = False self.peer_id = peer_id self.torrent = torrent self.protocol = protocol self.pieces = BitArray(self.torrent.piece_num) def lost(self): print self.peer_id, 'lost' self.torrent.lostConnection(self) ############################## # Event Interface ############################## def choke(self): if not self.is_chocked_you: self.is_chocked_you = True self.protocol.sendChoke() def unchoke(self): if self.is_chocked_you: self.is_chocked_you = False self.protocol.sendUnchoke() def interested(self): if not self.is_interested_you: self.is_interested_you = True self.protocol.sendInterested() def uninterested(self): if self.is_interested_you: self.is_interested_you = False self.protocol.sendUninterested() def have(self, piece_index): self.protocol.sendHave(piece_index) self.checkInterested() def bitfield(self, pieces): print 'send bitfield:', len(pieces) self.protocol.sendBitfield(pieces) def request(self, block_info): if self.is_choked: return self.protocol.sendRequest(block_info[0], block_info[1], block_info[2]) def piece(self, block_info, block_data): if self.is_chocked_you: return self.protocol.sendPiece(block_info[0], block_info[1], block_data) def cancel(self, block_info): self.protocol.sendCancel(block_info[0], block_info[1], block_info[2]) ############################## # Event Handler ############################## def chokeBy(self): self.is_choked = True def unchokeBy(self): self.is_choked = False self.torrent.onUnchoked(self) def interestedBy(self): self.is_interested = True def uninterestedBy(self): self.is_interested = False def haveBy(self, piece_index): self.pieces.set(piece_index, 1) self.checkInterested() def bitfieldBy(self, bitfield): self.pieces.set_complete_str(bitfield) self.checkInterested() def requestBy(self, piece_index, block_offset, block_length): block_info 
= (piece_index, block_offset, block_length) if not self.is_interested or self.is_chocked_you: print self.peer_id, 'can not request me!' elif not self.torrent.checkBlockInfo(block_info): print self.peer_id, 'invalid block info!' else: self.torrent.onRequest(self, (piece_index, block_offset, block_length)) def pieceBy(self, piece_index, block_offset, block_data): block_info = (piece_index, block_offset, len(block_data)) if not self.torrent.checkBlockInfo(block_info): print self.peer_id, 'invalid block info!' else: self.torrent.onPiece(self, (piece_index, block_offset, len(block_data)), block_data) def cancelBy(self, piece_index, block_offset, block_length): block_info = (piece_index, block_offset, block_length) if not self.torrent.checkBlockInfo(block_info): print self.peer_id, 'invalid block info!' else: self.torrent.onCancel(self, (piece_index, block_offset, block_length)) ############################## # Utils ############################## def hasPiece(self, piece_index): return self.pieces.get(piece_index) > 0 def checkInterested(self): interested = False for piece_index in xrange(0, self.pieces.length): if not self.torrent.hasPiece(piece_index): interested = True break if interested != self.is_interested_you: if interested: self.interested() else: self.uninterested() def getDownloadRate(self): return self.protocol.getDownloadRate() def getUploadRate(self): return self.protocol.getUploadRate() def resetMeasurement(self): self.protocol.resetMeasurement()
def test_small_array(self):
    """Check set_bit on a 10-bit array: bit 6 is clear, then set."""
    bits = BitArray(10)
    target = 6

    self.assertFalse(bits.get_bit(target))
    bits.set_bit(target)
    self.assertTrue(bits.get_bit(target))
def __init__(self, rect, heightmapTypes=[
        "MOTION_BLOCKING", "MOTION_BLOCKING_NO_LEAVES", "OCEAN_FLOOR",
        "WORLD_SURFACE"
]):
    """Load the chunks covering ``rect`` and extract heightmaps and sections.

    Fetches the covering chunks via ``getChunks``, parses them as NBT,
    fills one 2-D integer array per requested heightmap type and caches
    every non-empty chunk section.

    Args:
        rect: ``(x, z, width, depth)`` of the region, as indexed below.
        heightmapTypes: names of the heightmaps to extract from each chunk.
    """
    self.rect = rect
    self.chunkRect = (rect[0] >> 4, rect[1] >> 4,
                      ((rect[0] + rect[2] - 1) >> 4) - (rect[0] >> 4) + 1,
                      ((rect[1] + rect[3] - 1) >> 4) - (rect[1] >> 4) + 1)
    # Copy so instances never share (or mutate) the default list object.
    self.heightmapTypes = list(heightmapTypes)

    chunkBytes = getChunks(*self.chunkRect, rtype='bytes')
    file_like = BytesIO(chunkBytes)

    print("parsing NBT")
    self.nbtfile = nbt.nbt.NBTFile(buffer=file_like)

    rectOffset = [rect[0] % 16, rect[1] % 16]
    width, depth = rect[2], rect[3]
    chunksX, chunksZ = self.chunkRect[2], self.chunkRect[3]

    # heightmaps
    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy, so the builtin is used directly.
    self.heightmaps = {}
    for hmName in self.heightmapTypes:
        self.heightmaps[hmName] = np.zeros((width, depth), dtype=int)

    # Sections are in x,z,y order!!! (reverse minecraft order :p)
    self.sections = [[[None for i in range(16)]
                      for z in range(chunksZ)]
                     for x in range(chunksX)]

    # heightmaps
    print("extracting heightmaps")
    for x in range(chunksX):
        for z in range(chunksZ):
            chunkID = x + z * chunksX
            hms = self.nbtfile['Chunks'][chunkID]['Level']['Heightmaps']
            for hmName in self.heightmapTypes:
                hmRaw = hms[hmName]
                heightmapBitArray = BitArray(9, 16 * 16, hmRaw)
                heightmap = self.heightmaps[hmName]
                for cz in range(16):
                    hz = z * 16 + cz - rectOffset[1]
                    if not 0 <= hz < depth:
                        continue
                    for cx in range(16):
                        hx = x * 16 + cx - rectOffset[0]
                        # Chunk columns outside the requested rect are
                        # skipped explicitly.  Relying on IndexError let
                        # negative indices wrap around and write to the
                        # wrong cell, and also hid any real decoding error.
                        if 0 <= hx < width:
                            heightmap[hx, hz] = \
                                heightmapBitArray.getAt(cz * 16 + cx)

    # sections
    print("extracting chunk sections")
    for x in range(chunksX):
        for z in range(chunksZ):
            chunkID = x + z * chunksX
            chunkSections = \
                self.nbtfile['Chunks'][chunkID]['Level']['Sections']
            for section in chunkSections:
                y = section['Y'].value
                # Sections without block data stay None in the cache.
                if ('BlockStates' not in section
                        or len(section['BlockStates']) == 0):
                    continue
                palette = section['Palette']
                rawBlockStates = section['BlockStates']
                # At least 4 bits are used per palette index.
                bitsPerEntry = max(4, ceil(log2(len(palette))))
                blockStatesBitArray = BitArray(bitsPerEntry, 16 * 16 * 16,
                                               rawBlockStates)
                self.sections[x][z][y] = CachedSection(
                    palette, blockStatesBitArray)

    print("done")
def huffman_compression(self, generate_encoding=False):
    """Compress ``index.csv`` with a Huffman code and write a seek list.

    Steps:

    1. Obtain the symbol encoding: either derived from the character
       counts of ``index.csv`` (``generate_encoding=True``) or the fixed
       built-in table.  The encoding is pickled to
       ``symbol_to_encoding_dict.pickle``.
    2. Encode every index line (without its leading term) and append it to
       ``compressed_index``.
    3. Store ``(term, (offset, length))`` for every line in a RecordDAWG
       saved as ``compressed_seek_list.dawg`` and kept in
       ``self.compressed_seek_list``.

    Args:
        generate_encoding: when True, count the characters of the index
            and derive a fresh encoding instead of using the fixed table.
    """
    if generate_encoding:
        # count all occurring UTF-8 characters
        symbol_to_frequency_dict = Counter()
        with self.report.measure('counting utf8 characters'):
            with open(f'{self.directory}/index.csv') as index_file:
                chunk_size = 100000
                # read() returns '' at end of file, which stops iter().
                chunks = iter(lambda: index_file.read(chunk_size), '')
                for i, chunk in enumerate(chunks, 1):
                    symbol_to_frequency_dict.update(chunk)
                    self.report.progress(
                        i,
                        f' chunks counted ({chunk_size} characters each)',
                        100)
            # Line breaks are not given a code.
            symbol_to_frequency_dict.pop('\n', None)

        # derive huffman encoding from character counts
        with self.report.measure('deriving huffman encoding'):
            symbol_to_encoding_dict = Huffman.derive_encoding(
                symbol_to_frequency_dict)
        # Every symbol must be a single character.  (The original also
        # wrote each code into ``symbol_to_encoding_list``, a name that is
        # never defined or read in the visible code; that assignment
        # would raise NameError and has been removed.)
        for key in symbol_to_encoding_dict:
            assert len(key) == 1
    else:
        # optimal encoding for guardian
        # character distribution should be similar for all datasets
        symbol_to_encoding_dict = {
            '\a': BitArray('1111'),
            ',': BitArray('001'),
            '0': BitArray('1000'),
            '1': BitArray('011'),
            '2': BitArray('010'),
            '3': BitArray('000'),
            '4': BitArray('1110'),
            '5': BitArray('1101'),
            '6': BitArray('1100'),
            '7': BitArray('1011'),
            '8': BitArray('1010'),
            '9': BitArray('1001'),
        }

    # Persist whichever encoding was chosen.
    with open(f'{self.directory}/symbol_to_encoding_dict.pickle',
              mode='wb') as f:
        pickle.dump(symbol_to_encoding_dict, f, pickle.HIGHEST_PROTOCOL)

    # save compressed index and corresponding seek_list
    with self.report.measure('saving compressed files'):
        self.compressed_seek_list = []
        with open(f'{self.directory}/compressed_index', mode='wb') \
                as compressed_index_file:
            offset = 0
            index_lines = binary_read_line_generator_path(
                f'{self.directory}/index.csv')
            for i, orig_line in enumerate(index_lines, 1):
                term = next(csv.reader(
                    io.StringIO(orig_line),
                    delimiter=posting_list_separator))[0]
                # NOTE(review): the extra 3 characters presumably cover
                # the quoting around the term plus the separator - confirm.
                line_without_term = orig_line[len(term) + 3:]
                encoded_line = Huffman.encode(line_without_term,
                                              symbol_to_encoding_dict)
                compressed_index_file.write(encoded_line)
                self.compressed_seek_list.append(
                    (term, (offset, len(encoded_line))))
                self.report.progress(i, ' index lines compressed', 100000)
                offset += len(encoded_line)

        self.compressed_seek_list = \
            RecordDAWG('>QQ', self.compressed_seek_list)
        self.compressed_seek_list.save(
            f'{self.directory}/compressed_seek_list.dawg')