Python BitArray.BitArray 예제들, bitarray.BitArray.BitArray Python 예제들

예제 #1

0

파일 보기

파일: Huffman.py 프로젝트: antonia69/InformationRetrieval-en.people.cn

def encode(string, symbol_to_encoding_dict):
    """Encode ``string`` with the given code table and return packed bytes.

    The payload is prefixed with a marker of 1 to 8 bits: zero or more
    one-bits followed by a single zero-bit. The marker length is chosen so
    that marker plus payload together fill a whole number of bytes.

    Args:
        string: iterable of symbols; each must be a key of the code table.
        symbol_to_encoding_dict: maps a symbol to its code (anything that
            supports ``len`` and is accepted by ``BitArray``).

    Returns:
        Whatever ``tobytes()`` of the assembled bit array yields.
    """
    payload_bits = sum(
        len(symbol_to_encoding_dict[symbol]) for symbol in string)
    marker_len = 8 - (payload_bits % 8)
    encoded = BitArray('1' * (marker_len - 1) + '0')
    for symbol in string:
        encoded += BitArray(symbol_to_encoding_dict[symbol])
    return encoded.tobytes()

예제 #2

0

파일 보기

파일: classes.py 프로젝트: martinling/pysigcore

 def __init__(self, buf, unitsize):
     """Expose ``buf`` as a sequence of fixed-size units with a bit view.

     Args:
         buf: object exposing ``nbytes``; stored as ``self.buf``.
         unitsize: size of one unit in bytes.

     Raises:
         InvalidArgument: if ``buf.nbytes`` is not a multiple of
             ``unitsize``.
     """
     leftover = buf.nbytes % unitsize
     if leftover != 0:
         raise InvalidArgument(
             "Buffer provided is not a multiple of unit size")
     bits_per_unit = unitsize * 8
     self.buf = buf
     self.count = buf.nbytes // unitsize
     # One row per unit, one column per bit of that unit.
     # NOTE(review): argument order presumably (buffer, offset, shape,
     # strides) - confirm against the BitArray constructor.
     shape = (self.count, bits_per_unit)
     strides = (bits_per_unit, 1)
     self.bits = BitArray(self.buf, 0, shape, strides)

예제 #3

0

파일 보기

파일: Huffman.py 프로젝트: antonia69/InformationRetrieval-en.people.cn

def decode(binary_data, symbol_to_encoding_dict):
    """Decode ``binary_data`` back into a string using the code table.

    The data is expected to start with a marker made of zero or more
    one-bits followed by a single zero-bit; everything after that zero-bit
    is handed to the bit array's ``decode`` method.

    Args:
        binary_data: bytes-like data loaded via ``frombytes``.
        symbol_to_encoding_dict: code table passed on to ``decode``.

    Returns:
        The decoded symbols joined into one ``str``.
    """
    bits = BitArray()
    bits.frombytes(binary_data)
    # Walk over the leading one-bits; the first zero-bit ends the marker.
    marker_end = 0
    while bits[marker_end]:
        marker_end += 1
    payload = bits[marker_end + 1:]
    return ''.join(payload.decode(symbol_to_encoding_dict))

예제 #4

0

파일 보기

파일: Huffman.py 프로젝트: antonia69/InformationRetrieval-en.people.cn

def derive_encoding(symbol_to_frequency_dict):
    """Derive a Huffman code table from symbol frequencies.

    The two smallest nodes are repeatedly popped from a heap and merged.
    On every merge, each symbol below the first-popped node gets a ``'0'``
    prepended to its code and each symbol below the second-popped node a
    ``'1'``.

    Args:
        symbol_to_frequency_dict: non-empty mapping of symbol to frequency.

    Returns:
        A dict mapping every symbol to its code wrapped in ``BitArray``.

    Raises:
        ValueError: if ``symbol_to_frequency_dict`` is empty.
    """
    # Explicit check instead of ``assert`` so that it survives ``python -O``.
    if not symbol_to_frequency_dict:
        raise ValueError('symbol_to_frequency_dict must not be empty')

    codes = dict.fromkeys(symbol_to_frequency_dict, '')
    # NOTE(review): relies on Node instances being orderable (for heapq) and
    # on init_leaf/init_parent returning the node - confirm in Node.
    heap = [Node().init_leaf(symbol, frequency)
            for symbol, frequency in symbol_to_frequency_dict.items()]
    heapq.heapify(heap)
    while len(heap) > 1:
        left_child = heapq.heappop(heap)
        right_child = heapq.heappop(heap)
        heapq.heappush(heap, Node().init_parent(left_child, right_child))

        for prefix, child in (('0', left_child), ('1', right_child)):
            for symbol in child.symbols_in_subtree():
                codes[symbol] = prefix + codes[symbol]

    if len(codes) == 1:
        # A lone symbol never takes part in a merge and would keep an empty
        # code, which carries no bits and cannot be decoded again. Give it
        # a one-bit code instead.
        codes = dict.fromkeys(codes, '0')

    return {symbol: BitArray(bits) for symbol, bits in codes.items()}

예제 #5

0

파일 보기

파일: worldLoader.py 프로젝트: Flashing-Blinkenlights/gerardus-mercator

    def __init__(self,
                 rect,
                 heightmapTypes=("MOTION_BLOCKING",
                                 "MOTION_BLOCKING_NO_LEAVES",
                                 "OCEAN_FLOOR", "WORLD_SURFACE")):
        """Fetch the chunks covering ``rect`` and cache their contents.

        Populates ``self.nbtfile`` (parsed chunk data), ``self.heightmaps``
        (one integer array of shape ``(rect[2], rect[3])`` per heightmap
        name) and ``self.sections`` (nested lists indexed ``[x][z][y]`` in
        chunk units, holding ``CachedSection`` objects or ``None``).

        Args:
            rect: indexable with four entries; ``rect[0]``/``rect[1]`` are
                the start block coordinates and ``rect[2]``/``rect[3]`` the
                sizes along the same two axes.
            heightmapTypes: names of the heightmaps to read from each
                chunk's ``Heightmaps`` tag.
        """
        self.rect = rect
        firstChunkX = rect[0] >> 4
        firstChunkZ = rect[1] >> 4
        self.chunkRect = (firstChunkX, firstChunkZ,
                          ((rect[0] + rect[2] - 1) >> 4) - firstChunkX + 1,
                          ((rect[1] + rect[3] - 1) >> 4) - firstChunkZ + 1)
        # Copy into a fresh list so instances never share one list object
        # (the original stored the mutable default itself).
        self.heightmapTypes = list(heightmapTypes)

        # Named chunkBytes to avoid shadowing the builtin ``bytes``.
        chunkBytes = getChunks(*self.chunkRect, rtype='bytes')
        file_like = BytesIO(chunkBytes)

        print("parsing NBT")
        self.nbtfile = nbt.nbt.NBTFile(buffer=file_like)

        rectOffset = [rect[0] % 16, rect[1] % 16]

        # heightmaps
        # ``np.int`` was removed in NumPy 1.24; it was an alias of ``int``.
        self.heightmaps = {
            hmName: np.zeros((rect[2], rect[3]), dtype=int)
            for hmName in self.heightmapTypes
        }

        chunkCountX = self.chunkRect[2]
        chunkCountZ = self.chunkRect[3]

        # Sections are in x,z,y order!!! (reverse minecraft order :p)
        self.sections = [[[None] * 16 for z in range(chunkCountZ)]
                         for x in range(chunkCountX)]

        # heightmaps
        print("extracting heightmaps")

        for x in range(chunkCountX):
            for z in range(chunkCountZ):
                chunkID = x + z * chunkCountX
                # Position of this chunk's first column inside the arrays.
                baseX = x * 16 - rectOffset[0]
                baseZ = z * 16 - rectOffset[1]

                hms = self.nbtfile['Chunks'][chunkID]['Level']['Heightmaps']
                for hmName in self.heightmapTypes:
                    hmRaw = hms[hmName]
                    # NOTE(review): presumably 9 bits per entry and 16 * 16
                    # entries - confirm against BitArray's constructor.
                    heightmapBitArray = BitArray(9, 16 * 16, hmRaw)
                    heightmap = self.heightmaps[hmName]
                    for cz in range(16):
                        for cx in range(16):
                            try:
                                heightmap[baseX + cx, baseZ + cz] = \
                                    heightmapBitArray.getAt(cz * 16 + cx)
                            except IndexError:
                                # Column lies past the end of the rect.
                                # NOTE(review): negative indices wrap in
                                # NumPy instead of raising.
                                pass

        # sections
        print("extracting chunk sections")

        for x in range(chunkCountX):
            for z in range(chunkCountZ):
                chunkID = x + z * chunkCountX
                chunkSections = self.nbtfile['Chunks'][chunkID]['Level'][
                    'Sections']

                for section in chunkSections:
                    y = section['Y'].value

                    # Sections without block state data are left as None.
                    if ('BlockStates' not in section
                            or len(section['BlockStates']) == 0):
                        continue

                    palette = section['Palette']
                    rawBlockStates = section['BlockStates']
                    # At least 4 bits per entry, more for larger palettes.
                    bitsPerEntry = max(4, ceil(log2(len(palette))))
                    blockStatesBitArray = BitArray(bitsPerEntry, 16 * 16 * 16,
                                                   rawBlockStates)

                    self.sections[x][z][y] = CachedSection(
                        palette, blockStatesBitArray)

        print("done")

예제 #6

0

파일 보기

파일: worldLoader.py 프로젝트: nilsgawlik/gdmc_2021_submission

    def __init__(self, x1, z1, x2, z2,
                 heightmapTypes=("MOTION_BLOCKING",
                                 "MOTION_BLOCKING_NO_LEAVES",
                                 "OCEAN_FLOOR",
                                 "WORLD_SURFACE")):
        """**Initialise WorldSlice with region and heightmaps**.

        Populates ``self.rect`` as ``(x1, z1, x2 - x1, z2 - z1)``,
        ``self.nbtfile`` (parsed chunk data), ``self.heightmaps`` (one
        integer array of shape ``(rect[2], rect[3])`` per heightmap name)
        and ``self.sections`` (nested lists indexed ``[x][z][y]`` in chunk
        units, holding ``CachedSection`` objects or ``None``).

        Args:
            x1, z1: start block coordinates of the region.
            x2, z2: end block coordinates; sizes are ``x2 - x1`` and
                ``z2 - z1``.
            heightmapTypes: names of the heightmaps to read from each
                chunk's ``Heightmaps`` tag.
        """
        self.rect = x1, z1, x2 - x1, z2 - z1
        firstChunkX = self.rect[0] >> 4
        firstChunkZ = self.rect[1] >> 4
        self.chunkRect = (firstChunkX, firstChunkZ,
                          ((self.rect[0] + self.rect[2] - 1) >> 4)
                          - firstChunkX + 1,
                          ((self.rect[1] + self.rect[3] - 1) >> 4)
                          - firstChunkZ + 1)
        # Copy into a fresh list so instances never share one list object
        # (the original stored the mutable default itself).
        self.heightmapTypes = list(heightmapTypes)

        t0 = time.perf_counter()
        # Named chunkBytes to avoid shadowing the builtin ``bytes``.
        chunkBytes = getChunks(*self.chunkRect, rtype='bytes')

        # Flip to True to print the duration of every loading phase.
        showPerf = False

        if showPerf:
            print(f"took {time.perf_counter() - t0}s")
        t0 = time.perf_counter()

        file_like = BytesIO(chunkBytes)

        print("parsing NBT")
        self.nbtfile = nbt.nbt.NBTFile(buffer=file_like)
        if showPerf:
            print(f"took {time.perf_counter() - t0}s")
        t0 = time.perf_counter()

        rectOffset = [self.rect[0] % 16, self.rect[1] % 16]

        # heightmaps
        # ``np.int`` was removed in NumPy 1.24; it was an alias of ``int``.
        self.heightmaps = {
            hmName: np.zeros((self.rect[2], self.rect[3]), dtype=int)
            for hmName in self.heightmapTypes
        }

        chunkCountX = self.chunkRect[2]
        chunkCountZ = self.chunkRect[3]

        # Sections are in x,z,y order!!! (reverse minecraft order :p)
        self.sections = [[[None] * 16 for z in range(chunkCountZ)]
                         for x in range(chunkCountX)]

        # heightmaps
        print("extracting heightmaps")

        for x in range(chunkCountX):
            for z in range(chunkCountZ):
                chunkID = x + z * chunkCountX
                # Position of this chunk's first column inside the arrays.
                baseX = x * 16 - rectOffset[0]
                baseZ = z * 16 - rectOffset[1]

                hms = self.nbtfile['Chunks'][chunkID]['Level']['Heightmaps']
                for hmName in self.heightmapTypes:
                    hmRaw = hms[hmName]
                    # NOTE(review): presumably 9 bits per entry and 16 * 16
                    # entries - confirm against BitArray's constructor.
                    heightmapBitArray = BitArray(9, 16 * 16, hmRaw)
                    heightmap = self.heightmaps[hmName]
                    for cz in range(16):
                        for cx in range(16):
                            try:
                                heightmap[baseX + cx, baseZ + cz] \
                                    = heightmapBitArray.getAt(cz * 16 + cx)
                            except IndexError:
                                # Column lies past the end of the rect.
                                # NOTE(review): negative indices wrap in
                                # NumPy instead of raising.
                                pass

        if showPerf:
            print(f"took {time.perf_counter() - t0}s")
        t0 = time.perf_counter()

        # sections
        print("extracting chunk sections")

        for x in range(chunkCountX):
            for z in range(chunkCountZ):
                chunkID = x + z * chunkCountX
                chunk = self.nbtfile['Chunks'][chunkID]
                chunkSections = chunk['Level']['Sections']

                for section in chunkSections:
                    y = section['Y'].value

                    # Sections without block state data are left as None.
                    if ('BlockStates' not in section
                            or len(section['BlockStates']) == 0):
                        continue

                    palette = section['Palette']
                    rawBlockStates = section['BlockStates']
                    # At least 4 bits per entry, more for larger palettes.
                    bitsPerEntry = max(4, ceil(log2(len(palette))))
                    blockStatesBitArray = BitArray(
                        bitsPerEntry, 16 * 16 * 16, rawBlockStates)

                    self.sections[x][z][y] = CachedSection(
                        palette, blockStatesBitArray)

        if showPerf:
            print(f"took {time.perf_counter() - t0}s")
        print("done")

예제 #7

0

파일 보기

파일: bloomfilter.py 프로젝트: Jackychen8/advanced_python

 def __init__(self, iterable=(), population=56, probes=6):
     """Create the filter and add every item of ``iterable`` to it.

     Args:
         iterable: initial items; each one is passed to ``self.add``.
         population: size handed to ``BitArray`` and upper bound of the
             ``self.population`` index range.
         probes: stored as ``self.probes``.
     """
     # ``xrange`` only exists on Python 2; ``range`` yields the same
     # indices on both major versions.
     self.population = range(population)
     self.probes = probes
     self.data = BitArray(population)
     for name in iterable:
         self.add(name)

예제 #8

0

파일 보기

파일: nbtLoaderTest.py 프로젝트: nilsgawlik/gdmc_2021_submission

# Manual smoke test: fetch a 2x2 chunk area, parse it as NBT and print the
# palette entry of every block in the first chunk's non-empty sections.
# The payload is named chunkBytes so the builtin ``bytes`` stays usable.
chunkBytes = getChunks(0, 0, 2, 2, rtype='bytes')
print(len(chunkBytes))

print(chunkBytes)
# print(getChunks(0, 0, 2, 2, rtype='text'))
print("")
file_like = BytesIO(chunkBytes)

nbtfile = nbt.nbt.NBTFile(buffer=file_like)

print(nbtfile['Chunks'])
print(nbtfile['Chunks'][0]['Level']['Sections'])
sections = nbtfile['Chunks'][0]['Level']['Sections']


def sectionIsEmpty(section):
    """Return True if ``section`` has no ``BlockStates`` or an empty one."""
    return 'BlockStates' not in section or len(section['BlockStates']) == 0


for section in sections:
    if sectionIsEmpty(section):
        continue

    palette = section['Palette']
    blockStates = section['BlockStates']
    # At least 4 bits per entry, more for larger palettes.
    bitsPerEntry = max(4, ceil(log2(len(palette))))
    bitarray = BitArray(bitsPerEntry, 16*16*16, blockStates)

    def printBlock(blockStateID):
        """Print the palette entry stored at index ``blockStateID``."""
        print(palette[blockStateID])

    # printBlock is handed to getAll right away, while ``palette`` still
    # refers to this section's palette.
    bitarray.getAll(printBlock)

예제 #9

0

파일 보기

from bitarray import BitArray
import numpy as np

# Demo: build a BitArray over a ten-byte uint8 buffer and print three
# different index expressions.
buf = np.arange(10, dtype=np.uint8)
offset, shape, strides = 0, (10, 8), (8, 1)

b = BitArray(buf, offset, shape, strides)

# A single (row, column) index, a column slice of row 0, and a full slice.
print(b[1, 0])
print(b[0, 8:16])
print(b[:])

예제 #10

0

파일 보기

파일: IndexCreator.py 프로젝트: antonia69/InformationRetrieval-en.people.cn

    def huffman_compression(self, generate_encoding=False):
        """Huffman-compress ``index.csv`` and write the compressed files.

        Writes three files into ``self.directory``:
        ``symbol_to_encoding_dict.pickle`` (the code table),
        ``compressed_index`` (the encoded lines back to back) and
        ``compressed_seek_list.dawg`` (term -> (offset, length) of its
        encoded line). ``self.compressed_seek_list`` ends up holding the
        ``RecordDAWG``.

        Args:
            generate_encoding: if True, derive the code table from the
                character frequencies of ``index.csv`` (line breaks
                excluded); otherwise use the built-in fixed table.
        """
        if generate_encoding:
            # count all occurring UTF-8 characters
            # NOTE(review): the file is opened with the platform default
            # encoding - confirm that it is UTF-8 where this runs.
            symbol_to_frequency_dict = Counter()
            chunk_size = 100000
            with self.report.measure('counting utf8 characters'):
                with open(f'{self.directory}/index.csv') as index_file:
                    # read() returns '' at end of file, which ends the loop
                    chunks = iter(lambda: index_file.read(chunk_size), '')
                    for i, chunk in enumerate(chunks, 1):
                        symbol_to_frequency_dict.update(chunk)
                        self.report.progress(
                            i, f' chunks counted ({chunk_size} characters '
                            'each)', 100)
                # line breaks get no code
                symbol_to_frequency_dict.pop('\n', None)

            # derive huffman encoding from character counts
            with self.report.measure('deriving huffman encoding'):
                symbol_to_encoding_dict = Huffman.derive_encoding(
                    symbol_to_frequency_dict)
            # Sanity check only. The original also assigned each code into
            # an undefined ``symbol_to_encoding_list`` here, which raised
            # NameError; that list was never read, so it is gone.
            assert all(len(key) == 1 for key in symbol_to_encoding_dict)
        else:
            # optimal encoding for guardian
            # character distribution should be similar for all datasets
            symbol_to_encoding_dict = {
                '\a': BitArray('1111'),
                ',': BitArray('001'),
                '0': BitArray('1000'),
                '1': BitArray('011'),
                '2': BitArray('010'),
                '3': BitArray('000'),
                '4': BitArray('1110'),
                '5': BitArray('1101'),
                '6': BitArray('1100'),
                '7': BitArray('1011'),
                '8': BitArray('1010'),
                '9': BitArray('1001')
            }

        # Written once for both branches (the original wrote the same file
        # twice when generating the encoding).
        with open(f'{self.directory}/symbol_to_encoding_dict.pickle',
                  mode='wb') as f:
            pickle.dump(symbol_to_encoding_dict, f, pickle.HIGHEST_PROTOCOL)

        # save compressed index and corresponding seek_list
        with self.report.measure('saving compressed files'):
            self.compressed_seek_list = []
            with open(f'{self.directory}/compressed_index', mode='wb') \
                    as compressed_index_file:
                offset = 0
                for i, orig_line in enumerate(
                        binary_read_line_generator_path(
                            f'{self.directory}/index.csv'), 1):
                    term = next(
                        csv.reader(io.StringIO(orig_line),
                                   delimiter=posting_list_separator))[0]
                    # NOTE(review): the +3 presumably skips the quoting
                    # around the term plus the separator - confirm against
                    # the index.csv writer.
                    line_without_term = orig_line[len(term) + 3:]
                    encoded_line = Huffman.encode(line_without_term,
                                                  symbol_to_encoding_dict)
                    compressed_index_file.write(encoded_line)

                    self.compressed_seek_list.append(
                        (term, (offset, len(encoded_line))))

                    self.report.progress(i, ' index lines compressed', 100000)

                    offset += len(encoded_line)
            # '>QQ': each record is two big-endian unsigned 64-bit integers
            self.compressed_seek_list = \
                RecordDAWG('>QQ', self.compressed_seek_list)
            self.compressed_seek_list.save(
                f'{self.directory}/compressed_seek_list.dawg')