Exemple #1
0
def test_vocab_codec():
    """Round-trip three vocab IDs through a HuffmanCodec built from the vocab.

    The vocab is populated with hand-written lexeme property dicts, the codec
    is built from each entry's ``(orth, prob)`` pair, and the encoded message
    must decode back to the original ID sequence.
    """
    def make_props(word, prob):
        """Return the lexeme property dict stored in the vocab for *word*."""
        return {
            'flags': 0,
            'length': len(word),
            'orth': word,
            'lower': word,
            'norm': word,
            'shape': word,
            'prefix': word[0],
            'suffix': word[-3:],
            'cluster': 0,
            'prob': prob,
            'sentiment': 0,
        }

    vocab = Vocab()
    # Insertion order is kept the same as the original assignments.
    for word, prob in (('dog', 0.001), ('the', 0.05), ('jumped', 0.005)):
        vocab[word] = make_props(word, prob)

    codec = HuffmanCodec([(entry.orth, entry.prob) for entry in vocab])

    expected = numpy.array(
        [vocab[word].orth for word in ('the', 'dog', 'jumped')],
        dtype=numpy.int32,
    )
    # Snapshot taken before encoding so the comparison uses the original IDs.
    expected_list = list(expected)

    bits = BitArray()
    codec.encode(expected, bits)

    # Output buffer starts with 0..n-1; decode is expected to fill it in place.
    decoded = numpy.array(range(len(expected)), dtype=numpy.int32)
    bits.seek(0)
    codec.decode(bits, decoded)
    assert expected_list == list(decoded)
Exemple #2
0
def test_binary():
    """Encode a 0/1 message with _BinaryCodec and check it decodes unchanged."""
    codec = _BinaryCodec()
    bits = BitArray()

    original = numpy.array([0, 1, 0, 1, 1], numpy.int32)
    codec.encode(original, bits)

    # Zero-filled output buffer of the same length as the message.
    decoded = numpy.array([0] * len(original), numpy.int32)
    bits.seek(0)  # rewind before reading back what encode wrote
    codec.decode(bits, decoded)

    assert list(original) == list(decoded)
Exemple #3
0
def test_binary():
    """Check that a binary message survives an encode/decode round trip."""
    codec = _BinaryCodec()
    bits = BitArray()

    message = numpy.array([0, 1, 0, 1, 1], numpy.int32)
    codec.encode(message, bits)

    # Output buffer: same length as the message, initialised to zeros.
    output = numpy.array([0] * len(message), numpy.int32)
    bits.seek(0)  # rewind before decoding
    codec.decode(bits, output)

    assert list(message) == list(output)
Exemple #4
0
def test_char_packer(vocab):
    """Round-trip a byte string through the character codec of a Packer.

    ``vocab`` is supplied by the caller (presumably a test fixture defined
    elsewhere -- confirm).
    """
    char_codec = Packer(vocab, []).char_codec
    bits = BitArray()
    bits.seek(0)

    payload = bytearray(b'the dog jumped')
    char_codec.encode(payload, bits)

    # One placeholder slot per input byte; decode is expected to overwrite them.
    decoded = [b'' for _ in payload]
    bits.seek(0)
    char_codec.decode(bits, decoded)

    assert bytearray(decoded) == payload
Exemple #5
0
def test_vocab_codec():
    """Round-trip three vocab IDs through a HuffmanCodec built from the vocab."""
    vocab = Vocab()
    # Look each word up once, in the original order. Presumably the lookup
    # registers the word so it appears when the vocab is iterated -- confirm.
    for word in ('dog', 'the', 'jumped'):
        _ = vocab[word]

    codec = HuffmanCodec([(entry.orth, entry.prob) for entry in vocab])

    expected = numpy.array(
        [vocab[word].orth for word in ('the', 'dog', 'jumped')],
        dtype=numpy.int32,
    )
    # Snapshot taken before encoding so the comparison uses the original IDs.
    expected_list = list(expected)

    bits = BitArray()
    codec.encode(expected, bits)

    # Output buffer starts with 0..n-1; decode is expected to fill it in place.
    decoded = numpy.array(range(len(expected)), dtype=numpy.int32)
    bits.seek(0)
    codec.decode(bits, decoded)
    assert expected_list == list(decoded)
Exemple #6
0
def test_attribute():
    """Round-trip two symbol IDs through a HuffmanCodec built from word counts."""
    # Key order is kept as in the original literals: it determines the order
    # of the (id, freq) pairs handed to the codec.
    freqs = {
        'the': 10, 'quick': 3, 'brown': 4, 'fox': 1, 'jumped': 5,
        'over': 8, 'lazy': 1, 'dog': 2, '.': 9,
    }
    int_map = {
        'the': 0, 'quick': 1, 'brown': 2, 'fox': 3, 'jumped': 4,
        'over': 5, 'lazy': 6, 'dog': 7, '.': 8,
    }

    weighted_ids = [(int_map[word], count) for word, count in freqs.items()]
    codec = HuffmanCodec(weighted_ids)

    # IDs 1 and 7, i.e. 'quick' followed by 'dog'.
    message = numpy.array([int_map['quick'], int_map['dog']], dtype=numpy.int32)
    expected = list(message)

    bits = BitArray()
    codec.encode(message, bits)

    decoded = numpy.array([0] * len(message), dtype=numpy.int32)
    bits.seek(0)  # rewind before decoding
    codec.decode(bits, decoded)
    assert expected == list(decoded)
Exemple #7
0
def test_vocab_codec():
    """Check that vocab IDs survive a HuffmanCodec encode/decode round trip."""
    vocab = Vocab()
    # Touch each word once, in the original order. Presumably the lookup adds
    # the entry to the vocab so the iteration below sees it -- confirm.
    for token in ('dog', 'the', 'jumped'):
        _ = vocab[token]

    codec = HuffmanCodec([(item.orth, item.prob) for item in vocab])

    message = numpy.array(
        [vocab[token].orth for token in ('the', 'dog', 'jumped')],
        dtype=numpy.int32,
    )
    # Copy the IDs out before encoding so the assertion compares originals.
    message_ids = list(message)

    bits = BitArray()
    codec.encode(message, bits)

    # Buffer is pre-filled with 0..n-1; decode is expected to overwrite it.
    output = numpy.array(range(len(message)), dtype=numpy.int32)
    bits.seek(0)
    codec.decode(bits, output)
    assert message_ids == list(output)
Exemple #8
0
def test_attribute():
    """Check a HuffmanCodec built from word counts round-trips two IDs."""
    # Literal key order is unchanged: freqs.items() order feeds the codec.
    freqs = {
        'the': 10, 'quick': 3, 'brown': 4, 'fox': 1, 'jumped': 5,
        'over': 8, 'lazy': 1, 'dog': 2, '.': 9,
    }
    int_map = {
        'the': 0, 'quick': 1, 'brown': 2, 'fox': 3, 'jumped': 4,
        'over': 5, 'lazy': 6, 'dog': 7, '.': 8,
    }

    id_freq_pairs = [(int_map[token], n) for token, n in freqs.items()]
    codec = HuffmanCodec(id_freq_pairs)

    # The message is [1, 7]: the IDs of 'quick' and 'dog'.
    msg = numpy.array([int_map['quick'], int_map['dog']], dtype=numpy.int32)
    msg_ids = list(msg)

    bits = BitArray()
    codec.encode(msg, bits)

    output = numpy.array([0] * len(msg), dtype=numpy.int32)
    bits.seek(0)  # rewind before decoding
    codec.decode(bits, output)
    assert msg_ids == list(output)
Exemple #9
0
def test_char_packer(vocab):
    """Check that the Packer's character codec round-trips a byte string.

    ``vocab`` comes from the caller (presumably a fixture defined elsewhere
    -- confirm).
    """
    codec = Packer(vocab, []).char_codec
    bits = BitArray()
    bits.seek(0)

    source = bytearray(b'the dog jumped')
    codec.encode(source, bits)

    # Placeholder list with one slot per byte; decode is expected to fill it.
    output = [b'' for _ in source]
    bits.seek(0)
    codec.decode(bits, output)

    assert bytearray(output) == source