def write_dict(out, strings, with_signature):
    '''Serializes a string dictionary (as built by prepare_dict) to out.

    Layout: DICT_SIGNATURE (only when with_signature is true), a varint
    holding the number of strings, then every string as escaped UTF-8
    followed by a 0x00 terminator.

    out must accept bytes through write().
    '''
    if with_signature:
        out.write(DICT_SIGNATURE)

    bits.write_varint(out, len(strings))

    for text in strings:
        # Any 0x00 or 0x01 byte in the payload gets a 0x01 prefix, so a bare
        # 0x00 can only ever be the terminator written below.
        escaped = re.sub(b'([\x00\x01])', b'\x01\\1', text.encode('utf-8'))
        out.write(escaped)
        out.write(b'\x00')
def encode_model(self, ty, m):
    '''Writes the serialized form of model m, which codes values of type ty.

    A TrivialModel produces no output. Every other model starts with a
    one-byte tag written to self.out:

      0x02  UnreachableModel; nothing follows.
      0x00  The model has exactly one symbol. The symbol itself follows for
            an ExplicitSymbolModel, or its index (m.index[sym]) for an
            IndexedSymbolModel.
      0x01  The model has several symbols.
            ExplicitSymbolModel: a varint symbol count, one code-length
            byte per symbol, then the symbols in the same order.
            IndexedSymbolModel: one code-length byte for every entry of
            m.symbols, using 0 for entries without a code.

    Raises AssertionError if the model kind and ty disagree about being
    indexed, if a code length does not fit in one byte, or if m is of an
    unrecognised model type.
    '''
    if type(m) is model.TrivialModel:
        return

    if type(m) is model.UnreachableModel:
        self.out.write(b'\x02')
    elif len(m.symbol_to_code) == 1:
        # Compared by value; an identity test against an int literal only
        # works by accident of small-int caching.
        self.out.write(b'\x00')
        sym = list(m.code_to_symbol.values())[0]
        if type(m) is model.ExplicitSymbolModel:
            assert not model.is_indexed_type(ty)
            self.encode_symbol(ty, sym)
            return
        assert type(m) is model.IndexedSymbolModel
        assert model.is_indexed_type(ty)
        self.encode_index(m.index[sym])
    elif type(m) is model.ExplicitSymbolModel:
        assert not model.is_indexed_type(ty)
        # These are not enumerable, we just need to dump the symbols and
        # their lengths. Entries sort by code length (code[1]); the middle
        # element ranks TyNone ahead of other symbols of equal length --
        # presumably so TyNone is never compared with an ordinary symbol
        # value (TODO confirm).
        length_sym = sorted(
            (code[1], 1 if type(sym) is idl.TyNone else 2, sym)
            for code, sym in m.code_to_symbol.items())
        self.out.write(b'\x01')
        bits.write_varint(self.out, len(length_sym))
        for length, _, _ in length_sym:
            # TODO: In practice lengths are < 32 and we could pack these, etc.
            assert length < 256
            self.out.write(length.to_bytes(1, byteorder='big'))
        for _, _, sym in length_sym:
            self.encode_symbol(ty, sym)
    elif type(m) is model.IndexedSymbolModel:
        self.out.write(b'\x01')
        assert model.is_indexed_type(ty)
        # These are enumerable: walk m.symbols in order and emit only the
        # code lengths.
        for sym in m.symbols:
            code_length = m.symbol_to_code.get(sym)
            # Symbols that were never assigned a code get length 0.
            length = code_length[1] if code_length else 0
            assert length < 256
            self.out.write(length.to_bytes(1, byteorder='big'))
    else:
        # Raised explicitly so the check is not stripped by `python -O`.
        raise AssertionError('unreachable')
def write_piece(ty, node, out):
    '''Encodes node (of type ty) to out, followed by its lazy members.

    Output order:
      1. node, encoded with its lazy members swapped out by
         LazyMemberExtractor.replace;
      2. a varint count of the lazy members;
      3. one varint per lazy member giving its encoded size in bytes;
      4. the encoded lazy members, back to back.

    Each lazy member is itself written with write_piece, so lazy members
    nested inside it are handled recursively.

    NOTE(review): `types` and `m` are not parameters; they are read from
    an enclosing scope that is not visible in this block.
    '''
    extractor = lazy.LazyMemberExtractor(types)
    eager_node = extractor.replace(ty, node)
    encode.encode(types, m, out, ty, eager_node)

    # Lazy members are encoded into memory first because all of their
    # sizes have to be written ahead of their contents.
    buffers = []
    for _, attr, part in extractor.lazies:
        scratch = io.BytesIO()
        write_piece(attr.resolved_ty, part, scratch)
        buffers.append(scratch)

    bits.write_varint(out, len(buffers))
    for scratch in buffers:
        # The buffer is only ever appended to, so the stream position is
        # the number of bytes written.
        bits.write_varint(out, scratch.tell())
    for scratch in buffers:
        out.write(scratch.getbuffer())
def encode_symbol(self, ty, sym):
    '''Writes one explicit (non-indexed) symbol sym of type ty to self.out.

    Encoding by type:
      TY_STRING            varint of self.dictionary[sym]
      TY_DOUBLE            struct format '!d'
      TY_LONG              struct format '!l'
      TY_UNSIGNED_LONG     struct format '!L'
      TY_BOOLEAN           a single byte holding int(sym)
      TyFrozenArray        encoded as a symbol of ty.element_ty
      Alt{TyNone, string}  varint 0 for TyNone, otherwise varint of
                           self.dictionary[sym] + 1

    Raises AssertionError for any other type, or when a TY_STRING symbol is
    not a str. Also raises KeyError if a string is missing from
    self.dictionary, and struct.error if a number does not fit its format.
    '''
    if ty == idl.TY_STRING:
        assert type(sym) is str
        bits.write_varint(self.out, self.dictionary[sym])
    elif ty == idl.TY_DOUBLE:
        self.out.write(struct.pack('!d', sym))
    elif ty == idl.TY_LONG:
        self.out.write(struct.pack('!l', sym))
    elif ty == idl.TY_UNSIGNED_LONG:
        self.out.write(struct.pack('!L', sym))
    elif ty == idl.TY_BOOLEAN:
        self.out.write(int(sym).to_bytes(1, byteorder='big'))
    elif type(ty) is idl.TyFrozenArray:
        self.encode_symbol(ty.element_ty, sym)
    elif type(ty) is idl.Alt and ty.ty_set == {idl.TyNone(), idl.TY_STRING}:
        # Optional string: 0 is reserved for "none", so dictionary indices
        # are shifted up by one.
        if sym == idl.TyNone():
            bits.write_varint(self.out, 0)
        else:
            bits.write_varint(self.out, self.dictionary[sym] + 1)
    else:
        # Raised explicitly: a bare `assert False` is stripped by
        # `python -O`, which would silently write nothing for this symbol.
        raise AssertionError(
            f'unreachable (type should be indexed?) {ty}: {sym}')
def encode_index(self, i):
    '''Writes the index i to self.out as a varint.'''
    sink = self.out
    bits.write_varint(sink, i)