def read_dict(inp, with_signature):
    '''Reads a dictionary from byte-oriented input.

    The layout consumed here is: an optional signature (DICT_SIGNATURE),
    a varint string count, then that many strings. Each string is a run
    of UTF-8 bytes ended by a zero byte; a 0x01 byte escapes the byte that
    follows it, so that byte is taken literally.

    Args:
      inp: A binary stream supporting read(n).
      with_signature: If true, DICT_SIGNATURE is expected (and checked)
        at the start of the input.

    Returns:
      The list of decoded strings, in the order they were read.

    Raises:
      EOFError: If the input ends in the middle of a string.
      AssertionError: If with_signature is set and the signature differs.

    >>> import ast, idl, io
    >>> types = idl.parse_es6_idl()
    >>> tree = ast.load_test_ast('y5R7cnYctJv.js.dump')
    >>> strings = prepare_dict(types, [(types.interfaces['Script'], tree)])
    >>> buf = io.BytesIO()
    >>> write_dict(buf, strings, True)
    >>> buf.seek(0)
    0
    >>> back = read_dict(buf, True)
    >>> strings == back
    True
    '''
    if with_signature:
        signature = inp.read(len(DICT_SIGNATURE))
        assert signature == DICT_SIGNATURE, 'signature mismatch: ' + str(signature)
    n_strings = bits.read_varint(inp)
    strings = []
    for _ in range(n_strings):
        buf = bytearray()
        while True:
            b = inp.read(1)
            if b == b'\x01':
                # Escape marker: the next byte is literal data, even if it
                # is 0x00 or 0x01.
                b = inp.read(1)
            elif b == b'\x00':
                # Unescaped zero byte terminates the string.
                break
            if not b:
                # read() yields nothing at end of input; without this check
                # the loop would spin forever on a truncated dictionary.
                raise EOFError(
                    'unexpected end of input while reading dictionary string')
            buf.extend(b)
        strings.append(buf.decode('utf-8'))
    return strings
def decode_symbol(self, ty):
    '''Reads one explicitly-encoded symbol of type `ty` from `self.inp`.

    Strings are read as a varint index into `self.dictionary`. Doubles,
    longs and unsigned longs are read as fixed-size values in network byte
    order via `struct`. Booleans are read as a single byte. An optional
    string (an `idl.Alt` of `idl.TyNone()` and `idl.TY_STRING`) is read as
    a varint where 0 means none and n > 0 means `self.dictionary[n-1]`.

    Args:
      ty: The IDL type of the symbol to read.

    Returns:
      The decoded value. For the optional-string case with a zero varint,
      an `idl.TyNone()` instance. For `idl.TyFrozenArray`, None (see the
      note in that branch).

    Raises:
      AssertionError: If `ty` is not one of the types handled here.
    '''
    if ty == idl.TY_STRING:
        return self.dictionary[bits.read_varint(self.inp)]
    elif ty == idl.TY_DOUBLE:
        return struct.unpack('!d', self.inp.read(struct.calcsize('!d')))[0]
    elif ty == idl.TY_LONG:
        return struct.unpack('!l', self.inp.read(struct.calcsize('!l')))[0]
    elif ty == idl.TY_UNSIGNED_LONG:
        return struct.unpack('!L', self.inp.read(struct.calcsize('!L')))[0]
    elif ty == idl.TY_BOOLEAN:
        return bool(self.inp.read(1)[0])
    elif type(ty) is idl.TyFrozenArray:
        # NOTE(review): the element symbol is consumed from the input but
        # its value is discarded, so this branch returns None. That looks
        # like a missing `return`; confirm against the encoder before
        # changing it.
        self.decode_symbol(ty.element_ty)
    elif type(ty) is idl.Alt and ty.ty_set == set([idl.TyNone(), idl.TY_STRING]):
        n = bits.read_varint(self.inp)
        if n == 0:
            return idl.TyNone()
        else:
            # Indices are shifted by one to leave 0 for "none".
            return self.dictionary[n-1]
    else:
        # The message used to interpolate an undefined name `sym`, which
        # turned this assertion into a NameError.
        assert False, f'unreachable (type should be indexed?) {ty}'
def read_piece(ty):
    '''Decodes one tree of type `ty` from `inp` and restores its lazy parts.

    After the tree itself, the input holds a varint count of lazy parts
    followed by one varint size per part; the parts' bytes come right after
    that table. Each lazy part is decoded by a recursive call to
    `read_piece` and spliced into the tree via `lazy.LazyMemberRestorer`.
    On return, `inp` is positioned just past the last lazy part.

    Args:
      ty: The type to decode, passed to `encode.decode`.

    Returns:
      The tree produced by the restorer's `replace`.
    '''
    decoded = encode.decode(types, m, ty, inp)

    # Read the dictionary of lazy parts
    # TODO: We don't need this; it is implicit in the tree we just read.
    part_count = bits.read_varint(inp)
    boundaries = [0]
    for _ in range(part_count):
        boundaries.append(boundaries[-1] + bits.read_varint(inp))

    # Sizes are relative; anchor them at the position right after the table.
    base = inp.tell()
    lazy_offsets = [base + relative for relative in boundaries]

    def restore_lazy_part(ty, attr, index):
        inp.seek(lazy_offsets[index])
        restored = read_piece(attr.resolved_ty)
        # The nested piece must end exactly where the next part begins.
        assert inp.tell() == lazy_offsets[
            index + 1], f'{inp.tell()}, {lazy_offsets[index + 1]}'
        return restored

    decoded = lazy.LazyMemberRestorer(types, restore_lazy_part).replace(
        ty, decoded)
    # Leave the stream positioned after every lazy part.
    inp.seek(lazy_offsets[-1])
    return decoded
def _decode_model(self, ty):
    '''Reads one serialized model for type `ty` from `self.inp`.

    The first byte selects the encoding:

      0: a single symbol follows (an index for indexed types, otherwise an
         explicit symbol); the model is built with `from_values`.
      1: multiple symbols. For non-indexed types: a varint symbol count,
         then one length byte per symbol, then the symbols themselves. For
         indexed types: one length byte for each symbol of
         `model.symbols_for_indexed_type(ty)`, in that order. The
         (length, tie-break, symbol) triples are handed to
         `model.huffman_assign_order` to build the code tables.
      2: an unreachable model; only expected for `idl.TyFrozenArray`.

    Args:
      ty: The IDL type whose model is being read.

    Returns:
      A `model.IndexedSymbolModel`, `model.ExplicitSymbolModel` or
      `model.UnreachableModel`.

    Raises:
      ValueError: If the kind byte is not 0, 1 or 2.
      AssertionError: If kind is 2 and `ty` is not an `idl.TyFrozenArray`.
    '''
    kind = self.inp.read(1)[0]

    if kind == 0:
        # Single symbol.
        if model.is_indexed_type(ty):
            syms = model.symbols_for_indexed_type(ty)
            sym = self.decode_index(ty)
            return model.IndexedSymbolModel(syms).from_values([sym])
        sym = self.decode_symbol(ty)
        return model.ExplicitSymbolModel().from_values([sym])

    if kind == 1:
        if not model.is_indexed_type(ty):
            # Multiple, explicit.
            # These are not enumerable, we just need to suck in the symbols
            # and their lengths. All lengths come first, then all symbols.
            num_syms = bits.read_varint(self.inp)
            lengths = [self.inp.read(1)[0] for _ in range(num_syms)]
            syms = [self.decode_symbol(ty) for _ in range(num_syms)]
            length_symbol = [
                (length, 0, sym) for length, sym in zip(lengths, syms)]
            m = model.ExplicitSymbolModel()
            m.code_to_symbol, m.symbol_to_code = model.huffman_assign_order(
                length_symbol)
            return m

        # Multiple, indexed. These are enumerable, so only lengths are
        # stored, one per symbol in enumeration order.
        syms = model.symbols_for_indexed_type(ty)
        length_symbol = []
        for sym in syms:
            length = self.inp.read(1)[0]
            # Middle element is a sort tie-break: TyNone sorts before other
            # symbols of the same length.
            tie_break = 1 if type(sym) is idl.TyNone else 2
            length_symbol.append((length, tie_break, sym))
        m = model.IndexedSymbolModel(syms)
        m.code_to_symbol, m.symbol_to_code = model.huffman_assign_order(
            sorted(length_symbol))
        return m

    if kind == 2:
        assert type(ty) is idl.TyFrozenArray
        return model.UnreachableModel()

    # Previously an unknown kind fell through and returned None, deferring
    # the failure to whichever caller first used the missing model.
    raise ValueError(f'unknown model kind: {kind}')
def decode_index(self, ty):
    '''Reads a varint from `self.inp` and maps it to a symbol of `ty`.

    The varint is used as a position in the list returned by
    `model.symbols_for_indexed_type(ty)`.

    Args:
      ty: An indexed IDL type.

    Returns:
      The symbol found at the decoded position.
    '''
    position = bits.read_varint(self.inp)
    candidates = model.symbols_for_indexed_type(ty)
    return candidates[position]