def write(types, string_dict, ty, tree, out):
    '''Compresses an AST and writes it to a byte stream.

    Note, this may modify tree. ShiftAST produces trees with numbers
    in double fields as ints. Our type-directed encoder coerces them
    to doubles. This updates the input tree, in place, with this
    change.

    Args:
      types: idl.TypeResolver
      string_dict: list of strings stored in an external file.
      ty: the type of 'tree'.
      tree: the AST to encode.
      out: byte-oriented stream to write content to.
    '''
    # Rewrite ints in float position to floats.
    tycheck.FloatFixer(types).rewrite(ty, tree)

    # Check the AST conforms to the IDL.
    tycheck.TypeChecker(types).check_any(ty, tree)

    out.write(SIGNATURE)

    # Collect the local strings and write the string table.
    local_strings = strings.StringCollector(types)
    local_strings.visit(ty, tree)
    local_strings.strings -= set(string_dict)
    local_strings = sorted(local_strings.strings)
    string_dict = local_strings + string_dict
    strings.write_dict(out, local_strings, with_signature=False)

    # Build probability models of the AST and serialize them.
    m = model.model_tree(types, ty, tree)
    model_writer = encode.ModelWriter(types, string_dict, out)
    model_writer.write(ty, m)

    # Now write the file content.
    def write_piece(ty, node, out):
        lazy_parts = lazy.LazyMemberExtractor(types)
        node = lazy_parts.replace(ty, node)
        encode.encode(types, m, out, ty, node)

        # Encode the lazy parts in memory.
        lazy_encoded = []
        for _, attr, part in lazy_parts.lazies:
            buf = io.BytesIO()
            lazy_encoded.append(buf)
            write_piece(attr.resolved_ty, part, buf)

        # Write the dictionary of lazy parts, then the lazy parts.
        bits.write_varint(out, len(lazy_encoded))
        for encoded_part in lazy_encoded:
            bits.write_varint(out, encoded_part.tell())
        for encoded_part in lazy_encoded:
            out.write(encoded_part.getbuffer())

    write_piece(ty, tree, out)

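# The stream written above is laid out as: SIGNATURE, the local string
# table, the serialized probability models, then the root piece. Each
# piece is the encoded tree with its lazy members removed, followed by a
# varint count of lazy parts, a varint byte length per part, and the
# encoded parts themselves, each a piece with this same layout.
#
# A minimal usage sketch for write(), assuming the dictionary file was
# produced by make_dict below and that strings.read_dict accepts
# with_signature=True to match it. The helper name and paths are
# hypothetical.
def _example_encode(dump_path, dict_path, binast_path):
    types = idl.parse_es6_idl()
    ty_script = types.interfaces['Script']
    with open(dump_path) as dump_file:
        proggy = json.loads(dump_file.read())
    with open(dict_path, 'rb') as dict_file:
        string_dict = strings.read_dict(dict_file, with_signature=True)
    with open(binast_path, 'wb') as out:
        write(types, string_dict, ty_script, proggy, out)
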
def make_dict(in_files, out_file):
    '''Builds a string dictionary from a corpus of AST dump files and
    writes it to out_file.'''
    types = idl.parse_es6_idl()
    ty_script = types.interfaces['Script']
    sources = []
    for in_file in in_files:
        proggy = json.loads(in_file.read())
        tycheck.FloatFixer(types).rewrite(ty_script, proggy)
        tycheck.TypeChecker(types).check_any(ty_script, proggy)
        sources.append((ty_script, proggy))
    string_dict = strings.prepare_dict(types, sources)
    strings.write_dict(out_file, string_dict, with_signature=True)

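# Sketch of building a shared dictionary from a corpus of dumps; the
# paths are hypothetical.
#
#   in_files = [open(p) for p in ['a.js.dump', 'b.js.dump']]
#   with open('dict.bin', 'wb') as out_file:
#       make_dict(in_files, out_file)
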
def type_check(in_files):
    '''Type checks each input AST dump against the Script interface and
    prints a per-file OK/NG result.'''
    types = idl.parse_es6_idl()
    ty_script = types.interfaces['Script']
    for in_file in in_files:
        proggy = json.loads(in_file.read())
        print(f'checking {in_file.name}... ', end='')
        try:
            tycheck.TypeChecker(types).check_any(ty_script, proggy)
        except Exception as e:
            # FIXME: Make this a specific exception type, do error recovery, etc.
            print(f'NG, {e!s}')
            continue
        print('OK')

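# Usage sketch (hypothetical paths); prints 'checking <name>... OK' or
# 'NG, <error>' for each input:
#
#   type_check([open('a.js.dump'), open('b.js.dump')])
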
def read(types, string_dict, ty, inp):
    '''Decompresses an AST from a byte stream and returns it.

    >>> import json
    >>> import ast, idl, strings
    >>> types = idl.parse_es6_idl()
    >>> ty_script = types.interfaces['Script']
    >>> tree_in = ast.load_test_ast('y5R7cnYctJv.js.dump')
    >>> #tree_in = ast.load_test_ast('three.min.js.dump')
    >>> string_dict = strings.prepare_dict(types, [(ty_script, tree_in)])
    >>> buf = io.BytesIO()
    >>> write(types, string_dict, ty_script, tree_in, buf)
    >>> buf.tell()
    1884
    >>> buf.seek(0)
    0
    >>> tree_out = read(types, string_dict, ty_script, buf)
    >>> #assert json.dumps(tree_in) == json.dumps(tree_out)
    >>> s_in = json.dumps(tree_in, indent=1).split('\\n')
    >>> s_out = json.dumps(tree_out, indent=1).split('\\n')
    >>> for i, (l_in, l_out) in enumerate(zip(s_in, s_out)):
    ...   if l_in != l_out:
    ...     print(f'{i:3d} {l_in}')
    ...     print(f'    {l_out}')
    ...     print('mismatch')
    ...     break

    Now try to round-trip something which uses laziness:

    >>> import opt
    >>> tree_in = opt.optimize(tree_in)
    lazified 1 functions
    >>> buf = io.BytesIO()
    >>> write(types, string_dict, ty_script, tree_in, buf)
    >>> buf.tell()
    1898
    >>> buf.seek(0)
    0
    >>> tree_out = read(types, string_dict, ty_script, buf)
    >>> assert json.dumps(tree_in) == json.dumps(tree_out)

    Now try to round-trip something which misses the dictionary:

    >>> del string_dict[-10:-3]
    >>> buf = io.BytesIO()
    >>> write(types, string_dict, ty_script, tree_in, buf)
    >>> buf.tell()
    1934
    >>> buf.seek(0)
    0
    >>> tree_out = read(types, string_dict, ty_script, buf)
    >>> assert json.dumps(tree_in) == json.dumps(tree_out)
    '''
    # Read the local string table.
    local_strings = strings.read_dict(inp, with_signature=False)
    string_dict = local_strings + string_dict

    # Read the probability models.
    model_reader = encode.ModelReader(types, string_dict, inp)
    m = model_reader.read(ty)

    def read_piece(ty):
        tree = encode.decode(types, m, ty, inp)

        # Read the dictionary of lazy parts.
        # TODO: We don't need this; it is implicit in the tree we just read.
        num_lazy_parts = bits.read_varint(inp)
        lazy_offsets = [0]
        for _ in range(num_lazy_parts):
            lazy_size = bits.read_varint(inp)
            lazy_offsets.append(lazy_offsets[-1] + lazy_size)
        lazy_offsets = [offset + inp.tell() for offset in lazy_offsets]

        def restore_lazy_part(ty, attr, index):
            inp.seek(lazy_offsets[index])
            part = read_piece(attr.resolved_ty)
            assert inp.tell() == lazy_offsets[index + 1], \
                f'{inp.tell()}, {lazy_offsets[index + 1]}'
            return part

        restorer = lazy.LazyMemberRestorer(types, restore_lazy_part)
        tree = restorer.replace(ty, tree)
        inp.seek(lazy_offsets[-1])
        return tree

    tree = read_piece(ty)
    type_checker = tycheck.TypeChecker(types)
    type_checker.check_any(ty, tree)
    return tree

# This file transforms ASTs in ways outside the scope of file format
# and compression. There are no BinAST minifiers yet, so for now it's
# convenient to integrate it into the encoder/decoder tool.

import doctest

import ast
import idl
import types
import tycheck

# Test helpers.
resolver = idl.parse_es6_idl()
resolver.resolve_types()
checker = tycheck.TypeChecker(resolver)


def check_ty(ty_name, obj):
    '''Checks whether obj matches the type named ty_name.'''
    ty = resolver.resolve(idl.TyRef(ty_name))
    checker.check_any(ty, obj)


def make_iffe():
    '''Makes an AST fragment which is an immediately-invoked function
    expression.

    >>> check_ty('Expression', make_iffe())
    '''
    return {
        'type': 'CallExpression',