def type_check(in_files):
    types = idl.parse_es6_idl()
    ty_script = types.interfaces['Script']
    for in_file in in_files:
        proggy = json.loads(in_file.read())
        print(f'checking {in_file.name}... ', end='')
        try:
            tycheck.TypeChecker(types).check_any(ty_script, proggy)
        except Exception as e:
            # FIXME: Make this a specific exception type, do error recovery, etc.
            print(f'NG, {e!s}')
            continue
        print('OK')

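# A minimal usage sketch for type_check (the file name below is a
# placeholder, not part of this module): it takes open file objects holding
# JSON-serialized ASTs, not paths, since it calls .read() and .name on each
# element.
#
#     with open('parsed.json') as f:
#         type_check([f])
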
def encode_dir(dict_file, binjs_encode, in_path, out_path,
               skip_errors=True, copy_source=True):
    types = idl.parse_es6_idl()
    ty_script = types.interfaces['Script']
    string_dict = strings.read_dict(dict_file, with_signature=True)
    in_path = os.path.abspath(in_path)
    out_path = os.path.abspath(out_path)
    ignored_out_directory = tempfile.TemporaryDirectory()
    for root, _, sources in os.walk(in_path):
        # 1. Prepare destination directory
        suffix = os.path.relpath(root, in_path)
        dest_root = os.path.join(out_path, suffix)
        print('Encoding from {root} to {dest_root}'.format(
            root=root, dest_root=dest_root))
        os.makedirs(dest_root, exist_ok=True)
        for source in sources:
            source_path = os.path.join(root, source)
            if not source.endswith('.js'):
                print('...skipping {}'.format(source_path))
                continue
            # 2. Extract AST
            print('Preprocessing {}'.format(source_path))
            process = subprocess.run([
                binjs_encode, '--quiet', '--show-ast',
                '--in', source_path,
                '--out', ignored_out_directory.name
            ], capture_output=True)
            try:
                proggy = json.loads(process.stdout.decode('utf-8'))
                # 3. Encode
                dest_path = os.path.join(dest_root, source[:-3] + '.binjs')
                print('Encoding {source_path} => {dest_path}'.format(
                    source_path=source_path, dest_path=dest_path))
                with open(dest_path, 'wb') as dest_file:
                    format.write(types, string_dict, ty_script, proggy,
                                 dest_file)
                # 4. Copy source file
                if copy_source:
                    shutil.copy(source_path, dest_root)
            except Exception:
                if skip_errors:
                    print('...does not parse')
                else:
                    raise

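# A hedged invocation sketch for encode_dir; every path here is a
# placeholder, and the dictionary is assumed to be an open file object as in
# the other commands:
#
#     with open('dict.strings', 'rb') as dict_file:
#         encode_dir(dict_file, '/path/to/binjs_encode',
#                    'corpus/js', 'corpus/binjs')
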
def fix_types(in_file):
    types = idl.parse_es6_idl()
    ty_script = types.interfaces['Script']
    proggy = json.loads(in_file.read())
    tycheck.FloatFixer(types).rewrite(ty_script, proggy)
    json.dump(proggy, sys.stdout)

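# fix_types behaves as a filter: it reads one JSON AST from the given file
# object, rewrites integer literals that should be floats, and dumps the
# result to sys.stdout. Sketch (placeholder file name):
#
#     with open('parsed.json') as f:
#         fix_types(f)   # corrected AST arrives on sys.stdout
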
def decode(dict_file, in_file, out_file):
    types = idl.parse_es6_idl()
    ty_script = types.interfaces['Script']
    string_dict = strings.read_dict(dict_file, with_signature=True)
    proggy = format.read(types, string_dict, ty_script, in_file)
    json.dump(proggy, out_file)

def encode(dict_file, in_file, out_file):
    types = idl.parse_es6_idl()
    ty_script = types.interfaces['Script']
    string_dict = strings.read_dict(dict_file, with_signature=True)
    proggy = json.loads(in_file.read())
    format.write(types, string_dict, ty_script, proggy, out_file)

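# encode and decode mirror each other over the same dictionary, so a
# round-trip sketch looks like this (file names and open modes are
# assumptions, not part of this module):
#
#     with open('dict.strings', 'rb') as d, open('parsed.json') as src, \
#             open('out.binjs', 'wb') as dst:
#         encode(d, src, dst)
#     with open('dict.strings', 'rb') as d, open('out.binjs', 'rb') as src, \
#             open('roundtrip.json', 'w') as dst:
#         decode(d, src, dst)
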
def main():
    parser = argparse.ArgumentParser()
    parser.set_defaults(func=lambda args: print('use --help to see commands'))
    parser.add_argument('--dir', help='Directory to sample/encode',
                        nargs='+', required=True)
    parser.add_argument('--seed', help='Seed value', default=0, type=int)
    parser.add_argument('--sampling',
                        help='Sample probability. 0 = no dictionary',
                        default=0.2, type=float)
    parser.add_argument('--binjs_encode', help='Path to binjs_encode',
                        required=True)
    # argparse's type=bool treats any non-empty string as True, so these
    # flags use store_true instead.
    parser.add_argument('--show_errors', help='Show errors',
                        default=False, action='store_true')
    parser.add_argument('--apply-brotli', help='Apply brotli after encoding',
                        default=False, action='store_true')
    args = parser.parse_args()

    sys.setrecursionlimit(10000)

    # Initialize grammar
    grammar = idl.parse_es6_idl()
    ty_script = grammar.interfaces['Script']
    float_fixer = tycheck.FloatFixer(grammar)

    # Initialize RNG
    rng = random.Random(None)
    rng.seed(args.seed)

    # The files we're going to use to extract a dictionary.
    dictionary_group = []
    # The files we're going to use to test compression level.
    control_group = []

    # Walk subdirectories and sort files into dictionary group / control group.
    for root in args.dir:
        for local, _, paths in os.walk(root):
            if ".git" in local:
                # Skip .git subdirectory
                continue
            for path in paths:
                print("Let's look at %s" % path)
                full_path = os.path.join(local, path)
                if rng.random() < args.sampling:
                    dictionary_group.append(full_path)
                else:
                    control_group.append(full_path)

    # Prepare dictionary
    print("Preparing dictionary")
    dictionary_sources = []
    for i, path in enumerate(dictionary_group):
        print("%(index)d/%(len)d Adding %(path)s to dictionary" % {
            "path": path, "index": i, "len": len(dictionary_group)
        })
        proc = subprocess.run([
            args.binjs_encode, "--quiet", "--show-ast",
            "-i", path,
            "-o", "/tmp/binjs"
        ], capture_output=True)
        if proc.returncode != 0:
            # Skip if the file somehow can't be parsed.
            print("...skipping (cannot parse)")
            if args.show_errors:
                print(proc.stderr)
            continue
        if len(proc.stdout) == 0:
            # We can't handle empty files.
            continue
        try:
            ast = json.loads(proc.stdout)
            float_fixer.rewrite(ty_script, ast)
            dictionary_sources.append((ty_script, ast))
        except Exception as e:
            print("...skipping (cannot process)")
            if args.show_errors:
                print(e)
            continue
    strings_dictionary = strings.prepare_dict(grammar, dictionary_sources)

    # Compress with dictionary
    print("Compressing with dictionary")
    total_encoded_size = 0
    total_unencoded_brotli_size = 0
    for i, path in enumerate(control_group):
        print("%(index)d/%(len)d Compressing %(path)s with dictionary" % {
            "path": path, "index": i, "len": len(control_group)
        })
        TMP_DEST_PATH = "/tmp/encoded.binjs"

        # Execute external binjs_encode to parse JavaScript
        proc = subprocess.run([
            args.binjs_encode, "--quiet", "--show-ast",
            "-i", path,
            "-o", "/tmp/binjs"
        ], capture_output=True)
        if proc.returncode != 0:
            # Skip if the file somehow can't be parsed.
            print("...skipping (cannot parse)")
            if args.show_errors:
                print(proc.stderr)
            continue
        if len(proc.stdout) == 0:
            # We can't handle empty files.
            continue
        try:
            # Rewrite integer literals which should be floating point numbers
            ast = json.loads(proc.stdout)
            float_fixer.rewrite(ty_script, ast)
        except Exception as e:
            print("...skipping (cannot process)")
            if args.show_errors:
                print(e)
            continue

        # Encode file
        with open(TMP_DEST_PATH, 'wb') as dest:
            format.write(grammar, strings_dictionary, ty_script, ast, dest)

        if args.apply_brotli:
            # Compress encoded version
            proc = subprocess.run(
                ["brotli", "--stdout", TMP_DEST_PATH, "--best"],
                capture_output=True)
            proc.check_returncode()
            len_encoded = len(proc.stdout)
        else:
            len_encoded = os.stat(TMP_DEST_PATH).st_size
        total_encoded_size += len_encoded

        # Compress unencoded version, for comparison
        proc = subprocess.run(["brotli", "--stdout", path, "--best"],
                              capture_output=True)
        proc.check_returncode()
        raw_brotli = proc.stdout
        total_unencoded_brotli_size += len(raw_brotli)

        print("... ratio: %f" % (len_encoded / len(raw_brotli)))
        print("... global ratio so far: %f"
              % (total_encoded_size / total_unencoded_brotli_size))

    print("Run complete")
    print("Global ratio: %f"
          % (total_encoded_size / total_unencoded_brotli_size))

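# Standard entry-point guard; assumed here, since this module defines an
# argparse-based CLI and main() is otherwise never invoked.
if __name__ == '__main__':
    main()
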
# LICENSE file in the root directory of this source tree.

# This file transforms ASTs in ways outside the scope of the file format
# and compression. There are no BinAST minifiers yet, so for now it's
# convenient to integrate it into the encoder/decoder tool.

import doctest

import ast
import idl
import types
import tycheck

# Test helpers.
resolver = idl.parse_es6_idl()
resolver.resolve_types()
checker = tycheck.TypeChecker(resolver)


def check_ty(ty_name, obj):
    '''Checks whether obj matches the type named ty_name.'''
    ty = resolver.resolve(idl.TyRef(ty_name))
    checker.check_any(ty, obj)


def make_iffe():
    '''Makes an AST fragment which is an immediately-invoked function
    expression.

    >>> check_ty('Expression', make_iffe())
    '''