Example #1
def type_check(in_files):
  '''Type-checks each JSON-encoded AST in in_files against the Script interface.'''
  types = idl.parse_es6_idl()
  ty_script = types.interfaces['Script']
  for in_file in in_files:
    proggy = json.loads(in_file.read())
    print(f'checking {in_file.name}... ', end='')
    try:
      tycheck.TypeChecker(types).check_any(ty_script, proggy)
    except Exception as e:
      # FIXME: Make this a specific exception type, do error recovery, etc.
      print(f'NG, {e!s}')
      continue
    print('OK')
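
A minimal sketch of wiring type_check into a command line, assuming the helpers above live in one module; the entry-point name below is hypothetical:

import argparse

def main_type_check():
  parser = argparse.ArgumentParser(description='Type-check JSON-encoded ASTs')
  parser.add_argument('in_files', nargs='+', type=argparse.FileType('r'),
                      help='JSON AST files produced by binjs_encode --show-ast')
  args = parser.parse_args()
  type_check(args.in_files)

if __name__ == '__main__':
  main_type_check()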
Example #2
def encode_dir(dict_file,
               binjs_encode,
               in_path,
               out_path,
               skip_errors=True,
               copy_source=True):
    '''Walks in_path, extracts an AST from each .js source with binjs_encode,
    and writes encoded .binjs files into a mirrored tree under out_path.'''
    types = idl.parse_es6_idl()
    ty_script = types.interfaces['Script']
    string_dict = strings.read_dict(dict_file, with_signature=True)
    in_path = os.path.abspath(in_path)
    out_path = os.path.abspath(out_path)
    ignored_out_directory = tempfile.TemporaryDirectory()
    for root, _, sources in os.walk(in_path):
        # 1. Prepare destination directory
        suffix = os.path.relpath(root, in_path)
        dest_root = os.path.join(out_path, suffix)
        print('Encoding from {root} to {dest_root}'.format(
            root=root, dest_root=dest_root))
        os.makedirs(dest_root, exist_ok=True)

        for source in sources:
            source_path = os.path.join(root, source)
            if not source.endswith('.js'):
                print('...skipping {}'.format(source_path))
                continue

            # 2. Extract AST
            print('Preprocessing {}'.format(source_path))
            process = subprocess.run([
                binjs_encode, '--quiet', '--show-ast', '--in', source_path,
                '--out', ignored_out_directory.name
            ],
                                     capture_output=True)
            try:
                proggy = json.loads(process.stdout.decode('utf-8'))

                # 3. Encode
                dest_path = os.path.join(dest_root, source[:-3] + '.binjs')
                print('Encoding {source_path} => {dest_path}'.format(
                    source_path=source_path, dest_path=dest_path))
                with open(dest_path, 'wb') as dest_file:
                    format.write(types, string_dict, ty_script, proggy,
                                 dest_file)

                # 4. Copy source file
                if copy_source:
                    shutil.copy(source_path, dest_root)
            except Exception:
                if skip_errors:
                    print('...does not parse')
                else:
                    raise
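
A hedged example invocation of encode_dir; the dictionary path, its binary open mode, and the binjs_encode location are assumptions:

with open('out/dict.strings', 'rb') as dict_file:
    encode_dir(dict_file,
               binjs_encode='/path/to/binjs_encode',
               in_path='corpus/js',
               out_path='corpus/binjs')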
Example #3
def fix_types(in_file):
  '''Rewrites integer literals that should be floats and dumps the AST to stdout.'''
  types = idl.parse_es6_idl()
  ty_script = types.interfaces['Script']
  proggy = json.loads(in_file.read())
  tycheck.FloatFixer(types).rewrite(ty_script, proggy)
  json.dump(proggy, sys.stdout)
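
A short usage sketch; the input path is a placeholder, and the rewritten AST goes to sys.stdout:

with open('parsed_ast.json') as in_file:
  fix_types(in_file)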
Example #4
def decode(dict_file, in_file, out_file):
  '''Decodes a .binjs stream from in_file back into a JSON AST on out_file.'''
  types = idl.parse_es6_idl()
  ty_script = types.interfaces['Script']
  string_dict = strings.read_dict(dict_file, with_signature=True)
  proggy = format.read(types, string_dict, ty_script, in_file)
  json.dump(proggy, out_file)
Example #5
def encode(dict_file, in_file, out_file):
  '''Encodes a JSON AST from in_file into the binary format on out_file.'''
  types = idl.parse_es6_idl()
  ty_script = types.interfaces['Script']
  string_dict = strings.read_dict(dict_file, with_signature=True)
  proggy = json.loads(in_file.read())
  format.write(types, string_dict, ty_script, proggy, out_file)
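
A hedged round-trip sketch using the two helpers above; the file names and the binary/text open modes are assumptions about what strings.read_dict, format.write, and format.read expect:

with open('dict.strings', 'rb') as dict_file, \
     open('program.json', 'r') as in_file, \
     open('program.binjs', 'wb') as out_file:
  encode(dict_file, in_file, out_file)

with open('dict.strings', 'rb') as dict_file, \
     open('program.binjs', 'rb') as in_file, \
     open('program_decoded.json', 'w') as out_file:
  decode(dict_file, in_file, out_file)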
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.set_defaults(func=lambda args: print('use --help to see commands'))
    parser.add_argument('--dir',
                        help='Directory to sample/encode',
                        nargs='+',
                        required=True)
    parser.add_argument('--seed', help='Seed value', default=0, type=int)
    parser.add_argument('--sampling',
                        help='Sample probability. 0 = no dictionary',
                        default=0.2,
                        type=float)
    parser.add_argument('--binjs_encode',
                        help='Path to binjs_encode',
                        required=True)
    parser.add_argument('--show_errors',
                        help='Show errors',
                        action='store_true')
    parser.add_argument('--apply-brotli',
                        help='Apply brotli after encoding',
                        action='store_true')
    args = parser.parse_args()

    sys.setrecursionlimit(10000)

    # Initialize grammar
    grammar = idl.parse_es6_idl()
    ty_script = grammar.interfaces['Script']
    float_fixer = tycheck.FloatFixer(grammar)

    # Initialize RNG
    rng = random.Random(None)
    rng.seed(args.seed)

    # The files we're going to use to extract a dictionary.
    dictionary_group = []

    # The files we're going to use to test compression level.
    control_group = []

    # Walk subdirectories and sort files to dictionary group / control group.
    for root in args.dir:
        for local, _, paths in os.walk(root):
            if local.find(".git") != -1:
                # Skip .git subdirectory
                continue
            for path in paths:
                print("Let's look at %s" % [path])
                full_path = os.path.join(local, path)
                if rng.random() < args.sampling:
                    dictionary_group.append(full_path)
                else:
                    control_group.append(full_path)

    # Prepare dictionary
    print("Preparing dictionary")
    dictionary_sources = []
    for i, path in enumerate(dictionary_group):
        print("%(index)d/%(len)d Adding %(path)s to dictionary" % {
            "path": path,
            "index": i,
            "len": len(dictionary_group)
        })
        proc = subprocess.run([
            args.binjs_encode, "--quiet", "--show-ast", "-i", path, "-o",
            "/tmp/binjs"
        ],
                              capture_output=True)

        if proc.returncode != 0:
            # Skip if the file somehow can't be processed.
            print("...skipping (cannot parse)")
            if args.show_errors:
                print(proc.stderr)
            continue

        if len(proc.stdout) == 0:
            # We can't handle empty files.
            continue

        try:
            ast = json.loads(proc.stdout)
            float_fixer.rewrite(ty_script, ast)
            dictionary_sources.append((ty_script, ast))
        except Exception as e:
            print("... skipping (cannot process)")
            if args.show_errors:
                print(e)
            continue

    strings_dictionary = strings.prepare_dict(grammar, dictionary_sources)

    # Compress with dictionary
    print("Compressing with dictionary")
    total_encoded_size = 0
    total_unencoded_brotli_size = 0
    for i, path in enumerate(control_group):
        print("%(index)d/%(len)d Compressing %(path)s with dictionary" % {
            "path": path,
            "index": i,
            "len": len(control_group)
        })
        TMP_DEST_PATH = "/tmp/encoded.binjs"

        # Execute external binjs_encode to parse JavaScript
        proc = subprocess.run([
            args.binjs_encode, "--quiet", "--show-ast", "-i", path, "-o",
            "/tmp/binjs"
        ],
                              capture_output=True)
        if proc.returncode != 0:
            # Skip if the file somehow can't be processed.
            print("...skipping (cannot parse)")
            if args.show_errors:
                print(proc.stderr)
            continue

        if len(proc.stdout) == 0:
            # We can't handle empty files.
            continue

        ast = None
        try:
            # Rewrite integer literals which should be floating point numbers
            ast = json.loads(proc.stdout)
            float_fixer.rewrite(ty_script, ast)
        except Exception as e:
            print("... skipping (cannot process)")
            if args.show_errors:
                print(e)
            continue

        # Encode file
        with open(TMP_DEST_PATH, 'wb') as dest:
            format.write(grammar, strings_dictionary, ty_script, ast, dest)

        len_encoded = 0
        if args.apply_brotli:
            # Compress encoded version
            proc = subprocess.run(
                ["brotli", "--stdout", TMP_DEST_PATH, "--best"],
                capture_output=True)
            proc.check_returncode()
            encoded_brotli = proc.stdout
            len_encoded = len(encoded_brotli)
        else:
            len_encoded = os.stat(TMP_DEST_PATH).st_size

        total_encoded_size += len_encoded

        # Compress unencoded version, for comparison
        proc = subprocess.run(["brotli", "--stdout", path, "--best"],
                              capture_output=True)
        proc.check_returncode()
        raw_brotli = proc.stdout
        total_unencoded_brotli_size += len(raw_brotli)

        print("... ratio: %f" % (len_encoded / len(raw_brotli)))
        print("... global ratio so far: %f" %
              (total_encoded_size / total_unencoded_brotli_size))

    print("Run complete")
    print("Global ratio: %f" %
          (total_encoded_size / total_unencoded_brotli_size))
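
A minimal entry point plus a hypothetical invocation (script name and paths are placeholders), assuming main() is not already wired up elsewhere in the module:

if __name__ == '__main__':
    main()

# Example:
#   python compare_sizes.py --dir corpus/js \
#       --binjs_encode target/release/binjs_encode --apply-brotli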
Example #7
# LICENSE file in the root directory of this source tree.

# This file transforms ASTs in ways outside the scope of file format
# and compression. There are no BinAST minifiers yet, so for now it's
# convenient to integrate it into the encoder/decoder tool.

import doctest

import ast
import idl
import types
import tycheck

# Test helpers.

resolver = idl.parse_es6_idl()
resolver.resolve_types()
checker = tycheck.TypeChecker(resolver)


def check_ty(ty_name, obj):
  '''Checks whether obj matches the type named ty_name.'''
  ty = resolver.resolve(idl.TyRef(ty_name))
  checker.check_any(ty, obj)


def make_iffe():
  '''Makes an AST fragment which is an immediately-invoked function expression.

  >>> check_ty('Expression', make_iffe())
  '''