Example no. 1
0
def encode_dir(dict_file,
               binjs_encode,
               in_path,
               out_path,
               skip_errors=True,
               copy_source=True):
    '''Walk in_path, encode every .js file to a .binjs file under out_path.

    dict_file: open file (or path accepted by strings.read_dict) holding the
        shared string dictionary, read with its signature.
    binjs_encode: path to the external binjs_encode binary used to produce the
        JSON AST for each source file.
    in_path, out_path: source and destination directory roots; the relative
        layout of in_path is mirrored under out_path.
    skip_errors: when True, a file that fails to parse/encode is reported and
        skipped; when False the exception propagates.
    copy_source: when True, the original .js file is copied next to its
        .binjs output.
    '''
    types = idl.parse_es6_idl()
    ty_script = types.interfaces['Script']
    string_dict = strings.read_dict(dict_file, with_signature=True)
    in_path = os.path.abspath(in_path)
    out_path = os.path.abspath(out_path)
    # binjs_encode requires an --out directory, but we only consume its
    # stdout; its file output is discarded with the temp directory.
    ignored_out_directory = tempfile.TemporaryDirectory()
    for root, _, sources in os.walk(in_path):
        # 1. Prepare destination directory mirroring the input layout
        suffix = os.path.relpath(root, in_path)
        dest_root = os.path.join(out_path, suffix)
        print('Encoding from {root} to {dest_root}'.format(
            root=root, dest_root=dest_root))
        os.makedirs(dest_root, exist_ok=True)

        for source in sources:
            source_path = os.path.join(root, source)
            if not source.endswith('.js'):
                print('...skipping {}'.format(source_path))
                continue

            # 2. Extract AST via the external encoder; stdout carries the
            # JSON-serialized tree.
            print('Preprocessing {}'.format(source_path))
            process = subprocess.run([
                binjs_encode, '--quiet', '--show-ast', '--in', source_path,
                '--out', ignored_out_directory.name
            ],
                                     capture_output=True)
            try:
                proggy = json.loads(process.stdout.decode('utf-8'))

                # 3. Encode; .js suffix (3 chars) is swapped for .binjs
                dest_path = os.path.join(dest_root, source[:-3] + '.binjs')
                print('Encoding {source_path} => {dest_path}'.format(
                    source_path=source_path, dest_path=dest_path))
                # 'with' guarantees the output file is flushed and closed
                # even if format.write raises.
                with open(dest_path, 'wb') as dest_file:
                    format.write(types, string_dict, ty_script, proggy,
                                 dest_file)

                # 4. Copy source file alongside the encoded output
                if copy_source:
                    shutil.copy(source_path, dest_root)
            except Exception:
                # Narrowed from a bare 'except:' so KeyboardInterrupt and
                # SystemExit still propagate.
                if skip_errors:
                    print('...does not parse')
                else:
                    raise
Example no. 2
0
def read(types, string_dict, ty, inp):
    '''Decompresses ast from a byte stream and returns an AST.

  >>> import json
  >>> import ast, idl, strings
  >>> types = idl.parse_es6_idl()
  >>> ty_script = types.interfaces['Script']
  >>> tree_in = ast.load_test_ast('y5R7cnYctJv.js.dump')
  >>> #tree_in = ast.load_test_ast('three.min.js.dump')
  >>> string_dict = strings.prepare_dict(types, [(ty_script, tree_in)])
  >>> buf = io.BytesIO()
  >>> write(types, string_dict, ty_script, tree_in, buf)
  >>> buf.tell()
  1884
  >>> buf.seek(0)
  0
  >>> tree_out = read(types, string_dict, ty_script, buf)
  >>> #assert json.dumps(tree_in) == json.dumps(tree_out)
  >>> s_in = json.dumps(tree_in, indent=1).split('\\n')
  >>> s_out = json.dumps(tree_out, indent=1).split('\\n')
  >>> for i, (l_in, l_out) in enumerate(zip(s_in, s_out)):
  ...   if l_in != l_out:
  ...     print(f'{i:3d} {l_in}')
  ...     print(f'     {l_out}')
  ...     print('mismatch')
  ...     break

  Now try to round-trip something which uses laziness:

  >>> import opt
  >>> tree_in = opt.optimize(tree_in)
  lazified 1 functions
  >>> buf = io.BytesIO()
  >>> write(types, string_dict, ty_script, tree_in, buf)
  >>> buf.tell()
  1898
  >>> buf.seek(0)
  0
  >>> tree_out = read(types, string_dict, ty_script, buf)
  >>> assert json.dumps(tree_in) == json.dumps(tree_out)

  Now try to round-trip something which misses the dictionary:

  >>> del string_dict[-10:-3]
  >>> buf = io.BytesIO()
  >>> write(types, string_dict, ty_script, tree_in, buf)
  >>> buf.tell()
  1934
  >>> buf.seek(0)
  0
  >>> tree_out = read(types, string_dict, ty_script, buf)
  >>> assert json.dumps(tree_in) == json.dumps(tree_out)
  '''

    # Read the local string table. Strings specific to this file are
    # prepended so their indices shadow/precede the shared dictionary's.
    local_strings = strings.read_dict(inp, with_signature=False)
    string_dict = local_strings + string_dict

    # Read the probability models
    model_reader = encode.ModelReader(types, string_dict, inp)
    m = model_reader.read(ty)

    # Decodes one tree ("piece") at the current stream position, then
    # recursively restores its lazy parts, which are stored after it.
    def read_piece(ty):
        tree = encode.decode(types, m, ty, inp)

        # Read the dictionary of lazy parts
        # TODO: We don't need this; it is implicit in the tree we just read.
        num_lazy_parts = bits.read_varint(inp)
        # lazy_offsets[i] is the start of lazy part i relative to the end
        # of this header; entry 0 anchors the running prefix sum.
        lazy_offsets = [0]
        for _ in range(num_lazy_parts):
            lazy_size = bits.read_varint(inp)
            lazy_offsets.append(lazy_offsets[-1] + lazy_size)
        # Rebase the relative offsets to absolute stream positions.
        # Note: inp.tell() must be sampled here, after the varints above
        # have been consumed.
        lazy_offsets = list(
            map(lambda offset: offset + inp.tell(), lazy_offsets))

        # Seeks to lazy part `index` and decodes it; the assert verifies
        # the decoded part consumed exactly its recorded size.
        def restore_lazy_part(ty, attr, index):
            inp.seek(lazy_offsets[index])
            part = read_piece(attr.resolved_ty)
            assert inp.tell() == lazy_offsets[
                index + 1], f'{inp.tell()}, {lazy_offsets[index + 1]}'
            return part

        # Splice each decoded lazy part back into its placeholder slot.
        restorer = lazy.LazyMemberRestorer(types, restore_lazy_part)
        tree = restorer.replace(ty, tree)
        # Leave the stream positioned after the last lazy part so the
        # caller (an outer read_piece) can continue reading.
        inp.seek(lazy_offsets[-1])
        return tree

    tree = read_piece(ty)
    # Validate the fully-restored tree against the IDL before returning.
    type_checker = tycheck.TypeChecker(types)
    type_checker.check_any(ty, tree)
    return tree
Example no. 3
0
def decode(dict_file, in_file, out_file):
  '''Decode a binjs-compressed stream from in_file and dump the AST as JSON.

  dict_file: source of the shared string dictionary (read with signature).
  in_file: binary input stream holding the encoded tree.
  out_file: text stream receiving the JSON-serialized AST.
  '''
  type_universe = idl.parse_es6_idl()
  script_ty = type_universe.interfaces['Script']
  shared_strings = strings.read_dict(dict_file, with_signature=True)
  tree = format.read(type_universe, shared_strings, script_ty, in_file)
  json.dump(tree, out_file)
Example no. 4
0
def encode(dict_file, in_file, out_file):
  '''Encode a JSON-serialized AST from in_file into binjs format on out_file.

  dict_file: source of the shared string dictionary (read with signature).
  in_file: text stream holding the JSON AST.
  out_file: binary output stream receiving the encoded tree.
  '''
  types = idl.parse_es6_idl()
  ty_script = types.interfaces['Script']
  string_dict = strings.read_dict(dict_file, with_signature=True)
  # json.load streams from the file object directly instead of
  # materializing the whole input as a string first.
  proggy = json.load(in_file)
  format.write(types, string_dict, ty_script, proggy, out_file)