# Example 1
# 0
def run_on_js(filename,
              passes,
              extra_info=None,
              just_split=False,
              just_concat=False):
    """Run JS optimizer passes over the generated-functions region of a file.

    The input is cut around ``start_funcs_marker`` / ``end_funcs_marker`` into
    a ``pre`` part, the function bodies and a ``post`` part. The function
    bodies are split into chunks, each chunk is written to a temp file and fed
    to ``JS_OPTIMIZER`` under node (through a multiprocessing pool when there
    is more than one chunk and at least two cores), and the optimizer outputs
    are written back between ``pre`` and ``post``.

    Args:
        filename: Path of the JavaScript file to process.
        passes: A pass name or a list of pass names. ``minifyNames`` is
            rewritten to ``minifyLocals`` and additionally triggers
            minification of the asm shell; ``closure`` and ``cleanup`` are
            removed from the list and applied here to the shell code;
            ``symbolMap=...`` and ``profilingFuncs`` are consumed as minifier
            options when ``minifyNames`` is present.
        extra_info: Optional dict. It is serialized as ``// EXTRA_INFO:`` JSON
            after every chunk; when minifying globals it is merged into the
            minifier's serialized info first.
        just_split: Forwarded to ``split_funcs``; when true every split
            function is used as its own chunk instead of calling ``chunkify``.
        just_concat: When true the optimizer outputs are concatenated as-is
            instead of being re-split and sorted by size.

    Returns:
        The path of the written output file, which ends in ``.jo.js``.

    The input must contain the function markers and the
    ``// EMSCRIPTEN_GENERATED_FUNCTIONS`` suffix line; otherwise
    ``shared.exit_with_error`` is called.
    """
    with ToolchainProfiler.profile_block('js_optimizer.split_markers'):
        if not isinstance(passes, list):
            passes = [passes]

        with open(filename) as src:
            js = src.read()
        if os.linesep != '\n':
            # we assume \n in the splitting code
            js = js.replace(os.linesep, '\n')

        # Find suffix
        suffix_marker = '// EMSCRIPTEN_GENERATED_FUNCTIONS'
        suffix_start = js.find(suffix_marker)
        suffix = ''
        if suffix_start >= 0:
            suffix_end = js.find('\n', suffix_start)
            if suffix_end < 0:
                # The marker sits on an unterminated last line; take all of it
                # rather than letting -1 chop off the final character.
                suffix_end = len(js)
            suffix = js[suffix_start:suffix_end] + '\n'

        # Find markers
        start_funcs = js.find(start_funcs_marker)
        end_funcs = js.rfind(end_funcs_marker)

        # Both function markers and the suffix line are required.
        if start_funcs < 0 or end_funcs < start_funcs or not suffix:
            shared.exit_with_error(
                'Invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s, suffix_start: %s)'
                % (start_funcs, end_funcs, suffix_start))

        minify_globals = 'minifyNames' in passes
        if minify_globals:
            passes = [
                'minifyLocals' if p == 'minifyNames' else p for p in passes
            ]
            start_asm = js.find(start_asm_marker)
            end_asm = js.rfind(end_asm_marker)
            assert (start_asm >= 0) == (end_asm >= 0)

        closure = 'closure' in passes
        if closure:
            # we will do it manually
            passes = [p for p in passes if p != 'closure']

        cleanup = 'cleanup' in passes
        if cleanup:
            # we will do it manually
            passes = [p for p in passes if p != 'cleanup']

    if not minify_globals:
        with ToolchainProfiler.profile_block('js_optimizer.no_minify_globals'):
            funcs_begin = start_funcs + len(start_funcs_marker)
            pre = js[:funcs_begin]
            post = js[end_funcs + len(end_funcs_marker):]
            js = js[funcs_begin:end_funcs]
            if 'asm' not in passes:
                # can have Module[..] and inlining prevention code, push those to post
                kept_lines = []
                final_lines = []
                for line in js.split('\n'):
                    if line and (line.startswith(
                        ('Module[', 'if (globalScope)'))
                                 or line.endswith('["X"]=1;')):
                        final_lines.append(line)
                    else:
                        kept_lines.append(line)
                js = '\n'.join(kept_lines)
                post = '\n'.join(final_lines) + '\n' + post
            post = end_funcs_marker + post
    else:
        with ToolchainProfiler.profile_block('js_optimizer.minify_globals'):
            # We need to split out the asm shell as well, for minification
            asm_begin = start_asm + len(start_asm_marker)
            funcs_begin = start_funcs + len(start_funcs_marker)
            pre = js[:asm_begin]
            post = js[end_asm:]
            asm_shell = js[asm_begin:funcs_begin] + '''
EMSCRIPTEN_FUNCS();
''' + js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)]
            js = js[funcs_begin:end_funcs]

            # we assume there is a maximum of one new name per line
            minifier = Minifier(js)

            # Pull minifier options out of the pass list; everything else is
            # forwarded to the optimizer.
            forwarded_passes = []
            for p in passes:
                if p.startswith('symbolMap='):
                    minifier.symbols_file = p.split('=', 1)[1]
                elif p == 'profilingFuncs':
                    minifier.profiling_funcs = True
                else:
                    forwarded_passes.append(p)
            passes = forwarded_passes

            asm_shell_pre, asm_shell_post = minifier.minify_shell(
                asm_shell, 'minifyWhitespace'
                in passes).split('EMSCRIPTEN_FUNCS();')
            asm_shell_post = asm_shell_post.replace('});', '})')
            pre += asm_shell_pre + '\n' + start_funcs_marker
            post = end_funcs_marker + asm_shell_post + post

            minify_info = minifier.serialize()

            if extra_info:
                for key, value in extra_info.items():
                    assert key not in minify_info or value == minify_info[
                        key], [key, value, minify_info[key]]
                    minify_info[key] = value

    with ToolchainProfiler.profile_block(
            'js_optimizer.remove_suffix_and_split'):
        # remove suffix if no longer needed
        if suffix and 'last' in passes:
            suffix_start = post.find(suffix_marker)
            # Only cut when the marker is really inside post; slicing with a
            # -1 index would otherwise mangle post.
            if suffix_start >= 0:
                suffix_end = post.find('\n', suffix_start)
                if suffix_end < 0:
                    suffix_end = len(post)
                post = post[:suffix_start] + post[suffix_end:]

        total_size = len(js)
        funcs = split_funcs(js, just_split)
        js = None  # drop the reference to the (possibly large) source text

    with ToolchainProfiler.profile_block('js_optimizer.split_to_chunks'):
        # if we are making source maps, we want our debug numbering to start from the
        # top of the file, so avoid breaking the JS into chunks
        cores = building.get_num_cores()

        if not just_split:
            intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
            chunk_size = min(
                MAX_CHUNK_SIZE,
                max(MIN_CHUNK_SIZE, total_size / intended_num_chunks))
            chunks = chunkify(funcs, chunk_size)
        else:
            # keep same chunks as before
            chunks = [f[1] for f in funcs]

        chunks = [chunk for chunk in chunks if len(chunk)]
        if DEBUG and len(chunks):
            print('chunkification: num funcs:',
                  len(funcs),
                  'actual num chunks:',
                  len(chunks),
                  'chunk size range:',
                  max(map(len, chunks)),
                  '-',
                  min(map(len, chunks)),
                  file=sys.stderr)
        funcs = None

        filenames = []
        if len(chunks):
            serialized_extra_info = suffix_marker + '\n'
            if minify_globals:
                serialized_extra_info += '// EXTRA_INFO:' + json.dumps(
                    minify_info)
            elif extra_info:
                serialized_extra_info += '// EXTRA_INFO:' + json.dumps(
                    extra_info)
            with ToolchainProfiler.profile_block('js_optimizer.write_chunks'):

                def write_chunk(chunk, i):
                    # Each chunk file is the chunk followed by the suffix
                    # marker and, if any, the serialized extra info.
                    temp_file = temp_files.get('.jsfunc_%d.js' % i).name
                    with open(temp_file, 'w') as out:
                        out.write(chunk)
                        out.write(serialized_extra_info)
                    return temp_file

                filenames = [
                    write_chunk(chunk, i) for i, chunk in enumerate(chunks)
                ]

    with ToolchainProfiler.profile_block('run_optimizer'):
        if len(filenames):
            commands = [
                config.NODE_JS + [JS_OPTIMIZER, f, 'noPrintMetadata'] + passes
                for f in filenames
            ]

            cores = min(cores, len(filenames))
            if len(chunks) > 1 and cores >= 2:
                # We can parallelize
                if DEBUG:
                    print(
                        'splitting up js optimization into %d chunks, using %d cores  (total: %.2f MB)'
                        % (len(chunks), cores, total_size / (1024 * 1024.)),
                        file=sys.stderr)
                with ToolchainProfiler.profile_block('optimizer_pool'):
                    pool = building.get_multiprocessing_pool()
                    filenames = pool.map(run_on_chunk, commands, chunksize=1)
            else:
                # We can't parallelize, but still break into chunks to avoid uglify/node memory issues
                if len(chunks) > 1 and DEBUG:
                    print('splitting up js optimization into %d chunks' %
                          (len(chunks)),
                          file=sys.stderr)
                filenames = [run_on_chunk(command) for command in commands]

        for chunk_output in filenames:
            temp_files.note(chunk_output)
        # NOTE(review): the original loop reused the name `filename` as its
        # loop variable, so whenever at least one chunk was produced the output
        # name below is derived from the LAST chunk output rather than from
        # the input file. That behavior is kept, but made explicit here.
        if filenames:
            filename = filenames[-1]

    with ToolchainProfiler.profile_block('split_closure_cleanup'):
        if closure or cleanup:
            # run on the shell code, everything but what we js-optimize
            asm_start_line = '// EMSCRIPTEN_START_ASM\n'
            asm_end_line = '// EMSCRIPTEN_END_ASM\n'
            cl_sep = 'wakaUnknownBefore(); var asm=wakaUnknownAfter(wakaGlobal,wakaEnv,wakaBuffer)\n'

            with temp_files.get_file('.cl.js') as cle:
                pre_1, pre_2 = pre.split(asm_start_line)
                post_1, post_2 = post.split(asm_end_line)
                # The asm module itself is replaced by a placeholder call so
                # that only the surrounding shell code is processed.
                with open(cle, 'w') as shell_out:
                    shell_out.write(pre_1)
                    shell_out.write(cl_sep)
                    shell_out.write(post_2)
                cld = cle
                if closure:
                    if DEBUG:
                        print('running closure on shell code', file=sys.stderr)
                    cld = building.closure_compiler(cld,
                                                    pretty='minifyWhitespace'
                                                    not in passes)
                    temp_files.note(cld)
                elif cleanup:
                    if DEBUG:
                        print('running cleanup on shell code', file=sys.stderr)
                    acorn_passes = ['JSDCE']
                    if 'minifyWhitespace' in passes:
                        acorn_passes.append('minifyWhitespace')
                    cld = building.acorn_optimizer(cld, acorn_passes)
                    temp_files.note(cld)
                with open(cld) as shell_in:
                    coutput = shell_in.read()

            coutput = coutput.replace('wakaUnknownBefore();', asm_start_line)
            after = 'wakaUnknownAfter'
            start = coutput.find(after)
            end = coutput.find(')', start)
            # If the closure comment to suppress useless code is present, we need to look one
            # brace past it, as the first is in there. Otherwise, the first brace is the
            # start of the function body (what we want).
            USELESS_CODE_COMMENT = '/** @suppress {uselessCode} */ '
            USELESS_CODE_COMMENT_BODY = 'uselessCode'
            brace = pre_2.find('{') + 1
            has_useless_code_comment = False
            if pre_2[brace:brace + len(USELESS_CODE_COMMENT_BODY
                                       )] == USELESS_CODE_COMMENT_BODY:
                brace = pre_2.find('{', brace) + 1
                has_useless_code_comment = True
            pre = coutput[:start] + '(' + (
                USELESS_CODE_COMMENT if has_useless_code_comment else
                '') + 'function(global,env,buffer) {\n' + pre_2[brace:]
            post = post_1 + asm_end_line + coutput[end + 1:]

    filename += '.jo.js'
    temp_files.note(filename)
    # The context manager guarantees the output file is closed even when one
    # of the steps below raises.
    with open(filename, 'w') as f:
        with ToolchainProfiler.profile_block('write_pre'):
            f.write(pre)
            pre = None

        with ToolchainProfiler.profile_block('sort_or_concat'):
            if not just_concat:
                # sort functions by size, to make diffing easier and to improve aot times
                funcs = []
                for out_file in filenames:
                    with open(out_file) as chunk_in:
                        funcs.extend(split_funcs(chunk_in.read(), False))
                if not os.environ.get('EMCC_NO_OPT_SORT'):
                    funcs.sort(key=lambda x: (len(x[1]), x[0]), reverse=True)

                if 'last' in passes and len(funcs):
                    count = funcs[0][1].count('\n')
                    if count > 3000:
                        print(
                            'warning: Output contains some very large functions (%s lines in %s), consider building source files with -Os or -Oz)'
                            % (count, funcs[0][0]),
                            file=sys.stderr)

                for func in funcs:
                    f.write(func[1])
                funcs = None
            else:
                # just concat the outputs
                for out_file in filenames:
                    with open(out_file) as chunk_in:
                        f.write(chunk_in.read())

        with ToolchainProfiler.profile_block('write_post'):
            f.write('\n')
            f.write(post)
            # No need to write suffix: if there was one, it is inside post which exists when suffix is there
            f.write('\n')

    return filename
def run_on_js(filename, gen_hash_info=False):
    """Run the duplicate function eliminator over the functions of a JS file.

    The function bodies between ``start_funcs_marker`` and
    ``end_funcs_marker`` are split into chunks, each chunk is written to a
    temp file and passed to ``DUPLICATE_FUNCTION_ELIMINATOR`` under node
    (through a multiprocessing pool when there is more than one chunk and at
    least two cores). The resulting functions are written, sorted by size
    unless ``EMCC_NO_OPT_SORT`` is set, between the text before and after the
    functions region.

    Args:
        filename: Path of the JavaScript file to process.
        gen_hash_info: When true the tool is run with ``--gen-hash-info``, the
            original chunk files are what gets written back, and the files
            produced by the tool are handed to
            ``write_equivalent_fn_hash_to_file``. When false the tool is run
            with ``--use-hash-info``: the text from the last ``//`` in the
            input is appended to every chunk as hash info, and the asm shell
            around the functions is run through ``process_shell``.

    Returns:
        The path of the written output file, which ends in ``.jo.js``.

    Raises:
        SystemExit: via ``sys.exit(1)`` when the function markers are missing
            or out of order.
    """
    js_engine = shared.NODE_JS

    with open(filename) as src:
        js = src.read()
    if os.linesep != '\n':
        js = js.replace(os.linesep, '\n')  # we assume \n in the splitting code

    equivalentfn_hash_info = None
    passed_in_filename = filename

    # Find markers
    start_funcs = js.find(start_funcs_marker)
    end_funcs = js.rfind(end_funcs_marker)

    if start_funcs < 0 or end_funcs < start_funcs:
        logging.critical(
            'Invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s)'
            % (start_funcs, end_funcs))
        sys.exit(1)

    funcs_begin = start_funcs + len(start_funcs_marker)
    funcs_end = end_funcs + len(end_funcs_marker)

    if not gen_hash_info:
        # Everything from the last '//' to the end of the input is used as
        # the hash info.
        equivalentfn_hash_info = js[js.rfind('//'):]

        start_asm = js.find(start_asm_marker)
        end_asm = js.rfind(end_asm_marker)
        assert (start_asm >= 0) == (end_asm >= 0)

        # We need to split out the asm shell as well, for minification
        asm_begin = start_asm + len(start_asm_marker)
        pre = js[:asm_begin]
        post = js[end_asm:]
        asm_shell_pre = js[asm_begin:funcs_begin]
        # Prevent "uglify" from turning 0.0 into 0 in variables' initialization. To do this we first replace 0.0 with
        # ZERO$DOT$ZERO and then replace it back.
        asm_shell_pre = re.sub(r'(\S+\s*=\s*)0\.0', r'\1ZERO$DOT$ZERO',
                               asm_shell_pre)
        asm_shell_post = js[funcs_end:end_asm + len(end_asm_marker)]
        asm_shell = asm_shell_pre + '\nEMSCRIPTEN_FUNCS();\n' + asm_shell_post
        js = js[funcs_begin:end_funcs]

        # we assume there is a maximum of one new name per line
        asm_shell_pre, asm_shell_post = process_shell(
            js_engine, asm_shell,
            equivalentfn_hash_info).split('EMSCRIPTEN_FUNCS();')
        asm_shell_pre = re.sub(r'(\S+\s*=\s*)ZERO\$DOT\$ZERO', r'\g<1>0.0',
                               asm_shell_pre)
        asm_shell_post = asm_shell_post.replace('});', '})')
        pre += asm_shell_pre + '\n' + start_funcs_marker
        post = end_funcs_marker + asm_shell_post + post

        # We don't need the extra info at the end
        post = post[:post.rfind('//')].strip()
    else:
        pre = js[:funcs_begin]
        post = end_funcs_marker + js[funcs_end:]
        js = js[funcs_begin:end_funcs]

    total_size = len(js)
    funcs = split_funcs(js, False)

    js = None  # drop the reference to the (possibly large) source text

    # Size the chunks from the number of cores, clamped to the configured
    # minimum and maximum chunk sizes.
    cores = building.get_num_cores()

    intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
    chunk_size = min(MAX_CHUNK_SIZE,
                     max(MIN_CHUNK_SIZE, total_size / intended_num_chunks))
    chunks = shared.chunkify(funcs, chunk_size)

    chunks = [chunk for chunk in chunks if len(chunk)]
    if DEBUG and len(chunks):
        print('chunkification: num funcs:',
              len(funcs),
              'actual num chunks:',
              len(chunks),
              'chunk size range:',
              max(map(len, chunks)),
              '-',
              min(map(len, chunks)),
              file=sys.stderr)
    funcs = None

    def write_chunk(chunk, i):
        # In --use-hash-info mode every chunk carries the hash info after it.
        temp_file = temp_files.get('.jsfunc_%d.js' % i).name
        with open(temp_file, 'w') as out:
            out.write(chunk)
            if not gen_hash_info:
                out.write('\n')
                out.write(equivalentfn_hash_info)
        return temp_file

    filenames = [write_chunk(chunk, i) for i, chunk in enumerate(chunks)]

    old_filenames = filenames[:]
    if len(filenames):
        commands = [
            js_engine + [
                DUPLICATE_FUNCTION_ELIMINATOR, f,
                '--gen-hash-info' if gen_hash_info else '--use-hash-info',
                '--no-minimize-whitespace'
            ] for f in filenames
        ]

        if DEBUG:
            print([' '.join(command) for command in commands],
                  file=sys.stderr)

        cores = min(cores, len(filenames))
        if len(chunks) > 1 and cores >= 2:
            # We can parallelize
            if DEBUG:
                print(
                    'splitting up js optimization into %d chunks, using %d cores  (total: %.2f MB)'
                    % (len(chunks), cores, total_size / (1024 * 1024.)),
                    file=sys.stderr)
            pool = building.get_multiprocessing_pool()
            filenames = pool.map(run_on_chunk, commands, chunksize=1)
        else:
            # We can't parallelize, but still break into chunks to avoid uglify/node memory issues
            if len(chunks) > 1 and DEBUG:
                print('splitting up js optimization into %d chunks' %
                      (len(chunks)),
                      file=sys.stderr)
            filenames = [run_on_chunk(command) for command in commands]

    # we create temp files in the child threads, clean them up here when we are done
    for tool_output in filenames:
        temp_files.note(tool_output)

    json_files = []

    # We're going to be coalescing the files back at the end
    # Just replace the file list with the ones provided in
    # the command list - and save off the generated Json
    if gen_hash_info:
        json_files = filenames[:]
        filenames = old_filenames[:]
        for chunk_file in filenames:
            temp_files.note(chunk_file)

    # NOTE(review): the original loops reused the name `filename` as their
    # loop variable, so whenever at least one chunk exists the output name is
    # derived from the LAST entry of `filenames` rather than from the input
    # file. That behavior is kept, but made explicit here.
    if filenames:
        filename = filenames[-1]

    filename += '.jo.js'
    # The context manager guarantees the output file is closed even when one
    # of the steps below raises.
    with open(filename, 'w') as f:
        f.write(pre)
        pre = None

        # sort functions by size, to make diffing easier and to improve aot times
        funcs = []
        for out_file in filenames:
            with open(out_file) as chunk_in:
                funcs.extend(split_funcs(chunk_in.read(), False))
        if not os.environ.get('EMCC_NO_OPT_SORT'):
            funcs.sort(key=lambda x: (len(x[1]), x[0]), reverse=True)

        for func in funcs:
            f.write(func[1])
        funcs = None

        f.write('\n')
        f.write(post)
        # No need to write suffix: if there was one, it is inside post which exists when suffix is there
        f.write('\n')

        if gen_hash_info and len(json_files):
            write_equivalent_fn_hash_to_file(f, json_files, passed_in_filename)

    return filename