def run(filename, passes, js_engine=shared.NODE_JS, source_map=False, extra_info=None, just_split=False, just_concat=False): if 'receiveJSON' in passes: just_split = True if 'emitJSON' in passes: just_concat = True js_engine = shared.listify(js_engine) with ToolchainProfiler.profile_block('js_optimizer.run_on_js'): return temp_files.run_and_clean( lambda: run_on_js(filename, passes, js_engine, source_map, extra_info, just_split, just_concat))
def chunkify(funcs, chunk_size, DEBUG=False): with ToolchainProfiler.profile_block('chunkify'): chunks = [] # initialize reasonably, the rest of the funcs we need to split out curr = [] total_size = 0 for i in range(len(funcs)): func = funcs[i] curr_size = len(func[1]) if total_size + curr_size < chunk_size: curr.append(func) total_size += curr_size else: chunks.append(curr) curr = [func] total_size = curr_size if curr: chunks.append(curr) curr = None return [''.join([func[1] for func in chunk]) for chunk in chunks] # remove function names
def run_on_js(filename, passes, js_engine, source_map=False, extra_info=None, just_split=False, just_concat=False): if type(passes) == str: passes = [passes] js = open(filename).read() if os.linesep != '\n': js = js.replace(os.linesep, '\n') # we assume \n in the splitting code # Find suffix suffix_marker = '// EMSCRIPTEN_GENERATED_FUNCTIONS' suffix_start = js.find(suffix_marker) suffix = '' if suffix_start >= 0: suffix_end = js.find('\n', suffix_start) suffix = js[suffix_start:suffix_end] + '\n' # if there is metadata, we will run only on the generated functions. If there isn't, we will run on everything. # Find markers start_funcs = js.find(start_funcs_marker) end_funcs = js.rfind(end_funcs_marker) if start_funcs < 0 or end_funcs < start_funcs or not suffix: logging.critical( 'Invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s, suffix_start: %s' % (start_funcs, end_funcs, suffix_start)) sys.exit(1) minify_globals = 'minifyNames' in passes and 'asm' in passes if minify_globals: passes = map(lambda p: p if p != 'minifyNames' else 'minifyLocals', passes) start_asm = js.find(start_asm_marker) end_asm = js.rfind(end_asm_marker) assert (start_asm >= 0) == (end_asm >= 0) closure = 'closure' in passes if closure: passes = filter(lambda p: p != 'closure', passes) # we will do it manually cleanup = 'cleanup' in passes if cleanup: passes = filter(lambda p: p != 'cleanup', passes) # we will do it manually split_memory = 'splitMemory' in passes if not minify_globals: pre = js[:start_funcs + len(start_funcs_marker)] post = js[end_funcs + len(end_funcs_marker):] js = js[start_funcs + len(start_funcs_marker):end_funcs] if 'asm' not in passes: # can have Module[..] and inlining prevention code, push those to post class Finals: buf = [] def process(line): if len(line) > 0 and (line.startswith( ('Module[', 'if (globalScope)')) or line.endswith('["X"]=1;')): Finals.buf.append(line) return False return True js = '\n'.join(filter(process, js.split('\n'))) post = '\n'.join(Finals.buf) + '\n' + post post = end_funcs_marker + post else: # We need to split out the asm shell as well, for minification pre = js[:start_asm + len(start_asm_marker)] post = js[end_asm:] asm_shell = js[start_asm + len(start_asm_marker):start_funcs + len(start_funcs_marker)] + ''' EMSCRIPTEN_FUNCS(); ''' + js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)] js = js[start_funcs + len(start_funcs_marker):end_funcs] # we assume there is a maximum of one new name per line minifier = Minifier(js, js_engine) def check_symbol_mapping(p): if p.startswith('symbolMap='): minifier.symbols_file = p.split('=')[1] return False if p == 'profilingFuncs': minifier.profiling_funcs = True return False return True passes = filter(check_symbol_mapping, passes) asm_shell_pre, asm_shell_post = minifier.minify_shell( asm_shell, 'minifyWhitespace' in passes, source_map).split('EMSCRIPTEN_FUNCS();') asm_shell_post = asm_shell_post.replace('});', '})') pre += asm_shell_pre + '\n' + start_funcs_marker post = end_funcs_marker + asm_shell_post + post minify_info = minifier.serialize() #if DEBUG: print >> sys.stderr, 'minify info:', minify_info # remove suffix if no longer needed if suffix and 'last' in passes: suffix_start = post.find(suffix_marker) suffix_end = post.find('\n', suffix_start) post = post[:suffix_start] + post[suffix_end:] total_size = len(js) funcs = split_funcs(js, just_split) js = None # if we are making source maps, we want our debug numbering to start from the # top of the file, so avoid breaking the JS into chunks cores = 1 if source_map else int( os.environ.get('EMCC_CORES') or multiprocessing.cpu_count()) if not just_split: intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE)) chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks)) chunks = shared.chunkify(funcs, chunk_size) else: # keep same chunks as before chunks = map(lambda f: f[1], funcs) chunks = filter(lambda chunk: len(chunk) > 0, chunks) if DEBUG and len(chunks) > 0: print >> sys.stderr, 'chunkification: num funcs:', len( funcs), 'actual num chunks:', len( chunks), 'chunk size range:', max(map(len, chunks)), '-', min( map(len, chunks)) funcs = None if len(chunks) > 0: def write_chunk(chunk, i): temp_file = temp_files.get('.jsfunc_%d.js' % i).name f = open(temp_file, 'w') f.write(chunk) f.write(suffix_marker) if minify_globals: if extra_info: for key, value in extra_info.iteritems(): assert key not in minify_info or value == minify_info[ key], [key, value, minify_info[key]] minify_info[key] = value f.write('\n') f.write('// EXTRA_INFO:' + json.dumps(minify_info)) elif extra_info: f.write('\n') f.write('// EXTRA_INFO:' + json.dumps(extra_info)) f.close() return temp_file filenames = [write_chunk(chunks[i], i) for i in range(len(chunks))] else: filenames = [] with ToolchainProfiler.profile_block('run_optimizer'): if len(filenames) > 0: if not use_native(passes, source_map) or not get_native_optimizer(): commands = map( lambda filename: js_engine + [JS_OPTIMIZER, filename, 'noPrintMetadata'] + (['--debug'] if source_map else []) + passes, filenames) else: # use the native optimizer shared.logging.debug('js optimizer using native') assert not source_map # XXX need to use js optimizer commands = map( lambda filename: [get_native_optimizer(), filename] + passes, filenames) #print [' '.join(command) for command in commands] cores = min(cores, len(filenames)) if len(chunks) > 1 and cores >= 2: # We can parallelize if DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)' % ( len(chunks), cores, total_size / (1024 * 1024.)) pool = multiprocessing.Pool(processes=cores) filenames = pool.map(run_on_chunk, commands, chunksize=1) try: # Shut down the pool, since otherwise processes are left alive and would only be lazily terminated, # and in other parts of the toolchain we also build up multiprocessing pools. pool.terminate() pool.join() except Exception, e: # On Windows we get occassional "Access is denied" errors when attempting to tear down the pool, ignore these. logging.debug( 'Attempting to tear down multiprocessing pool failed with an exception: ' + str(e)) else: # We can't parallize, but still break into chunks to avoid uglify/node memory issues if len(chunks) > 1 and DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks' % ( len(chunks)) filenames = [run_on_chunk(command) for command in commands] else:
from toolchain_profiler import ToolchainProfiler if __name__ == '__main__': ToolchainProfiler.record_process_start() import os, sys, subprocess, multiprocessing, re, string, json, shutil, logging import shared configuration = shared.configuration temp_files = configuration.get_temp_files() __rootpath__ = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) def path_from_root(*pathelems): return os.path.join(__rootpath__, *pathelems) NATIVE_PASSES = set([ 'asm', 'asmPreciseF32', 'receiveJSON', 'emitJSON', 'eliminateDeadFuncs', 'eliminate', 'eliminateMemSafe', 'simplifyExpressions', 'simplifyIfs', 'optimizeFrounds', 'registerize', 'registerizeHarder', 'minifyNames', 'minifyLocals', 'minifyWhitespace', 'cleanup', 'asmLastOpts', 'last', 'noop', 'closure' ]) JS_OPTIMIZER = path_from_root('tools', 'js-optimizer.js') NUM_CHUNKS_PER_CORE = 3 MIN_CHUNK_SIZE = int( os.environ.get('EMCC_JSOPT_MIN_CHUNK_SIZE') or 512 * 1024) # configuring this is just for debugging purposes
def run_on_js(filename, passes, js_engine, source_map=False, extra_info=None, just_split=False, just_concat=False): with ToolchainProfiler.profile_block('js_optimizer.split_markers'): if type(passes) == str: passes = [passes] js = open(filename).read() if os.linesep != '\n': js = js.replace(os.linesep, '\n') # we assume \n in the splitting code # Find suffix suffix_marker = '// EMSCRIPTEN_GENERATED_FUNCTIONS' suffix_start = js.find(suffix_marker) suffix = '' if suffix_start >= 0: suffix_end = js.find('\n', suffix_start) suffix = js[suffix_start:suffix_end] + '\n' # if there is metadata, we will run only on the generated functions. If there isn't, we will run on everything. # Find markers start_funcs = js.find(start_funcs_marker) end_funcs = js.rfind(end_funcs_marker) if start_funcs < 0 or end_funcs < start_funcs or not suffix: logging.critical( 'Invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s, suffix_start: %s' % (start_funcs, end_funcs, suffix_start)) sys.exit(1) minify_globals = 'minifyNames' in passes and 'asm' in passes if minify_globals: passes = map(lambda p: p if p != 'minifyNames' else 'minifyLocals', passes) start_asm = js.find(start_asm_marker) end_asm = js.rfind(end_asm_marker) assert (start_asm >= 0) == (end_asm >= 0) closure = 'closure' in passes if closure: passes = filter(lambda p: p != 'closure', passes) # we will do it manually cleanup = 'cleanup' in passes if cleanup: passes = filter(lambda p: p != 'cleanup', passes) # we will do it manually split_memory = 'splitMemory' in passes if not minify_globals: with ToolchainProfiler.profile_block('js_optimizer.no_minify_globals'): pre = js[:start_funcs + len(start_funcs_marker)] post = js[end_funcs + len(end_funcs_marker):] js = js[start_funcs + len(start_funcs_marker):end_funcs] if 'asm' not in passes: # can have Module[..] and inlining prevention code, push those to post class Finals(object): buf = [] def process(line): if len(line) > 0 and (line.startswith( ('Module[', 'if (globalScope)')) or line.endswith('["X"]=1;')): Finals.buf.append(line) return False return True js = '\n'.join(filter(process, js.split('\n'))) post = '\n'.join(Finals.buf) + '\n' + post post = end_funcs_marker + post else: with ToolchainProfiler.profile_block('js_optimizer.minify_globals'): # We need to split out the asm shell as well, for minification pre = js[:start_asm + len(start_asm_marker)] post = js[end_asm:] asm_shell = js[start_asm + len(start_asm_marker):start_funcs + len(start_funcs_marker)] + ''' EMSCRIPTEN_FUNCS(); ''' + js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)] js = js[start_funcs + len(start_funcs_marker):end_funcs] # we assume there is a maximum of one new name per line minifier = Minifier(js, js_engine) def check_symbol_mapping(p): if p.startswith('symbolMap='): minifier.symbols_file = p.split('=', 1)[1] return False if p == 'profilingFuncs': minifier.profiling_funcs = True return False return True passes = filter(check_symbol_mapping, passes) asm_shell_pre, asm_shell_post = minifier.minify_shell( asm_shell, 'minifyWhitespace' in passes, source_map).split('EMSCRIPTEN_FUNCS();') asm_shell_post = asm_shell_post.replace('});', '})') pre += asm_shell_pre + '\n' + start_funcs_marker post = end_funcs_marker + asm_shell_post + post minify_info = minifier.serialize() if extra_info: for key, value in extra_info.iteritems(): assert key not in minify_info or value == minify_info[ key], [key, value, minify_info[key]] minify_info[key] = value #if DEBUG: print >> sys.stderr, 'minify info:', minify_info with ToolchainProfiler.profile_block( 'js_optimizer.remove_suffix_and_split'): # remove suffix if no longer needed if suffix and 'last' in passes: suffix_start = post.find(suffix_marker) suffix_end = post.find('\n', suffix_start) post = post[:suffix_start] + post[suffix_end:] total_size = len(js) funcs = split_funcs(js, just_split) js = None with ToolchainProfiler.profile_block('js_optimizer.split_to_chunks'): # if we are making source maps, we want our debug numbering to start from the # top of the file, so avoid breaking the JS into chunks cores = 1 if source_map else int( os.environ.get('EMCC_CORES') or multiprocessing.cpu_count()) if not just_split: intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE)) chunk_size = min( MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks)) chunks = shared.chunkify(funcs, chunk_size) else: # keep same chunks as before chunks = map(lambda f: f[1], funcs) chunks = filter(lambda chunk: len(chunk) > 0, chunks) if DEBUG and len(chunks) > 0: print >> sys.stderr, 'chunkification: num funcs:', len( funcs), 'actual num chunks:', len( chunks), 'chunk size range:', max(map(len, chunks)), '-', min( map(len, chunks)) funcs = None if len(chunks) > 0: serialized_extra_info = suffix_marker + '\n' if minify_globals: serialized_extra_info += '// EXTRA_INFO:' + json.dumps( minify_info) elif extra_info: serialized_extra_info += '// EXTRA_INFO:' + json.dumps( extra_info) with ToolchainProfiler.profile_block('js_optimizer.write_chunks'): def write_chunk(chunk, i): temp_file = temp_files.get('.jsfunc_%d.js' % i).name f = open(temp_file, 'w') f.write(chunk) f.write(serialized_extra_info) f.close() return temp_file filenames = [ write_chunk(chunks[i], i) for i in range(len(chunks)) ] else: filenames = [] with ToolchainProfiler.profile_block('run_optimizer'): if len(filenames) > 0: if not use_native(passes, source_map) or not get_native_optimizer(): commands = map( lambda filename: js_engine + [JS_OPTIMIZER, filename, 'noPrintMetadata'] + (['--debug'] if source_map else []) + passes, filenames) else: # use the native optimizer shared.logging.debug('js optimizer using native') assert not source_map # XXX need to use js optimizer commands = map( lambda filename: [get_native_optimizer(), filename] + passes, filenames) #print [' '.join(command) for command in commands] cores = min(cores, len(filenames)) if len(chunks) > 1 and cores >= 2: # We can parallelize if DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)' % ( len(chunks), cores, total_size / (1024 * 1024.)) with ToolchainProfiler.profile_block('optimizer_pool'): pool = shared.Building.get_multiprocessing_pool() filenames = pool.map(run_on_chunk, commands, chunksize=1) else: # We can't parallize, but still break into chunks to avoid uglify/node memory issues if len(chunks) > 1 and DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks' % ( len(chunks)) filenames = [run_on_chunk(command) for command in commands] else: filenames = [] for filename in filenames: temp_files.note(filename) with ToolchainProfiler.profile_block('split_closure_cleanup'): if closure or cleanup or split_memory: # run on the shell code, everything but what we js-optimize start_asm = '// EMSCRIPTEN_START_ASM\n' end_asm = '// EMSCRIPTEN_END_ASM\n' cl_sep = 'wakaUnknownBefore(); var asm=wakaUnknownAfter(global,env,buffer)\n' with temp_files.get_file('.cl.js') as cle: c = open(cle, 'w') pre_1, pre_2 = pre.split(start_asm) post_1, post_2 = post.split(end_asm) c.write(pre_1) c.write(cl_sep) c.write(post_2) c.close() cld = cle if split_memory: if DEBUG: print >> sys.stderr, 'running splitMemory on shell code' cld = run_on_chunk(js_engine + [JS_OPTIMIZER, cld, 'splitMemoryShell']) f = open(cld, 'a') f.write(suffix_marker) f.close() if closure: if DEBUG: print >> sys.stderr, 'running closure on shell code' cld = shared.Building.closure_compiler( cld, pretty='minifyWhitespace' not in passes) temp_files.note(cld) elif cleanup: if DEBUG: print >> sys.stderr, 'running cleanup on shell code' next = cld + '.cl.js' temp_files.note(next) proc = subprocess.Popen( js_engine + [JS_OPTIMIZER, cld, 'noPrintMetadata', 'JSDCE'] + (['minifyWhitespace'] if 'minifyWhitespace' in passes else []), stdout=open(next, 'w')) proc.communicate() assert proc.returncode == 0 cld = next coutput = open(cld).read() coutput = coutput.replace('wakaUnknownBefore();', start_asm) after = 'wakaUnknownAfter' start = coutput.find(after) end = coutput.find(')', start) pre = coutput[:start] + '(function(global,env,buffer) {\n' + pre_2[ pre_2.find('{') + 1:] post = post_1 + end_asm + coutput[end + 1:] with ToolchainProfiler.profile_block('write_pre'): filename += '.jo.js' f = open(filename, 'w') f.write(pre) pre = None with ToolchainProfiler.profile_block('sort_or_concat'): if not just_concat: # sort functions by size, to make diffing easier and to improve aot times funcses = [] for out_file in filenames: funcses.append(split_funcs(open(out_file).read(), False)) funcs = [item for sublist in funcses for item in sublist] funcses = None def sorter(x, y): diff = len(y[1]) - len(x[1]) if diff != 0: return diff if x[0] < y[0]: return 1 elif x[0] > y[0]: return -1 return 0 if not os.environ.get('EMCC_NO_OPT_SORT'): funcs.sort(sorter) if 'last' in passes and len(funcs) > 0: count = funcs[0][1].count('\n') if count > 3000: print >> sys.stderr, 'warning: Output contains some very large functions (%s lines in %s), consider building source files with -Os or -Oz, and/or trying OUTLINING_LIMIT to break them up (see settings.js; note that the parameter there affects AST nodes, while we measure lines here, so the two may not match up)' % ( count, funcs[0][0]) for func in funcs: f.write(func[1]) funcs = None else: # just concat the outputs for out_file in filenames: f.write(open(out_file).read()) with ToolchainProfiler.profile_block('write_post'): f.write('\n') f.write(post) # No need to write suffix: if there was one, it is inside post which exists when suffix is there f.write('\n') f.close() return filename
--use-preload-plugins Tells the file packager to run preload plugins on the files as they are loaded. This performs tasks like decoding images and audio using the browser's codecs. Notes: * The file packager generates unix-style file paths. So if you are on windows and a file is accessed at subdir\file, in JS it will be subdir/file. For simplicity we treat the web platform as a *NIX. TODO: You can also provide .crn files yourself, pre-crunched. With this option, they will be decompressed to dds files in the browser, exactly the same as if this tool compressed them. ''' from toolchain_profiler import ToolchainProfiler if __name__ == '__main__': ToolchainProfiler.record_process_start() import os, sys, shutil, random, uuid, ctypes import posixpath import shared from shared import execute, suffix, unsuffixed from jsrun import run_js from subprocess import Popen, PIPE, STDOUT import fnmatch import json if len(sys.argv) == 1: print '''Usage: file_packager.py TARGET [--preload A...] [--embed B...] [--exclude C...] [--no-closure] [--crunch[=X]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--no-heap-copy] [--separate-metadata] See the source for more details.''' sys.exit(0)
def run(filename, passes, js_engine=shared.NODE_JS, source_map=False, extra_info=None, just_split=False, just_concat=False): if 'receiveJSON' in passes: just_split = True if 'emitJSON' in passes: just_concat = True js_engine = shared.listify(js_engine) with ToolchainProfiler.profile_block('js_optimizer.run_on_js'): return temp_files.run_and_clean(lambda: run_on_js(filename, passes, js_engine, source_map, extra_info, just_split, just_concat))
def run_on_js(filename, passes, js_engine, source_map=False, extra_info=None, just_split=False, just_concat=False): if type(passes) == str: passes = [passes] js = open(filename).read() if os.linesep != '\n': js = js.replace(os.linesep, '\n') # we assume \n in the splitting code # Find suffix suffix_marker = '// EMSCRIPTEN_GENERATED_FUNCTIONS' suffix_start = js.find(suffix_marker) suffix = '' if suffix_start >= 0: suffix_end = js.find('\n', suffix_start) suffix = js[suffix_start:suffix_end] + '\n' # if there is metadata, we will run only on the generated functions. If there isn't, we will run on everything. # Find markers start_funcs = js.find(start_funcs_marker) end_funcs = js.rfind(end_funcs_marker) if start_funcs < 0 or end_funcs < start_funcs or not suffix: logging.critical('Invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s, suffix_start: %s' % (start_funcs, end_funcs, suffix_start)) sys.exit(1) minify_globals = 'minifyNames' in passes and 'asm' in passes if minify_globals: passes = map(lambda p: p if p != 'minifyNames' else 'minifyLocals', passes) start_asm = js.find(start_asm_marker) end_asm = js.rfind(end_asm_marker) assert (start_asm >= 0) == (end_asm >= 0) closure = 'closure' in passes if closure: passes = filter(lambda p: p != 'closure', passes) # we will do it manually cleanup = 'cleanup' in passes if cleanup: passes = filter(lambda p: p != 'cleanup', passes) # we will do it manually split_memory = 'splitMemory' in passes if not minify_globals: pre = js[:start_funcs + len(start_funcs_marker)] post = js[end_funcs + len(end_funcs_marker):] js = js[start_funcs + len(start_funcs_marker):end_funcs] if 'asm' not in passes: # can have Module[..] and inlining prevention code, push those to post class Finals: buf = [] def process(line): if len(line) > 0 and (line.startswith(('Module[', 'if (globalScope)')) or line.endswith('["X"]=1;')): Finals.buf.append(line) return False return True js = '\n'.join(filter(process, js.split('\n'))) post = '\n'.join(Finals.buf) + '\n' + post post = end_funcs_marker + post else: # We need to split out the asm shell as well, for minification pre = js[:start_asm + len(start_asm_marker)] post = js[end_asm:] asm_shell = js[start_asm + len(start_asm_marker):start_funcs + len(start_funcs_marker)] + ''' EMSCRIPTEN_FUNCS(); ''' + js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)] js = js[start_funcs + len(start_funcs_marker):end_funcs] # we assume there is a maximum of one new name per line minifier = Minifier(js, js_engine) def check_symbol_mapping(p): if p.startswith('symbolMap='): minifier.symbols_file = p.split('=')[1] return False if p == 'profilingFuncs': minifier.profiling_funcs = True return False return True passes = filter(check_symbol_mapping, passes) asm_shell_pre, asm_shell_post = minifier.minify_shell(asm_shell, 'minifyWhitespace' in passes, source_map).split('EMSCRIPTEN_FUNCS();'); asm_shell_post = asm_shell_post.replace('});', '})'); pre += asm_shell_pre + '\n' + start_funcs_marker post = end_funcs_marker + asm_shell_post + post minify_info = minifier.serialize() #if DEBUG: print >> sys.stderr, 'minify info:', minify_info # remove suffix if no longer needed if suffix and 'last' in passes: suffix_start = post.find(suffix_marker) suffix_end = post.find('\n', suffix_start) post = post[:suffix_start] + post[suffix_end:] total_size = len(js) funcs = split_funcs(js, just_split) js = None # if we are making source maps, we want our debug numbering to start from the # top of the file, so avoid breaking the JS into chunks cores = 1 if source_map else int(os.environ.get('EMCC_CORES') or multiprocessing.cpu_count()) if not just_split: intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE)) chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks)) chunks = shared.chunkify(funcs, chunk_size) else: # keep same chunks as before chunks = map(lambda f: f[1], funcs) chunks = filter(lambda chunk: len(chunk) > 0, chunks) if DEBUG and len(chunks) > 0: print >> sys.stderr, 'chunkification: num funcs:', len(funcs), 'actual num chunks:', len(chunks), 'chunk size range:', max(map(len, chunks)), '-', min(map(len, chunks)) funcs = None if len(chunks) > 0: def write_chunk(chunk, i): temp_file = temp_files.get('.jsfunc_%d.js' % i).name f = open(temp_file, 'w') f.write(chunk) f.write(suffix_marker) if minify_globals: if extra_info: for key, value in extra_info.iteritems(): assert key not in minify_info or value == minify_info[key], [key, value, minify_info[key]] minify_info[key] = value f.write('\n') f.write('// EXTRA_INFO:' + json.dumps(minify_info)) elif extra_info: f.write('\n') f.write('// EXTRA_INFO:' + json.dumps(extra_info)) f.close() return temp_file filenames = [write_chunk(chunks[i], i) for i in range(len(chunks))] else: filenames = [] with ToolchainProfiler.profile_block('run_optimizer'): if len(filenames) > 0: if not use_native(passes, source_map) or not get_native_optimizer(): commands = map(lambda filename: js_engine + [JS_OPTIMIZER, filename, 'noPrintMetadata'] + (['--debug'] if source_map else []) + passes, filenames) else: # use the native optimizer shared.logging.debug('js optimizer using native') assert not source_map # XXX need to use js optimizer commands = map(lambda filename: [get_native_optimizer(), filename] + passes, filenames) #print [' '.join(command) for command in commands] cores = min(cores, len(filenames)) if len(chunks) > 1 and cores >= 2: # We can parallelize if DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)' % (len(chunks), cores, total_size/(1024*1024.)) pool = multiprocessing.Pool(processes=cores) filenames = pool.map(run_on_chunk, commands, chunksize=1) try: # Shut down the pool, since otherwise processes are left alive and would only be lazily terminated, # and in other parts of the toolchain we also build up multiprocessing pools. pool.terminate() pool.join() except Exception, e: # On Windows we get occassional "Access is denied" errors when attempting to tear down the pool, ignore these. logging.debug('Attempting to tear down multiprocessing pool failed with an exception: ' + str(e)) else: # We can't parallize, but still break into chunks to avoid uglify/node memory issues if len(chunks) > 1 and DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks' % (len(chunks)) filenames = [run_on_chunk(command) for command in commands] else:
from toolchain_profiler import ToolchainProfiler if __name__ == '__main__': ToolchainProfiler.record_process_start() import os, sys, subprocess, multiprocessing, re, string, json, shutil, logging import shared configuration = shared.configuration temp_files = configuration.get_temp_files() __rootpath__ = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) def path_from_root(*pathelems): return os.path.join(__rootpath__, *pathelems) NATIVE_PASSES = set(['asm', 'asmPreciseF32', 'receiveJSON', 'emitJSON', 'eliminateDeadFuncs', 'eliminate', 'eliminateMemSafe', 'simplifyExpressions', 'simplifyIfs', 'optimizeFrounds', 'registerize', 'registerizeHarder', 'minifyNames', 'minifyLocals', 'minifyWhitespace', 'cleanup', 'asmLastOpts', 'last', 'noop', 'closure']) JS_OPTIMIZER = path_from_root('tools', 'js-optimizer.js') NUM_CHUNKS_PER_CORE = 3 MIN_CHUNK_SIZE = int(os.environ.get('EMCC_JSOPT_MIN_CHUNK_SIZE') or 512*1024) # configuring this is just for debugging purposes MAX_CHUNK_SIZE = int(os.environ.get('EMCC_JSOPT_MAX_CHUNK_SIZE') or 5*1024*1024) WINDOWS = sys.platform.startswith('win') DEBUG = os.environ.get('EMCC_DEBUG') func_sig = re.compile('function ([_\w$]+)\(') func_sig_json = re.compile('\["defun", ?"([_\w$]+)",') import_sig = re.compile('(var|const) ([_\w$]+ *=[^;]+);')
def run_on_js(filename, passes, js_engine, source_map=False, extra_info=None, just_split=False, just_concat=False): with ToolchainProfiler.profile_block('js_optimizer.split_markers'): if type(passes) == str: passes = [passes] js = open(filename).read() if os.linesep != '\n': js = js.replace(os.linesep, '\n') # we assume \n in the splitting code # Find suffix suffix_marker = '// EMSCRIPTEN_GENERATED_FUNCTIONS' suffix_start = js.find(suffix_marker) suffix = '' if suffix_start >= 0: suffix_end = js.find('\n', suffix_start) suffix = js[suffix_start:suffix_end] + '\n' # if there is metadata, we will run only on the generated functions. If there isn't, we will run on everything. # Find markers start_funcs = js.find(start_funcs_marker) end_funcs = js.rfind(end_funcs_marker) if start_funcs < 0 or end_funcs < start_funcs or not suffix: logging.critical('Invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s, suffix_start: %s' % (start_funcs, end_funcs, suffix_start)) sys.exit(1) minify_globals = 'minifyNames' in passes and 'asm' in passes if minify_globals: passes = map(lambda p: p if p != 'minifyNames' else 'minifyLocals', passes) start_asm = js.find(start_asm_marker) end_asm = js.rfind(end_asm_marker) assert (start_asm >= 0) == (end_asm >= 0) closure = 'closure' in passes if closure: passes = filter(lambda p: p != 'closure', passes) # we will do it manually cleanup = 'cleanup' in passes if cleanup: passes = filter(lambda p: p != 'cleanup', passes) # we will do it manually split_memory = 'splitMemory' in passes if not minify_globals: with ToolchainProfiler.profile_block('js_optimizer.no_minify_globals'): pre = js[:start_funcs + len(start_funcs_marker)] post = js[end_funcs + len(end_funcs_marker):] js = js[start_funcs + len(start_funcs_marker):end_funcs] if 'asm' not in passes: # can have Module[..] and inlining prevention code, push those to post class Finals(object): buf = [] def process(line): if len(line) > 0 and (line.startswith(('Module[', 'if (globalScope)')) or line.endswith('["X"]=1;')): Finals.buf.append(line) return False return True js = '\n'.join(filter(process, js.split('\n'))) post = '\n'.join(Finals.buf) + '\n' + post post = end_funcs_marker + post else: with ToolchainProfiler.profile_block('js_optimizer.minify_globals'): # We need to split out the asm shell as well, for minification pre = js[:start_asm + len(start_asm_marker)] post = js[end_asm:] asm_shell = js[start_asm + len(start_asm_marker):start_funcs + len(start_funcs_marker)] + ''' EMSCRIPTEN_FUNCS(); ''' + js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)] js = js[start_funcs + len(start_funcs_marker):end_funcs] # we assume there is a maximum of one new name per line minifier = Minifier(js, js_engine) def check_symbol_mapping(p): if p.startswith('symbolMap='): minifier.symbols_file = p.split('=')[1] return False if p == 'profilingFuncs': minifier.profiling_funcs = True return False return True passes = filter(check_symbol_mapping, passes) asm_shell_pre, asm_shell_post = minifier.minify_shell(asm_shell, 'minifyWhitespace' in passes, source_map).split('EMSCRIPTEN_FUNCS();'); asm_shell_post = asm_shell_post.replace('});', '})'); pre += asm_shell_pre + '\n' + start_funcs_marker post = end_funcs_marker + asm_shell_post + post minify_info = minifier.serialize() if extra_info: for key, value in extra_info.iteritems(): assert key not in minify_info or value == minify_info[key], [key, value, minify_info[key]] minify_info[key] = value #if DEBUG: print >> sys.stderr, 'minify info:', minify_info with ToolchainProfiler.profile_block('js_optimizer.remove_suffix_and_split'): # remove suffix if no longer needed if suffix and 'last' in passes: suffix_start = post.find(suffix_marker) suffix_end = post.find('\n', suffix_start) post = post[:suffix_start] + post[suffix_end:] total_size = len(js) funcs = split_funcs(js, just_split) js = None with ToolchainProfiler.profile_block('js_optimizer.split_to_chunks'): # if we are making source maps, we want our debug numbering to start from the # top of the file, so avoid breaking the JS into chunks cores = 1 if source_map else int(os.environ.get('EMCC_CORES') or multiprocessing.cpu_count()) if not just_split: intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE)) chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks)) chunks = shared.chunkify(funcs, chunk_size) else: # keep same chunks as before chunks = map(lambda f: f[1], funcs) chunks = filter(lambda chunk: len(chunk) > 0, chunks) if DEBUG and len(chunks) > 0: print >> sys.stderr, 'chunkification: num funcs:', len(funcs), 'actual num chunks:', len(chunks), 'chunk size range:', max(map(len, chunks)), '-', min(map(len, chunks)) funcs = None if len(chunks) > 0: serialized_extra_info = suffix_marker + '\n' if minify_globals: serialized_extra_info += '// EXTRA_INFO:' + json.dumps(minify_info) elif extra_info: serialized_extra_info += '// EXTRA_INFO:' + json.dumps(extra_info) with ToolchainProfiler.profile_block('js_optimizer.write_chunks'): def write_chunk(chunk, i): temp_file = temp_files.get('.jsfunc_%d.js' % i).name f = open(temp_file, 'w') f.write(chunk) f.write(serialized_extra_info) f.close() return temp_file filenames = [write_chunk(chunks[i], i) for i in range(len(chunks))] else: filenames = [] with ToolchainProfiler.profile_block('run_optimizer'): if len(filenames) > 0: if not use_native(passes, source_map) or not get_native_optimizer(): commands = map(lambda filename: js_engine + [JS_OPTIMIZER, filename, 'noPrintMetadata'] + (['--debug'] if source_map else []) + passes, filenames) else: # use the native optimizer shared.logging.debug('js optimizer using native') assert not source_map # XXX need to use js optimizer commands = map(lambda filename: [get_native_optimizer(), filename] + passes, filenames) #print [' '.join(command) for command in commands] cores = min(cores, len(filenames)) if len(chunks) > 1 and cores >= 2: # We can parallelize if DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)' % (len(chunks), cores, total_size/(1024*1024.)) with ToolchainProfiler.profile_block('optimizer_pool'): pool = shared.Building.get_multiprocessing_pool() filenames = pool.map(run_on_chunk, commands, chunksize=1) else: # We can't parallize, but still break into chunks to avoid uglify/node memory issues if len(chunks) > 1 and DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks' % (len(chunks)) filenames = [run_on_chunk(command) for command in commands] else: filenames = [] for filename in filenames: temp_files.note(filename) with ToolchainProfiler.profile_block('split_closure_cleanup'): if closure or cleanup or split_memory: # run on the shell code, everything but what we js-optimize start_asm = '// EMSCRIPTEN_START_ASM\n' end_asm = '// EMSCRIPTEN_END_ASM\n' cl_sep = 'wakaUnknownBefore(); var asm=wakaUnknownAfter(global,env,buffer)\n' with temp_files.get_file('.cl.js') as cle: c = open(cle, 'w') pre_1, pre_2 = pre.split(start_asm) post_1, post_2 = post.split(end_asm) c.write(pre_1) c.write(cl_sep) c.write(post_2) c.close() cld = cle if split_memory: if DEBUG: print >> sys.stderr, 'running splitMemory on shell code' cld = run_on_chunk(js_engine + [JS_OPTIMIZER, cld, 'splitMemoryShell']) f = open(cld, 'a') f.write(suffix_marker) f.close() if closure: if DEBUG: print >> sys.stderr, 'running closure on shell code' cld = shared.Building.closure_compiler(cld, pretty='minifyWhitespace' not in passes) temp_files.note(cld) elif cleanup: if DEBUG: print >> sys.stderr, 'running cleanup on shell code' next = cld + '.cl.js' temp_files.note(next) proc = subprocess.Popen(js_engine + [JS_OPTIMIZER, cld, 'noPrintMetadata', 'JSDCE'] + (['minifyWhitespace'] if 'minifyWhitespace' in passes else []), stdout=open(next, 'w')) proc.communicate() assert proc.returncode == 0 cld = next coutput = open(cld).read() coutput = coutput.replace('wakaUnknownBefore();', start_asm) after = 'wakaUnknownAfter' start = coutput.find(after) end = coutput.find(')', start) pre = coutput[:start] + '(function(global,env,buffer) {\n' + pre_2[pre_2.find('{')+1:] post = post_1 + end_asm + coutput[end+1:] with ToolchainProfiler.profile_block('write_pre'): filename += '.jo.js' f = open(filename, 'w') f.write(pre); pre = None with ToolchainProfiler.profile_block('sort_or_concat'): if not just_concat: # sort functions by size, to make diffing easier and to improve aot times funcses = [] for out_file in filenames: funcses.append(split_funcs(open(out_file).read(), False)) funcs = [item for sublist in funcses for item in sublist] funcses = None def sorter(x, y): diff = len(y[1]) - len(x[1]) if diff != 0: return diff if x[0] < y[0]: return 1 elif x[0] > y[0]: return -1 return 0 if not os.environ.get('EMCC_NO_OPT_SORT'): funcs.sort(sorter) if 'last' in passes and len(funcs) > 0: count = funcs[0][1].count('\n') if count > 3000: print >> sys.stderr, 'info: Output contains some very large functions (%s lines in %s), consider building source files with -Os or -Oz, and/or trying OUTLINING_LIMIT to break them up (see settings.js; note that the parameter there affects AST nodes, while we measure lines here, so the two may not match up)' % (count, funcs[0][0]) for func in funcs: f.write(func[1]) funcs = None else: # just concat the outputs for out_file in filenames: f.write(open(out_file).read()) with ToolchainProfiler.profile_block('write_post'): f.write('\n') f.write(post); # No need to write suffix: if there was one, it is inside post which exists when suffix is there f.write('\n') f.close() return filename