def processFile(js_file_path):
    """Check whether a single JavaScript file is minified.

    Runs the file through the preprocessor and beautifier, then asks
    MiniChecker to compare the beautified output against the original.
    A 600-second watchdog alarm guards against pathological inputs
    (the alarm handler elsewhere raises TimeExceededError).

    Returns a [base_name, result] pair, where result is MiniChecker's
    verdict, an exception message, 'Beautifier failed', or 'Timeout'.
    """
    pid = int(multiprocessing.current_process().ident)
    try:
        # Arm the per-file watchdog.
        signal.alarm(600)

        # Strip comments, replace literals, etc., into a per-process
        # temp file so parallel workers don't collide.
        prepro = Preprocessor(js_file_path)
        prepro.write_temp_file('tmp_%d.js' % pid)

        beauty = Beautifier()
        ok = beauty.run('tmp_%d.js' % pid, 'tmp_%d.b.js' % pid)
        if not ok:
            cleanup(pid)
            return [os.path.basename(js_file_path), 'Beautifier failed']

        mc = MiniChecker('tmp_%d.b.js' % pid)
        try:
            isMini = mc.compare(keep_mini=False)
        except Exception as e:
            # Report the checker failure as the result instead of aborting.
            isMini = str(e)

        cleanup(pid)
        return [os.path.basename(js_file_path), isMini]

    except TimeExceededError:
        cleanup(pid)
        return [os.path.basename(js_file_path), 'Timeout']
    finally:
        # BUG FIX: disarm the watchdog on every exit path. Previously a
        # pending alarm stayed armed after a fast file and could fire
        # spuriously while this worker processed a different file.
        signal.alarm(0)
def processFile(l):
    """Compute simple size statistics for one JavaScript file.

    l is a row whose first element is the file path relative to
    corpus_root.  The file is preprocessed and beautified, then its
    token lines are counted.

    Returns (js_file_path, n_lines, max_line_len) on success, or
    (js_file_path, None, <error message>) on failure.
    """
    js_file_path = l[0]
    pid = int(multiprocessing.current_process().ident)

    try:
        # Temp files to be created during processing (per-process names).
        path_tmp = 'tmp_%d.js' % pid
        path_tmp_b = 'tmp_%d.b.js' % pid

        # Strip comments, replace literals, etc.
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(path_tmp)
        except:
            cleanup(pid)
            return (js_file_path, None, 'Preprocessor fail')

        # Pass through beautifier to fix layout.
        clear = Beautifier()
        ok = clear.run(path_tmp, path_tmp_b)
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'Beautifier fail')

        try:
            iBuilder_clear = IndexBuilder(Lexer(path_tmp_b).tokenList)
        except:
            cleanup(pid)
            return (js_file_path, None, 'IndexBuilder fail')

        n_lines = len(iBuilder_clear.tokens)
        # BUG FIX: the loop variable used to be named `l`, shadowing the
        # function parameter; also guard the empty case, where max() over
        # an empty sequence would raise ValueError.
        if iBuilder_clear.tokens:
            max_line_len = max(len(line) for line in iBuilder_clear.tokens)
        else:
            max_line_len = 0

        cleanup(pid)
        return (js_file_path, n_lines, max_line_len)

    except Exception as e:
        # `as` form is valid on Python 2.6+ and 3.x, unlike the old
        # `except Exception, e` spelling.
        cleanup(pid)
        return (js_file_path, None, str(e))
def processFile(row):
    """Build an aligned (original, minified) pair for one corpus file.

    row[0] is the file path relative to corpus_root.  On success the
    beautified original and its aligned uglified twin are written to
    output_path as <base>.js and <base>.u.js.

    Returns (js_file_path, status, message): status is True on success,
    False on a handled failure, or None on a preprocessor failure (the
    historical values are kept so existing callers keep working).
    """
    js_file_path = os.path.join(corpus_root, row[0])
    pid = int(multiprocessing.current_process().ident)
    base_name = os.path.splitext(os.path.basename(js_file_path))[0]

    # Temp files to be created during processing; the *_a names are the
    # aligned outputs produced by Aligner.
    temp_files = {'path_tmp': 'tmp_%d.js' % pid,
                  'path_tmp_b': 'tmp_%d.b.js' % pid,
                  'path_tmp_b_a': 'tmp_%d.b.a.js' % pid,
                  'path_tmp_u': 'tmp_%d.u.js' % pid,
                  'path_tmp_u_a': 'tmp_%d.u.a.js' % pid}

    try:
        # Strip comments, replace literals, etc.
        try:
            # BUG FIX: js_file_path already includes corpus_root; the old
            # code joined corpus_root a second time, which only worked by
            # accident when corpus_root was an absolute path (os.path.join
            # discards the first argument when the second is absolute).
            prepro = Preprocessor(js_file_path)
            prepro.write_temp_file(temp_files['path_tmp'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Preprocessor fail')

        # Pass through uglifyjs pretty-print to normalize layout.
        clear = Beautifier()
        ok = clear.run(temp_files['path_tmp'], temp_files['path_tmp_b'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, False, 'Beautifier fail')

        # Minify.
        ugly = Uglifier()
        ok = ugly.run(temp_files['path_tmp_b'], temp_files['path_tmp_u'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, False, 'Uglifier fail')

        # Num tokens before vs after.
        try:
            tok_clear = Lexer(temp_files['path_tmp_b']).tokenList
            tok_ugly = Lexer(temp_files['path_tmp_u']).tokenList
        except:
            cleanup(temp_files)
            return (js_file_path, False, 'Lexer fail')

        # For now only work with minified files that have
        # the same number of tokens as the originals.
        if len(tok_clear) != len(tok_ugly):
            cleanup(temp_files)
            return (js_file_path, False, 'Num tokens mismatch')

        # Align minified and clear files, in case the beautifier
        # did something weird.
        try:
            aligner = Aligner()
            # This is already the baseline corpus, no (smart) renaming yet.
            aligner.align(temp_files['path_tmp_b'], temp_files['path_tmp_u'])
        except:
            cleanup(temp_files)
            return (js_file_path, False, 'Aligner fail')

        # Check if minification resulted in any change; the pair is not
        # very interesting otherwise.
        # BUG FIX: use context managers so the two file handles are
        # closed instead of leaking until garbage collection.
        with open(temp_files['path_tmp_b_a']) as f_b:
            clear_text = f_b.read()
        with open(temp_files['path_tmp_u_a']) as f_u:
            ugly_text = f_u.read()
        if clear_text == ugly_text:
            cleanup(temp_files)
            return (js_file_path, False, 'Not minified')

        # Sanity check: the aligned minified file must be indexable.
        try:
            lex_ugly = Lexer(temp_files['path_tmp_u_a'])
            _iBuilder_ugly = IndexBuilder(lex_ugly.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, False, 'IndexBuilder fail')

        # Store original and uglified versions.
        ok = clear.run(temp_files['path_tmp_b_a'],
                       os.path.join(output_path, '%s.js' % base_name))
        if not ok:
            cleanup(temp_files)
            cleanupProcessed(base_name)
            return (js_file_path, False, 'Beautifier fail')

        ok = clear.run(temp_files['path_tmp_u_a'],
                       os.path.join(output_path, '%s.u.js' % base_name))
        if not ok:
            cleanup(temp_files)
            cleanupProcessed(base_name)
            return (js_file_path, False, 'Beautifier fail')

        cleanup(temp_files)
        return (js_file_path, True, 'OK')

    except Exception as e:
        cleanup(temp_files)
        return (js_file_path, False, str(e))
def processFile(l):
    """Build parallel training renderings for one corpus file.

    l is a row whose first element is the file path relative to
    corpus_root.  The file is preprocessed, beautified, normalized,
    minified, re-normalized, and aligned; several token-level renderings
    are then produced for the translation pipeline.

    Returns (js_file_path, None, <reason>) on failure/skip, otherwise
    (js_file_path, orig, no_renaming, basic_renaming, normalized,
     hash_def_one_renaming, hash_def_two_renaming).
    """
    js_file_path = l[0]

    # Skip files already handled in a previous run (module-level `seen`).
    if js_file_path in seen:
        return (js_file_path, None, 'Skipped')

    pid = int(multiprocessing.current_process().ident)

    # Temp files to be created during processing; the dict is handed to
    # cleanup() on every exit path.
    temp_files = {'path_tmp': 'tmp_%d.js' % pid,
                  'path_tmp_b': 'tmp_%d.b.js' % pid,
                  'path_tmp_b_n': 'tmp_%d.b.n.js' % pid,
                  'path_tmp_u': 'tmp_%d.u.js' % pid,
                  'path_tmp_u_n': 'tmp_%d.u.n.js' % pid,
                  'path_tmp_b_a': 'tmp_%d.b.a.js' % pid,
                  'path_tmp_u_a': 'tmp_%d.u.a.js' % pid}

    try:
        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(temp_files['path_tmp'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Preprocessor fail')

        # Pass through beautifier to fix layout.
        # (A JSNice --no-rename beautifier pass used to run first here but
        # is disabled — "JSNice is down!")
        clear = Beautifier()
        ok = clear.run(temp_files['path_tmp'],
                       temp_files['path_tmp_b_n'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')

        # Normalize the beautified file (renaming flag False).
        norm = Normalizer()
        ok = norm.run(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   temp_files['path_tmp_b_n']),
                      False,
                      temp_files['path_tmp_b'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Normalizer fail')

        # Minify
        ugly = Uglifier()
        ok = ugly.run(temp_files['path_tmp_b'],
                      temp_files['path_tmp_u_n'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Uglifier fail')

        # Normalize the minified file too (renaming flag False).
        norm = Normalizer()
        ok = norm.run(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   temp_files['path_tmp_u_n']),
                      False,
                      temp_files['path_tmp_u'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Normalizer fail')

        # Num tokens before vs after
        try:
            tok_clear = Lexer(temp_files['path_tmp_b']).tokenList
            tok_ugly = Lexer(temp_files['path_tmp_u']).tokenList
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Lexer fail')

        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(tok_clear) == len(tok_ugly):
            cleanup(temp_files)
            return (js_file_path, None, 'Num tokens mismatch')

        # Align minified and clear files, in case the beautifier
        # did something weird
        try:
            aligner = Aligner()
            # This is already the baseline corpus, no (smart) renaming yet
            aligner.align(temp_files['path_tmp_b'],
                          temp_files['path_tmp_u'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Aligner fail')

        # Index both aligned files; iBuilder_clear/iBuilder_ugly are
        # line-parallel after alignment.
        try:
            lex_clear = Lexer(temp_files['path_tmp_b_a'])
            iBuilder_clear = IndexBuilder(lex_clear.tokenList)

            lex_ugly = Lexer(temp_files['path_tmp_u_a'])
            iBuilder_ugly = IndexBuilder(lex_ugly.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')

        # Normalize the clear file with the renaming flag set to True.
        # NOTE(review): the output deliberately(?) reuses
        # temp_files['path_tmp_u_n'], clobbering the earlier
        # minified-but-unnormalized file — confirm this is intended.
        norm = Normalizer()
        ok = norm.run(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   temp_files['path_tmp_b']),
                      True,
                      temp_files['path_tmp_u_n'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Normalizer fail')

        try:
            lex_norm = Lexer(temp_files['path_tmp_u_n'])
            iBuilder_norm = IndexBuilder(lex_norm.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')

        # Render the normalized token stream: one space-separated line of
        # token texts per physical line.
        normalized = []
        for line_idx, line in enumerate(iBuilder_norm.tokens):
            normalized.append(' '.join([t for (_tt, t) in line]) + "\n")

        # Compute scoping: name2scope is a dictionary where keys
        # are (name, start_index) tuples and values are scope identifiers.
        # Note: start_index is a flat (unidimensional) index,
        # not a (line_chr_idx, col_chr_idx) index.
        try:
            scopeAnalyst = ScopeAnalyst(os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                temp_files['path_tmp_u_a']))
            # _name2defScope = scopeAnalyst.resolve_scope()
            # _isGlobal = scopeAnalyst.isGlobal
            # _name2useScope = scopeAnalyst.resolve_use_scope()
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'ScopeAnalyst fail')

        # Render the aligned clear and minified files; the loop indexes
        # iBuilder_clear by the same line_idx, relying on line-parallel
        # alignment.
        orig = []
        no_renaming = []
        for line_idx, line in enumerate(iBuilder_ugly.tokens):
            orig.append(' '.join([t for (_tt, t) in
                                  iBuilder_clear.tokens[line_idx]]) + "\n")
            no_renaming.append(' '.join([t for (_tt, t) in line]) + "\n")

        # Simple renaming: disambiguate overloaded names using scope id
        basic_renaming = renameUsingScopeId(scopeAnalyst, iBuilder_ugly)

        # More complicated renaming: collect the context around
        # each name (global variables, API calls, punctuation)
        # and build a hash of the concatenation.
        # (renameUsingHashAllPrec variant is disabled.)
        hash_def_one_renaming = renameUsingHashDefLine(scopeAnalyst,
                                                       iBuilder_ugly,
                                                       twoLines=False,
                                                       debug=False)
        hash_def_two_renaming = renameUsingHashDefLine(scopeAnalyst,
                                                       iBuilder_ugly,
                                                       twoLines=True,
                                                       debug=False)

        cleanup(temp_files)
        return (js_file_path,
                orig,
                no_renaming,
                basic_renaming,
                normalized,
                hash_def_one_renaming,
                hash_def_two_renaming)

    except Exception, e:
        cleanup(temp_files)
        return (js_file_path, None, str(e))
def processFile(l):
    """Run the deobfuscation/translation pipeline on one file.

    l is a row whose first element is the file path relative to
    corpus_root.  The file is preprocessed, beautified, minified and
    aligned; renaming candidates are then collected from Nice2Predict
    and from Moses translations of two renaming schemes (no renaming,
    hash-of-definition-line renaming).

    Returns (js_file_path, 'OK', candidates) on success, where
    candidates is a list of tuples like
    ('Nice2Predict', def_scope, tok_lin, tok_col, is_global, name, '', ''),
    or (js_file_path, None, <reason>) on failure.
    """
    js_file_path = l[0]
    base_name = os.path.splitext(os.path.basename(js_file_path))[0]
    pid = int(multiprocessing.current_process().ident)

    # Temp files to be created during processing (per-process names),
    # plus the final per-file outputs under output_path.
    temp_files = {'path_tmp': 'tmp_%d.js' % pid,
                  'path_tmp_b': 'tmp_%d.b.js' % pid,
                  'path_tmp_b_1': 'tmp_%d.b.1.js' % pid,
                  'path_tmp_b_2': 'tmp_%d.b.2.js' % pid,
                  'path_tmp_b_a': 'tmp_%d.b.a.js' % pid,
                  'path_tmp_u': 'tmp_%d.u.js' % pid,
                  'path_tmp_u_a': 'tmp_%d.u.a.js' % pid,
                  'path_tmp_unugly': 'tmp_%d.n2p.js' % pid,
                  'path_tmp_unugly_1': 'tmp_%d.n2p.1.js' % pid,
                  'path_tmp_unugly_2': 'tmp_%d.n2p.2.js' % pid,
                  'path_tmp_jsnice': 'tmp_%d.jsnice.js' % pid,
                  'f2': 'tmp_%d.no_renaming.js' % pid,
                  # 'f3': 'tmp_%d.basic_renaming.js' % pid,
                  # 'f4': 'tmp_%d.hash_renaming.js' % pid,
                  'f5': 'tmp_%d.hash_def_one_renaming.js' % pid,
                  # 'f6': 'tmp_%d.hash_def_two_renaming.js' % pid,
                  'f7': 'tmp_%d.hash_def_one_renaming_fb.js' % pid,
                  'path_orig': os.path.join(output_path, '%s.js' % base_name),
                  'path_ugly': os.path.join(output_path, '%s.u.js' % base_name),
                  'path_unugly': os.path.join(output_path, '%s.n2p.js' % base_name),
                  'path_jsnice': os.path.join(output_path, '%s.jsnice.js' % base_name)}

    # for strategy in ['js', 'lm.js', 'len.js', 'freqlen.js']:
    #     for renaming in ['no_renaming', 'hash_def_one_renaming']:
    #         temp_files['path_tmp_%s_%s' % (renaming, strategy)] = \
    #             'tmp_%d.%s.%s' % (pid, renaming, strategy)

    candidates = []

    # if True:
    try:
        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(temp_files['path_tmp'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Preprocessor fail')

        # Pass through beautifier to fix layout
        clear = Beautifier()
        ok = clear.run(temp_files['path_tmp'],
                       temp_files['path_tmp_b'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')

        # (A JSNice-based double-beautification pass, including a check
        # for spurious JSNice renamings despite --no-rename, used to run
        # here; it is disabled.)

        # Minify
        ugly = Uglifier()
        ok = ugly.run(temp_files['path_tmp_b'],
                      temp_files['path_tmp_u'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Uglifier fail')

        # Num tokens before vs after
        try:
            tok_clear = Lexer(temp_files['path_tmp_b']).tokenList
            tok_ugly = Lexer(temp_files['path_tmp_u']).tokenList
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Lexer fail')

        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(tok_clear) == len(tok_ugly):
            cleanup(temp_files)
            return (js_file_path, None, 'Num tokens mismatch')

        # Align minified and clear files, in case the beautifier
        # did something weird
        try:
            aligner = Aligner()
            # This is already the baseline corpus, no (smart) renaming yet
            aligner.align(temp_files['path_tmp_b'],
                          temp_files['path_tmp_u'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Aligner fail')

        # Minification must actually have changed the file, otherwise
        # the pair is uninteresting.
        # NOTE(review): these two file handles are never explicitly
        # closed — they leak until garbage collection.
        if open(temp_files['path_tmp_b']).read() == \
                open(temp_files['path_tmp_u']).read():
            cleanup(temp_files)
            return (js_file_path, None, 'Not minified')

        try:
            lex_ugly = Lexer(temp_files['path_tmp_u_a'])
            iBuilder_ugly = IndexBuilder(lex_ugly.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')

        ############################################################
        # From now on only work with path_tmp_b_a and path_tmp_u_a
        ############################################################

        # Store original and uglified versions
        ok = clear.run(temp_files['path_tmp_b_a'],
                       temp_files['path_orig'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')

        ok = clear.run(temp_files['path_tmp_u_a'],
                       temp_files['path_ugly'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')

        # Run the JSNice from http://www.nice2predict.org
        unuglifyJS = UnuglifyJS()
        (ok, _out, _err) = unuglifyJS.run(temp_files['path_tmp_u_a'],
                                          temp_files['path_tmp_unugly'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Nice2Predict fail')

        ok = clear.run(temp_files['path_tmp_unugly'],
                       temp_files['path_unugly'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')

        # (A JSNice re-beautification of the Nice2Predict output used to
        # run here; it is disabled.)

        try:
            lexer = Lexer(temp_files['path_unugly'])
            iBuilder = IndexBuilder(lexer.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')

        # Collect Nice2Predict's renaming suggestions: map each
        # (name, def_scope) back to the (line, column) token position of
        # its definition via the index builder's reverse maps.
        try:
            scopeAnalyst = ScopeAnalyst(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             temp_files['path_unugly']))
            nameOrigin = scopeAnalyst.nameOrigin
            isGlobal = scopeAnalyst.isGlobal

            for (name, def_scope) in nameOrigin.iterkeys():

                pos = scopeAnalyst.nameDefScope2pos[(name, def_scope)]
                (lin, col) = iBuilder.revFlatMat[pos]
                (tok_lin, tok_col) = iBuilder.revTokMap[(lin, col)]

                candidates.append(('Nice2Predict', def_scope,
                                   tok_lin, tok_col,
                                   isGlobal.get((name, pos), True),
                                   name, '', ''))
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'ScopeAnalyst fail')

        # (An equivalent candidate-collection pass against the JSNice at
        # http://www.jsnice.org used to run here; it is disabled.)

        # Compute scoping: name2scope is a dictionary where keys
        # are (name, start_index) tuples and values are scope identifiers.
        # Note: start_index is a flat (unidimensional) index,
        # not a (line_chr_idx, col_chr_idx) index.
        try:
            scopeAnalyst = ScopeAnalyst(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             temp_files['path_tmp_u_a']))
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'ScopeAnalyst fail')

        # Baseline translation: No renaming, no scoping
        no_renaming = []
        for _line_idx, line in enumerate(iBuilder_ugly.tokens):
            no_renaming.append(' '.join([t for (_tt, t) in line]) + "\n")

        with open(temp_files['f2'], 'w') as f_no_renaming:
            f_no_renaming.writelines(no_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                             'train.no_renaming', 'tuning', 'moses.ini'))
        (_moses_ok,
         translation_no_renaming,
         _err) = moses.run(temp_files['f2'])

        nc = processTranslationUnscoped(translation_no_renaming,
                                        iBuilder_ugly,
                                        lm_path,
                                        temp_files['f2'],
                                        output_path,
                                        base_name)
        if nc:
            candidates += nc

        # Same no-renaming translation, this time processed scope-aware.
        nc = processTranslationScoped(translation_no_renaming,
                                      iBuilder_ugly,
                                      scopeAnalyst,
                                      lm_path,
                                      temp_files['f2'],
                                      output_path,
                                      base_name)
        if nc:
            candidates += nc

        # More complicated renaming: collect the context around
        # each name (global variables, API calls, punctuation)
        # and build a hash of the concatenation.
        hash_def_one_renaming = renameUsingHashDefLine(scopeAnalyst,
                                                       iBuilder_ugly,
                                                       twoLines=False,
                                                       debug=False)
        with open(temp_files['f5'], 'w') as f_hash_def_one_renaming:
            f_hash_def_one_renaming.writelines(hash_def_one_renaming)

        # Translate via the Moses server proxy (module-level `proxy`)
        # instead of a local MosesDecoder run.
        mosesParams = {}
        mosesParams["text"] = hash_def_one_renaming  # lex_ugly.collapsedText
        # mosesParams["align"] = "true"
        # mosesParams["report-all-factors"] = "true"
        mresults = proxy.translate(mosesParams)  # __request("translate", mosesParams)

        rawText = Postprocessor(mresults["nbest"])
        translation_hash_renaming = rawText.getProcessedOutput()

        nc = processTranslationScoped(translation_hash_renaming,
                                      iBuilder_ugly,
                                      scopeAnalyst,
                                      lm_path,
                                      temp_files['f5'],
                                      output_path,
                                      base_name)
        if nc:
            candidates += nc

        # (A fallback pass combining the hash and no-renaming
        # translations — processTranslationScopedFallback — is disabled.)

        cleanup(temp_files)
        cleanupRenamed(pid)
        return (js_file_path, 'OK', candidates)

    except Exception, e:
        cleanup(temp_files)
        cleanupRenamed(pid)
        return (js_file_path, None, str(e).replace("\n", ""))
def processFile(l):
    """Evaluate multiple renaming strategies for one file.

    l is a row whose first element is the file path relative to
    corpus_root.  The file is preprocessed, triple-beautified
    (uglifyjs / JSNice / uglifyjs), minified and aligned; renaming
    candidates are then collected from Nice2Predict, JSNice, and Moses
    translations of five renaming schemes.

    Returns (js_file_path, 'OK', candidates) on success or
    (js_file_path, None, <reason>) on failure.
    """
    def localCleanup(output_path, base_names):
        # Remove the per-file outputs written so far, so a failed run
        # does not leave partial results under output_path.
        for base_name in base_names:
            tryRemove(os.path.join(output_path, base_name))

    js_file_path = l[0]
    base_name = os.path.splitext(os.path.basename(js_file_path))[0]
    pid = int(multiprocessing.current_process().ident)

    candidates = []

    try:
    # if True:
        # Temp files to be created during processing
        path_tmp = 'tmp_%d.js' % (pid)
        path_tmp_b = 'tmp_%d.b.js' % (pid)
        path_tmp_b_a = 'tmp_%d.b.a.js' % (pid)
        path_tmp_u = 'tmp_%d.u.js' % (pid)
        path_tmp_u_a = 'tmp_%d.u.a.js' % (pid)
        path_tmp_unugly = 'tmp_%d.n2p.js' % (pid)
        path_tmp_jsnice = 'tmp_%d.jsnice.js' % (pid)

        # Per-strategy Moses input files.
        f2 = 'tmp_%d.no_renaming.js' % (pid)
        f3 = 'tmp_%d.basic_renaming.js' % (pid)
        f4 = 'tmp_%d.hash_renaming.js' % (pid)
        f5 = 'tmp_%d.hash_def_one_renaming.js' % (pid)
        f6 = 'tmp_%d.hash_def_two_renaming.js' % (pid)

        # Final per-file output names (relative to output_path).
        path_orig = '%s.js' % (base_name)
        path_ugly = '%s.u.js' % (base_name)
        path_unugly = '%s.n2p.js' % (base_name)
        path_jsnice = '%s.jsnice.js' % (base_name)

        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(path_tmp)
        except:
            cleanup(pid)
            return (js_file_path, None, 'Preprocessor fail')

        # Pass through beautifier to fix layout:
        # uglifyjs pretty-print, then JSNice without renaming, then
        # uglifyjs again.
        clear = Beautifier()
        ok = clear.run(path_tmp, path_tmp_b+'.tmp1')
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'Beautifier 1 fail')

        jsNiceBeautifier = JSNice(flags=['--no-types', '--no-rename'])

        (ok, _out, _err) = jsNiceBeautifier.run(path_tmp_b+'.tmp1',
                                                path_tmp_b+'.tmp2')
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'JSNice Beautifier 1 fail')

        ok = clear.run(path_tmp_b+'.tmp2', path_tmp_b)
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'Beautifier 1 fail')

        # Minify
        ugly = Uglifier()
        ok = ugly.run(path_tmp_b, path_tmp_u)
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'Uglifier fail')

        # Num tokens before vs after
        try:
            tok_clear = Lexer(path_tmp_b).tokenList
            tok_ugly = Lexer(path_tmp_u).tokenList
        except:
            cleanup(pid)
            return (js_file_path, None, 'Lexer fail')

        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(tok_clear) == len(tok_ugly):
            cleanup(pid)
            return (js_file_path, None, 'Num tokens mismatch')

        # Align minified and clear files, in case the beautifier
        # did something weird
        try:
            aligner = Aligner()
            # This is already the baseline corpus, no (smart) renaming yet
            aligner.align(path_tmp_b, path_tmp_u)
        except:
            cleanup(pid)
            return (js_file_path, None, 'Aligner fail')

        try:
            # iBuilder_clear = IndexBuilder(Lexer(path_tmp_b_a).tokenList)
            iBuilder_ugly = IndexBuilder(Lexer(path_tmp_u_a).tokenList)
        except:
            cleanup(pid)
            return (js_file_path, None, 'IndexBuilder fail')

        # Store original and uglified versions
        ok = clear.run(path_tmp_u_a, os.path.join(output_path, path_ugly))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly])
            return (js_file_path, None, 'Beautifier 2 fail')

        ok = clear.run(path_tmp_b_a, os.path.join(output_path, path_orig))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig])
            return (js_file_path, None, 'Beautifier 3 fail')

        # Run the JSNice from http://www.nice2predict.org
        unuglifyJS = UnuglifyJS()
        (ok, _out, _err) = unuglifyJS.run(path_tmp_b_a, path_tmp_unugly)
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig])
            return (js_file_path, None, 'Nice2Predict fail')

        ok = clear.run(path_tmp_unugly, path_tmp_unugly+'.tmp1')
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            return (js_file_path, None, 'Beautifier 4 fail')

        (ok, _out, _err) = jsNiceBeautifier.run(path_tmp_unugly+'.tmp1',
                                                path_tmp_unugly+'.tmp2')
        if not ok:
            # NOTE(review): unlike every surrounding failure branch, this
            # one does not call localCleanup, leaving path_ugly/path_orig
            # behind in output_path — confirm whether that is intended.
            cleanup(pid)
            return (js_file_path, None, 'JSNice Beautifier 2 fail')

        ok = clear.run(path_tmp_unugly+'.tmp2',
                       os.path.join(output_path, path_unugly))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            return (js_file_path, None, 'Beautifier 4 fail')

        # Collect Nice2Predict renaming suggestions as candidates.
        try:
            scopeAnalyst = ScopeAnalyst(os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                path_tmp_unugly))
            nameOrigin = scopeAnalyst.nameOrigin

            for (name, def_scope) in nameOrigin.iterkeys():
                candidates.append(('Nice2Predict', def_scope, name, '', ''))
        except:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            return (js_file_path, None, 'ScopeAnalyst fail')

        # Run the JSNice from http://www.jsnice.org
        jsNice = JSNice()
        (ok, _out, _err) = jsNice.run(path_tmp_b_a, path_tmp_jsnice)
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            return (js_file_path, None, 'JSNice fail')

        ok = clear.run(path_tmp_jsnice, os.path.join(output_path, path_jsnice))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, \
                                       path_unugly, path_jsnice])
            return (js_file_path, None, 'Beautifier 5 fail')

        # Collect JSNice renaming suggestions as candidates.
        try:
            scopeAnalyst = ScopeAnalyst(os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                path_tmp_jsnice))
            nameOrigin = scopeAnalyst.nameOrigin

            for (name, def_scope) in nameOrigin.iterkeys():
                candidates.append(('JSNice', def_scope, name, '', ''))
        except:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, \
                                       path_unugly, path_jsnice])
            return (js_file_path, None, 'ScopeAnalyst fail')

        # Compute scoping: name2scope is a dictionary where keys
        # are (name, start_index) tuples and values are scope identifiers.
        # Note: start_index is a flat (unidimensional) index,
        # not a (line_chr_idx, col_chr_idx) index.
        try:
            scopeAnalyst = ScopeAnalyst(os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                path_tmp_u_a))
            _name2defScope = scopeAnalyst.resolve_scope()
            _isGlobal = scopeAnalyst.isGlobal
            _name2useScope = scopeAnalyst.resolve_use_scope()
        except:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, \
                                       path_unugly, path_jsnice])
            return (js_file_path, None, 'ScopeAnalyst fail')

        # Strategy 1: no renaming — translate the raw minified tokens.
        no_renaming = []
        for _line_idx, line in enumerate(iBuilder_ugly.tokens):
            no_renaming.append(' '.join([t for (_tt,t) in line]) + "\n")

        with open(f2, 'w') as f_no_renaming:
            f_no_renaming.writelines(no_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                             'train.no_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f2)

        nc = processTranslation(translation, iBuilder_ugly,
                                scopeAnalyst, lm_path,
                                f2, output_path,
                                base_name, clear)
        if nc:
            candidates += nc

        # Strategy 2 — Simple renaming: disambiguate overloaded names
        # using scope id
        basic_renaming = renameUsingScopeId(scopeAnalyst, iBuilder_ugly)
        with open(f3, 'w') as f_basic_renaming:
            f_basic_renaming.writelines(basic_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                             'train.basic_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f3)

        nc = processTranslation(translation, iBuilder_ugly,
                                scopeAnalyst, lm_path,
                                f3, output_path,
                                base_name, clear)
        if nc:
            candidates += nc

        # Strategy 3 — More complicated renaming: collect the context
        # around each name (global variables, API calls, punctuation)
        # and build a hash of the concatenation.
        hash_renaming = renameUsingHashAllPrec(scopeAnalyst,
                                               iBuilder_ugly,
                                               debug=False)
        # print hash_renaming
        with open(f4, 'w') as f_hash_renaming:
            f_hash_renaming.writelines(hash_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                             'train.hash_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f4)

        nc = processTranslation(translation, iBuilder_ugly,
                                scopeAnalyst, lm_path,
                                f4, output_path,
                                base_name, clear)
        if nc:
            candidates += nc

        # Strategy 4 — hash renaming restricted to the definition line.
        hash_def_one_renaming = renameUsingHashDefLine(scopeAnalyst,
                                                       iBuilder_ugly,
                                                       twoLines=False,
                                                       debug=False)
        with open(f5, 'w') as f_hash_def_one_renaming:
            f_hash_def_one_renaming.writelines(hash_def_one_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                             'train.hash_def_one_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f5)

        nc = processTranslation(translation, iBuilder_ugly,
                                scopeAnalyst, lm_path,
                                f5, output_path,
                                base_name, clear)
        if nc:
            candidates += nc

        # Strategy 5 — hash renaming over two lines around the definition.
        hash_def_two_renaming = renameUsingHashDefLine(scopeAnalyst,
                                                       iBuilder_ugly,
                                                       twoLines=True,
                                                       debug=False)
        with open(f6, 'w') as f_hash_def_two_renaming:
            f_hash_def_two_renaming.writelines(hash_def_two_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                             'train.hash_def_two_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f6)

        nc = processTranslation(translation, iBuilder_ugly,
                                scopeAnalyst, lm_path,
                                f6, output_path,
                                base_name, clear)
        if nc:
            candidates += nc

        cleanup(pid)
        cleanupRenamed(pid)
        return (js_file_path, 'OK', candidates)

    except Exception, e:
        cleanup(pid)
        cleanupRenamed(pid)
        return (js_file_path, None, str(e).replace("\n", ""))
js_tmp = open(out_file_path, 'w') js_tmp.write('\n'.join([' '.join([token for (_token_type, token) in line]) for line in lines]).encode('utf8')) js_tmp.write('\n') js_tmp.close() input_file = os.path.abspath(sys.argv[1]) output_file = os.path.abspath(sys.argv[2]) mode = int(sys.argv[3]) prepro = Preprocessor(input_file) prepro.write_temp_file('tmp.js') clear = Beautifier() ok = clear.run('tmp.js', 'tmp.b.js') lexer = Lexer('tmp.b.js') iBuilder = IndexBuilder(lexer.tokenList) scopeAnalyst = ScopeAnalyst(os.path.join( os.path.dirname(os.path.realpath(__file__)), 'tmp.b.js')) hash_renaming = renameUsingHashDefLine(scopeAnalyst, iBuilder, twoLines=False,