def setUp(self):
    '''
    Don't run Moses server here (just incremental tests)
    - Take all the .orig test files and create the IndexBuilders
      and ScopeAnalysts here
    '''
    self.testDir = Folder("./testing/test_files/")
    self.clearTextFiles = self.fileSort(
        self.testDir.baseFileNames("*.orig.js"))
    print("Files: " + str(self.clearTextFiles))
    self.obsfuscatedTextFiles = self.fileSort(
        self.testDir.baseFileNames("*.obs.js"))
    self.postTextFiles = self.fileSort(
        self.testDir.baseFileNames("*.post_input.js"))

    self.clearTextFiles = [
        os.path.join(self.testDir.path, file)
        for file in self.clearTextFiles
    ]
    self.obsfuscatedTextFiles = [
        os.path.join(self.testDir.path, file)
        for file in self.obsfuscatedTextFiles
    ]
    self.postTextFiles = [
        os.path.join(self.testDir.path, file)
        for file in self.postTextFiles
    ]

    #print(self.testDir.path)
    #print(self.clearTextFiles)

    self.clearLexed = [Lexer(file) for file in self.clearTextFiles]
    self.obsLexed = [Lexer(file) for file in self.obsfuscatedTextFiles]
    self.postText = [
        "".join(open(file, "r").readlines())
        for file in self.postTextFiles
    ]
def testFiles(self):
    tf = [1, 5, 6, 7, 8, 9, 10, 11]
    #tf = [11]
    for i in tf:
        print("-----------------------------------------------------")
        lexed = Lexer(self.fileList[i - 1])
        ib = IndexBuilder(lexed.tokenList)
        #print(ib)
        sa = ScopeAnalyst(self.fileList[i - 1])
        print(sa)

        nameCount = {}
        #TODO: Grab only the non-globals to look at (get the start key and look it up)
        for variable in sa.nameDefScope2pos.keys():
            start = sa.nameDefScope2pos[variable]
            name = variable[0]
            if (not sa.isGlobal[(name, start)]):
                if (name in nameCount):
                    nameCount[name] += 1
                else:
                    nameCount[name] = 1
                print(
                    str(name) + " : " + str(sa.nameDefScope2pos[variable]) +
                    " -> " + str(ib.revFlatMat[sa.nameDefScope2pos[variable]]) +
                    " Manual: " + str(self.file_definitions[i][name]))
                assert (ib.revFlatMat[sa.nameDefScope2pos[variable]][0]
                        in self.file_definitions[i][name])

        #Finally make sure that the count of definitions matches our manual check.
        for name, count in nameCount.iteritems():
            print(name + " : " + str(count) + " =?= " +
                  str(len(self.file_definitions[i][name])))
            assert (len(self.file_definitions[i][name]) == count)
def processFile(js_file_name):
    candidates = []

    lexer = Lexer(js_file_name)
    iBuilder = IndexBuilder(lexer.tokenList)

    scopeAnalyst = ScopeAnalyst(js_file_name)
    nameOrigin = scopeAnalyst.nameOrigin
    isGlobal = scopeAnalyst.isGlobal
    nameDefScope2pos = scopeAnalyst.nameDefScope2pos

    for (name, def_scope) in nameOrigin.iterkeys():
        pos = nameDefScope2pos[(name, def_scope)]
        (lin, col) = iBuilder.revFlatMat[pos]
        scope = iBuilder.revTokMap[(lin, col)]
        glb = isGlobal.get((name, pos), True)

        if name != 'TOKEN_LITERAL_STRING' and \
                name != 'TOKEN_LITERAL_NUMBER':
            candidates.append((scope, name, pos, (lin, col), glb, def_scope))

    print
    print

    for c in sorted(candidates, key=lambda e: e[0]):
        (scope, name, pos, (lin, col), glb, def_scope) = c
        if name == 'n' or name == 'calendarEventId':
            print '\t', scope, name, pos, (lin, col), glb
            print '\t\t', def_scope
def load(pth):
    lexer = Lexer(pth)
    iBuilder = IndexBuilder(lexer.tokenList)
    scopeAnalyst = ScopeAnalyst(
        os.path.join(os.path.dirname(os.path.realpath(__file__)), pth))
    return (iBuilder, scopeAnalyst)
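# A hypothetical usage sketch for load(): it runs the helper on one sample
# file and maps every recorded definition position back to a (line, column)
# pair. The sample path is an assumption; Lexer, IndexBuilder and
# ScopeAnalyst are the project's own classes and are assumed to be imported
# above, along with os.
if __name__ == '__main__':
    sample = os.path.join('testing', 'test_files', 'test_file1.orig.js')
    (iBuilder, scopeAnalyst) = load(sample)
    for (name, def_scope), pos in scopeAnalyst.nameDefScope2pos.iteritems():
        print name, '->', iBuilder.revFlatMat[pos]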
def testFiles(self):
    #Known bugs: The definitions of sum and numberEquals in test_file1
    #seem to be pointing to the wrong instance...
    i = 1
    lexed = Lexer(self.fileList[0])
    ib = IndexBuilder(lexed.tokenList)
    sa = ScopeAnalyst(self.fileList[0])
    for variable in sa.nameDefScope2pos.keys():
        print(
            str(variable[0]) + " : " + str(sa.nameDefScope2pos[variable]) +
            " -> " + str(ib.revFlatMat[sa.nameDefScope2pos[variable]]))
def processFile(l):
    js_file_name = l
    candidates = []

    if (True):  #try:
        print(js_file_name)
        lexer = Lexer(js_file_name)
        return IndexBuilder(lexer.tokenList)
def testfileDebug(self):
    for f in self.fileList:
        print("---------------------------------- " + f +
              " ----------------------------------")
        orig = f + ".js"
        min = f + ".u.js"
        lo = Lexer(orig)
        lm = Lexer(min)
        print("---------------------------------- original text ----------------------------------")
        print(lo.programText)
        print("---------------------------------- minified text ----------------------------------")
        print(lm.programText)
        for id in self.ids:
            to_read = f + id + ".js"
            print("---------------------------------- " + to_read +
                  " ----------------------------------")
            lexed = Lexer(to_read)
            print("---------------------------------- text ----------------------------------")
            print(lexed.programText)
            print("---------------------------------- tokenlist ----------------------------------")
            print(lexed.tokenList)
            ib = IndexBuilder(lexed.tokenList)
            print("---------------------------------- IndexBuilder ----------------------------------")
            print(ib)
            sa = ScopeAnalyst(to_read)
            print("---------------------------------- ScopeAnalyst ----------------------------------")
            print(sa)
def processFile(l):
    js_file_name = l
    candidates = []

    try:
        lexer = Lexer(os.path.join(results_path, js_file_name))
        iBuilder = IndexBuilder(lexer.tokenList)

        scopeAnalyst = ScopeAnalyst(os.path.join(results_path, js_file_name))
        nameOrigin = scopeAnalyst.nameOrigin
        isGlobal = scopeAnalyst.isGlobal
        nameDefScope2pos = scopeAnalyst.nameDefScope2pos

        for (name, def_scope) in nameOrigin.iterkeys():
            pos = nameDefScope2pos[(name, def_scope)]
            (lin, col) = iBuilder.revFlatMat[pos]
            scope = iBuilder.revTokMap[(lin, col)]
            glb = isGlobal.get((name, pos), True)

            # print name, def_scope, pos, scope, glb #, (lin,col)

            # if not isGlobal.get((name, pos), True):
            #     scope = def_scope.replace("\"","")
            #     i = scope.find('[variables][_values]')
            #     if i > -1:
            #         scope = scope[:i+len('[variables][_values]')]
            #     i = scope.find('[functions][_values]')
            #     if i > -1:
            #         scope = scope[:i+len('[functions][_values]')]

            if name != 'TOKEN_LITERAL_STRING' and \
                    name != 'TOKEN_LITERAL_NUMBER':
                candidates.append((scope, name, glb))
    except:
        return (js_file_name, None, 'ScopeAnalyst fail')

    # print 'candidates------------------'
    # for candidate in candidates:
    #     print candidate

    return (js_file_name, 'OK', candidates)
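# A hypothetical driver sketch for the worker above, assuming results_path is
# defined at module scope and that the file names come from a plain-text
# listing ('file_list.txt' and the pool size are assumptions). Each result is
# the (js_file_name, status, payload) triple returned by processFile.
if __name__ == '__main__':
    import multiprocessing
    with open('file_list.txt') as f_list:
        files = [line.strip() for line in f_list if line.strip()]
    pool = multiprocessing.Pool(processes=4)
    for (js_file_name, status, payload) in pool.imap_unordered(processFile, files):
        if status is None:
            print js_file_name, payload        # error label, e.g. 'ScopeAnalyst fail'
        else:
            print js_file_name, len(payload), 'candidate names'
    pool.close()
    pool.join()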
def summarizeUnscopedTranslation(renaming_map, f_path, translation_strategy,
                                 output_path, base_name, name_candidates,
                                 name_positions, iBuilder):
    nc = []

    f_base = os.path.basename(f_path)
    training_strategy = f_base.split('.')[1]
    tmp_path = '%s.%s.js' % (f_base[:-3], translation_strategy)

    o_path = '%s.%s.unscoped.%s.js' % (base_name, training_strategy,
                                       translation_strategy)

    # print f_path, f_base, training_strategy, tmp_path, o_path, base_name

    writeTmpLines(renameHashed(iBuilder, name_positions, renaming_map),
                  tmp_path)

    clear = Beautifier()
    ok = clear.run(tmp_path, os.path.join(output_path, o_path))
    if not ok:
        return False

    try:
        lexer = Lexer(os.path.join(output_path, o_path))
        iBuilder_local = IndexBuilder(lexer.tokenList)
        scopeAnalyst_local = ScopeAnalyst(os.path.join(output_path, o_path))
    except:
        return False

    nameOrigin = scopeAnalyst_local.nameOrigin
    isGlobal = scopeAnalyst_local.isGlobal

    for (name, def_scope) in nameOrigin.iterkeys():
        pos = scopeAnalyst_local.nameDefScope2pos[(name, def_scope)]

        if not False:  #isGlobal.get((name, pos), True):
            (lin, col) = iBuilder_local.revFlatMat[pos]
            (tok_lin, tok_col) = iBuilder_local.revTokMap[(lin, col)]

            nc.append(
                ('%s.unscoped.%s' % (training_strategy, translation_strategy),
                 def_scope, tok_lin, tok_col,
                 isGlobal.get((name, pos), True), name, '', ''))

    return nc
def processFile(l):
    js_file_path = l[0]

    pid = int(multiprocessing.current_process().ident)

    try:
        # Temp files to be created during processing
        path_tmp = 'tmp_%d.js' % pid
        path_tmp_b = 'tmp_%d.b.js' % pid

        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(path_tmp)
        except:
            cleanup(pid)
            return (js_file_path, None, 'Preprocessor fail')

        # Pass through beautifier to fix layout
        clear = Beautifier()
        ok = clear.run(path_tmp, path_tmp_b)
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'Beautifier fail')

        try:
            iBuilder_clear = IndexBuilder(Lexer(path_tmp_b).tokenList)
        except:
            cleanup(pid)
            return (js_file_path, None, 'IndexBuilder fail')

        n_lines = len(iBuilder_clear.tokens)
        max_line_len = max([len(l) for l in iBuilder_clear.tokens])

        cleanup(pid)
        return (js_file_path, n_lines, max_line_len)

    except Exception, e:
        cleanup(pid)
        return (js_file_path, None, str(e))
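# A hypothetical aggregation sketch for the worker above: it pools the
# (path, n_lines, max_line_len) triples and reports corpus-wide maxima.
# The listing file name and pool size are assumptions; multiprocessing is
# already required by processFile.
if __name__ == '__main__':
    rows = [line.strip().split() for line in open('corpus_index.txt') if line.strip()]
    pool = multiprocessing.Pool(processes=8)
    results = [r for r in pool.imap_unordered(processFile, rows) if r[1] is not None]
    pool.close()
    pool.join()
    print 'files processed:', len(results)
    print 'longest file (lines):', max([n for (_p, n, _m) in results])
    print 'longest line (tokens):', max([m for (_p, _n, m) in results])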
def processFile(l):
    base_name = l[0]
    js_file_path = l[1]
    print(base_name)
    print(js_file_path)

    #if(True):
    try:
        lexed = Lexer(js_file_path)
        ib = IndexBuilder(lexed.tokenList)
        sa = ScopeAnalyst(js_file_path)

        #num globals = all in is_global == True + all unique names
        #in name2CharPositions not in is_global
        base_global = set(
            [name for name, value in sa.isGlobal.iteritems() if value == True])

        #Get all known names in the file.
        known_names = set([name for name, value in sa.isGlobal.iteritems()])
        for name, loc in ib.name2CharPositions.iteritems():
            if (name not in known_names):  #if never seen, it's a global
                base_global.add(name)

        return [base_name, len(base_global)]
    except:
        return [base_name, None]
def processFile(row):
    js_file_path = os.path.join(corpus_root, row[0])

    pid = int(multiprocessing.current_process().ident)

    base_name = os.path.splitext(os.path.basename(js_file_path))[0]

    # Temp files to be created during processing
    temp_files = {
        'path_tmp': 'tmp_%d.js' % pid,
        'path_tmp_b': 'tmp_%d.b.js' % pid,
        'path_tmp_b_a': 'tmp_%d.b.a.js' % pid,
        'path_tmp_u': 'tmp_%d.u.js' % pid,
        'path_tmp_u_a': 'tmp_%d.u.a.js' % pid
    }

    try:
        # Pass through beautifier to fix layout:
        #
        # - once through JSNice without renaming
        # jsNiceBeautifier = JSNice(flags=['--no-types', '--no-rename'])
        #
        # (ok, _out, _err) = jsNiceBeautifier.run(js_file_path,
        #                                         temp_files['path_tmp'])
        # if not ok:
        #     cleanup(temp_files)
        #     return (js_file_path, False, 'JSNice Beautifier fail')
        #
        # # Weird JSNice renamings despite --no-rename
        # try:
        #     before = set([token for (token, token_type) in
        #                   Lexer(js_file_path).tokenList
        #                   if is_token_subtype(token_type, Token.Name)])
        #     after = set([token for (token, token_type) in
        #                  Lexer(temp_files['path_tmp']).tokenList
        #                  if is_token_subtype(token_type, Token.Name)])
        #
        #     if not before == after:
        #         return (js_file_path, False, 'Weird JSNice renaming')
        #
        # except:
        #     cleanup(temp_files)
        #     return (js_file_path, False, 'Lexer fail')

        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(temp_files['path_tmp'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Preprocessor fail')

        # - and another time through uglifyjs pretty print only
        clear = Beautifier()
        ok = clear.run(temp_files['path_tmp'], temp_files['path_tmp_b'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, False, 'Beautifier fail')

        # Minify
        ugly = Uglifier()
        ok = ugly.run(temp_files['path_tmp_b'], temp_files['path_tmp_u'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, False, 'Uglifier fail')

        # Num tokens before vs after
        try:
            tok_clear = Lexer(temp_files['path_tmp_b']).tokenList
            tok_ugly = Lexer(temp_files['path_tmp_u']).tokenList
        except:
            cleanup(temp_files)
            return (js_file_path, False, 'Lexer fail')

        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(tok_clear) == len(tok_ugly):
            cleanup(temp_files)
            return (js_file_path, False, 'Num tokens mismatch')

        # Align minified and clear files, in case the beautifier
        # did something weird
        try:
            aligner = Aligner()
            # This is already the baseline corpus, no (smart) renaming yet
            aligner.align(temp_files['path_tmp_b'], temp_files['path_tmp_u'])
        except:
            cleanup(temp_files)
            return (js_file_path, False, 'Aligner fail')

        # Check if minification resulted in any change
        # It's not very interesting otherwise
        if open(temp_files['path_tmp_b_a']).read() == \
                open(temp_files['path_tmp_u_a']).read():
            cleanup(temp_files)
            return (js_file_path, False, 'Not minified')

        try:
            lex_ugly = Lexer(temp_files['path_tmp_u_a'])
            _iBuilder_ugly = IndexBuilder(lex_ugly.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, False, 'IndexBuilder fail')

        # Store original and uglified versions
        ok = clear.run(temp_files['path_tmp_b_a'],
                       os.path.join(output_path, '%s.js' % base_name))
        if not ok:
            cleanup(temp_files)
            cleanupProcessed(base_name)
            return (js_file_path, False, 'Beautifier fail')

        ok = clear.run(temp_files['path_tmp_u_a'],
                       os.path.join(output_path, '%s.u.js' % base_name))
        if not ok:
            cleanup(temp_files)
            cleanupProcessed(base_name)
            return (js_file_path, False, 'Beautifier fail')

        cleanup(temp_files)
        return (js_file_path, True, 'OK')

    except Exception, e:
        cleanup(temp_files)
        return (js_file_path, False, str(e))
def processFile(js_file_path):

    try:
        # Num tokens before vs after
        try:
            tok1 = Lexer(os.path.join(files_root, 'orig',
                                      js_file_path)).tokenList
            tok2 = Lexer(os.path.join(files_root, 'no_renaming',
                                      js_file_path)).tokenList
            # tok3 = Lexer(os.path.join(files_root, 'basic_renaming', js_file_path)).tokenList
            # tok4 = Lexer(os.path.join(files_root, 'normalized', js_file_path)).tokenList
            tok5 = Lexer(os.path.join(files_root, 'hash_def_one_renaming',
                                      js_file_path)).tokenList
            tok6 = Lexer(os.path.join(files_root, 'hash_def_two_renaming',
                                      js_file_path)).tokenList
        except:
            return (js_file_path, None, 'Lexer fail')

        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(set([len(tok1), len(tok2), len(tok5), len(tok6)])) == 1:
            return (js_file_path, None, 'Num tokens mismatch')

        clear = Beautifier()

        # Align minified and clear files, in case the beautifier
        # did something weird
        aligner = Aligner()

        (aligned1, aligned2) = aligner.web_align(tok1, tok2)

        (ok, beautified1, _err) = clear.web_run(aligned1)
        tok11 = WebLexer(beautified1).tokenList

        (ok, beautified2, _err) = clear.web_run(aligned2)
        tok22 = WebLexer(beautified2).tokenList

        (aligned5, aligned2) = aligner.web_align(tok5, tok2)
        (ok, beautified5, _err) = clear.web_run(aligned5)
        tok55 = WebLexer(beautified5).tokenList

        (aligned6, aligned2) = aligner.web_align(tok6, tok2)
        (ok, beautified6, _err) = clear.web_run(aligned6)
        tok66 = WebLexer(beautified6).tokenList

        # try:
        #     aligner = Aligner()
        #     # This is already the baseline corpus, no (smart) renaming yet
        #     aligner.align(temp_files['path_tmp_b'],
        #                   temp_files['path_tmp_u'])
        # except:
        #     return (js_file_path, None, 'Aligner fail')

        try:
            iBuilder1 = IndexBuilder(tok11)
            iBuilder2 = IndexBuilder(tok22)
            # iBuilder3 = IndexBuilder(tok3)
            # iBuilder4 = IndexBuilder(tok4)
            iBuilder5 = IndexBuilder(tok55)
            iBuilder6 = IndexBuilder(tok66)
        except:
            return (js_file_path, None, 'IndexBuilder fail')

        # Check that at least one variable was renamed during minification
        orig_names = set([
            token for line in iBuilder1.tokens for (token_type, token) in line
            if is_token_subtype(token_type, Token.Name)
        ])
        ugly_names = set([
            token for line in iBuilder2.tokens for (token_type, token) in line
            if is_token_subtype(token_type, Token.Name)
        ])

        if not len(orig_names.difference(ugly_names)):
            return (js_file_path, None, 'Not minified')

        orig = []
        no_renaming = []
        # basic_renaming = []
        # normalized = []
        hash_def_one_renaming = []
        hash_def_two_renaming = []

        for _line_idx, line in enumerate(iBuilder1.tokens):
            orig.append(' '.join([t for (_tt, t) in line]) + "\n")

        for _line_idx, line in enumerate(iBuilder2.tokens):
            no_renaming.append(' '.join([t for (_tt, t) in line]) + "\n")

        # for _line_idx, line in enumerate(iBuilder3.tokens):
        #     basic_renaming.append(' '.join([t for (_tt,t) in line]) + "\n")

        # for _line_idx, line in enumerate(iBuilder4.tokens):
        #     normalized.append(' '.join([t for (_tt,t) in line]) + "\n")

        for _line_idx, line in enumerate(iBuilder5.tokens):
            hash_def_one_renaming.append(' '.join([t for (_tt, t) in line]) + "\n")

        for _line_idx, line in enumerate(iBuilder6.tokens):
            hash_def_two_renaming.append(' '.join([t for (_tt, t) in line]) + "\n")

        return (js_file_path,
                orig,
                no_renaming,
                # basic_renaming,
                # normalized,
                hash_def_one_renaming,
                hash_def_two_renaming)

    except Exception, e:
        return (js_file_path, None, str(e))
def processFile(l):
    js_file_path = l[0]

    if js_file_path in seen:
        return (js_file_path, None, 'Skipped')

    pid = int(multiprocessing.current_process().ident)

    # Temp files to be created during processing
    temp_files = {
        'path_tmp': 'tmp_%d.js' % pid,
        'path_tmp_b': 'tmp_%d.b.js' % pid,
        'path_tmp_b_n': 'tmp_%d.b.n.js' % pid,
        'path_tmp_u': 'tmp_%d.u.js' % pid,
        'path_tmp_u_n': 'tmp_%d.u.n.js' % pid,
        'path_tmp_b_a': 'tmp_%d.b.a.js' % pid,
        'path_tmp_u_a': 'tmp_%d.u.a.js' % pid
    }

    try:
        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(temp_files['path_tmp'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Preprocessor fail')

        # Pass through beautifier to fix layout:
        # - once through JSNice without renaming
        # jsNiceBeautifier = JSNice(flags=['--no-types', '--no-rename'])
        #
        # (ok, _out, _err) = jsNiceBeautifier.run(temp_files['path_tmp'],
        #                                         temp_files['path_tmp_b_n'])
        # if not ok:
        #     cleanup(temp_files)
        #     return (js_file_path, None, 'JSNice Beautifier fail')
        #
        # - and another time through uglifyjs pretty print only
        # clear = Beautifier()
        # ok = clear.run(temp_files['path_tmp_b_n'],
        #                temp_files['path_tmp_b'])
        # if not ok:
        #     cleanup(temp_files)
        #     return (js_file_path, None, 'Beautifier fail')
        #
        # JSNice is down!
        clear = Beautifier()
        ok = clear.run(temp_files['path_tmp'], temp_files['path_tmp_b_n'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')

        # Normalize
        norm = Normalizer()
        ok = norm.run(
            os.path.join(os.path.dirname(os.path.realpath(__file__)),
                         temp_files['path_tmp_b_n']),
            False, temp_files['path_tmp_b'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Normalizer fail')

        # Minify
        ugly = Uglifier()
        ok = ugly.run(temp_files['path_tmp_b'], temp_files['path_tmp_u_n'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Uglifier fail')

        # Normalize
        norm = Normalizer()
        ok = norm.run(
            os.path.join(os.path.dirname(os.path.realpath(__file__)),
                         temp_files['path_tmp_u_n']),
            False, temp_files['path_tmp_u'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Normalizer fail')

        # Num tokens before vs after
        try:
            tok_clear = Lexer(temp_files['path_tmp_b']).tokenList
            tok_ugly = Lexer(temp_files['path_tmp_u']).tokenList
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Lexer fail')

        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(tok_clear) == len(tok_ugly):
            cleanup(temp_files)
            return (js_file_path, None, 'Num tokens mismatch')

        # Align minified and clear files, in case the beautifier
        # did something weird
        try:
            aligner = Aligner()
            # This is already the baseline corpus, no (smart) renaming yet
            aligner.align(temp_files['path_tmp_b'], temp_files['path_tmp_u'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Aligner fail')

        try:
            lex_clear = Lexer(temp_files['path_tmp_b_a'])
            iBuilder_clear = IndexBuilder(lex_clear.tokenList)

            lex_ugly = Lexer(temp_files['path_tmp_u_a'])
            iBuilder_ugly = IndexBuilder(lex_ugly.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')

        # Normalize
        norm = Normalizer()
        ok = norm.run(
            os.path.join(os.path.dirname(os.path.realpath(__file__)),
                         temp_files['path_tmp_b']),
            True, temp_files['path_tmp_u_n'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Normalizer fail')

        try:
            lex_norm = Lexer(temp_files['path_tmp_u_n'])
            iBuilder_norm = IndexBuilder(lex_norm.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')

        normalized = []
        for line_idx, line in enumerate(iBuilder_norm.tokens):
            normalized.append(' '.join([t for (_tt, t) in line]) + "\n")

        # Compute scoping: name2scope is a dictionary where keys
        # are (name, start_index) tuples and values are scope identifiers.
        # Note: start_index is a flat (unidimensional) index,
        # not a (line_chr_idx, col_chr_idx) index.
        try:
            scopeAnalyst = ScopeAnalyst(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             temp_files['path_tmp_u_a']))
            # _name2defScope = scopeAnalyst.resolve_scope()
            # _isGlobal = scopeAnalyst.isGlobal
            # _name2useScope = scopeAnalyst.resolve_use_scope()
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'ScopeAnalyst fail')

        orig = []
        no_renaming = []
        for line_idx, line in enumerate(iBuilder_ugly.tokens):
            orig.append(' '.join([t for (_tt, t) in \
                                  iBuilder_clear.tokens[line_idx]]) + "\n")
            no_renaming.append(' '.join([t for (_tt, t) in line]) + "\n")

        # Simple renaming: disambiguate overloaded names using scope id
        basic_renaming = renameUsingScopeId(scopeAnalyst, iBuilder_ugly)

        # More complicated renaming: collect the context around
        # each name (global variables, API calls, punctuation)
        # and build a hash of the concatenation.
        # hash_renaming = renameUsingHashAllPrec(scopeAnalyst,
        #                                        iBuilder_ugly,
        #                                        debug=True)

        hash_def_one_renaming = renameUsingHashDefLine(scopeAnalyst,
                                                       iBuilder_ugly,
                                                       twoLines=False,
                                                       debug=False)

        hash_def_two_renaming = renameUsingHashDefLine(scopeAnalyst,
                                                       iBuilder_ugly,
                                                       twoLines=True,
                                                       debug=False)

        cleanup(temp_files)

        return (js_file_path,
                orig,
                no_renaming,
                basic_renaming,
                normalized,
                # hash_renaming,
                hash_def_one_renaming,
                hash_def_two_renaming)

    except Exception, e:
        cleanup(temp_files)
        return (js_file_path, None, str(e))
input_file = os.path.abspath(sys.argv[1])
output_file = os.path.abspath(sys.argv[2])
mode = int(sys.argv[3])

prepro = Preprocessor(input_file)
prepro.write_temp_file('tmp.js')

clear = Beautifier()
ok = clear.run('tmp.js', 'tmp.b.js')

lexer = Lexer('tmp.b.js')
iBuilder = IndexBuilder(lexer.tokenList)

scopeAnalyst = ScopeAnalyst(
    os.path.join(os.path.dirname(os.path.realpath(__file__)), 'tmp.b.js'))

hash_renaming = renameUsingHashDefLine(scopeAnalyst,
                                       iBuilder,
                                       twoLines=False,
                                       debug=mode)

with open(output_file, 'w') as f:
    f.writelines(hash_renaming)
# writeTmpLines(hash_renaming, output_file)
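# Invocation sketch for the script above, assuming it is saved as
# rename_hash_def.py (the file name is an assumption). It expects an input
# JS file, an output path for the renamed token lines, and an integer
# debug mode:
#
#     python rename_hash_def.py input.js output.js 0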
def deobfuscateJS(self, obfuscatedCode, transactionID):
    proxy = xmlrpclib.ServerProxy("http://godeep.cs.ucdavis.edu:8080/RPC2")

    mosesParams = {}
    candidates = []

    baseDir = "/home/ccasal/temp/"
    tempFile = baseDir + str(transactionID) + "_temp.js"
    lm_path = "/data/bogdanv/deobfuscator/experiments/corpora/corpus.lm.970k/js.blm.lm"

    preproFile = baseDir + str(transactionID) + "_prepro.js"
    beautFile = baseDir + str(transactionID) + "_beaut.js"

    # Strip comments, replace literals, etc
    try:
        prepro = WebPreprocessor(obfuscatedCode)
        #TODO replace with: prepro = WebPreprocessor(text)
        prepro.write_temp_file(preproFile)
    except:
        cleanup([preproFile])
        print("Preprocessor failed")
        return ("Preprocessor Failed")

    clear = Beautifier()
    #TODO: Need a text version of beautifier to avoid the file read and write.
    #(ok, beautText, err) = clear.webRun(preproText)
    ok = clear.run(preproFile, beautFile)
    print(ok)
    if (not ok):
        cleanup([preproFile, beautFile])
        return ("Beautifier Failed")
        #quit()

    try:
        lex_ugly = Lexer(beautFile)
        iBuilder_ugly = IndexBuilder(lex_ugly.tokenList)
    except:
        cleanup([preproFile, beautFile])
        print("IndexBuilder fail")
        return ("IndexBuilder Failed")

    lex_ugly.write_temp_file(tempFile)

    #Do Scope related tasks
    #a raw text version
    try:
        scopeAnalyst = ScopeAnalyst(tempFile)
    except:
        cleanup({"temp": tempFile})
        print("ScopeAnalyst Fail")
        return ("ScopeAnalyst Failed")

    #Do Rename related tasks
    #In our case, I don't think we need to actually do anything for no_renaming
    #no_renaming = []
    #for _line_idx, line in enumerate(iBuilder_ugly.tokens):
    #    no_renaming.append(' '.join([t for (_tt,t) in line]) + "\n")

    #Hash_def_one_renaming
    #beautText = renameUsingHashDefLine(scopeAnalyst,
    #                                   iBuilder_ugly,
    #                                   twoLines=False,
    #                                   debug=False)

    print(lex_ugly.collapsedText)
    mosesParams["text"] = lex_ugly.collapsedText
    mosesParams["align"] = "true"
    mosesParams["report-all-factors"] = "true"

    results = proxy.translate(mosesParams)  # __request("translate", mosesParams)
    rawText = Postprocessor(results["nbest"])
    translation = rawText.getProcessedOutput()

    #Send to output:
    cleanup([preproFile, beautFile, tempFile])
    return (translation)
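# A minimal sketch of the Moses XML-RPC round trip used above, assuming a
# Moses server is listening at the given URL (the URL and the sample token
# line are assumptions; Postprocessor is the project's own class):
#
#     proxy = xmlrpclib.ServerProxy("http://localhost:8080/RPC2")
#     params = {"text": "var a = function ( b ) { return b + 1 ; } ;",
#               "align": "true",
#               "report-all-factors": "true"}
#     reply = proxy.translate(params)
#     print Postprocessor(reply["nbest"]).getProcessedOutput()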
def processFile(l):
    js_file_path = l[0]
    base_name = os.path.splitext(os.path.basename(js_file_path))[0]

    pid = int(multiprocessing.current_process().ident)

    temp_files = {
        'path_tmp': 'tmp_%d.js' % pid,
        'path_tmp_b': 'tmp_%d.b.js' % pid,
        'path_tmp_b_1': 'tmp_%d.b.1.js' % pid,
        'path_tmp_b_2': 'tmp_%d.b.2.js' % pid,
        'path_tmp_b_a': 'tmp_%d.b.a.js' % pid,
        'path_tmp_u': 'tmp_%d.u.js' % pid,
        'path_tmp_u_a': 'tmp_%d.u.a.js' % pid,
        'path_tmp_unugly': 'tmp_%d.n2p.js' % pid,
        'path_tmp_unugly_1': 'tmp_%d.n2p.1.js' % pid,
        'path_tmp_unugly_2': 'tmp_%d.n2p.2.js' % pid,
        'path_tmp_jsnice': 'tmp_%d.jsnice.js' % pid,
        'f2': 'tmp_%d.no_renaming.js' % pid,
        # 'f3': 'tmp_%d.basic_renaming.js' % pid,
        # 'f4': 'tmp_%d.hash_renaming.js' % pid,
        'f5': 'tmp_%d.hash_def_one_renaming.js' % pid,
        # 'f6': 'tmp_%d.hash_def_two_renaming.js' % pid,
        'f7': 'tmp_%d.hash_def_one_renaming_fb.js' % pid,
        'path_orig': os.path.join(output_path, '%s.js' % base_name),
        'path_ugly': os.path.join(output_path, '%s.u.js' % base_name),
        'path_unugly': os.path.join(output_path, '%s.n2p.js' % base_name),
        'path_jsnice': os.path.join(output_path, '%s.jsnice.js' % base_name)
    }

    # for strategy in ['js', 'lm.js', 'len.js', 'freqlen.js']:
    #     for renaming in ['no_renaming', 'hash_def_one_renaming']:
    #         temp_files['path_tmp_%s_%s' % (renaming, strategy)] = \
    #             'tmp_%d.%s.%s' % (pid, renaming, strategy)

    candidates = []

    # if True:
    try:
        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(temp_files['path_tmp'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Preprocessor fail')

        # Pass through beautifier to fix layout
        clear = Beautifier()
        ok = clear.run(temp_files['path_tmp'], temp_files['path_tmp_b'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')

        # # Pass through beautifier to fix layout
        # clear = Beautifier()
        # ok = clear.run(temp_files['path_tmp'],
        #                temp_files['path_tmp_b_1'])
        # if not ok:
        #     cleanup(temp_files)
        #     return (js_file_path, None, 'Beautifier fail')
        #
        # jsNiceBeautifier = JSNice(flags=['--no-types', '--no-rename'])
        #
        # (ok, _out, _err) = jsNiceBeautifier.run(temp_files['path_tmp_b_1'],
        #                                         temp_files['path_tmp_b_2'])
        # if not ok:
        #     cleanup(temp_files)
        #     print js_file_path, _err
        #     return (js_file_path, None, 'JSNice Beautifier fail')
        #
        # ok = clear.run(temp_files['path_tmp_b_2'],
        #                temp_files['path_tmp_b'])
        # if not ok:
        #     cleanup(temp_files)
        #     return (js_file_path, None, 'Beautifier fail')
        #
        # # Weird JSNice renamings despite --no-rename
        # try:
        #     before = set([token for (token, token_type) in
        #                   Lexer(temp_files['path_tmp_b_1']).tokenList
        #                   if is_token_subtype(token_type, Token.Name)])
        #     after = set([token for (token, token_type) in
        #                  Lexer(temp_files['path_tmp_b']).tokenList
        #                  if is_token_subtype(token_type, Token.Name)])
        #
        #     if not before == after:
        #         return (js_file_path, None, 'Weird JSNice renaming')
        #
        # except:
        #     cleanup(temp_files)
        #     return (js_file_path, None, 'Lexer fail')

        # Minify
        ugly = Uglifier()
        ok = ugly.run(temp_files['path_tmp_b'], temp_files['path_tmp_u'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Uglifier fail')

        # Num tokens before vs after
        try:
            tok_clear = Lexer(temp_files['path_tmp_b']).tokenList
            tok_ugly = Lexer(temp_files['path_tmp_u']).tokenList
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Lexer fail')

        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(tok_clear) == len(tok_ugly):
            cleanup(temp_files)
            return (js_file_path, None, 'Num tokens mismatch')

        # Align minified and clear files, in case the beautifier
        # did something weird
        try:
            aligner = Aligner()
            # This is already the baseline corpus, no (smart) renaming yet
            aligner.align(temp_files['path_tmp_b'], temp_files['path_tmp_u'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Aligner fail')

        if open(temp_files['path_tmp_b']).read() == \
                open(temp_files['path_tmp_u']).read():
            cleanup(temp_files)
            return (js_file_path, None, 'Not minified')

        try:
            lex_ugly = Lexer(temp_files['path_tmp_u_a'])
            iBuilder_ugly = IndexBuilder(lex_ugly.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')

        ############################################################
        # From now on only work with path_tmp_b_a and path_tmp_u_a
        ############################################################

        # Store original and uglified versions
        ok = clear.run(temp_files['path_tmp_b_a'], temp_files['path_orig'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')

        ok = clear.run(temp_files['path_tmp_u_a'], temp_files['path_ugly'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')

        # Run the JSNice from http://www.nice2predict.org
        unuglifyJS = UnuglifyJS()
        (ok, _out, _err) = unuglifyJS.run(temp_files['path_tmp_u_a'],
                                          temp_files['path_tmp_unugly'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Nice2Predict fail')

        ok = clear.run(temp_files['path_tmp_unugly'],
                       temp_files['path_unugly'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')

        # ok = clear.run(temp_files['path_tmp_unugly'],
        #                temp_files['path_tmp_unugly_1'])
        # if not ok:
        #     cleanup(temp_files)
        #     return (js_file_path, None, 'Beautifier fail')
        #
        # (ok, _out, _err) = jsNiceBeautifier.run(temp_files['path_tmp_unugly_1'],
        #                                         temp_files['path_tmp_unugly_2'])
        # if not ok:
        #     cleanup(temp_files)
        #     print js_file_path, _err
        #     return (js_file_path, None, 'JSNice Beautifier fail')
        #
        # ok = clear.run(temp_files['path_tmp_unugly_2'],
        #                temp_files['path_unugly'])
        # if not ok:
        #     cleanup(temp_files)
        #     return (js_file_path, None, 'Beautifier fail')

        try:
            lexer = Lexer(temp_files['path_unugly'])
            iBuilder = IndexBuilder(lexer.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')

        try:
            scopeAnalyst = ScopeAnalyst(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             temp_files['path_unugly']))
            nameOrigin = scopeAnalyst.nameOrigin
            isGlobal = scopeAnalyst.isGlobal

            for (name, def_scope) in nameOrigin.iterkeys():
                pos = scopeAnalyst.nameDefScope2pos[(name, def_scope)]
                (lin, col) = iBuilder.revFlatMat[pos]
                (tok_lin, tok_col) = iBuilder.revTokMap[(lin, col)]

                candidates.append(('Nice2Predict', def_scope,
                                   tok_lin, tok_col,
                                   isGlobal.get((name, pos), True),
                                   name, '', ''))
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'ScopeAnalyst fail')

        # # Run the JSNice from http://www.jsnice.org
        # jsNice = JSNice()
        # (ok, _out, _err) = jsNice.run(temp_files['path_tmp_u_a'],
        #                               temp_files['path_tmp_jsnice'])
        # if not ok:
        #     cleanup(temp_files)
        #     return (js_file_path, None, 'JSNice fail')
        #
        # ok = clear.run(temp_files['path_tmp_jsnice'],
        #                temp_files['path_jsnice'])
        # if not ok:
        #     cleanup(temp_files)
        #     return (js_file_path, None, 'Beautifier fail')
        #
        # try:
        #     lexer = Lexer(temp_files['path_jsnice'])
        #     iBuilder = IndexBuilder(lexer.tokenList)
        # except:
        #     cleanup(temp_files)
        #     return (js_file_path, None, 'IndexBuilder fail')
        #
        # try:
        #     scopeAnalyst = ScopeAnalyst(os.path.join(
        #         os.path.dirname(os.path.realpath(__file__)),
        #         temp_files['path_jsnice']))
        #     nameOrigin = scopeAnalyst.nameOrigin
        #     isGlobal = scopeAnalyst.isGlobal
        #
        #     for (name, def_scope) in nameOrigin.iterkeys():
        #
        #         pos = scopeAnalyst.nameDefScope2pos[(name, def_scope)]
        #         (lin, col) = iBuilder.revFlatMat[pos]
        #         (tok_lin, tok_col) = iBuilder.revTokMap[(lin, col)]
        #
        #         candidates.append(('JSNice', def_scope,
        #                            tok_lin, tok_col,
        #                            isGlobal.get((name, pos), True),
        #                            name, '', ''))
        # except:
        #     cleanup(temp_files)
        #     return (js_file_path, None, 'ScopeAnalyst fail')

        # Compute scoping: name2scope is a dictionary where keys
        # are (name, start_index) tuples and values are scope identifiers.
        # Note: start_index is a flat (unidimensional) index,
        # not a (line_chr_idx, col_chr_idx) index.
        try:
            scopeAnalyst = ScopeAnalyst(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             temp_files['path_tmp_u_a']))
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'ScopeAnalyst fail')

        # Baseline translation: No renaming, no scoping
        no_renaming = []
        for _line_idx, line in enumerate(iBuilder_ugly.tokens):
            no_renaming.append(' '.join([t for (_tt, t) in line]) + "\n")

        with open(temp_files['f2'], 'w') as f_no_renaming:
            f_no_renaming.writelines(no_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                             'train.no_renaming', 'tuning', 'moses.ini'))
        (_moses_ok,
         translation_no_renaming,
         _err) = moses.run(temp_files['f2'])

        nc = processTranslationUnscoped(translation_no_renaming,
                                        iBuilder_ugly,
                                        lm_path,
                                        temp_files['f2'],
                                        output_path,
                                        base_name)
        if nc:
            candidates += nc

        # translation, iBuilder, lm_path,
        # f_path, output_path, base_name

        # Default translation: No renaming
        # no_renaming = []
        # for _line_idx, line in enumerate(iBuilder_ugly.tokens):
        #     no_renaming.append(' '.join([t for (_tt,t) in line]) + "\n")
        #
        # with open(temp_files['f2'], 'w') as f_no_renaming:
        #     f_no_renaming.writelines(no_renaming)
        #
        # moses = MosesDecoder(ini_path=os.path.join(ini_path, \
        #                      'train.no_renaming', 'tuning', 'moses.ini'))
        # (_moses_ok, translation, _err) = moses.run(temp_files['f2'])

        nc = processTranslationScoped(translation_no_renaming,
                                      iBuilder_ugly,
                                      scopeAnalyst,
                                      lm_path,
                                      temp_files['f2'],
                                      output_path,
                                      base_name)
        if nc:
            candidates += nc

        # More complicated renaming: collect the context around
        # each name (global variables, API calls, punctuation)
        # and build a hash of the concatenation.
        hash_def_one_renaming = renameUsingHashDefLine(scopeAnalyst,
                                                       iBuilder_ugly,
                                                       twoLines=False,
                                                       debug=False)
        with open(temp_files['f5'], 'w') as f_hash_def_one_renaming:
            f_hash_def_one_renaming.writelines(hash_def_one_renaming)

        # moses = MosesDecoder(ini_path=os.path.join(ini_path, \
        #                      'train.hash_def_one_renaming', 'tuning', 'moses.ini'))
        # (_moses_ok,
        #  translation_hash_renaming,
        #  _err) = moses.run(temp_files['f5'])

        mosesParams = {}
        mosesParams["text"] = hash_def_one_renaming  #lex_ugly.collapsedText
        #mosesParams["align"] = "true"
        #mosesParams["report-all-factors"] = "true"

        mresults = proxy.translate(mosesParams)  # __request("translate", mosesParams)
        rawText = Postprocessor(mresults["nbest"])
        translation_hash_renaming = rawText.getProcessedOutput()

        nc = processTranslationScoped(translation_hash_renaming,
                                      iBuilder_ugly,
                                      scopeAnalyst,
                                      lm_path,
                                      temp_files['f5'],
                                      output_path,
                                      base_name)
        if nc:
            candidates += nc

        # nc = processTranslationScopedFallback(translation_hash_renaming,
        #                                       translation_no_renaming,
        #                                       iBuilder_ugly,
        #                                       scopeAnalyst,
        #                                       lm_path,
        #                                       temp_files['f7'],
        #                                       output_path,
        #                                       base_name)
        # if nc:
        #     candidates += nc

        cleanup(temp_files)
        cleanupRenamed(pid)
        return (js_file_path, 'OK', candidates)

    except Exception, e:
        cleanup(temp_files)
        cleanupRenamed(pid)
        return (js_file_path, None, str(e).replace("\n", ""))
def processFile(l):

    def localCleanup(output_path, base_names):
        for base_name in base_names:
            tryRemove(os.path.join(output_path, base_name))

    js_file_path = l[0]
    base_name = os.path.splitext(os.path.basename(js_file_path))[0]

    pid = int(multiprocessing.current_process().ident)

    candidates = []

    try:
    # if True:
        # Temp files to be created during processing
        path_tmp = 'tmp_%d.js' % (pid)
        path_tmp_b = 'tmp_%d.b.js' % (pid)
        path_tmp_b_a = 'tmp_%d.b.a.js' % (pid)
        path_tmp_u = 'tmp_%d.u.js' % (pid)
        path_tmp_u_a = 'tmp_%d.u.a.js' % (pid)
        path_tmp_unugly = 'tmp_%d.n2p.js' % (pid)
        path_tmp_jsnice = 'tmp_%d.jsnice.js' % (pid)

        f2 = 'tmp_%d.no_renaming.js' % (pid)
        f3 = 'tmp_%d.basic_renaming.js' % (pid)
        f4 = 'tmp_%d.hash_renaming.js' % (pid)
        f5 = 'tmp_%d.hash_def_one_renaming.js' % (pid)
        f6 = 'tmp_%d.hash_def_two_renaming.js' % (pid)

        path_orig = '%s.js' % (base_name)
        path_ugly = '%s.u.js' % (base_name)
        path_unugly = '%s.n2p.js' % (base_name)
        path_jsnice = '%s.jsnice.js' % (base_name)

        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(path_tmp)
        except:
            cleanup(pid)
            return (js_file_path, None, 'Preprocessor fail')

        # Pass through beautifier to fix layout
        clear = Beautifier()
        ok = clear.run(path_tmp, path_tmp_b + '.tmp1')
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'Beautifier 1 fail')

        jsNiceBeautifier = JSNice(flags=['--no-types', '--no-rename'])

        (ok, _out, _err) = jsNiceBeautifier.run(path_tmp_b + '.tmp1',
                                                path_tmp_b + '.tmp2')
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'JSNice Beautifier 1 fail')

        ok = clear.run(path_tmp_b + '.tmp2', path_tmp_b)
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'Beautifier 1 fail')

        # Minify
        ugly = Uglifier()
        ok = ugly.run(path_tmp_b, path_tmp_u)
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'Uglifier fail')

        # Num tokens before vs after
        try:
            tok_clear = Lexer(path_tmp_b).tokenList
            tok_ugly = Lexer(path_tmp_u).tokenList
        except:
            cleanup(pid)
            return (js_file_path, None, 'Lexer fail')

        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(tok_clear) == len(tok_ugly):
            cleanup(pid)
            return (js_file_path, None, 'Num tokens mismatch')

        # Align minified and clear files, in case the beautifier
        # did something weird
        try:
            aligner = Aligner()
            # This is already the baseline corpus, no (smart) renaming yet
            aligner.align(path_tmp_b, path_tmp_u)
        except:
            cleanup(pid)
            return (js_file_path, None, 'Aligner fail')

        try:
            # iBuilder_clear = IndexBuilder(Lexer(path_tmp_b_a).tokenList)
            iBuilder_ugly = IndexBuilder(Lexer(path_tmp_u_a).tokenList)
        except:
            cleanup(pid)
            return (js_file_path, None, 'IndexBuilder fail')

        # Store original and uglified versions
        ok = clear.run(path_tmp_u_a, os.path.join(output_path, path_ugly))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly])
            return (js_file_path, None, 'Beautifier 2 fail')

        ok = clear.run(path_tmp_b_a, os.path.join(output_path, path_orig))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig])
            return (js_file_path, None, 'Beautifier 3 fail')

        # Run the JSNice from http://www.nice2predict.org
        unuglifyJS = UnuglifyJS()
        (ok, _out, _err) = unuglifyJS.run(path_tmp_b_a, path_tmp_unugly)
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig])
            return (js_file_path, None, 'Nice2Predict fail')

        ok = clear.run(path_tmp_unugly, path_tmp_unugly + '.tmp1')
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            return (js_file_path, None, 'Beautifier 4 fail')

        (ok, _out, _err) = jsNiceBeautifier.run(path_tmp_unugly + '.tmp1',
                                                path_tmp_unugly + '.tmp2')
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'JSNice Beautifier 2 fail')

        ok = clear.run(path_tmp_unugly + '.tmp2',
                       os.path.join(output_path, path_unugly))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            return (js_file_path, None, 'Beautifier 4 fail')

        try:
            scopeAnalyst = ScopeAnalyst(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             path_tmp_unugly))
            nameOrigin = scopeAnalyst.nameOrigin
            for (name, def_scope) in nameOrigin.iterkeys():
                candidates.append(('Nice2Predict', def_scope, name, '', ''))
        except:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            return (js_file_path, None, 'ScopeAnalyst fail')

        # Run the JSNice from http://www.jsnice.org
        jsNice = JSNice()
        (ok, _out, _err) = jsNice.run(path_tmp_b_a, path_tmp_jsnice)
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            return (js_file_path, None, 'JSNice fail')

        ok = clear.run(path_tmp_jsnice, os.path.join(output_path, path_jsnice))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, \
                                       path_unugly, path_jsnice])
            return (js_file_path, None, 'Beautifier 5 fail')

        try:
            scopeAnalyst = ScopeAnalyst(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             path_tmp_jsnice))
            nameOrigin = scopeAnalyst.nameOrigin
            for (name, def_scope) in nameOrigin.iterkeys():
                candidates.append(('JSNice', def_scope, name, '', ''))
        except:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, \
                                       path_unugly, path_jsnice])
            return (js_file_path, None, 'ScopeAnalyst fail')

        # Compute scoping: name2scope is a dictionary where keys
        # are (name, start_index) tuples and values are scope identifiers.
        # Note: start_index is a flat (unidimensional) index,
        # not a (line_chr_idx, col_chr_idx) index.
        try:
            scopeAnalyst = ScopeAnalyst(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             path_tmp_u_a))
            _name2defScope = scopeAnalyst.resolve_scope()
            _isGlobal = scopeAnalyst.isGlobal
            _name2useScope = scopeAnalyst.resolve_use_scope()
        except:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, \
                                       path_unugly, path_jsnice])
            return (js_file_path, None, 'ScopeAnalyst fail')

        no_renaming = []
        for _line_idx, line in enumerate(iBuilder_ugly.tokens):
            no_renaming.append(' '.join([t for (_tt, t) in line]) + "\n")

        with open(f2, 'w') as f_no_renaming:
            f_no_renaming.writelines(no_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                             'train.no_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f2)
        nc = processTranslation(translation, iBuilder_ugly,
                                scopeAnalyst, lm_path, f2,
                                output_path, base_name, clear)
        if nc:
            candidates += nc

        # Simple renaming: disambiguate overloaded names using scope id
        basic_renaming = renameUsingScopeId(scopeAnalyst, iBuilder_ugly)
        with open(f3, 'w') as f_basic_renaming:
            f_basic_renaming.writelines(basic_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                             'train.basic_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f3)
        nc = processTranslation(translation, iBuilder_ugly,
                                scopeAnalyst, lm_path, f3,
                                output_path, base_name, clear)
        if nc:
            candidates += nc

        # More complicated renaming: collect the context around
        # each name (global variables, API calls, punctuation)
        # and build a hash of the concatenation.
        hash_renaming = renameUsingHashAllPrec(scopeAnalyst,
                                               iBuilder_ugly,
                                               debug=False)
        # print hash_renaming
        with open(f4, 'w') as f_hash_renaming:
            f_hash_renaming.writelines(hash_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                             'train.hash_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f4)
        nc = processTranslation(translation, iBuilder_ugly,
                                scopeAnalyst, lm_path, f4,
                                output_path, base_name, clear)
        if nc:
            candidates += nc

        hash_def_one_renaming = renameUsingHashDefLine(scopeAnalyst,
                                                       iBuilder_ugly,
                                                       twoLines=False,
                                                       debug=False)
        with open(f5, 'w') as f_hash_def_one_renaming:
            f_hash_def_one_renaming.writelines(hash_def_one_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                             'train.hash_def_one_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f5)
        nc = processTranslation(translation, iBuilder_ugly,
                                scopeAnalyst, lm_path, f5,
                                output_path, base_name, clear)
        if nc:
            candidates += nc

        hash_def_two_renaming = renameUsingHashDefLine(scopeAnalyst,
                                                       iBuilder_ugly,
                                                       twoLines=True,
                                                       debug=False)
        with open(f6, 'w') as f_hash_def_two_renaming:
            f_hash_def_two_renaming.writelines(hash_def_two_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                             'train.hash_def_two_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f6)
        nc = processTranslation(translation, iBuilder_ugly,
                                scopeAnalyst, lm_path, f6,
                                output_path, base_name, clear)
        if nc:
            candidates += nc

        cleanup(pid)
        cleanupRenamed(pid)
        return (js_file_path, 'OK', candidates)

    except Exception, e:
        cleanup(pid)
        cleanupRenamed(pid)
        return (js_file_path, None, str(e).replace("\n", ""))
def testFiles(self):
    #TODO: Automated checks against the files.
    #Known bugs: The definitions of sum and numberEquals in test_file1
    #seem to be pointing to the wrong instance...
    i = 1
    for nextFile in self.fileList:
        print(nextFile)
        lexed = Lexer(nextFile)
        ib = IndexBuilder(lexed.tokenList)
        sa = ScopeAnalyst(nextFile)
        s_min = ScopeAnalyst(
            os.path.join(self.testDir.path, "test_file1.obs.js"))
        #print(s_min.name2defScope)
        #print("TokenList----------------------------------------------------------------")
        #print(lexed.tokenList)
        #print("Index Builder----------------------------------------------------------------")
        #print(ib)
        #print("Scope Analyst----------------------------------------------------------------")
        #print(sa)

        vm = VariableMetrics(sa, ib, lexed.tokenList)
        #print("VM----------------------------------------------------------------")
        #print(vm)
        #print("VM----------------------------------------------------------------")
        for var in vm.getVariables():
            print(var)
            print("Num Lines,Max Lines,Global Def,Global Usage,For,While,Literal Def,Literal Usage,Max Length Line,Ave Line Length")
            print vm.getNameMetrics(var)

        #Automated tests:
        csv_file = os.path.join(self.testDir.path,
                                "test_file" + str(i) + ".csv")
        print(csv_file)
        if (os.path.exists(csv_file)):
            with open(csv_file, 'r') as f:
                csv_reader = csv.reader(f, delimiter=",")
                #Skip header
                next(csv_reader, None)
                for row in csv_reader:
                    key = (row[0], row[1])
                    print(key)
                    (num_lines, max_lines, external_def, external_use,
                     in_for, in_while, literal_def, literal_use,
                     max_length_line, ave_line_length) = vm.getNameMetrics(key)
                    self.assertTrue(num_lines == int(row[2]))
                    self.assertTrue(max_lines == int(row[3]))
                    self.assertTrue(external_def == self.asBool(row[4]))
                    self.assertTrue(external_use == int(row[5]))
                    self.assertTrue(in_for == int(row[6]))
                    self.assertTrue(in_while == int(row[7]))
                    self.assertTrue(literal_def == self.asBool(row[8]))
                    self.assertTrue(literal_use == int(row[9]))
                    self.assertTrue(max_length_line == int(row[10]))
                    self.assertAlmostEqual(ave_line_length,
                                           float(row[11]),
                                           places=3)
        else:
            print("no manually annotated csv file for: " + nextFile)
            break