def testScopeAnalyst(self): ''' TODO: Check that the scope analyst works properly ''' #__main__.py in tools is a useful tool for examining these. #print(self.obsfuscatedTextFiles[0]) #This doesn't work when run inside pyDev for some weird reason. sa1 = ScopeAnalyst(self.obsfuscatedTextFiles[0]) #print(sa1) #Not really sure how to test this effectively. #Check (using minified file) if identifier name maps to different variables if #they are in different scopes. Can look at __main__.py #Variables: geom2d,t,i,r,x,y,n,e,o,u #Why do x and y not appear in the variables? (Is it b/c they are not defined anywhere in this snippet?) self.assertTrue(len(sa1.nameScopes[(u'geom2d')]) == 1) self.assertTrue(len(sa1.nameScopes[(u'numeric')]) == 1) self.assertTrue(len(sa1.nameScopes[(u't')]) == 3) self.assertTrue(len(sa1.nameScopes[(u'i')]) == 1) self.assertTrue(len(sa1.nameScopes[(u'r')]) == 4) self.assertTrue(len(sa1.nameScopes[(u'n')]) == 4) #self.assertTrue(len(sa1.nameScopes[(u'x')]) == 2) #self.assertTrue(len(sa1.nameScopes[(u'y')]) == 2) self.assertTrue(len(sa1.nameScopes[(u'u')]) == 1) self.assertTrue(len(sa1.nameScopes[(u'e')]) == 1) self.assertTrue(len(sa1.nameScopes[(u'o')]) == 1) #isGlobal: #print("IsGlobal-----------------------------------------------") #print(sa1.isGlobal) #print("IsGlobal-----------------------------------------------") self.assertTrue(sa1.isGlobal[(u'geom2d', 4)] == True) self.assertTrue(sa1.isGlobal[(u'i', 85)] == False) self.assertTrue(True)
def processFile(js_file_name):
    # Debug helper: collect one candidate tuple per identifier definition in
    # js_file_name and print the entries for the names 'n' and
    # 'calendarEventId' (hard-coded names under investigation).
    candidates = []
    lexer = Lexer(js_file_name)
    iBuilder = IndexBuilder(lexer.tokenList)
    scopeAnalyst = ScopeAnalyst(js_file_name)
    nameOrigin = scopeAnalyst.nameOrigin
    isGlobal = scopeAnalyst.isGlobal
    nameDefScope2pos = scopeAnalyst.nameDefScope2pos
    for (name, def_scope) in nameOrigin.iterkeys():
        # pos is a flat character index; map it back to (line, col) and then
        # to the token-level coordinates.
        pos = nameDefScope2pos[(name, def_scope)]
        (lin, col) = iBuilder.revFlatMat[pos]
        scope = iBuilder.revTokMap[(lin, col)]
        # Default to global when the position is missing from isGlobal.
        glb = isGlobal.get((name, pos), True)
        # Skip the placeholder names the preprocessor substitutes for literals.
        if name != 'TOKEN_LITERAL_STRING' and \
           name != 'TOKEN_LITERAL_NUMBER':
            candidates.append((scope, name, pos, (lin, col), glb, def_scope))
    print
    print
    for c in sorted(candidates, key=lambda e: e[0]):
        (scope, name, pos, (lin, col), glb, def_scope) = c
        if name == 'n' or name == 'calendarEventId':
            print '\t', scope, name, pos, (lin, col), glb
            print '\t\t', def_scope
def testFiles(self): tf = [1, 5, 6, 7, 8, 9, 10, 11] #tf = [11] for i in tf: print("-----------------------------------------------------") lexed = Lexer(self.fileList[i - 1]) ib = IndexBuilder(lexed.tokenList) #print(ib) sa = ScopeAnalyst(self.fileList[i - 1]) print(sa) nameCount = {} #TODO: Grab only the non-globals to look at (get the start key and look it up) for variable in sa.nameDefScope2pos.keys(): start = sa.nameDefScope2pos[variable] name = variable[0] if (not sa.isGlobal[(name, start)]): if (name in nameCount): nameCount[name] += 1 else: nameCount[name] = 1 print( str(name) + " : " + str(sa.nameDefScope2pos[variable]) + " -> " + str(ib.revFlatMat[sa.nameDefScope2pos[variable]]) + " Manual: " + str(self.file_definitions[i][name])) assert (ib.revFlatMat[sa.nameDefScope2pos[variable]][0] in self.file_definitions[i][name]) #Finally make sure that the count of definitions matches our manual check. for name, count in nameCount.iteritems(): print(name + " : " + str(count) + " =?= " + str(len(self.file_definitions[i][name]))) assert (len(self.file_definitions[i][name]) == count)
def load(pth):
    """Lex *pth* and return its (IndexBuilder, ScopeAnalyst) pair.

    The ScopeAnalyst is given an absolute path anchored at this script's
    directory, matching how the rest of the tooling resolves fixtures.
    """
    token_index = IndexBuilder(Lexer(pth).tokenList)
    script_dir = os.path.dirname(os.path.realpath(__file__))
    analyst = ScopeAnalyst(os.path.join(script_dir, pth))
    return (token_index, analyst)
def testMinifiableLines(self):
    # Check ScopeAnalyst.getMinifiableLines against hand-computed line sets
    # for fixture files 0 and 5, then verify that the literal-stripped text
    # extraction returns exactly one line per minifiable line.
    expected = {}
    expected[0] = set([1, 2, 3, 4, 5, 7, 8, 9, 11, 12, 15, 16, 17, 20])
    expected[5] = set([8, 9])
    for i in [0, 5]:
        ib = IndexBuilder(self.clearLexed[i].tokenList)
        sa = ScopeAnalyst(self.clearTextFiles[i])
        lines = sa.getMinifiableLines(ib)
        print("i:" + str(i))
        print(lines)
        print(expected[i])
        self.assertTrue(lines == expected[i])
        # One output line per minifiable source line.
        text = ib.get_text_on_lines_wo_literals(lines)
        print(text)
        print(len(text.split("\n")))
        print(len(expected[i]))
        self.assertTrue(len(text.split("\n")) == len(expected[i]))
def testFiles(self):
    """Dump every definition position found in the first fixture file.

    Prints "<name> : <flat position> -> <(line, col)>" per definition so the
    output can be inspected by hand.
    """
    #Known bugs: The definitions of sum and numberEquals in test_file1 seem to be pointing to the wrong instance...
    i = 1
    index = IndexBuilder(Lexer(self.fileList[0]).tokenList)
    analyst = ScopeAnalyst(self.fileList[0])
    for key in analyst.nameDefScope2pos.keys():
        flat_pos = analyst.nameDefScope2pos[key]
        print(str(key[0]) + " : " + str(flat_pos) + " -> " +
              str(index.revFlatMat[flat_pos]))
def summarizeUnscopedTranslation(renaming_map, f_path, translation_strategy,
                                 output_path, base_name, name_candidates,
                                 name_positions, iBuilder):
    # Apply renaming_map to the file's tokens, beautify the result into
    # output_path, then re-analyze it and return one candidate tuple per
    # identifier definition. Returns False on beautifier/lexer/analyst failure.
    # NOTE(review): name_candidates is accepted but never used here.
    nc = []
    f_base = os.path.basename(f_path)
    # File naming convention: <base>.<training_strategy>.js
    training_strategy = f_base.split('.')[1]
    tmp_path = '%s.%s.js' % (f_base[:-3], translation_strategy)
    o_path = '%s.%s.unscoped.%s.js' % (base_name, training_strategy,
                                       translation_strategy)
    # print f_path, f_base, training_strategy, tmp_path, o_path, base_name
    writeTmpLines(renameHashed(iBuilder, name_positions, renaming_map),
                  tmp_path)
    clear = Beautifier()
    ok = clear.run(tmp_path, os.path.join(output_path, o_path))
    if not ok:
        return False
    try:
        lexer = Lexer(os.path.join(output_path, o_path))
        iBuilder_local = IndexBuilder(lexer.tokenList)
        scopeAnalyst_local = ScopeAnalyst(os.path.join(output_path, o_path))
    except:
        return False
    nameOrigin = scopeAnalyst_local.nameOrigin
    isGlobal = scopeAnalyst_local.isGlobal
    for (name, def_scope) in nameOrigin.iterkeys():
        pos = scopeAnalyst_local.nameDefScope2pos[(name, def_scope)]
        # The global filter below was deliberately disabled: 'if not False'
        # always passes; the original condition is kept in the comment.
        if not False:  #isGlobal.get((name, pos), True):
            (lin, col) = iBuilder_local.revFlatMat[pos]
            (tok_lin, tok_col) = iBuilder_local.revTokMap[(lin, col)]
            nc.append(('%s.unscoped.%s' % (training_strategy,
                                           translation_strategy),
                       def_scope, tok_lin, tok_col,
                       isGlobal.get((name, pos), True), name, '', ''))
    return nc
def processFile(l):
    # Worker for a multiprocessing pool: analyze one JS file under
    # results_path (module global) and return (file, status, candidates).
    js_file_name = l
    candidates = []
    try:
        lexer = Lexer(os.path.join(results_path, js_file_name))
        iBuilder = IndexBuilder(lexer.tokenList)
        scopeAnalyst = ScopeAnalyst(os.path.join(results_path, js_file_name))
        nameOrigin = scopeAnalyst.nameOrigin
        isGlobal = scopeAnalyst.isGlobal
        nameDefScope2pos = scopeAnalyst.nameDefScope2pos
        for (name, def_scope) in nameOrigin.iterkeys():
            # Map the flat definition position back to token coordinates.
            pos = nameDefScope2pos[(name, def_scope)]
            (lin, col) = iBuilder.revFlatMat[pos]
            scope = iBuilder.revTokMap[(lin, col)]
            glb = isGlobal.get((name, pos), True)
            # print name, def_scope, pos, scope, glb #, (lin,col)
            # Skip preprocessor literal placeholders.
            if name != 'TOKEN_LITERAL_STRING' and \
               name != 'TOKEN_LITERAL_NUMBER':
                candidates.append((scope, name, glb))
    except:
        # Any failure in lex/index/scope analysis is reported, not raised.
        return (js_file_name, None, 'ScopeAnalyst fail')
    # print 'candidates------------------'
    # for candidate in candidates:
    #     print candidate
    return (js_file_name, 'OK', candidates)
def testfileDebug(self):
    """Print a full debug dump for every fixture file.

    For each base name in self.fileList, prints the original (.js) and
    minified (.u.js) program text, then for each suffix in self.ids prints
    the lexed text, token list, IndexBuilder, and ScopeAnalyst output.

    Fix: the locals previously named ``min`` and ``id`` shadowed the
    builtins of the same names; renamed to ``minified_path`` / ``suffix``.
    """
    for f in self.fileList:
        print("---------------------------------- " + f +
              " ----------------------------------")
        orig = f + ".js"
        minified_path = f + ".u.js"
        lo = Lexer(orig)
        lm = Lexer(minified_path)
        print(
            "---------------------------------- original text ----------------------------------"
        )
        print(lo.programText)
        print(
            "---------------------------------- minified text ----------------------------------"
        )
        print(lm.programText)
        for suffix in self.ids:
            to_read = f + suffix + ".js"
            print("---------------------------------- " + to_read +
                  " ----------------------------------")
            lexed = Lexer(to_read)
            print(
                "---------------------------------- text ----------------------------------"
            )
            print(lexed.programText)
            print(
                "---------------------------------- tokenlist ----------------------------------"
            )
            print(lexed.tokenList)
            ib = IndexBuilder(lexed.tokenList)
            print(
                "---------------------------------- IndexBuilder ----------------------------------"
            )
            print(ib)
            sa = ScopeAnalyst(to_read)
            print(
                "---------------------------------- ScopeAnalyst ----------------------------------"
            )
            print(sa)
def processFile(l):
    """Count the global names in one JS file.

    Parameters:
        l: a (base_name, js_file_path) pair.

    Returns [base_name, count] where count is the number of globals:
    every name ScopeAnalyst marks global, plus any name that appears in the
    file's character-position index but is unknown to the scope analysis
    (never-defined names are globals by definition). Returns
    [base_name, None] on any analysis failure.

    Fixes: narrowed the bare ``except:`` to ``except Exception:`` (a bare
    except also swallows KeyboardInterrupt/SystemExit), replaced the
    ``value == True`` comparison with truthiness, and dropped the unused
    position values from the name2CharPositions iteration.
    """
    base_name = l[0]
    js_file_path = l[1]
    print(base_name)
    print(js_file_path)
    try:
        lexed = Lexer(js_file_path)
        ib = IndexBuilder(lexed.tokenList)
        sa = ScopeAnalyst(js_file_path)
        # Globals = all names flagged global by the scope analysis ...
        base_global = set(
            [name for name, value in sa.isGlobal.iteritems() if value])
        # ... plus any name never seen by the analysis at all.
        known_names = set(sa.isGlobal.iterkeys())
        for name in ib.name2CharPositions.iterkeys():
            if name not in known_names:
                base_global.add(name)
        return [base_name, len(base_global)]
    except Exception:
        # Best-effort worker: report failure as a None count.
        return [base_name, None]
def testFiles(self):
    #TODO: Automated checks against the files.
    #Known bugs: The definitions of sum and numberEquals in test_file1 seem to be pointing to the wrong instance...
    # For each fixture, compute VariableMetrics and compare every metric
    # against the manually annotated CSV (test_file<i>.csv) when present.
    i = 1
    for nextFile in self.fileList:
        print(nextFile)
        lexed = Lexer(nextFile)
        ib = IndexBuilder(lexed.tokenList)
        sa = ScopeAnalyst(nextFile)
        # s_min is built but only used for interactive inspection below.
        s_min = ScopeAnalyst(
            os.path.join(self.testDir.path, "test_file1.obs.js"))
        #print(s_min.name2defScope)
        #print("TokenList----------------------------------------------------------------")
        #print(lexed.tokenList)
        #print("Index Builder----------------------------------------------------------------")
        #print(ib)
        #print("Scope Analyst----------------------------------------------------------------")
        #print(sa)
        vm = VariableMetrics(sa, ib, lexed.tokenList)
        #print("VM----------------------------------------------------------------")
        #print(vm)
        #print("VM----------------------------------------------------------------")
        for var in vm.getVariables():
            print(var)
            print(
                "Num Lines,Max Lines,Global Def,Global Usage,For,While,Literal Def,Literal Usage,Max Length Line,Ave Line Length"
            )
            print vm.getNameMetrics(var)
        #Automated tests: compare against the annotated CSV if one exists.
        csv_file = os.path.join(self.testDir.path,
                                "test_file" + str(i) + ".csv")
        print(csv_file)
        if (os.path.exists(csv_file)):
            with open(csv_file, 'r') as f:
                csv_reader = csv.reader(f, delimiter=",")
                #Skip header
                next(csv_reader, None)
                for row in csv_reader:
                    # Row layout: name, scope, then the ten metrics in the
                    # same order as the header printed above.
                    key = (row[0], row[1])
                    print(key)
                    (num_lines, max_lines, external_def, external_use,
                     in_for, in_while, literal_def, literal_use,
                     max_length_line, ave_line_length) = vm.getNameMetrics(key)
                    self.assertTrue(num_lines == int(row[2]))
                    self.assertTrue(max_lines == int(row[3]))
                    self.assertTrue(external_def == self.asBool(row[4]))
                    self.assertTrue(external_use == int(row[5]))
                    self.assertTrue(in_for == int(row[6]))
                    self.assertTrue(in_while == int(row[7]))
                    self.assertTrue(literal_def == self.asBool(row[8]))
                    self.assertTrue(literal_use == int(row[9]))
                    self.assertTrue(max_length_line == int(row[10]))
                    # Float column: compare to 3 decimal places.
                    self.assertAlmostEqual(ave_line_length, float(row[11]),
                                           places=3)
        else:
            print("no manually annotated csv file for: " + nextFile)
            break
def processFile(l):
    # Corpus-building worker: preprocess, beautify, normalize, minify and
    # align one JS file, then produce several parallel token renderings
    # (original, no-renaming, scope-id renaming, normalized, hash renamings).
    # Returns (path, None, reason) on any failure; on success returns the
    # 7-tuple of renderings. Relies on module globals: seen, corpus_root,
    # cleanup, renameUsingScopeId, renameUsingHashDefLine.
    js_file_path = l[0]
    if js_file_path in seen:
        return (js_file_path, None, 'Skipped')
    # Per-process temp file names keyed by pipeline stage.
    pid = int(multiprocessing.current_process().ident)
    # Temp files to be created during processing
    temp_files = {'path_tmp': 'tmp_%d.js' % pid,
                  'path_tmp_b': 'tmp_%d.b.js' % pid,
                  'path_tmp_b_n': 'tmp_%d.b.n.js' % pid,
                  'path_tmp_u': 'tmp_%d.u.js' % pid,
                  'path_tmp_u_n': 'tmp_%d.u.n.js' % pid,
                  'path_tmp_b_a': 'tmp_%d.b.a.js' % pid,
                  'path_tmp_u_a': 'tmp_%d.u.a.js' % pid}
    try:
        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(temp_files['path_tmp'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Preprocessor fail')
        # A JSNice beautification pass used to run here, but JSNice is down;
        # only the uglifyjs-based Beautifier is used now.
        clear = Beautifier()
        ok = clear.run(temp_files['path_tmp'],
                       temp_files['path_tmp_b_n'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')
        # Normalize the beautified file.
        norm = Normalizer()
        ok = norm.run(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   temp_files['path_tmp_b_n']),
                      False,
                      temp_files['path_tmp_b'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Normalizer fail')
        # Minify
        ugly = Uglifier()
        ok = ugly.run(temp_files['path_tmp_b'],
                      temp_files['path_tmp_u_n'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Uglifier fail')
        # Normalize the minified file.
        norm = Normalizer()
        ok = norm.run(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   temp_files['path_tmp_u_n']),
                      False,
                      temp_files['path_tmp_u'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Normalizer fail')
        # Num tokens before vs after
        try:
            tok_clear = Lexer(temp_files['path_tmp_b']).tokenList
            tok_ugly = Lexer(temp_files['path_tmp_u']).tokenList
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Lexer fail')
        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(tok_clear) == len(tok_ugly):
            cleanup(temp_files)
            return (js_file_path, None, 'Num tokens mismatch')
        # Align minified and clear files, in case the beautifier
        # did something weird
        try:
            aligner = Aligner()
            # This is already the baseline corpus, no (smart) renaming yet
            aligner.align(temp_files['path_tmp_b'],
                          temp_files['path_tmp_u'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Aligner fail')
        try:
            lex_clear = Lexer(temp_files['path_tmp_b_a'])
            iBuilder_clear = IndexBuilder(lex_clear.tokenList)
            lex_ugly = Lexer(temp_files['path_tmp_u_a'])
            iBuilder_ugly = IndexBuilder(lex_ugly.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')
        # Normalize again; note: reuses 'path_tmp_u_n' as the output slot.
        norm = Normalizer()
        ok = norm.run(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   temp_files['path_tmp_b']),
                      True,
                      temp_files['path_tmp_u_n'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Normalizer fail')
        try:
            lex_norm = Lexer(temp_files['path_tmp_u_n'])
            iBuilder_norm = IndexBuilder(lex_norm.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')
        # One whitespace-joined token line per source line.
        normalized = []
        for line_idx, line in enumerate(iBuilder_norm.tokens):
            normalized.append(' '.join([t for (_tt, t) in line]) + "\n")
        # Compute scoping: name2scope is a dictionary where keys
        # are (name, start_index) tuples and values are scope identifiers.
        # Note: start_index is a flat (unidimensional) index,
        # not a (line_chr_idx, col_chr_idx) index.
        try:
            scopeAnalyst = ScopeAnalyst(os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                temp_files['path_tmp_u_a']))
            # _name2defScope = scopeAnalyst.resolve_scope()
            # _isGlobal = scopeAnalyst.isGlobal
            # _name2useScope = scopeAnalyst.resolve_use_scope()
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'ScopeAnalyst fail')
        # Render aligned clear and minified token streams in parallel.
        orig = []
        no_renaming = []
        for line_idx, line in enumerate(iBuilder_ugly.tokens):
            orig.append(' '.join([t for (_tt, t) in \
                                  iBuilder_clear.tokens[line_idx]]) + "\n")
            no_renaming.append(' '.join([t for (_tt, t) in line]) + "\n")
        # Simple renaming: disambiguate overloaded names using scope id
        basic_renaming = renameUsingScopeId(scopeAnalyst, iBuilder_ugly)
        # More complicated renaming: collect the context around
        # each name (global variables, API calls, punctuation)
        # and build a hash of the concatenation.
        # hash_renaming = renameUsingHashAllPrec(scopeAnalyst,
        #                                        iBuilder_ugly,
        #                                        debug=True)
        hash_def_one_renaming = renameUsingHashDefLine(scopeAnalyst,
                                                       iBuilder_ugly,
                                                       twoLines=False,
                                                       debug=False)
        hash_def_two_renaming = renameUsingHashDefLine(scopeAnalyst,
                                                       iBuilder_ugly,
                                                       twoLines=True,
                                                       debug=False)
        cleanup(temp_files)
        return (js_file_path,
                orig,
                no_renaming,
                basic_renaming,
                normalized,
                # hash_renaming,
                hash_def_one_renaming,
                hash_def_two_renaming)
    except Exception, e:
        # Catch-all so one bad file cannot kill the worker pool.
        cleanup(temp_files)
        return (js_file_path, None, str(e))
def testMosesPerformance(self):
    '''
    Run the deobfuscateJS method on each of our files and record what the
    times were for each into a csv style report
    (./testing/PerformanceMetrics<id_start>.csv). Each file is timed in
    both parallel and serial mode; per-file outputs are written under
    ./testing/performance_output/.

    Fix: ``total_time`` was only assigned inside the ``try`` block, so a
    failure before or during deobfuscateJS raised NameError at the
    ``writer.writerow`` call below; it is now reset on the error path.
    '''
    i = 0
    restart_attempt = False
    with open("./testing/PerformanceMetrics" + str(id_start) + ".csv",
              'w') as output_csv:
        writer = csv.writer(output_csv, delimiter=",")
        writer.writerow([
            "file", "is_parallel", "lines", "minifiable_instances",
            "local_name_count", "jsnice_status", "preprocess_time",
            "prepreprocessor_time", "jsnice_time", "renaming_time",
            "lex_time", "builder_time", "scoper_time", "moses_time_serial",
            "moses_rn_parallel", "postprocessing_time", "total_time"
        ])
        for next_file in self.clearTextFiles:
            print(next_file)
            if (i < id_start):  # Skip until at start ID (used in failure cases)
                i += 1
                continue
            #if("220053" not in next_file):
            #    continue
            text = open(next_file, 'r').read()
            lineCount = text.count("\n") + 1
            print(lineCount)
            for is_parallel in [True, False]:
                try:
                    # Count minifiable (local) names for the report columns.
                    sa = ScopeAnalyst(next_file)
                    local = [
                        n for n, isG in sa.isGlobal.iteritems()
                        if not isG
                    ]
                    local_instances = [
                        n for n, def_scope in sa.name2defScope.iteritems()
                        if n in local
                    ]
                    minCount = len(local_instances)
                    uniqueCount = len(local)
                    start = time.time()
                    #result = self.client.deobfuscateJS(text,True,i,True,is_parallel,use_local) #Debug mode
                    result = self.client.deobfuscateJS(
                        text, True, i, False, is_parallel,
                        use_local)  #For timings
                    total_time = time.time() - start
                    if ("Moses server failed" in result[0]):
                        # Give an external watchdog a chance to restart the
                        # Moses server; only wait once per crash.
                        if (not restart_attempt):
                            restart_attempt = True
                            time.sleep(10 * 60)
                    else:
                        # Server is working; reset the restarter flag.
                        restart_attempt = False
                except:
                    # Record a zeroed row rather than aborting the sweep.
                    minCount = 0
                    uniqueCount = 0
                    total_time = 0
                    result = [
                        text, "other error.",
                        (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
                    ]
                #Write output to a separate file.
                file_id = str(self.getFileId(next_file))
                output_file = file_id + str(is_parallel) + ".out.js"
                with open(
                        os.path.join("./testing/performance_output/",
                                     output_file), "w") as f2:
                    f2.write(result[0])
                #Write js_error + times to csv.
                writer.writerow([
                    file_id, is_parallel, lineCount, minCount, uniqueCount,
                    result[1]
                ] + list(result[2]) + [total_time])
                i += 1
def processFile(js_file_path):
    # Interactive scope-visualization tool: prints every use/def scope found
    # in a minified file, then groups name occurrences by definition scope
    # and prints each occurrence highlighted in its source line, looking up
    # original names through a source map when one is available.
    # Load in the minified file
    minified = open(js_file_path).read()
    # Create lexer
    lexer = get_lexer_for_filename(js_file_path)
    # Tokenize input and compute mappings between the different
    # indices used: (line, col), flat, (l,c) in token list
    indexBuilder = IndexBuilder(lex(minified, lexer))
    tokens = indexBuilder.tokens
    # print 'RUNNING IndexBuilder:', len(tokens)>0
    # Compute scoping: name2scope is a dictionary where keys
    # are (name, start_index) tuples and values are scope identifiers.
    # Note: start_index is a flat (unidimensional) index,
    # not a (line_chr_idx, col_chr_idx) index.
    scopeAnalyst = ScopeAnalyst(js_file_path)
    name2defScope = scopeAnalyst.resolve_scope()
    isGlobal = scopeAnalyst.isGlobal
    name2useScope = scopeAnalyst.name2useScope
    name2pth = scopeAnalyst.name2pth
    nameOrigin = scopeAnalyst.nameOrigin
    # Dump every distinct use scope with its member positions highlighted.
    scopes = set(name2useScope.values())
    print
    print '=== FOUND %d SCOPES ===' % len(scopes)
    print
    for scope in scopes:
        print 'USE SCOPE:', scope
        lc_list = [
            indexBuilder.revTokMap[indexBuilder.revFlatMat[pos]]
            for (t, pos) in name2useScope.keys()
            if name2useScope[(t, pos)] == scope
        ]
        highlight(tokens, lc_list)
        print
    # Same dump for definition scopes.
    scopes = set(name2defScope.values())
    print
    print '=== FOUND %d NAME SCOPES ===' % len(scopes)
    print
    for scope in scopes:
        print 'DEF SCOPE:', scope
        lc_list = [
            indexBuilder.revTokMap[indexBuilder.revFlatMat[pos]]
            for (t, pos) in name2defScope.keys()
            if name2defScope[(t, pos)] == scope
        ]
        highlight(tokens, lc_list)
        print
    # Discover the path to the source map
    map_path = sourcemap.discover(minified)
    # Read and parse our sourcemap
    if map_path:
        sourcemapIndex = sourcemap.load(open(map_path))
    # Cluster names by scope
    nameScope2Positions = {}
    # Index data by (name,scope)
    for token, l in indexBuilder.name2CharPositions.iteritems():
        for (line, col) in sorted(l, key=lambda (a, b): (a, b)):
            pos = indexBuilder.flatMap[(line, col)]
            if name2defScope.has_key((token, pos)):
                scope = name2defScope[(token, pos)]
                use_scope = name2useScope[(token, pos)]
                pth = name2pth[(token, pos)]
                glb = isGlobal[(token, pos)]
                nameScope2Positions.setdefault((token, scope, glb), [])
                nameScope2Positions[(token, scope, glb)].append((line, col))
                # print token, pos
                # print 'def:', scope
                # print 'use:', use_scope
                # print 'pth:', pth
                # highlight(tokens, [indexBuilder.revTokMap[indexBuilder.revFlatMat[pos]]])
                # print
    print
    print
    for (token, scope, glb), positions in sorted(nameScope2Positions.iteritems(), \
                                                 key=lambda (x, y): x[0]):
        pos = sorted(positions, key=lambda e: (e[0], e[1]))
        tt = []
        line_tok_idxs = set([])
        for (l, c) in pos:
            (tl, tc) = indexBuilder.revTokMap[(l, c)]
            line_tok_idxs.add(tl)
            p = indexBuilder.flatMap[(l, c)]
            # Recover the pre-minification name via the source map, if any.
            if map_path:
                orig = sourcemapIndex.lookup(line=l, column=c).name
            else:
                orig = token
            print token, scope, (l, c), orig
            tt.append(((tl, tc), p, orig))
            # t.append(orig)
        # if token == 'n':
        # NOTE(review): 'orig' here is the value from the last loop iteration.
        print '\nNAME:', token.encode(
            'utf-8'), '( isGlobal =', glb, '; original =', orig, ')'
        # print scope
        # highlight(tokens, [indexBuilder.revTokMap[indexBuilder.revFlatMat[pos]]])
        # These lookups are kept for interactive debugging; results unused.
        for ((tli, tci), p, orig) in tt:
            scope = name2defScope[(token, p)]
            use_scope = name2useScope[(token, p)]
            pth = name2pth[(token, p)]
            origin = nameOrigin[(token, scope)]
            # print token #, p, origin
            # print
            # print 'def:', scope
            # print 'use:', use_scope
            # print 'pth:', pth
            # print
        # Print each line containing an occurrence, with the occurrence
        # wrapped in full-block characters (U+2588) as a highlight.
        for tl in sorted(set([tli for ((tli, tci), p, orig) in tt])):
            l = list(tokens[tl])
            for tc in [tci for ((tli, tci), p, orig) in tt if tli == tl]:
                l[tc] = (l[tc][0], unichr(0x2588) + token + unichr(0x2588))
            # pos = indexBuilder.flatMap[(line,col)]
            print '  ', '%d:' % (tl + 1), ' '.join(
                [x[1].encode('utf-8') for x in l])
        print
    return
def processFile(l):
    # Evaluation worker: for one JS file, build beautified/minified/aligned
    # versions, run Nice2Predict and two Moses translation strategies over
    # the minified tokens, and return every renaming candidate produced.
    # Relies on module globals: corpus_root, output_path, ini_path, lm_path,
    # proxy, cleanup, cleanupRenamed, processTranslationUnscoped,
    # processTranslationScoped, renameUsingHashDefLine.
    js_file_path = l[0]
    base_name = os.path.splitext(os.path.basename(js_file_path))[0]
    # Per-process temp file names keyed by pipeline stage.
    pid = int(multiprocessing.current_process().ident)
    temp_files = {
        'path_tmp': 'tmp_%d.js' % pid,
        'path_tmp_b': 'tmp_%d.b.js' % pid,
        'path_tmp_b_1': 'tmp_%d.b.1.js' % pid,
        'path_tmp_b_2': 'tmp_%d.b.2.js' % pid,
        'path_tmp_b_a': 'tmp_%d.b.a.js' % pid,
        'path_tmp_u': 'tmp_%d.u.js' % pid,
        'path_tmp_u_a': 'tmp_%d.u.a.js' % pid,
        'path_tmp_unugly': 'tmp_%d.n2p.js' % pid,
        'path_tmp_unugly_1': 'tmp_%d.n2p.1.js' % pid,
        'path_tmp_unugly_2': 'tmp_%d.n2p.2.js' % pid,
        'path_tmp_jsnice': 'tmp_%d.jsnice.js' % pid,
        'f2': 'tmp_%d.no_renaming.js' % pid,
        # 'f3': 'tmp_%d.basic_renaming.js' % pid,
        # 'f4': 'tmp_%d.hash_renaming.js' % pid,
        'f5': 'tmp_%d.hash_def_one_renaming.js' % pid,
        # 'f6': 'tmp_%d.hash_def_two_renaming.js' % pid,
        'f7': 'tmp_%d.hash_def_one_renaming_fb.js' % pid,
        'path_orig': os.path.join(output_path, '%s.js' % base_name),
        'path_ugly': os.path.join(output_path, '%s.u.js' % base_name),
        'path_unugly': os.path.join(output_path, '%s.n2p.js' % base_name),
        'path_jsnice': os.path.join(output_path, '%s.jsnice.js' % base_name)
    }
    # for strategy in ['js', 'lm.js', 'len.js', 'freqlen.js']:
    #     for renaming in ['no_renaming', 'hash_def_one_renaming']:
    #         temp_files['path_tmp_%s_%s' % (renaming, strategy)] = \
    #             'tmp_%d.%s.%s' % (pid, renaming, strategy)
    candidates = []
    # if True:
    try:
        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(temp_files['path_tmp'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Preprocessor fail')
        # Pass through beautifier to fix layout.
        # (An additional JSNice beautification pass used to run here but is
        # disabled; see version history for the removed code.)
        clear = Beautifier()
        ok = clear.run(temp_files['path_tmp'], temp_files['path_tmp_b'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')
        # Minify
        ugly = Uglifier()
        ok = ugly.run(temp_files['path_tmp_b'], temp_files['path_tmp_u'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Uglifier fail')
        # Num tokens before vs after
        try:
            tok_clear = Lexer(temp_files['path_tmp_b']).tokenList
            tok_ugly = Lexer(temp_files['path_tmp_u']).tokenList
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Lexer fail')
        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(tok_clear) == len(tok_ugly):
            cleanup(temp_files)
            return (js_file_path, None, 'Num tokens mismatch')
        # Align minified and clear files, in case the beautifier
        # did something weird
        try:
            aligner = Aligner()
            # This is already the baseline corpus, no (smart) renaming yet
            aligner.align(temp_files['path_tmp_b'],
                          temp_files['path_tmp_u'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Aligner fail')
        # If beautified and uglified text are identical the file was never
        # actually minified; skip it.
        if open(temp_files['path_tmp_b']).read() == \
           open(temp_files['path_tmp_u']).read():
            cleanup(temp_files)
            return (js_file_path, None, 'Not minified')
        try:
            lex_ugly = Lexer(temp_files['path_tmp_u_a'])
            iBuilder_ugly = IndexBuilder(lex_ugly.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')
        ############################################################
        # From now on only work with path_tmp_b_a and path_tmp_u_a
        ############################################################
        # Store original and uglified versions
        ok = clear.run(temp_files['path_tmp_b_a'], temp_files['path_orig'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')
        ok = clear.run(temp_files['path_tmp_u_a'], temp_files['path_ugly'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')
        # Run the JSNice from http://www.nice2predict.org
        unuglifyJS = UnuglifyJS()
        (ok, _out, _err) = unuglifyJS.run(temp_files['path_tmp_u_a'],
                                          temp_files['path_tmp_unugly'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Nice2Predict fail')
        ok = clear.run(temp_files['path_tmp_unugly'],
                       temp_files['path_unugly'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')
        try:
            lexer = Lexer(temp_files['path_unugly'])
            iBuilder = IndexBuilder(lexer.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')
        # Collect Nice2Predict candidates: one per identifier definition.
        try:
            scopeAnalyst = ScopeAnalyst(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             temp_files['path_unugly']))
            nameOrigin = scopeAnalyst.nameOrigin
            isGlobal = scopeAnalyst.isGlobal
            for (name, def_scope) in nameOrigin.iterkeys():
                pos = scopeAnalyst.nameDefScope2pos[(name, def_scope)]
                (lin, col) = iBuilder.revFlatMat[pos]
                (tok_lin, tok_col) = iBuilder.revTokMap[(lin, col)]
                candidates.append(('Nice2Predict', def_scope,
                                   tok_lin, tok_col,
                                   isGlobal.get((name, pos), True),
                                   name, '', ''))
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'ScopeAnalyst fail')
        # (A second pass through jsnice.org used to run here; disabled.)
        # Compute scoping: name2scope is a dictionary where keys
        # are (name, start_index) tuples and values are scope identifiers.
        # Note: start_index is a flat (unidimensional) index,
        # not a (line_chr_idx, col_chr_idx) index.
        try:
            scopeAnalyst = ScopeAnalyst(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             temp_files['path_tmp_u_a']))
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'ScopeAnalyst fail')
        # Baseline translation: No renaming, no scoping
        no_renaming = []
        for _line_idx, line in enumerate(iBuilder_ugly.tokens):
            no_renaming.append(' '.join([t for (_tt, t) in line]) + "\n")
        with open(temp_files['f2'], 'w') as f_no_renaming:
            f_no_renaming.writelines(no_renaming)
        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                             'train.no_renaming', 'tuning', 'moses.ini'))
        (_moses_ok,
         translation_no_renaming,
         _err) = moses.run(temp_files['f2'])
        nc = processTranslationUnscoped(translation_no_renaming,
                                        iBuilder_ugly,
                                        lm_path,
                                        temp_files['f2'],
                                        output_path,
                                        base_name)
        if nc:
            candidates += nc
        # Same translation, scored with scoping this time.
        nc = processTranslationScoped(translation_no_renaming,
                                      iBuilder_ugly,
                                      scopeAnalyst,
                                      lm_path,
                                      temp_files['f2'],
                                      output_path,
                                      base_name)
        if nc:
            candidates += nc
        # More complicated renaming: collect the context around
        # each name (global variables, API calls, punctuation)
        # and build a hash of the concatenation.
        hash_def_one_renaming = renameUsingHashDefLine(scopeAnalyst,
                                                       iBuilder_ugly,
                                                       twoLines=False,
                                                       debug=False)
        with open(temp_files['f5'], 'w') as f_hash_def_one_renaming:
            f_hash_def_one_renaming.writelines(hash_def_one_renaming)
        # Translate via the XML-RPC Moses server rather than a local decoder.
        mosesParams = {}
        mosesParams["text"] = hash_def_one_renaming  #lex_ugly.collapsedText
        #mosesParams["align"] = "true"
        #mosesParams["report-all-factors"] = "true"
        mresults = proxy.translate(
            mosesParams)  # __request("translate", mosesParams)
        rawText = Postprocessor(mresults["nbest"])
        translation_hash_renaming = rawText.getProcessedOutput()
        nc = processTranslationScoped(translation_hash_renaming,
                                      iBuilder_ugly,
                                      scopeAnalyst,
                                      lm_path,
                                      temp_files['f5'],
                                      output_path,
                                      base_name)
        if nc:
            candidates += nc
        # nc = processTranslationScopedFallback(translation_hash_renaming,
        #                                       translation_no_renaming,
        #                                       iBuilder_ugly,
        #                                       scopeAnalyst,
        #                                       lm_path,
        #                                       temp_files['f7'],
        #                                       output_path,
        #                                       base_name)
        # if nc:
        #     candidates += nc
        cleanup(temp_files)
        cleanupRenamed(pid)
        return (js_file_path, 'OK', candidates)
    except Exception, e:
        # Catch-all so one bad file cannot kill the worker pool.
        cleanup(temp_files)
        cleanupRenamed(pid)
        return (js_file_path, None, str(e).replace("\n", ""))
def deobfuscateJS(self, obfuscatedCode, transactionID):
    """Deobfuscate one blob of JavaScript text via the Moses server.

    obfuscatedCode -- raw (obfuscated/minified) JavaScript source text.
    transactionID  -- unique id used to name the per-request temp files,
                      so concurrent requests do not clobber each other.

    Returns the post-processed translation on success, or one of the
    strings "Preprocessor Failed" / "Beautifier Failed" /
    "IndexBuilder Failed" / "ScopeAnalyst Failed" on error.
    """
    # Moses translation service endpoint.
    proxy = xmlrpclib.ServerProxy("http://godeep.cs.ucdavis.edu:8080/RPC2")
    mosesParams = {}

    # Per-transaction scratch files.
    baseDir = "/home/ccasal/temp/"
    tempFile = baseDir + str(transactionID) + "_temp.js"
    preproFile = baseDir + str(transactionID) + "_prepro.js"
    beautFile = baseDir + str(transactionID) + "_beaut.js"

    # Strip comments, replace literals, etc.
    try:
        prepro = WebPreprocessor(obfuscatedCode)
        prepro.write_temp_file(preproFile)
    except Exception:
        cleanup([preproFile])
        print("Preprocessor failed")
        return ("Preprocessor Failed")

    # Normalize layout before lexing.
    # TODO: Need a text version of beautifier to avoid the file read and write.
    clear = Beautifier()
    ok = clear.run(preproFile, beautFile)
    print(ok)
    if (not ok):
        cleanup([preproFile, beautFile])
        return ("Beautifier Failed")

    # Lex and index the beautified text; iBuilder_ugly is built only to
    # validate that indexing succeeds on this input.
    try:
        lex_ugly = Lexer(beautFile)
        iBuilder_ugly = IndexBuilder(lex_ugly.tokenList)
    except Exception:
        cleanup([preproFile, beautFile])
        print("IndexBuilder fail")
        return ("IndexBuilder Failed")

    lex_ugly.write_temp_file(tempFile)

    # Scope analysis on a raw text version; like IndexBuilder above this
    # is a validation step before sending the text off for translation.
    try:
        scopeAnalyst = ScopeAnalyst(tempFile)
    except Exception:
        # BUG FIX: this path used to call cleanup({"temp": tempFile}) with
        # a dict (every other call site passes a list) and leaked the
        # prepro/beaut files.
        cleanup([preproFile, beautFile, tempFile])
        print("ScopeAnalyst Fail")
        return ("ScopeAnalyst Failed")

    # No renaming is applied before translation; the collapsed text is
    # sent to Moses as-is.
    print(lex_ugly.collapsedText)
    mosesParams["text"] = lex_ugly.collapsedText
    mosesParams["align"] = "true"
    mosesParams["report-all-factors"] = "true"

    results = proxy.translate(mosesParams)  # __request("translate", mosesParams)
    rawText = Postprocessor(results["nbest"])
    translation = rawText.getProcessedOutput()

    # Send to output:
    cleanup([preproFile, beautFile, tempFile])
    return (translation)
def processFile(l):
    '''
    Run one corpus JavaScript file through the whole evaluation pipeline:
    preprocess, beautify, minify, align, then collect renaming candidates
    from Nice2Predict, JSNice and several Moses translation models.

    l -- a record from the corpus index; only l[0] (the .js path relative
         to corpus_root) is read.

    Returns (js_file_path, 'OK', candidates) on success, or
    (js_file_path, None, <short error label>) on the first failure.

    NOTE(review): relies on module-level globals (corpus_root, output_path,
    ini_path, lm_path, cleanup, cleanupRenamed, tryRemove,
    processTranslation, renameUsingScopeId, renameUsingHashAllPrec,
    renameUsingHashDefLine) -- confirm they are defined in each worker.
    '''
    def localCleanup(output_path, base_names):
        # Best-effort removal of per-file outputs already written.
        for base_name in base_names:
            tryRemove(os.path.join(output_path, base_name))

    js_file_path = l[0]
    base_name = os.path.splitext(os.path.basename(js_file_path))[0]
    # Worker pid keeps temp file names unique across the process pool.
    pid = int(multiprocessing.current_process().ident)
    candidates = []

    try:
#     if True:
        # Temp files to be created during processing
        path_tmp = 'tmp_%d.js' % (pid)
        path_tmp_b = 'tmp_%d.b.js' % (pid)
        path_tmp_b_a = 'tmp_%d.b.a.js' % (pid)        # aligned clear version (written by Aligner)
        path_tmp_u = 'tmp_%d.u.js' % (pid)
        path_tmp_u_a = 'tmp_%d.u.a.js' % (pid)        # aligned uglified version (written by Aligner)
        path_tmp_unugly = 'tmp_%d.n2p.js' % (pid)
        path_tmp_jsnice = 'tmp_%d.jsnice.js' % (pid)

        # Inputs for the Moses models, one per renaming strategy.
        f2 = 'tmp_%d.no_renaming.js' % (pid)
        f3 = 'tmp_%d.basic_renaming.js' % (pid)
        f4 = 'tmp_%d.hash_renaming.js' % (pid)
        f5 = 'tmp_%d.hash_def_one_renaming.js' % (pid)
        f6 = 'tmp_%d.hash_def_two_renaming.js' % (pid)

        # Final artifacts stored under output_path.
        path_orig = '%s.js' % (base_name)
        path_ugly = '%s.u.js' % (base_name)
        path_unugly = '%s.n2p.js' % (base_name)
        path_jsnice = '%s.jsnice.js' % (base_name)

        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(path_tmp)
        except:
            cleanup(pid)
            return (js_file_path, None, 'Preprocessor fail')

        # Pass through beautifier to fix layout
        clear = Beautifier()
        ok = clear.run(path_tmp, path_tmp_b+'.tmp1')
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'Beautifier 1 fail')

        # Second beautify pass through JSNice (renaming/typing disabled).
        jsNiceBeautifier = JSNice(flags=['--no-types', '--no-rename'])
        (ok, _out, _err) = jsNiceBeautifier.run(path_tmp_b+'.tmp1', path_tmp_b+'.tmp2')
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'JSNice Beautifier 1 fail')

        ok = clear.run(path_tmp_b+'.tmp2', path_tmp_b)
        if not ok:
            cleanup(pid)
            # NOTE(review): label duplicates the first beautifier error above.
            return (js_file_path, None, 'Beautifier 1 fail')

        # Minify
        ugly = Uglifier()
        ok = ugly.run(path_tmp_b, path_tmp_u)
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'Uglifier fail')

        # Num tokens before vs after
        try:
            tok_clear = Lexer(path_tmp_b).tokenList
            tok_ugly = Lexer(path_tmp_u).tokenList
        except:
            cleanup(pid)
            return (js_file_path, None, 'Lexer fail')

        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(tok_clear) == len(tok_ugly):
            cleanup(pid)
            return (js_file_path, None, 'Num tokens mismatch')

        # Align minified and clear files, in case the beautifier
        # did something weird
        try:
            aligner = Aligner()
            # This is already the baseline corpus, no (smart) renaming yet
            aligner.align(path_tmp_b, path_tmp_u)
        except:
            cleanup(pid)
            return (js_file_path, None, 'Aligner fail')

        try:
#             iBuilder_clear = IndexBuilder(Lexer(path_tmp_b_a).tokenList)
            iBuilder_ugly = IndexBuilder(Lexer(path_tmp_u_a).tokenList)
        except:
            cleanup(pid)
            return (js_file_path, None, 'IndexBuilder fail')

        # Store original and uglified versions
        ok = clear.run(path_tmp_u_a, os.path.join(output_path, path_ugly))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly])
            return (js_file_path, None, 'Beautifier 2 fail')

        ok = clear.run(path_tmp_b_a, os.path.join(output_path, path_orig))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig])
            return (js_file_path, None, 'Beautifier 3 fail')

        # Run the JSNice from http://www.nice2predict.org
        unuglifyJS = UnuglifyJS()
        (ok, _out, _err) = unuglifyJS.run(path_tmp_b_a, path_tmp_unugly)
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig])
            return (js_file_path, None, 'Nice2Predict fail')

        ok = clear.run(path_tmp_unugly, path_tmp_unugly+'.tmp1')
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            return (js_file_path, None, 'Beautifier 4 fail')

        (ok, _out, _err) = jsNiceBeautifier.run(path_tmp_unugly+'.tmp1', path_tmp_unugly+'.tmp2')
        if not ok:
            cleanup(pid)
            # NOTE(review): unlike the surrounding branches, this one does
            # not call localCleanup -- confirm whether that is intended.
            return (js_file_path, None, 'JSNice Beautifier 2 fail')

        ok = clear.run(path_tmp_unugly+'.tmp2', os.path.join(output_path, path_unugly))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            # NOTE(review): label duplicates the previous beautifier error.
            return (js_file_path, None, 'Beautifier 4 fail')

        # Collect Nice2Predict's name suggestions as candidates.
        try:
            scopeAnalyst = ScopeAnalyst(os.path.join(
                os.path.dirname(os.path.realpath(__file__)), path_tmp_unugly))
            nameOrigin = scopeAnalyst.nameOrigin
            for (name, def_scope) in nameOrigin.iterkeys():
                candidates.append(('Nice2Predict', def_scope, name, '', ''))
        except:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            return (js_file_path, None, 'ScopeAnalyst fail')

        # Run the JSNice from http://www.jsnice.org
        jsNice = JSNice()
        (ok, _out, _err) = jsNice.run(path_tmp_b_a, path_tmp_jsnice)
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            return (js_file_path, None, 'JSNice fail')

        ok = clear.run(path_tmp_jsnice, os.path.join(output_path, path_jsnice))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, \
                                       path_unugly, path_jsnice])
            return (js_file_path, None, 'Beautifier 5 fail')

        # Collect JSNice's name suggestions as candidates.
        try:
            scopeAnalyst = ScopeAnalyst(os.path.join(
                os.path.dirname(os.path.realpath(__file__)), path_tmp_jsnice))
            nameOrigin = scopeAnalyst.nameOrigin
            for (name, def_scope) in nameOrigin.iterkeys():
                candidates.append(('JSNice', def_scope, name, '', ''))
        except:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, \
                                       path_unugly, path_jsnice])
            return (js_file_path, None, 'ScopeAnalyst fail')

        # Compute scoping: name2scope is a dictionary where keys
        # are (name, start_index) tuples and values are scope identifiers.
        # Note: start_index is a flat (unidimensional) index,
        # not a (line_chr_idx, col_chr_idx) index.
        try:
            scopeAnalyst = ScopeAnalyst(os.path.join(
                os.path.dirname(os.path.realpath(__file__)), path_tmp_u_a))
            _name2defScope = scopeAnalyst.resolve_scope()
            _isGlobal = scopeAnalyst.isGlobal
            _name2useScope = scopeAnalyst.resolve_use_scope()
        except:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, \
                                       path_unugly, path_jsnice])
            return (js_file_path, None, 'ScopeAnalyst fail')

        # Baseline translation: no renaming. One token-joined line of
        # source per input line for Moses.
        no_renaming = []
        for _line_idx, line in enumerate(iBuilder_ugly.tokens):
            no_renaming.append(' '.join([t for (_tt,t) in line]) + "\n")

        with open(f2, 'w') as f_no_renaming:
            f_no_renaming.writelines(no_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                             'train.no_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f2)

        nc = processTranslation(translation, iBuilder_ugly,
                                scopeAnalyst, lm_path,
                                f2, output_path, base_name, clear)
        if nc:
            candidates += nc

        # Simple renaming: disambiguate overloaded names using scope id
        basic_renaming = renameUsingScopeId(scopeAnalyst, iBuilder_ugly)
        with open(f3, 'w') as f_basic_renaming:
            f_basic_renaming.writelines(basic_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                             'train.basic_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f3)

        nc = processTranslation(translation, iBuilder_ugly,
                                scopeAnalyst, lm_path,
                                f3, output_path, base_name, clear)
        if nc:
            candidates += nc

        # More complicated renaming: collect the context around
        # each name (global variables, API calls, punctuation)
        # and build a hash of the concatenation.
        hash_renaming = renameUsingHashAllPrec(scopeAnalyst,
                                               iBuilder_ugly,
                                               debug=False)
#         print hash_renaming
        with open(f4, 'w') as f_hash_renaming:
            f_hash_renaming.writelines(hash_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                             'train.hash_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f4)

        nc = processTranslation(translation, iBuilder_ugly,
                                scopeAnalyst, lm_path,
                                f4, output_path, base_name, clear)
        if nc:
            candidates += nc

        # Hash renaming using only the definition line (one line of context).
        hash_def_one_renaming = renameUsingHashDefLine(scopeAnalyst,
                                                       iBuilder_ugly,
                                                       twoLines=False,
                                                       debug=False)
        with open(f5, 'w') as f_hash_def_one_renaming:
            f_hash_def_one_renaming.writelines(hash_def_one_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                             'train.hash_def_one_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f5)

        nc = processTranslation(translation, iBuilder_ugly,
                                scopeAnalyst, lm_path,
                                f5, output_path, base_name, clear)
        if nc:
            candidates += nc

        # Same, but with two lines of definition context.
        hash_def_two_renaming = renameUsingHashDefLine(scopeAnalyst,
                                                       iBuilder_ugly,
                                                       twoLines=True,
                                                       debug=False)
        with open(f6, 'w') as f_hash_def_two_renaming:
            f_hash_def_two_renaming.writelines(hash_def_two_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                             'train.hash_def_two_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f6)

        nc = processTranslation(translation, iBuilder_ugly,
                                scopeAnalyst, lm_path,
                                f6, output_path, base_name, clear)
        if nc:
            candidates += nc

        cleanup(pid)
        cleanupRenamed(pid)
        return (js_file_path, 'OK', candidates)

    except Exception, e:
        cleanup(pid)
        cleanupRenamed(pid)
        return (js_file_path, None, str(e).replace("\n", ""))
def testHashDefRenaming(self):
    '''
    TODO: Test the hashing functions are using the context correctly for both
    one and two line options.  Goals are to confirm a) correct line summarization
    b) consistency of naming of the same variable.  However, two different
    variables may map to the same name with insufficient context.

    Checks exact hash-name output (<<...>> placeholders) produced by
    PreRenamer.rename for both the HASH_ONE and HASH_TWO strategies on
    the first obfuscated fixture file.
    '''
    #print(self.obsfuscatedTextFiles[0])
    ib1 = IndexBuilder(self.obsLexed[0].tokenList)
    sa1 = ScopeAnalyst(self.obsfuscatedTextFiles[0])
    RS = RenamingStrategies()
    preRen = PreRenamer()
    # True -> debug output from the renamer.
    oneLine1 = preRen.rename(RS.HASH_ONE, ib1, sa1, True)
    twoLine1 = preRen.rename(RS.HASH_TWO, ib1, sa1, True)
    # Older direct API for the same transformations:
    # oneLine1 = renameUsingHashDefLine(sa1, ib1, False, True)
    # twoLine1 = renameUsingHashDefLine(sa1, ib1, True, True)
    #print("OneLine1------------------------------------------------")
    #print(oneLine1)
    #print("TwoLine1------------------------------------------------")
    #print(twoLine1)

    # One line tests: each renamed identifier becomes a hash of its
    # single-line definition context.
    lines = oneLine1.split("\n")
    self.assertTrue(lines[0] == "var geom2d = function ( ) {")
    #var <<var#=numeric.sum,=numeric.numberEquals;>> = numeric . sum , <<var=numeric.sum,#=numeric.numberEquals;>> = numeric . numberEquals ;
    self.assertTrue(
        lines[1] == "var <<var#=numeric.sum,=numeric.numberEquals;>> = numeric . sum , <<var=numeric.sum,#=numeric.numberEquals;>> = numeric . numberEquals ;"
    )
    self.assertTrue(
        lines[3] == "function <<function#(,){>> ( <<function(#,){>> , <<function(,#){>> ) {"
    )
    self.assertTrue(lines[4] == "this . x = <<function(#,){>> ;")
    #Why is x not transformed? Global, can't change...
    #print(lines[7])
    self.assertTrue(lines[7] == "u ( <<function#(,){>> , {")
    #Why is u not transformed? -> Because u's hash <<function#(,){>> is
    #ALREADY IN USE IN THE SAME SCOPE!! (This is why u can be translated
    #in 2-lines)
    self.assertTrue(
        lines[16] == "for ( var <<for(var#in)[]=[];>> in <<function(,#){>> ) <<function(#,){>> [ <<for(var#in)[]=[];>> ] = <<function(,#){>> [ <<for(var#in)[]=[];>> ] ;"
    )
    self.assertTrue(lines[20] == "Vector2d : <<function#(,){>>")

    # Two line tests (TODO): hashes now summarize two lines of context,
    # which disambiguates names that collided in the one-line case.
    lines = twoLine1.split("\n")
    self.assertTrue(lines[0] == "var geom2d = function ( ) {")
    self.assertTrue(
        lines[1] == "var <<var#=numeric.sum,=numeric.numberEquals;return#([this.x*.x,this.y*.y]);>> = numeric . sum , <<var=numeric.sum,#=numeric.numberEquals;return#(this.x,.x,)&&(this.y,.y,);>> = numeric . numberEquals ;"
    )
    # function <<function#(,){(#,{>> ( <<function(#,){this.x=#;>> , <<function(,#){this.y=#;>> ) {
    self.assertTrue(
        lines[3] == "function <<function#(,){(#,{>> ( <<function(#,){this.x=#;>> , <<function(,#){this.y=#;>> ) {"
    )
    self.assertTrue(lines[4] == "this . x = <<function(#,){this.x=#;>> ;")
    #Why is x not transformed? Global, can't change...
    #u(r, {
    #
    #<<function#(,){#(,{>> ( <<function#(,){(#,{>> , {
    self.assertTrue(
        lines[7] == "<<function#(,){#(,{>> ( <<function#(,){(#,{>> , {"
    )
    # is transformed, but order seems backwards.
    self.assertTrue(
        lines[16] == "for ( var <<for(var#in)[]=[];for(varin)[#]=[];>> in <<function(,#){for(varin#)[]=[];>> ) <<function(#,){for(varin)#[]=[];>> [ <<for(var#in)[]=[];for(varin)[#]=[];>> ] = <<function(,#){for(varin#)[]=[];>> [ <<for(var#in)[]=[];for(varin)[#]=[];>> ] ;"
    )
    #Not really two lines, but two references?
    self.assertTrue(lines[20] == "Vector2d : <<function#(,){(#,{>>")
    self.assertTrue(True)
# Command-line driver: preprocess, beautify and hash-def-rename one JS file.
# Usage: <script> <input_file> <output_file> <debug_mode>
# (input_file is parsed from sys.argv[1] earlier in this script.)
output_file = os.path.abspath(sys.argv[2])
mode = int(sys.argv[3])

# Strip comments, replace literals, etc.
prepro = Preprocessor(input_file)
prepro.write_temp_file('tmp.js')

# Normalize layout so the lexer sees one statement per line.
clear = Beautifier()
ok = clear.run('tmp.js', 'tmp.b.js')
if not ok:
    # BUG FIX: the beautifier result used to be ignored; fail loudly
    # instead of lexing a missing or partial tmp.b.js.
    sys.exit('Beautifier fail')

lexer = Lexer('tmp.b.js')
iBuilder = IndexBuilder(lexer.tokenList)
scopeAnalyst = ScopeAnalyst(os.path.join(
    os.path.dirname(os.path.realpath(__file__)), 'tmp.b.js'))

# Rename each bound name to a hash of its (single) definition-line context.
hash_renaming = renameUsingHashDefLine(scopeAnalyst,
                                       iBuilder,
                                       twoLines=False,
                                       debug=mode)

with open(output_file, 'w') as f:
    f.writelines(hash_renaming)

# writeTmpLines(hash_renaming, output_file)

#     clear = Beautifier()
#     ok = clear.run(tmp_path, os.path.join(output_path, o_path))
#     if not ok:
#         return False