def get_forecast(link):
    html_doc = urllib2.urlopen(link).read()
    soup = BeautifulSoup(html_doc, "html.parser")
    forecast_text = soup.find("div", id="div_wgfcst1").find("script").string
    parser = Parser()
    forecast_tree = parser.parse(forecast_text)
    full_data = {
        parse_key(node.left): parse_value(node.right)
        for node in nodevisitor.visit(forecast_tree)
        if isinstance(node, ast.Assign)
    }
    forecast_tree = parser.parse(forecast_text)
    forecast = {
        parse_key(node.left): parse_array(node.right)
        for node in nodevisitor.visit(forecast_tree)
        if isinstance(node, ast.Assign) and isinstance(node.right, ast.Array)
    }
    full_data.update(forecast)
    return full_data

def _addscript(self, path, date, url, script):
    checksum = sha1(script).digest()
    if redis_db.sadd('js_script_checksums', checksum):
        self.last['script_checksum'] = hexlify(checksum)
        try:
            js_parser = JSParser()
            js_parser.parse(script)
        except Exception:
            # Keep unparsable scripts as 'bad' script if they don't look like HTML.
            mime_type = magic.from_buffer(script.replace('\v', ' '), mime=True)
            if mime_type == 'text/html' or mime_type == 'application/xml':
                return
            print >> sys.stderr, 'Cannot parse ' + path
            path = '.bad/' + path
        header = '// Retrieved %sfrom %s\n' % (date + ', ' if date else '', url)
        path = args.prefix + '/' + path
        try:
            os.makedirs(os.path.dirname(path))
        except OSError:
            pass
        jsfile = open(path, 'w')
        jsfile.write(header)
        jsfile.write(script)
        jsfile.close()

def extract(self, node):
    '''
    Given a BeautifulSoup representation of an HTML document, return
    a list of all code snippets in that document.
    '''
    node_code = None
    code_snippets = []
    # Attempt to parse content for a code node as JavaScript.
    # Mark the content as a code snippet if it is parsed successfully.
    # Skip nodes with nothing but whitespace content.
    if type(node) is Tag and node.name in self.TAGS:
        if node.text.strip() != '':
            try:
                js_parser = JavaScriptParser()
                js_parser.parse(node.text)
            except (SyntaxError, TypeError, AttributeError):
                logger.debug("Code content could not be parsed as JavaScript.")
            else:
                node_code = node.text
                code_snippets.append(node_code)
    # If this node did not contain valid code, then visit all children
    # and check them for code.
    if node_code is None and hasattr(node, 'children'):
        for child_element in node.children:
            code_snippets.extend(self.extract(child_element))
    return code_snippets

def case(self, case):
    parser_a = Parser()
    result_a = parser_a.parse(case)
    parser_b = Parser()
    result_b = parser_b.parse(case)
    self.assertEqual(result_a, result_b)

def _parse_succeeds(self, text):
    try:
        parser = JsParser()
        parser.parse(text)
    except (SyntaxError, TypeError):
        return False
    else:
        return True

def _test_function_expression(self):
    text = """
    if (true) {
      function() {
        foo;
        location = 'http://anywhere.com';
      }
    }
    """
    parser = Parser()
    parser.parse(text)

def scan_js(crawler, url, content):
    ''' scan javascript for url assignments (like ajax calls). '''
    LOGGER.info('Scanning Javascript on %s' % url)
    parser = Parser()
    tree = parser.parse(content)
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.Assign):  # <something>: <something>
            continue
        leftval = getattr(node.left, 'value', '')  # 'leftval': <something>
        if not leftval:
            continue
        if 'url' not in leftval:  # 'url': <something>
            continue
        if isinstance(node.right, ast.String):  # 'url': 'somestring'
            LOGGER.info('Found interesting url in JS: %s' % node.right.value[1:-1])
            crawler.check_link(url, node.right.value[2:-1])
        for item in node.right.__dict__.values():
            # string in <something>
            # <something> may be function_call() / variable + 'somestring'
            if isinstance(item, ast.String):
                LOGGER.info('Found interesting url in JS: %s' % item.value[1:-1])
                crawler.check_link(url, item.value[2:-1])

def extract_glow_lib():
    runjs = norm_path('untrusted/run.js')
    parser = JSParser()
    with open(runjs) as f:
        tree = parser.parse(f.read())
    for node in nodevisitor.visit(tree):
        if (isinstance(node, ast.Assign)
                and isinstance(node.left, ast.DotAccessor)
                and node.left.identifier.value == 'glowscript_libraries'
                and isinstance(node.right, ast.Object)):
            break
    else:
        print('Parsing {} failed'.format(runjs))
        exit(-1)
    return preproc_lib_path({
        prop.left.value: [
            eval(lib.value)
            for lib in prop.right.items
            if isinstance(lib, ast.String)
        ]
        for prop in node.right.properties
    })

def load_data():
    # Read list names (toplists.js)
    f = open("./data/masterbugtable.js", "r")
    tmp = f.read()
    f.close()
    masterbugtable = json.loads(tmp[tmp.index("{"):])
    # find and process latest test data
    f = open("./data/testing/index.json", "r")
    tmp = f.read()
    f.close()
    test_file_index = json.loads(tmp)
    f = open("./data/testing/%s" % test_file_index[-1], "r")
    test_data = {}
    test_reader = csv.reader(f)
    for line in test_reader:
        test_data[str(line[0])] = {
            "bug": line[0],
            "test_date": line[1],
            "ua": line[2],
            "test_state": line[3],
        }
    f.close()
    f = open("./data/sitedata.js", "r")
    parser = Parser()
    tree = parser.parse(f.read())
    f.close()
    return {
        "masterbugtable": masterbugtable,
        "test_result_files": test_file_index,
        "test_results": test_data,
        "tests_parsed": tree,
    }

def minify(text, mangle=False, mangle_toplevel=False):
    parser = Parser()
    tree = parser.parse(text)
    if mangle:
        mangler.mangle(tree, toplevel=mangle_toplevel)
    minified = ECMAMinifier().visit(tree)
    return minified

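# Hedged usage sketch for minify() above: not part of the original module.
# It assumes the same slimit-based Parser/mangler/ECMAMinifier imports that
# minify() itself relies on; the sample source and helper name are illustrative.
def _example_minify_usage():
    sample_js = "var longName = 1; function addOne(value) { return value + longName; }"
    # With mangle=True, local identifiers are shortened before minification.
    return minify(sample_js, mangle=True, mangle_toplevel=True)
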
def position_info_from_naver(url):
    parsed_obj = urlparse(url)
    article_info_no = 1
    if parsed_obj.netloc == 'm.land.naver.com':
        article_info_no = parsed_obj.path.split('/')[-1]
    elif parsed_obj.netloc == 'new.land.naver.com':
        article_info_no = dict(parse_qsl(parsed_obj.query))['articleNo']
    else:
        raise Exception('unknown url : ' + parsed_obj.netloc)
    r = requests.get('https://m.land.naver.com/article/info/' + str(article_info_no))
    soup = BeautifulSoup(r.content, "html.parser")
    res = dict()
    for sc in soup.findAll("script"):
        pos = sc.text.find('land.articleDetail.jsonPageData')
        if pos > 0:
            bracket_start_pos = sc.text.find('{', pos)
            bracket_end_pos = sc.text.find(';', pos)
            # print(sc.text[bracket_start_pos:bracket_end_pos])
            # js_obj_txt = sc.text[bracket_start_pos:bracket_end_pos]
            js_obj_txt = sc.text[pos:bracket_end_pos]
            # print(js_obj_txt)
            parser = Parser()
            tree = parser.parse(js_obj_txt)
            visitor = NaverDataVisitor()
            visitor.visit(tree)
            res = visitor.res
    return res

def get_property_attributes(url):
    response = requests.get(url)
    # html parser
    soup = BeautifulSoup(response.text, 'html.parser')
    script = soup.findAll('script', {'type': 'text/javascript'})[3]
    # if ad link returns valid search result, scan for attributes, else skip
    if soup.title.string.find('Real Estate Properties') == -1:
        # if ad is archived, put in dummy date, else get real date
        if soup.find("span", "status-label label-archive") is not None:
            date = '31 Dec 9999'
        else:
            # get date from title of advertisement
            date = re.findall(r'\d{2}\s\w{3}\s\d{4}', soup.title.string)[0]
        # javascript parser
        parser = Parser()
        tree = parser.parse(script.text)
        fields = {
            getattr(node.left, 'value', ''): getattr(node.right, 'value', '')
            for node in nodevisitor.visit(tree)
            if isinstance(node, ast.Assign)
        }
        fields.update({'"date sold"': '"' + date + '"'})
        return fields
    else:
        return None

def test_func(self):
    parser = Parser()
    tree = parser.parse(input)
    mangle(tree, toplevel=True)
    self.assertMultiLineEqual(
        textwrap.dedent(tree.to_ecma()).strip(),
        textwrap.dedent(expected).strip())

def analyzeJSCodesFinerBlock(script, display=False):
    try:
        t1 = time.time()
        parser = Parser()
        script = script.strip()
        if script.startswith('<!--') and script.endswith('-->'):
            script = script[4:-3]
        tree = parser.parse(script)
        visitor = MyVisitor(display)
        visitor.visit(tree, 0)
        if len(visitor.first_level_seq) != len(visitor.scripts):
            print >> sys.stderr, "error parsing script: scripts and seqs length inconsistent " + script[:100]
            return None, None
        t2 = time.time()
        total_time = t2 - t1
        total_len = float(len(script))
        try:
            portion = [len(x) / total_len for x in visitor.scripts]
            for i in range(len(portion)):
                t = total_time * portion[i]
                print "AST_TIME: %f %d" % (t, len(visitor.scripts[i]))
        except:
            pass
        return visitor.first_level_seq, visitor.scripts
    except Exception as e:
        print >> sys.stderr, "error parsing script: " + str(e) + " || [START]" + script[:100] + "[END]"
        return None, None

def chapter_url2image_urls(chapter_url):
    g = get_info_from_url(chapter_url, chapter2images)
    p = Parser()
    for t2_or_t3, (slot_idx, pattern_idx, info_pattern) in g:
        tag, _, data = t2_or_t3
        # p = Parser()
        tree = p.parse(data)
        pre = None
        for node in nodevisitor.visit(tree):
            if isinstance(node, ast.Identifier) and node.value == 'image_list':
                break
            pre = node
        assert pre is not None
        m = _image_list_match_pattern.match(pre.to_ecma())
        assert m is not None
        image_list = eval(m.group(1))
        image_list = eval(image_list)
        ls = []
        for info in image_list.values():
            src = base64.b64decode(info['src']).decode('ascii')
            page = info['page']
            ls.append((page, src))
        ls.sort()
        ls = tuple(src for _, src in ls)
        return ls

def find_first_match(test_file_content, pos):
    test_file_content = TestMethodMatcher.UnitTest.insert_mark(test_file_content, pos)
    if not test_file_content:
        return None
    parser = Parser()
    tree = parser.parse(test_file_content)
    return TestMethodMatcher.UnitTest.get_test_name(tree)

def parse_global_js_for_access_id_action_url(global_js):
    parser = Parser()
    tree = parser.parse(global_js)
    parts = ['protocol', 'roDomain', 'ro', 'rt']
    UrlParts = namedtuple('UrlParts', parts)
    url_parts = UrlParts([], [], [], [])
    getvalue = operator.attrgetter('value')
    err = "Too many '{}' assignments in global.js."
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.Assign):
            try:
                left_value = getvalue(node.left).strip('\'"')
            except AttributeError:
                continue
            if left_value in parts:
                right_value = getvalue(node.right).strip('\'"')
                assert right_value not in getattr(
                    url_parts, left_value), err.format(left_value)
                getattr(url_parts, left_value).append(right_value)
    return (url_parts.protocol[0] + url_parts.roDomain[0] +
            url_parts.ro[0] + url_parts.rt[0])

def minify(text, mangle=False):
    parser = Parser()
    tree = parser.parse(text)
    if mangle:
        mangler.mangle(tree)
    minified = ECMAMinifier().visit(tree)
    return minified

def assertUnusedObjects(self, source, expected):
    parser = Parser()
    tree = parser.parse(source)
    uvisit = UnusedObjectsVisitor()
    uvisit.do(tree)
    self.maxDiff = None
    self.assertSequenceEqual(tree.to_ecma(), expected)

def analyzeJSCodesFinerBlock(script, display=False):
    try:
        t1 = time.time()
        parser = Parser()
        script = script.strip()
        if script.startswith('<!--') and script.endswith('-->'):
            script = script[4:-3]
        tree = parser.parse(script)
        visitor = MyVisitor(display)
        visitor.visit(tree, 0)
        if len(visitor.first_level_seq) != len(visitor.scripts):
            print >> sys.stderr, "error parsing script: scripts and seqs length inconsistent " + script[:100]
            return None, None
        t2 = time.time()
        total_time = t2 - t1
        total_len = float(len(script))
        try:
            portion = [len(x) / total_len for x in visitor.scripts]
            for i in range(len(portion)):
                t = total_time * portion[i]
                print "AST_TIME: %f %d" % (t, len(visitor.scripts[i]))
        except:
            pass
        return visitor.first_level_seq, visitor.scripts
    except Exception as e:
        print >> sys.stderr, "error parsing script: " + str(
            e) + " || [START]" + script[:100] + "[END]"
        return None, None

def parse_country(self, response):
    charts = response.xpath('//*[@class="row graph_row"]')
    total_corona_chart = charts[0]
    script = total_corona_chart.xpath('div/script/text()').extract()[0]
    title = total_corona_chart.xpath('div/h3/text()').extract()[0]
    try:
        country_name = title[title.index(" in ") + 4:]
        if country_name[:4] == "the ":
            country_name = country_name[4:]
    except ValueError:
        raise ValueError("Worldometer changed their labels. "
                         "Hold your pain, Harold.")
    parser = Parser()
    tree = parser.parse(script)
    data = [None, None]  # dates and corresponding number of cases
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.Assign):
            if getattr(node.left, 'value', '') == 'categories' and not data[0]:
                print("\nparsing dates\n")
                data[0] = [eval(getattr(s, 'value', ''))
                           for s in getattr(node.right, 'items', '')]
            elif getattr(node.left, 'value', '') == 'data' and not data[1]:
                print("\nparsing number of cases\n")
                data[1] = [int(getattr(n, 'value', ''))
                           for n in getattr(node.right, 'items', '')]
    assert data[0] and data[1] and len(data[0]) == len(data[1])
    with open("data/%s.csv" % country_name, 'w+') as f:
        for k in range(len(data[0])):
            f.write(data[0][k])
            f.write(',')
            f.write(str(data[1][k]))
            f.write('\n')

def card_price_history(setname, cardname):
    '''
    Scrapes price history of card from MTGPrice.com, using javascript parser

    Input: Setname and cardname are strings, generally taken from Scryfall API.
    Output: A numpy array of price history, each 'row' in the form
            [timestamp, price]
    '''
    # Turn card data into soup
    link = ('https://www.mtgprice.com/sets/' + '_'.join(setname.split()) +
            '/' + '_'.join(cardname.split()))
    soup = BeautifulSoup(requests.get(link).content, 'html.parser')
    # GET RESULTS
    text_to_find = 'var results = ['
    history = []
    for script in soup.findAll('script', type='text/javascript'):
        if text_to_find in script.text:
            parser = Parser()
            tree = parser.parse(script.text)
            for node in nodevisitor.visit(tree):
                if isinstance(node, ast.Assign) and getattr(
                        node.left, 'value', '') == "\"data\"":
                    for prices in node.right.items:
                        history.append(
                            [prices.items[0].value, prices.items[1].value])
                    break
    return np.array(history)

def extract_g_config(script_text):
    parser = Parser()
    ast_tree = parser.parse(script_text)
    for node in nodevisitor.visit(ast_tree):
        if isinstance(node, ast.VarDecl) and node.identifier.value == 'g_config':
            return extract_object_as_map(node.initializer)

def inital_check_for_obfuscation_condtiion_sensitiveFunctions(js_text):
    parser = Parser()
    tree = parser.parse(js_text)
    keywords = set()
    if_condition = False
    for node in nodevisitor.visit(tree):
        if isinstance(node, If):
            if_condition = True
            continue
        stack = [node]
        # BFS to go to every depth of the AST tree
        while stack:
            node = stack.pop()
            # only dot access has a.b.getStringfromChar
            if isinstance(node, DotAccessor):
                try:
                    for i in node.children():
                        stack.append(i)
                except:
                    pass
                continue
            if isinstance(node, Identifier):
                # print (node.value),
                keywords.add(node.value)
    # print ("Done visit")
    obfuscation = False
    profiling = False
    if if_condition:
        pass
    ob_list = set()
    pro_list = set()
    for ob in obfuscation_function_names:
        if ob in keywords:
            # print ("[Obfuscation keywords]", ob)
            obfuscation = True
            ob_list.add(ob)
            # break
    for pro in profiling_function_names:
        if pro in keywords:
            # print ("[Profiling keywords]", pro)
            profiling = True
            pro_list.add(pro)
            # break
    # print ('if_condition: {}, obfuscation {}, profiling {}'.format(if_condition, obfuscation, profiling))
    # print (js_text)
    return if_condition, obfuscation, profiling, ob_list, pro_list

def test_func(self):
    parser = Parser()
    tree = parser.parse(input)
    mangle(tree, toplevel=True)
    self.assertMultiLineEqual(
        textwrap.dedent(tree.to_ecma()).strip(),
        textwrap.dedent(expected).strip()
    )

def assertFoldingObjects(self, source, expected):
    parser = Parser()
    tree = parser.parse(source)
    uvisit = foldingvisitor.FoldingVisitor()
    uvisit.do(tree)
    print(tree.to_ecma())
    self.maxDiff = None
    self.assertSequenceEqual(tree.to_ecma(), expected)

def invJSToZ3(inv, typeEnv):
    p = Parser()
    t = p.parse(inv)
    assert isinstance(t, jsast.Program)
    assert len(t.children()) == 1
    assert isinstance(t.children()[0], jsast.ExprStatement)
    return jsToZ3Expr(t.children()[0].expr, typeEnv)

def whileExtract(s):
    '''Extracts all the while loops in the script.'''
    l = []
    parser = Parser()
    tree = parser.parse(s)
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.While):
            l += [node.to_ecma()]
    return l

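# Hedged usage sketch for whileExtract() above: not part of the original
# module. It only assumes the slimit Parser/ast/nodevisitor imports that the
# function itself uses; the JavaScript snippet and helper name are illustrative.
def _example_whileExtract_usage():
    sample_js = "var i = 0; while (i < 3) { i++; } while (true) { break; }"
    # Returns a list with the regenerated source of each while loop (two here).
    return whileExtract(sample_js)
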
def test_bug_no_semicolon_at_the_end_of_block_plus_newline_at_eof(self):
    # https://github.com/rspivak/slimit/issues/3
    text = textwrap.dedent("""
    function add(x, y) {
      return x + y;
    }
    """)
    parser = Parser()
    tree = parser.parse(text)
    self.assertTrue(bool(tree.children()))

def _parse_redirect_to_security_challenge_script(script: str) -> str:
    """
    Parses the script which redirects us to security challenge page and
    gets that URL.
    """
    parser = Parser()
    tree = parser.parse(script)
    nodes = [node for node in nodevisitor.visit(tree) if isinstance(node, ast.Assign)]
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.Assign) and hasattr(node, 'left') and isinstance(node.left, ast.DotAccessor):
            children = node.left.children()
            if len(children) == 2 and children[0].value == 'window' and children[1].value == 'location':
                return node.right.value.strip('\'"')

def parse_script(data):
    # Hack. Fix javascript syntax issue in steam's response
    to_replace = 'BuildGameRow(game, )'
    replacement = 'BuildGameRow(game, 0)'
    data = data.replace(to_replace, replacement)
    parser = Parser()
    tree = parser.parse(data)
    variables = [node for node in nodevisitor.visit(tree)
                 if isinstance(node, ast.VarDecl)]
    return variables

def addAllIntEnv(inv, env=None):
    if env is None:
        env = {}
    p = Parser()
    t = p.parse(inv)
    for node in nodevisitor.visit(t):
        if isinstance(node, jsast.Identifier):
            env[node.value] = Int
    return env

def parse(text):
    """
    Take a valid JavaScript source string and turn it into a source tree
    through the Parser provided by the slimit.parser module.
    """
    global _parser
    if _parser is None:
        _parser = Parser()
    return _parser.parse(text)

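# Hedged usage sketch for parse() above: not part of the original module.
# It relies on the module-level _parser and slimit Parser already used by
# parse(); the snippet and helper name are illustrative only.
def _example_parse_usage():
    tree = parse("var answer = 40 + 2;")
    # to_ecma() regenerates JavaScript source from the slimit AST.
    return tree.to_ecma()
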
def analyzeJSCodes(script, display=False):
    try:
        parser = Parser()
        tree = parser.parse(script)
        visitor = MyVisitor(display)
        visitor.visit(tree, 0)
        # print "first_level_seq: %d" % len(visitor.first_level_seq)
        return visitor.node_order_list
    except Exception as e:
        print >> sys.stderr, "error parsing script: " + str(e) + " || " + script
        return None

def rewriteJSCodes(script, display=False):
    try:
        parser = Parser()
        tree = parser.parse(script)
        visitor = RewriteVisitor(display)
        visitor.visit(tree, 0)
        x = ECMAVisitor().visit(tree)
        print x
        # print tree.to_ecma()
        # print "first_level_seq: %d" % len(visitor.first_level_seq)
    except Exception as e:
        print >> sys.stderr, "error parsing script: " + str(e) + " || " + script

def removeDeclarations(js_file):
    parser = Parser()
    tree = parser.parse(js_file)
    output = ""
    for child in tree.children():
        if type(child) != ast.VarStatement:
            output += child.to_ecma() + "\n"
        else:
            nodes = [x for x in nodevisitor.visit(child)]
            if type(nodes[0].initializer) not in [ast.String, ast.Number, ast.BinOp]:
                output += child.to_ecma() + "\n"
    return output

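# Hedged usage sketch for removeDeclarations() above: not part of the original
# module. It assumes the slimit Parser/ast/nodevisitor imports the function
# itself uses; the JavaScript snippet and helper name are illustrative only.
def _example_removeDeclarations_usage():
    sample_js = "var greeting = 'hi'; var handler = function() { return 1; }; doWork();"
    # Intended behaviour: string/number/arithmetic var declarations are
    # dropped, while everything else is kept in the returned source string.
    return removeDeclarations(sample_js)
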
def analyzeJSCodes(script, display=False):
    try:
        parser = Parser()
        tree = parser.parse(script)
        visitor = MyVisitor(display)
        visitor.visit(tree, 0)
        # print "first_level_seq: %d" % len(visitor.first_level_seq)
        return visitor.node_order_list
    except Exception as e:
        print >> sys.stderr, "error parsing script: " + str(
            e) + " || " + script
        return None

def main():
    args = parse_args()
    with open(args.filename) as f:
        source = f.read()
    parser = Parser()
    tree = parser.parse(source)
    visitor = ConstantReductionVisitor(args.debug)
    tree = visitor.visit(tree)
    print tree.to_ecma()

def get_forecast(link):
    html_doc = urllib2.urlopen(link).read()
    soup = BeautifulSoup(html_doc, "html.parser")
    forecast_text = soup.find("div", id="div_wgfcst1").find("script").string
    parser = Parser()
    forecast_tree = parser.parse(forecast_text)
    full_data = {parse_key(node.left): parse_value(node.right)
                 for node in nodevisitor.visit(forecast_tree)
                 if isinstance(node, ast.Assign)}
    forecast_tree = parser.parse(forecast_text)
    forecast = {parse_key(node.left): parse_array(node.right)
                for node in nodevisitor.visit(forecast_tree)
                if isinstance(node, ast.Assign) and isinstance(node.right, ast.Array)}
    full_data.update(forecast)
    return full_data

def parse_JavaScript(js):
    global functions
    parser = Parser()
    tree = parser.parse(js)
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.FuncDecl):
            if len(node.parameters) > 1:
                last = node.parameters[len(node.parameters) - 1]
                first = node.parameters[0]
                # check for the first parameter
                if first.value == "this$static":
                    # check that the last one is a callback
                    if last.value == "callback":
                        # the function will call createStreamWriter if it is used in the client interface
                        if "createStreamWriter" in node.to_ecma():
                            params = []
                            # if we have function arguments
                            if len(node.parameters) > 2:
                                # -2 for the 'this' and callback
                                num_of_params = len(node.parameters) - 2
                                for param in node.parameters:
                                    # we just append the arguments we will need to make in the GWT request
                                    if param.value != "this$static" and param.value != "callback":
                                        params.append(param.value)
                            # else we have no arguments
                            else:
                                num_of_params = 0
                            # strip the correct function name
                            function = node.identifier.value.replace("$", "")
                            function = re.sub(r'_\d+', '', function)
                            # append to a list, since we may have functions of the same name, but different signatures
                            extracted.append({
                                "function": function,
                                "num_of_args": num_of_params,
                                "args": params,
                                "arg_type_data": get_param_types(function, node.to_ecma())
                            })

def rewriteJSCodes(script, display=False):
    try:
        parser = Parser()
        tree = parser.parse(script)
        visitor = RewriteVisitor(display)
        visitor.visit(tree, 0)
        x = ECMAVisitor().visit(tree)
        print x
        # print tree.to_ecma()
        # print "first_level_seq: %d" % len(visitor.first_level_seq)
    except Exception as e:
        print >> sys.stderr, "error parsing script: " + str(
            e) + " || " + script

def episode():
    url = h.extract_var(args, 'url')
    name = h.extract_var(args, 'name')
    soup = BeautifulSoup(h.make_request(url, cookie_file, cookie_jar))
    div = h.bs_find_with_class(soup, 'div', 'video-player')
    scripts = div.findAll('script')
    script = None
    for _script in scripts:
        if 'CryptoJS' in _script.text:
            script = _script
            break
    url = ''
    if script:
        script_text = script.text
        _dailytoday = ''
        _subject = ''
        parser = Parser()
        tree = parser.parse(script.text)
        for node in tree.children():
            ecma = node.to_ecma()
            if ecma.startswith('var dailytoday ='):
                _dailytoday = node.children()[0].children()[1].to_ecma()[1:-1]
            elif ecma.startswith('var subject ='):
                _subject = node.children()[0].children()[1].to_ecma()[1:-1]
            # elif "var bigmumbai = " not in ecma and "bigmumbai = " in ecma:
            #     print ecma
        if _dailytoday and _subject:
            url = decrypt.decrypt_url(_dailytoday, _subject)
        else:
            url = script.text.split('bigmumbai = ', 2)[2].split(';')[0][1:-1]
        print url
        plot = h.bs_find_with_class(soup, 'div', 'vp-info').find('span', {'itemprop': 'description'}).text
        thumbnail = soup.find('div', {'itemprop': 'video'}).find('meta', {'itemprop': 'thumbnailUrl'})['content']
        h.add_dir_video(addon_handle, name, url, thumbnail, plot)
    else:
        iframe = div.find('iframe')
        if iframe:
            attrs = dict(iframe.attrs)
            youtube_url = attrs['src']
            print youtube_url
            video_id = urlparse.urlparse(youtube_url).path.replace('/embed/', '')
            url = 'plugin://plugin.video.youtube/play/?video_id=%s' % video_id
            h.add_dir_video(addon_handle, name, url, '', '')

def removeDeclarations(js_file):
    parser = Parser()
    tree = parser.parse(js_file)
    output = ""
    for child in tree.children():
        if type(child) != ast.VarStatement:
            output += child.to_ecma() + "\n"
        else:
            nodes = [x for x in nodevisitor.visit(child)]
            if type(nodes[0].initializer) not in [
                ast.String, ast.Number, ast.BinOp
            ]:
                output += child.to_ecma() + "\n"
    return output

def read_js_object(code):
    parser = Parser()

    def visit(node):
        if isinstance(node, ast.Program):
            d = {}
            for child in node:
                if not isinstance(child, ast.VarStatement):
                    raise ValueError("All statements should be var statements")
                key, val = visit(child)
                d[key] = val
            return d
        elif isinstance(node, ast.VarStatement):
            return visit(node.children()[0])
        elif isinstance(node, ast.VarDecl):
            return visit(node.identifier), visit(node.initializer)
        elif isinstance(node, ast.Object):
            d = {}
            for property in node:
                key = visit(property.left)
                value = visit(property.right)
                d[key] = value
            return d
        elif isinstance(node, ast.BinOp):
            # simple constant folding
            if node.op == '+':
                if isinstance(node.left, ast.String) and isinstance(node.right, ast.String):
                    return visit(node.left) + visit(node.right)
                elif isinstance(node.left, ast.Number) and isinstance(node.right, ast.Number):
                    return visit(node.left) + visit(node.right)
                else:
                    raise ValueError("Cannot + on anything other than two literals")
            else:
                raise ValueError("Cannot do operator '{}'".format(node.op))
        elif isinstance(node, ast.String):
            return node.value.strip('"').strip("'")
        elif isinstance(node, ast.Array):
            return [visit(x) for x in node]
        elif isinstance(node, ast.Number) or isinstance(node, ast.Identifier) \
                or isinstance(node, ast.Boolean) or isinstance(node, ast.Null):
            return node.value
        else:
            raise Exception("Unhandled node: {}".format(node))

    return visit(parser.parse(code))

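# Hedged usage sketch for read_js_object() above: not part of the original
# module. It only assumes the slimit Parser/ast imports that read_js_object()
# itself uses; the JavaScript snippet, helper name, and expected shape are
# illustrative.
def _example_read_js_object_usage():
    sample_js = "var config = {name: 'demo' + '-site', retries: 3, tags: ['a', 'b']};"
    # Intended shape: {'config': {'name': 'demo-site', 'retries': '3', 'tags': ['a', 'b']}}
    # (numbers come back as the literal text stored on the slimit Number node).
    return read_js_object(sample_js)
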
def ParsingOfFunction():
    index = 0
    parser = Parser()
    tree = parser.parse('l = 0;l = h;')
    # for node in nodevisitor.visit(tree):
    #     if isinstance(node, ast.Identifier) and node.value == 'i':
    #         node.value = 'hello'
    x = tree.to_ecma()
    # print awesome javascript :)
    # print x;
    # print "Opening the file..."
    # target = open("file.txt", 'w')
    # target.write(x)
    # target.close()
    lines = [line.rstrip('\n') for line in open('file.txt')]
    map = {}
    temp = ""
    i = 0
    # print lines;
    for str in lines:
        if ';' in str:
            temp = temp + str
            temp = temp.lstrip(";")
            # print temp;
            map.__setitem__(i, temp)
            print "Going into the lexer function --------------"
            lex = LexingofFunction(temp)
            temp = ""
            i += 1
        else:
            temp = temp + str
    # print map;
    print "----------------------------- print heap now"
    for x in heap:
        print x.name
        print x.level
    return

def _addscript(self, path, date, url, script):
    checksum = sha1(script).digest()
    if redis_db.sadd("js_script_checksums", checksum):
        self.last["script_checksum"] = hexlify(checksum)
        try:
            js_parser = JSParser()
            js_parser.parse(script)
        except Exception:
            # Keep unparsable scripts as 'bad' script if they don't look like HTML.
            mime_type = magic.from_buffer(script.replace("\v", " "), mime=True)
            if mime_type == "text/html" or mime_type == "application/xml":
                return
            print >> sys.stderr, "Cannot parse " + path
            path = ".bad/" + path
        header = "// Retrieved %sfrom %s\n" % (date + ", " if date else "", url)
        path = args.prefix + "/" + path
        try:
            os.makedirs(os.path.dirname(path))
        except OSError:
            pass
        jsfile = open(path, "w")
        jsfile.write(header)
        jsfile.write(script)
        jsfile.close()

def get_property_attributes(url):
    response = requests.get(url)
    # html parser
    soup = BeautifulSoup(response.text, 'html.parser')
    script = soup.findAll('script', {'type': 'text/javascript'})[3]
    # javascript parser
    parser = Parser()
    tree = parser.parse(script.text)
    fields = {
        getattr(node.left, 'value', ''): getattr(node.right, 'value', '')
        for node in nodevisitor.visit(tree)
        if isinstance(node, ast.Assign)
    }
    return fields

def treeWalker(js_file):
    parser = Parser()
    tree = parser.parse(js_file)
    variables = {}
    for child in tree.children():
        if type(child) == ast.VarStatement:
            try:
                nodes = [x for x in nodevisitor.visit(child)]
                if type(nodes[0].initializer) == ast.String:
                    variables[nodes[0].identifier.value] = nodes[0].initializer.value
                elif type(nodes[0].initializer) == ast.Number:
                    variables[nodes[0].identifier.value] = eval(nodes[0].initializer.to_ecma())
                elif type(nodes[0].initializer) == ast.BinOp:
                    variables[nodes[0].identifier.value] = eval(nodes[0].initializer.to_ecma())
                else:
                    print((nodes[0].identifier.value, nodes[0].initializer))
            except Exception as e:
                print(child.to_ecma())
    return variables

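# Hedged usage sketch for treeWalker() above: not part of the original module.
# It assumes the same slimit imports treeWalker() relies on; the snippet,
# helper name, and expected mapping are illustrative only.
def _example_treeWalker_usage():
    sample_js = "var title = 'hello'; var count = 2 + 3;"
    # Intended result: {'title': "'hello'", 'count': 5}
    # (string values keep their quotes, arithmetic initializers are evaluated).
    return treeWalker(sample_js)
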
def test_modify_tree(self):
    text = """
    for (var i = 0; i < 10; i++) {
      var x = 5 + i;
    }
    """
    parser = Parser()
    tree = parser.parse(text)
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.Identifier) and node.value == 'i':
            node.value = 'hello'
    self.assertMultiLineEqual(
        tree.to_ecma(),
        textwrap.dedent("""
        for (var hello = 0; hello < 10; hello++) {
          var x = 5 + hello;
        }
        """).strip()
    )

def partitionCode(sourceCode):
    parser = Parser()
    print "Source code originally is ............ \n", sourceCode
    tree = parser.parse(sourceCode)
    fnList = FunctionDefinitionsPass(tree)
    mobileDeviceList = MobileDevicesPass(tree)
    predicateList = PredicatePass(tree)
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.ExprStatement):
            exprNode = node.expr
            if isinstance(exprNode, ast.FunctionCall):
                # check if this is a function call to an object
                ParseMethodCalls(exprNode, fnList, mobileDeviceList, predicateList)
    # TODO: Impose the restriction that all mobile Device declarations come ahead of all else
    print "-*********\n*******\n------------------THE PARTITIONED CODE IS -----------------------------*********\n*******\n"
    for key in partitionedCode:
        print "On node ", key, ", code is \n\n"
        print partitionedCode[key]

def partitionCode(sourceCode):
    parser = Parser()
    sourceCodeInHtml = sourceCode.replace(';', '\n')
    print "MULTI PHONE SCRIPT \n----------------------------------------\n", sourceCodeInHtml
    tree = parser.parse(sourceCode)
    fnList = FunctionDefinitionsPass(tree)
    mobileDeviceList = MobileDevicesPass(tree)
    predicateList = PredicatePass(tree)
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.ExprStatement):
            exprNode = node.expr
            if isinstance(exprNode, ast.FunctionCall):
                # check if this is a function call to an object
                ParseMethodCalls(exprNode, fnList, mobileDeviceList, predicateList)
    # TODO: Impose the restriction that all mobile Device declarations come ahead of all else
    print "\n\nPARTITIONED CODE \n----------------------------------------\n",
    returnCode = dict()
    for key in partitionedCode:
        print "On phone name \"", mobileDeviceList[key], "\" : \n ",
        print "\t", partitionedCode[key], "\n"
        returnCode[mobileDeviceList[key]] = partitionedCode[key]
    print ""
    return returnCode

def check_expected_output(filename):
    input_file = 'js/%s.js' % filename
    expect_file = input_file + '.expect'
    if not os.path.exists(expect_file):
        raise SkipTest('No expect file for test ' + filename)
    with open(input_file) as f, open(expect_file) as g:
        parser = Parser()
        tree = parser.parse(f.read())
        visitor = ConstantReductionVisitor()
        tree = visitor.visit(tree)
        js = tree.to_ecma().strip()
        expected_output = g.read().strip()
        if js != expected_output:
            print 'Expected:'
            print expected_output
            print ''
            print 'Got:'
            print js
            assert False

def parse_global_js_for_access_id_action_url(global_js):
    parser = Parser()
    tree = parser.parse(global_js)
    parts = ['protocol', 'roDomain', 'ro', 'rt']
    UrlParts = namedtuple('UrlParts', parts)
    url_parts = UrlParts([], [], [], [])
    getvalue = operator.attrgetter('value')
    err = "Too many '{}' assignments in global.js."
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.Assign):
            try:
                left_value = getvalue(node.left).strip('\'"')
            except AttributeError:
                continue
            if left_value in parts:
                right_value = getvalue(node.right).strip('\'"')
                assert right_value not in getattr(url_parts, left_value), err.format(left_value)
                getattr(url_parts, left_value).append(right_value)
    return url_parts.protocol[0] + url_parts.roDomain[0] + url_parts.ro[0] + url_parts.rt[0]

def _js_design_as_doc(self, filepath):
    dir, filename = os.path.split(filepath)
    name, ext = os.path.splitext(filename)
    with open(filepath) as f:
        js = f.read()
    parser = Parser()
    tree = parser.parse(js)
    views = {}
    for node in tree:
        if isinstance(node, ast.VarStatement):
            for child in node.children():
                for grandchild in child.children():
                    if isinstance(grandchild, ast.Identifier):
                        view = {}
                        view_name = grandchild.value
                        views[view_name] = view
                    if isinstance(grandchild, ast.Object):
                        for named in grandchild.children():
                            function_name = None
                            function_body = None
                            for kv in named.children():
                                if isinstance(kv, ast.Identifier) and kv.value in VIEW_FUNCTION_NAMES:
                                    function_name = kv.value
                                if isinstance(kv, ast.FuncExpr):
                                    function_body = kv.to_ecma()
                            if function_name and function_body:
                                view[function_name] = function_body
    return {"_id": "_design/%s" % name, "views": views}

def post(self):
    print self.request
    print "request: %s" % self.request.body
    request_data = json.loads(self.request.body)
    javascript = request_data["javascript"].replace(u'\u200b', "")
    print(javascript)
    parser = Parser()
    tree = parser.parse(javascript)
    jsnodes = []
    functions = []
    calls = []
    for node in nodevisitor.visit(tree):
        try:
            node_id = node.identifier.to_ecma()
            jsnodes.append(node)
            if isinstance(node, ast.FunctionCall) \
                    or isinstance(node, ast.Identifier):
                calls.append(node)
            if isinstance(node, ast.FuncDecl):
                functions.append(node)
        except AttributeError:
            pass

class TestAPIFinder(unittest.TestCase):

    def setUp(self):
        self.walker = ASTWalker()
        self.jstree = JSParser()

    def test_simple_find(self):
        script = """
        var sendToKaleidos = opera.contexts.menu.createItem(foo)
        opera.contexts.menu.addItem(sendToKaleidos);
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script), 'addItem'))

    def test_simple_find2(self):
        script = """
        var allTabs = opera.extension.tabs.getAll();
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script), 'getAll'))

    def test_simple_find3(self):
        script = """
        var temp_tab = opera.extension.tabs.create(
            {url: 'http://online.translate.ua', focused: false}
        );
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script), 'create'))

    def test_simple_find4(self):
        script = """
        var matches = opera.extension.tabs.getFocused()
            .url.match(/v=([^(\&|$)]*)/)
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script), 'getFocused'))

    def test_permission(self):
        script = """
        var sendToKaleidos = opera.contexts.menu.createItem(foo)
        opera.contexts.tabs.getAll();
        """
        self.assertEqual(self.walker.find_apicall(
            self.jstree.parse(script), 'getAll'), 'tabs')

    def test_finder_aliased(self):
        script = """
        var mn = opera.contexts.menu;
        mn.addItem(button);
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script), 'addItem'))

    def test_finder_aliased2(self):
        script = """
        filter.block.add(document.location.href)
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script), 'add'))

    def test_finder_aliased3(self):
        script = """
        URLFilterAPI.block.remove(content.replace(bugReg,"*#"),newOptions);
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script), 'remove'))

    def test_finder_aliased4(self):
        script = """
        uiitem.disabled = !o.tabs.getFocused();
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script), 'getFocused'))

    def test_finder_aliased5(self):
        script = """
        try {
            return o.tabs.getFocused().url;
        } catch (e) {
            return "";
        }
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script), 'getFocused'))

    def test_finder_aliased6(self):
        script = """
        var Current = Tabs.getSelected();
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script), 'getSelected'))

    def test_finder_aliased7(self):
        script = """
        var oTabs = opera.extension.tabs;
        oTabs.create({url: READER_URL, focused: true});
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script), 'create'))

    def test_finder_aliased8(self):
        script = """
        d=opera.contexts.menu.createItem(
            {title:g_formfills[e].decprofilename,type:"folder"}
        );
        contextParents[b].addItem(d);
        d.addItem(opera.contexts.menu.createItem({title:gs("Fill Form"),
            onclick:cmaction1,id:g_formfills[e].ffid}));
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script), 'addItem'))

    def test_permission_aliased(self):
        script = """
        var tb = opera.contexts.tabs;
        tb.create(tabs);
        """
        self.assertEqual(self.walker.find_apicall(
            self.jstree.parse(script), 'create'), 'tabs')

    def test_permission_aliased2(self):
        script = """
        filter.block.add(document.location.href)
        """
        self.assertEqual(self.walker.find_apicall(
            self.jstree.parse(script), 'add'),
            ('webRequest', 'webRequestBlocking'))

    def test_permission_aliased3(self):
        script = """
        try {
            return o.tabs.getFocused().url;
        } catch (e) {
            return "";
        }
        """
        self.assertEqual(self.walker.find_apicall(
            self.jstree.parse(script), 'getFocused'), 'tabs')

    def test_permission_aliased4(self):
        script = """
        var Current = Tabs.getSelected();
        """
        self.assertEqual(self.walker.find_apicall(
            self.jstree.parse(script), 'getSelected'), 'tabs')

    def test_permission_aliased5(self):
        script = """
        var tbs = opera.contexts.tabs;
        tbs.getFocused();
        """
        self.assertEqual(self.walker.find_apicall(
            self.jstree.parse(script), 'getFocused'), 'tabs')

    def test_finder_multi_aliased(self):
        script = """
        var o = opera;
        c = o.contexts,
        tbs = c.tabs;
        tbs.getFocused();
        """
        self.assertTrue(self.walker._find(
            self.jstree.parse(script), 'getFocused'))

    def test_permission_multi_aliased(self):
        script = """
        var o = opera;
        c = o.contexts,
        tb = c.tabs;
        tb.getSelected();
        """
        self.assertEqual(self.walker.find_apicall(
            self.jstree.parse(script), 'getSelected'), 'tabs')