def get_forecast(link):
    """Download a forecast page and extract the data embedded in its script.

    The page at ``link`` is fetched and the first ``<script>`` inside the
    ``div`` with id ``div_wgfcst1`` is parsed as JavaScript.  Every
    assignment found in that script becomes one dictionary entry.

    Keys are produced by ``parse_key(node.left)``.  Values are produced by
    ``parse_value(node.right)``; for assignments whose right-hand side is an
    array literal the value is then replaced by ``parse_array(node.right)``.

    Args:
        link: URL of the forecast page.

    Returns:
        dict: parsed keys mapped to parsed values.
    """
    html_doc = urllib2.urlopen(link).read()
    soup = BeautifulSoup(html_doc, "html.parser")
    forecast_text = soup.find("div", id="div_wgfcst1").find("script").string

    # Parse and walk the script once; the same assignment nodes feed both
    # passes below (the script used to be parsed and walked twice).
    forecast_tree = Parser().parse(forecast_text)
    assignments = [
        node
        for node in nodevisitor.visit(forecast_tree)
        if isinstance(node, ast.Assign)
    ]

    full_data = {
        parse_key(node.left): parse_value(node.right) for node in assignments
    }
    # Array-valued assignments override the generic value computed above.
    forecast = {
        parse_key(node.left): parse_array(node.right)
        for node in assignments
        if isinstance(node.right, ast.Array)
    }
    full_data.update(forecast)
    return full_data
def _parse_redirect_to_security_challenge_script(script: str) -> str:
    """
    Parses the script which redirects us to security challenge page and gets that URL.

    The script's AST is searched for an assignment whose left-hand side is
    the two-part dotted name ``window.location``.  The ``value`` of the
    assigned node is returned with surrounding quote characters stripped.
    When no such assignment exists the function returns ``None``.
    """
    tree = Parser().parse(script)
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.Assign):
            continue
        if not isinstance(node.left, ast.DotAccessor):
            continue
        children = node.left.children()
        if len(children) != 2:
            continue
        # A child may itself be a nested accessor (``a.b.c``) with no
        # ``value`` attribute, so read it defensively instead of raising.
        owner = getattr(children[0], 'value', None)
        attribute = getattr(children[1], 'value', None)
        if owner == 'window' and attribute == 'location':
            return node.right.value.strip('\'"')
    return None
def get_embedded_json():
    """Collect the key/value pairs passed to ``$rwidgets`` calls.

    Inline scripts inside the ``div`` with id ``JSDF`` of the enclosing
    ``soup`` are scanned for ``$rwidgets``.  Each matching script is parsed
    and every assignment found beneath a ``$rwidgets(...)`` call contributes
    one key/value pair.  Keys listed in the enclosing ``duplicates``
    collection accumulate their values in a list; other keys keep the last
    value seen, and the literal string ``'null'`` never overwrites a value.

    Returns:
        dict: the merged values from all matching calls.
    """
    def unquote(value, chars):
        # Strip chars from value; non-strings are passed through untouched
        return value.strip(chars) if isinstance(value, str) else value

    container = soup.find('div', id='JSDF')
    inline_scripts = container.find_all('script', src=None)

    # Keep only the script fragments that mention $rwidgets
    widget_sources = [
        fragment
        for script in inline_scripts
        for fragment in script.contents
        if '$rwidgets' in fragment
    ]

    # Bodge until we get rid of slimit
    with silence_output():
        parser = Parser()

    raw_values = {}
    for source in widget_sources:
        tree = parser.parse(source)
        for call in nodevisitor.visit(tree):
            if not isinstance(call, ast.FunctionCall):
                continue
            if not isinstance(call.identifier, ast.Identifier):
                continue
            if call.identifier.value != '$rwidgets':
                continue

            # Gather every assignment nested under this call
            fields = {}
            for assignment in nodevisitor.visit(call):
                if not isinstance(assignment, ast.Assign):
                    continue
                key = getattr(assignment.left, 'value', '').strip('"')
                value = unquote(getattr(assignment.right, 'value', ''), '"')
                if key in duplicates:
                    fields.setdefault(key, []).append(value)
                else:
                    fields[key] = value

            # Merge fields into raw_values, resolving duplicates
            for key, value in fields.items():
                if key in duplicates:
                    if key in raw_values:
                        raw_values[key] += value
                    else:
                        raw_values[key] = value
                elif value != 'null':
                    raw_values[key] = value

    return raw_values
def get_test_steps_as_strings(tree, bug):
    """Return the source text of the function expressions attached to ``bug``.

    The first assignment in ``tree`` whose left-hand ``value`` contains
    ``str(bug)`` is located; every ``FuncExpr`` node beneath it is rendered
    with ``to_ecma()``.  An empty list is returned when nothing matches.
    """
    matching_assignment = None
    for candidate in nodevisitor.visit(tree):
        if not isinstance(candidate, ast.Assign):
            continue
        if str(bug) in getattr(candidate.left, "value", ""):
            matching_assignment = candidate
            break

    if not matching_assignment:
        return []

    return [
        descendant.to_ecma()
        for descendant in nodevisitor.visit(matching_assignment)
        if isinstance(descendant, ast.FuncExpr)
    ]
def _parse_redirect_to_security_challenge_script(script: str) -> str:
    """
    Parses the script which redirects us to security challenge page and gets that URL.

    Looks for the first assignment of the form ``window.location = <node>``
    in the parsed script and returns that node's ``value`` with any leading
    or trailing quote characters removed.  Returns ``None`` if the script
    contains no such assignment.
    """
    tree = Parser().parse(script)
    for node in nodevisitor.visit(tree):
        if not (isinstance(node, ast.Assign)
                and isinstance(node.left, ast.DotAccessor)):
            continue
        parts = node.left.children()
        if len(parts) != 2:
            continue
        # Nested accessors such as ``a.b.c`` have a child without a
        # ``value`` attribute; treat those as non-matching rather than
        # raising AttributeError.
        names = [getattr(part, 'value', None) for part in parts]
        if names == ['window', 'location']:
            return node.right.value.strip('\'"')
    return None
def extract_glow_lib():
    """Read the ``glowscript_libraries`` table out of ``untrusted/run.js``.

    The file is parsed as JavaScript and searched for an assignment whose
    target is a dotted name ending in ``glowscript_libraries`` and whose
    value is an object literal.  Each property of that object is mapped to
    the list of its string items, and the resulting dictionary is passed
    through ``preproc_lib_path``.

    Returns:
        Whatever ``preproc_lib_path`` returns for the extracted mapping.

    Raises:
        SystemExit: with code ``-1`` (after printing a message) when the
            assignment cannot be found.
    """
    runjs = norm_path('untrusted/run.js')
    parser = JSParser()
    with open(runjs) as f:
        tree = parser.parse(f.read())

    for node in nodevisitor.visit(tree):
        if (isinstance(node, ast.Assign)
                and isinstance(node.left, ast.DotAccessor)
                and node.left.identifier.value == 'glowscript_libraries'
                and isinstance(node.right, ast.Object)):
            break
    else:
        print('Parsing {} failed'.format(runjs))
        # ``exit`` is the interactive helper installed by ``site``; raise
        # SystemExit directly so this also works when ``site`` is not loaded.
        raise SystemExit(-1)

    # SECURITY: ``eval`` is used to unquote the JavaScript string literals.
    # It executes whatever text appears in run.js, so that file must be
    # trusted.
    return preproc_lib_path({
        prop.left.value: [
            eval(lib.value)
            for lib in prop.right.items
            if isinstance(lib, ast.String)
        ]
        for prop in node.right.properties
    })
def _parse_text(self, text):
    """Parse JavaScript ``text`` into a dict of unquoted assignment pairs.

    Every assignment node contributes ``left.value -> right.value`` (empty
    string when a side has no ``value``).  One leading and one trailing
    quote character are stripped from both keys and values.

    Args:
        text: JavaScript source to parse.

    Returns:
        dict: unquoted keys mapped to unquoted values.

    Exits the process with status 10 when the text cannot be parsed or when
    a key or value is not a string.
    """
    parser = slimit.parser.Parser()
    try:
        tree = parser.parse(text)
    except SyntaxError:
        print(text)
        # ``sys.stderr`` is a file object, not a callable; calling it raised
        # TypeError instead of reporting the parse failure.
        sys.stderr.write("Couldn't parse text. Exiting...\n")
        sys.exit(10)

    fields = {
        getattr(x.left, 'value', ''): getattr(x.right, 'value', '')
        for x in nodevisitor.visit(tree)
        if isinstance(x, slimit.ast.Assign)
    }

    # Matches a single quote character at the very start or very end.
    pat = re.compile(r'^[\'"]|[\'"]$')

    def reqstrip(value):
        return pat.sub('', value)

    try:
        parsed = {reqstrip(k): reqstrip(v) for (k, v) in fields.items()}
    except TypeError as err:
        print('{}'.format(err))
        sys.stderr.write('Couldn\'t parse text. Exiting...\n')
        sys.exit(10)
    return parsed
def extract_g_config(script_text):
    """Return the ``g_config`` variable of a script as a mapping.

    The script is parsed and the first variable declaration named
    ``g_config`` has its initializer converted with
    ``extract_object_as_map``.  Returns ``None`` when the script declares no
    such variable.
    """
    tree = Parser().parse(script_text)
    for declaration in nodevisitor.visit(tree):
        if not isinstance(declaration, ast.VarDecl):
            continue
        if declaration.identifier.value != 'g_config':
            continue
        return extract_object_as_map(declaration.initializer)
    return None
def get_property_attributes(url):
    """Scrape the attributes of a property advertisement.

    Args:
        url: address of the advertisement page.

    Returns:
        dict | None: ``left.value -> right.value`` for every assignment in
        the page's fourth ``text/javascript`` script, plus a
        ``'"date sold"'`` entry.  ``None`` when the page title contains
        ``'Real Estate Properties'`` (the link led to a search result
        instead of an advertisement).
    """
    response = requests.get(url)
    # html parser
    soup = BeautifulSoup(response.text, 'html.parser')

    # if ad link returns valid search result, scan for attributes, else skip
    if 'Real Estate Properties' in soup.title.string:
        return None

    # if ad is archived, put in dummy date, else get real date
    if soup.find("span", "status-label label-archive") is not None:
        date = '31 Dec 9999'
    else:
        # get date from title of advertisement
        date = re.findall(r'\d{2}\s\w{3}\s\d{4}', soup.title.string)[0]

    # Only look the script up once the page is known to be an advertisement;
    # indexing before the check could raise IndexError on pages that have
    # fewer than four script tags.
    script = soup.findAll('script', {'type': 'text/javascript'})[3]

    # javascript parser
    parser = Parser()
    tree = parser.parse(script.text)
    fields = {
        getattr(node.left, 'value', ''): getattr(node.right, 'value', '')
        for node in nodevisitor.visit(tree)
        if isinstance(node, ast.Assign)
    }
    fields.update({'"date sold"': '"' + date + '"'})
    return fields
def parse_country(self, response):
    """Extract the dates/cases series of a country page and save it as CSV.

    The first ``row graph_row`` chart on the page is used: its heading
    supplies the country name (the text after ``" in "``, without a leading
    ``"the "``) and its script supplies the ``categories`` (dates) and
    ``data`` (case counts) arrays.  The series is written to
    ``data/<country>.csv`` as ``date,count`` lines.

    Raises:
        ValueError: when the chart heading does not contain ``" in "``.
        AssertionError: when either series is missing or their lengths
            differ.
    """
    charts = response.xpath('//*[@class="row graph_row"]')
    total_corona_chart = charts[0]
    script = total_corona_chart.xpath('div/script/text()').extract()[0]
    title = total_corona_chart.xpath('div/h3/text()').extract()[0]

    try:
        country_name = title[title.index(" in ") + 4:]
    except ValueError:
        # ``except e:`` referenced an undefined name, so a missing " in "
        # surfaced as NameError instead of this message.
        raise ValueError(
            "Worldometer changed their labels. Hold your pain, Harold.")
    if country_name[:4] == "the ":
        country_name = country_name[4:]

    parser = Parser()
    tree = parser.parse(script)
    data = [None, None]  # dates and corresponding number of cases
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.Assign):
            continue
        left_value = getattr(node.left, 'value', '')
        if left_value == 'categories' and not data[0]:
            print("\nparsing dates\n")
            # SECURITY: ``eval`` runs text scraped from a remote page in
            # order to unquote the date strings; the source must be trusted.
            data[0] = [eval(getattr(s, 'value', ''))
                       for s in getattr(node.right, 'items', '')]
        elif left_value == 'data' and not data[1]:
            print("\nparsing number of cases\n")
            data[1] = [int(getattr(n, 'value', ''))
                       for n in getattr(node.right, 'items', '')]

    assert data[0] and data[1] and len(data[0]) == len(data[1])

    with open("data/%s.csv" % country_name, 'w+') as f:
        for date, cases in zip(data[0], data[1]):
            f.write(date)
            f.write(',')
            f.write(str(cases))
            f.write('\n')
def chapter_url2image_urls(chapter_url):
    """Return the image URLs of a chapter as a tuple sorted by page.

    Each item produced by ``get_info_from_url(chapter_url, chapter2images)``
    carries a script in its third field.  The script is parsed, the node
    visited immediately before the first identifier named ``image_list`` is
    rendered back to source, and ``_image_list_match_pattern`` extracts the
    list literal from it.  Every entry's ``src`` is base64-decoded to ASCII.

    NOTE(review): this layout was reconstructed from a whitespace-collapsed
    source; confirm that ``return`` belongs inside the outer loop.
    """
    g = get_info_from_url(chapter_url, chapter2images)
    p = Parser()
    for t2_or_t3, (slot_idx, pattern_idx, info_pattern) in g:
        tag, _, data = t2_or_t3
        #p = Parser()
        tree = p.parse(data)
        # ``pre`` trails the traversal by one node: when the loop breaks it
        # holds the node visited just before the ``image_list`` identifier.
        # If the identifier is never found it holds the last node visited.
        pre = None
        for node in nodevisitor.visit(tree):
            if isinstance(node, ast.Identifier) and node.value == 'image_list':
                break
            pre = node
        assert pre != None
        m = _image_list_match_pattern.match(pre.to_ecma())
        assert m != None
        # SECURITY: both ``eval`` calls execute text taken from the parsed
        # script; the first unquotes the captured literal, the second turns
        # the resulting text into a Python object.
        image_list = eval(m.group(1))
        image_list = eval(image_list)
        ls = []
        for info in image_list.values():
            src = base64.b64decode(info['src']).decode('ascii')
            page = info['page']
            ls.append((page, src))
        # Sorting the (page, src) pairs orders the URLs by page.
        ls.sort()
        ls = tuple(src for _, src in ls)
        return ls
def card_price_history(setname, cardname):
    '''
    Scrapes price history of card from MTGPrice.com, using javascript parser

    Input: Setname and cardname are strings, generally taken from Scryfall API.
    Output: A numpy array of price history, each 'row' in the form [timestamp, price]
    '''
    # Build the card URL: whitespace in either name becomes an underscore
    set_slug = '_'.join(setname.split())
    card_slug = '_'.join(cardname.split())
    link = 'https://www.mtgprice.com/sets/' + set_slug + '/' + card_slug
    soup = BeautifulSoup(requests.get(link).content, 'html.parser')

    # Only scripts that declare the results array are worth parsing
    text_to_find = 'var results = ['
    history = []
    for script in soup.findAll('script', type='text/javascript'):
        if text_to_find not in script.text:
            continue
        tree = Parser().parse(script.text)
        for node in nodevisitor.visit(tree):
            if not isinstance(node, ast.Assign):
                continue
            if getattr(node.left, 'value', '') != "\"data\"":
                continue
            history.extend(
                [point.items[0].value, point.items[1].value]
                for point in node.right.items
            )
            # Stop at the first "data" entry of this script
            break
    return np.array(history)
def parse_global_js_for_access_id_action_url(global_js):
    """Assemble the action URL from assignments found in ``global.js``.

    The script is searched for assignments whose (unquoted) left-hand value
    is one of ``protocol``, ``roDomain``, ``ro`` or ``rt``.  The first value
    collected for each part is concatenated in that order.

    Args:
        global_js: source text of ``global.js``.

    Returns:
        str: ``protocol + roDomain + ro + rt``.

    Raises:
        AssertionError: when the same value is assigned twice to one part.
        IndexError: when one of the four parts is never assigned.
    """
    parser = Parser()
    tree = parser.parse(global_js)
    parts = ['protocol', 'roDomain', 'ro', 'rt']
    UrlParts = namedtuple('UrlParts', parts)
    url_parts = UrlParts([], [], [], [])
    getvalue = operator.attrgetter('value')
    err = "Too many '{}' assignments in global.js."

    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.Assign):
            continue
        try:
            left_value = getvalue(node.left).strip('\'"')
        except AttributeError:
            # Left-hand sides without a ``value`` cannot name a URL part.
            continue
        if left_value not in parts:
            continue
        right_value = getvalue(node.right).strip('\'"')
        collected = getattr(url_parts, left_value)
        # Report the part that is actually duplicated; the message used to
        # name 'protocol' regardless of which part triggered it.
        assert right_value not in collected, err.format(left_value)
        collected.append(right_value)

    return (url_parts.protocol[0] + url_parts.roDomain[0]
            + url_parts.ro[0] + url_parts.rt[0])
def fetch(item):
    """Download ``item["link"]`` and fill in the article fields of ``item``.

    ``title`` and ``image`` come from the page's ``og:title`` and
    ``og:image`` meta tags.  ``text`` is built from the ``content_elements``
    of the ``Fusion.globalContent`` assignment found in the page's scripts
    (empty string when no such script exists).  ``source`` and ``key`` (the
    MD5 hex digest of the link) are always set.

    Args:
        item: dict with at least a ``"link"`` entry; it is updated in place.

    Returns:
        dict: the same ``item``.

    Any exception raised while downloading or parsing is reported with a
    printed message and then re-raised unchanged.
    """
    try:
        r = requests.get(item["link"])
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "html.parser")
        item["title"] = soup.find("meta", property="og:title")["content"]
        item["image"] = soup.find("meta", property="og:image")["content"]

        # The last script mentioning Fusion.globalContent wins.
        script_text = None
        for s in soup.find_all("script"):
            if s.string is None:
                continue
            if "Fusion.globalContent" in s.string:
                script_text = s.string

        text = ""
        if script_text is not None:
            tree = JavascriptParser().parse(script_text)
            for node in nodevisitor.visit(tree):
                if not isinstance(node, ast.Assign):
                    continue
                left = node.left.to_ecma()
                if "Fusion.globalContent" == left:
                    data = json.loads(node.right.to_ecma()).get(
                        "content_elements", [])
                    text = "<br/>".join(
                        [x["content"] for x in data if "content" in x])
        item["text"] = text
    except Exception:
        print("cannot parse %s" % (item["link"]))
        raise

    item["source"] = "appledaily"
    item["key"] = hashlib.md5(item["link"].encode()).hexdigest()
    return item
def scan_js(crawler, url, content):
    ''' scan javascript for url assignments (like ajax calls).

    Every assignment whose left-hand ``value`` contains ``'url'`` is
    inspected.  String nodes found either as the right-hand side itself or
    as a direct attribute of the right-hand side are logged and handed to
    ``crawler.check_link``.
    '''
    LOGGER.info('Scanning Javascript on %s' % url)

    def report(string_node):
        # NOTE(review): the logged text drops one character at each end
        # (the quotes) while the link passed on drops two leading
        # characters -- confirm that the difference is intentional.
        LOGGER.info('Found interesting url in JS: %s' % string_node.value[1:-1])
        crawler.check_link(url, string_node.value[2:-1])

    tree = Parser().parse(content)
    for assignment in nodevisitor.visit(tree):
        # <something>: <something>
        if not isinstance(assignment, ast.Assign):
            continue
        # 'leftval': <something>, where leftval mentions 'url'
        key = getattr(assignment.left, 'value', '')
        if not key or 'url' not in key:
            continue

        value_node = assignment.right
        # 'url': 'somestring'
        if isinstance(value_node, ast.String):
            report(value_node)
        # string in <something>
        # <something> may be function_call() / variable + 'somestring'
        for attribute in value_node.__dict__.values():
            if isinstance(attribute, ast.String):
                report(attribute)
def _basic_init(self):
    """Download the page at ``self._url`` and build ``self.name`` / ``self.price``.

    The page is parsed as JavaScript.  ``fS_name`` supplies the name and
    ``Data_millionCopiesIncome`` supplies (timestamp in ms, daily rate)
    pairs.  The rates are compounded into a net value series, restricted to
    dates in ``opendate`` and to dates not after ``yesterdaydash()``.

    Raises:
        FundTypeError: when the first 800 characters of the page mention
            ``Data_fundSharesPositions``.
    """
    self._page = _download(self._url)
    if "Data_fundSharesPositions" in self._page.text[:800]:
        raise FundTypeError("This code seems to be a fund, use fundinfo instead")

    tree = Parser().parse(self._page.text)

    def initializer_of(var_name):
        # Initializer of the first ``var`` statement declaring var_name
        found = [
            statement.children()[0].children()[1]
            for statement in nodevisitor.visit(tree)
            if isinstance(statement, ast.VarStatement)
            and statement.children()[0].children()[0].value == var_name
        ]
        return found[0]

    nodenet = initializer_of("Data_millionCopiesIncome")
    name = initializer_of("fS_name")
    self.name = name.value.strip('"')

    tz_bj = dt.timezone(dt.timedelta(hours=8))
    points = nodenet.children()
    datel = []
    ratel = []
    for point in points:
        stamp_ms = int(point.children()[0].value)
        datel.append(
            dt.datetime.fromtimestamp(stamp_ms / 1e3, tz=tz_bj).replace(tzinfo=None)
        )
    for point in points:
        ratel.append(float(point.children()[1].value))

    # Compound the daily rates, starting from a unit value
    running = 1
    netvalue = []
    for dailyrate in ratel:
        running = running * (1 + dailyrate * 1e-4)
        netvalue.append(running)

    df = pd.DataFrame(
        data={
            "date": datel,
            "netvalue": netvalue,
            "totvalue": netvalue,
            "comment": [0 for _ in datel],
        }
    )
    df = df[df["date"].isin(opendate)]
    df = df.reset_index(drop=True)
    self.price = df[df["date"] <= yesterdaydash()]
def inital_check_for_obfuscation_condtiion_sensitiveFunctions(js_text):
    """Scan a script for ``if`` statements and sensitive function names.

    Every identifier in the parsed script is collected and compared with the
    module-level ``obfuscation_function_names`` and
    ``profiling_function_names`` collections.

    Args:
        js_text: JavaScript source to analyse.

    Returns:
        tuple: ``(if_condition, obfuscation, profiling, ob_list, pro_list)``
        where ``if_condition`` tells whether the script contains an ``if``
        statement, ``ob_list`` / ``pro_list`` are the sets of matching
        names, and ``obfuscation`` / ``profiling`` tell whether those sets
        are non-empty.
    """
    tree = Parser().parse(js_text)
    keywords = set()
    if_condition = False

    for node in nodevisitor.visit(tree):
        if isinstance(node, If):
            if_condition = True
            continue
        # Depth-first walk (LIFO stack) through dotted accessors such as
        # ``a.b.getStringfromChar`` to reach each identifier inside them.
        stack = [node]
        while stack:
            current = stack.pop()
            if isinstance(current, DotAccessor):
                try:
                    stack.extend(current.children())
                except Exception:
                    # Best effort: skip accessors whose children cannot be
                    # listed, without swallowing KeyboardInterrupt and
                    # SystemExit as the bare ``except`` did.
                    pass
                continue
            if isinstance(current, Identifier):
                keywords.add(current.value)

    ob_list = {ob for ob in obfuscation_function_names if ob in keywords}
    pro_list = {pro for pro in profiling_function_names if pro in keywords}
    obfuscation = bool(ob_list)
    profiling = bool(pro_list)
    return if_condition, obfuscation, profiling, ob_list, pro_list
def whileExtract(s):
    '''Extracts all the while loops in the script.

    Returns a list holding the ``to_ecma()`` rendering of every ``While``
    node, in traversal order.
    '''
    tree = Parser().parse(s)
    return [
        loop.to_ecma()
        for loop in nodevisitor.visit(tree)
        if isinstance(loop, ast.While)
    ]
def get_m_decl(fil):
    """Return the initializer source of variable ``m`` declared in a file.

    The UTF-8 file ``fil`` is parsed as JavaScript.  When ``m`` is declared
    more than once the last declaration wins; ``None`` is returned when it
    is never declared.
    """
    with codecs.open(fil, 'r', encoding='utf8') as fd:
        source = fd.read()

    renderings = [
        declaration.initializer.to_ecma()
        for declaration in nodevisitor.visit(Parser().parse(source))
        if isinstance(declaration, ast.VarDecl)
        and declaration.identifier.value == 'm'
    ]
    return renderings[-1] if renderings else None
def defects_mapping_from_js_ast(js_ast):
    """Generate offices,keywords-to-defects mappings for JS AST.

    This function is used in the spider, but can also be called manually
    via this script on reports that failed parsing due to broken AST, after
    we manually fix them.

    Returns a pair of dictionaries built from the first and the second
    array literal found in ``js_ast``.
    """
    # children of every array literal in the syntax tree
    array_contents = []
    for node in nodevisitor.visit(js_ast):
        if isinstance(node, ast.Array):
            array_contents.append(node.children())

    missing = object()

    def get_defects_by_keys(data_raw):
        """Fetch key-to-defects from raw html strings.

        Each string is turned into an xml tree whose embedded text supplies
        the key and its defects.
        """
        mapping = {}
        for element_raw in data_raw:
            # entries that hold no value are skipped
            quoted_html = getattr(element_raw, 'value', missing)
            if quoted_html is missing:
                continue

            # The raw html string looks like this:
            #
            #   "<div class='tooltip-title'>OFFICE NAME appears in:</div>
            #    report title<br/>report title<br/>"
            #
            # It is not valid xml by itself because of the trailing <br/>
            # elements, so it is wrapped in another <div> before parsing.
            # The surrounding quote characters are dropped first.
            element_ast = et.fromstring('<div>' + quoted_html[1:-1] + '</div>')
            title = element_ast[0]

            # keep only what precedes the "appears in" marker
            key_name = title.text.split(u'מופיע ב')[0]

            # defects are the tail of the first <div> and the non-empty
            # tails of all subsequent <br> elements
            key_defects = [title.tail]
            key_defects = key_defects + [
                br.tail for br in element_ast[1:] if br.tail
            ]

            mapping[key_name] = key_defects
        return mapping

    by_first_array = get_defects_by_keys(array_contents[0])
    by_second_array = get_defects_by_keys(array_contents[1])
    return (by_first_array, by_second_array)
def adaptoutput(text, exit):
    """Build the ``var output=...;`` line for a JavaScript statement.

    ``text`` is parsed with the module-level ``parser``.  The first variable
    declaration found decides the result: ``exit`` is used when the declared
    name equals it, otherwise the declared name itself.  An empty string is
    returned when ``text`` declares no variable.

    Example: ``var BROKERS = require('./mock-brokers').data;`` gives
    ``\\tvar output=BROKERS;``.
    """
    tree = parser.parse(text)
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.VarDecl):
            continue
        declared = node.identifier.value
        if declared == exit:
            return "\tvar output=" + exit + ";"
        if declared != exit:
            return "\tvar output=" + declared + ";"
    return ""
def get_raw_value(tree, var_name):
    """Return the source of the node that follows identifier ``var_name``.

    The tree is walked in traversal order; the node visited right after the
    first identifier whose value equals ``var_name`` is rendered with
    ``to_ecma()``.  ``None`` is returned when the identifier is absent or is
    the last node visited.
    """
    end_of_walk = object()
    walker = iter(nodevisitor.visit(tree))
    for current in walker:
        if not isinstance(current, ast.Identifier):
            continue
        if current.value != var_name:
            continue
        following = next(walker, end_of_walk)
        if following is end_of_walk:
            return None
        return following.to_ecma()
    return None
def parse_script(data):
    """Return every variable declaration node found in ``data``.

    The script text is patched before parsing so that it is valid
    JavaScript.
    """
    # Hack. Fix javascript syntax issue in steam's response
    patched = data.replace('BuildGameRow(game, )', 'BuildGameRow(game, 0)')

    tree = Parser().parse(patched)
    variables = []
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.VarDecl):
            variables.append(node)
    return variables
def FunctionDefinitionsPass(tree):
    '''
    Make one pass through the entire tree to get all function definitions.

    Returns a dictionary that maps the source text of each function
    declaration's first child (its name) to the declaration node itself.
    A later declaration with the same name replaces an earlier one.

    TODO: Make the restriction clear that all function declarations and
          definitions go together in the function foo= { } form
    TODO: Allow the anonymous function declaration form as well
          ie f=function(...) { }
    '''
    return {
        declaration.children()[0].to_ecma(): declaration
        for declaration in nodevisitor.visit(tree)
        if isinstance(declaration, ast.FuncDecl)
    }
def parse_goi_script(script, date_col='date'):
    '''Extract the data node from the javascript text, parse each individual
    price series and merge them into one regularly spaced data frame.

    ``script[0]`` is parsed; the first array literal found holds the series.
    Series names are the first word of each string inside it, and the series
    themselves are the nested arrays whose source is longer than 1000
    characters.  Gaps in the merged, daily re-indexed frame are filled by
    linear interpolation.  The ``IGC`` column is renamed to ``GOI``.
    '''
    tree = slimit.parser.Parser().parse(script[0])
    arrays = [
        node for node in nodevisitor.visit(tree)
        if isinstance(node, slimit.ast.Array)
    ]
    data_node = arrays[0]

    var_names = []
    for node in nodevisitor.visit(data_node):
        if isinstance(node, slimit.ast.String):
            label = ast.literal_eval(node.to_ecma())
            var_names.append(label.split(' ')[0])

    series = []
    for node in nodevisitor.visit(data_node):
        if not isinstance(node, slimit.ast.Array):
            continue
        rendered = node.to_ecma()
        if len(rendered) > 1000:
            series.append(ast.literal_eval(rendered.replace('Date.UTC', '')))

    df_list = [
        goi_list_to_df(values, name) for name, values in zip(var_names, series)
    ]
    final_data = reduce(
        lambda left, right: pd.merge(left, right, on=date_col), df_list)

    # make the time series a regular spaced time series.
    first_day = final_data[date_col].min()
    last_day = final_data[date_col].max()
    span_days = (last_day - first_day).days
    every_day = [first_day + timedelta(days=offset)
                 for offset in range(0, span_days + 1)]
    full_dates = pd.DataFrame({date_col: every_day})

    # Interpolate the data after converting to regular spaced data
    regular_data = pd.merge(final_data, full_dates, on=date_col, how='right')
    regular_data = regular_data.sort_values(date_col)
    regular_data = regular_data.reset_index(drop=True)
    regular_data = regular_data.set_index(date_col)
    regular_data = regular_data.apply(
        lambda column: column.interpolate('linear'), axis=0)
    regular_data = regular_data.reset_index()

    return regular_data.rename(index=str, columns={'IGC': 'GOI'})
def _basic_init(self):
    """Download the page at ``self._url`` and build ``self.name`` / ``self.price``.

    The page is parsed as JavaScript.  ``fS_name`` supplies the name and
    ``Data_millionCopiesIncome`` supplies (timestamp in ms, daily rate)
    pairs.  The rates are compounded into a net value series, which is then
    restricted to dates in ``opendate`` and to dates not after
    ``yesterdaydash()``.
    """
    self._page = _download(self._url)
    tree = Parser().parse(self._page.text)

    def first_initializer(var_name):
        # Initializer of the first ``var`` statement declaring var_name
        candidates = [
            statement.children()[0].children()[1]
            for statement in nodevisitor.visit(tree)
            if isinstance(statement, ast.VarStatement)
            and statement.children()[0].children()[0].value == var_name
        ]
        return candidates[0]

    nodenet = first_initializer('Data_millionCopiesIncome')
    name = first_initializer('fS_name')
    self.name = name.value.strip('"')

    tz_bj = dt.timezone(dt.timedelta(hours=8))
    entries = nodenet.children()
    datel = [
        dt.datetime.fromtimestamp(
            int(entry.children()[0].value) / 1e3, tz=tz_bj
        ).replace(tzinfo=None)
        for entry in entries
    ]
    ratel = [float(entry.children()[1].value) for entry in entries]

    # Compound the daily rates, starting from a unit value
    level = 1
    netvalue = []
    for dailyrate in ratel:
        level = level * (1 + dailyrate * 1e-4)
        netvalue.append(level)

    columns = {
        'date': datel,
        'netvalue': netvalue,
        'totvalue': netvalue,
        'comment': [0 for _ in datel],
    }
    df = pd.DataFrame(data=columns)
    df = df[df['date'].isin(opendate)]
    df = df.reset_index(drop=True)
    self.price = df[df['date'] <= yesterdaydash()]
def get_friends_list(tree):
    """Return the decoded ``shortProfiles`` entry of a parsed script.

    The first ``key: value`` assignment whose key contains
    ``"shortProfiles"`` has its value rendered with ``to_ecma()`` and
    decoded as JSON.  Returns ``None`` when there is no such assignment.
    """
    for node in nodevisitor.visit(tree):
        is_profiles_entry = (
            isinstance(node, ast.Assign)
            and node.op == ':'
            and "shortProfiles" in node.left.value
        )
        if is_profiles_entry:
            return json.loads(node.right.to_ecma())
    return None
def addAllIntEnv(inv, env=None):
    """Map every identifier appearing in ``inv`` to ``Int``.

    Args:
        inv: JavaScript source text to parse.
        env: optional dictionary to update in place; a new one is created
            when omitted.

    Returns:
        dict: ``env`` with one ``name -> Int`` entry per identifier found.
    """
    # Identity test: ``== None`` would invoke a custom ``__eq__`` on env.
    if env is None:
        env = {}
    tree = Parser().parse(inv)
    for node in nodevisitor.visit(tree):
        if isinstance(node, jsast.Identifier):
            env[node.value] = Int
    return env
def adaptinput(text, entry):
    """Build the ``var <name>=input;`` line for a JavaScript statement.

    ``text`` is parsed with the module-level ``parser`` and the first
    variable declaration found supplies the name.  An empty string is
    returned when ``text`` declares no variable.

    Example: ``var BROKERS = require('./mock-brokers').data;`` gives
    ``\\tvar BROKERS=input;``.

    Args:
        text: JavaScript source of one statement.
        entry: name of the entry variable.  The declared name is used
            whether or not it equals ``entry``, so this argument does not
            influence the result.
    """
    tree = parser.parse(text)
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.VarDecl):
            # The second branch used to compare against the builtin ``exit``
            # (a copy-paste slip for ``entry``).  Both branches produced the
            # same text, so a single return covers them.
            return "\tvar " + node.identifier.value + "=input;"
    return ""
def AnalyseCallBack(functionDeclaration):
    """
    Gather the phone variables accessed in the body of a call back function.

    The declaration is rendered to source and re-parsed; for every dot
    accessor (``x....``) in it, the source of the accessor's first child is
    recorded.  The list is used to decide the placement of the call back.
    """
    assert isinstance(functionDeclaration, ast.FuncDecl)
    body_tree = Parser().parse(functionDeclaration.to_ecma())
    return [
        accessor.children()[0].to_ecma()
        for accessor in nodevisitor.visit(body_tree)
        if isinstance(accessor, ast.DotAccessor)
    ]
def removeDeclarations(js_file):
    """Return the script without its literal-valued ``var`` statements.

    Every top-level statement is rendered back to source followed by a
    newline.  A ``var`` statement is dropped when the initializer of the
    first node found beneath it is a string, a number or a binary operation.
    """
    tree = Parser().parse(js_file)
    literal_kinds = [ast.String, ast.Number, ast.BinOp]

    kept = []
    for statement in tree.children():
        if type(statement) == ast.VarStatement:
            first_node = [x for x in nodevisitor.visit(statement)][0]
            if type(first_node.initializer) in literal_kinds:
                continue
        kept.append(statement.to_ecma() + "\n")
    return "".join(kept)
def get_forecast(link):
    """Fetch a forecast page and return the data assigned in its script.

    The first ``<script>`` inside the ``div`` with id ``div_wgfcst1`` is
    parsed as JavaScript.  Each assignment yields
    ``parse_key(left) -> parse_value(right)``; assignments whose right-hand
    side is an array literal are then overridden with
    ``parse_key(left) -> parse_array(right)``.

    Args:
        link: URL of the forecast page.

    Returns:
        dict: the combined mapping.
    """
    html_doc = urllib2.urlopen(link).read()
    soup = BeautifulSoup(html_doc, "html.parser")
    forecast_text = soup.find("div", id="div_wgfcst1").find("script").string

    # One parse and one traversal are enough: both dictionaries are built
    # from the same assignment nodes (the text used to be parsed twice).
    forecast_tree = Parser().parse(forecast_text)
    assignments = [
        node
        for node in nodevisitor.visit(forecast_tree)
        if isinstance(node, ast.Assign)
    ]

    full_data = {
        parse_key(node.left): parse_value(node.right) for node in assignments
    }
    forecast = {
        parse_key(node.left): parse_array(node.right)
        for node in assignments
        if isinstance(node.right, ast.Array)
    }
    # Array entries take precedence over the generic values.
    full_data.update(forecast)
    return full_data
def AnalyseCallBack(functionDeclaration):
    '''
    Gather the phone variables accessed in the body of a call back function.

    The declaration is rendered to source and re-parsed.  For every dot
    accessor (``x....``) the part before the first ``.`` of its first child
    is kept, so that q.phone and q.wifi both count as ``q`` only.  The
    unique names are returned as a list, which is used to decide the
    placement of the call back.
    '''
    assert isinstance(functionDeclaration, ast.FuncDecl)
    body_tree = Parser().parse(functionDeclaration.to_ecma())
    unique_phones = {
        accessor.children()[0].to_ecma().split('.')[0]
        for accessor in nodevisitor.visit(body_tree)
        if isinstance(accessor, ast.DotAccessor)
    }
    return list(unique_phones)
def parse_JavaScript(js):
    """Record the client-interface functions found in a script.

    A function declaration qualifies when it has more than one parameter,
    its first parameter is ``this$static``, its last parameter is
    ``callback`` and its source mentions ``createStreamWriter``.  For each
    such function one dictionary is appended to the module-level
    ``extracted`` list with the keys ``function``, ``num_of_args``, ``args``
    and ``arg_type_data``.

    Args:
        js: JavaScript source text to scan.
    """
    tree = Parser().parse(js)
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.FuncDecl):
            continue
        parameters = node.parameters
        if len(parameters) <= 1:
            continue
        # check for the first parameter
        if parameters[0].value != "this$static":
            continue
        # check that the last one is a callback
        if parameters[-1].value != "callback":
            continue
        # the function will call createStreamWriter if its used in the
        # client interface
        source = node.to_ecma()
        if "createStreamWriter" not in source:
            continue

        # the arguments we will need to make in the GWT request:
        # everything except the 'this' and callback parameters
        params = [
            param.value
            for param in parameters
            if param.value not in ("this$static", "callback")
        ]
        # -2 for the 'this' and callback
        num_of_params = len(parameters) - 2

        # strip the correct function name
        function = node.identifier.value.replace("$", "")
        # raw string: '\d' in a plain literal is an invalid escape sequence
        function = re.sub(r'_\d+', '', function)

        # append to a list, since we may have functions of the same name,
        # but different signatures
        extracted.append({
            "function": function,
            "num_of_args": num_of_params,
            "args": params,
            "arg_type_data": get_param_types(function, source),
        })
def get_threads(tree):
    """Return the decoded ``threads`` array of a parsed script.

    The first ``key: value`` assignment whose key contains ``"threads"`` and
    whose value is an array literal has that value rendered with
    ``to_ecma()`` and decoded as JSON.  Returns ``None`` when there is no
    such assignment.
    """
    for node in nodevisitor.visit(tree):
        is_threads_array = (
            isinstance(node, ast.Assign)
            and node.op == ':'
            and "threads" in node.left.value
            and isinstance(node.right, ast.Array)
        )
        if is_threads_array:
            return json.loads(node.right.to_ecma())
    return None
def lvl2(src):
    """Parse ``src`` with the module-level ``parser`` and render it back.

    No transformation is applied: the parsed tree is returned as rendered by
    ``to_ecma()``.

    Args:
        src: JavaScript source text.

    Returns:
        str: the re-rendered source.
    """
    tree = parser.parse(src)
    # TODO: the planned rewrite of builtin ``sqrt`` calls was never
    # implemented; the traversal that only contained ``pass`` and the
    # commented-out draft have been removed.
    return tree.to_ecma()
def removeDeclarations(js_file):
    """Strip literal-valued ``var`` statements from a script.

    The script is parsed and its top-level statements are rendered back to
    source, one per line.  ``var`` statements are left out when the
    initializer of the first node found beneath them is a string, a number
    or a binary operation.
    """
    def is_literal_declaration(statement):
        # True for ``var`` statements that start with a literal-like value
        if type(statement) != ast.VarStatement:
            return False
        nodes = [x for x in nodevisitor.visit(statement)]
        return type(nodes[0].initializer) in [
            ast.String, ast.Number, ast.BinOp
        ]

    tree = Parser().parse(js_file)
    output = ""
    for child in tree.children():
        if is_literal_declaration(child):
            continue
        output += (child.to_ecma() + "\n")
    return output
def get_property_attributes(url):
    """Scrape the attributes assigned in a property page's script.

    The page at ``url`` is downloaded and its fourth ``text/javascript``
    script is parsed.  Returns a dictionary mapping ``left.value`` to
    ``right.value`` for every assignment (empty string for a side that has
    no ``value``).
    """
    page = requests.get(url)

    # html parser
    soup = BeautifulSoup(page.text, 'html.parser')
    script_tags = soup.findAll('script', {'type': 'text/javascript'})
    script = script_tags[3]

    # javascript parser
    tree = Parser().parse(script.text)
    fields = {}
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.Assign):
            fields[getattr(node.left, 'value', '')] = getattr(
                node.right, 'value', '')
    return fields
def replace_array(js_file):
    """Collect top-level variable values and re-render the program with them.

    Every top-level statement is printed.  For each ``var`` statement the
    first node found beneath it is inspected: string initializers are stored
    as their raw ``value``; number, binary-operation and array initializers
    are stored evaluated.  Any other initializer is printed.  The resulting
    table is handed to ``Visitor`` and the whole program is rendered through
    ``visit_Program``.

    Args:
        js_file: JavaScript source text.

    Returns:
        Whatever ``Visitor(variables).visit_Program(tree)`` returns.
    """
    parser = Parser()
    tree = parser.parse(js_file)
    variables = {}
    for child in tree.children():
        # Single-argument print call: same output under Python 2 and 3.
        print(child)
        if type(child) != ast.VarStatement:
            continue
        try:
            nodes = [x for x in nodevisitor.visit(child)]
            declaration = nodes[0]
            kind = type(declaration.initializer)
            if kind == ast.String:
                variables[declaration.identifier.value] = \
                    declaration.initializer.value
            elif kind in (ast.Number, ast.BinOp, ast.Array):
                # SECURITY: ``eval`` executes text taken from the script
                # being processed; only use this on trusted input.
                variables[declaration.identifier.value] = eval(
                    declaration.initializer.to_ecma())
            else:
                print((type(declaration.identifier),
                       declaration.identifier.value,
                       declaration.initializer))
        except Exception as e:
            # Best effort: report the problem and move on to the next
            # statement.
            print(e)
    visitor = Visitor(variables)
    return visitor.visit_Program(tree)
def PredicatePass(tree):
    '''
    One pass over the tree to get all predicates in the multi script program.

    Every expression statement that assigns the result of a function call
    is recorded: the source text of the assignment target is mapped to
    ``GetPredicateAST(call, predicates_so_far)``.  A later assignment to the
    same target replaces the earlier entry; this function itself performs no
    reassignment check.

    Returns:
        dict: target source text -> predicate AST.
    '''
    predicateList = dict()
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.ExprStatement):
            continue
        exprNode = node.expr
        # Only assignments are inspected.  Reading children()[0] before this
        # check failed on expression statements that have no children.
        if not isinstance(exprNode, ast.Assign):
            continue
        children = exprNode.children()
        predicateVariable = children[0]
        fnCallNode = children[1]
        # check if the RHS is a fnCall
        if isinstance(fnCallNode, ast.FunctionCall):
            predicateAST = GetPredicateAST(fnCallNode, predicateList)
            predicateList[predicateVariable.to_ecma()] = predicateAST
    return predicateList
def partitionCode(sourceCode):
    """Partition a multi-device script and print the code of each node.

    The source is parsed once; function definitions, mobile devices and
    predicates are collected by their respective passes.  Every expression
    statement that is a function call is then handed to
    ``ParseMethodCalls``.  Finally the contents of the module-level
    ``partitionedCode`` mapping are printed, one entry per node.

    NOTE(review): this block uses Python 2 print statements.
    """
    parser = Parser()
    print "Source code originally is ............ \n", sourceCode
    tree = parser.parse(sourceCode)
    fnList = FunctionDefinitionsPass(tree)
    mobileDeviceList = MobileDevicesPass(tree)
    predicateList = PredicatePass(tree)
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.ExprStatement):
            exprNode = node.expr
            if isinstance(exprNode, ast.FunctionCall):  # check if this is a function call to an object
                ParseMethodCalls(
                    exprNode, fnList, mobileDeviceList, predicateList
                )  # TODO: Impose the restriction that all mobile Device declarations come ahead of all else
    print "-*********\n*******\n------------------THE PARTITIONED CODE IS -----------------------------*********\n*******\n"
    # presumably ParseMethodCalls fills partitionedCode -- verify
    for key in partitionedCode:
        print "On node ", key, ", code is \n\n"
        print partitionedCode[key]
def treeWalker(js_file):
    """Collect the values of top-level variables declared in a script.

    For each top-level ``var`` statement the first node found beneath it is
    inspected: string initializers are stored as their raw ``value``, number
    and binary-operation initializers are stored evaluated, and any other
    initializer is printed together with the variable name.

    Args:
        js_file: JavaScript source text.

    Returns:
        dict: variable name -> collected value.
    """
    parser = Parser()
    tree = parser.parse(js_file)
    variables = {}
    for child in tree.children():
        if type(child) != ast.VarStatement:
            continue
        try:
            nodes = [x for x in nodevisitor.visit(child)]
            declaration = nodes[0]
            kind = type(declaration.initializer)
            if kind == ast.String:
                variables[declaration.identifier.value] = \
                    declaration.initializer.value
            elif kind in (ast.Number, ast.BinOp):
                # SECURITY: ``eval`` executes text taken from the script
                # being processed; only use this on trusted input.
                variables[declaration.identifier.value] = eval(
                    declaration.initializer.to_ecma())
            else:
                print((declaration.identifier.value, declaration.initializer))
        except Exception:
            # Best effort: show the statement that could not be handled and
            # continue with the next one.
            print(child.to_ecma())
    return variables
def test_modify_tree(self):
    """Renaming identifiers in the tree must show up in ``to_ecma()``."""
    source = """
    for (var i = 0; i < 10; i++) {
      var x = 5 + i;
    }
    """
    expected = textwrap.dedent("""
    for (var hello = 0; hello < 10; hello++) {
      var x = 5 + hello;
    }
    """).strip()

    tree = Parser().parse(source)
    # Rename every identifier ``i`` while walking the tree
    loop_counters = (
        candidate
        for candidate in nodevisitor.visit(tree)
        if isinstance(candidate, ast.Identifier) and candidate.value == 'i'
    )
    for identifier in loop_counters:
        identifier.value = 'hello'

    self.assertMultiLineEqual(tree.to_ecma(), expected)
def partitionCode(sourceCode):
    """Partition a multi-phone script and return the code of each phone.

    The source is parsed once; function definitions, mobile devices and
    predicates are collected by their respective passes.  Every expression
    statement that is a function call is then handed to
    ``ParseMethodCalls``.  The module-level ``partitionedCode`` mapping is
    printed and returned re-keyed by ``mobileDeviceList[key]``.

    NOTE(review): this block uses Python 2 print statements, and its layout
    was reconstructed from a whitespace-collapsed source -- confirm whether
    the final ``print ""`` belongs inside the loop.
    """
    parser = Parser()
    # Display copy only: statements are shown one per line.
    sourceCodeInHtml=sourceCode.replace(';','\n');
    print "MULTI PHONE SCRIPT \n----------------------------------------\n", sourceCodeInHtml
    tree=parser.parse(sourceCode);
    fnList=FunctionDefinitionsPass(tree)
    mobileDeviceList=MobileDevicesPass(tree)
    predicateList=PredicatePass(tree)
    for node in nodevisitor.visit(tree):
        if(isinstance(node,ast.ExprStatement)):
            exprNode=node.expr
            if(isinstance(exprNode,ast.FunctionCall)): # check if this is a function call to an object
                ParseMethodCalls(exprNode,fnList,mobileDeviceList,predicateList) # TODO: Impose the restriction that all mobile Device declarations come ahead of all else
    print "\n\nPARTITIONED CODE \n----------------------------------------\n",
    returnCode =dict()
    # presumably ParseMethodCalls fills partitionedCode -- verify
    for key in partitionedCode :
        print "On phone name \"",mobileDeviceList[key],"\" : \n ",
        print "\t",partitionedCode[key],"\n"
        returnCode[mobileDeviceList[key]]=partitionedCode[key]
    print ""
    return returnCode
def PredicatePass(tree):
    '''
    One pass over the tree to get all predicates in the multi script program.

    An expression statement of the form ``name = getPredicate...(...)``
    registers ``name`` as a predicate.  Any later assignment whose target is
    an already registered name is rejected.

    Returns:
        list: source text of each predicate variable, in order of appearance.

    Raises:
        Exception: on re-assignment to a registered predicate variable.
    '''
    predicateList = []
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.ExprStatement):
            continue
        exprNode = node.expr
        if not isinstance(exprNode, ast.Assign):
            continue

        children = exprNode.children()
        identifierName = children[0].to_ecma()
        if identifierName in predicateList:
            # The message used to say "mobile phone" (copied from the device
            # pass) and was followed by an unreachable sys.exit(2).
            raise Exception("Re-assignment to variable name ", identifierName,
                            " that represents a predicate")

        # Exactly two children are required, otherwise this cannot be a
        # function call assignment.
        if len(children) != 2:
            continue
        target, fnCallNode = children
        # the LHS must be an identifier and the RHS a fnCall
        if not (isinstance(target, ast.Identifier)
                and isinstance(fnCallNode, ast.FunctionCall)):
            continue

        functionName = fnCallNode.children()[0].to_ecma()
        # maybe add more calls in the future
        # TODO: Need some analysis to approx. the call results at compile
        #       time or defer to runtime
        if functionName.startswith("getPredicate"):
            predicateList.append(target.to_ecma())
    return predicateList
def MobileDevicesPass(tree):
    """Collect all mobile-device variables declared in a multi-phone script.

    Makes one pass over ``tree`` and looks at every expression statement
    that is an assignment. An assignment declares a device when its
    left-hand side is an identifier and its right-hand side is a call whose
    callee text starts with ``getDeviceByName``.

    Args:
        tree: parsed script, as returned by ``Parser().parse``.

    Returns:
        dict: maps the variable name (``to_ecma()`` text of the left-hand
        identifier) to the ``to_ecma()`` text of the second child of the
        call node with every double quote removed -- presumably the device
        name passed as first argument.

    Raises:
        Exception: if a name already recorded as a device is the target of
            a later assignment.
        IndexError: if a matching call node has fewer than two children
            (presumably a ``getDeviceByName`` call without an argument).
    """
    mobileDeviceList = dict()  # a dictionary (name -> device), not a list
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.ExprStatement):
            continue
        exprNode = node.expr
        if not isinstance(exprNode, ast.Assign):
            continue

        # children() is queried once; index 0 is the assignment target.
        children = exprNode.children()
        identifierName = children[0].to_ecma()
        if identifierName in mobileDeviceList:
            raise Exception(
                'Re-assignment to variable name "%s" that represents a mobile phone'
                % (identifierName,))

        # Anything other than exactly (target, value) cannot be a plain
        # "name = call(...)" assignment.
        if len(children) != 2:
            continue
        target, fnCallNode = children
        if not (isinstance(target, ast.Identifier)
                and isinstance(fnCallNode, ast.FunctionCall)):
            continue

        callChildren = fnCallNode.children()
        functionName = callChildren[0].to_ecma()
        # TODO: maybe add more calls in the future; needs some analysis to
        # approximate the call results at compile time or defer to runtime.
        if functionName.startswith("getDeviceByName"):
            # Strip the quotes of the string literal so only the bare
            # device name is stored.
            mobileDeviceList[identifierName] = (
                callChildren[1].to_ecma().replace("\"", ""))
    return mobileDeviceList
def parse_global_js_for_access_id_action_url(global_js):
    """Assemble the access-id action URL from assignments in global.js.

    The script is parsed and every assignment whose left-hand side, after
    stripping surrounding quotes, is one of ``protocol``, ``roDomain``,
    ``ro`` or ``rt`` contributes its right-hand value (quotes stripped as
    well). The URL is the concatenation of the first value collected for
    each part, in that order.

    Args:
        global_js: JavaScript source text of global.js.

    Returns:
        The concatenated ``protocol + roDomain + ro + rt`` string.

    Raises:
        AssertionError: if the same value is assigned to one part twice.
        IndexError: if one of the four parts is never assigned.
        AttributeError: if the right-hand side of a matching assignment
            has no string ``value`` (propagated unchanged).
    """
    parser = Parser()
    tree = parser.parse(global_js)
    parts = ['protocol', 'roDomain', 'ro', 'rt']
    collected = {part: [] for part in parts}
    err = "Too many '{}' assignments in global.js."

    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.Assign):
            continue
        try:
            left_value = node.left.value.strip('\'"')
        except AttributeError:
            # Targets without a plain string value (e.g. member accesses)
            # cannot name one of the URL parts.
            continue
        if left_value not in collected:
            continue

        right_value = node.right.value.strip('\'"')
        if right_value in collected[left_value]:
            # Raised explicitly so the check survives ``python -O`` and the
            # message names the part that is actually duplicated.
            raise AssertionError(err.format(left_value))
        collected[left_value].append(right_value)

    missing = [part for part in parts if not collected[part]]
    if missing:
        # Same exception type as indexing the empty list, but with context.
        raise IndexError(
            "Missing assignment(s) in global.js: {}".format(', '.join(missing)))

    return (collected['protocol'][0] + collected['roDomain'][0]
            + collected['ro'][0] + collected['rt'][0])
def post(self):
    """Handle a POST whose JSON body carries JavaScript source to analyse.

    The request and its body are printed, the ``javascript`` field of the
    JSON body is taken with every zero-width space (U+200B) removed, printed
    and parsed. Every visited node whose ``identifier.to_ecma()`` can be
    evaluated is collected; function calls / identifiers and function
    declarations among them are additionally sorted into their own lists.

    NOTE(review): the three lists are local and unused in the visible code;
    presumably the method continues beyond this excerpt -- confirm.
    """
    print(self.request)
    print("request: %s" % self.request.body)

    payload = json.loads(self.request.body)
    javascript = payload["javascript"].replace(u'\u200b', "")
    print(javascript)

    syntax_tree = Parser().parse(javascript)

    jsnodes = []
    functions = []
    calls = []
    for candidate in nodevisitor.visit(syntax_tree):
        try:
            # Evaluated only as a filter: nodes without a usable
            # ``identifier`` raise AttributeError and are skipped.
            candidate.identifier.to_ecma()
        except AttributeError:
            continue

        jsnodes.append(candidate)
        if isinstance(candidate, (ast.FunctionCall, ast.Identifier)):
            calls.append(candidate)
        if isinstance(candidate, ast.FuncDecl):
            functions.append(candidate)
def visit_UnaryOp(self, node):
    """Visit a unary-operator node and return the result for its operand.

    The operand (``node.value``) is visited through ``self.visit``. When the
    operator is ``!`` and the visited operand compares equal to ``0``, the
    string ``"true"`` (double quotes included) is returned; in every other
    case the visited operand is returned as is, i.e. the operator is dropped.

    NOTE(review): the comparison is against the integer ``0``; if
    ``self.visit`` returns text (as the quoted return value suggests) this
    branch may never be taken -- confirm.
    """
    s = self.visit(node.value)
    if node.op == '!' and s == 0:
        return '"true"'
    else:
        return s

# NOTE(review): the class and function that enclose this excerpt are not
# visible and the original indentation was lost; the statements below
# presumably belong to the enclosing function body -- confirm.
visitor = JSONVisitor()
parser = Parser()
# ``config`` is decoded as UTF-8 before being parsed as JavaScript.
tree = parser.parse(config.decode('utf-8'))
# Find the identifier named PolicyEditorConfig; the node visited right after
# it is converted with the JSON visitor and the scan stops.
flag = False
policy_editor_config = ""
for node in nodevisitor.visit(tree):
    if (isinstance(node, ast.Identifier) and
            node.value == 'PolicyEditorConfig'):
        flag = True
    elif flag:
        policy_editor_config = visitor.visit(node)
        break
# If the identifier was never found (or was the last node) the text is still
# empty here and json.loads raises.
d = json.loads(policy_editor_config)
# Create the output directory; any OSError (for instance because it already
# exists) is ignored.
try:
    os.mkdir(basedir)
except OSError:
    pass
def ParseObjectAssignments(js_code):
    """Run ``ParseAssignments`` on every object literal found in ``js_code``.

    The JavaScript source is parsed and all nodes of the resulting tree are
    visited; each node that is an ``ast.Object`` is passed to
    ``ParseAssignments``.

    Returns:
        list: one ``ParseAssignments`` result per object node, in visiting
        order.
    """
    syntax_tree = Parser().parse(js_code)
    parsed_objects = []
    for candidate in nodevisitor.visit(syntax_tree):
        if isinstance(candidate, ast.Object):
            parsed_objects.append(ParseAssignments(candidate))
    return parsed_objects