Beispiel #1
0
def get_forecast(link):
    """Fetch a forecast page and extract the data assigned in its inline script.

    The page at ``link`` is downloaded, the first ``<script>`` inside the
    ``div`` with id ``div_wgfcst1`` is parsed as JavaScript, and every
    assignment found in it becomes a dictionary entry.

    Args:
        link: URL of the forecast page.

    Returns:
        dict mapping ``parse_key(left)`` to ``parse_value(right)`` for every
        assignment; entries whose right-hand side is an array are overwritten
        with ``parse_array(right)``.
    """
    response = urllib2.urlopen(link)
    try:
        html_doc = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()
    soup = BeautifulSoup(html_doc, "html.parser")

    forecast_text = soup.find("div", id="div_wgfcst1").find("script").string

    # Parse and walk the script once; the previous version parsed the very
    # same text a second time just to re-read the array assignments.
    forecast_tree = Parser().parse(forecast_text)
    assignments = [
        node
        for node in nodevisitor.visit(forecast_tree)
        if isinstance(node, ast.Assign)
    ]

    full_data = {
        parse_key(node.left): parse_value(node.right)
        for node in assignments
    }

    # Array-valued assignments are re-read with the array parser and take
    # precedence over the generic value parsed above.
    forecast = {
        parse_key(node.left): parse_array(node.right)
        for node in assignments
        if isinstance(node.right, ast.Array)
    }

    full_data.update(forecast)
    return full_data
Beispiel #2
0
def _parse_redirect_to_security_challenge_script(script: str) -> str:
    """Extract the URL assigned to ``window.location`` by a redirect script.

    Args:
        script: JavaScript source of the redirect snippet.

    Returns:
        The right-hand side of the first ``window.location = ...`` assignment
        with surrounding quote characters stripped, or ``None`` when the
        script contains no such assignment.
    """
    tree = Parser().parse(script)
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.Assign):
            continue
        if not isinstance(node.left, ast.DotAccessor):
            continue
        children = node.left.children()
        if len(children) != 2:
            continue
        # Nested accessors (``a.b.c``) have a first child without ``value``;
        # read it defensively so unrelated assignments do not raise.
        target = tuple(getattr(child, 'value', None) for child in children)
        if target == ('window', 'location'):
            return node.right.value.strip('\'"')
    return None
Beispiel #3
0
        def get_embedded_json():
            """Collect the key/value pairs found inside ``$rwidgets(...)`` calls.

            Reads the inline scripts of the ``JSDF`` div of the enclosing
            ``soup``, parses those mentioning ``$rwidgets`` and gathers every
            assignment inside such calls. Keys listed in ``duplicates``
            accumulate their values in a list; any other key keeps the last
            value that is not the string ``'null'``.
            """
            def unquote(value, chars):
                # Strip chars from strings; pass anything else through as is
                return value.strip(chars) if isinstance(value, str) else value

            jsdf = soup.find('div', id='JSDF')
            inline_scripts = jsdf.find_all('script', src=None)

            # Keep only the script fragments that mention $rwidgets
            widget_sources = [
                fragment
                for tag in inline_scripts
                for fragment in tag.contents
                if '$rwidgets' in fragment
            ]

            # Bodge until we get rid of slimit
            with silence_output():
                parser = Parser()

            raw_values = {}
            for source in widget_sources:
                for call in nodevisitor.visit(parser.parse(source)):
                    if not isinstance(call, ast.FunctionCall):
                        continue
                    if not isinstance(call.identifier, ast.Identifier):
                        continue
                    if call.identifier.value != '$rwidgets':
                        continue

                    # Assignments found anywhere below this call
                    fields = {}
                    for item in nodevisitor.visit(call):
                        if not isinstance(item, ast.Assign):
                            continue
                        key = getattr(item.left, 'value', '').strip('"')
                        value = unquote(getattr(item.right, 'value', ''), '"')
                        if key in duplicates:
                            fields.setdefault(key, []).append(value)
                        else:
                            fields[key] = value

                    # Merge fields into raw_values, resolving duplicates
                    for key, value in fields.items():
                        if key in duplicates:
                            if key in raw_values:
                                raw_values[key] += value
                            else:
                                raw_values[key] = value
                        elif value != 'null':
                            raw_values[key] = value
            return raw_values
Beispiel #4
0
def get_test_steps_as_strings(tree, bug):
    """Return the source of every function expression assigned for ``bug``.

    The first assignment whose left-hand ``value`` contains ``str(bug)`` is
    located; every function expression below it is returned as ECMAScript
    text. An empty list is returned when no assignment matches.
    """
    wanted = str(bug)
    target = next(
        (
            candidate
            for candidate in nodevisitor.visit(tree)
            if isinstance(candidate, ast.Assign)
            and wanted in getattr(candidate.left, "value", "")
        ),
        None,
    )
    if not target:
        return []
    return [
        inner.to_ecma()
        for inner in nodevisitor.visit(target)
        if isinstance(inner, ast.FuncExpr)
    ]
Beispiel #5
0
def _parse_redirect_to_security_challenge_script(script: str) -> str:
    """Parse the redirect script and return the security challenge URL.

    Args:
        script: JavaScript source that assigns the challenge URL to
            ``window.location``.

    Returns:
        The assigned value with surrounding quote characters stripped, or
        ``None`` when no ``window.location`` assignment is present.
    """
    tree = Parser().parse(script)
    for node in nodevisitor.visit(tree):
        if not (isinstance(node, ast.Assign)
                and isinstance(node.left, ast.DotAccessor)):
            continue
        children = node.left.children()
        if len(children) != 2:
            continue
        owner, attribute = children
        # ``owner`` may itself be an accessor (``a.b.c``) without a ``value``
        # attribute, so look it up defensively instead of raising.
        if (getattr(owner, 'value', None) == 'window'
                and getattr(attribute, 'value', None) == 'location'):
            return node.right.value.strip('\'"')
    return None
Beispiel #6
0
def extract_glow_lib():
    """Read the ``glowscript_libraries`` table out of ``untrusted/run.js``.

    Returns:
        The result of ``preproc_lib_path`` applied to a dict mapping each
        property name of the ``glowscript_libraries`` object to the list of
        string entries of its array value.

    Raises:
        SystemExit: with status -1 when no ``<x>.glowscript_libraries = {...}``
            assignment is found in the file.
    """
    # Python's own ``ast`` is imported under an alias because the name ``ast``
    # in this module refers to the JavaScript AST classes.
    import ast as python_ast

    runjs = norm_path('untrusted/run.js')
    with open(runjs) as f:
        tree = JSParser().parse(f.read())

    libraries = next(
        (
            node
            for node in nodevisitor.visit(tree)
            if isinstance(node, ast.Assign)
            and isinstance(node.left, ast.DotAccessor)
            and node.left.identifier.value == 'glowscript_libraries'
            and isinstance(node.right, ast.Object)
        ),
        None,
    )
    if libraries is None:
        print('Parsing {} failed'.format(runjs))
        # Same exit status as before, without relying on the ``exit`` helper
        # that is only injected by the ``site`` module.
        raise SystemExit(-1)

    return preproc_lib_path({
        prop.left.value: [
            # The file is untrusted: literal_eval only accepts literals, so a
            # quoted string is decoded without executing code (was eval()).
            python_ast.literal_eval(lib.value)
            for lib in prop.right.items
            if isinstance(lib, ast.String)
        ]
        for prop in libraries.right.properties
    })
Beispiel #7
0
    def _parse_text(self, text):
        """Parse JavaScript ``text`` into a dict of its assignments.

        Args:
            text: JavaScript source to parse.

        Returns:
            dict mapping the ``value`` of each assignment's left side to the
            ``value`` of its right side (empty string when a side has no
            ``value``), each with one leading and one trailing quote removed.

        Exits the process with status 10 when the text cannot be parsed or
        when a collected key/value is not a string.
        """
        parser = slimit.parser.Parser()
        try:
            tree = parser.parse(text)
        except SyntaxError:
            print(text)
            # sys.stderr is a stream, not a callable: calling it raised
            # TypeError and hid the real parse error.
            sys.stderr.write("Couldn't parse text. Exiting...\n")
            sys.exit(10)

        fields = {
            getattr(x.left, 'value', ''): getattr(x.right, 'value', '')
            for x in nodevisitor.visit(tree)
            if isinstance(x, slimit.ast.Assign)
        }

        # Matches a single quote character at the very start or very end
        edge_quote = re.compile(r'^[\'"]|[\'"]$')

        def unquote(value):
            return edge_quote.sub('', value)

        try:
            return {unquote(k): unquote(v) for (k, v) in fields.items()}
        except TypeError as err:
            # A non-string key or value cannot be unquoted
            print('{}'.format(err))
            sys.stderr.write('Couldn\'t parse text. Exiting...\n')
            sys.exit(10)
Beispiel #8
0
def extract_g_config(script_text):
    """Return the ``g_config`` variable of a script as a map.

    The script is parsed and the initializer of the first variable
    declaration named ``g_config`` is handed to ``extract_object_as_map``.
    ``None`` is returned when no such declaration exists.
    """
    tree = Parser().parse(script_text)
    for declaration in nodevisitor.visit(tree):
        if not isinstance(declaration, ast.VarDecl):
            continue
        if declaration.identifier.value == 'g_config':
            return extract_object_as_map(declaration.initializer)
    return None
def get_property_attributes(url):
    """Scrape the attributes of a property advertisement.

    Args:
        url: address of the advertisement page.

    Returns:
        dict of the assignments found in the page's fourth
        ``text/javascript`` script (left ``value`` -> right ``value``) plus a
        ``'"date sold"'`` entry, or ``None`` when the page title contains
        'Real Estate Properties' (the page is then skipped).
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Pages titled 'Real Estate Properties' are skipped
    if 'Real Estate Properties' in soup.title.string:
        return None

    # If the ad is archived, put in a dummy date, else take the date from
    # the title of the advertisement
    if soup.find("span", "status-label label-archive") is not None:
        date = '31 Dec 9999'
    else:
        date = re.findall(r'\d{2}\s\w{3}\s\d{4}', soup.title.string)[0]

    # Look the script up only for pages that are actually scanned: indexing
    # it before the title check raised IndexError on skipped pages with
    # fewer than four script tags.
    script = soup.findAll('script', {'type': 'text/javascript'})[3]

    tree = Parser().parse(script.text)
    fields = {
        getattr(node.left, 'value', ''): getattr(node.right, 'value', '')
        for node in nodevisitor.visit(tree)
        if isinstance(node, ast.Assign)
    }
    fields['"date sold"'] = '"' + date + '"'
    return fields
Beispiel #10
0
    def parse_country(self, response):
        """Extract a country's case series from its first chart and save it.

        The first ``row graph_row`` element of ``response`` supplies the chart
        title (from which the country name is taken) and the chart script.
        The first non-empty ``categories`` and ``data`` arrays assigned in
        that script are written as ``date,count`` lines to
        ``data/<country>.csv``.

        Raises:
            ValueError: if the chart title does not contain " in ".
            AssertionError: if either array is missing/empty or their lengths
                differ.
        """
        # Python's own ``ast`` under an alias: the name ``ast`` in this
        # module refers to the JavaScript AST classes.
        import ast as python_ast

        charts = response.xpath('//*[@class="row graph_row"]')
        total_corona_chart = charts[0]
        script = total_corona_chart.xpath('div/script/text()').extract()[0]
        title = total_corona_chart.xpath('div/h3/text()').extract()[0]
        try:
            country_name = title[title.index(" in ") + 4:]
        except ValueError:
            # str.index raises ValueError when " in " is absent; the previous
            # ``except e:`` referenced an undefined name and raised NameError.
            raise ValueError("Worldometer changed their labels. "
                             "Hold your pain, Harold.")
        if country_name[:4] == "the ":
            country_name = country_name[4:]

        tree = Parser().parse(script)
        dates = None  # chart categories
        cases = None  # corresponding number of cases
        for node in nodevisitor.visit(tree):
            if not isinstance(node, ast.Assign):
                continue
            label = getattr(node.left, 'value', '')
            if label == 'categories' and not dates:
                print("\nparsing dates\n")
                # The script comes from a web page: decode the quoted strings
                # with literal_eval, which cannot execute code (was eval()).
                dates = [
                    python_ast.literal_eval(getattr(s, 'value', ''))
                    for s in getattr(node.right, 'items', '')
                ]
            elif label == 'data' and not cases:
                print("\nparsing number of cases\n")
                cases = [
                    int(getattr(n, 'value', ''))
                    for n in getattr(node.right, 'items', '')
                ]
        assert dates and cases and len(dates) == len(cases)
        with open("data/%s.csv" % country_name, 'w+') as f:
            for date, count in zip(dates, cases):
                f.write(date)
                f.write(',')
                f.write(str(count))
                f.write('\n')
Beispiel #11
0
def chapter_url2image_urls(chapter_url):
    """Return the image URLs of a chapter, ordered by page.

    Each data fragment produced by ``get_info_from_url`` is parsed as
    JavaScript; the node visited just before the ``image_list`` identifier of
    the LAST fragment is matched against ``_image_list_match_pattern`` and
    its first group is evaluated into a mapping of image records.

    Args:
        chapter_url: address of the chapter page.

    Returns:
        tuple of decoded ``src`` strings sorted by their ``page`` value.

    Raises:
        AssertionError: if no node precedes ``image_list`` (including when no
            fragment was produced) or the pattern does not match.
    """
    g = get_info_from_url(chapter_url, chapter2images)

    p = Parser()
    # Bound up front so that an empty ``g`` reaches the assertion below
    # instead of failing with UnboundLocalError.
    pre = None
    for t2_or_t3, (slot_idx, pattern_idx, info_pattern) in g:
        _, _, data = t2_or_t3
        tree = p.parse(data)
        pre = None
        for node in nodevisitor.visit(tree):
            if isinstance(node, ast.Identifier) and node.value == 'image_list':
                break
            pre = node

    assert pre is not None
    m = _image_list_match_pattern.match(pre.to_ecma())
    assert m is not None
    # SECURITY NOTE(review): both eval() calls execute text scraped from the
    # remote page. If the payload is plain JSON-style data, replace them with
    # a literal-only decoder -- confirm the payload format first.
    image_list = eval(m.group(1))
    image_list = eval(image_list)

    ordered = sorted(
        (info['page'], base64.b64decode(info['src']).decode('ascii'))
        for info in image_list.values()
    )
    return tuple(src for _, src in ordered)
Beispiel #12
0
def card_price_history(setname, cardname):
    '''
    Scrape the price history of a card from MTGPrice.com with a javascript
    parser.
    Input:
        setname and cardname are strings (generally taken from the Scryfall
        API); whitespace runs in them become underscores in the URL.
    Output:
        A numpy array built from the price history, one row per entry in the
        form [timestamp, price].
    '''
    # Build the card URL and turn the page into soup
    set_slug = '_'.join(setname.split())
    card_slug = '_'.join(cardname.split())
    link = 'https://www.mtgprice.com/sets/' + set_slug + '/' + card_slug
    page = requests.get(link)
    soup = BeautifulSoup(page.content, 'html.parser')

    # Collect the "data" array of every script declaring the results variable
    marker = 'var results = ['
    history = []
    for script in soup.findAll('script', type='text/javascript'):
        if marker not in script.text:
            continue
        tree = Parser().parse(script.text)
        for node in nodevisitor.visit(tree):
            if not isinstance(node, ast.Assign):
                continue
            if getattr(node.left, 'value', '') != "\"data\"":
                continue
            history.extend(
                [point.items[0].value, point.items[1].value]
                for point in node.right.items
            )
            # Only the first "data" assignment of each script is used
            break
    return np.array(history)
Beispiel #13
0
def parse_global_js_for_access_id_action_url(global_js):
    """Assemble the access-id action URL from assignments in ``global.js``.

    Every assignment whose left ``value`` (quotes stripped) is one of
    ``protocol``, ``roDomain``, ``ro`` or ``rt`` contributes its right
    ``value`` (quotes stripped). The URL is the concatenation of the first
    value collected for each part, in that order.

    Args:
        global_js: JavaScript source of ``global.js``.

    Returns:
        The concatenated URL string.

    Raises:
        AssertionError: if the same value is assigned twice to one part.
        IndexError: if one of the parts is never assigned.
    """
    tree = Parser().parse(global_js)

    parts = ['protocol', 'roDomain', 'ro', 'rt']
    UrlParts = namedtuple('UrlParts', parts)
    url_parts = UrlParts([], [], [], [])

    getvalue = operator.attrgetter('value')
    err = "Too many '{}' assignments in global.js."
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.Assign):
            continue
        try:
            left_value = getvalue(node.left).strip('\'"')
        except AttributeError:
            # Left side without a ``value`` cannot name one of the parts
            continue

        if left_value not in parts:
            continue
        right_value = getvalue(node.right).strip('\'"')
        collected = getattr(url_parts, left_value)
        # Name the part that is actually duplicated; the message used to say
        # 'protocol' no matter which part triggered the assertion.
        assert right_value not in collected, err.format(left_value)
        collected.append(right_value)

    return ''.join(getattr(url_parts, part)[0] for part in parts)
def fetch(item):
    """Download ``item["link"]`` and fill in the article fields of ``item``.

    Sets ``title`` and ``image`` from the page's ``og:`` meta tags and
    ``text`` from the ``content_elements`` of the ``Fusion.globalContent``
    assignment found in the page's scripts (empty string when absent), then
    adds ``source`` and an MD5 ``key`` of the link.

    Any exception raised while downloading or parsing is reported with a
    printed message and re-raised.

    Returns:
        The same ``item`` dict, updated in place.
    """
    try:
        r = requests.get(item["link"])
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "html.parser")
        item["title"] = soup.find("meta",  property="og:title")["content"]
        item["image"] = soup.find("meta",  property="og:image")["content"]

        # The last script mentioning Fusion.globalContent wins
        script_text = None
        for tag in soup.find_all("script"):
            if tag.string is not None and "Fusion.globalContent" in tag.string:
                script_text = tag.string

        text = ""
        if script_text is not None:
            tree = JavascriptParser().parse(script_text)
            for node in nodevisitor.visit(tree):
                if (isinstance(node, ast.Assign)
                        and node.left.to_ecma() == "Fusion.globalContent"):
                    payload = json.loads(node.right.to_ecma())
                    elements = payload.get("content_elements", [])
                    text = "<br/>".join(
                        [x["content"] for x in elements if "content" in x])
        item["text"] = text
    except Exception:
        print("cannot parse %s" % (item["link"]))
        raise
    item["source"] = "appledaily"
    item["key"] = hashlib.md5(item["link"].encode()).hexdigest()
    return item
Beispiel #15
0
def scan_js(crawler, url, content):
    '''
    Scan javascript for url assignments (like ajax calls).

    Every assignment whose left ``value`` contains 'url' is inspected: a
    string literal on the right side, or held directly in one of the right
    side's attributes, is logged and handed to ``crawler.check_link``.

    crawler: object providing ``check_link(page_url, link)``.
    url: address of the page the javascript came from.
    content: javascript source to scan.
    '''
    LOGGER.info('Scanning Javascript on %s' % url)

    def report(literal):
        # ``value`` still carries its surrounding quotes: drop exactly one
        # character at each end. The link handed to the crawler used to be
        # value[2:-1], which also cut off the first character of the URL
        # while the log line showed the correct one.
        link = literal.value[1:-1]
        LOGGER.info('Found interesting url in JS: %s' % link)
        crawler.check_link(url, link)

    tree = Parser().parse(content)
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.Assign):  # <something>: <something>
            continue
        leftval = getattr(node.left, 'value', '')  # 'leftval': <something>
        if not leftval or 'url' not in leftval:  # 'url': <something>
            continue
        if isinstance(node.right, ast.String):  # 'url': 'somestring'
            report(node.right)
        for item in node.right.__dict__.values():  # string in <something>
            # <something> may be function_call() / variable + 'somestring'
            if isinstance(item, ast.String):
                report(item)
Beispiel #16
0
    def _basic_init(self):
        """Download the data page and populate ``self.name`` and ``self.price``.

        The page is parsed as JavaScript. ``fS_name`` supplies the name and
        ``Data_millionCopiesIncome`` supplies (timestamp in ms, rate) pairs.
        Net values are the running product of ``1 + rate * 1e-4``; the rows
        kept are those dated in ``opendate`` and not after ``yesterdaydash()``.

        Raises:
            FundTypeError: if the first 800 characters of the page mention
                ``Data_fundSharesPositions``.
        """
        self._page = _download(self._url)
        if "Data_fundSharesPositions" in self._page.text[:800]:
            raise FundTypeError("This code seems to be a fund, use fundinfo instead")

        tree = Parser().parse(self._page.text)

        def first_initializer(var_name):
            # Initializer of the first ``var`` statement whose leading
            # declaration is named var_name (IndexError when there is none)
            matches = [
                stmt.children()[0].children()[1]
                for stmt in nodevisitor.visit(tree)
                if isinstance(stmt, ast.VarStatement)
                and stmt.children()[0].children()[0].value == var_name
            ]
            return matches[0]

        nodenet = first_initializer("Data_millionCopiesIncome")
        self.name = first_initializer("fS_name").value.strip('"')

        # Timestamps are read in UTC+8, then stored without timezone info
        tz_bj = dt.timezone(dt.timedelta(hours=8))
        points = nodenet.children()
        datel = [
            dt.datetime.fromtimestamp(
                int(point.children()[0].value) / 1e3, tz=tz_bj
            ).replace(tzinfo=None)
            for point in points
        ]
        ratel = [float(point.children()[1].value) for point in points]

        # Running product of the daily growth factors, starting from 1
        netvalue = []
        level = 1
        for dailyrate in ratel:
            level = level * (1 + dailyrate * 1e-4)
            netvalue.append(level)

        frame = pd.DataFrame(
            data={
                "date": datel,
                "netvalue": netvalue,
                "totvalue": netvalue,
                "comment": [0] * len(datel),
            }
        )
        frame = frame[frame["date"].isin(opendate)].reset_index(drop=True)
        self.price = frame[frame["date"] <= yesterdaydash()]
Beispiel #17
0
def inital_check_for_obfuscation_condtiion_sensitiveFunctions(js_text):
    """Scan a script for ``if`` statements and sensitive function names.

    Every identifier in the parsed script (including those reached through
    dot accessors) is collected and compared with the names listed in
    ``obfuscation_function_names`` and ``profiling_function_names``.

    Args:
        js_text: JavaScript source to inspect.

    Returns:
        tuple ``(if_condition, obfuscation, profiling, ob_list, pro_list)``:
        whether an ``if`` statement was seen, whether any obfuscation /
        profiling name was found, and the sets of names found.
    """
    tree = Parser().parse(js_text)

    keywords = set()
    if_condition = False

    for visited in nodevisitor.visit(tree):
        if isinstance(visited, If):
            if_condition = True
            continue

        # Depth-first walk (LIFO stack) through dot accessors, so that the
        # identifiers of chains like a.b.c are all collected.
        stack = [visited]
        while stack:
            current = stack.pop()
            if isinstance(current, DotAccessor):
                try:
                    stack.extend(current.children())
                except Exception:
                    # Best effort, as before: an accessor whose children
                    # cannot be listed is skipped. Narrowed from a bare
                    # ``except`` so Ctrl-C / SystemExit are not swallowed.
                    pass
                continue

            if isinstance(current, Identifier):
                keywords.add(current.value)

    ob_list = {ob for ob in obfuscation_function_names if ob in keywords}
    pro_list = {pro for pro in profiling_function_names if pro in keywords}

    return if_condition, bool(ob_list), bool(pro_list), ob_list, pro_list
Beispiel #18
0
def whileExtract(s):
    '''Return the source text of every while loop in the script ``s``. '''
    tree = Parser().parse(s)
    return [
        loop.to_ecma()
        for loop in nodevisitor.visit(tree)
        if isinstance(loop, ast.While)
    ]
Beispiel #19
0
def get_m_decl(fil):
  """Return the initializer source of the variable ``m`` declared in a file.

  The UTF-8 file ``fil`` is parsed as JavaScript. When ``m`` is declared
  several times the last declaration wins; ``None`` is returned when it is
  never declared.
  """
  with codecs.open(fil, 'r', encoding='utf8') as fd:
    source = fd.read()
    initializers = [
      decl.initializer.to_ecma()
      for decl in nodevisitor.visit(Parser().parse(source))
      if isinstance(decl, ast.VarDecl) and decl.identifier.value == 'm'
    ]
    return initializers[-1] if initializers else None
Beispiel #20
0
def defects_mapping_from_js_ast(js_ast):
    """Build the offices-to-defects and keywords-to-defects mappings.

    This function is used in the spider, but can also be called manually via
    this script on reports that failed parsing due to a broken AST, after
    the AST has been fixed by hand.

    Returns:
        A 2-tuple of dicts built from the first and the second array literal
        found in ``js_ast``.
    """
    # Children of every array literal of the syntax tree, in visiting order
    js_arrays = []
    for node in nodevisitor.visit(js_ast):
        if isinstance(node, ast.Array):
            js_arrays.append(node.children())

    def get_defects_by_keys(data_raw):
        """Map each key name to its defects, read from raw html strings.

        Each raw string is turned into an xml tree whose embedded text
        supplies the key name and the defects.
        """
        res = {}
        for element_raw in data_raw:
            try:
                raw_html = element_raw.value
            except AttributeError:
                # Sometimes an entry doesn't hold any value: skip it
                continue

            # The raw html string looks like this:
            #
            # "<div class='tooltip-title'>[key name] appears in:</div>[defect 1]<br/>[defect 2]<br/>[defect 3]<br/>"
            #
            # The surrounding quotes are dropped and the rest is wrapped in
            # another <div> element before parsing, since the trailing <br/>
            # elements leave it without a single root element.
            element_ast = et.fromstring('<div>' + raw_html[1:-1] + '</div>')
            title = element_ast[0]

            # Keep only what precedes the "appears in" marker of the title
            key_name = title.text.split(u'מופיע ב')[0]

            # Defects are the tail of the first <div> and the non-empty tails
            # of all subsequent <br> elements.
            key_defects = [title.tail]
            key_defects.extend(d.tail for d in element_ast[1:] if d.tail)

            res[key_name] = key_defects

        return res

    offices = get_defects_by_keys(js_arrays[0])
    keywords = get_defects_by_keys(js_arrays[1])
    return (offices, keywords)
Beispiel #21
0
def adaptoutput(text, exit):
    """Return a JS statement assigning a declared variable to ``output``.

    The module-level ``parser`` parses ``text``; the first variable
    declaration met decides the result. An empty string is returned when
    ``text`` declares no variable.

    Example: for "var BROKERS = require('./mock-brokers').data;" the result
    is "\\tvar output=BROKERS;".
    """
    for decl in nodevisitor.visit(parser.parse(text)):
        if not isinstance(decl, ast.VarDecl):
            continue
        declared = decl.identifier.value
        # Use the caller's spelling when the declared name equals ``exit``
        name = exit if declared == exit else declared
        return "\tvar output=" + name + ";"
    return ""
Beispiel #22
0
def get_raw_value(tree, var_name):
    """Return the source of the node visited right after ``var_name``.

    The tree is walked until an identifier named ``var_name`` is met; the
    node that the visitor yields next is returned as ECMAScript text.
    ``None`` is returned when the identifier is absent or is the last node.
    """
    walker = nodevisitor.visit(tree)
    for node in walker:
        if isinstance(node, ast.Identifier) and node.value == var_name:
            following = next(walker, None)
            if following is None:
                return None
            return following.to_ecma()
    return None
Beispiel #23
0
def parse_script(data):
    """Return every variable declaration node found in the script ``data``."""
    # Hack: steam's response contains a call with an empty second argument,
    # which is a javascript syntax error; give it a value before parsing.
    patched = data.replace('BuildGameRow(game, )', 'BuildGameRow(game, 0)')
    tree = Parser().parse(patched)
    variables = []
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.VarDecl):
            variables.append(node)
    return variables
Beispiel #24
0
def FunctionDefinitionsPass(tree):
    ''' Make one pass through the entire tree and map every declared function
        name to its function declaration node.
        TODO: Make the restriction clear that all function declarations and definitions go together in the function foo= { } form
        TODO: Allow the anonymous function declaration form as well ie f=function(...) { } '''
    return {
        declaration.children()[0].to_ecma(): declaration
        for declaration in nodevisitor.visit(tree)
        if isinstance(declaration, ast.FuncDecl)
    }
Beispiel #25
0
def parse_goi_script(script, date_col='date'):
    '''Extract the data node from the javascript text, parse each individual
    price series, merge them into one data frame and return it as a
    regularly spaced (daily), linearly interpolated series.

    script: sequence whose first item is the javascript text.
    date_col: name of the date column shared by all series.
    '''
    tree = slimit.parser.Parser().parse(script[0])
    arrays = [node
              for node in nodevisitor.visit(tree)
              if isinstance(node, slimit.ast.Array)]
    data_node = arrays[0]

    # Series names: first word of every string literal below the data node
    var_names = []
    for node in nodevisitor.visit(data_node):
        if isinstance(node, slimit.ast.String):
            literal = ast.literal_eval(node.to_ecma())
            var_names.append(literal.split(' ')[0])

    # Series values: every nested array whose source is longer than 1000
    # characters, with the Date.UTC wrappers dropped
    series = []
    for node in nodevisitor.visit(data_node):
        if not isinstance(node, slimit.ast.Array):
            continue
        ecma = node.to_ecma()
        if len(ecma) > 1000:
            series.append(ast.literal_eval(ecma.replace('Date.UTC', '')))

    df_list = [goi_list_to_df(values, name)
               for name, values in zip(var_names, series)]

    def merge_on_date(left, right):
        return pd.merge(left, right, on=date_col)

    final_data = reduce(merge_on_date, df_list)

    # make the time series a regular spaced time series.
    max_date = final_data[date_col].max()
    min_date = final_data[date_col].min()
    nod = (max_date - min_date).days
    every_day = [min_date + timedelta(days=offset)
                 for offset in range(nod + 1)]
    full_dates = pd.DataFrame({date_col: every_day})

    # Interpolate the data after converting to regular spaced data
    merged = pd.merge(final_data, full_dates, on=date_col, how='right')
    ordered = (merged.sort_values(date_col)
               .reset_index(drop=True)
               .set_index(date_col))
    interpolated = ordered.apply(
        lambda column: column.interpolate('linear'), axis=0)
    regular_data = interpolated.reset_index()

    return regular_data.rename(index=str, columns={'IGC': 'GOI'})
Beispiel #26
0
    def _basic_init(self):
        """Download the data page and populate ``self.name`` and ``self.price``.

        ``fS_name`` supplies the name; ``Data_millionCopiesIncome`` supplies
        (timestamp in ms, rate) pairs. Net values are the running product of
        ``1 + rate * 1e-4``; only rows dated in ``opendate`` and not after
        ``yesterdaydash()`` are kept.
        """
        self._page = _download(self._url)
        tree = Parser().parse(self._page.text)

        def initializers_of(var_name):
            # Initializers of all ``var`` statements whose first declaration
            # is named var_name, in visiting order
            return [
                statement.children()[0].children()[1]
                for statement in nodevisitor.visit(tree)
                if isinstance(statement, ast.VarStatement)
                and statement.children()[0].children()[0].value == var_name
            ]

        nodenet = initializers_of('Data_millionCopiesIncome')[0]
        name = initializers_of('fS_name')[0]
        self.name = name.value.strip('"')

        # Timestamps are read in UTC+8, then stored without timezone info
        tz_bj = dt.timezone(dt.timedelta(hours=8))
        datel = []
        ratel = []
        for pair in nodenet.children():
            stamp, rate = pair.children()[0], pair.children()[1]
            moment = dt.datetime.fromtimestamp(int(stamp.value) / 1e3,
                                               tz=tz_bj)
            datel.append(moment.replace(tzinfo=None))
            ratel.append(float(rate.value))

        # Running product of the daily growth factors, starting from 1
        netvalue = []
        current = 1
        for dailyrate in ratel:
            current = current * (1 + dailyrate * 1e-4)
            netvalue.append(current)

        columns = {
            'date': datel,
            'netvalue': netvalue,
            'totvalue': netvalue,
            'comment': [0] * len(datel),
        }
        df = pd.DataFrame(data=columns)
        df = df[df['date'].isin(opendate)]
        df = df.reset_index(drop=True)
        self.price = df[df['date'] <= yesterdaydash()]
def get_friends_list(tree):
    """Return the decoded value of the first ``shortProfiles`` property.

    The first ``:`` assignment whose left ``value`` contains
    "shortProfiles" has its right side serialised and decoded as JSON.
    ``None`` is returned when no such property exists.
    """
    for node in nodevisitor.visit(tree):
        if (isinstance(node, ast.Assign)
                and node.op == ':'
                and "shortProfiles" in node.left.value):
            return json.loads(node.right.to_ecma())
    return None
Beispiel #28
0
def addAllIntEnv(inv, env=None):
    """Map every identifier occurring in the script ``inv`` to ``Int``.

    Args:
        inv: JavaScript source to scan.
        env: optional dict to extend in place; a new dict is created when
            omitted.

    Returns:
        The environment dict, with one ``Int`` entry per identifier name.
    """
    # Identity test: ``== None`` would invoke a custom __eq__ on ``env``
    if env is None:
        env = {}

    tree = Parser().parse(inv)
    for node in nodevisitor.visit(tree):
        if isinstance(node, jsast.Identifier):
            env[node.value] = Int

    return env
Beispiel #29
0
def adaptinput(text, entry):
    """Return a JS statement binding the first declared variable to ``input``.

    The module-level ``parser`` parses ``text``; the first variable
    declaration met yields "\\tvar <name>=input;". An empty string is
    returned when ``text`` declares no variable.

    Example: for "var BROKERS = require('./mock-brokers').data;" the result
    is "\\tvar BROKERS=input;".

    ``entry`` does not influence the result: the former second branch
    compared the name with the ``exit`` builtin (left over from
    ``adaptoutput``), which is never equal to a string, so both branches
    always returned the same statement. The comparison is dropped because
    ``exit`` is only defined when the ``site`` module is loaded.
    NOTE(review): if the intent was to look for ``entry`` specifically,
    the selection logic needs to be revisited.
    """
    tree = parser.parse(text)
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.VarDecl):
            return "\tvar " + node.identifier.value + "=input;"
    return ""
Beispiel #30
0
def AnalyseCallBack(functionDeclaration):
    """Gather the phone variables accessed by a call back function.

    The function's source is re-parsed and, for every expression with a dot
    accessor (ie x....), the source of the part before the dot is recorded,
    in visiting order. The result is used to decide the placement of the
    call back.
    """
    assert isinstance(functionDeclaration, ast.FuncDecl)
    body_tree = Parser().parse(functionDeclaration.to_ecma())
    return [
        accessor.children()[0].to_ecma()
        for accessor in nodevisitor.visit(body_tree)
        if isinstance(accessor, ast.DotAccessor)
    ]
Beispiel #31
0
def removeDeclarations(js_file):
  """Serialise a script without its literal ``var`` statements.

  Every top-level statement is written back followed by a newline, except
  ``var`` statements whose first declaration is initialised with a string,
  a number or a binary operation.
  """
  literal_kinds = [ast.String, ast.Number, ast.BinOp]
  kept = []
  for child in Parser().parse(js_file).children():
    if type(child) == ast.VarStatement:
      first_declaration = list(nodevisitor.visit(child))[0]
      if type(first_declaration.initializer) in literal_kinds:
        continue
    kept.append(child.to_ecma() + "\n")
  return "".join(kept)
Beispiel #32
0
def get_forecast(link):
    """Download a forecast page and return the data of its inline script.

    Args:
        link: URL of the forecast page.

    Returns:
        dict with one entry per assignment found in the first ``<script>`` of
        the ``div_wgfcst1`` div: ``parse_key(left)`` mapped to
        ``parse_value(right)``, or to ``parse_array(right)`` when the right
        side is an array.
    """
    page = urllib2.urlopen(link)
    try:
        html_doc = page.read()
    finally:
        # Do not leak the connection when reading fails
        page.close()
    soup = BeautifulSoup(html_doc, "html.parser")

    forecast_text = soup.find("div", id="div_wgfcst1").find("script").string

    # One parse and one walk are enough: the text used to be parsed twice
    # to build the two dictionaries below.
    forecast_tree = Parser().parse(forecast_text)
    assigns = [node
               for node in nodevisitor.visit(forecast_tree)
               if isinstance(node, ast.Assign)]

    full_data = {parse_key(node.left): parse_value(node.right)
                 for node in assigns}

    # Arrays get their dedicated parser and replace the generic value
    forecast = {parse_key(node.left): parse_array(node.right)
                for node in assigns
                if isinstance(node.right, ast.Array)}

    full_data.update(forecast)
    return full_data
Beispiel #33
0
def AnalyseCallBack(functionDeclaration):
    ''' Looks at the function body of a call back function and gathers all the
        phone variables that are accessed, ie the first name of every
        expression with a dot accessor (x....). Each name is returned once. '''
    assert isinstance(functionDeclaration, ast.FuncDecl)
    tree = Parser().parse(functionDeclaration.to_ecma())

    # Source of the part before the dot of every accessor, used to decide
    # the placement of the call back
    accessed = []
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.DotAccessor):
            accessed.append(node.children()[0].to_ecma())

    # Nested dot accessors are cut at their first name, because q.phone and
    # q.wifi must not be counted as phones; only unique names are returned.
    return list(set(name.split('.')[0] for name in accessed))
Beispiel #34
0
def parse_JavaScript(js):
    """Record the GWT client-interface functions declared in ``js``.

    A function declaration qualifies when it has at least two parameters,
    the first named ``this$static`` and the last named ``callback``, and its
    source mentions ``createStreamWriter``. For each one a dict with the
    cleaned function name, the argument count, the argument names and the
    result of ``get_param_types`` is appended to the module-level
    ``extracted`` list.

    Args:
        js: JavaScript source to scan.
    """
    # NOTE(review): the original declared ``global functions`` without ever
    # using it; results have always gone to ``extracted``.
    tree = Parser().parse(js)

    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.FuncDecl):
            continue
        parameters = node.parameters
        if len(parameters) <= 1:
            continue
        # first parameter is the instance, last one must be a callback
        if parameters[0].value != "this$static":
            continue
        if parameters[-1].value != "callback":
            continue

        # the function calls createStreamWriter if it is used in the client
        # interface
        source = node.to_ecma()
        if "createStreamWriter" not in source:
            continue

        if len(parameters) > 2:
            # -2 for the 'this' and callback
            num_of_params = len(parameters) - 2
            # only the arguments needed to make the GWT request
            params = [
                param.value
                for param in parameters
                if param.value != "this$static" and param.value != "callback"
            ]
        else:
            num_of_params = 0
            params = []

        # strip the correct function name; raw string so that \d is a regex
        # class and not an invalid string escape
        function = node.identifier.value.replace("$", "")
        function = re.sub(r'_\d+', '', function)

        # a list, since functions of the same name may have different
        # signatures
        extracted.append({
            "function": function,
            "num_of_args": num_of_params,
            "args": params,
            "arg_type_data": get_param_types(function, source),
        })
def get_threads(tree):
    """Return the decoded array of the first ``threads`` property in *tree*.

    Every node produced by ``nodevisitor.visit(tree)`` is inspected. The first
    assignment that uses the ``:`` operator, whose left-hand value contains
    the text ``threads`` and whose right-hand side is an array node, has that
    array serialized with ``to_ecma()`` and decoded with ``json.loads``.

    Returns ``None`` when no such assignment exists.
    """
    for candidate in nodevisitor.visit(tree):
        # The checks short-circuit in the same order as a chain of guards:
        # node kind, operator, key text, then the kind of the value.
        is_threads_array = (
            isinstance(candidate, ast.Assign)
            and candidate.op == ':'
            and "threads" in candidate.left.value
            and isinstance(candidate.right, ast.Array)
        )
        if is_threads_array:
            return json.loads(candidate.right.to_ecma())

    return None
def lvl2(src):
    """Parse *src* with the module-level ``parser`` and return its ECMAScript text.

    The tree is walked once, but no node is modified, so the result is simply
    the parsed program serialized again through ``to_ecma()``.
    """
    program = parser.parse(src)

    # Placeholder traversal: nothing is rewritten yet.
    # TODO: an earlier sketch intended to replace calls whose callee is named
    # "sqrt" with a custom implementation; it was never enabled.
    for _node in nodevisitor.visit(program):
        continue

    return program.to_ecma()  # print awesome javascript :)
Beispiel #37
0
def removeDeclarations(js_file):
    """Return the source of *js_file* without its simple top-level ``var`` statements.

    Each top-level child of the parsed program is serialized with ``to_ecma()``
    followed by a newline. A ``VarStatement`` is dropped when the initializer
    of the first node found inside it is a string, number or binary
    operation; every other statement is kept.
    """
    program = Parser().parse(js_file)
    kept = []
    for statement in program.children():
        if type(statement) == ast.VarStatement:
            first_node = list(nodevisitor.visit(statement))[0]
            if type(first_node.initializer) in (ast.String, ast.Number,
                                                ast.BinOp):
                # Literal or arithmetic initializer: omit the whole statement.
                continue
        kept.append(statement.to_ecma() + "\n")
    return "".join(kept)
def get_property_attributes(url):
    """Fetch *url* and collect the assignments made in one of its inline scripts.

    The page is downloaded with ``requests``, the fourth ``<script>`` element
    of type ``text/javascript`` is parsed as JavaScript, and every assignment
    node contributes one entry: the ``value`` attribute of its left side
    mapped to the ``value`` attribute of its right side. A side without a
    ``value`` attribute is represented by an empty string. Later assignments
    to the same key overwrite earlier ones.
    """
    page = requests.get(url)

    # html parser
    document = BeautifulSoup(page.text, 'html.parser')
    inline_scripts = document.findAll('script', {'type': 'text/javascript'})
    target_script = inline_scripts[3]

    # javascript parser
    syntax_tree = Parser().parse(target_script.text)
    fields = {}
    for node in nodevisitor.visit(syntax_tree):
        if not isinstance(node, ast.Assign):
            continue
        key = getattr(node.left, 'value', '')
        fields[key] = getattr(node.right, 'value', '')
    return fields
Beispiel #39
0
def replace_array(js_file):
    """Collect simple top-level ``var`` values and rewrite the program with them.

    Each top-level child of the parsed program is printed. For every
    ``VarStatement`` the first node found inside it is examined:

    * a string initializer is stored by its ``value``;
    * a number, binary operation or array initializer is serialized with
      ``to_ecma()`` and evaluated as a Python expression;
    * any other initializer is only reported on stdout.

    The resulting name -> value mapping is handed to ``Visitor`` and the
    result of ``visit_Program`` on the parsed tree is returned.

    Any exception raised while handling one statement is printed and that
    statement is skipped (best-effort behavior).
    """
    parser = Parser()
    tree = parser.parse(js_file)

    variables = {}
    for child in tree.children():
        print(child)
        if not isinstance(child, ast.VarStatement):
            continue
        try:
            decl = list(nodevisitor.visit(child))[0]
            initializer = decl.initializer
            if isinstance(initializer, ast.String):
                variables[decl.identifier.value] = initializer.value
            elif isinstance(initializer, (ast.Number, ast.BinOp, ast.Array)):
                # SECURITY NOTE(review): eval() executes the initializer's
                # source text as Python. Only safe if js_file is trusted;
                # consider a restricted evaluator before feeding it external
                # input.
                variables[decl.identifier.value] = eval(initializer.to_ecma())
            else:
                print((type(decl.identifier), decl.identifier.value,
                       initializer))
        except Exception as e:
            # Deliberate best-effort: report and move on to the next statement.
            print(e)

    visitor = Visitor(variables)

    return visitor.visit_Program(tree)
Beispiel #40
0
def PredicatePass(tree):
    """Collect predicates defined by ``target = call(...)`` expression statements.

    Walks every node yielded by ``nodevisitor.visit(tree)``. For each
    expression statement that is an assignment whose right-hand side is a
    function call, the call node is passed to ``GetPredicateAST`` together
    with the predicates gathered so far, and the result is stored under the
    ECMAScript text of the assignment target. A later assignment to the same
    target overwrites the earlier entry.

    NOTE(review): an earlier comment said only global statements should be
    scanned, nothing within functions; whether ``nodevisitor.visit`` descends
    into function bodies is not visible here -- confirm.

    Returns:
        dict mapping target text to the value returned by ``GetPredicateAST``.
    """
    predicateList = dict()
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.ExprStatement):
            continue
        exprNode = node.expr
        if not isinstance(exprNode, ast.Assign):
            continue
        # Index the operands only after confirming this is an assignment:
        # doing it earlier raises IndexError for any expression whose
        # children() list is empty.
        operands = exprNode.children()
        predicateVariable, rightHandSide = operands[0], operands[1]
        if isinstance(rightHandSide, ast.FunctionCall):
            predicateAST = GetPredicateAST(rightHandSide, predicateList)
            predicateList[predicateVariable.to_ecma()] = predicateAST
    return predicateList
Beispiel #41
0
def partitionCode(sourceCode):
    parser = Parser()
    print "Source code originally is ............ \n", sourceCode
    tree = parser.parse(sourceCode)
    fnList = FunctionDefinitionsPass(tree)
    mobileDeviceList = MobileDevicesPass(tree)
    predicateList = PredicatePass(tree)
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.ExprStatement):
            exprNode = node.expr
            if isinstance(exprNode, ast.FunctionCall):  # check if this is a function call to an object
                ParseMethodCalls(
                    exprNode, fnList, mobileDeviceList, predicateList
                )  # TODO: Impose the restiction that all mobile Device declarations come ahead of all else
    print "-*********\n*******\n------------------THE PARTITIONED CODE IS -----------------------------*********\n*******\n"

    for key in partitionedCode:
        print "On node ", key, ", code is \n\n"
        print partitionedCode[key]
Beispiel #42
0
def treeWalker(js_file):
    """Return a name -> value mapping for simple top-level ``var`` statements.

    For every top-level ``VarStatement`` the first node found inside it is
    examined. String initializers are stored by their ``value``; number and
    binary-operation initializers are serialized with ``to_ecma()`` and
    evaluated as Python expressions. Any other initializer is printed
    together with the variable name. If handling a statement raises, the
    statement's source is printed and processing continues.
    """
    program = Parser().parse(js_file)
    variables = {}
    for statement in program.children():
        if type(statement) != ast.VarStatement:
            continue
        try:
            decl = list(nodevisitor.visit(statement))[0]
            init_kind = type(decl.initializer)
            if init_kind == ast.String:
                variables[decl.identifier.value] = decl.initializer.value
            elif init_kind == ast.Number or init_kind == ast.BinOp:
                # NOTE(review): eval() runs the initializer's source text as
                # Python -- only appropriate for trusted input.
                variables[decl.identifier.value] = eval(
                    decl.initializer.to_ecma())
            else:
                print((decl.identifier.value, decl.initializer))
        except Exception:
            print (statement.to_ecma())
    return variables
Beispiel #43
0
 def test_modify_tree(self):
     """Renaming identifier nodes in place must show up in ``to_ecma()`` output."""
     text = """
     for (var i = 0; i < 10; i++) {
       var x = 5 + i;
     }
     """
     expected = textwrap.dedent("""
         for (var hello = 0; hello < 10; hello++) {
           var x = 5 + hello;
         }
         """).strip()

     tree = Parser().parse(text)
     # Rename every identifier called 'i' directly on the parsed tree.
     for node in nodevisitor.visit(tree):
         if not isinstance(node, ast.Identifier):
             continue
         if node.value == 'i':
             node.value = 'hello'

     self.assertMultiLineEqual(tree.to_ecma(), expected)
Beispiel #44
0
def partitionCode(sourceCode):
  parser = Parser()
  sourceCodeInHtml=sourceCode.replace(';','\n');
  print "MULTI PHONE SCRIPT \n----------------------------------------\n", sourceCodeInHtml
  tree=parser.parse(sourceCode);
  fnList=FunctionDefinitionsPass(tree)
  mobileDeviceList=MobileDevicesPass(tree)
  predicateList=PredicatePass(tree)
  for node in nodevisitor.visit(tree):
        if(isinstance(node,ast.ExprStatement)):
            exprNode=node.expr
            if(isinstance(exprNode,ast.FunctionCall)):  # check if this is a function call to an object 
                ParseMethodCalls(exprNode,fnList,mobileDeviceList,predicateList) # TODO: Impose the restiction that all mobile Device declarations come ahead of all else
  print "\n\nPARTITIONED CODE \n----------------------------------------\n",
  returnCode =dict()
  for key in partitionedCode :
        print "On phone name \"",mobileDeviceList[key],"\" :  \n ",
        print "\t",partitionedCode[key],"\n"
        returnCode[mobileDeviceList[key]]=partitionedCode[key]
  print ""
  return returnCode
Beispiel #45
0
def PredicatePass(tree):
    """Return the names of all predicate variables declared in *tree*.

    Walks every node yielded by ``nodevisitor.visit(tree)`` and looks at
    expression statements that are assignments. An assignment of the form
    ``identifier = call(...)`` whose callee text starts with ``getPredicate``
    registers the identifier as a predicate.

    Returns:
        list of predicate variable names, in order of discovery.

    Raises:
        Exception: when an assignment targets a name that has already been
            registered as a predicate.
    """
    predicateList = []
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.ExprStatement):
            continue
        exprNode = node.expr
        if not isinstance(exprNode, ast.Assign):
            continue

        operands = exprNode.children()
        identifierName = operands[0].to_ecma()
        if identifierName in predicateList:
            # Re-assigning a predicate variable is treated as a type error.
            raise Exception("Re-assignment to variable name ", identifierName, " that represents a predicate")

        # Anything other than exactly two operands cannot be a
        # "name = call(...)" assignment.
        if len(operands) != 2:
            continue
        target, value = operands
        if not (isinstance(target, ast.Identifier) and isinstance(value, ast.FunctionCall)):
            continue

        functionName = value.children()[0].to_ecma()
        # TODO: maybe add more calls in the future; needs some analysis to
        # approximate the call results at compile time or defer to runtime.
        if functionName.startswith("getPredicate"):
            predicateList.append(identifierName)
    return predicateList
Beispiel #46
0
def MobileDevicesPass(tree):
    """Return the mobile devices declared in *tree*.

    Walks every node yielded by ``nodevisitor.visit(tree)`` and looks at
    expression statements that are assignments. An assignment of the form
    ``identifier = call(...)`` whose callee text starts with
    ``getDeviceByName`` registers the identifier as a device. The stored
    value is the ECMAScript text of the call's second child with double
    quotes removed.

    Returns:
        dict mapping device variable name to that unquoted text.

    Raises:
        Exception: when an assignment targets a name that has already been
            registered as a device.
        IndexError: when a matching call has fewer than two children.
    """
    mobileDeviceList = dict()
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.ExprStatement):
            continue
        exprNode = node.expr
        if not isinstance(exprNode, ast.Assign):
            continue

        operands = exprNode.children()
        identifierName = operands[0].to_ecma()
        if identifierName in mobileDeviceList:
            raise Exception("Re-assignment to variable name ", identifierName, " that represents a mobile phone")

        # Anything other than exactly two operands cannot be a
        # "name = call(...)" assignment.
        if len(operands) != 2:
            continue
        target, value = operands
        if not (isinstance(target, ast.Identifier) and isinstance(value, ast.FunctionCall)):
            continue

        callParts = value.children()
        functionName = callParts[0].to_ecma()
        # TODO: maybe add more calls in the future; needs some analysis to
        # approximate the call results at compile time or defer to runtime.
        if functionName.startswith("getDeviceByName"):
            mobileDeviceList[identifierName] = callParts[1].to_ecma().replace("\"", "")
    return mobileDeviceList
Beispiel #47
0
def parse_global_js_for_access_id_action_url(global_js):
    """Build the action URL from assignments found in *global_js*.

    The script is parsed and every assignment whose left side has a ``value``
    equal (after stripping quotes) to ``protocol``, ``roDomain``, ``ro`` or
    ``rt`` contributes its quote-stripped right-side value to that part. The
    URL is the concatenation of the first value collected for each part, in
    that order.

    Raises:
        AssertionError: when the same value is assigned twice to one part.
        AttributeError: when a matching assignment's right side has no
            ``value`` attribute.
        IndexError: when one of the four parts is never assigned.
    """
    parser = Parser()
    tree = parser.parse(global_js)

    parts = ['protocol', 'roDomain', 'ro', 'rt']
    UrlParts = namedtuple('UrlParts', parts)
    url_parts = UrlParts([], [], [], [])

    getvalue = operator.attrgetter('value')
    err = "Too many '{}' assignments in global.js."
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.Assign):
            continue
        try:
            left_value = getvalue(node.left).strip('\'"')
        except AttributeError:
            # Left sides without a plain value (e.g. member accesses) are
            # not URL parts.
            continue

        if left_value not in parts:
            continue

        right_value = getvalue(node.right).strip('\'"')
        collected = getattr(url_parts, left_value)
        # Raised explicitly (not via `assert`) so the check survives -O, and
        # the message names the part that was actually duplicated.
        if right_value in collected:
            raise AssertionError(err.format(left_value))
        collected.append(right_value)

    return url_parts.protocol[0] + url_parts.roDomain[0] + url_parts.ro[0] + url_parts.rt[0]
Beispiel #48
0
    def post(self):
        """Parse the JavaScript carried in the request body and classify its nodes.

        The body is decoded as JSON, zero-width spaces are removed from its
        ``"javascript"`` entry, and the result is parsed. Nodes that expose an
        ``identifier`` with a working ``to_ecma()`` are gathered into
        ``jsnodes``; of those, function calls and identifiers also go into
        ``calls`` and function declarations into ``functions``. Nodes lacking
        such an identifier are skipped.
        """
        print(self.request)
        print("request: %s" % self.request.body)
        payload = json.loads(self.request.body)
        javascript = payload["javascript"].replace(u'\u200b', "")
        print(javascript)

        tree = Parser().parse(javascript)

        jsnodes, functions, calls = [], [], []
        for node in nodevisitor.visit(tree):
            try:
                # Evaluated only to filter: raises AttributeError for nodes
                # that have no usable identifier.
                node.identifier.to_ecma()
                jsnodes.append(node)
                if isinstance(node, (ast.FunctionCall, ast.Identifier)):
                    calls.append(node)
                if isinstance(node, ast.FuncDecl):
                    functions.append(node)
            except AttributeError:
                continue
Beispiel #49
0
def extract_g_config(script_text):
    """Return the ``g_config`` object declared in *script_text* as a mapping.

    The script is parsed and the first variable declaration named
    ``g_config`` has its initializer converted by ``extract_object_as_map``.
    Returns ``None`` when no such declaration exists.
    """
    program = Parser().parse(script_text)
    for node in nodevisitor.visit(program):
        if not isinstance(node, ast.VarDecl):
            continue
        if node.identifier.value != 'g_config':
            continue
        return extract_object_as_map(node.initializer)
    return None
Beispiel #50
0
    def visit_UnaryOp(self, node):
        """Render a unary operation.

        The operand is rendered through ``self.visit``. When the operator is
        ``!`` and the rendered operand equals ``0`` the literal text
        ``"true"`` (including the double quotes) is returned; in every other
        case the rendered operand is returned unchanged.
        """
        operand = self.visit(node.value)
        negates_zero = node.op == '!' and operand == 0
        return '"true"' if negates_zero else operand


# Parse the raw `config` bytes (decoded as UTF-8) as JavaScript.
visitor = JSONVisitor()
parser = Parser()
tree = parser.parse(config.decode('utf-8'))

# Locate the PolicyEditorConfig identifier; the node yielded immediately after
# it by the traversal is rendered through JSONVisitor and the walk stops.
flag = False
policy_editor_config = ""
for node in nodevisitor.visit(tree):
    if (isinstance(node, ast.Identifier) and
            node.value == 'PolicyEditorConfig'):
        flag = True
    elif flag:
        policy_editor_config = visitor.visit(node)
        break

# NOTE(review): if the identifier is never found, policy_editor_config is still
# "" here and json.loads raises -- confirm that this is the intended failure.
d = json.loads(policy_editor_config)

# Best-effort creation of basedir: an OSError (presumably "already exists") is
# deliberately ignored.
try:
    os.mkdir(basedir)
except OSError:
    pass

Beispiel #51
0
def ParseObjectAssignments(js_code):
    """Return ``ParseAssignments(node)`` for every object node in *js_code*.

    The code is parsed as JavaScript and the tree is walked with
    ``nodevisitor.visit``; results are listed in traversal order.
    """
    program = Parser().parse(js_code)
    parsed_objects = []
    for node in nodevisitor.visit(program):
        if isinstance(node, ast.Object):
            parsed_objects.append(ParseAssignments(node))
    return parsed_objects