Ejemplo n.º 1
0
def get_forecast(link):
    """Download a forecast page and return the data assigned in its script.

    The page at ``link`` is fetched and the ``<script>`` found inside the
    element ``div#div_wgfcst1`` is parsed as JavaScript.  Every assignment
    in it becomes a dictionary entry built with ``parse_key`` and
    ``parse_value``; assignments whose right-hand side is an array literal
    are additionally read with ``parse_array`` and that result wins.

    :param link: URL of the forecast page.
    :return: dict mapping parsed keys to parsed values.
    """
    response = urllib2.urlopen(link)
    try:
        html_doc = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()
    soup = BeautifulSoup(html_doc, "html.parser")

    forecast_text = soup.find("div", id="div_wgfcst1").find("script").string

    # Parse once and reuse the assignment nodes for both dictionaries.
    forecast_tree = Parser().parse(forecast_text)
    assignments = [
        node for node in nodevisitor.visit(forecast_tree)
        if isinstance(node, ast.Assign)
    ]

    full_data = {
        parse_key(node.left): parse_value(node.right)
        for node in assignments
    }
    # Array-valued assignments override the generic entry stored above.
    full_data.update({
        parse_key(node.left): parse_array(node.right)
        for node in assignments
        if isinstance(node.right, ast.Array)
    })
    return full_data
Ejemplo n.º 2
0
 def _addscript(self, path, date, url, script):
     """Write a script to disk under ``args.prefix`` unless it was seen before.

     The SHA-1 digest of ``script`` is added to the redis set
     ``js_script_checksums``; nothing is written when ``sadd`` reports that
     no member was added.  A script that cannot be parsed as JavaScript is
     stored under a ``.bad/`` prefix, unless its detected MIME type is
     ``text/html`` or ``application/xml``, in which case it is dropped.

     :param path: relative path the script is stored under.
     :param date: retrieval date used in the header comment; may be empty.
     :param url: URL the script was retrieved from.
     :param script: the script source.
     """
     checksum = sha1(script).digest()
     if not redis_db.sadd('js_script_checksums', checksum):
         return
     self.last['script_checksum'] = hexlify(checksum)
     try:
         JSParser().parse(script)
     except Exception:
         # Keep unparsable scripts as 'bad' script if they don't look like HTML.
         mime_type = magic.from_buffer(script.replace('\v', ' '),
                                       mime=True)
         if mime_type in ('text/html', 'application/xml'):
             return
         sys.stderr.write('Cannot parse ' + path + '\n')
         path = '.bad/' + path
     header = '// Retrieved %sfrom %s\n' % (date + ', ' if date else '',
                                            url)
     path = args.prefix + '/' + path
     try:
         os.makedirs(os.path.dirname(path))
     except OSError:
         # Best effort: the directory may already exist.  Any other failure
         # surfaces when the file is opened below.
         pass
     # The context manager closes the file even if a write raises.
     with open(path, 'w') as jsfile:
         jsfile.write(header)
         jsfile.write(script)
Ejemplo n.º 3
0
    def extract(self, node):
        '''
        Collect the code snippets found in ``node`` and its descendants.

        A node of exact type ``Tag`` whose name is listed in ``self.TAGS``
        and whose text is not blank is handed to the JavaScript parser; when
        parsing succeeds its text is one snippet and its children are not
        inspected.  Otherwise the children, if any, are searched recursively.
        Returns the list of snippet strings.
        '''
        snippets = []
        parsed_ok = False

        is_candidate = type(node) is Tag and node.name in self.TAGS
        if is_candidate and node.text.strip() != '':
            try:
                JavaScriptParser().parse(node.text)
            except (SyntaxError, TypeError, AttributeError):
                logger.debug("Code content could not be parsed as JavaScript.")
            else:
                parsed_ok = True
                snippets.append(node.text)

        # Only descend when this node itself did not yield a snippet.
        if not parsed_ok and hasattr(node, 'children'):
            for child in node.children:
                snippets += self.extract(child)

        return snippets
Ejemplo n.º 4
0
    def case(self, case):
        """Parse ``case`` with two separate parsers and require equal results."""
        first = Parser().parse(case)
        second = Parser().parse(case)
        self.assertEqual(first, second)
Ejemplo n.º 5
0
 def _parse_succeeds(self, text):
     """Return True when ``text`` parses, False on SyntaxError or TypeError."""
     try:
         JsParser().parse(text)
     except (SyntaxError, TypeError):
         return False
     return True
Ejemplo n.º 6
0
 def _test_function_expression(self):
     """Parse an ``if`` block that wraps an unnamed function.

     No assertion is made on the result of the parse.
     """
     source = """
     if (true) {
       function() {
         foo;
         location = 'http://anywhere.com';
       }
     }
     """
     Parser().parse(source)
Ejemplo n.º 7
0
 def _test_function_expression(self):
     """Hand an ``if`` block containing an anonymous function to the parser.

     The parse result is discarded and nothing is asserted.
     """
     js_source = """
     if (true) {
       function() {
         foo;
         location = 'http://anywhere.com';
       }
     }
     """
     Parser().parse(js_source)
Ejemplo n.º 8
0
def scan_js(crawler, url, content):
    '''
    Search JavaScript source for assignments whose target text contains
    "url" and pass every string literal found on the right-hand side to
    ``crawler.check_link`` (for example the targets of ajax calls).
    '''
    LOGGER.info('Scanning Javascript on %s' % url)

    def report(string_node):
        # NOTE(review): the logged text drops only the surrounding quotes
        # ([1:-1]) while the link handed to the crawler also drops the
        # first character ([2:-1]); confirm the difference is intended.
        LOGGER.info('Found interesting url in JS: %s' %
                    string_node.value[1:-1])
        crawler.check_link(url, string_node.value[2:-1])

    tree = Parser().parse(content)
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.Assign):  # <something>: <something>
            continue
        target = getattr(node.left, 'value', '')
        if not target or 'url' not in target:  # 'url': <something>
            continue
        rhs = node.right
        if isinstance(rhs, ast.String):  # 'url': 'somestring'
            report(rhs)
        # The right-hand side may be function_call() / variable + 'somestring';
        # its direct attributes are checked for string literals as well.
        for attribute in rhs.__dict__.values():
            if isinstance(attribute, ast.String):
                report(attribute)
Ejemplo n.º 9
0
def extract_glow_lib():
    """Read the ``glowscript_libraries`` table out of ``untrusted/run.js``.

    The file is parsed as JavaScript and searched for an assignment of an
    object literal to a property named ``glowscript_libraries``.  When no
    such assignment exists a message is printed and the process exits with
    status -1.

    :return: ``preproc_lib_path`` applied to a dict that maps each property
        name of the object to the list of string items in its value.
    """
    # Imported locally under its own name because ``ast`` in this module
    # refers to the JavaScript AST classes.
    from ast import literal_eval

    runjs = norm_path('untrusted/run.js')
    with open(runjs) as f:
        tree = JSParser().parse(f.read())

    for node in nodevisitor.visit(tree):
        if (isinstance(node, ast.Assign) and
                isinstance(node.left, ast.DotAccessor) and
                node.left.identifier.value == 'glowscript_libraries' and
                isinstance(node.right, ast.Object)):
            libraries = node.right
            break
    else:
        print('Parsing {} failed'.format(runjs))
        exit(-1)

    # SECURITY: the string literals come from a file on disk and used to be
    # passed to eval(); literal_eval decodes the quoted text without
    # executing arbitrary expressions.
    table = {}
    for prop in libraries.properties:
        table[prop.left.value] = [
            literal_eval(lib.value)
            for lib in prop.right.items
            if isinstance(lib, ast.String)
        ]
    return preproc_lib_path(table)
Ejemplo n.º 10
0
def load_data():
    """Load the bug table, the newest test results and the parsed site data.

    :return: dict with the keys ``masterbugtable`` (decoded JSON),
        ``test_result_files`` (decoded index), ``test_results`` (rows of the
        results file keyed by their first column) and ``tests_parsed`` (the
        syntax tree of ``sitedata.js``).
    """
    # masterbugtable.js: everything from the first "{" on is decoded as JSON.
    with open("./data/masterbugtable.js", "r") as f:
        raw = f.read()
    masterbugtable = json.loads(raw[raw.index("{"):])

    with open("./data/testing/index.json", "r") as f:
        test_file_index = json.load(f)

    # The last entry of the index is taken as the latest test data file.
    test_data = {}
    with open("./data/testing/%s" % test_file_index[-1], "r") as f:
        for row in csv.reader(f):
            test_data[str(row[0])] = {
                "bug": row[0],
                "test_date": row[1],
                "ua": row[2],
                "test_state": row[3],
            }

    with open("./data/sitedata.js", "r") as f:
        tree = Parser().parse(f.read())

    return {
        "masterbugtable": masterbugtable,
        "test_result_files": test_file_index,
        "test_results": test_data,
        "tests_parsed": tree,
    }
Ejemplo n.º 11
0
def minify(text, mangle=False, mangle_toplevel=False):
    """Return the minified form of the JavaScript source ``text``.

    When ``mangle`` is true the tree is passed through ``mangler.mangle``
    first, with ``mangle_toplevel`` forwarded as its ``toplevel`` option.
    """
    tree = Parser().parse(text)
    if mangle:
        mangler.mangle(tree, toplevel=mangle_toplevel)
    return ECMAMinifier().visit(tree)
Ejemplo n.º 12
0
def position_info_from_naver(url):
    """Fetch a Naver land article and return the data a visitor extracts.

    The article number is taken from the last path segment for
    ``m.land.naver.com`` URLs and from the ``articleNo`` query parameter
    for ``new.land.naver.com`` URLs.  The mobile article page is then
    downloaded and each script mentioning
    ``land.articleDetail.jsonPageData`` is parsed and visited with
    ``NaverDataVisitor``.

    :param url: article URL on one of the two supported hosts.
    :return: ``res`` of the visitor for the last matching script, or an
        empty dict when no script matches.
    :raises Exception: when the host is not one of the two supported ones.
    """
    parsed_obj = urlparse(url)
    if parsed_obj.netloc == 'm.land.naver.com':
        article_info_no = parsed_obj.path.split('/')[-1]
    elif parsed_obj.netloc == 'new.land.naver.com':
        article_info_no = dict(parse_qsl(parsed_obj.query))['articleNo']
    else:
        raise Exception('unknown url : ' + parsed_obj.netloc)

    r = requests.get('https://m.land.naver.com/article/info/' + str(article_info_no))
    soup = BeautifulSoup(r.content, "html.parser")
    res = dict()

    marker = 'land.articleDetail.jsonPageData'
    for sc in soup.findAll("script"):
        text = sc.text
        pos = text.find(marker)
        # find() returns -1 when absent; index 0 is a valid match.
        if pos == -1:
            continue
        end = text.find(';', pos)
        if end == -1:
            # No terminating semicolon: keep the rest of the script instead
            # of letting -1 silently cut off the final character.
            end = len(text)
        tree = Parser().parse(text[pos:end])
        visitor = NaverDataVisitor()
        visitor.visit(tree)
        res = visitor.res

    return res
Ejemplo n.º 13
0
def get_property_attributes(url):
    """Scrape the attribute assignments from a property advertisement page.

    :param url: URL of the advertisement.
    :return: dict built from the ``value`` of the left and right side of
        every assignment in the fourth ``text/javascript`` script, plus a
        ``'"date sold"'`` entry; ``None`` when the page title contains
        'Real Estate Properties'.
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # A title containing 'Real Estate Properties' is treated as "no ad
    # found".  This is checked before any script is indexed so that such
    # pages return None rather than failing on a missing script.
    if soup.title.string.find('Real Estate Properties') != -1:
        return None

    script = soup.findAll('script', {'type': 'text/javascript'})[3]

    # If the ad is archived, put in a dummy date, else take the date from
    # the advertisement title.
    if soup.find("span", "status-label label-archive") is not None:
        date = '31 Dec 9999'
    else:
        date = re.findall(r'\d{2}\s\w{3}\s\d{4}', soup.title.string)[0]

    tree = Parser().parse(script.text)
    fields = {
        getattr(node.left, 'value', ''): getattr(node.right, 'value', '')
        for node in nodevisitor.visit(tree)
        if isinstance(node, ast.Assign)
    }
    fields['"date sold"'] = '"' + date + '"'
    return fields
Ejemplo n.º 14
0
 def test_func(self):
     """Mangle ``input`` at top level and compare the output with ``expected``.

     Both texts are dedented and stripped before they are compared.
     """
     tree = Parser().parse(input)
     mangle(tree, toplevel=True)
     actual = textwrap.dedent(tree.to_ecma()).strip()
     wanted = textwrap.dedent(expected).strip()
     self.assertMultiLineEqual(actual, wanted)
Ejemplo n.º 15
0
def analyzeJSCodesFinerBlock(script, display=False):
  """Parse ``script`` and return the visitor's first-level sequences.

  A surrounding ``<!-- ... -->`` wrapper is removed before parsing.  For
  each entry of ``visitor.scripts`` a line ``AST_TIME: <seconds> <length>``
  is printed, where the elapsed time is split in proportion to the entry's
  length relative to the whole script.

  :param script: JavaScript source text.
  :param display: forwarded to ``MyVisitor``.
  :return: ``(visitor.first_level_seq, visitor.scripts)``, or
      ``(None, None)`` when parsing fails or the two lists differ in length.
  """
  try:
    t1 = time.time()
    parser = Parser()
    script = script.strip()
    if script.startswith('<!--') and script.endswith('-->'):
      script = script[4:-3]
    tree = parser.parse(script)
    visitor = MyVisitor(display)
    visitor.visit(tree, 0)
    if len(visitor.first_level_seq) != len(visitor.scripts):
      sys.stderr.write(
          "error parsing script: scripts and seqs length inconsistent "
          + script[:100] + "\n")
      return None, None
    total_time = time.time() - t1
    total_len = float(len(script))
    try:
      portions = [len(x) / total_len for x in visitor.scripts]
      for share, block in zip(portions, visitor.scripts):
        print("AST_TIME: %f %d" % (total_time * share, len(block)))
    except Exception:
      # The timing lines are best-effort diagnostics (an empty script makes
      # total_len zero); they must never affect the returned result.
      pass
    return visitor.first_level_seq, visitor.scripts
  except Exception as e:
    sys.stderr.write(
        "error parsing script: " + str(e) + " || [START]" + script[:100]
        + "[END]" + "\n")
    return None, None
Ejemplo n.º 16
0
def chapter_url2image_urls(chapter_url):
    """Return the image URLs of a chapter, ordered by page.

    Each script obtained through ``get_info_from_url`` is parsed; the node
    visited just before the identifier ``image_list`` (or the last node when
    that identifier is absent) is remembered, and the one from the last
    script is used.  Its source text is matched against
    ``_image_list_match_pattern``; group 1 is decoded twice (a quoted
    string that itself holds a mapping literal) and every entry's
    base64-encoded ``src`` is decoded.

    :param chapter_url: URL of the chapter page.
    :return: tuple of image URLs sorted by ``(page, src)``.
    :raises AssertionError: when no node was found or the pattern fails.
    """
    # Imported locally under its own name because ``ast`` in this module
    # refers to the JavaScript AST classes.
    from ast import literal_eval

    g = get_info_from_url(chapter_url, chapter2images)

    p = Parser()
    # Bound up front so an empty ``g`` fails the assertion below instead of
    # raising NameError.
    pre = None
    for t2_or_t3, (_slot_idx, _pattern_idx, _info_pattern) in g:
        _tag, _, data = t2_or_t3
        tree = p.parse(data)
        pre = None
        for node in nodevisitor.visit(tree):
            if isinstance(node, ast.Identifier) and node.value == 'image_list':
                break
            pre = node

    assert pre is not None
    m = _image_list_match_pattern.match(pre.to_ecma())
    assert m is not None
    # SECURITY: this text is scraped from a web page and used to be passed
    # to eval(); literal_eval accepts the same literals without executing
    # arbitrary expressions.
    image_list = literal_eval(literal_eval(m.group(1)))

    pages = sorted(
        (info['page'], base64.b64decode(info['src']).decode('ascii'))
        for info in image_list.values()
    )
    return tuple(src for _, src in pages)
Ejemplo n.º 17
0
 def find_first_match(test_file_content, pos):
   """Return the test name found after marking ``pos`` in the file content.

   ``None`` is returned when ``insert_mark`` yields an empty result.
   """
   marked = TestMethodMatcher.UnitTest.insert_mark(test_file_content, pos)
   if marked:
     return TestMethodMatcher.UnitTest.get_test_name(Parser().parse(marked))
   return None
Ejemplo n.º 18
0
def parse_global_js_for_access_id_action_url(global_js):
    """Assemble a URL from four assignments found in ``global_js``.

    The source is searched for assignments to ``protocol``, ``roDomain``,
    ``ro`` and ``rt`` (quotes around names and values are stripped).  The
    first value collected for each name is concatenated in that order.

    :param global_js: JavaScript source text.
    :return: ``protocol + roDomain + ro + rt``.
    :raises AssertionError: when the same value is assigned to a name twice.
    :raises IndexError: when one of the four names is never assigned.
    """
    tree = Parser().parse(global_js)

    parts = ['protocol', 'roDomain', 'ro', 'rt']
    UrlParts = namedtuple('UrlParts', parts)
    url_parts = UrlParts([], [], [], [])

    getvalue = operator.attrgetter('value')
    err = "Too many '{}' assignments in global.js."
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.Assign):
            continue
        try:
            left_value = getvalue(node.left).strip('\'"')
        except AttributeError:
            # Assignment targets without a ``value`` attribute are skipped.
            continue
        if left_value not in parts:
            continue

        right_value = getvalue(node.right).strip('\'"')
        collected = getattr(url_parts, left_value)
        # Name the part that was actually duplicated in the message.
        assert right_value not in collected, err.format(left_value)
        collected.append(right_value)

    return (url_parts.protocol[0] + url_parts.roDomain[0] +
            url_parts.ro[0] + url_parts.rt[0])
Ejemplo n.º 19
0
def minify(text, mangle=False):
    """Return the minified form of the JavaScript source ``text``.

    When ``mangle`` is true the parsed tree is passed through
    ``mangler.mangle`` before it is minified.
    """
    tree = Parser().parse(text)
    if mangle:
        mangler.mangle(tree)
    return ECMAMinifier().visit(tree)
Ejemplo n.º 20
0
 def assertUnusedObjects(self, source, expected):
     """Run ``UnusedObjectsVisitor`` on ``source`` and compare with ``expected``.

     ``maxDiff`` is cleared so that a failure shows the complete difference.
     """
     tree = Parser().parse(source)
     UnusedObjectsVisitor().do(tree)
     self.maxDiff = None
     actual = tree.to_ecma()
     self.assertSequenceEqual(actual, expected)
Ejemplo n.º 21
0
def analyzeJSCodesFinerBlock(script, display=False):
    """Parse ``script`` and return the visitor's first-level sequences.

    A surrounding ``<!-- ... -->`` wrapper is removed before parsing.  For
    each entry of ``visitor.scripts`` a line
    ``AST_TIME: <seconds> <length>`` is printed, where the elapsed time is
    split in proportion to the entry's length relative to the whole script.

    :param script: JavaScript source text.
    :param display: forwarded to ``MyVisitor``.
    :return: ``(visitor.first_level_seq, visitor.scripts)``, or
        ``(None, None)`` when parsing fails or the two lists differ in
        length.
    """
    try:
        t1 = time.time()
        parser = Parser()
        script = script.strip()
        if script.startswith('<!--') and script.endswith('-->'):
            script = script[4:-3]
        tree = parser.parse(script)
        visitor = MyVisitor(display)
        visitor.visit(tree, 0)
        if len(visitor.first_level_seq) != len(visitor.scripts):
            sys.stderr.write(
                "error parsing script: scripts and seqs length inconsistent "
                + script[:100] + "\n")
            return None, None
        total_time = time.time() - t1
        total_len = float(len(script))
        try:
            portions = [len(x) / total_len for x in visitor.scripts]
            for share, block in zip(portions, visitor.scripts):
                print("AST_TIME: %f %d" % (total_time * share, len(block)))
        except Exception:
            # The timing lines are best-effort diagnostics (an empty script
            # makes total_len zero); they must never affect the result.
            pass
        return visitor.first_level_seq, visitor.scripts
    except Exception as e:
        sys.stderr.write(
            "error parsing script: " + str(e) + " || [START]" + script[:100]
            + "[END]" + "\n")
        return None, None
Ejemplo n.º 22
0
    def parse_country(self, response):
        """Write the first chart of a country page to ``data/<country>.csv``.

        The country name is the part of the chart title after " in ", with
        a leading "the " removed.  The chart's script is parsed; the first
        non-empty ``categories`` assignment supplies the dates and the
        first non-empty ``data`` assignment the case numbers.  One
        ``date,count`` line is written per pair.

        :param response: response object offering ``xpath``.
        :raises ValueError: when the title does not contain " in ".
        :raises AssertionError: when dates or counts are missing or their
            lengths differ.
        """
        # Imported locally under its own name because ``ast`` in this
        # module refers to the JavaScript AST classes.
        from ast import literal_eval

        charts = response.xpath('//*[@class="row graph_row"]')
        total_corona_chart = charts[0]
        script = total_corona_chart.xpath('div/script/text()').extract()[0]
        title = total_corona_chart.xpath('div/h3/text()').extract()[0]
        try:
            country_name = title[title.index(" in ") + 4:]
        except ValueError:
            # str.index raises ValueError when " in " is absent.
            raise ValueError("Worldometer changed their labels. "
                             "Hold your pain, Harold.")
        if country_name[:4] == "the ":
            country_name = country_name[4:]

        tree = Parser().parse(script)
        dates = None   # labels of the chart
        counts = None  # corresponding number of cases
        for node in nodevisitor.visit(tree):
            if not isinstance(node, ast.Assign):
                continue
            name = getattr(node.left, 'value', '')
            if name == 'categories' and not dates:
                print("\nparsing dates\n")
                # SECURITY: the literals are scraped from a web page and
                # used to be passed to eval(); literal_eval decodes them
                # without executing arbitrary expressions.
                dates = [literal_eval(getattr(s, 'value', ''))
                         for s in getattr(node.right, 'items', '')]
            elif name == 'data' and not counts:
                print("\nparsing number of cases\n")
                counts = [int(getattr(n, 'value', ''))
                          for n in getattr(node.right, 'items', '')]
        assert dates and counts and len(dates) == len(counts)
        with open("data/%s.csv" % country_name, 'w+') as f:
            for date, count in zip(dates, counts):
                f.write(date + ',' + str(count) + '\n')
Ejemplo n.º 23
0
def card_price_history(setname, cardname):
    '''
    Scrape the price history of a card from MTGPrice.com by parsing the
    JavaScript embedded in the card page.
    Input:
        setname and cardname are strings; whitespace runs in each are
        replaced by underscores to build the page address.
    Output:
        A numpy array of price history, each 'row' in the form
        [timestamp, price]
    '''
    set_slug = '_'.join(setname.split())
    card_slug = '_'.join(cardname.split())
    link = 'https://www.mtgprice.com/sets/' + set_slug + '/' + card_slug
    page = requests.get(link)
    soup = BeautifulSoup(page.content, 'html.parser')

    marker = 'var results = ['
    history = []
    for script in soup.findAll('script', type='text/javascript'):
        if marker not in script.text:
            continue
        tree = Parser().parse(script.text)
        for node in nodevisitor.visit(tree):
            if not isinstance(node, ast.Assign):
                continue
            if getattr(node.left, 'value', '') != '"data"':
                continue
            history.extend(
                [point.items[0].value, point.items[1].value]
                for point in node.right.items)
            # Only the first "data" assignment of a script is used.
            break
    return np.array(history)
Ejemplo n.º 24
0
def extract_g_config(script_text):
    """Return the ``g_config`` variable of a script as a map.

    The initializer of the first variable declaration named ``g_config``
    is passed to ``extract_object_as_map``; ``None`` is returned when no
    such declaration exists.
    """
    tree = Parser().parse(script_text)
    declarations = (
        node for node in nodevisitor.visit(tree)
        if isinstance(node, ast.VarDecl)
    )
    for declaration in declarations:
        if declaration.identifier.value == 'g_config':
            return extract_object_as_map(declaration.initializer)
    return None
Ejemplo n.º 25
0
def inital_check_for_obfuscation_condtiion_sensitiveFunctions(js_text):
    """Scan JavaScript for ``if`` statements and listed function names.

    Every identifier value in the parsed source is collected and compared
    with ``obfuscation_function_names`` and ``profiling_function_names``.

    :param js_text: JavaScript source text.
    :return: tuple ``(if_condition, obfuscation, profiling, ob_list,
        pro_list)``: whether an ``If`` node occurs, whether any
        obfuscation / profiling name occurs, and the sets of names found.
    """
    tree = Parser().parse(js_text)

    keywords = set()
    if_condition = False

    for node in nodevisitor.visit(tree):
        if isinstance(node, If):
            if_condition = True
            continue

        # Walk below ``node`` using a list as a LIFO stack.  Only dotted
        # accesses (a.b.getStringfromChar) are expanded into their children.
        pending = [node]
        while pending:
            current = pending.pop()
            if isinstance(current, DotAccessor):
                try:
                    pending.extend(current.children())
                except Exception:
                    # Best effort: an accessor whose children cannot be
                    # listed is skipped.
                    pass
                continue

            if isinstance(current, Identifier):
                keywords.add(current.value)

    ob_list = {
        name for name in obfuscation_function_names if name in keywords
    }
    pro_list = {
        name for name in profiling_function_names if name in keywords
    }

    return if_condition, bool(ob_list), bool(pro_list), ob_list, pro_list
Ejemplo n.º 26
0
 def test_func(self):
     """Compare the top-level mangled form of ``input`` with ``expected``.

     Both sides are dedented and stripped before the comparison.
     """
     tree = Parser().parse(input)
     mangle(tree, toplevel=True)
     produced = textwrap.dedent(tree.to_ecma()).strip()
     reference = textwrap.dedent(expected).strip()
     self.assertMultiLineEqual(produced, reference)
Ejemplo n.º 27
0
 def assertFoldingObjects(self, source, expected):
     """Run ``FoldingVisitor`` on ``source`` and compare with ``expected``.

     The generated source is printed before the comparison, and ``maxDiff``
     is cleared so that a failure shows the complete difference.
     """
     tree = Parser().parse(source)
     folder = foldingvisitor.FoldingVisitor()
     folder.do(tree)
     print(tree.to_ecma())
     self.maxDiff = None
     self.assertSequenceEqual(tree.to_ecma(), expected)
Ejemplo n.º 28
0
def invJSToZ3(inv, typeEnv):
    """Convert a single JavaScript expression statement with ``jsToZ3Expr``.

    ``inv`` must parse to a program holding exactly one expression
    statement; this is checked with assertions.  The statement's
    expression and ``typeEnv`` are handed to ``jsToZ3Expr``.
    """
    program = Parser().parse(inv)
    assert isinstance(program, jsast.Program)

    statements = program.children()
    assert len(statements) == 1

    only_statement = statements[0]
    assert isinstance(only_statement, jsast.ExprStatement)
    return jsToZ3Expr(only_statement.expr, typeEnv)
Ejemplo n.º 29
0
def whileExtract(s):
    '''Return the source text of every while loop found in the script.'''
    tree = Parser().parse(s)
    return [
        node.to_ecma()
        for node in nodevisitor.visit(tree)
        if isinstance(node, ast.While)
    ]
Ejemplo n.º 30
0
 def test_bug_no_semicolon_at_the_end_of_block_plus_newline_at_eof(self):
     """A function block followed by a newline at EOF yields children."""
     # https://github.com/rspivak/slimit/issues/3
     source = textwrap.dedent("""
     function add(x, y) {
       return x + y;
     }
     """)
     tree = Parser().parse(source)
     children = tree.children()
     self.assertTrue(bool(children))
Ejemplo n.º 31
0
def _parse_redirect_to_security_challenge_script(script: str) -> str:
    """Return the URL assigned to ``window.location`` in ``script``.

    The script is parsed and searched for the first assignment whose target
    is the two-part accessor ``window.location``; the assigned value is
    returned with surrounding quotes stripped.  When no such assignment
    exists the function falls off the end and returns ``None``.

    :param script: JavaScript source of the redirect script.
    """
    tree = Parser().parse(script)
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.Assign):
            continue
        target = getattr(node, 'left', None)
        if not isinstance(target, ast.DotAccessor):
            continue
        children = target.children()
        if len(children) != 2:
            continue
        # getattr: a child can itself be a nested accessor (a.b.c), which
        # has no ``value`` attribute; such targets are simply not a match.
        owner = getattr(children[0], 'value', None)
        member = getattr(children[1], 'value', None)
        if owner == 'window' and member == 'location':
            return node.right.value.strip('\'"')
Ejemplo n.º 32
0
def parse_script(data):
    """Return every variable declaration node found in the script ``data``."""
    # Hack. Fix javascript syntax issue in steam's response: the empty
    # second argument of BuildGameRow is replaced with 0 before parsing.
    patched = data.replace('BuildGameRow(game, )', 'BuildGameRow(game, 0)')
    tree = Parser().parse(patched)
    variables = []
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.VarDecl):
            variables.append(node)
    return variables
Ejemplo n.º 33
0
 def test_bug_no_semicolon_at_the_end_of_block_plus_newline_at_eof(self):
     """Parsing a function block that ends with a newline yields children."""
     # https://github.com/rspivak/slimit/issues/3
     js_text = textwrap.dedent("""
     function add(x, y) {
       return x + y;
     }
     """)
     parsed = Parser().parse(js_text)
     has_children = bool(parsed.children())
     self.assertTrue(has_children)
Ejemplo n.º 34
0
def addAllIntEnv(inv, env=None):
    """Map every identifier occurring in ``inv`` to ``Int`` in ``env``.

    :param inv: JavaScript source text to scan for identifiers.
    :param env: mapping to update in place; a new dict is used when omitted.
    :return: the updated mapping.
    """
    # Identity test: None is a singleton and must not go through ``==``.
    if env is None:
        env = {}

    for node in nodevisitor.visit(Parser().parse(inv)):
        if isinstance(node, jsast.Identifier):
            env[node.value] = Int

    return env
Ejemplo n.º 35
0
def parse(text):
    """
    Turn a JavaScript source string into a source tree through the Parser
    provided by the slimit.parser module.

    The module-level ``_parser`` is created on first use and reused by
    later calls.
    """
    global _parser

    parser = _parser
    if parser is None:
        parser = _parser = Parser()
    return parser.parse(text)
Ejemplo n.º 36
0
def analyzeJSCodes(script, display=False):
  """Parse ``script``, run ``MyVisitor`` over it and return its node order.

  On any exception a message containing the error and the script is
  written to stderr and ``None`` is returned.
  """
  try:
    tree = Parser().parse(script)
    visitor = MyVisitor(display)
    visitor.visit(tree, 0)
  except Exception as e:
    sys.stderr.write("error parsing script: " + str(e) + " || " + script + "\n")
    return None
  return visitor.node_order_list
Ejemplo n.º 37
0
def rewriteJSCodes(script, display=False):
  """Parse ``script``, apply ``RewriteVisitor`` and print the resulting code.

  On any exception a message containing the error and the script is
  written to stderr.  Nothing is returned.
  """
  try:
    tree = Parser().parse(script)
    rewriter = RewriteVisitor(display)
    rewriter.visit(tree, 0)
    rewritten = ECMAVisitor().visit(tree)
    print(rewritten)
  except Exception as e:
    sys.stderr.write("error parsing script: " + str(e) + " || " + script + "\n")
Ejemplo n.º 38
0
def removeDeclarations(js_file):
  """Return the source without var statements initialised by simple values.

  Every top-level statement is re-emitted on its own line, except a
  ``var`` statement whose first visited node has an initializer of type
  String, Number or BinOp.
  """
  dropped_initializers = (ast.String, ast.Number, ast.BinOp)
  kept = []
  for child in Parser().parse(js_file).children():
    if type(child) is ast.VarStatement:
      first = list(nodevisitor.visit(child))[0]
      if type(first.initializer) in dropped_initializers:
        continue
    kept.append(child.to_ecma() + "\n")
  return "".join(kept)
Ejemplo n.º 39
0
def analyzeJSCodes(script, display=False):
    """Return the node order ``MyVisitor`` records for ``script``.

    On any exception a message containing the error and the script is
    written to stderr and ``None`` is returned.
    """
    try:
        tree = Parser().parse(script)
        walker = MyVisitor(display)
        walker.visit(tree, 0)
    except Exception as e:
        sys.stderr.write(
            "error parsing script: " + str(e) + " || " + script + "\n")
        return None
    return walker.node_order_list
def main():
    """Apply ``ConstantReductionVisitor`` to a file and print the result.

    The file named by ``args.filename`` is read and parsed; the visitor is
    created with ``args.debug`` and the tree it returns is printed as
    source code.
    """
    args = parse_args()

    with open(args.filename) as f:
        tree = Parser().parse(f.read())

    reducer = ConstantReductionVisitor(args.debug)
    reduced = reducer.visit(tree)
    print(reduced.to_ecma())
Ejemplo n.º 41
0
def main():
    """Read the file given on the command line, reduce it and print it.

    The source is parsed, passed through ``ConstantReductionVisitor``
    (created with ``args.debug``) and the returned tree is printed as
    source code.
    """
    args = parse_args()

    with open(args.filename) as handle:
        program = Parser().parse(handle.read())

    program = ConstantReductionVisitor(args.debug).visit(program)
    print(program.to_ecma())
Ejemplo n.º 42
0
def get_forecast(link):
    """Fetch a forecast page and return the data assigned in its script.

    The ``<script>`` inside ``div#div_wgfcst1`` of the page at ``link`` is
    parsed as JavaScript.  Each assignment contributes an entry built with
    ``parse_key`` and ``parse_value``; for assignments of array literals
    the value produced by ``parse_array`` replaces that entry.

    :param link: URL of the forecast page.
    :return: dict mapping parsed keys to parsed values.
    """
    response = urllib2.urlopen(link)
    try:
        html_doc = response.read()
    finally:
        # Close the connection whether or not the read succeeds.
        response.close()
    soup = BeautifulSoup(html_doc, "html.parser")

    forecast_text = soup.find("div", id="div_wgfcst1").find("script").string

    # One parse and one tree walk serve both dictionaries.
    forecast_tree = Parser().parse(forecast_text)
    assignments = [node for node in nodevisitor.visit(forecast_tree)
                   if isinstance(node, ast.Assign)]

    full_data = {parse_key(node.left): parse_value(node.right)
                 for node in assignments}

    # Array-valued assignments take precedence over the generic values.
    forecast = {parse_key(node.left): parse_array(node.right)
                for node in assignments
                if isinstance(node.right, ast.Array)}

    full_data.update(forecast)
    return full_data
Ejemplo n.º 43
0
def parse_JavaScript(js):
    """Record the client-interface functions found in ``js``.

    A function declaration is recorded when it has at least two
    parameters, the first is ``this$static``, the last is ``callback`` and
    its source mentions ``createStreamWriter``.  For each one a dict is
    appended to the module-level ``extracted`` list with the cleaned
    function name, the number of parameters between the first and last,
    the names of those parameters and the result of ``get_param_types``.

    :param js: JavaScript source text.
    """
    tree = Parser().parse(js)

    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.FuncDecl):
            continue

        parameters = node.parameters
        if len(parameters) < 2:
            continue
        if parameters[0].value != "this$static":
            continue
        # The last parameter has to be the callback.
        if parameters[-1].value != "callback":
            continue

        # The function calls createStreamWriter if it is used in the
        # client interface.
        source = node.to_ecma()
        if "createStreamWriter" not in source:
            continue

        # Everything except 'this$static' and 'callback' is an argument
        # needed for the GWT request; with exactly two parameters this is
        # an empty list and a count of zero.
        num_of_params = len(parameters) - 2
        params = [
            param.value for param in parameters
            if param.value not in ("this$static", "callback")
        ]

        # Strip "$" and "_<digits>" to obtain the function name.
        function = node.identifier.value.replace("$", "")
        function = re.sub(r'_\d+', '', function)

        # A list is used since functions of the same name may appear with
        # different signatures.
        extracted.append({
            "function": function,
            "num_of_args": num_of_params,
            "args": params,
            "arg_type_data": get_param_types(function, source),
        })
Ejemplo n.º 44
0
def rewriteJSCodes(script, display=False):
    """Print ``script`` after it has been processed by ``RewriteVisitor``.

    On any exception a message containing the error and the script is
    written to stderr.  Nothing is returned.
    """
    try:
        tree = Parser().parse(script)
        rewriter = RewriteVisitor(display)
        rewriter.visit(tree, 0)
        rewritten = ECMAVisitor().visit(tree)
        print(rewritten)
    except Exception as e:
        sys.stderr.write(
            "error parsing script: " + str(e) + " || " + script + "\n")
Ejemplo n.º 45
0
def episode():
    """Resolve the video of an episode page and register it as a directory item.

    The first script inside the ``video-player`` div that mentions
    ``CryptoJS`` is parsed.  When it declares both ``dailytoday`` and
    ``subject`` the URL comes from ``decrypt.decrypt_url``; otherwise it is
    cut out of the script text after the second ``bigmumbai = ``.  Without
    such a script an ``iframe`` in the div, if present, is turned into a
    YouTube plugin URL.
    """
    url = h.extract_var(args, 'url')
    name = h.extract_var(args, 'name')

    soup = BeautifulSoup(h.make_request(url, cookie_file, cookie_jar))
    div = h.bs_find_with_class(soup, 'div', 'video-player')

    script = None
    for candidate in div.findAll('script'):
        if 'CryptoJS' in candidate.text:
            script = candidate
            break

    url = ''
    if not script:
        iframe = div.find('iframe')
        if iframe:
            youtube_url = dict(iframe.attrs)['src']
            print(youtube_url)
            video_id = urlparse.urlparse(youtube_url).path.replace('/embed/', '')
            url = 'plugin://plugin.video.youtube/play/?video_id=%s' % video_id
            h.add_dir_video(addon_handle, name, url, '', '')
        return

    def initializer_text(statement):
        # Source text of the declared value without its first and last
        # character.
        return statement.children()[0].children()[1].to_ecma()[1:-1]

    _dailytoday = ''
    _subject = ''
    tree = Parser().parse(script.text)
    for node in tree.children():
        ecma = node.to_ecma()
        if ecma.startswith('var dailytoday ='):
            _dailytoday = initializer_text(node)
        elif ecma.startswith('var subject ='):
            _subject = initializer_text(node)

    if _dailytoday and _subject:
        url = decrypt.decrypt_url(_dailytoday, _subject)
    else:
        url = script.text.split('bigmumbai = ', 2)[2].split(';')[0][1:-1]

    print(url)
    info = h.bs_find_with_class(soup, 'div', 'vp-info')
    plot = info.find('span', {'itemprop': 'description'}).text
    video = soup.find('div', {'itemprop': 'video'})
    thumbnail = video.find('meta', {'itemprop': 'thumbnailUrl'})['content']
    h.add_dir_video(addon_handle, name, url, thumbnail, plot)
Ejemplo n.º 46
0
def removeDeclarations(js_file):
    """Re-emit the source, dropping var statements with simple initializers.

    Each top-level statement is written on its own line.  A ``var``
    statement is left out when the initializer of its first visited node
    is of type String, Number or BinOp.
    """
    simple_types = (ast.String, ast.Number, ast.BinOp)
    pieces = []
    for statement in Parser().parse(js_file).children():
        if type(statement) is ast.VarStatement:
            first_node = list(nodevisitor.visit(statement))[0]
            if type(first_node.initializer) in simple_types:
                continue
        pieces.append(statement.to_ecma() + "\n")
    return "".join(pieces)
Ejemplo n.º 47
0
def read_js_object(code):
    """Convert JavaScript consisting of ``var`` statements into Python data.

    The program becomes a dict keyed by variable name.  Object literals
    become dicts, arrays become lists, strings lose their surrounding
    quotes, and numbers, identifiers, booleans and null are returned as
    the node's ``value``.  ``+`` between two string literals or between two
    number literals is folded by adding the converted operands.

    :raises ValueError: for a non-var top-level statement, an operator
        other than ``+``, or ``+`` on anything but two literals of one kind.
    :raises Exception: for any other node type.
    """
    parser = Parser()
    scalar_types = (ast.Number, ast.Identifier, ast.Boolean, ast.Null)

    def convert(node):
        if isinstance(node, ast.Program):
            variables = {}
            for statement in node:
                if not isinstance(statement, ast.VarStatement):
                    raise ValueError("All statements should be var statements")
                name, value = convert(statement)
                variables[name] = value
            return variables

        if isinstance(node, ast.VarStatement):
            return convert(node.children()[0])

        if isinstance(node, ast.VarDecl):
            return convert(node.identifier), convert(node.initializer)

        if isinstance(node, ast.Object):
            mapping = {}
            for prop in node:
                name = convert(prop.left)
                mapping[name] = convert(prop.right)
            return mapping

        if isinstance(node, ast.BinOp):
            # simple constant folding
            if node.op != '+':
                raise ValueError("Cannot do operator '{}'".format(node.op))
            left, right = node.left, node.right
            both_strings = (isinstance(left, ast.String)
                            and isinstance(right, ast.String))
            both_numbers = (isinstance(left, ast.Number)
                            and isinstance(right, ast.Number))
            if not (both_strings or both_numbers):
                raise ValueError(
                    "Cannot + on anything other than two literals")
            # NOTE(review): number operands are added as whatever
            # ``value`` holds; confirm that this is numeric and not text.
            return convert(left) + convert(right)

        if isinstance(node, ast.String):
            return node.value.strip('"').strip("'")

        if isinstance(node, ast.Array):
            return [convert(item) for item in node]

        if isinstance(node, scalar_types):
            return node.value

        raise Exception("Unhandled node: {}".format(node))

    return convert(parser.parse(code))
def ParsingOfFunction():
    """Feed each ``;``-terminated statement of ``file.txt`` to the lexer.

    Lines of ``file.txt`` are accumulated until one containing ``;`` is
    seen; the accumulated text (with leading ``;`` characters removed) is
    then passed to ``LexingofFunction`` and accumulation starts over.  Text
    after the last ``;``-bearing line is never lexed.  Finally the ``name``
    and ``level`` of every entry in the module-level ``heap`` are printed.

    Returns ``None``.
    """
    # Read everything up front and close the file before lexing starts.
    with open('file.txt') as source:
        lines = [line.rstrip('\n') for line in source]

    pending = ""
    for line in lines:
        pending += line
        if ';' in line:
            statement = pending.lstrip(";")
            print("Going into the lexer function --------------")
            LexingofFunction(statement)
            pending = ""

    print("----------------------------- print heap now")

    for entry in heap:
        print(entry.name)
        print(entry.level)
Ejemplo n.º 49
0
 def _addscript(self, path, date, url, script):
     """Store *script* on disk unless its SHA-1 checksum was seen before.

     The checksum is added to the ``js_script_checksums`` redis set; when
     ``sadd`` reports it was already present nothing else happens.
     Otherwise the hex checksum is recorded in ``self.last`` and the script
     is written below ``args.prefix`` with a ``// Retrieved ...`` header
     line.  Scripts that ``JSParser`` cannot parse are written under a
     ``.bad/`` sub-path, except when ``magic`` identifies them as HTML or
     XML, in which case they are dropped.

     :param path: relative path of the output file.
     :param date: retrieval date for the header; omitted when falsy.
     :param url: source URL for the header.
     :param script: script content to checksum, parse and write.
     """
     checksum = sha1(script).digest()
     if not redis_db.sadd("js_script_checksums", checksum):
         return
     self.last["script_checksum"] = hexlify(checksum)
     try:
         JSParser().parse(script)
     except Exception:
         # Keep unparsable scripts as 'bad' script if they don't look like HTML.
         mime_type = magic.from_buffer(script.replace("\v", " "), mime=True)
         if mime_type in ("text/html", "application/xml"):
             return
         print >>sys.stderr, "Cannot parse " + path
         path = ".bad/" + path
     header = "// Retrieved %sfrom %s\n" % (date + ", " if date else "", url)
     path = args.prefix + "/" + path
     try:
         os.makedirs(os.path.dirname(path))
     except OSError:
         # Best effort: the directory usually exists already.  A genuine
         # failure still surfaces when the file is opened below.
         pass
     # ``with`` guarantees the handle is closed even if a write fails.
     with open(path, "w") as jsfile:
         jsfile.write(header)
         jsfile.write(script)
def get_property_attributes(url):
    """Collect the JavaScript assignments from the page at *url*.

    The page is downloaded, its fourth ``<script type="text/javascript">``
    element (index 3) is parsed as JavaScript, and every assignment node is
    recorded as ``left.value -> right.value``.  A side without a ``value``
    attribute contributes the empty string.

    :param url: address of the page to download.
    :returns: dict of assignment targets to assigned values.
    """
    page = requests.get(url)

    # HTML parsing: pick the script element by its fixed position.
    document = BeautifulSoup(page.text, 'html.parser')
    js_tags = document.findAll('script', {'type': 'text/javascript'})
    target = js_tags[3]

    # JavaScript parsing: walk the tree and keep only assignments.
    tree = Parser().parse(target.text)
    fields = {}
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.Assign):
            continue
        key = getattr(node.left, 'value', '')
        fields[key] = getattr(node.right, 'value', '')
    return fields
Ejemplo n.º 51
0
def treeWalker(js_file):
    """Extract simple top-level ``var`` values from JavaScript source.

    Only direct children of the parsed program whose type is exactly
    ``ast.VarStatement`` are examined, and only the first node yielded by
    ``nodevisitor.visit`` for each of them.  Depending on the exact type of
    that node's initializer:

    * ``ast.String`` -> the initializer's ``value`` is stored unchanged;
    * ``ast.Number`` or ``ast.BinOp`` -> the result of ``eval`` on the
      initializer's ``to_ecma()`` text is stored;
    * anything else -> the ``(name, initializer)`` pair is printed.

    Any exception raised while handling a statement causes that statement's
    ``to_ecma()`` text to be printed instead.

    :param js_file: JavaScript source handed to ``Parser.parse``.
    :returns: dict mapping variable names to the extracted values.
    """
    variables = {}
    tree = Parser().parse(js_file)
    for statement in tree.children():
        if type(statement) != ast.VarStatement:
            continue
        try:
            first = list(nodevisitor.visit(statement))[0]
            kind = type(first.initializer)
            if kind == ast.String:
                variables[first.identifier.value] = first.initializer.value
            elif kind == ast.Number or kind == ast.BinOp:
                # NOTE(review): eval() runs text derived from the parsed
                # input as Python; unsafe if js_file is untrusted.
                variables[first.identifier.value] = eval(
                    first.initializer.to_ecma())
            else:
                print((first.identifier.value, first.initializer))
        except Exception:
            print(statement.to_ecma())
    return variables
Ejemplo n.º 52
0
 def test_modify_tree(self):
     # Rename every identifier ``i`` to ``hello`` directly on the parsed
     # tree and check that the regenerated source reflects the change.
     source = """
     for (var i = 0; i < 10; i++) {
       var x = 5 + i;
     }
     """
     expected = textwrap.dedent("""
         for (var hello = 0; hello < 10; hello++) {
           var x = 5 + hello;
         }
         """).strip()

     tree = Parser().parse(source)
     for candidate in nodevisitor.visit(tree):
         if not isinstance(candidate, ast.Identifier):
             continue
         if candidate.value == 'i':
             candidate.value = 'hello'

     self.assertMultiLineEqual(tree.to_ecma(), expected)
Ejemplo n.º 53
0
def partitionCode(sourceCode):
    parser = Parser()
    print "Source code originally is ............ \n", sourceCode
    tree = parser.parse(sourceCode)
    fnList = FunctionDefinitionsPass(tree)
    mobileDeviceList = MobileDevicesPass(tree)
    predicateList = PredicatePass(tree)
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.ExprStatement):
            exprNode = node.expr
            if isinstance(exprNode, ast.FunctionCall):  # check if this is a function call to an object
                ParseMethodCalls(
                    exprNode, fnList, mobileDeviceList, predicateList
                )  # TODO: Impose the restiction that all mobile Device declarations come ahead of all else
    print "-*********\n*******\n------------------THE PARTITIONED CODE IS -----------------------------*********\n*******\n"

    for key in partitionedCode:
        print "On node ", key, ", code is \n\n"
        print partitionedCode[key]
Ejemplo n.º 54
0
def partitionCode(sourceCode):
  parser = Parser()
  sourceCodeInHtml=sourceCode.replace(';','\n');
  print "MULTI PHONE SCRIPT \n----------------------------------------\n", sourceCodeInHtml
  tree=parser.parse(sourceCode);
  fnList=FunctionDefinitionsPass(tree)
  mobileDeviceList=MobileDevicesPass(tree)
  predicateList=PredicatePass(tree)
  for node in nodevisitor.visit(tree):
        if(isinstance(node,ast.ExprStatement)):
            exprNode=node.expr
            if(isinstance(exprNode,ast.FunctionCall)):  # check if this is a function call to an object 
                ParseMethodCalls(exprNode,fnList,mobileDeviceList,predicateList) # TODO: Impose the restiction that all mobile Device declarations come ahead of all else
  print "\n\nPARTITIONED CODE \n----------------------------------------\n",
  returnCode =dict()
  for key in partitionedCode :
        print "On phone name \"",mobileDeviceList[key],"\" :  \n ",
        print "\t",partitionedCode[key],"\n"
        returnCode[mobileDeviceList[key]]=partitionedCode[key]
  print ""
  return returnCode
Ejemplo n.º 55
0
def check_expected_output(filename):
    """Compare the reduced form of ``js/<filename>.js`` with its expectation.

    The input file is parsed, passed through ``ConstantReductionVisitor``
    and rendered back to source.  The stripped result must equal the
    stripped content of ``js/<filename>.js.expect``; on a mismatch both
    texts are printed and the check fails.

    :param filename: test name, without directory or extension.
    :raises SkipTest: if the ``.expect`` file does not exist.
    :raises AssertionError: if the output differs from the expectation.
    """
    input_file = 'js/%s.js' % filename
    expect_file = input_file + '.expect'
    if not os.path.exists(expect_file):
        raise SkipTest('No expect file for test ' + filename)

    with open(input_file) as source, open(expect_file) as expectation:
        tree = Parser().parse(source.read())
        reduced = ConstantReductionVisitor().visit(tree)
        js = reduced.to_ecma().strip()

        expected_output = expectation.read().strip()
        if js == expected_output:
            return

        # Show both sides before failing so the difference can be inspected.
        print('Expected:')
        print(expected_output)
        print('')
        print('Got:')
        print(js)
        assert False
Ejemplo n.º 56
0
def parse_global_js_for_access_id_action_url(global_js):
    """Assemble a URL from four assignments found in ``global.js``.

    Every assignment in the parsed source whose target (its ``value`` with
    surrounding quote characters stripped) is one of ``protocol``,
    ``roDomain``, ``ro`` or ``rt`` has its right-hand ``value`` collected,
    also with surrounding quote characters stripped.  Assignments whose
    target has no ``value`` attribute are skipped.

    :param global_js: JavaScript source text of ``global.js``.
    :returns: the first collected ``protocol``, ``roDomain``, ``ro`` and
        ``rt`` values concatenated in that order.
    :raises AssertionError: if the same value is assigned to one of the
        parts more than once.
    :raises AttributeError: if the right-hand side of a matching assignment
        has no ``value`` attribute.
    :raises IndexError: if any of the four parts is never assigned.
    """
    parser = Parser()
    tree = parser.parse(global_js)

    parts = ['protocol', 'roDomain', 'ro', 'rt']
    UrlParts = namedtuple('UrlParts', parts)
    url_parts = UrlParts([], [], [], [])

    getvalue = operator.attrgetter('value')
    err = "Too many '{}' assignments in global.js."
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.Assign):
            continue
        try:
            left_value = getvalue(node.left).strip('\'"')
        except AttributeError:
            continue
        if left_value not in parts:
            continue

        right_value = getvalue(node.right).strip('\'"')
        collected = getattr(url_parts, left_value)
        # Raise explicitly (instead of ``assert``) so the check survives
        # ``python -O``, and name the part that was actually duplicated.
        if right_value in collected:
            raise AssertionError(err.format(left_value))
        collected.append(right_value)

    return (url_parts.protocol[0] + url_parts.roDomain[0]
            + url_parts.ro[0] + url_parts.rt[0])
Ejemplo n.º 57
0
    def _js_design_as_doc(self, filepath):
        """Build a design-document dict from a JavaScript file.

        Every identifier found directly inside a top-level ``var``
        declaration starts a new, initially empty view under that name.
        For each property of an object literal in the same position, the
        property contributes ``name -> function source`` to the most
        recently started view when it contains both an identifier listed
        in ``VIEW_FUNCTION_NAMES`` and a function expression.

        :param filepath: path of the JavaScript file; its base name without
            extension becomes the document name.
        :returns: ``{"_id": "_design/<name>", "views": {...}}``.
        """
        name = os.path.splitext(os.path.basename(filepath))[0]

        with open(filepath) as source:
            js = source.read()

        tree = Parser().parse(js)

        def scan_property(prop):
            # Return (function name, function source); either may be None.
            found_name = None
            found_body = None
            for part in prop.children():
                if isinstance(part, ast.Identifier) and part.value in VIEW_FUNCTION_NAMES:
                    found_name = part.value
                if isinstance(part, ast.FuncExpr):
                    found_body = part.to_ecma()
            return found_name, found_body

        views = {}
        for statement in tree:
            if not isinstance(statement, ast.VarStatement):
                continue
            for declaration in statement.children():
                for element in declaration.children():
                    if isinstance(element, ast.Identifier):
                        # A new view; later object literals fill it in.
                        view = {}
                        views[element.value] = view
                    if isinstance(element, ast.Object):
                        for prop in element.children():
                            function_name, function_body = scan_property(prop)
                            if function_name and function_body:
                                view[function_name] = function_body

        return {"_id": "_design/%s" % name,
                "views": views}
Ejemplo n.º 58
0
    def post(self):
        """Parse the JavaScript sent in the request body and classify nodes.

        The request body is decoded as JSON, zero-width spaces (U+200B) are
        removed from its ``"javascript"`` entry, and the result is parsed.
        Nodes whose ``identifier`` attribute supports ``to_ecma()`` are
        collected; among those, function calls / identifiers and function
        declarations are additionally gathered in separate lists.
        """
        print(self.request)
        print("request: %s" % self.request.body)
        payload = json.loads(self.request.body)
        javascript = payload["javascript"].replace(u'\u200b', "")
        print(javascript)

        tree = Parser().parse(javascript)

        jsnodes, functions, calls = [], [], []
        for node in nodevisitor.visit(tree):
            try:
                # Raises AttributeError for nodes without a usable
                # ``identifier``; such nodes are skipped entirely.
                node.identifier.to_ecma()
                jsnodes.append(node)
                if isinstance(node, (ast.FunctionCall, ast.Identifier)):
                    calls.append(node)
                if isinstance(node, ast.FuncDecl):
                    functions.append(node)
            except AttributeError:
                pass
Ejemplo n.º 59
0
class TestAPIFinder(unittest.TestCase):
    """Tests for ``ASTWalker`` lookups on parsed JavaScript snippets.

    Two entry points are exercised, each on the tree produced by
    ``JSParser.parse``:

    * ``_find(tree, name)`` is expected to be truthy when the script calls
      a method called ``name``;
    * ``find_apicall(tree, name)`` is expected to return a permission
      value for that call: the string ``'tabs'`` or, in one case, the
      tuple ``('webRequest', 'webRequestBlocking')``.
    """

    def setUp(self):
        # A fresh walker and parser for every test.
        self.walker = ASTWalker()
        self.jstree = JSParser()

    # --- _find: the method is called on a dotted ``opera...`` chain ---

    def test_simple_find(self):
        script = """
        var sendToKaleidos = opera.contexts.menu.createItem(foo)
        opera.contexts.menu.addItem(sendToKaleidos);
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script),
                        'addItem'))

    def test_simple_find2(self):
        script = """
        var allTabs = opera.extension.tabs.getAll();
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script),
                        'getAll'))

    def test_simple_find3(self):
        script = """
        var temp_tab = opera.extension.tabs.create(
            {url: 'http://online.translate.ua', focused: false}
        );
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script),
                        'create'))

    def test_simple_find4(self):
        script = """
        var matches = opera.extension.tabs.getFocused()
        .url.match(/v=([^(\&|$)]*)/)
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script),
                        'getFocused'))

    # --- find_apicall: call made directly on ``opera.contexts.tabs`` ---

    def test_permission(self):
        script = """
        var sendToKaleidos = opera.contexts.menu.createItem(foo)
        opera.contexts.tabs.getAll();
        """
        self.assertEqual(self.walker.find_apicall(
            self.jstree.parse(script), 'getAll'), 'tabs')

    # --- _find: the method is called through another name or object ---

    def test_finder_aliased(self):
        script = """
        var mn = opera.contexts.menu;
        mn.addItem(button);
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script),
                        'addItem'))

    def test_finder_aliased2(self):
        script = """
        filter.block.add(document.location.href)
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script),
                        'add'))

    def test_finder_aliased3(self):
        script = """
        URLFilterAPI.block.remove(content.replace(bugReg,"*#"),newOptions);
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script),
                        'remove'))

    def test_finder_aliased4(self):
        script = """
        uiitem.disabled = !o.tabs.getFocused();
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script),
                        'getFocused'))

    def test_finder_aliased5(self):
        script = """
        try { return o.tabs.getFocused().url; } catch (e) { return ""; }
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script),
                        'getFocused'))

    def test_finder_aliased6(self):
        script = """
        var Current = Tabs.getSelected();
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script),
                        'getSelected'))

    def test_finder_aliased7(self):
        script = """
        var oTabs = opera.extension.tabs;
        oTabs.create({url: READER_URL, focused: true});
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script),
                        'create'))

    def test_finder_aliased8(self):
        script = """
        d=opera.contexts.menu.createItem(
            {title:g_formfills[e].decprofilename,type:"folder"}
        );
        contextParents[b].addItem(d);
        d.addItem(opera.contexts.menu.createItem({title:gs("Fill Form"),
        onclick:cmaction1,id:g_formfills[e].ffid}));
        """
        self.assertTrue(self.walker._find(self.jstree.parse(script),
                        'addItem'))

    # --- find_apicall: the method is called through another name ---

    def test_permission_aliased(self):
        script = """
        var tb = opera.contexts.tabs;
        tb.create(tabs);
        """
        self.assertEqual(self.walker.find_apicall(
            self.jstree.parse(script), 'create'), 'tabs')

    def test_permission_aliased2(self):
        script = """
        filter.block.add(document.location.href)
        """
        self.assertEqual(self.walker.find_apicall(
            self.jstree.parse(script), 'add'), ('webRequest',
                                                'webRequestBlocking'))

    def test_permission_aliased3(self):
        script = """
        try { return o.tabs.getFocused().url; } catch (e) { return ""; }
        """
        self.assertEqual(self.walker.find_apicall(
            self.jstree.parse(script), 'getFocused'), 'tabs')

    def test_permission_aliased4(self):
        script = """
        var Current = Tabs.getSelected();
        """
        self.assertEqual(self.walker.find_apicall(
            self.jstree.parse(script), 'getSelected'), 'tabs')

    def test_permission_aliased5(self):
        script = """
        var tbs = opera.contexts.tabs;
        tbs.getFocused();
        """
        self.assertEqual(self.walker.find_apicall(
            self.jstree.parse(script), 'getFocused'), 'tabs')

    # --- the called name is reached through a chain of assignments ---

    def test_finder_multi_aliased(self):
        script = """
        var o = opera;
        c = o.contexts,
        tbs = c.tabs;
        tbs.getFocused();
        """
        self.assertTrue(self.walker._find(
            self.jstree.parse(script), 'getFocused'))

    def test_permission_multi_aliased(self):
        script = """
        var o = opera;
        c = o.contexts,
        tb = c.tabs;
        tb.getSelected();
        """
        self.assertEqual(self.walker.find_apicall(
            self.jstree.parse(script), 'getSelected'), 'tabs')