Example #1
0
def test_findall():
    """Run ``findall`` under every XPath match and compare the built values.

    Each case is ``(js source, xpath, accepted types, expected values)``.
    """
    cases = [
        (
            r"""
            var arr1 = ["a","b","c"];
            var arr2 = ["d","e","f"];
            """,
            "//array",
            [dict, list],
            [["a", "b", "c"], ["d", "e", "f"]],
        ),
        (
            r"""
            var arr1 = {"a": "b", "c": "d"};
            var arr2 = {"e": 1, "f": 2};
            """,
            "//object",
            [dict, list],
            [{"a": "b", "c": "d"}, {"e": 1, "f": 2}],
        ),
    ]

    for source, xpath_expr, accepted, expected in cases:
        tree = js2xml.parse(source)
        # Flatten the findall() hits of every XPath match into one list.
        matches = [
            node
            for hit in tree.xpath(xpath_expr)
            for node in findall(hit, types=accepted)
        ]
        assert_list_equal([make(node) for node in matches], expected)
Example #2
0
def test_getall_complex():
    """``jsonlike.getall`` on a larger script yields a single empty object."""
    needle_snippet = r"""
var needleParam = needleParam || {};
needleParam.chatGroup = "test";
needleParam.productId = "6341292";
needleParam.productPrice = "EUR              138.53".replace("$","n_").replace(/,/g,"");
//Begin Needle (fan-sourcing platform) snippet
jQuery(document).ready(function(){

var e = document.createElement("script"); e.type = "text/javascript";
e.async = true;
e.src = document.location.protocol +

"//overstock.needle.com/needle_service.js?1"; document.body.appendChild(e);

});
// End Needle snippet
"""
    cases = [(needle_snippet, [{}])]

    for source, expected in cases:
        tree = js2xml.parse(source)
        extracted = js2xml.jsonlike.getall(tree)
        assert_list_equal(extracted, expected)
Example #3
0
def test_findall():
    """Check ``jsonlike.findall`` + ``jsonlike.make_dict`` on arrays and objects.

    Each case is ``(js source, xpath, expected values)``.
    """
    cases = [
        (
            r"""
            var arr1 = ["a","b","c"];
            var arr2 = ["d","e","f"];
            """,
            "//array",
            [
                ["a", "b", "c"],
                ["d", "e", "f"],
            ],
        ),
        (
            r"""
            var arr1 = {"a": "b", "c": "d"};
            var arr2 = {"e": 1, "f": 2};
            """,
            "//object",
            [
                {"a": "b", "c": "d"},
                {"e": 1, "f": 2},
            ],
        ),
    ]

    for source, xpath_expr, expected in cases:
        tree = js2xml.parse(source)
        # Gather the findall() hits of every XPath match, in document order.
        found = [
            node
            for hit in tree.xpath(xpath_expr)
            for node in js2xml.jsonlike.findall(hit)
        ]
        built = [js2xml.jsonlike.make_dict(node) for node in found]
        assert_list_equal(built, expected)
Example #4
0
def test_parse_string():
    """Check the text of ``<string>`` nodes produced for JS string literals.

    Each case is ``(js source, expected list of string texts)``.  The cases
    cover both quote styles, escaped quotes, octal escapes, backslash line
    continuations, ``\\uXXXX`` escapes and UTF-8 encoded byte input.
    """
    jscode_snippets = [
        (
        r"""
        var h = 'test';
        var i = "test";
        var j = "";
        var k = '""';
        var l = '"';
        var m = '';
        var n = "''";
        var o = "'";
        """, ['test', 'test', '', '""', '"', '', "''", "'"]
        ),
        (
        r"""
        var i = 'test\'s output';
        """, [r"test's output"]
        ),

        (
        r"""
        var i = ["\"", '\''];
        var j = "test\'s output";
        var k = "test\\'s output";
        var l = "nested \"quotes\".";
        """, ['"', "'", r"test's output", r"test\'s output", r'nested "quotes".']
        ),
        (
        r"""
        var i = 'https://www.blogger.com/navbar.g?targetBlogID\0754325487278375417853\46blogName\75spirello\46publishMode\75PUBLISH_MODE_BLOGSPOT\46navbarType\75LIGHT\46layoutType\75LAYOUTS\46searchRoot\75http://spirelloskrimskramserier.blogspot.com/search\46blogLocale\75no\46v\0752\46homepageUrl\75http://spirelloskrimskramserier.blogspot.com/\46vt\0751357383140196484672';
        """, [r'https://www.blogger.com/navbar.g?targetBlogID=4325487278375417853&blogName=spirello&publishMode=PUBLISH_MODE_BLOGSPOT&navbarType=LIGHT&layoutType=LAYOUTS&searchRoot=http://spirelloskrimskramserier.blogspot.com/search&blogLocale=no&v=2&homepageUrl=http://spirelloskrimskramserier.blogspot.com/&vt=1357383140196484672']
        ),
        (
        r"""
        var i = "foo \
bar";
        var j = "foo \
                 bar";
        """, [r'foo bar', 'foo                  bar']
        ),
        (
        r"""
        var x = "\u00A9 Netscape Communications";
        """,
        # Was ur'...': that prefix is a SyntaxError on Python 3.  A plain
        # u'...' literal has the same value (Python 2 raw-unicode literals
        # still interpret \uXXXX) and is accepted by Python 2 and 3.3+.
        [u'\u00a9 Netscape Communications']
        ),
        (
        u"""
        var x = "\u00A9 Netscape Communications";
        """.encode("utf8"),
        [u'\u00a9 Netscape Communications']
        ),
    ]

    for snippet, expected in jscode_snippets:
        jsxml = js2xml.parse(snippet)
        result = jsxml.xpath("//string/text()")
        assert_list_equal(result, expected)
Example #5
0
def getimgsrc(pin_id):
    """Print the image URL of every pin referenced on a huaban.com pin page.

    Fetches ``http://huaban.com/pins/<pin_id>/``, takes the first inline
    script containing ``app.page = app.page``, parses it with js2xml and
    prints ``http://img.hb.aicdn.com/<key>`` for each ``key`` string found
    under a ``pins`` property.

    :param pin_id: value interpolated into the pin page URL.
    """
    url = 'http://huaban.com/pins/%s/' % pin_id
    z = requests.get(url,headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'})
    sel = Selector(text=z.text)
    jscode = sel.xpath("//script[contains(., 'app.page = app.page')]/text()").extract_first()
    parsed_js = js2xml.parse(jscode)
    for i in parsed_js.xpath('//property[@name="pins"]//property[@name="key"]/string/text()'):
        # Parenthesized single-argument form prints the same text on
        # Python 2 and is valid syntax on Python 3 (the bare print
        # statement was not).
        print('http://img.hb.aicdn.com/' + i)
Example #6
0
def main():
    """Parse each JavaScript file named on the command line and print its XML.

    Positional ``filenames`` default to ``-``, which means standard input.
    The ``--debug`` flag is accepted but not used by this function.
    """
    ap = ArgumentParser()
    ap.add_argument('--debug', action='store_true')
    ap.add_argument('filenames', nargs='*', default=['-'])
    args = ap.parse_args()

    for fn in args.filenames:
        if fn == '-':
            # stdin is owned by the interpreter; read it but never close it.
            source = sys.stdin.read()
        else:
            # The 'U' mode flag raises ValueError on Python 3.11+; default
            # text mode already applies universal newlines on Python 3.
            # The context manager closes the handle the original leaked.
            with open(fn) as fo:
                source = fo.read()
        parsed = js2xml.parse(source)
        print(js2xml.pretty_print(parsed))
    def parse(self, response):
        """Yield one dict per entry of the page's inline ``data`` array.

        The first ``<script>`` containing ``var data =`` is parsed with
        js2xml and queried through a Scrapy selector.  After the items, a
        request for the ``li.next`` link is yielded when such a link exists.
        """
        js_source = response.xpath('//script[contains(., "var data =")]/text()').extract_first()
        js_selector = scrapy.Selector(_root=js2xml.parse(js_source))

        for entry in js_selector.xpath('//var[@name="data"]/array/object'):
            quote_text = entry.xpath('string(./property[@name="text"])').extract_first()
            author_name = entry.xpath(
                'string(./property[@name="author"]//property[@name="name"])'
            ).extract_first()
            tag_names = entry.xpath('./property[@name="tags"]//string/text()').extract()
            yield {'texto': quote_text, 'autor': author_name, 'tags': tag_names}

        next_href = response.css('li.next a::attr("href")').extract_first()
        if not next_href:
            return
        yield scrapy.Request(response.urljoin(next_href))
Example #8
0
def test_parse_number():
    """Numeric literals expose their source text through ``number/@value``."""
    cases = [
        (
        r"""
        var i = 3;
        """,
        ["3"],
        ),
        (
        r"""
        var i = -3.14;
        """,
        ["-3.14"],
        ),
    ]

    for source, expected in cases:
        values = js2xml.parse(source).xpath("//number/@value")
        assert_list_equal(values, expected)
Example #9
0
def test_parse_url():
    """URL strings come out the same with or without escaped slashes."""
    cases = [
        (
        r"""
        var i = 'http://www.example.com';
        """,
        ["http://www.example.com"],
        ),
        (
        r"""
        var i = 'http:\/\/www.example.com';
        """,
        ["http://www.example.com"],
        ),
    ]

    for source, expected in cases:
        texts = js2xml.parse(source).xpath("//string/text()")
        assert_list_equal(texts, expected)
Example #10
0
def test_parse():
    """Smoke test: every snippet parses to something other than ``None``."""
    sources = [
        r"""
        var i = 0;
        """,
        r"""
        document.write("\n");
        """,
        r"""
        var t1 = "nested \"quote\".";
        var t2 = 'nested \'quote\'.';
        var t3 = 'nested \"quote\".';
        var t2 = "nested \'quote\'.";
        """,
    ]

    for source in sources:
        tree = js2xml.parse(source)
        assert_is_not_none(tree)
Example #11
0
def test_parse_encoding():
    """Non-ASCII text survives parsing from unicode and from latin1 bytes.

    Each case is ``(js source, encoding passed to parse, expected texts)``.
    """
    cases = [
        (
        u"""
        var test = "Daniel Gra\xf1a";
        """,
        None,
        [u"Daniel Gra\xf1a"],
        ),
        (
        u"""
        var test = "Daniel Gra\xf1a";
        """.encode("latin1"),
        "latin1",
        [u"Daniel Gra\xf1a"],
        ),
    ]

    for source, source_encoding, expected in cases:
        tree = js2xml.parse(source, encoding=source_encoding)
        assert_equal(tree.xpath("//string/text()"), expected)
Example #12
0
def test_parse_undefined():
    """Count the ``<undefined>`` children produced by array elisions.

    Each case is ``(js source, expected number of array/undefined nodes)``.
    """
    cases = [
        (
        r"""
        myArray = [0,1,,,4,5];
        """,
        2,
        ),
        (
        r"""
        myArray = [,1,,,4,];
        """,
        3,  # and not 4
        ),
        (
        r"""
        myArray = [,1,,,4,,,];
        """,
        5,
        ),
    ]

    for source, expected_count in cases:
        tree = js2xml.parse(source)
        assert_equal(tree.xpath("count(//array/undefined)"), expected_count)
Example #13
0
def test_json():
    """Check JSON-like extraction through both ``jsonlike`` entry points.

    Each case is ``(js source, expected list of Python values)``.  The whole
    table is first checked via ``findall`` + ``make_dict`` and then again via
    ``getall``.
    """
    cases = [
        (
            r"""
            var arr1 = ["a","b","c"];
            var arr2 = ["d","e","f"];
            """,
            [
                ['a', 'b', 'c'],
                ['d', 'e', 'f'],
            ],
        ),
        (
            r"""
            var arr1 = ["a", null, "c"];
            var arr2 = [null, "e", null];
            """,
            [
                ['a', None, 'c'],
                [None, 'e', None],
            ],
        ),
        (
            r"""
            var arr1 = ["a", undefined, "c"];
            var arr2 = [undefined, "e", null];
            """,
            [
                ['a', 'undefined', 'c'],
                ['undefined', 'e', None],
            ],
        ),
        (
            r"""
            var i = -3.14;
            """,
            [],
        ),
        (
            r"""
            money = {
                'quarters': 20
            };
            """,
            [{"quarters": 20}],
        ),
        (
            r"""
            money = {
                'quarters': 10,
                'addQuarters': function(amount) {
                    this.quarters += amount;
                }
            };
            money.addQuarters(10);
            """,
            [],
        ),
        (
            r"""
            var money = {
                'quarters': 10,
                'something': [1,2,3,4],
                'somethingelse': {'nested': [5,6,7,8]},
                'addQuarters': function(amount) {
                    this.quarters += amount;
                }
            };
            money.addQuarters(10);
            """,
            [
                [1, 2, 3, 4],
                {'nested': [5, 6, 7, 8]},
            ],
        ),
        (
            r"""
            var store = {
                'apples': 10,
                'carrots': [1,2,3,4],
                'chicken': {'eggs': [5,6,7,8]}
            };
            """,
            [
                {
                    'apples': 10,
                    'carrots': [1, 2, 3, 4],
                    'chicken': {'eggs': [5, 6, 7, 8]},
                },
            ],
        ),
        (
            r"""
            var store1 = {
                'apples': 10,
                'carrots': [1,2,3,4],
                'chicken': {'eggs': [5,6,7,8]}
            };
            var store2 = {
                'tomatoes': 20,
                'potatoes': [9, false, 7, 6],
                'spinach': {'cans': [true, 2]}
            };
            """,
            [
                {
                    'apples': 10,
                    'carrots': [1, 2, 3, 4],
                    'chicken': {'eggs': [5, 6, 7, 8]},
                },
                {
                    'potatoes': [9, False, 7, 6],
                    'spinach': {'cans': [True, 2]},
                    'tomatoes': 20,
                },
            ],
        ),
    ]

    # Pass 1: locate the nodes, then convert each one explicitly.
    for source, expected in cases:
        tree = js2xml.parse(source)
        nodes = js2xml.jsonlike.findall(tree)
        built = [js2xml.jsonlike.make_dict(node) for node in nodes]
        assert_list_equal(built, expected)

    # Pass 2: the one-call variant must agree with the same table.
    for source, expected in cases:
        tree = js2xml.parse(source)
        assert_list_equal(js2xml.jsonlike.getall(tree), expected)
Example #14
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import js2xml
import os

# Directory containing this script; sample paths are resolved against it.
TEST_DIR = os.path.dirname(__file__)

# JavaScript samples to run through the parser.
files = [
    os.path.join(TEST_DIR, 'samples/fullcalendar.js'),
    os.path.join(TEST_DIR, 'samples/fullcalendar.min.js'),
    os.path.join(TEST_DIR, 'samples/jquery.min.js'),
]
for filename in files:
    with open(filename) as f:
        jscode = f.read()

    # Parse inside the loop so that every sample is exercised; when this
    # call sat after the loop only the last file read was ever parsed.
    tree = js2xml.parse(jscode)
Example #15
0
def market_value_historic_pull(base_url, player_id):
    """Build a player's market-value history from the page's Highcharts script.

    The page is fetched from ``base_url`` with ``"profil"`` replaced by
    ``"marktwertverlauf"``.  Every ``property`` found under
    ``//array//object`` in the parsed script is inspected by name:

    * ``age``      -> integer age
    * ``verein``   -> lower-cased club name
    * ``mw``       -> value in whole euros (``m`` = millions, ``Th.`` =
      thousands, anything else counts as 0)
    * ``datum_mw`` -> ``datetime.date`` of the valuation

    :param base_url: profile URL of the player.
    :param player_id: identifier stored in the ``player_id`` column.
    :returns: ``pandas.DataFrame`` with columns ``club``, ``value``,
        ``data_date``, ``age`` and ``player_id``.  When the page has no
        script matching ``Highcharts.Chart`` a single row of ``None``
        values is returned instead.
    """
    mv_soup = get_souped_page(base_url.replace("profil", "marktwertverlauf"))

    # Look the chart script up once instead of searching the document twice.
    chart_script = mv_soup.find("script", text=re.compile("Highcharts.Chart"))

    if chart_script is None:
        market_value_history = pd.DataFrame(
        {'club': [None],
         'value': [None],
         'data_date': [None],
         'age': [None]
        })
        market_value_history['player_id'] = player_id
        return market_value_history

    parsed = js2xml.parse(chart_script.text)

    age_list = []
    club_list = []
    mv_list = []
    date_of_value_list = []

    # Evaluate the XPath a single time; re-running it for every index and
    # every comparison made the loop quadratic in the number of properties.
    for prop in parsed.xpath('//array//object//property'):
        name = prop.get('name')

        if name == 'age':
            # Pull the quoted number out of the serialized <number value="..."/>.
            age = int(stringify_children(prop).split("number value=")[1].split("/")[0][1:][:-1])
            age_list.append(age)

        elif name == 'verein':
            club = stringify_children(prop).split("<string>")[1].split("</string>")[0].lower()
            club_list.append(club)

        elif name == 'mw':
            raw_value = stringify_children(prop).split("<string>")[1].split("</string>")[0].replace("€", "")

            if "m" in raw_value:
                raw_value = int(float(raw_value.strip().replace("m", "")) * 1000000)
            elif "Th." in raw_value:
                raw_value = int(raw_value.strip().replace("Th.", "")) * 1000
            else:
                # The original spelled this branch `elif "-":`, which is
                # always true, so "-" and any other unrecognized format
                # have always mapped to 0.  Kept, but made explicit.
                raw_value = 0

            mv_list.append(raw_value)

        elif name == 'datum_mw':
            date_raw = stringify_children(prop).split("<string>")[1].split("</string>")[0]
            # NOTE(review): presumably formatted like "Jan 1, 2020" -- the
            # month token goes to month_to_number(); confirm against the site.
            year = int(date_raw[-4:])
            month = month_to_number(date_raw.split(" ")[0])
            day = int(date_raw.split(" ")[1].split(",")[0])
            date_of_value_list.append(datetime.date(year, month, day))

    market_value_history = pd.DataFrame(
    {'club': club_list,
     'value': mv_list,
     'data_date': date_of_value_list,
     'age': age_list
    })

    market_value_history['player_id'] = player_id

    return market_value_history
Example #16
0
 def parse_product(self, response):
     """Build an ``LVRItem`` from a product page response.

     Combines the page's Open Graph metadata, its breadcrumb microdata and
     the ``itemResponse`` object assigned in an inline script.
     """
     doc, tag, text = Doc().tagtext()
     item = LVRItem()
     photo_base = 'http://images.luisaviaroma.com/Big'

     # The URL is taken from the page metadata rather than from the request,
     # in case the page reports a different one.
     og_url = response.xpath('/html/head/meta[@property="og:url"]/@content')
     item['url'] = og_url.extract_first()

     # The first (main) image also comes from the metadata.
     og_image = response.xpath('/html/head/meta[@property="og:image"]/@content')
     item['photos'] = [og_image.extract_first()]
     assert item['photos'][0].startswith(photo_base)

     # Breadcrumb names, in page order; joined further down into the category.
     crumbs = response.xpath(
         '//ol[@itemtype="http://schema.org/BreadcrumbList"]'
         '/li/a/span[@itemprop="name"]/text()'
     ).extract()

     # Convert the value assigned to ``itemResponse`` into a Python dict.
     js_source = response.xpath(
         '//script[contains(., "itemResponse")]/text()'
     ).extract_first()
     js_tree = js2xml.parse(js_source)
     assigned = js_tree.xpath(
         '//assign[left/identifier[@name="itemResponse"]]/right/*'
     )
     data = js2xml.jsonlike.make_dict(assigned[0])
     assert data['HasValidDefaultPrice']

     # Title: "<designer description> - <short description>".
     item['title'] = u'{} - {}'.format(
         data['Designer']['Description'], data['ShortDescription']
     )

     # Description: an HTML <ul> with the item code, the '|'-separated long
     # description parts and, when present, the composition.
     bullet_points = [u'ITEM CODE {}'.format(data['ItemKey']['ItemCode'])]
     bullet_points.extend(data['LongtDescription'].strip('|').split('|'))
     if data['Composition']:
         bullet_points.append(u'Composition: {}'.format(data['Composition']))
     with tag('ul'):
         for point in bullet_points:
             with tag('li'):
                 text(point)
     item['description'] = doc.getvalue()

     first_price = data['Pricing'][0]['Prices'][0]
     item['currency_code'] = first_price['CurrencyId']
     item['price'] = first_price['FinalPrice']

     # Keywords: breadcrumbs, designer, item code, then the first five
     # words of the short description.
     item['keywords'] = (
         crumbs
         + [data['Designer']['Description']]
         + [data['ItemKey']['ItemCode']]
         + data['ShortDescription'].split()[0:5]
     )

     # e.g. WOMEN, SHOES, SANDALS becomes "WOMEN >> SHOES >> SANDALS".
     item['category'] = u' >> '.join(crumbs)
     item['sku'] = data['ItemKey']['ItemCode']

     for photo in data['ItemPhotos']:
         photo_url = u''.join((photo_base, photo['Path']))
         # The main photo is repeated in this list; keep only new URLs.
         if photo_url in item['photos']:
             continue
         item['photos'].append(photo_url)
     return item
Example #17
0
    # All <script type="text/javascript"> tags in the body whose text
    # matches the pattern "Chart".
    chart_scripts = soep.body.find_all('script',
                                       type='text/javascript',
                                       text=re.compile("Chart"))

    # One empty frame per statistics source; the columns come from the
    # corresponding *_Headers name defined outside this fragment.
    Twitter_Stat = pd.DataFrame(columns=Twitter_Headers)
    Telegram_Stat = pd.DataFrame(columns=Telegram_Headers)
    Youtube_Stat = pd.DataFrame(columns=Youtube_Headers)
    Reddit_Stat = pd.DataFrame(columns=Reddit_Headers)
    Github_Stat = pd.DataFrame(columns=Github_Headers)
    Facebook_Stat = pd.DataFrame(columns=Facebook_Headers)
    BitcoinTalk_Stat = pd.DataFrame(columns=BitcoinTalk_Headers)
    Alexa_Stat = pd.DataFrame(columns=Alexa_Headers)

    for chart in chart_scripts:
        chart_data = chart.text
        # Turn the script source into an XML tree that XPath can query.
        parsed = js2xml.parse(chart_data)
        #        print(js2xml.pretty_print(parsed))
        # The first string argument found under a var declaration is used
        # as the chart's name; raises IndexError when there is none.
        chart_name = parsed.xpath("//var//arguments//string/text()")[
            0]  # 'ic-twitter-stat'
        #        print(chart_name)
        if chart_name == 'ic-twitter-stat':
            # The chart's label strings become the 'Date' column.
            for d in parsed.xpath(
                    "//property[@name='data']//property[@name='labels']"):
                Twitter_Stat['Date'] = d.xpath(".//array/string/text()")
            # Each dataset object adds one column, named by its 'label'
            # and filled with the number values of its 'data' array.
            for d in parsed.xpath(
                    "//property[@name='datasets']//array//object"):
                variable = d.xpath(
                    ".//property[@name='label']//string/text()")[0]
                # Wrapping in a list and taking [0] yields the xpath result
                # itself.
                Twitter_Stat[variable] = [
                    d.xpath(".//property[@name='data']//array/number/@value")
                ][0]
Example #18
0
def test_vars():
    """Check the name -> value mapping that ``get_vars`` builds for a script.

    Each case is ``(js source, expected dict)``.
    """
    cases = [
        (
            r"""
            var arr1 = ["a","b","c"];
            var arr2 = ["d","e","f"];
            """,
            {
                'arr1': ['a', 'b', 'c'],
                'arr2': ['d', 'e', 'f'],
            },
        ),
        (
            r"""
            var arr1 = ["a", null, "c"];
            var arr2 = [null, "e", null];
            """,
            {
                'arr1': ['a', None, 'c'],
                'arr2': [None, 'e', None],
            },
        ),
        (
            r"""
            var arr1 = ["a", undefined, "c"];
            var arr2 = [undefined, "e", null];
            """,
            {
                'arr1': ['a', 'undefined', 'c'],
                'arr2': ['undefined', 'e', None],
            },
        ),
        (
            r"""
            var i = -3.14;
            """,
            {'i': -3.14},
        ),
        (
            r"""
            money = {
                'quarters': 20
            };
            """,
            {'money': {"quarters": 20}},
        ),
        (
            r"""
            money = {
                quarters: 20
            };
            """,
            {'money': {"quarters": 20}},
        ),
        (
            r"""
            currency = 'USD';
            money = {
                "value": 20,
                "currency": currency
            };
            """,
            {
                'currency': 'USD',
                'money': {'currency': 'currency', 'value': 20},
            },
        ),
        (
            r"""
            t = {a: "3", "b": 3, "3": 3.0};
            """,
            {'t': {'3': 3.0, 'a': '3', 'b': 3}},
        ),
        (
            r"""
            money = {
                'quarters': 10,
                'addQuarters': function(amount) {
                    this.quarters += amount;
                }
            };
            money.addQuarters(10);
            """,
            {'money': {'quarters': 10, 'addQuarters': None}},
        ),
        (
            r"""
            var money = {
                'quarters': 10,
                'something': [1,2,3,4],
                'somethingelse': {'nested': [5,6,7,8]},
                'addQuarters': function(amount) {
                    this.quarters += amount;
                }
            };
            money.addQuarters(10);
            """,
            {
                'money': {
                    'quarters': 10,
                    'addQuarters': None,
                    'something': [1, 2, 3, 4],
                    'somethingelse': {'nested': [5, 6, 7, 8]},
                },
            },
        ),
        (
            r"""
            var store = {
                'apples': 10,
                'carrots': [1,2,3,4],
                'chicken': {'eggs': [5,6,7,8]}
            };
            """,
            {
                'store': {
                    'apples': 10,
                    'carrots': [1, 2, 3, 4],
                    'chicken': {'eggs': [5, 6, 7, 8]},
                },
            },
        ),
        (
            r"""
            var store1 = {
                'apples': 10,
                'carrots': [1,2,3,4],
                'chicken': {'eggs': [5,6,7,8]}
            };
            var store2 = {
                'tomatoes': 20,
                'potatoes': [9, false, 7, 6],
                'spinach': {'cans': [true, 2]}
            };
            """,
            {
                'store1': {
                    'apples': 10,
                    'carrots': [1, 2, 3, 4],
                    'chicken': {'eggs': [5, 6, 7, 8]},
                },
                'store2': {
                    'potatoes': [9, False, 7, 6],
                    'spinach': {'cans': [True, 2]},
                    'tomatoes': 20,
                },
            },
        ),
    ]
    for source, expected in cases:
        variables = get_vars(parse(source))
        # The (source, expected) pair is passed as the failure message.
        assert_dict_equal(variables, expected, (source, expected))
Example #19
0
 # Walk 20000 consecutive video ids; for each page, parse its 4th <script>
 # with js2xml and hand the pretty-printed tree to select().
 for i in range(20000):
     x+=1
     avid+=1
     url="https://www.bilibili.com/video/av"+str(avid)
     try:
         html=requests.get(url,headers=headers)
     except requests.RequestException:
         # Only request failures are handled here; a bare except would
         # also swallow KeyboardInterrupt and SystemExit.
         print("html request error "+str(avid)+' '+str(datetime.datetime.now()))
         # There is no fresh response to parse.  Falling through would
         # reuse the previous page (or raise NameError on the first id).
         continue
     bs4Obj=BeautifulSoup(html.text,'lxml')
     srcScript=bs4Obj.select("script")

     if len(srcScript)>4:
         try:
             srcElement=js2xml.parse(srcScript[3].string, encoding='utf-8', debug=False)
             src_tree=js2xml.pretty_print(srcElement)
             h1=BeautifulSoup(src_tree,"lxml")
             select(h1,x)
             # Random pause of up to st1 seconds after a success.
             time.sleep(random.random()*st1)
             countErr=0
             lastTime=str(datetime.datetime.now())
             print("success: av"+str(avid))
         except Exception:
             # Best-effort crawl: log and move on, but let Ctrl-C through.
             time.sleep(random.random()*st2)
             countErr+=1
             # After more than 100 consecutive failures, pause for five
             # minutes and restart the count.
             if countErr>100:
                 time.sleep(300)
                 countErr=0
             print('select error: '+str(avid)+"  CountErr: "+str(countErr)+"   now "+str(datetime.datetime.now())+'   last time '+lastTime)
         
Example #20
0
def test_syntax():
    """Smoke test: every JavaScript construct below parses to a non-None tree.

    The snippets are grouped by language feature (literals, variables,
    assignments, control flow, functions, accessors, ``new``, exceptions).
    Each list element must end with a comma: adjacent string literals are
    concatenated by Python, which would merge two cases into one.
    """
    jscode_snippets = [
        # strings
        r"""
        "test";
        """,
        r"""
        "test\
        multiline";
        """,
        # numbers
        "3.14;",
        "-12;",
        "3.45e2;",
        "0377;",
        # The comma after the next item was missing, which fused it with
        # "[]" into the single snippet "0xFF;[]".
        "0xFF;",
        # arrays
        "[]",
        "[1,2]",
        "[1,,2]",
        "[1,,2,,3,]",
        "['a', 'b','c']",
        "[a, 'b', c]",
        # objects
        "o = {};",
        "o = {a: 1};",
        "o = {a: 1, b: 2};",
        "o = {'c': 1, 'd': 2};",
        'o = {"c": 1, "d": 2};',
        'o = {"c": 1, d: "e"};',
        "e = {foo: 5, bar: 6, baz: ['Baz', 'Content']};",
        "e = {1: a, 2: b};",
        # other primitive data types
        "null;",
        "undefined;",
        "true;",
        "false;",
        # variables
        r"""
        var i;
        """,
        r"""
        var i,j,k;
        """,
        r"""
        var i = 0;
        """,
        r"""
        var i = "test";
        """,
        r"""var z = 'foxes', r = 'birds';""",
        r"""
        var i, j, k = 0;
        """,
        r"""
        var i=1, j, k = 2;
        """,
        r"""
        var i = obj.prop;
        """,
        r"""var testObj = {};""",
        r"""var testObj = [];""",
        # assignments
        r"""
        i = b;
        """,
        r"""
        i.a = "b";
        """,
        r"""
        i["a"] = "b";
        """,
        r"""
        i[a] = "b";
        """,
        # control structures
        r"""
        if (condition) {
            result = expression;
        };""",
        r"""
        if (condition) {
            result = expression;
        } else {
            result = alternative;
        };""",
        r"""
        if (exprA == exprB) {
           result = expression;
        } else if (expr2) {
           result = alternative1;
        } else {
           result = alternative2;
        };""",
        "result = condition ? expression : alternative;",
        # switch (the comma after this snippet was missing, which fused it
        # with the first for-loop snippet)
        r"""
        switch (expr) {
           case SOMEVALUE:
             //statements;
             break;
           case ANOTHERVALUE:
             //statements;
             break;
           default:
             //statements;
             break;
         }
        """,
        # for loop
        r"""
        for (var i = 0; i < 5; i++) {
            a = i;
        }
        """,
        r"""
        for (var i = 0; i < 5; i++) {
            a = i
        }
        """,
        r"""
        for (var key in array) {
            continue;
        }
        """,
        r"""
        for (;;) {
            break;
        }
        """,
        r"""
        for (; i < len; i++) {
            text += cars[i] + "<br>";
        }
        """,
        r"""
        for (var i = 0, len = cars.length, text = ""; i < len; i++) {
            text += cars[i] + "<br>";
        }
        """,
        """
        for (; i < len; ) {
            text += cars[i] + "<br>";
            i++;
        }
        """,
        # while loop
        """
        while (a<b) {
           a+=1;
        }
        """,
        """
        do {
           a+=1;
         } while (a<b);
        """,
        # with
        """
        with (document) {
           var a = getElementById('a');
           var b = getElementById('b');
           var c = getElementById('c');
         };
        """,
        # label
        r"""
        loop1: for (var a = 0; a < 10; a++) {
           if (a == 4) {
               break loop1; // Stops after the 4th attempt
           }
           alert('a = ' + a);
           loop2: for (var b = 0; b < 10; ++b) {
              if (b == 3) {
                 continue loop2; // Number 3 is skipped
              }
              if (b == 6) {
                 continue loop1; // Continues the first loop, 'finished' is not shown
              }
              alert('b = ' + b);
           }
           alert('finished')
        }
        block1: {
            alert('hello'); // Displays 'hello'
            break block1;
            alert('world'); // Will never get here
        }
        """,
        # functions
        """
        function foo(p) {
            p = "bar";
        }
        """,
        """
        function hello() {
            alert('world');
        }
        """,
        """
        var x = function(y) {
           return y * y;
        };
        """,
        """
        var math = {
          'factorial': function factorial(n) {
            if (n <= 1)
              return 1;
            return n * factorial(n - 1);
          }
        };
        """,
        """
        var anon = function() {
            alert('I am anonymous');
        };
        """,
        """
        anon();
        """,
        """
        setTimeout(function() {
            alert('hello');
        }, 1000)
        """,
        """
        (function() {
            alert('foo');
        }());
        """,
        # get/set
        """
        var obj = {
          get latest () {
            return "latest";
          }
        }
        """,
        """
        delete obj.latest;
        """,
        """
        var o = {
          set current (str) {
            return this.log[this.log.length] = str;
          },
          log: []
        }
        """,
        # new
        """var mycar = new car("Eagle", "Talon TSi", 1993);""",
        # try / catch
        """
        try {
           throw "myException"; // generates an exception
        }
        catch (e) {
           // statements to handle any exceptions
           logMyErrors(e); // pass exception object to error handler
        }
        """,
        """
        try {
            addalert("bad call");
        }
        catch(e) {
            document.write ("Error Message: " + e.message);
            document.write ("<br />");
            document.write ("Error Code: ");
            document.write (e.number & 0xFFFF);
            document.write ("<br />");
            document.write ("Error Name: " + e.name);
        }
        """,
        """
        try {
            document.write("Outer try running...<br/>");

            try {
                document.write("Nested try running...<br/>");
                throw new Error(301, "an error");
            }
            catch (e) {
                document.write ("Nested catch caught " + e.message + "<br/>");
                throw e;
            }
            finally {
                document.write ("Nested finally is running...<br/>");
            }
        }
        catch (e) {
            document.write ("Outer catch caught " + e.message + "<br/>");
        }
        finally {
            document.write ("Outer finally running");
        }
        """,
    ]

    for snippet in jscode_snippets:
        assert_is_not_none(js2xml.parse(snippet))
Example #21
0
def test_json():
    """Check ``getall`` against JavaScript snippets holding JSON-like data.

    Every case pairs a JavaScript source string with the list of Python
    objects expected from ``getall(tree, types=[dict, list])`` on the
    snippet's parse tree.
    """
    cases = [
        # arrays of strings
        (
            r"""
            var arr1 = ["a","b","c"];
            var arr2 = ["d","e","f"];
            """,
            [["a", "b", "c"], ["d", "e", "f"]],
        ),
        # `null` elements are expected as None
        (
            r"""
            var arr1 = ["a", null, "c"];
            var arr2 = [null, "e", null];
            """,
            [["a", None, "c"], [None, "e", None]],
        ),
        # `undefined` elements are expected as the string 'undefined'
        (
            r"""
            var arr1 = ["a", undefined, "c"];
            var arr2 = [undefined, "e", null];
            """,
            [["a", "undefined", "c"], ["undefined", "e", None]],
        ),
        # a lone number yields no dict or list
        (
            r"""
            var i = -3.14;
            """,
            [],
        ),
        # quoted property name
        (
            r"""
            money = {
                'quarters': 20
            };
            """,
            [{"quarters": 20}],
        ),
        # unquoted property name
        (
            r"""
            money = {
                quarters: 20
            };
            """,
            [{"quarters": 20}],
        ),
        # an identifier used as a value is expected as its name
        (
            r"""
            currency = 'USD',
            money = {
                "value": 20,
                "currency": currency
            };
            """,
            [{"currency": "currency", "value": 20}],
        ),
        # mixed key styles and number types
        (
            r"""
            t = {a: "3", "b": 3, "3": 3.0};
            """,
            [{"3": 3.0, "a": "3", "b": 3}],
        ),
        # an object holding a function is expected to produce nothing
        (
            r"""
            money = {
                'quarters': 10,
                'addQuarters': function(amount) {
                    this.quarters += amount;
                }
            };
            money.addQuarters(10);
            """,
            [],
        ),
        # ...but its JSON-like nested members are still expected
        (
            r"""
            var money = {
                'quarters': 10,
                'something': [1,2,3,4],
                'somethingelse': {'nested': [5,6,7,8]},
                'addQuarters': function(amount) {
                    this.quarters += amount;
                }
            };
            money.addQuarters(10);
            """,
            [[1, 2, 3, 4], {"nested": [5, 6, 7, 8]}],
        ),
        # nested containers inside a single object
        (
            r"""
            var store = {
                'apples': 10,
                'carrots': [1,2,3,4],
                'chicken': {'eggs': [5,6,7,8]}
            };
            """,
            [
                {
                    "apples": 10,
                    "carrots": [1, 2, 3, 4],
                    "chicken": {"eggs": [5, 6, 7, 8]},
                },
            ],
        ),
        # two objects, including boolean values
        (
            r"""
            var store1 = {
                'apples': 10,
                'carrots': [1,2,3,4],
                'chicken': {'eggs': [5,6,7,8]}
            };
            var store2 = {
                'tomatoes': 20,
                'potatoes': [9, false, 7, 6],
                'spinach': {'cans': [true, 2]}
            };
            """,
            [
                {
                    "apples": 10,
                    "carrots": [1, 2, 3, 4],
                    "chicken": {"eggs": [5, 6, 7, 8]},
                },
                {
                    "potatoes": [9, False, 7, 6],
                    "spinach": {"cans": [True, 2]},
                    "tomatoes": 20,
                },
            ],
        ),
    ]
    for js_source, expected_objects in cases:
        tree = js2xml.parse(js_source)
        extracted = getall(tree, types=[dict, list])
        assert_list_equal(extracted, expected_objects)
Example #22
0
 def parse(self, response):
     """Yield a dict for each object literal passed as a function-call argument.

     The text of the first ``<script>`` element in ``response`` is parsed
     with ``js2xml`` and every ``functioncall/arguments/object`` node in the
     resulting tree is converted with ``js2xml.jsonlike.make_dict``.

     Yields nothing when the response contains no script text.
     """
     script_text = response.xpath('//script/text()').extract_first()
     if script_text is None:
         # extract_first() returns None when nothing matched; there is no
         # JavaScript to parse, so stop instead of handing None to js2xml.
         return
     jstree = js2xml.parse(script_text)
     for el in jstree.xpath('//functioncall/arguments/object'):
         yield js2xml.jsonlike.make_dict(el)
Example #23
0
def getst(url):
    """Fetch ``url`` and return the ``st`` value found in its inline JavaScript.

    The first ``<script>`` whose text contains 'weibo' is converted to an
    XML tree with ``js2xml``; the result is the text of the first
    ``<string>`` under a ``<property name="st">`` node.

    Raises IndexError when no matching script or no ``st`` property exists.
    """
    # Send the request with the `headers` defined outside this function.
    page = requests.get(url, headers=headers)
    # Grab the first JavaScript block and strip the '<!--拆包页-->' marker,
    # which otherwise triggers errors caused by the Chinese characters.
    script_texts = etree.HTML(page.content).xpath(
        "//script[contains(., 'weibo')]/text()")
    jscode = script_texts[0].replace(u'<!--拆包页-->', '')
    # Use js2xml to turn the JavaScript code into XML.
    js_tree = js2xml.parse(jscode)
    # The XML can be inspected with js2xml.pretty_print(js_tree).
    # A sample of the printed output follows.
    """
    <program>
      <var name="$config">
        <object>
          <property name="weibo">
            <number value="0"/>
          </property>
          <property name="wechat">
            <number value="0"/>
          </property>
          <property name="alipay">
            <number value="0"/>
          </property>
          <property name="isLogin">
            <number value="1"/>
          </property>
          <property name="isPad">
            <number value="0"/>
          </property>
          <property name="isPass">
            <number value="0"/>
          </property>
          <property name="st">
            <string>dfd6e4</string>
          </property>
          <property name="ext">
            <string>pay=1&amp;unionPay=1</string>
          </property>
          <property name="loginUrl">
            <string></string>
          </property>
          <property name="cuid">
            <number value="3485500247"/>
          </property>
          <property name="detail">
            <string></string>
          </property>
        </object>
      </var>
      <if>
        <predicate>
          <dotaccessor>
            <object>
              <identifier name="$config"/>
            </object>
            <property>
              <identifier name="wechat"/>
            </property>
          </dotaccessor>
        </predicate>
        <then>
          <block>
            <var name="WB_mishu">
              <string>http://mp.weixin.qq.com/s?__biz=MjM5NDA2NDY4MA==&amp;mid=201898100&amp;idx=4&amp;sn=aceda5551311992d46fa039f54ed9477#rd</string>
            </var>
            <var name="show_WB_mishu">
              <number value="0"/>
            </var>
            <var name="show_WX_guide">
              <number value="0"/>
            </var>
          </block>
        </then>
      </if>
      <if>
        <predicate>
          <dotaccessor>
            <object>
              <identifier name="$config"/>
            </object>
            <property>
              <identifier name="weibo"/>
            </property>
          </dotaccessor>
        </predicate>
        <then>
          <block>
            <var name="$WB_version">
              <string></string>
            </var>
          </block>
        </then>
      </if>
      <var name="minVersion">
        <object>
          <property name="minClientVerNum">
            <string>600</string>
          </property>
          <property name="minClientV">
            <string>6.0.0</string>
          </property>
        </object>
      </var>
      <var name="scheme_protocol">
        <string>sinaweibo://</string>
      </var>
      <if>
        <predicate>
          <binaryoperation operation="==">
            <left>
              <dotaccessor>
                <object>
                  <identifier name="minVersion"/>
                </object>
                <property>
                  <identifier name="minClientVerNum"/>
                </property>
              </dotaccessor>
            </left>
            <right>
              <string>510</string>
            </right>
          </binaryoperation>
        </predicate>
        <then>
          <block>
            <assign operator="=">
              <left>
                <identifier name="scheme_protocol"/>
              </left>
              <right>
                <string>sinaweibo510://</string>
              </right>
            </assign>
          </block>
        </then>
      </if>
    </program>
    """
    # The sample above shows where `st` sits; select it with XPath.
    st_values = js_tree.xpath('//property[@name="st"]/string/text()')
    return st_values[0]
Example #24
0
    def parse(self, response) -> Match:
        """Turn one odds feed response into ``Bet`` objects and continue the crawl.

        The first object literal passed to a function call in the response
        body is converted to a dict; every active bookmaker entry under
        ``d.oddsdata`` becomes a ``Bet``. The bets are appended to
        ``response.meta["bets"]``. If ``response.meta["bets_to_parse"]`` still
        holds entries, the next one is popped and requested with this same
        callback; otherwise the finished ``Match`` is yielded.
        """
        # NOTE(review): annotated as returning Match, but this is a generator
        # that yields either a scrapy.Request or a Match.
        js_tree = js2xml.parse(response.text)
        payload_node = js_tree.xpath('//functioncall/arguments/object')[0]
        parsed_dict = js2xml.make_dict(payload_node)
        meta = response.meta

        type_names = self.globals["betting_type_names"]
        bet_type = type_names[str(meta["betting_type_id"])]['name']
        scope_names = self.globals["scope_names"]
        bet_scope = scope_names[str(meta["scope_id"])].replace('&nbsp;', ' ')

        bets = []
        oddsdata = parsed_dict.get('d', {}).get('oddsdata', [])
        for side in oddsdata:
            # Bet Level 1: Back or Lay
            side_entries = oddsdata[side]
            for entry_key in side_entries:
                # Bet Level 2: Odds, volume, movement and bet information
                bet_info = side_entries[entry_key]
                for bookmaker_id in bet_info['odds']:
                    if not bet_info['act'][bookmaker_id]:
                        # Skip bookmakers flagged as inactive for this bet.
                        continue
                    odds = bet_info['odds'][bookmaker_id]
                    if type(odds) is dict:
                        if bet_type == "1X2":
                            # Keep only the outcomes present, in the fixed
                            # order '1', '2', 'X'.
                            odds = [
                                odds[outcome]
                                for outcome in ('1', '2', 'X')
                                if outcome in odds
                            ]
                        else:
                            odds = list(odds.values())
                    bookmaker = self.bookmakers_data[bookmaker_id]
                    bets.append(
                        Bet(
                            bookmaker=bookmaker['WebUrl'],
                            bookmaker_nice=bookmaker['WebName'],
                            feed=self.name,
                            date_extracted=datetime.utcnow(),
                            bet_type=bet_type,
                            bet_scope=bet_scope,
                            odds=odds,
                            url=response.url,
                            is_back=bet_info['isBack'],
                            handicap=(float(bet_info['handicapValue'])
                                      if "handicapValue" in bet_info else None),
                        ))

        logging.info(
            f"Parsed {len(bets)} bets of type {bet_type} and scope {bet_scope}. Url: {response.url}. "
            f"Remaining bets of match to parse: {len(response.meta['bets_to_parse'])}"
        )
        meta["bets"].extend(bets)

        pending = meta["bets_to_parse"]
        if not pending:
            # Nothing left to fetch: emit the match with all collected bets.
            match = dict(meta['match'])
            match['bets'] = meta["bets"]
            logging.info(f"Finished parsing {len(response.meta['bets'])} bets "
                         f"from match with URL {response.meta['match_url']}.")
            yield Match(**match)
            return

        # Queue the next odds feed, carrying the accumulated meta along.
        odds_url, betting_type_id, scope_id = pending.pop()
        meta['betting_type_id'] = betting_type_id
        meta["scope_id"] = scope_id
        request_headers = {
            'user-agent': self.user_agent,
            'referer': response.url
        }
        yield scrapy.Request(url=urljoin(self.odds_main_url, odds_url),
                             callback=self.parse,
                             headers=request_headers,
                             meta=meta)
Example #25
0
    def parse_match(self, response):
        """Collect match details from a match page and request its first odds feed.

        Reads the object literal passed to the constructor assigned to
        ``var page`` in the inline script that mentions 'new PageEvent',
        stores it and a match summary in ``response.meta``, picks the default
        betting type and scope, and yields a request for the matching odds
        feed with ``self.parse_first`` as callback.
        """
        # Get page info
        script_text = response.xpath(
            "//script[contains(text(),'new PageEvent')]/text()").get()
        js_tree = js2xml.parse(script_text)
        page_node = js_tree.xpath(
            '//var[@name="page"]/new/arguments/object')[0]
        page_info = js2xml.make_dict(page_node)
        for hash_key in ('xhash', 'xhashf'):
            # The hashes are URL-quoted in the page; store them unquoted.
            page_info[hash_key] = urllib.parse.unquote(page_info[hash_key])
        meta = response.meta
        meta['page_info'] = page_info

        match_dict = {
            'sport': meta['sport'],
            'tournament': meta['tournament'],
            'tournament_nice': self.tournament_urls[meta['tournament_url']],
            'teams': [page_info["home"], page_info["away"]],
            'country': meta["country"],
        }
        # The start time is the run of digits between 't' and '-' in the
        # class attribute of the date paragraph.
        date_classes = response.xpath(
            '//p[contains(@class,"date datet")]').attrib['class']
        match_dict['commence_time'] = int(
            re.search(r't(\d*)-', date_classes).group(1))
        match_dict['url'] = response.url
        meta['match'] = match_dict

        # Get default betting type and scope ID
        sport_id = str(page_info['sportId'])
        cons = self.globals['cons']
        # NOTE(review): this lookup uses the raw sportId while the scope
        # lookups below use its str() form -- confirm the key types.
        betting_type_id = ('3' if cons['moneyLineSports'].get(
            page_info['sportId']) else '1')
        # First truthy candidate wins: sport + bet type specific scope, then
        # bet type scope, then sport scope, falling back to 2.
        sport_scopes = cons['sportBetTypeScopeId'].get(sport_id)
        scope_id = (
            (sport_scopes and sport_scopes.get(betting_type_id))
            or cons['betTypeScopeId'].get(betting_type_id)
            or cons['sportScopeId'].get(sport_id)
            or 2
        )

        odds_url = (
            f'/feed/match/{page_info["versionId"]}-{sport_id}-{page_info["id"]}'
            f'-{betting_type_id}-{scope_id}-{page_info["xhash"]}.dat')
        meta['first'] = True
        meta['betting_type_id'] = betting_type_id
        meta["scope_id"] = scope_id
        meta["match_url"] = response.url

        request_headers = {
            'user-agent': self.user_agent,
            'referer': response.url
        }
        yield scrapy.Request(url=urljoin(self.odds_main_url, odds_url),
                             callback=self.parse_first,
                             headers=request_headers,
                             meta=meta)
Example #26
0
def test_parse_string():
    """Check the text of every ``<string>`` node produced for string literals.

    Each case pairs a JavaScript snippet (``str`` or ``bytes``) with the
    list expected from ``//string/text()`` on its parse tree, in document
    order.
    """
    cases = [
        # single and double quotes, empty strings, quotes inside quotes
        (
        r"""
        var h = 'test';
        var i = "test";
        var j = "";
        var k = '""';
        var l = '"';
        var m = '';
        var n = "''";
        var o = "'";
        """, ['test', 'test', '', '""', '"', '', "''", "'"]
        ),
        # escaped quote
        (
        r"""
        var i = 'test\'s output';
        """, [r"test's output"]
        ),
        # backslash line continuations
        (
        r"""
        var i = 'test\
 multiline';
        """, [r"test multiline"]
        ),
        (
        r"""
        var i = 'test\
 long \
 multiline';
        """, [r"test long  multiline"]
        ),

        # escaped quotes and escaped backslashes
        (
        r"""
        var i = ["\"", '\''];
        var j = "test\'s output";
        var k = "test\\'s output";
        var l = "nested \"quotes\".";
        """, ['"', "'", r"test's output", r"test\'s output", r'nested "quotes".']
        ),
        # octal escapes
        (
        r"""
        var i = 'https://www.blogger.com/navbar.g?targetBlogID\0754325487278375417853\46blogName\75spirello\46publishMode\75PUBLISH_MODE_BLOGSPOT\46navbarType\75LIGHT\46layoutType\75LAYOUTS\46searchRoot\75http://spirelloskrimskramserier.blogspot.com/search\46blogLocale\75no\46v\0752\46homepageUrl\75http://spirelloskrimskramserier.blogspot.com/\46vt\0751357383140196484672';
        """, [r'https://www.blogger.com/navbar.g?targetBlogID=4325487278375417853&blogName=spirello&publishMode=PUBLISH_MODE_BLOGSPOT&navbarType=LIGHT&layoutType=LAYOUTS&searchRoot=http://spirelloskrimskramserier.blogspot.com/search&blogLocale=no&v=2&homepageUrl=http://spirelloskrimskramserier.blogspot.com/&vt=1357383140196484672']
        ),
        # continuation lines keep their leading whitespace
        (
        r"""
        var i = "foo \
bar";
        var j = "foo \
                 bar";
        """, [r'foo bar', 'foo                  bar']
        ),
        (
        # testing Unicode literals
        b"""
        var x = "\\u00A9 Netscape Communications 1";
        """,
        [u'\u00a9 Netscape Communications 1']
        ),
        (
        # testing Unicode characters
        u"""
        var x = "\u00A9 Netscape Communications 2";
        """.encode("utf8"),
        [u'\u00a9 Netscape Communications 2']
        ),
        # a real example
        (
        r"""
        var needleParam = needleParam || {};
        needleParam.chatGroup = "test";
        needleParam.productId = "6341292";
        needleParam.productPrice = "EUR              138.53".replace("$","n_").replace(/,/g,"");
        //Begin Needle (fan-sourcing platform) snippet
        jQuery(document).ready(function(){

        var e = document.createElement("script"); e.type = "text/javascript";
        e.async = true;
        e.src = document.location.protocol +

        "//overstock.needle.com/needle_service.js?1"; document.body.appendChild(e);

        });
        // End Needle snippet
        """,
        ['test',
         '6341292',
         'EUR              138.53',
         '$',
         'n_',
         '',
         'script',
         'text/javascript',
         '//overstock.needle.com/needle_service.js?1']
        ),
        # test replacing some control characters
        (
        r"""
        var name = "\u13e9\u0352\u0362\u044f\u2778\u00b3\u1d43\u034e\u034e\u0442\u035b\u13b7\u0362\u033b\u1d51A\u0362\u13de\u0001\u0001\u277c00b";
        """,
        [u'\u13e9\u0352\u0362\u044f\u2778\xb3\u1d43\u034e\u034e\u0442\u035b\u13b7\u0362\u033b\u1d51A\u0362\u13de\ufffd\ufffd\u277c00b']
        ),
        # surrogate pairs
        (r'''var name = "\ud835\udebd"''', [u'\U0001d6bd']),
    ]

    for js_source, expected_strings in cases:
        tree = js2xml.parse(js_source)
        found_strings = tree.xpath("//string/text()")
        assert_list_equal(found_strings, expected_strings)
Example #27
0
def getst(url):
    """Download ``url`` and extract the ``st`` string from its inline script.

    The text of the first ``<script>`` containing 'weibo' is parsed with
    ``js2xml``; the function returns the text of the first ``<string>``
    child of a ``<property name="st">`` node in the resulting tree.

    Raises IndexError when no such script or no ``st`` property is found.
    """
    # Issue the request using the `headers` defined outside this function.
    reply = requests.get(url, headers=headers)
    # Take the first JavaScript block and drop the '<!--拆包页-->' marker so
    # that the Chinese characters do not cause errors.
    matching_scripts = etree.HTML(reply.content).xpath(
        "//script[contains(., 'weibo')]/text()")
    jscode = matching_scripts[0].replace(u'<!--拆包页-->', '')
    # Convert the JavaScript code to XML with js2xml.
    xml_tree = js2xml.parse(jscode)
    # js2xml.pretty_print(xml_tree) shows the XML; sample output below.
    """
    <program>
      <var name="$config">
        <object>
          <property name="weibo">
            <number value="0"/>
          </property>
          <property name="wechat">
            <number value="0"/>
          </property>
          <property name="alipay">
            <number value="0"/>
          </property>
          <property name="isLogin">
            <number value="1"/>
          </property>
          <property name="isPad">
            <number value="0"/>
          </property>
          <property name="isPass">
            <number value="0"/>
          </property>
          <property name="st">
            <string>dfd6e4</string>
          </property>
          <property name="ext">
            <string>pay=1&amp;unionPay=1</string>
          </property>
          <property name="loginUrl">
            <string></string>
          </property>
          <property name="cuid">
            <number value="3485500247"/>
          </property>
          <property name="detail">
            <string></string>
          </property>
        </object>
      </var>
      <if>
        <predicate>
          <dotaccessor>
            <object>
              <identifier name="$config"/>
            </object>
            <property>
              <identifier name="wechat"/>
            </property>
          </dotaccessor>
        </predicate>
        <then>
          <block>
            <var name="WB_mishu">
              <string>http://mp.weixin.qq.com/s?__biz=MjM5NDA2NDY4MA==&amp;mid=201898100&amp;idx=4&amp;sn=aceda5551311992d46fa039f54ed9477#rd</string>
            </var>
            <var name="show_WB_mishu">
              <number value="0"/>
            </var>
            <var name="show_WX_guide">
              <number value="0"/>
            </var>
          </block>
        </then>
      </if>
      <if>
        <predicate>
          <dotaccessor>
            <object>
              <identifier name="$config"/>
            </object>
            <property>
              <identifier name="weibo"/>
            </property>
          </dotaccessor>
        </predicate>
        <then>
          <block>
            <var name="$WB_version">
              <string></string>
            </var>
          </block>
        </then>
      </if>
      <var name="minVersion">
        <object>
          <property name="minClientVerNum">
            <string>600</string>
          </property>
          <property name="minClientV">
            <string>6.0.0</string>
          </property>
        </object>
      </var>
      <var name="scheme_protocol">
        <string>sinaweibo://</string>
      </var>
      <if>
        <predicate>
          <binaryoperation operation="==">
            <left>
              <dotaccessor>
                <object>
                  <identifier name="minVersion"/>
                </object>
                <property>
                  <identifier name="minClientVerNum"/>
                </property>
              </dotaccessor>
            </left>
            <right>
              <string>510</string>
            </right>
          </binaryoperation>
        </predicate>
        <then>
          <block>
            <assign operator="=">
              <left>
                <identifier name="scheme_protocol"/>
              </left>
              <right>
                <string>sinaweibo510://</string>
              </right>
            </assign>
          </block>
        </then>
      </if>
    </program>
    """
    # The dump above shows where `st` is located; pick it out with XPath.
    st_matches = xml_tree.xpath('//property[@name="st"]/string/text()')
    return st_matches[0]
Example #28
0
def test_schema():
    jscode_snippets = [

        # strings
        (
        r"""
        "test";
        """,
        """
<program>
  <string>test</string>
</program>

        """
        ),
        (
        r"""
        "test\
        multiline";
        """,
        """
<program>
  <string>test        multiline</string>
</program>
        """
        ),

        # numbers
        (
        "3.14;",
        """
<program>
  <number value="3.14"/>
</program>
        """
        ),
        (
        "-12;",
        """
<program>
  <number value="-12"/>
</program>
        """
        ),
        (
        "3.45e2;",
        """
<program>
  <number value="3.45e2"/>
</program>
        """
        ),
        (
        "0377;",
        """
<program>
  <number value="0377"/>
</program>
        """
        ),
        (
        "0xFF;",
        """
<program>
  <number value="0xFF"/>
</program>
        """
        ),

        # arrays
        (
        "[]",
"""
<program>
  <array/>
</program>
"""
        ),
        (
        "[1,2]",
        """
<program>
  <array>
    <number value="1"/>
    <number value="2"/>
  </array>
</program>
        """
        ),
        (
        "[1,,2]",
        """
<program>
  <array>
    <number value="1"/>
    <undefined/>
    <number value="2"/>
  </array>
</program>
        """
        ),
        (
        "[1,,2,,,3,]",
        """
<program>
  <array>
    <number value="1"/>
    <undefined/>
    <number value="2"/>
    <undefined/>
    <undefined/>
    <number value="3"/>
  </array>
</program>
        """
        ),
        (
        "['a', 'b','c']",
        """
<program>
  <array>
    <string>a</string>
    <string>b</string>
    <string>c</string>
  </array>
</program>

        """
        ),
        (
        "[a, 'b', c]",
        """
<program>
  <array>
    <identifier name="a"/>
    <string>b</string>
    <identifier name="c"/>
  </array>
</program>

        """
        ),

        # objects
        (
        "o = {};",
"""
<program>
  <assign operator="=">
    <left>
      <identifier name="o"/>
    </left>
    <right>
      <object/>
    </right>
  </assign>
</program>
"""
        ),
        (
        "o = {a: 1};",
        """
<program>
  <assign operator="=">
    <left>
      <identifier name="o"/>
    </left>
    <right>
      <object>
        <property name="a">
          <number value="1"/>
        </property>
      </object>
    </right>
  </assign>
</program>

        """
        ),
        (
        "o = {a: 1, b: 2};",
        """
<program>
  <assign operator="=">
    <left>
      <identifier name="o"/>
    </left>
    <right>
      <object>
        <property name="a">
          <number value="1"/>
        </property>
        <property name="b">
          <number value="2"/>
        </property>
      </object>
    </right>
  </assign>
</program>
        """
        ),
        (
        "o = {'c': 1, 'd': 2};",
        """
<program>
  <assign operator="=">
    <left>
      <identifier name="o"/>
    </left>
    <right>
      <object>
        <property name="c">
          <number value="1"/>
        </property>
        <property name="d">
          <number value="2"/>
        </property>
      </object>
    </right>
  </assign>
</program>

        """
        ),
        (
        'o = {"c": 1, "d": 2};',
        """
<program>
  <assign operator="=">
    <left>
      <identifier name="o"/>
    </left>
    <right>
      <object>
        <property name="c">
          <number value="1"/>
        </property>
        <property name="d">
          <number value="2"/>
        </property>
      </object>
    </right>
  </assign>
</program>

        """
        ),
        (
        'o = {"c": 1, d: "e"};',
        """
<program>
  <assign operator="=">
    <left>
      <identifier name="o"/>
    </left>
    <right>
      <object>
        <property name="c">
          <number value="1"/>
        </property>
        <property name="d">
          <string>e</string>
        </property>
      </object>
    </right>
  </assign>
</program>

        """
        ),
        (
        "e = {foo: 5, bar: 6, baz: ['Baz', 'Content']};",
        """
<program>
  <assign operator="=">
    <left>
      <identifier name="e"/>
    </left>
    <right>
      <object>
        <property name="foo">
          <number value="5"/>
        </property>
        <property name="bar">
          <number value="6"/>
        </property>
        <property name="baz">
          <array>
            <string>Baz</string>
            <string>Content</string>
          </array>
        </property>
      </object>
    </right>
  </assign>
</program>

        """
        ),
        # other primitive data types
        (
        "null;",
        """
<program>
  <null/>
</program>

        """
        ),
        (
        "undefined;",
        """
<program>
  <undefined/>
</program>

        """
        ),
        (
        "true;",
        """
<program>
  <boolean>true</boolean>
</program>

        """
        ),
        (
        "false;",
        """
<program>
  <boolean>false</boolean>
</program>

        """
        ),

        # variables
        (
        r"""
        var i;
        """,
        """
<program>
  <var name="i"/>
</program>

        """
        ),
        (
        r"""
        var i,j,k;
        """,
        """
<program>
  <var name="i"/>
  <var name="j"/>
  <var name="k"/>
</program>

        """
        ),
        (
        r"""
        var i = 0;
        """,
        """
<program>
  <var name="i">
    <number value="0"/>
  </var>
</program>

        """
        ),
        (
        r"""
        var i = "test";
        """,
        """
<program>
  <var name="i">
    <string>test</string>
  </var>
</program>

        """
        ),
        (
        r"""var z = 'foxes', r = 'birds';""",
        """
<program>
  <var name="z">
    <string>foxes</string>
  </var>
  <var name="r">
    <string>birds</string>
  </var>
</program>

        """
        ),
        (
        r"""
        var i, j, k = 0;
        """,
        """
<program>
  <var name="i"/>
  <var name="j"/>
  <var name="k">
    <number value="0"/>
  </var>
</program>

        """
        ),
        (
        r"""
        var i=1, j, k = 2;
        """,
        """
<program>
  <var name="i">
    <number value="1"/>
  </var>
  <var name="j"/>
  <var name="k">
    <number value="2"/>
  </var>
</program>
        """
        ),
        (
        r"""
        var i = obj.prop;
        """,
"""
<program>
  <var name="i">
    <dotaccessor>
      <object>
        <identifier name="obj"/>
      </object>
      <property>
        <identifier name="prop"/>
      </property>
    </dotaccessor>
  </var>
</program>
"""
        ),
        (
        r"""var testObj = {};""",
"""
<program>
  <var name="testObj">
    <object/>
  </var>
</program>
"""
        ),
        (
        r"""var testObj = [];""",
"""
<program>
  <var name="testObj">
    <array/>
  </var>
</program>
"""
        ),

        # operations
        (
        r"""
        1 + 2;
        "foo" + false;
        3 - 5
        """,
"""
<program>
  <binaryoperation operation="+">
    <left>
      <number value="1"/>
    </left>
    <right>
      <number value="2"/>
    </right>
  </binaryoperation>
  <binaryoperation operation="+">
    <left>
      <string>foo</string>
    </left>
    <right>
      <boolean>false</boolean>
    </right>
  </binaryoperation>
  <binaryoperation operation="-">
    <left>
      <number value="3"/>
    </left>
    <right>
      <number value="5"/>
    </right>
  </binaryoperation>
</program>
"""
        ),
        (
        r"""
        1.0 / 2.0;
        -2 * 2;
        12 % 5;
        """,
"""
<program>
  <binaryoperation operation="/">
    <left>
      <number value="1.0"/>
    </left>
    <right>
      <number value="2.0"/>
    </right>
  </binaryoperation>
  <binaryoperation operation="*">
    <left>
      <number value="-2"/>
    </left>
    <right>
      <number value="2"/>
    </right>
  </binaryoperation>
  <binaryoperation operation="%">
    <left>
      <number value="12"/>
    </left>
    <right>
      <number value="5"/>
    </right>
  </binaryoperation>
</program>
"""
        ),

        (
        r"""
        // Postfix
        var x = 3;
        y = x++; // y = 3, x = 4

        // Prefix
        var a = 2;
        b = ++a; // a = 3, b = 3
        """,
"""
<program>
  <var name="x">
    <number value="3"/>
  </var>
  <assign operator="=">
    <left>
      <identifier name="y"/>
    </left>
    <right>
      <postfix operation="++">
        <identifier name="x"/>
      </postfix>
    </right>
  </assign>
  <var name="a">
    <number value="2"/>
  </var>
  <assign operator="=">
    <left>
      <identifier name="b"/>
    </left>
    <right>
      <unaryoperation operation="++">
        <identifier name="a"/>
      </unaryoperation>
    </right>
  </assign>
</program>

"""     ),

        (
        r"""
        // Postfix
        var x = 3;
        y = x--; // y = 3, x = 2

        // Prefix
        var a = 2;
        b = --a; // a = 1, b = 1
        """,
"""
<program>
  <var name="x">
    <number value="3"/>
  </var>
  <assign operator="=">
    <left>
      <identifier name="y"/>
    </left>
    <right>
      <postfix operation="--">
        <identifier name="x"/>
      </postfix>
    </right>
  </assign>
  <var name="a">
    <number value="2"/>
  </var>
  <assign operator="=">
    <left>
      <identifier name="b"/>
    </left>
    <right>
      <unaryoperation operation="--">
        <identifier name="a"/>
      </unaryoperation>
    </right>
  </assign>
</program>
"""     ),

        (
        r"""
        var x = 3;
        y = -x; // y = -3, x = 3
        """,
"""
<program>
  <var name="x">
    <number value="3"/>
  </var>
  <assign operator="=">
    <left>
      <identifier name="y"/>
    </left>
    <right>
      <unaryoperation operation="-">
        <identifier name="x"/>
      </unaryoperation>
    </right>
  </assign>
</program>

"""
        ),

        (
        r"""
        +3;     // 3
        +"3";   // 3
        +true;  // 1
        +false; // 0
        +null;  // 0
        """,
"""
<program>
  <number value="+3"/>
  <unaryoperation operation="+">
    <string>3</string>
  </unaryoperation>
  <unaryoperation operation="+">
    <boolean>true</boolean>
  </unaryoperation>
  <unaryoperation operation="+">
    <boolean>false</boolean>
  </unaryoperation>
  <unaryoperation operation="+">
    <null/>
  </unaryoperation>
</program>
"""
        ),

        # assignments
        (
        r"""
        i = b;
        """,
"""
<program>
  <assign operator="=">
    <left>
      <identifier name="i"/>
    </left>
    <right>
      <identifier name="b"/>
    </right>
  </assign>
</program>
"""
        ),
        (
        r"""
        i.a = "b";
        """,
"""
<program>
  <assign operator="=">
    <left>
      <dotaccessor>
        <object>
          <identifier name="i"/>
        </object>
        <property>
          <identifier name="a"/>
        </property>
      </dotaccessor>
    </left>
    <right>
      <string>b</string>
    </right>
  </assign>
</program>
"""
        ),
        (
        r"""
        i["a"] = "b";
        """,
"""
<program>
  <assign operator="=">
    <left>
      <bracketaccessor>
        <object>
          <identifier name="i"/>
        </object>
        <property>
          <string>a</string>
        </property>
      </bracketaccessor>
    </left>
    <right>
      <string>b</string>
    </right>
  </assign>
</program>
"""
        ),
        (
        r"""
        i[a] = "b";
        """,
"""
<program>
  <assign operator="=">
    <left>
      <bracketaccessor>
        <object>
          <identifier name="i"/>
        </object>
        <property>
          <identifier name="a"/>
        </property>
      </bracketaccessor>
    </left>
    <right>
      <string>b</string>
    </right>
  </assign>
</program>
"""
        ),

        # control structures
        (
        r"""
        if (condition) {
            result = expression;
        }""",
"""
<program>
  <if>
    <predicate>
      <identifier name="condition"/>
    </predicate>
    <then>
      <block>
        <assign operator="=">
          <left>
            <identifier name="result"/>
          </left>
          <right>
            <identifier name="expression"/>
          </right>
        </assign>
      </block>
    </then>
  </if>
</program>
"""
        ),
        (
        r"""
        if (condition) {
            result = expression;
        } else {
            result = alternative;
        }""",
"""
<program>
  <if>
    <predicate>
      <identifier name="condition"/>
    </predicate>
    <then>
      <block>
        <assign operator="=">
          <left>
            <identifier name="result"/>
          </left>
          <right>
            <identifier name="expression"/>
          </right>
        </assign>
      </block>
    </then>
    <else>
      <block>
        <assign operator="=">
          <left>
            <identifier name="result"/>
          </left>
          <right>
            <identifier name="alternative"/>
          </right>
        </assign>
      </block>
    </else>
  </if>
</program>
"""
        ),

        (
        r"""
        if (exprA == exprB) {
           result = expression;
        } else if (expr2) {
           result = alternative1;
        } else {
           result = alternative2;
        }""",
"""
<program>
  <if>
    <predicate>
      <binaryoperation operation="==">
        <left>
          <identifier name="exprA"/>
        </left>
        <right>
          <identifier name="exprB"/>
        </right>
      </binaryoperation>
    </predicate>
    <then>
      <block>
        <assign operator="=">
          <left>
            <identifier name="result"/>
          </left>
          <right>
            <identifier name="expression"/>
          </right>
        </assign>
      </block>
    </then>
    <else>
      <if>
        <predicate>
          <identifier name="expr2"/>
        </predicate>
        <then>
          <block>
            <assign operator="=">
              <left>
                <identifier name="result"/>
              </left>
              <right>
                <identifier name="alternative1"/>
              </right>
            </assign>
          </block>
        </then>
        <else>
          <block>
            <assign operator="=">
              <left>
                <identifier name="result"/>
              </left>
              <right>
                <identifier name="alternative2"/>
              </right>
            </assign>
          </block>
        </else>
      </if>
    </else>
  </if>
</program>
"""
        ),

        (
        "result = condition ? expression : alternative;",
"""
<program>
  <assign operator="=">
    <left>
      <identifier name="result"/>
    </left>
    <right>
      <conditional>
        <condition>
          <identifier name="condition"/>
        </condition>
        <value1>
          <identifier name="expression"/>
        </value1>
        <value2>
          <identifier name="alternative"/>
        </value2>
      </conditional>
    </right>
  </assign>
</program>
"""
        ),

        # switch
        (
        r"""
        switch (expr) {
           case SOMEVALUE:
             //statements;
             break;
           case ANOTHERVALUE:
             //statements;
             break;
           default:
             //statements;
             break;
         }
        """,
"""
<program>
  <switch>
    <expression>
      <identifier name="expr"/>
    </expression>
    <case>
      <expression>
        <identifier name="SOMEVALUE"/>
      </expression>
      <break/>
    </case>
    <case>
      <expression>
        <identifier name="ANOTHERVALUE"/>
      </expression>
      <break/>
    </case>
    <default>
      <break/>
    </default>
  </switch>
</program>
"""
        ),

        # for loop
        (
        r"""
        for (var i = 0; i < 5; i++) {
            a = i;
        }
        """,
"""
<program>
  <for>
    <init>
      <var name="i">
        <number value="0"/>
      </var>
    </init>
    <condition>
      <binaryoperation operation="&lt;">
        <left>
          <identifier name="i"/>
        </left>
        <right>
          <number value="5"/>
        </right>
      </binaryoperation>
    </condition>
    <post>
      <postfix operation="++">
        <identifier name="i"/>
      </postfix>
    </post>
    <statement>
      <block>
        <assign operator="=">
          <left>
            <identifier name="a"/>
          </left>
          <right>
            <identifier name="i"/>
          </right>
        </assign>
      </block>
    </statement>
  </for>
</program>
"""
        ),
        (
        r"""
        for (var i = 0; i < 5; i++) {
            a = i
        }
        """,
"""
<program>
  <for>
    <init>
      <var name="i">
        <number value="0"/>
      </var>
    </init>
    <condition>
      <binaryoperation operation="&lt;">
        <left>
          <identifier name="i"/>
        </left>
        <right>
          <number value="5"/>
        </right>
      </binaryoperation>
    </condition>
    <post>
      <postfix operation="++">
        <identifier name="i"/>
      </postfix>
    </post>
    <statement>
      <block>
        <assign operator="=">
          <left>
            <identifier name="a"/>
          </left>
          <right>
            <identifier name="i"/>
          </right>
        </assign>
      </block>
    </statement>
  </for>
</program>
"""
        ),
        (
        r"""
        for (var key in array) {
            continue;
        }
        """,
"""
<program>
  <forin>
    <variable>
      <var name="key"/>
    </variable>
    <object>
      <identifier name="array"/>
    </object>
    <statement>
      <block>
        <continue/>
      </block>
    </statement>
  </forin>
</program>
"""
        ),
        (
        r"""
        for (;;) {
            break;
        }
        """,
"""
<program>
  <for>
    <statement>
      <block>
        <break/>
      </block>
    </statement>
  </for>
</program>
"""
        ),
        (
        r"""
        for (; i < len; i++) {
            j = i;
        }
        """,
"""
<program>
  <for>
    <condition>
      <binaryoperation operation="&lt;">
        <left>
          <identifier name="i"/>
        </left>
        <right>
          <identifier name="len"/>
        </right>
      </binaryoperation>
    </condition>
    <post>
      <postfix operation="++">
        <identifier name="i"/>
      </postfix>
    </post>
    <statement>
      <block>
        <assign operator="=">
          <left>
            <identifier name="j"/>
          </left>
          <right>
            <identifier name="i"/>
          </right>
        </assign>
      </block>
    </statement>
  </for>
</program>
"""
        ),
        (
        r"""
        for (var i = 0, len = cars.length, text = ""; i < len; i++) {
            text += cars[i] + "<br>";
        }
        """,
"""
<program>
  <for>
    <init>
      <var name="i">
        <number value="0"/>
      </var>
      <var name="len">
        <dotaccessor>
          <object>
            <identifier name="cars"/>
          </object>
          <property>
            <identifier name="length"/>
          </property>
        </dotaccessor>
      </var>
      <var name="text">
        <string></string>
      </var>
    </init>
    <condition>
      <binaryoperation operation="&lt;">
        <left>
          <identifier name="i"/>
        </left>
        <right>
          <identifier name="len"/>
        </right>
      </binaryoperation>
    </condition>
    <post>
      <postfix operation="++">
        <identifier name="i"/>
      </postfix>
    </post>
    <statement>
      <block>
        <assign operator="+=">
          <left>
            <identifier name="text"/>
          </left>
          <right>
            <binaryoperation operation="+">
              <left>
                <bracketaccessor>
                  <object>
                    <identifier name="cars"/>
                  </object>
                  <property>
                    <identifier name="i"/>
                  </property>
                </bracketaccessor>
              </left>
              <right>
                <string>&lt;br&gt;</string>
              </right>
            </binaryoperation>
          </right>
        </assign>
      </block>
    </statement>
  </for>
</program>
"""
        ),
        (
        """
        for (; i < len; ) {
            text += cars[i] + "<br>";
            i++;
        }
        """,
"""
<program>
  <for>
    <condition>
      <binaryoperation operation="&lt;">
        <left>
          <identifier name="i"/>
        </left>
        <right>
          <identifier name="len"/>
        </right>
      </binaryoperation>
    </condition>
    <statement>
      <block>
        <assign operator="+=">
          <left>
            <identifier name="text"/>
          </left>
          <right>
            <binaryoperation operation="+">
              <left>
                <bracketaccessor>
                  <object>
                    <identifier name="cars"/>
                  </object>
                  <property>
                    <identifier name="i"/>
                  </property>
                </bracketaccessor>
              </left>
              <right>
                <string>&lt;br&gt;</string>
              </right>
            </binaryoperation>
          </right>
        </assign>
        <postfix operation="++">
          <identifier name="i"/>
        </postfix>
      </block>
    </statement>
  </for>
</program>
"""
        ),

        # while loop
        (
        """
        while (a<b) {
           a+=1;
        }
        """,
"""
<program>
  <while>
    <predicate>
      <binaryoperation operation="&lt;">
        <left>
          <identifier name="a"/>
        </left>
        <right>
          <identifier name="b"/>
        </right>
      </binaryoperation>
    </predicate>
    <statement>
      <block>
        <assign operator="+=">
          <left>
            <identifier name="a"/>
          </left>
          <right>
            <number value="1"/>
          </right>
        </assign>
      </block>
    </statement>
  </while>
</program>
"""
        ),
        (
        """
        do {
           a+=1;
         } while (a<b);
        """,
"""
<program>
  <statement>
    <block>
      <assign operator="+=">
        <left>
          <identifier name="a"/>
        </left>
        <right>
          <number value="1"/>
        </right>
      </assign>
    </block>
  </statement>
  <while>
    <binaryoperation operation="&lt;">
      <left>
        <identifier name="a"/>
      </left>
      <right>
        <identifier name="b"/>
      </right>
    </binaryoperation>
  </while>
</program>
"""
        ),

        # with
        (
        """
        with (document) {
           var a = getElementById('a');
           var b = getElementById('b');
           var c = getElementById('c');
           var c = document.get('c');
         };
        """,
"""
<program>
  <with>
    <identifier name="document"/>
    <statement>
      <block>
        <var name="a">
          <functioncall>
            <function>
              <identifier name="getElementById"/>
            </function>
            <arguments>
              <string>a</string>
            </arguments>
          </functioncall>
        </var>
        <var name="b">
          <functioncall>
            <function>
              <identifier name="getElementById"/>
            </function>
            <arguments>
              <string>b</string>
            </arguments>
          </functioncall>
        </var>
        <var name="c">
          <functioncall>
            <function>
              <identifier name="getElementById"/>
            </function>
            <arguments>
              <string>c</string>
            </arguments>
          </functioncall>
        </var>
        <var name="c">
          <functioncall>
            <function>
              <dotaccessor>
                <object>
                  <identifier name="document"/>
                </object>
                <property>
                  <identifier name="get"/>
                </property>
              </dotaccessor>
            </function>
            <arguments>
              <string>c</string>
            </arguments>
          </functioncall>
        </var>
      </block>
    </statement>
  </with>
  <empty>;</empty>
</program>
"""
        ),

        # label
        (
        r"""
        loop1: for (var a = 0; a < 10; a++) {
           if (a == 4) {
               break loop1; // Stops after the 4th attempt
           }
           alert('a = ' + a);
           loop2: for (var b = 0; b < 10; ++b) {
              if (b == 3) {
                 continue loop2; // Number 3 is skipped
              }
              if (b == 6) {
                 continue loop1; // Continues the first loop, 'finished' is not shown
              }
              alert('b = ' + b);
           }
           alert('finished')
        }
        block1: {
            alert('hello'); // Displays 'hello'
            break block1;
            alert('world'); // Will never get here
        }
        """,
"""
<program>
  <label name="loop1">
    <statement>
      <for>
        <init>
          <var name="a">
            <number value="0"/>
          </var>
        </init>
        <condition>
          <binaryoperation operation="&lt;">
            <left>
              <identifier name="a"/>
            </left>
            <right>
              <number value="10"/>
            </right>
          </binaryoperation>
        </condition>
        <post>
          <postfix operation="++">
            <identifier name="a"/>
          </postfix>
        </post>
        <statement>
          <block>
            <if>
              <predicate>
                <binaryoperation operation="==">
                  <left>
                    <identifier name="a"/>
                  </left>
                  <right>
                    <number value="4"/>
                  </right>
                </binaryoperation>
              </predicate>
              <then>
                <block>
                  <break>
                    <identifier name="loop1"/>
                  </break>
                </block>
              </then>
            </if>
            <functioncall>
              <function>
                <identifier name="alert"/>
              </function>
              <arguments>
                <binaryoperation operation="+">
                  <left>
                    <string>a = </string>
                  </left>
                  <right>
                    <identifier name="a"/>
                  </right>
                </binaryoperation>
              </arguments>
            </functioncall>
            <label name="loop2">
              <statement>
                <for>
                  <init>
                    <var name="b">
                      <number value="0"/>
                    </var>
                  </init>
                  <condition>
                    <binaryoperation operation="&lt;">
                      <left>
                        <identifier name="b"/>
                      </left>
                      <right>
                        <number value="10"/>
                      </right>
                    </binaryoperation>
                  </condition>
                  <post>
                    <unaryoperation operation="++">
                      <identifier name="b"/>
                    </unaryoperation>
                  </post>
                  <statement>
                    <block>
                      <if>
                        <predicate>
                          <binaryoperation operation="==">
                            <left>
                              <identifier name="b"/>
                            </left>
                            <right>
                              <number value="3"/>
                            </right>
                          </binaryoperation>
                        </predicate>
                        <then>
                          <block>
                            <continue>
                              <identifier name="loop2"/>
                            </continue>
                          </block>
                        </then>
                      </if>
                      <if>
                        <predicate>
                          <binaryoperation operation="==">
                            <left>
                              <identifier name="b"/>
                            </left>
                            <right>
                              <number value="6"/>
                            </right>
                          </binaryoperation>
                        </predicate>
                        <then>
                          <block>
                            <continue>
                              <identifier name="loop1"/>
                            </continue>
                          </block>
                        </then>
                      </if>
                      <functioncall>
                        <function>
                          <identifier name="alert"/>
                        </function>
                        <arguments>
                          <binaryoperation operation="+">
                            <left>
                              <string>b = </string>
                            </left>
                            <right>
                              <identifier name="b"/>
                            </right>
                          </binaryoperation>
                        </arguments>
                      </functioncall>
                    </block>
                  </statement>
                </for>
              </statement>
            </label>
            <functioncall>
              <function>
                <identifier name="alert"/>
              </function>
              <arguments>
                <string>finished</string>
              </arguments>
            </functioncall>
          </block>
        </statement>
      </for>
    </statement>
  </label>
  <label name="block1">
    <statement>
      <block>
        <functioncall>
          <function>
            <identifier name="alert"/>
          </function>
          <arguments>
            <string>hello</string>
          </arguments>
        </functioncall>
        <break>
          <identifier name="block1"/>
        </break>
        <functioncall>
          <function>
            <identifier name="alert"/>
          </function>
          <arguments>
            <string>world</string>
          </arguments>
        </functioncall>
      </block>
    </statement>
  </label>
</program>
"""
        ),

        # functions
        (
        """
        function foo(p) {
            p = "bar";
        }
        """,
"""
<program>
  <funcdecl name="foo">
    <parameters>
      <identifier name="p"/>
    </parameters>
    <body>
      <assign operator="=">
        <left>
          <identifier name="p"/>
        </left>
        <right>
          <string>bar</string>
        </right>
      </assign>
    </body>
  </funcdecl>
</program>
"""
        ),
        (
        """
        function hello() {
            alert('world');
        }
        """,
"""
<program>
  <funcdecl name="hello">
    <parameters/>
    <body>
      <functioncall>
        <function>
          <identifier name="alert"/>
        </function>
        <arguments>
          <string>world</string>
        </arguments>
      </functioncall>
    </body>
  </funcdecl>
</program>
"""
        ),
        (
        """
        var anon = function() {
            alert('I am anonymous');
        };
        """,
"""
<program>
  <var name="anon">
    <funcexpr>
      <identifier/>
      <parameters/>
      <body>
        <functioncall>
          <function>
            <identifier name="alert"/>
          </function>
          <arguments>
            <string>I am anonymous</string>
          </arguments>
        </functioncall>
      </body>
    </funcexpr>
  </var>
</program>
"""
        ),
        (
        """
        anon();
        """,
"""
<program>
  <functioncall>
    <function>
      <identifier name="anon"/>
    </function>
    <arguments/>
  </functioncall>
</program>
"""
        ),
        (
        """
        setTimeout(function() {
            alert('hello');
        }, 1000)
        """,
"""
<program>
  <functioncall>
    <function>
      <identifier name="setTimeout"/>
    </function>
    <arguments>
      <funcexpr>
        <identifier/>
        <parameters/>
        <body>
          <functioncall>
            <function>
              <identifier name="alert"/>
            </function>
            <arguments>
              <string>hello</string>
            </arguments>
          </functioncall>
        </body>
      </funcexpr>
      <number value="1000"/>
    </arguments>
  </functioncall>
</program>
"""
        ),
        (
        """
        (function() {
            alert('foo');
        }());
        """,
"""
<program>
  <functioncall>
    <function>
      <funcexpr>
        <identifier/>
        <parameters/>
        <body>
          <functioncall>
            <function>
              <identifier name="alert"/>
            </function>
            <arguments>
              <string>foo</string>
            </arguments>
          </functioncall>
        </body>
      </funcexpr>
    </function>
    <arguments/>
  </functioncall>
</program>
"""
        ),

        # get/set
        (
        """
        var obj = {
          get latest () {
            return "latest";
          }
        }
        """,
"""
<program>
  <var name="obj">
    <object>
      <get>
        <property>
          <identifier name="latest"/>
        </property>
        <body>
          <return>
            <string>latest</string>
          </return>
        </body>
      </get>
    </object>
  </var>
</program>
"""
        ),
        (
        """
        delete obj.latest;
        """,
"""
<program>
  <unaryoperation operation="delete">
    <dotaccessor>
      <object>
        <identifier name="obj"/>
      </object>
      <property>
        <identifier name="latest"/>
      </property>
    </dotaccessor>
  </unaryoperation>
</program>
"""
        ),
        (
        """
        var o = {
          set current (str) {
            return this.log[this.log.length] = str;
          },
          log: []
        }
        """,
"""
<program>
  <var name="o">
    <object>
      <set>
        <body>
          <return>
            <assign operator="=">
              <left>
                <bracketaccessor>
                  <object>
                    <dotaccessor>
                      <object>
                        <identifier>this</identifier>
                      </object>
                      <property>
                        <identifier name="log"/>
                      </property>
                    </dotaccessor>
                  </object>
                  <property>
                    <dotaccessor>
                      <object>
                        <dotaccessor>
                          <object>
                            <identifier>this</identifier>
                          </object>
                          <property>
                            <identifier name="log"/>
                          </property>
                        </dotaccessor>
                      </object>
                      <property>
                        <identifier name="length"/>
                      </property>
                    </dotaccessor>
                  </property>
                </bracketaccessor>
              </left>
              <right>
                <identifier name="str"/>
              </right>
            </assign>
          </return>
        </body>
      </set>
      <property name="log">
        <array/>
      </property>
    </object>
  </var>
</program>
"""
        ),

    ]

    for snippet, expected in jscode_snippets:
        print "---------------------------------------------------------"
        print snippet
        js = js2xml.parse(snippet)
        output = js2xml.pretty_print(js).strip()
        assert_equal(output, expected.strip(), "got\n%s\nexpected:\n%s" % (output, expected))

# Walk the paginated Weibo feed of the "cctvxinwen" account (pages 1..459),
# pull the feed HTML that is embedded inside an inline <script> block, and
# clean up the text of every post found on the page.
# NOTE(review): `requests`, `etree`, `js2xml` and `headers` are not defined in
# this snippet; presumably they are imported/defined earlier -- confirm.
for i in range(1,460):
    # The page number is substituted into the `page={}` query parameter.
    url = 'https://weibo.com/cctvxinwen?is_search=0&visible=0&is_all=1&is_tag=0&profile_ftype=1&page={}#feedtop'.format(i)
    r = requests.get(url, headers=headers)
    # WB_text W_f14
    # Force UTF-8 decoding of the body before reading `r.text`.
    r.encoding = 'utf-8'

    response = etree.HTML(r.text)
    # print(r.text)
    # Collect the text content of every <script> element in the page.
    script_list = response.xpath("//script/text()")
    # Keep only the scripts containing the FM.view(...) call for the
    # profile feed ("Pl_Official_MyProfileFeed...").
    filter_script = [script for script in script_list if
                     script.find('FM.view({"ns":"pl.content.homeFeed.index","domid":"Pl_Official_MyProfileFeed') != -1]
    # print(filter_script)
    # NOTE(review): no `except`/`finally` clause is visible for this `try`;
    # the snippet appears to be cut off after the last line below.
    try:
        # Parse the first matching script as JavaScript.
        # `filter_script[0]` raises IndexError when no script matched.
        script_text = js2xml.parse(filter_script[0], encoding='utf-8', debug=False)
        # print(script_list[-1])
        # Serialize the parsed script, then re-parse that output with lxml so
        # it can be queried with XPath.
        script_tree = js2xml.pretty_print(script_text)
        # print(script_tree)
        selector = etree.HTML(script_tree)
        # Take the first string value stored under a property named "html".
        div_selector = selector.xpath("//program//property[@name='html']/string/text()")[0]
        # That string is itself parsed as HTML.
        div_tree_se = etree.HTML(div_selector)
        text_selectors = div_tree_se.xpath("//div[@class='WB_text W_f14']")
        text_selectors_full = div_tree_se.xpath("//div[@node-type='feed_list_content_full']")
        # `text_selectors_full` is only printed; it is not used further in the
        # visible code.
        print(text_selectors_full)
        for text_se in text_selectors:
            # Join the div's direct text nodes and strip newlines and spaces.
            text = ''.join(text_se.xpath('./text()')).replace('\n', '').replace(' ', '')
            # When both brackets are present, keep the segment that follows
            # the first '】' (up to the next '】', if there is one).
            if (text.find('【') != -1 and text.find('】') != -1):
                text = text.split("】")[1]
            # Drop a leading ',' or '?'.
            # NOTE(review): `text[0]` raises IndexError if `text` is empty at
            # this point -- confirm whether the missing handler covers that.
            if (text[0] == ',' or text[0] == "?"):
                text = text[1:]
Example #30
0
def test_json():
    """Check ``js2xml.jsonlike.getall`` against a table of JavaScript snippets.

    Every entry of ``cases`` pairs a JavaScript source string with the exact
    list of Python values that ``getall`` must return for the parsed tree.
    """
    cases = [
        # Two arrays of string literals.
        (
            r"""
            var arr1 = ["a","b","c"];
            var arr2 = ["d","e","f"];
            """,
            [
                ["a", "b", "c"],
                ["d", "e", "f"],
            ],
        ),
        # ``null`` elements are expected back as ``None``.
        (
            r"""
            var arr1 = ["a", null, "c"];
            var arr2 = [null, "e", null];
            """,
            [
                ["a", None, "c"],
                [None, "e", None],
            ],
        ),
        # ``undefined`` elements are expected back as the string "undefined".
        (
            r"""
            var arr1 = ["a", undefined, "c"];
            var arr2 = [undefined, "e", null];
            """,
            [
                ["a", "undefined", "c"],
                ["undefined", "e", None],
            ],
        ),
        # A lone numeric assignment yields no results.
        (
            r"""
            var i = -3.14;
            """,
            [],
        ),
        # Object with a quoted key.
        (
            r"""
            money = {
                'quarters': 20
            };
            """,
            [{"quarters": 20}],
        ),
        # Object with an unquoted key.
        (
            r"""
            money = {
                quarters: 20
            };
            """,
            [{"quarters": 20}],
        ),
        # A property whose value is an identifier is expected back as the
        # identifier's name.
        (
            r"""
            currency = 'USD',
            money = {
                "value": 20,
                "currency": currency
            };
            """,
            [{"currency": "currency", "value": 20}],
        ),
        # Mixed key spellings and both integer and float values.
        (
            r"""
            t = {a: "3", "b": 3, "3": 3.0};
            """,
            [{"3": 3.0, "a": "3", "b": 3}],
        ),
        # An object holding a function member yields no results.
        (
            r"""
            money = {
                'quarters': 10,
                'addQuarters': function(amount) {
                    this.quarters += amount;
                }
            };
            money.addQuarters(10);
            """,
            [],
        ),
        # With a function member present, only the nested array and the
        # nested object are expected back.
        (
            r"""
            var money = {
                'quarters': 10,
                'something': [1,2,3,4],
                'somethingelse': {'nested': [5,6,7,8]},
                'addQuarters': function(amount) {
                    this.quarters += amount;
                }
            };
            money.addQuarters(10);
            """,
            [
                [1, 2, 3, 4],
                {"nested": [5, 6, 7, 8]},
            ],
        ),
        # A single object with nested containers comes back whole.
        (
            r"""
            var store = {
                'apples': 10,
                'carrots': [1,2,3,4],
                'chicken': {'eggs': [5,6,7,8]}
            };
            """,
            [
                {
                    "apples": 10,
                    "carrots": [1, 2, 3, 4],
                    "chicken": {"eggs": [5, 6, 7, 8]},
                },
            ],
        ),
        # Two such objects, the second carrying boolean literals.
        (
            r"""
            var store1 = {
                'apples': 10,
                'carrots': [1,2,3,4],
                'chicken': {'eggs': [5,6,7,8]}
            };
            var store2 = {
                'tomatoes': 20,
                'potatoes': [9, false, 7, 6],
                'spinach': {'cans': [true, 2]}
            };
            """,
            [
                {
                    "apples": 10,
                    "carrots": [1, 2, 3, 4],
                    "chicken": {"eggs": [5, 6, 7, 8]},
                },
                {
                    "potatoes": [9, False, 7, 6],
                    "spinach": {"cans": [True, 2]},
                    "tomatoes": 20,
                },
            ],
        ),
    ]

    for source, wanted in cases:
        found = js2xml.jsonlike.getall(js2xml.parse(source))
        assert_list_equal(found, wanted)
Example #31
0
    def parsee(self, response):
        """Scrape one venue page into a ``WeddingspotItem`` and log it.

        Reads the venue fields from ``response`` with CSS selectors, strips
        single quotes from every text field, appends one parenthesised,
        single-quoted row to ``data.txt`` and yields the populated item.
        ``self.id`` is used as the item id and then incremented.

        Args:
            response: the page response; it must provide ``css()`` and
                ``url``.

        Yields:
            The populated ``WeddingspotItem``.

        Raises:
            AttributeError: a single-value selector (title, style, zip code,
                description, venue notes) matched nothing, so there is no
                string to clean.
            IndexError: fewer than five detail paragraphs, or fewer than two
                ``.Amenities--row .VenuePage--additional-detail`` blocks.
            ValueError: the preloaded-state script lacks ``website``, a
                following ``http`` or a closing double quote.
        """

        def strip_quotes(text):
            # Single quotes are removed so a value cannot terminate the
            # '...'-quoted fields of the row appended to data.txt.
            return text.replace("'", "")

        def joined_entries(block):
            # Comma-join the paragraph texts of one additional-detail block,
            # skipping the '- ' placeholder entries.
            entries = block.css(
                '.VenuePage--additional-detail p::text').extract()
            return strip_quotes(
                ','.join(entry for entry in entries if entry != '- '))

        item = WeddingspotItem()

        item['id'] = self.id
        self.id = self.id + 1

        item['image_urls'] = response.css(
            '.slick-slide img::attr(src)').extract()

        item['venue_title'] = strip_quotes(response.css(
            '.Panel--className .SecondaryCTA--venueName::text'
        ).extract_first())

        price_parts = response.css(
            '.Panel--className .VenuePrimaryCTA--className h3::text'
        ).extract()
        item['price'] = strip_quotes(' '.join(price_parts))

        # Style, capacity, services and location all come from the same
        # paragraph list, so the selector is evaluated only once.
        detail_nodes = response.css(
            '.VenuePage--main-details .VenuePage--detail-text-container p::text'
        )
        item['style'] = strip_quotes(detail_nodes.extract_first())
        details = detail_nodes.extract()
        item['guest_capacity'] = strip_quotes(details[1])
        item['services'] = strip_quotes(','.join(details[2:4]))
        item['location'] = strip_quotes(details[4])

        item['zip_code'] = strip_quotes(response.css(
            '.VenuePage--main-details .VenuePage--detail-text-container span::text'
        ).extract_first())

        item['description'] = strip_quotes(response.css(
            '.VenuePage--description p::text').extract_first())

        item['venue_notes'] = strip_quotes(response.css(
            'p.VenuePage--additional-detail::text').extract_first())

        item['url'] = response.url

        # The first additional-detail block holds the amenities, the second
        # the restrictions.
        detail_blocks = response.css(
            '.Amenities--row .VenuePage--additional-detail')
        item['amenities'] = joined_entries(detail_blocks[0])
        item['restrictions'] = joined_entries(detail_blocks[1])

        javascript = response.css(
            "script:contains('window.__PRELOADED_STATE__')::text").get()
        # NOTE(review): `xml` and `selector` are never read afterwards. They
        # are kept because parsing can raise on a malformed script and
        # dropping the calls would change when this callback fails -- confirm
        # whether they can be removed.
        xml = lxml.etree.tostring(js2xml.parse(javascript), encoding='unicode')
        selector = Selector(xml)
        # The venue's website is taken straight from the script text: the
        # first "http..." run after the first "website", up to the next
        # double quote.
        after_website = javascript[javascript.index('website'):]
        from_http = after_website[after_website.index('http'):]
        item['web_url'] = from_http[:from_http.index('"')]

        # The context manager closes the file even if the write fails.
        with open("data.txt", "a") as f:
            f.write(
                f"({item['id']},'{item['venue_title']}','{item['price']}','{item['style']}','{item['guest_capacity']}','{item['services']}','{item['location']}','{item['zip_code']}','{item['description']}','{item['venue_notes']}','{item['url']}','{item['amenities']}','{item['restrictions']}','{item['web_url']}'),\n"
            )

        yield item