Esempio n. 1
0
def test_parslepy_init_selector_handler_error():
    """Build a Parselet with a SelectorHandler subclass that defines no methods.

    ``MyHandler`` only adds a dummy class attribute on top of
    ``parslepy.selectors.SelectorHandler``.

    NOTE(review): the test name suggests construction is expected to fail,
    but no assertion or decorator enforcing that is visible here.
    """
    class MyHandler(parslepy.selectors.SelectorHandler):
        _dummy = True

    script = {"title": "h1", "subtitle": "//h2"}
    handler = MyHandler()
    parselet = parslepy.Parselet(script, selector_handler=handler)
Esempio n. 2
0
def compare_extracted_output(root, input_parselet, expected_output,
                             debug=False):
    """Extract from *root* with a strict Parselet and compare with a dict.

    :param root: document passed to ``Parselet.extract``
    :param input_parselet: parselet script used to build the Parselet
    :param expected_output: dict the extraction result must equal
    :param debug: forwarded to the Parselet constructor
    """
    extracted = parslepy.Parselet(
        input_parselet, strict=True, debug=debug).extract(root)
    assert_dict_equal(extracted, expected_output)
Esempio n. 3
0
def test_parslepy_parse_html_file():
    """Parse the bundled validator.w3.org HTML sample from an open file.

    The single ``h1`` rule is expected to yield the page title.
    """
    parselet = parslepy.Parselet({"title": "h1"})
    expected = {'title': 'Markup Validation Service'}

    dirname = os.path.dirname(os.path.abspath(__file__))
    # A context manager guarantees the data file is closed even when parsing
    # raises; previously the handle was never closed.
    with open(os.path.join(dirname, 'data/validator.w3.org.html')) as fp:
        extracted = parselet.parse(fp)
    assert_dict_equal(extracted, expected)
    def test_broken(self):
        """
        A broken snippet is expected to raise an Exception

        NOTE(review): nothing visible in this method enforces that
        expectation (no assertion, no decorator); confirm how the failure
        is checked.
        """
        rules = {"stuff": {"broken": "spanner"}}

        extracted = parslepy.Parselet(
            rules, strict=True, debug=self.debug).extract(self.root)
        if self.debug:
            pprint.pprint(extracted)
Esempio n. 5
0
def test_parslepy_defaultparse_xml_file():
    """Parse the bundled iTunes top-albums feed with the default handler.

    The ``atom`` prefix is registered on the selector handler so that the
    ``//atom:id`` XPath rule can resolve the feed id.
    """
    parselet_script = {"id": "//atom:id"}
    dsh = parslepy.selectors.DefaultSelectorHandler(
                namespaces={'atom': 'http://www.w3.org/2005/Atom'}
            )
    expected = {
        'id': 'https://itunes.apple.com/us/rss/topalbums/limit=10/explicit=true/xml'
    }

    parselet = parslepy.Parselet(parselet_script, selector_handler=dsh)

    dirname = os.path.dirname(os.path.abspath(__file__))
    # Open the feed only once everything else is built, and close it
    # deterministically; previously the handle was never closed.
    with open(os.path.join(dirname, 'data/itunes.topalbums.rss')) as fp:
        extracted = parselet.parse(fp, parser=lxml.etree.XMLParser())
    assert_dict_equal(extracted, expected)
    def test_broken_but_optional(self):
        """
        An optional key with broken inner content gives an empty dict

        The nested object may fail to match any selector; because its key
        is marked optional ("stuff?"), the expected result keeps the key
        with an empty dict as value.
        """
        rules = {"stuff?": {"perhaps": "spanner"}}
        wanted = {'stuff': {}}

        extractor = parslepy.Parselet(rules, strict=True, debug=self.debug)
        result = extractor.extract(self.root)
        if self.debug:
            pprint.pprint(result)
        assert_dict_equal(result, wanted)
Esempio n. 7
0
def test_parslepy_parse_html_fromstring():
    """Parse an inline XHTML document with ``Parselet.parse_fromstring``.

    Two CSS rules are applied: ``h1`` for the title text and ``p[id] @id``
    for the id attribute of the first paragraph carrying one.
    """

    htmldoc = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  <head>
    <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
    <title>The W3C Markup Validation Service</title>
    <link rev="made" href="mailto:[email protected]" />
    <link rel="shortcut icon" href="http://www.w3.org/2008/site/images/favicon.ico" type="image/x-icon" />
    <link rev="start" href="./" title="Home Page" />
    <style type="text/css" media="all">
      @import "./style/base";
    </style>
    <script type="text/javascript" src="scripts/combined"></script>
    <meta name="keywords" content="HTML, HyperText Markup Language, Validation,
      W3C Markup Validation Service" />
    <meta name="description" content="W3C's easy-to-use
      markup validation service, based on SGML and XML parsers." />

    <link rel="alternate" type="application/atom+xml" href="http://www.w3.org/QA/Tools/validator-whatsnew.atom" />
  </head>
  <body>
   <div id="banner">
    <h1 id="title">
      <a href="http://www.w3.org/"><img alt="W3C" width="110" height="61" id="logo" src="./images/w3c.png" /></a>
			<a href="./"><span>Markup Validation Service</span></a>
      </h1>
      <p id="tagline">Check the markup (HTML, XHTML, ...) of Web documents</p>
   </div>
  </body>
</html>
    """

    # One text rule and one attribute rule against the document above.
    parselet = parslepy.Parselet(
        {
            "title": "h1",
            "pid": "p[id] @id"
        })
    expected = {
        'title': 'Markup Validation Service',
        'pid': 'tagline'
    }

    extracted = parselet.parse_fromstring(htmldoc)
    assert_dict_equal(extracted, expected)
    def test_one_required_broken_one_matching(self):
        """
        Nested object with two required keys, one using the selector
        "paragraph" and one using "h1"

        The expected extraction result is an empty dict under "stuff".
        """
        rules = {
            "stuff": {
                "nothing": "paragraph",
                "title": "h1",
            },
        }
        wanted = {'stuff': {}}

        extractor = parslepy.Parselet(rules, strict=True, debug=self.debug)
        result = extractor.extract(self.root)
        if self.debug:
            pprint.pprint(result)
        assert_dict_equal(result, wanted)
Esempio n. 9
0
def test_parslepy_init_default():
    """Check the state of a Parselet built from a script with no options.

    Verifies that the original script is kept as ``parselet.parselet``,
    that the compiled tree is a ``ParsleyNode`` with one entry per script
    key (``ParsleyContext`` keys mapped to ``Selector`` values), and that
    the selector handler is a ``DefaultSelectorHandler``.
    """
    script = {"title": "h1", "subtitle": "//h2"}
    parselet = parslepy.Parselet(script)

    assert_dict_equal(parselet.parselet, script)

    tree = parselet.parselet_tree
    assert_is_instance(tree, parslepy.base.ParsleyNode)
    assert_equal(len(tree), len(script), "not the same number of keys")

    for context, selector in tree.items():
        assert_is_instance(context, parslepy.base.ParsleyContext)
        assert_is_instance(selector, parslepy.selectors.Selector)

    # no selector handler was supplied to the constructor
    assert_is_instance(parselet.selector_handler,
                       parslepy.base.DefaultSelectorHandler)
    def test_one_required_broken(self):
        """
        Nested object mixing one required key and two optional keys

        The expected extraction result is an empty dict under "stuff".
        """
        rules = {
            "stuff": {
                "nothing": "paragraph",
                "nothing2?": "spanner",
                "nothing3?": "bodyboard",
            },
        }
        wanted = {'stuff': {}}

        extractor = parslepy.Parselet(rules, strict=True, debug=self.debug)
        result = extractor.extract(self.root)
        if self.debug:
            pprint.pprint(result)
        assert_dict_equal(result, wanted)
Esempio n. 11
0
def test_parslepy_init_selector_handler_error():
    """Build a Parselet with a custom SelectorHandler subclass.

    ``MyHandler`` provides ``make``, ``select`` and ``extract``; the test
    asserts that the Parselet keeps the supplied handler instance type.
    """
    class MyHandler(parslepy.selectors.SelectorHandler):
        def make(self, selection):
            # every selection compiles to the same fixed XPath
            return parslepy.selectors.Selector(lxml.etree.XPath("body"))

        def select(self, document, selector):
            return None

        def extract(self, document, selector):
            return None

    script = {"title": "h1", "subtitle": "//h2"}
    handler = MyHandler()

    parselet = parslepy.Parselet(script, selector_handler=handler)
    assert_is_instance(parselet.selector_handler, MyHandler)
 def test_complicated(self):
     """
     Nested objects combining required and optional keys

     Expects the optional top-level "nothing" key to be absent, the
     required "title" object to carry the h1 text, and the optional
     "novalue" object to be an empty dict.
     """
     rules = {
         "stuff": {
             "nothing?": "paragraph",
             "title": {
                 "value": "h1",
                 "novalue?": {"maybe": "h47"},
             },
         },
     }
     wanted = {
         'stuff': {
             'title': {
                 'novalue': {},
                 'value': 'Creative Commons License Deed',
             },
         },
     }

     extractor = parslepy.Parselet(rules, strict=True, debug=self.debug)
     result = extractor.extract(self.root)
     if self.debug:
         pprint.pprint(result)
     assert_dict_equal(result, wanted)
    def test_all_optional(self):
        """
        Required parent whose children are all optional

        Every child key is marked optional; the expected result keeps the
        required parent key with an empty dict as value.
        """
        rules = {
            "stuff": {
                "nothing1?": "h24",
                "nothing2?": "spanner",
                "nothing3?": "bodyboard",
            },
        }
        wanted = {'stuff': {}}

        extractor = parslepy.Parselet(rules, strict=True, debug=self.debug)
        result = extractor.extract(self.root)
        if self.debug:
            pprint.pprint(result)
        assert_dict_equal(result, wanted)
    def test_one_required_exists(self):
        """
        One required key alongside two optional keys

        The expected result contains only the required key with the h1
        text; the two optional keys are absent from it.
        """
        rules = {
            "stuff": {
                "nothing": "h1",
                "nothing2?": "spanner",
                "nothing3?": "bodyboard",
            },
        }
        wanted = {'stuff': {'nothing': 'Creative Commons License Deed'}}

        extractor = parslepy.Parselet(rules, strict=True, debug=self.debug)
        result = extractor.extract(self.root)
        if self.debug:
            pprint.pprint(result)
        assert_dict_equal(result, wanted)
Esempio n. 15
0
def test_parslepy_xpathparse_xml_fromstring():
    """Parse an XML string with the XPath-only selector handler.

    The rules are scoped to ``//atom:feed/atom:entry`` through an anonymous
    ``--(...)`` key; the ``atom`` and ``im`` prefixes are registered on the
    handler. The XML text comes from ``xmldoc``, which is defined outside
    this function.
    """
    namespaces = {
        'atom': 'http://www.w3.org/2005/Atom',
        'im': 'http://itunes.apple.com/rss',
    }
    handler = parslepy.selectors.XPathSelectorHandler(namespaces=namespaces)

    entry_rules = {
        "title": "atom:title",
        "name": "im:name",
        "id": "atom:id/@im:id",
        "images(im:image)": [{
            "height": "@height",
            "url": ".",
        }],
        "releasedate": "im:releaseDate",
    }
    rules = {"--(//atom:feed/atom:entry)": entry_rules}

    wanted = {
        'id': '647928068',
        'images': [
            {
                'height': '55',
                'url': 'http://a815.phobos.apple.com/us/r30/Features/v4/02/cc/73/02cc7370-693c-f0fe-505b-bb84043ce186/dj.pehmruyt.55x55-70.jpg',
            },
            {
                'height': '60',
                'url': 'http://a1537.phobos.apple.com/us/r30/Features/v4/02/cc/73/02cc7370-693c-f0fe-505b-bb84043ce186/dj.pehmruyt.60x60-50.jpg',
            },
            {
                'height': '170',
                'url': 'http://a976.phobos.apple.com/us/r30/Features/v4/02/cc/73/02cc7370-693c-f0fe-505b-bb84043ce186/dj.pehmruyt.170x170-75.jpg',
            },
        ],
        'name': 'The Gifted',
        'title': 'The Gifted - Wale',
    }

    parselet = parslepy.Parselet(rules, selector_handler=handler)
    result = parselet.parse_fromstring(xmldoc, parser=lxml.etree.XMLParser())
    assert_dict_equal(result, wanted)
Esempio n. 16
0
def test_to_xml():
    """Check the ``parslepy:xml()`` extension on the bundled iTunes feed.

    The first entry's ``im:contentType`` element is expected to come back
    serialized as an XML string, namespace declarations included.
    """
    parselets = (({
        "first":
        "parslepy:xml(//atom:feed/atom:entry[1]/im:contentType)"
    }, {
        'first':
        '<im:contentType xmlns:im="http://itunes.apple.com/rss" xmlns="http://www.w3.org/2005/Atom" term="Music" label="Music"><im:contentType term="Album" label="Album"/></im:contentType>'
    }), )
    dirname = os.path.dirname(os.path.abspath(__file__))
    # Close the feed file as soon as the tree is built; previously the
    # handle passed to lxml was never closed.
    with open(os.path.join(dirname, 'data/itunes.topalbums.rss')) as fp:
        root = lxml.etree.parse(fp, parser=lxml.etree.XMLParser()).getroot()
    xsh = parslepy.selectors.XPathSelectorHandler(
        namespaces={
            'atom': 'http://www.w3.org/2005/Atom',
            'im': 'http://itunes.apple.com/rss'
        })
    for input_parselet, expected_output in parselets:
        parselet = parslepy.Parselet(input_parselet,
                                     selector_handler=xsh,
                                     strict=True)
        extracted = parselet.extract(root)
        assert_dict_equal(extracted, expected_output)
Esempio n. 17
0
    def get_details(self):
        """
        Scrape the question listing page at ``self.next_list_url``.

        Every row of ``page['papers']`` with exactly 11 cells is converted
        to a dict and appended to ``self.details``. Afterwards
        ``self.next_list_url`` is set to the target of the 'Next' link, or
        to None when the page has no such link.

        Raises:
            Exception: when the 'Next' link resolves to the URL that was
                just fetched (a caller following ``next_list_url`` would
                presumably never terminate).
        """
        # Keep the URL being fetched: the attribute is cleared below, and the
        # loop check has to compare against this value. Comparing with the
        # freshly cleared attribute (as before) could never detect a loop.
        current_url = self.next_list_url

        # Parenthesised single-argument form is valid on Python 2 and 3.
        print('Questions (%s)' % current_url)

        contents = self.url_get(current_url)

        p = parslepy.Parselet(self.question_parsing_rules)
        page = p.parse_fromstring(contents)

        for row in page['papers']:
            cells = row['cell']
            if len(cells) != 11:
                continue

            url = cells[8]['url']
            root, ext = os.path.splitext(os.path.split(url)[1])
            self.details.append({
                "name": cells[0]['contents'],
                "language": cells[6]['contents'],
                "url": self.base_url + url,
                "house": cells[4]['contents'],
                "date": cells[2]['contents'],
                "type": ext[1:],

                # This is also in the pdf's metadata, but it's easier to
                # get it from here
                "document_number": int(root.split('_')[0]),
            })

        # check for next page of links (or None if not found)
        self.next_list_url = None
        for cell in page['next']:
            if cell['contents'] == 'Next':
                next_url = self.base_url + cell['url']
                if next_url == current_url:
                    raise Exception(
                        "Possible url loop detected, next url '{0}' has not changed."
                        .format(next_url))
                self.next_list_url = next_url
                break
Esempio n. 18
0
def test_parslepy_init_wrong_selector_handler():
    """Build a Parselet passing a plain function as ``selector_handler``.

    NOTE(review): the name suggests the constructor is expected to reject
    the handler, but no assertion or decorator is visible here.
    """
    script = {"title": "h1", "subtitle": "//h2"}
    parselet = parslepy.Parselet(script, selector_handler=lambda s: s)
Esempio n. 19
0
def test_userdefined_extensions():
    """Register two user-defined XPath extension functions and use them.

    ``myext:attrnames`` returns the names of the attributes it receives;
    ``myext:absurl`` joins each received value with the ``context`` passed
    to ``Parselet.parse``. Both are checked against the bundled
    validator.w3.org HTML sample.
    """
    # urljoin lives in a different module on Python 2 and Python 3
    try:
        import urlparse  # Python 2.x
    except ImportError:
        import urllib.parse as urlparse

    def myattrnames(ctx, xpctx, attributes, *args):
        return [attribute.attrname for attribute in attributes]

    # extension to build full URLs from @href or @src attributes
    def absurl(ctx, xpctx, attributes, *args):
        return [urlparse.urljoin(ctx, link) for link in attributes]

    rules = {
        "head_meta(head/meta)": [{
            "attrnames": ["myext:attrnames(@*)"],
            "attrvals": ["@*"],
        }],
        "img_links": ["//img/@src"],
        "img_abslinks": ["myext:absurl(//img/@src)"],
    }
    wanted = {
        'head_meta': [
            {
                'attrnames': ['http-equiv', 'content'],
                'attrvals': ['Content-Type', 'text/html;charset=utf-8'],
            },
            {
                'attrnames': ['name', 'content'],
                'attrvals': [
                    'keywords',
                    'HTML, HyperText Markup Language, Validation,\n      W3C Markup Validation Service',
                ],
            },
            {
                'attrnames': ['name', 'content'],
                'attrvals': [
                    'description',
                    "W3C's easy-to-use\n      markup validation service, based on SGML and XML parsers.",
                ],
            },
        ],
        'img_abslinks': [
            'http://validator.w3.org/images/w3c.png',
            'http://validator.w3.org/images/arrow-closed.png',
            'http://validator.w3.org/images/arrow-closed.png',
            'http://validator.w3.org/images/arrow-closed.png',
            'http://www.w3.org/Icons/VSlogo',
            'http://www.w3.org/Icons/WWW/w3c_home_nb',
            'http://validator.w3.org/images/opensource-55x48.png',
            'http://www.w3.org/QA/Tools/I_heart_validator',
        ],
        'img_links': [
            './images/w3c.png',
            './images/arrow-closed.png',
            './images/arrow-closed.png',
            './images/arrow-closed.png',
            'http://www.w3.org/Icons/VSlogo',
            'http://www.w3.org/Icons/WWW/w3c_home_nb',
            './images/opensource-55x48.png',
            'http://www.w3.org/QA/Tools/I_heart_validator',
        ],
    }
    cases = ((rules, wanted),)

    # the "myext" prefix maps to the namespace the extensions are keyed on
    handler = parslepy.DefaultSelectorHandler(
        namespaces={"myext": "myextension"},
        extensions={
            ("myextension", "absurl"): absurl,
            ("myextension", "attrnames"): myattrnames,
        })

    here = os.path.dirname(os.path.abspath(__file__))
    sample_path = os.path.join(here, 'data/validator.w3.org.html')
    for script, expected in cases:
        parselet = parslepy.Parselet(script,
                                     selector_handler=handler,
                                     strict=True)
        result = parselet.parse(sample_path,
                                context='http://validator.w3.org/')
        assert_dict_equal(result, expected)
Esempio n. 20
0
def test_parslepy_keys():
    """Check ``Parselet.keys()`` for flat and nested parselet scripts.

    Each case pairs a script with the key names ``keys()`` is expected to
    report; the comparison is done on sets, so order is irrelevant.
    """
    cases = [
        # flat script
        (
            {"title": "h1", "subtitle": "//h2"},
            ["title", "subtitle"],
        ),
        # anonymous "--" scopes nested four deep, outermost without selector
        (
            {
                "--": {
                    "--(#banner)": {
                        "--(#title)": {
                            "--(a span)": {"title": "."},
                        },
                    },
                },
            },
            ["title"],
        ),
        # same nesting, outermost scope with a selector
        (
            {
                "--(#header)": {
                    "--(#banner)": {
                        "--(#title)": {
                            "--(a span)": {"title": "."},
                        },
                    },
                },
            },
            ["title"],
        ),
        # nested scopes next to a regular list key
        (
            {
                "--": {
                    "--(#banner)": {
                        "--(#title)": {
                            "--(a span)": {"title": "."},
                        },
                    },
                },
                "links": [".//a/@href"],
            },
            ["title", "links"],
        ),
        # regular key plus one scoped key with a different name
        (
            {
                "title": "h1",
                "--(.content)": {"subtitle": ".//h2"},
            },
            ["title", "subtitle"],
        ),
        # scoped key reusing the name of a top-level key
        (
            {
                "title": "h1",
                "--(.content)": {"title": ".//h2"},
                "footer": "parslepy:html(.//div[@class='footer'])",
            },
            ["title", "footer"],
        ),
    ]

    for script, wanted_keys in cases:
        found_keys = parslepy.Parselet(script).keys()
        assert_equal(set(found_keys), set(wanted_keys))
Esempio n. 21
0
import parslepy, urllib2
rules = {"questions(//div[contains(@class,'question-summary')])": [{"title": ".//h3/a", "votes": "div.votes div.mini-counts"}]}
parslepy.Parselet(rules).parse(urllib2.urlopen('http://stackoverflow.com'))
{'questions': [{'title': u'node.js RSS memory grows over time despite fairly consistent heap sizes',
    'votes': u'0'},
    {'title': u'SQL query for count of predicate applied on rows of subquery',
    'votes': u'3'},
    ...]}

import lxml.etree
import parslepy
import pprint
html = """
<!DOCTYPE html>
<html>
<head>
    <title>Sample document to test parslepy</title>
    <meta http-equiv="content-type" content="text/html;charset=utf-8" />
</head>
<body>
<h1 id="main">What&rsquo;s new</h1>
<ul>
    <li class="newsitem"><a href="/article-001.html">This is the first article</a></li>
    <li class="newsitem"><a href="/article-002.html">A second report on something</a></li>
    <li class="newsitem"><a href="/article-003.html">Python is great!</a> <span class="fresh">New!</span></li>
</ul>
</body>
</html>"""
rules = {
     "heading": "h1#main",
     "news(li.newsitem)": [{
Esempio n. 22
0
        "annuncio_url": "div[class=th_box]  a @href",
        "annuncio_desc": "div[class=descr]  p a strong",
        "annuncio_ora": "div[class=date]",
    }],
    "next_page_url":
    ".//a[contains(., 'Avanti')]/@href",
}

detrules = {
    "info(div.annuncio_info li)": [{
        "item": ".",
    }],
    "coord": ".//script[contains(., 'loadMapQuest')]",
}

parselet = parslepy.Parselet(rules)
detparselet = parslepy.Parselet(detrules)

next_url = "http://www.subito.it/annunci-emilia-romagna/vendita/appartamenti/"

while next_url:

    print "fetching", next_url
    current_url = next_url

    # ottiene il contenuto della pagina
    html = requests.get(next_url)

    extracted = parselet.parse_fromstring(html.content)

    for release in extracted.get("annunci"):
Esempio n. 23
0
    def get_details(self):
        """
        Scrape the answer listing page at ``self.next_list_url``.

        Every row of ``page['papers']`` with exactly 11 cells is examined.
        Rows are skipped when the publication date cannot be parsed, when
        the document name does not match ``self.document_name_regex``, when
        the name carries no written number, when the date embedded in the
        name cannot be parsed, or when that date is before 2009-04-22. The
        remaining rows are appended to ``self.details`` as dicts.

        Afterwards ``self.next_list_url`` is set to the target of the
        'Next' link, or to None when the page has no such link.

        Raises:
            Exception: when the 'Next' link resolves to the URL that was
                just fetched (a caller following ``next_list_url`` would
                presumably never terminate).
        """
        # Keep the URL being fetched: the attribute is cleared below, and the
        # loop check has to compare against this value. Comparing with the
        # freshly cleared attribute (as before) could never detect a loop.
        current_url = self.next_list_url
        sys.stdout.write('Answers {0}\n'.format(current_url))

        contents = self.url_get(current_url)
        page = parslepy.Parselet(
            self.answer_parsing_rules).parse_fromstring(contents)

        for row in page['papers']:
            cells = row['cell']
            if len(cells) != 11:
                continue

            url = cells[8]['url']
            types = url.partition(".")
            date_published = cells[2]['contents'].strip()
            try:
                date_published = datetime.datetime.strptime(
                    date_published, '%d %B %Y').date()
            except ValueError:
                # strptime signals an unparseable date with ValueError; a
                # bare except would also swallow KeyboardInterrupt.
                warnings.warn("Failed to parse date (%s)" % date_published)
                continue

            document_name = cells[0]['contents'].strip().upper()

            # match() returns None for a non-matching name; test for that
            # explicitly instead of catching the resulting AttributeError.
            match = self.document_name_regex.match(document_name)
            if match is None:
                if document_name not in self.known_bad_document_names:
                    sys.stdout.write(
                        'SKIPPING bad document_name {0}\n'.format(
                            document_name))
                continue
            document_data = match.groupdict()

            # FIXME - Temporary fix for launch
            # drop anything which doesn't have a written_number
            if not document_data['written_number']:
                continue

            # The President and vice Deputy President have their own
            # oral question sequences.
            president = document_data.pop('president')

            if president == 'P':
                document_data['president_number'] = document_data.pop(
                    'oral_number')
            elif president == 'DP':
                document_data['dp_number'] = document_data.pop(
                    'oral_number')

            document_data.update(
                dict(
                    document_name=document_name,
                    date_published=date_published,
                    language=cells[6]['contents'],
                    url=self.base_url + url,
                    type=types[2],
                ))

            try:
                document_data['date'] = datetime.datetime.strptime(
                    document_data.pop('date_string'),
                    '%y%m%d',
                ).date()
            except (KeyError, TypeError, ValueError):
                # KeyError: no 'date_string' group; TypeError: the group did
                # not participate (None); ValueError: malformed date.
                sys.stdout.write(
                    "BAILING on {0} - problem converting date\n".format(
                        document_name))
                continue

            # We don't want anything from before the 2009 election.
            if document_data['date'] < datetime.date(2009, 4, 22):
                continue

            document_data['year'] = document_data['date'].year

            self.details.append(document_data)

        # check for next page of links (or None if not found)
        self.next_list_url = None
        for cell in page['next']:
            if cell['contents'] == 'Next':
                next_url = self.base_url + cell['url']

                if next_url == current_url:
                    raise Exception(
                        "Possible url loop detected, next url '{0}' has not changed."
                        .format(next_url))

                self.next_list_url = next_url
                break
 def test_itunes_top_albums(self):
     input_parselet, expected_output = (
         {"entries(//atom:feed/atom:entry)": [{
                 "title": "atom:title",
                 "name": "im:name",
                 "id": "atom:id/@im:id",
                 "artist(im:artist)": {
                     "name": ".",
                     "href": "@href",
                 },
                 "images(im:image)": [{
                     "height": "@height",
                     "url": ".",
                 }],
                 #"content": "atom:content[@type='html']"
                 "releasedate": "im:releaseDate",
             }]
         },
         {'entries': [{'artist': {'href': 'https://itunes.apple.com/us/artist/wale/id129335935?uo=2',
                                  'name': 'Wale'},
                       'id': '647928068',
                       'images': [{'height': '55',
                                   'url': 'http://a815.phobos.apple.com/us/r30/Features/v4/02/cc/73/02cc7370-693c-f0fe-505b-bb84043ce186/dj.pehmruyt.55x55-70.jpg'},
                                  {'height': '60',
                                   'url': 'http://a1537.phobos.apple.com/us/r30/Features/v4/02/cc/73/02cc7370-693c-f0fe-505b-bb84043ce186/dj.pehmruyt.60x60-50.jpg'},
                                  {'height': '170',
                                   'url': 'http://a976.phobos.apple.com/us/r30/Features/v4/02/cc/73/02cc7370-693c-f0fe-505b-bb84043ce186/dj.pehmruyt.170x170-75.jpg'}],
                       'name': 'The Gifted',
                       'releasedate': '2013-06-24T00:00:00-07:00',
                       'title': 'The Gifted - Wale'},
                      {'artist': {'href': 'https://itunes.apple.com/us/artist/kanye-west/id2715720?uo=2',
                                  'name': 'Kanye West'},
                       'id': '662392801',
                       'images': [{'height': '55',
                                   'url': 'http://a697.phobos.apple.com/us/r1000/033/Music4/v4/b8/fc/be/b8fcbe49-510d-8afe-7c34-fa268da339f2/UMG_cvrart_00602537439317_01_RGB72_1500x1500_13UAAIM08444.55x55-70.jpg'},
                                  {'height': '60',
                                   'url': 'http://a1419.phobos.apple.com/us/r1000/033/Music4/v4/b8/fc/be/b8fcbe49-510d-8afe-7c34-fa268da339f2/UMG_cvrart_00602537439317_01_RGB72_1500x1500_13UAAIM08444.60x60-50.jpg'},
                                  {'height': '170',
                                   'url': 'http://a1930.phobos.apple.com/us/r1000/033/Music4/v4/b8/fc/be/b8fcbe49-510d-8afe-7c34-fa268da339f2/UMG_cvrart_00602537439317_01_RGB72_1500x1500_13UAAIM08444.170x170-75.jpg'}],
                       'name': 'Yeezus',
                       'releasedate': '2013-06-18T00:00:00-07:00',
                       'title': 'Yeezus - Kanye West'},
                      {'artist': {'href': 'https://itunes.apple.com/us/artist/j-cole/id73705833?uo=2',
                                  'name': 'J Cole'},
                       'id': '651105499',
                       'images': [{'height': '55',
                                   'url': 'http://a537.phobos.apple.com/us/r30/Music2/v4/c5/03/68/c5036883-38b9-702c-baf0-876db639b1f9/886444025935.55x55-70.jpg'},
                                  {'height': '60',
                                   'url': 'http://a1259.phobos.apple.com/us/r30/Music2/v4/c5/03/68/c5036883-38b9-702c-baf0-876db639b1f9/886444025935.60x60-50.jpg'},
                                  {'height': '170',
                                   'url': 'http://a1354.phobos.apple.com/us/r30/Music2/v4/c5/03/68/c5036883-38b9-702c-baf0-876db639b1f9/886444025935.170x170-75.jpg'}],
                       'name': 'Born Sinner (Deluxe Version)',
                       'releasedate': '2013-06-14T00:00:00-07:00',
                       'title': 'Born Sinner (Deluxe Version) - J Cole'},
                      {'artist': {'href': 'https://itunes.apple.com/us/artist/august-burns-red/id47796394?uo=2',
                                  'name': 'August Burns Red'},
                       'id': '655052532',
                       'images': [{'height': '55',
                                   'url': 'http://a854.phobos.apple.com/us/r30/Music2/v4/05/81/64/05816462-e832-80e4-9fa1-554d9bdd2542/886443989689.55x55-70.jpg'},
                                  {'height': '60',
                                   'url': 'http://a1576.phobos.apple.com/us/r30/Music2/v4/05/81/64/05816462-e832-80e4-9fa1-554d9bdd2542/886443989689.60x60-50.jpg'},
                                  {'height': '170',
                                   'url': 'http://a359.phobos.apple.com/us/r30/Music2/v4/05/81/64/05816462-e832-80e4-9fa1-554d9bdd2542/886443989689.170x170-75.jpg'}],
                       'name': 'Rescue & Restore',
                       'releasedate': '2013-06-25T00:00:00-07:00',
                       'title': 'Rescue & Restore - August Burns Red'},
                      {'artist': {'href': 'https://itunes.apple.com/us/artist/mac-miller/id419944559?uo=2',
                                  'name': 'Mac Miller'},
                       'id': '650864146',
                       'images': [{'height': '55',
                                   'url': 'http://a1599.phobos.apple.com/us/r30/Music/v4/7c/03/68/7c03681e-3cb6-23cb-5584-5b9dd42e54f7/040232021398_Cover.55x55-70.jpg'},
                                  {'height': '60',
                                   'url': 'http://a321.phobos.apple.com/us/r30/Music/v4/7c/03/68/7c03681e-3cb6-23cb-5584-5b9dd42e54f7/040232021398_Cover.60x60-50.jpg'},
                                  {'height': '170',
                                   'url': 'http://a1696.phobos.apple.com/us/r30/Music/v4/7c/03/68/7c03681e-3cb6-23cb-5584-5b9dd42e54f7/040232021398_Cover.170x170-75.jpg'}],
                       'name': 'Watching Movies With the Sound Off (Deluxe Edition)',
                       'releasedate': '2013-06-18T00:00:00-07:00',
                       'title': 'Watching Movies With the Sound Off (Deluxe Edition) - Mac Miller'},
                      {'artist': {'href': 'https://itunes.apple.com/us/artist/daft-punk/id5468295?uo=2',
                                  'name': 'Daft Punk'},
                       'id': '617154241',
                       'images': [{'height': '55',
                                   'url': 'http://a1849.phobos.apple.com/us/r1000/096/Music2/v4/52/aa/50/52aa5008-4934-0c27-a08d-8ebd7d13c030/886443919266.55x55-70.jpg'},
                                  {'height': '60',
                                   'url': 'http://a923.phobos.apple.com/us/r1000/096/Music2/v4/52/aa/50/52aa5008-4934-0c27-a08d-8ebd7d13c030/886443919266.60x60-50.jpg'},
                                  {'height': '170',
                                   'url': 'http://a1450.phobos.apple.com/us/r1000/096/Music2/v4/52/aa/50/52aa5008-4934-0c27-a08d-8ebd7d13c030/886443919266.170x170-75.jpg'}],
                       'name': 'Random Access Memories',
                       'releasedate': '2013-05-21T00:00:00-07:00',
                       'title': 'Random Access Memories - Daft Punk'},
                      {'artist': {'href': 'https://itunes.apple.com/us/artist/skillet/id1750802?uo=2',
                                  'name': 'Skillet'},
                       'id': '655774977',
                       'images': [{'height': '55',
                                   'url': 'http://a545.phobos.apple.com/us/r1000/050/Music/v4/b8/3f/7b/b83f7b74-4e7a-6b06-9385-667dc1288d7d/075679954787.55x55-70.jpg'},
                                  {'height': '60',
                                   'url': 'http://a1267.phobos.apple.com/us/r1000/050/Music/v4/b8/3f/7b/b83f7b74-4e7a-6b06-9385-667dc1288d7d/075679954787.60x60-50.jpg'},
                                  {'height': '170',
                                   'url': 'http://a114.phobos.apple.com/us/r1000/050/Music/v4/b8/3f/7b/b83f7b74-4e7a-6b06-9385-667dc1288d7d/075679954787.170x170-75.jpg'}],
                       'name': 'Rise',
                       'releasedate': '2013-06-21T00:00:00-07:00',
                       'title': 'Rise - Skillet'},
                      {'artist': {'href': 'https://itunes.apple.com/us/artist/skillet/id1750802?uo=2',
                                  'name': 'Skillet'},
                       'id': '662457451',
                       'images': [{'height': '55',
                                   'url': 'http://a399.phobos.apple.com/us/r1000/022/Music/v4/87/3e/eb/873eebf6-618c-d8e1-b8df-4d0b60f6729b/075679954749.55x55-70.jpg'},
                                  {'height': '60',
                                   'url': 'http://a1473.phobos.apple.com/us/r1000/022/Music/v4/87/3e/eb/873eebf6-618c-d8e1-b8df-4d0b60f6729b/075679954749.60x60-50.jpg'},
                                  {'height': '170',
                                   'url': 'http://a880.phobos.apple.com/us/r1000/022/Music/v4/87/3e/eb/873eebf6-618c-d8e1-b8df-4d0b60f6729b/075679954749.170x170-75.jpg'}],
                       'name': 'Rise (Deluxe Version)',
                       'releasedate': '2013-06-21T00:00:00-07:00',
                       'title': 'Rise (Deluxe Version) - Skillet'},
                      {'artist': {'href': 'https://itunes.apple.com/us/artist/attila/id46893195?uo=2',
                                  'name': 'Attila'},
                       'id': '649587514',
                       'images': [{'height': '55',
                                   'url': 'http://a608.phobos.apple.com/us/r30/Music/v4/ee/7d/b2/ee7db2ad-e783-2c3a-2ad3-6549868315e7/793018342834.55x55-70.jpg'},
                                  {'height': '60',
                                   'url': 'http://a1682.phobos.apple.com/us/r30/Music/v4/ee/7d/b2/ee7db2ad-e783-2c3a-2ad3-6549868315e7/793018342834.60x60-50.jpg'},
                                  {'height': '170',
                                   'url': 'http://a1297.phobos.apple.com/us/r30/Music/v4/ee/7d/b2/ee7db2ad-e783-2c3a-2ad3-6549868315e7/793018342834.170x170-75.jpg'}],
                       'name': 'About That Life',
                       'releasedate': '2013-06-25T00:00:00-07:00',
                       'title': 'About That Life - Attila'},
                      {'artist': {'href': 'https://itunes.apple.com/us/artist/india.arie/id92325?uo=2',
                                  'name': 'India.Arie'},
                       'id': '659585460',
                       'images': [{'height': '55',
                                   'url': 'http://a1694.phobos.apple.com/us/r30/Music/v4/d5/65/b2/d565b212-4463-6486-7ee2-eeab22ff3d87/UMG_cvrart_00602537429486_01_RGB72_1500x1500_13UAAIM06584.55x55-70.jpg'},
                                  {'height': '60',
                                   'url': 'http://a768.phobos.apple.com/us/r30/Music/v4/d5/65/b2/d565b212-4463-6486-7ee2-eeab22ff3d87/UMG_cvrart_00602537429486_01_RGB72_1500x1500_13UAAIM06584.60x60-50.jpg'},
                                  {'height': '170',
                                   'url': 'http://a63.phobos.apple.com/us/r30/Music/v4/d5/65/b2/d565b212-4463-6486-7ee2-eeab22ff3d87/UMG_cvrart_00602537429486_01_RGB72_1500x1500_13UAAIM06584.170x170-75.jpg'}],
                       'name': 'SongVersation (Deluxe Edition)',
                       'releasedate': '2013-06-25T00:00:00-07:00',
                       'title': 'SongVersation (Deluxe Edition) - India.Arie'}]}
     )
     xsh = parslepy.selectors.XPathSelectorHandler(
         namespaces={
             'atom': 'http://www.w3.org/2005/Atom',
             'im': 'http://itunes.apple.com/rss'
         })
     parselet = parslepy.Parselet(
         input_parselet, selector_handler=xsh, strict=True,
         debug=self.debug)
     extracted = parselet.extract(self.docroot)
     if self.debug:
         pprint.pprint(extracted)
     assert_dict_equal(extracted, expected_output)
Esempio n. 25
0
 def init_parselet_expect_syntax_error(self, parselet):
     """Instantiate ``parslepy.Parselet`` from *parselet* and discard it.

     Only the constructor is exercised: the resulting object is neither
     stored nor returned, and ``self`` is not used.

     NOTE(review): the name implies callers expect construction to fail
     with a syntax error, but no assertion or exception check lives in
     this method -- confirm how the call sites verify that.
     """
     parslepy.Parselet(parselet)
Esempio n. 26
0
def test_parslepy_init_invalid_parselet():
    """Feed ``parslepy.Parselet`` a string instead of a dict-style parselet.

    The argument is a ``str`` that merely looks like a dict literal; only
    the constructor call matters, so its result is not kept.

    NOTE(review): the test name suggests the constructor should reject
    this input. No expected-exception decorator or assertion is visible
    here -- confirm how the failure is meant to be checked.
    """
    string_parselet = "{ 'title': 'h1'}"
    parslepy.Parselet(string_parselet)