Esempio n. 1
0
def _work(listener, cached_doc_path):
    """Yield a ``{'label': ..., 'url': ...}`` dict per "Articles" list item.

    The document is loaded through ``_ps_lib().soup_via_locators_`` using the
    module-level ``_url`` together with `cached_doc_path` and `listener`.
    Inside the single ``<div id="readme">`` the first H2 whose text is exactly
    "Articles" is located, and every ``<li>`` child of the ``<ul>`` that
    follows it is turned into one dict built from the item's ``<a>`` element.

    Args:
        listener: passed through unchanged to the document loader.
        cached_doc_path: passed through as ``html_document_path``.

    Yields:
        dict: ``label`` is the anchor text, ``url`` is its ``href``.
        Nothing is yielded when the loader returns None.

    Raises:
        AssertionError: the page does not have the expected shape (no
            "Articles" heading, heading not followed by a ``<ul>``, or an
            item whose children are not ``<a>`` optionally followed by
            ``<code>``).
        ValueError: there is not exactly one readme div, or a list item has
            a number of element children other than one or two.
    """

    soup = _ps_lib().soup_via_locators_(url=_url,
                                        html_document_path=cached_doc_path,
                                        listener=listener)

    if soup is None:
        return

    # Tuple-unpacking enforces "exactly one readme div".
    readme, = soup.find_all('div', id='readme')

    # Find the H2 called "Articles"; the list we want is whatever follows it.
    for el in readme.find_all('h2'):
        if 'Articles' == el.text:
            ul = el.find_next_sibling()  # not the H2 but whatever is after it
            break
    else:
        # Raised explicitly (not via `assert`) so that the check survives
        # `python -O`; otherwise `ul` would be unbound in the loop below.
        raise AssertionError("no H2 with text 'Articles' found in readme")

    assert ul is not None and 'ul' == ul.name

    for li in sv.filter('li', ul):
        these = sv.filter('*', li)  # immediate children, skipping strings
        if 1 == len(these):
            a, = these
            desc = None
        else:
            # Exactly two children expected: the link and a trailing element.
            a, desc = these
        assert ('a' == a.name)

        # NOTE this skips over some interesting strings that are like descs
        dct = {'label': a.text, 'url': a['href']}
        if desc is not None:
            assert ('code' == desc.name)  # meh
        yield dct
Esempio n. 2
0
    def test_invalid_type_input_filter(self):
        """Check that `sv.filter` raises TypeError for a string input under DEBUG."""

        # A plain string is passed where a tag or an iterable of nodes belongs.
        self.assertRaises(
            TypeError,
            sv.filter, 'div', "not a tag", flags=sv.DEBUG
        )
Esempio n. 3
0
    def test_filter(self):
        """Check `sv.filter` against a tag and against a list of its tags."""

        markup = """
        <!-- before header -->
        <html>
        <head>
        </head>
        <body>
        <!-- comment -->
        <p id="1"><code id="2"></code><img id="3" src="./image.png"/></p>
        <pre id="4"></pre>
        <p><span id="5" class="some-class"></span><span id="some-id"></span></p>
        <pre id="6" class='ignore'>
            <!-- don't ignore -->
        </pre>
        </body>
        </html>
        """

        body = bs4.BeautifulSoup(markup, 'html5lib').html.body

        # First hand the body tag itself to the filter.
        matched = sv.filter('pre#\\36', body)
        self.assertEqual(len(matched), 1)
        self.assertEqual(matched[0].attrs['id'], '6')

        # Then hand it an explicit list holding only the body's Tag children.
        tag_children = []
        for child in body.children:
            if isinstance(child, bs4.Tag):
                tag_children.append(child)
        matched = sv.filter('pre#\\36', tag_children)
        self.assertEqual(len(matched), 1)
        self.assertEqual(matched[0].attrs['id'], '6')
Esempio n. 4
0
    def test_invalid_type_input(self):
        """Check that each API entry point raises TypeError for a string input."""

        # Same bad call for every entry point, in the original order.
        for api in (sv.match, sv.select, sv.filter, sv.comments):
            with self.assertRaises(TypeError):
                api('div', "not a tag")
Esempio n. 5
0
    def test_filter_list(self):
        """
        Check filtering of a plain list of document nodes.

        The list is built from every child of the body, not only its tags;
        the filter is expected to cope with that and still return the one
        matching element.
        """

        markup = """
        <!-- before header -->
        <html>
        <head>
        </head>
        <body>
        <!-- comment -->
        <p id="1"><code id="2"></code><img id="3" src="./image.png"/></p>
        <pre id="4"></pre>
        <p><span id="5" class="some-class"></span><span id="some-id"></span></p>
        <pre id="6" class='ignore'>
            <!-- don't ignore -->
        </pre>
        </body>
        </html>
        """

        body = self.soup(markup, 'html5lib').html.body
        all_children = list(body.children)

        matched = sv.filter('pre#\\36', all_children)
        self.assertEqual(len(matched), 1)
        self.assertEqual(matched[0].attrs['id'], '6')
Esempio n. 6
0
    def test_filter_tag_order(self):
        """Check that filtered tags come back in the expected order."""

        markup = """
        <!-- before header -->
        <html>
        <head>
        </head>
        <body>
        <!-- comment -->
        <p id="1"><code id="2"></code><img id="3" src="./image.png"/></p>
        <pre id="4"></pre>
        <p><span id="5" class="some-class"></span><span id="some-id"></span></p>
        <pre id="6" class='ignore'>
            <!-- don't ignore -->
        </pre>
        </body>
        </html>
        """

        first_paragraph = self.soup(markup, 'html.parser').html.body.p

        # Collect the id of every element the filter returns, in order.
        found_ids = []
        for tag in sv.filter('[id]', first_paragraph):
            found_ids.append(tag['id'])

        self.assertEqual(['2', '3'], found_ids)
Esempio n. 7
0
    def test_filter_tag(self):
        """Check filtering when the input is a single tag (the body)."""

        markup = """
        <!-- before header -->
        <html>
        <head>
        </head>
        <body>
        <!-- comment -->
        <p id="1"><code id="2"></code><img id="3" src="./image.png"/></p>
        <pre id="4"></pre>
        <p><span id="5" class="some-class"></span><span id="some-id"></span></p>
        <pre id="6" class='ignore'>
            <!-- don't ignore -->
        </pre>
        </body>
        </html>
        """

        body = self.soup(markup, 'html5lib').html.body
        matched = sv.filter('pre#\\36', body)

        self.assertEqual(len(matched), 1)
        self.assertEqual(matched[0].attrs['id'], '6')
def _filter(sel, el):
    """Return the result of `sv.filter` for selector `sel` applied to `el`."""
    matches = sv.filter(sel, el)
    return matches
def _filter(sel, el):
    """Return `soupsieve.filter(sel, el)`, importing soupsieve on each call."""
    import soupsieve
    matches = soupsieve.filter(sel, el)
    return matches
def _filter(sel, el):
    """Return `soupsieve.filter(sel, el)`, importing soupsieve on each call."""
    # at #history-A.1 BeautifulSoup changed
    import soupsieve
    matches = soupsieve.filter(sel, el)
    return matches
Esempio n. 11
0
def _direct_children(node):
    """Return what `sv.filter` gives for the universal selector on `node`."""
    # omit strings, `_filter`
    children = sv.filter('*', node)
    return children