Python Parser.css_select Examples

Programming Language: Python

Namespace/Package Name: goose.parsers

Class/Type: Parser

Method/Function: css_select

Examples at hotexamples.com: 11

Python Parser.css_select - 11 examples found. These are the top-rated real-world Python examples of goose.parsers.Parser.css_select, extracted from open source projects. You can rate examples to help us improve the quality of the examples.

Frequently Used Methods

Show Hide

getAttribute(9)

fromstring(9)

css_select(6)

getPath(4)

hasChildTag(3)

clearText(2)

createElement(2)

getFormattedText(2)

hasChildTags(2)

adjustTopNode(1)

childNodesWithText(1)

getComments(1)

getElementById(1)

removeTitle(1)

Example #1

Show file

    def remove_drop_caps(self, doc):
        """\
        Call drop_tag() on every span whose class attribute contains
        the word "dropcap" or "drop_cap", then return the same doc.
        """
        for drop_cap in Parser.css_select(
                doc, "span[class~=dropcap], span[class~=drop_cap]"):
            drop_cap.drop_tag()

        return doc

Example #2

Show file

File: extractors.py Project: aviks/python-goose

    def extract_tags(self, article):
        """\
        Collect the non-empty text of the tag links found in article.doc.

        Returns NO_STRINGS when the document has no children or when
        neither selector matches anything; otherwise a set of texts.
        """
        root = article.doc

        # a node without children cannot contain any tag links
        if not list(root):
            return NO_STRINGS

        # try the rel-based selector first, fall back to the href-based one
        links = (Parser.css_select(root, A_REL_TAG_SELECTOR)
                 or Parser.css_select(root, A_HREF_TAG_SELECTOR))
        if not links:
            return NO_STRINGS

        texts = [Parser.getText(link) for link in links]
        return {text for text in texts if text}

Example #3

Show file

File: extractors.py Project: gaybro8777/python-goose

    def extract_tags(self, article):
        """\
        Return the set of non-empty texts of the tag anchors in article.doc,
        or NO_STRINGS if the document is childless or no anchor matches.
        """
        document = article.doc

        # nothing to look through when the node has no children
        if not list(document):
            return NO_STRINGS

        anchors = Parser.css_select(document, A_REL_TAG_SELECTOR)
        if not anchors:
            # second attempt with the href-based selector
            anchors = Parser.css_select(document, A_HREF_TAG_SELECTOR)
        if not anchors:
            return NO_STRINGS

        found = set()
        for anchor in anchors:
            label = Parser.getText(anchor)
            if label:
                found.add(label)
        return found

Example #4

Show file

File: outputformatters.py Project: tbkraf08/python-goose

 def remove_negativescores_nodes(self):
     """\
     Remove every element selected under the top node by
     "*[gravityScore]" whose gravityScore attribute is below 1.
     """
     gravity_items = Parser.css_select(self.top_node, "*[gravityScore]")
     for item in gravity_items:
         # The fallback belongs to attrib.get(). Passing 0 as int()'s second
         # argument selected base-0 parsing, which rejects zero-padded
         # values such as "05" instead of supplying a default.
         score = int(item.attrib.get('gravityScore', 0))
         if score < 1:
             parent = item.getparent()
             # An element without a parent (e.g. a matched root node)
             # cannot be detached; skip it rather than fail on None.
             if parent is not None:
                 parent.remove(item)

Example #5

Show file

File: extractors.py Project: aviks/python-goose

    def get_meta_content(self, doc, metaName):
        """\
        Return the stripped 'content' attribute of the first node that
        matches the metaName selector in doc, or '' when there is no
        match or the attribute is missing or empty.
        """
        matches = Parser.css_select(doc, metaName)
        if matches is None or len(matches) == 0:
            return ''

        value = matches[0].attrib.get('content')
        return value.strip() if value else ''

Example #6

Show file

File: extractors.py Project: tbkraf08/python-goose

    def get_meta_content(self, doc, metaName):
        """\
        Look up metaName (a CSS selector) in doc and return the first
        match's 'content' attribute with surrounding whitespace removed.
        Returns '' if nothing matches or the attribute is absent or empty.
        """
        nodes = Parser.css_select(doc, metaName)

        raw = None
        if nodes is not None and len(nodes) > 0:
            first = nodes[0]
            raw = first.attrib.get('content')

        if not raw:
            return ''
        return raw.strip()

Example #7

Show file

File: tests.py Project: tbkraf08/python-goose

    def test_cssselect(self):
        """\
        Parser.css_select must return as many nodes as the document's
        own cssselect() for a range of selectors, and those counts must
        match the values expected for the fixture markup.
        """
        markup = ''.join([
            '<html><body>',
            '<p class="link">this is a test <a class="link">link</a> and this is <strong class="foo">strong</strong></p>',
            '<p>this is a test and this is <strong class="link">strong</strong></p>',
            '</body></html>',
        ])
        doc = Parser.fromstring(markup)

        # (selector, number of nodes it is expected to match)
        cases = [
            # any node carrying a class attribute
            ("*[class]", 4),
            # p nodes
            ("p", 2),
            # any node whose class equals link
            ("*[class=link]", 3),
            # p nodes carrying a class attribute
            ("p[class]", 1),
            # p nodes whose class equals link
            ("p[class=link]", 1),
            # strong nodes whose class equals link or foo
            ("strong[class=link], strong[class=foo]", 2),
            # a nodes that are direct children of a p node
            ("p > a", 1),
        ]

        for selector, count in cases:
            items_expected = doc.cssselect(selector)
            items_result = Parser.css_select(doc, selector)
            self.assertEqual(len(items_expected), count)
            self.assertEqual(len(items_expected), len(items_result))

Example #8

Show file

File: tests.py Project: BigData-Tools/python-goose

    def test_cssselect(self):
        """\
        Compare Parser.css_select with doc.cssselect() selector by
        selector: both must yield the same number of nodes, and that
        number must equal the count expected for the fixture below.
        """
        html = (
            '<html><body>'
            '<p class="link">this is a test <a class="link">link</a> and this is <strong class="foo">strong</strong></p>'
            '<p>this is a test and this is <strong class="link">strong</strong></p>'
            '</body></html>'
        )
        doc = Parser.fromstring(html)

        def check(selector, count):
            # same order as always: native lookup, Parser lookup, then asserts
            items_expected = doc.cssselect(selector)
            items_result = Parser.css_select(doc, selector)
            self.assertEqual(len(items_expected), count)
            self.assertEqual(len(items_expected), len(items_result))

        # nodes with a class attribute
        check("*[class]", 4)
        # p nodes
        check("p", 2)
        # nodes with attribute class equal to link
        check("*[class=link]", 3)
        # p nodes with a class attribute
        check("p[class]", 1)
        # p nodes with class attribute link
        check("p[class=link]", 1)
        # strong nodes with class attribute link or foo
        check("strong[class=link], strong[class=foo]", 2)
        # a nodes directly inside a p node
        check("p > a", 1)

Example #9

Show file

File: cleaners.py Project: BigData-Tools/python-goose

    def remove_drop_caps(self, doc):
        """\
        Call drop_tag() on each span matched by the drop-cap selector
        (class word "dropcap" or "drop_cap") and return doc.
        """
        selector = "span[class~=dropcap], span[class~=drop_cap]"
        drop_caps = Parser.css_select(doc, selector)
        for span in drop_caps:
            span.drop_tag()
        return doc

Example #10

Show file

File: cleaners.py Project: BigData-Tools/python-goose

 def clean_para_spans(self, doc):
     """\
     Call drop_tag() on every span that is a direct child of a p
     element in doc, then return doc.
     """
     for span in Parser.css_select(doc, 'p > span'):
         span.drop_tag()
     return doc

Example #11

Show file

 def clean_para_spans(self, doc):
     """\
     Select the 'p > span' nodes of doc, call drop_tag() on each one
     and hand the same doc back to the caller.
     """
     selector = 'p > span'
     matched = Parser.css_select(doc, selector)
     for node in matched:
         node.drop_tag()
     return doc