Python SoupStrainer Examples

Programming Language: Python

Namespace/Package Name: webkitpy.thirdparty.BeautifulSoup

Class/Type: SoupStrainer

Examples at hotexamples.com: 2

Python SoupStrainer - 2 examples found. These are the top rated real world Python examples of webkitpy.thirdparty.BeautifulSoup.SoupStrainer extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

SoupStrainer(2)

Frequently Used Methods

SoupStrainer (2)

Example #1

Show file

    def _parse_attachment_ids_request_query(self, page, since=None):
        """Collect attachment ids from the review links found in ``page``.

        Args:
            page: HTML markup to scan for ``attachment.cgi?id=N&action=review``
                links.
            since: Optional cutoff compared against each row's
                ``YYYY-MM-DD HH:MM`` timestamp. When falsy, every matching
                link in the page is used and no date filtering happens.

        Returns:
            A list of attachment ids as ints.
        """
        # Raw strings keep the regex escapes (\d, \?) from being read as
        # invalid string escapes; the dot is escaped so it only matches ".".
        digits = re.compile(r"\d+")
        attachment_href = re.compile(r"attachment\.cgi\?id=\d+&action=review")

        # If no date is given, return all ids.
        if not since:
            # Restrict parsing to the review links themselves.
            attachment_links = SoupStrainer("a", href=attachment_href)
            return [
                int(digits.search(tag["href"]).group(0))
                for tag in BeautifulSoup(page, parseOnlyThese=attachment_links)
            ]

        # Parse the main table only.
        date_format = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}")
        mtab = SoupStrainer("table", {"class": "requests"})
        soup = BeautifulSoup(page, parseOnlyThese=mtab)
        patch_ids = []

        for row in soup.findAll("tr"):
            patch_tag = row.find("a", {"href": attachment_href})
            if not patch_tag:
                # Row carries no review link (e.g. a header row); nothing to collect.
                continue
            patch_id = int(digits.search(patch_tag["href"]).group(0))
            date_tag = row.find("td", text=date_format)
            # Rows older than the cutoff are dropped; rows without a
            # recognisable date are kept.
            if date_tag and datetime.strptime(
                    date_format.search(date_tag).group(0),
                    "%Y-%m-%d %H:%M") < since:
                continue
            patch_ids.append(patch_id)
        return patch_ids

Example #2

Show file

 def _parse_attachment_ids_request_query(self, page):
     """Return the attachment ids of all review links found in ``page``.

     Args:
         page: HTML markup to scan for ``attachment.cgi?id=N&action=review``
             links.

     Returns:
         A list of attachment ids as ints.
     """
     # Raw strings keep the regex escapes (\d, \?) from being read as
     # invalid string escapes; the dot is escaped so it only matches ".".
     digits = re.compile(r"\d+")
     attachment_href = re.compile(r"attachment\.cgi\?id=\d+&action=review")
     # Restrict parsing to the review links themselves.
     attachment_links = SoupStrainer("a", href=attachment_href)
     return [
         int(digits.search(tag["href"]).group(0))
         for tag in BeautifulSoup(page, parseOnlyThese=attachment_links)
     ]