def _parse_attachment_ids_request_query(self, page, since=None):
    """Extract attachment ids from the review links found in ``page``.

    Args:
        page: Markup handed to BeautifulSoup for parsing.
        since: Optional cutoff. It is compared with ``<`` against naive
            ``datetime`` values parsed from the page, so it must be
            comparable to those. When falsy, no date filtering is done.

    Returns:
        A list of int attachment ids.

        Without ``since``: one id per ``<a>`` whose href matches the
        review-link pattern, anywhere in the page.

        With ``since``: only ``<table class="requests">`` markup is
        parsed. Each ``<tr>`` containing a review link contributes the id
        of its first such link, unless the row carries a
        ``YYYY-MM-DD HH:MM`` date earlier than ``since``. Rows with no
        recognizable date are kept.
    """
    # Raw strings keep the regex escapes (\d, \?) away from Python's own
    # string-escape processing. The dot is escaped so that it matches only
    # a literal "." instead of any character.
    digits = re.compile(r"\d+")
    attachment_href = re.compile(r"attachment\.cgi\?id=\d+&action=review")

    # If no date is given, return all ids.
    if not since:
        attachment_links = SoupStrainer("a", href=attachment_href)
        return [
            int(digits.search(tag["href"]).group(0))
            for tag in BeautifulSoup(page, parseOnlyThese=attachment_links)
        ]

    # Parse the main table only.
    date_format = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}")
    mtab = SoupStrainer("table", {"class": "requests"})
    soup = BeautifulSoup(page, parseOnlyThese=mtab)

    patch_ids = []
    for row in soup.findAll("tr"):
        patch_tag = row.find("a", {"href": attachment_href})
        if not patch_tag:
            # Row has no review link (e.g. a header row): nothing to collect.
            continue
        patch_id = int(digits.search(patch_tag["href"]).group(0))

        date_tag = row.find("td", text=date_format)
        if date_tag:
            # The matched text may hold more than the timestamp, so pull
            # out just the date portion before parsing it.
            stamp = datetime.strptime(
                date_format.search(date_tag).group(0), "%Y-%m-%d %H:%M"
            )
            if stamp < since:
                continue
        patch_ids.append(patch_id)
    return patch_ids
def _parse_attachment_ids_request_query(self, page):
    """Extract attachment ids from the review links found in ``page``.

    Args:
        page: Markup handed to BeautifulSoup for parsing.

    Returns:
        A list of int attachment ids, one per ``<a>`` whose href matches
        the review-link pattern. The id is the first run of digits in the
        href.
    """
    # Raw strings keep the regex escapes (\d, \?) away from Python's own
    # string-escape processing. The dot is escaped so that it matches only
    # a literal "." instead of any character.
    digits = re.compile(r"\d+")
    attachment_href = re.compile(r"attachment\.cgi\?id=\d+&action=review")

    # Restrict parsing to the matching anchors so that the resulting soup
    # contains only the review links.
    attachment_links = SoupStrainer("a", href=attachment_href)
    return [
        int(digits.search(tag["href"]).group(0))
        for tag in BeautifulSoup(page, parseOnlyThese=attachment_links)
    ]