def parse_list(self, page):
    """Yield the single record extracted from one precinct's PDF.

    ``page`` is unpacked as a ``(precinct, raw_pdf)`` pair.  The PDF is
    converted to text with ``keep_layout=False`` and searched with the
    module-level ``pdf_re`` pattern.

    Yields one dict holding the pattern's named groups plus a
    ``'precinct'`` key set to the precinct from ``page``.

    Raises ScraperBroken when the pattern does not match.  Because this
    is a generator, the error surfaces when the result is first iterated,
    not when the method is called.
    """
    precinct, raw_pdf = page
    match = pdf_re.search(pdfstring_to_text(raw_pdf, keep_layout=False))

    # Guard clause: a PDF with no match is treated as a broken scraper.
    if not match:
        raise ScraperBroken("Didn't find data in PDF for precinct %s" % precinct)

    record = match.groupdict()
    # The precinct value always wins over any same-named regex group.
    record['precinct'] = precinct
    yield record
def parse_detail(self, page, list_record):
    """Return the named groups matched in a detail PDF.

    ``page`` is converted to text and searched with
    ``self.parse_detail_re``.  ``list_record`` is accepted but not used
    by this method.

    Returns the dict of named groups from the match.

    Raises SkipRecord (after logging a warning) when the pattern does
    not match.
    """
    match = self.parse_detail_re.search(pdfstring_to_text(page))

    if not match:
        # NOTE(review): the double-underscore attribute below is
        # name-mangled to the enclosing class -- confirm it is assigned
        # inside this same class and not by a base class.
        self.logger.warning("Regex failed on %s", self.__current_url)
        raise SkipRecord

    self.logger.debug('Got a match for parse_detail_re')
    return match.groupdict()