class RecordedDocumentTableParser(HTMLParser):
    """Pull the rows out of ``<table class="records">`` elements.

    While the parser is inside a records table, start tags and text are
    forwarded to a ``RecordedDocumentRowParser``.  When a ``</table>`` end tag
    is seen, the non-empty rows reported by that row parser are stored and
    exposed through :meth:`rows`.

    Limitations visible in this implementation:

    * The table is recognised only when one of its attributes is exactly
      ``class="records"``; a multi-valued class such as
      ``class="records wide"`` does not match.
    * Any ``</table>`` ends the capture, including the end tag of a table
      nested inside the records table.
    * End tags are never forwarded to the row parser.
    """

    def __init__(self):
        super().__init__()
        # Stays None until a records table has been closed at least once.
        self._rows = None
        self.row_parser = RecordedDocumentRowParser()
        self.currently_parsing_records_table = False

    def rows(self) -> "list | None":
        """Return the rows captured at the most recent records-table close.

        Returns:
            The non-empty rows reported by the row parser, or ``None`` if no
            records table has been closed yet.
        """
        return self._rows

    def handle_starttag(self, tag, attrs):
        """Forward start tags inside a records table; detect the table start.

        Args:
            tag: Name of the start tag.
            attrs: Sequence of ``(name, value)`` attribute pairs.
        """
        if self.currently_parsing_records_table:
            self.row_parser.handle_starttag(tag, attrs)
        # Checked after forwarding: the tag that switches capture on is not
        # itself handed to the row parser.
        if tag == 'table' and ('class', 'records') in attrs:
            self.currently_parsing_records_table = True

    def handle_data(self, data):
        """Forward text to the row parser while inside a records table."""
        if self.currently_parsing_records_table:
            self.row_parser.handle_data(data)

    def handle_endtag(self, tag):
        """Stop capturing on ``</table>`` and store the non-empty rows."""
        if not self.currently_parsing_records_table or tag != 'table':
            return
        self.currently_parsing_records_table = False
        # Empty rows are dropped; the rest is whatever the row parser reports
        # at this point.
        self._rows = [row for row in self.row_parser.rows() if row]
def __init__(self):
    """Set up the base parser and the initial table-capture state.

    Creates a fresh ``RecordedDocumentRowParser``, marks the parser as not
    being inside a records table, and leaves the collected rows unset.
    """
    super().__init__()
    # Not inside a records table at the start.
    self.currently_parsing_records_table = False
    self.row_parser = RecordedDocumentRowParser()
    # No rows collected yet.
    self._rows = None