def get_opening_hours(self):
    """Build an OpeningHours record from the page's hours table.

    Every ``tr`` found under a ``table`` whose class contains
    ``hours-table`` contributes one line: the row's ``th`` rendered with
    ``tostring``, a tab, then the row's ``td`` rendered with
    ``tostring_with_breaks``. The lines are joined with newlines and handed
    to ``data.OpeningHours`` as ``source_text``.
    """
    rows = self.root.xpath('.//table[contains(@class, "hours-table")]//tr')
    # The header cell is rendered before the value cell for each row, and
    # rows are processed in document order as returned by the xpath query.
    source_text = '\n'.join(
        '%s\t%s' % (tostring(row.find('th')), tostring_with_breaks(row.find('td')))
        for row in rows
    )
    return data.OpeningHours(source_text=source_text)
def get_address(self):
    """Return the text of the page's address block.

    Selects the ``div`` elements whose ``class`` attribute is exactly
    ``address`` and renders the first one with
    ``html_parsing.tostring_with_breaks``.
    """
    address_divs = self.root.xpath('.//div[@class="address"]')
    # NOTE(review): the first match is taken unconditionally; presumably this
    # raises IndexError when the page has no such div (assuming xpath returns
    # a list, as lxml does) -- confirm callers expect that.
    first_div = address_divs[0]
    return html_parsing.tostring_with_breaks(first_div)
def html_str_to_text(self, html_str):
    """Convert an HTML string to text.

    Parses ``html_str`` with ``etree.HTML`` and renders the resulting tree
    through ``tostring_with_breaks`` with ``with_tail=True`` and
    ``strip_punctuation=False``. Does not read any instance state.
    """
    parsed_root = etree.HTML(html_str)
    return tostring_with_breaks(
        parsed_root,
        with_tail=True,
        strip_punctuation=False,
    )