def get_file(location):
    """Load an HTML document from a local path or a URL and parse it.

    A ``location`` that starts with ``'/'`` is opened as a local file;
    anything else is fetched with ``requests.get``.

    :param location: Absolute filesystem path or URL of the document.
    :return: A 2-tuple ``(fromstring(content), get_clean_body_content(content))``
        built from the raw document content.
    """
    if location.startswith('/'):
        # Read the file straight into a local. No HTTP session is needed
        # for a local path, so none is created (and none is left unclosed).
        with open(location) as f:
            content = f.read()
    else:
        content = requests.get(location).content
    return fromstring(content), get_clean_body_content(content)
def get_file(location):
    """Load an HTML document from a local path or a URL and parse it.

    A ``location`` that starts with ``'/'`` is opened as a local file;
    anything else is fetched with ``requests.get``.

    :param location: Absolute filesystem path or URL of the document.
    :return: A 2-tuple ``(fromstring(content), get_clean_body_content(content))``
        built from the raw document content.
    """
    if location.startswith('/'):
        # Read the file straight into a local. No HTTP session is needed
        # for a local path, so none is created (and none is left unclosed).
        with open(location) as f:
            content = f.read()
    else:
        content = requests.get(location).content
    return fromstring(content), get_clean_body_content(content)
def fetcher(url):
    """Download ``url`` and return the cleaned markup of its summary nodes.

    The page is requested without following redirects, using the
    ``Juriscraper`` User-Agent. Every element matched by the XPath below is
    serialized to HTML, the pieces are concatenated in document order, and
    the result is passed through ``get_clean_body_content`` with ``span``
    listed as an extra tag to remove.

    :param url: Address of the page to fetch.
    :return: Whatever ``get_clean_body_content`` returns for the
        concatenated markup.
    :raises: Whatever ``raise_for_status`` raises on a bad status code.
    """
    response = requests.get(
        url,
        allow_redirects=False,
        headers={'User-Agent': 'Juriscraper'},
    )
    # Fail loudly on a bad status code instead of parsing an error page.
    response.raise_for_status()

    tree = html.fromstring(response.text)
    # NOTE(review): ``self`` is not a parameter of this function; presumably
    # it comes from an enclosing method's scope -- confirm.
    tree.make_links_absolute(self.url)

    summary_xpath = '//p[contains(@style, "justify")]/span[@style="font-weight: bold" ]/../following-sibling::p[not(contains(@style, "justify"))][position()=2]/following-sibling::p'
    markup = "".join(
        html.tostring(node, method='html', encoding='unicode')
        for node in tree.xpath(summary_xpath)
    )
    return get_clean_body_content(markup, remove_extra_tags=['span'])
def fetcher(url):
    """Download ``url`` and return the cleaned markup of its summary nodes.

    The page is requested without following redirects, using the
    ``Juriscraper`` User-Agent. Every element matched by the XPath below is
    serialized to HTML, the pieces are concatenated in document order, and
    the result is passed through ``get_clean_body_content`` with ``span``
    listed as an extra tag to remove.

    :param url: Address of the page to fetch.
    :return: Whatever ``get_clean_body_content`` returns for the
        concatenated markup.
    :raises: Whatever ``raise_for_status`` raises on a bad status code.
    """
    response = requests.get(
        url,
        allow_redirects=False,
        headers={'User-Agent': 'Juriscraper'},
    )
    # Fail loudly on a bad status code instead of parsing an error page.
    response.raise_for_status()

    tree = html.fromstring(response.text)
    # NOTE(review): ``self`` is not a parameter of this function; presumably
    # it comes from an enclosing method's scope -- confirm.
    tree.make_links_absolute(self.url)

    summary_xpath = '//p[contains(@style, "justify")]/span[@style="font-weight: bold" ]/../following-sibling::p[not(contains(@style, "justify"))][position()=2]/following-sibling::p'
    markup = "".join(
        html.tostring(node, method='html', encoding='unicode')
        for node in tree.xpath(summary_xpath)
    )
    return get_clean_body_content(markup, remove_extra_tags=['span'])