def get_clean_text(fileid):
    """Load an etext, strip its headers, and return its title and body.

    The raw text comes from ``acquire.load_etext(fileid)``; it is passed
    through ``strip_headers.strip_headers`` and surrounding whitespace is
    trimmed. The title is the first line of the trimmed text with every
    character in ``string.punctuation`` removed.

    Returns:
        ``{"title": <cleaned first line>, "text": <trimmed text>}`` on
        success. If anything in the pipeline raises, the exception is
        printed and an empty dict is returned instead (best-effort).
    """
    try:
        downloaded = acquire.load_etext(fileid)
        body = strip_headers.strip_headers(downloaded).strip()
        # Everything before the first newline is treated as the title line.
        first_line = body.split('\n', 1)[0]
        drop_punctuation = str.maketrans('', '', string.punctuation)
        cleaned = {"title": first_line.translate(drop_punctuation),
                   "text": body}
    except Exception as err:
        # Deliberately broad: report the failure and fall back to {}.
        print(err)
        cleaned = {}
    return cleaned
def test_unreachable_mirror(self):
    # Set up the HEAD-response helper with ok=False, then expect
    # load_etext to raise UnknownDownloadUriException.
    self.request_head_response(ok=False)
    self.assertRaises(UnknownDownloadUriException, text.load_etext, 1)
def test_load_etext(self):
    # Loading etext 2701 should yield a str longer than 1000 characters.
    minimum_length = 1000
    loaded = text.load_etext(2701)
    self.assertIsInstance(loaded, str)
    self.assertGreater(len(loaded), minimum_length)
def test_invalid_etext(self):
    # Requesting etext 1 from the example.com mirror is expected to raise
    # UnknownDownloadUriException.
    self.assertRaises(
        UnknownDownloadUriException,
        text.load_etext,
        1,
        mirror='http://example.com',
    )