Example #1
0
def get_clean_text(fileid):
    """Load an etext and return its title and header-stripped body.

    The text is fetched with ``acquire.load_etext``, passed through
    ``strip_headers.strip_headers`` and trimmed of surrounding whitespace.
    The first line of the result is used as the title, with every
    character in ``string.punctuation`` removed.

    Args:
        fileid: Identifier handed unchanged to ``acquire.load_etext``.

    Returns:
        ``{"title": ..., "text": ...}`` on success. If any step raises,
        the exception is printed and an empty dict is returned instead.
    """
    try:
        body = strip_headers.strip_headers(acquire.load_etext(fileid)).strip()
        # Everything before the first newline is treated as the title.
        first_line, _, _ = body.partition('\n')
        drop_punctuation = str.maketrans('', '', string.punctuation)
        return {"title": first_line.translate(drop_punctuation), "text": body}
    except Exception as err:
        # Best-effort: report the problem and signal failure with {}.
        print(err)
        return {}
Example #2
0
 def test_unreachable_mirror(self):
     # Configure the HEAD-response helper with ok=False, then expect
     # load_etext(1) to raise UnknownDownloadUriException.
     # NOTE(review): request_head_response is defined outside this snippet;
     # presumably it stubs the mirror's HEAD replies -- confirm.
     self.request_head_response(ok=False)
     expecting_unknown_uri = self.assertRaises(UnknownDownloadUriException)
     with expecting_unknown_uri:
         text.load_etext(1)
Example #3
0
 def test_load_etext(self):
     # load_etext(2701) must return a str longer than 1000 characters.
     loaded = text.load_etext(2701)
     self.assertIsInstance(loaded, str)
     minimum_length = 1000
     self.assertGreater(len(loaded), minimum_length)
Example #4
0
 def test_invalid_etext(self):
     # Requesting etext 1 from the example.com mirror must raise
     # UnknownDownloadUriException.
     mirror_url = 'http://example.com'
     expecting_unknown_uri = self.assertRaises(UnknownDownloadUriException)
     with expecting_unknown_uri:
         text.load_etext(1, mirror=mirror_url)
Example #5
0
    def test_unreachable_mirror(self):
        # With the HEAD-response helper set to ok=False, load_etext(1) is
        # expected to raise UnknownDownloadUriException.
        # NOTE(review): request_head_response lives outside this snippet;
        # presumably it fakes the mirror's HEAD replies -- confirm.
        self.request_head_response(ok=False)
        raises_unknown_uri = self.assertRaises(UnknownDownloadUriException)
        with raises_unknown_uri:
            text.load_etext(1)
Example #6
0
 def test_load_etext(self):
     # The text loaded for id 2701 must be a str of more than 1000 characters.
     content = text.load_etext(2701)
     self.assertIsInstance(content, str)
     length_floor = 1000
     self.assertGreater(len(content), length_floor)
Example #7
0
 def test_invalid_etext(self):
     # load_etext(1) against the example.com mirror is expected to raise
     # UnknownDownloadUriException.
     bogus_mirror = 'http://example.com'
     raises_unknown_uri = self.assertRaises(UnknownDownloadUriException)
     with raises_unknown_uri:
         text.load_etext(1, mirror=bogus_mirror)