Beispiel #1
0
def clean(txt, startline=1, pretty_quote=True, correct_word_break=None, guess_chapter=True, guess_parasep=False):
    """Run ``txt`` through the three-stage reformatting pipeline.

    The stages are applied in order: ``ptxt2ftxt``, ``ftxtclean`` and
    ``ftxt2markdown``.  Each stage receives the output of the previous one.

    :param txt: the text to process.
    :param startline: forwarded to ``ptxt2ftxt``.
    :param pretty_quote: forwarded to ``ftxtclean``.
    :param correct_word_break: forwarded to ``ftxtclean``.
    :param guess_chapter: forwarded to ``ftxt2markdown``.
    :param guess_parasep: forwarded to ``ftxt2markdown``.
    :return: whatever ``ftxt2markdown`` returns (presumably Markdown text,
        judging by the helper's name -- confirm against that module).
    """
    # Imported lazily so the helper modules are only loaded when needed.
    from ptxt2ftxt import ptxt2ftxt, ftxtclean
    from ftxt2markdown import ftxt2markdown

    formatted = ptxt2ftxt(txt, startline)
    cleaned = ftxtclean(formatted, pretty_quote, correct_word_break)
    return ftxt2markdown(cleaned, guess_chapter, guess_parasep)
Beispiel #2
0
def clean(txt, startline=1, pretty_quote=True, correct_word_break=None, guess_chapter=True, guess_parasep=False):
    """Reformat ``txt`` by chaining ``ptxt2ftxt``, ``ftxtclean`` and ``ftxt2markdown``.

    :param txt: the text to process.
    :param startline: second positional argument of ``ptxt2ftxt``.
    :param pretty_quote: second positional argument of ``ftxtclean``.
    :param correct_word_break: third positional argument of ``ftxtclean``.
    :param guess_chapter: second positional argument of ``ftxt2markdown``.
    :param guess_parasep: third positional argument of ``ftxt2markdown``.
    :return: the value produced by the final ``ftxt2markdown`` call.
    """
    from ptxt2ftxt import ptxt2ftxt, ftxtclean
    from ftxt2markdown import ftxt2markdown

    # Stage 1: initial conversion of the incoming text.
    stage_one = ptxt2ftxt(txt, startline)

    # Stage 2: clean-up pass.
    # NOTE: when debugging, the output of this stage is the interesting
    # intermediate result to dump to a file.
    stage_two = ftxtclean(stage_one, pretty_quote, correct_word_break)

    # Stage 3: final conversion; its result is handed back to the caller.
    return ftxt2markdown(stage_two, guess_chapter, guess_parasep)
 def run(self, path_to_ebook):
     """Decode a text file, optionally reformat it, and save it as UTF-8.

     The file at ``path_to_ebook`` is read, its encoding is determined with
     ``force_encoding`` and the content is decoded (undecodable bytes are
     replaced).  When ``prefs['reformat']`` is truthy the text is passed
     through ``ptxt2ftxt``, ``ftxtclean`` and ``ftxt2markdown`` using the
     options stored in ``prefs``.  The result is written, UTF-8 encoded, to
     a file obtained from ``self.temporary_file('.txt')``.

     :param path_to_ebook: path of the input file.
     :return: the ``name`` attribute of the temporary file that was written.
     """
     print("reformatter: "+path_to_ebook)
     # Read inside a context manager so the handle is released even when
     # read() raises; previously the file was never closed at all.
     with open(path_to_ebook, 'r') as f:
         raw = f.read()
     encoding = force_encoding(raw, True)
     print("Detected encoding: ", encoding)
     txt = unicode(raw, encoding, errors='replace')
     # reformat
     if prefs['reformat']:
         print("reformatting...")
         # Imported lazily: only needed when reformatting is enabled.
         from ptxt2ftxt import ptxt2ftxt, ftxtclean
         from ftxt2markdown import ftxt2markdown
         txt = ptxt2ftxt(txt, para_by_mark=prefs['para_by_mark'])
         txt = ftxtclean(txt, pretty_quote=prefs['pretty_quote'], correct_word_break=prefs['correct_word_break'])
         txt = ftxt2markdown(txt, guessChapter=prefs['guess_chapter'], guessParaSep=prefs['insert_empty_paragraph'])
     # save as temporary file
     # (local is not called ``tempfile`` to avoid shadowing the stdlib module)
     out_file = self.temporary_file('.txt')
     try:
         out_file.write(txt.encode('utf-8'))
     finally:
         # Always close, even if encoding or writing fails.
         out_file.close()
     print("save as ", out_file.name)
     return out_file.name