class DocParser:
    """Convert a Word document to HTML with wvWare and pack it into a book.

    The document at ``name`` is converted by the external ``wvWare`` tool,
    cleaned up with the project's ``prettify`` helpers, split into one HTML
    file per centred heading, and each file is registered with ``self.book``
    before the book is packed.
    """

    # Prepended to every section except the first; the first section still
    # carries whatever header wvWare itself emitted at the top of the document.
    HTML_HEADER = """ <html> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <head> Title </head> <body> """

    def __init__(self, name, book_properties):
        """Remember the source document path and create the target book.

        :param name: path of the document handed to wvWare.
        :param book_properties: passed straight to ``Book``.
        """
        self.name = name
        self.book = Book(book_properties)

    def parse(self, output=""):
        """Convert the document, split it into sections and pack the book.

        :param output: path that receives the raw wvWare HTML. When empty,
            ``tmp.html`` inside a freshly created temporary directory is used.
        :returns: ``True`` once ``self.book.pack()`` has been called.

        Side effects: creates ``self.temp_dir`` (never removed here), runs
        wvWare through the shell, writes a ``tmp`` debug dump plus one
        ``<n>.html`` file per section into ``self.temp_dir``.
        """
        self.temp_dir = tempfile.mkdtemp()
        if output == "":
            output = self.temp_dir + "/tmp.html"
        # NOTE(review): the command is built as a shell string, so a document
        # name containing spaces or shell metacharacters will break it --
        # confirm that callers only pass sanitised paths.
        os.system(
            PATH_TO_WV_WARE
            + "wvWare -x"
            + PATH_TO_WV_HTML
            + "/wvHtml.xml --charset=cp1251 %s > %s" % (self.name, output)
        )

        # Read back from wherever wvWare actually wrote; the path used to be
        # hard-coded to <temp_dir>/tmp.html, which broke any explicit `output`.
        with open(output, "r") as source:
            html = prettify.remove_spaces(source.read())
        html = prettify.remove_unnecessary_tags(html)

        names, titles = self._find_headings(html)

        # wvWare was asked for cp1251; re-encode as UTF-8 and collapse
        # whitespace so the normalised heading markup can be located with find().
        html = re.sub(r"\s+", " ", html.decode("cp1251").encode("utf-8"))

        self._write_debug_dump(html, names)

        if not names:
            print("not names")
            # No headings: the whole document becomes a single untitled file.
            self._add_section(0, html, "")

        for index, (title, fragment) in enumerate(
                self._split_sections(html, names, titles)):
            if index:
                fragment = self.HTML_HEADER + fragment
            self._add_section(index, BeautifulSoup(fragment).prettify(), title)

        # for i, image in enumerate(self.images):
        #     self.book.add_file(image, 'im%d' % i, title="", in_spine=False)
        self.book.pack()
        return True

    def _find_headings(self, html):
        """Return ``(names, titles)`` for every non-empty ``align="center"`` tag.

        ``names`` holds the whitespace-normalised markup of each heading (used
        to locate it in the document); ``titles`` holds the same heading with
        its tags stripped.
        """
        names = []
        titles = []
        for tag in BeautifulSoup(html).findAll(align="center"):
            markup = str(tag)
            # Skip elements made only of tags and whitespace.
            if re.match(r"^(<.*?>|\s+)*$", markup):
                continue
            names.append(re.sub(r"\s+", " ", markup))
            text = re.sub(r"(<.*>|\s+)", " ", tag.prettify())
            titles.append(re.sub(r"\s+", " ", text))
        return names, titles

    def _write_debug_dump(self, html, names):
        """Write the normalised HTML and the heading markup to ``<temp_dir>/tmp``."""
        with open(self.temp_dir + "/tmp", "w") as dump:
            dump.write(html)
            dump.write(" \n\n\n")
            for name in names:
                dump.write(name + "\n\n\n")

    def _split_sections(self, html, names, titles):
        """Cut ``html`` at each heading and return ``(title, fragment)`` pairs.

        The text before the first heading gets an empty title; every later
        fragment starts with its heading and carries that heading's title.
        The text after the last heading is included as the final section.
        Returns an empty list when there are no headings.
        """
        if not names:
            return []
        sections = []
        current_title = ""
        rest = html
        skip = 0  # length of the heading `rest` currently starts with
        for name, title in zip(names, titles):
            split_index = rest.find(name, skip)
            if split_index == -1:
                # Heading markup does not occur verbatim; leave its text in
                # the current section instead of slicing at -1.
                continue
            sections.append((current_title, rest[:split_index]))
            rest = rest[split_index:]
            current_title = title
            skip = len(name)
        sections.append((current_title, rest))
        return sections

    def _add_section(self, index, content, title):
        """Write ``content`` to ``<temp_dir>/<index>.html`` and add it to the book."""
        path = self.temp_dir + "/%d.html" % index
        with open(path, "w") as section_file:
            section_file.write(content)
        self.book.add_file(path, "c%d" % index, title)
def __init__(self, name, book_properties):
    """Store the document name and build the book from ``book_properties``."""
    self.name = name
    book = Book(book_properties)
    self.book = book
def epub_form(request):
    """
    Displays form for converting from txt files and adding into db

    On a valid POST the uploaded text is converted into an EPUB through
    ``BookCreator``, stored via ``EpubBook``/``BookFile`` and linked to a
    newly created ``Book`` record; the client is then redirected to
    ``/book/id<id>``. If the language is unknown, or saving the ``BookFile``
    fails, the form is re-rendered with an error message.

    NOTE(review): handling of non-POST requests and of an invalid form is
    not part of the code shown here -- confirm against the full view.
    """
    # Local import: only needed to close the descriptor returned by mkstemp().
    import os

    messages = []
    if request.method == "POST":
        form = EpubAddForm(request.POST, request.FILES)
        if form.is_valid():
            cd = form.cleaned_data
            book_name = cd['caption']
            lang = cd['language']
            author_name = request.user.first_name + " " + request.user.last_name

            # mkstemp() returns an already-open OS-level descriptor; close it
            # so each request does not leak one. Only the path is used below.
            temp_fd, temp_name = tempfile.mkstemp()
            os.close(temp_fd)

            properties = BookProperties(temp_name, book_name)
            properties.language = lang
            properties.subject = cd['subject']
            properties.description = cd['description']
            properties.genre = cd['type']
            properties.date = cd['date']
            properties.rights = cd['rights']
            properties.author = author_name

            book = BookCreator(properties)
            upload = request.FILES['file']
            toc = cd['toc']
            if toc:
                book.split_by_toc(toc, upload.read())
            else:
                text_path = temp_name + "text.html"
                book.txt_to_html(upload.read(), text_path)
                book.add_file(text_path, "c1", "")
            book.pack()

            try:
                language = Language.objects.get(short=lang)
            except Language.DoesNotExist:
                return render_response(request, "epub/epub_form.html",
                                       {'errors': [_('No such language.')],
                                        'user': request.user,
                                        'menu_on': True,
                                        'form': form})

            book_model = Book.objects.create(language=language, pagelink="")
            book_model.title = book_name
            try:
                author = Author.objects.get(name=author_name)
            except Author.DoesNotExist:
                author = Author.objects.create(name=author_name)
            book_model.author = [author]
            book_model.save()

            ebook = EpubBook.objects.create(book=book_model)
            exfile = _ExistingFile(temp_name)
            ebook.name.save(book_name + ".epub", exfile)
            print(ebook.name)
            try:
                book_file = BookFile(link="/" + MEDIA_URL + "/" + quote(str(ebook.name)))
                book_file.type = 'epub'
                book_file.save()
            except Exception as e:
                # Any failure while saving the file record is reported to the
                # user as a duplicate book name.
                print(e)
                return render_response(request, "epub/epub_form.html",
                                       {'errors': [_("Book name should be unique")],
                                        'menu_on': True,
                                        'form': form})

            book_model.book_file = [book_file]
            book_model.save()
            ebook.save()
            messages += [_("Book successfully created.")]
            hrr = HttpResponseRedirect("/book/id%d" % book_model.id)
            return hrr