def convert(self, stream, options, file_ext, log, accelerators):
    """Convert the PDF behind ``stream`` to HTML and describe it in an OPF.

    ``options`` and ``log`` are stored on the instance as ``self.opts`` and
    ``self.log``. When ``options.new_pdf_engine`` is set, the work is
    delegated to ``self.convert_new`` and its result is returned unchanged.

    Otherwise ``pdftohtml`` is run against ``stream.name`` with the current
    working directory as its first argument, every entry of that directory
    is listed in the manifest, and ``metadata.opf`` is written there.

    ``file_ext`` is not used by this method; ``accelerators`` is only
    forwarded to ``convert_new``.

    Returns the path of ``metadata.opf`` joined onto the current working
    directory.
    """
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.pdf.pdftohtml import pdftohtml

    log.debug('Converting file to html...')
    self.opts, self.log = options, log
    if options.new_pdf_engine:
        return self.convert_new(stream, accelerators)

    # The main html file will be named index.html
    pdftohtml(os.getcwdu(), stream.name, options.no_images)

    from calibre.ebooks.metadata.meta import get_metadata
    log.debug('Retrieving document metadata...')
    mi = get_metadata(stream, 'pdf')
    opf = OPFCreator(os.getcwdu(), mi)

    # Everything in the working directory besides index.html is added to
    # the manifest after it. list.remove raises ValueError if index.html
    # is not present in the listing.
    other_files = os.listdir(os.getcwdu())
    other_files.remove('index.html')
    manifest = [(u'index.html', None)] + [
        (entry, None) for entry in other_files
    ]

    log.debug('Generating manifest...')
    opf.create_manifest(manifest)
    opf.create_spine([u'index.html'])

    log.debug('Rendering manifest...')
    with open(u'metadata.opf', 'wb') as opf_out:
        opf.render(opf_out)

    return os.path.join(os.getcwdu(), u'metadata.opf')
def convert(self, stream, options, file_ext, log, accelerators):
    """Turn the PDF in ``stream`` into HTML plus a ``metadata.opf`` file.

    Stores ``options``/``log`` as ``self.opts``/``self.log``. If
    ``options.new_pdf_engine`` is truthy, returns whatever
    ``self.convert_new(stream, accelerators)`` returns. Otherwise calls
    ``pdftohtml`` with the current working directory, ``stream.name`` and
    ``options.no_images``, reads the document metadata, builds a manifest
    from the working directory listing and renders ``metadata.opf``.

    ``file_ext`` is accepted but not read here.

    Returns the path to ``metadata.opf`` under the current working directory.
    """
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.pdf.pdftohtml import pdftohtml

    log.debug('Converting file to html...')
    # The main html file will be named index.html
    self.opts = options
    self.log = log

    if options.new_pdf_engine:
        return self.convert_new(stream, accelerators)

    pdftohtml(os.getcwdu(), stream.name, options.no_images)

    from calibre.ebooks.metadata.meta import get_metadata
    log.debug('Retrieving document metadata...')
    metadata = get_metadata(stream, 'pdf')
    creator = OPFCreator(os.getcwdu(), metadata)

    manifest = [(u'index.html', None)]
    listing = os.listdir(os.getcwdu())
    # index.html is already the first manifest entry; drop it from the
    # listing so it is not added twice (ValueError if it is missing).
    listing.remove('index.html')
    manifest.extend((filename, None) for filename in listing)

    log.debug('Generating manifest...')
    creator.create_manifest(manifest)
    creator.create_spine([u'index.html'])

    log.debug('Rendering manifest...')
    with open(u'metadata.opf', 'wb') as rendered:
        creator.render(rendered)

    return os.path.join(os.getcwdu(), u'metadata.opf')
def convert_new(self, stream, accelerators):
    """Convert the PDF via pdftohtml's XML output and ``PDFDocument``.

    Runs ``pdftohtml`` with ``as_xml=True`` on ``stream.name``, using the
    current working directory as its first argument, then reads
    ``index.xml``, passes its bytes through ``clean_ascii_chars`` and hands
    the result to ``PDFDocument`` together with ``self.opts`` and
    ``self.log``.

    ``accelerators`` is accepted but not used by this method.

    Returns the path ``metadata.opf`` joined onto the current working
    directory. This method does not write that file itself; presumably
    ``PDFDocument`` does (not visible here).
    """
    from calibre.ebooks.pdf.pdftohtml import pdftohtml
    from calibre.utils.cleantext import clean_ascii_chars
    from calibre.ebooks.pdf.reflow import PDFDocument

    pdftohtml(os.getcwd(), stream.name, self.opts.no_images, as_xml=True)

    with lopen('index.xml', 'rb') as xml_file:
        raw_xml = xml_file.read()
    cleaned_xml = clean_ascii_chars(raw_xml)

    # Constructed for its side effects; the instance itself is discarded.
    PDFDocument(cleaned_xml, self.opts, self.log)

    return os.path.join(os.getcwd(), 'metadata.opf')
def convert_new(self, stream, accelerators):
    """Convert the PDF using pdftohtml's XML mode and ``PDFDocument``.

    Calls ``pdftohtml`` with ``as_xml=True`` for ``stream.name``, passing
    the current working directory and ``self.opts.no_images``. The
    ``index.xml`` file is then read in binary mode, run through
    ``clean_ascii_chars`` and passed to ``PDFDocument`` along with
    ``self.opts`` and ``self.log``.

    ``accelerators`` is not read by this method.

    Returns the path of ``metadata.opf`` under the current working
    directory. The file is not written here; presumably ``PDFDocument``
    produces it (not visible here).
    """
    from calibre.ebooks.pdf.pdftohtml import pdftohtml
    from calibre.utils.cleantext import clean_ascii_chars
    from calibre.ebooks.pdf.reflow import PDFDocument

    pdftohtml(os.getcwdu(), stream.name, self.opts.no_images, as_xml=True)

    with open(u'index.xml', 'rb') as source:
        contents = source.read()
    xml = clean_ascii_chars(contents)

    # Only the constructor call matters; the resulting object is not kept.
    PDFDocument(xml, self.opts, self.log)

    return os.path.join(os.getcwdu(), u'metadata.opf')
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert the PDF behind ``stream`` to HTML and write ``metadata.opf``.

    ``options`` and ``log`` are stored as ``self.opts`` and ``self.log``.
    With ``options.new_pdf_engine`` set, the call is delegated to
    ``self.convert_new`` and its result returned.

    Otherwise ``pdftohtml`` is run for ``stream.name`` with the current
    working directory as its first argument, a manifest is built from that
    directory's listing, and ``metadata.opf`` is rendered. If a ``toc.ncx``
    file exists and the manifest has an id for it, the rendered OPF is
    rewritten so that ``<spine`` carries a ``toc="<id>"`` attribute.

    ``file_ext`` is not used; ``accelerators`` is only forwarded to
    ``convert_new``.

    Returns the path of ``metadata.opf`` joined onto the current working
    directory.
    """
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.pdf.pdftohtml import pdftohtml

    log.debug('Converting file to html...')
    self.opts, self.log = options, log
    if options.new_pdf_engine:
        return self.convert_new(stream, accelerators)

    # The main html file will be named index.html
    pdftohtml(os.getcwd(), stream.name, options.no_images)

    from calibre.ebooks.metadata.meta import get_metadata
    log.debug('Retrieving document metadata...')
    mi = get_metadata(stream, 'pdf')
    opf = OPFCreator(os.getcwd(), mi)

    # index.html goes first; the remaining directory entries follow it.
    # list.remove raises ValueError if index.html is not in the listing.
    other_files = os.listdir(os.getcwd())
    other_files.remove('index.html')
    manifest = [('index.html', None)] + [
        (entry, None) for entry in other_files
    ]

    log.debug('Generating manifest...')
    opf.create_manifest(manifest)
    opf.create_spine(['index.html'])

    log.debug('Rendering manifest...')
    with lopen('metadata.opf', 'wb') as opf_out:
        opf.render(opf_out)

    # The manifest is only queried when toc.ncx is actually on disk.
    toc_id = (
        opf.manifest.id_for_path('toc.ncx')
        if os.path.exists('toc.ncx') else None
    )
    if toc_id:
        # Patch the rendered OPF in place. The replacement is longer than
        # the text it replaces, so rewriting from offset 0 covers all of
        # the old content.
        with lopen('metadata.opf', 'r+b') as opf_file:
            rendered = opf_file.read()
            spine_with_toc = b'<spine toc="%s"' % as_bytes(toc_id)
            patched = rendered.replace(b'<spine', spine_with_toc)
            opf_file.seek(0)
            opf_file.write(patched)

    return os.path.join(os.getcwd(), 'metadata.opf')
def convert(self, stream, options, file_ext, log, accelerators):
    """Turn the PDF in ``stream`` into HTML plus a ``metadata.opf`` file.

    Stores ``options``/``log`` as ``self.opts``/``self.log``. If
    ``options.new_pdf_engine`` is truthy, returns the result of
    ``self.convert_new(stream, accelerators)``.

    Otherwise calls ``pdftohtml`` with ``getcwd()``, ``stream.name`` and
    ``options.no_images``, reads the document metadata, builds a manifest
    from the working directory listing and renders ``metadata.opf``. When
    ``toc.ncx`` exists and has a manifest id, the rendered file is patched
    so the ``<spine`` tag gains a ``toc="<id>"`` attribute.

    ``file_ext`` is accepted but not read here.

    Returns the path to ``metadata.opf`` under ``getcwd()``.
    """
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.pdf.pdftohtml import pdftohtml

    log.debug('Converting file to html...')
    # The main html file will be named index.html
    self.opts = options
    self.log = log

    if options.new_pdf_engine:
        return self.convert_new(stream, accelerators)

    pdftohtml(getcwd(), stream.name, options.no_images)

    from calibre.ebooks.metadata.meta import get_metadata
    log.debug('Retrieving document metadata...')
    metadata = get_metadata(stream, 'pdf')
    creator = OPFCreator(getcwd(), metadata)

    manifest = [('index.html', None)]
    listing = os.listdir(getcwd())
    # index.html is already the first manifest entry; drop it from the
    # listing so it is not added twice (ValueError if it is missing).
    listing.remove('index.html')
    manifest.extend((filename, None) for filename in listing)

    log.debug('Generating manifest...')
    creator.create_manifest(manifest)
    creator.create_spine(['index.html'])

    log.debug('Rendering manifest...')
    with lopen('metadata.opf', 'wb') as rendered:
        creator.render(rendered)

    # Look up the NCX id only if toc.ncx is present on disk.
    ncx_id = None
    if os.path.exists('toc.ncx'):
        ncx_id = creator.manifest.id_for_path('toc.ncx')

    if ncx_id:
        # Rewrite the OPF in place; the new content is longer than the old,
        # so writing from the start overwrites the previous bytes entirely.
        with lopen('metadata.opf', 'r+b') as opf_file:
            current = opf_file.read()
            replacement = b'<spine toc="%s"' % as_bytes(ncx_id)
            updated = current.replace(b'<spine', replacement)
            opf_file.seek(0)
            opf_file.write(updated)

    return os.path.join(getcwd(), 'metadata.opf')
def pdftohtml_extract_pdf_issn(pdf_path):
    """Run ``pdftohtml`` on ``pdf_path`` and return the output directory.

    The output directory is ``base_dir()`` with every ``os.sep`` replaced
    by ``'/'``. ``pdftohtml`` is invoked with ``no_images=True`` and
    ``as_xml=False``. When the module-level ``DEBUG`` flag is truthy, the
    directory is printed first.

    NOTE(review): despite the name, no ISSN is extracted in this function;
    presumably the caller scans the files written to the returned directory.

    Returns the forward-slash-normalised output directory.
    """
    # Normalise to forward slashes regardless of the platform separator.
    output_dir = base_dir().replace(os.sep, '/')

    if DEBUG:
        print("Current output working directory ( output_dir= base_dir() ) for temporary files is: ", output_dir)

    pdftohtml(output_dir, pdf_path, no_images=True, as_xml=False)
    return output_dir