def extract_content(self, output_dir):
    """Write the book's text and images into ``output_dir``.

    Text pages ``1 .. header_record.num_text_pages`` are fetched with
    ``get_text_page``, concatenated, converted with ``pml_to_html`` and
    written UTF-8 encoded to ``index.html``. Every section from
    ``header_record.non_text_offset`` to the end of ``self.sections`` is
    passed to ``get_image``; entries that come back with a truthy name are
    written to the ``images/`` sub-directory under that name.

    :param output_dir: Target directory; made absolute and created if it
        does not exist yet.
    :return: The value returned by ``self.create_opf(output_dir, images)``
        (presumably the path of the generated OPF file -- confirm against
        ``create_opf``).
    """
    # Function-scope imports, matching how pml_to_html was already imported.
    from xml.sax.saxutils import escape

    from calibre.ebooks.pml.pmlconverter import pml_to_html

    output_dir = os.path.abspath(output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Collect the pages and join once instead of growing a string with +=.
    pages = []
    for i in range(1, self.header_record.num_text_pages + 1):
        self.log.debug('Extracting text page %i' % i)
        pages.append(self.get_text_page(i))
    pml = ''.join(pages)

    title = self.mi.title
    if not isinstance(title, unicode_type):
        title = title.decode('utf-8', 'replace')

    # Escape &, < and > so a title containing markup characters cannot
    # break (or inject into) the generated document.
    html = '<html><head><title>%s</title></head><body>%s</body></html>' % \
        (escape(title), pml_to_html(pml))

    with CurrentDir(output_dir):
        with open('index.html', 'wb') as index:
            self.log.debug('Writing text to index.html')
            index.write(html.encode('utf-8'))

    images_dir = os.path.join(output_dir, 'images/')
    if not os.path.exists(images_dir):
        os.makedirs(images_dir)

    images = []
    with CurrentDir(images_dir):
        for i in range(self.header_record.non_text_offset, len(self.sections)):
            name, img = self.get_image(i)
            if name:
                # NOTE(review): the name is used as a file path unchanged;
                # confirm get_image never yields path separators or '..'.
                name = as_unicode(name)
                images.append(name)
                with open(name, 'wb') as imgf:
                    self.log.debug('Writing image %s to images/' % name)
                    imgf.write(img)

    opf_path = self.create_opf(output_dir, images)

    return opf_path
def extract_content(self, output_dir):
    """Write the book's text and images into ``output_dir``.

    Text pages ``1 .. header_record.num_text_pages`` are fetched with
    ``get_text_page``, concatenated, converted with ``pml_to_html`` and
    written UTF-8 encoded to ``index.html``. Every section from
    ``header_record.non_text_offset`` to the end of ``self.sections`` is
    passed to ``get_image``; entries that come back with a truthy name are
    written to the ``images/`` sub-directory under that name.

    :param output_dir: Target directory; made absolute and created if it
        does not exist yet.
    :return: The value returned by ``self.create_opf(output_dir, images)``
        (presumably the path of the generated OPF file -- confirm against
        ``create_opf``).
    """
    # Function-scope imports, matching how pml_to_html was already imported.
    from xml.sax.saxutils import escape

    from calibre.ebooks.pml.pmlconverter import pml_to_html

    output_dir = os.path.abspath(output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Collect the pages and join once instead of growing a string with +=.
    pages = []
    for i in range(1, self.header_record.num_text_pages + 1):
        self.log.debug('Extracting text page %i' % i)
        pages.append(self.get_text_page(i))
    pml = u''.join(pages)

    title = self.mi.title
    # Test for bytes rather than the Python 2-only ``unicode`` builtin, which
    # raises NameError on Python 3. On Python 2 ``bytes`` is ``str``, so
    # byte-string titles are still decoded and unicode titles are left alone.
    if isinstance(title, bytes):
        title = title.decode('utf-8', 'replace')

    # Escape &, < and > so a title containing markup characters cannot
    # break (or inject into) the generated document.
    html = u'<html><head><title>%s</title></head><body>%s</body></html>' % \
        (escape(title), pml_to_html(pml))

    with CurrentDir(output_dir):
        with open('index.html', 'wb') as index:
            self.log.debug('Writing text to index.html')
            index.write(html.encode('utf-8'))

    images_dir = os.path.join(output_dir, 'images/')
    if not os.path.exists(images_dir):
        os.makedirs(images_dir)

    images = []
    with CurrentDir(images_dir):
        for i in range(self.header_record.non_text_offset, len(self.sections)):
            name, img = self.get_image(i)
            if name:
                # NOTE(review): the name is used as a file path unchanged;
                # confirm get_image never yields path separators or '..'.
                images.append(name)
                with open(name, 'wb') as imgf:
                    self.log.debug('Writing image %s to images/' % name)
                    imgf.write(img)

    opf_path = self.create_opf(output_dir, images)

    return opf_path