Esempio n. 1
0
    def extract_content(self, output_dir):
        from calibre.ebooks.pml.pmlconverter import pml_to_html

        output_dir = os.path.abspath(output_dir)

        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        pml = ''
        for i in range(1, self.header_record.num_text_pages + 1):
            self.log.debug('Extracting text page %i' % i)
            pml += self.get_text_page(i)

        title = self.mi.title
        if not isinstance(title, unicode_type):
            title = title.decode('utf-8', 'replace')

        html = '<html><head><title>%s</title></head><body>%s</body></html>' % \
            (title, pml_to_html(pml))

        with CurrentDir(output_dir):
            with open('index.html', 'wb') as index:
                self.log.debug('Writing text to index.html')
                index.write(html.encode('utf-8'))

        if not os.path.exists(os.path.join(output_dir, 'images/')):
            os.makedirs(os.path.join(output_dir, 'images/'))
        images = []
        with CurrentDir(os.path.join(output_dir, 'images/')):
            for i in range(self.header_record.non_text_offset,
                           len(self.sections)):
                name, img = self.get_image(i)
                if name:
                    name = as_unicode(name)
                    images.append(name)
                    with open(name, 'wb') as imgf:
                        self.log.debug('Writing image %s to images/' % name)
                        imgf.write(img)

        opf_path = self.create_opf(output_dir, images)

        return opf_path
Esempio n. 2
0
    def extract_content(self, output_dir):
        from calibre.ebooks.pml.pmlconverter import pml_to_html

        output_dir = os.path.abspath(output_dir)

        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        pml = u''
        for i in range(1, self.header_record.num_text_pages + 1):
            self.log.debug('Extracting text page %i' % i)
            pml += self.get_text_page(i)

        title = self.mi.title
        if not isinstance(title, unicode):
            title = title.decode('utf-8', 'replace')

        html = u'<html><head><title>%s</title></head><body>%s</body></html>' % \
            (title, pml_to_html(pml))

        with CurrentDir(output_dir):
            with open('index.html', 'wb') as index:
                self.log.debug('Writing text to index.html')
                index.write(html.encode('utf-8'))

        if not os.path.exists(os.path.join(output_dir, 'images/')):
            os.makedirs(os.path.join(output_dir, 'images/'))
        images = []
        with CurrentDir(os.path.join(output_dir, 'images/')):
            for i in range(self.header_record.non_text_offset, len(self.sections)):
                name, img = self.get_image(i)
                if name:
                    images.append(name)
                    with open(name, 'wb') as imgf:
                        self.log.debug('Writing image %s to images/' % name)
                        imgf.write(img)

        opf_path = self.create_opf(output_dir, images)

        return opf_path