Ejemplo n.º 1
0
    def __init__(self, opffile, config):
        """Load the package document and capture the conversion settings.

        Args:
            opffile: Path of the XML package document; it is parsed
                immediately and its directory is remembered in ``self.path``.
            config: Settings object. The code reads
                ``config.current_profile`` (keys ``'hyphens'``,
                ``'hyphensReplaceNBSP'`` and ``'bookTitleFormat'``),
                ``config.log`` and ``config.transliterate_author_and_title``.
        """
        self.buff = []

        self.book_title = ''  # Book title
        self.book_author = ''  # Author
        self.book_series = ''  # Book series
        self.book_series_num = ''  # Number within the book series

        self.book_lang = 'ru'

        self.hyphenate = config.current_profile['hyphens']
        # Always define the hyphenation attributes so that reading them with
        # hyphenation switched off yields a neutral value instead of raising
        # AttributeError.
        self.replaceNBSP = False
        self.hyphenator = None
        if self.hyphenate:
            self.replaceNBSP = config.current_profile['hyphensReplaceNBSP']
            self.hyphenator = MyHyphen(self.book_lang)

        self.opffile = opffile
        self.path = os.path.dirname(opffile)

        self.log = config.log
        self.bookseriestitle = config.current_profile['bookTitleFormat']
        self.transliterate_author_and_title = config.transliterate_author_and_title

        # Random fallback identifier.
        self.book_uuid = uuid.uuid4()

        # recover=True lets the parser continue past malformed markup.
        self.tree = etree.parse(opffile, parser=etree.XMLParser(recover=True))
        self.root = self.tree.getroot()
Ejemplo n.º 2
0
class EpubProc:
    """Rewrite the title of a package document and hyphenate its XHTML items.

    The constructor parses the package document; ``process()`` then updates
    the title from the series metadata, writes the document back and walks
    every ``application/xhtml+xml`` manifest item, optionally hyphenating the
    text of its ``<p>``/``<div>`` elements.
    """

    # insert_hyphenation() skips hyphenation while either flag is truthy.
    # Nothing in this class sets them, so they default to False here; without
    # the defaults the first hyphenation attempt raised AttributeError.
    header = False
    subheader = False

    def __init__(self, opffile, config):
        """Load the package document and capture the conversion settings.

        Args:
            opffile: Path of the XML package document; it is parsed
                immediately and its directory is remembered in ``self.path``.
            config: Settings object. The code reads
                ``config.current_profile`` (keys ``'hyphens'``,
                ``'hyphensReplaceNBSP'`` and ``'bookTitleFormat'``),
                ``config.log`` and ``config.transliterate_author_and_title``.
        """
        self.buff = []

        self.book_title = ''  # Book title
        self.book_author = ''  # Author
        self.book_series = ''  # Book series
        self.book_series_num = ''  # Number within the book series

        self.book_lang = 'ru'
        self.hyphenator = MyHyphen(self.book_lang)
        self.hyphenate = config.current_profile['hyphens']
        self.replaceNBSP = config.current_profile['hyphensReplaceNBSP']

        self.opffile = opffile
        self.path = os.path.dirname(opffile)

        self.log = config.log
        self.bookseriestitle = config.current_profile['bookTitleFormat']
        self.transliterate_author_and_title = config.transliterate_author_and_title

        # Random fallback; process() replaces it when the document carries a
        # parseable identifier.
        self.book_uuid = uuid.uuid4()

        # recover=True lets the parser continue past malformed markup.
        self.tree = etree.parse(opffile, parser=etree.XMLParser(recover=True))
        self.root = self.tree.getroot()

    def insert_hyphenation(self, s):
        """Return *s* with HTML entities decoded and, if enabled, hyphenated.

        Args:
            s: Text to process; ``None`` or an empty string yields ``''``.

        Returns:
            The unescaped text. It is additionally passed through
            ``self.hyphenator.hyphenate_text`` (with ``self.replaceNBSP``)
            when a hyphenator exists, ``self.hyphenate`` is truthy and
            neither ``self.header`` nor ``self.subheader`` is set.
        """
        if not s:
            return ''
        text = html.unescape(s)
        if (not self.hyphenator or not self.hyphenate
                or self.header or self.subheader):
            return text
        return self.hyphenator.hyphenate_text(text, self.replaceNBSP)

    def process(self):
        """Update the package document on disk, then process its XHTML items.

        Steps, in order: read the identifier and series metadata, rewrite
        the title elements, pick up the language, write the package document
        back to ``self.opffile`` and finally re-serialize (and optionally
        hyphenate) every XHTML manifest item.
        """
        self._read_uuid()
        self._read_series()
        self._apply_title_format()
        self._read_language()

        indent(self.root)
        self.tree.write(self.opffile,
                        encoding='utf-8',
                        method='xml',
                        xml_declaration=True)

        self._process_content_documents()

    def _read_uuid(self):
        """Adopt the first identifier element as book UUID when it parses."""
        node = next(self.root.iter('{*}identifier'), None)
        if node is None:
            return
        try:
            self.book_uuid = uuid.UUID(node.text)
        except (TypeError, ValueError):
            # Empty or non-UUID identifier: keep the UUID chosen in __init__.
            pass

    def _read_series(self):
        """Collect series name and index from the meta elements."""
        for node in self.root.iter('{*}meta'):
            name = node.get('name')
            if name is None:
                continue
            # 'series_index' must be tested first: it would not match the
            # plain 'series' suffix, but the order documents the intent.
            if name.endswith('series_index'):
                self.book_series_num = node.get('content', '')
            elif name.endswith('series'):
                self.book_series = node.get('content', '')

    def _apply_title_format(self):
        """Rewrite every title element from the ``bookTitleFormat`` template."""
        if self.book_series == '':
            return

        raw_number = self.book_series_num
        series = self.book_series.strip()
        number = raw_number.strip() if raw_number else ''
        padded = number.zfill(2) if raw_number else ''
        # Lower-cased initials of the series words.
        abbr = ''.join(word[0] for word in self.book_series.split()).lower()

        for node in self.root.iter('{*}title'):
            self.book_title = node.text
            own_title = self.book_title.strip() if self.book_title else ''
            title = self.bookseriestitle
            # Placeholders are substituted one after another, in this order.
            for placeholder, value in (('#series', series),
                                       ('#number', number),
                                       ('#padnumber', padded),
                                       ('#title', own_title),
                                       ('#abbrseries', abbr)):
                title = title.replace(placeholder, value)
            if self.transliterate_author_and_title:
                title = transliterate(title)
            node.text = title

    def _read_language(self):
        """Take the book language from the document and tell the hyphenator."""
        for node in self.root.iter('{*}language'):
            lang = (node.text or '').strip()
            # An empty element must not replace the default with None/''.
            if lang:
                self.book_lang = lang
        self.hyphenator.set_language(self.book_lang)

    def _process_content_documents(self):
        """Re-serialize each XHTML manifest item, hyphenating it if enabled."""
        for item in self.root.iter('{*}item'):
            href = item.get('href')
            if href is None:
                continue
            if item.get('media-type') != 'application/xhtml+xml':
                continue

            filename = os.path.join(self.path, href)
            self.log.debug('Processing {}'.format(filename))
            # Proper XML encoding needed by kindlegen
            xhtml = etree.parse(filename,
                                parser=etree.XMLParser(recover=True))
            if self.hyphenate:
                # Do hyphenation if desired
                for body in xhtml.getroot().iter('{*}body'):
                    self._hyphenate_body(body)
            xhtml.write(filename,
                        encoding='utf-8',
                        method='xml',
                        xml_declaration=True)

    def _hyphenate_body(self, body):
        """Hyphenate p/div elements of *body* and their direct children."""
        # A p/div nested directly inside another p/div is reached twice (as a
        # child and as an iterated element). Remembering handled nodes keeps
        # its text from being run through the hyphenator a second time; the
        # set also keeps the element objects alive for the whole walk.
        handled = set()
        for block in body.iter('{*}p', '{*}div'):
            self._hyphenate_node(block, handled)
            for child in block.iterchildren(tag=etree.Element):
                # <pre> children are left untouched.
                if not child.tag.endswith('pre'):
                    self._hyphenate_node(child, handled)

    def _hyphenate_node(self, node, handled):
        """Hyphenate text and tail of *node* unless it was already handled."""
        if node in handled:
            return
        handled.add(node)
        if node.text:
            node.text = save_html(self.insert_hyphenation(node.text))
        if node.tail:
            node.tail = save_html(self.insert_hyphenation(node.tail))