Example #1
def zip_resume(chemin, epub):
    job.log('\t rezippe le résumé')
    part = epub.rpartition('-')
    if part[1]:
        epubresume = part[0] + '%Resume% ' + part[1] + part[2][0:-5] + '.epub'
    else:
        epubresume = part[2][0:-5] + '_%Resume%.epub'
    zepub = zipfile.ZipFile(epubresume, mode="w",
                               compression=zipfile.ZIP_DEFLATED, allowZip64=True)
    # the mimetype first, stored uncompressed
    zepub.write(os.path.join(chemin, "mimetype"), arcname="mimetype",
                   compress_type=zipfile.ZIP_STORED)

    # then the other files
    exclude_files = ['.DS_Store', 'mimetype']
    for root, _dirs, files in os.walk(chemin):
        for fn in files:
            if fn in exclude_files:
                continue
            absfn = os.path.join(root, fn)
            zfn = os.path.relpath(absfn, chemin).replace(os.sep, '/')
            zepub.write(absfn, zfn)
    zepub.close()
    try:
        shutil.rmtree(chemin)
    except OSError:
        pass
    return
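
Note: the OCF container format requires mimetype to be the archive's first entry, stored without compression, which is why it is written separately above before the directory walk. A minimal sketch of checking that property on the produced file:

import zipfile

def mimetype_ok(epub_path):
    # the first entry must be an uncompressed file named 'mimetype'
    with zipfile.ZipFile(epub_path) as z:
        first = z.infolist()[0]
        return first.filename == 'mimetype' and first.compress_type == zipfile.ZIP_STORED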
Example #2
def modif_tatoo_html(monepub):
    for identifier in monepub.opf.manifest:
        item = monepub.opf.manifest[identifier]
        path = os.path.join(monepub.chemin, monepub.content_path, item.href)
        path = os.path.normpath(urllib.parse.unquote(path))
        if item.href.endswith(('html', 'htm', 'opf')):
            data = fonctions.parse_file(path)
            if data is None:
                job.log('\t Fichier ' + os.path.basename(path) + ' malformé, impossible à parser')
                continue
            try:
                # remove the html comments
                for elem in data.iter(tag=etree.Comment):
                    if elem.text:
                        job.logtat('suppression des commentaires html :' + str(elem.text))
                        tail = elem.tail
                        prev = elem.getprevious()
                        parent = elem.getparent()
                        parent.remove(elem)
                        # reattach the comment's tail text, which remove() would drop
                        if tail:
                            if prev is not None:
                                prev.tail = (prev.tail or '') + tail
                            else:
                                parent.text = (parent.text or '') + tail
                para = XPath('.//h:p')
                # remove paragraphs containing an e-mail address
                for child in para(data):
                    if child.text is not None and re.search('[@].*[.][a-z]{2,3}', child.text) and len(child.text) < 100:
                        job.logtat('suppression du paragraphe : ' + str(child.text))
                        child.getparent().remove(child)
                # remove the watermark blurb paragraphs
                for child in para(data):
                    children = child.getchildren()
                    if (child.text is not None) and (re.search('filigrane', child.text)) and (re.search('e-book', child.text)):
                        job.logtat('modification de ces paragraphes : "' + str(child.text) + str(children[0].tail) + '" dans ' + os.path.basename(path))
                        child.text = 'Cet e-book contenait un filigrane (watermark) et une identification qui ont été supprimés pour votre agrément'
                        children[0].tail = 'par PersonnaLiseur'
                # remove the watermark images
                waterm = XPath('.//h:img')
                for src in waterm(data):
                    for value in src.values():
                        if 'base64' in value:
                            job.logtat('suppression du watermark dans ' + os.path.basename(path))
                            div = src.getparent()
                            div.getparent().remove(div)
                # strip the body ids
                body_id = XPath('.//h:body')
                for body in body_id(data):
                    if body.get('id'):
                        job.logtat('suppression du body id "' + body.get('id') + '" dans ' + os.path.basename(path))
                        body.set('id', '')
            except Exception:
                job.log('html non modifiés')
                return
            data.write(path, encoding='utf-8', xml_declaration=True, pretty_print=True)
            # remove comments outside the parsed html tree
            with open(path, mode='r', encoding='utf-8') as f:
                texte = f.read()
            if re.search(r"<!--[\d\D]*?-->", texte):
                job.logtat('suppression des commentaires hors html dans ' + os.path.basename(path))
                texte = re.sub(r"<!--[\d\D]*?-->", '', texte)
            with open(path, mode='w', encoding='utf-8') as f:
                f.write(texte)
    return
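
The XPath helper these examples call is defined elsewhere in the project and not shown. A plausible minimal sketch, assuming lxml; the prefix-to-namespace bindings below are inferred from how the expressions are used (h:, opf:, dt:, s:), not taken from the project itself:

from lxml import etree

# Assumed module-level helper: lxml XPath pre-bound to the namespaces
# the expressions in these examples rely on.
NSMAP = {
    'h': 'http://www.w3.org/1999/xhtml',
    'opf': 'http://www.idpf.org/2007/opf',
    'dt': 'http://www.daisy.org/z3986/2005/ncx/',
    's': 'http://www.w3.org/2000/svg',
}

def XPath(expr):
    return etree.XPath(expr, namespaces=NSMAP)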
Example #3
def modifie_titlepage(monepub, options_texte):
    '''Adjusts the cover page's viewBox, if any,
    to match the image's new dimensions'''
    if monepub.pagedecouv:
        with open(monepub.pagedecouv, mode='r', encoding='utf-8') as f:
            if 'viewBox' not in f.read():
                return
        job.log('\t puis on rectifie le viewbox')
        im = Image.open(monepub.imagedecouv)
        newwidth, newheight = im.size
        parser = etree.XMLParser(encoding='utf-8', recover=True, remove_blank_text=True)
        with open(monepub.pagedecouv, mode='r', encoding='utf-8') as f:
            data = etree.parse(f, parser=parser)
            viewXpath = xpath(data, './/s:svg')
            imageXpath = xpath(data, '//s:image')
            for key in viewXpath[0].keys():
                if key == 'viewBox':
                    newvalue = '0 0 ' + str(newwidth) + ' ' + str(newheight)
                    viewXpath[0].set('viewBox', newvalue)
                if key == 'width':
                    imageXpath[0].set('width', str(newwidth))
                if key == 'height':
                    imageXpath[0].set('height', str(newheight))
        with open(monepub.pagedecouv, mode='wb') as f:
            data.write(f, encoding='utf-8', xml_declaration=True, pretty_print=True)
    return
Example #4
def redim(options_texte, monepub):
    job.log('Redimensionne les images de ' + monepub.chemin)
    taille = options_texte[2]
    newheight = re.split('x', taille, flags=re.IGNORECASE)[0].strip()
    newwidth = re.split('x', taille, flags=re.IGNORECASE)[1].strip()
    qual = int(options_texte[5])
    for root, _dirs, files in os.walk(monepub.chemin):
        for name in files:
            name = os.path.join(root, name)
            ext = os.path.splitext(name)[1]
            if ext in EXT_IMAGES:
                try:
                    im = Image.open(name)
                    oldwidth, oldheight = im.size
                    coef = oldheight / int(newheight)
                    if coef <= 1:
                        im.thumbnail((int(newwidth), int(newheight)), Image.ANTIALIAS)
                    else:
                        im.thumbnail((int(newwidth), int(newheight)), Image.BICUBIC)
                    if ext in ('.jpg', '.jpeg'):
                        im.save(name, quality=qual)
                    elif ext == '.png':
                        im.save(name, optimize=True)
                    else:
                        im.save(name)
                except IOError:
                    pass
    return
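
Pillow's thumbnail() treats the requested size as a bounding box: it keeps the aspect ratio and never enlarges, so undersized images pass through unchanged. A quick sketch:

from PIL import Image

im = Image.new('RGB', (2000, 3000))
im.thumbnail((1200, 1600))  # shrinks in place to fit the box
print(im.size)              # height capped at 1600, width scaled to keep the ratio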
Example #5
def inserer_jacket(f1path, monepub, LECHEMIN):
    job.log('\t on l\'insère')
    # first remove any old jackets from the manifest
    for value in list(monepub.opf.manifest):
        if 'jacket' in value:
            del monepub.opf.manifest[value]
    # then from the spine
    for item in list(monepub.opf.spine.itemrefs):
        if 'jacket' in item[0] or 'calibre_jacket' in item[0]:
            monepub.opf.spine.itemrefs.remove(item)
    # then add the new jacket
    for identifier in monepub.opf.manifest:
        item = monepub.opf.manifest[identifier]
        if item.media_type == 'application/xhtml+xml':
            rep = os.path.dirname(item.href)  # directory holding the xhtml files
            if rep == '':
                repjacket = 'jacket.xhtml'
            else:
                repjacket = rep + '/jacket.xhtml'

    monepub.opf.manifest.add_item('jacket', repjacket, 'application/xhtml+xml')
    shutil.move(f1path, os.path.normpath(os.path.join(monepub.chemin, monepub.content_path, repjacket)))
    monepub.opf.spine.itemrefs.insert(1, ('jacket', True))
    myopf = os.path.join(monepub.chemin, monepub.opf_path)  # ../tmp/xxx + \\OEBPS + /content
    with open(myopf, mode='w', encoding='utf-8', errors='strict') as f:
        f.write(monepub.opf.as_xml_document().toprettyxml())
    return
Example #6
def suppr_tatoo(monepub):
    job.log('Détatouage')
    fonctions.metas_xhtml(monepub, copyr=False)
    suppr_fichiers_suspects(monepub)
    modif_metas_tatoo(monepub)
    modif_tatoo_html(monepub)

    return
Example #7
 def cherche_opf(self, chemin):
     for root, _dirs, files in os.walk(chemin):
         for name in files:
             if 'opf' in os.path.splitext(name)[1]:
                 self.opf_path = os.path.relpath(os.path.join(root, name), chemin)
                 return self.opf_path
     job.log('\n Epub invalide : pas de content.opf')
     return None
Example #8
def renomme_fichiers(dossier):
    job.log('Renommage des fichiers')
    dict_nom = {}
    dict_nomcomplet = {}
    i = 0
    for root, _dirs, files in os.walk(dossier):
        for name in files:
            nomcomplet = os.path.join(root, name)
            ext = os.path.splitext(name)[1]
            chiffres = re.search('[0-9]', name)
            if (ext in ('.html', '.xhtml', '.htm')) and not (name.startswith('PL')) and (chiffres):
                dict_nomcomplet[nomcomplet] = os.path.join(root, 'PL' + str(i) + '.xhtml')
                dict_nom[name] = 'PL' + str(i) + '.xhtml'
                i += 1
                os.rename(nomcomplet, dict_nomcomplet[nomcomplet])
    for root, _dirs, files in os.walk(dossier):
        for name in files:
            name = os.path.join(root, name)
            ext = os.path.splitext(name)[1]

            if ext in ('.html', '.xhtml', '.htm', '.opf'):
                data = parse_file(name)
                lien_a = XPath('.//h:a|//opf:item|//opf:reference')
                for child in lien_a(data):
                    for attr, value in child.items():
                        value = urllib.parse.unquote(value)
                        for ancien_nom in dict_nom.keys():
                            if (attr == 'href') and ((re.search('/' + ancien_nom, value)) or (re.match(ancien_nom, value))):
                                value = value.replace(ancien_nom, dict_nom[ancien_nom])
                                child.set('href', value)
                xml = etree.tostring(data, encoding='utf-8', xml_declaration=True, pretty_print=True)
                with open(name, mode='wb') as f:
                    f.write(xml)
            if ext == '.ncx':
                data = parse_file(name)
                lien_a = XPath('.//dt:content')
                for child in lien_a(data):
                    for attr, value in child.items():
                        for ancien_nom in dict_nom.keys():
                            if (attr == 'src') and (re.search(ancien_nom, value)):
                                value = value.replace(ancien_nom, dict_nom[ancien_nom])
                                child.set('src', value)

                xml = etree.tostring(data, encoding='utf-8', xml_declaration=True, pretty_print=True)
                with open(name, mode='wb') as f:
                    f.write(xml)
    return
Example #9
def css_expo(monepub):
    os.chdir(monepub.chemin)
    job.log('Corrige les exposants')
    for elt in monepub.css:
        f = monepub.normalize(os.path.join(monepub.chemin, elt))
        with open(f, mode='r', encoding='utf8', errors='ignore') as filecss:
            texte = re.sub(r'vertical-align\s?:\s?super', r'vertical-align:top', filecss.read())
            texte = re.sub(r'vertical-align\s?:\s?[0-9]{1,3}%', r'vertical-align:top', texte)
        with open(f, mode='w', encoding='utf8', errors='strict') as f1:
            f1.write(texte)
    return
Example #10
def resume(nouvelepub, covimg, LECHEMIN, monepub, epub, options_texte):
    job.log('Création d\'un résumé')
    covpath = LECHEMIN + '/tmp2/cover.jpg'
    shutil.copy2(covimg, covpath)
    if 'Redimensionner les images' in options_texte:
        redim_cover(options_texte, covpath)
    jacket = LECHEMIN + '/tmp2/jacket.xhtml'
    cover = LECHEMIN + '/tmp2/cover.xhtml'
    nouvelepub.insere_fichier(LECHEMIN, cover, 'ch1', 'Text/cover.xhtml', 'application/xhtml+xml',
                              append_to_spine=True, insert_to_spine=False, index=0, is_linear=True)
    nouvelepub.insere_fichier(LECHEMIN, jacket, 'ch2', 'Text/jacket.xhtml', 'application/xhtml+xml',
                              append_to_spine=True, insert_to_spine=False, index=0, is_linear=True)
    nouvelepub.insere_fichier(LECHEMIN, covpath, 'img', 'Images/cover.jpg', 'image/jpeg',
                              append_to_spine=False, insert_to_spine=False, index=0, is_linear=True)
    try:
        nouvelepub.toc.title = monepub.opf.metadata.titles[0][0]
    except (AttributeError, IndexError):
        nouvelepub.toc.title = ''
    try:
        nouvelepub.opf.metadata.add_title(monepub.opf.metadata.titles[0][0])
    except (AttributeError, IndexError):
        nouvelepub.opf.metadata.add_title('')
    try:
        nouvelepub.opf.metadata.subjects = monepub.opf.metadata.subjects
    except AttributeError:
        nouvelepub.opf.metadata.subjects = ''
    try:
        nouvelepub.opf.metadata.creators = monepub.opf.metadata.creators
    except AttributeError:
        nouvelepub.opf.metadata.creators = ''
    try:
        nouvelepub.opf.metadata.description = monepub.opf.metadata.description
    except AttributeError:
        nouvelepub.opf.metadata.description = ''
    nouvelepub.opf.guide.add_reference('Text/cover.xhtml', ref_type='cover', title='Couverture')

    fpath = os.path.join(nouvelepub.chemin, nouvelepub.content_path, 'content.opf')  # ../tmp2/book/ + OEBPS + /content
    with open(fpath, mode='w', encoding='utf-8', errors='strict') as f:
        f.write(nouvelepub.opf.as_xml_document().toprettyxml())

    fpath = os.path.join(nouvelepub.chemin, nouvelepub.content_path, 'toc.ncx')  # ../tmp2/book/ + OEBPS + /toc
    with open(fpath, mode='w', encoding='utf-8', errors='strict') as f:
        f.write(nouvelepub.toc.as_xml_document().toprettyxml())

    zip_resume(nouvelepub.chemin, epub)
    return
Example #11
def supprimer_jacket(monepub, dirtemp, LECHEMIN):
    job.log('Suppression des jackets')
    # first remove the old jackets from the manifest
    for value in list(monepub.opf.manifest):
        item = monepub.opf.manifest[value]
        if 'jacket' in value:
            file = monepub.normalize(monepub.relat_to_abs(item.href))
            os.remove(file)
            del monepub.opf.manifest[value]
    # then from the spine
    for item in list(monepub.opf.spine.itemrefs):
        if 'jacket' in item[0] or 'calibre_jacket' in item[0]:
            monepub.opf.spine.itemrefs.remove(item)
    myopf = os.path.join(monepub.chemin, monepub.opf_path)  # ../tmp/xxx + \\OEBPS + /content
    with open(myopf, mode='w', encoding='utf-8', errors='strict') as f:
        f.write(monepub.opf.as_xml_document().toprettyxml())
    return
Example #12
def redim_cover(options_texte, covpath):
    job.log('\t redimensionne la couv du résumé')
    taille = options_texte[2]
    newheight = re.split('x', taille, flags=re.IGNORECASE)[0].strip()
    newwidth = re.split('x', taille, flags=re.IGNORECASE)[1].strip()
    qual = int(options_texte[5])
    try:
        im = Image.open(os.path.normpath(urllib.parse.unquote(covpath)))
    except IOError:
        job.log('\t IOError de l\'image de couv')
        return
    oldwidth, oldheight = im.size
    coef = oldheight / int(newheight)
    if coef <= 1:
        im.thumbnail((int(newwidth), int(newheight)), Image.ANTIALIAS)
    else:
        im.thumbnail((int(newwidth), int(newheight)), Image.BICUBIC)
    im.save(covpath, quality=qual)
    return
Example #13
def get_coverimg_name(monepub, pagecouv):
    '''Extracts the cover image from the cover page'''
    if not os.path.isfile(pagecouv):
        job.log('\t Ce fichier n\'existe pas')
        return None
    try:
        dom = parse(pagecouv)
        images = dom.getElementsByTagName('img')
        for img in images:
            attr = img.attributes
            src = attr['src']
            return monepub.normalize(src.value)
        images = dom.getElementsByTagName('image')
        for img in images:
            attr = img.attributes
            src = attr['xlink:href']
            return monepub.normalize(src.value)
    except ExpatError:
        job.log('\t ExpatError sur le fichier de couv')
        return None
Example #14
def maj_meta(monepub, metaname, metac, metacal, copyr):
    os.chdir(monepub.chemin)
    job.log('Met à jour les métadonnées')
    mesmeta = monepub.opf.as_xml_document()
    if metaname:
        job.log('\t il existe un metadata.opf')
        calibre_meta = opf.parse_opf(os.path.normpath(metaname))
        calibrexml = calibre_meta.as_xml_document()
        metaxml = modifie_metas(calibrexml, metac, metacal)
    else:
        job.log('\t on utilise les metas internes')
        metaxml = modifie_metas(mesmeta, metac, metacal)

    for node in mesmeta.getElementsByTagName('package'):
        metas = node.firstChild
        package = metas.parentNode
        monmanifest = metas.nextSibling
        package.removeChild(metas)
        package.insertBefore(metaxml, monmanifest)
        path = os.path.normpath(os.path.join(monepub.chemin, monepub.opf_path))
        with open(path, mode='w', encoding='utf8', errors='strict') as f:
            f.write(mesmeta.toprettyxml())
        monepub.init_read()
        metas_xhtml(monepub, copyr)
    return
Example #15
 def set_cover(self):
     '''Determines the cover page and its image'''
     EXT_IMAGES = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')
     covpage = couv.has_cover(self)  # url et image de la page de couv
     if covpage:
         self.pagedecouv = self.normalize(covpage)
         job.log('Page de couverture : ' + self.pagedecouv)
         covimg = couv.get_coverimg_name(self, self.pagedecouv)
         if covimg is not None and covimg.startswith('..'):
             covimg = os.path.normpath(covpage.rsplit(os.sep, 2)[0] + covimg.strip('.'))
         elif covimg is not None:
             covimg = os.path.normpath(os.path.dirname(covpage) + os.sep + covimg.strip('.'))
         if covimg is None or not covimg.endswith(EXT_IMAGES):
             covimg = couv.seek_img(self)
         covimg = self.normalize(covimg)
         job.log('Image de couverture : ' + str(covimg))
         self.imagedecouv = covimg
     return
Example #16
def maj_publication(monepub, copyright):
    from PyQt4 import QtGui
    try:
        job.ProgressDialog.setLabelText('Récupération de la date de copyright')
        QtGui.QApplication.processEvents()
    except Exception:
        pass
    job.log('Récupération du copyright')
    pubdate = sorted(copyright)[0] + '-01-01'
    file = os.path.normpath(os.path.join(monepub.chemin, monepub.opf_path))
    parser = etree.XMLParser(attribute_defaults=False, load_dtd=False, remove_comments=False, ns_clean=True)
    tree = etree.parse(file, parser)
    xpdate = xpath(tree, '//dc:date')
    for child in xpdate:
        if child.get("{http://www.idpf.org/2007/opf}event") == 'publication':
            child.text = pubdate
        elif child.get("{http://www.idpf.org/2007/opf}event") is None:
            child.text = pubdate
    texte = etree.tostring(tree, encoding='utf-8', xml_declaration=True, pretty_print=True)
    with open(file, mode='wb') as f:
        f.write(texte)
    opf.parse_opf(file)
    return
Example #17
def cree_couv(monepub, covname, LECHEMIN, epub, options_texte):  # builds a cover page
    job.log('Création d\'une page de couverture')
    fpath = os.path.normpath(LECHEMIN + '/resources/covermodele.xhtml')
    f1path = os.path.normpath(LECHEMIN + '/tmp2/cover.xhtml')
    if covname:
        job.log('\t on utilise l\'image de couv de Calibre')
        covimg = covname
        monepub.imagedecouv = covname
    else:
        if monepub.imagedecouv is not None:
            covimg = monepub.imagedecouv
            # convert to jpg
            if os.path.splitext(monepub.imagedecouv)[1] not in ('.jpg', '.jpeg'):
                im = Image.open(monepub.imagedecouv)
                ima = im.convert('RGB')
                output = os.path.splitext(monepub.imagedecouv)[0] + '.jpg'
                ima.save(output, format="JPEG")
                monepub.imagedecouv = output
        else:  # no cover found
            job.log('\t on utilise la page par défaut')
            monepub.pagedecouv = 'defaut'
            src = os.path.normpath(LECHEMIN + '/resources/covermodele.xhtml')
            dst = os.path.normpath(LECHEMIN + '/tmp2/cover.xhtml')
            shutil.copy2(src, dst)
            covpage = dst
            covimg = LECHEMIN + '/resources/default_cover.png'
    with open(fpath, mode='r', encoding='utf-8', errors='strict') as modele:
        texte = modele.read()
        texte = re.sub(r'%%couv%%', '../Images/cover.jpg', texte)
    with open(f1path, mode='w', encoding='utf8', errors='strict') as f1:
        f1.write(texte)
    cree_jaquette(monepub, LECHEMIN, insere_jacket=False)
    nouvelepub = container.EpubFile()
    nouvelepub.createnew(LECHEMIN)
    return resume(nouvelepub, covimg, LECHEMIN, monepub, epub, options_texte)
Example #18
def cree_jaquette(monepub, LECHEMIN, insere_jacket):  # builds a jacket page from the metadata
    job.log('Création d\'une page de garde')
    fpath = LECHEMIN + '/resources/jacketmodele.xhtml'
    f1path = LECHEMIN + '/tmp2/jacket.xhtml'
    if not os.path.isfile(f1path):
        shutil.copy2(fpath, f1path)
    with open(f1path, mode='r', encoding='utf-8', errors='strict') as modele:
        if monepub.opf.metadata.titles:
            titre = monepub.opf.metadata.titles[0][0]
        else:
            titre = ''
        if monepub.opf.metadata.creators:
            auteur = monepub.opf.metadata.creators[0][0]
        else:
            auteur = ''
        if monepub.opf.metadata.publisher:
            editeur = monepub.opf.metadata.publisher
        else:
            editeur = ''
        if monepub.opf.metadata.dates:
            pubdate = monepub.opf.metadata.dates[0][0]
        else:
            pubdate = ''
        if pubdate:
            pubdate = '(' + pubdate[0:4] + ')'
        for date, event in monepub.opf.metadata.dates:
            if event == 'publication':
                pubdate = date
        if monepub.opf.metadata.subjects:
            sujet = ', '.join(monepub.opf.metadata.subjects)
        else:
            sujet = ''
        if monepub.opf.metadata.description:
            comment = monepub.opf.metadata.description
        else:
            comment = ''
        serie = monepub.opf.metadata.serie
        serie_index = monepub.opf.metadata.serie_index
        if serie:
            serie = serie + ' [' + serie_index + ']'
        else:
            serie = ''
        parser = etree.XMLParser(encoding='utf-8', recover=True, remove_blank_text=True)
        data = etree.parse(modele, parser=parser)
        TITLE = XPath('.//h:title')
        titrex = XPath('.//h:span[@class="title"]')
        series = XPath('.//h:td[@class="cbj_series"]')
        author = XPath('.//h:td[@class="cbj_author"]')
        publisher = XPath('.//h:td[@class="cbj_pubdata"]')
        content = XPath('.//h:td[@class="cbj_content"]')
        description = XPath('.//h:div[@class="cbj_comments"]')
        for title in TITLE(data):
            title.text = titre
        for tit in titrex(data):
            tit.text = titre
        for ser in series(data):
            ser.text = serie
        author(data)[0].text = auteur
        for pub in publisher(data):
            pub.text = editeur + ' ' + pubdate
        for cont in content(data):
            cont.text = sujet
        for desc in description(data):
            desc.text = re.sub(r"<\s*?[^>]+\s*?>", '', comment, flags=re.DOTALL)

    with open(f1path, mode='wb') as f1:
        data.write(f1, encoding='utf-8', xml_declaration=True, pretty_print=True)

    if insere_jacket:
        return inserer_jacket(f1path, monepub, LECHEMIN)
    return
Example #19
def parse_toc(xmlstring):
    """Inspect an NCX formated xml document."""
    toc = Ncx()
    try:
        toc_xml = minidom.parseString(xmlstring).documentElement
    except Exception:
        try:
            rewrite_toc(xmlstring)
            toc_xml = minidom.parseString(xmlstring).documentElement
        except Exception:
            job.log('pas de toc')
            return

    xmlns = toc_xml.getAttribute('xmlns')
    if xmlns:
        toc.xmlns = xmlns

    version = toc_xml.getAttribute('version')
    if version:
        toc.version = version

    lang = toc_xml.getAttribute('xml:lang')
    if lang:
        toc.lang = lang

    # Inspect head > meta; unknown meta are ignored
    try:
        head = toc_xml.getElementsByTagName('head')[0]
    except IndexError:
        rewrite_toc(xmlstring)
        toc_xml = minidom.parseString(xmlstring).documentElement
        head = toc_xml.getElementsByTagName('head')[0]
    metas = {'dtb:uid': '',
             'dtb:depth': '',
             'dtb:totalPageCount': '',
             'dtb:maxPageNumber': '',
             'dtb:generator': ''}

    for meta in head.getElementsByTagName('meta'):
        metas[meta.getAttribute('name')] = meta.getAttribute('content')

    toc.uid = metas['dtb:uid']
    toc.depth = metas['dtb:depth']
    toc.total_page_count = metas['dtb:totalPageCount']
    toc.max_page_number = metas['dtb:maxPageNumber']
    toc.generator = metas['dtb:generator']

    # Get title (one and only one <docTitle> tag is required)
    doc_title_node = toc_xml.getElementsByTagName('docTitle')[0]
    toc.title = _parse_for_text_tag(doc_title_node)

    # Get authors (<docAuthor> tags are optional)
    for author in toc_xml.getElementsByTagName('docAuthor'):
        toc.authors.append(_parse_for_text_tag(author))

    # Inspect <navMap> (one is required)
    nav_map_node = toc_xml.getElementsByTagName('navMap')[0]
    toc.nav_map = _parse_xml_nav_map(nav_map_node)

    # Inspect <pageList> (optional, only one)
    page_lists = toc_xml.getElementsByTagName('pageList')
    if len(page_lists) > 0:
        toc.page_list = _parse_xml_page_list(page_lists[0])

    # Inspect <navList> (optional, many are possible)
    for nav_list in toc_xml.getElementsByTagName('navList'):
        toc.add_nav_list(_parse_xml_nav_list(nav_list))

    return toc
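
A minimal usage sketch, assuming the NCX document has been read in as a byte string (the file name here is illustrative):

with open('toc.ncx', 'rb') as f:
    toc = parse_toc(f.read())
if toc is not None:
    print(toc.title, toc.uid, toc.depth)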
Example #20
def metas_xhtml(monepub, copyr):
    XHTML_NS = 'http://www.w3.org/1999/xhtml'
    XMLNS_NS = 'http://www.w3.org/2000/xmlns/'
    META_XP = XPath('/h:html/h:head/h:meta[@http-equiv="Content-Type"]')
    TITLE_XP = XPath('/h:html/h:head/h:title')
    METACONT_XP = XPath('/h:html/h:head/h:meta[@content]')
    METAADEPT1_XP = XPath('//h:meta[@name="Adept.resource"]')
    METAADEPT2_XP = XPath('//h:meta[@name="Adept.expected.resource"]')
    copyfind = None

    for path in monepub.liste_fichiers:
        if path.endswith(('html', 'htm')):
            if not os.path.isfile(path):
                job.log('Fichier non présent dans l\'epub : ' + path)
                continue
            data = parse_file(path)
            if data is None:
                job.log('\t Fichier ' + os.path.basename(path) + ' malformé, impossible à parser')
                continue
            try:
                for elem in data.iter(tag=etree.Comment):
                    if elem.text:
                        tail = elem.tail
                        prev = elem.getprevious()
                        parent = elem.getparent()
                        parent.remove(elem)
                        # reattach the comment's tail text, which remove() would drop
                        if tail:
                            if prev is not None:
                                prev.tail = (prev.tail or '') + tail
                            else:
                                parent.text = (parent.text or '') + tail
                for meta in METACONT_XP(data):
                    if (meta.get('content') != 'text/html; charset=utf-8') and (meta.get('content') != 'text/html; charset=UTF-8'):
                        job.logtat('suppression du meta : "' + meta.get('name') + ' : ' + meta.get('content') + '"')
                        meta.getparent().remove(meta)

                for title in TITLE_XP(data):
                    title.getparent().remove(title)

                for meta in METAADEPT1_XP(data):
                    job.logtat('suppression du meta : ' + meta.get('name'))
                    meta.getparent().remove(meta)
                for meta in METAADEPT2_XP(data):
                    job.logtat('suppression du meta : ' + meta.get('name'))
                    meta.getparent().remove(meta)
                texte = data.xpath("string()")
                copyfind = re.findall(r'\u00A9\s*?.*?([0-9]{4})', texte)
                if copyfind:
                    copyright = copyfind
                data.write(path, encoding='utf-8', xml_declaration=True, pretty_print=True)
            except Exception:
                job.log('\t metas des html non modifiées')
                continue

        if path.endswith(('opf', 'ncx')):
            data = parse_file(path)
            if data is None:
                job.log('\t Fichier ' + os.path.basename(path) + ' malformé, impossible à parser')
                continue
            for elem in data.iter(tag=etree.Comment):
                if elem.text:
                    elem.getparent().remove(elem)
            try:
                for meta in METAADEPT1_XP(data):
                    meta.getparent().remove(meta)
                for meta in METAADEPT2_XP(data):
                    meta.getparent().remove(meta)
            except Exception:
                job.log('Fichier ' + path + ' non modifié')
                continue
            data.write(path, encoding='utf-8', xml_declaration=True, pretty_print=True)
    if copyr and copyfind:
        return maj_publication(monepub, copyright)
    else:
        return
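
The copyright detection keys on the © sign followed, possibly at a distance, by a four-digit year. A quick sketch of the pattern in isolation (the sample string is illustrative):

import re

# \u00A9 is the copyright sign; the lazy quantifiers skip over publisher names.
print(re.findall(r'\u00A9\s*?.*?([0-9]{4})', 'Texte \u00A9 Editions Machin, 2011'))
# -> ['2011']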
Example #21
def has_cover(monepub):  # finds the cover page
    cover_item = None
    cover_id = None
    # look in the guide first
    if monepub.opf.guide.references:
        list_couv = ('cover', 'Cover', 'couverture', 'Couverture', 'title', 'coverpage')
        if len(monepub.opf.guide.references) > 1:
            for href, ref_type, title in monepub.opf.guide.references:
                if (ref_type in list_couv) or (title in list_couv):
                    cover_item = href
        else:
            if (monepub.opf.guide.references[0][1] in list_couv) or (monepub.opf.guide.references[0][2] in list_couv):
                cover_item = monepub.opf.guide.references[0][0]
        if (cover_item is not None) and cover_item.endswith('html'):
            # look up its url in the manifest
            for identifier in monepub.opf.manifest:
                item = monepub.opf.manifest[identifier]
                if str(cover_item) == item.href:
                    covpage = os.path.normpath(os.path.join(monepub.chemin, monepub.content_path, item.href))
                    if not os.path.isfile(covpage):
                        cover_item = None
                        covpage = None
                    else:
                        return covpage
        elif cover_item and cover_item.endswith(('jpg', 'jpeg', 'png', 'gif', 'svg')):
            cover_id = cover_item
            cover_item = None
            covpage = get_cover_cover_id(monepub, cover_id)
            return covpage


    # not found in the guide; look in the metadata
    if cover_item is None:
        for name, content in monepub.opf.metadata.metas:
            if name == 'cover':
                cover_id = content  # the image's id
                item = monepub.get_item(cover_id)
                if item:
                    cover_item_manifest = item.href  # the image's relative url
                else:
                    cover_item_manifest = None
                if cover_item_manifest is None:
                    cover_id = None
                    continue

                covpage = get_cover_page(monepub, cover_item_manifest)
                return covpage
            else:
                cover_id = None
    # not found in the metadata; look in the manifest
    if cover_id is None:
        cover_item_manifest = monepub.get_item('cover')  # relative path from the manifest, probably the image's
        if cover_item_manifest is None:
            cover_item_manifest = monepub.get_item('cover-img')
        if cover_item_manifest is None:
            cover_item_manifest = monepub.get_item('cover-id')

        if cover_item_manifest and cover_item_manifest.href.endswith('html'):
            covpage = os.path.normpath(os.path.join(monepub.chemin, monepub.content_path, cover_item_manifest.href))
            if not os.path.isfile(covpage):
                covpage = None
            else:
                return covpage
        elif cover_item_manifest and cover_item_manifest.href.endswith(('jpg', 'jpeg', 'png', 'gif')):
            covpage = get_cover_page(monepub, cover_item_manifest)
            if not os.path.isfile(covpage):
                covpage = None
            else:
                return covpage
        else:
            job.log('\t rien trouvé dans le manifest:')
    # finally, look in the spine
    if cover_item is None:
        cover_item = monepub.opf.spine.itemrefs[0][0]
        cover_item_manifest = monepub.get_item(cover_item)
        if not cover_item_manifest.href.endswith('html'):
            cover_item = monepub.opf.spine.itemrefs[1][0]
            cover_item_manifest = monepub.get_item(cover_item)
            if not cover_item_manifest.href.endswith('html') or 'jacket' in cover_item:
                cover_item = monepub.opf.spine.itemrefs[2][0]
        cover_item_manifest = monepub.get_item(cover_item)
        pagecouv = os.path.normpath(os.path.join(monepub.chemin, monepub.content_path, cover_item_manifest.href))
        covimg = get_coverimg_name(monepub, pagecouv)
        if covimg:  # the page links to an image
            return pagecouv
        return None
    else:
        job.log('\t rien trouvé dans le manifest:')
        return None
Example #22
def insere_logo(monepub, newimage, newimagepath, LECHEMIN):
    job.log('Insertion d\'un logo')
    repimg = reptxt = ''
    for identifier in monepub.opf.manifest:
        item = monepub.opf.manifest[identifier]
        if item.media_type in ('image/jpeg', 'image/png'):
            repimg = os.path.dirname(item.href)  # directory holding the images
            continue
        if item.media_type == 'application/xhtml+xml':  # directory holding the pages
            reptxt = os.path.dirname(item.href)
            continue
    if repimg == reptxt:
        relpathimg = os.path.basename(newimagepath)
    elif reptxt == '':
        relpathimg = repimg + '/' + os.path.basename(newimagepath)
    else:
        relpathimg = '../' + repimg + '/' + os.path.basename(newimagepath)
    nouvelempl = os.path.normpath(os.path.join(monepub.chemin, monepub.content_path, repimg, os.path.basename(newimagepath)))
    if os.path.isfile(nouvelempl):
        nouvelempl = nouvelempl.rsplit('.', 1)[0] + 'PL.' + nouvelempl.rsplit('.', 1)[1]
        relpathimg = relpathimg.rsplit('.', 1)[0] + 'PL.' + relpathimg.rsplit('.', 1)[1]
    shutil.copy2(newimagepath, nouvelempl)
    fpath = LECHEMIN + '/resources/logo_modele.xhtml'
    fpath2 = os.path.normpath(os.path.join(monepub.chemin, monepub.content_path, reptxt, 'logoPL.xhtml'))
    shutil.copy2(fpath, fpath2)
    relatimg = os.path.relpath(nouvelempl, start=os.path.normpath(monepub.chemin + '\\' + monepub.content_path))
    relattxt = os.path.relpath(fpath2, start=os.path.normpath(monepub.chemin + '\\' + monepub.content_path))
    parser = etree.XMLParser(encoding='utf-8', recover=True, remove_blank_text=True)
    with open(fpath2, mode='r', encoding='utf-8') as f:
        data = etree.parse(f, parser=parser)
        viewXpath = xpath(data, './/s:svg')
        imageXpath = xpath(data, '//s:image')
        for key in viewXpath[0].keys():
            if key == 'viewBox':
                newvalue = '0 0 ' + str(newimage.width()) + ' ' + str(newimage.height())
                viewXpath[0].set('viewBox', newvalue)
        for key in imageXpath[0].keys():
            if key == 'width':
                imageXpath[0].set('width', str(newimage.width()))
            if key == 'height':
                imageXpath[0].set('height', str(newimage.height()))
            if key == '{http://www.w3.org/1999/xlink}href':
                imageXpath[0].set('{http://www.w3.org/1999/xlink}href', relpathimg.replace('\\', '/'))
    with open(fpath2, mode='wb') as f:
        data.write(f, encoding='utf-8', xml_declaration=True, pretty_print=True)
    # first remove any old logos from the manifest
    for value in list(monepub.opf.manifest):
        if 'logoPL' in value:
            del monepub.opf.manifest[value]
    # then from the spine
    for item in list(monepub.opf.spine.itemrefs):
        if 'logoPL' in item[0]:
            monepub.opf.spine.itemrefs.remove(item)

    monepub.insere_fichier(LECHEMIN, fpath2, 'logoPL', relattxt.replace('\\', '/'), 'application/xhtml+xml',
                           append_to_spine=False, insert_to_spine=True, index=1, is_linear=True, move=False)
    monepub.insere_fichier(LECHEMIN, fpath2, 'logoPL_img', relatimg.replace('\\', '/'), 'image/png',
                           append_to_spine=False, insert_to_spine=False, index=0, is_linear=True, move=False)
    fpath = os.path.normpath(monepub.chemin + '/' + monepub.opf_path)  #   OEBPS + /content
    with open(fpath, mode='w', encoding='utf-8', errors='strict') as f:
        f.write(monepub.opf.as_xml_document().toprettyxml())
    return
Example #23
    def init_read(self):
        # Read container.xml to get the OPF xml file path
        container_path = 'META-INF/container.xml'
        container_xml = minidom.parse(container_path).documentElement

        for e in container_xml.getElementsByTagName('rootfile'):
            if e.getAttribute('media-type') == MIMETYPE_OPF:
                # Only take the first full-path available
                self.opf_path = e.getAttribute('full-path')  # OEBPS\content.opf ou content.opf
                if os.path.isfile(os.path.join(self.chemin, self.opf_path)):
                    self.content_path = os.path.dirname(self.opf_path)  # OEBPS ou ''
                    break
                else:
                    self.opf_path = self.cherche_opf(self.chemin)
                    if self.opf_path:
                        self.content_path = os.path.dirname(self.opf_path)  # OEBPS ou ''
                    else:
                        job.log('Epub invalide : pas de content.opf')
                        break

        # Read the OPF xml file
        self.opf = opf.parse_opf(self.opf_path)
        try:
            self.uid = [x for x in self.opf.metadata.identifiers
                        if x[1] == self.opf.uid_id][0]
        except IndexError:
            self.uid = None
        self.item_toc = self.get_item(self.opf.spine.toc)

        if self.item_toc is None:
            for identifier in self.opf.manifest:
                item = self.opf.manifest[identifier]
                if 'ncx' in item.href:
                    self.item_toc = os.path.join(self.content_path, item.href)
        if self.opf.metadata.creators:
            self.auteur = self.opf.metadata.creators[0][0]
        else:
            self.auteur = ''
        if self.opf.metadata.titles:
            self.titre = self.opf.metadata.titles[0][0]
        else:
            self.titre = ''
        # Get the css files (and build the file list while we're at it)
        self.css = []
        self.liste_items = []
        self.liste_fichiers = []
        for identifier in self.opf.manifest:
            item = self.opf.manifest[identifier]
            self.liste_items.append(item.href)
            self.liste_fichiers.append(self.normalize(self.relat_to_abs(item.href)))
            if 'css' in item.href:
                self.css.append(os.path.join(self.content_path, item.href))  # e.g. OEBPS/Style/styles.css or stylesheet.css
        # Inspect NCX toc file
        try:
            self.toc = ncx.parse_toc(self.read_item(self.item_toc))
        except (UnicodeError, IOError):
            self.toc = None
        # look for the cover page and its image
        if self.pagedecouv is None:
            self.set_cover()
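
init_read relies on META-INF/container.xml pointing at the OPF. For reference, a minimal container that this code would accept (the full-path value is illustrative):

# Illustrative minimal container.xml; MIMETYPE_OPF is
# 'application/oebps-package+xml' in the OCF spec.
CONTAINER_XML = '''<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
  </rootfiles>
</container>'''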
Example #24
def convert(monepub):
    '''Converts every non-transparent image to jpg'''
    job.log('Conversion en jpg')
    ext_to_change = []
    for name in monepub.liste_fichiers:
        if os.path.splitext(name)[1] in ('.png', '.gif', '.bmp'):
            try:
                im = Image.open(name)
                if im.mode == 'RGBA':
                    # only the first pixel's alpha byte is inspected here
                    val_alpha = im.tobytes()[3]
            except Exception:
                job.log('\t Erreur de lecture de ' + name)
                continue
            if 'transparency' in im.info or (im.mode == 'RGBA' and val_alpha != 255):
                continue
            else:
                ext_to_change.append(os.path.basename(name))
                im2 = im.convert('RGB')
                output = os.path.splitext(name)[0] + '.jpg'
                im2.save(output, 'JPEG')
                try:
                    os.remove(name)
                except OSError:
                    job.log('fichier source non supprimé !')
    # fix the image links in the pages
    for page in monepub.liste_fichiers:
        try:
            if os.path.splitext(page)[1] in ('.html', '.xhtml', '.htm'):
                data = parse_file(page)
                image_src = XPath(".//h:img[@src]")
                image_alt = XPath(".//h:img[@alt]")
                for img in ext_to_change:
                    img_jpg = img.rsplit('.', 1)[0] + '.jpg'
                    for src in image_src(data):
                        src.set('src', src.get('src').replace(img, img_jpg))
                    for alt in image_alt(data):
                        alt.set('alt', alt.get('alt').replace(img, img_jpg))
                data.write(page, encoding='utf-8', xml_declaration=True, pretty_print=True)
        except Exception:
            job.log('lien dans la page ' + page + ' non modifié')
            continue
    # update the manifest
    myopf = os.path.join(monepub.chemin, monepub.opf_path)
    data_opf = parse_file(myopf)
    for img in ext_to_change:
        img_jpg = img.rsplit('.', 1)[0] + '.jpg'
        imgxpath = xpath(data_opf, './/opf:item')
        for child in imgxpath:
            changed = False
            for key in child.keys():
                if key == 'href':
                    if img in child.get('href') :
                        changed = True
                    href_text = child.get('href').replace(img, img_jpg)
                    child.set('href', href_text)
                if key == 'media-type' and changed is True:
                    child.set('media-type', 'image/jpeg')
    texte_opf = etree.tostring(data_opf, encoding='utf-8', xml_declaration=True, pretty_print=True)
    with open(myopf, mode='wb') as f:
        f.write(texte_opf)
    monepub.init_read()
    monepub.set_cover()
    return
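
The RGBA test in convert() only inspects the first pixel's alpha byte, so an image that is opaque in its top-left corner but transparent elsewhere would still be converted. A fuller check (a sketch, not the project's code) looks at the whole alpha band:

from PIL import Image

def has_transparency(im):
    '''Report transparency anywhere in the image, not just in pixel 0.'''
    if im.mode == 'RGBA':
        alpha_min, _alpha_max = im.split()[3].getextrema()  # alpha band extrema
        return alpha_min < 255
    return 'transparency' in im.info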