예제 #1
0
파일: text.py 프로젝트: mgorny/librarian
def transform(wldoc, flags=None, **options):
    """
    Transforms input_file in XML to output_file in TXT.
    possible flags: raw-text,
    """
    # Parse XSLT
    style_filename = os.path.join(os.path.dirname(__file__), 'xslt/book2txt.xslt')
    style = etree.parse(style_filename)

    document = copy.deepcopy(wldoc)
    del wldoc
    document.swap_endlines()

    if flags:
        for flag in flags:
            document.edoc.getroot().set(flag, 'yes')

    result = document.transform(style, **options)

    if not flags or 'raw-text' not in flags:
        if document.book_info:
            parsed_dc = document.book_info
            description = parsed_dc.description
            url = document.book_info.url
    
            license_description = parsed_dc.license_description
            license = parsed_dc.license
            if license:
                license_description = u"Ten utwór jest udostepniony na licencji %s: \n%s" % (license_description, license)        
            else:
                license_description = u"Ten utwór nie jest chroniony prawem autorskim i znajduje się w domenie publicznej, co oznacza że możesz go swobodnie wykorzystywać, publikować i rozpowszechniać. Jeśli utwór opatrzony jest dodatkowymi materiałami (przypisy, motywy literackie etc.), które podlegają prawu autorskiemu, to te dodatkowe materiały udostępnione są na licencji Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL (http://creativecommons.org/licenses/by-sa/3.0/)"
    
            source = parsed_dc.source_name
            if source:
                source = "\n\nTekst opracowany na podstawie: " + source
            else:
                source = ''
    
            contributors = ', '.join(person.readable() for person in 
                                     sorted(set(p for p in (parsed_dc.technical_editors + parsed_dc.editors) if p)))
            if contributors:
                contributors = "\n\nOpracowanie redakcyjne i przypisy: %s" % contributors
        else:
            description = 'Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl).'
            url = '*' * 10
            license = ""
            license_description = ""
            source = ""
            contributors = ""
        return OutputFile.from_string((TEMPLATE % {
            'description': description,
            'url': url,
            'license_description': license_description,
            'text': unicode(result),
            'source': source,
            'contributors': contributors,
        }).encode('utf-8'))
    else:
        return OutputFile.from_string(unicode(result).encode('utf-8'))
예제 #2
0
def transform(wldoc, verbose=False, cover=None, flags=None):
    """ produces a FB2 file

    cover: a cover.Cover object or True for default
    flags: less-advertising, working-copy
    """

    document = deepcopy(wldoc)
    del wldoc

    if flags:
        for flag in flags:
            document.edoc.getroot().set(flag, 'yes')

    document.clean_ed_note()
    document.clean_ed_note('abstrakt')

    style_filename = os.path.join(os.path.dirname(__file__), 'fb2/fb2.xslt')
    style = etree.parse(style_filename)

    replace_by_verse(document.edoc)
    sectionify(document.edoc)

    result = document.transform(style)

    return OutputFile.from_bytes(six.text_type(result).encode('utf-8'))
예제 #3
0
파일: fb2.py 프로젝트: quiris11/librarian
def transform(wldoc, verbose=False,
              cover=None, flags=None):
    """ produces a FB2 file

    cover: a cover.Cover object or True for default
    flags: less-advertising, working-copy
    """

    document = deepcopy(wldoc)
    del wldoc

    if flags:
        for flag in flags:
            document.edoc.getroot().set(flag, 'yes')

    document.clean_ed_note()
    document.clean_ed_note('abstrakt')

    style_filename = os.path.join(os.path.dirname(__file__), 'fb2/fb2.xslt')
    style = etree.parse(style_filename)

    replace_by_verse(document.edoc)
    sectionify(document.edoc)

    result = document.transform(style)

    return OutputFile.from_string(unicode(result).encode('utf-8'))
예제 #4
0
def transform(wldoc,
              verbose=False,
              sample=None,
              cover=None,
              use_kindlegen=False,
              flags=None,
              hyphenate=True,
              ilustr_path='',
              converter_path=None):
    """ produces a MOBI file

    wldoc: a WLDocument
    sample=n: generate sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory overriding default
    flags: less-advertising,
    converter_path: override path to MOBI converter,
      either ebook-convert or kindlegen
    """

    document = deepcopy(wldoc)
    del wldoc

    epub = document.as_epub(verbose=verbose,
                            sample=sample,
                            html_toc=True,
                            cover=cover or True,
                            flags=flags,
                            hyphenate=hyphenate,
                            ilustr_path=ilustr_path,
                            output_type='mobi')
    if verbose:
        kwargs = {}
    else:
        devnull = open("/dev/null", 'w')
        kwargs = {"stdout": devnull, "stderr": devnull}

    output_file = NamedTemporaryFile(prefix='librarian',
                                     suffix='.mobi',
                                     delete=False)
    output_file.close()

    if use_kindlegen:
        output_file_basename = os.path.basename(output_file.name)
        subprocess.check_call([
            converter_path or 'kindlegen', '-c2',
            epub.get_filename(), '-o', output_file_basename
        ], **kwargs)
    else:
        subprocess.check_call([
            converter_path or 'ebook-convert',
            epub.get_filename(), output_file.name, '--no-inline-toc',
            '--mobi-file-type=both', '--mobi-ignore-margins'
        ], **kwargs)
    return OutputFile.from_filename(output_file.name)
예제 #5
0
def transform(wldoc, stylesheet='legacy', options=None, flags=None):
    """Transforms the WL document to XHTML.

    If output_filename is None, returns an XML,
    otherwise returns True if file has been written,False if it hasn't.
    File won't be written if it has no content.
    """
    # Parse XSLT
    try:
        style_filename = get_stylesheet(stylesheet)
        style = etree.parse(style_filename)

        document = copy.deepcopy(wldoc)
        del wldoc
        document.swap_endlines()

        if flags:
            for flag in flags:
                document.edoc.getroot().set(flag, 'yes')

        document.clean_ed_note()
        document.clean_ed_note('abstrakt')

        if not options:
            options = {}
        options.setdefault('gallery', "''")
        result = document.transform(style, **options)
        del document  # no longer needed large object :)

        if html_has_content(result):
            add_anchors(result.getroot())
            add_table_of_themes(result.getroot())
            add_table_of_contents(result.getroot())

            return OutputFile.from_bytes(
                etree.tostring(result,
                               method='html',
                               xml_declaration=False,
                               pretty_print=True,
                               encoding='utf-8'))
        else:
            return None
    except KeyError:
        raise ValueError("'%s' is not a valid stylesheet.")
    except (XMLSyntaxError, XSLTApplyError) as e:
        raise ParseError(e)
예제 #6
0
파일: mobi.py 프로젝트: lolownia/librarian
def transform(wldoc, verbose=False,
              sample=None, cover=None, flags=None):
    """ produces a MOBI file

    wldoc: a WLDocument
    sample=n: generate sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory overriding default
    flags: less-advertising,
    """

    document = deepcopy(wldoc)
    del wldoc
    book_info = document.book_info

    # provide a cover by default
    if not cover:
        cover = DefaultEbookCover
    cover_file = NamedTemporaryFile(suffix='.png', delete=False)
    bound_cover = cover(book_info)
    bound_cover.save(cover_file)

    if bound_cover.uses_dc_cover:
        if document.book_info.cover_by:
            document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
        if document.book_info.cover_source:
            document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)

    if not flags:
        flags = []
    flags = list(flags) + ['with-full-fonts']
    epub = document.as_epub(verbose=verbose, sample=sample, html_toc=True,
            flags=flags, style=get_resource('epub/style.css'))

    if verbose:
        kwargs = {}
    else:
        devnull = open("/dev/null", 'w')
        kwargs = {"stdout": devnull, "stderr": devnull}

    output_file = NamedTemporaryFile(prefix='librarian', suffix='.mobi', delete=False)
    output_file.close()
    subprocess.check_call(['ebook-convert', epub.get_filename(), output_file.name,
            '--no-inline-toc', '--mobi-file-type=both', '--subset-embedded-fonts', '--mobi-ignore-margins', '--cover=%s' % cover_file.name], **kwargs)
    os.unlink(cover_file.name)
    return OutputFile.from_filename(output_file.name)
예제 #7
0
파일: html.py 프로젝트: quiris11/librarian
def transform(wldoc, stylesheet='legacy', options=None, flags=None):
    """Transforms the WL document to XHTML.

    If output_filename is None, returns an XML,
    otherwise returns True if file has been written,False if it hasn't.
    File won't be written if it has no content.
    """
    # Parse XSLT
    try:
        style_filename = get_stylesheet(stylesheet)
        style = etree.parse(style_filename)

        document = copy.deepcopy(wldoc)
        del wldoc
        document.swap_endlines()

        if flags:
            for flag in flags:
                document.edoc.getroot().set(flag, 'yes')

        document.clean_ed_note()
        document.clean_ed_note('abstrakt')

        if not options:
            options = {}
        options.setdefault('gallery', "''")
        result = document.transform(style, **options)
        del document  # no longer needed large object :)

        if html_has_content(result):
            add_anchors(result.getroot())
            add_table_of_themes(result.getroot())
            add_table_of_contents(result.getroot())

            return OutputFile.from_string(etree.tostring(
                result, method='html', xml_declaration=False, pretty_print=True, encoding='utf-8'))
        else:
            return None
    except KeyError:
        raise ValueError("'%s' is not a valid stylesheet.")
    except (XMLSyntaxError, XSLTApplyError), e:
        raise ParseError(e)
예제 #8
0
파일: fb2.py 프로젝트: mgorny/librarian
def transform(wldoc, verbose=False, cover=None, flags=None):
    """ produces a FB2 file

    cover: a cover.Cover object or True for default
    flags: less-advertising, working-copy
    """

    document = deepcopy(wldoc)
    del wldoc

    if flags:
        for flag in flags:
            document.edoc.getroot().set(flag, "yes")

    style_filename = os.path.join(os.path.dirname(__file__), "fb2/fb2.xslt")
    style = etree.parse(style_filename)

    result = document.transform(style)

    return OutputFile.from_string(unicode(result).encode("utf-8"))
예제 #9
0
파일: mobi.py 프로젝트: alanthai/librarian
def transform(wldoc, verbose=False, sample=None, cover=None, use_kindlegen=False, flags=None):
    """ produces a MOBI file

    wldoc: a WLDocument
    sample=n: generate sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory overriding default
    flags: less-advertising,
    """

    document = deepcopy(wldoc)
    del wldoc

    epub = document.as_epub(verbose=verbose, sample=sample, html_toc=True, cover=cover or True, flags=flags)
    if verbose:
        kwargs = {}
    else:
        devnull = open("/dev/null", "w")
        kwargs = {"stdout": devnull, "stderr": devnull}

    output_file = NamedTemporaryFile(prefix="librarian", suffix=".mobi", delete=False)
    output_file.close()

    if use_kindlegen:
        output_file_basename = os.path.basename(output_file.name)
        subprocess.check_call(["kindlegen", "-c2", epub.get_filename(), "-o", output_file_basename], **kwargs)
    else:
        subprocess.check_call(
            [
                "ebook-convert",
                epub.get_filename(),
                output_file.name,
                "--no-inline-toc",
                "--mobi-file-type=both",
                "--mobi-ignore-margins",
            ],
            **kwargs
        )
    return OutputFile.from_filename(output_file.name)
예제 #10
0
def transform(wldoc, verbose=False, style=None, html_toc=False,
              sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'):
    """ produces a EPUB file

    sample=n: generate sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory or True for default
    flags: less-advertising, without-fonts, working-copy
    """

    def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
        """ processes one input file and proceeds to its children """

        replace_characters(wldoc.edoc.getroot())

        hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
        hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)

        # every input file will have a TOC entry,
        # pointing to starting chunk
        toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
        chars = set()
        if first:
            # write book title page
            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type)
            chars = used_chars(html_tree.getroot())
            zip.writestr(
                'OPS/title.html',
                etree.tostring(
                    html_tree, pretty_print=True, xml_declaration=True,
                    encoding="utf-8",
                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                )
            )
            # add a title page TOC entry
            toc.add(u"Strona tytułowa", "title.html")
        elif wldoc.book_info.parts:
            # write title page for every parent
            if sample is not None and sample <= 0:
                chars = set()
                html_string = open(get_resource('epub/emptyChunk.html')).read()
            else:
                html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
                chars = used_chars(html_tree.getroot())
                html_string = etree.tostring(
                    html_tree, pretty_print=True, xml_declaration=True,
                    encoding="utf-8",
                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                )
            zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
            add_to_manifest(manifest, chunk_counter)
            add_to_spine(spine, chunk_counter)
            chunk_counter += 1

        if len(wldoc.edoc.getroot()) > 1:
            # rdf before style master
            main_text = wldoc.edoc.getroot()[1]
        else:
            # rdf in style master
            main_text = wldoc.edoc.getroot()[0]
            if main_text.tag == RDFNS('RDF'):
                main_text = None

        if main_text is not None:
            for chunk_xml in chop(main_text):
                empty = False
                if sample is not None:
                    if sample <= 0:
                        empty = True
                    else:
                        sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
                chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)

                toc.extend(chunk_toc)
                chars = chars.union(chunk_chars)
                zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
                add_to_manifest(manifest, chunk_counter)
                add_to_spine(spine, chunk_counter)
                chunk_counter += 1

        for child in wldoc.parts():
            child_toc, chunk_counter, chunk_chars, sample = transform_file(
                child, chunk_counter, first=False, sample=sample)
            toc.append(child_toc)
            chars = chars.union(chunk_chars)

        return toc, chunk_counter, chars, sample

    document = deepcopy(wldoc)
    del wldoc

    if flags:
        for flag in flags:
            document.edoc.getroot().set(flag, 'yes')

    document.clean_ed_note()
    document.clean_ed_note('abstrakt')

    # add editors info
    editors = document.editors()
    if editors:
        document.edoc.getroot().set('editors', u', '.join(sorted(
            editor.readable() for editor in editors)))
    if document.book_info.funders:
        document.edoc.getroot().set('funders', u', '.join(
            document.book_info.funders))
    if document.book_info.thanks:
        document.edoc.getroot().set('thanks', document.book_info.thanks)

    opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
    manifest = opf.find('.//' + OPFNS('manifest'))
    guide = opf.find('.//' + OPFNS('guide'))
    spine = opf.find('.//' + OPFNS('spine'))

    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
    zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)

    functions.reg_mathml_epub(zip)

    if os.path.isdir(ilustr_path):
        for i, filename in enumerate(os.listdir(ilustr_path)):
            file_path = os.path.join(ilustr_path, filename)
            zip.write(file_path, os.path.join('OPS', filename))
            image_id = 'image%s' % i
            manifest.append(etree.fromstring(
                '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))

    # write static elements
    mime = zipfile.ZipInfo()
    mime.filename = 'mimetype'
    mime.compress_type = zipfile.ZIP_STORED
    mime.extra = ''
    zip.writestr(mime, 'application/epub+zip')
    zip.writestr(
        'META-INF/container.xml',
        '<?xml version="1.0" ?>'
        '<container version="1.0" '
        'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
        '<rootfiles><rootfile full-path="OPS/content.opf" '
        'media-type="application/oebps-package+xml" />'
        '</rootfiles></container>'
    )
    zip.write(get_resource('res/wl-logo-small.png'),
              os.path.join('OPS', 'logo_wolnelektury.png'))
    zip.write(get_resource('res/jedenprocent.png'),
              os.path.join('OPS', 'jedenprocent.png'))
    if not style:
        style = get_resource('epub/style.css')
    zip.write(style, os.path.join('OPS', 'style.css'))

    if cover:
        if cover is True:
            cover = DefaultEbookCover

        cover_file = StringIO()
        bound_cover = cover(document.book_info)
        bound_cover.save(cover_file)
        cover_name = 'cover.%s' % bound_cover.ext()
        zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
        del cover_file

        cover_tree = etree.parse(get_resource('epub/cover.html'))
        cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
        zip.writestr('OPS/cover.html', etree.tostring(
            cover_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

        if bound_cover.uses_dc_cover:
            if document.book_info.cover_by:
                document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
            if document.book_info.cover_source:
                document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)

        manifest.append(etree.fromstring(
            '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
        manifest.append(etree.fromstring(
            '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
        spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
        opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
        guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))

    annotations = etree.Element('annotations')

    toc_file = etree.fromstring(
        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
        '"-//NISO//DTD ncx 2005-1//EN" '
        '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
        'version="2005-1"><head></head><docTitle></docTitle><navMap>'
        '</navMap></ncx>'
    )
    nav_map = toc_file[-1]

    if html_toc:
        manifest.append(etree.fromstring(
            '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="html_toc" />'))
        guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))

    toc, chunk_counter, chars, sample = transform_file(document, sample=sample)

    if len(toc.children) < 2:
        toc.add(u"Początek utworu", "part1.html")

    # Last modifications in container files and EPUB creation
    if len(annotations) > 0:
        toc.add("Przypisy", "annotations.html")
        manifest.append(etree.fromstring(
            '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="annotations" />'))
        replace_by_verse(annotations)
        html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
        chars = chars.union(used_chars(html_tree.getroot()))
        zip.writestr('OPS/annotations.html', etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

    toc.add("Wesprzyj Wolne Lektury", "support.html")
    manifest.append(etree.fromstring(
        '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="support" />'))
    html_string = open(get_resource('epub/support.html')).read()
    chars.update(used_chars(etree.fromstring(html_string)))
    zip.writestr('OPS/support.html', html_string)

    toc.add("Strona redakcyjna", "last.html")
    manifest.append(etree.fromstring(
        '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="last" />'))
    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type)
    chars.update(used_chars(html_tree.getroot()))
    zip.writestr('OPS/last.html', etree.tostring(
        html_tree, pretty_print=True, xml_declaration=True,
        encoding="utf-8",
        doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
    ))

    if not flags or 'without-fonts' not in flags:
        # strip fonts
        tmpdir = mkdtemp('-librarian-epub')
        try:
            cwd = os.getcwd()
        except OSError:
            cwd = None

        os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
        for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
            optimizer_call = ['perl', 'subset.pl', '--chars',
                              ''.join(chars).encode('utf-8'),
                              get_resource('fonts/' + fname),
                              os.path.join(tmpdir, fname)]
            if verbose:
                print "Running font-optimizer"
                subprocess.check_call(optimizer_call)
            else:
                dev_null = open(os.devnull, 'w')
                subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null)
            zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
            manifest.append(etree.fromstring(
                '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
        rmtree(tmpdir)
        if cwd is not None:
            os.chdir(cwd)
    zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
                 xml_declaration=True, encoding="utf-8"))
    title = document.book_info.title
    attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
    for st in attributes:
        meta = toc_file.makeelement(NCXNS('meta'))
        meta.set('name', st)
        meta.set('content', '0')
        toc_file[0].append(meta)
    toc_file[0][0].set('content', str(document.book_info.url))
    toc_file[0][1].set('content', str(toc.depth()))
    set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))

    # write TOC
    if html_toc:
        toc.add(u"Spis treści", "toc.html", index=1)
        zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
    toc.write_to_xml(nav_map)
    zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
                 xml_declaration=True, encoding="utf-8"))
    zip.close()

    return OutputFile.from_filename(output_file.name)
예제 #11
0
파일: pdf.py 프로젝트: alanthai/librarian
def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
              cover=None, flags=None, customizations=None):
    """ produces a PDF file with XeLaTeX

    wldoc: a WLDocument
    verbose: prints all output from LaTeX
    save_tex: path to save the intermediary LaTeX file to
    morefloats (old/new/none): force specific morefloats
    cover: a cover.Cover factory or True for default
    flags: less-advertising,
    customizations: user requested customizations regarding various formatting parameters (passed to wl LaTeX class)
    """

    # Parse XSLT
    try:
        book_info = wldoc.book_info
        document = load_including_children(wldoc)
        root = document.edoc.getroot()

        if cover:
            if cover is True:
                cover = DefaultEbookCover
            bound_cover = cover(book_info, width=1200)
            root.set('data-cover-width', str(bound_cover.width))
            root.set('data-cover-height', str(bound_cover.height))
            if bound_cover.uses_dc_cover:
                if book_info.cover_by:
                    root.set('data-cover-by', book_info.cover_by)
                if book_info.cover_source:
                    root.set('data-cover-source',
                            book_info.cover_source)
        if flags:
            for flag in flags:
                root.set('flag-' + flag, 'yes')

        # check for LaTeX packages
        if morefloats:
            root.set('morefloats', morefloats.lower())
        elif package_available('morefloats', 'maxfloats=19'):
            root.set('morefloats', 'new')

        # add customizations
        if customizations is not None:
            root.set('customizations', u','.join(customizations))

        # add editors info
        editors = document.editors()
        if editors:
            root.set('editors', u', '.join(sorted(
                editor.readable() for editor in editors)))
        if document.book_info.funders:
            root.set('funders', u', '.join(document.book_info.funders))
        if document.book_info.thanks:
            root.set('thanks', document.book_info.thanks)

        # hack the tree
        move_motifs_inside(document.edoc)
        hack_motifs(document.edoc)
        parse_creator(document.edoc)
        substitute_hyphens(document.edoc)
        fix_hanging(document.edoc)
        fix_tables(document.edoc)

        # wl -> TeXML
        style_filename = get_stylesheet("wl2tex")
        style = etree.parse(style_filename)
        functions.reg_mathml_latex()

        # TeXML -> LaTeX
        temp = mkdtemp('-wl2pdf')

        for sponsor in book_info.sponsors:
            ins = etree.Element("data-sponsor", name=sponsor)
            logo = sponsor_logo(sponsor)
            if logo:
                fname = 'sponsor-%s' % os.path.basename(logo)
                shutil.copy(logo, os.path.join(temp, fname))
                ins.set('src', fname)
            root.insert(0, ins)
                
        if book_info.sponsor_note:
            root.set("sponsor-note", book_info.sponsor_note)

        texml = document.transform(style)

        if cover:
            with open(os.path.join(temp, 'cover.png'), 'w') as f:
                bound_cover.save(f, quality=80)

        del document # no longer needed large object :)

        tex_path = os.path.join(temp, 'doc.tex')
        fout = open(tex_path, 'w')
        process(StringIO(texml), fout, 'utf-8')
        fout.close()
        del texml

        if save_tex:
            shutil.copy(tex_path, save_tex)

        # LaTeX -> PDF
        shutil.copy(get_resource('pdf/wl.cls'), temp)
        shutil.copy(get_resource('res/wl-logo.png'), temp)

        try:
            cwd = os.getcwd()
        except OSError:
            cwd = None
        os.chdir(temp)

        if verbose:
            p = call(['xelatex', tex_path])
        else:
            p = call(['xelatex', '-interaction=batchmode', tex_path], stdout=PIPE, stderr=PIPE)
        if p:
            raise ParseError("Error parsing .tex file")

        if cwd is not None:
            os.chdir(cwd)

        output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', delete=False)
        pdf_path = os.path.join(temp, 'doc.pdf')
        shutil.move(pdf_path, output_file.name)
        shutil.rmtree(temp)
        return OutputFile.from_filename(output_file.name)

    except (XMLSyntaxError, XSLTApplyError), e:
        raise ParseError(e)
예제 #12
0
파일: cover.py 프로젝트: fnp/librarian
 def output_file(self, *args, **kwargs):
     imgstr = BytesIO()
     self.save(imgstr, *args, **kwargs)
     return OutputFile.from_bytes(imgstr.getvalue())
예제 #13
0
 def output_file(self, *args, **kwargs):
     imgstr = StringIO()
     self.save(imgstr, *args, **kwargs)
     return OutputFile.from_string(imgstr.getvalue())
예제 #14
0
 def output_file(self, *args, **kwargs):
     imgstr = BytesIO()
     self.save(imgstr, *args, **kwargs)
     return OutputFile.from_bytes(imgstr.getvalue())
예제 #15
0
def transform(wldoc, flags=None, **options):
    """
    Transforms input_file in XML to output_file in TXT.
    possible flags: raw-text,
    """
    # Parse XSLT
    style_filename = os.path.join(os.path.dirname(__file__), 'xslt/book2txt.xslt')
    style = etree.parse(style_filename)

    document = copy.deepcopy(wldoc)
    del wldoc
    document.swap_endlines()

    if flags:
        for flag in flags:
            document.edoc.getroot().set(flag, 'yes')
    if 'wrapping' in options:
        options['wrapping'] = str(options['wrapping'])

    result = document.transform(style, **options)

    if not flags or 'raw-text' not in flags:
        if document.book_info:
            parsed_dc = document.book_info
            description = parsed_dc.description
            url = document.book_info.url
    
            license_description = parsed_dc.license_description
            license = parsed_dc.license
            if license:
                license_description = u"Ten utwór jest udostepniony na licencji %s: \n%s" % (
                    license_description, license)
            else:
                license_description = u"Ten utwór nie jest objęty majątkowym prawem autorskim i znajduje się " \
                                      u"w domenie publicznej, co oznacza że możesz go swobodnie wykorzystywać, " \
                                      u"publikować i rozpowszechniać. Jeśli utwór opatrzony jest dodatkowymi " \
                                      u"materiałami (przypisy, motywy literackie etc.), które podlegają prawu " \
                                      u"autorskiemu, to te dodatkowe materiały udostępnione są na licencji " \
                                      u"Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL " \
                                      u"(http://creativecommons.org/licenses/by-sa/3.0/)"

            source = parsed_dc.source_name
            if source:
                source = "\n\nTekst opracowany na podstawie: " + source
            else:
                source = ''
    
            contributors = ', '.join(person.readable() for person in 
                                     sorted(set(p for p in (parsed_dc.technical_editors + parsed_dc.editors) if p)))
            if contributors:
                contributors = "\n\nOpracowanie redakcyjne i przypisy: %s." % contributors
            funders = ', '.join(parsed_dc.funders)
            if funders:
                funders = u"\n\nPublikację wsparli i wsparły: %s." % funders
            publisher = '\n\nWydawca: ' + ', '.join(parsed_dc.publisher)
            isbn = getattr(parsed_dc, 'isbn_txt', None)
            if isbn:
                isbn = '\n\n' + isbn
            else:
                isbn = ''
        else:
            description = 'Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl).'
            url = '*' * 10
            license_description = ""
            source = ""
            contributors = ""
            funders = ""
            publisher = ""
            isbn = ""
        result = (TEMPLATE % {
            'description': description,
            'url': url,
            'license_description': license_description,
            'text': unicode(result),
            'source': source,
            'contributors': contributors,
            'funders': funders,
            'publisher': publisher,
            'isbn': isbn,
        }).encode('utf-8')
    else:
        result = unicode(result).encode('utf-8')
    return OutputFile.from_string("\r\n".join(result.splitlines()) + "\r\n")
예제 #16
0
파일: epub.py 프로젝트: dastanko/librarian
def transform(wldoc, verbose=False,
              style=None, html_toc=False,
              sample=None, cover=None, flags=None):
    """ produces a EPUB file

    sample=n: generate sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory or True for default
    flags: less-advertising, without-fonts, working-copy
    """

    def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
        """ processes one input file and proceeds to its children """

        replace_characters(wldoc.edoc.getroot())

        hyphenator = set_hyph_language(wldoc.edoc.getroot())
        hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)

        # every input file will have a TOC entry,
        # pointing to starting chunk
        toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
        chars = set()
        if first:
            # write book title page
            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
            chars = used_chars(html_tree.getroot())
            zip.writestr(
                'OPS/title.html',
                etree.tostring(
                    html_tree, pretty_print=True, xml_declaration=True,
                    encoding="utf-8",
                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                )
            )
            # add a title page TOC entry
            toc.add(u"Strona tytułowa", "title.html")
        elif wldoc.book_info.parts:
            # write title page for every parent
            if sample is not None and sample <= 0:
                chars = set()
                html_string = open(get_resource('epub/emptyChunk.html')).read()
            else:
                html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
                chars = used_chars(html_tree.getroot())
                html_string = etree.tostring(
                    html_tree, pretty_print=True, xml_declaration=True,
                    encoding="utf-8",
                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                )
            zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
            add_to_manifest(manifest, chunk_counter)
            add_to_spine(spine, chunk_counter)
            chunk_counter += 1

        if len(wldoc.edoc.getroot()) > 1:
            # rdf before style master
            main_text = wldoc.edoc.getroot()[1]
        else:
            # rdf in style master
            main_text = wldoc.edoc.getroot()[0]
            if main_text.tag == RDFNS('RDF'):
                main_text = None

        if main_text is not None:
            for chunk_xml in chop(main_text):
                empty = False
                if sample is not None:
                    if sample <= 0:
                        empty = True
                    else:
                        sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
                chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)

                toc.extend(chunk_toc)
                chars = chars.union(chunk_chars)
                zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
                add_to_manifest(manifest, chunk_counter)
                add_to_spine(spine, chunk_counter)
                chunk_counter += 1

        for child in wldoc.parts():
            child_toc, chunk_counter, chunk_chars, sample = transform_file(
                child, chunk_counter, first=False, sample=sample)
            toc.append(child_toc)
            chars = chars.union(chunk_chars)

        return toc, chunk_counter, chars, sample

    document = deepcopy(wldoc)
    del wldoc

    if flags:
        for flag in flags:
            document.edoc.getroot().set(flag, 'yes')

    # add editors info
    editors = document.editors()
    if editors:
        document.edoc.getroot().set('editors', u', '.join(sorted(
            editor.readable() for editor in editors)))
    if document.book_info.funders:
        document.edoc.getroot().set('funders', u', '.join(
            document.book_info.funders))
    if document.book_info.thanks:
        document.edoc.getroot().set('thanks', document.book_info.thanks)

    opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
    manifest = opf.find('.//' + OPFNS('manifest'))
    guide = opf.find('.//' + OPFNS('guide'))
    spine = opf.find('.//' + OPFNS('spine'))

    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
    zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)

    # write static elements
    mime = zipfile.ZipInfo()
    mime.filename = 'mimetype'
    mime.compress_type = zipfile.ZIP_STORED
    mime.extra = ''
    zip.writestr(mime, 'application/epub+zip')
    zip.writestr(
        'META-INF/container.xml',
        '<?xml version="1.0" ?>'
        '<container version="1.0" '
        'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
        '<rootfiles><rootfile full-path="OPS/content.opf" '
        'media-type="application/oebps-package+xml" />'
        '</rootfiles></container>'
    )
    zip.write(get_resource('res/wl-logo-small.png'),
              os.path.join('OPS', 'logo_wolnelektury.png'))
    zip.write(get_resource('res/jedenprocent.png'),
              os.path.join('OPS', 'jedenprocent.png'))
    if not style:
        style = get_resource('epub/style.css')
    zip.write(style, os.path.join('OPS', 'style.css'))

    if cover:
        if cover is True:
            cover = DefaultEbookCover

        cover_file = StringIO()
        bound_cover = cover(document.book_info)
        bound_cover.save(cover_file)
        cover_name = 'cover.%s' % bound_cover.ext()
        zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
        del cover_file

        cover_tree = etree.parse(get_resource('epub/cover.html'))
        cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
        zip.writestr('OPS/cover.html', etree.tostring(
            cover_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

        if bound_cover.uses_dc_cover:
            if document.book_info.cover_by:
                document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
            if document.book_info.cover_source:
                document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)

        manifest.append(etree.fromstring(
            '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
        manifest.append(etree.fromstring(
            '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
        spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
        opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
        guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))

    annotations = etree.Element('annotations')

    toc_file = etree.fromstring(
        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
        '"-//NISO//DTD ncx 2005-1//EN" '
        '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
        'version="2005-1"><head></head><docTitle></docTitle><navMap>'
        '</navMap></ncx>'
    )
    nav_map = toc_file[-1]

    if html_toc:
        manifest.append(etree.fromstring(
            '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="html_toc" />'))
        guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))

    toc, chunk_counter, chars, sample = transform_file(document, sample=sample)

    if len(toc.children) < 2:
        toc.add(u"Początek utworu", "part1.html")

    # Last modifications in container files and EPUB creation
    if len(annotations) > 0:
        toc.add("Przypisy", "annotations.html")
        manifest.append(etree.fromstring(
            '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="annotations" />'))
        replace_by_verse(annotations)
        html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
        chars = chars.union(used_chars(html_tree.getroot()))
        zip.writestr('OPS/annotations.html', etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

    toc.add("Wesprzyj Wolne Lektury", "support.html")
    manifest.append(etree.fromstring(
        '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="support" />'))
    html_string = open(get_resource('epub/support.html')).read()
    chars.update(used_chars(etree.fromstring(html_string)))
    zip.writestr('OPS/support.html', html_string)

    toc.add("Strona redakcyjna", "last.html")
    manifest.append(etree.fromstring(
        '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="last" />'))
    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'))
    chars.update(used_chars(html_tree.getroot()))
    zip.writestr('OPS/last.html', etree.tostring(
        html_tree, pretty_print=True, xml_declaration=True,
        encoding="utf-8",
        doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
    ))

    if not flags or not 'without-fonts' in flags:
        # strip fonts
        tmpdir = mkdtemp('-librarian-epub')
        try:
            cwd = os.getcwd()
        except OSError:
            cwd = None

        os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
        for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
            optimizer_call = ['perl', 'subset.pl', '--chars',
                              ''.join(chars).encode('utf-8'),
                              get_resource('fonts/' + fname),
                              os.path.join(tmpdir, fname)]
            if verbose:
                print "Running font-optimizer"
                subprocess.check_call(optimizer_call)
            else:
                subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
            manifest.append(etree.fromstring(
                '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
        rmtree(tmpdir)
        if cwd is not None:
            os.chdir(cwd)
    zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
                 xml_declaration=True, encoding="utf-8"))
    title = document.book_info.title
    attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
    for st in attributes:
        meta = toc_file.makeelement(NCXNS('meta'))
        meta.set('name', st)
        meta.set('content', '0')
        toc_file[0].append(meta)
    toc_file[0][0].set('content', str(document.book_info.url))
    toc_file[0][1].set('content', str(toc.depth()))
    set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))

    # write TOC
    if html_toc:
        toc.add(u"Spis treści", "toc.html", index=1)
        zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
    toc.write_to_xml(nav_map)
    zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
                 xml_declaration=True, encoding="utf-8"))
    zip.close()

    return OutputFile.from_filename(output_file.name)
예제 #17
0
파일: cover.py 프로젝트: alanthai/librarian
 def output_file(self, *args, **kwargs):
     imgstr = StringIO()
     self.save(imgstr, *args, **kwargs)
     return OutputFile.from_string(imgstr.getvalue())
예제 #18
0
파일: pdf.py 프로젝트: axie-devel/librarian
def transform(wldoc,
              verbose=False,
              save_tex=None,
              morefloats=None,
              cover=None,
              flags=None,
              customizations=None,
              ilustr_path='',
              latex_dir=False):
    """ produces a PDF file with XeLaTeX

    wldoc: a WLDocument
    verbose: prints all output from LaTeX
    save_tex: path to save the intermediary LaTeX file to
    morefloats (old/new/none): force specific morefloats
    cover: a cover.Cover factory or True for default
    flags: less-advertising,
    customizations: user requested customizations regarding various formatting parameters (passed to wl LaTeX class)
    """

    # Parse XSLT
    try:
        book_info = wldoc.book_info
        document = load_including_children(wldoc)
        root = document.edoc.getroot()

        if cover:
            if cover is True:
                cover = make_cover
            bound_cover = cover(book_info, width=1200)
            root.set('data-cover-width', str(bound_cover.width))
            root.set('data-cover-height', str(bound_cover.height))
            if bound_cover.uses_dc_cover:
                if book_info.cover_by:
                    root.set('data-cover-by', book_info.cover_by)
                if book_info.cover_source:
                    root.set('data-cover-source', book_info.cover_source)
        if flags:
            for flag in flags:
                root.set('flag-' + flag, 'yes')

        # check for LaTeX packages
        if morefloats:
            root.set('morefloats', morefloats.lower())
        elif package_available('morefloats', 'maxfloats=19'):
            root.set('morefloats', 'new')

        # add customizations
        if customizations is not None:
            root.set('customizations', u','.join(customizations))

        # add editors info
        editors = document.editors()
        if editors:
            root.set(
                'editors',
                u', '.join(sorted(editor.readable() for editor in editors)))
        if document.book_info.funders:
            root.set('funders', u', '.join(document.book_info.funders))
        if document.book_info.thanks:
            root.set('thanks', document.book_info.thanks)

        # hack the tree
        move_motifs_inside(document.edoc)
        hack_motifs(document.edoc)
        parse_creator(document.edoc)
        substitute_hyphens(document.edoc)
        fix_hanging(document.edoc)
        fix_tables(document.edoc)
        mark_subauthors(document.edoc)

        # wl -> TeXML
        style_filename = get_stylesheet("wl2tex")
        style = etree.parse(style_filename)
        functions.reg_mathml_latex()

        # TeXML -> LaTeX
        temp = mkdtemp('-wl2pdf')

        for ilustr in document.edoc.findall("//ilustr"):
            shutil.copy(os.path.join(ilustr_path, ilustr.get("src")), temp)

        for sponsor in book_info.sponsors:
            ins = etree.Element("data-sponsor", name=sponsor)
            logo = sponsor_logo(sponsor)
            if logo:
                fname = 'sponsor-%s' % os.path.basename(logo)
                shutil.copy(logo, os.path.join(temp, fname))
                ins.set('src', fname)
            root.insert(0, ins)

        if book_info.sponsor_note:
            root.set("sponsor-note", book_info.sponsor_note)

        texml = document.transform(style)

        if cover:
            with open(os.path.join(temp, 'cover.png'), 'w') as f:
                bound_cover.save(f, quality=80)

        del document  # no longer needed large object :)

        tex_path = os.path.join(temp, 'doc.tex')
        fout = open(tex_path, 'wb')
        process(six.BytesIO(texml), fout, 'utf-8')
        fout.close()
        del texml

        if save_tex:
            shutil.copy(tex_path, save_tex)

        # LaTeX -> PDF
        shutil.copy(get_resource('pdf/wl.cls'), temp)
        shutil.copy(get_resource('res/wl-logo.png'), temp)

        if latex_dir:
            return temp

        try:
            cwd = os.getcwd()
        except OSError:
            cwd = None
        os.chdir(temp)

        # some things work better when compiled twice
        # (table of contents, [line numbers - disabled])
        for run in range(2):
            if verbose:
                p = call(['xelatex', tex_path])
            else:
                p = call(['xelatex', '-interaction=batchmode', tex_path],
                         stdout=PIPE,
                         stderr=PIPE)
            if p:
                raise ParseError("Error parsing .tex file")

        if cwd is not None:
            os.chdir(cwd)

        output_file = NamedTemporaryFile(prefix='librarian',
                                         suffix='.pdf',
                                         delete=False)
        pdf_path = os.path.join(temp, 'doc.pdf')
        shutil.move(pdf_path, output_file.name)
        shutil.rmtree(temp)
        return OutputFile.from_filename(output_file.name)

    except (XMLSyntaxError, XSLTApplyError) as e:
        raise ParseError(e)