def test_transform():
    """PDF transform writes the editors into the saved TeX preamble."""
    # delete=False + close(): as_pdf needs to write to the path itself.
    temp = NamedTemporaryFile(delete=False)
    temp.close()
    WLDocument.from_file(
        get_fixture('text', 'asnyk_zbior.xml'),
        provider=DirDocProvider(get_fixture('text', ''))
    ).as_pdf(save_tex=temp.name)
    # Context manager instead of a leaked file handle; binary read +
    # explicit decode keeps the Python 2 semantics.  The stray debug
    # `print tex` has been removed.
    with open(temp.name, 'rb') as tex_file:
        tex = tex_file.read().decode('utf-8')

    # Check contributor list.
    editors = re.search(
        r'\\def\\editors\{Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}',
        tex)
    assert_equal(editors.group(1),
                 u"Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska")
def test_transform():
    """as_pdf with save_tex records the contributor list in the TeX."""
    temp = NamedTemporaryFile(delete=False)
    temp.close()
    WLDocument.from_file(
        get_fixture('text', 'asnyk_zbior.xml'),
        provider=DirDocProvider(get_fixture('text', ''))
    ).as_pdf(save_tex=temp.name)
    # Close the TeX file deterministically instead of leaking the handle.
    with open(temp.name, 'rb') as tex_file:
        tex = tex_file.read().decode('utf-8')

    # Check contributor list.
    editors = re.search(
        r'\\def\\editors\{Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}',
        tex)
    assert_equal(editors.group(1),
                 u"Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska")
def wldocument(self, parse_dublincore=True):
    """Parse this object's XML file into a librarian WLDocument."""
    from catalogue.import_utils import ORMDocProvider
    from librarian.parser import WLDocument

    provider = ORMDocProvider(self)
    return WLDocument.from_file(
        self.xml_file.path,
        provider=provider,
        parse_dublincore=parse_dublincore,
    )
def test_transform():
    """HTML output matches the stored expected fixture."""
    expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.html')

    html = WLDocument.from_file(
        get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
    ).as_html().get_string()

    # open() in a context manager instead of the deprecated file()
    # builtin, which also leaked the file handle.
    with open(expected_output_file_path) as f:
        assert_equal(html, f.read())
def test_transform():
    """HTML bytes match the stored expected fixture byte for byte."""
    expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.html')

    html = WLDocument.from_file(
        get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
    ).as_html().get_bytes()

    # Close the fixture file deterministically instead of leaking it.
    with open(expected_output_file_path, 'rb') as f:
        assert_equal(html, f.read())
def test_transform():
    """Plain-text output matches the stored expected fixture."""
    expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.txt')

    text = WLDocument.from_file(
        get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
    ).as_text().get_string()

    # open() in a context manager instead of the deprecated file()
    # builtin, which also leaked the file handle.
    with open(expected_output_file_path) as f:
        assert_equal(text, f.read())
def test_transform_raw():
    """Plain-text output with the raw-text flag matches its fixture."""
    expected_output_file_path = get_fixture(
        'text', 'asnyk_miedzy_nami_expected_raw.txt')

    text = WLDocument.from_file(
        get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
    ).as_text(flags=['raw-text']).get_bytes()

    # Close the fixture file deterministically instead of leaking it.
    with open(expected_output_file_path, 'rb') as f:
        assert_equal(text, f.read())
def wldocument(self, parse_dublincore=True, inherit=True):
    """Build a librarian WLDocument for this book.

    When ``inherit`` is true and the book has a parent, the parent's
    cover info is supplied as metadata fallbacks.
    """
    from catalogue.import_utils import ORMDocProvider
    from librarian.parser import WLDocument

    meta_fallbacks = None
    if inherit and self.parent:
        meta_fallbacks = self.parent.cover_info()

    return WLDocument.from_file(
        self.xml_file.path,
        provider=ORMDocProvider(self),
        parse_dublincore=parse_dublincore,
        meta_fallbacks=meta_fallbacks,
    )
def prepare_file(cls, main_input, output_dir, verbose=False, overwrite=False):
    """Convert a single input file to this class's output format.

    Skips the conversion when the output file already exists, unless
    ``overwrite`` is set.
    """
    # os.path.split is separator-agnostic, unlike rsplit('/', 1), so
    # this also works for Windows-style paths.
    path, fname = os.path.split(os.path.realpath(main_input))
    provider = DirDocProvider(path)
    slug, ext = os.path.splitext(fname)

    if output_dir != '':
        makedirs(output_dir)
    outfile = os.path.join(output_dir, slug + '.' + cls.ext)
    if os.path.exists(outfile) and not overwrite:
        # Output is already there; nothing to do.
        return

    doc = WLDocument.from_file(main_input, provider=provider)
    output_file = cls.transform(doc, cover=cls.cover, flags=cls.flags)
    doc.save_output_file(output_file, output_path=outfile)
def prepare_file(cls, main_input, output_dir, verbose=False):
    """Convert a single input file to this class's output format
    using ``cls.converter``."""
    path, fname = os.path.realpath(main_input).rsplit('/', 1)
    provider = DirDocProvider(path)
    slug, ext = os.path.splitext(fname)

    if output_dir != '':
        # Only tolerate "directory already exists"; the previous bare
        # `except: pass` silently swallowed every error here, including
        # permission problems and KeyboardInterrupt.
        try:
            os.makedirs(output_dir)
        except OSError:
            if not os.path.isdir(output_dir):
                raise
    outfile = os.path.join(output_dir, slug + '.' + cls.ext)

    doc = WLDocument.from_file(main_input, provider=provider)
    output_file = cls.converter.transform(doc, cover=cls.cover, flags=cls.flags)
    doc.save_output_file(output_file, output_path=outfile)
def test_transform():
    """EPUB output credits the editors on the last page."""
    epub = WLDocument.from_file(
        get_fixture('text', 'asnyk_zbior.xml'),
        provider=DirDocProvider(get_fixture('text', ''))
    ).as_epub(flags=['without_fonts']).get_file()
    zipf = ZipFile(epub)

    # Check contributor list.
    last = zipf.open('OPS/last.html')
    tree = html.parse(last)
    editors_attribution = False
    for par in tree.findall("//p"):
        # Guard: <p> elements without direct text have par.text == None,
        # which used to raise AttributeError on .startswith.
        if par.text and par.text.startswith(
                u'Opracowanie redakcyjne i przypisy:'):
            editors_attribution = True
            assert_equal(
                par.text.rstrip(),
                u'Opracowanie redakcyjne i przypisy: '
                u'Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.')
    assert_true(editors_attribution)
def test_transform():
    """EPUB output credits the editors on the last page."""
    epub = WLDocument.from_file(
        get_fixture('text', 'asnyk_zbior.xml'),
        provider=DirDocProvider(get_fixture('text', ''))
    ).as_epub(flags=['without_fonts']).get_file()
    zipf = ZipFile(epub)

    # Check contributor list.
    last = zipf.open('OPS/last.html')
    tree = html.parse(last)
    editors_attribution = False
    for par in tree.findall("//p"):
        # Guard: <p> elements without direct text have par.text == None,
        # which used to raise AttributeError on .startswith.
        if par.text and par.text.startswith(
                u'Opracowanie redakcyjne i przypisy:'):
            editors_attribution = True
            assert_equal(
                par.text.rstrip(),
                u'Opracowanie redakcyjne i przypisy: '
                u'Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.')
    assert_true(editors_attribution)
def test_transform():
    """Smoke test: MOBI conversion runs with a stubbed converter binary."""
    document = WLDocument.from_file(
        get_fixture('text', 'asnyk_zbior.xml'),
        provider=DirDocProvider(get_fixture('text', '')),
    )
    # 'true' is a no-op executable standing in for the real converter.
    mobi = document.as_mobi(converter_path='true').get_file()
def test_transform_hyphenate():
    """Smoke test: EPUB conversion succeeds with hyphenation enabled."""
    document = WLDocument.from_file(
        get_fixture('text', 'asnyk_zbior.xml'),
        provider=DirDocProvider(get_fixture('text', '')),
    )
    epub = document.as_epub(
        flags=['without_fonts'], hyphenate=True).get_file()
def test_passing_parse_dublincore_to_transform():
    """Passing parse_dublincore=False to transform omits DublinCore parsing."""
    document = WLDocument.from_file(
        get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
        parse_dublincore=False,
    )
    document.as_html()
def test_no_dublincore():
    """A document without DublinCore metadata still renders to HTML."""
    document = WLDocument.from_file(
        get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'))
    document.as_html()
def prepare(cls, input_filenames, output_dir='', verbose=False): from lxml import etree from librarian import DirDocProvider, ParseError from librarian.parser import WLDocument from copy import deepcopy import os.path xml = etree.fromstring("""<?xml version="1.0" encoding="utf-8"?> <products xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></products>""") product = etree.fromstring("""<product> <publisherProductId></publisherProductId> <title></title> <info></info> <description></description> <authors> <author> <names>Jan</names> <lastName>Kowalski</lastName> </author> </authors> <price>0.0</price> <language>PL</language> </product>""") try: for main_input in input_filenames: if verbose: print main_input path, fname = os.path.realpath(main_input).rsplit('/', 1) provider = DirDocProvider(path) slug, ext = os.path.splitext(fname) outfile_dir = os.path.join(output_dir, slug) makedirs(os.path.join(output_dir, slug)) doc = WLDocument.from_file(main_input, provider=provider) info = doc.book_info product_elem = deepcopy(product) product_elem[0].text = cls.utf_trunc(slug, 100) product_elem[1].text = cls.utf_trunc(info.title, 255) product_elem[2].text = cls.utf_trunc(info.description, 255) product_elem[3].text = cls.utf_trunc(info.source_name, 3000) product_elem[4][0][0].text = cls.utf_trunc(u' '.join(info.author.first_names), 100) product_elem[4][0][1].text = cls.utf_trunc(info.author.last_name, 100) xml.append(product_elem) cover.VirtualoCover(info).save(os.path.join(outfile_dir, slug+'.jpg')) outfile = os.path.join(outfile_dir, '1.epub') outfile_sample = os.path.join(outfile_dir, '1.sample.epub') doc.save_output_file(doc.as_epub(), output_path=outfile) doc.save_output_file(doc.as_epub(doc, sample=25), output_path=outfile_sample) outfile = os.path.join(outfile_dir, '1.mobi') outfile_sample = os.path.join(outfile_dir, '1.sample.mobi') doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover), output_path=outfile) doc.save_output_file( doc.as_mobi(doc, 
cover=cover.VirtualoCover, sample=25), output_path=outfile_sample) except ParseError, e: print '%(file)s:%(name)s:%(message)s' % { 'file': main_input, 'name': e.__class__.__name__, 'message': e.message }
def index_content(self, book, book_fields={}):
    """
    Walks the book XML and extract content from it.
    Adds parts for each header tag and for each fragment.

    NOTE(review): ``book_fields={}`` is a mutable default argument;
    it is only read here, never mutated, so it happens to be safe.
    """
    wld = WLDocument.from_file(book.xml_file.path, parse_dublincore=False)
    root = wld.edoc.getroot()
    master = self.get_master(root)
    if master is None:
        # No master element => nothing indexable in this document.
        return []

    def walker(node, ignore_tags=[]):
        # Depth-first event stream over `node`, yielding triples:
        #   (node, None, None)  - element start (skipped for ignored tags)
        #   (None, text, None)  - a text or tail chunk
        #   (None, None, node)  - element end
        if node.tag not in ignore_tags:
            yield node, None, None
        if node.text is not None:
            yield None, node.text, None
        for child in list(node):
            for b, t, e in walker(child):
                yield b, t, e
        yield None, None, node
        if node.tail is not None:
            yield None, node.tail, None
        return

    def fix_format(text):
        # separator = [u" ", u"\t", u".", u";", u","]
        if isinstance(text, list):
            # need to join it first
            # NOTE(review): this filters the enclosing `content` list
            # captured from the loop below, not the `text` argument —
            # presumably intentional, but verify against callers.
            text = filter(lambda s: s is not None, content)
            text = u' '.join(text)
            # for i in range(len(text)):
            #     if i > 0:
            #         if text[i][0] not in separator\
            #             and text[i - 1][-1] not in separator:
            #             text.insert(i, u" ")
        # Strip verse-continuation slashes at end of lines.
        return re.sub("(?m)/$", "", text)

    def add_part(snippets, **fields):
        # Build one index document for a part (header/fragment/footnote)
        # of the current book, wiring in its snippet position.
        doc = self.create_book_doc(book)
        for n, v in book_fields.items():
            doc[n] = v

        doc['header_index'] = fields["header_index"]
        doc['header_span'] = 'header_span' in fields and fields['header_span'] or 1
        doc['header_type'] = fields['header_type']

        doc['text'] = fields['text']

        # snippets
        snip_pos = snippets.add(fields["text"])

        doc['snippets_position'] = snip_pos[0]
        doc['snippets_length'] = snip_pos[1]
        if snippets.revision:
            doc["snippets_revision"] = snippets.revision

        if 'fragment_anchor' in fields:
            doc["fragment_anchor"] = fields['fragment_anchor']

        if 'themes' in fields:
            doc['themes'] = fields['themes']
        # Unique id combining header position, span and anchor.
        doc['uid'] = "part%s%s%s" % (doc['header_index'],
                                     doc['header_span'],
                                     doc.get('fragment_anchor', ''))
        return doc

    def give_me_utf8(s):
        # NOTE(review): defined but not used anywhere in this method.
        if isinstance(s, unicode):
            return s.encode('utf-8')
        else:
            return s

    fragments = {}
    snippets = Snippets(book.id).open('w')
    try:
        # Pair each top-level header with its index (py2 enumerate).
        for header, position in zip(list(master), range(len(master))):

            if header.tag in self.skip_header_tags:
                continue
            if header.tag is etree.Comment:
                continue

            # section content
            content = []
            footnote = []

            def all_content(text):
                # Default text sink: feed every open fragment and the
                # section-level `content` buffer.
                for frag in fragments.values():
                    frag['text'].append(text)
                content.append(text)
            handle_text = [all_content]

            for start, text, end in walker(
                    header, ignore_tags=self.ignore_content_tags):
                # handle footnotes
                if start is not None and start.tag in self.footnote_tags:
                    footnote = []

                    def collect_footnote(t):
                        footnote.append(t)
                    handle_text.append(collect_footnote)
                # NOTE(review): `footnote is not []` is an identity
                # comparison with a fresh list and is therefore always
                # True; the branch is effectively gated on the tag
                # check alone (which keeps the handle_text stack
                # balanced, so do not "fix" it to a truthiness test).
                elif end is not None and footnote is not [] and end.tag in self.footnote_tags:
                    handle_text.pop()
                    doc = add_part(snippets, header_index=position,
                                   header_type=header.tag,
                                   text=u''.join(footnote),
                                   is_footnote=True)
                    self.index.add(doc)
                    footnote = []

                # handle fragments and themes.
                if start is not None and start.tag == 'begin':
                    fid = start.attrib['id'][1:]
                    fragments[fid] = {
                        'text': [], 'themes': [],
                        'start_section': position,
                        'start_header': header.tag
                    }

                # themes for this fragment
                elif start is not None and start.tag == 'motyw':
                    fid = start.attrib['id'][1:]
                    # Suppress text collection inside <motyw>.
                    handle_text.append(None)
                    if start.text is not None:
                        fragments[fid]['themes'] += map(
                            unicode.strip,
                            map(unicode, (start.text.split(','))))
                elif end is not None and end.tag == 'motyw':
                    handle_text.pop()

                elif start is not None and start.tag == 'end':
                    fid = start.attrib['id'][1:]
                    if fid not in fragments:
                        continue  # a broken <end> node, skip it
                    frag = fragments[fid]
                    if frag['themes'] == []:
                        continue  # empty themes list.
                    del fragments[fid]

                    doc = add_part(snippets,
                                   header_type=frag['start_header'],
                                   header_index=frag['start_section'],
                                   header_span=position - frag['start_section'] + 1,
                                   fragment_anchor=fid,
                                   text=fix_format(frag['text']),
                                   themes=frag['themes'])
                    self.index.add(doc)

                # Collect content.
                # NOTE(review): `handle_text is not []` is likewise
                # always True; handle_text always holds at least
                # all_content here.
                if text is not None and handle_text is not []:
                    hdl = handle_text[-1]
                    if hdl is not None:
                        hdl(text)

            # in the end, add a section text.
            doc = add_part(snippets, header_index=position,
                           header_type=header.tag,
                           text=fix_format(content))

            self.index.add(doc)
    finally:
        snippets.close()
def run(cls):
    """Command-line entry point: parse options, then convert each
    SOURCE file to this class's output format."""
    # Parse commandline arguments
    usage = """Usage: %%prog [options] SOURCE [SOURCE...]
Convert SOURCE files to %s format.""" % cls.format_name

    parser = optparse.OptionParser(usage=usage)

    parser.add_option('-v', '--verbose', action='store_true',
                      dest='verbose', default=False,
                      help='print status messages to stdout')
    parser.add_option(
        '-d', '--make-dir', action='store_true', dest='make_dir',
        default=False,
        help='create a directory for author and put the output file in it')
    parser.add_option('-o', '--output-file', dest='output_file',
                      metavar='FILE', help='specifies the output file')
    parser.add_option('-O', '--output-dir', dest='output_dir',
                      metavar='DIR',
                      help='specifies the directory for output')
    # Cover options only exist for formats that support covers.
    if cls.uses_cover:
        if cls.cover_optional:
            parser.add_option('-c', '--with-cover', action='store_true',
                              dest='with_cover', default=False,
                              help='create default cover')
        parser.add_option(
            '-C', '--image-cache', dest='image_cache', metavar='URL',
            help='prefix for image download cache' +
            (' (implies --with-cover)' if cls.cover_optional else ''))
    # Let subclasses contribute their own options and flags.
    for option in cls.parser_options + cls.transform_options + cls.transform_flags:
        option.add(parser)

    options, input_filenames = parser.parse_args()

    if len(input_filenames) < 1:
        parser.print_help()
        return 1

    # Prepare additional args for parser.
    parser_args = {}
    for option in cls.parser_options:
        parser_args[option.name()] = option.value(options)
    # Prepare additional args for transform method.
    transform_args = {}
    for option in cls.transform_options:
        transform_args[option.name()] = option.value(options)
    # Add flags to transform_args, if any.
    transform_flags = [
        flag.name() for flag in cls.transform_flags if flag.value(options)
    ]
    if transform_flags:
        transform_args['flags'] = transform_flags
    if options.verbose:
        transform_args['verbose'] = True
    # Add cover support, if any.
    if cls.uses_cover:
        if options.image_cache:
            # Factory closure binding the cache prefix into the cover.
            def cover_class(*args, **kwargs):
                return DefaultEbookCover(
                    image_cache=options.image_cache, *args, **kwargs)
            transform_args['cover'] = cover_class
        elif not cls.cover_optional or options.with_cover:
            transform_args['cover'] = DefaultEbookCover

    # Do some real work
    try:
        for main_input in input_filenames:
            if options.verbose:
                print main_input
            # Where to find input?
            if cls.uses_provider:
                path, fname = os.path.realpath(main_input).rsplit('/', 1)
                provider = DirDocProvider(path)
            else:
                provider = None
            # Where to write output?
            if not (options.output_file or options.output_dir):
                output_file = os.path.splitext(main_input)[0] + '.' + cls.ext
            else:
                output_file = options.output_file
            # Do the transformation.
            doc = WLDocument.from_file(main_input, provider=provider,
                                       **parser_args)
            transform = cls.transform
            if transform is None:
                # Default transform: WLDocument.as_<ext>.
                transform = getattr(WLDocument, 'as_%s' % cls.ext)
            output = transform(doc, **transform_args)
            doc.save_output_file(output, output_file, options.output_dir,
                                 options.make_dir, cls.ext)
    except ParseError, e:
        print '%(file)s:%(name)s:%(message)s' % {
            'file': main_input,
            'name': e.__class__.__name__,
            'message': e
        }
def test_no_dublincore():
    """A document without DublinCore metadata still renders to text."""
    document = WLDocument.from_file(
        get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'))
    document.as_text()
def index_content(self, book, book_fields={}):
    """
    Walks the book XML and extract content from it.
    Adds parts for each header tag and for each fragment.

    NOTE(review): ``book_fields={}`` is a mutable default argument;
    it is only read here, never mutated, so it happens to be safe.
    """
    wld = WLDocument.from_file(book.xml_file.path, parse_dublincore=False)
    root = wld.edoc.getroot()
    master = self.get_master(root)
    if master is None:
        # No master element => nothing indexable in this document.
        return []

    def walker(node, ignore_tags=[]):
        # Depth-first event stream over `node`, yielding triples:
        #   (node, None, None)  - element start (skipped for ignored tags)
        #   (None, text, None)  - a text or tail chunk
        #   (None, None, node)  - element end
        if node.tag not in ignore_tags:
            yield node, None, None
        if node.text is not None:
            yield None, node.text, None
        for child in list(node):
            for b, t, e in walker(child):
                yield b, t, e
        yield None, None, node
        if node.tail is not None:
            yield None, node.tail, None
        return

    def fix_format(text):
        # separator = [u" ", u"\t", u".", u";", u","]
        if isinstance(text, list):
            # need to join it first
            # NOTE(review): this filters the enclosing `content` list
            # captured from the loop below, not the `text` argument —
            # presumably intentional, but verify against callers.
            text = filter(lambda s: s is not None, content)
            text = u' '.join(text)
            # for i in range(len(text)):
            #     if i > 0:
            #         if text[i][0] not in separator\
            #             and text[i - 1][-1] not in separator:
            #             text.insert(i, u" ")
        # Strip verse-continuation slashes at end of lines.
        return re.sub("(?m)/$", "", text)

    def add_part(snippets, **fields):
        # Build one index document for a part (header/fragment/footnote)
        # of the current book, wiring in its snippet position.
        doc = self.create_book_doc(book)
        for n, v in book_fields.items():
            doc[n] = v

        doc['header_index'] = fields["header_index"]
        doc['header_span'] = 'header_span' in fields and fields['header_span'] or 1
        doc['header_type'] = fields['header_type']

        doc['text'] = fields['text']

        # snippets
        snip_pos = snippets.add(fields["text"])

        doc['snippets_position'] = snip_pos[0]
        doc['snippets_length'] = snip_pos[1]
        if snippets.revision:
            doc["snippets_revision"] = snippets.revision

        if 'fragment_anchor' in fields:
            doc["fragment_anchor"] = fields['fragment_anchor']

        if 'themes' in fields:
            doc['themes'] = fields['themes']
        # Unique id combining header position, span and anchor.
        doc['uid'] = "part%s%s%s" % (doc['header_index'],
                                     doc['header_span'],
                                     doc.get('fragment_anchor', ''))
        return doc

    def give_me_utf8(s):
        # NOTE(review): defined but not used anywhere in this method.
        if isinstance(s, unicode):
            return s.encode('utf-8')
        else:
            return s

    fragments = {}
    snippets = Snippets(book.id).open('w')
    try:
        # Pair each top-level header with its index (py2 enumerate).
        for header, position in zip(list(master), range(len(master))):

            if header.tag in self.skip_header_tags:
                continue
            if header.tag is etree.Comment:
                continue

            # section content
            content = []
            footnote = []

            def all_content(text):
                # Default text sink: feed every open fragment and the
                # section-level `content` buffer.
                for frag in fragments.values():
                    frag['text'].append(text)
                content.append(text)
            handle_text = [all_content]

            for start, text, end in walker(header,
                                           ignore_tags=self.ignore_content_tags):
                # handle footnotes
                if start is not None and start.tag in self.footnote_tags:
                    footnote = []

                    def collect_footnote(t):
                        footnote.append(t)
                    handle_text.append(collect_footnote)
                # NOTE(review): `footnote is not []` is an identity
                # comparison with a fresh list and is therefore always
                # True; the branch is effectively gated on the tag
                # check alone (which keeps the handle_text stack
                # balanced, so do not "fix" it to a truthiness test).
                elif end is not None and footnote is not [] and end.tag in self.footnote_tags:
                    handle_text.pop()
                    doc = add_part(snippets, header_index=position,
                                   header_type=header.tag,
                                   text=u''.join(footnote),
                                   is_footnote=True)
                    self.index.add(doc)
                    footnote = []

                # handle fragments and themes.
                if start is not None and start.tag == 'begin':
                    fid = start.attrib['id'][1:]
                    fragments[fid] = {'text': [], 'themes': [],
                                      'start_section': position,
                                      'start_header': header.tag}

                # themes for this fragment
                elif start is not None and start.tag == 'motyw':
                    fid = start.attrib['id'][1:]
                    # Suppress text collection inside <motyw>.
                    handle_text.append(None)
                    if start.text is not None:
                        fragments[fid]['themes'] += map(
                            unicode.strip,
                            map(unicode, (start.text.split(','))))
                elif end is not None and end.tag == 'motyw':
                    handle_text.pop()

                elif start is not None and start.tag == 'end':
                    fid = start.attrib['id'][1:]
                    if fid not in fragments:
                        continue  # a broken <end> node, skip it
                    frag = fragments[fid]
                    if frag['themes'] == []:
                        continue  # empty themes list.
                    del fragments[fid]

                    doc = add_part(snippets,
                                   header_type=frag['start_header'],
                                   header_index=frag['start_section'],
                                   header_span=position - frag['start_section'] + 1,
                                   fragment_anchor=fid,
                                   text=fix_format(frag['text']),
                                   themes=frag['themes'])
                    self.index.add(doc)

                # Collect content.
                # NOTE(review): `handle_text is not []` is likewise
                # always True; handle_text always holds at least
                # all_content here.
                if text is not None and handle_text is not []:
                    hdl = handle_text[-1]
                    if hdl is not None:
                        hdl(text)

            # in the end, add a section text.
            doc = add_part(snippets, header_index=position,
                           header_type=header.tag,
                           text=fix_format(content))

            self.index.add(doc)
    finally:
        snippets.close()
def run(cls):
    """Command-line entry point: parse options, then convert each
    SOURCE file to this class's output format."""
    # Parse commandline arguments
    usage = """Usage: %%prog [options] SOURCE [SOURCE...]
Convert SOURCE files to %s format.""" % cls.format_name

    parser = optparse.OptionParser(usage=usage)

    parser.add_option('-v', '--verbose', action='store_true',
                      dest='verbose', default=False,
                      help='print status messages to stdout')
    parser.add_option('-d', '--make-dir', action='store_true',
                      dest='make_dir', default=False,
                      help='create a directory for author and put the output file in it')
    parser.add_option('-o', '--output-file', dest='output_file',
                      metavar='FILE', help='specifies the output file')
    parser.add_option('-O', '--output-dir', dest='output_dir',
                      metavar='DIR',
                      help='specifies the directory for output')
    # Cover options only exist for formats that support covers.
    if cls.uses_cover:
        if cls.cover_optional:
            parser.add_option('-c', '--with-cover', action='store_true',
                              dest='with_cover', default=False,
                              help='create default cover')
        parser.add_option('-C', '--image-cache', dest='image_cache',
                          metavar='URL',
                          help='prefix for image download cache' +
                          (' (implies --with-cover)' if cls.cover_optional else ''))
    # Let subclasses contribute their own options and flags.
    for option in cls.parser_options + cls.transform_options + cls.transform_flags:
        option.add(parser)

    options, input_filenames = parser.parse_args()

    if len(input_filenames) < 1:
        parser.print_help()
        return(1)

    # Prepare additional args for parser.
    parser_args = {}
    for option in cls.parser_options:
        parser_args[option.name()] = option.value(options)
    # Prepare additional args for transform method.
    transform_args = {}
    for option in cls.transform_options:
        transform_args[option.name()] = option.value(options)
    # Add flags to transform_args, if any.
    transform_flags = [flag.name() for flag in cls.transform_flags
                       if flag.value(options)]
    if transform_flags:
        transform_args['flags'] = transform_flags
    # Add cover support, if any.
    if cls.uses_cover:
        if options.image_cache:
            # Lambda binds the cache prefix into the WLCover factory.
            transform_args['cover'] = lambda x: WLCover(x, image_cache = options.image_cache)
        elif not cls.cover_optional or options.with_cover:
            transform_args['cover'] = WLCover

    # Do some real work
    try:
        for main_input in input_filenames:
            if options.verbose:
                print main_input
            # Where to find input?
            if cls.uses_provider:
                path, fname = os.path.realpath(main_input).rsplit('/', 1)
                provider = DirDocProvider(path)
            else:
                provider = None
            # Where to write output?
            if not (options.output_file or options.output_dir):
                output_file = os.path.splitext(main_input)[0] + '.' + cls.ext
            else:
                # NOTE(review): explicit options take over; save_output_file
                # receives output_file=None plus the dir/make_dir options.
                output_file = None
            # Do the transformation.
            doc = WLDocument.from_file(main_input, provider=provider,
                                       **parser_args)
            transform = cls.transform
            if transform is None:
                # Default transform: WLDocument.as_<ext>.
                transform = getattr(WLDocument, 'as_%s' % cls.ext)
            output = transform(doc, **transform_args)
            doc.save_output_file(output, output_file, options.output_dir,
                                 options.make_dir, cls.ext)
    except ParseError, e:
        print '%(file)s:%(name)s:%(message)s' % {
            'file': main_input,
            'name': e.__class__.__name__,
            'message': e
        }
def prepare(cls, input_filenames, output_dir="", verbose=False): from lxml import etree from librarian import DirDocProvider, ParseError from librarian.parser import WLDocument from copy import deepcopy import os import os.path xml = etree.fromstring( """<?xml version="1.0" encoding="utf-8"?> <products xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></products>""" ) product = etree.fromstring( """<product> <publisherProductId></publisherProductId> <title></title> <info></info> <description></description> <authors> <author> <names>Jan</names> <lastName>Kowalski</lastName> </author> </authors> <price>0.0</price> <language>PL</language> </product>""" ) try: for main_input in input_filenames: if verbose: print main_input path, fname = os.path.realpath(main_input).rsplit("/", 1) provider = DirDocProvider(path) slug, ext = os.path.splitext(fname) outfile_dir = os.path.join(output_dir, slug) os.makedirs(os.path.join(output_dir, slug)) doc = WLDocument.from_file(main_input, provider=provider) info = doc.book_info product_elem = deepcopy(product) product_elem[0].text = cls.utf_trunc(slug, 100) product_elem[1].text = cls.utf_trunc(info.title, 255) product_elem[2].text = cls.utf_trunc(info.description, 255) product_elem[3].text = cls.utf_trunc(info.source_name, 3000) product_elem[4][0][0].text = cls.utf_trunc(u" ".join(info.author.first_names), 100) product_elem[4][0][1].text = cls.utf_trunc(info.author.last_name, 100) xml.append(product_elem) cover.VirtualoCover(info).save(os.path.join(outfile_dir, slug + ".jpg")) outfile = os.path.join(outfile_dir, "1.epub") outfile_sample = os.path.join(outfile_dir, "1.sample.epub") doc.save_output_file(doc.as_epub(), output_path=outfile) doc.save_output_file(doc.as_epub(doc, sample=25), output_path=outfile_sample) outfile = os.path.join(outfile_dir, "1.mobi") outfile_sample = os.path.join(outfile_dir, "1.sample.mobi") doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover), output_path=outfile) 
doc.save_output_file(doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25), output_path=outfile_sample) except ParseError, e: print "%(file)s:%(name)s:%(message)s" % { "file": main_input, "name": e.__class__.__name__, "message": e.message, }
def test_passing_parse_dublincore_to_transform():
    """Passing parse_dublincore=False to the constructor omits DublinCore parsing."""
    document = WLDocument.from_file(
        get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
        parse_dublincore=False,
    )
    document.as_text()
def test_transform():
    """Smoke test: EPUB conversion of a multi-part book succeeds."""
    document = WLDocument.from_file(
        get_fixture('text', 'asnyk_zbior.xml'),
        provider=DirDocProvider(get_fixture('text', '')),
    )
    document.as_epub(flags=['without_fonts'])