コード例 #1
0
def meta_to_runs(what, intern_image, total_w):
    # pylint: disable=R0911
    """Convert a metadata value into a flat list of ``w:r`` run elements.

    Dispatches on the type of `what`:

    - string: a single ``w:r`` element whose body is ``mk_t(what)``;
    - list: the concatenated runs of every item;
    - ``(tag, attrs, body)`` tuple: the runs of `body`, each passed through
      `apply_html_style` when the tag is one of b/i/s/u; any other tag is
      logged and the body's runs are returned unstyled;
    - `literal.Image`: a single run holding the picture built by
      `make_pic`, scaled so that its width is the percentage given in
      ``what.style['width']`` of `total_w`;
    - `literal.Bibliography`: the runs of its ``data``;
    - anything else: logged, converted with `unparse_literal` and
      processed again.

    `intern_image` is called with the image; its result is handed to
    `make_pic` as the picture's id.
    """
    recurse = partial(meta_to_runs, intern_image=intern_image, total_w=total_w)
    if isinstance(what, basestring):
        return [mkel('w:r', {}, [mk_t(what)])]
    elif isinstance(what, list):
        return flatmap(recurse, what)
    elif isinstance(what, tuple):
        t, _, b = what
        runs = recurse(b)
        if t in ('b', 'i', 's', 'u'):
            return [apply_html_style(t, run) for run in runs]
        else:
            log.warn("Didn't understand html tag %r", what)
            return runs
    elif isinstance(what, literal.Image):
        rid = intern_image(what)
        target_w = parse_percentage(what.style['width']) * total_w
        w, h = what.get_size()
        # Scale both dimensions by target_w / original width: the width
        # becomes target_w and the aspect ratio is kept.  (Dividing by the
        # height instead would make the *height* equal to target_w.)
        w, h = [docxlite.Emu(x * target_w / w) for x in (w, h)]
        inline = (what.style['display'] == 'inline')
        return [mkel('w:r', {}, [make_pic(rid, w, h, inline)])]
    elif isinstance(what, literal.Bibliography):
        return recurse(what.data)
    else:
        log.warn('Fallthrough: %r', what)
        return recurse(unparse_literal(what))
コード例 #2
0
def meta_to_runs(what, intern_image, total_w):
    # pylint: disable=R0911
    """Convert a metadata value into a flat list of ``w:r`` run elements.

    Dispatches on the type of `what`:

    - string: a single ``w:r`` element whose body is ``mk_t(what)``;
    - list: the concatenated runs of every item;
    - ``(tag, attrs, body)`` tuple: the runs of `body`, each passed through
      `apply_html_style` when the tag is one of b/i/s/u; any other tag is
      logged and the body's runs are returned unstyled;
    - `literal.Image`: a single run holding the picture built by
      `make_pic`, scaled so that its width is the percentage given in
      ``what.style['width']`` of `total_w`;
    - `literal.Bibliography`: the runs of its ``data``;
    - anything else: logged, converted with `unparse_literal` and
      processed again.

    `intern_image` is called with the image; its result is handed to
    `make_pic` as the picture's id.
    """
    recurse = partial(meta_to_runs, intern_image=intern_image, total_w=total_w)
    if isinstance(what, basestring):
        return [mkel('w:r', {}, [mk_t(what)])]
    elif isinstance(what, list):
        return flatmap(recurse, what)
    elif isinstance(what, tuple):
        t, _, b = what
        runs = recurse(b)
        if t in ('b', 'i', 's', 'u'):
            return [apply_html_style(t, run) for run in runs]
        else:
            log.warn("Didn't understand html tag %r", what)
            return runs
    elif isinstance(what, literal.Image):
        rid = intern_image(what)
        target_w = parse_percentage(what.style['width']) * total_w
        w, h = what.get_size()
        # Scale both dimensions by target_w / original width: the width
        # becomes target_w and the aspect ratio is kept.  (Dividing by the
        # height instead would make the *height* equal to target_w.)
        w, h = [docxlite.Emu(x * target_w / w) for x in (w, h)]
        inline = (what.style['display'] == 'inline')
        return [mkel('w:r', {}, [make_pic(rid, w, h, inline)])]
    elif isinstance(what, literal.Bibliography):
        return recurse(what.data)
    else:
        log.warn('Fallthrough: %r', what)
        return recurse(unparse_literal(what))
コード例 #3
0
def meta_to_html(meta):
    # pylint: disable=R0914
    """Turn the document properties in `meta` into leading HTML elements.

    Returns a ``(language code, title, elements)`` triple.  `elements`
    holds, in order: an ``h1.title`` if there is a title, an
    ``h2.subtitle`` if there is a subtitle, and a
    ``dl#document-properties`` with one ``dt``/``dd`` pair per remaining
    property that has a 'supplied' entry in ``meta.d``.
    """
    # FIXME(alexander): this is just a really hacky way to convert the
    # document properties into something vaguely visually plausible in a style
    # independent manner
    fields = meta.items()
    lang = fields['lang']
    leading = []

    # FIXME(alexander): should maybe default to docname?
    # the only style which does currently not have a title
    # is letter, so could use subject there
    title = fields.pop('title', '')
    if title:
        leading.append(mkel('h1', {'class': ['title']}, [title]))
    subtitle = fields.pop('subtitle', '')
    if subtitle:
        leading.append(mkel('h2', {'class': ['subtitle']}, [subtitle]))

    # Properties of these types / with these names are still emitted, but
    # carry a `hidden` attribute.
    hidden_types = (int, Bibliography, Image, Lang)
    #FIXME(alexander): toc-depth should be int,
    #                  and bibliography-preamble must die
    hidden_keys = ('toc-depth', 'bibliography-preamble')

    entries = []
    for key, value in fields.iteritems():
        info = meta.d[key]
        if 'supplied' not in info:
            # only a default value; nothing to show
            continue

        term_attrs = {'class': [key]}
        if isinstance(value, hidden_types) or key in hidden_keys:
            term_attrs['hidden'] = ""
        caption = info.get('label', key.capitalize())
        entries.append(mkel('dt', term_attrs, [lang.localize(caption)]))

        # The dd starts from the same attributes as the dt; the exact
        # (default unparse) form is attached as data-value whenever it
        # differs from the roundtrip=False form that is displayed.
        value_attrs = term_attrs.copy()
        shown = unparse_literal(value, roundtrip=False)
        exact = unparse_literal(value)
        if shown != exact:
            value_attrs['data-value'] = exact
        if isinstance(value, bool):
            content = lang.localize(shown)
        else:
            content = shown
        entries.append(mkel('dd', value_attrs, [content]))

    if entries:
        leading.append(mkel('dl', {'id': 'document-properties'}, entries))

    return lang.code, title, leading
コード例 #4
0
def meta_to_html(meta):
    # pylint: disable=R0914
    """Turn the document properties in `meta` into leading HTML elements.

    Returns a ``(language code, title, elements)`` triple.  `elements`
    holds, in order: an ``h1.title`` if there is a title, an
    ``h2.subtitle`` if there is a subtitle, and a
    ``dl#document-properties`` with one ``dt``/``dd`` pair per remaining
    property that has a 'supplied' entry in ``meta.d``.
    """
    # FIXME(alexander): this is just a really hacky way to convert the
    # document properties into something vaguely visually plausible in a style
    # independent manner
    fields = meta.items()
    lang = fields['lang']
    leading = []

    # FIXME(alexander): should maybe default to docname?
    # the only style which does currently not have a title
    # is letter, so could use subject there
    title = fields.pop('title', '')
    if title:
        leading.append(mkel('h1', {'class': ['title']}, [title]))
    subtitle = fields.pop('subtitle', '')
    if subtitle:
        leading.append(mkel('h2', {'class': ['subtitle']}, [subtitle]))

    # Properties of these types / with these names are still emitted, but
    # carry a `hidden` attribute.
    hidden_types = (int, Bibliography, Image, Lang)
    #FIXME(alexander): toc-depth should be int,
    #                  and bibliography-preamble must die
    hidden_keys = ('toc-depth', 'bibliography-preamble')

    entries = []
    for key, value in fields.iteritems():
        info = meta.d[key]
        if 'supplied' not in info:
            # only a default value; nothing to show
            continue

        term_attrs = {'class': [key]}
        if isinstance(value, hidden_types) or key in hidden_keys:
            term_attrs['hidden'] = ""
        caption = info.get('label', key.capitalize())
        entries.append(mkel('dt', term_attrs, [lang.localize(caption)]))

        # The dd starts from the same attributes as the dt; the exact
        # (default unparse) form is attached as data-value whenever it
        # differs from the roundtrip=False form that is displayed.
        value_attrs = term_attrs.copy()
        shown = unparse_literal(value, roundtrip=False)
        exact = unparse_literal(value)
        if shown != exact:
            value_attrs['data-value'] = exact
        if isinstance(value, bool):
            content = lang.localize(shown)
        else:
            content = shown
        entries.append(mkel('dd', value_attrs, [content]))

    if entries:
        leading.append(mkel('dl', {'id': 'document-properties'}, entries))

    return lang.code, title, leading
コード例 #5
0
    def xmp_meta(self, head):
        """Build the XMP metadata options for the pdf (for hyperxmp.sty).

        Returns one string of ``pdf<key>={<value>}`` entries joined by
        a comma and a newline, always ending with ``pdfcreator={Typesetr}``.
        The fields considered are lang, title, author, description,
        keywords and copyright; 'description' is emitted as ``pdfsubject``
        and falls back to the 'abstract' entry of `head` when there is no
        'description'.

        Producing well structured pdf output with latex is a fool's errand,
        so there are known shortcomings:

        - dc:language should be of type 'bag'.
        - XMP and info entries aren't synched for
          'xmp:CreateDate'/'CreationDate' and 'pdf:Producer'/'Producer'.
        - PDF/A would additionally need 'pdfaid:part' and
          'pdfaid:conformance'; at first sight hyperxmp.sty seems to take a
          hardcoded guess with two possible outcomes: nothing or PDF/A-1b.

        Ideally we would only create valid PDF/A-2u or PDF/X documents, but
        both look pretty much impossible to achieve from latex directly:
        even the trivial metadata above is a pain, and on top of that we
        would at the very least have to deal with ICC profiles and unicode
        mappings (already somewhat painful because of historical baggage in
        PDF and the font standards it supports, and really horrible in
        latex, e.g. because of zero-width glyphs and math characters without
        a unicode equivalent). PDF/A-2a would also require tagging.

        """
        alternates = {'description': 'abstract'}
        entries = []
        for field in ('lang', 'title', 'author', 'description', 'keywords',
                      'copyright'):
            source = field
            if source not in head:
                source = alternates.get(field)
                if source is None or source not in head:
                    continue
            text = unparse_literal(head[source], roundtrip=False, plain=True)
            # typesetr uses ';' to separate fields (like keywords or multiple
            # authors), because ',' is often ambiguous where ';' almost never
            # is.
            if field not in self._COMMA_SEPARATED_XMP_FIELDS:
                latex_text = self.latexify(text)
            elif ',' not in text:
                latex_text = self.latexify(text.replace(';', ','))
            else:
                # Literal commas are swapped for a private placeholder
                # before ';' becomes the ',' separator; after latexify the
                # placeholder is turned into \xmpcomma{}.
                protected = text.replace(',', self._PRIVATE1)
                protected = protected.replace(';', ',')
                quoted = self.latexify(protected).replace(self._PRIVATE1,
                                                          r'\xmpcomma{}')
                latex_text = cmd('xmpquote', [], [quoted])
            pdf_key = 'subject' if field == 'description' else field
            entries.append(raw('pdf%s={%s}' % (pdf_key, latex_text)))
        # FIXME(alexander): append version info?
        entries.append(raw('pdfcreator={Typesetr}'))
        return ',\n'.join(entries)
コード例 #6
0
def meta_to_dublin_core(head, modified=None):
    """Build the Dublin Core ``metadata`` element for the fields in `head`.

    For every ``HEAD_TO_CORE`` entry whose key is present in `head`:

    - keys listed in ``MARC_RELATOR`` are treated as people (a
      ';'-separated string or an iterable of names); each person yields a
      ``dc:`` element with a ``pub-<name>-<n>`` id plus 'role' and
      'file-as' ``meta`` refinements pointing at that id;
    - any other key yields a single ``dc:`` element with the plain-text
      value (skipped when that text is empty); keys in ``TITLE_ELEMENTS``
      additionally get 'title-type' and 'display-seq' refinements.

    A final ``dcterms:modified`` entry carries `modified` (default: the
    current UTC time) in ISO format, cut to whole seconds, with a 'Z'
    suffix.

    Returns ``(('metadata', {}, children), DUBLIN_META_NS)``.  `head` must
    contain a 'uuid' entry.
    """
    modified = datetime.datetime.utcnow() if modified is None else modified
    ts = modified.isoformat().split('.')[0] + 'Z'
    assert 'uuid' in head
    dublin = []
    dc = 'dc:'.__add__
    ids = Counter()
    for h, d in HEAD_TO_CORE.iteritems():
        if h not in head:
            continue
        v = head[h]
        if h in MARC_RELATOR:
            # FIXME(alexander): make authors etc. first class type
            people = (re.split(r'\s*;\s*', v)
                      if isinstance(v, basestring) else v)
            for a in people:
                ids[d] += 1
                person = Person(a)
                pid = "pub-%s-%d" % (d, ids[d])
                dublin.append(mkel(dc(d), {'id': pid}, [person.display_name]))
                # `refines` has to be a relative IRI, i.e. the id as a
                # '#'-prefixed fragment -- the same form the title
                # refinements below use.
                ref = '#%s' % pid
                dublin.append(
                    mkel('meta', {
                        'refines': ref,
                        'property': 'role',
                        'scheme': 'marc:relators'
                    }, [MARC_RELATOR[h]]))
                dublin.append(
                    mkel('meta', {
                        'refines': ref,
                        'property': 'file-as'
                    }, [person.sort_name]))
        else:
            unparsed = unparse_literal(v, roundtrip=False, plain=True)
            if not unparsed:
                continue
            attrs = dict(id=h) if h in IDED_ELEMENTS else {}
            dublin.append(mkel(dc(d), attrs, [unparsed]))
            if h in TITLE_ELEMENTS:
                # NOTE(review): relies on every TITLE_ELEMENTS key also
                # being in IDED_ELEMENTS, otherwise attrs has no 'id'.
                ref = '#%s' % attrs['id']
                dublin.append(
                    mkel('meta', {
                        'refines': ref,
                        'property': 'title-type'
                    }, [{
                        'title': 'main'
                    }.get(h, h)]))
                dublin.append(
                    mkel('meta', {
                        'refines': ref,
                        'property': 'display-seq'
                    }, [str(TITLE_ELEMENTS.index(h) + 1)]))
    dublin.append(mkel('meta', {'property': 'dcterms:modified'}, [ts]))
    return ('metadata', {}, dublin), DUBLIN_META_NS
コード例 #7
0
    def xmp_meta(self, head):
        """Build the XMP metadata options for the pdf (for hyperxmp.sty).

        Returns one string of ``pdf<key>={<value>}`` entries joined by
        a comma and a newline, always ending with ``pdfcreator={Typesetr}``.
        The fields considered are lang, title, author, description,
        keywords and copyright; 'description' is emitted as ``pdfsubject``
        and falls back to the 'abstract' entry of `head` when there is no
        'description'.

        Producing well structured pdf output with latex is a fool's errand,
        so there are known shortcomings:

        - dc:language should be of type 'bag'.
        - XMP and info entries aren't synched for
          'xmp:CreateDate'/'CreationDate' and 'pdf:Producer'/'Producer'.
        - PDF/A would additionally need 'pdfaid:part' and
          'pdfaid:conformance'; at first sight hyperxmp.sty seems to take a
          hardcoded guess with two possible outcomes: nothing or PDF/A-1b.

        Ideally we would only create valid PDF/A-2u or PDF/X documents, but
        both look pretty much impossible to achieve from latex directly:
        even the trivial metadata above is a pain, and on top of that we
        would at the very least have to deal with ICC profiles and unicode
        mappings (already somewhat painful because of historical baggage in
        PDF and the font standards it supports, and really horrible in
        latex, e.g. because of zero-width glyphs and math characters without
        a unicode equivalent). PDF/A-2a would also require tagging.

        """
        alternates = {'description': 'abstract'}
        entries = []
        for field in ('lang', 'title', 'author', 'description', 'keywords',
                      'copyright'):
            source = field
            if source not in head:
                source = alternates.get(field)
                if source is None or source not in head:
                    continue
            text = unparse_literal(head[source], roundtrip=False, plain=True)
            # typesetr uses ';' to separate fields (like keywords or multiple
            # authors), because ',' is often ambiguous where ';' almost never
            # is.
            if field not in self._COMMA_SEPARATED_XMP_FIELDS:
                latex_text = self.latexify(text)
            elif ',' not in text:
                latex_text = self.latexify(text.replace(';', ','))
            else:
                # Literal commas are swapped for a private placeholder
                # before ';' becomes the ',' separator; after latexify the
                # placeholder is turned into \xmpcomma{}.
                protected = text.replace(',', self._PRIVATE1)
                protected = protected.replace(';', ',')
                quoted = self.latexify(protected).replace(self._PRIVATE1,
                                                          r'\xmpcomma{}')
                latex_text = cmd('xmpquote', [], [quoted])
            pdf_key = 'subject' if field == 'description' else field
            entries.append(raw('pdf%s={%s}' % (pdf_key, latex_text)))
        # FIXME(alexander): append version info?
        entries.append(raw('pdfcreator={Typesetr}'))
        return ',\n'.join(entries)
コード例 #8
0
def odtify(tree, required_styles, images):
    """Convert `tree` to an ODT markup string, dispatching on its type.

    Bibliographies are converted via their ``data``, strings via
    `odtify_basestring`, lists item by item (results concatenated),
    ``(tag, attrs, body)`` tuples via the ``HTML_TO_ODT`` handler for the
    tag and images via `odt_image`.  Any other value is logged and then
    converted after passing it through `unparse_literal`.
    """
    # FIXME(alexander): remove this disgusting crap;
    # how do Images work ATM?
    if isinstance(tree, literal.Bibliography):
        odt = odtify(tree.data, required_styles, images)
    elif isinstance(tree, basestring):
        odt = odtify_basestring(tree)
    elif isinstance(tree, list):
        pieces = [odtify(item, required_styles, images) for item in tree]
        odt = "".join(pieces)
    elif isinstance(tree, tuple):
        tag, attrs, body = tree
        convert = HTML_TO_ODT[tag]
        odt = convert(attrs, body, required_styles, images)
    elif isinstance(tree, literal.Image):
        odt = odt_image(tree, images)
    else:
        log.warn('Fallthrough: %r', tree)
        odt = odtify(unparse_literal(tree), required_styles, images)
    return odt
コード例 #9
0
def odtify(tree, required_styles, images):
    """Convert `tree` to an ODT markup string, dispatching on its type.

    Bibliographies are converted via their ``data``, strings via
    `odtify_basestring`, lists item by item (results concatenated),
    ``(tag, attrs, body)`` tuples via the ``HTML_TO_ODT`` handler for the
    tag and images via `odt_image`.  Any other value is logged and then
    converted after passing it through `unparse_literal`.
    """
    # FIXME(alexander): remove this disgusting crap;
    # how do Images work ATM?
    if isinstance(tree, literal.Bibliography):
        odt = odtify(tree.data, required_styles, images)
    elif isinstance(tree, basestring):
        odt = odtify_basestring(tree)
    elif isinstance(tree, list):
        pieces = [odtify(item, required_styles, images) for item in tree]
        odt = "".join(pieces)
    elif isinstance(tree, tuple):
        tag, attrs, body = tree
        convert = HTML_TO_ODT[tag]
        odt = convert(attrs, body, required_styles, images)
    elif isinstance(tree, literal.Image):
        odt = odt_image(tree, images)
    else:
        log.warn('Fallthrough: %r', tree)
        odt = odtify(unparse_literal(tree), required_styles, images)
    return odt
コード例 #10
0
def meta_to_dublin_core(head, modified=None):
    """Build the Dublin Core ``metadata`` element for the fields in `head`.

    For every ``HEAD_TO_CORE`` entry whose key is present in `head`:

    - keys listed in ``MARC_RELATOR`` are treated as people (a
      ';'-separated string or an iterable of names); each person yields a
      ``dc:`` element with a ``pub-<name>-<n>`` id plus 'role' and
      'file-as' ``meta`` refinements pointing at that id;
    - any other key yields a single ``dc:`` element with the plain-text
      value (skipped when that text is empty); keys in ``TITLE_ELEMENTS``
      additionally get 'title-type' and 'display-seq' refinements.

    A final ``dcterms:modified`` entry carries `modified` (default: the
    current UTC time) in ISO format, cut to whole seconds, with a 'Z'
    suffix.

    Returns ``(('metadata', {}, children), DUBLIN_META_NS)``.  `head` must
    contain a 'uuid' entry.
    """
    modified = datetime.datetime.utcnow() if modified is None else modified
    ts = modified.isoformat().split('.')[0] + 'Z'
    assert 'uuid' in head
    dublin = []
    dc = 'dc:'.__add__
    ids = Counter()
    for h, d in HEAD_TO_CORE.iteritems():
        if h not in head:
            continue
        v = head[h]
        if h in MARC_RELATOR:
            # FIXME(alexander): make authors etc. first class type
            people = (re.split(r'\s*;\s*', v)
                      if isinstance(v, basestring) else v)
            for a in people:
                ids[d] += 1
                person = Person(a)
                pid = "pub-%s-%d" % (d, ids[d])
                dublin.append(mkel(dc(d), {'id': pid}, [person.display_name]))
                # `refines` has to be a relative IRI, i.e. the id as a
                # '#'-prefixed fragment -- the same form the title
                # refinements below use.
                ref = '#%s' % pid
                dublin.append(mkel('meta', {'refines': ref,
                                            'property': 'role',
                                            'scheme':'marc:relators'},
                                   [MARC_RELATOR[h]]))
                dublin.append(mkel('meta', {'refines': ref,
                                            'property': 'file-as'},
                                   [person.sort_name]))
        else:
            unparsed = unparse_literal(v, roundtrip=False, plain=True)
            if not unparsed:
                continue
            attrs = dict(id=h) if h in IDED_ELEMENTS else {}
            dublin.append(mkel(dc(d), attrs, [unparsed]))
            if h in TITLE_ELEMENTS:
                # NOTE(review): relies on every TITLE_ELEMENTS key also
                # being in IDED_ELEMENTS, otherwise attrs has no 'id'.
                ref = '#%s' % attrs['id']
                dublin.append(mkel('meta', {'refines': ref,
                                            'property': 'title-type'},
                                   [{'title': 'main'}.get(h, h)]))
                dublin.append(mkel('meta', {'refines': ref,
                                            'property': 'display-seq'},
                                   [str(TITLE_ELEMENTS.index(h) + 1)]))
    dublin.append(mkel('meta', {'property': 'dcterms:modified'}, [ts]))
    return ('metadata', {}, dublin), DUBLIN_META_NS
コード例 #11
0
 def error(problem, k, supplied=None):
     """Record `problem` as the error entry for field `k` in `errors`.

     The entry also gets the unparsed `supplied` value when one is given
     and, for fields known to ``self._info``, that field's default as
     'canonical'.  An existing entry for `k` is replaced.
     """
     # Register the entry first, then fill in the optional parts in place.
     entry = errors[k] = OrderedDict([('error', problem)])
     if supplied is not None:
         entry['supplied'] = unparse_literal(supplied)
     if k in self._info:
         entry['canonical'] = self._info[k]['default']
コード例 #12
0
    def validate_and_augment(self, meta): # pylint: disable=R0912
        """Validate `meta` against ``self._info`` and return a `MetaInfo`.

        The result has one entry per key of `meta` followed by one per key
        of ``self._info`` that `meta` lacks.  An entry is either

        - an error record ('error', plus 'supplied' and/or 'canonical'
          where available) for unexpected fields, values of the wrong type
          or format, and missing required fields; or
        - a record with the unparsed 'supplied' value (if the key was in
          `meta`) and, when it differs from that, the 'canonical' string
          form of the parsed value, the supplied value or the default.

        Either kind additionally gets a 'label' when the field's label
        differs from ``self.default_label(k)`` and a 'type' when the
        field's type is not 'text'.

        Note that rich-text values in `meta` which are not strings are
        replaced in place by their `postprocess.plaintextify` form when
        the expected type is something else.
        """
        parsed = {}
        errors = OrderedDict()
        def error(problem, k, supplied=None):
            # Register `problem` for field `k`, replacing any earlier entry.
            errors[k] = OrderedDict([('error', problem)])
            if supplied is not None:
                errors[k]['supplied'] = unparse_literal(supplied)
            if k in self._info:
                errors[k]['canonical'] = self._info[k]['default']

        def check_supplied(): # pylint: disable=R0912
            def try_to_reify(v, parse):
                # Returns parse(v); on failure records an error for the
                # field currently being checked (`k` and `right_type` come
                # from the loop below) and implicitly returns None.
                try:
                    return parse(v)
                except (KeyboardInterrupt, SystemExit):
                    raise
                except Exception as ex:
                    # pylint: disable=W0631
                    log.info('Meta conversion error on %s, %s', k, ex)
                    error('Not a valid %s format (expected %s)' % (
                        right_type, TYPE_EXAMPLES[right_type]),
                          k,
                          supplied=meta[k])

            for k in meta:
                if k not in self._info:
                    maybe_meants = spellsuggest.spell_suggest(
                        k, self._info.keys())
                    suggestion = (" (did you mean '%s'?)" % maybe_meants[0]
                                  if maybe_meants else '')
                    if k not in ('title', 'subtitle'):
                        error("Unexpected field '%s'%s" % (k, suggestion),
                              k, meta[k])
                    else:
                        error("This document type does not have a %s" % k,
                              k, meta[k])
                    continue
                potential_types = PY_TYPE_TO_TYPESETR_TYPES[type(meta[k])]
                right_type = self._info[k]['type']
                if right_type in potential_types:
                    # Already of an acceptable type; only bibliographies
                    # still need to be reified.
                    if right_type == 'bibliography':
                        parsed[k] = try_to_reify(meta[k], Bibliography)

                else:
                    # Rich text is flattened to plain text, which can then
                    # be parsed as the expected type.
                    if 'rich-text' in potential_types:
                        if not isinstance(meta[k], basestring):
                            meta[k] = postprocess.plaintextify(meta[k])
                        potential_types = ('text',)
                    if potential_types == ('text',):
                        parsed[k] = try_to_reify(
                            meta[k],
                            # pylint: disable=W0640
                            lambda v: parse_literal(v, right_type))
                    else:
                        error("Expected meta field '%s:' to be"
                              " of type '%s', not '%s'" % (
                                  k, right_type, potential_types[0]),
                              k, supplied=meta[k])

        def check_required():
            for k in self._required:
                if k == 'title':
                    # see __init__ comment
                    continue
                ## assert 'default' not in self._info[k], \
                ##        "metadata.yml error -- %s has a required default" % k
                if k not in meta:
                    right_type = self._info[k]['type']
                    if right_type == 'text':
                        # The error must be filed under the field's own
                        # key (filing it under '' would drop it from the
                        # result); a missing text field is reported with
                        # an empty supplied value.
                        error('This field is required', k, supplied='')
                    else:
                        error('This field (of type %s)'
                              ' is required' % right_type, k)

        check_supplied()
        check_required()
        meta_ans = OrderedDict()
        missing = [k for k in self._info.keys() if k not in meta]
        for k in list(meta.keys()) + missing:
            if k not in errors:
                meta_ans[k] = OrderedDict()
                if k in meta:
                    meta_ans[k]['supplied'] = unparse_literal(meta[k])
                # the canonical form is a string representation (for now) but
                # we don't just want to use the string that was supplied (e.g.
                # 'YES'), we want to canonicalize it. We achieve that by
                # unparsing the parsed version if any; unparsing a string is
                # idempotent hence values that are already strings are left as
                # is
                canonical = unparse_literal(
                    parsed.get(k, meta.get(k, self._defaults[k])))
                if canonical != meta_ans[k].get('supplied'):
                    meta_ans[k]['canonical'] = canonical
            else:
                meta_ans[k] = errors[k]
            if (self._info.get(k, {}).get('label', self.default_label(k)) !=
                self.default_label(k)): # pylint: disable=C0330
                meta_ans[k]['label'] = self._info[k]['label']
            if k in self._info and self._info[k]['type'] != 'text':
                meta_ans[k]['type'] = self._info[k]['type']
        return MetaInfo(meta_ans)