def handle_p_content(self, e, current_part):
    """Convert one child element of a paragraph into a list of nodes.

    Dispatches on ``e.tag``:

    - runs are delegated to ``self.handle_run``;
    - hyperlinks become an ``a`` element with an ``href``;
    - bookmark starts become an empty named ``a`` element;
    - bookmark ends produce nothing;
    - ``oMath`` elements are delegated to ``self.handle_omath``;
    - anything else is logged and dropped.

    ``current_part`` is only used to look up the relationships needed to
    resolve a hyperlink's relationship id to its target.
    """
    tag = e.tag
    if tag == RUN_TAG:
        return self.handle_run(e)

    if tag == HYPERLINK_TAG:
        rel_id = e.attrib.get(ns.r('id'))
        if rel_id is not None:
            # link resolved through the part's relationships
            relationships = self.doc.get_rels_for(current_part)
            ref = relationships[rel_id].attrib['Target']
        else:
            # no relationship id: link to an anchor in the same document
            ref = '#' + e.attrib[ns.w('anchor')]
        # 'u', 'span' = nuke bogus color and underline
        # styling that google docs likes to add to links;
        # XXX(alexander): rewrite colour less bluntly;
        # this also nukes background color
        convert_child = partial(self.handle_p_content,
                                current_part=current_part)
        children = flatmap(convert_child, e)
        body = whack(('u', 'span').__contains__, children)
        if not body:
            log.warn('hyperlink with no body to: %r', ref)
        return [mkel('a', {'href': ref}, body)]

    if tag == BOOKMARK_END_TAG:
        return []

    if tag == BOOKMARK_START_TAG:
        bookmark_name = e.attrib[ns.w('name')]
        return [mkel('a', {'name': bookmark_name}, [])]

    if tag == ns.m('oMath'):
        return self.handle_omath(e)

    log.warn('Ignoring unknown tag %s', e.tag)
    return []
Beispiel #2
0
 def handle_p_content(self, e, current_part):
     """Turn a single paragraph child element into a list of output nodes.

     The element's tag selects the handling: runs and ``oMath`` elements
     go to ``self.handle_run`` / ``self.handle_omath``, hyperlinks and
     bookmark starts are turned into ``a`` elements, bookmark ends yield
     an empty list, and unrecognised tags are logged and ignored.

     ``current_part`` is needed to resolve a hyperlink's relationship id
     via ``self.doc.get_rels_for``.
     """
     tag = e.tag
     if tag == RUN_TAG:
         return self.handle_run(e)

     if tag == HYPERLINK_TAG:
         rel_id = e.attrib.get(ns.r('id'))
         if rel_id is not None:
             # target comes from the relationships of the current part
             target_el = self.doc.get_rels_for(current_part)[rel_id]
             ref = target_el.attrib['Target']
         else:
             # without a relationship id the link points at a local anchor
             ref = '#' + e.attrib[ns.w('anchor')]
         # 'u', 'span' = nuke bogus color and underline
         # styling that google docs likes to add to links;
         # XXX(alexander): rewrite colour less bluntly;
         # this also nukes background color
         convert_child = partial(self.handle_p_content,
                                 current_part=current_part)
         unwanted = ('u', 'span').__contains__
         body = whack(unwanted, flatmap(convert_child, e))
         if not body:
             log.warn('hyperlink with no body to: %r', ref)
         return [mkel('a', {'href': ref}, body)]

     if tag == BOOKMARK_END_TAG:
         return []

     if tag == BOOKMARK_START_TAG:
         return [mkel('a', {'name': e.attrib[ns.w('name')]}, [])]

     if tag == ns.m('oMath'):
         return self.handle_omath(e)

     log.warn('Ignoring unknown tag %s', e.tag)
     return []
    def gen_journal(entry):
        """Yield at most one ``.journal`` element for a bibliography entry.

        The element groups the journal title, the volume and the issue
        number, each included only when its field exists in
        ``entry.fields``. Volume and number share one ``try`` block, so a
        missing volume also skips the number. Nothing is yielded when no
        part could be built.
        """
        parts = []

        try:
            title_attrs = item_attributes('isPartOf', 'Periodical',
                                          'itemscope')
            parts.append(
                mkel('.journal-title', title_attrs,
                     [entry.fields['journal']]))
        except KeyError:
            pass

        try:
            volume_attrs = item_attributes('volumeNumber')
            parts.append(
                mkel('.volume', volume_attrs, [entry.fields['volume']]))
            number_attrs = item_attributes('issueNumber')
            parts.append(
                mkel('.number', number_attrs,
                     ['(', entry.fields['number'], ')']))
        except KeyError:
            pass

        if not parts:
            return

        journal_attrs = item_attributes('isPartOf', 'PublicationVolume',
                                        'itemscope')
        yield mkel('.journal', journal_attrs, list(intersperse(' ', parts)))
Beispiel #4
0
    def gen_journal(entry):
        """Generate the ``.journal`` element of a bibliography entry, if any.

        Collects up to three parts from ``entry.fields`` -- journal title,
        volume and issue number -- skipping whatever is missing (a missing
        volume also skips the number, since both are built in the same
        ``try`` block). Yields a single element joining the parts with
        spaces, or nothing at all when no part is available.
        """
        pieces = []

        try:
            attrs = item_attributes('isPartOf', 'Periodical', 'itemscope')
            title_el = mkel('.journal-title', attrs,
                            [entry.fields['journal']])
            pieces.append(title_el)
        except KeyError:
            pass

        try:
            attrs = item_attributes('volumeNumber')
            pieces.append(mkel('.volume', attrs, [entry.fields['volume']]))
            attrs = item_attributes('issueNumber')
            pieces.append(
                mkel('.number', attrs, ['(', entry.fields['number'], ')']))
        except KeyError:
            pass

        if not pieces:
            return

        attrs = item_attributes('isPartOf', 'PublicationVolume', 'itemscope')
        yield mkel('.journal', attrs, list(intersperse(' ', pieces)))
def meta_to_runs(what, intern_image, total_w):
    """Convert a metadata value into a list of ``w:r`` run elements.

    ``what`` is handled according to its type:

    - a string becomes a single run holding its text;
    - a list is converted item by item and the results are concatenated;
    - a tuple is unpacked as ``(tag, attrs, body)``; the body is converted
      and, for the tags ``b``/``i``/``s``/``u``, the matching style is
      applied to every resulting run (other tags are logged and their
      body is returned unstyled);
    - a ``literal.Image`` becomes one run containing a picture scaled to
      the requested percentage of ``total_w``;
    - a ``literal.Bibliography`` is converted via its ``data``;
    - anything else is logged, passed through ``unparse_literal`` and
      converted again.

    ``intern_image`` is called with the image and its result is used as
    the picture's relationship id.
    """
    # pylint: disable=R0911
    recurse = partial(meta_to_runs, intern_image=intern_image, total_w=total_w)
    if isinstance(what, basestring):
        return [mkel('w:r', {}, [mk_t(what)])]
    elif isinstance(what, list):
        return flatmap(recurse, what)
    elif isinstance(what, tuple):
        t, _, b = what
        runs = recurse(b)
        if t in ('b', 'i', 's', 'u'):
            return [apply_html_style(t, run) for run in runs]
        else:
            log.warn("Didn't understand html tag %r", what)
            return runs
    elif isinstance(what, literal.Image):
        rid = intern_image(what)
        target_w = parse_percentage(what.style['width']) * total_w
        orig_w, orig_h = what.get_size()
        # Scale both dimensions by target_w / orig_w so the width ends up
        # as target_w and the aspect ratio is kept (dividing by the height
        # instead would make the *height* equal to target_w).
        w, h = [docxlite.Emu(x * target_w / orig_w) for x in (orig_w, orig_h)]
        inline = (what.style['display'] == 'inline')
        return [mkel('w:r', {}, [make_pic(rid, w, h, inline)])]
    elif isinstance(what, literal.Bibliography):
        return recurse(what.data)
    else:
        log.warn('Fallthrough: %r', what)
        return recurse(unparse_literal(what))
Beispiel #6
0
def meta_to_runs(what, intern_image, total_w):
    """Turn a metadata value into a list of ``w:r`` run elements.

    Dispatches on the type of ``what``: strings become one text run,
    lists are flattened recursively, ``(tag, attrs, body)`` tuples have
    their body converted and -- for ``b``/``i``/``s``/``u`` -- styled,
    ``literal.Image`` values become a picture run scaled to a percentage
    of ``total_w``, ``literal.Bibliography`` values are converted via
    their ``data``, and every other value is logged, run through
    ``unparse_literal`` and converted again.

    ``intern_image`` is called with the image; its result is passed on as
    the picture's relationship id.
    """
    # pylint: disable=R0911
    recurse = partial(meta_to_runs, intern_image=intern_image, total_w=total_w)
    if isinstance(what, basestring):
        return [mkel('w:r', {}, [mk_t(what)])]
    elif isinstance(what, list):
        return flatmap(recurse, what)
    elif isinstance(what, tuple):
        t, _, b = what
        runs = recurse(b)
        if t in ('b', 'i', 's', 'u'):
            return [apply_html_style(t, run) for run in runs]
        else:
            log.warn("Didn't understand html tag %r", what)
            return runs
    elif isinstance(what, literal.Image):
        rid = intern_image(what)
        target_w = parse_percentage(what.style['width']) * total_w
        orig_w, orig_h = what.get_size()
        # The scale factor is target_w / orig_w: the width becomes target_w
        # and the height follows proportionally. Using the height as the
        # divisor would set the height, not the width, to target_w.
        w, h = [docxlite.Emu(x * target_w / orig_w) for x in (orig_w, orig_h)]
        inline = (what.style['display'] == 'inline')
        return [mkel('w:r', {}, [make_pic(rid, w, h, inline)])]
    elif isinstance(what, literal.Bibliography):
        return recurse(what.data)
    else:
        log.warn('Fallthrough: %r', what)
        return recurse(unparse_literal(what))
Beispiel #7
0
def _coalesce_blocks(attrs, blocks):
    """Merge consecutive blocks into ``pre`` and ``blockquote`` elements.

    Each item of ``blocks`` is indexed as ``block[2]`` to obtain its body.
    Runs of bodies that consist of exactly one ``code`` element with empty
    attributes are merged into a single ``pre`` element (one line per
    body); runs of other bodies are merged into a single ``blockquote``.
    When the ``class`` entry of ``attrs`` contains ``'right'``, each
    non-code body is wrapped as ``footer``/``cite`` instead of being added
    as (possibly ``p``-wrapped) content.

    An empty body ends the current ``blockquote`` run and is otherwise
    skipped. Every non-empty body ends up in exactly one yielded element,
    in input order.
    """
    B = Var('B')
    bodies = [block[2] for block in blocks]
    pos, end = 0, len(bodies)

    # Walk the bodies by index and only advance past a body once it has
    # been consumed, so the body that terminates one run starts the next.
    while pos < end:
        pre_block = []
        # comparing against the pattern binds the code content to B.val
        while pos < end and bodies[pos] == [('code', {}, B)]:
            pre_block.append(plaintextify(B.val) + '\n')
            pos += 1
        if pre_block:
            yield mkel('pre', {}, pre_block)

        non_pre_block = []
        while pos < end and bodies[pos] != [('code', {}, B)]:
            body = bodies[pos]
            pos += 1
            if not body:
                # an empty body only separates runs
                break
            is_citation = 'right' in attrs.get('class', [])
            if is_citation:
                non_pre_block.append(
                    mkel('footer', {}, [mkel('cite', {}, body)]))
            else:
                if needs_wrapping_in_p(body):
                    body = [mkel('p', {}, body)]
                non_pre_block.extend(body)

        if non_pre_block:
            yield mkel('blockquote', {}, tidy(non_pre_block))
Beispiel #8
0
    def meta_to_docx(meta, intern_image, total_w):
        """Render document metadata as a list of docx paragraph elements.

        The ``title`` and ``subtitle`` items (when present and non-empty)
        come first, each as a paragraph with the paragraph style
        ``Title`` / ``Subtitle``. Every remaining item becomes a paragraph
        of the form ``<u>key:</u> value``. The paragraphs are returned
        converted with ``tup2etree``.
        """
        remaining = meta.raw_items().copy()
        to_runs = partial(meta_to_runs,
                          intern_image=intern_image,
                          total_w=total_w)
        paragraphs = []

        for style_name in ('Title', 'Subtitle'):
            value = remaining.pop(style_name.lower(), None)
            if not value:
                continue
            # FIXME(ash): currently we don't ensure the styles exist
            style_el = mkel('w:pStyle', {'w:val': style_name}, [])
            props = mkel('w:pPr', {}, [style_el])
            paragraphs.append(make_p(props, *to_runs(value)))

        for key, value in remaining.iteritems():
            label_runs = to_runs([mkel('u', {}, [str(key) + ':']), ' '])
            runs = label_runs + to_runs(value)
            paragraphs.append(make_p(*runs))

        return [tup2etree(par, nsmap=ns.dict) for par in paragraphs]
def _coalesce_blocks(attrs, blocks):
    """Group adjacent blocks into ``pre`` and ``blockquote`` elements.

    The body of each block is taken from ``block[2]``. Adjacent bodies
    made of exactly one attribute-less ``code`` element are joined into
    one ``pre`` element, one line per body. Adjacent bodies of any other
    kind are joined into one ``blockquote``; if the ``class`` entry of
    ``attrs`` contains ``'right'`` each of them is wrapped as
    ``footer``/``cite``, otherwise it is added as content (wrapped in a
    ``p`` when ``needs_wrapping_in_p`` says so).

    Empty bodies end the current ``blockquote`` run and are otherwise
    skipped; every non-empty body is emitted exactly once, in order.
    """
    B = Var('B')
    bodies = [block[2] for block in blocks]
    pos, end = 0, len(bodies)

    # A body is only stepped over after it has been consumed; the body
    # that ends one run is therefore the first one examined for the next.
    while pos < end:
        pre_block = []
        # a successful match against the pattern binds the code to B.val
        while pos < end and bodies[pos] == [('code', {}, B)]:
            pre_block.append(plaintextify(B.val) + '\n')
            pos += 1
        if pre_block:
            yield mkel('pre', {}, pre_block)

        non_pre_block = []
        while pos < end and bodies[pos] != [('code', {}, B)]:
            body = bodies[pos]
            pos += 1
            if not body:
                # empty bodies merely separate runs
                break
            is_citation = 'right' in attrs.get('class', [])
            if is_citation:
                non_pre_block.append(mkel('footer', {},
                                          [mkel('cite', {}, body)]))
            else:
                if needs_wrapping_in_p(body):
                    body = [mkel('p', {}, body)]
                non_pre_block.extend(body)

        if non_pre_block:
            yield mkel('blockquote', {}, tidy(non_pre_block))
Beispiel #10
0
    def handle_emphasis(self, emph, body):
        r"""Render bold, italic, underlined or struck-through text as LaTeX.

        This is trickier than it looks: \textbf and \textit cannot span
        paragraphs, while \bfseries and \itshape skip the italic correction
        (so the end of the emphasized text runs into whatever follows,
        because the space is not widened as needed). A single string
        without newlines therefore gets the inline command; other bold or
        italic bodies get the declaration form plus an explicit ``\/``.

        >>> writer = LatexWriter()
        >>> print writer.handle_emphasis('b', ['some bold text'])
        \textbf{some bold text}
        >>> print writer.handle_emphasis(
        ...     'b', [('p', {}, [('i', {},  ['some bold italic'])]), 'text'])
        {\bfseries{}\textit{some bold italic}
        <BLANKLINE>
        text\/}
        >>>

        Strikethrough and underline are worse still. TeX has neither, the
        stock LaTeX \underline is broken (e.g. it makes text impossible to
        line-break), and replacements such as soul's \ul or ulem's \uline
        have odd limitations that break at random. These styles are
        therefore pushed down recursively into the parts of the body.

        >>> print writer.handle_emphasis(
        ...  'u', [('p', {}, [('i', {},
        ...                     [('b', {}, ['ul bold italic'])])]), 'text'])
        {\itshape{}{\bfseries{}\uline{ul bold italic}\/}\/}
        <BLANKLINE>
        \uline{text}

        """
        def is_inline_text(x):
            return isinstance(x, basestring) and '\n' not in x

        # can safely use \textit/\textbf etc.
        INLINE_TEXT = Var('INLINE_TEXT', is_inline_text)  # pylint: disable=C0103
        if body == [INLINE_TEXT]:
            inline_cmd = self.INLINE_EMPH_TO_LATEX[emph]
            return cmd(inline_cmd, [], [self.latexify(body)])

        if emph in ('b', 'i'):
            # need to use itshape/bfseries and do italic correction (r'\/')
            declaration = {'b': 'bfseries', 'i': 'itshape'}[emph]
            return texcmd(declaration, join(self.latexify(body), r'\/'))

        assert emph in ('u', 's')

        def push_down(e):
            # strings get the emphasis directly; elements keep their tag
            # and attributes while each body part is wrapped in `emph`
            if isinstance(e, basestring):
                return self.handle_emphasis(emph, [e])
            wrapped = [mkel(emph, {}, [subbody_part])
                       for subbody_part in e[2]]
            return self.latexify(mkel(*e[:2], body=wrapped))

        # XXX: it might be better to have latexify as the outmost call
        # here rather than join indivudally converted parts. That would
        # allow for further rewrite logic in other parts of the latex
        # converter.
        return join(*[push_down(e) for e in body])
def ensec(heading, section, kill_anchor, gensym):
    """Wrap a heading and the content below it in a ``section`` element.

    ``heading`` is unpacked as ``(tag, attrs, body)`` and its tag must be
    one of ``H_TAGS``. The section receives the attributes returned by
    ``lift_anchor_id``; the heading itself is re-created without
    attributes and placed in front of ``section``.
    """
    tag, heading_attrs, heading_body = heading
    assert tag in H_TAGS
    # reasons for lifting the anchor id to the section include:
    # - epub seems to require sections to have id's
    # - endnotify expects sections to have id's
    section_attrs, heading_body = lift_anchor_id(
        heading_attrs, heading_body, gensym, kill_anchor)
    heading_el = mkel(tag, {}, heading_body)
    return mkel('section', section_attrs, [heading_el] + section)
def ensec(heading, section, kill_anchor, gensym):
    """Build a ``section`` element from a heading and its following content.

    The heading is a ``(tag, attrs, body)`` triple whose tag has to be in
    ``H_TAGS``. ``lift_anchor_id`` supplies the attributes for the section
    and the (possibly changed) heading body; the heading is then emitted
    attribute-less as the first child, followed by ``section``.
    """
    h_tag, h_attrs, h_body = heading
    assert h_tag in H_TAGS
    # reasons for lifting the anchor id to the section include:
    # - epub seems to require sections to have id's
    # - endnotify expects sections to have id's
    sec_attrs, h_body = lift_anchor_id(h_attrs, h_body, gensym, kill_anchor)
    children = [mkel(h_tag, {}, h_body)] + section
    return mkel('section', sec_attrs, children)
    def handle_emphasis(self, emph, body):
        r"""Emit LaTeX for bold, italic, underlined or struck-through text.

        The difficulty: \textbf and \textit do not work across paragraphs,
        whereas \bfseries and \itshape omit the italic correction (the end
        of the emphasized text juts into what follows, because the space
        is not widened as necessary). A body that is one newline-free
        string uses the inline command; any other bold/italic body uses
        the declaration form with a trailing ``\/``.

        >>> writer = LatexWriter()
        >>> print writer.handle_emphasis('b', ['some bold text'])
        \textbf{some bold text}
        >>> print writer.handle_emphasis(
        ...     'b', [('p', {}, [('i', {},  ['some bold italic'])]), 'text'])
        {\bfseries{}\textit{some bold italic}
        <BLANKLINE>
        text\/}
        >>>

        Underline and strikethrough are harder again: TeX offers neither,
        LaTeX's own \underline is broken (e.g. text becomes unbreakable
        across lines), and substitutes like soul's \ul and ulem's \uline
        come with limitations that cause random breakage. Hence these
        styles are pushed down into the body recursively.

        >>> print writer.handle_emphasis(
        ...  'u', [('p', {}, [('i', {},
        ...                     [('b', {}, ['ul bold italic'])])]), 'text'])
        {\itshape{}{\bfseries{}\uline{ul bold italic}\/}\/}
        <BLANKLINE>
        \uline{text}

        """
        def single_line_string(x):
            return isinstance(x, basestring) and '\n' not in x

        # can safely use \textit/\textbf etc.
        INLINE_TEXT = Var('INLINE_TEXT',  # pylint: disable=C0103
                          single_line_string)
        if body == [INLINE_TEXT]:
            return cmd(self.INLINE_EMPH_TO_LATEX[emph], [],
                       [self.latexify(body)])

        if emph in ('b', 'i'):
            # need to use itshape/bfseries and do italic correction (r'\/')
            font_switch = {'b': 'bfseries', 'i': 'itshape'}[emph]
            corrected = join(self.latexify(body), r'\/')
            return texcmd(font_switch, corrected)

        assert emph in ('u', 's')
        # XXX: it might be better to have latexify as the outmost call
        # here rather than join indivudally converted parts. That would
        # allow for further rewrite logic in other parts of the latex
        # converter.
        converted = []
        for e in body:
            if isinstance(e, basestring):
                converted.append(self.handle_emphasis(emph, [e]))
            else:
                # keep the element's tag and attributes, but wrap every
                # part of its body in the emphasis tag
                pushed = [mkel(emph, {}, [subbody_part])
                          for subbody_part in e[2]]
                converted.append(self.latexify(mkel(*e[:2], body=pushed)))
        return join(*converted)
Beispiel #14
0
    def parse_table(self, e, current_part):
        """Convert a table element into a ``table`` node.

        Column widths are read from the ``tblGrid`` element and emitted as
        ``col`` elements with percentage widths. Rows and cells become
        ``tr`` and ``th``/``td`` elements; a cell is a header cell when it
        lies in the first row or first column and the table's ``tblLook``
        flags that row or column as a header. Cell contents are parsed
        with ``self.parse_body``.
        """
        # XXX(ash): simplify
        # pylint: disable=R0914
        def cell_bg(tc):
            # background attributes of a cell, taken from its properties
            props = tc[0]
            if props.tag != TABLE_COLUMN_PROPERTIES_TAG:
                return {}
            bg = val(props, ns.w('shd'), ns.w('fill'))
            if not bg:
                return {}
            return add_bg({}, '#' + bg)

        def skip_past(e, child):
            # iterate the children of `e`, minus a leading `child` element
            first = e[0]
            if first.tag == child:
                return first.itersiblings()
            return e.iterchildren()

        def parse_rows(e, has_header_row, has_header_col):
            def is_header(i, j):
                return ((i == 0 and has_header_row) or
                        (j == 0 and has_header_col))

            rows = []
            for i, tr in enumerate(e.iterfind(TABLE_ROW_TAG)):
                cells = []
                for j, tc in enumerate(tr.iterfind(TABLE_COLUMN_TAG)):
                    cell_tag = 'th' if is_header(i, j) else 'td'
                    cell_attrs = cell_bg(tc)
                    content = self.parse_body(
                        skip_past(tc, TABLE_COLUMN_PROPERTIES_TAG),
                        current_part=current_part)
                    cells.append(mkel(cell_tag, cell_attrs, content))
                rows.append(mkel('tr', {}, cells))
            return rows

        tblPr = first_of_tag(e, ns.w('tblPr'))
        tblGrid = next(tblPr.itersiblings())
        # according to the schema this is always true
        assert tblGrid.tag == ns.w('tblGrid'), tblGrid.tag

        look = tblPr.find(ns.w('tblLook'))
        if look is not None:
            # this is actually the canonical check;
            # the identical per cell/row props are just for caching
            has_header_row = look.attrib.get(ns.w('firstRow')) == "1"
            has_header_col = look.attrib.get(ns.w('firstColumn')) == "1"
        else:
            has_header_row = has_header_col = False

        col_widths = [int(gc.attrib[ns.w('w')])
                      for gc in tblGrid.iterchildren(ns.w('gridCol'))]
        col_total = sum(col_widths)
        cols = []
        for width in col_widths:
            pct = 100. * width / col_total
            cols.append(mkel('col', add_style({}, 'width', '%s%%' % pct), []))

        rows = parse_rows(e, has_header_row, has_header_col)
        return mkel('table', {}, odt_parser.parse_table_body(cols + rows))
    def gen_authors(entry):
        """Yield one ``span.authors`` element listing the entry's authors.

        Each person in ``entry.persons['author']`` becomes an ``.author``
        element. If building that list raises ``KeyError``, a single
        ``Anon`` author is used instead. The authors are separated by
        ``.author-separator`` elements containing ``'; '``.
        """
        def author_el(person):
            attrs = item_attributes('author', 'Person', 'itemscope')
            return mkel('.author', attrs, gen_name(person))

        try:
            authors = [author_el(person) for person in entry.persons['author']]
        except KeyError:
            authors = [mkel('.author', {}, ['Anon'])]

        separator = mkel('.author-separator', {}, ['; '])
        separated = list(intersperse(separator, authors))
        yield mkel('span.authors', {}, separated)
def _parse_body(xml, handle_data_url, parent_tag, footnote_state):
    """Recursively convert the children of ``xml`` into a list of nodes.

    For every child element the tag, attributes and (recursively parsed)
    body are cleaned up by the ``_cleanup_*`` helpers and then either
    turned into an element, replaced by footnote output, turned into a
    ``tex`` command, or -- for tags not in ``ALLOWED_TAGS`` -- replaced by
    their body. Text and tail strings are kept in document order.

    ``parent_tag`` is the tag of the element whose children are being
    parsed; inside ``pre`` the content is returned as plain text.
    ``handle_data_url`` is passed through to ``_de_data_url`` and
    ``footnote_state`` to ``_maybe_handle_footnote``.
    """
    # pylint: disable=R0912,R0914
    if parent_tag == 'pre':
        # preformatted content is not parsed further, only serialized as text
        # NOTE(review): the caller below still prepends e.text and appends
        # e.tail around this result -- confirm text is not duplicated.
        return [etree.tostring(xml, method="text")]
    ans = []
    xml = list(xml)
    for e in xml:
        tag = e.tag
        text = e.text or ''
        tail = e.tail or ''
        # children first; they see this element's tag as their parent tag
        body = _parse_body(xml=e,
                           handle_data_url=handle_data_url,
                           parent_tag=tag,
                           footnote_state=footnote_state)
        # work on a copy of the attributes; the helpers below are called
        # for their effect on `attrs` (their return values are ignored)
        attrs = dict(e.attrib)
        _cleanup_attrs(tag, attrs)
        if text:
            body = [text] + body
        if 'class' in attrs:
            # the class cleanup may also change the tag
            tag = _cleanup_classes(tag, attrs)
        if 'style' in attrs:
            _cleanup_style(tag, attrs)
        if tag == 'figure':
            # put figcaption in canonical order
            _cleanup_fig(attrs, body)
        elif tag == 'img' and parent_tag != 'figure':
            # a bare image gets wrapped in an inline figure of its own
            img_attrs, img_body = attrs, body
            _de_data_url(handle_data_url, img_attrs)
            # XXX(alexander): pop the 'margin' class;
            # the display 'inline' covers that
            if 'margin' in img_attrs.get('class', []):
                img_attrs['class'].remove('margin')
                if not img_attrs['class']:
                    del img_attrs['class']
            tag, attrs = 'figure', {
                'style': OrderedDict([('display', 'inline')])
            }
            body = [mkel('img', img_attrs, img_body)]

            _cleanup_fig(attrs, body)
        _de_data_url(handle_data_url, attrs)
        maybe_anchorize_id(tag, attrs, body)
        # None means "not footnote related"; any list (even an empty one)
        # replaces the element
        footnote = _maybe_handle_footnote(tag, attrs, body, footnote_state)
        if footnote is not None:
            ans.extend(footnote)
        elif tag in ALLOWED_TAGS:
            ans.append(mkel(tag, attrs, body))
        elif tag == '.tex2jax_process':
            ans.append(mkcmd('tex', body))
        else:
            # drop the tag itself but keep what it contained
            log.info('Stripping non-allowed tag %s', tag)
            ans.extend(body)
        if tail:
            ans.append(tail)
    return ans
 def split_footnote(e):
     """Replace a ``.footnote`` element by a numbered link to an endnote.

     Elements with any other tag are returned unchanged. For a footnote
     the enclosing counter is incremented, an ``aside`` holding the
     footnote body is appended to ``endnotes`` and an ``a`` element
     pointing at it is returned. The footnote's own ``id`` is used when
     present; otherwise one is derived from the section id and the number.
     """
     tag, attrs, body = e
     if tag != '.footnote':
         return e
     counter[0] += 1
     number = counter[0]
     if 'id' in attrs:
         fid = attrs['id']
     else:
         fid = '%s-fn%d' % (secid, number)
     note_attrs = merge_attrs(attrs, aside_attrs, {'id': fid})
     endnotes.append(mkel('aside', note_attrs, body))
     link_attrs = merge_attrs(a_attrs, {'href': '#' + fid})
     return mkel('a', link_attrs, [str(number)])
def _parse_body(xml, handle_data_url, parent_tag, footnote_state):
    """Recursively turn the children of ``xml`` into a list of nodes.

    Each child element has its body parsed recursively and its tag and
    attributes cleaned by the ``_cleanup_*`` helpers. It is then emitted
    as an element, replaced by the footnote handler's output, emitted as a
    ``tex`` command, or -- if its tag is not in ``ALLOWED_TAGS`` --
    replaced by its body. Text and tail strings stay in document order.

    ``parent_tag`` names the element whose children are parsed; children
    of ``pre`` are returned as plain text. ``handle_data_url`` is handed
    to ``_de_data_url`` and ``footnote_state`` to
    ``_maybe_handle_footnote``.
    """
    # pylint: disable=R0912,R0914
    if parent_tag == 'pre':
        # inside <pre> nothing is parsed, the content is serialized as text
        # NOTE(review): the calling iteration still adds e.text in front and
        # e.tail behind this result -- confirm text is not duplicated.
        return [etree.tostring(xml, method="text")]
    ans = []
    xml = list(xml)
    for e in xml:
        tag = e.tag
        text = e.text or ''
        tail = e.tail or ''
        # parse the children, which see this element's tag as parent tag
        body = _parse_body(xml=e,
                           handle_data_url=handle_data_url,
                           parent_tag=tag,
                           footnote_state=footnote_state)
        # copy the attributes; the helpers below are called only for their
        # effect on `attrs` (nothing they return is used)
        attrs = dict(e.attrib)
        _cleanup_attrs(tag, attrs)
        if text:
            body = [text] + body
        if 'class' in attrs:
            # cleaning up the classes can replace the tag as well
            tag = _cleanup_classes(tag, attrs)
        if 'style' in attrs:
            _cleanup_style(tag, attrs)
        if tag == 'figure':
            # put figcaption in canonical order
            _cleanup_fig(attrs, body)
        elif tag == 'img' and parent_tag != 'figure':
            # an image outside a figure is wrapped in its own inline figure
            img_attrs, img_body = attrs, body
            _de_data_url(handle_data_url, img_attrs)
            # XXX(alexander): pop the 'margin' class;
            # the display 'inline' covers that
            if 'margin' in img_attrs.get('class', []):
                img_attrs['class'].remove('margin')
                if not img_attrs['class']:
                    del img_attrs['class']
            tag, attrs = 'figure', {'style':
                                    OrderedDict([('display', 'inline')])}
            body = [mkel('img', img_attrs, img_body)]

            _cleanup_fig(attrs, body)
        _de_data_url(handle_data_url, attrs)
        maybe_anchorize_id(tag, attrs, body)
        # a result of None means the element is not footnote related; any
        # list, including an empty one, takes the element's place
        footnote = _maybe_handle_footnote(tag, attrs, body, footnote_state)
        if footnote is not None:
            ans.extend(footnote)
        elif tag in ALLOWED_TAGS:
            ans.append(mkel(tag, attrs, body))
        elif tag == '.tex2jax_process':
            ans.append(mkcmd('tex', body))
        else:
            # the tag is dropped, its content is kept
            log.info('Stripping non-allowed tag %s', tag)
            ans.extend(body)
        if tail:
            ans.append(tail)
    return ans
 def split_footnote(e):
     """Turn a ``.footnote`` element into a note reference plus an endnote.

     Any element whose tag is not ``.footnote`` is passed through as is.
     Otherwise the shared counter is bumped, the footnote body is stored
     in ``endnotes`` as an ``aside`` and a numbered ``a`` element linking
     to that aside is returned. The id of the aside is the footnote's own
     ``id`` if it has one, else ``<section id>-fn<number>``.
     """
     tag, attrs, body = e
     if tag != '.footnote':
         return e
     counter[0] += 1
     label = [str(counter[0])]
     if 'id' not in attrs:
         fid = '%s-fn%d' % (secid, counter[0])
     else:
         fid = attrs['id']
     aside = mkel('aside', merge_attrs(attrs, aside_attrs, {'id': fid}), body)
     endnotes.append(aside)
     return mkel('a', merge_attrs(a_attrs, {'href': '#' + fid}), label)
    def parse_table(self, e, current_part):
        """Translate a table element into a ``table`` node.

        The ``tblGrid`` element supplies the column widths, which are
        emitted as ``col`` elements with percentage widths. Each row
        becomes a ``tr`` and each cell a ``th`` or ``td``: header cells
        are those in the first row/column when the ``tblLook`` element of
        the table properties flags them as such. Cell bodies are parsed
        with ``self.parse_body``.
        """
        # XXX(ash): simplify
        # pylint: disable=R0914
        def cell_bg(tc):
            # attributes carrying the cell's background colour, if it has one
            head = tc[0]
            if head.tag == TABLE_COLUMN_PROPERTIES_TAG:
                bg = val(head, ns.w('shd'), ns.w('fill'))
                if bg:
                    return add_bg({}, '#' + bg)
            return {}

        def skip_past(e, child):
            # children of `e`, leaving out a leading `child` element
            head = e[0]
            return head.itersiblings() if head.tag == child else e.iterchildren()

        def parse_rows(e, has_header_row, has_header_col):
            def is_header(i, j):
                return ((i == 0 and has_header_row) or
                        (j == 0 and has_header_col))

            def parse_cell(i, j, tc):
                cell_tag = 'th' if is_header(i, j) else 'td'
                cell_attrs = cell_bg(tc)
                content = self.parse_body(
                    skip_past(tc, TABLE_COLUMN_PROPERTIES_TAG),
                    current_part=current_part)
                return mkel(cell_tag, cell_attrs, content)

            rows = []
            for i, tr in enumerate(e.iterfind(TABLE_ROW_TAG)):
                cells = [parse_cell(i, j, tc)
                         for (j, tc) in enumerate(tr.iterfind(TABLE_COLUMN_TAG))]
                rows.append(mkel('tr', {}, cells))
            return rows

        tblPr = first_of_tag(e, ns.w('tblPr'))
        tblGrid = next(tblPr.itersiblings())
        # according to the schema this is always true
        assert tblGrid.tag == ns.w('tblGrid'), tblGrid.tag

        look = tblPr.find(ns.w('tblLook'))
        if look is not None:
            # this is actually the canonical check;
            # the identical per cell/row props are just for caching
            has_header_row = look.attrib.get(ns.w('firstRow')) == "1"
            has_header_col = look.attrib.get(ns.w('firstColumn')) == "1"
        else:
            has_header_row = has_header_col = False

        grid_cols = tblGrid.iterchildren(ns.w('gridCol'))
        col_widths = [int(gc.attrib[ns.w('w')]) for gc in grid_cols]
        col_total = sum(col_widths)
        cols = []
        for width in col_widths:
            pct = 100. * width / col_total
            cols.append(mkel('col', add_style({}, 'width', '%s%%' % pct), []))

        rows = parse_rows(e, has_header_row, has_header_col)
        table = odt_parser.parse_table_body(cols + rows)
        return mkel('table', {}, table)
        def parse_rows(e, has_header_row, has_header_col):
            """Build the ``tr`` elements for all rows found in ``e``.

            A cell becomes ``th`` when it is in the first row and
            ``has_header_row`` is set, or in the first column and
            ``has_header_col`` is set; otherwise it becomes ``td``.
            Relies on ``self``, ``current_part``, ``cell_bg`` and
            ``skip_past`` from the enclosing scope.
            """
            def is_header(i, j):
                return ((i == 0 and has_header_row) or
                        (j == 0 and has_header_col))

            rows = []
            for i, tr in enumerate(e.iterfind(TABLE_ROW_TAG)):
                cells = []
                for j, tc in enumerate(tr.iterfind(TABLE_COLUMN_TAG)):
                    cell_tag = 'th' if is_header(i, j) else 'td'
                    cell_attrs = cell_bg(tc)
                    content = self.parse_body(
                        skip_past(tc, TABLE_COLUMN_PROPERTIES_TAG),
                        current_part=current_part)
                    cells.append(mkel(cell_tag, cell_attrs, content))
                rows.append(mkel('tr', {}, cells))
            return rows
Beispiel #22
0
    def gen_authors(entry):
        """Generate the ``span.authors`` element of a bibliography entry.

        Builds one ``.author`` element per person listed under
        ``entry.persons['author']``; when that raises ``KeyError`` the
        list falls back to a single ``Anon`` author. The result is yielded
        as one element with ``'; '`` separators between the authors.
        """
        try:
            authors = []
            for person in entry.persons['author']:
                attrs = item_attributes('author', 'Person', 'itemscope')
                authors.append(mkel('.author', attrs, gen_name(person)))
        except KeyError:
            authors = [mkel('.author', {}, ['Anon'])]

        separator = mkel('.author-separator', {}, ['; '])
        children = list(intersperse(separator, authors))
        yield mkel('span.authors', {}, children)
Beispiel #23
0
        def parse_rows(e, has_header_row, has_header_col):
            """Return one ``tr`` element per table row found in ``e``.

            Cells in the first row are ``th`` when ``has_header_row`` is
            set, cells in the first column are ``th`` when
            ``has_header_col`` is set, and all other cells are ``td``.
            Uses ``self``, ``current_part``, ``cell_bg`` and ``skip_past``
            from the enclosing scope.
            """
            def is_header(i, j):
                return ((i == 0 and has_header_row) or
                        (j == 0 and has_header_col))

            def parse_cell(i, j, tc):
                cell_tag = 'th' if is_header(i, j) else 'td'
                cell_attrs = cell_bg(tc)
                content = self.parse_body(
                    skip_past(tc, TABLE_COLUMN_PROPERTIES_TAG),
                    current_part=current_part)
                return mkel(cell_tag, cell_attrs, content)

            rows = []
            for i, tr in enumerate(e.iterfind(TABLE_ROW_TAG)):
                cells = [parse_cell(i, j, tc)
                         for (j, tc) in enumerate(tr.iterfind(TABLE_COLUMN_TAG))]
                rows.append(mkel('tr', {}, cells))
            return rows
Beispiel #24
0
 def gen_url(entry):
     """Yield an ``a.url`` link for the entry's ``url`` field, if it has one.

     The URL is used both as ``href`` and as link text. A ``KeyError``
     (e.g. from a missing ``url`` field) results in nothing being yielded.
     """
     link_attrs = item_attributes('url')
     try:
         url = entry.fields['url']
         link_attrs['href'] = url
         yield mkel('a.url', link_attrs, [url])
     except KeyError:
         pass
def _maybe_handle_footnote(tag, attrs, body, footnote_state):
    classes = attrs.get('class', [])
    if tag == 'a' and 'noteref' in classes:
        href = attrs['href']
        if href.startswith('#'):
            # pylint: disable=W0622
            id = href[1:]
            footnote_state[id] = body
            return [mkel('.footnote', {}, body)]
        else:
            # XXX(ash): make user-visible
            log.warn("Found a footnote reference but didn't understand its "
                     "href (%s), so skipping it.", href)
            return []
    if tag == 'aside' and 'endnote' in classes:
        print attrs, body
        id_ = attrs['id']
        old_body = footnote_state.pop(id_, None)
        if old_body is None:
            # XXX(ash): make user-visible
            log.warn("Found a footnote body but its id (%s) doesn't match any "
                     "footnote reference seen previously, so skipping it.", id_)
        else:
            # overwrite the body of the anchor we saved earlier
            old_body[:] = body
        return []
    return None
 def gen_url(entry):
     """Generate the ``a.url`` element of a bibliography entry.

     Yields one link whose ``href`` and text are both the entry's ``url``
     field; yields nothing when a ``KeyError`` occurs (e.g. because the
     entry has no such field).
     """
     attrs = item_attributes('url')
     try:
         target = entry.fields['url']
         attrs['href'] = target
         yield mkel('a.url', attrs, [target])
     except KeyError:
         pass
def _maybe_handle_footnote(tag, attrs, body, footnote_state):
    classes = attrs.get('class', [])
    if tag == 'a' and 'noteref' in classes:
        href = attrs['href']
        if href.startswith('#'):
            # pylint: disable=W0622
            id = href[1:]
            footnote_state[id] = body
            return [mkel('.footnote', {}, body)]
        else:
            # XXX(ash): make user-visible
            log.warn(
                "Found a footnote reference but didn't understand its "
                "href (%s), so skipping it.", href)
            return []
    if tag == 'aside' and 'endnote' in classes:
        print attrs, body
        id_ = attrs['id']
        old_body = footnote_state.pop(id_, None)
        if old_body is None:
            # XXX(ash): make user-visible
            log.warn(
                "Found a footnote body but its id (%s) doesn't match any "
                "footnote reference seen previously, so skipping it.", id_)
        else:
            # overwrite the body of the anchor we saved earlier
            old_body[:] = body
        return []
    return None
    def gen_entry(entry, key):
        """Build the ``li.ref`` element for one bibliography entry.

        The entry type selects a list of generator functions; everything
        they yield for ``entry`` is joined with ``'. '`` and terminated
        with ``'.'``. The element carries the citation item attributes for
        the entry type and the anchor id derived from ``key``. Entry types
        without a table entry are treated like ``misc``.
        """
        generators_by_type = {
            'article': [gen_authors, gen_year, gen_title,
                        gen_journal, gen_page],
            'book': [gen_authors, gen_year, gen_title, gen_publisher],
            'proceedings': [gen_authors, gen_year, gen_title, gen_publisher],
            # NOTE(review): gen_title appears twice here -- confirm whether
            # this is intended.
            'inbook': [gen_authors, gen_year, gen_title, gen_title,
                       gen_publisher, gen_chapter, gen_page],
            'phdthesis': [gen_authors, gen_year, gen_title,
                          gen_text('PhD diss.'), gen_school],
            'inproceedings': [gen_authors, gen_year, gen_title, gen_booktitle,
                              gen_publisher, gen_page],
            'mastersthesis': [gen_authors, gen_year, gen_title,
                              gen_text('Master diss.'), gen_school],
            'misc': [gen_authors, gen_year, gen_title, gen_url]
        }
        # The fallback has to be the list of generators itself; falling
        # back to the string 'misc' would make the loop below iterate over
        # its characters and fail when calling them.
        generators = generators_by_type.get(entry.type,
                                            generators_by_type['misc'])

        li_fields = [field for gen in generators for field in gen(entry)]
        li_fields = list(intersperse('. ', li_fields))
        li_fields.append('.')

        li_types = {
            'article': 'ScholarlyArticle',
            'book': 'Book',
            'proceedings': 'ConferenceProceedings',
            'inbook': 'BookChapter',
            'phdthesis': 'PhdThesis',
            'inproceedings': 'ConferenceProceedings',
            'mastersthesis': 'MasterThesis',
            'misc': 'Misc'
        }
        # unknown types use the same item type as an explicit 'misc' entry
        li_type = li_types.get(entry.type, li_types['misc'])

        li_attributes = item_attributes('citation', li_type, 'itemscope')
        li_attributes['id'] = _bibliography_anchor(key)

        return mkel('li.ref', li_attributes, li_fields)
def make_toc(title, lang, toc, toc_depth, titlepage=False):
    """Return the table-of-contents page as an HTML string.

    The page lists an optional title-page entry followed by one entry per
    tuple found in `toc`, each linking to ``main.xhtml#<id>``. Only
    ``toc_depth == 1`` is supported. The landmarks produced by
    `make_landmarks` are appended after the ``nav`` element.
    """
    nsmap = {None: 'http://www.w3.org/1999/xhtml',
             'epub': 'http://www.idpf.org/2007/ops'}
    entries = []

    if titlepage:
        titlepage_link = ('a', {'href': 'titlepage.xhtml'}, [title])
        entries.append(mkel('li', {'id': 'toc-titlepage'}, [titlepage_link]))
    # FIXME(alexander): make this work for arbitrary toc-depth;
    # also don't tie to single-html file layout/name.
    assert toc_depth == 1
    chapters = [h for h in toc if isinstance(h, tuple)]
    # Each chapter heading must have exactly one body item (its text).
    for number, (_tag, attrs, (heading,)) in enumerate(chapters, 1):
        link = ('a', {'href': 'main.xhtml#%s' % attrs['id']}, [heading])
        entry_attrs = {'class': 'toc-chapter',
                       'id': 'toc-chapter-%d' % number}
        entries.append(('li', entry_attrs, [link]))
    landmarks = make_landmarks(title, lang)

    header = ('header', {}, [('h1', {}, [lang.localize('Contents')])])
    nav = ('nav', {'epub:type': 'toc', 'id': 'toc'}, [('ol', {}, entries)])
    section = ('section',
               {'class': 'frontmatter toc', 'epub:type': 'frontmatter toc'},
               [header, nav, landmarks])
    return html_string_from_body(('body', {}, [section]),
                                 title=title, nsmap=nsmap)
def _opf_item(href, id=None, mime=None, properties=None): #pylint: disable=W0622
    """Create a manifest ``item`` element for `href`.

    A falsy `id` is replaced by the part of `href` before its first '.',
    with '/' turned into '-'; a falsy `mime` is replaced by
    ``mimetype_of_url(href)``. `properties` is only emitted when it is
    not None.
    """
    if not id:
        id = href.split('.')[0].replace('/', '-')
    if not mime:
        mime = mimetype_of_url(href)
    attrs = {'id': id, 'href': href, 'media-type': mime}
    if properties is not None:
        attrs['properties'] = properties
    return mkel('item', attrs, [])
def endnotify(body, aside_attrs, a_attrs, section_attrs):
    """Transform .footnotes to noterefs and chapter rearnotes.

    Assumes that `body` is a list of `<section>s`. Within each section
    every `.footnote` is replaced by a numbered `<a>` reference, and the
    note contents are collected as `<aside>`s in an extra `<section>`
    appended to the end of that section's body. Numbering restarts in
    every section.
    """
    result = []
    for sec_tag, sec_attrs, sec_body in body:
        section_id = sec_attrs['id']
        # pylint: disable=W0640
        note_count = [0]  # one-element list: mutable from the closure
        collected = []

        def split_footnote(elem):
            tag, attrs, content = elem
            if tag != '.footnote':
                return elem
            note_count[0] += 1
            label = [str(note_count[0])]
            # A footnote that carries its own id keeps it.
            if 'id' in attrs:
                fid = attrs['id']
            else:
                fid = '%s-fn%d' % (section_id, note_count[0])
            aside = mkel('aside',
                         merge_attrs(attrs, aside_attrs, {'id': fid}),
                         content)
            collected.append(aside)
            return mkel('a', merge_attrs(a_attrs, {'href': '#' + fid}),
                        label)

        new_body = _transform(split_footnote, sec_body)
        if collected:
            new_body.append(mkel('section', section_attrs, collected))
        result.append((sec_tag, sec_attrs, new_body))
    return result
def endnotify(body, aside_attrs, a_attrs, section_attrs):
    """Transform .footnotes to noterefs and chapter rearnotes.

    Assumes that `body` is a list of `<section>s`. Each `.footnote` found
    in a section becomes an `<a>` carrying the note's ordinal, while the
    note itself is moved into an `<aside>`; a section's asides are gathered
    in a trailing `<section>` appended to its body.
    """
    converted = []
    for section in body:
        tag, attrs, contents = section
        secid = attrs['id']
        # pylint: disable=W0640
        seen = [0]  # footnote counter for this section, shared with closure
        asides = []

        def split_footnote(node):
            node_tag, node_attrs, node_body = node
            if node_tag != '.footnote':
                return node
            number = seen[0] + 1
            seen[0] = number
            # Reuse the footnote's own id when it has one.
            fid = (node_attrs['id'] if 'id' in node_attrs
                   else '%s-fn%d' % (secid, number))
            asides.append(
                mkel('aside',
                     merge_attrs(node_attrs, aside_attrs, {'id': fid}),
                     node_body))
            return mkel('a', merge_attrs(a_attrs, {'href': '#' + fid}),
                        [str(number)])

        contents = _transform(split_footnote, contents)
        if asides:
            contents.append(mkel('section', section_attrs, asides))
        converted.append((tag, attrs, contents))
    return converted
def _coalesce_siblings(tag, attrs, sibling_group):
    """Yield the merged form of a group of sibling elements.

    The siblings' bodies are combined and tidied. An attribute-less
    ``span`` is dissolved, yielding its contents one by one; any other
    tag/attrs yields a single element wrapping the merged body.
    """
    merged_body = tidy(_sib_bodies(sibling_group))
    if (tag, attrs) != ('span', {}):
        yield mkel(tag, attrs, merged_body)
        return
    for piece in merged_body:
        yield piece
Beispiel #34
0
    def transclude(self, pic):
        """Turn the picture element `pic` into a one-element figure list.

        Returns ``[]`` when no transclusions are configured, or (with a
        warning) when `pic` does not contain exactly one ``r:embed`` image
        id. The figure's relative width is the picture extent divided by
        ``self.textwidth_emu``.
        """
        if self.transclusions is None:
            return []

        width_emu = float(val(pic, ns.wp('extent'), 'cx'))
        embeds = pic.xpath('.//a:blip/@r:embed', namespaces=ns.dict)
        try:
            (embed_id,) = embeds
        except ValueError:
            log.warn('Expected exactly one r:embed with an image id, got %r',
                     embeds)
            return []

        href = self.transclusions.normalize_known_transclusion(embed_id)
        relwidth = width_emu / self.textwidth_emu
        # The local tag name (after the '}' of the namespace) decides the
        # placement; any other tag raises KeyError.
        local_name = pic.tag.split('}')[1]
        is_inline = {'anchor': False, 'inline': True}[local_name]
        image = mkel('img', {'src': href}, [])
        figure = make_figure(relwidth=relwidth,
                             inline=is_inline,
                             body=[image],
                             src=href,
                             original_href=embed_id)
        return [figure]
Beispiel #35
0
def _coalesce_siblings(tag, attrs, sibling_group):
    """Generate the coalesced replacement for `sibling_group`.

    The bodies of all siblings are merged and tidied first. For a plain
    ``span`` without attributes the merged items are yielded directly
    (the wrapper is dropped); otherwise one ``tag``/``attrs`` element
    containing the merged items is yielded.
    """
    contents = tidy(_sib_bodies(sibling_group))
    is_bare_span = (tag, attrs) == ('span', {})
    if not is_bare_span:
        yield mkel(tag, attrs, contents)
    else:
        for item in contents:
            yield item
def make_toc(title, lang, toc, toc_depth, titlepage=False):
    """Render the contents page and return it as an HTML string.

    `toc` is filtered down to its tuple items (the chapter headings); each
    one gets a numbered ``li.toc-chapter`` linking into ``main.xhtml``.
    With `titlepage` set, a link to ``titlepage.xhtml`` comes first.
    `toc_depth` must be 1.
    """
    xhtml_ns = {
        None: 'http://www.w3.org/1999/xhtml',
        'epub': 'http://www.idpf.org/2007/ops',
    }
    items = []

    if titlepage:
        items.append(
            mkel('li', {'id': 'toc-titlepage'},
                 [('a', {'href': 'titlepage.xhtml'}, [title])]))
    # FIXME(alexander): make this work for arbitrary toc-depth;
    # also don't tie to single-html file layout/name.
    assert toc_depth == 1

    def chapter_item(index, heading_elem):
        # A heading element must carry exactly one body item.
        _tag, heading_attrs, (text,) = heading_elem
        target = 'main.xhtml#%s' % heading_attrs['id']
        return ('li',
                {'class': 'toc-chapter', 'id': 'toc-chapter-%d' % index},
                [('a', {'href': target}, [text])])

    headings = [h for h in toc if isinstance(h, tuple)]
    items.extend(chapter_item(i, h) for (i, h) in zip(count(1), headings))
    landmarks = make_landmarks(title, lang)

    section_attrs = {
        'class': 'frontmatter toc',
        'epub:type': 'frontmatter toc',
    }
    section_body = [
        ('header', {}, [('h1', {}, [lang.localize('Contents')])]),
        ('nav', {'epub:type': 'toc', 'id': 'toc'}, [('ol', {}, items)]),
        landmarks,
    ]
    page_body = ('body', {}, [('section', section_attrs, section_body)])
    return html_string_from_body(page_body, title=title, nsmap=xhtml_ns)
 def make_footnote(self, e):
     """Build a ``.footnote`` element for the note reference `e`.

     The note is looked up by the reference's ``w:id`` among the footnotes
     (for a footnote reference) or the endnotes (otherwise); each of its
     paragraphs is converted with `handle_p`.
     """
     note_id = e.attrib[ns.w('id')]
     if e.tag == FOOTNOTE_REFERENCE_TAG:
         lookup = self.doc.get_footnote
     else:
         lookup = self.doc.get_endnote
     paragraphs = lookup(note_id).iterfind(P_TAG)
     footnote_part = 'footnotes'  # XXX what about endnotes
     converted = [self.handle_p(p, current_part=footnote_part)
                  for p in paragraphs]
     return mkel('.footnote', {}, converted)
 def handle_p(self, e, current_part, in_list=False):
     """Convert the paragraph element `e` into an output element.

     The paragraph properties supply the justification class (looked up
     in ``JC_TO_CLASS``) and the style that picks the output tag. The
     remaining children are converted with `handle_p_content`. Outside
     of lists, a paragraph with a non-zero left indent (in units of
     ``self.default_indent_twips``, rounded) is wrapped in a ``.block``
     carrying that indent.
     """
     pPr = first_of_tag(e, P_PROPS_TAG)
     attrs = {}
     jc_class = self.JC_TO_CLASS.get(val(pPr, ns.w('jc')))
     if jc_class:
         attrs = add_class(attrs, jc_class)
     tag = style_to_tag(val(pPr, ns.w('pStyle')) or '')
     # Without a properties child every child is content; otherwise
     # only the siblings following it are.
     if pPr is None:
         children = iter(e)
     else:
         children = pPr.itersiblings()
     convert = partial(self.handle_p_content, current_part=current_part)
     paragraph = mkel(tag, attrs, flatmap(convert, children))
     left_indent = val(pPr, ns.w('ind'), ns.w('left')) or 0.0
     indent = int(round(float(left_indent) / self.default_indent_twips))
     if in_list or not indent:
         return paragraph
     block = mkel('.block', {'indent': indent}, [lift_code(paragraph)])
     return hacky_flatten_block(block)
Beispiel #39
0
 def handle_p(self, e, current_part, in_list=False):
     """Translate one paragraph element into its output representation.

     Reads the paragraph properties (if any) for justification, style and
     left indent, converts the paragraph's content children through
     `handle_p_content`, and -- when not inside a list and the rounded
     indent level is non-zero -- wraps the result in an indented
     ``.block`` that is then flattened by `hacky_flatten_block`.
     """
     props = first_of_tag(e, P_PROPS_TAG)
     justification = self.JC_TO_CLASS.get(val(props, ns.w('jc')))
     attrs = add_class({}, justification) if justification else {}
     tag = style_to_tag(val(props, ns.w('pStyle')) or '')
     content = props.itersiblings() if props is not None else iter(e)
     content_handler = partial(self.handle_p_content,
                               current_part=current_part)
     result = mkel(tag, attrs, flatmap(content_handler, content))
     left_twips = val(props, ns.w('ind'), ns.w('left')) or 0.0
     levels = int(round(float(left_twips) / self.default_indent_twips))
     if levels and not in_list:
         result = lift_code(result)
         result = hacky_flatten_block(
             mkel('.block', {'indent': levels}, [result]))
     return result
Beispiel #40
0
 def make_footnote(self, e):
     """Return a ``.footnote`` element holding the note `e` refers to.

     `e` is a footnote or endnote reference; its ``w:id`` attribute selects
     the note, whose paragraphs are each run through `handle_p`.
     """
     is_footnote = e.tag == FOOTNOTE_REFERENCE_TAG
     get_note = self.doc.get_footnote if is_footnote else self.doc.get_endnote
     note = get_note(e.attrib[ns.w('id')])
     footnote_part = 'footnotes'  # XXX what about endnotes
     body = []
     for p in note.iterfind(P_TAG):
         body.append(self.handle_p(p, current_part=footnote_part))
     return mkel('.footnote', {}, body)
Beispiel #41
0
def meta_to_html(meta):
    """Render document metadata as leading HTML elements.

    Returns a ``(language code, title, elements)`` triple. `elements`
    holds an optional ``h1.title``, an optional ``h2.subtitle`` and, when
    at least one property is marked as supplied in ``meta.d``, a
    ``dl#document-properties`` list with one dt/dd pair per such property.
    """
    # pylint: disable=R0914
    # FIXME(alexander): this is just a really hacky way to convert the
    # document properties into something vaguely visually plausible in a style
    # independent manner
    head = meta.items()
    lang = head['lang']
    elements = []
    # FIXME(alexander): should maybe default to docname?
    # the only style which does currently not have a title
    # is letter, so could use subject there
    title = head.pop('title', '')
    if title:
        elements.append(mkel('h1', {'class': ['title']}, [title]))
    subtitle = head.pop('subtitle', '')
    if subtitle:
        elements.append(mkel('h2', {'class': ['subtitle']}, [subtitle]))

    #FIXME(alexander): toc-depth should be int,
    #                  and bibliography-preamble must die
    hidden_keys = ('toc-depth', 'bibliography-preamble')
    hidden_types = (int, Bibliography, Image, Lang)
    properties = []
    for (key, value) in head.iteritems():
        # Properties that were not explicitly supplied are left out.
        if 'supplied' not in meta.d[key]:
            continue
        dt_attrs = {'class': [key]}
        if isinstance(value, hidden_types) or key in hidden_keys:
            dt_attrs['hidden'] = ""
        label = meta.d[key].get('label', key.capitalize())
        properties.append(mkel('dt', dt_attrs, [lang.localize(label)]))

        dd_attrs = dt_attrs.copy()
        display = unparse_literal(value, roundtrip=False)
        roundtrippable = unparse_literal(value)
        # Keep the exact value around when the display form is lossy.
        if display != roundtrippable:
            dd_attrs['data-value'] = roundtrippable
        if isinstance(value, bool):
            shown = lang.localize(display)
        else:
            shown = display
        properties.append(mkel('dd', dd_attrs, [shown]))

    if properties:
        elements.append(
            mkel('dl', {'id': 'document-properties'}, properties))

    return lang.code, title, elements
def _opf_item(href, id=None, mime=None, properties=None):  #pylint: disable=W0622
    """Return an ``item`` element describing the resource at `href`.

    Without an explicit `id`, one is derived from `href` (text before the
    first '.', slashes replaced by dashes). Without an explicit `mime`,
    the media type comes from ``mimetype_of_url``. A `properties` value
    other than None is added as the ``properties`` attribute.
    """
    item_id = id or href.split('.')[0].replace('/', '-')
    media_type = mime or mimetype_of_url(href)
    attrs = dict([('id', item_id), ('href', href),
                  ('media-type', media_type)])
    if properties is None:
        return mkel('item', attrs, [])
    attrs['properties'] = properties
    return mkel('item', attrs, [])
def meta_to_html(meta):
    """Convert the document properties in `meta` to HTML elements.

    The title and subtitle (when non-empty) become ``h1``/``h2`` headings;
    every other property flagged as supplied in ``meta.d`` contributes a
    ``dt``/``dd`` pair to a ``dl#document-properties`` list. Returns
    ``(lang.code, title, list_of_elements)``.
    """
    # pylint: disable=R0914
    # FIXME(alexander): this is just a really hacky way to convert the
    # document properties into something vaguely visually plausible in a style
    # independent manner
    head = meta.items()
    lang = head['lang']
    prepend = []

    def add_heading(tag, css_class, text):
        if text:
            prepend.append(mkel(tag, {'class': [css_class]}, [text]))

    # FIXME(alexander): should maybe default to docname?
    # the only style which does currently not have a title
    # is letter, so could use subject there
    title = head.pop('title', '')
    add_heading('h1', 'title', title)
    add_heading('h2', 'subtitle', head.pop('subtitle', ''))

    types_to_omit = (int, Bibliography, Image, Lang)
    #FIXME(alexander): toc-depth should be int,
    #                  and bibliography-preamble must die
    keys_to_omit = ('toc-depth', 'bibliography-preamble')
    dl_body = []
    for (name, value) in head.iteritems():
        supplied = 'supplied' in meta.d[name]
        if not supplied:
            continue
        term_attrs = {'class': [name]}
        omit = isinstance(value, types_to_omit) or name in keys_to_omit
        if omit:
            term_attrs['hidden'] = ""
        label = meta.d[name].get('label', name.capitalize())
        dl_body.append(mkel('dt', term_attrs, [lang.localize(label)]))

        value_attrs = term_attrs.copy()
        plain = unparse_literal(value, roundtrip=False)
        exact = unparse_literal(value)
        # Only store the exact literal when it differs from what is shown.
        if plain != exact:
            value_attrs['data-value'] = exact
        text = lang.localize(plain) if isinstance(value, bool) else plain
        dl_body.append(mkel('dd', value_attrs, [text]))

    if dl_body:
        prepend.append(mkel('dl', {'id': 'document-properties'}, dl_body))

    return lang.code, title, prepend
def maybe_anchorize_id(tag, attrs, body):
    """DESTRUCTIVELY push the id into an anchor in the body, in most cases.

    Anything w/ an id should be linkable; the id should not be used
    otherwise. For every tag except ``dl``, ``ol``, ``ul`` and ``aside``
    the id is removed from `attrs` and a named, empty ``<a>`` is inserted
    at the front of `body`. Both arguments are modified in place; nothing
    is returned.
    """
    if 'id' not in attrs:
        return
    if tag in ('dl', 'ol', 'ul', 'aside'):
        # These elements keep their id attribute.
        return
    anchor = mkel('a', {'name': attrs['id']}, [])
    body.insert(0, anchor)
    del attrs['id']
Beispiel #45
0
    def handle_run(self, r):
        """Convert the run element `r` into a list of output items.

        The children of `r` (those following the run-properties child, if
        there is one) are translated one by one: text and tabs become
        strings, breaks become ``br`` or ``.pagebreak`` elements, drawings
        are transcluded and footnote/endnote references become footnotes.
        Unrecognised children are logged and skipped. When run properties
        are present they are applied to the collected items via
        `apply_rpr`, except when the items match the footnote pattern
        checked at the end.
        """
        # XXX(ash): pylint is right about this being too complex
        # pylint: disable=R0912
        _ = Var('_')
        ans = []
        rPr = first_of_tag(r, RUN_PROPS_TAG)
        # With a properties child, only its following siblings are content.
        content = rPr.itersiblings() if rPr is not None else iter(r)
        for e in content:
            # pylint: disable=W0622
            type = e.attrib.get(ns.w('type'))
            if e.tag == TEXT_TAG:
                ans.append(e.text)
            elif e.tag == TAB_TAG:
                # XXX(alexander): this can also work like a '_' or '…' \dotfill
                ans.append('\t')
            elif e.tag in (FOOTNOTE_REF_TAG, ENDNOTE_REF_TAG):
                # XXX(ash): what is going on here
                # (these tags are deliberately swallowed without output)
                pass
            elif e.tag == BREAK_TAG and type in ('page', 'column'):
                ans.append(mkel('.pagebreak', {}, []))
            elif e.tag == BREAK_TAG or e.tag == CR_TAG:
                # Any break type other than page/column handled above must
                # be absent or 'textWrapping'.
                assert (type is None) or (type == 'textWrapping')
                ans.append(mkel('br', {}, []))
            # FIXME, tags below untested
            elif e.tag == SOFT_HYPHEN_TAG:
                ans.append(SOFT_HYPHEN)
            elif e.tag == NON_BREAKING_HYPHEN_TAG:
                ans.append(NON_BREAKING_HYPHEN)
            elif e.tag == ns.w('drawing'):
                # Each image found in the drawing may yield zero or more
                # items from transclude().
                ans.extend(
                    flatmap(self.transclude,
                            e.xpath(self.IMAGE_XPATH, namespaces=ns.dict)))
            elif e.tag in (FOOTNOTE_REFERENCE_TAG, ENDNOTE_REFERENCE_TAG):
                ans.append(self.make_footnote(e))
            else:
                # movie,
                # rt, ruby, rubyAlign etc. for ruby stuff
                # sym, with special handling for wingdings I guess...
                log.warn('Unknown tag %r', e.tag)
        # NOTE(review): the Seq pattern presumably matches a result whose
        # first item is a '.footnote' element, so that run styling is not
        # applied to footnotes -- confirm against the Seq/Var definitions.
        if rPr is not None and ans != Seq[Seq['.footnote', _:], _:]:
            ans = self.apply_rpr(rPr, ans)

        return ans
def maybe_anchorize_id(tag, attrs, body):
    """DESTRUCTIVELY push the id into an anchor in the body, in most cases.

    Anything w/ an id should be linkable; the id should not be used
    otherwise. Unless `tag` is one of ``dl``/``ol``/``ul``/``aside``, an
    ``id`` in `attrs` is converted in place into an empty ``<a name=...>``
    prepended to `body`.
    """
    keeps_own_id = tag in ('dl', 'ol', 'ul', 'aside')
    if 'id' in attrs and not keeps_own_id:
        element_id = attrs['id']
        body.insert(0, mkel('a', dict(name=element_id), []))
        del attrs['id']
    def handle_run(self, r):
        """Translate the children of run element `r` into output items.

        Returns a list in which text and tab children appear as strings,
        breaks as ``br``/``.pagebreak`` elements, drawings as whatever
        `transclude` produces for their images, and footnote/endnote
        references as the result of `make_footnote`. Unknown children
        only produce a log warning. If the run has a properties child,
        `apply_rpr` is applied to the list unless it matches the footnote
        pattern tested at the end.
        """
        # XXX(ash): pylint is right about this being too complex
        # pylint: disable=R0912
        _ = Var('_')
        ans = []
        rPr = first_of_tag(r, RUN_PROPS_TAG)
        # Content is everything after the properties child when present,
        # otherwise every child of the run.
        content = rPr.itersiblings() if rPr is not None else iter(r)
        for e in content:
            # pylint: disable=W0622
            type = e.attrib.get(ns.w('type'))
            if e.tag == TEXT_TAG:
                ans.append(e.text)
            elif e.tag == TAB_TAG:
                # XXX(alexander): this can also work like a '_' or '…' \dotfill
                ans.append('\t')
            elif e.tag in (FOOTNOTE_REF_TAG, ENDNOTE_REF_TAG):
                # XXX(ash): what is going on here
                # (no output is generated for these tags)
                pass
            elif e.tag == BREAK_TAG and type in ('page', 'column'):
                ans.append(mkel('.pagebreak', {}, []))
            elif e.tag == BREAK_TAG or e.tag == CR_TAG:
                # Page and column breaks were handled by the branch above.
                assert (type is None) or (type == 'textWrapping')
                ans.append(mkel('br', {}, []))
            # FIXME, tags below untested
            elif e.tag == SOFT_HYPHEN_TAG:
                ans.append(SOFT_HYPHEN)
            elif e.tag == NON_BREAKING_HYPHEN_TAG:
                ans.append(NON_BREAKING_HYPHEN)
            elif e.tag == ns.w('drawing'):
                # transclude() returns a list per image; flatten them all.
                ans.extend(
                    flatmap(self.transclude, e.xpath(self.IMAGE_XPATH,
                                                     namespaces=ns.dict)))
            elif e.tag in (FOOTNOTE_REFERENCE_TAG, ENDNOTE_REFERENCE_TAG):
                ans.append(self.make_footnote(e))
            else:
                # movie,
                # rt, ruby, rubyAlign etc. for ruby stuff
                # sym, with special handling for wingdings I guess...
                log.warn('Unknown tag %r', e.tag)
        # NOTE(review): presumably this skips run styling when the result
        # starts with a '.footnote' element -- verify how Seq/Var compare.
        if rPr is not None and ans != Seq[Seq['.footnote', _:], _:]:
            ans = self.apply_rpr(rPr, ans)

        return ans
def unwrap_figures(body):
    """Yield the elements of `body` with figures lifted out of paragraphs.

    * ``table``/``tr``/``td``/``blockquote`` elements are rebuilt with
      their body processed recursively.
    * A figure standing alone, or alone inside an attribute-less ``p``,
      is yielded as a figure whose style has ``display`` set to 'block'.
    * A ``p`` containing a block figure is split around that figure.
    * Everything else is passed through unchanged.
    """
    # XXX: this currently only operates at the toplevel, both looking for
    # paragraphs and also looking for block figures in paragraphs. Strictly
    # speaking we should probably descend for both. As an additional hack, we
    # descend, up to the <td> level, into tables.
    # NOTE(review): the Var objects below act as pattern variables; their
    # `.val` is read after a successful comparison, so presumably `==`/`in`
    # binds them as a side effect -- confirm against Var's definition.
    FATTRS, PATTRS, FBODY = map(
        Var, 'FATTRS, PATTRS, FBODY'.split(', '))
    BLOCK_STYLE_ATTR = Var('BLOCK_STYLE_ATTR',
                           lambda a: a['style']['display'] == 'block')
    BLOCK_FIG = ('figure', BLOCK_STYLE_ATTR, FBODY)
    PBODY_WITH_BLOCKFIG = Var('PBODY_WITH_BLOCKFIG',
                              list.__contains__, BLOCK_FIG)
    for elem in body:
        if elem and elem[0] in ('table', 'tr', 'td', 'blockquote'):
            yield mkel(elem[0], elem[1], list(unwrap_figures(elem[-1])))
        elif elem in (('p', {}, [('figure', FATTRS, FBODY)]),
                      ('figure', FATTRS, FBODY)):
            # override style of standalone figures
            # (deep copy so the matched attributes are not modified)
            new_fattrs = copy.deepcopy(FATTRS.val)
            new_fattrs['style']['display'] = 'block'
            yield mkel('figure', new_fattrs, FBODY.val)
        # Split a <p> that contains a block figure into
        # two paragraphs separated by a figure.
        # This case can only arise due to the
        # large inline image heuristic; if the paragraph
        # has an id attribute (shouldn't happen yet),
        # we put it into the first half of the split. We throw away
        # empty <p>s.
        elif elem == ('p', PATTRS, Seq[PBODY_WITH_BLOCKFIG:]):
            # NOTE: this rebinds the name `body` (the parameter); the
            # enclosing for loop is unaffected because it already holds
            # an iterator over the original object.
            body = PBODY_WITH_BLOCKFIG.val
            # Only the first block figure in the paragraph is split out.
            i_fig = body.index(BLOCK_FIG)
            if body[:i_fig]:
                yield mkel('p', PATTRS.val, body[:i_fig])
                # The second half must not repeat the id.
                cloned_attrs = dict((k, v) for (k, v) in PATTRS.val.items()
                                    if k != 'id')
            else:
                # No first half: the attributes (id included) are reused
                # as-is, without copying.
                cloned_attrs = PATTRS.val
            yield body[i_fig]
            if cloned_attrs or body[i_fig+1:]:
                yield ('p', cloned_attrs, body[i_fig+1:])
        else:
            yield elem
Beispiel #49
0
 def build_list(cls, tree):
     """Convert a nested list `tree` into a list of list elements.

     Non-list items of `tree` are indexed as ``x[0]`` (unpacked into a
     ``(tag, attr)`` pair naming the enclosing list element) and ``x[1]``
     (the content of the resulting ``li``). Nested Python lists stand for
     nested structure and are converted recursively.

     NOTE(review): when `tree` is not a list, `ans` is never assigned and
     the final ``return ans`` raises UnboundLocalError.
     """
     _ = Var('_')
     if isinstance(tree, list):
         ans = []
         # Consecutive items are grouped by their (tag, attr) pair; nested
         # lists get the placeholder key (_, _).
         # NOTE(review): presumably Var compares equal to anything, which
         # would let groupby merge a nested list into the group of the
         # items preceding it -- confirm against Var's definition.
         for (tag, attr), body in itertools.groupby(
                 tree, lambda x: (_, _) if isinstance(x, list) else x[0]):
             this_body = []
             if tag is _:
                 # Group keyed by a nested list: it must contain exactly
                 # one element, which becomes an anonymous '.block'.
                 body, = body
                 ans.append(mkel('.block', {}, cls.build_list(body)))
             else:
                 for x in body:
                     if isinstance(x, list):
                         # A nested list is appended to the body of the
                         # most recent 'li' (requires one to exist).
                         item = cls.build_list(x)
                         this_body[-1][2].extend(item)
                     else:
                         item = [x[1]]
                         this_body.append(mkel('li', {}, item))
                 ans.append(mkel(tag, attr, this_body))
     return ans
Beispiel #50
0
def hacky_flatten_block(block):
    """Collapse a ``.block`` that wraps nothing but a single ``p``.

    For such a block the paragraph is dissolved: the result is a ``.block``
    with the merged attributes of both and the paragraph's body. Any other
    input is returned unchanged.
    """
    # XXX(ash): move to postprocess
    # pylint: disable=C0103
    BLOCK_ATTRS = Var('BLOCK_ATTRS')
    P_ATTRS = Var('P_ATTRS')
    BODY = Var('BODY')
    lone_paragraph_block = ('.block', BLOCK_ATTRS, [('p', P_ATTRS, BODY)])
    if not (block == lone_paragraph_block):
        return block
    merged_attrs = merge_attrs(BLOCK_ATTRS.val, P_ATTRS.val)
    return mkel('.block', merged_attrs, BODY.val)
Beispiel #51
0
def unwrap_figures(body):
    """Generate `body`'s elements with figures unwrapped from paragraphs.

    Containers (``table``, ``tr``, ``td``, ``blockquote``) are processed
    recursively. A standalone figure -- bare or as the sole content of an
    attribute-less ``p`` -- is yielded with its style's ``display`` forced
    to 'block'. A ``p`` that contains a block figure is split into the
    part before the figure, the figure itself, and the part after it.
    All other elements are yielded as they are.
    """
    # XXX: this currently only operates at the toplevel, both looking for
    # paragraphs and also looking for block figures in paragraphs. Strictly
    # speaking we should probably descend for both. As an additional hack, we
    # descend, up to the <td> level, into tables.
    # NOTE(review): Var instances are used as pattern variables whose
    # `.val` is read after a match; presumably the comparisons below bind
    # them as a side effect -- confirm against Var's definition.
    FATTRS, PATTRS, FBODY = map(Var, 'FATTRS, PATTRS, FBODY'.split(', '))
    BLOCK_STYLE_ATTR = Var('BLOCK_STYLE_ATTR',
                           lambda a: a['style']['display'] == 'block')
    BLOCK_FIG = ('figure', BLOCK_STYLE_ATTR, FBODY)
    PBODY_WITH_BLOCKFIG = Var('PBODY_WITH_BLOCKFIG', list.__contains__,
                              BLOCK_FIG)
    for elem in body:
        if elem and elem[0] in ('table', 'tr', 'td', 'blockquote'):
            yield mkel(elem[0], elem[1], list(unwrap_figures(elem[-1])))
        elif elem in (('p', {}, [('figure', FATTRS, FBODY)]), ('figure',
                                                               FATTRS, FBODY)):
            # override style of standalone figures
            # (the matched attributes are deep-copied, not modified)
            new_fattrs = copy.deepcopy(FATTRS.val)
            new_fattrs['style']['display'] = 'block'
            yield mkel('figure', new_fattrs, FBODY.val)
        # Split a <p> that contains a block figure into
        # two paragraphs separated by a figure.
        # This case can only arise due to the
        # large inline image heuristic; if the paragraph
        # has an id attribute (shouldn't happen yet),
        # we put it into the first half of the split. We throw away
        # empty <p>s.
        elif elem == ('p', PATTRS, Seq[PBODY_WITH_BLOCKFIG:]):
            # NOTE: `body` (the parameter name) is rebound here; the for
            # loop keeps iterating over the original object because it
            # already holds its iterator.
            body = PBODY_WITH_BLOCKFIG.val
            # index() finds the first block figure only.
            i_fig = body.index(BLOCK_FIG)
            if body[:i_fig]:
                yield mkel('p', PATTRS.val, body[:i_fig])
                # Drop the id so it is not duplicated in the second half.
                cloned_attrs = dict(
                    (k, v) for (k, v) in PATTRS.val.items() if k != 'id')
            else:
                # Nothing precedes the figure; the original attribute
                # dict is reused (not copied) for the trailing paragraph.
                cloned_attrs = PATTRS.val
            yield body[i_fig]
            if cloned_attrs or body[i_fig + 1:]:
                yield ('p', cloned_attrs, body[i_fig + 1:])
        else:
            yield elem
def hacky_flatten_block(block):
    """Merge a single-paragraph ``.block`` with its paragraph.

    When `block` is a ``.block`` whose body is exactly one ``p``, return a
    ``.block`` carrying the merged attributes of the two and the body of
    the paragraph; otherwise return `block` itself.
    """
    # XXX(ash): move to postprocess
    # pylint: disable=C0103
    BLOCK_ATTRS, P_ATTRS, BODY = map(Var, ('BLOCK_ATTRS', 'P_ATTRS', 'BODY'))
    pattern = ('.block', BLOCK_ATTRS, [('p', P_ATTRS, BODY)])
    matched = block == pattern
    if matched:
        block = mkel('.block',
                     merge_attrs(BLOCK_ATTRS.val, P_ATTRS.val),
                     BODY.val)
    return block
 def build_list(cls, tree):
     """Turn the nested list `tree` into a list of list elements.

     Each non-list item ``x`` contributes an ``li`` holding ``x[1]`` to an
     element named by the ``(tag, attr)`` pair ``x[0]``; nested Python
     lists are converted by recursion.

     NOTE(review): for a `tree` that is not a list, `ans` stays unassigned
     and ``return ans`` raises UnboundLocalError.
     """
     _ = Var('_')
     if isinstance(tree, list):
         ans = []
         # Group consecutive items by their (tag, attr) pair; nested lists
         # use the placeholder key (_, _).
         # NOTE(review): presumably Var matches any value on comparison,
         # so that a nested list can join the group of the preceding
         # items -- confirm against Var's definition.
         for (tag, attr), body in itertools.groupby(
                 tree,
                 lambda x: (_, _) if isinstance(x, list) else x[0]):
             this_body = []
             if tag is _:
                 # The group was opened by a nested list; exactly one
                 # element is expected and it becomes a '.block'.
                 body, = body
                 ans.append(mkel('.block', {}, cls.build_list(body)))
             else:
                 for x in body:
                     if isinstance(x, list):
                         # Attach the sub-list to the body of the last
                         # 'li' created so far (there must be one).
                         item = cls.build_list(x)
                         this_body[-1][2].extend(item)
                     else:
                         item = [x[1]]
                         this_body.append(mkel('li', {}, item))
                 ans.append(mkel(tag, attr, this_body))
     return ans
def apply_html_style(tag, run):
    '''
    Return a copy of `run` with the run property for html `tag` added.

    `tag` is one of 'u', 'b', 's' or 'i' (anything else raises KeyError).
    The property is appended to the run's leading ``w:rPr`` element, which
    is created if the run does not start with one -- including when the
    run's body is empty.

    >>> run1 = mkel('w:r', {}, ['...'])

    >>> run2 = apply_html_style('b', run1)

    >>> run2 # doctest: +NORMALIZE_WHITESPACE
    ('w:r', {}, [('w:rPr', {}, [('w:b', {'w:val': '1'}, [])]),
                 '...'])

    >>> apply_html_style('i', run2) # doctest: +NORMALIZE_WHITESPACE
    ('w:r', {}, [('w:rPr', {}, [('w:b', {'w:val': '1'}, []),
                                ('w:i', {'w:val': '1'}, [])]),
                 '...'])
    '''
    rpr = {
        'u': mkel('w:u', {'w:val': 'single'}, []),
        'b': mkel('w:b', {'w:val': '1'}, []),
        's': mkel('w:strike', {'w:val': '1'}, []),
        'i': mkel('w:i', {'w:val': '1'}, []),
    }[tag]
    t, a, b = run
    # `b and ...`: an empty run body has no b[0]; without the guard this
    # raised IndexError instead of simply creating the w:rPr.
    if b and b[0][:1] == ('w:rPr',):
        assert b[0][1] == {}
        # Build new lists; the existing w:rPr body is not modified.
        rprs = b[0][2] + [rpr]
        b = b[1:]
    else:
        rprs = [rpr]
    b = [mkel('w:rPr', {}, rprs)] + b
    return mkel(t, a, b)
Beispiel #55
0
def apply_html_style(tag, run):
    '''
    Add the run property matching html `tag` to `run`; return the new run.

    Supported tags are 'u', 'b', 's' and 'i'; other values raise KeyError.
    If the run body starts with a ``w:rPr`` element the property is
    appended to it, otherwise a new ``w:rPr`` is put in front of the body
    (this also covers a run whose body is empty).

    >>> run1 = mkel('w:r', {}, ['...'])

    >>> run2 = apply_html_style('b', run1)

    >>> run2 # doctest: +NORMALIZE_WHITESPACE
    ('w:r', {}, [('w:rPr', {}, [('w:b', {'w:val': '1'}, [])]),
                 '...'])

    >>> apply_html_style('i', run2) # doctest: +NORMALIZE_WHITESPACE
    ('w:r', {}, [('w:rPr', {}, [('w:b', {'w:val': '1'}, []),
                                ('w:i', {'w:val': '1'}, [])]),
                 '...'])
    '''
    rpr = {
        'u': mkel('w:u', {'w:val': 'single'}, []),
        'b': mkel('w:b', {'w:val': '1'}, []),
        's': mkel('w:strike', {'w:val': '1'}, []),
        'i': mkel('w:i', {'w:val': '1'}, []),
    }[tag]
    t, a, b = run
    # Guard against an empty body: indexing b[0] there used to raise
    # IndexError, although the run merely lacks a w:rPr.
    has_rpr = bool(b) and b[0][:1] == ('w:rPr', )
    if has_rpr:
        assert b[0][1] == {}
        # Concatenation creates a fresh list, leaving the input untouched.
        rprs = b[0][2] + [rpr]
        b = b[1:]
    else:
        rprs = [rpr]
    b = [mkel('w:rPr', {}, rprs)] + b
    return mkel(t, a, b)
def make_opf(
        head,
        parts,
        transclusions,
        includes,  # pylint: disable=R0913,R0914
        cover_image=None,
        compat=False):
    """Create package.opf contents.

    Returns ``(package, ns)``: the ``package`` element and its namespace
    map (the Dublin Core namespaces with the 'opf' prefix mapped to None).
    The manifest lists, in order: `includes`, the toc file(s), the cover
    (if any), one ``.xhtml`` per part, and every transcluded image except
    the cover. The spine lists the toc, the cover (if any) and the parts.

    `compat`: whether to create an epub2 compatible package
              FIXME(alexander): not fully implemented
    """
    title = head['title']
    dublin, dublin_ns = meta_to_dublin_core(head)

    manifest_body = [_opf_item(inc) for inc in includes]
    if compat:
        manifest_body.append(_opf_item('toc.ncx', 'ncx'))
    manifest_body.append(_opf_item('toc.xhtml', properties='nav'))
    spine_body = [mkel('itemref', {'idref': 'toc', 'linear': 'no'}, [])]

    cover_src = None
    if cover_image:
        cover_src = transclusions.add_literal_image(cover_image)
        manifest_body.extend(make_cover_opf(cover_image, cover_src))
        spine_body.append(
            mkel('itemref', {'idref': 'cover', 'linear': 'no'}, []))

    for part in parts:
        manifest_body.append(_opf_item(part + '.xhtml'))
        spine_body.append(
            mkel('itemref', {'idref': part, 'linear': 'yes'}, []))

    # images (the cover already has its own manifest entry)
    for (image_key, _) in transclusions.iteritems():
        if image_key == cover_src:
            continue
        manifest_body.append(
            _opf_item(image_key,
                      id='img-' + image_key.split('.')[0],
                      mime=transclusions.get_mimetype(image_key)))

    manifest = mkel('manifest', {}, manifest_body)
    spine_attrs = {'toc': 'ncx'} if compat else {}
    package_body = [dublin, manifest, mkel('spine', spine_attrs, spine_body)]
    if compat:
        toc_reference = ('reference',
                         {'type': 'toc', 'title': title, 'href': 'toc.xhtml'},
                         [])
        package_body.append(('guide', {}, [toc_reference]))
    package = mkel('package',
                   {'version': '3.0', 'unique-identifier': 'uuid'},
                   package_body)
    ns = dict((None if prefix == 'opf' else prefix, uri)
              for (prefix, uri) in dublin_ns.iteritems())
    return package, ns
def whack_elt(pred, body, kill_body=False):
    """Return a copy of `body` without the elements that satisfy `pred`.

    Strings are kept as they are. An element for which ``pred(element)``
    is true is removed; its (recursively cleaned) children take its place
    unless `kill_body` is set, in which case they are dropped as well.
    All other elements are rebuilt with a recursively cleaned body.
    """
    kept = []
    for node in body:
        if isinstance(node, basestring):
            kept.append(node)
            continue
        tag, attrs, children = node
        if not pred(node):
            kept.append(mkel(tag, attrs,
                             whack_elt(pred, children, kill_body)))
        elif not kill_body:
            kept.extend(whack_elt(pred, children, kill_body))
    return kept
    def bad_command(self, head, attrs, body):
        """Render an unknown command as a visible conversion error.

        `head` must be 'LIT' or 'CMD'; the command name is the first entry
        of ``attrs['class']``. The problem is registered via `docproblem`,
        and the output is a small red warning quoting the underlined
        command (with a trailing ':' for 'CMD'), followed by the
        latexified `body`.
        """
        assert head in ('LIT', 'CMD')
        command_name = attrs['class'][0]
        problem_no = docproblem('Unknown command: {}', command_name)

        opening_text = (u"CONVERSION ERROR: Not a valid command"
                        u" (only use underlining for commands): “")
        opening = small(red(self.latexify(opening_text)))
        suffix = ':' if head == 'CMD' else ''
        underlined = self.latexify(mkel('u', {}, [command_name + suffix]))
        closing = small(red(self.latexify(u'”')))
        flagged = problem_anchor(problem_no,
                                 join(opening, underlined, closing))
        return join(flagged, self.latexify(body))
Beispiel #59
0
def whack_elt(pred, body, kill_body=False):
    """Strip the elements matching `pred` out of `body`, recursively.

    Returns a new list. Strings pass through untouched. A matching
    element is replaced by its own cleaned-up children, or by nothing at
    all when `kill_body` is true; a non-matching element is rebuilt around
    its cleaned-up children.
    """
    out = []
    for item in body:
        if isinstance(item, basestring):
            out.append(item)
            continue
        head, attrs, inner = item
        whacked = pred(item)
        if whacked and kill_body:
            # Element and contents both disappear.
            continue
        cleaned = whack_elt(pred, inner, kill_body)
        if whacked:
            out.extend(cleaned)
        else:
            out.append(mkel(head, attrs, cleaned))
    return out
def underlines_to_commands(parsed_body, lstrip=False): # pylint: disable=R0912
    """Rewrite underlined runs (``u`` elements) in `parsed_body`.

    Returns a new list in which each ``u`` element has been replaced
    according to its stripped text (see the branch comments below), and
    every other element has been rebuilt with its body processed
    recursively. With `lstrip` true, leading whitespace is stripped from
    a string item and string items that are empty after stripping are
    dropped; the flag is cleared after the first non-``u`` item or
    literal command.
    """
    CATTRS, CBODY = map(
        Var, 'CATTRS, CBODY'.split(', '))
    reparsed = []
    # NOTE: `appendpoint` is never rebound, so it is always an alias of
    # `reparsed`.
    appendpoint = reparsed
    for i, e in enumerate(parsed_body):
        if e == ('u', CATTRS, CBODY):
            assert CATTRS.val == {}
            assert len(CBODY.val) == 1
            # underlines can hide invisible whitespace
            # FIXME(alexander): should make sure this is plain text
            # bogus underlined footnoterefs can e.g. mess this up
            cmd = CBODY.val[0].strip()
            if cmd.endswith(':'): # take args
                # The command consumes everything that follows it as its
                # arguments, so processing of this body ends here.
                reparsed.append(
                    mkcmd(cmd[:-1].lower(),
                          underlines_to_commands(parsed_body[i+1:], True)))
                return reparsed
            elif cmd[:1] == cmd[-1:] == '$':
                # $...$ becomes a 'tex' command wrapping \(...\)
                reparsed.append(mkcmd('tex', [r'\(%s\)' % cmd[1:-1]]))
            elif cmd[:1] == '\\':
                # FIXME(alexander): this should probably be parsed
                reparsed.append(mkcmd('tex', [cmd]))
            elif cmd[:1] == '<' and cmd[-1] == '>':
                # FIXME(alexander):
                reparsed.extend(tidy(parse_chunk(cmd)))
                # NOTE(review): the result of mkerr() is discarded here;
                # presumably it was meant to be used when parsing fails.
                mkerr([cmd], 'Underlined tags must be well-formed xml')
            # Transform (invisibly, in GDocs) underlined whitespace to plain
            # whitespace. This should not break up underlined runs of text,
            # because at this point we should already have coalesced those.
            elif not cmd:
                # NOTE: with assertions enabled this condition cannot be
                # true, because CATTRS.val == {} was asserted above.
                if CATTRS.val:
                    log.warn('Ignoring bogus attributes in `<u> </u>`: %r',
                             CATTRS.val)
                cbody, = CBODY.val
                reparsed.append(cbody)
            else:
                # Any other underlined text is a literal command.
                assert cmd
                reparsed.append(mklit(cmd.lower()))
                lstrip = False
        else:
            if isinstance(e, basestring):
                if lstrip:
                    e = e.lstrip()
                # Strings that are (or became) empty are dropped.
                if e:
                    appendpoint.append(e)
            else:
                appendpoint.append(mkel(e[0], e[1],
                                        underlines_to_commands(e[2], lstrip)))
            lstrip = False
    return reparsed