Ejemplo n.º 1
0
def postprocess(raw_body, transclusions, bibliography=None, asides=False):
    """Run the post-parse pipeline over `raw_body`; return `(head, body)`.

    The body is passed, in order, through `coalesce`, `parse_cites`,
    `underlines_to_commands`, `captionize`, `unwrap_figures`,
    `space_normalize` and `tidy`, and finally split by `extract_meta`.
    Unless `asides` is true, `aside` elements are removed up front.
    `bibliography` only contributes its `entries` attribute (an empty dict
    is used when the attribute is missing).  If anything was cited but the
    extracted head has no 'bibliography' key, a MISSING_BIBLIOGRAPHY
    problem is reported for the first citation in sorted order.
    """
    cited = set()
    # kill comments early, because they can mess things up
    # (e.g. by splitting commands or citations)
    # FIXME(alexander): comments should probably be out-of-band
    if not asides:
        raw_body = whack('aside'.__eq__, raw_body, kill_body=True)

    coalesced = coalesce(raw_body)
    bib_entries = getattr(bibliography, 'entries', {})
    with_cites = parse_cites(coalesced,
                             bib_entries=bib_entries,
                             collect_cite=cited.add)
    with_commands = underlines_to_commands(with_cites)
    with_captions = captionize(with_commands)
    unwrapped = list(unwrap_figures(with_captions))
    # FIXME(alexander): investigate the performance impact of this final tidy
    # -- it's needed for tidying up stuff that happened after commandification
    # (currently that only affects blockquotes)
    raw_parsed_body = tidy(space_normalize(unwrapped))

    head, body = extract_meta(raw_parsed_body, transclusions)
    if cited and 'bibliography' not in head:
        docproblem(MISSING_BIBLIOGRAPHY, sorted(cited)[0])
    return head, body
    def bad_command(self, head, attrs, body):
        """Render an unrecognised command as a visible conversion error.

        Reports an 'Unknown command' document problem for the first class
        in `attrs`, builds a red, small warning around the underlined
        command name, hands that to `problem_anchor` together with the value
        `docproblem` returned, and joins the result with the latexified
        `body`.  `head` must be 'LIT' or 'CMD'.
        """
        assert head in ('LIT', 'CMD')
        unknown = attrs['class'][0]
        problem_no = docproblem('Unknown command: {}', unknown)

        opening = small(red(self.latexify(
            u"CONVERSION ERROR: Not a valid command"
            u" (only use underlining for commands): “")))
        # 'CMD' heads are shown with a trailing colon, 'LIT' heads without
        suffix = ':' if head == 'CMD' else ''
        underlined = self.latexify(mkel('u', {}, [unknown + suffix]))
        closing = small(red(self.latexify(u'”')))
        anchored = problem_anchor(problem_no,
                                  join(opening, underlined, closing))
        return join(anchored, self.latexify(body))
def postprocess(raw_body, transclusions, bibliography=None, asides=False):
    """Post-process a raw parsed body into an `(unaugmented_head, body)` pair.

    Asides are stripped first (unless `asides` is true); the body then goes
    through citation parsing, command conversion, captionizing, figure
    unwrapping, space normalization and a final tidy before `extract_meta`
    separates head from body.  Citations seen by `parse_cites` are
    collected; if there are any and the head lacks a 'bibliography' key, a
    MISSING_BIBLIOGRAPHY problem is reported for the smallest one.
    """
    seen_citations = set()
    # kill comments early, because they can mess things up
    # (e.g. by splitting commands or citations)
    # FIXME(alexander): comments should probably be out-of-band
    if not asides:
        raw_body = whack('aside'.__eq__, raw_body, kill_body=True)

    stage = coalesce(raw_body)
    known_entries = getattr(bibliography, 'entries', {})
    stage = parse_cites(stage,
                        bib_entries=known_entries,
                        collect_cite=seen_citations.add)
    stage = underlines_to_commands(stage)
    stage = captionize(stage)
    stage = list(unwrap_figures(stage))
    stage = space_normalize(stage)
    # FIXME(alexander): investigate the performance impact of this final tidy
    # -- it's needed for tidying up stuff that happened after commandification
    # (currently that only affects blockquotes)
    raw_parsed_body = tidy(stage)

    unaugmented_head, body = extract_meta(raw_parsed_body, transclusions)
    missing_bib = 'bibliography' not in unaugmented_head
    if seen_citations and missing_bib:
        docproblem(MISSING_BIBLIOGRAPHY, sorted(seen_citations)[0])
    return unaugmented_head, body
Ejemplo n.º 4
0
    def bad_command(self, head, attrs, body):
        """Emit LaTeX flagging an unknown command, followed by its body.

        `head` must be 'LIT' or 'CMD'; the command name is taken from the
        first entry of `attrs['class']`.  A document problem is reported and
        the value it returns is passed to `problem_anchor` along with the
        warning text.
        """
        assert head in ('LIT', 'CMD')
        cmd_name = attrs['class'][0]
        problem_id = docproblem('Unknown command: {}', cmd_name)

        warning_start = self.latexify(
            u"CONVERSION ERROR: Not a valid command"
            u" (only use underlining for commands): “")
        warning_start = small(red(warning_start))

        # only 'CMD' heads are displayed with a trailing colon
        if head == 'CMD':
            colon = ':'
        else:
            colon = ''
        shown_cmd = self.latexify(mkel('u', {}, [cmd_name + colon]))

        warning_stop = small(red(self.latexify(u'”')))

        message = join(warning_start, shown_cmd, warning_stop)
        flagged = problem_anchor(problem_id, message)
        return join(flagged, self.latexify(body))
def captionize(body):
    """Attach `caption` commands to the table/figure element preceding them.

    Walks `window(body, 2)` pairs `(e1, e2)`.  When `e2` is a caption
    command (bare, or as the only child of a `p`) and `e1` is a `table` or
    `figure` element (bare, or as the only child of a `p` whose attrs are
    empty or just a `justify`/`left` class), the last entry of the result is
    replaced by that element with a `caption`/`figcaption` child prepended.
    If `e1` is not such an element, a document problem is reported and the
    caption is dropped.  Every other `e2` is copied to the result, with
    element bodies captionized recursively.

    NOTE(review): this relies on `Var` objects recording what they were
    compared against (`.val`, `.match`) as a side effect of `==`/`in` --
    inferred from how they are used here; confirm against `Var`.
    """
    CBODY, TAG, PATTRS, FATTRS, FBODY = map(
        Var, 'CBODY, TAG, PATTRS, FATTRS, FBODY'.split(', '))
    ans = []
    for e1, e2 in window(body, 2):
        # is e2 a caption command, either bare or wrapped in a paragraph?
        if e2 in (varcmd('caption', CBODY),
                  ('p', PATTRS,
                   [varcmd('caption', CBODY)])):
            #XXX(alexander): the right way would probably be to normalize
            # justify/left away before we get here.
            # Grouping is ((wrapped-in-p and plain attrs) or bare) and
            # table/figure; the comparisons also (re)bind TAG/FATTRS/FBODY.
            e1_is_figure = ((e1 == ('p', PATTRS, [(TAG, FATTRS, FBODY)]) and
                             PATTRS.val in ({}, {'class': ['justify']},
                                            {'class': ['left']})
                             or e1 == (TAG, FATTRS, FBODY))
                            and TAG.val in ('table', 'figure'))
            if not e1_is_figure:
                if TAG.match and TAG.val in H_TAGS:
                    docproblem(CAPTION_AFTER_HEADING,
                               plaintextify(CBODY.val),
                               plaintextify(FBODY.val))
                else:
                    docproblem(CAPTION_AFTER_NON_FLOAT,
                               plaintextify(CBODY.val))
                # the misplaced caption itself is not added to the output
                continue
            if PATTRS.match and PATTRS.val:
                log.warn(
                    'Unexpected attrs in paragraph wrapping the caption: %r',
                    PATTRS.val)
            # overwrite the previously emitted entry with the captioned
            # float; the caption element goes first in its body
            ans[-1] = (TAG.val, FATTRS.val,
                       [('figcaption' if TAG.val == 'figure' else 'caption',
                         {},
                         CBODY.val)] + captionize(FBODY.val))
        elif e2 == (TAG, FATTRS, FBODY):
            # any other element: rebuild it with a captionized body
            ans.append(mkel(TAG.val, FATTRS.val, captionize(FBODY.val)))
        else:
            # anything that is not a (tag, attrs, body) triple is kept as is
            ans.append(e2)
    return ans
Ejemplo n.º 6
0
def captionize(body):
    """Merge `caption` commands into the table/figure element before them.

    Iterates over `window(body, 2)` pairs `(e1, e2)`.  A caption command
    `e2` (bare, or the sole child of a `p`) that follows a `table` or
    `figure` element `e1` (bare, or the sole child of a `p` with empty or
    `justify`/`left`-only attrs) replaces the last result entry with that
    element, its body prefixed by a `caption`/`figcaption` child.  A caption
    after anything else is reported as a document problem and dropped.  All
    other elements are copied over, with element bodies processed
    recursively.

    NOTE(review): depends on `Var` objects capturing matched values
    (`.val`, `.match`) during `==`/`in` comparisons -- inferred from usage;
    confirm against `Var`.
    """
    CBODY, TAG, PATTRS, FATTRS, FBODY = map(
        Var, 'CBODY, TAG, PATTRS, FATTRS, FBODY'.split(', '))
    ans = []
    for e1, e2 in window(body, 2):
        # caption command, either bare or wrapped in a paragraph
        if e2 in (varcmd('caption',
                         CBODY), ('p', PATTRS, [varcmd('caption', CBODY)])):
            #XXX(alexander): the right way would probably be to normalize
            # justify/left away before we get here.
            # Grouping is ((wrapped-in-p and plain attrs) or bare) and
            # table/figure; the comparisons also (re)bind TAG/FATTRS/FBODY.
            e1_is_figure = ((e1 == ('p', PATTRS, [(TAG, FATTRS, FBODY)])
                             and PATTRS.val in ({}, {
                                 'class': ['justify']
                             }, {
                                 'class': ['left']
                             }) or e1 == (TAG, FATTRS, FBODY))
                            and TAG.val in ('table', 'figure'))
            if not e1_is_figure:
                if TAG.match and TAG.val in H_TAGS:
                    docproblem(CAPTION_AFTER_HEADING, plaintextify(CBODY.val),
                               plaintextify(FBODY.val))
                else:
                    docproblem(CAPTION_AFTER_NON_FLOAT,
                               plaintextify(CBODY.val))
                # the misplaced caption itself is not added to the output
                continue
            if PATTRS.match and PATTRS.val:
                log.warn(
                    'Unexpected attrs in paragraph wrapping the caption: %r',
                    PATTRS.val)
            # overwrite the previously emitted entry with the captioned
            # float; the caption element goes first in its body
            ans[-1] = (TAG.val, FATTRS.val,
                       [('figcaption' if TAG.val == 'figure' else 'caption',
                         {}, CBODY.val)] + captionize(FBODY.val))
        elif e2 == (TAG, FATTRS, FBODY):
            # any other element: rebuild it with a captionized body
            ans.append(mkel(TAG.val, FATTRS.val, captionize(FBODY.val)))
        else:
            # anything that is not a (tag, attrs, body) triple is kept as is
            ans.append(e2)
    return ans
Ejemplo n.º 7
0
def handle_fragment(fragment, indent, transclusions, h_shift, epub_clean,
                    bibliography):
    """Serialize one fragment -- a string or `(tag, attrs, content)` -- to HTML.

    Strings are returned escaped with `cgi.escape`.  Elements are first
    special-cased by tag (script/style, pre, figure, table, col,
    title/subtitle, CMD/LIT, headings, dotted pseudo-tags, ERR) and then
    rendered through one of the module's templates, with their children
    rendered by `handle_fragments`.

    Args:
        fragment: the string or `(tag, attrs, content)` triple to render.
        indent: indentation string handed to the templates; children are
            rendered with two extra spaces prepended.
        transclusions: passed through to `encode_attrs` and
            `handle_fragments`.
        h_shift: if truthy, added to heading levels (result clamped to
            `1..len(H_TAGS)`).
        epub_clean: if truthy, `a name=` anchors become `id` attrs and
            `img` width/height attrs are dropped; if falsy, `col` style
            widths are turned into a `width` attribute.  Also passed to
            `encode_attrs`.
        bibliography: when truthy, cited keys are added to its `cited` set
            and citations are rendered by `_format_citation`.

    Returns:
        The HTML for the fragment as a string.

    NOTE(review): the figure branch mutates `content` and the img's attrs
    in place, so the input fragment is modified.
    """
    # pylint: disable=R0911,R0914,R0912,R0913,R0915
    # FIXME(alexander): clean this up a bit, and get rid of pylint muffles
    if isinstance(fragment, basestring):
        return cgi.escape(fragment)

    (tag, attrs, content) = fragment
    if tag in ['script', 'style'] and content:
        # unpacking requires exactly one child: the raw script/style text
        content_str, = content
        return NOT_INLINE_TEMPLATE % dict(
            indent=indent,
            tag=tag,
            attrs_str=encode_attrs(attrs, transclusions, epub_clean),
            content_str=_indent(
                '\n' + maybe_cdatafy(_indent(content_str.strip('\n'), ' ')),
                indent))
    if tag == 'pre':
        return '\n' + highlight.as_html(fragment)

    # special case figures and tables
    if tag == 'figure':
        # move the width from the figure's style onto the img's style
        style = attrs['style'].copy()
        width = style.pop('width', '100%')
        attrs = dict(attrs.items(), style=style)
        # FIXME(alexander): dirty hacks to fixup caption & width
        img = content[-1]
        assert img[0] == 'img'
        img[1].setdefault('style', OrderedDict())['width'] = width
        # put figcaption towards end
        if content[0][0] == 'figcaption':
            content[0], content[-1] = content[-1], content[0]
        if style['display'] == 'inline':
            ATTRS = Var('ATTRS')  # pylint: disable=C0103
            assert content[:1] == [('img', ATTRS, [])], \
                "figure does not begin with an img"
            attrs = add_class(ATTRS.val, 'margin')
            # peel of the figure tag for inlined stuff
            # as a hack to make epub/html validate
            # (figures can't occur in all contexts imgs can)
            return handle_fragments([('img', attrs, [])],
                                    bibliography=bibliography,
                                    indent=indent,
                                    transclusions=transclusions,
                                    h_shift=h_shift,
                                    epub_clean=epub_clean)
        # non-inline figures fall through to the generic rendering below
    elif tag == 'table':
        colgroups = [el for el in content if el[0] == 'colgroup']
        COLS = Var("COLS")  # pylint: disable=C0103
        # exactly one colgroup, with empty attrs, is required
        assert colgroups == [('colgroup', {}, COLS)], \
                "Expected single colgroup in table %s" % content
        # FIXME(alexander): this deepcopy is a lazy hack so we can mutate away
        # imperatively propagate table cell alignment down
        # this is a pretty horrible hack and would blow
        # up nastily if there is attribute aliasing,
        # but deepcopying should kinda make it work
        content = copy.deepcopy(content)
        _propagate_alignment(content, COLS.val)

    elif tag == 'col':
        if not epub_clean:
            # replace the style dict by a plain `width` attribute
            attrs = attrs.copy()
            attrs['width'] = attrs['style']['width']
            del attrs['style']
        # cull
        ## return handle_fragments(content, indent)
    # FIXME(alexander): might make more sense to filter (or h-ify) these out
    # elsewhere, but for now this seems not unreasonable
    elif tag == 'title':
        tag = 'h1'
        attrs = add_class(attrs, 'title')
    elif tag == 'subtitle':
        tag = 'h2'
        attrs = add_class(attrs, 'subtitle')
    elif tag in ('CMD', 'LIT'):
        bad_command = None
        # unpacking requires exactly one class: the command type
        cmd_type, = attrs['class']
        # FIXME(alexander): convert tex to html for non-math;
        # convert tex math to MML for epub
        if cmd_type in ('$', 'tex'):
            tex, = content
            if cmd_type == '$':
                tex = r'\(%s\)' % tex
            return '<span class="tex2jax_process">%s</span>' % cgi.escape(tex)
        elif CITE_REX.match(cmd_type):
            if bibliography:
                bibliography.cited.add(content[0])
                # post = ('[%s]' % content[1] if len(content) > 1 and content[1]
                #         else '')
                # Post is ignored for the moment
                return _format_citation(cmd_type, content[0], bibliography)
            else:
                # no early return here: after reporting the problem the
                # element continues through the generic rendering below
                docerror.docproblem(
                    'Citation exists, but bibliography is missing')
        else:
            bad_command = cmd_type + (':' if content else '')
            docerror.docproblem('Unknown command type:%s' % cmd_type)
    elif epub_clean:
        # only reached for tags not handled by any branch above
        if tag == 'a' and 'name' in attrs:
            assert len(attrs) == 1
            attrs = {'id': attrs['name']}
        elif tag == 'img':
            attrs = {
                k: attrs[k]
                for k in attrs if k not in ('width', 'height')
            }

    # FIXME(alexander): support continued-list properly in html, by keeping
    # track of numbers of items per list-id and translating it to start

    if tag in H_TAGS:
        if h_shift:
            # shift the heading level, clamped to the range 1..len(H_TAGS)
            tag = 'h%d' % min(len(H_TAGS), max(1, int(tag[1]) + h_shift))

    # generic [tagname].class tags
    if '.' in tag:
        if tag == '.pagebreak':
            tag = 'div.pagebreak'  # for whitespace sanitization
        tagname, classname = tag.split('.', 1)
        # a bare `.class` pseudo-tag is rendered as a span
        tag = tagname or 'span'
        attrs = add_class(attrs, classname)

    # NOTE(review): `bad_command` is only assigned in the CMD/LIT branch and
    # only consulted for 'CMD' here, so an unknown 'LIT' keeps its tag --
    # confirm that is intended.
    if tag == 'CMD' and bad_command:
        tag = 'span'
        attrs = {'class': ['bad-command']}
        content = [('u', {}, [bad_command])] + content
    elif tag == 'ERR':
        tag = 'span'
        attrs = {'class': ['err'], 'title': attrs['info'][0]}

    content_str = handle_fragments(content,
                                   indent='  ' + indent,
                                   transclusions=transclusions,
                                   h_shift=h_shift,
                                   epub_clean=epub_clean,
                                   bibliography=bibliography)
    # pick the template: self-closing for void tags, single-line for inline
    # tags, otherwise depending on whether the content spans several lines
    if tag in VOID_TAGS:
        assert not content
        template = "<%(tag)s%(attrs_str)s/>"
    elif tag in INLINE:
        template = "<%(tag)s%(attrs_str)s>%(content_str)s</%(tag)s>"
    elif '\n' in content_str:
        template = NOT_INLINE_TEMPLATE
    else:
        template = COMPACT_NOT_INLINE_TEMPLATE

    # FIXME(alexander): disgusting hack; fix this properly and
    # use a set representation to start with!
    # (classes are de-duplicated and sorted on a copy of attrs)
    classes = attrs.get('class')
    if classes:
        attrs = attrs.copy()
        attrs['class'] = sorted(set(classes))

    return template % dict(indent=indent,
                           tag=tag,
                           attrs_str=encode_attrs(attrs, transclusions,
                                                  epub_clean),
                           content_str=content_str)
    def latexify(self, ast): # pylint: disable=E0102,R0914,R0915,R0911,R0912
        """Convert an AST to LaTeX.

        `ast` is either a list of nodes, a string, or a `(head, attrs, body)`
        tuple.  Lists are converted element-wise and joined, with a trailing
        blank line collapsed to a single newline; strings are passed through
        `quote`; tuples are dispatched on their head tag.  Unknown tags are
        logged as errors and yield `join("")`.

        The `attrs` dict of the node passed in is not modified.
        """
        if isinstance(ast, list):
            return re.sub('\n\n$', '\n',
                          join(*map(self.latexify, ast)))
        else:
            node = ast
            if isinstance(node, basestring):
                return quote(node)
            else:
                assert isinstance(node, tuple)
                h, a, b = node
                if h == 'div':  # canonicalize pseudo-elements
                    # The single class becomes the head.  Work on a copy of
                    # the attrs so the caller's AST is left intact (popping
                    # from it in place would break a second pass over the
                    # same node).
                    classes = a['class']
                    assert len(classes) == 1
                    h = classes[-1]
                    a = a.copy()
                    del a['class']

                if h[:-1] == 'h':
                    if self.am_inside('list') or self.am_inside('table'):
                        return docwarn(
                            self.latexify(b),
                            'Cannot have sections inside lists or tables: %r' %
                            postprocess.plaintextify(b))
                    else:
                        with self.inside('section'):
                            if a:
                                log.warn('heading w/ attr %r', a)
                            labels, b = extract_labels(b)
                            return self.section(h, b, labels)
                elif h == 'p':
                    ans = nl(self.latexify(b))
                    if self.am_inside('.footnote') and self.am_inside('table'):
                        return docwarn(ans,
                                       'Multi-paragraph footnotes in tables are'
                                       ' unsupported')
                    return nl(ans)
                elif h == 'span':
                    return self.latexify(b) # XXX
                elif h in ('ol', 'ul'):
                    ol = partial(self.enumerate_,
                                 start=a.get('start'),
                                 series=a.get('id'),
                                 resume=a.get('data-continue-list'))
                    with self.inside('list'):
                        return nl(
                            freshline({
                                'ol': ol,
                                'ul': itemize}[h](
                                    self.latexify(b))))
                elif h == 'li':
                    labels, b = extract_labels(b)
                    labelling = (join(*(map(mklabel, labels) + [' ']))
                                 if labels else '')
                    return join(freshline(cmd('item')),
                                labelling, self.latexify(b))
                elif h == 'table':
                    nested_table = self.am_inside('table')
                    with self.inside('table'):
                        # pylint: disable=C0103
                        CLASS_TO_SPEC = {'left': 'P', 'center': 'C',
                                         'right': 'R', 'justify': 'N'}
                        # copy, because the caption is deleted from the list
                        b = b[:]
                        tablecaption = None
                        if b[0][0] == 'caption':
                            with self.inside('caption'):
                                tablecaption = self.latexify(b[0][2])
                            del b[0]

                        colgroup = [el for el in b if el[0] == 'colgroup']
                        rows = [el for el in b if el[0] == 'tr']
                        assert len(colgroup) == 1, \
                                "Expected single colgroup in table %s" % b
                        cols = colgroup[0][2]
                        colspecs = []
                        for col_h, col_a, col_b in cols:
                            if col_h != 'col':
                                break
                            assert not col_b

                            # column type defaults to 'P' unless an
                            # alignment class is present
                            coltype = 'P'
                            for cls in CLASS_TO_SPEC:
                                if cls in col_a.get('class', []):
                                    coltype = CLASS_TO_SPEC[cls]

                            coltype = "%s{%s}" % (coltype, textwidth_percent(
                                col_a['style']['width']))

                            colspecs.append(coltype)
                        rows = "\\tabularnewline\n".join(
                            map(self.latexify, rows))
                        if nested_table and tablecaption:
                            docproblem(
                                "Tables within tables can't have captions;"
                                " outputing caption as normal text",
                                level='warning')
                            ans = join(nl(table(colspecs, rows)), tablecaption)
                        else:
                            ans = table(colspecs, rows, tablecaption)
                    # footnote texts deferred while inside a caption are
                    # emitted once we are no longer inside any table
                    if self.post_float_yuck and not self.am_inside('table'):
                        ans = join(ans, *self.post_float_yuck)
                        del self.post_float_yuck[:]
                    return ans
                elif h == 'col': # FIXME
                    assert False, "Unexpected col"
                elif h == 'tr':
                    return " & ".join(map(self.latexify, b))
                elif h == 'td':
                    if 'headcol' in a.get('class', []):
                        return colh(self.latexify(b))
                    return self.latexify(b)
                elif h == 'th':
                    if 'headcol' in a.get('class', []):
                        return rowh(colh(self.latexify(b)))
                    return rowh(self.latexify(b))
                elif h == 'figure':
                    # copy, because the figcaption is deleted from the list
                    b = b[:]
                    if b[0][0] == 'figcaption':
                        with self.inside('caption'):
                            figcaption = self.latexify(b[0][2])
                        del b[0]
                    else:
                        figcaption = None
                    assert len(b) == 1 and b[0][0] == 'img'
                    img = b[0][1]['src']
                    inline = False
                    warns = []
                    if a['style']['display'] == 'inline':
                        if self.am_inside('table'):
                            warns.append([
                                'Margin figures not supported in tables, '
                                'inserting into table cell'])
                        else:
                            inline = True
                    if inline:
                        if figcaption:
                            warns.append(
                                ['Ignoring figcaption for inline figure:'
                                 ' "%s"', figcaption])
                        ans = marginfigure(img=img)
                    else:
                        fakecaption = figcaption and self.am_inside('table')
                        if fakecaption:
                            warns.append([
                                "Figures in tables can't have captions; "
                                "outputing caption as normal text"])
                        # inside blockquotes more complicated figure
                        # environments don't seem to work reliably
                        rawincludegraphics = self.am_inside('blockquote')
                        ans = figure(img=img,
                                     classes=a.get('class', []),
                                     width=a['style']['width'],
                                     figcaption=figcaption,
                                     fakecaption=fakecaption,
                                     rawincludegraphics=rawincludegraphics)
                    if self.post_float_yuck and not self.am_inside('table'):
                        ans = join(ans, *self.post_float_yuck)
                        del self.post_float_yuck[:]
                    return ans if not warns else docwarns(ans, *warns)
                elif h == 'img':
                    assert False, 'unexpected image'
                elif h == 'a':
                    if 'name' in a:
                        # we can't do that blindly, because we want to
                        # generate labels for things like lists and headings
                        # this is only a fallback for anchors outside of
                        # 'labelled' envs
                        return cmd('hypertarget', [],
                                   [a['name'].lstrip('#'), ''])
                    elif 'href' in a:
                        # Only a plain-text first child can be compared to
                        # the href or inspected with string methods; the
                        # body may also be empty or start with a nested
                        # element (e.g. emphasised link text).
                        link_text = None
                        if b and isinstance(b[0], basestring):
                            link_text = b[0]
                        if a['href'].startswith('#'):
                            return cmd('hyperref',
                                       [latexify_href(a['href'][1:])],
                                       [self.latexify(b)])
                        ##
                        # XXX(alexander): handle bare urls specially, because
                        # we want more relaxed linebreaking rules for them.
                        # Note that we're not using \url directly, because
                        # it's not robust and also can't cope with certain
                        # arguments, such as unbalanced '{'/'}'s. Also, even
                        # with fairly aggressive hyphenization params, this is
                        # in in itself not enough to resolve all overfull hbox
                        # issues with urls, although it's not 100% clear to me
                        # why.
                        elif (link_text is not None
                              and a['href'] in (link_text,
                                                url_fix(link_text))):
                            # XXX(alexander): use url_fixed version here?
                            return urldef(a['href'], self.urldefs)
                        else:
                            ans = cmd('href', [], [latexify_href(a['href']),
                                                   self.latexify(b)])
                            if (link_text is not None
                                    and link_text.startswith('http')):
                                ans = docwarn(
                                    ans,
                                    'Suspicious link with body/href'
                                    ' mismatch: %r != %r' % (
                                        a['href'].encode('utf-8'), link_text))
                            return ans
                    else:
                        assert False, 'Malformed link: %s' % ((h, a, b),)
                elif h == 'aside':
                    return cmd('comment', [], [self.latexify(b)])
                elif h in ('b', 'i', 'u', 's'):
                    assert not a, 'unexpected <%s %r' % (h, a)
                    return self.handle_emphasis(h, b)
                elif h == 'code':
                    #FIXME: write something more specialized
                    return cmd('texttt', [], [self.latexify(b)])
                elif h == 'sup':
                    return cmd('textsuperscript', [], [self.latexify(b)])
                elif h == 'sub':
                    return cmd('textsubscript', [], [self.latexify(b)])
                elif h == '.footnote':
                    with self.inside('.footnote'):
                        if self.am_inside('caption'):
                            # inside a caption only a mark is emitted; the
                            # footnote text is queued on post_float_yuck
                            self.post_float_yuck.append(cmd('footnotetext',
                                                            [],
                                                            [self.latexify(b)]))
                            return cmd(r'protect\footnotemark', [], [])
                        else:
                            return cmd('footnote', [], [self.latexify(b)])
                elif h == '.pagebreak':
                    return nl(cmd('clearpage', [], [self.latexify(b)]))
                elif h == 'br':
                    assert a == {}
                    assert b == []
                    return nl(cmd('newline'))
                elif h == 'blockquote':
                    with self.inside('blockquote'):
                        return blockquote(self.latexify(b))
                elif (h == 'footer' and b == [Seq['cite', :]]
                      and self.am_inside('blockquote')):
                    return nl(cmd('attrib', [], [self.latexify(b[0][2])]))
                elif node == ('CMD', {'class': ['$']}, b):
                    return join('$', b[0], '$')
                elif node == ('CMD', {'class': [Var('CITE', CITE_REX.match)]},
                              b):
                    return self.munge_cite(node, b)
                elif node == ('CMD', {'class': ['tex']}, b):
                    return b[0]
                elif h in ('CMD', 'LIT'):
                    return self.bad_command(*node)
                elif h == 'pre':
                    return highlight.as_latex(node)
                elif h == 'wbr':
                    return '{}'
                else:
                    #FIXME(alexander): set 1 as error-code?
                    log.error('Unexpected tag: %s %r %r', h, a, b)
                    return join("")
def docwarns(latex_body, *warnings):
    """Report every entry of ``warnings`` as a warning-level document problem
    and anchor the reported problems to ``latex_body``.

    Each warning is unpacked as the positional arguments of ``docproblem``
    (which is called with ``level='warning'``). The values it returns are
    folded together with ``latex_body`` by ``reduce_right`` using
    ``problem_anchor``; the result of that fold is returned.
    """
    problem_ids = []
    for warning in warnings:
        problem_ids.append(docproblem(*warning, level='warning'))
    return reduce_right(problem_anchor, problem_ids, latex_body)
Ejemplo n.º 10
0
    def latexify(self, ast):  # pylint: disable=E0102,R0914,R0915,R0911,R0912
        """Recursively convert ``ast`` to LaTeX.

        ``ast`` is one of:

        * a list of nodes -- each element is converted and the results are
          combined with ``join``; a trailing blank line is collapsed to a
          single newline;
        * a string -- returned after passing it through ``quote``;
        * a ``(head, attrs, body)`` tuple -- dispatched on ``head`` (after
          ``div`` pseudo-elements have been canonicalized to their class).

        Side effects visible in this method:

        * the attrs dict of a ``div`` node is modified in place (its single
          class is popped and the ``class`` key deleted);
        * footnotes met while inside a caption are queued on
          ``self.post_float_yuck`` and appended to the output of a table or
          figure once that element is not itself inside a table;
        * problems are reported through ``docproblem``/``docwarn``/
          ``docwarns``, and unexpected tags are logged with ``log.error``.

        Malformed input (e.g. a stray ``col`` or ``img``, or a link with
        neither ``name`` nor ``href``) trips an ``assert``.
        """
        if isinstance(ast, list):
            return re.sub('\n\n$', '\n', join(*map(self.latexify, ast)))
        else:
            node = ast
            if isinstance(node, basestring):
                return quote(node)
            else:
                assert isinstance(node, tuple)
                h, a, b = node
                if h == 'div':  # canonicalize pseudo-elements
                    # NB: this mutates the node's own attrs dict.
                    h = a['class'].pop()
                    assert not a['class']
                    del a['class']

                # Two-character heads starting with 'h' (h1, h2, ...).
                # NOTE(review): this test would also match e.g. 'hr'.
                if h[:-1] == 'h':
                    if self.am_inside('list') or self.am_inside('table'):
                        return docwarn(
                            self.latexify(b),
                            'Cannot have sections inside lists or tables: %r' %
                            postprocess.plaintextify(b))
                    else:
                        with self.inside('section'):
                            if a:
                                log.warn('heading w/ attr %r', a)
                            labels, b = extract_labels(b)
                            return self.section(h, b, labels)
                elif h == 'p':
                    # nl() is applied here and again on return.
                    ans = nl(self.latexify(b))
                    if self.am_inside('.footnote') and self.am_inside('table'):
                        return docwarn(
                            ans, 'Multi-paragraph footnotes in tables are'
                            ' unsupported')
                    return nl(ans)
                elif h == 'span':
                    return self.latexify(b)  # XXX
                elif h in ('ol', 'ul'):
                    ol = partial(self.enumerate_,
                                 start=a.get('start'),
                                 series=a.get('id'),
                                 resume=a.get('data-continue-list'))
                    with self.inside('list'):
                        return nl(
                            freshline({
                                'ol': ol,
                                'ul': itemize
                            }[h](self.latexify(b))))
                elif h == 'li':
                    labels, b = extract_labels(b)
                    labelling = (join(*(map(mklabel, labels) +
                                        [' '])) if labels else '')
                    return join(freshline(cmd('item')), labelling,
                                self.latexify(b))
                elif h == 'table':
                    # Remember whether we were already in a table *before*
                    # entering this one.
                    nested_table = self.am_inside('table')
                    with self.inside('table'):
                        # pylint: disable=C0103
                        # column css class -> column-spec letter
                        CLASS_TO_SPEC = {
                            'left': 'P',
                            'center': 'C',
                            'right': 'R',
                            'justify': 'N'
                        }
                        # shallow copy: the caption is deleted from the copy,
                        # not from the node's own body
                        b = b[:]
                        tablecaption = None
                        if b[0][0] == 'caption':
                            with self.inside('caption'):
                                tablecaption = self.latexify(b[0][2])
                            del b[0]

                        colgroup = [el for el in b if el[0] == 'colgroup']
                        rows = [el for el in b if el[0] == 'tr']
                        assert len(colgroup) == 1, \
                                "Expected single colgroup in table %s" % b
                        cols = colgroup[0][2]
                        colspecs = []
                        for col_h, col_a, col_b in cols:
                            # stop at the first non-col child
                            if col_h != 'col':
                                break
                            assert not col_b

                            # 'P' unless one of the alignment classes is set
                            coltype = 'P'
                            for cls in CLASS_TO_SPEC:
                                if cls in col_a.get('class', []):
                                    coltype = CLASS_TO_SPEC[cls]

                            coltype = "%s{%s}" % (coltype,
                                                  textwidth_percent(
                                                      col_a['style']['width']))

                            colspecs.append(coltype)
                        rows = "\\tabularnewline\n".join(
                            map(self.latexify, rows))
                        if nested_table and tablecaption:
                            docproblem(
                                "Tables within tables can't have captions;"
                                " outputing caption as normal text",
                                level='warning')

                            ans = join(nl(table(colspecs, rows)), tablecaption)
                        else:
                            ans = table(colspecs, rows, tablecaption)
                    # Flush queued caption footnotes, but only once we are
                    # out of every enclosing table.
                    if self.post_float_yuck and not self.am_inside('table'):
                        ans = join(ans, *self.post_float_yuck)
                        del self.post_float_yuck[:]
                    return ans
                elif h == 'col':  # FIXME
                    assert False, "Unexpected col"
                elif h == 'tr':
                    return " & ".join(map(self.latexify, b))
                elif h == 'td':
                    if 'headcol' in a.get('class', []):
                        return colh(self.latexify(b))
                    return self.latexify(b)
                elif h == 'th':
                    if 'headcol' in a.get('class', []):
                        return rowh(colh(self.latexify(b)))
                    return rowh(self.latexify(b))
                elif h == 'figure':
                    # shallow copy: the caption is deleted from the copy only
                    b = b[:]
                    if b[0][0] == 'figcaption':
                        with self.inside('caption'):
                            figcaption = self.latexify(b[0][2])
                        del b[0]
                    else:
                        figcaption = None
                    assert len(b) == 1 and b[0][0] == 'img'
                    img = b[0][1]['src']
                    inline = False
                    # each entry is an argument list for docproblem
                    warns = []
                    if a['style']['display'] == 'inline':
                        if self.am_inside('table'):
                            warns.append([
                                'Margin figures not supported in tables, '
                                'inserting into table cell'
                            ])
                        else:
                            inline = True
                    if inline:
                        if figcaption:
                            warns.append([
                                'Ignoring figcaption for inline figure:'
                                ' "%s"', figcaption
                            ])
                        ans = marginfigure(img=img)
                    else:
                        fakecaption = figcaption and self.am_inside('table')
                        if fakecaption:
                            warns.append([
                                "Figures in tables can't have captions; "
                                "outputing caption as normal text"
                            ])
                        # inside blockquotes more complicated figure
                        # environments don't seem to work reliably
                        rawincludegraphics = self.am_inside('blockquote')
                        ans = figure(img=img,
                                     classes=a.get('class', []),
                                     width=a['style']['width'],
                                     figcaption=figcaption,
                                     fakecaption=fakecaption,
                                     rawincludegraphics=rawincludegraphics)
                    # Flush queued caption footnotes unless still in a table.
                    if self.post_float_yuck and not self.am_inside('table'):
                        ans = join(ans, *self.post_float_yuck)
                        del self.post_float_yuck[:]
                    return ans if not warns else docwarns(ans, *warns)
                elif h == 'img':
                    assert False, 'unexpected image'
                elif h == 'a':
                    if 'name' in a:
                        # we can't do that blindly, because we want to
                        # generate labels for things like lists and headings
                        # this is only a fallback for anchors outside of
                        # 'labelled' envs
                        return cmd('hypertarget', [],
                                   [a['name'].lstrip('#'), ''])
                    elif 'href' in a:
                        if a['href'].startswith('#'):
                            return cmd('hyperref',
                                       [latexify_href(a['href'][1:])],
                                       [self.latexify(b)])
                        ##
                        # XXX(alexander): handle bare urls specially, because
                        # we want more relaxed linebreaking rules for them.
                        # Note that we're not using \url directly, because
                        # it's not robust and also can't cope with certain
                        # arguments, such as unbalanced '{'/'}'s. Also, even
                        # with fairly aggressive hyphenization params, this is
                        # in in itself not enough to resolve all overfull hbox
                        # issues with urls, although it's not 100% clear to me
                        # why.
                        elif b and a['href'] in (b[0], url_fix(b[0])):
                            # XXX(alexander): use url_fixed version here?
                            return urldef(a['href'], self.urldefs)
                        else:
                            ans = cmd(
                                'href', [],
                                [latexify_href(a['href']),
                                 self.latexify(b)])
                            # The link body may be empty, or may start with
                            # an element tuple instead of text; only a
                            # leading string can look like a mismatched url,
                            # so guard before calling a str method on b[0].
                            if (b and isinstance(b[0], basestring)
                                    and b[0].startswith('http')):
                                ans = docwarn(
                                    ans, 'Suspicious link with body/href'
                                    ' mismatch: %r != %r' %
                                    (a['href'].encode('utf-8'), b[0]))
                            return ans
                    else:
                        assert False, 'Malformed link: %s' % ((h, a, b), )
                elif h == 'aside':
                    return cmd('comment', [], [self.latexify(b)])
                elif h in ('b', 'i', 'u', 's'):
                    assert not a, 'unexpected <%s %r' % (h, a)
                    return self.handle_emphasis(h, b)
                elif h == 'code':
                    #FIXME: write something more specialized
                    return cmd('texttt', [], [self.latexify(b)])
                elif h == 'sup':
                    return cmd('textsuperscript', [], [self.latexify(b)])
                elif h == 'sub':
                    return cmd('textsubscript', [], [self.latexify(b)])
                elif h == '.footnote':
                    with self.inside('.footnote'):
                        if self.am_inside('caption'):
                            # Inside a caption only the mark is emitted; the
                            # footnote text is queued and emitted after the
                            # enclosing table/figure.
                            self.post_float_yuck.append(
                                cmd('footnotetext', [], [self.latexify(b)]))
                            return cmd(r'protect\footnotemark', [], [])
                        else:
                            return cmd('footnote', [], [self.latexify(b)])
                elif h == '.pagebreak':
                    return nl(cmd('clearpage', [], [self.latexify(b)]))
                elif h == 'br':
                    assert a == {}
                    assert b == []
                    return nl(cmd('newline'))
                elif h == 'blockquote':
                    with self.inside('blockquote'):
                        return blockquote(self.latexify(b))
                elif (h == 'footer' and b == [Seq['cite', :]]
                      and self.am_inside('blockquote')):
                    # a blockquote footer holding exactly one cite element
                    return nl(cmd('attrib', [], [self.latexify(b[0][2])]))
                elif node == ('CMD', {'class': ['$']}, b):
                    # inline math: body is emitted unquoted between '$'s
                    return join('$', b[0], '$')
                elif node == ('CMD', {
                        'class': [Var('CITE', CITE_REX.match)]
                }, b):
                    return self.munge_cite(node, b)
                elif node == ('CMD', {'class': ['tex']}, b):
                    # raw tex: body is returned as is
                    return b[0]
                elif h in ('CMD', 'LIT'):
                    # any command not recognized above
                    return self.bad_command(*node)
                elif h == 'pre':
                    return highlight.as_latex(node)
                elif h == 'wbr':
                    return '{}'
                else:
                    #FIXME(alexander): set 1 as error-code?
                    log.error('Unexpected tag: %s %r %r', h, a, b)
                    return join("")
Ejemplo n.º 11
0
def docwarns(latex_body, *warnings):
    """Report every entry of ``warnings`` as a warning-level document problem
    and anchor the reported problems to ``latex_body``.

    Each warning is unpacked as the positional arguments of ``docproblem``
    (which is called with ``level='warning'``). The values it returns are
    folded together with ``latex_body`` by ``reduce_right`` using
    ``problem_anchor``; the result of that fold is returned.
    """
    problem_ids = []
    for warning in warnings:
        problem_ids.append(docproblem(*warning, level='warning'))
    return reduce_right(problem_anchor, problem_ids, latex_body)
def handle_fragment(fragment, indent,
                    transclusions, h_shift, epub_clean, bibliography):
    """Render a single AST fragment as an HTML string.

    Parameters:
      fragment      -- either a string (returned escaped via ``cgi.escape``)
                       or a ``(tag, attrs, content)`` triple.
      indent        -- indentation prefix handed to the templates; children
                       are rendered with two extra leading spaces.
      transclusions -- passed through to ``encode_attrs`` and to the
                       recursive ``handle_fragments`` calls.
      h_shift       -- if truthy, added to the level of heading tags; the
                       result is clamped to ``1..len(H_TAGS)``.
      epub_clean    -- switches on epub-specific attribute rewriting and is
                       passed through to ``encode_attrs``.
      bibliography  -- if truthy, cited keys are recorded in
                       ``bibliography.cited`` and citations are rendered with
                       ``_format_citation``; if falsy, a citation is reported
                       as a document problem.

    Returns the rendered markup as a string.

    The fragment passed in is not modified: the figure and table branches
    work on a deep copy of ``content`` before rearranging it.
    """
    # pylint: disable=R0911,R0914,R0912,R0913,R0915
    # FIXME(alexander): clean this up a bit, and get rid of pylint muffles
    if isinstance(fragment, basestring):
        return cgi.escape(fragment)

    (tag, attrs, content) = fragment
    if tag in ['script', 'style'] and content:
        # content must be exactly one string here
        content_str, = content
        return NOT_INLINE_TEMPLATE % dict(
            indent=indent,
            tag=tag,
            attrs_str=encode_attrs(attrs, transclusions, epub_clean),
            content_str=_indent(
                '\n' + maybe_cdatafy(_indent(content_str.strip('\n'), ' ')),
                indent))
    if tag == 'pre':
        return '\n' + highlight.as_html(fragment)

    # special case figures and tables
    if tag == 'figure':
        # move the width from the figure's style onto the img's style
        style = attrs['style'].copy()
        width = style.pop('width', '100%')
        attrs = dict(attrs.items(), style=style)
        # The fixups below mutate the img attrs and reorder content; work on
        # a private copy (as the table branch does) so the caller's tree is
        # left intact and can be rendered again.
        content = copy.deepcopy(content)
        # FIXME(alexander): dirty hacks to fixup caption & width
        img = content[-1]
        assert img[0] == 'img'
        img[1].setdefault('style', OrderedDict())['width'] = width
        # put figcaption towards end
        if content[0][0] == 'figcaption':
            content[0], content[-1] = content[-1], content[0]
        if style['display'] == 'inline':
            ATTRS = Var('ATTRS') # pylint: disable=C0103
            # presumably the comparison binds ATTRS.val to the img's attrs
            assert content[:1] == [('img', ATTRS, [])], \
                "figure does not begin with an img"
            attrs = add_class(ATTRS.val, 'margin')
            # peel of the figure tag for inlined stuff
            # as a hack to make epub/html validate
            # (figures can't occur in all contexts imgs can)
            return handle_fragments([('img', attrs, [])],
                                    bibliography=bibliography,
                                    indent=indent,
                                    transclusions=transclusions,
                                    h_shift=h_shift,
                                    epub_clean=epub_clean)
    elif tag == 'table':
        colgroups = [el for el in content if el[0] == 'colgroup']
        COLS = Var("COLS") # pylint: disable=C0103
        assert colgroups == [('colgroup', {}, COLS)], \
                "Expected single colgroup in table %s" % content
        # FIXME(alexander): this deepcopy is a lazy hack so we can mutate away
        # imperatively propagate table cell alignment down
        # this is a pretty horrible hack and would blow
        # up nastily if there is attribute aliasing,
        # but deepcopying should kinda make it work
        content = copy.deepcopy(content)
        _propagate_alignment(content, COLS.val)

    elif tag == 'col':
        if not epub_clean:
            # replace the style attribute by a plain width attribute
            attrs = attrs.copy()
            attrs['width'] = attrs['style']['width']
            del attrs['style']
        # cull
        ## return handle_fragments(content, indent)
    # FIXME(alexander): might make more sense to filter (or h-ify) these out
    # elsewhere, but for now this seems not unreasonable
    elif tag == 'title':
        tag = 'h1'
        attrs = add_class(attrs, 'title')
    elif tag == 'subtitle':
        tag = 'h2'
        attrs = add_class(attrs, 'subtitle')
    elif tag in ('CMD', 'LIT'):
        bad_command = None
        # exactly one class is expected; it names the command type
        cmd_type, = attrs['class']
        # FIXME(alexander): convert tex to html for non-math;
        # convert tex math to MML for epub
        if cmd_type in ('$', 'tex'):
            tex, = content
            if cmd_type == '$':
                tex = r'\(%s\)' % tex
            return '<span class="tex2jax_process">%s</span>' % cgi.escape(tex)
        elif CITE_REX.match(cmd_type):
            if bibliography:
                bibliography.cited.add(content[0])
                # post = ('[%s]' % content[1] if len(content) > 1 and content[1]
                #         else '')
                # Post is ignored for the moment
                return _format_citation(cmd_type, content[0], bibliography)
            else:
                # reported, then rendering falls through to the generic path
                docerror.docproblem(
                    'Citation exists, but bibliography is missing')
        else:
            bad_command = cmd_type + (':' if content else '')
            docerror.docproblem('Unknown command type:%s' % cmd_type)
    elif epub_clean:
        if tag == 'a' and 'name' in attrs:
            assert len(attrs) == 1
            attrs = {'id': attrs['name']}
        elif tag == 'img':
            attrs = {k: attrs[k] for k in attrs if k not in ('width', 'height')}

    # FIXME(alexander): support continued-list properly in html, by keeping
    # track of numbers of items per list-id and translating it to start

    if tag in H_TAGS:
        if h_shift:
            tag = 'h%d' % min(len(H_TAGS), max(1, int(tag[1]) + h_shift))


    # generic [tagname].class tags
    if '.' in tag:
        if tag == '.pagebreak':
            tag = 'div.pagebreak' # for whitespace sanitization
        tagname, classname = tag.split('.', 1)
        # a bare '.class' tag becomes a span
        tag = tagname or 'span'
        attrs = add_class(attrs, classname)

    # bad_command is only bound in the CMD/LIT branch above; the tag test
    # comes first so it is never read unbound.
    # NOTE(review): only 'CMD' is rewritten to a span here; an unknown 'LIT'
    # command keeps its tag -- confirm that is intended.
    if tag == 'CMD' and bad_command:
        tag = 'span'
        attrs = {'class': ['bad-command']}
        content = [('u', {}, [bad_command])] +  content
    elif tag == 'ERR':
        tag = 'span'
        attrs = {'class': ['err'], 'title': attrs['info'][0]}

    content_str = handle_fragments(content,
                                   indent='  ' + indent,
                                   transclusions=transclusions,
                                   h_shift=h_shift,
                                   epub_clean=epub_clean,
                                   bibliography=bibliography)
    # pick the layout template
    if tag in VOID_TAGS:
        assert not content
        template = "<%(tag)s%(attrs_str)s/>"
    elif tag in INLINE:
        template = "<%(tag)s%(attrs_str)s>%(content_str)s</%(tag)s>"
    elif '\n' in content_str:
        template = NOT_INLINE_TEMPLATE
    else:
        template = COMPACT_NOT_INLINE_TEMPLATE

    # FIXME(alexander): disgusting hack; fix this properly and
    # use a set representation to start with!
    # (de-duplicates and sorts the class list on a copy of attrs)
    classes = attrs.get('class')
    if classes:
        attrs = attrs.copy()
        attrs['class'] = sorted(set(classes))

    return template % dict(
        indent=indent,
        tag=tag,
        attrs_str=encode_attrs(attrs, transclusions, epub_clean),
        content_str=content_str)