Beispiel #1
0
def url_filter(key, value, format_, meta):
    '''
    Rewrite the target of special links.

    Only 'Link' elements are touched; every other element yields None.
    The value is unpacked as (attr, txt, target), where target[0] is the
    URL and target[1] is passed through unchanged.

    - '!w' / '!wja' link to the English / Japanese Wikipedia article named
      after the link text; '!w%20NAME' / '!wja%20NAME' use NAME instead.
    - Any other URL starting with '!' becomes a DuckDuckGo query made of
      the bang expression followed by the link text.
    - An empty URL, as in '[About me]()', becomes './' plus the slug of the
      link text.
    '''
    if key != 'Link':
        return None

    attr, txt, target = value
    url = target[0]

    wikipedias = (
        ("!w", "https://en.wikipedia.org/wiki/"),
        ("!wja", "https://ja.wikipedia.org/wiki/"),
    )
    for bang, base in wikipedias:
        if url == bang:
            url = base + stringify(txt)
            break
        explicit = bang + "%20"
        if url.startswith(explicit):
            url = base + url[len(explicit):]
            break
    else:
        # Not a Wikipedia shortcut: generic bang search or internal link.
        if url.startswith("!"):
            url = "http://duckduckgo.com/?q=" + url + " " + stringify(txt)
        elif url == '':
            url = "./" + slug(stringify(txt))

    return Link(attr, txt, [url, target[1]])
Beispiel #2
0
def get_runlist(metadata, kind, options):
    """ build the run list for `kind` from `metadata`

    Each item of the `kind` MetaList becomes a dict with the keys 'kind',
    'command', 'status' and 'arguments'. An empty list is returned (and a
    warning logged) when the field is missing or has the wrong type.
    """
    try:
        items = get_content(metadata, kind, 'MetaList')
    except (error.WrongType, error.MissingField) as err:
        info.log('WARNING', 'panzer', err)
        return list()
    runlist = list()
    for item in items:
        check_c_and_t_exist(item)
        fields = item[const.C]
        # - command: the 'run' field, resolved to a path
        run_inlines = get_content(fields, 'run', 'MetaInlines')
        run_text = pandocfilters.stringify(run_inlines)
        command = util.resolve_path(run_text, kind, options)
        # - arguments: either one raw string or a MetaList of options
        arguments = list()
        if 'args' in fields:
            args_type = get_type(fields, 'args')
            if args_type == 'MetaInlines':
                args_inlines = get_content(fields, 'args', 'MetaInlines')
                arguments = shlex.split(pandocfilters.stringify(args_inlines))
            elif args_type == 'MetaList':
                arguments = get_runlist_args(
                    get_content(fields, 'args', 'MetaList'))
        runlist.append({
            'kind': kind,
            'command': command,
            'status': const.QUEUED,
            'arguments': arguments,
        })
    return runlist
def url_filter(key, value, format_, meta):
    '''
    Rewrite the target of special links.

    Only 'Link' elements are touched; every other element yields None.
    The value is unpacked as [txt, [url, attr]].

    - '!w' / '!wja' link to the English / Japanese Wikipedia article named
      after the link text; '!w%20NAME' / '!wja%20NAME' use NAME instead.
    - Any other URL starting with '!' becomes a DuckDuckGo query made of
      the bang expression followed by the link text.
    - An empty URL, as in '[About me]()', becomes './' plus the lower-cased
      unicode slug of the link text.
    '''
    if key != 'Link':
        return None

    txt, (url, attr) = value

    wikipedias = (
        ("!w", "https://en.wikipedia.org/wiki/"),
        ("!wja", "https://ja.wikipedia.org/wiki/"),
    )
    for bang, base in wikipedias:
        if url == bang:
            url = base + stringify(txt)
            break
        explicit = bang + "%20"
        if url.startswith(explicit):
            url = base + url[len(explicit):]
            break
    else:
        # Not a Wikipedia shortcut: generic bang search or internal link.
        if url.startswith("!"):
            url = "http://duckduckgo.com/?q=" + url + " " + stringify(txt)
        elif url == '':
            url = "./" + slugify_unicode(stringify(txt), to_lower=True)

    return Link(txt, [url, attr])
def structure_header(v, f, m):
    """Remember the text of the latest level-1 and level-2 headers.

    v[0] is read as the header level and v[2] as the header inlines.
    A level-1 header sets `lastsection` and clears `lastsubsection`;
    a level-2 header sets `lastsubsection`. Other levels change nothing.
    Always returns None.
    """
    global lastsection, lastsubsection
    level = v[0]
    if level == 1:
        lastsection, lastsubsection = pf.stringify(v[2]), ''
    elif level == 2:
        lastsubsection = pf.stringify(v[2])
    def convert_internal_refs(self, key, value, format, metadata):
        """Turn citations of known internal labels into references.

        Non-'Cite' elements, and citations whose id is not in
        self.references, are left alone (None is returned). For LaTeX
        output a raw \\autoref or \\ref is emitted depending on
        self.autoref; for every other format a Link to '#label' is
        built from self.replacements.
        """
        if key != 'Cite':
            return None

        citations, _inlines = value

        if len(citations) > 1:
            # Every citation of a multi-citation must be in the reference
            # list. Otherwise the citation is bibliographic and LaTeX
            # should handle it, so it is returned unmodified.
            if any(entry['citationId'] not in self.references
                   for entry in citations):
                return None
            return self.convert_multiref(key, value, format, metadata)

        citation = citations[0]
        prefix = pf.stringify(citation['citationPrefix'])
        suffix = pf.stringify(citation['citationSuffix'])
        if prefix:
            prefix += ' '

        label = citation['citationId']
        if label not in self.references:
            return None

        reference = self.references[label]
        text = self.replacements[reference['type']].format(reference['id'])

        if format == 'latex':
            if self.autoref:
                template = u'{pre}\\autoref{{{label}}}{post}'
            else:
                template = u'{pre}\\ref{{{label}}}{post}'
            return pf.RawInline('latex', template.format(pre=prefix,
                                                         label=label,
                                                         post=suffix))

        link_text = u'{}{}{}'.format(prefix, text, suffix)
        return pf.Link([pf.Str(link_text)], ('#' + label, ''))
    def figure_replacement(self, key, value, format, metadata):
        """Render an attributed figure for the output format.

        value[0] is the image element and the remaining items are
        stringified and parsed as its attribute string. The figure is
        registered in self.refdict under its id and self.figure_count is
        incremented. A Para holding raw markdown, HTML or LaTeX is
        returned for the formats 'markdown', 'html', 'html5' and 'latex';
        any other format yields None.
        """
        image = value[0]
        attr_text = pf.stringify(value[1:])
        image_content = image['c']
        filename = image_content[1][0]
        raw_caption = pf.stringify(image_content[0])
        attrs = attr_parser.parse(attr_text)

        label = attrs['id']
        classes = attrs['classes']
        extras = [(k, v) for k, v in attrs.items()
                  if k not in ('id', 'classes')]

        class_str = 'class="{}"'.format(' '.join(classes)) if classes else ''
        key_str = ' '.join('{}={}'.format(k, v) for k, v in extras)

        number = self.figure_count
        self.refdict[label] = {'type': 'figure', 'id': number}
        caption = 'Figure {n}: {caption}'.format(n=number,
                                                 caption=raw_caption)
        self.figure_count += 1

        if format == 'markdown':
            rendered = markdown_figure.format(id=label,
                                              caption=caption,
                                              filename=filename)
            return pf.Para([rawmarkdown(rendered)])

        if format in ('html', 'html5'):
            # Both HTML flavours take the same fields, only the template
            # differs.
            template = html_figure if format == 'html' else html5_figure
            rendered = template.format(id=label,
                                       classes=class_str,
                                       keys=key_str,
                                       filename=filename,
                                       alt=caption,
                                       caption=caption)
            return pf.Para([rawhtml(rendered)])

        if format == 'latex':
            rendered = latex_figure.format(filename=filename,
                                           caption=raw_caption,
                                           label=label)
            return pf.Para([rawlatex(rendered)])

        return None
def structure_header(v, f, m):
    """Translate a header by level (v[0]); v[2] holds the header inlines.

    Level 1 calls the module-level `block.begin` with a fresh list and
    the header text, then returns that list. Level 2 headers are ignored
    and removed on posters (empty list). Level 3 becomes a single
    \\structure{...} element. Other levels return None.
    """
    global block
    level = v[0]
    if level == 1:
        opened = []
        block.begin(opened, pf.stringify(v[2]))
        return opened
    if level == 2:
        return []
    if level == 3:
        return [lb(r'\structure{%s}' % pf.stringify(v[2]))]
    return None
Beispiel #8
0
def get_list_or_inline(metadata, field):
    """ return content of MetaList or MetaInlines item coerced as list

    A MetaInlines field yields a one-element list with its stringified
    content; a MetaList field yields one stringified entry per item.
    Raises error.WrongType for any other field type.
    """
    field_type = get_type(metadata, field)
    if field_type == 'MetaInlines':
        content_raw = get_content(metadata, field, 'MetaInlines')
        return [pandocfilters.stringify(content_raw)]
    if field_type == 'MetaList':
        return [pandocfilters.stringify(content_raw)
                for content_raw in get_content(metadata, field, 'MetaList')]
    # The two literals are joined at compile time, so the separating
    # space has to be inside one of them.
    raise error.WrongType('"%s" value must be of type "MetaInlines" '
                          'or "MetaList"' % field)
Beispiel #9
0
def main():
    """Merge metadata of one publication into the document's metadata.

    The input AST is read, the publication named by the 'metapub' field
    is looked up (by 'uuid' or 'slug') in the YAML file named by
    'metapub_file', and the generated metadata is merged into the
    document. The AST is always written back, unchanged when the lookup
    cannot be done.
    """
    ast = read_ast()
    meta = ast['meta']
    if 'metapub' not in meta:
        write_ast(ast)
        return
    if 'metapub_file' not in meta:
        log('ERROR', 'No "metapub_file" metadata field in input document')
        write_ast(ast)
        return

    # load the publication list
    metapub_file = stringify(meta['metapub_file'])
    log('DEBUG', 'reading from: ' + metapub_file)
    pubs = read_yaml(metapub_file)

    # select the entries whose uuid or slug equals the requested id
    ident = stringify(meta['metapub'])
    log('DEBUG', 'looking for: ' + ident)

    def is_requested(candidate):
        return any(field in candidate and candidate[field] == ident
                   for field in ('uuid', 'slug'))

    entries = [e for e in pubs if is_requested(e)]
    if not entries:
        log('ERROR', 'Publication with id "%s" not found in "%s"' % (ident, metapub_file))
        write_ast(ast)
        return
    if len(entries) > 1:
        log('WARNING', 'More than 1 publication with id "%s" found in "%s"' % (ident, metapub_file))
    entry = entries[0]

    # collect the new metadata, one contributor per field group
    new = dict()
    contributors = (add_title, add_author, add_date_updated,
                    add_disclaimer, add_publication, add_abstract,
                    add_keywords, add_note, add_review)
    for contribute in contributors:
        contribute(new, entry)

    # convert to pandoc's ast metadata format and merge into the document
    log('DEBUG', 'new metadata: ' + str(new))
    meta.update(generate_meta(new))
    ast['meta'] = meta
    write_ast(ast)
Beispiel #10
0
def get_runlist_args(arguments_list):
    """ return list of command line arguments from 'args' MetaList

    Every item must be a MetaMap with exactly one field; items that are
    not are logged as errors and skipped. A MetaBool field becomes
    `--name`, a MetaInlines field becomes `--name="value"`, and fields of
    any other type are logged and ignored.
    """
    arguments = list()
    for item in arguments_list:
        if item[const.T] != 'MetaMap':
            info.log('ERROR', 'panzer',
                     '"args" list should have fields of type "MetaMap"')
            continue
        fields = item[const.C]
        if len(fields) != 1:
            info.log('ERROR', 'panzer',
                     '"args" list should have exactly one field per item')
            continue
        # exactly one key is present at this point
        field_name = next(iter(fields))
        field_type = get_type(fields, field_name)
        field_value = get_content(fields, field_name, field_type)
        if field_type == 'MetaBool':
            # NOTE(review): the flag is emitted whatever the boolean's
            # value is, so `name: false` still adds `--name` -- confirm
            # this is intended.
            arguments.append('--' + field_name)
        elif field_type == 'MetaInlines':
            value_str = pandocfilters.stringify(field_value)
            arguments.append('--%s="%s"' % (field_name, value_str))
        else:
            # The message is a single literal: adjacent literals are
            # joined without a space, which used to give "notsupported".
            info.log('ERROR', 'panzer',
                     'arguments of type "%s" not supported---"%s" ignored'
                     % (field_type, field_name))
    return arguments
Beispiel #11
0
def mk_columns(k, v, f, m):
    """Turn '[leftcol N]', '[rightcol N]' and '[endcol]' paragraphs into
    the raw HTML of a two-column float layout.

    N is the column width in percent and defaults to 50 when omitted.
    Anything else returns None.
    """
    if k != "Para":
        return None
    text = pf.stringify(v).strip()
    if not (text.startswith('[') and text.endswith(']')):
        return None
    content = text[1:-1]

    def width_after(marker):
        # The width is whatever remains once the marker is removed.
        given = content.replace(marker, '').strip()
        return given + "%" if given != "" else "50%"

    if content.startswith("leftcol"):
        return html("""    <div id="col-wrapper">
      <div id="col">
        <div style="width:%(width)s; float: left;">""" % {'width': width_after("leftcol")})
    if content.startswith("rightcol"):
        return html("""        </div>
        <div style="width:%(width)s; float:right;">""" % {'width': width_after("rightcol")})
    if content.startswith("endcol"):
        return html("""        </div>
      </div>
    </div>""")
    return None
def transPara(key, value, format, meta):
    """Simplify links, code blocks and images.

    - 'Link': the stringified value[1] has 'listing', 'figure' and
      'table' replaced by their Chinese terms and every '.' by '-'.
      Empty text removes the link (empty list); text matching one of the
      reference patterns is returned as a plain Str (chapter numbers get
      a leading '第'); any other link is left unchanged (None).
    - 'CodeBlock' / 'Image': rebuilt with empty attributes.
    - Everything else is left unchanged (None).
    """
    if key == "Link":
        if debug == 1:
            fh.write("Link\n")
        hstr = stringify(value[1])
        translations = ((u"listing", u"清单"),
                        (u"figure", u"图"),
                        (u"table", u"表"))
        for english, chinese in translations:
            hstr = re.sub(english, chinese, hstr)
        hstr = re.sub(r"\.", u"-", hstr)
        if len(hstr) < 1:
            return []
        # Backslashes are doubled instead of relying on invalid escape
        # sequences ("\d", "\-") in ordinary string literals.
        # NOTE(review): the alternation is ungrouped, so this matches any
        # text starting with 图 or 表 -- kept exactly as before.
        if re.match(u"图|表|清单[\\d\\-]+", hstr):
            return Str(hstr)
        if re.match(u"[\\d\\-]+章", hstr):
            return Str(u"第" + hstr)
        if re.match(u"第[\\d\\-]+章", hstr):
            return Str(hstr)
        return None
    elif key == "CodeBlock":
        return CodeBlock(("", [], []), value[1])
    elif key == "Image":
        return Image(("", [], []), value[1], value[2])
    else:
        return None
def transPara(key, value, format, meta):
    """Flatten Span and Div elements.

    Elements for which dropHeaderFooter(value[0][2]) is true are removed
    (empty list). Otherwise a Span is replaced by a Str of its text, and
    a Div by its own content (value[1]), or by nothing when that content
    has no text. Other elements are left unchanged (None).
    """
    if key not in ('Span', 'Div'):
        return None
    if dropHeaderFooter(value[0][2]):
        return []
    hstr = stringify(value[1])
    if key == 'Span':
        return Str(hstr)
    if len(hstr) < 1:
        return []
    return value[1]
Beispiel #14
0
def keyword2html(keyword_node):
    """Wrap the keyword node in a Span whose id is the normalized keyword.

    Returns a one-element list holding the Span.
    """
    anchor = normalize_keyword(pf.stringify(keyword_node))
    span = {"t": "Span",
            "c": [[anchor, [], []], keyword_node]}
    return [span]
Beispiel #15
0
def mk_columns(k, v, f, m):
    """Turn '[columns]' / '[column=N]' marker paragraphs into raw HTML divs.

    - text containing '[columns' opens a div; text after the first comma
      (up to the closing bracket) is copied in as the div's attributes
    - '[/columns]' and '[/column]' close a div
    - '[column=N...' / '[column,...' opens a div, with width="N" when a
      number follows 'column=', plus any text after the first comma

    Anything else returns None.
    """
    if k != "Para":
        return None
    value = stringify(v)
    if not (value.startswith('[') and value.endswith(']')):
        return None

    # Text after the first comma, without the closing bracket.
    extra = value[value.find(",") + 1:-1] if "," in value else ""

    if "[columns" in value:
        return html(r'<div %s>' % extra)
    if value in ("[/columns]", "[/column]"):
        return html(r'</div>')
    if value.startswith(("[column=", "[column,")):
        # Raw literal: "\d" is an invalid escape in an ordinary string.
        width = re.search(r"column=(\d+)", value)
        if width:
            opening = r'<div width="%s" ' % width.group(1)
        else:
            opening = r'<div '
        return html(opening + extra + ">")
    return None
def blockquote2div(key, value, format, meta):
    """Turn a blockquote into a div when its header marks it as special.

    The blockquote is converted when the lower-cased header text is in
    SPECIAL_TITLES (the mapped class is appended to the header's classes)
    or when the header has exactly one class and it is in SPECIAL_CLASSES
    (attributes are then removed from the blockquote first). In every
    other case None is returned.

    This function can be passed directly to toJSONFilter
    from pandocfilters.
    """
    if key != 'BlockQuote':
        return None

    blocks = value
    header = find_header(blocks)
    if not header:
        return None

    _level, attr, inlines = header
    _ident, classes, _kvs = attr

    title = pf.stringify(inlines).lower()
    if title in SPECIAL_TITLES:
        # `classes` is the list inside `attr`, so the Div gets the class.
        classes.append(SPECIAL_TITLES[title])
        return pf.Div(attr, blocks)

    if len(classes) == 1 and classes[0] in SPECIAL_CLASSES:
        remove_attributes(blocks)
        # a blockquote is just a list of blocks, which is what
        # Div(attr, blocks) expects
        return pf.Div(attr, blocks)

    return None
def create_figures(key, value, format, metadata):
    """Convert Images with attributes to Figures.

    Images are [caption, (filename, title)].

    Figures are [caption, (filename, title), attrs].

    Figure is not a supported pandoc type; it is only used internally.
    Elements that are neither attributed figures nor div figures are
    left unchanged (None).
    """
    if isattrfigure(key, value):
        # Everything after the image is the markdown attribute string.
        attributes = PandocAttributes(pf.stringify(value[1:]), 'markdown')
        caption, target = value[0]['c']
        return Figure(caption, target, attributes.to_pandoc())

    if isdivfigure(key, value):
        attr, blocks = value
        # Collect the leading element of every block that is an Image and
        # use the first one found.
        images = []
        for blk in blocks:
            leading = blk['c'][0]
            if leading['t'] == 'Image':
                images.append(leading)
        caption, target = images[0]['c']
        return Figure(caption, target, attr)

    return None
Beispiel #18
0
def is_attrtable(key, value):
    """True if this is an attributed table; False otherwise.

    A table is attributed when the stripped text of value[0] matches
    ATTR_PATTERN. Any error while inspecting the value counts as
    "not an attributed table". The result is always a real bool.
    """
    if key != 'Table':
        return False
    try:
        caption = stringify(value[0]).strip()
        return bool(ATTR_PATTERN.match(caption))
    # Best effort for malformed values, but KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    # pylint: disable=broad-except
    except Exception:
        return False
def id4glossary(key, value, format, meta):
    """Give every glossary keyword an id.

    Only acts on DefinitionList elements of documents whose 'subtitle'
    metadata reads 'Reference': the first item of each definition is
    replaced in place by keyword2html of it and the list is returned
    rebuilt. Everything else returns None.
    """
    is_reference = ("subtitle" in meta
                    and pf.stringify(meta['subtitle']) == 'Reference')
    if not (is_reference and key == "DefinitionList"):
        return None
    for definition in value:
        definition[0] = keyword2html(definition[0])
    return {"t": key, "c": value}
Beispiel #20
0
 def transform(self):
     """ rebuild the document's metadata from the styles in `self.stylefull`

     Style metadata is applied first to last ('all' before the current
     writer), then the document's own metadata is layered on top, run
     lists are trimmed by the kill rules, and `self.template` is
     resolved from the 'template' field when one is usable.
     """
     writer = self.options['pandoc']['write']
     info.log('INFO', 'panzer', 'writer:')
     info.log('INFO', 'panzer', '  %s' % writer)
     # 1. Build the new metadata, starting from nothing
     new_metadata = dict()
     # - each style contributes its 'all' map, then its writer map
     for style in self.stylefull:
         for scope in ('all', writer):
             style_map = meta.get_nested_content(self.styledef, [style, scope], 'MetaMap')
             new_metadata = meta.update_metadata(new_metadata, style_map)
             self.apply_commandline(style_map)
     # - the document's own metadata comes last
     indoc_data = self.get_metadata()
     # -- additive fields are merged, all other fields overwrite
     new_metadata = meta.update_additive_lists(new_metadata, indoc_data)
     new_metadata.update(indoc_data)
     # -- the document's `commandline` field is applied as well
     self.apply_commandline(indoc_data)
     # 2. Trim the run lists with the kill rules
     for field in const.RUNLIST_KIND:
         try:
             survivors = meta.apply_kill_rules(
                 meta.get_content(new_metadata, field, 'MetaList'))
             if survivors:
                 meta.set_content(new_metadata, field, survivors, 'MetaList')
             else:
                 # every item was killed, so the field goes too
                 del new_metadata[field]
         except error.MissingField:
             pass
         except error.WrongType as err:
             info.log('WARNING', 'panzer', err)
     # 3. Resolve the template
     try:
         template_type = meta.get_type(new_metadata, 'template')
         if template_type == 'MetaInlines':
             template_str = pandocfilters.stringify(
                 meta.get_content(new_metadata, 'template', 'MetaInlines'))
         elif template_type == 'MetaString':
             template_str = meta.get_content(new_metadata, 'template', 'MetaString')
             if template_str == '':
                 raise error.MissingField
         else:
             raise error.WrongType
         self.template = util.resolve_path(template_str, 'template', self.options)
     except (error.MissingField, error.WrongType) as err:
         info.log('DEBUG', 'panzer', err)
     if self.template:
         info.log('INFO', 'panzer', info.pretty_title('template'))
         info.log('INFO', 'panzer', '  %s' % info.pretty_path(self.template))
     # 4. Store the result on the document
     self.set_metadata(new_metadata)
def url_filter(key, value, format_, meta):
    """
    Expand interwiki shortcuts and site-relative links.

    For 'Link' elements (unpacked as [txt, [url, attr]]): a URL starting
    with '!' plus a prefix from interwiki_maps becomes that entry's query
    string followed by the link text. Otherwise an empty URL becomes
    'http://gwern.net/' plus the link text, and a URL without '://' gets
    'http://gwern.net/' prepended. Other elements return None.
    """
    if key != 'Link':
        return None
    txt, (url, attr) = value
    for prefix, query in interwiki_maps:
        if url.startswith("!" + prefix):
            url = query + stringify(txt)
            break
    else:
        # No interwiki prefix matched.
        if url == '':
            # So we want to internally link txt
            url = "http://gwern.net/" + stringify(txt)
        elif "://" not in url:
            url = "http://gwern.net/" + url
    return Link(txt, [url, attr])
Beispiel #22
0
def walk_metadata(x):
    '''
    x is a JSON dictionary of pandoc metadata.
    Converts it to a plainer Python value: MetaBool gives its content,
    MetaInlines and MetaString give their stringified text, and MetaList
    gives a list of converted items. Any other type gives None.
    FIXME: Maybe formatting for e.g. math should be retained instead of
    converting to a string?
    '''
    kind = x['t']
    if kind == 'MetaBool':
        return x['c']
    if kind in ('MetaInlines', 'MetaString'):
        return str(pandocfilters.stringify(x))
    if kind == 'MetaList':
        return [walk_metadata(child) for child in x['c']]
    return None
Beispiel #23
0
def mk_center(key, value, *args):
    """Replace '[center]' / '[/center]' paragraphs by the LaTeX commands
    that open / close a center environment; return None otherwise."""
    if key != "Para":
        return None
    text = pf.stringify(value)
    if not (text.startswith('[') and text.endswith(']')):
        return None
    commands = {
        "center": r'\begin{center}',
        "/center": r'\end{center}',
    }
    command = commands.get(text[1:-1])
    if command is None:
        return None
    return latex(command)
Beispiel #24
0
def get_list(meta):
    """Return the cached 'smallcaps' hit list, reading it on first use.

    On the first call the stringified items of meta['smallcaps']['c'] are
    stored on the function itself. `get_list.checked` and
    `get_list.hitlist` are read here without being created first, so they
    have to be initialised before the first call; when the field is
    absent the existing hitlist is returned as is.
    """
    if get_list.checked == True:
        return get_list.hitlist
    try:
        # Marked as checked up front, so a missing field is not retried.
        get_list.checked = True
        entries = meta.get('smallcaps', {})['c']
        get_list.hitlist = [stringify(entry) for entry in entries]
    except KeyError:
        pass
    return get_list.hitlist
Beispiel #25
0
def get_list(meta):
    """Return the cached 'smallcaps' hit list, reading it on first use.

    On the first call the stringified items of meta['smallcaps']['c'] are
    stored on the function itself and logged. `get_list.checked` and
    `get_list.hitlist` are read here without being created first, so they
    have to be initialised before the first call; when the field is
    absent the existing hitlist is returned as is.
    """
    if get_list.checked == True:
        return get_list.hitlist
    try:
        # Marked as checked up front, so a missing field is not retried.
        get_list.checked = True
        entries = meta.get('smallcaps', {})['c']
        get_list.hitlist = [stringify(entry) for entry in entries]
        panzertools.log('INFO', 'small caps: ' + repr(get_list.hitlist))
    except KeyError:
        pass
    return get_list.hitlist
Beispiel #26
0
def mk_columns(k, v, f, m):
    """Turn '[columns]', '[/columns]' and '[column=W]' paragraphs into the
    matching LaTeX column commands (top-aligned columns); W is the column
    width as a fraction of \\textwidth. Anything else returns None."""
    if k != "Para":
        return None
    text = pf.stringify(v)
    if not (text.startswith('[') and text.endswith(']')):
        return None
    content = text[1:-1]
    if content == "columns":
        return latex(r'\begin{columns}[T]')
    if content == "/columns":
        return latex(r'\end{columns}')
    marker = "column="
    if content.startswith(marker):
        return latex(r'\column{%s\textwidth}' % content[len(marker):])
    return None
def duck(key, value, format_, meta):
    '''
    Send links of the form "!STRING" to DuckDuckGo.

    The query is the bang expression followed by the link text, so
    [Fishmans](!w) searches for "!w Fishmans". Links that do not start
    with "!" and non-Link elements are left unchanged (None).
    '''
    if key != 'Link':
        return None
    txt, (url, attr) = value
    if not url.startswith("!"):
        return None
    target = "http://duckduckgo.com/?q=" + url + " " + stringify(txt)
    return Link(txt, [target, attr])
Beispiel #28
0
def get_meta(meta, name):
    """Return the value of metadata variable 'name' from the 'meta' dict.

    MetaString and MetaBool give their content, MetaInlines gives a
    string (or a bool, see below), MetaList gives a list of strings.
    Raises RuntimeError for any other metadata type; 'name' must be
    present in 'meta'.
    """
    assert name in meta
    data = meta[name]
    kind = data['t']

    if kind in ('MetaString', 'MetaBool'):
        return data['c']

    if kind == 'MetaInlines':
        inlines = data['c']
        # Handle bug in pandoc 2.2.3 and 2.2.3.1: a lone 'true'/'false'
        # word is returned as a boolean rather than as a string.
        if len(inlines) == 1 and inlines[0]['t'] == 'Str':
            word = inlines[0]['c']
            if word in ('true', 'True', 'TRUE'):
                return True
            if word in ('false', 'False', 'FALSE'):
                return False
        return stringify(inlines)

    if kind == 'MetaList':
        return [stringify(item['c']) for item in data['c']]

    raise RuntimeError("Could not understand metadata variable '%s'." %
                       name)
Beispiel #29
0
def mk_columns(key, value, *args):
    """Make LaTeX columns from '[columns]', '[/columns]' and '[column=W]'
    paragraphs; W is the column width as a fraction of \\textwidth.
    Anything else returns None."""
    if key != "Para":
        return None
    text = pf.stringify(value)
    bracketed = text.startswith('[') and text.endswith(']')
    if not bracketed:
        return None
    content = text[1:-1]
    fixed = {
        "columns": r'\begin{columns}',
        "/columns": r'\end{columns}',
    }
    if content in fixed:
        return latex(fixed[content])
    if content.startswith("column="):
        width = content[len("column="):]
        return latex(r'\column{%s\textwidth}' % width)
    return None
Beispiel #30
0
def parse_attrimage(value):
    """Parses an attributed image.

    value[0] is the image element; its content has two items
    (caption, target) for pandoc < 1.16 and three items
    (attrs, caption, target) for pandoc >= 1.16. In the old layout the
    label comes from the attribute string that follows the image; in the
    new layout it is the first item of the image's own attributes.

    Returns (attrs, caption, target, label), with attrs set to None for
    the old layout.
    """
    content = value[0]['c']

    if len(content) == 2:  # Old pandoc < 1.16
        attrs, (caption, target) = None, content
        attr_string = stringify(value[1:]).strip()
        # Extract label from attributes (label, classes, kvs)
        label = PandocAttributes(attr_string, 'markdown').to_pandoc()[0]
    else:  # New pandoc >= 1.16
        assert len(content) == 3
        attrs, caption, target = content
        # The label is already part of the image attributes, so the
        # trailing elements do not need to be stringified here.
        label = attrs[0]

    if label == 'fig:':
        # Make up a unique description from the target.
        # NOTE(review): str hashes are salted per process unless
        # PYTHONHASHSEED is fixed, so this label varies between runs.
        label = label + '__' + str(hash(target[0])) + '__'
    return attrs, caption, target, label