Ejemplo n.º 1
0
def convert_ipynb_to_gallery(file_name):
    """Convert a Jupyter notebook into a gallery-style Python script.

    The first cell must be markdown; it is converted to reStructuredText and
    becomes the module docstring of the generated script. Every later
    markdown cell is converted to reStructuredText and emitted as ``# ``
    comments below a 70-character ``#`` rule; every code cell is appended
    verbatim. Cells of any other type are ignored.

    The script is written to ``file_name`` with ``.ipynb`` replaced by
    ``.py``.

    :param file_name: path of the notebook to convert.
    :raises AssertionError: if the first cell is not a markdown cell.
    """
    # Read inside a context manager so the handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(file_name) as notebook_file:
        nb_dict = json.load(notebook_file)
    cells = nb_dict['cells']

    python_file = ""
    for i, cell in enumerate(cells):
        if i == 0:
            assert cell['cell_type'] == 'markdown', \
                'First cell has to be markdown'

            md_source = ''.join(cell['source'])
            rst_source = pdoc.convert_text(md_source, 'rst', 'md')
            python_file = '"""\n' + rst_source + '\n"""'
        elif cell['cell_type'] == 'markdown':
            md_source = ''.join(cell['source'])
            rst_source = pdoc.convert_text(md_source, 'rst', 'md')
            commented_source = '\n'.join('# ' + x
                                         for x in rst_source.split('\n'))
            python_file = python_file + '\n\n\n' + '#' * 70 + '\n' + \
                commented_source
        elif cell['cell_type'] == 'code':
            source = ''.join(cell['source'])
            python_file = python_file + '\n' * 2 + source

    # Same for the output: make sure the data is flushed and the handle closed.
    with open(file_name.replace('.ipynb', '.py'), 'w') as script_file:
        script_file.write(python_file)
Ejemplo n.º 2
0
def convert_ipynb_to_gallery(file_name):
    """Convert a Jupyter notebook into a gallery-style Python script.

    Blatantly stolen + adapted from
    https://gist.github.com/wuhuikai/4a7ceb8bc52454e17a4eb8327d538d85

    The first cell must be markdown; it is converted to reStructuredText and
    becomes the module docstring of the generated script. Every later
    markdown cell is converted to reStructuredText and emitted as ``# ``
    comments below a 70-character ``#`` rule; every code cell is appended
    verbatim. Cells of any other type are ignored.

    The script is written to ``file_name`` with ``.ipynb`` replaced by
    ``.py``.

    :param file_name: path of the notebook to convert.
    :raises AssertionError: if the first cell is not a markdown cell.
    """
    # Read inside a context manager so the handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(file_name) as notebook_file:
        nb_dict = json.load(notebook_file)
    cells = nb_dict['cells']

    python_file = ""
    for i, cell in enumerate(cells):
        if i == 0:
            assert cell['cell_type'] == 'markdown', \
                'First cell has to be markdown'

            md_source = ''.join(cell['source'])
            rst_source = pdoc.convert_text(md_source, 'rst', 'md')
            python_file = '"""\n' + rst_source + '\n"""'
        elif cell['cell_type'] == 'markdown':
            md_source = ''.join(cell['source'])
            rst_source = pdoc.convert_text(md_source, 'rst', 'md')
            commented_source = '\n'.join(
                '# ' + x for x in rst_source.split('\n')
            )
            python_file = python_file + '\n\n\n' + '#' * 70 + '\n' + \
                commented_source
        elif cell['cell_type'] == 'code':
            source = ''.join(cell['source'])
            python_file = python_file + '\n' * 2 + source

    # Same for the output: make sure the data is flushed and the handle closed.
    with open(file_name.replace('.ipynb', '.py'), 'w') as script_file:
        script_file.write(python_file)
Ejemplo n.º 3
0
 def expand_description(self, exp):
     """Convert the Markdown description parts of ``exp`` to LaTeX.

     Reads the text of ``./description/general`` and of every
     ``./description/details/detail`` element below ``exp`` and runs each
     through pandoc (Markdown -> LaTeX). Detail texts are stripped first.

     :return: dict with the converted ``"general"`` string and a list of
         converted ``"details"`` strings, in document order.
     """
     general_md = exp.find("./description/general").text
     general_tex = pypandoc.convert_text(general_md, "latex", format="md")

     details_tex = []
     for detail in exp.findall("./description/details/detail"):
         detail_md = detail.text.strip()
         details_tex.append(
             pypandoc.convert_text(detail_md, "latex", format="md"))

     return {"general": general_tex, "details": details_tex}
Ejemplo n.º 4
0
def main():
    """Generate documentation files for every path given on the command line.

    The output type is selected by each path's suffix:

    * ``.md``   - the "pdf" flavour of the common Markdown is written as-is.
    * ``.html`` - the "html" flavour, prefixed with a ``VERSION_STR``
      heading, is rendered by pandoc to standalone HTML5.
    * ``.pdf`` / ``.tex`` - the rendered LaTeX preamble template plus the
      "pdf" flavour is rendered by pandoc with the xelatex engine.

    Paths with any other suffix are announced but otherwise ignored.
    Exits with an error message when no path is supplied.
    """
    if len(sys.argv) <= 1:
        sys.exit("Please supply a filename")

    input_format = "markdown"
    rendered = common_md()
    html_output = rendered["html"]
    pdf_output = rendered["pdf"]

    print()

    for arg in sys.argv[1:]:
        p = Path(arg).resolve()
        print(f"Generating: {p}")

        ext = p.suffix

        if ext == ".md":
            p.write_text(pdf_output)
        elif ext == ".html":
            # Build the titled document in a local: re-assigning html_output
            # here would stack one more version heading per .html argument.
            titled_html = "# " + VERSION_STR + "\n\n" + html_output
            pypandoc.convert_text(
                titled_html,
                format=input_format,
                to="html5",
                outputfile=str(p),
                extra_args=["--standalone",
                            "--self-contained",
                            "--toc",
                            "--toc-depth=2",
                            "--css=" + str(TEMPLATE_DIR / "docs.css"),
                            "--template=" + str(TEMPLATE_DIR /
                                                "template.html")])
        elif ext in (".pdf", ".tex"):
            latex_preamble = env.get_template("latex_preamble.jinja2.md")
            latex = latex_preamble \
                .render(title=VERSION_STR, fonts_dir=FONTS_DIR) + "\n\n"
            latex += pdf_output
            # Parse the whole major component, not just the first character,
            # so multi-digit major versions are compared correctly.
            pandoc_version = int(
                pypandoc.get_pandoc_version().split(".")[0])
            # pandoc 2 renamed --latex-engine to --pdf-engine.
            engine = ("--pdf-engine=xelatex"
                      if pandoc_version >= 2
                      else "--latex-engine=xelatex")
            pypandoc.convert_text(
                latex,
                format=input_format,
                to=ext[1:],
                outputfile=str(p),
                extra_args=["--standalone",
                            "--column=80",
                            "--toc",
                            "--toc-depth=2",
                            engine,
                            "--variable=papersize:A4"])
Ejemplo n.º 5
0
def render_to_format(request, format, title, template_src, context):
    """Render a template and return it as a response in the given format.

    The template is rendered to HTML and blank lines are removed. For
    ``'html'`` the HTML is returned directly; for any other format listed in
    ``settings.EXPORT_FORMATS`` the HTML is converted by pandoc through a
    temporary file and returned with a ``Content-Disposition`` header
    (inline file name for ``'pdf'``, ``attachment`` otherwise).

    :param request: the current request (not used by this function).
    :param format: target format name; also used as file suffix and as the
        ``application/<format>`` content type.
    :param title: base name for the downloaded file.
    :param template_src: name of the template to render.
    :param context: context passed to the template.
    :return: ``HttpResponse`` with the rendered document, or
        ``HttpResponseBadRequest`` if the format is not supported.
    """
    # for some weird reason we have to cast here explicitly
    format = str(format)
    title = str(title)

    if format not in settings.EXPORT_FORMATS:
        return HttpResponseBadRequest(_('This format is not supported.'))

    # render the template to a html string
    template = get_template(template_src)
    html = template.render(context)

    # remove empty lines
    html = os.linesep.join([line for line in html.splitlines() if line.strip()])

    if format == 'html':
        return HttpResponse(html)

    if format == 'pdf':
        args = ['-V', 'geometry:margin=1in']
        content_disposition = 'filename=%s.%s' % (title, format)
    else:
        args = []
        content_disposition = 'attachment; filename=%s.%s' % (title, format)

    # create a temporary file; pandoc writes to it by path, so the descriptor
    # returned by mkstemp is closed right away instead of being held open
    (tmp_fd, tmp_filename) = mkstemp('.' + format)
    os.close(tmp_fd)

    try:
        # convert the file using pandoc
        pypandoc.convert_text(html, format, format='html', outputfile=tmp_filename, extra_args=args)

        # read the temporary file
        with open(tmp_filename, 'rb') as file_handler:
            file_content = file_handler.read()
    finally:
        # delete the temporary file even when the conversion fails
        os.remove(tmp_filename)

    # create the response object
    response = HttpResponse(file_content, content_type='application/%s' % format)
    response['Content-Disposition'] = content_disposition
    return response
Ejemplo n.º 6
0
def ChangeSpellDesc2MD():
    """Rewrite the spells JSON file with HTML fields converted to Markdown.

    Loads the file at ``json_file['spells']``, converts each spell's
    ``desc`` (always) and ``higher_level`` / ``material`` (when present)
    from HTML to Markdown with pandoc (line wrapping disabled), then writes
    the result back to the same file.
    """
    # Finish reading and close the file before it is reopened for writing.
    with open(json_file['spells']) as json_data:
        spells = json.load(json_data)

    for spell in spells:
        # 'desc' is accessed unconditionally, the other two are optional.
        present_fields = ['desc'] + [field
                                     for field in ('higher_level', 'material')
                                     if field in spell]
        for field in present_fields:
            spell[field] = pypandoc.convert_text(
                spell[field], 'md', format='html',
                extra_args=['--wrap=none'])

    with open(json_file['spells'], 'w') as outfile:
        json.dump(spells, outfile)
Ejemplo n.º 7
0
 def parse(self, response):
     """Extract the talks of a schedule page.

     First yields a mapping of ``data-link`` value -> schedule date text
     collected from the per-day blocks, then yields one dict per session
     with its title, parsed date/time, description and speaker names.

     The description is converted HTML -> Markdown -> reStructuredText on a
     best-effort basis; if that fails the raw selector result is kept.
     """
     talk_ids = collections.defaultdict(list)
     for day in response.css('div.schedule__day.iframe_schedule_day'):
         curr_date = day.css('p.schedule__date::text').get()
         for r in day.css('div::attr(data-link)'):
             talk_ids[r.get()] = curr_date
     yield talk_ids
     for talk in response.css('div.details.uv-card__mask'):
         # The id belongs to the talk card, so look it up once per talk
         # rather than once per session.
         talk_id = talk.xpath('@id').get()
         for session in talk.css('div.uv-card--session'):
             time_of_day = session.css(
                 'span.session__time:nth-child(1)').xpath(
                 'normalize-space()').get()
             desc = session.css('div.safe-description').get()
             try:
                 desc_md = html2text(desc)
                 desc = pypandoc.convert_text(desc_md, 'rst', format='md')
             except Exception:
                 # Best effort: keep the unconverted description. Catching
                 # Exception (not a bare except) lets KeyboardInterrupt and
                 # SystemExit propagate.
                 pass
             # NOTE(review): the 'spearkers' key looks like a typo for
             # 'speakers'; left as-is because consumers may rely on it.
             yield {'title': session.xpath('string(.//h2)').get(),
                    'datetime': dateparser.parse('{date} {year} {tod}'.format(
                         date=talk_ids[talk_id],
                         year=2016,
                         tod=time_of_day)),
                    'description': desc,
                    'spearkers': session.css('''
                         div.session__speakers-box
                             div.uv-shortcard__title::text''').extract()}
Ejemplo n.º 8
0
    def convert(self, text):
        """Convert Redmine-flavoured textile to strict Markdown.

        Code blocks are wrapped in ``<pre>`` first, pandoc performs the
        textile -> ``markdown_strict`` conversion, and the remaining steps
        patch up constructs pandoc leaves behind (wiki links, nested lists,
        ``>`` block quotes and wiki macros).

        :param text: textile source.
        :return: the Markdown text.
        """
        text = '\n\n'.join([re.sub(self.regexCodeBlock, r'<pre>\1</pre>', block) for block in text.split('\n\n')])

        # convert from textile to markdown
        text = pypandoc.convert_text(text, 'markdown_strict', format='textile')

        # pandoc does not convert everything, notably the [[link|text]] syntax
        # is not handled. So let's fix that.

        # NOTE: the re.sub calls below used to pass
        # `re.MULTILINE | re.DOTALL` as the 4th positional argument. That slot
        # is `count`, not `flags`, so the flags never took effect and at most
        # 24 matches were replaced. The stray argument is dropped so that all
        # matches are replaced. If a pattern needs flags, set them where the
        # pattern is compiled: `flags=` is rejected for precompiled patterns.

        # [[ wikipage | link_text ]] -> [link_text](wikipage)
        text = re.sub(self.regexWikiLinkWithText, self.wiki_link, text)

        # [[ link_url ]] -> [link_url](link_url)
        text = re.sub(self.regexWikiLinkWithoutText, self.wiki_link, text)

        # nested lists, fix at least the common issues
        text = text.replace("    \\#\\*", "    -")
        text = text.replace("    \\*\\#", "    1.")

        # Redmine is using '>' for blockquote, which is not textile
        text = text.replace("&gt; ", ">")

        # wiki note macros
        text = re.sub(self.regexTipMacro, r'---\n**TIP**: \1\n---\n', text)
        text = re.sub(self.regexNoteMacro, r'---\n**NOTE**: \1\n---\n', text)
        text = re.sub(self.regexWarningMacro, r'---\n**WARNING**: \1\n---\n', text)
        text = re.sub(self.regexImportantMacro, r'---\n**IMPORTANT**: \1\n---\n', text)

        # all other macros
        text = re.sub(self.regexAnyMacro, r'\1', text)

        return text
Ejemplo n.º 9
0
    def test_pdf_conversion(self):
        """PDF output works only with an output file whose name ends in pdf.

        Checks the happy path (empty return value, file starts with the PDF
        magic bytes) and three invalid calls that must raise RuntimeError.
        """
        markdown_title = '#some title\n'

        with closed_tempfile('.pdf') as pdf_path:
            result = pypandoc.convert_text(markdown_title, to='pdf', format='md', outputfile=pdf_path)
            assert result == ""
            with io.open(pdf_path, mode='rb') as pdf_file:
                pdf_bytes = pdf_file.read()
            assert pdf_bytes[:4] == b"%PDF"
            # TODO: find a test for the content?

        # needs an outputfile
        with self.assertRaises(RuntimeError):
            pypandoc.convert_text(markdown_title, to='pdf', format='md')

        # outputfile needs to end in pdf
        with self.assertRaises(RuntimeError):
            with closed_tempfile('.WRONG') as wrong_path:
                pypandoc.convert_text(markdown_title, to='pdf', format='md', outputfile=wrong_path)

        # no extensions allowed
        with self.assertRaises(RuntimeError):
            with closed_tempfile('.pdf') as pdf_path:
                pypandoc.convert_text(markdown_title, to='pdf+somethign', format='md', outputfile=pdf_path)
Ejemplo n.º 10
0
def render_markdown(value):
    """Render Markdown to HTML5 with pandoc (``--mathjax`` enabled).

    If pandoc raises ``RuntimeError`` the input is returned unchanged.
    """
    try:
        return pypandoc.convert_text(value, to='html5', format='md', extra_args=['--mathjax'])
    except RuntimeError:
        return value
Ejemplo n.º 11
0
def md2rst(comment):
    """Convert a comment from protobuf markdown to restructuredtext.

    Steps:
    - Replace proto links with literals (e.g. [Foo][bar.baz.Foo] -> `Foo`)
    - Resolve relative URLs to https://cloud.google.com
    - Run pandoc (commonmark -> rst), but only when the comment contains
      one of the markdown special characters, because pandoc is slow
    """
    comment = _replace_relative_link(_replace_proto_link(comment))

    # Calling pypandoc.convert_text is slow, so skip it entirely when none of
    # the markdown special characters occur in the comment.
    has_markup = any(char in comment for char in '`[]*_')
    if not has_markup:
        return comment

    converted = pypandoc.convert_text(comment, 'rst', format='commonmark')
    # The comment is now valid restructuredtext, but it is inserted back into
    # a descriptor set where every comment line is expected to begin with a
    # space (separating it from the leading '//'). Downstream parsing in
    # gapic-generator strips that leading space, which would break the
    # indentation of lines that really do start with a space, so the extra
    # space is added here. Comments that skipped pandoc already carry the
    # leading space and are returned untouched above.
    return _insert_spaces(converted)
Ejemplo n.º 12
0
def tokenize_block(source: str, pandoc_extra_args: list=None) -> list:
    """
    Parse Markdown ``source`` with pandoc and return the ``blocks`` list of
    the resulting JSON AST. ``pandoc_extra_args`` defaults to no extra
    arguments.
    """
    extra_args = [] if pandoc_extra_args is None else pandoc_extra_args
    ast_json = pypandoc.convert_text(source, to='json', format='markdown', extra_args=extra_args)
    document = json.loads(ast_json)
    return document['blocks']
Ejemplo n.º 13
0
    def test_basic_conversion_from_string(self):
        """``convert`` and ``convert_text`` both turn a Markdown heading
        string into the same reStructuredText title."""
        for converter in (pypandoc.convert, pypandoc.convert_text):
            expected = u'some title{0}=========={0}{0}'.format(os.linesep)
            received = converter('#some title', 'rst', format='md')
            self.assertEqualExceptForNewlineEnd(expected, received)
Ejemplo n.º 14
0
    def test_convert_text_with_existing_file(self):
        """A file name passed to ``convert_text`` must be treated as text.

        The temp file contains a heading with the word "title"; the result of
        ``convert_text`` is expected not to contain it, while the result of
        ``convert`` (which reads the file) is expected to.
        """
        with closed_tempfile('.md', text='#some title\n') as md_path:
            from_text = pypandoc.convert_text(md_path, 'rst', format='md')
            self.assertNotIn("title", from_text)

            # The following is a problematic case
            from_guess = pypandoc.convert(md_path, 'rst', format='md')
            self.assertIn("title", from_guess)
def text_decode(text):
	"""Return a cleaned-up body for ``text``.

	Text containing a literal ``\\u`` sequence goes through
	``fix_arnaud_post``; HTML text is escaped and converted to strict
	Markdown with pandoc; anything else is returned unchanged.
	"""
	if re.search(r'\\u', text):
		return fix_arnaud_post(text)
	if not is_html(text):
		return text
	escaped = escape_special_characters(text)
	return pypandoc.convert_text(escaped, 'markdown_strict', format='html')
Ejemplo n.º 16
0
 def create(self, variables, md_output, pdf_output):
     """Render the report template and write it as Markdown and/or PDF.

     :param variables: context passed to the template.
     :param md_output: path for the Markdown report, or ``None`` to skip it.
     :param pdf_output: path handed to pandoc for the PDF report.

     PDF generation is best effort: if pypandoc cannot be imported or the
     conversion fails, a warning is logged and no exception is raised.
     """
     env = Environment(loader=PackageLoader('qanta', 'reporting/templates'))
     template = env.get_template(self.template)
     markdown = template.render(variables)
     if md_output is not None:
         with open(md_output, 'w') as f:
             f.write(markdown)
     try:
         # Imported lazily so a missing pypandoc only disables the PDF.
         import pypandoc
         pypandoc.convert_text(
             markdown,
             'pdf',
             format='md',
             outputfile=pdf_output,
             extra_args=['-V', 'geometry:margin=.75in']
         )
     except Exception as e:
         # `warn` is a deprecated alias; `warning` is the supported name.
         log.warning('Pandoc was not installed or there was an error calling it, omitting PDF report')
         log.warning(str(e))
Ejemplo n.º 17
0
def save_url(chapter, title, url):
    """Fetch ``url`` and store its content as a ``.tex`` file for ``chapter``.

    The page is parsed with ``mercury``, its images are saved under
    ``content/<chapter>/images`` (created if missing), the HTML is converted
    to TeX with pandoc and written to ``content/<chapter>/<title>.tex``.
    In the file name ``/`` becomes ``\\`` and ``:`` becomes `` -``.
    """
    safe_title = title.replace('/', '\\').replace(':', ' -')
    file_name = '{}.tex'.format(safe_title)

    images_dir = pathlib.Path(os.path.join('content', chapter, 'images'))
    images_dir.mkdir(parents=True, exist_ok=True)

    article = mercury.parse(url)
    html = save_images(article.content, images_dir)

    tex = pypandoc.convert_text(html, 'tex', format='html')
    write_content(images_dir.parent.joinpath(file_name), tex)
Ejemplo n.º 18
0
def main():
    """Generate documentation files for every path given on the command line.

    The output type is selected by each path's suffix:

    * ``.md``   - the common Markdown is written as-is.
    * ``.html`` - rendered by pandoc to standalone HTML5.
    * ``.pdf`` / ``.tex`` - the LaTeX preamble file plus the common
      Markdown is rendered by pandoc with the xelatex engine.

    Paths with any other suffix are announced but otherwise ignored.
    Exits with an error message when no path is supplied.
    """
    if len(sys.argv) <= 1:
        sys.exit("Please supply a filename")

    input_format = "markdown"
    output = common_md()

    print()

    for arg in sys.argv[1:]:
        p = Path(arg).resolve()
        print(f"Generating: {p}")

        ext = p.suffix

        if ext == ".md":
            p.write_text(output)
        elif ext == ".html":
            pypandoc.convert_text(
                output,
                format=input_format,
                to="html5",
                outputfile=str(p),
                extra_args=["--standalone",
                            "--self-contained",
                            "--toc",
                            "--toc-depth=2",
                            "--css=" + str(TEMPLATE_DIR / "docs.css")])
        elif ext in (".pdf", ".tex"):
            latex = Path(TEMPLATE_DIR / "latex_preamble.md").read_text()
            latex += output
            # pandoc 2 renamed --latex-engine to --pdf-engine and rejects the
            # old spelling, so pick the flag that matches the installed
            # version instead of hard-coding the legacy one.
            pandoc_major = int(pypandoc.get_pandoc_version().split(".")[0])
            engine = ("--pdf-engine=xelatex"
                      if pandoc_major >= 2
                      else "--latex-engine=xelatex")
            pypandoc.convert_text(
                latex,
                format=input_format,
                to=ext[1:],
                outputfile=str(p),
                extra_args=["--standalone",
                            "--column=80",
                            "--toc",
                            "--toc-depth=2",
                            engine,
                            "--variable=papersize:A4"])
Ejemplo n.º 19
0
def read_long_description():
    """Return README.md converted to reStructuredText, or "" on any failure.

    Everything from the ``# Screenshots`` heading onwards is dropped before
    the conversion. The function is deliberately best effort: a missing
    pypandoc, a missing README, a README without that heading, or a pandoc
    error all result in an empty string.
    """
    try:
        import pypandoc
        with open("README.md") as f:
            text = f.read()

        # Remove screenshots as they get rendered poorly on PyPi
        stripped_text = text[:text.index("# Screenshots")].rstrip()
        return pypandoc.convert_text(stripped_text, 'rst', format='md')
    except Exception:
        # Not a bare except: KeyboardInterrupt / SystemExit must still
        # propagate.
        return ""
Ejemplo n.º 20
0
    def test_basic_pypandoc_example(self):
        """
        Convert a one-item Org list with pypandoc and compare the HTML5
        result, ignoring line-ending differences.
        """
        expected_html5_result = '<ul>\n<li><strong>foo</strong> bar</li>\n</ul>\n'
        pypandoc_result = pypandoc.convert_text(
            '- *foo* bar', 'html5', format='org')

        actual = Utils.normalize_lineendings(pypandoc_result)
        expected = Utils.normalize_lineendings(expected_html5_result)
        self.assertEqual(actual, expected)
Ejemplo n.º 21
0
    def test_pypandoc_with_umlauts(self):
        """
        Convert an Org paragraph containing non-ASCII characters with
        pypandoc (UTF-8 encoding) and compare the HTML5 result, ignoring
        line-ending differences.
        """
        expected_html5_result = '<p>This is an umlaut test: öÄ߀</p>\n'
        pypandoc_result = pypandoc.convert_text(
            'This is an umlaut test: öÄ߀',
            'html5',
            format='org',
            encoding='utf-8')

        # FIXME: umlaut conversion does have encoding issues.
        actual = Utils.normalize_lineendings(pypandoc_result)
        expected = Utils.normalize_lineendings(expected_html5_result)
        self.assertEqual(actual, expected)
Ejemplo n.º 22
0
def _init_settings():
    """Load the YAML settings files into module globals and probe pandoc.

    Each of ``settings.yml``, ``static/settings.yml`` and
    ``local_settings.yml`` (below ``BASE_DIR/lerna``) is applied in order.
    Top-level keys are handled as follows:

    * ``PREPEND``  - each value is put in front of the existing global.
    * ``APPEND``   - each value is added to the end of the existing global.
    * ``OVERRIDE`` - individual keys of existing mapping globals are set.
    * anything else is assigned as a global of that name.

    The custom ``!path`` YAML tag resolves a scalar relative to ``BASE_DIR``.

    If pypandoc and a pandoc binary are available, a sample conversion is
    run, ``PANDOC['REQUIRED']`` is set and ``PANDOC['FILTERS']`` is reduced
    to the filters that actually work.

    :raises Exception: if pandoc is found but the sample conversion returns
        unexpected output.
    """
    import yaml

    def adjust_path(loader, node):
        return os.path.join(BASE_DIR, loader.construct_scalar(node))

    yaml.add_constructor('!path', adjust_path)

    configuration_files = ('settings.yml', 'static/settings.yml', 'local_settings.yml')
    for filename in configuration_files:
        with open(os.path.join(BASE_DIR, 'lerna', filename), encoding='utf-8-sig') as f:
            # An explicit Loader is mandatory in PyYAML >= 6. yaml.Loader is
            # the class yaml.add_constructor registers '!path' on by default
            # in every PyYAML version.
            # NOTE: yaml.Loader can construct arbitrary Python objects; that
            # is acceptable only because these are the project's own settings
            # files. Never point this at untrusted input.
            settings_data = yaml.load(f, Loader=yaml.Loader)

        # An empty settings file parses to None; treat it as "no settings".
        for yml_key, yml_data in (settings_data or {}).items():
            if yml_key == 'PREPEND':
                for key, value in yml_data.items():
                    globals()[key] = value + globals()[key]
            elif yml_key == 'APPEND':
                for key, value in yml_data.items():
                    globals()[key] += value
            elif yml_key == 'OVERRIDE':
                for cnf_name, sub_data in yml_data.items():
                    cnf = globals()[cnf_name]
                    for key, value in sub_data.items():
                        cnf[key] = value
            else:
                globals()[yml_key] = yml_data

    # TODO: Log every failure.
    try:
        import pypandoc as pd
    except ImportError:
        pass
    else:
        try:
            pd.get_pandoc_version()
        except OSError:
            pass
        else:
            output = pd.convert_text('', 'html', format='latex')
            if output not in ('', '\n'):
                raise Exception('pandoc is found, but has not passed a sample test (%r)' % output)

            def check_filter(f):
                """Return True if pandoc can run an empty conversion with filter ``f``."""
                try:
                    pd.convert_text('', 'html', format='latex', filters=[f])
                    return True
                except RuntimeError:
                    return False

            PANDOC['REQUIRED'] = True
            PANDOC['FILTERS'] = list(filter(check_filter, PANDOC['FILTERS']))
Ejemplo n.º 23
0
def rst_to_notebook(infile, outfile):
    """Convert an rst file to a notebook file.

    The rst text is converted to GitHub-flavoured Markdown by pandoc, links
    to ``.py`` files are retargeted to ``.ipynb``, and the result is wrapped
    in triple quotes so ``py2jn`` turns it into a notebook, which is written
    to ``outfile`` as an nbformat 4 notebook.

    :param infile: path of the rst file to read.
    :param outfile: path of the notebook file to write.
    """

    # Read infile into a string
    with open(infile, 'r') as fin:
        rststr = fin.read()
    # Convert string from rst to markdown
    mdfmt = 'markdown_github+tex_math_dollars+fenced_code_attributes'
    mdstr = pypandoc.convert_text(rststr, mdfmt, format='rst',
                                  extra_args=['--atx-headers'])
    # In links, replace .py extensions with .ipynb. The dot is escaped so
    # that only a literal ".py" suffix matches; unescaped it matched any
    # character and rewrote e.g. "(happy)" to "(ha.ipynb)".
    mdstr = re.sub(r'\(([^\)]+)\.py\)', r'(\1.ipynb)', mdstr)
    # Enclose the markdown within triple quotes and convert from
    # python to notebook
    mdstr = '"""' + mdstr + '"""'
    nb = py2jn.py_string_to_notebook(mdstr)
    py2jn.tools.write_notebook(nb, outfile, nbver=4)
Ejemplo n.º 24
0
def exercise(src):
    """Build a LaTeX ``Exercise`` environment from an HTML fragment string.

    The first fragment supplies the title (text of its ``h1`` with the
    "Exercise: " prefix removed); the text of the second fragment is the
    question, converted from Markdown to LaTeX. Further fragments are
    ignored.
    """
    heading_part, question_part, *_ = html.fragments_fromstring(src)

    heading_text = heading_part.find('h1').text_content().strip()
    title = heading_text.replace("Exercise: ", "")
    question = convert_text(question_part.text, "latex", format="markdown")

    template = dedent('''\

    ---

    \\begin{{Exercise}}[title={{{title}}}]
    {question}
    \\end{{Exercise}}
    ''')
    return template.format(title=title, question=question)
Ejemplo n.º 25
0
def convert_md_2_rst_process(filename_root):
    """Convert ``<filename_root>.md`` to ``<filename_root>.rst`` with pandoc.

    Reads the Markdown file, converts its content with
    ``pypandoc.convert_text`` and writes the result next to the source.
    This is Python 2 code (print statement).
    """
    filename_source = filename_root + ".md"
    filename_target = filename_root + ".rst"
    # convert_text variant
    print 'Converting', os.path.basename(filename_source), 'to', os.path.basename(filename_target)
    file_source = open(filename_source)
    lines = file_source.readlines()
    file_source.close()
    # NOTE(review): readlines() keeps each line's trailing newline, so joining
    # with '\n' puts an extra newline between lines - confirm this is intended.
    data = '\n'.join(lines)
    data = data.encode('utf-8')
    data = pypandoc.convert_text(data, 'rst', format='md')
    file_target = open(filename_target, "w")
    file_target.write(data)
    file_target.flush()
    file_target.close()
    #shutil.move(filename_target, os.path.abspath('.') + '/source/' + os.path.basename(filename_target))
    # convert_file variant
    """
Ejemplo n.º 26
0
def copy_md2rst(infile, outfile):
    """Convert a Markdown file to reStructuredText, skipping its preamble.

    Everything before the ``## Introduction`` heading is removed (if the
    heading exists), the rest is converted with pandoc and written to
    ``outfile`` with ``\\r\\n`` line endings normalised to ``\\n``.

    :param infile: path of the Markdown file; if it cannot be read, the
        conversion runs on empty text.
    :param outfile: path of the rst file to write.
    """
    # Read infile; an unreadable file is deliberately treated as empty input.
    try:
        with open(infile) as f:
            text = f.read()
    except Exception:
        text = ''

    # Strip top of file. The slice starts one character past the match, so
    # the first '#' is dropped and the text begins with "# Introduction"
    # (presumably to promote the heading one level - confirm with the author).
    start = text.find('## Introduction')
    if start != -1:
        text = text[start + 1:]

    # Write to outfile
    with open(outfile, 'w') as f:
        f.write(pypandoc.convert_text(text, 'rst', format='md').replace('\r\n', '\n'))
Ejemplo n.º 27
0
def pandoc_process(app, what, name, obj, options, lines):
    """Convert docstring lines from Markdown to reStructuredText with pandoc.

    The input format is taken from ``app.config.mkdsupport_use_parser``.
    ``lines`` is modified in place; nothing is done when it is empty.
    """
    if not lines:
        return None

    source_format = app.config.mkdsupport_use_parser

    # sphinx.ext.autodoc hands over unicode strings and
    # pypandoc.convert_text both accepts and returns unicode, so no
    # encoding handling is needed here.
    converted = pypandoc.convert_text(SEP.join(lines), 'rst', format=source_format)

    # Sphinx keeps a reference to this list, so replace its content in place
    # instead of rebinding the name.
    lines[:] = converted.split(SEP)
Ejemplo n.º 28
0
  def renderer(self, text):
    """
    Render a flat page to HTML with pandoc.

    When ``self.pre_render`` is set the text is first rendered as a template
    string. The pandoc call uses the crossref, citeproc and sidenote
    filters; "smart" typography is requested with ``-S`` for pandoc 1.x and
    with the ``+smart`` extension for later versions.

    :param text: the text of the flat page
    :type text: string
    """
    if self.pre_render:
      text = render_template_string(Markup(text))

    pandoc_args = [
      "--filter=pandoc-crossref",
      "--filter=pandoc-citeproc",
      "--filter=pandoc-sidenote",
      "--standalone",
      "--mathml",
      "--base-header-level=2",
      "--highlight-style", "pygments",
      "--bibliography=pages/all.bib",
      "--csl=pages/lncs.csl",
      "-Mreference-section-title=References",
      "-Mlink-citations=true"
    ]

    major_version = int(pypandoc.get_pandoc_version()[0])
    if major_version >= 2:
      input_format = "markdown+raw_tex+smart+yaml_metadata_block"
    else:
      pandoc_args.append("-S")
      input_format = "markdown+raw_tex+yaml_metadata_block"

    return pypandoc.convert_text(
      text.encode("utf8"),
      'html',
      format=input_format,
      extra_args=pandoc_args
    )
Ejemplo n.º 29
0
def convert(source: str,
            to: str,
            extra_args=(),
            output_file: str = None) -> None:
    """
    Stitch a source document and convert the result with pandoc.

    Parameters
    ----------
    source : str
        Document handed to ``Stitch.stitch``.
    to : str
        Output format.
    extra_args : iterable
        Extra pandoc arguments. ``--standalone``, ``--self-contained`` and
        ``--use-prompt`` are also forwarded to ``Stitch``; ``--use-prompt``
        is removed before pandoc is called.
    output_file : str
        Path to write to; when ``None`` the result is printed.

    Notes
    -----
    Either writes to ``output_file`` or prints to stdout.
    """
    if output_file is None:
        output_name = 'std_out'
    else:
        base_name = os.path.basename(output_file)
        output_name = os.path.splitext(base_name)[0]

    stitcher = Stitch(name=output_name,
                      to=to,
                      standalone='--standalone' in extra_args,
                      self_contained='--self-contained' in extra_args,
                      use_prompt='--use-prompt' in extra_args)
    # --use-prompt is consumed by Stitch above and is not passed to pandoc.
    pandoc_args = [arg for arg in extra_args if arg != '--use-prompt']

    stitched_json = json.dumps(stitcher.stitch(source))
    newdoc = pypandoc.convert_text(stitched_json,
                                   to,
                                   format='json',
                                   extra_args=pandoc_args,
                                   outputfile=output_file)

    if output_file is None:
        print(newdoc)
Ejemplo n.º 30
0
def convert(ctx, name, destination_format, destination_file, list_docs,
            formats):
    """Convert to destination_format and print to stdout or save to file if provided.

    With ``formats`` set, or without a ``destination_format``, the input and
    output formats known to pandoc are listed and the command exits with
    status 0. Exits with status 1 when no document is selected. For
    ``docx``, ``pdf`` and ``odt`` the output always goes to a file named
    after the slugified document name.
    """
    # yew = ctx.obj["YEW"]
    if formats or not destination_format:
        # Separate name so the ``formats`` flag parameter is not shadowed.
        pandoc_formats = pypandoc.get_pandoc_formats()
        click.echo("Input formats:")
        for f in pandoc_formats[0]:
            click.echo("\t" + f)
        click.echo("Output formats:")
        for f in pandoc_formats[1]:
            click.echo("\t" + f)
        sys.exit(0)

    docs = shared.get_document_selection(ctx, name, list_docs)
    if not docs:
        sys.exit(1)
    doc = docs[0]
    click.echo(doc.name)
    click.echo(doc.kind)
    click.echo(destination_format)

    if destination_format in ["docx", "pdf", "odt"]:
        destination_file = "{}.{}".format(slugify(doc.name),
                                          destination_format)

    if destination_file:
        # The source is document text, so convert_text is the right call.
        # The deprecated pypandoc.convert() guesses between "text" and
        # "file path" and reads a file if the text names an existing path.
        pypandoc.convert_text(
            doc.get_content(),
            format=doc.kind,
            to=destination_format,
            outputfile=destination_file,
        )
        click.echo(destination_file)
    else:
        dest = pypandoc.convert_text(doc.get_content(),
                                     format=doc.kind,
                                     to=destination_format)
        click.echo(dest)
    sys.stdout.flush()
Ejemplo n.º 31
0
    def __call__(self, fh, fh_w):
        """Write the events of an iCalendar stream as Org-mode entries.

        Only events between ``self.days`` days before and after now are
        written. Each entry has a headline (summary, plus the location when
        ``self.include_location`` is set), a timestamp range and, when
        present, the description (HTML descriptions are converted to Org
        with pandoc).

        :param fh: readable file object with the iCalendar data.
        :param fh_w: writable file object receiving the Org text.
        :raises IcalError: if the calendar cannot be parsed.
        """
        try:
            cal = Calendar.from_ical(fh.read())
        except ValueError as e:
            msg = "Parsing error: {}".format(e)
            raise IcalError(msg)

        now = datetime.now()
        start = now - timedelta(days=self.days)
        end = now + timedelta(days=self.days)
        events = recurring_ical_events.of(cal).between(start, end)
        for event in tqdm(events):
            # SUMMARY is optional in a VEVENT; a missing one is treated like
            # an empty one so the "(No title)" fallback below can apply
            # instead of a KeyError being raised.
            summary = event.get("SUMMARY", None) or u""
            summary = summary.replace('\\,', ',')
            location = None
            if event.get("LOCATION", None):
                location = event['LOCATION'].replace('\\,', ',')
            if not any((summary, location)):
                summary = u"(No title)"
            else:
                summary += " - " + location if location and self.include_location else ''
            fh_w.write(u"* {}".format(summary))
            fh_w.write(u"\n")
            if isinstance(event["DTSTART"].dt, datetime):
                fh_w.write(u"  {}--{}\n".format(
                    org_datetime(event["DTSTART"].dt, self.tz),
                    org_datetime(event["DTEND"].dt, self.tz)))
            else:
                # all day event; one day is subtracted from DTEND before
                # it is formatted
                fh_w.write(u"  {}--{}\n".format(
                    org_date(event["DTSTART"].dt, timezone('UTC')),
                    org_date(event["DTEND"].dt - timedelta(days=1), timezone('UTC'))))
            description = event.get("DESCRIPTION", None)
            if description:
                # Anything BeautifulSoup finds a tag in is treated as HTML.
                if bool(BeautifulSoup(description, "html.parser").find()):
                    description = pypandoc.convert_text(description, "org", format="html")
                # Replace literal backslash-n and backslash-comma sequences.
                description = '\n'.join(description.split('\\n'))
                description = description.replace('\\,', ',')
                fh_w.write(u"{}\n".format(description))
            fh_w.write(u"\n")
Ejemplo n.º 32
0
    def convert(self, text):
        """Convert Redmine-flavoured textile to GitLab-friendly Markdown.

        Code blocks are wrapped in ``<pre>`` first, pandoc performs the
        textile -> ``markdown_strict`` conversion, and the remaining steps
        patch up constructs pandoc or GitLab handle badly (escaped
        underscores in links, a leading code block, wiki links, nested
        lists, ``>`` block quotes and wiki macros).

        :param text: textile source.
        :return: the Markdown text.
        """
        text = '\n\n'.join([re.sub(self.regexCodeBlock, r'<pre>\1</pre>', block) for block in text.split('\n\n')])

        # convert from textile to markdown
        text = pypandoc.convert_text(text, 'markdown_strict', format='textile')

        # gitlab does not support escaped underscores in a url (???)
        text = re.sub(self.regexHttpLink, self.unescape_link_underscore, text)

        # if the markdown starts with a code block, gitlab will trim the start of the string
        if text[0:4] == '    ':
            text = "Codeblock:\n\n" + text

        # pandoc does not convert everything, notably the [[link|text]] syntax
        # is not handled. So let's fix that.

        # NOTE: the re.sub calls below used to pass
        # `re.MULTILINE | re.DOTALL` as the 4th positional argument. That slot
        # is `count`, not `flags`, so the flags never took effect and at most
        # 24 matches were replaced. The stray argument is dropped so that all
        # matches are replaced. If a pattern needs flags, set them where the
        # pattern is compiled: `flags=` is rejected for precompiled patterns.

        # [[ wikipage | link_text ]] -> [link_text](wikipage)
        text = re.sub(self.regexWikiLinkWithText, self.wiki_link, text)

        # [[ link_url ]] -> [link_url](link_url)
        text = re.sub(self.regexWikiLinkWithoutText, self.wiki_link, text)

        # nested lists, fix at least the common issues
        text = text.replace("    \\#\\*", "    -")
        text = text.replace("    \\*\\#", "    1.")

        # Redmine is using '>' for blockquote, which is not textile
        text = text.replace("&gt; ", ">")

        # wiki note macros
        text = re.sub(self.regexTipMacro, r'---\n**TIP**: \1\n---\n', text)
        text = re.sub(self.regexNoteMacro, r'---\n**NOTE**: \1\n---\n', text)
        text = re.sub(self.regexWarningMacro, r'---\n**WARNING**: \1\n---\n', text)
        text = re.sub(self.regexImportantMacro, r'---\n**IMPORTANT**: \1\n---\n', text)

        # all other macros
        text = re.sub(self.regexAnyMacro, r'\1', text)

        return text
Ejemplo n.º 33
0
 def build_shell_dict(self):
     """
     Collect every value the templates need.

     Note the mapping of the JSON fields: the dict's ``description`` is the
     JSON ``summary`` (trailing spaces removed), while the dict's
     ``summary`` is the JSON ``description`` converted from HTML to rst.

     :return: dict with ``id``, ``name``, ``short_name``, ``profile``,
         ``description`` and ``summary``.
     """
     tool = self.json_file

     shell_dict = {}
     # id / short_name: first space-separated word of the tool name
     shell_dict['id'] = tool['name'].lower().split(' ')[0]
     shell_dict['name'] = Template('GATK4 AUTO $name').substitute(tool)
     shell_dict['short_name'] = tool['name'].split(' ')[0]
     shell_dict['profile'] = self.profile
     shell_dict['description'] = tool['summary'].rstrip(' ')
     shell_dict['summary'] = pypandoc.convert_text(tool['description'],
                                                   'rst',
                                                   format='html')
     return shell_dict
Ejemplo n.º 34
0
def with_markdown(content, space, name):
    """Use pandoc to get markdown from MediaWiki format.

    The page is converted to pandoc's JSON representation, run through the
    ``drop_loose_categories`` and ``rewrite_internal_links`` panflute filters
    and turned back into markdown with ``back_to_markdown``. If any of these
    steps raises, a message is echoed, a line naming the page is appended to
    ``FAILURE_LOG`` and the incoming ``content`` is kept unchanged.

    In both cases the result is passed through ``convert_image_format``
    before being returned. ``space`` is not used by this function.
    """
    try:
        pandoc_json = pypandoc.convert_text(content, 'json',
                                            format='mediawiki')
        document = panflute.load(io.StringIO(pandoc_json))

        panflute.run_filter(drop_loose_categories, doc=document)
        panflute.run_filter(rewrite_internal_links, doc=document)

        content = back_to_markdown(document)
    except Exception:
        # Best effort: report the failure and carry on with the raw content.
        click.echo('Failed to parse content! Continuing ...\n')
        failure_note = ('Failed to parse content. Could not re-write links '
                        'and drop categories for page {}\n').format(name)
        with open(FAILURE_LOG, 'a') as log_file:
            log_file.write(failure_note)

    return convert_image_format(content)
Ejemplo n.º 35
0
def rst_to_notebook(infile, outfile, diridx=False):
    """Convert an rst file to a notebook file.

    Parameters
    ----------
    infile : str
        Path of the rst file to read.
    outfile : str
        Path the notebook (format version 4) is written to.
    diridx : bool, optional
        When true, link targets that contain no ``/`` get ``/index.ipynb``
        appended.
    """

    # Read infile into a string
    with open(infile, 'r') as fin:
        rststr = fin.read()
    # Convert string from rst to markdown
    mdfmt = 'markdown_github+tex_math_dollars+fenced_code_attributes'
    mdstr = pypandoc.convert_text(rststr,
                                  mdfmt,
                                  format='rst',
                                  extra_args=['--atx-headers'])
    # In links, replace .py extensions with .ipynb. The dot is escaped so
    # that only a literal ".py" suffix matches (an unescaped dot would also
    # rewrite targets such as "(numpy)" or "(foo_py)").
    mdstr = re.sub(r'\(([^\)]+)\.py\)', r'(\1.ipynb)', mdstr)
    # Links to subdirectories require explicit index file inclusion
    # NOTE(review): this also matches slash-free targets rewritten by the
    # previous step (e.g. "](foo.ipynb)"); confirm callers only set diridx
    # for pages that link to directories.
    if diridx:
        mdstr = re.sub(r']\(([^\)/]+)\)', r'](\1/index.ipynb)', mdstr)
    # Enclose the markdown within triple quotes and convert from
    # python to notebook
    mdstr = '"""' + mdstr + '"""'
    nb = py2jn.py_string_to_notebook(mdstr)
    py2jn.tools.write_notebook(nb, outfile, nbver=4)
Ejemplo n.º 36
0
def show_dataobj(dataobj_id):
    """Show a single data object.

    Redirects to "/" with a flash message when ``data.get_item`` yields
    nothing for ``dataobj_id``. With the query argument ``raw=1`` the object
    is returned as dumped by ``frontmatter.dumps``; otherwise its content is
    converted from markdown to standalone HTML by pandoc and rendered with
    the ``dataobjs/show.html`` template.
    """
    dataobj = data.get_item(dataobj_id)
    if not dataobj:
        flash("Data could not be found!")
        return redirect("/")

    wants_raw = request.args.get("raw") == "1"
    if wants_raw:
        return frontmatter.dumps(dataobj)

    highlight_flag = ("--highlight-style="
                      + app.config['PANDOC_HIGHLIGHT_THEME'])
    rendered = pypandoc.convert_text(
        dataobj.content,
        'html',
        format='md',
        extra_args=[highlight_flag, "--standalone"],
    )
    template_context = {
        "title": dataobj["title"],
        "dataobj": dataobj,
        "content": rendered,
        "form": forms.DeleteDataForm(),
    }
    return render_template("dataobjs/show.html", **template_context)
Ejemplo n.º 37
0
    def test_basic_conversion_to_file(self):
        """With ``outputfile`` the result goes to the file, not the return value."""
        with closed_tempfile('.rst', ) as file_name:
            expected = u'some title{0}=========={0}{0}'.format(os.linesep)
            received = pypandoc.convert_text(
                '# some title\n', to='rst', format='md',
                outputfile=file_name)
            # Nothing is returned when the output is written to a file.
            self.assertEqualExceptForNewlineEnd("", received)
            with io.open(file_name) as handle:
                written = handle.read()
            self.assertEqualExceptForNewlineEnd(expected, written)

        # to odf does not work without a file
        with self.assertRaisesRegex(RuntimeError,
                                    "Invalid output format! Got odf but "):
            pypandoc.convert_text(
                '# some title\n', to='odf', format='md', outputfile=None)
Ejemplo n.º 38
0
    def test_pdf_conversion(self):
        """PDF output works with a ``.pdf`` output file and fails otherwise."""
        source = '# some title\n'

        with closed_tempfile('.pdf') as file_name:
            ret = pypandoc.convert_text(
                source, to='pdf', format='md', outputfile=file_name)
            assert ret == ""
            with io.open(file_name, mode='rb') as handle:
                written = handle.read()
            assert written[:4] == b"%PDF"
            # TODO: find a test for the content?

        # needs an outputfile
        with self.assertRaises(RuntimeError):
            pypandoc.convert_text(source, to='pdf', format='md')

        # outputfile needs to end in pdf
        with closed_tempfile('.WRONG') as file_name:
            with self.assertRaises(RuntimeError):
                pypandoc.convert_text(
                    source, to='pdf', format='md', outputfile=file_name)

        # no extensions allowed
        with closed_tempfile('.pdf') as file_name:
            with self.assertRaises(RuntimeError):
                pypandoc.convert_text(
                    source, to='pdf+somethign', format='md',
                    outputfile=file_name)
Ejemplo n.º 39
0
def markdown_to_reveal(text: str, config: Config) -> str:
    """
    Convert Markdown text into a reveal.js HTML string.

    Parameters
    ----------
    text
        Markdown text to convert to HTML.
    config
        Markdownreveal configuration; the ``katex`` and ``emoji_codes``
        entries are read here, and the whole object is handed to the
        helpers that build the remaining pandoc arguments.

    Returns
    -------
        The converted string, after ``tweak_html`` has been applied.
    """
    pandoc_args = ['-s', '--slide-level=2', '-V', 'revealjs-url=revealjs']

    if config['katex']:
        # The KaTeX command-line options differ before and after pandoc 2.0.
        is_legacy_pandoc = (
            LooseVersion(get_pandoc_version()) < LooseVersion('2.0'))
        if is_legacy_pandoc:
            pandoc_args += [
                '--katex=katex/katex.min.js',
                '--katex-stylesheet=katex/katex.min.css',
            ]
        else:
            pandoc_args.append('--katex=katex/')

    pandoc_args.extend(pandoc_extra_to_args(config))
    pandoc_args.extend(reveal_extra_to_args(config))

    source_format = 'markdown+emoji' if config['emoji_codes'] else 'markdown'
    converted = convert_text(source=text,
                             format=source_format,
                             to='revealjs',
                             extra_args=pandoc_args)

    # HTML substitution
    return tweak_html(converted, config)
Ejemplo n.º 40
0
    def refresh_page(self):
        """Render the edit pane's markdown as HTML and load it into the view."""
        stylesheet_text = parse_stylesheet(
            get_resource('ViewPaneStyle.css'), CONSTANTS.theme)

        # pandoc receives the stylesheet as a path, so the parsed CSS is
        # written to disk before the conversion.
        with open(get_resource("parsed_stylesheet.css"), "w") as css_file:
            css_file.write(stylesheet_text)

        # Convert markdown to html using pandoc
        markdown_source = self.edit_pane.toPlainText()
        pandoc_args = [
            f"--highlight-style={get_resource('syntax.theme')}",
            "-s",
            f"--css={get_resource('parsed_stylesheet.css')}",
            f"--katex={get_resource('katex/')}",
        ]
        html = pypandoc.convert_text(markdown_source,
                                     "html",
                                     format="markdown",
                                     extra_args=pandoc_args)

        base_url = QtCore.QUrl().fromLocalFile(self.edit_pane.current_file)
        self.setHtml(html, base_url)
Ejemplo n.º 41
0
    def gen():
        """Yield one ``type="option"`` record per ``(name, option)`` pair.

        ``options`` comes from the enclosing scope. Defaults and examples
        are serialised with ``json.dumps``; for an example dict tagged
        ``_type == "literalExample"`` only its ``text`` entry is serialised.
        Descriptions that contain XML child elements are converted from
        DocBook to HTML with pandoc; plain-text descriptions are passed
        through untouched.
        """
        for name, option in options:
            default = option.get("default")
            if default is not None:
                default = json.dumps(default)

            example = option.get("example")
            if example is not None:
                # isinstance instead of an exact type comparison, so dict
                # subclasses are recognised as well.
                if (isinstance(example, dict)
                        and example.get("_type") == "literalExample"):
                    example = json.dumps(example["text"])
                else:
                    example = json.dumps(example)

            description = option.get("description")
            if description is not None:
                xml_description = (
                    '<xml xmlns:xlink="http://www.w3.org/1999/xlink">'
                    f"<para>{description}</para>"
                    "</xml>"
                )
                # we first check if there are some xml elements before using pypandoc
                # since pypandoc calls are quite slow
                root = xml.etree.ElementTree.fromstring(xml_description)
                # len() of an Element is its number of direct children.
                if len(root.find("para")) > 0:
                    description = pypandoc.convert_text(
                        xml_description, "html", format="docbook",
                    )

            yield dict(
                type="option",
                option_name=name,
                option_name_query=parse_query(name),
                option_description=description,
                option_type=option.get("type"),
                option_default=default,
                option_example=example,
                option_source=option.get("declarations", [None])[0],
            )
Ejemplo n.º 42
0
    def handle_law_from_xml(self, book, book_xml) -> LawBook:
        """Create the sections and laws of ``book`` from its DocBook XML.

        Every ``sect1`` element becomes a section of the book, and every
        ``sect2`` inside it becomes a saved ``Law`` whose content is the
        element's children (without the title) converted to HTML by pandoc.
        Laws are numbered consecutively across all sections and each one is
        linked to the law saved before it. Returns ``book``.
        """
        previous_law = None
        law_order = 1

        # Parse XML tree
        tree = etree.fromstring(book_xml)

        for sect in tree.xpath('sect1'):
            section_title = sect.xpath('title/text()')[0]
            logger.debug('Section: %s' % section_title)

            book.add_section(from_order=law_order, title=section_title.strip())

            for law_raw in sect.xpath('sect2'):
                # Detach the title so that it is not part of the law body.
                title_node = law_raw.xpath('title')[0]
                title_node.getparent().remove(title_node)

                child_markup = [tostring(child).decode('utf-8')
                                for child in law_raw.iterchildren()]
                law_text = pypandoc.convert_text(
                    '\n'.join(child_markup), 'html', format='docbook')
                law_section = tostring(
                    title_node, method="text").decode('utf-8').strip()

                law = Law(
                    book=book,
                    title='',
                    section=law_section,
                    slug=slugify(law_section),
                    content=law_text,
                    previous=previous_law,
                    order=law_order,
                )
                law.save()

                previous_law = law
                law_order += 1

        return book
Ejemplo n.º 43
0
 def convert_issue_data(self, redmine_issue):
     """
     Build the payload for a new GitHub issue from a Redmine issue.

     The textile description is converted to GitHub markdown and prefixed
     with a note giving the Redmine id and creation date (plus the closing
     date when ``self.is_closed`` reports the issue as closed).
     """
     description_md = convert_text(
         redmine_issue['description'], 'markdown_github', 'textile')

     # Only the date part (before the 'T') of the timestamps is shown.
     created_day = redmine_issue['created_on'].split('T')[0]
     porting_note = '###### ported from Redmine #%s (created %s)' % (
         redmine_issue['id'], created_day)
     if self.is_closed(redmine_issue):
         closed_day = redmine_issue['closed_on'].split('T')[0]
         porting_note = '%s (CLOSED %s)' % (porting_note, closed_day)

     issue = {
         "title": "%(subject)s (RM#%(id)s)" % redmine_issue,
         "body": "%s\n\n%s" % (porting_note, description_md),
         "assignees": ["adam-iris"],
     }
     return issue
Ejemplo n.º 44
0
def pdf(data, check_type):
    """
    Generate a PDF file and return its path.

    The markup produced by ``_pdf_string(data, check_type)`` is converted
    from HTML to PDF by pandoc and written to
    ``<MEDIA_FOLDER>/<data['id']>_<check_type>.pdf``.

    :param data: mapping read for its ``id`` entry and passed on to
        ``_pdf_string``.
    :param check_type: passed on to ``_pdf_string`` and used in the file name.
    :return: path of the generated PDF file.
    """

    rendered = _pdf_string(data, check_type)

    document_id = data['id']
    outputfile = os.path.join(app.config['MEDIA_FOLDER'],
                              f'{document_id}_{check_type}.pdf')

    # With an outputfile the conversion result is written to disk, so the
    # return value of convert_text is not needed.
    # NOTE(review): newer pandoc releases spell this option --pdf-engine;
    # confirm which pandoc version is deployed.
    pypandoc.convert_text(rendered,
                          'pdf',
                          format='html',
                          outputfile=outputfile,
                          extra_args=[
                              '--latex-engine=xelatex', '-V',
                              'mainfont="FreeSerifBold"'
                          ])

    return outputfile
Ejemplo n.º 45
0
def sendEmail(display_config_params,
              coordinates,
              address,
              to,
              sending_email,
              message=None):  #TODO
    """Render the map image and mail it as an attachment.

    ``display`` is called with ``display_config_params``, ``coordinates``
    and a fixed image path to produce the attachment. When ``message`` is
    None the mail body is a short "Sent with bounds" line converted from
    markdown to HTML; otherwise ``message`` is used as given.

    ``address`` is handed to ``yag.send`` as its second positional argument
    (presumably the subject line; verify against the yagmail API).
    """
    yag = yagmail.SMTP(sending_email,
                       oauth2_file="~/.config/bounds/gm_oauth2.json")

    if message is not None:
        contents = message
    else:
        default_body = pypandoc.convert_text(
            'Sent with [bounds](https://github.com/hdb/bounds)',
            to='html',
            format='md')
        contents = [default_body]

    img_file = '/tmp/folium.png'
    display(*display_config_params, coordinates, img_file)
    yag.send(to, address, contents, attachments=img_file)
Ejemplo n.º 46
0
    def make_cloze_roles(self):
        """
        Turn ``{{cN::text}}`` clozes in ``self.text`` into rst roles.

        Returns a ``(roles, text)`` tuple: ``roles`` holds one
        ``.. role:: cN(emphasis)`` line per cloze id found, and ``text`` is
        the HTML converted to rst with every cloze written as a ``:cN:`` role.

        Pandoc sometimes escapes the closing backtick of a cloze, so a
        ``removeme`` marker is placed in front of it and stripped again
        together with any backslashes that follow. Pandoc also inserts
        single newlines; these are replaced by spaces because cloze text is
        meant to be short.
        """
        marked = re.sub(
            r'{{c([0-9]+)::(.+?)}}',
            lambda m: ':c' + m.group(1) + ':`' + m.group(2) + 'removeme`',
            self.text,
        )
        # Unique cloze ids (c1, c2, ...) in sorted order.
        cloze_ids = sorted(
            {hit[1] for hit in re.findall('(:)(c[0-9]+)(:)', marked)})

        converted = pypandoc.convert_text(
            marked, 'rst', format='html', extra_args=['--wrap=preserve'])
        without_marker = re.sub(r'removeme\\*', '', converted)
        # Only lone line breaks are joined; blank lines are kept.
        text = re.sub('(?<![\n])(\r?\n)(?![\n])', ' ', without_marker)

        roles = '\n'.join(
            '.. role:: ' + cloze_id + '(emphasis)' for cloze_id in cloze_ids)

        return (roles, text.strip())
Ejemplo n.º 47
0
    def previewNewsItem(self, item):
        """Return a UTF-8 encoded HTML preview of a news item.

        1. Take leading lines of the markdown content until the preview
           reaches ``self.PREVIEW_LIMIT`` characters.
        2. Append `...` at the end to signify `more`.
        3. Append the link reference definitions of the markdown content,
           so that reference-style links in the preview still resolve.
        4. Convert the resulting content to HTML.
        """
        lines = item.contents.splitlines(True)
        # Compose preview section.
        preview = ""
        for ln in lines:
            preview += ln
            if len(preview) >= self.PREVIEW_LIMIT:
                break
        preview += "..."
        # Compose links section. The pattern is a raw string: "\[" is an
        # invalid escape sequence in an ordinary string literal.
        links = "".join(ln for ln in lines if re.match(r"\[.+\]:.+", ln))

        contents = preview + "\n\n\n" + links
        html = pypandoc.convert_text(contents, "html", format="md")
        return html.encode("utf-8")
Ejemplo n.º 48
0
def newsExcerpt(article):
    """Return an HTML excerpt of ``article.contents`` (markdown).

    1. Take leading lines of the markdown content until the excerpt
       reaches 250 characters.
    2. Append `...` at the end to signify `more`.
    3. Append the link reference definitions of the markdown content,
       so that reference-style links in the excerpt still resolve.
    4. Convert the resulting content to HTML.
    """
    charsLimit = 250
    lines = article.contents.splitlines(True)
    # Compose excerpt section.
    excerpt = ""
    for ln in lines:
        excerpt += ln
        if len(excerpt) >= charsLimit:
            break
    excerpt += "..."
    # Compose links section. The pattern is a raw string: "\[" is an
    # invalid escape sequence in an ordinary string literal.
    links = "".join(ln for ln in lines if re.match(r"\[.+\]:.+", ln))

    content = excerpt + "\n\n\n" + links
    html = pypandoc.convert_text(content, "html", format="md")
    return html
Ejemplo n.º 49
0
def _convert_md_table_to_rst(table):
    """Convert a markdown table to rst format"""
    if len(table) < 3:
        return ''
    out = '```eval_rst\n.. list-table::\n   :header-rows: 1\n\n'
    for i, l in enumerate(table):
        cols = l.split('|')[1:-1]
        if i == 0:
            ncol = len(cols)
        else:
            if len(cols) != ncol:
                return ''
        if i == 1:
            for c in cols:
                if len(c) is not 0 and '---' not in c:
                    return ''
        else:
            for j, c in enumerate(cols):
                out += '   * - ' if j == 0 else '     - '
                out += pypandoc.convert_text(c, 'rst', format='md').replace(
                    '\n', ' ').replace('\r', '') + '\n'
    out += '```\n'
    return out
Ejemplo n.º 50
0
def getfeed(mastodon, limit=10):
    """Return the home timeline as a list of formatted toots, oldest first.

    Reblogs are skipped. Each remaining toot becomes a dict with the keys
    ``content`` (HTML converted to plain text, wrapped at 80 columns and
    prefixed with ``  | ``), ``author`` and ``timestamp``. A toot that lacks
    one of the expected keys is left out and its keys are printed instead.
    """
    formatted = []
    timeline = mastodon.timeline_home(limit=limit)

    for t in reversed(timeline):
        if t['reblog'] is not None:
            continue

        try:
            plain = pypandoc.convert_text(t['content'], 'plain', format='html')
            wrapped = '\n'.join(textwrap.wrap(plain, 80))
            entry = {
                'content': textwrap.indent(wrapped, '  | '),
                'author': t['account']['display_name'],
                'timestamp': t['created_at'],
            }
            formatted.append(entry)
        except KeyError:
            print(t.keys())

    return formatted
Ejemplo n.º 51
0
def _convert_md_table_to_rst(table):
    """Convert a markdown table to rst format"""
    if len(table) < 3:
        return ''
    out = '```eval_rst\n.. list-table::\n   :header-rows: 1\n\n'
    for i,l in enumerate(table):
        cols = l.split('|')[1:-1]
        if i == 0:
            ncol = len(cols)
        else:
            if len(cols) != ncol:
                return ''
        if i == 1:
            for c in cols:
                if len(c) is not 0 and '---' not in c:
                    return ''
        else:
            for j,c in enumerate(cols):
                out += '   * - ' if j == 0 else '     - '
                out += pypandoc.convert_text(
                    c, 'rst', format='md').replace('\n', ' ').replace('\r', '') + '\n'
    out += '```\n'
    return out
Ejemplo n.º 52
0
def download_wikis(fb):
    """Download every article of every wiki and write it out as markdown.

    For each wiki returned by ``fb.listWikis()`` a directory named after
    the wiki is created if it does not exist yet. Each article body is
    converted from HTML to markdown by pandoc and written into that
    directory. A failed conversion is reported on stdout and the loop
    carries on with the next article.
    """
    resp = fb.listWikis()

    for wiki in resp.wikis.childGenerator():
        wiki_id = wiki.ixWiki.string
        wiki_name = wiki.sWiki.string
        print(wiki_id, wiki_name)

        # Create a subdirectory with the name of the wiki
        if not os.path.exists(wiki_name):
            os.mkdir(wiki_name)

        article_ids = get_article_ids(fb, wiki_id)
        for article_id in article_ids:
            article = fb.viewArticle(ixWikiPage=article_id)

            headline = article.wikipage.sHeadline.string
            body = article.wikipage.sBody.string
            print(headline)

            # Slashes are dropped so the headline is a single path component.
            # NOTE(review): the file keeps an '.html' suffix although the
            # content written is markdown; confirm whether that is intended.
            filename = headline.replace('/', '') + '.html'
            path = os.path.join(wiki_name, filename)

            # Convert to markdown and write
            try:
                pypandoc.convert_text(body, to='md', format='html',
                                      outputfile=path)
            except Exception:
                # Exception rather than a bare except, so that
                # KeyboardInterrupt / SystemExit still stop the download.
                print("Unable to write {} - {}".format(wiki_name, headline))
Ejemplo n.º 53
0
def fill_notebook(work_notebook, script_blocks, gallery_conf):
    """Write the Jupyter notebook cells.

    Code blocks are added as code cells. Every other block is converted
    from rst to markdown and added as a markdown cell; pypandoc performs
    the conversion unless ``gallery_conf["pypandoc"]`` is ``False``, in
    which case ``rst2md`` is used.

    Parameters
    ----------
    work_notebook
        Notebook object handed to ``add_code_cell`` / ``add_markdown_cell``.
    script_blocks : list
        Each list element should be a tuple of (label, content, lineno).
    gallery_conf : dict
        Its ``"pypandoc"`` entry is either ``False`` or a mapping of extra
        keyword arguments for ``pypandoc.convert_text``.
    """
    for blabel, bcontent, lineno in script_blocks:
        if blabel == 'code':
            add_code_cell(work_notebook, bcontent)
            continue

        pandoc_kwargs = gallery_conf["pypandoc"]
        if pandoc_kwargs is False:
            markdown = rst2md(bcontent + '\n')
        else:
            import pypandoc
            # pandoc appends the trailing newline on its own
            markdown = pypandoc.convert_text(
                bcontent, to='md', format='rst', **pandoc_kwargs)
        add_markdown_cell(work_notebook, markdown)
Ejemplo n.º 54
0
    def html(self, pandoc=False):
        """Return the note content as HTML wrapped in a ``div`` with id ``content``.

        ``render_markdown`` does the conversion by default; with
        ``pandoc=True`` pypandoc is used together with the ``pandoc-xnos``
        filter (still work in progress).
        """
        if not pandoc:
            body = render_markdown(self.content)
        else:
            # Still WIP
            import pypandoc
            body = pypandoc.convert_text(self.content,
                                         'html',
                                         format='md',
                                         filters=['pandoc-xnos'],
                                         extra_args=[])

        # The wrapping div gives the stylesheet something to target.
        return f"<div id=\"content\">{body}</div>"
Ejemplo n.º 55
0
def make_tex(metadata_dict, markdown_text):
    """Convert markdown plus metadata into LaTeX source.

    ``metadata_dict`` is dumped as a YAML block in front of
    ``markdown_text``. Pandoc then converts the document using natbib, the
    ``refs.bib`` bibliography, the ``assets/template.latex`` template from
    the current working directory and the ``pandoc-crossref`` filter.
    Tables without placement options are set to ``[htpb]`` and centred.
    """
    front_matter = yaml.dump(metadata_dict, default_flow_style=False)
    document = '---\n' + front_matter + '\n---\n\n' + markdown_text

    template_path = os.path.join(os.getcwd(), 'assets', 'template.latex')
    pandoc_args = (
        '--natbib',
        '--bibliography', 'refs.bib',
        '--template', template_path,
        # Variables
        '-V', 'documentclass:report',
        '-V', 'classoption:a4paper',
        # Filters
        '--filter', 'pandoc-crossref',
    )
    latex_text = convert_text(source=document,
                              to='latex',
                              format='markdown',
                              extra_args=pandoc_args)

    return latex_text.replace(
        '\\begin{table}[]', '\\begin{table}[htpb]\n\\centering')
Ejemplo n.º 56
0
 def md2gopher(self, md):
     """Turn markdown into a gophermap via ``self.html2gopher``.

     Pandoc first rewrites the markdown so that links become reference
     links placed below their block. Each resulting line is then checked:
     lines matching ``MARKDOWN_LINK_PATTERN`` become
     ``self.gopher_menu.html`` entries, lines matching
     ``CORRUPTED_LINE_PATTERN`` (Gopher lines corrupted by pandoc) become
     ``self.gopher_menu.entry`` entries, and all other lines are kept.
     """
     # move links below the current block
     relocated = pypandoc.convert_text(
         md,
         "md",
         format="md",
         extra_args=[
             "--wrap=preserve",
             "--reference-links",
             "--reference-location=block",
         ])

     def to_entry(line):
         # make links into actual links
         link = MARKDOWN_LINK_PATTERN.match(line)
         if link:
             return self.gopher_menu.html(*link.groups())
         # also try to repair Gopher lines corrupted by pandoc
         corrupted = CORRUPTED_LINE_PATTERN.match(line)
         if corrupted:
             return self.gopher_menu.entry(*corrupted.groups())
         return line

     entries = [to_entry(line) for line in relocated.splitlines()]
     # remove remaining HTML tags and make it a gophermap
     return self.html2gopher("\n".join(entries))
Ejemplo n.º 57
0
    def as_github_issue(self):
        import pypandoc
        is_bug = self.type == 'BUG'
        front_matter = f"""
| Reported by          | URL        | OS        | Browser        | Device type   | Bug happened at |
|----------------------|------------|-----------|----------------|---------------| ----------------|
| {self.user.username} | {self.url} | {self.os} | {self.browser} | {self.device} | {self.happened} |


"""
        return {
            'title':
            self.title or self.content[0:30],
            'body':
            (front_matter.strip() if is_bug else '') + pypandoc.convert_text(
                self.content, 'markdown_github', format='html'),
            'assignees': ['ewen-lbh'],
            'labels': [
                'lang:' + self.language, {
                    'FEATURE': 'enhancement',
                    'BUG': 'bug'
                }[self.type], 'from:schoolsyst.com'
            ]
        }
Ejemplo n.º 58
0
def generate_provider_pdf(url, filename, s=None):
    """Collect a provider's documentation pages into ``./<filename>.pdf``.

    The navigation links found on ``url`` are fetched one by one, the
    ``#inner`` element of every page is concatenated, and pandoc converts
    the combined HTML to PDF with xelatex. Only links containing ``/r/`` or
    ``/d/`` are followed. A conversion error is printed, not raised.

    ``s`` is an optional session to reuse; a new ``rh.HTMLSession`` is
    created when it is missing.
    """
    if not s:
        s = rh.HTMLSession()
    index_page = s.get(url)

    nav_anchors = index_page.html.find('.nav-visible a')
    all_links = [anchor.absolute_links.pop() for anchor in nav_anchors]
    # filter out links not data or resource
    wanted_links = (href for href in all_links
                    if '/r/' in href or '/d/' in href)

    print("downloading...")
    fragments = []
    for link in wanted_links:
        page = s.get(link)
        inner = page.html.find('#inner', first=True)
        if inner:
            fragments.append(inner.html)
    html = "".join(fragments)

    print("generating pdf...")
    try:
        pypandoc.convert_text(html,
                              "pdf",
                              format="html",
                              outputfile="./{}.pdf".format(filename),
                              extra_args=['--pdf-engine=xelatex'])
    except Exception as e:
        print(e)
Ejemplo n.º 59
0
def twlight_wikicode2html(value):
    """Convert a MediaWiki markup string to HTML with pandoc."""
    return pypandoc.convert_text(value, to='html', format='mediawiki')
Ejemplo n.º 60
0
 def append_markdown(self, markdown, metadata):
     """Convert markdown to HTML and write it to the output as a 'text' tag.

     ``metadata`` is accepted but ignored, since it is not supported.
     """
     source = _ensure_string(markdown)
     rendered = pypandoc.convert_text(source, 'html', format='md')
     text_tag = self._create_tag('text', rendered)
     self._output.write(text_tag + "\n")