Esempio n. 1
0
def _pandoc_system_call(command_args: List[str], path: Optional[str],
                        text: Optional[str]) -> List[Dict[str, Any]]:
    """
    Call "pandoc citeproc" or "pandoc" using input from a path or text.
    Return dict representing CSL JSON.
    """
    assert command_args[0].startswith("pandoc")
    run_kwargs = {}
    if path:
        command_args.append(os.fspath(path))
    else:
        run_kwargs["input"] = text
    logging.info("load_bibliography subprocess args:\n>>> " +
                 shlex_join(command_args))
    process = subprocess.run(
        command_args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="utf-8",
        **run_kwargs,
    )
    logging.info(f"captured stderr:\n{process.stderr}")
    if process.returncode:
        logging.error(f"Pandoc call returned nonzero exit code.\n"
                      f"{shlex_join(process.args)}\n{process.stderr}")
        return []
    try:
        csl_json = json.loads(process.stdout)
    except (TypeError, json.decoder.JSONDecodeError):
        logging.error(
            f"Error parsing bib2json output as JSON:\n{process.stdout}")
        csl_json = []
    return csl_json
Esempio n. 2
0
def test_example_manuscript(manuscript):
    """
    Run `manubot process` from the command line on an example manuscript
    and require a zero exit code.

    The command and the captured stderr are printed so they are available
    when the assertion fails.
    """
    manuscript_dir = directory.joinpath("manuscripts", manuscript)
    content_dir = manuscript_dir.joinpath("content")
    output_dir = manuscript_dir.joinpath("output")
    command = ["manubot", "process"]
    command += ["--log-level", "INFO"]
    command += ["--skip-citations"]
    command += ["--content-directory", str(content_dir)]
    command += ["--output-directory", str(output_dir)]
    # Only the "variables" manuscript is given a template variables file.
    if manuscript == "variables":
        variables_path = manuscript_dir.joinpath("content/template-variables.json")
        command += ["--template-variables-path", str(variables_path)]
    process = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    print(shlex_join(process.args))
    print(process.stderr)
    assert process.returncode == 0
Esempio n. 3
0
def load_bibliography(path=None, text=None, input_format=None):
    """
    Convert a bibliography to CSL JSON using `pandoc-citeproc --bib2json`.
    Accepts either a bibliography path or text (string). If supplying text,
    pandoc-citeproc will likely require input_format be specified.
    The CSL JSON is returned as Python objects.

    Parameters
    ----------
    path : str, pathlike, or None
        Path to a bibliography file. Extension is used by pandoc-citeproc to infer the
        format of the input.
    text : str or None
        Text representation of the bibliography, such as a JSON-formatted string.
        `input_format` should be specified if providing text input.
    input_format : str or None
        Manually specified input format that is supported by pandoc-citeproc:
        https://github.com/jgm/pandoc-citeproc/blob/master/man/pandoc-citeproc.1.md#options

    Returns
    -------
    csl_json : JSON-like object
        CSL JSON Data for the references encoded by the input bibliography.
        An empty list is returned when pandoc-citeproc is not available or
        when its output cannot be parsed as JSON.

    Raises
    ------
    ValueError
        If both or neither of `path` and `text` are provided.
    subprocess.CalledProcessError
        If pandoc-citeproc exits with a nonzero return code.
    """
    # Exactly one of path / text must be supplied.
    if (path is None) == (text is None):
        raise ValueError(
            "load_bibliography: specify either path or text but not both.")
    if not get_pandoc_info()["pandoc-citeproc"]:
        logging.error(
            "pandoc-citeproc not found on system: manubot.pandoc.bibliography.load_bibliography returning empty CSL JSON"
        )
        return []
    args = ["pandoc-citeproc", "--bib2json"]
    if input_format:
        args.extend(["--format", input_format])
    run_kwargs = {}
    if text is None:
        # Path input: pandoc-citeproc reads the file itself.
        args.append(str(path))
    else:
        # Text input: piped to the subprocess on stdin.
        run_kwargs["input"] = text
    # Label the log line with this function's name so the command is
    # attributed to the right caller.
    logging.info("load_bibliography subprocess args:\n>>> " + shlex_join(args))
    process = subprocess.run(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="utf-8",
        **run_kwargs,
    )
    logging.info(f"captured stderr:\n{process.stderr}")
    process.check_returncode()
    try:
        csl_json = json.loads(process.stdout)
    except Exception:
        # Best effort: log the traceback and fall back to no references.
        logging.exception(
            f"Error parsing bib2json output as JSON:\n{process.stdout}")
        csl_json = []
    return csl_json
Esempio n. 4
0
def test_cite_pandoc_filter():
    r"""
    Run pandoc with the pandoc-manubot-cite and pandoc-citeproc filters on
    test_cite_filter/input.md and compare the plain-text stdout, ignoring
    case, with test_cite_filter/output.txt.

    The test is skipped when the system's pandoc is older than 1.12.

    ```shell
    # Command to regenerate the expected output
    pandoc \
      --to=plain \
      --wrap=preserve \
      --csl=https://github.com/manubot/rootstock/raw/8b9b5ced2c7c963bf3ea5afb8f31f9a4a54ab697/build/assets/style.csl \
      --output=manubot/pandoc/tests/test_cite_filter/output.txt \
      --bibliography=manubot/pandoc/tests/test_cite_filter/bibliography.json \
      --bibliography=manubot/pandoc/tests/test_cite_filter/bibliography.bib \
      --filter=pandoc-manubot-cite \
      --filter=pandoc-citeproc \
      manubot/pandoc/tests/test_cite_filter/input.md
    ```
    """
    if get_pandoc_info()["pandoc version"] < (1, 12):
        pytest.skip("Test requires pandoc >= 1.12 to support --filter")
    data_dir = directory.joinpath("test_cite_filter")
    input_md = data_dir.joinpath("input.md").read_text(encoding="utf-8-sig")
    expected = data_dir.joinpath("output.txt").read_text(encoding="utf-8-sig")
    csl_url = "https://github.com/manubot/rootstock/raw/8b9b5ced2c7c963bf3ea5afb8f31f9a4a54ab697/build/assets/style.csl"
    command = ["pandoc", "--wrap=preserve", f"--csl={csl_url}"]
    # Both bibliographies live next to the input document.
    for bibliography in ("bibliography.json", "bibliography.bib"):
        command += ["--bibliography", str(data_dir.joinpath(bibliography))]
    command += [
        "--filter=pandoc-manubot-cite",
        "--filter=pandoc-citeproc",
        "--to=plain",
    ]
    process = subprocess.run(
        command,
        input=input_md,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="utf-8",
    )
    print(shlex_join(process.args))
    print(process.stdout)
    print(process.stderr)
    # Comparison is case-insensitive.
    assert process.stdout.lower() == expected.lower()
Esempio n. 5
0
def test_cite_pandoc_filter():
    r"""
    Run pandoc with the pandoc-manubot-cite filter followed by citation
    processing on test_cite_filter/input.md and compare the plain-text
    stdout, ignoring case, with test_cite_filter/output.txt.

    Citation processing uses `--filter=pandoc-citeproc` for pandoc older
    than 2.11 and `--citeproc` otherwise. The test is skipped when the
    system's pandoc is older than 1.12.

    ```shell
    # Command to regenerate the expected output
    pandoc \
      --to=plain \
      --wrap=preserve \
      --output=manubot/pandoc/tests/test_cite_filter/output.txt \
      --filter=pandoc-manubot-cite \
      --filter=pandoc-citeproc \
      manubot/pandoc/tests/test_cite_filter/input.md

    # Command to generate Pandoc JSON input for pandoc-manubot-cite
    pandoc \
      --to=json \
      --wrap=preserve \
      --output=manubot/pandoc/tests/test_cite_filter/filter-input.json \
      manubot/pandoc/tests/test_cite_filter/input.md
    ```
    """
    pandoc_version = get_pandoc_info()["pandoc version"]
    if pandoc_version < (1, 12):
        pytest.skip("Test requires pandoc >= 1.12 to support --filter")
    data_dir = directory.joinpath("test_cite_filter")
    input_md = data_dir.joinpath("input.md").read_text(encoding="utf-8-sig")
    expected = data_dir.joinpath("output.txt").read_text(encoding="utf-8-sig")
    # Pick the citation-processing option that matches the pandoc version.
    if pandoc_version < (2, 11):
        citeproc_option = "--filter=pandoc-citeproc"
    else:
        citeproc_option = "--citeproc"
    command = [
        "pandoc",
        "--wrap=preserve",
        "--filter=pandoc-manubot-cite",
        citeproc_option,
        "--to=plain",
    ]
    process = subprocess.run(
        command,
        input=input_md,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="utf-8",
    )
    print(shlex_join(process.args))
    print(process.stdout)
    print(process.stderr)
    # Comparison is case-insensitive.
    assert process.stdout.lower() == expected.lower()
Esempio n. 6
0
def call_pandoc(metadata, path, format="plain"):
    """
    Feed `metadata` to pandoc as a metadata block on stdin, with the
    pandoc-citeproc filter enabled, and write the result in `format`.

    path is the path to write to; when it is falsy, "-" is passed as the
    pandoc output target instead.

    Raises subprocess.CalledProcessError if pandoc exits with a nonzero
    return code.
    """
    _exit_without_pandoc()
    info = get_pandoc_info()
    _check_pandoc_version(info, metadata, format)
    serialized = json.dumps(metadata, ensure_ascii=False, indent=2)
    metadata_block = f"---\n{serialized}\n...\n"
    output_target = str(path) if path else "-"
    args = ["pandoc", "--filter", "pandoc-citeproc", "--output", output_target]
    # Writer options per supported format; any other format adds none.
    writer_options = {
        "markdown": ["--to", "markdown_strict", "--wrap", "none"],
        "jats": ["--to", "jats", "--standalone"],
        "docx": ["--to", "docx"],
        "html": ["--to", "html"],
        "plain": ["--to", "plain", "--wrap", "none"],
    }
    args.extend(writer_options.get(format, []))
    if format == "plain" and info["pandoc version"] >= (2,):
        # Do not use ALL_CAPS for bold & underscores for italics
        # https://github.com/jgm/pandoc/issues/4834#issuecomment-412972008
        lua_filter = pathlib.Path(__file__).joinpath("..", "plain-pandoc-filter.lua")
        filter_path = lua_filter.resolve()
        assert filter_path.exists()
        args.extend(["--lua-filter", str(filter_path)])
    logging.info("call_pandoc subprocess args:\n" + shlex_join(args))
    process = subprocess.run(args=args, input=metadata_block.encode())
    process.check_returncode()
0
def call_pandoc(metadata, path, format='plain'):
    """
    Feed `metadata` to pandoc as a metadata block on stdin, with the
    pandoc-citeproc filter enabled, and write the result in `format`.

    path is the path to write to; when it is falsy, '-' is passed as the
    pandoc output target and the subprocess stdout is set to sys.stdout.
    The subprocess stderr is always set to sys.stderr.

    Raises subprocess.CalledProcessError if pandoc exits with a nonzero
    return code.
    """
    _exit_without_pandoc()
    info = get_pandoc_info()
    _check_pandoc_version(info, metadata, format)
    serialized = json.dumps(metadata, ensure_ascii=False, indent=2)
    metadata_block = f'---\n{serialized}\n...\n'
    if path:
        output_target = str(path)
    else:
        output_target = '-'
    args = ['pandoc', '--filter', 'pandoc-citeproc', '--output', output_target]
    # Writer options per supported format; any other format adds none.
    options_by_format = {
        'markdown': ('--to', 'markdown_strict', '--wrap', 'none'),
        'jats': ('--to', 'jats', '--standalone'),
        'docx': ('--to', 'docx'),
        'html': ('--to', 'html'),
        'plain': ('--to', 'plain', '--wrap', 'none'),
    }
    args.extend(options_by_format.get(format, ()))
    if format == 'plain' and info['pandoc version'] >= (2, ):
        # Do not use ALL_CAPS for bold & underscores for italics
        # https://github.com/jgm/pandoc/issues/4834#issuecomment-412972008
        filter_path = pathlib.Path(__file__).joinpath(
            '..', 'plain-pandoc-filter.lua').resolve()
        assert filter_path.exists()
        args.extend(['--lua-filter', str(filter_path)])
    logging.info('call_pandoc subprocess args:\n' + shlex_join(args))
    # With a path, the subprocess stdout is captured; otherwise it goes to
    # sys.stdout.
    stdout_target = subprocess.PIPE if path else sys.stdout
    process = subprocess.run(
        args=args,
        input=metadata_block.encode(),
        stdout=stdout_target,
        stderr=sys.stderr,
    )
    process.check_returncode()
Esempio n. 8
0
 def render(format_args):
     """
     Run `manubot cite --render` for three fixed citations with the extra
     `format_args` appended, print the command with its stdout and stderr,
     and return the captured stdout.
     """
     csl_url = "https://github.com/greenelab/manubot-rootstock/raw/e83e51dcd89256403bb787c3d9a46e4ee8d04a9e/build/assets/style.csl"
     citations = [
         "arxiv:1806.05726v1",
         "doi:10.7717/peerj.338",
         "pmid:29618526",
     ]
     base_command = ["manubot", "cite", "--render", "--csl", csl_url]
     command = base_command + citations + format_args
     process = subprocess.run(
         command,
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         universal_newlines=True,
     )
     for report in (shlex_join(process.args), process.stdout, process.stderr):
         print(report)
     return process.stdout
Esempio n. 9
0
def test_cite_command_render_stdout(args, expected):
    """
    Test the stdout output of `manubot cite --render` with various formats.

    `args` are extra command line arguments appended to the command and
    `expected` names a file in the cite-command-rendered directory next to
    this test module whose text must equal the command's stdout.

    The output is sensitive to the version of Pandoc used, so rather than fail when
    the system's pandoc is outdated, the test is skipped.
    """
    pandoc_version = get_pandoc_info()['pandoc version']
    # markdown / html / jats expectations assume pandoc >= 2.5
    if pandoc_version < (2, 5):
        for output in ('markdown', 'html', 'jats'):
            if output in args:
                pytest.skip(f"Test {output} output assumes pandoc >= 2.5")
    if pandoc_version < (2, 0):
        pytest.skip(
            "Test requires pandoc >= 2.0 to support --lua-filter and --csl=URL"
        )
    rendered_dir = pathlib.Path(__file__).parent
    expected_text = rendered_dir.joinpath('cite-command-rendered', expected).read_text()
    csl_url = 'https://github.com/greenelab/manubot-rootstock/raw/e83e51dcd89256403bb787c3d9a46e4ee8d04a9e/build/assets/style.csl'
    command = [
        'manubot',
        'cite',
        '--render',
        '--csl',
        csl_url,
        'arxiv:1806.05726v1',
        'doi:10.7717/peerj.338',
        'pmid:29618526',
    ] + args
    process = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    print(shlex_join(process.args))
    print(process.stdout)
    print(process.stderr)
    assert process.stdout == expected_text
Esempio n. 10
0
def test_cite_command_render_stdout(args, filename):
    r"""
    Test the stdout output of `manubot cite --render` with various formats.

    The output is sensitive to the version of Pandoc used, so expected output
    files include the pandoc version stamp in their filename: `filename` is
    a template formatted with the dotted pandoc version.
    When the expected file is missing, the test fails but writes the
    command output to that file. Therefore, subsequent runs of the same test
    will pass. Before committing the auto-generated output, do look to ensure
    its integrity.

    This test uses --bibliography to avoid slow network calls.
    Regenerate the CSL JSON using:

    ```shell
    manubot cite \
      --output=manubot/cite/tests/cite-command-rendered/input-references.json \
      arxiv:1806.05726v1 doi:10.7717/peerj.338 pmid:29618526
    ```
    """
    # locate the expected output for the installed pandoc version
    pandoc_version = get_pandoc_version()
    version_stamp = ".".join(str(part) for part in pandoc_version)
    path = data_dir.joinpath(filename.format(version_stamp))

    # skip test on old pandoc versions
    if pandoc_version < (2, 5):
        for output in ("markdown", "html", "jats"):
            if output in args:
                pytest.skip(f"Test {output} output assumes pandoc >= 2.5")
    if pandoc_version < (2, 0):
        pytest.skip("Test requires pandoc >= 2.0 to support --lua-filter and --csl=URL")

    csl_url = "https://github.com/greenelab/manubot-rootstock/raw/e83e51dcd89256403bb787c3d9a46e4ee8d04a9e/build/assets/style.csl"
    command = [
        "manubot",
        "cite",
        "--bibliography=input-references.json",
        "--render",
        f"--csl={csl_url}",
        "arxiv:1806.05726v1",
        "doi:10.7717/peerj.338",
        "pmid:29618526",
    ] + args
    # The relative --bibliography path is resolved from data_dir via cwd.
    process = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="utf-8",
        cwd=data_dir,
    )
    print(shlex_join(process.args))
    if not path.exists():
        # https://github.com/manubot/manubot/pull/146#discussion_r333132261
        print(
            f"Missing expected output at {path}\n"
            "Writing output to file such that future tests will pass."
        )
        path.write_text(process.stdout, encoding="utf-8")
        assert False

    print(process.stdout)
    print(process.stderr)
    expected = path.read_text(encoding="utf-8-sig")
    assert process.stdout == expected