Ejemplo n.º 1
0
def load_bibliography(path=None, text=None, input_format=None):
    """
    Convert a bibliography to CSL JSON by running `pandoc-citeproc --bib2json`
    in a subprocess and parsing its stdout.
    Exactly one of `path` or `text` must be supplied.

    Parameters
    ----------
    path : str, pathlike, or None
        Path to a bibliography file, passed to pandoc-citeproc as a positional
        argument. pandoc-citeproc uses the extension to infer the input format.
    text : str or None
        Text representation of the bibliography, such as a JSON-formatted string.
        It is sent to the subprocess on stdin. `input_format` should be
        specified when providing text input.
    input_format : str or None
        Input format forwarded as `--format`, when truthy. Supported values:
        https://github.com/jgm/pandoc-citeproc/blob/master/man/pandoc-citeproc.1.md#options

    Returns
    -------
    csl_json : JSON-like object
        CSL JSON Data for the references encoded by the input bibliography.
        An empty list is returned when pandoc-citeproc is not available or when
        its output cannot be parsed as JSON.

    Raises
    ------
    ValueError
        If both or neither of `path` and `text` are provided.
    subprocess.CalledProcessError
        If pandoc-citeproc exits with a non-zero return code.
    """
    path_given = path is not None
    text_given = text is not None
    # Exactly one input source is allowed: reject "both" and "neither" alike.
    if path_given == text_given:
        raise ValueError(
            "load_bibliography: specify either path or text but not both.")
    citeproc_available = get_pandoc_info()["pandoc-citeproc"]
    if not citeproc_available:
        logging.error(
            "pandoc-citeproc not found on system: manubot.pandoc.bibliography.load_bibliography returning empty CSL JSON"
        )
        return []
    command = ["pandoc-citeproc", "--bib2json"]
    if input_format:
        command += ["--format", input_format]
    stdin_kwargs = {}
    if path_given:
        # The file is read by pandoc-citeproc itself.
        command.append(str(path))
    else:
        # The bibliography text is piped to the subprocess via stdin.
        stdin_kwargs["input"] = text
    joined_command = shlex_join(command)
    logging.info("call_pandoc subprocess args:\n>>> " + joined_command)
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="utf-8",
        **stdin_kwargs,
    )
    logging.info(f"captured stderr:\n{completed.stderr}")
    # Log stderr first so it is recorded even when the command failed.
    completed.check_returncode()
    try:
        return json.loads(completed.stdout)
    except Exception:
        logging.exception(
            f"Error parsing bib2json output as JSON:\n{completed.stdout}")
        return []
Ejemplo n.º 2
0
def _exit_without_pandoc():
    """
    Exit Python with status 1 when a required Pandoc command is unavailable.

    Looks up 'pandoc' and then 'pandoc-citeproc' in the mapping returned by
    get_pandoc_info. The first one with a falsy entry is logged at critical
    level before SystemExit is raised.
    """
    availability = get_pandoc_info()
    # Lazy search: stops at the first missing command, like an early-exit loop.
    missing = next(
        (name for name in ('pandoc', 'pandoc-citeproc')
         if not availability[name]),
        None,
    )
    if missing is None:
        return
    logging.critical(
        f'"{missing}" not found on system. Check that Pandoc is installed.')
    raise SystemExit(1)
Ejemplo n.º 3
0
def _exit_without_pandoc() -> None:
    """
    Exit Python with status 1 when get_pandoc_info reports that the pandoc
    command is unavailable; otherwise return without doing anything.
    """
    pandoc_available = get_pandoc_info()["pandoc"]
    if not pandoc_available:
        logging.critical(
            "pandoc command not found on system. Ensure that Pandoc is installed."
        )
        raise SystemExit(1)
Ejemplo n.º 4
0
def _exit_without_pandoc():
    """
    Exit Python with status 1 when a required Pandoc command is unavailable.

    The commands "pandoc" and "pandoc-citeproc" are checked, in that order,
    against the mapping returned by get_pandoc_info. The first command with a
    falsy entry is logged at critical level and SystemExit is raised.
    """
    required_commands = ("pandoc", "pandoc-citeproc")
    availability = get_pandoc_info()
    for name in required_commands:
        if availability[name]:
            continue
        message = f"{name!r} not found on system. Check that Pandoc is installed."
        logging.critical(message)
        raise SystemExit(1)
Ejemplo n.º 5
0
def test_cite_pandoc_filter():
    r"""
    Run pandoc with the `pandoc-manubot-cite` and `pandoc-citeproc` filters on
    `test_cite_filter/input.md` (fed through stdin) and compare the plain-text
    stdout, case-insensitively, with `test_cite_filter/output.txt`.
    The output is sensitive to the version of Pandoc used, so the test is
    skipped rather than failed when the system's pandoc is older than 1.12.

    ```shell
    # Command to regenerate the expected output
    pandoc \
      --to=plain \
      --wrap=preserve \
      --csl=https://github.com/manubot/rootstock/raw/8b9b5ced2c7c963bf3ea5afb8f31f9a4a54ab697/build/assets/style.csl \
      --output=manubot/pandoc/tests/test_cite_filter/output.txt \
      --bibliography=manubot/pandoc/tests/test_cite_filter/bibliography.json \
      --bibliography=manubot/pandoc/tests/test_cite_filter/bibliography.bib \
      --filter=pandoc-manubot-cite \
      --filter=pandoc-citeproc \
      manubot/pandoc/tests/test_cite_filter/input.md
    ```
    """
    fixtures = directory.joinpath("test_cite_filter")
    version = get_pandoc_info()["pandoc version"]
    if version < (1, 12):
        pytest.skip("Test requires pandoc >= 1.12 to support --filter")
    # utf-8-sig tolerates a leading byte-order mark in the fixture files.
    markdown = fixtures.joinpath("input.md").read_text(encoding="utf-8-sig")
    expected = fixtures.joinpath("output.txt").read_text(encoding="utf-8-sig")
    csl_url = "https://github.com/manubot/rootstock/raw/8b9b5ced2c7c963bf3ea5afb8f31f9a4a54ab697/build/assets/style.csl"
    command = ["pandoc", "--wrap=preserve", "--csl=" + csl_url]
    for bibliography_name in ("bibliography.json", "bibliography.bib"):
        command.append("--bibliography")
        command.append(str(fixtures.joinpath(bibliography_name)))
    command += [
        "--filter=pandoc-manubot-cite",
        "--filter=pandoc-citeproc",
        "--to=plain",
    ]
    completed = subprocess.run(
        command,
        input=markdown,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="utf-8",
    )
    # Printed for debugging; the command and both streams appear in a failure report.
    print(shlex_join(completed.args))
    print(completed.stdout)
    print(completed.stderr)
    assert completed.stdout.lower() == expected.lower()
Ejemplo n.º 6
0
def test_cite_pandoc_filter():
    r"""
    Run pandoc with the `pandoc-manubot-cite` filter followed by citation
    processing on `test_cite_filter/input.md` (fed through stdin) and compare
    the plain-text stdout, case-insensitively, with
    `test_cite_filter/output.txt`.
    Citation processing uses `--filter=pandoc-citeproc` for pandoc < 2.11 and
    `--citeproc` otherwise.
    The output is sensitive to the version of Pandoc used, so the test is
    skipped rather than failed when the system's pandoc is older than 1.12.

    ```shell
    # Command to regenerate the expected output
    pandoc \
      --to=plain \
      --wrap=preserve \
      --output=manubot/pandoc/tests/test_cite_filter/output.txt \
      --filter=pandoc-manubot-cite \
      --filter=pandoc-citeproc \
      manubot/pandoc/tests/test_cite_filter/input.md

    # Command to generate Pandoc JSON input for pandoc-manubot-cite
    pandoc \
      --to=json \
      --wrap=preserve \
      --output=manubot/pandoc/tests/test_cite_filter/filter-input.json \
      manubot/pandoc/tests/test_cite_filter/input.md
    ```
    """
    fixtures = directory.joinpath("test_cite_filter")
    version = get_pandoc_info()["pandoc version"]
    if version < (1, 12):
        pytest.skip("Test requires pandoc >= 1.12 to support --filter")
    # utf-8-sig tolerates a leading byte-order mark in the fixture files.
    markdown = fixtures.joinpath("input.md").read_text(encoding="utf-8-sig")
    expected = fixtures.joinpath("output.txt").read_text(encoding="utf-8-sig")
    if version < (2, 11):
        citeproc_option = "--filter=pandoc-citeproc"
    else:
        citeproc_option = "--citeproc"
    command = [
        "pandoc",
        "--wrap=preserve",
        "--filter=pandoc-manubot-cite",
        citeproc_option,
        "--to=plain",
    ]
    completed = subprocess.run(
        command,
        input=markdown,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="utf-8",
    )
    # Printed for debugging; the command and both streams appear in a failure report.
    print(shlex_join(completed.args))
    print(completed.stdout)
    print(completed.stderr)
    assert completed.stdout.lower() == expected.lower()
Ejemplo n.º 7
0
def load_bibliography(
    path: Optional[str] = None,
    text: Optional[str] = None,
    input_format: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    Convert a bibliography to CSL JSON using either `pandoc-citeproc --bib2json`
    or `pandoc --to=csljson`, depending on availability of pandoc commands on the system.
    Accepts either a bibliography path or text (string). If supplying text,
    pandoc-citeproc will likely require input_format be specified.
    The CSL JSON is returned as Python objects.
    If pandoc-citeproc is unavailable and the input is not identified as 'bib',
    log an error and return an empty list.

    Parameters
    ----------
    path : str, pathlike, or None
        Path to a bibliography file. Extension is used by pandoc-citeproc to infer the
        format of the input. Converted with `os.fspath` before use.
    text : str or None
        Text representation of the bibliography, such as a JSON-formatted string.
        `input_format` should be specified if providing text input.
    input_format : str or None
        Manually specified input format that is supported by pandoc-citeproc:
        https://github.com/jgm/pandoc-citeproc/blob/master/man/pandoc-citeproc.1.md#options
        Use 'bib' for BibLaTeX. Use 'json' for CSL JSON.

    Returns
    -------
    csl_json : JSON-like object
        CSL JSON Data for the references encoded by the input bibliography.

    Raises
    ------
    ValueError
        If both or neither of `path` and `text` are provided.
    """
    has_path = path is not None
    has_text = text is not None
    # Validate before touching `path`: os.fspath(None) would otherwise raise a
    # TypeError and mask the intended ValueError when neither input is given.
    if has_path == has_text:
        raise ValueError(
            "load_bibliography: specify either path or text but not both.")
    if has_path:
        path = os.fspath(path)
    pandoc_info = get_pandoc_info()
    if pandoc_info["pandoc-citeproc"]:
        return _load_bibliography_pandoc_citeproc(path, text, input_format)
    # Without pandoc-citeproc, only 'bib' input is handed to pandoc directly.
    if input_format == "bib" or (has_path and path.endswith(".bib")):
        return _load_bibliography_pandoc(path, text)
    logging.error(
        "pandoc-citeproc not found on system, but is required to convert any format besides 'bib': "
        "manubot.pandoc.bibliography.load_bibliography returning empty CSL JSON"
    )
    return []
Ejemplo n.º 8
0
def call_pandoc(metadata, path, format="plain"):
    """
    Run pandoc with the pandoc-citeproc filter, feeding it `metadata` as a
    metadata block on stdin.

    Parameters
    ----------
    metadata : JSON-serializable object
        Serialized with json.dumps and wrapped between `---` and `...` lines.
    path : str, pathlike, or None
        Path to write to. When falsy, "-" is passed as the `--output` value.
    format : str
        One of "markdown", "jats", "docx", "html", or "plain". Any other value
        adds no `--to` option to the pandoc command.

    Raises
    ------
    subprocess.CalledProcessError
        If pandoc exits with a non-zero return code.
    """
    _exit_without_pandoc()
    info = get_pandoc_info()
    _check_pandoc_version(info, metadata, format)
    yaml_text = json.dumps(metadata, ensure_ascii=False, indent=2)
    metadata_block = "---\n{yaml}\n...\n".format(yaml=yaml_text)
    output_target = str(path) if path else "-"
    # Output options for each supported format; unknown formats add nothing.
    options_by_format = {
        "markdown": ["--to", "markdown_strict", "--wrap", "none"],
        "jats": ["--to", "jats", "--standalone"],
        "docx": ["--to", "docx"],
        "html": ["--to", "html"],
        "plain": ["--to", "plain", "--wrap", "none"],
    }
    args = ["pandoc", "--filter", "pandoc-citeproc", "--output", output_target]
    args += options_by_format.get(format, [])
    if format == "plain" and info["pandoc version"] >= (2,):
        # Do not use ALL_CAPS for bold & underscores for italics
        # https://github.com/jgm/pandoc/issues/4834#issuecomment-412972008
        unresolved = pathlib.Path(__file__).joinpath(
            "..", "plain-pandoc-filter.lua"
        )
        # Resolving `<this file>/../<name>` yields a sibling of this file.
        filter_path = unresolved.resolve()
        assert filter_path.exists()
        args += ["--lua-filter", str(filter_path)]
    logging.info("call_pandoc subprocess args:\n" + shlex_join(args))
    completed = subprocess.run(
        args=args,
        input=metadata_block.encode(),
    )
    completed.check_returncode()
Ejemplo n.º 9
0
def call_pandoc(metadata, path, format='plain'):
    """
    Run pandoc with the pandoc-citeproc filter, feeding it `metadata` as a
    metadata block on stdin.

    Parameters
    ----------
    metadata : JSON-serializable object
        Serialized with json.dumps and wrapped between `---` and `...` lines.
    path : str, pathlike, or None
        Path to write to. When falsy, '-' is passed as the `--output` value and
        the subprocess stdout is connected to sys.stdout; otherwise the
        subprocess stdout is captured through a pipe.
    format : str
        One of 'markdown', 'jats', 'docx', 'html', or 'plain'. Any other value
        adds no `--to` option to the pandoc command.

    Raises
    ------
    subprocess.CalledProcessError
        If pandoc exits with a non-zero return code.
    """
    _exit_without_pandoc()
    info = get_pandoc_info()
    _check_pandoc_version(info, metadata, format)
    serialized = json.dumps(metadata, ensure_ascii=False, indent=2)
    metadata_block = '---\n{yaml}\n...\n'.format(yaml=serialized)
    # Output options for each supported format; unknown formats add nothing.
    to_options = {
        'markdown': ('--to', 'markdown_strict', '--wrap', 'none'),
        'jats': ('--to', 'jats', '--standalone'),
        'docx': ('--to', 'docx'),
        'html': ('--to', 'html'),
        'plain': ('--to', 'plain', '--wrap', 'none'),
    }
    args = ['pandoc', '--filter', 'pandoc-citeproc', '--output']
    args.append(str(path) if path else '-')
    args.extend(to_options.get(format, ()))
    wants_lua_filter = format == 'plain' and info['pandoc version'] >= (2, )
    if wants_lua_filter:
        # Do not use ALL_CAPS for bold & underscores for italics
        # https://github.com/jgm/pandoc/issues/4834#issuecomment-412972008
        this_file = pathlib.Path(__file__)
        # Resolving `<this file>/../<name>` yields a sibling of this file.
        filter_path = this_file.joinpath(
            '..', 'plain-pandoc-filter.lua').resolve()
        assert filter_path.exists()
        args.extend(['--lua-filter', str(filter_path)])
    logging.info('call_pandoc subprocess args:\n' + shlex_join(args))
    if path:
        stdout_target = subprocess.PIPE
    else:
        stdout_target = sys.stdout
    completed = subprocess.run(
        args=args,
        input=metadata_block.encode(),
        stdout=stdout_target,
        stderr=sys.stderr,
    )
    completed.check_returncode()
Ejemplo n.º 10
0
def _load_bibliography_pandoc(
    path: Optional[str] = None,
    text: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    Convert a biblatex (.bib) bibliography to CSL JSON data using pandoc directly.
    Pandoc support for csljson output requires pandoc >= 2.11.

    When pandoc is missing or older than 2.11, an error is logged and an empty
    list is returned. Otherwise the conversion is delegated to
    `_pandoc_system_call` with the `pandoc --from=biblatex --to=csljson`
    command.
    """
    info = get_pandoc_info()
    # Select the reason conversion is impossible, if any; the version is only
    # inspected once pandoc is known to be present.
    failure_reason = None
    if not info["pandoc"]:
        failure_reason = "pandoc not found on system: "
    elif info["pandoc version"] < (2, 11):
        failure_reason = (
            "pandoc >= version 2.11 required for biblatex to csljson conversion. "
        )
    if failure_reason is not None:
        logging.error(
            failure_reason
            + "manubot.pandoc.bibliography.load_bibliography returning empty CSL JSON"
        )
        return []
    return _pandoc_system_call(
        "pandoc --from=biblatex --to=csljson".split(), path, text
    )
Ejemplo n.º 11
0
def test_cite_command_render_stdout(args, expected):
    """
    Test the stdout output of `manubot cite --render` with various formats.

    `args` holds extra command-line arguments appended to the base command and
    `expected` names a file in the `cite-command-rendered` directory next to
    this test module that contains the expected stdout.
    The output is sensitive to the version of Pandoc used, so rather than fail when
    the system's pandoc is outdated, the test is skipped.
    """
    version = get_pandoc_info()['pandoc version']
    # Lazy search: first requested format whose output needs a newer pandoc.
    outdated_format = next(
        (name for name in ('markdown', 'html', 'jats')
         if name in args and version < (2, 5)),
        None,
    )
    if outdated_format is not None:
        pytest.skip(f"Test {outdated_format} output assumes pandoc >= 2.5")
    if version < (2, 0):
        pytest.skip(
            "Test requires pandoc >= 2.0 to support --lua-filter and --csl=URL"
        )
    rendered_dir = pathlib.Path(__file__).parent
    expected_path = rendered_dir.joinpath('cite-command-rendered', expected)
    expected_text = expected_path.read_text()
    csl_url = 'https://github.com/greenelab/manubot-rootstock/raw/e83e51dcd89256403bb787c3d9a46e4ee8d04a9e/build/assets/style.csl'
    base_command = ['manubot', 'cite', '--render', '--csl', csl_url]
    citations = ['arxiv:1806.05726v1', 'doi:10.7717/peerj.338', 'pmid:29618526']
    command = base_command + citations + args
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    # Printed for debugging; the command and both streams appear in a failure report.
    print(' '.join(completed.args))
    print(completed.stdout)
    print(completed.stderr)
    assert completed.stdout == expected_text