Python read_serialized_dataの例、manubot.util.read_serialized_data Pythonの例

コード例 #1

0

ファイルを表示

ファイル: update-author-metadata.py プロジェクト: yusha-sun/covid19-review

def update_individual(path, keyword):
    """
    Rewrite the metadata file at path so it lists only the authors of one manuscript.

    An author belongs to the manuscript when their "manuscripts" dictionary
    contains keyword. Each selected author's "contributions" is replaced by the
    sorted manuscript-specific contributions, and "consortiummembers" is built
    from the full (unfiltered) author list. The result is written back to path.
    """
    metadata = read_serialized_data(path)
    all_authors = metadata.get("authors", [])
    # Consortium members are derived from every author, not only the selected ones
    metadata["consortiummembers"] = generate_consortium_members(all_authors)

    def manuscript_order(member):
        # Authors without an explicit order default to -1, which places them
        # at the front of the list; the name breaks ties
        return member["manuscripts"][keyword].get("order", -1), member["name"]

    selected = sorted(
        (
            member
            for member in all_authors
            if "manuscripts" in member and keyword in member["manuscripts"]
        ),
        key=manuscript_order,
    )

    # Replace each author's contributions with the manuscript-specific ones
    for member in selected:
        manuscript_entry = member["manuscripts"][keyword]
        roles = manuscript_entry.get("contributions", MISSING_CONTRIBUTIONS)
        if roles == MISSING_CONTRIBUTIONS:
            sys.stderr.write(f"Missing {keyword} contributions for {member['name']}\n")
        member["contributions"] = sorted(roles)

    sys.stderr.write(f"Found {len(selected)} authors for {keyword} manuscript\n")

    metadata["authors"] = selected
    dump_yaml(metadata, path)

コード例 #2

0

ファイルを表示

def load_bibliography(path) -> list:
    """
    Load a bibliography and return it as a list of CSL_Item objects.

    Files with a .json or .yaml suffix are read with read_serialized_data; a
    failure there is logged with its traceback and yields an empty list.
    Any other suffix is handed to manubot.pandoc.bibliography.load_bibliography.
    If the loaded value is not a list, an error is logged and an empty list
    is used instead.
    """
    bib_path = pathlib.Path(path)
    if bib_path.suffix not in {".json", ".yaml"}:
        # Not a serialized CSL file: delegate the conversion to the pandoc helper
        from manubot.pandoc.bibliography import (
            load_bibliography as load_bibliography_pandoc,
        )

        loaded = load_bibliography_pandoc(bib_path)
    else:
        try:
            loaded = read_serialized_data(bib_path)
        except Exception:
            logging.exception(
                f"process.load_bibliography: error parsing {bib_path}.\n")
            loaded = []

    if not isinstance(loaded, list):
        logging.error(
            f"process.load_bibliography: csl_items read from {bib_path} are of type {type(loaded)}. "
            "Setting csl_items to an empty list.")
        loaded = []

    from manubot.cite.csl_item import CSL_Item

    return [CSL_Item(entry) for entry in loaded]

コード例 #3

0

ファイルを表示

def load_bibliography(path: str) -> list:
    """
    Load a bibliography and return it as a list of CSL_Item objects.

    When the suffix of path is .json or .yaml, the original path string is
    passed unchanged to read_serialized_data. A read failure is logged at
    error level (with the traceback logged at info level) and treated as an
    empty bibliography. Every other suffix is delegated to
    manubot.pandoc.bibliography.load_bibliography.
    A loaded value that is not a list is logged as an error and replaced by
    an empty list.
    """
    parse_directly = pathlib.Path(path).suffix in {".json", ".yaml"}
    if parse_directly:
        try:
            loaded = read_serialized_data(path)
        except Exception as error:
            logging.error(f"load_bibliography: error reading {path!r}.\n{error}")
            logging.info("load_bibliography exception info", exc_info=True)
            loaded = []
    else:
        from manubot.pandoc.bibliography import (
            load_bibliography as load_bibliography_pandoc,
        )

        loaded = load_bibliography_pandoc(path)

    if isinstance(loaded, list):
        records = loaded
    else:
        logging.error(
            f"process.load_bibliography: csl_items read from {path} are of type {type(loaded)}. "
            "Setting csl_items to an empty list."
        )
        records = []

    from manubot.cite.csl_item import CSL_Item

    return [CSL_Item(record) for record in records]

コード例 #4

0

ファイルを表示

ファイル: util.py プロジェクト: nanjingruixun/manubot

def read_variable_files(paths: List[str],
                        variables: Optional[dict] = None) -> dict:
    """
    Merge several serialized data files into one template-variables dictionary.

    Each entry of `paths` is a location understood by the reader helpers,
    optionally prefixed with `namespace=`. For example:

    ```python
    paths = [
        'https://git.io/vbkqm',  # merged into the top level
        'namespace_1=https://git.io/vbkqm',  # stored under 'namespace_1'
        'namespace_2=some_local_path.json',  # stored under 'namespace_2'
    ]
    ```

    Without a namespace the data is read with read_serialized_dict and merged
    into the top level. With a namespace the data is read with
    read_serialized_data and stored under that key. A namespace must start
    with an ASCII letter or underscore, followed by ASCII letters, digits or
    underscores.

    A path that cannot be read is logged and skipped. Keys that already exist
    are overwritten after a warning is logged.

    If `variables` is given, that dictionary is updated in place and returned;
    otherwise a new dictionary is created.
    """
    merged = {} if variables is None else variables
    # Match only namespaces that are valid jinja2 variable names
    # http://jinja.pocoo.org/docs/2.10/api/#identifier-naming
    namespace_pattern = re.compile(r"([a-zA-Z_][a-zA-Z0-9_]*)=(.+)")
    for path in paths:
        logging.info(f"Reading user-provided templating variables at {path!r}")
        parsed = namespace_pattern.match(path)
        if parsed is not None:
            # From here on, path refers to the location without the namespace prefix
            namespace, path = parsed.groups()
            logging.info(
                f"Using the {namespace!r} namespace for template variables from {path!r}"
            )
        try:
            if parsed is None:
                loaded = read_serialized_dict(path)
            else:
                loaded = {namespace: read_serialized_data(path)}
        except Exception:
            logging.exception(
                f"Error reading template variables from {path!r}")
            continue
        assert isinstance(loaded, dict)
        overwritten = merged.keys() & loaded.keys()
        if overwritten:
            logging.warning(
                f"Template variables in {path!r} overwrite existing "
                "values for the following keys:\n" + "\n".join(overwritten))
        merged.update(loaded)
    summary = json.dumps(merged, indent=2, ensure_ascii=False)
    logging.debug(f"Reading user-provided templating variables complete:\n"
                  f"{summary}")
    return merged

コード例 #5

0

ファイルを表示

def get_jsonschema_csl_validator():
    """
    Build a jsonschema validator instance for the CSL Item JSON Schema.

    The schema is read with read_serialized_data from the csl-schema URL below.
    The validator class is chosen by jsonschema for that schema, and the schema
    itself is checked before the validator instance is returned.
    """
    from jsonschema.validators import validator_for

    schema = read_serialized_data(
        "https://github.com/dhimmel/csl-schema/raw/manubot/csl-data.json"
    )
    validator_class = validator_for(schema)
    # Raises if the schema itself is invalid, before any instance is created
    validator_class.check_schema(schema)
    return validator_class(schema)

コード例 #6

0

ファイルを表示

def update_merged(path):
    """
    Rewrite the metadata file at path with merged-manuscript contributions.

    For each author, "contributions" becomes the sorted union of the
    contributions listed under every entry of "manuscripts". Authors without a
    "manuscripts" field keep their general "contributions", and authors with
    neither receive MISSING_CONTRIBUTIONS. Warnings are written to stderr for
    missing contributions and for authors who have not confirmed the code of
    conduct. "consortiummembers" is rebuilt from the author list.
    """
    metadata = read_serialized_data(path)
    authors = metadata.get("authors", [])
    metadata["consortiummembers"] = generate_consortium_members(authors)

    for person in authors:
        if "manuscripts" in person:
            # Union over all individual manuscripts; an entry without
            # contributions adds the MISSING_CONTRIBUTIONS placeholder
            merged = set()
            for entry in person["manuscripts"].values():
                merged.update(entry.get("contributions", MISSING_CONTRIBUTIONS))
        elif "contributions" in person:
            # No manuscript-specific data: fall back to general contributions
            merged = set(person["contributions"])
        else:
            merged = set(MISSING_CONTRIBUTIONS)

        if MISSING_CONTRIBUTIONS[0] in merged:
            sys.stderr.write(f"Missing contributions for {person['name']}\n")

        person["contributions"] = sorted(merged)

        # The code of conduct counts as approved only when the field exists,
        # has a "confirmed" entry, and that entry is truthy
        approved = (
            "code of conduct" in person
            and "confirmed" in person["code of conduct"]
            and person["code of conduct"]["confirmed"]
        )
        if not approved:
            sys.stderr.write(
                f"{person['name']} has not approved the code of conduct\n")

    sys.stderr.write(
        f"Updating contributions for {len(authors)} authors for merged manuscript\n"
    )
    metadata["authors"] = authors
    dump_yaml(metadata, path)

コード例 #7

0

ファイルを表示

    with path.open("w", encoding="utf-8") as write_file:
        yaml.dump(
            obj,
            write_file,
            # default_flow_style=False,
            explicit_start=True,
            explicit_end=True,
            width=float("inf"),
            sort_keys=False,
            allow_unicode=True,
        )
        write_file.write("\n")


if __name__ == "__main__":
    # Alternative to https://github.com/manubot/manubot/pull/214
    args = parse_args()
    if args.execute:
        metadata = read_serialized_data(args.path)
        # authors is the list stored inside metadata (when present), so an
        # in-place shuffle is reflected in what gets written back
        authors = metadata.get("authors", [])
        if args.shuffle:
            sys.stderr.write("Shuffling metadata.authors\n")
            seed = get_head_commit()
            shuffle(authors, seed=seed)
        dump_yaml(metadata, args.path)
    else:
        sys.stderr.write(
            "Exiting without doing anything due to --only-on-ci\n")
        sys.exit()

コード例 #8

0

ファイルを表示

def update_latex(keyword, manubot_file, pandoc_file):
    """
    Update Manubot author metadata for an individual manuscript in preparation for LaTeX conversion.
    Prepares a metadata file to override and supplement the metadata in the YAML block in the Markdown
    file Pandoc processes.

    Args:
        keyword: manuscript key; an author is included when their "manuscripts"
            dictionary contains it. Also used to name the .bib file.
        manubot_file: path of the Manubot metadata file to read.
        pandoc_file: path the generated Pandoc metadata is written to.

    Raises:
        KeyError: if a selected author has no "name" field.
    """
    metadata = read_serialized_data(manubot_file)
    authors = metadata.get("authors", [])

    individual_authors = [author for author in authors if "manuscripts" in author and keyword in author["manuscripts"]]
    # Sort authors by their numeric order for this individual manuscript
    # If the author has the manuscript keyword, which indicates authorship, but not an order
    # the default order is -1, which should move them to the front of the author list
    # Sort by name to break ties
    individual_authors.sort(key=lambda author: (author["manuscripts"][keyword].get("order", -1), author["name"]))

    # Set affiliation fields to the manuscript-specific affiliation formatting expected by the LaTeX template
    # and discard fields that will not be used by the LaTeX template
    # A tuple (not a set) keeps the key order of every author entry identical
    # between runs; set iteration order of strings varies with hash randomization
    keep_fields = ("name", "email", "orcid")  # do not keep the old affiliations
    latex_authors = []
    conflicts = []
    funding = []
    for author in individual_authors:
        latex_author = {field: author[field] for field in keep_fields if field in author}

        # A list of the author's affiliations formatted for the template
        # The first affiliation is stored in the "affiliations" field
        # Any additional affiliations are stored in the "additionalaffiliations" field
        affiliations = author["manuscripts"][keyword].get("affiliations", MISSING_AFFILIATIONS)
        if affiliations == MISSING_AFFILIATIONS:
            sys.stderr.write(f"Missing {keyword} affiliations for {author['name']}\n")

        if len(affiliations) > 0:
            latex_author["affiliations"] = affiliations[0]
        if len(affiliations) > 1:
            latex_author["additionalaffiliations"] = affiliations[1:]
        latex_authors.append(latex_author)

        # Check whether the author has declared conflicts of interest
        # The literal string "None" marks an explicit declaration of no conflicts
        if "coi" in author:
            conflict = author["coi"].get("string", MISSING_COI)
            if conflict != "None":
                conflicts.append(f"{author['name']}: {conflict}.")

        # Check whether the author has funding
        # This text will not be used directly but will help write a funding statement manually
        if "funders" in author:
            # Less robust handling of funders field than Manubot
            # https://github.com/manubot/manubot/blob/3ff3000f76dcf82a30694d076a4da95326e3f6ae/manubot/process/util.py#L78
            funders = author["funders"]
            if isinstance(funders, list):
                funders = "; ".join(funders)
            if "initials" in author:
                label = author["initials"]
            else:
                # Fall back to the full name instead of failing with a KeyError
                sys.stderr.write(f"Missing initials for {author['name']}, using name in funding comment\n")
                label = author["name"]
            funding.append(f"{label}: {funders}.")

    sys.stderr.write(f"Found {len(latex_authors)} authors for {keyword} manuscript\n")

    # Do not retain the other metadata fields and add the .bib file references
    metadata = {"author": latex_authors, "bibfile": keyword + ".bib"}
    metadata.update(ACM_DISCO_2021)
    # Add conflicts if any exist
    if len(conflicts) > 0:
        metadata["conflicts"] = "Conflicts of interest. " + " ".join(conflicts)
    # Add funding comment if funders were listed
    if len(funding) > 0:
        metadata["funding"] = "Author funding. " + " ".join(funding)

    dump_yaml(metadata, pandoc_file)