Esempio n. 1
0
def test_transformer_infores_parser_prefix_rewrite():
    """
    Transform the InfoRes coercion fixtures with a rewrite rule applied to
    both ``provided_by`` and ``aggregator_knowledge_source``, then check the
    resulting node/edge provenance values and the InfoRes catalog.
    """
    # Presumably (regex, substitution, prefix) -- confirm against the
    # Transformer's InfoRes rewrite handling.
    rewrite_rule = (r"\(.+\)", "", "Monarch")
    fixture_paths = [
        os.path.join(RESOURCE_DIR, basename)
        for basename in (
            "test_infores_coercion_nodes.tsv",
            "test_infores_coercion_edges.tsv",
        )
    ]

    transformer = Transformer()
    transformer.transform(
        input_args={
            "filename": fixture_paths,
            "format": "tsv",
            "provided_by": rewrite_rule,
            "aggregator_knowledge_source": rewrite_rule,
        }
    )

    # Each node must carry exactly one provenance value: the expected InfoRes.
    expected_node_provenance = (
        ("FlyBase:FBgn0000008", "infores:monarch-flybase"),
        ("GO:0005912", "infores:monarch-gene-ontology"),
    )
    for node_id, expected_infores in expected_node_provenance:
        node = transformer.store.graph.nodes()[node_id]
        assert "provided_by" in node
        assert len(node["provided_by"]) == 1
        assert expected_infores in node["provided_by"]

    # Inspect the first edge record found between the two nodes.
    edge_records = transformer.store.graph.get_edge(
        "FlyBase:FBgn0000008", "GO:0005912"
    )
    first_edge = list(edge_records.values())[0]
    assert (
        "infores:monarch-gene-ontology"
        in first_edge["aggregator_knowledge_source"]
    )

    # The catalog is keyed by the original knowledge source name.
    catalog = transformer.get_infores_catalog()
    go_source_name = "Gene Ontology (Monarch version 202012)"
    assert len(catalog) == 2
    assert go_source_name in catalog
    assert "infores:monarch-gene-ontology" in catalog[go_source_name]
Esempio n. 2
0
def transform_source(
    key: str,
    source: Dict,
    output_directory: Optional[str],
    prefix_map: Optional[Dict[str, str]] = None,
    node_property_predicates: Optional[Set[str]] = None,
    predicate_mappings: Optional[Dict[str, str]] = None,
    reverse_prefix_map: Optional[Dict] = None,
    reverse_predicate_mappings: Optional[Dict] = None,
    property_types: Optional[Dict] = None,
    checkpoint: bool = False,
    preserve_graph: bool = True,
    stream: bool = False,
    infores_catalog: Optional[str] = None,
) -> Sink:
    """
    Transform a source from a transform config YAML.

    Parameters
    ----------
    key: str
        Source key
    source: Dict
        Source configuration
    output_directory: Optional[str]
        Location to write output to
    prefix_map: Optional[Dict[str, str]]
        Non-canonical CURIE mappings
    node_property_predicates: Optional[Set[str]]
        A set of predicates that ought to be treated as node properties (This is applicable for RDF)
    predicate_mappings: Optional[Dict[str, str]]
        A mapping of predicate IRIs to property names (This is applicable for RDF)
    reverse_prefix_map: Optional[Dict[str, str]]
        Non-canonical CURIE mappings for export
    reverse_predicate_mappings: Optional[Dict[str, str]]
        A mapping of property names to predicate IRIs (This is applicable for RDF)
    property_types: Optional[Dict[str, str]]
        The xml property type for properties that are other than ``xsd:string``.
        Relevant for RDF export.
    checkpoint: bool
        Whether to serialize each individual source to a TSV.
        NOTE(review): accepted for interface compatibility but not read
        anywhere in this function body.
    preserve_graph: bool
        Whether or not to preserve the graph corresponding to the source.
        When false, the transformer's graph is cleared after the transform.
    stream: bool
        Whether to parse input as a stream
    infores_catalog: Optional[str]
        Optional path of a TSV file to write the InfoRes catalog to. Each
        line holds a catalog key and its value, separated by a tab.

    Returns
    -------
    kgx.sink.sink.Sink
        Returns the ``store`` attribute of the Transformer used for this source

    """
    log.info(f"Processing source '{key}'")
    input_args = prepare_input_args(
        key,
        source,
        output_directory,
        prefix_map,
        node_property_predicates,
        predicate_mappings,
    )
    output_args = prepare_output_args(
        key,
        source,
        output_directory,
        reverse_prefix_map,
        reverse_predicate_mappings,
        property_types,
    )
    transformer = Transformer(stream=stream, infores_catalog=infores_catalog)
    transformer.transform(input_args, output_args)

    if not preserve_graph:
        transformer.store.graph.clear()

    if infores_catalog:
        # Fetch the catalog before opening the file, so that a failure here
        # does not leave behind a truncated output file.
        catalog: Dict[str, str] = transformer.get_infores_catalog()
        # Explicit encoding keeps the dump independent of the platform locale.
        with open(infores_catalog, "w", encoding="utf-8") as irc:
            # Iterate key/value pairs directly; a distinct loop name avoids
            # shadowing the ``source`` parameter.
            for knowledge_source, infores in catalog.items():
                print(f"{knowledge_source}\t{infores}", file=irc)

    return transformer.store