def test_transformer_infores_parser_prefix_rewrite():
    """
    Transform the InfoRes coercion TSV fixtures with a rewrite rule applied to
    ``provided_by`` and ``aggregator_knowledge_source``, then check the
    InfoRes CURIEs found on the nodes, on the edge and in the catalog.
    """
    # Same three-element rule (regex, empty string, "Monarch") for both fields.
    rewrite_rule = (r"\(.+\)", "", "Monarch")
    fixture_files = [
        os.path.join(RESOURCE_DIR, "test_infores_coercion_nodes.tsv"),
        os.path.join(RESOURCE_DIR, "test_infores_coercion_edges.tsv"),
    ]
    input_args = {
        "filename": fixture_files,
        "format": "tsv",
        "provided_by": rewrite_rule,
        "aggregator_knowledge_source": rewrite_rule,
    }

    transformer = Transformer()
    transformer.transform(input_args=input_args)

    # Each node must carry exactly one provided_by entry: the expected CURIE.
    expected_provenance = (
        ("FlyBase:FBgn0000008", "infores:monarch-flybase"),
        ("GO:0005912", "infores:monarch-gene-ontology"),
    )
    for node_id, expected_curie in expected_provenance:
        node = transformer.store.graph.nodes()[node_id]
        assert "provided_by" in node
        assert len(node["provided_by"]) == 1
        assert expected_curie in node["provided_by"]

    # Inspect the first edge record between the two nodes.
    edge_records = transformer.store.graph.get_edge(
        "FlyBase:FBgn0000008", "GO:0005912"
    )
    first_edge = list(edge_records.values())[0]
    assert (
        "infores:monarch-gene-ontology"
        in first_edge["aggregator_knowledge_source"]
    )

    # The catalog is keyed by the original knowledge source name.
    catalog = transformer.get_infores_catalog()
    go_source_name = "Gene Ontology (Monarch version 202012)"
    assert len(catalog) == 2
    assert go_source_name in catalog
    assert "infores:monarch-gene-ontology" in catalog[go_source_name]
def transform_source(
    key: str,
    source: Dict,
    output_directory: Optional[str],
    prefix_map: Optional[Dict[str, str]] = None,
    node_property_predicates: Optional[Set[str]] = None,
    predicate_mappings: Optional[Dict[str, str]] = None,
    reverse_prefix_map: Optional[Dict] = None,
    reverse_predicate_mappings: Optional[Dict] = None,
    property_types: Optional[Dict] = None,
    checkpoint: bool = False,
    preserve_graph: bool = True,
    stream: bool = False,
    infores_catalog: Optional[str] = None,
) -> Sink:
    """
    Transform a source from a transform config YAML.

    Parameters
    ----------
    key: str
        Source key
    source: Dict
        Source configuration
    output_directory: Optional[str]
        Location to write output to
    prefix_map: Optional[Dict[str, str]]
        Non-canonical CURIE mappings
    node_property_predicates: Optional[Set[str]]
        A set of predicates that ought to be treated as node properties (This is applicable for RDF)
    predicate_mappings: Optional[Dict[str, str]]
        A mapping of predicate IRIs to property names (This is applicable for RDF)
    reverse_prefix_map: Optional[Dict[str, str]]
        Non-canonical CURIE mappings for export
    reverse_predicate_mappings: Optional[Dict[str, str]]
        A mapping of property names to predicate IRIs (This is applicable for RDF)
    property_types: Optional[Dict[str, str]]
        The xml property type for properties that are other than ``xsd:string``.
        Relevant for RDF export.
    checkpoint: bool
        Whether to serialize each individual source to a TSV
        (accepted for interface compatibility; not read by this function)
    preserve_graph: bool
        Whether or not to preserve the graph corresponding to the source
    stream: bool
        Whether to parse input as a stream
    infores_catalog: Optional[str]
        Optional path of a TSV file to which the InfoRes catalog is dumped,
        one ``<knowledge source>\\t<InfoRes>`` line per catalog entry

    Returns
    -------
    kgx.sink.sink.Sink
        Returns ``transformer.store``.
        NOTE(review): the annotation says ``Sink`` but the value returned is
        the transformer's store -- confirm which type is intended.

    """
    log.info(f"Processing source '{key}'")
    input_args = prepare_input_args(
        key,
        source,
        output_directory,
        prefix_map,
        node_property_predicates,
        predicate_mappings,
    )
    output_args = prepare_output_args(
        key,
        source,
        output_directory,
        reverse_prefix_map,
        reverse_predicate_mappings,
        property_types,
    )
    transformer = Transformer(stream=stream, infores_catalog=infores_catalog)
    transformer.transform(input_args, output_args)

    if not preserve_graph:
        transformer.store.graph.clear()

    if infores_catalog:
        with open(infores_catalog, "w") as irc:
            catalog: Dict[str, str] = transformer.get_infores_catalog()
            # Iterate key/value pairs directly: the former
            # ``catalog.setdefault(name, "unknown")`` was a no-op because every
            # key came from the catalog itself, and its loop variable shadowed
            # the ``source`` parameter.
            for source_name, infores in catalog.items():
                print(f"{source_name}\t{infores}", file=irc)

    return transformer.store