def make_arg_parser():
    """Build the command-line parser for this validation script.

    Returns:
        An ``argparse.ArgumentParser`` that expects three positional string
        arguments, in order: ``curiesToURLsMapFile``, ``biolinkModelURL``
        and ``biolinkModelLocalFile``.
    """
    parser = argparse.ArgumentParser(
        description=('validate_kg2_util_curies_urls_categories.py: '
                     'checks the file `kg2_util.py` for correctness for its CURIE IDs, Base URLs, and biolink categories.'))
    for positional_name in ('curiesToURLsMapFile',
                            'biolinkModelURL',
                            'biolinkModelLocalFile'):
        parser.add_argument(positional_name, type=str)
    return parser


# NOTE: argument parsing and everything below happen at module level, so they
# run whenever this module is executed or imported.
args = make_arg_parser().parse_args()
biolink_model_url = args.biolinkModelURL
biolink_model_file_name = args.biolinkModelLocalFile
curies_to_urls_map_file_name = args.curiesToURLsMapFile

# The CURIE-to-URL map file is read twice: the raw text feeds the
# contraction-mode URI map behind the shortener, and a second read is parsed
# as YAML for the prefix tables below.
iri_shortener = kg2_util.make_uri_to_curie_shortener(
    kg2_util.make_curies_to_uri_map(
        kg2_util.read_file_to_string(curies_to_urls_map_file_name),
        kg2_util.IDMapperType.CONTRACT,
    )
)
curies_to_url_map_data = kg2_util.safe_load_yaml_from_string(
    kg2_util.read_file_to_string(curies_to_urls_map_file_name)
)

# Flatten each YAML list of mappings into a single dict; when two list items
# share a key, the later item wins.
curies_to_url_map_data_bidir = {
    prefix: base_url
    for entry in curies_to_url_map_data['use_for_bidirectional_mapping']
    for prefix, base_url in entry.items()
}
curies_to_url_map_data_cont = {
    prefix: base_url
    for entry in curies_to_url_map_data['use_for_contraction_only']
    for prefix, base_url in entry.items()
}

# Load the Biolink model from the local file (the helper presumably downloads
# it first only when that file is missing -- confirm in kg2_util).
kg2_util.download_file_if_not_exist_locally(biolink_model_url,
                                            biolink_model_file_name)
biolink_ont = kg2_util.make_ontology_from_local_file(biolink_model_file_name)
biolink_categories_ontology_depths = \
    kg2_util.get_biolink_categories_ontology_depths(biolink_ont)

# Children of the `SlotDefinition` node, with the Biolink meta base URL
# removed from each identifier.
biolink_edge_labels = {
    slot_url.replace(kg2_util.BASE_URL_BIOLINK_META, '')
    for slot_url in biolink_ont.children(
        kg2_util.BASE_URL_BIOLINK_META + 'SlotDefinition')
}
arg_parser.add_argument('curiesToURLsMapFile', type=str) arg_parser.add_argument('umls2rdfConfFile', type=str) arg_parser.add_argument('biolinkModelURL', type=str) arg_parser.add_argument('biolinkModelLocalFile', type=str) return arg_parser args = make_arg_parser().parse_args() owl_load_inventory_file_name = args.owlLoadInventoryFile curies_to_urls_map_file_name = args.curiesToURLsMapFile umls2rdf_conf_file_name = args.umls2rdfConfFile biolink_model_url = args.biolinkModelURL biolink_model_file_name = args.biolinkModelLocalFile owl_load_inventory_data = kg2_util.safe_load_yaml_from_string( kg2_util.read_file_to_string(owl_load_inventory_file_name)) umls_ttl_files = None with open(umls2rdf_conf_file_name, 'r') as umls2rdf_conf_file: umls_ttl_files = { line.split(",")[1] for line in umls2rdf_conf_file.read().splitlines() if len(line) > 0 and not line.startswith('#') } umls2rdf_conf_file.close() umls_ttl_files.add('umls-semantictypes.ttl') kg2_util.download_file_if_not_exist_locally(biolink_model_url, biolink_model_file_name) biolink_ont = kg2_util.make_ontology_from_local_file(biolink_model_file_name) biolink_categories_ontology_depths = kg2_util.get_biolink_categories_ontology_depths(
if __name__ == '__main__':
    # Script entry point (setup section): read the command-line options, load
    # the predicate-remap configuration and the input graph, and initialise
    # the bookkeeping containers used while processing edges.
    args = make_arg_parser().parse_args()
    predicate_remap_file_name = args.predicateRemapYaml
    curies_to_uri_lal_file_name = args.curiesToURILALFile
    input_file_name = args.inputFileJson
    output_file_name = args.outputFileJson
    test_mode = args.test
    drop_negated = args.drop_negated

    # Optional comma-separated string, turned into a set of its items.
    drop_self_edges_except = args.drop_self_edges_except
    if drop_self_edges_except is not None:
        # isinstance() rather than an exact type() comparison, so that str
        # subclasses are accepted as well.
        assert isinstance(drop_self_edges_except, str), drop_self_edges_except
        drop_self_edges_except = set(drop_self_edges_except.split(','))

    predicate_remap_config = kg2_util.safe_load_yaml_from_string(
        kg2_util.read_file_to_string(predicate_remap_file_name))
    curies_to_uri_map = kg2_util.make_curies_to_uri_map(
        curies_to_uri_lal_file_name)
    graph = kg2_util.load_json(input_file_name)

    edge_keys = set()
    new_edges = {}
    relation_curies_not_in_config = set()
    # One flag per configured relation CURIE, all starting out False.
    record_of_relation_curie_occurrences = dict.fromkeys(
        predicate_remap_config, False)

    # Sanity-check the remap configuration: each relation CURIE must map to
    # exactly one command, and that command must be a recognised one.  The
    # offending CURIE is attached to the AssertionError to ease debugging.
    command_set = {'delete', 'keep', 'invert', 'rename'}
    for relation_curie, command in predicate_remap_config.items():
        assert len(command) == 1, relation_curie
        assert next(iter(command.keys())) in command_set, relation_curie
    relation_curies_not_in_nodes = set()
'validate_curies_to_categories.py: checks the file `curies-to-categories.yaml` for correctness.' ) arg_parser.add_argument('curiesToCategoriesFile', type=str) arg_parser.add_argument('curiesToURLsMapFile', type=str) arg_parser.add_argument('biolinkModelURL', type=str) arg_parser.add_argument('biolinkModelLocalFile', type=str) return arg_parser args = make_arg_parser().parse_args() curies_to_categories_file_name = args.curiesToCategoriesFile curies_to_urls_map_file_name = args.curiesToURLsMapFile biolink_model_url = args.biolinkModelURL biolink_model_file_name = args.biolinkModelLocalFile curies_to_categories_data = kg2_util.safe_load_yaml_from_string( kg2_util.read_file_to_string(curies_to_categories_file_name)) curies_to_url_map_data = kg2_util.safe_load_yaml_from_string( kg2_util.read_file_to_string(curies_to_urls_map_file_name)) curies_to_url_map_data_bidir = { next(iter(listitem.keys())) for listitem in curies_to_url_map_data['use_for_bidirectional_mapping'] } kg2_util.download_file_if_not_exist_locally(biolink_model_url, biolink_model_file_name) biolink_ont = kg2_util.make_ontology_from_local_file(biolink_model_file_name) biolink_categories_ontology_depths = kg2_util.get_biolink_categories_ontology_depths( biolink_ont) for prefix in curies_to_categories_data['prefix-mappings'].keys(): assert prefix in curies_to_url_map_data_bidir, prefix
'checks the file `kg2_util.py` for correctness for its CURIE IDs, Base URLs, and biolink categories.' ) arg_parser.add_argument('curiesToURLsMapFile', type=str) arg_parser.add_argument('biolinkModelURL', type=str) arg_parser.add_argument('biolinkModelLocalFile', type=str) return arg_parser args = make_arg_parser().parse_args() biolink_model_url = args.biolinkModelURL biolink_model_file_name = args.biolinkModelLocalFile curies_to_urls_map_file_name = args.curiesToURLsMapFile iri_shortener = kg2_util.make_uri_to_curie_shortener( kg2_util.make_curies_to_uri_map( kg2_util.read_file_to_string(curies_to_urls_map_file_name), kg2_util.IDMapperType.CONTRACT)) curies_to_url_map_data = kg2_util.safe_load_yaml_from_string( kg2_util.read_file_to_string(curies_to_urls_map_file_name)) curies_to_url_map_data_bidir = { key: listitem[key] for listitem in curies_to_url_map_data['use_for_bidirectional_mapping'] for key in listitem.keys() } curies_to_url_map_data_cont = { key: listitem[key] for listitem in curies_to_url_map_data['use_for_contraction_only'] for key in listitem.keys() }
try: biolink_to_external_mappings['biolink:subclass_of']['narrow_mappings'].remove("umls:rb") biolink_to_external_mappings['biolink:superclass_of']['narrow_mappings'].remove("umls:rb") except ValueError: print('UMLS:RB work around no longer necessary') return biolink_to_external_mappings, biolink_mixins args = make_arg_parser().parse_args() curies_to_urls_map_file_name = args.curiesToURLsMapFile predicate_remap_file_name = args.predicateRemapFile biolink_model_url = args.biolinkModelYamlURL biolink_model_file_name = args.biolinkModelYamlLocalFile curies_to_url_map_data = kg2_util.safe_load_yaml_from_string( kg2_util.read_file_to_string(curies_to_urls_map_file_name)) curies_to_url_map_data_bidir = {next(iter(listitem.keys( ))) for listitem in curies_to_url_map_data['use_for_bidirectional_mapping']} kg2_util.download_file_if_not_exist_locally( biolink_model_url, biolink_model_file_name) biolink_model = kg2_util.safe_load_yaml_from_string( kg2_util.read_file_to_string(biolink_model_file_name)) mapping_hierarchy = ["exact_mappings", "close_mappings", "narrow_mappings", "broad_mappings", "related_mappings"] # TODO: determine correct order of mappings biolink_to_external_mappings, biolink_mixins = create_biolink_to_external_mappings( biolink_model, mapping_hierarchy) external_to_biolink_mappings = dict()
arg_parser.add_argument('outputFile', type=str) return arg_parser # --------------- main starts here ------------------- if __name__ == '__main__': delete_ontobio_cachier_caches() args = make_arg_parser().parse_args() curies_to_categories_file_name = args.categoriesFile curies_to_uri_lal_file_name = args.curiesToURILALFile owl_load_inventory_file = args.owlLoadInventoryFile output_file = args.outputFile test_mode = args.test curies_to_categories = kg2_util.safe_load_yaml_from_string( kg2_util.read_file_to_string(curies_to_categories_file_name)) curies_to_uri_map = kg2_util.make_curies_to_uri_map( curies_to_uri_lal_file_name) uri_to_curie_shortener = make_uri_to_curie_shortener(curies_to_uri_map) map_category_label_to_iri = functools.partial( kg2_util.convert_biolink_category_to_iri, biolink_category_base_iri=kg2_util.BIOLINK_CATEGORY_BASE_IRI) owl_urls_and_files = tuple( kg2_util.safe_load_yaml_from_string( kg2_util.read_file_to_string(owl_load_inventory_file))) make_kg2(curies_to_categories, uri_to_curie_shortener, map_category_label_to_iri, owl_urls_and_files, output_file, test_mode)
def make_arg_parser(): arg_parser = argparse.ArgumentParser(description='validate_curies_to_categories.py: checks the file `curies-to-categories.yaml` for correctness.') arg_parser.add_argument('curiesToCategoriesFile', type=str) arg_parser.add_argument('curiesToURLsMapFile', type=str) arg_parser.add_argument('biolinkModelOWLURL', type=str) arg_parser.add_argument('biolinkModelOWLLocalFile', type=str) return arg_parser args = make_arg_parser().parse_args() curies_to_categories_file_name = args.curiesToCategoriesFile curies_to_urls_map_file_name = args.curiesToURLsMapFile biolink_model_url = args.biolinkModelOWLURL biolink_model_file_name = args.biolinkModelOWLLocalFile curies_to_categories_data = kg2_util.safe_load_yaml_from_string(kg2_util.read_file_to_string(curies_to_categories_file_name)) curies_to_url_map_data = kg2_util.safe_load_yaml_from_string(kg2_util.read_file_to_string(curies_to_urls_map_file_name)) curies_to_url_map_data_bidir = {next(iter(listitem.keys())) for listitem in curies_to_url_map_data['use_for_bidirectional_mapping']} kg2_util.download_file_if_not_exist_locally(biolink_model_url, biolink_model_file_name) biolink_ont = kg2_util.make_ontology_from_local_file(biolink_model_file_name) biolink_categories_ontology_depths = kg2_util.get_biolink_categories_ontology_depths(biolink_ont) for prefix in curies_to_categories_data['prefix-mappings'].keys(): assert prefix in curies_to_url_map_data_bidir, prefix for curie_id in curies_to_categories_data['term-mappings'].keys(): prefix = curie_id.split(':')[0] assert prefix in curies_to_url_map_data_bidir, prefix categories_to_check = list(curies_to_categories_data['prefix-mappings'].values()) +\