'checks the file `kg2_util.py` for correctness for its CURIE IDs, Base URLs, and biolink categories.') arg_parser.add_argument('curiesToURLsMapFile', type=str) arg_parser.add_argument('biolinkModelURL', type=str) arg_parser.add_argument('biolinkModelLocalFile', type=str) return arg_parser args = make_arg_parser().parse_args() biolink_model_url = args.biolinkModelURL biolink_model_file_name = args.biolinkModelLocalFile curies_to_urls_map_file_name = args.curiesToURLsMapFile iri_shortener = kg2_util.make_uri_to_curie_shortener(kg2_util.make_curies_to_uri_map(kg2_util.read_file_to_string(curies_to_urls_map_file_name), kg2_util.IDMapperType.CONTRACT)) curies_to_url_map_data = kg2_util.safe_load_yaml_from_string(kg2_util.read_file_to_string(curies_to_urls_map_file_name)) curies_to_url_map_data_bidir = {key: listitem[key] for listitem in curies_to_url_map_data['use_for_bidirectional_mapping'] for key in listitem.keys()} curies_to_url_map_data_cont = {key: listitem[key] for listitem in curies_to_url_map_data['use_for_contraction_only'] for key in listitem.keys()} kg2_util.download_file_if_not_exist_locally(biolink_model_url, biolink_model_file_name) biolink_ont = kg2_util.make_ontology_from_local_file(biolink_model_file_name) biolink_categories_ontology_depths = kg2_util.get_biolink_categories_ontology_depths(biolink_ont) biolink_edge_labels = {url.replace(kg2_util.BASE_URL_BIOLINK_META, '') for url in biolink_ont.children(kg2_util.BASE_URL_BIOLINK_META + 'SlotDefinition')} for variable_name in dir(kg2_util): variable_value = getattr(kg2_util, variable_name) if variable_name.startswith('CURIE_PREFIX_'):
arg_parser.add_argument('owlLoadInventoryFile', type=str) arg_parser.add_argument('curiesToURLsMapFile', type=str) arg_parser.add_argument('umls2rdfConfFile', type=str) arg_parser.add_argument('biolinkModelURL', type=str) arg_parser.add_argument('biolinkModelLocalFile', type=str) return arg_parser args = make_arg_parser().parse_args() owl_load_inventory_file_name = args.owlLoadInventoryFile curies_to_urls_map_file_name = args.curiesToURLsMapFile umls2rdf_conf_file_name = args.umls2rdfConfFile biolink_model_url = args.biolinkModelURL biolink_model_file_name = args.biolinkModelLocalFile owl_load_inventory_data = kg2_util.safe_load_yaml_from_string( kg2_util.read_file_to_string(owl_load_inventory_file_name)) umls_ttl_files = None with open(umls2rdf_conf_file_name, 'r') as umls2rdf_conf_file: umls_ttl_files = { line.split(",")[1] for line in umls2rdf_conf_file.read().splitlines() if len(line) > 0 and not line.startswith('#') } umls2rdf_conf_file.close() umls_ttl_files.add('umls-semantictypes.ttl') kg2_util.download_file_if_not_exist_locally(biolink_model_url, biolink_model_file_name) biolink_ont = kg2_util.make_ontology_from_local_file(biolink_model_file_name) biolink_categories_ontology_depths = kg2_util.get_biolink_categories_ontology_depths(
return arg_parser if __name__ == '__main__': args = make_arg_parser().parse_args() predicate_remap_file_name = args.predicateRemapYaml curies_to_uri_lal_file_name = args.curiesToURILALFile input_file_name = args.inputFileJson output_file_name = args.outputFileJson test_mode = args.test drop_negated = args.drop_negated drop_self_edges_except = args.drop_self_edges_except if drop_self_edges_except is not None: assert type(drop_self_edges_except) == str drop_self_edges_except = set(drop_self_edges_except.split(',')) predicate_remap_config = kg2_util.safe_load_yaml_from_string( kg2_util.read_file_to_string(predicate_remap_file_name)) curies_to_uri_map = kg2_util.make_curies_to_uri_map( curies_to_uri_lal_file_name) graph = kg2_util.load_json(input_file_name) edge_keys = set() new_edges = dict() relation_curies_not_in_config = set() record_of_relation_curie_occurrences = { relation_curie: False for relation_curie in predicate_remap_config.keys() } command_set = {'delete', 'keep', 'invert', 'rename'} for relation_curie, command in predicate_remap_config.items(): assert len(command) == 1 assert next(iter(command.keys())) in command_set relation_curies_not_in_nodes = set()
arg_parser.add_argument('owlLoadInventoryFile', type=str) arg_parser.add_argument('outputFile', type=str) return arg_parser # --------------- main starts here ------------------- if __name__ == '__main__': delete_ontobio_cachier_caches() args = make_arg_parser().parse_args() curies_to_categories_file_name = args.categoriesFile curies_to_uri_lal_file_name = args.curiesToURILALFile owl_load_inventory_file = args.owlLoadInventoryFile output_file = args.outputFile test_mode = args.test curies_to_categories = kg2_util.safe_load_yaml_from_string( kg2_util.read_file_to_string(curies_to_categories_file_name)) curies_to_uri_map = kg2_util.make_curies_to_uri_map( curies_to_uri_lal_file_name) uri_to_curie_shortener = make_uri_to_curie_shortener(curies_to_uri_map) map_category_label_to_iri = functools.partial( kg2_util.convert_biolink_category_to_iri, biolink_category_base_iri=kg2_util.BIOLINK_CATEGORY_BASE_IRI) owl_urls_and_files = tuple( kg2_util.safe_load_yaml_from_string( kg2_util.read_file_to_string(owl_load_inventory_file))) make_kg2(curies_to_categories, uri_to_curie_shortener, map_category_label_to_iri, owl_urls_and_files, output_file, test_mode)