Ejemplo n.º 1
0
def make_arg_parser():
    """Build the command-line parser for this validation script.

    Returns:
        An ``argparse.ArgumentParser`` that accepts three required positional
        string arguments, in this order: ``curiesToURLsMapFile``,
        ``biolinkModelURL`` and ``biolinkModelLocalFile``.
    """
    description = ('validate_kg2_util_curies_urls_categories.py: '
                   'checks the file `kg2_util.py` for correctness for its CURIE IDs, Base URLs, and biolink categories.')
    parser = argparse.ArgumentParser(description=description)
    # Registration order defines the positional order on the command line.
    for positional_name in ('curiesToURLsMapFile',
                            'biolinkModelURL',
                            'biolinkModelLocalFile'):
        parser.add_argument(positional_name, type=str)
    return parser


# Pull the parsed command-line values into module-level names.
args = make_arg_parser().parse_args()
curies_to_urls_map_file_name = args.curiesToURLsMapFile
biolink_model_url = args.biolinkModelURL
biolink_model_file_name = args.biolinkModelLocalFile

# Build the IRI shortener from the raw text of the CURIEs-to-URLs map file,
# using the CONTRACT mapper type.
iri_shortener = kg2_util.make_uri_to_curie_shortener(
    kg2_util.make_curies_to_uri_map(
        kg2_util.read_file_to_string(curies_to_urls_map_file_name),
        kg2_util.IDMapperType.CONTRACT))

# Load the same file as YAML, then flatten each section (a list of mappings)
# into one dict; on duplicate keys the later list entry wins.
curies_to_url_map_data = kg2_util.safe_load_yaml_from_string(
    kg2_util.read_file_to_string(curies_to_urls_map_file_name))
curies_to_url_map_data_bidir = {
    entry_key: entry_value
    for entry in curies_to_url_map_data['use_for_bidirectional_mapping']
    for entry_key, entry_value in entry.items()
}

curies_to_url_map_data_cont = {
    entry_key: entry_value
    for entry in curies_to_url_map_data['use_for_contraction_only']
    for entry_key, entry_value in entry.items()
}


# Fetch the Biolink model file (only if it is not already present locally) and
# load it as an ontology.
kg2_util.download_file_if_not_exist_locally(biolink_model_url,
                                            biolink_model_file_name)
biolink_ont = kg2_util.make_ontology_from_local_file(biolink_model_file_name)
biolink_categories_ontology_depths = kg2_util.get_biolink_categories_ontology_depths(
    biolink_ont)

# Children of the SlotDefinition node, with the Biolink meta base URL removed.
biolink_edge_labels = {
    child_url.replace(kg2_util.BASE_URL_BIOLINK_META, '')
    for child_url in biolink_ont.children(
        kg2_util.BASE_URL_BIOLINK_META + 'SlotDefinition')
}
Ejemplo n.º 2
0
    arg_parser.add_argument('curiesToURLsMapFile', type=str)
    arg_parser.add_argument('umls2rdfConfFile', type=str)
    arg_parser.add_argument('biolinkModelURL', type=str)
    arg_parser.add_argument('biolinkModelLocalFile', type=str)
    return arg_parser


# Pull the parsed command-line values into module-level names.
args = make_arg_parser().parse_args()
owl_load_inventory_file_name = args.owlLoadInventoryFile
curies_to_urls_map_file_name = args.curiesToURLsMapFile
umls2rdf_conf_file_name = args.umls2rdfConfFile
biolink_model_url = args.biolinkModelURL
biolink_model_file_name = args.biolinkModelLocalFile

owl_load_inventory_data = kg2_util.safe_load_yaml_from_string(
    kg2_util.read_file_to_string(owl_load_inventory_file_name))

# Collect the second comma-separated field of every non-empty line of the
# umls2rdf configuration file that does not start with '#'.  The `with`
# statement closes the file on exit, so no explicit close() call is needed.
# A kept line without a comma raises IndexError.
with open(umls2rdf_conf_file_name, 'r') as umls2rdf_conf_file:
    umls_ttl_files = {
        line.split(",")[1]
        for line in umls2rdf_conf_file.read().splitlines()
        if line and not line.startswith('#')
    }
# This file name is always included, whatever the configuration file lists.
umls_ttl_files.add('umls-semantictypes.ttl')

# Fetch the Biolink model file (only if it is not already present locally) and
# load it as an ontology.
kg2_util.download_file_if_not_exist_locally(biolink_model_url,
                                            biolink_model_file_name)
biolink_ont = kg2_util.make_ontology_from_local_file(biolink_model_file_name)
biolink_categories_ontology_depths = kg2_util.get_biolink_categories_ontology_depths(

if __name__ == '__main__':
    # Pull the parsed command-line values into module-level names.
    args = make_arg_parser().parse_args()
    predicate_remap_file_name = args.predicateRemapYaml
    curies_to_uri_lal_file_name = args.curiesToURILALFile
    input_file_name = args.inputFileJson
    output_file_name = args.outputFileJson
    test_mode = args.test
    drop_negated = args.drop_negated
    drop_self_edges_except = args.drop_self_edges_except
    if drop_self_edges_except is not None:
        # The option is given as one comma-separated string; turn it into a
        # set.  isinstance() is the idiomatic type check and also accepts
        # str subclasses.
        assert isinstance(drop_self_edges_except, str)
        drop_self_edges_except = set(drop_self_edges_except.split(','))
    predicate_remap_config = kg2_util.safe_load_yaml_from_string(
        kg2_util.read_file_to_string(predicate_remap_file_name))
    curies_to_uri_map = kg2_util.make_curies_to_uri_map(
        curies_to_uri_lal_file_name)
    graph = kg2_util.load_json(input_file_name)
    edge_keys = set()
    new_edges = dict()
    relation_curies_not_in_config = set()
    # One flag per configured relation CURIE, all initially False.
    record_of_relation_curie_occurrences = dict.fromkeys(
        predicate_remap_config, False)
    command_set = {'delete', 'keep', 'invert', 'rename'}
    # Sanity-check the remap configuration: every entry must hold exactly one
    # command, and that command must be one of the names in command_set.
    for relation_curie, command in predicate_remap_config.items():
        assert len(command) == 1
        assert next(iter(command)) in command_set
    relation_curies_not_in_nodes = set()
        'validate_curies_to_categories.py: checks the file `curies-to-categories.yaml` for correctness.'
    )
    arg_parser.add_argument('curiesToCategoriesFile', type=str)
    arg_parser.add_argument('curiesToURLsMapFile', type=str)
    arg_parser.add_argument('biolinkModelURL', type=str)
    arg_parser.add_argument('biolinkModelLocalFile', type=str)
    return arg_parser


# Pull the parsed command-line values into module-level names.
args = make_arg_parser().parse_args()
biolink_model_file_name = args.biolinkModelLocalFile
biolink_model_url = args.biolinkModelURL
curies_to_urls_map_file_name = args.curiesToURLsMapFile
curies_to_categories_file_name = args.curiesToCategoriesFile

# Load both YAML inputs.
curies_to_categories_data = kg2_util.safe_load_yaml_from_string(
    kg2_util.read_file_to_string(curies_to_categories_file_name))
curies_to_url_map_data = kg2_util.safe_load_yaml_from_string(
    kg2_util.read_file_to_string(curies_to_urls_map_file_name))

# Set holding the first key of every entry in the bidirectional-mapping list.
curies_to_url_map_data_bidir = {
    next(iter(entry))
    for entry in curies_to_url_map_data['use_for_bidirectional_mapping']
}

# Fetch the Biolink model file (only if it is not already present locally) and
# load it as an ontology.
kg2_util.download_file_if_not_exist_locally(biolink_model_url,
                                            biolink_model_file_name)
biolink_ont = kg2_util.make_ontology_from_local_file(biolink_model_file_name)
biolink_categories_ontology_depths = (
    kg2_util.get_biolink_categories_ontology_depths(biolink_ont))

# Every prefix that is given a category must also be a known bidirectional
# prefix; on failure the offending prefix is the assertion message.
for prefix in curies_to_categories_data['prefix-mappings']:
    assert prefix in curies_to_url_map_data_bidir, prefix
        'checks the file `kg2_util.py` for correctness for its CURIE IDs, Base URLs, and biolink categories.'
    )
    arg_parser.add_argument('curiesToURLsMapFile', type=str)
    arg_parser.add_argument('biolinkModelURL', type=str)
    arg_parser.add_argument('biolinkModelLocalFile', type=str)
    return arg_parser


# Pull the parsed command-line values into module-level names.
args = make_arg_parser().parse_args()
curies_to_urls_map_file_name = args.curiesToURLsMapFile
biolink_model_url = args.biolinkModelURL
biolink_model_file_name = args.biolinkModelLocalFile

# Build the IRI shortener from the raw text of the CURIEs-to-URLs map file,
# using the CONTRACT mapper type.
curies_to_uri_contract_map = kg2_util.read_file_to_string(
    curies_to_urls_map_file_name)
iri_shortener = kg2_util.make_uri_to_curie_shortener(
    kg2_util.make_curies_to_uri_map(curies_to_uri_contract_map,
                                    kg2_util.IDMapperType.CONTRACT))
del curies_to_uri_contract_map  # temporary only; keep the module namespace unchanged

# Load the same file as YAML, then flatten each section (a list of mappings)
# into one dict; on duplicate keys the later list entry wins.
curies_to_url_map_data = kg2_util.safe_load_yaml_from_string(
    kg2_util.read_file_to_string(curies_to_urls_map_file_name))
curies_to_url_map_data_bidir = {
    entry_key: entry_value
    for entry in curies_to_url_map_data['use_for_bidirectional_mapping']
    for entry_key, entry_value in entry.items()
}

curies_to_url_map_data_cont = {
    entry_key: entry_value
    for entry in curies_to_url_map_data['use_for_contraction_only']
    for entry_key, entry_value in entry.items()
}
    try:
        biolink_to_external_mappings['biolink:subclass_of']['narrow_mappings'].remove("umls:rb")
        biolink_to_external_mappings['biolink:superclass_of']['narrow_mappings'].remove("umls:rb")
    except ValueError:
        print('UMLS:RB work around no longer necessary')
    return biolink_to_external_mappings, biolink_mixins


# Pull the parsed command-line values into module-level names.
args = make_arg_parser().parse_args()
biolink_model_file_name = args.biolinkModelYamlLocalFile
biolink_model_url = args.biolinkModelYamlURL
predicate_remap_file_name = args.predicateRemapFile
curies_to_urls_map_file_name = args.curiesToURLsMapFile

curies_to_url_map_data = kg2_util.safe_load_yaml_from_string(
    kg2_util.read_file_to_string(curies_to_urls_map_file_name))
# Set holding the first key of every entry in the bidirectional-mapping list.
curies_to_url_map_data_bidir = {
    next(iter(entry))
    for entry in curies_to_url_map_data['use_for_bidirectional_mapping']
}

# Fetch the Biolink model file (only if it is not already present locally) and
# parse it as YAML.
kg2_util.download_file_if_not_exist_locally(biolink_model_url,
                                            biolink_model_file_name)

biolink_model = kg2_util.safe_load_yaml_from_string(
    kg2_util.read_file_to_string(biolink_model_file_name))

mapping_hierarchy = [
    "exact_mappings",
    "close_mappings",
    "narrow_mappings",
    "broad_mappings",
    "related_mappings",
]  # TODO: determine correct order of mappings

(biolink_to_external_mappings,
 biolink_mixins) = create_biolink_to_external_mappings(biolink_model,
                                                       mapping_hierarchy)

external_to_biolink_mappings = {}
Ejemplo n.º 7
0
    arg_parser.add_argument('outputFile', type=str)
    return arg_parser


# --------------- main starts here -------------------

if __name__ == '__main__':
    # Script entry point: clear caches, read the command-line arguments, load
    # the YAML inputs and hand everything to make_kg2().
    delete_ontobio_cachier_caches()
    args = make_arg_parser().parse_args()
    curies_to_categories_file_name = args.categoriesFile
    curies_to_uri_lal_file_name = args.curiesToURILALFile
    owl_load_inventory_file = args.owlLoadInventoryFile
    output_file = args.outputFile
    test_mode = args.test
    curies_to_categories = kg2_util.safe_load_yaml_from_string(
        kg2_util.read_file_to_string(curies_to_categories_file_name))
    # The file *name* (not its contents) is passed to make_curies_to_uri_map.
    curies_to_uri_map = kg2_util.make_curies_to_uri_map(
        curies_to_uri_lal_file_name)
    # NOTE(review): make_uri_to_curie_shortener is called unqualified here,
    # unlike the kg2_util.-prefixed helpers around it -- presumably defined or
    # imported elsewhere in this file; confirm.
    uri_to_curie_shortener = make_uri_to_curie_shortener(curies_to_uri_map)
    # Pre-bind the Biolink category base IRI so callers only supply the
    # remaining argument(s) of convert_biolink_category_to_iri.
    map_category_label_to_iri = functools.partial(
        kg2_util.convert_biolink_category_to_iri,
        biolink_category_base_iri=kg2_util.BIOLINK_CATEGORY_BASE_IRI)

    # The inventory YAML is materialised as a tuple of its top-level items.
    owl_urls_and_files = tuple(
        kg2_util.safe_load_yaml_from_string(
            kg2_util.read_file_to_string(owl_load_inventory_file)))

    make_kg2(curies_to_categories, uri_to_curie_shortener,
             map_category_label_to_iri, owl_urls_and_files, output_file,
             test_mode)
Ejemplo n.º 8
0
def make_arg_parser():
    """Build the command-line parser for the curies-to-categories validator.

    Returns:
        An ``argparse.ArgumentParser`` that accepts four required positional
        string arguments, in this order: ``curiesToCategoriesFile``,
        ``curiesToURLsMapFile``, ``biolinkModelOWLURL`` and
        ``biolinkModelOWLLocalFile``.
    """
    positional_names = (
        'curiesToCategoriesFile',
        'curiesToURLsMapFile',
        'biolinkModelOWLURL',
        'biolinkModelOWLLocalFile',
    )
    parser = argparse.ArgumentParser(
        description='validate_curies_to_categories.py: checks the file `curies-to-categories.yaml` for correctness.')
    # Registration order defines the positional order on the command line.
    for positional_name in positional_names:
        parser.add_argument(positional_name, type=str)
    return parser


# Pull the parsed command-line values into module-level names.
args = make_arg_parser().parse_args()
biolink_model_file_name = args.biolinkModelOWLLocalFile
biolink_model_url = args.biolinkModelOWLURL
curies_to_urls_map_file_name = args.curiesToURLsMapFile
curies_to_categories_file_name = args.curiesToCategoriesFile

# Load both YAML inputs.
curies_to_categories_data = kg2_util.safe_load_yaml_from_string(
    kg2_util.read_file_to_string(curies_to_categories_file_name))
curies_to_url_map_data = kg2_util.safe_load_yaml_from_string(
    kg2_util.read_file_to_string(curies_to_urls_map_file_name))
# Set holding the first key of every entry in the bidirectional-mapping list.
curies_to_url_map_data_bidir = {
    next(iter(entry))
    for entry in curies_to_url_map_data['use_for_bidirectional_mapping']
}

# Fetch the Biolink model file (only if it is not already present locally) and
# load it as an ontology.
kg2_util.download_file_if_not_exist_locally(biolink_model_url,
                                            biolink_model_file_name)
biolink_ont = kg2_util.make_ontology_from_local_file(biolink_model_file_name)
biolink_categories_ontology_depths = (
    kg2_util.get_biolink_categories_ontology_depths(biolink_ont))

# Every prefix that is given a category must be a known bidirectional prefix;
# on failure the offending prefix is the assertion message.
for prefix in curies_to_categories_data['prefix-mappings']:
    assert prefix in curies_to_url_map_data_bidir, prefix

# Same check for individual terms, using the text before the first ':' of each
# CURIE as its prefix.
for curie_id in curies_to_categories_data['term-mappings']:
    prefix = curie_id.partition(':')[0]
    assert prefix in curies_to_url_map_data_bidir, prefix

categories_to_check = list(curies_to_categories_data['prefix-mappings'].values()) +\