コード例 #1
0
def make_kg2(curies_to_categories: dict,
             uri_to_curie_shortener: callable,
             map_category_label_to_iri: callable,
             owl_urls_and_files: tuple,
             output_file_name: str,
             test_mode: bool = False):
    """Build a nodes/edges graph from a set of OWL sources and save it as JSON.

    Args:
        curies_to_categories: forwarded unchanged to
            ``make_nodes_dict_from_ontologies_list``.
        uri_to_curie_shortener: callable forwarded to both the node-building
            and the relationship-building steps.
        map_category_label_to_iri: callable forwarded to the node-building
            step.
        owl_urls_and_files: iterable of dicts, one per OWL source; each is
            read for the keys ``'download'``, ``'url'``, ``'file'`` and
            ``'title'``.
        output_file_name: forwarded to ``kg2_util.save_json``.
        test_mode: forwarded to ``kg2_util.save_json``.

    Raises:
        AssertionError: if a source is flagged as not-to-download and its
            ``'file'`` path does not exist locally.
        KeyError: if a node lacks the ``'xrefs'`` or ``'ontology node ids'``
            entry that is stripped before saving.
    """
    ontology_info_list = []

    # Walk every OWL source (local file or URL) listed in the configuration.
    for source_info in owl_urls_and_files:
        if source_info['download']:
            # Fetch the OWL file (if needed) and get the local path to it.
            owl_path = kg2_util.download_file_if_not_exist_locally(
                source_info['url'], source_info['file'])
        else:
            # Nothing to download, so the file must already be present.
            owl_path = source_info['file']
            assert os.path.exists(owl_path)
        # Load the OWL file data into an ontology object plus a metadata
        # dictionary, and keep the ontology alongside its metadata.
        ontology, info = load_owl_file_return_ontology_and_metadata(
            owl_path, source_info['url'], source_info['title'])
        info['ontology'] = ontology
        ontology_info_list.append(info)

    kg2_util.log_message('Calling make_nodes_dict_from_ontologies_list')
    nodes_dict = make_nodes_dict_from_ontologies_list(
        ontology_info_list,
        curies_to_categories,
        uri_to_curie_shortener,
        map_category_label_to_iri)

    kg2_util.log_message('Calling make_map_of_node_ontology_ids_to_curie_ids')
    ontology_id_to_curie_id = make_map_of_node_ontology_ids_to_curie_ids(nodes_dict)

    kg2_util.log_message('Calling get_rels_dict')
    # Collect every relationship (xrefs are represented as relationships too).
    rels_dict = get_rels_dict(nodes_dict,
                              ontology_info_list,
                              uri_to_curie_shortener,
                              ontology_id_to_curie_id)

    edges = list(rels_dict.values())
    kg2_util.log_message('Number of edges: ' + str(len(edges)))
    nodes = list(nodes_dict.values())
    kg2_util.log_message('Number of nodes: ' + str(len(nodes)))
    # The node dicts now live on in `nodes`; drop the extra reference to the map.
    del nodes_dict

    # Strip the bookkeeping fields from every node before serializing.
    for node in nodes:
        for bookkeeping_key in ('xrefs', 'ontology node ids'):
            del node[bookkeeping_key]

    kg2_dict = {'edges': edges, 'nodes': nodes}

    kg2_util.log_message('Saving JSON file')
    kg2_util.save_json(kg2_dict, output_file_name, test_mode)
コード例 #2
0
# Read the command-line options that locate the Biolink model and the
# CURIE-to-URL mapping file.
args = make_arg_parser().parse_args()
biolink_model_url = args.biolinkModelURL
biolink_model_file_name = args.biolinkModelLocalFile
curies_to_urls_map_file_name = args.curiesToURLsMapFile

# Build the IRI -> CURIE shortener from the mapping file, using the
# CONTRACT mapper type.
_curies_to_uri_contract_map = kg2_util.make_curies_to_uri_map(
    kg2_util.read_file_to_string(curies_to_urls_map_file_name),
    kg2_util.IDMapperType.CONTRACT)
iri_shortener = kg2_util.make_uri_to_curie_shortener(_curies_to_uri_contract_map)

# Parse the same mapping file as YAML and flatten each section (a list of
# small dicts) into a single prefix -> URL dictionary.
curies_to_url_map_data = kg2_util.safe_load_yaml_from_string(
    kg2_util.read_file_to_string(curies_to_urls_map_file_name))
curies_to_url_map_data_bidir = {
    prefix: url
    for entry in curies_to_url_map_data['use_for_bidirectional_mapping']
    for prefix, url in entry.items()
}

curies_to_url_map_data_cont = {
    prefix: url
    for entry in curies_to_url_map_data['use_for_contraction_only']
    for prefix, url in entry.items()
}


# Make sure the Biolink model file is available locally, then load it.
kg2_util.download_file_if_not_exist_locally(biolink_model_url, biolink_model_file_name)
biolink_ont = kg2_util.make_ontology_from_local_file(biolink_model_file_name)
biolink_categories_ontology_depths = kg2_util.get_biolink_categories_ontology_depths(biolink_ont)

# Children of the 'SlotDefinition' node, with the Biolink meta base URL
# removed from each identifier.
biolink_edge_labels = {
    slot_url.replace(kg2_util.BASE_URL_BIOLINK_META, '')
    for slot_url in biolink_ont.children(kg2_util.BASE_URL_BIOLINK_META + 'SlotDefinition')
}

for variable_name in dir(kg2_util):
    variable_value = getattr(kg2_util, variable_name)
    if variable_name.startswith('CURIE_PREFIX_'):
        assert variable_value in curies_to_url_map_data_bidir, variable_name
    elif variable_name.startswith('BASE_URL_'):
        url_str = variable_value
        curie = iri_shortener(url_str)
        assert curie is not None, url_str
    elif variable_name.startswith('BIOLINK_CATEGORY_'):