Ejemplo n.º 1
0
    def construct_knowledge_graph(self) -> None:
        """Builds a full knowledge graph. Please note that the process to build this version of the knowledge graph
        does not include running a reasoner. The full build includes the following steps: (1) Process relation/inverse
        relations; (2) Merge ontologies; (3) Process node metadata; (4) Create graph subsets; (5) Add master edge
        list to merged ontologies; (6) Decode OWL-encoded classes; (7) Output knowledge graphs and create edge lists
        and (8) Extract and write node metadata.

        Returns:
            None.
        """

        log_str = '### Starting Knowledge Graph Build: FULL ###'; print('\n' + log_str)
        logger.info('*' * 10 + 'PKT STEP: CONSTRUCTING KNOWLEDGE GRAPH' + '*' * 10 + '\n' + log_str)

        # STEP 1: PROCESS RELATION AND INVERSE RELATION DATA
        log_str = '*** Loading Relations Data ***'; print(log_str); logger.info(log_str)
        self.reverse_relation_processor()

        # STEP 2: MERGE ONTOLOGIES
        if self.merged_ont_kg in glob.glob(self.write_location + '/*.owl'):
            log_str = '*** Loading Merged Ontologies ***'; print(log_str); logger.info(log_str)
            self.graph = Graph().parse(self.merged_ont_kg, format='xml')
        else:
            log_str = '*** Merging Ontology Data ***'; print(log_str); logger.info(log_str)
            merges_ontologies(self.ontologies, self.merged_ont_kg.split('/')[-1], self.owl_tools)
            self.graph.parse(self.merged_ont_kg, format='xml')
        stats = 'Merged Ontologies {}'.format(derives_graph_statistics(self.graph)); print(stats); logger.info(stats)

        # STEP 3: PROCESS NODE METADATA
        log_str = '*** Loading Node Metadata Data ***'; print(log_str); logger.info(log_str)
        meta = Metadata(self.kg_version, self.write_location, self.full_kg, self.node_data, self.node_dict)
        if self.node_data: meta.metadata_processor(); meta.extract_metadata(self.graph)

        # STEP 4: CREATE GRAPH SUBSETS
        log_str = '*** Splitting Graph ***'; print(log_str); logger.info(log_str)
        f = self.write_location; self.graph, annotation_triples = splits_knowledge_graph(self.graph)
        s = 'Merged Ontologies - Logic Subset {}'.format(derives_graph_statistics(self.graph)); print(s); logger.info(s)
        kg_owl = '_'.join(self.full_kg.split('_')[0:-1]) + '_OWL.owl'; kg_owl_main = kg_owl[:-8] + '.owl'
        annot, logic, full = kg_owl[:-4] + '_AnnotationsOnly.nt', kg_owl[:-4] + '_LogicOnly.nt', kg_owl[:-4] + '.nt'
        appends_to_existing_file(annotation_triples, f + annot); appends_to_existing_file(self.graph, f + logic)
        del annotation_triples

        # STEP 5: ADD EDGE DATA TO KNOWLEDGE GRAPH DATA
        log_str = '*** Building Knowledge Graph Edges ***'; print('\n' + log_str); logger.info(log_str)
        self.ont_classes = gets_ontology_classes(self.graph); self.obj_properties = gets_object_properties(self.graph)
        try: ray.init()
        except RuntimeError: pass
        args = {'construction': self.construct_approach, 'edge_dict': self.edge_dict, 'node_data': self.node_data,
                'rel_dict': self.relations_dict, 'inverse_dict': self.inverse_relations_dict, 'kg_owl': kg_owl,
                'ont_cls': self.ont_classes, 'obj_props': self.obj_properties, 'metadata': meta.creates_node_metadata,
                'write_loc': self.write_location}
        edges = sublist_creator({k: len(v['edge_list']) for k, v in self.edge_dict.items()}, self.cpus)
        actors = [ray.remote(self.EdgeConstructor).remote(args) for _ in range(self.cpus)]  # type: ignore
        for i in range(0, len(edges)): [actors[i].creates_new_edges.remote(j) for j in edges[i]]  # type: ignore
        _ = ray.wait([x.graph_getter.remote() for x in actors], num_returns=len(actors))
        res = ray.get([x.graph_getter.remote() for x in actors]); g1 = [x[0] for x in res]; g2 = [x[1] for x in res]
        error_dicts = dict(ChainMap(*ray.get([x.error_dict_getter.remote() for x in actors]))); del actors
        if len(error_dicts.keys()) > 0:  # output error logs
            log_file = glob.glob(self.res_dir + '/construction*')[0] + '/subclass_map_log.json'
            logger.info('See log: {}'.format(log_file)); outputs_dictionary_data(error_dicts, log_file)

        # STEP 6: DECODE OWL SEMANTICS
        results = [set(x for y in [set(x) for x in [self.graph] + g1] for x in y), None, None]
        stats = 'Full Logic {}'.format(derives_graph_statistics(results[0])); print(stats); logger.info(stats)
        s1 = convert_to_networkx(self.write_location, kg_owl[:-4], results[0], True)
        if s1 is not None: log_stats = 'Full Logic Subset (OWL) {}'.format(s1); logger.info(log_stats); print(log_stats)
        # aggregates processed owl-nets output derived when constructing non-ontology edges
        if self.decode_owl is not None:
            graphs = [updates_pkt_namespace_identifiers(self.graph, self.construct_approach)] + g2
            owlnets = OwlNets(graphs, self.write_location, kg_owl_main, self.construct_approach, self.owl_tools)
            results = [results[0]] + list(owlnets.runs_owlnets(self.cpus))

        # STEP 7: WRITE OUT KNOWLEDGE GRAPH METADATA AND CREATE EDGE LISTS
        log_str = '*** Writing Knowledge Graph Edge Lists ***'; print('\n' + log_str); logger.info(log_str)
        f_prefix = ['_OWL', '_OWLNETS', '_OWLNETS_' + self.construct_approach.upper() + '_purified']
        for x in range(0, len(results)):
            graph = results[x]; p_str = 'OWL' if x == 0 else 'OWL-NETS' if x == 1 else 'Purified OWL-NETS'
            if graph is not None:
                log_str = '*** Processing {} Graph ***'.format(p_str); print('\n' + log_str); logger.info(log_str)
                triple_list_file = kg_owl[:-8] + f_prefix[x] + '_Triples_Integers.txt'
                triple_map = triple_list_file[:-5] + '_Identifier_Map.json'
                node_int_map = maps_ids_to_integers(graph, self.write_location, triple_list_file, triple_map)

                # STEP 8: EXTRACT AND WRITE NODE METADATA
                meta.full_kg = kg_owl[:-8] + f_prefix[x] + '.owl'
                if self.node_data: meta.output_metadata(node_int_map, graph)

        # deduplicate logic and annotation files, merge them, and print final stats
        deduplicates_file(f + annot); deduplicates_file(f + logic); merges_files(f + annot, f + logic, f + full)
        str1 = '\nLoading Full (Logic + Annotation) Graph'; print('\n' + str1); logger.info(str1)
        graph = Graph().parse(f + full, format='nt'); str2 = 'Deriving Stats'; print('\n' + str2); logger.info(str2)
        s = 'Full (Logic + Annotation) {}'.format(derives_graph_statistics(graph)); print('\n' + s); logger.info(s)

        return None
Ejemplo n.º 2
0
class TestOwlNets(unittest.TestCase):
    """Class to test the OwlNets class from the owlnets script."""
    def setUp(self):
        warnings.simplefilter('ignore', ResourceWarning)

        # initialize file location
        current_directory = os.path.dirname(__file__)
        dir_loc = os.path.join(current_directory, 'data')
        self.dir_loc = os.path.abspath(dir_loc)

        # set-up environment - make temp directory
        dir_loc_resources = os.path.join(current_directory, 'data/resources')
        self.dir_loc_resources = os.path.abspath(dir_loc_resources)
        os.mkdir(self.dir_loc_resources)
        os.mkdir(self.dir_loc_resources + '/knowledge_graphs')
        os.mkdir(self.dir_loc_resources + '/owl_decoding')

        # handle logging
        self.logs = os.path.abspath(current_directory + '/builds/logs')
        logging.disable(logging.CRITICAL)
        if len(glob.glob(self.logs + '/*.log')) > 0:
            os.remove(glob.glob(self.logs + '/*.log')[0])

        # copy data
        # ontology data
        shutil.copyfile(
            self.dir_loc + '/ontologies/so_with_imports.owl',
            self.dir_loc_resources + '/knowledge_graphs/so_with_imports.owl')
        # set-up input arguments
        self.write_location = self.dir_loc_resources + '/knowledge_graphs'
        self.kg_filename = '/so_with_imports.owl'
        # read in knowledge graph
        self.graph = Graph().parse(self.dir_loc_resources +
                                   '/knowledge_graphs/so_with_imports.owl',
                                   format='xml')
        # initialize class
        self.owl_nets = OwlNets(kg_construct_approach='subclass',
                                graph=self.graph,
                                write_location=self.write_location,
                                filename=self.kg_filename)
        self.owl_nets2 = OwlNets(kg_construct_approach='instance',
                                 graph=self.graph,
                                 write_location=self.write_location,
                                 filename=self.kg_filename)

        # update class attributes
        dir_loc_owltools = os.path.join(current_directory, 'utils/owltools')
        self.owl_nets.owl_tools = os.path.abspath(dir_loc_owltools)
        self.owl_nets2.owl_tools = os.path.abspath(dir_loc_owltools)

        return None

    def test_initialization_state(self):
        """Tests the class initialization state."""

        # write_location
        self.assertIsInstance(self.write_location, str)
        self.assertEqual(self.dir_loc_resources + '/knowledge_graphs',
                         self.write_location)
        self.assertIsInstance(self.write_location, str)
        self.assertEqual(self.dir_loc_resources + '/knowledge_graphs',
                         self.write_location)

        return None

    def test_initialization_owltools_default(self):
        """Tests the class initialization state for the owl_tools parameter when no default argument is passed."""

        owl_nets = OwlNets(kg_construct_approach='subclass',
                           graph=self.graph,
                           write_location=self.write_location,
                           filename=self.kg_filename)

        self.assertEqual(owl_nets.owl_tools, './pkt_kg/libs/owltools')

        return None

    def test_initialization_owltools(self):
        """Tests the class initialization state for the owl_tools parameter when an argument is passed."""

        owl_nets = OwlNets(kg_construct_approach='subclass',
                           graph=self.graph,
                           write_location=self.write_location,
                           filename=self.kg_filename,
                           owl_tools='test_location')

        self.assertEqual(owl_nets.owl_tools, 'test_location')

        return None

    def test_initialization_support(self):
        """Tests the class initialization state for the support parameter."""

        # when no list is passed
        owl_nets = OwlNets(kg_construct_approach='subclass',
                           graph=self.graph,
                           write_location=self.write_location,
                           filename=self.kg_filename)
        self.assertEqual(owl_nets.support, ['IAO', 'SWO', 'OBI', 'UBPROP'])
        # when an argument is passed
        owl_nets = OwlNets(kg_construct_approach='subclass',
                           graph=self.graph,
                           write_location=self.write_location,
                           filename=self.kg_filename,
                           support=['IAO'])
        self.assertEqual(owl_nets.support, ['IAO'])

        return None

    def test_initialization_top_level(self):
        """Tests the class initialization state for the top_level parameter."""

        # when no list is passed
        owl_nets = OwlNets(kg_construct_approach='subclass',
                           graph=self.graph,
                           write_location=self.write_location,
                           filename=self.kg_filename)
        self.assertEqual(owl_nets.top_level, ['ISO', 'SUMO', 'BFO'])
        # when an argument is passed
        owl_nets = OwlNets(kg_construct_approach='subclass',
                           graph=self.graph,
                           write_location=self.write_location,
                           filename=self.kg_filename,
                           top_level=['BFO'])
        self.assertEqual(owl_nets.top_level, ['BFO'])

        return None

    def test_initialization_relations(self):
        """Tests the class initialization state for the relations parameter."""

        # when no list is passed
        owl_nets = OwlNets(kg_construct_approach='subclass',
                           graph=self.graph,
                           write_location=self.write_location,
                           filename=self.kg_filename)
        self.assertEqual(owl_nets.relations, ['RO'])
        # when an argument is passed
        owl_nets = OwlNets(kg_construct_approach='subclass',
                           graph=self.graph,
                           write_location=self.write_location,
                           filename=self.kg_filename,
                           relations=['RO'])
        self.assertEqual(owl_nets.relations, ['RO'])

        return None

    def test_initialization_state_graph(self):
        """Tests the class initialization state for graphs."""

        # verify input graph object - when wrong data type
        self.assertRaises(TypeError,
                          OwlNets,
                          kg_construct_approach='subclass',
                          graph=1,
                          write_location=self.write_location,
                          filename=self.kg_filename)

        # verify input graph object - when graph file is empty
        self.assertRaises(ValueError,
                          OwlNets,
                          kg_construct_approach='subclass',
                          graph=list(),
                          write_location=self.write_location,
                          filename=self.kg_filename)
        self.assertRaises(ValueError,
                          OwlNets,
                          kg_construct_approach='subclass',
                          graph=[],
                          write_location=self.write_location,
                          filename=self.kg_filename)

        # verify input graph object points to a file that does not exist
        self.assertRaises(OSError,
                          OwlNets,
                          kg_construct_approach='subclass',
                          graph=self.dir_loc_resources +
                          '/knowledge_graphs/so_with_import_FAKE.owl',
                          write_location=self.write_location,
                          filename=self.kg_filename)

        return None

    def test_graph_input_types(self):
        """Tests different graph input types."""

        # when graph is provided
        owl_nets = OwlNets(kg_construct_approach='subclass',
                           graph=self.graph,
                           write_location=self.write_location,
                           filename=self.kg_filename,
                           owl_tools='test_location')
        self.assertIsInstance(owl_nets.graph, Graph)

        # when path to graph is provided
        owl_nets = OwlNets(kg_construct_approach='subclass',
                           graph=self.dir_loc_resources +
                           '/knowledge_graphs/so_with_imports.owl',
                           write_location=self.write_location,
                           filename=self.kg_filename,
                           owl_tools='test_location')
        self.assertIsInstance(owl_nets.graph, Graph)

        return None

    def test_initialization_state_construction_approach(self):
        """Tests the class initialization state for construction approach type."""

        self.assertIsInstance(self.owl_nets.kg_construct_approach, str)
        self.assertTrue(self.owl_nets.kg_construct_approach == 'subclass')
        self.assertFalse(self.owl_nets.kg_construct_approach == 'instance')

        return None

    def test_initialization_owl_nets_dict(self):
        """Tests the class initialization state for owl_nets_dict."""

        self.assertIsInstance(self.owl_nets.owl_nets_dict, Dict)
        self.assertIn('decoded_entities', self.owl_nets.owl_nets_dict.keys())
        self.assertIn('cardinality', self.owl_nets.owl_nets_dict.keys())
        self.assertIn('misc', self.owl_nets.owl_nets_dict.keys())
        self.assertIn('negation', self.owl_nets.owl_nets_dict.keys())
        self.assertIn('complementOf', self.owl_nets.owl_nets_dict.keys())
        self.assertIn('disjointWith', self.owl_nets.owl_nets_dict.keys())
        self.assertIn('filtered_triples', self.owl_nets.owl_nets_dict.keys())

        return None

    def test_removes_disjoint_with_axioms(self):
        """Tests the removes_disjoint_with_axioms method."""

        # create test data
        triples = [
            (BNode('N9f94b'),
             URIRef('http://www.geneontology.org/formats/oboInOwl#source'),
             Literal(
                 'lexical',
                 datatype=URIRef('http://www.w3.org/2001/XMLSchema#string'))),
            (BNode('N9f94b'), RDF.type, OWL.Axiom),
            (BNode('N9f94b'), OWL.AnnotatedTarget, obo.UBERON_0022716),
            (BNode('N9f94b'), OWL.AnnotatedSource, obo.UBERON_0022352),
            (BNode('N9f94b'), OWL.AnnotatedProperty, OWL.disjointWith)
        ]
        self.owl_nets.graph = adds_edges_to_graph(Graph(), triples, False)

        # test method
        self.owl_nets.removes_disjoint_with_axioms()
        self.assertTrue(len(self.owl_nets.graph) == 4)

        return None

    def test_removes_edges_with_owl_semantics(self):
        """Tests the removes_edges_with_owl_semantics method."""

        filtered_graph = self.owl_nets.removes_edges_with_owl_semantics()

        self.assertIsInstance(filtered_graph, Graph)
        self.assertEqual(len(filtered_graph), 2328)

        return None

    def test_cleans_decoded_graph(self):
        """Tests the cleans_decoded_graph method when owl has been decoded."""

        self.owl_nets.owl_nets_dict['decoded_classes'] = [1, 2, 3, 4, 5]

        # run method
        filtered_graph = self.owl_nets.cleans_decoded_graph()
        self.assertIsInstance(filtered_graph, Graph)
        self.assertEqual(len(filtered_graph), 2745)

        return None

    def test_recurses_axioms(self):
        """Tests the recurses_axioms method."""

        # run method when passing axioms that include BNodes
        seen_nodes = []
        axioms = [(BNode('N194ae548a89740849c3536d9753d39d8'),
                   OWL.someValuesFrom, obo.SO_0000784)]
        visited_nodes = self.owl_nets.recurses_axioms(seen_nodes, axioms)
        self.assertIsInstance(visited_nodes, List)
        self.assertEqual(len(visited_nodes), 1)
        self.assertIn(BNode('N194ae548a89740849c3536d9753d39d8'),
                      visited_nodes)

        # run method when passing axioms that do not include BNodes
        seen_nodes = []
        axioms = [(obo.SO_0002047, RDF.type, OWL.Class)]
        visited_nodes = self.owl_nets.recurses_axioms(seen_nodes, axioms)
        self.assertIsInstance(visited_nodes, List)
        self.assertEqual(len(visited_nodes), 0)

        return None

    def test_finds_uri(self):
        """Tests the finds_bnode_uri method."""

        # set-up testing data
        triples = [
            (BNode('N31fefc6d'), RDF.type, OWL.Axiom),
            (BNode('N31fefc6d'), OWL.annotatedProperty, RDFS.subClassOf),
            (BNode('N31fefc6d'), OWL.annotatedSource, obo.UBERON_0002373),
            (BNode('N31fefc6d'), OWL.annotatedTarget, BNode('N26cd7b2c')),
            (BNode('N26cd7b2c'), RDF.type, OWL.Restriction),
            (BNode('N26cd7b2c'), OWL.onProperty, obo.RO_0002202),
            (BNode('N26cd7b2c'), OWL.someValuesFrom, obo.UBERON_0010023),
            (obo.UBERON_0010023, RDF.type, OWL.Class)
        ]
        self.owl_nets.graph = adds_edges_to_graph(Graph(), triples)

        # test method
        node = self.owl_nets.finds_uri(BNode('N26cd7b2c'), obo.UBERON_0002373)
        self.assertEqual(node, obo.UBERON_0010023)

        return None

    def test_reconciles_axioms(self):
        """Tests the reconciles_axioms method."""

        # set-up testing data
        triples = [
            (BNode('N31fefc6d'), RDF.type, OWL.Axiom),
            (BNode('N31fefc6d'), OWL.annotatedProperty, RDFS.subClassOf),
            (BNode('N31fefc6d'), OWL.annotatedSource, obo.UBERON_0002373),
            (BNode('N31fefc6d'), OWL.annotatedTarget, BNode('N26cd7b2c')),
            (BNode('N26cd7b2c'), RDF.type, OWL.Restriction),
            (BNode('N26cd7b2c'), OWL.onProperty, obo.RO_0002202),
            (BNode('N26cd7b2c'), OWL.someValuesFrom, obo.UBERON_0010023),
            (obo.UBERON_0010023, RDF.type, OWL.Class)
        ]
        result = {(BNode('N26cd7b2c'), RDF.type, OWL.Restriction),
                  (BNode('N26cd7b2c'), OWL.onProperty, obo.RO_0002202),
                  (BNode('N26cd7b2c'), OWL.someValuesFrom, obo.UBERON_0010023)}
        self.owl_nets.graph = adds_edges_to_graph(Graph(), triples)

        # test method
        node, matches = self.owl_nets.reconciles_axioms(
            obo.UBERON_0002373, BNode('N26cd7b2c'))
        self.assertIsInstance(node, URIRef)
        self.assertIsInstance(matches, Set)
        self.assertEqual(sorted(list(matches)), sorted(list(result)))

        return None

    def test_reconciles_classes(self):
        """Tests the reconciles_classes method."""

        # set-up testing data
        triples = [(obo.UBERON_0002374, RDFS.subClassOf, BNode('N41c7c5fd')),
                   (BNode('N41c7c5fd'), RDF.type, OWL.Restriction),
                   (BNode('N41c7c5fd'), OWL.onProperty, obo.BFO_0000050),
                   (BNode('N41c7c5fd'), OWL.someValuesFrom, obo.UBERON_0010544)
                   ]
        result = {(BNode('N41c7c5fd'), OWL.someValuesFrom, obo.UBERON_0010544),
                  (BNode('N41c7c5fd'), RDF.type, OWL.Restriction),
                  (BNode('N41c7c5fd'), OWL.onProperty, obo.BFO_0000050)}
        self.owl_nets.graph = adds_edges_to_graph(Graph(), triples)

        # test method
        matches = self.owl_nets.reconciles_classes(obo.UBERON_0002374)
        self.assertIsInstance(matches, Set)
        self.assertEqual(sorted(list(matches)), sorted(list(result)))

        return None

    def test_creates_edge_dictionary(self):
        """Tests the creates_edge_dictionary method."""

        node, edge_dict, cardinality = self.owl_nets.creates_edge_dictionary(
            obo.SO_0000822)
        self.assertIsInstance(node, URIRef)
        self.assertIsInstance(edge_dict, Dict)
        self.assertEqual(len(edge_dict), 5)
        self.assertIsInstance(edge_dict[list(edge_dict.keys())[0]], Dict)
        self.assertIsInstance(cardinality, Set)
        self.assertEqual(len(cardinality), 0)

        return None

    def test_detects_complement_of_constructed_classes_true(self):
        """Tests the detects_complement_of_constructed_classes method when complementOf is present."""

        # set-up test data
        node_info = {
            BNode('N6ebac4ecc22240cdafe506f43d240733'): {
                'complementOf': OWL.Restriction
            }
        }

        result = self.owl_nets.detects_complement_of_constructed_classes(
            node_info, obo.UBERON_0000061)
        self.assertTrue(result)

        return None

    def test_detects_complement_of_constructed_classes_false(self):
        """Tests the detects_complement_of_constructed_classes method when complementOf is not present."""

        # set-up test data
        node_info = {
            BNode('N6ebac4ecc22240cdafe506f43d240733'): {
                'type': OWL.Restriction,
                'onClass': obo.UBERON_0000061,
                'onProperty': obo.RO_0002180
            }
        }

        result = self.owl_nets.detects_complement_of_constructed_classes(
            node_info, obo.UBERON_0000061)
        self.assertFalse(result)

        return None

    def test_detects_negation_axioms_true(self):
        """Tests the detects_negation_axioms method for negation axioms when one is present"""

        # set-up test data
        node_info = {
            BNode('N6ebac4ecc22240cdafe506f43d240733'): {
                'type':
                OWL.Restriction,
                'onClass':
                obo.UBERON_0000061,
                'onProperty':
                URIRef('http://purl.obolibrary.org/obo/cl#lacks_part')
            }
        }

        result = self.owl_nets.detects_negation_axioms(node_info,
                                                       obo.UBERON_0000061)
        self.assertTrue(result)

        return None

    def test_detects_negation_axioms_false(self):
        """Tests the detects_negation_axioms method for negation axioms when none present"""

        # set-up test data
        node = obo.UBERON_0000061
        node_info = {
            BNode('N6ebac4ecc22240cdafe506f43d240733'): {
                'type': OWL.Restriction,
                'onClass': obo.UBERON_0000061,
                'onProperty': obo.RO_0001111
            }
        }

        result = self.owl_nets.detects_negation_axioms(node_info, node)
        self.assertFalse(result)

        return None

    def test_captures_cardinality_axioms(self):
        """Tests the captures_cardinality_axioms method for a cardinality object."""

        # set-up input
        triples = [
            (BNode('N6ebac'),
             URIRef('http://www.w3.org/2002/07/owl#minQualifiedCardinality'),
             Literal(
                 '2',
                 datatype=URIRef(
                     'http://www.w3.org/2001/XMLSchema#nonNegativeInteger'))),
            (BNode('N6ebac'), OWL.onClass, obo.UBERON_0000061),
            (BNode('N6ebac'), RDF.type, OWL.Restriction),
            (BNode('N6ebac'), OWL.onProperty, obo.RO_0002180)
        ]
        self.owl_nets.graph = adds_edges_to_graph(Graph(), triples)

        # test method
        self.owl_nets.captures_cardinality_axioms(
            {str(obo.UBERON_0034923) + ': N6ebac'}, obo.UBERON_0034923)
        card_triples = self.owl_nets.owl_nets_dict['cardinality']
        self.assertIsInstance(card_triples, dict)
        self.assertIsInstance(
            card_triples['<http://purl.obolibrary.org/obo/UBERON_0034923>'],
            set)
        self.assertEqual(
            len(card_triples['<http://purl.obolibrary.org/obo/UBERON_0034923>']
                ), 4)

        return None

    def test_returns_object_property(self):
        """Tests the returns_object_property method."""

        # when sub and obj are PATO terms and property is none
        res1 = self.owl_nets.returns_object_property(obo.PATO_0001199,
                                                     obo.PATO_0000402, None)
        self.assertIsInstance(res1, URIRef)
        self.assertEqual(res1, RDFS.subClassOf)

        # when sub and obj are NOT PATO terms and property is none
        res2 = self.owl_nets.returns_object_property(obo.SO_0000784,
                                                     obo.GO_2000380, None)
        self.assertIsInstance(res2, URIRef)
        self.assertEqual(res2, RDFS.subClassOf)

        # when the obj is a PATO term and property is none
        res3 = self.owl_nets.returns_object_property(obo.SO_0000784,
                                                     obo.PATO_0001199, None)
        self.assertIsInstance(res3, URIRef)
        self.assertEqual(res3, obo.RO_0000086)

        # when the obj is a PATO term and property is NOT none
        res4 = self.owl_nets.returns_object_property(obo.SO_0000784,
                                                     obo.PATO_0001199,
                                                     obo.RO_0002202)
        self.assertIsInstance(res4, URIRef)
        self.assertEqual(res4, obo.RO_0000086)

        # when sub is a PATO term and property is NOT none
        res5 = self.owl_nets.returns_object_property(obo.PATO_0001199,
                                                     obo.SO_0000784,
                                                     obo.RO_0002202)
        self.assertIsInstance(res5, URIRef)
        self.assertEqual(res5, obo.RO_0002202)

        # when sub is a PATO term and property is none
        res6 = self.owl_nets.returns_object_property(obo.PATO_0001199,
                                                     obo.SO_0000784, None)
        self.assertEqual(res6, None)

        return None

    def test_parses_subclasses(self):
        """Tests the parses_subclasses method."""

        # set-up input data
        node = obo.UBERON_0010757
        edges = {
            'type': OWL.Class,
            'subClassOf': obo.UBERON_0002238,
            'intersectionOf': BNode('N6add87')
        }
        class_dict = {
            BNode('N2af571'): {
                'first': BNode('N8a9450'),
                'rest': RDF.nil
            },
            BNode('N5fef06'): {
                'type': OWL.Class,
                'subClassOf': obo.UBERON_0002238,
                'intersectionOf': BNode('N6add87')
            },
            BNode('N6add87'): {
                'first': obo.UBERON_0010757,
                'rest': BNode('N2af571')
            },
            BNode('N8a9450'): {
                'type': OWL.Restriction,
                'onProperty': obo.BFO_0000050,
                'someValuesFrom': obo.NCBI_9606
            }
        }

        # test method
        results = self.owl_nets.parses_subclasses(node, edges, class_dict)
        self.assertIsInstance(results[0], set)
        self.assertIsInstance(results[1], dict)
        self.assertEqual(
            results[0],
            {(obo.UBERON_0010757, RDFS.subClassOf, obo.UBERON_0002238)})
        self.assertEqual(results[1], {
            'type': OWL.Class,
            'intersectionOf': BNode('N6add87')
        })

        return None

    def test_parses_anonymous_axioms(self):
        """Tests the parses_anonymous_axioms method."""

        # set-up input variables
        class_dict = {
            BNode('N41aa20'): {
                'first': obo.SO_0000340,
                'rest': BNode('N6e7b')
            },
            BNode('Nbb739'): {
                'intersectionOf': BNode('N41aa20'),
                'type': OWL.Class
            },
            BNode('N6e7b'): {
                'first': BNode('N5119'),
                'rest': RDF.nil
            },
            BNode('N5119'): {
                'onProperty':
                URIRef('http://purl.obolibrary.org/obo/so#has_origin'),
                'someValuesFrom':
                obo.SO_0000746,
                'type':
                OWL.Restriction
            },
            BNode('Na36bfb34a35047838a8df32b37a8ff50'): {
                'someValuesFrom':
                obo.SO_0000746,
                'type':
                OWL.Restriction,
                'onProperty':
                URIRef('http://purl.obolibrary.org/obo/so#has_origin')
            }
        }
        edges = {'first': obo.SO_0000340, 'rest': BNode('N6e7b')}

        # test when first is a URIRef and rest is a BNode
        res1 = self.owl_nets.parses_anonymous_axioms(edges, class_dict)
        self.assertIsInstance(res1, Dict)
        self.assertTrue(len(res1), 2)
        self.assertIn('first', res1.keys())
        self.assertIn('rest', res1.keys())

        # test when first is a BNode and rest is a URIRef
        edges = {'first': BNode('N5119'), 'rest': RDF.nil}
        res2 = self.owl_nets.parses_anonymous_axioms(edges, class_dict)

        self.assertIsInstance(res2, Dict)
        self.assertTrue(len(res2), 3)
        self.assertIn('onProperty', res2.keys())
        self.assertIn('type', res2.keys())
        self.assertIn('someValuesFrom', res2.keys())

        return None

    def test_parses_constructors_intersection(self):
        """Tests the parses_constructors method for the intersectionOf class constructor"""

        # set-up inputs
        node = obo.SO_0000034
        node_info = self.owl_nets.creates_edge_dictionary(node)
        bnodes = set(x for x in self.owl_nets.graph.objects(node, None)
                     if isinstance(x, BNode))
        edges = {
            k: v
            for k, v in node_info[1].items()
            if 'intersectionOf' in v.keys() and k in bnodes
        }
        edges = node_info[1][list(x for x in bnodes if x in edges.keys())[0]]

        # test method
        res = self.owl_nets.parses_constructors(node, edges, node_info[1])
        self.assertIsInstance(res, Tuple)
        self.assertEqual(res[0],
                         {(obo.SO_0000034, RDFS.subClassOf, obo.SO_0001247)})
        self.assertEqual(len(res[1]), 3)

        return None

    def test_parses_constructors_intersection2(self):
        """Tests the parses_constructors method for the UnionOf class constructor"""

        # set-up inputs
        node = obo.SO_0000078
        node_info = self.owl_nets.creates_edge_dictionary(node)
        bnodes = set(x for x in self.owl_nets.graph.objects(node, None)
                     if isinstance(x, BNode))
        edges = {
            k: v
            for k, v in node_info[1].items()
            if 'intersectionOf' in v.keys() and k in bnodes
        }
        edges = node_info[1][list(x for x in bnodes if x in edges.keys())[0]]

        # test method
        res = self.owl_nets.parses_constructors(node, edges, node_info[1])
        self.assertIsInstance(res, Tuple)
        self.assertEqual(res[0],
                         {(obo.SO_0000078, RDFS.subClassOf, obo.SO_0000673)})
        self.assertEqual(len(res[1]), 3)

        return None

    def test_parses_restrictions(self):
        """Tests the parses_restrictions method."""

        # set-up inputs
        node = obo.SO_0000078
        node_info = self.owl_nets.creates_edge_dictionary(node)
        bnodes = set(x for x in self.owl_nets.graph.objects(node, None)
                     if isinstance(x, BNode))
        edges = {
            k: v
            for k, v in node_info[1].items()
            if ('type' in v.keys() and v['type'] == OWL.Restriction)
            and k in bnodes
        }
        edges = node_info[1][list(x for x in bnodes if x in edges.keys())[0]]

        # test method
        res = self.owl_nets.parses_restrictions(node, edges, node_info[1])
        self.assertIsInstance(res, Tuple)
        self.assertEqual(
            res[0], {(obo.SO_0000078,
                      URIRef('http://purl.obolibrary.org/obo/so#has_quality'),
                      obo.SO_0000880)})
        self.assertEqual(res[1], None)

        return None

    def test_cleans_owl_encoded_entities(self):
        """Tests the cleans_owl_encoded_entities method"""

        # test method
        self.owl_nets.cleans_owl_encoded_entities([obo.SO_0000822])
        self.assertIsInstance(self.owl_nets.graph, Graph)
        self.assertEqual(len(self.owl_nets.graph), 2)
        self.assertEqual(
            sorted([
                str(x)
                for y in list(self.owl_nets.graph.triples((None, None, None)))
                for x in y
            ]), [
                'http://purl.obolibrary.org/obo/SO_0000340',
                'http://purl.obolibrary.org/obo/SO_0000746',
                'http://purl.obolibrary.org/obo/SO_0000822',
                'http://purl.obolibrary.org/obo/SO_0000822',
                'http://purl.obolibrary.org/obo/so#has_origin',
                'http://www.w3.org/2000/01/rdf-schema#subClassOf'
            ])

        return None

    def test_makes_graph_connected_default(self):
        """Tests the makes_graph_connected method using the default argument for common_ancestor."""

        starting_size = len(self.owl_nets.graph)
        connected_graph = self.owl_nets.makes_graph_connected(
            self.owl_nets.graph)
        self.assertTrue(len(connected_graph) > starting_size)

        return None

    def test_makes_graph_connected_other(self):
        """Tests the makes_graph_connected method using something other than the default arg for common_ancestor."""

        starting_size = len(self.owl_nets.graph)

        # test when bad node is passed
        self.assertRaises(ValueError, self.owl_nets.makes_graph_connected,
                          self.owl_nets.graph, 'SO_0000110')

        # test when good node is passed
        node = 'http://purl.obolibrary.org/obo/SO_0000110'
        connected_graph = self.owl_nets.makes_graph_connected(
            self.owl_nets.graph, node)
        self.assertTrue(len(connected_graph) > starting_size)

        return None

    def test_purifies_graph_build_none(self):
        """Tests the purifies_graph_build method when kg_construction is None."""

        # initialize method
        owl_nets = OwlNets(graph=self.graph,
                           write_location=self.write_location,
                           filename=self.kg_filename)

        # test method
        self.graph = owl_nets.purifies_graph_build(self.graph)
        self.assertTrue(len(self.graph), 3054)

        return None

    def test_purifies_graph_build_instance(self):
        """Tests the purifies_graph_build method when kg_construction is instance."""

        # initialize method
        owl_nets = OwlNets(kg_construct_approach='instance',
                           graph=self.graph,
                           write_location=self.write_location,
                           filename=self.kg_filename)

        # test method
        self.graph = owl_nets.purifies_graph_build(self.graph)
        self.assertTrue(len(self.graph), 3054)

        return None

    def test_purifies_graph_build_subclass(self):
        """Tests the purifies_graph_build method when kg_construction is subclass."""

        # initialize method
        owl_nets = OwlNets(kg_construct_approach='subclass',
                           graph=self.graph,
                           write_location=self.write_location,
                           filename=self.kg_filename)

        # test method
        self.graph = owl_nets.purifies_graph_build(self.graph)
        self.assertTrue(len(self.graph), 3054)

        return None

    def test_write_out_results_regular(self):
        """Tests the write_out_results method."""

        self.owl_nets.kg_construct_approach = None
        graph1, graph2 = self.owl_nets.runs_owlnets()
        ray.shutdown()

        # test graph output
        self.assertIsInstance(graph1, Set)
        self.assertEqual(graph2, None)

        # make sure files are written locally
        nx_mdg_file = 'so_with_imports_OWLNETS_NetworkxMultiDiGraph.gpickle'
        self.assertTrue(
            os.path.exists(self.dir_loc_resources +
                           '/knowledge_graphs/so_with_imports_OWLNETS.nt'))
        self.assertTrue(
            os.path.exists(self.dir_loc_resources + '/knowledge_graphs/' +
                           nx_mdg_file))
        self.assertTrue(
            os.path.exists(self.dir_loc_resources + '/knowledge_graphs'
                           '/so_with_imports_OWLNETS_decoding_dict.pkl'))

        return None

    def test_write_out_results_subclass_purified(self):
        """Tests the owl_nets method."""

        self.owl_nets.kg_construct_approach = 'subclass'
        graph1, graph2 = self.owl_nets.runs_owlnets()
        ray.shutdown()

        # test graph output
        self.assertIsInstance(graph1, Set)
        self.assertIsInstance(graph2, Set)
        self.assertTrue(len(graph2) >= len(graph1))

        # make sure files are written locally for each graph
        # purified
        nx_mdg_file = 'so_with_imports_OWLNETS_SUBCLASS_purified_NetworkxMultiDiGraph.gpickle'
        nt_file = 'so_with_imports_OWLNETS_SUBCLASS_purified.nt'
        dict_file = '/so_with_imports_OWLNETS_SUBCLASS_purified_decoding_dict.pkl'
        self.assertTrue(
            os.path.exists(self.dir_loc_resources + '/knowledge_graphs/' +
                           nt_file))
        self.assertTrue(
            os.path.exists(self.dir_loc_resources + '/knowledge_graphs/' +
                           nx_mdg_file))
        self.assertTrue(
            os.path.exists(self.dir_loc_resources + '/knowledge_graphs' +
                           dict_file))
        # regular
        nx_mdg_file = 'so_with_imports_OWLNETS_NetworkxMultiDiGraph.gpickle'
        self.assertTrue(
            os.path.exists(self.dir_loc_resources +
                           '/knowledge_graphs/so_with_imports_OWLNETS.nt'))
        self.assertTrue(
            os.path.exists(self.dir_loc_resources + '/knowledge_graphs/' +
                           nx_mdg_file))
        self.assertTrue(
            os.path.exists(self.dir_loc_resources + '/knowledge_graphs'
                           '/so_with_imports_OWLNETS_decoding_dict.pkl'))

        return None

    def test_write_out_results_instance_purified(self):
        """Tests the owl_nets method."""

        graph1, graph2 = self.owl_nets2.runs_owlnets()
        ray.shutdown()

        # test graph output
        self.assertIsInstance(graph1, Set)
        self.assertIsInstance(graph2, Set)
        self.assertTrue(len(graph2) > len(graph1))

        # make sure files are written locally for each graph
        # purified
        nx_mdg_file = 'so_with_imports_OWLNETS_INSTANCE_purified_NetworkxMultiDiGraph.gpickle'
        nt_file = 'so_with_imports_OWLNETS_INSTANCE_purified.nt'
        dict_file = '/so_with_imports_OWLNETS_INSTANCE_purified_decoding_dict.pkl'
        self.assertTrue(
            os.path.exists(self.dir_loc_resources + '/knowledge_graphs/' +
                           nt_file))
        self.assertTrue(
            os.path.exists(self.dir_loc_resources + '/knowledge_graphs/' +
                           nx_mdg_file))
        self.assertTrue(
            os.path.exists(self.dir_loc_resources + '/knowledge_graphs' +
                           dict_file))
        # regular
        nx_mdg_file = 'so_with_imports_OWLNETS_NetworkxMultiDiGraph.gpickle'
        self.assertTrue(
            os.path.exists(self.dir_loc_resources +
                           '/knowledge_graphs/so_with_imports_OWLNETS.nt'))
        self.assertTrue(
            os.path.exists(self.dir_loc_resources + '/knowledge_graphs/' +
                           nx_mdg_file))
        self.assertTrue(
            os.path.exists(self.dir_loc_resources + '/knowledge_graphs'
                           '/so_with_imports_OWLNETS_decoding_dict.pkl'))

        return None

    def tests_gets_owlnets_dict(self):
        """Tests gets_owlnets_dict method."""

        results = self.owl_nets.gets_owlnets_dict()

        # verify results
        self.assertIsInstance(results, dict)

        return None

    def tests_gets_owlnets_graph(self):
        """Tests gets_owlnets_graphs method."""

        graphs = self.owl_nets.gets_owlnets_graph()

        # verify results
        self.assertIsInstance(graphs, Graph)

        return None

    def tearDown(self):
        warnings.simplefilter('default', ResourceWarning)

        # remove resource directory
        shutil.rmtree(self.dir_loc_resources)

        return None
Ejemplo n.º 3
0
    def construct_knowledge_graph(self) -> None:
        """Builds a post-closure knowledge graph. This build is recommended when one has previously performed a
        "partial" knowledge graph build and then ran a reasoner over it. This build type inputs the closed partially
        built knowledge graph and completes the build process.

        The post-closure build utilizes the following steps: (1) Process relation and inverse relation data; (2)
        Load closed knowledge graph; (3) Process node metadata; (4) Create graph subsets; (5) Decode OWL-encoded
        classes; (6) Output knowledge graph files and create edge lists; and (7) Extract and write node metadata.

        Returns:
            None.

        Raises:
            OSError: If closed knowledge graph file does not exist.
            TypeError: If the closed knowledge graph file is empty.
        """

        log_str = '### Starting Knowledge Graph Build: POST-CLOSURE ###'; print('\n' + log_str)
        logger.info('*' * 10 + 'PKT STEP: CONSTRUCTING KNOWLEDGE GRAPH' + '*' * 10 + '\n' + log_str)

        # STEP 1: PROCESS RELATION AND INVERSE RELATION DATA
        log_str = '*** Loading Relations Data ***'; print(log_str); logger.info(log_str)
        self.reverse_relation_processor()

        # STEP 2: LOAD CLOSED KNOWLEDGE GRAPH
        closed_kg = glob.glob(self.write_location + '/*.owl')
        if len(closed_kg) == 0: logs = 'KG file does not exist!'; logger.error('OSError: ' + logs); raise OSError(logs)
        elif os.stat(closed_kg[0]).st_size == 0:
            logs = '{} is empty'.format(closed_kg); logger.error('TypeError: ' + logs); raise TypeError(logs)
        else:
            log_str = '*** Loading Closed Knowledge Graph ***'; print(log_str); logger.info(log_str)
            os.rename(closed_kg[0], self.write_location + self.full_kg)  # rename closed kg file
            self.graph = Graph().parse(self.write_location + self.full_kg, format='xml')
        stats = 'Input {}'.format(derives_graph_statistics(self.graph)); print(stats); logger.info(stats)

        # STEP 3: PROCESS NODE METADATA
        log_str = '*** Loading Node Metadata Data ***'; print(log_str); logger.info(log_str)
        meta = Metadata(self.kg_version, self.write_location, self.full_kg, self.node_data, self.node_dict)
        if self.node_data: meta.metadata_processor(); meta.extract_metadata(self.graph)

        # STEP 4: CREATE GRAPH SUBSETS
        log_str = '*** Splitting Graph ***'; print(log_str); logger.info(log_str)
        _ = self.write_location; self.graph, annotation_triples = splits_knowledge_graph(self.graph)
        stats = 'Merged Logic Subset {}'.format(derives_graph_statistics(self.graph)); print(stats); logger.info(stats)
        kg_owl = '_'.join(self.full_kg.split('_')[0:-1]) + '_OWL.owl'; kg_owl_main = kg_owl[:-8] + '.owl'
        annot, logic, full = kg_owl[:-4] + '_AnnotationsOnly.nt', kg_owl[:-4] + '_LogicOnly.nt', kg_owl[:-4] + '.nt'
        appends_to_existing_file(annotation_triples, _ + annot); appends_to_existing_file(self.graph, _ + logic)
        del annotation_triples

        # STEP 5: DECODE OWL SEMANTICS
        results = [set(self.graph), None, None]
        stats = 'Full Logic {}'.format(derives_graph_statistics(results[0])); print(stats); logger.info(stats)
        logger.info('*** Converting Knowledge Graph to Networkx MultiDiGraph ***')
        s = convert_to_networkx(self.write_location, kg_owl[:-4], results[0], True)
        if s is not None: log_stats = 'Full Logic Subset (OWL) {}'.format(s); logger.info(log_stats); print(log_stats)
        if self.decode_owl:
            self.graph = updates_pkt_namespace_identifiers(self.graph, self.construct_approach)
            owlnets = OwlNets(self.graph, self.write_location, kg_owl_main, self.construct_approach, self.owl_tools)
            results = [results[0]] + list(owlnets.runs_owlnets(self.cpus))

        # STEP 7: WRITE OUT KNOWLEDGE GRAPH METADATA AND CREATE EDGE LISTS
        log_str = '*** Writing Knowledge Graph Edge Lists ***'; print('\n' + log_str); logger.info(log_str)
        f_prefix = ['_OWL', '_OWLNETS', '_OWLNETS_' + self.construct_approach.upper() + '_purified']
        for x in range(0, len(results)):
            graph = results[x]; p_str = 'OWL' if x == 0 else 'OWL-NETS' if x == 1 else 'Purified OWL-NETS'
            if graph is not None:
                log_str = '*** Processing {} Graph ***'.format(p_str); print(log_str); logger.info(log_str)
                triple_list_file = kg_owl[:-8] + f_prefix[x] + '_Triples_Integers.txt'
                triple_map = triple_list_file[:-5] + '_Identifier_Map.json'
                node_int_map = maps_ids_to_integers(graph, self.write_location, triple_list_file, triple_map)

                # STEP 8: EXTRACT AND WRITE NODE METADATA
                meta.full_kg = kg_owl[:-8] + f_prefix[x] + '.owl'
                if self.node_data: meta.output_metadata(node_int_map, graph)

        # deduplicate logic and annotation files and then merge them
        deduplicates_file(_ + annot); deduplicates_file(_ + logic); merges_files(_ + annot, _ + logic, _ + full)

        return None