예제 #1
0
    def setUp(self):
        """Create the scratch fixture tree and a subclass PartialBuild for each test.

        Makes a ``resources`` directory next to this file, copies the reference
        inputs from ``data`` into it, writes the edge list (JSON) and the subclass
        construction map (pickle), and stores the build object on ``self.kg``.
        """
        warnings.simplefilter('ignore', ResourceWarning)

        # read-only input data and the scratch directory both live next to this file
        here = os.path.dirname(__file__)
        self.dir_loc = os.path.abspath(os.path.join(here, 'data'))
        self.dir_loc_resources = os.path.abspath(os.path.join(here, 'resources'))

        # the parent directory must exist before its children are created
        os.mkdir(self.dir_loc_resources)
        for sub_dir in ('knowledge_graphs', 'relations_data', 'node_data',
                        'ontologies', 'construction_approach'):
            os.mkdir(self.dir_loc_resources + '/' + sub_dir)

        # silence logging and drop the first leftover log file, if any
        self.logs = os.path.abspath(here + '/builds/logs')
        logging.disable(logging.CRITICAL)
        stale_logs = glob.glob(self.logs + '/*.log')
        if len(stale_logs) > 0:
            os.remove(stale_logs[0])

        # (source below data/, destination below resources/)
        fixture_files = (
            # node metadata
            ('/node_data/node_metadata_dict.pkl',
             '/node_data/node_metadata_dict.pkl'),
            # ontology data (the empty ontology is installed under the hp name)
            ('/ontologies/empty_hp_with_imports.owl',
             '/ontologies/hp_with_imports.owl'),
            # merged ontology data
            ('/ontologies/so_with_imports.owl',
             '/knowledge_graphs/PheKnowLator_MergedOntologies.owl'),
            # relations data
            ('/RELATIONS_LABELS.txt', '/relations_data/RELATIONS_LABELS.txt'),
            # inverse relations
            ('/INVERSE_RELATIONS.txt', '/relations_data/INVERSE_RELATIONS.txt'),
            # empty master edges
            ('/Master_Edge_List_Dict_empty.json',
             '/Master_Edge_List_Dict_empty.json'),
        )
        for source_rel, target_rel in fixture_files:
            shutil.copyfile(self.dir_loc + source_rel,
                            self.dir_loc_resources + target_rel)

        # edge list written to disk for the build to read
        gene_uri = "http://www.ncbi.nlm.nih.gov/gene/"
        master_edges = {
            "gene-phenotype": {
                "data_type": "entity-class",
                "edge_relation": "RO_0003302",
                "uri": [gene_uri, "http://purl.obolibrary.org/obo/"],
                "edge_list": [
                    ["2", "SO_0000162"], ["2", "SO_0000196"],
                    ["2", "SO_0000323"], ["9", "SO_0001490"],
                    ["9", "SO_0000301"], ["9", "SO_0001560"],
                    ["9", "SO_0001560"], ["10", "SO_0000444"],
                    ["10", "SO_0002138"], ["10", "SO_0000511"],
                ],
            },
            "gene-gene": {
                "data_type": "entity-entity",
                "edge_relation": "RO_0002435",
                "uri": [gene_uri, gene_uri],
                "edge_list": [
                    ["1", "2"], ["2", "3"], ["3", "18"], ["17", "19"],
                    ["4", "17"], ["5", "11"], ["11", "12"], ["4", "5"],
                ],
            },
            "disease-disease": {
                "data_type": "class-class",
                "edge_relation": "RO_0002435",
                "uri": [gene_uri, gene_uri],
                "edge_list": [
                    ["DOID_3075", "DOID_1080"], ["DOID_3075", "DOID_4267"],
                    ["DOID_4800", "DOID_10190"], ["DOID_4800", "DOID_80219"],
                    ["DOID_2729", "DOID_1962"], ["DOID_2729", "DOID_5096"],
                    ["DOID_8837", "DOID_6774"], ["DOID_8837", "DOID_8754"],
                ],
            },
            "entity_namespaces": {
                "gene": "http://purl.uniprot.org/geneid/",
            },
        }
        edge_path = self.dir_loc_resources + '/Master_Edge_List_Dict.json'
        with open(edge_path, 'w') as edge_file:
            json.dump(master_edges, edge_file)

        # every listed identifier maps to the same single ontology class
        mapped_ids = ("1", "2", "3", "4", "5", "11", "12", "17", "18", "5096",
                      "6774", "19")
        subclass_map = {node_id: ['SO_0001217'] for node_id in mapped_ids}
        map_path = (self.dir_loc_resources +
                    '/construction_approach/subclass_construction_map.pkl')
        with open(map_path, 'wb') as map_file:
            pickle.dump(subclass_map, map_file, protocol=4)

        # build object under test, writing into the scratch knowledge_graphs folder
        self.write_location = self.dir_loc_resources + '/knowledge_graphs'
        self.kg = PartialBuild('subclass', 'yes', 'yes', 'yes', 1,
                               self.write_location)

        # point the build at the owltools location that sits next to this file
        self.kg.owl_tools = os.path.abspath(
            os.path.join(here, 'utils/owltools'))
class TestKGBuilder(unittest.TestCase):
    """Class to test the KGBuilder class from the knowledge graph script."""
    def setUp(self):
        """Create the scratch fixture tree and the build objects used by the tests.

        Makes a ``resources`` directory next to this file, copies the reference
        inputs from ``data`` into it, writes two edge-list JSON files and the
        subclass construction map, then instantiates a FullBuild, two
        PartialBuild objects and a PostClosureBuild, plus an EdgeConstructor
        created from the FullBuild's attributes.
        """
        warnings.simplefilter('ignore', ResourceWarning)

        # read-only input data and the scratch directory both live next to this file
        here = os.path.dirname(__file__)
        self.dir_loc = os.path.abspath(os.path.join(here, 'data'))
        self.dir_loc_resources = os.path.abspath(os.path.join(here, 'resources'))

        # the parent directory must exist before its children are created
        os.mkdir(self.dir_loc_resources)
        for sub_dir in ('knowledge_graphs', 'relations_data', 'node_data',
                        'ontologies', 'construction_approach'):
            os.mkdir(self.dir_loc_resources + '/' + sub_dir)

        # silence logging and drop the first leftover log file, if any
        self.logs = os.path.abspath(here + '/builds/logs')
        logging.disable(logging.CRITICAL)
        stale_logs = glob.glob(self.logs + '/*.log')
        if len(stale_logs) > 0:
            os.remove(stale_logs[0])

        # (source below data/, destination below resources/)
        fixture_files = (
            # node metadata
            ('/node_data/node_metadata_dict.pkl',
             '/node_data/node_metadata_dict.pkl'),
            # ontology data (the empty ontology is installed under the hp name)
            ('/ontologies/empty_hp_with_imports.owl',
             '/ontologies/hp_with_imports.owl'),
            # merged ontology data
            ('/ontologies/so_with_imports.owl',
             '/knowledge_graphs/PheKnowLator_MergedOntologies.owl'),
            # relations data
            ('/RELATIONS_LABELS.txt', '/relations_data/RELATIONS_LABELS.txt'),
            # inverse relations
            ('/INVERSE_RELATIONS.txt', '/relations_data/INVERSE_RELATIONS.txt'),
            # empty master edges
            ('/Master_Edge_List_Dict_empty.json',
             '/Master_Edge_List_Dict_empty.json'),
        )
        for source_rel, target_rel in fixture_files:
            shutil.copyfile(self.dir_loc + source_rel,
                            self.dir_loc_resources + target_rel)

        # pieces shared by both edge-list files
        gene_uri = "http://www.ncbi.nlm.nih.gov/gene/"
        shared_gene_phenotype = [
            ["2", "SO_0000162"], ["2", "SO_0000196"], ["3", "SO_0000323"],
            ["9", "SO_0001490"], ["10", "SO_0000301"], ["11", "SO_0001560"],
            ["12", "SO_0001560"], ["17", "SO_0000444"], ["18", "SO_0002138"],
        ]
        gene_gene = {
            "data_type": "entity-entity",
            "edge_relation": "RO_0002435",
            "uri": [gene_uri, gene_uri],
            "edge_list": [
                ["1", "2"], ["2", "3"], ["3", "18"], ["17", "19"],
                ["4", "17"], ["5", "11"], ["11", "12"], ["4", "5"],
            ],
        }
        disease_disease = {
            "data_type": "class-class",
            "edge_relation": "RO_0002435",
            "uri": [gene_uri, gene_uri],
            "edge_list": [
                ["DOID_3075", "DOID_1080"], ["DOID_3075", "DOID_4267"],
                ["DOID_4800", "DOID_10190"], ["DOID_4800", "DOID_80219"],
                ["DOID_2729", "DOID_1962"], ["DOID_2729", "DOID_5096"],
                ["DOID_8837", "DOID_6774"], ["DOID_8837", "DOID_8754"],
            ],
        }
        namespaces = {"gene": "http://purl.uniprot.org/geneid/"}

        # the two files differ only in the gene of the final gene-phenotype edge:
        # "20" in the main edge list, "19" in the instance edge list
        edge_files = (
            ('/Master_Edge_List_Dict.json', "20"),
            ('/Master_Edge_List_Dict_instance.json', "19"),
        )
        for file_name, last_gene in edge_files:
            edge_data = {
                "gene-phenotype": {
                    "data_type": "entity-class",
                    "edge_relation": "RO_0003302",
                    "uri": [gene_uri, "http://purl.obolibrary.org/obo/"],
                    "edge_list": shared_gene_phenotype +
                    [[last_gene, "SO_0000511"]],
                },
                "gene-gene": gene_gene,
                "disease-disease": disease_disease,
                "entity_namespaces": namespaces,
            }
            with open(self.dir_loc_resources + file_name, 'w') as edge_file:
                json.dump(edge_data, edge_file)

        # every listed identifier maps to the same single ontology class
        mapped_ids = ("1", "2", "3", "4", "5", "11", "12", "17", "18", "5096",
                      "6774", "19")
        subclass_map = {node_id: ['SO_0001217'] for node_id in mapped_ids}
        map_path = (self.dir_loc_resources +
                    '/construction_approach/subclass_construction_map.pkl')
        with open(map_path, 'wb') as map_file:
            pickle.dump(subclass_map, map_file, protocol=4)

        # all builds write into the scratch knowledge_graphs folder
        self.write_location = self.dir_loc_resources + '/knowledge_graphs'
        shared_kwargs = {
            'node_data': 'yes',
            'cpus': 1,
            'write_location': self.write_location,
        }

        # build the knowledge graph objects exercised by the tests
        self.kg_subclass = FullBuild(construction='subclass',
                                     inverse_relations='yes',
                                     decode_owl='yes',
                                     **shared_kwargs)
        self.kg_instance = PartialBuild(construction='instance',
                                        inverse_relations='no',
                                        decode_owl='no',
                                        **shared_kwargs)
        self.kg_instance2 = PartialBuild(construction='instance',
                                         inverse_relations='yes',
                                         decode_owl='yes',
                                         **shared_kwargs)
        self.kg_closure = PostClosureBuild(construction='instance',
                                           inverse_relations='yes',
                                           decode_owl='no',
                                           **shared_kwargs)

        # point the full and partial builds at the owltools location
        owl_tools_path = os.path.abspath(os.path.join(here, 'utils/owltools'))
        for build in (self.kg_subclass, self.kg_instance, self.kg_instance2):
            build.owl_tools = owl_tools_path

        # create the inner class instance from the subclass build's attributes
        full_build = self.kg_subclass
        constructor_args = {
            'construction': full_build.construct_approach,
            'edge_dict': full_build.edge_dict,
            'kg_owl': '',
            'rel_dict': full_build.relations_dict,
            'inverse_dict': full_build.inverse_relations_dict,
            'node_data': full_build.node_data,
            'ont_cls': full_build.ont_classes,
            'metadata': None,
            'obj_props': full_build.obj_properties,
            'write_loc': full_build.write_location,
        }
        self.inner_class = full_build.EdgeConstructor(constructor_args)

        # release string the version tests compare against
        self.current_release = 'v' + __version__

    def test_class_initialization_parameters_version(self):
        """Tests the class initialization parameters for version."""

        self.assertEqual(self.kg_subclass.kg_version, self.current_release)

        return None

    def test_class_initialization_parameters_ontologies_missing(self):
        """Checks that FullBuild raises OSError when the ontologies directory is absent."""
        # delete the directory so the constructor has nothing to read from
        ontology_dir = self.dir_loc_resources + '/ontologies'
        shutil.rmtree(ontology_dir)

        with self.assertRaises(OSError):
            FullBuild('subclass', 'yes', 'yes', 'yes', 1, self.write_location)

    def test_class_initialization_parameters_ontologies_empty(self):
        """Checks that FullBuild raises TypeError when the ontologies directory is empty."""
        # replace the populated directory with an empty one of the same name
        ontology_dir = self.dir_loc_resources + '/ontologies'
        shutil.rmtree(ontology_dir)
        os.mkdir(ontology_dir)

        with self.assertRaises(TypeError):
            FullBuild('subclass', 'yes', 'yes', 'yes', 1, self.write_location)

    def test_class_initialization_parameters_construction_approach(self):
        """Checks that FullBuild raises ValueError for unsupported construction values."""
        # a non-string value and two unrecognised abbreviations
        for bad_construction in (1, 'subcls', 'inst'):
            with self.assertRaises(ValueError):
                FullBuild(bad_construction, 'yes', 'yes', 'yes', 1,
                          self.write_location)

    def test_class_initialization_parameters_edge_data_missing(self):
        """Checks that FullBuild raises OSError when the master edge list file is absent."""
        # delete the edge list written by setUp
        edge_file = self.dir_loc_resources + '/Master_Edge_List_Dict.json'
        os.remove(edge_file)

        with self.assertRaises(OSError):
            FullBuild('subclass', 'yes', 'yes', 'yes', 1, self.write_location)

    def test_class_initialization_parameters_edge_data_empty(self):
        """Checks that FullBuild raises TypeError when the master edge list file is empty."""
        # move the empty fixture into the place of the main edge list
        empty_file = self.dir_loc_resources + '/Master_Edge_List_Dict_empty.json'
        main_file = self.dir_loc_resources + '/Master_Edge_List_Dict.json'
        os.rename(empty_file, main_file)

        with self.assertRaises(TypeError):
            FullBuild('subclass', 'yes', 'yes', 'yes', 1, self.write_location)

    def test_class_initialization_parameter_relations_format(self):
        """Checks that FullBuild raises ValueError for badly formatted inverse_relations values."""
        # a non-string value and a misspelled 'yes' in the third positional slot
        for bad_relations in (1, 'ye'):
            with self.assertRaises(ValueError):
                FullBuild('subclass', 'yes', bad_relations, 'yes', 1,
                          self.write_location)

    def test_class_initialization_parameter_relations_missing(self):
        """Tests the class initialization parameters for relations when the files are missing.

        Both relation files are removed before FullBuild is called, and a
        TypeError is expected. The files are copied back from the data directory
        afterwards, even when the assertion fails.
        """

        # remove relations and inverse relations data
        rel_loc = self.dir_loc_resources + '/relations_data/RELATIONS_LABELS.txt'
        invrel_loc = self.dir_loc_resources + '/relations_data/INVERSE_RELATIONS.txt'
        os.remove(rel_loc)
        os.remove(invrel_loc)

        try:
            self.assertRaises(TypeError, FullBuild, 'subclass', 'yes', 'yes',
                              'yes', 1, self.write_location)
        finally:
            # add back deleted data regardless of the assertion outcome
            shutil.copyfile(self.dir_loc + '/RELATIONS_LABELS.txt', rel_loc)
            shutil.copyfile(self.dir_loc + '/INVERSE_RELATIONS.txt', invrel_loc)

        return None

    def test_class_initialization_parameters_node_metadata_format(self):
        """Checks that FullBuild raises ValueError for badly formatted node_data values."""
        # a non-string value and a misspelled 'yes' in the second positional slot
        for bad_node_data in (1, 'ye'):
            with self.assertRaises(ValueError):
                FullBuild('subclass', bad_node_data, 'yes', 'yes', 1,
                          self.write_location)

    def test_class_initialization_parameters_node_metadata_missing(self):
        """Tests the class initialization parameters for node_metadata when the file is missing.

        The node metadata pickle is removed before FullBuild is called, and a
        TypeError is expected. The file is copied back from the data directory
        afterwards, even when the assertion fails.
        """

        # remove node metadata
        gene_phen_data = self.dir_loc_resources + '/node_data/node_metadata_dict.pkl'
        os.remove(gene_phen_data)

        try:
            # test method
            self.assertRaises(TypeError, FullBuild, 'subclass', 'yes', 'yes',
                              'yes', 1, self.write_location)
        finally:
            # add back deleted data regardless of the assertion outcome
            shutil.copyfile(self.dir_loc + '/node_data/node_metadata_dict.pkl',
                            gene_phen_data)

        return None

    def test_class_initialization_parameters_decoding_owl(self):
        """Checks that FullBuild raises ValueError for badly formatted decode_owl values."""
        # a non-string value and a misspelled 'yes' in the fourth positional slot
        for bad_decode_owl in (1, 'ye'):
            with self.assertRaises(ValueError):
                FullBuild('subclass', 'yes', 'yes', bad_decode_owl, 1,
                          self.write_location)

    def test_class_initialization_ontology_data(self):
        """Tests the class initialization for when no merged ontology file is created.

        The merged ontology file copied in by setUp is removed, a new FullBuild is
        created, and its ``ontologies`` attribute is expected to be a list with
        exactly one entry.
        """

        # removed merged ontology file
        os.remove(self.dir_loc_resources +
                  '/knowledge_graphs/PheKnowLator_MergedOntologies.owl')

        # run test
        self.kg_subclass = FullBuild('subclass', 'yes', 'yes', 'yes', 1,
                                     self.write_location)
        # check that there is 1 ontology file; assertEqual reports the actual
        # count on failure
        self.assertIsInstance(self.kg_subclass.ontologies, List)
        self.assertEqual(len(self.kg_subclass.ontologies), 1)

        return None

    def test_class_initialization_attributes(self):
        """Tests the class initialization for class attributes."""

        self.assertTrue(self.kg_subclass.build == 'full')
        self.assertTrue(self.kg_subclass.construct_approach == 'subclass')
        self.assertTrue(self.kg_subclass.kg_version == self.current_release)
        path = os.path.abspath(self.dir_loc_resources + '/knowledge_graphs')
        self.assertTrue(self.kg_subclass.write_location == path)

        return None

    def test_class_initialization_edgelist(self):
        """Tests the class initialization for edge_list inputs."""

        self.assertIsInstance(self.kg_subclass.edge_dict, Dict)
        self.assertIn('gene-phenotype', self.kg_subclass.edge_dict.keys())
        self.assertIn('data_type',
                      self.kg_subclass.edge_dict['gene-phenotype'].keys())
        self.assertTrue(self.kg_subclass.edge_dict['gene-phenotype']
                        ['data_type'] == 'entity-class')
        self.assertIn('uri',
                      self.kg_subclass.edge_dict['gene-phenotype'].keys())
        self.assertTrue(
            len(self.kg_subclass.edge_dict['gene-phenotype']['uri']) == 2)
        self.assertIn('edge_list',
                      self.kg_subclass.edge_dict['gene-phenotype'].keys())
        self.assertTrue(
            len(self.kg_subclass.edge_dict['gene-phenotype']['edge_list']) ==
            10)
        self.assertIn('edge_relation',
                      self.kg_subclass.edge_dict['gene-phenotype'].keys())

        return None

    def test_class_initialization_node_metadata(self):
        """Tests the class initialization for node metadata inputs."""

        self.assertIsInstance(self.kg_subclass.node_dict, Dict)
        self.assertTrue(len(self.kg_subclass.node_dict) == 0)

        return None

    def test_class_initialization_relations(self):
        """Tests the class initialization for relations input."""

        self.assertIsInstance(self.kg_subclass.inverse_relations, List)
        self.assertIsInstance(self.kg_subclass.relations_dict, Dict)
        self.assertTrue(len(self.kg_subclass.relations_dict) == 0)
        self.assertIsInstance(self.kg_subclass.inverse_relations_dict, Dict)
        self.assertTrue(len(self.kg_subclass.inverse_relations_dict) == 0)

        return None

    def test_class_initialization_ontologies(self):
        """Tests the class initialization for ontology inputs."""

        self.assertIsInstance(self.kg_subclass.ontologies, List)
        self.assertTrue(len(self.kg_subclass.ontologies) == 1)
        self.assertTrue(os.path.exists(self.kg_subclass.merged_ont_kg))

        return None

    def test_class_initialization_owl_decoding(self):
        """Tests the class initialization for the decode_owl input."""

        self.assertTrue(self.kg_subclass.decode_owl == 'yes')

        return None

    def test_class_initialization_subclass(self):
        """Tests the subclass construction approach class initialization."""

        # check construction type
        self.assertTrue(self.kg_subclass.construct_approach == 'subclass')

        # check filepath and write location for knowledge graph
        write_file = '/PheKnowLator_' + self.current_release + '_full_subclass_inverseRelations_noOWL.owl'
        self.assertEqual(self.kg_subclass.full_kg, write_file)

        return None

    def test_class_initialization_instance(self):
        """Tests the instance construction approach class initialization."""

        # check build type
        self.assertTrue(self.kg_instance.build == 'partial')

        # check relations and owl decoding
        self.assertIsNone(self.kg_instance.decode_owl)
        self.assertIsNone(self.kg_instance.inverse_relations)

        # check construction type
        self.assertTrue(self.kg_instance.construct_approach == 'instance')

        # check filepath and write location for knowledge graph
        write_file = '/PheKnowLator_' + self.current_release + '_partial_instance_relationsOnly_OWL.owl'
        self.assertEqual(self.kg_instance.full_kg, write_file)

        return None

    def test_reverse_relation_processor(self):
        """Tests the reverse_relation_processor method."""

        self.kg_subclass.reverse_relation_processor()

        # check if data was successfully processed
        self.assertIsInstance(self.kg_subclass.inverse_relations_dict, Dict)
        self.assertTrue(len(self.kg_subclass.inverse_relations_dict) > 0)
        self.assertIsInstance(self.kg_subclass.relations_dict, Dict)
        self.assertTrue(len(self.kg_subclass.relations_dict) > 0)
        self.kg_instance.reverse_relation_processor()

        # check if data was successfully processed
        self.assertIsNone(self.kg_instance.inverse_relations_dict)
        self.assertIsInstance(self.kg_instance.relations_dict, Dict)

        return None

    def test_verifies_object_property(self):
        """Tests the verifies_object_property method.

        Parses the SO ontology, checks that a known object property is present,
        that a plain-string relation raises TypeError, and that a URIRef relation
        passed to verifies_object_property is found among the object properties
        afterwards.
        """

        # load graph
        # NOTE(review): the new Graph is attached to the EdgeConstructor class,
        # while parsing goes through self.kg_subclass.graph -- confirm both refer
        # to the same object in the build class.
        self.kg_subclass.EdgeConstructor.graph = Graph()
        self.kg_subclass.graph.parse(self.dir_loc +
                                     '/ontologies/so_with_imports.owl')

        # get object properties
        self.kg_subclass.obj_properties = gets_object_properties(
            self.kg_subclass.graph)
        self.inner_class.object_properties = self.kg_subclass.obj_properties

        # check for presence of existing obj_prop
        self.assertIn(URIRef('http://purl.obolibrary.org/obo/so#position_of'),
                      self.inner_class.object_properties)

        # test adding bad relation
        self.assertRaises(TypeError, self.inner_class.verifies_object_property,
                          'RO_0002200')

        # test adding a good relation
        new_relation = URIRef('http://purl.obolibrary.org/obo/' + 'RO_0002566')
        self.inner_class.verifies_object_property(new_relation)

        # update list of object properties
        self.kg_subclass.obj_properties = gets_object_properties(
            self.kg_subclass.graph)
        self.inner_class.object_properties = self.kg_subclass.obj_properties

        # make sure that object property was added to the graph
        # NOTE(review): this reads inner_class.obj_properties, whereas the lines
        # above assign inner_class.object_properties -- confirm which attribute
        # the inner class actually maintains.
        self.assertIn(new_relation, self.inner_class.obj_properties)

        return None

    def test_checks_classes(self):
        """Tests the checks_classes method for class-class edges.

        The positive case lists both edge classes among the ontology classes. The
        negative case uses an edge whose second class is not listed. Both cases
        store the ontology classes as URIRef objects so that the negative result
        comes from the missing class rather than from a str/URIRef type mismatch.
        """

        # set-up inputs for class-class
        edge_info = {
            'n1': 'class',
            'n2': 'class',
            'rel': 'RO_0003302',
            'inv_rel': None,
            'uri': [
                'https://www.ncbi.nlm.nih.gov/gene/',
                'http://purl.obolibrary.org/obo/'
            ],
            'edges': ['CHEBI_81395', 'DOID_12858']
        }

        self.inner_class.ont_classes = [
            URIRef('http://purl.obolibrary.org/obo/CHEBI_81395'),
            URIRef('http://purl.obolibrary.org/obo/DOID_12858')
        ]

        self.assertTrue(self.inner_class.checks_classes(edge_info))

        # set-up inputs for class-class (FALSE)
        edge_info = {
            'n1': 'class',
            'n2': 'class',
            'rel': 'RO_0003302',
            'inv_rel': None,
            'uri': [
                'https://www.ncbi.nlm.nih.gov/gene/',
                'http://purl.obolibrary.org/obo/'
            ],
            'edges': ['CHEBI_81395', 'DOID_1']
        }

        # DOID_1 is deliberately absent from this list
        self.inner_class.ont_classes = [
            URIRef('http://purl.obolibrary.org/obo/CHEBI_81395'),
            URIRef('http://purl.obolibrary.org/obo/DOID_128987')
        ]

        self.assertFalse(self.inner_class.checks_classes(edge_info))

        return None

    def test_checks_classes_subclasses(self):
        """Checks that checks_classes accepts edges that involve non-class entities.

        With a single ontology class registered, the entity-entity, entity-class
        and class-entity edges below are each expected to return a truthy result.
        """
        self.inner_class.ont_classes = {
            URIRef('http://purl.obolibrary.org/obo/DOID_12858')
        }

        gene_uri = 'https://www.ncbi.nlm.nih.gov/gene/'
        obo_uri = 'http://purl.obolibrary.org/obo/'

        # (n1 type, n2 type, uri pair, edge pair), checked in this order
        cases = (
            # subclass-subclass
            ('entity', 'entity', [gene_uri, obo_uri], ['14', '134056']),
            # subclass-class
            ('entity', 'class', [gene_uri, obo_uri], ['14', 'DOID_12858']),
            # class-subclass
            ('class', 'entity', [obo_uri, gene_uri], ['DOID_12858', '14']),
        )

        for n1_type, n2_type, uri_pair, edge_pair in cases:
            edge_info = {
                'n1': n1_type,
                'n2': n2_type,
                'rel': 'RO_0003302',
                'inv_rel': None,
                'uri': uri_pair,
                'edges': edge_pair,
            }
            self.assertTrue(self.inner_class.checks_classes(edge_info))

    def test_checks_relations(self):
        """Tests the checks_relations method.

        A new EdgeConstructor is built from the subclass-based builder and stored on self.inner_class. The method
        is expected to return None for RO_0003302 with the gene-phenotype edges and to return 'RO_0002435' for
        RO_0002435 with the gene-gene edges.
        """

        builder = self.kg_subclass
        builder.reverse_relation_processor()
        self.inner_class = builder.EdgeConstructor({
            'construction': builder.construct_approach,
            'edge_dict': builder.edge_dict,
            'kg_owl': '',
            'rel_dict': builder.relations_dict,
            'inverse_dict': builder.inverse_relations_dict,
            'node_data': builder.node_data,
            'ont_cls': builder.ont_classes,
            'metadata': None,
            'obj_props': builder.obj_properties,
            'write_loc': builder.write_location
        })

        def edge_tuples(edge_type):
            # hashable copy of the edge list stored for the given edge type
            return {
                tuple(pair)
                for pair in self.inner_class.edge_dict[edge_type]['edge_list']
            }

        # test 1
        first_result = self.inner_class.checks_relations(
            'RO_0003302', edge_tuples('gene-phenotype'))
        self.assertIsNone(first_result)

        # test 2
        second_result = self.inner_class.checks_relations(
            'RO_0002435', edge_tuples('gene-gene'))
        self.assertEqual(second_result, 'RO_0002435')

        return None

    def test_gets_edge_statistics(self):
        """Tests the gets_edge_statistics method.

        Passes three synthetic edges plus two node sets and an original-edge count for the gene-gene edge type and
        compares the returned summary string against the expected text.
        """

        # no inverse edges
        owl_edges = [(1, 2, 3), (3, 2, 5), (4, 6, 7)]
        node_counts = [{1, 2, 3}, {1, 2, 3}, 8]
        observed = self.inner_class.gets_edge_statistics(
            'gene-gene', owl_edges, node_counts)

        self.assertEqual(
            observed,
            '3 OWL Edges, 8 Original Edges; 5 OWL Nodes, Original Nodes: 3 gene(s), 3 gene(s)'
        )

        return None

    def test_gets_edge_statistics_inverse_relations(self):
        """Tests the gets_edge_statistics method when including inverse relations.

        Passes three synthetic edges plus two node sets and an original-edge count for the drug-gene edge type and
        compares the returned summary string against the expected text.
        """

        # synthetic edges and node sets reported under the drug-gene edge type
        owl_edges = [(1, 2, 3), (3, 2, 5), (4, 6, 7)]
        node_counts = [{1, 2, 3}, {1, 2, 3}, 8]
        observed = self.inner_class.gets_edge_statistics(
            'drug-gene', owl_edges, node_counts)

        self.assertEqual(
            observed,
            '3 OWL Edges, 8 Original Edges; 5 OWL Nodes, Original Nodes: 3 drug(s), 3 gene(s)'
        )

        return None

    def test_creates_new_edges_not_adding_metadata_to_kg(self):
        """Tests the creates_new_edges method without adding node metadata to the KG.

        The subclass-based builder's graph is replaced with a freshly parsed so_with_imports ontology, its node
        metadata attributes are cleared, and every edge type in edge_dict is dispatched round-robin to
        EdgeConstructor ray actors. The test then checks the number of distinct triples across the collected
        graphs, the type of the merged error dictionary, and that the annotation-only and full N-Triples files
        exist in the write location.

        Returns:
            None.
        """

        kg = self.kg_subclass
        kg.reverse_relation_processor()

        # replace the builder's graph with a freshly parsed ontology and recompute the attributes derived from it
        kg.graph = Graph().parse(
            self.dir_loc + '/ontologies/so_with_imports.owl')
        kg.obj_properties = gets_object_properties(kg.graph)
        kg.ont_classes = gets_ontology_classes(kg.graph)
        # make sure to not add node_metadata
        kg.node_dict, kg.node_data = None, None
        # initialize metadata class; node_data was cleared on the line above, so the metadata processing step
        # that the other creates_new_edges tests guard with "if node_data" can never run here and is omitted
        meta = Metadata(kg.kg_version, kg.write_location, kg.full_kg,
                        kg.node_data, kg.node_dict)
        # create graph subsets
        kg.graph, annotation_triples = splits_knowledge_graph(kg.graph)
        full_kg_owl = '_'.join(kg.full_kg.split('_')[0:-1]) + '_OWL.owl'
        stem = full_kg_owl[:-4]  # file name without the '.owl' extension
        annot, full = stem + '_AnnotationsOnly.nt', stem + '.nt'
        appends_to_existing_file(annotation_triples,
                                 kg.write_location + annot, ' ')
        clean_graph = updates_pkt_namespace_identifiers(
            kg.graph, kg.construct_approach)

        # test method
        shutil.copy(kg.write_location + annot, kg.write_location + full)
        appends_to_existing_file(set(kg.graph), kg.write_location + full,
                                 ' ')
        args = {
            'construction': kg.construct_approach,
            'edge_dict': kg.edge_dict,
            'kg_owl': full_kg_owl,
            'rel_dict': kg.relations_dict,
            'metadata': meta.creates_node_metadata,
            'inverse_dict': kg.inverse_relations_dict,
            'node_data': kg.node_data,
            'ont_cls': kg.ont_classes,
            'obj_props': kg.obj_properties,
            'write_loc': kg.write_location
        }
        edges = list(kg.edge_dict.keys())
        ray.init(local_mode=True, ignore_reinit_error=True)
        try:
            actors = [
                ray.remote(kg.EdgeConstructor).remote(args)
                for _ in range(kg.cpus)
            ]
            # spread the edge types over the actors round-robin
            for idx, edge_type in enumerate(edges):
                actors[idx % kg.cpus].creates_new_edges.remote(edge_type)
            res = ray.get([x.graph_getter.remote() for x in actors])
            g1 = [kg.graph] + [x[0] for x in res]
            g2 = [clean_graph] + [x[1] for x in res]
            error_dicts = dict(
                ChainMap(
                    *ray.get([x.error_dict_getter.remote() for x in actors])))
            del actors
        finally:
            # always stop ray so that a failure above cannot leak the runtime into later tests
            ray.shutdown()

        # check that edges were added to the graph
        graph1 = {triple for graph in g1 for triple in graph}
        graph2 = {triple for graph in g2 for triple in graph}
        self.assertEqual(len(graph1), 9820)
        self.assertEqual(len(graph2), 9774)
        self.assertIsInstance(error_dicts, Dict)
        # check graph files were saved
        self.assertTrue(os.path.exists(kg.write_location + annot))
        self.assertTrue(os.path.exists(kg.write_location + full))

        return None

    def test_creates_new_edges_adding_metadata_to_kg(self):
        """Tests the creates_new_edges method and adds node metadata to the KG.

        The subclass-based builder's graph is replaced with a freshly parsed so_with_imports ontology, node
        metadata is processed when the builder has node_data, and every edge type in edge_dict is dispatched
        round-robin to EdgeConstructor ray actors. The test then checks the number of distinct triples across the
        collected graphs, the type of the merged error dictionary, and that the annotation-only and full
        N-Triples files exist in the write location.

        Returns:
            None.
        """

        kg = self.kg_subclass
        kg.reverse_relation_processor()
        # replace the builder's graph with a freshly parsed ontology and recompute the attributes derived from it
        kg.graph = Graph().parse(
            self.dir_loc + '/ontologies/so_with_imports.owl')
        kg.obj_properties = gets_object_properties(kg.graph)
        kg.ont_classes = gets_ontology_classes(kg.graph)
        # make sure to add node_metadata
        meta = Metadata(kg.kg_version, kg.write_location, kg.full_kg,
                        kg.node_data, kg.node_dict)
        if kg.node_data:
            meta.metadata_processor()
            meta.extract_metadata(kg.graph)
        # create graph subsets
        kg.graph, annotation_triples = splits_knowledge_graph(kg.graph)
        full_kg_owl = '_'.join(kg.full_kg.split('_')[0:-1]) + '_OWL.owl'
        stem = full_kg_owl[:-4]  # file name without the '.owl' extension
        annot, full = stem + '_AnnotationsOnly.nt', stem + '.nt'
        appends_to_existing_file(annotation_triples,
                                 kg.write_location + annot, ' ')
        clean_graph = updates_pkt_namespace_identifiers(
            kg.graph, kg.construct_approach)
        # test method
        shutil.copy(kg.write_location + annot, kg.write_location + full)
        appends_to_existing_file(set(kg.graph), kg.write_location + full,
                                 ' ')
        args = {
            'construction': kg.construct_approach,
            'edge_dict': kg.edge_dict,
            'kg_owl': full_kg_owl,
            'rel_dict': kg.relations_dict,
            'metadata': meta.creates_node_metadata,
            'inverse_dict': kg.inverse_relations_dict,
            'node_data': kg.node_data,
            'ont_cls': kg.ont_classes,
            'obj_props': kg.obj_properties,
            'write_loc': kg.write_location
        }
        edges = list(kg.edge_dict.keys())
        ray.init(local_mode=True, ignore_reinit_error=True)
        try:
            actors = [
                ray.remote(kg.EdgeConstructor).remote(args)
                for _ in range(kg.cpus)
            ]
            # spread the edge types over the actors round-robin
            for idx, edge_type in enumerate(edges):
                actors[idx % kg.cpus].creates_new_edges.remote(edge_type)
            res = ray.get([x.graph_getter.remote() for x in actors])
            g1 = [kg.graph] + [x[0] for x in res]
            g2 = [clean_graph] + [x[1] for x in res]
            error_dicts = dict(
                ChainMap(
                    *ray.get([x.error_dict_getter.remote() for x in actors])))
            del actors
        finally:
            # always stop ray so that a failure above cannot leak the runtime into later tests
            ray.shutdown()

        # check that edges were added to the graph
        graph1 = {triple for graph in g1 for triple in graph}
        graph2 = {triple for graph in g2 for triple in graph}
        self.assertEqual(len(graph1), 9780)
        self.assertEqual(len(graph2), 9746)
        self.assertIsInstance(error_dicts, Dict)
        # check graph files were saved
        self.assertTrue(os.path.exists(kg.write_location + annot))
        self.assertTrue(os.path.exists(kg.write_location + full))

        return None

    def test_creates_new_edges_instance_no_inverse(self):
        """Tests the creates_new_edges method when applied to a kg with instance-based construction without inverse
        relations.

        The self.kg_instance builder's graph is replaced with a freshly parsed so_with_imports ontology, node
        metadata is processed when the builder has node_data, and every edge type in edge_dict is dispatched
        round-robin to EdgeConstructor ray actors. The test then checks the number of distinct triples across the
        collected graphs, the type of the merged error dictionary, and that the annotation-only and full
        N-Triples files exist in the write location.

        Returns:
            None.
        """

        kg = self.kg_instance
        kg.reverse_relation_processor()
        # replace the builder's graph with a freshly parsed ontology
        kg.graph = Graph().parse(
            self.dir_loc + '/ontologies/so_with_imports.owl')
        # initialize metadata class
        meta = Metadata(kg.kg_version, kg.write_location, kg.full_kg,
                        kg.node_data, kg.node_dict)
        if kg.node_data:
            meta.metadata_processor()
            meta.extract_metadata(kg.graph)
        # create graph subsets
        kg.graph, annotation_triples = splits_knowledge_graph(kg.graph)
        full_kg_owl = '_'.join(kg.full_kg.split('_')[0:-1]) + '_OWL.owl'
        stem = full_kg_owl[:-4]  # file name without the '.owl' extension
        annot, full = stem + '_AnnotationsOnly.nt', stem + '.nt'
        appends_to_existing_file(annotation_triples,
                                 kg.write_location + annot, ' ')
        clean_graph = updates_pkt_namespace_identifiers(
            kg.graph, kg.construct_approach)

        # test method
        shutil.copy(kg.write_location + annot, kg.write_location + full)
        appends_to_existing_file(set(kg.graph), kg.write_location + full,
                                 ' ')
        args = {
            'construction': kg.construct_approach,
            'edge_dict': kg.edge_dict,
            'kg_owl': full_kg_owl,
            'rel_dict': kg.relations_dict,
            'metadata': meta.creates_node_metadata,
            'inverse_dict': kg.inverse_relations_dict,
            'node_data': kg.node_data,
            'ont_cls': kg.ont_classes,
            'obj_props': kg.obj_properties,
            'write_loc': kg.write_location
        }
        edges = list(kg.edge_dict.keys())
        ray.init(local_mode=True, ignore_reinit_error=True)
        try:
            actors = [
                ray.remote(kg.EdgeConstructor).remote(args)
                for _ in range(kg.cpus)
            ]
            # spread the edge types over the actors round-robin
            for idx, edge_type in enumerate(edges):
                actors[idx % kg.cpus].creates_new_edges.remote(edge_type)
            res = ray.get([x.graph_getter.remote() for x in actors])
            g1 = [kg.graph] + [x[0] for x in res]
            g2 = [clean_graph] + [x[1] for x in res]
            error_dicts = dict(
                ChainMap(
                    *ray.get([x.error_dict_getter.remote() for x in actors])))
            del actors
        finally:
            # always stop ray so that a failure above cannot leak the runtime into later tests
            ray.shutdown()

        # check that edges were added to the graph
        graph1 = {triple for graph in g1 for triple in graph}
        graph2 = {triple for graph in g2 for triple in graph}
        self.assertEqual(len(graph1), 9702)
        self.assertEqual(len(graph2), 9682)
        self.assertIsInstance(error_dicts, Dict)
        # check graph files were saved
        self.assertTrue(os.path.exists(kg.write_location + annot))
        self.assertTrue(os.path.exists(kg.write_location + full))

        return None

    def test_creates_new_edges_instance_inverse(self):
        """Tests the creates_new_edges method when applied to a kg with instance-based construction with inverse
        relations.

        The self.kg_instance2 builder's graph is replaced with a freshly parsed so_with_imports ontology, node
        metadata is processed when the builder has node_data, and every edge type in edge_dict is dispatched
        round-robin to EdgeConstructor ray actors. The test then checks the number of distinct triples across the
        collected graphs, the type of the merged error dictionary, and that the annotation-only and full
        N-Triples files exist in the write location.

        Returns:
            None.
        """

        kg = self.kg_instance2
        kg.reverse_relation_processor()
        # replace the builder's graph with a freshly parsed ontology
        kg.graph = Graph().parse(
            self.dir_loc + '/ontologies/so_with_imports.owl')
        # initialize metadata class
        meta = Metadata(kg.kg_version, kg.write_location, kg.full_kg,
                        kg.node_data, kg.node_dict)
        if kg.node_data:
            meta.metadata_processor()
            meta.extract_metadata(kg.graph)
        # create graph subsets
        kg.graph, annotation_triples = splits_knowledge_graph(kg.graph)
        full_kg_owl = '_'.join(kg.full_kg.split('_')[0:-1]) + '_OWL.owl'
        stem = full_kg_owl[:-4]  # file name without the '.owl' extension
        annot, full = stem + '_AnnotationsOnly.nt', stem + '.nt'
        appends_to_existing_file(annotation_triples,
                                 kg.write_location + annot, ' ')
        clean_graph = updates_pkt_namespace_identifiers(
            kg.graph, kg.construct_approach)

        # test method
        shutil.copy(kg.write_location + annot, kg.write_location + full)
        appends_to_existing_file(set(kg.graph), kg.write_location + full,
                                 ' ')
        args = {
            'construction': kg.construct_approach,
            'edge_dict': kg.edge_dict,
            'kg_owl': full_kg_owl,
            'rel_dict': kg.relations_dict,
            'metadata': meta.creates_node_metadata,
            'inverse_dict': kg.inverse_relations_dict,
            'node_data': kg.node_data,
            'ont_cls': kg.ont_classes,
            'obj_props': kg.obj_properties,
            'write_loc': kg.write_location
        }
        edges = list(kg.edge_dict.keys())
        ray.init(local_mode=True, ignore_reinit_error=True)
        try:
            actors = [
                ray.remote(kg.EdgeConstructor).remote(args)
                for _ in range(kg.cpus)
            ]
            # spread the edge types over the actors round-robin
            for idx, edge_type in enumerate(edges):
                actors[idx % kg.cpus].creates_new_edges.remote(edge_type)
            res = ray.get([x.graph_getter.remote() for x in actors])
            g1 = [kg.graph] + [x[0] for x in res]
            g2 = [clean_graph] + [x[1] for x in res]
            error_dicts = dict(
                ChainMap(
                    *ray.get([x.error_dict_getter.remote() for x in actors])))
            del actors
        finally:
            # always stop ray so that a failure above cannot leak the runtime into later tests
            ray.shutdown()

        # check that edges were added to the graph
        graph1 = {triple for graph in g1 for triple in graph}
        graph2 = {triple for graph in g2 for triple in graph}
        self.assertEqual(len(graph1), 9707)
        self.assertEqual(len(graph2), 9687)
        self.assertIsInstance(error_dicts, Dict)
        # check graph files were saved
        self.assertTrue(os.path.exists(kg.write_location + annot))
        self.assertTrue(os.path.exists(kg.write_location + full))

        return None

    def test_creates_new_edges_adding_metadata_to_kg_bad(self):
        """Tests the creates_new_edges method and adds node metadata to the KG, but also makes sure that errors are
        recorded for genes that are not in the subclass_map.

        Every edge type in edge_dict is dispatched round-robin to EdgeConstructor ray actors, and the error
        dictionaries returned by the actors are merged. The merged dictionary is expected to hold exactly one
        entry, keyed 'gene-phenotype', whose sorted values are ['10', '20', '9'].

        Returns:
            None.
        """

        kg = self.kg_subclass
        kg.reverse_relation_processor()
        # parse the ontology into the builder's existing graph object (the graph is not replaced here)
        kg.graph.parse(self.dir_loc + '/ontologies/so_with_imports.owl')
        kg.obj_properties = gets_object_properties(kg.graph)
        kg.ont_classes = gets_ontology_classes(kg.graph)
        # initialize metadata class
        meta = Metadata(kg.kg_version, kg.write_location, kg.full_kg,
                        kg.node_data, kg.node_dict)
        if kg.node_data:
            meta.metadata_processor()
            meta.extract_metadata(kg.graph)
        # test method
        args = {
            'construction': kg.construct_approach,
            'edge_dict': kg.edge_dict,
            'kg_owl': '',
            'rel_dict': kg.relations_dict,
            'ont_cls': kg.ont_classes,
            'metadata': meta.creates_node_metadata,
            'inverse_dict': kg.inverse_relations_dict,
            'node_data': kg.node_data,
            'obj_props': kg.obj_properties,
            'write_loc': kg.write_location
        }
        edges = list(kg.edge_dict.keys())
        ray.init(local_mode=True, ignore_reinit_error=True)
        try:
            actors = [
                ray.remote(kg.EdgeConstructor).remote(args)
                for _ in range(kg.cpus)
            ]
            # spread the edge types over the actors round-robin
            for idx, edge_type in enumerate(edges):
                actors[idx % kg.cpus].creates_new_edges.remote(edge_type)
            error_dicts = dict(
                ChainMap(
                    *ray.get([x.error_dict_getter.remote() for x in actors])))
            del actors
        finally:
            # always stop ray so that a failure above cannot leak the runtime into later tests
            ray.shutdown()

        # check the errors collected from the actors
        self.assertIsInstance(error_dicts, Dict)
        self.assertEqual(len(error_dicts), 1)
        self.assertIn('gene-phenotype', error_dicts.keys())
        self.assertEqual(sorted(list(error_dicts['gene-phenotype'])),
                         ['10', '20', '9'])

        return None

    def tests_graph_getter(self):
        """Tests graph_getter method.

        The value returned by graph_getter is expected to contain exactly two items, both of which are Graph
        instances.

        Returns:
            None.
        """

        results = self.inner_class.graph_getter()

        # verify results; assertEqual reports the actual length on failure, unlike assertTrue(len(...) == 2)
        self.assertEqual(len(results), 2)
        self.assertIsInstance(results[0], Graph)
        self.assertIsInstance(results[1], Graph)

        return None

    def tearDown(self):
        """Restores default ResourceWarning handling and deletes the temporary resources directory."""

        warnings.simplefilter('default', ResourceWarning)

        # remove resource directory (and everything written beneath it)
        resources_dir = self.dir_loc_resources
        shutil.rmtree(resources_dir)
예제 #3
0
class TestPartialBuild(unittest.TestCase):
    """Class to test the partialBuild class from the knowledge graph script."""
    def setUp(self):
        """Creates a temporary resources directory next to this file, copies the fixture data into it, writes the
        edge list and subclass map used by the build, and instantiates PartialBuild as self.kg.

        Returns:
            None.
        """

        warnings.simplefilter('ignore', ResourceWarning)

        # initialize file location
        current_directory = os.path.dirname(__file__)
        dir_loc = os.path.join(current_directory, 'data')
        self.dir_loc = os.path.abspath(dir_loc)

        # set-up environment - make temp directory
        dir_loc_resources = os.path.join(current_directory, 'resources')
        self.dir_loc_resources = os.path.abspath(dir_loc_resources)
        os.mkdir(self.dir_loc_resources)
        # tearDown is not run when setUp itself fails, which would leave this directory behind and make the
        # os.mkdir above fail in every later test; a registered cleanup runs in that case as well
        self.addCleanup(shutil.rmtree,
                        self.dir_loc_resources,
                        ignore_errors=True)
        for sub_dir in ('/knowledge_graphs', '/relations_data', '/node_data',
                        '/ontologies', '/construction_approach'):
            os.mkdir(self.dir_loc_resources + sub_dir)

        # handle logging
        self.logs = os.path.abspath(current_directory + '/builds/logs')
        logging.disable(logging.CRITICAL)
        # only the first matching log file is removed
        existing_logs = glob.glob(self.logs + '/*.log')
        if existing_logs:
            os.remove(existing_logs[0])

        # copy needed data; each pair is (path under the data directory, path under the resources directory)
        fixture_files = [
            # node metadata
            ('/node_data/node_metadata_dict.pkl',
             '/node_data/node_metadata_dict.pkl'),
            # ontology data
            ('/ontologies/empty_hp_with_imports.owl',
             '/ontologies/hp_with_imports.owl'),
            # merged ontology data
            ('/ontologies/so_with_imports.owl',
             '/knowledge_graphs/PheKnowLator_MergedOntologies.owl'),
            # relations data
            ('/RELATIONS_LABELS.txt', '/relations_data/RELATIONS_LABELS.txt'),
            # inverse relations
            ('/INVERSE_RELATIONS.txt',
             '/relations_data/INVERSE_RELATIONS.txt'),
            # empty master edges
            ('/Master_Edge_List_Dict_empty.json',
             '/Master_Edge_List_Dict_empty.json'),
        ]
        for source_name, target_name in fixture_files:
            shutil.copyfile(self.dir_loc + source_name,
                            self.dir_loc_resources + target_name)

        # create edge list
        edge_dict = {
            "gene-phenotype": {
                "data_type":
                "entity-class",
                "edge_relation":
                "RO_0003302",
                "uri": [
                    "http://www.ncbi.nlm.nih.gov/gene/",
                    "http://purl.obolibrary.org/obo/"
                ],
                "edge_list": [["2", "SO_0000162"], ["2", "SO_0000196"],
                              ["2", "SO_0000323"], ["9", "SO_0001490"],
                              ["9", "SO_0000301"], ["9", "SO_0001560"],
                              ["9", "SO_0001560"], ["10", "SO_0000444"],
                              ["10", "SO_0002138"], ["10", "SO_0000511"]]
            },
            "gene-gene": {
                "data_type":
                "entity-entity",
                "edge_relation":
                "RO_0002435",
                "uri": [
                    "http://www.ncbi.nlm.nih.gov/gene/",
                    "http://www.ncbi.nlm.nih.gov/gene/"
                ],
                "edge_list": [["1", "2"], ["2", "3"], ["3", "18"],
                              ["17", "19"], ["4", "17"], ["5", "11"],
                              ["11", "12"], ["4", "5"]]
            },
            "disease-disease": {
                "data_type":
                "class-class",
                "edge_relation":
                "RO_0002435",
                "uri": [
                    "http://www.ncbi.nlm.nih.gov/gene/",
                    "http://www.ncbi.nlm.nih.gov/gene/"
                ],
                "edge_list": [["DOID_3075", "DOID_1080"],
                              ["DOID_3075", "DOID_4267"],
                              ["DOID_4800", "DOID_10190"],
                              ["DOID_4800", "DOID_80219"],
                              ["DOID_2729", "DOID_1962"],
                              ["DOID_2729", "DOID_5096"],
                              ["DOID_8837", "DOID_6774"],
                              ["DOID_8837", "DOID_8754"]]
            },
            "entity_namespaces": {
                "gene": "http://purl.uniprot.org/geneid/"
            }
        }

        # save data
        with open(self.dir_loc_resources + '/Master_Edge_List_Dict.json',
                  'w') as filepath:
            json.dump(edge_dict, filepath)

        # create subclass mapping data
        subcls_map = {
            "1": ['SO_0001217'],
            "2": ['SO_0001217'],
            "3": ['SO_0001217'],
            "4": ['SO_0001217'],
            "5": ['SO_0001217'],
            "11": ['SO_0001217'],
            "12": ['SO_0001217'],
            "17": ['SO_0001217'],
            "18": ['SO_0001217'],
            "5096": ['SO_0001217'],
            "6774": ['SO_0001217'],
            "19": ['SO_0001217']
        }

        # save data
        with open(
                self.dir_loc_resources +
                '/construction_approach/subclass_construction_map.pkl',
                'wb') as f:
            pickle.dump(subcls_map, f, protocol=4)

        # set write location
        self.write_location = self.dir_loc_resources + '/knowledge_graphs'

        # instantiate class
        self.kg = PartialBuild('subclass', 'yes', 'yes', 'yes', 1,
                               self.write_location)

        # update class attributes
        dir_loc_owltools = os.path.join(current_directory, 'utils/owltools')
        self.kg.owl_tools = os.path.abspath(dir_loc_owltools)

        return None

    def test_class_initialization(self):
        """Tests initialization of the class."""

        # check build type
        self.assertEqual(self.kg.gets_build_type(), 'Partial Build')
        self.assertNotEqual(self.kg.gets_build_type(), 'Full Build')
        self.assertNotEqual(self.kg.gets_build_type(), 'Post-Closure Build')

        return None

    def test_construct_knowledge_graph(self):
        """Tests the construct_knowledge_graph method.

        Runs the build and checks that the logic-only, annotation-only and full N-Triples files exist in the
        knowledge_graphs directory.
        """

        # test out the build
        self.kg.construct_knowledge_graph()
        full_kg_owl = '_'.join(self.kg.full_kg.split('_')[0:-1]) + '_OWL.owl'
        stem = full_kg_owl[:-4]  # file name without the '.owl' extension

        # check for output files
        for suffix in ('_LogicOnly.nt', '_AnnotationsOnly.nt', '.nt'):
            f_name = stem + suffix
            self.assertTrue(
                os.path.exists(self.dir_loc_resources + '/knowledge_graphs/' +
                               f_name))

        return None

    def tearDown(self):
        """Restores default ResourceWarning handling and removes the temporary resources directory."""

        warnings.simplefilter('default', ResourceWarning)

        # remove resource directory
        shutil.rmtree(self.dir_loc_resources)

        return None
예제 #4
0
def main():
    """Runs the PheKnowLator knowledge graph build from the command line.

    Parses the command-line arguments, downloads the ontology and edge data sources, builds the master edge list,
    and constructs the knowledge graph with the requested build type. Progress banners and the elapsed seconds of
    each stage are printed to standard output.

    Command-line arguments:
        -p/--cpus: number of workers (integer); when omitted, the logical CPU count from psutil is used.
        -g/--onts: name/path to the text file containing ontologies.
        -e/--edg: name/path to the text file containing edge sources.
        -a/--app: construction approach (i.e. instance or subclass).
        -t/--res: name/path to the text file containing resource_info.
        -b/--kg: build type; 'partial' and 'post-closure' select those builds, any other value runs a full build.
        -r/--rel: yes/no - add inverse relations.
        -s/--owl: yes/no - remove OWL Semantics.
        -m/--nde: yes/no - add node metadata.
        -o/--out: name/path to the directory where the knowledge graph is written.

    Returns:
        None.
    """

    parser = argparse.ArgumentParser(description=('PheKnowLator: This program builds a biomedical knowledge graph using'
                                                  ' Open Biomedical Ontologies and linked open data. The program takes '
                                                  'the following arguments:'))
    # type=int: without it a worker count given on the command line would be passed on as a string, whereas the
    # default path below supplies an integer from psutil.cpu_count()
    parser.add_argument('-p', '--cpus', help='# workers to use; defaults to use all available cores', default=None,
                        type=int)
    parser.add_argument('-g', '--onts', help='name/path to text file containing ontologies', required=True)
    parser.add_argument('-e', '--edg', help='name/path to text file containing edge sources', required=True)
    parser.add_argument('-a', '--app', help='construction approach to use (i.e. instance or subclass)', required=True)
    parser.add_argument('-t', '--res', help='name/path to text file containing resource_info', required=True)
    parser.add_argument('-b', '--kg', help='build type: "partial", "full", or "post-closure"', required=True)
    parser.add_argument('-r', '--rel', help='yes/no - adding inverse relations to knowledge graph', required=True)
    parser.add_argument('-s', '--owl', help='yes/no - removing OWL Semantics from knowledge graph', required=True)
    parser.add_argument('-m', '--nde', help='yes/no - adding node metadata to knowledge graph', required=True)
    parser.add_argument('-o', '--out', help='name/path to directory where to write knowledge graph', required=True)
    args = parser.parse_args()

    def _report_elapsed(template, started):
        """Prints `template` filled with the seconds elapsed since `started` and the current timestamp."""
        finished = time.time()
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(template.format(finished - started, timestamp))

    ######################
    #### READ IN DATA ####
    ######################

    # STEP 1: CREATE INPUT DOCUMENTS
    # see https://github.com/callahantiff/PheKnowLator/wiki/Dependencies page for how to prepare input data files

    # STEP 2: DOWNLOAD AND PREPROCESS DATA
    # see the 'Data_Preparation.ipynb' and 'Ontology_Cleaning.ipynb' file for examples and guidelines

    # STEP 3: DOWNLOAD ONTOLOGIES
    print('\n' + '=' * 40 + '\nPKT: DOWNLOADING DATA: ONTOLOGY DATA\n' + '=' * 40 + '\n')
    start = time.time()
    # example inputs: data_path='resources/ontology_source_list.txt', resource_data='resources/resource_info.txt'
    ont = OntData(data_path=args.onts, resource_data=args.res)
    ont.downloads_data_from_url()
    _report_elapsed('\nPKT: TOTAL SECONDS TO DOWNLOAD ONTOLOGIES: {} @ {}', start)

    # STEP 4: DOWNLOAD EDGE DATA SOURCES
    print('\n' + '=' * 37 + '\nPKT: DOWNLOADING DATA: CLASS DATA\n' + '=' * 37 + '\n')
    start = time.time()
    # example inputs: data_path='resources/edge_source_list.txt', resource_data='resources/resource_info.txt'
    ent = LinkedData(data_path=args.edg, resource_data=args.res)
    ent.downloads_data_from_url()
    _report_elapsed('\nPKT: TOTAL SECONDS TO DOWNLOAD NON-ONTOLOGY DATA: {} @ {}', start)

    #####################
    # CREATE EDGE LISTS #
    #####################

    # set-up environment
    cpus = psutil.cpu_count(logical=True) if args.cpus is None else args.cpus
    ray.init(ignore_reinit_error=True)

    print('\n' + '=' * 28 + '\nPKT: CONSTRUCT EDGE LISTS\n' + '=' * 28 + '\n')
    start = time.time()
    # ontology entries take precedence when both sources share a key
    combined_edges = dict(ent.data_files, **ont.data_files)
    # example input: source_file='resources/resource_info.txt'
    master_edges = CreatesEdgeList(data_files=combined_edges, source_file=args.res)
    master_edges.runs_creates_knowledge_graph_edges(source_file=args.res, data_files=combined_edges, cpus=cpus)
    _report_elapsed('\nPKT: TOTAL SECONDS TO BUILD THE MASTER EDGE LIST: {} @ {}', start)

    del ont, ent, master_edges  # clean up environment before build knowledge graph

    #########################
    # BUILD KNOWLEDGE GRAPH #
    #########################

    print('\n' + '=' * 33 + '\nPKT: BUILDING KNOWLEDGE GRAPH\n' + '=' * 33 + '\n')
    start = time.time()

    # all build classes take the same keyword arguments; any build type other than the two listed here
    # falls back to a full build
    build_classes = {'partial': PartialBuild, 'post-closure': PostClosureBuild}
    build_class = build_classes.get(args.kg, FullBuild)
    kg = build_class(construction=args.app,
                     node_data=args.nde,
                     inverse_relations=args.rel,
                     decode_owl=args.owl,
                     cpus=cpus,
                     write_location=args.out)
    kg.construct_knowledge_graph()

    # ray.shutdown()  # uncomment if running this independently of the CI/CD builds
    _report_elapsed('\nPKT: TOTAL SECONDS TO CONSTRUCT A KG: {} @ {}', start)