コード例 #1
0
def hpo_to_tree(cls, hpo_terms, hpo_graph, tree, path):
    """
    Recursively extend ``tree`` with the subclasses of ``cls``.

    :param cls: curie of the class being visited
    :param hpo_terms: mapping filled in place; a class seen for the first
                      time gets a 'label', a 'parents' count (number of
                      rdfs:subClassOf objects) and a 'lay_person' entry
    :param hpo_graph: graph queried for labels and rdfs:subClassOf triples
    :param tree: nested dict, extended in place; it must already hold the
                 chain of keys ``path`` + [``cls``]
    :param path: curies leading from the top of ``tree`` down to the parent
                 of ``cls``; copied, never modified
    :return: None
    """
    lineage = copy.copy(path)
    lineage.append(cls)
    curie_util = CurieUtil(curie_map.get())

    def as_ref(curie):
        return URIRef(curie_util.get_uri(curie))

    if cls not in hpo_terms:
        record = {'label': hpo_graph.label(as_ref(cls))}
        hpo_terms[cls] = record
        superclasses = hpo_graph.objects(as_ref(cls), RDFS.subClassOf)
        record['parents'] = sum(1 for _ in superclasses)
        record["lay_person"] = get_lay_person(cls, hpo_graph)

    # Walk down the tree until we reach the node of the input class
    node = tree
    for step in lineage:
        node = node[step]

    # Register every direct subclass below that node, then descend into it
    for child in hpo_graph.subjects(RDFS.subClassOf, as_ref(lineage[-1])):
        child_curie = curie_util.get_curie(child).replace("OBO:HP_", "HP:")
        node[child_curie] = {}
        hpo_to_tree(child_curie, hpo_terms, hpo_graph, tree, lineage)
コード例 #2
0
ファイル: test_impc.py プロジェクト: putmantime/dipper
    def setUp(self):
        """Define the curies, the sample input record and the association
        IRI used by the tests."""
        self.assoc_curie = 'MONARCH:test_association'
        self.eco_id = 'ECO:0000015'

        # Sample input record, one field per line
        self.test_set_1 = (
            'MGI:1920145',
            'Setd5',
            'WTSI',
            'MEFW',
            'male',
            'heterozygote',
            'MGI:4432631',
            'Setd5<tm1a(EUCOMM)Wtsi>',
            'targeted mutation 1a, Wellcome Trust Sanger Institute',
            'MGI:2159965',
            'C57BL/6N',
            'MGP',
            'Wellcome Trust Sanger Institute Mouse Genetics Project',
            'MGP Select Pipeline',
            'MGP_001',
            'MGP_XRY_001',
            'X-ray',
            'IMPC_XRY_008_001',
            'Number of ribs right',
            'MP:0005390',
            'skeleton phenotype',
            'MP:0000480',
            'increased rib number',
            '1.637023E-010',
            '',
            '8.885439E-007',
            'Wilcoxon rank sum test with continuity correction',
            'IMPC',
        )

        # Fixed test curies; outside the tests these are generated
        # within _add_evidence() and _add_study_provenance()
        self.study_curie = "_:study"
        self.evidence_curie = "_:evidence"

        # IRI for testing sparql output
        self.assoc_iri = URIRef(
            CurieUtil(curie_map.get()).get_uri(self.assoc_curie))
コード例 #3
0
ファイル: test_impc.py プロジェクト: kshefchek/dipper
    def setUp(self):
        """Set up the curies, the sample input record and the association
        IRI that the tests rely on."""
        sample_fields = [
            'MGI:1920145', 'Setd5', 'WTSI', 'MEFW',
            'male', 'heterozygote',
            'MGI:4432631', 'Setd5<tm1a(EUCOMM)Wtsi>',
            'targeted mutation 1a, Wellcome Trust Sanger Institute',
            'MGI:2159965', 'C57BL/6N',
            'MGP', 'Wellcome Trust Sanger Institute Mouse Genetics Project',
            'MGP Select Pipeline', 'MGP_001',
            'MGP_XRY_001', 'X-ray',
            'IMPC_XRY_008_001', 'Number of ribs right',
            'MP:0005390', 'skeleton phenotype',
            'MP:0000480', 'increased rib number',
            '1.637023E-010', '', '8.885439E-007',
            'Wilcoxon rank sum test with continuity correction',
            'IMPC',
        ]

        self.assoc_curie = 'MONARCH:test_association'
        self.eco_id = 'ECO:0000015'
        self.test_set_1 = tuple(sample_fields)

        # Hard-coded stand-ins for the curies that are otherwise generated
        # within _add_evidence() and _add_study_provenance()
        self.study_curie = "_:study"
        self.evidence_curie = "_:evidence"

        # IRI for testing sparql output
        uri_maker = CurieUtil(curie_map.get())
        assoc_uri = uri_maker.get_uri(self.assoc_curie)
        self.assoc_iri = URIRef(assoc_uri)
コード例 #4
0
ファイル: test_genotype.py プロジェクト: tegar9000/dipper-1
 def test_addGenotype(self):
     """addGenotype() must put an rdfs:label triple for the id in the graph."""
     curie_util = CurieUtil(self.curie_map)
     genotype_curie = 'MGI:5515892'
     genotype_label = 'Pmp22<Tr-2J>/Pmp22<+> [C57BL/6J-Pmp22<Tr-2J>/GrsrJ]'

     self.genotype.addGenotype(genotype_curie, genotype_label)

     expected_triple = (
         URIRef(curie_util.get_uri(genotype_curie)),
         RDFS['label'],
         Literal(genotype_label),
     )
     self.assertTrue(expected_triple in self.genotype.graph)
コード例 #5
0
 def test_addGenotype(self):
     """Check that addGenotype() stores the label of the genotype as an
     rdfs:label triple in the graph."""
     from rdflib.namespace import RDFS, URIRef
     from rdflib import Literal
     from dipper.utils.CurieUtil import CurieUtil

     expander = CurieUtil(self.curie_map)
     curie = 'MGI:5515892'
     text = 'Pmp22<Tr-2J>/Pmp22<+> [C57BL/6J-Pmp22<Tr-2J>/GrsrJ]'
     self.genotype.addGenotype(curie, text)

     subject = URIRef(expander.get_uri(curie))
     is_present = (subject, RDFS['label'], Literal(text)) in self.genotype.graph
     self.assertTrue(is_present)
コード例 #6
0
ファイル: test_genotype.py プロジェクト: d3borah/dipper
 def test_addGenotype(self):
     """The label passed to addGenotype() must show up in the graph as the
     rdfs:label of the genotype IRI."""
     from rdflib.namespace import RDFS, URIRef
     from rdflib import Literal
     from dipper.utils.CurieUtil import CurieUtil

     curie_util = CurieUtil(self.curie_map)
     # named genotype_id rather than id so the builtin is not shadowed
     genotype_id = 'MGI:5515892'
     genotype_label = 'Pmp22<Tr-2J>/Pmp22<+> [C57BL/6J-Pmp22<Tr-2J>/GrsrJ]'
     self.genotype.addGenotype(genotype_id, genotype_label)

     label_triple = (
         URIRef(curie_util.get_uri(genotype_id)),
         RDFS['label'],
         Literal(genotype_label),
     )
     self.assertTrue(label_triple in self.genotype.graph)
コード例 #7
0
ファイル: treeify-hpo.py プロジェクト: DoctorBud/dipper
def hpo_to_tree(cls, hpo_terms, hpo_graph, tree, path):
    """
    Depth-first expansion of ``tree`` below the class ``cls``.

    :param cls: curie of the class to expand
    :param hpo_terms: mapping updated in place; on the first visit of a class
                      its 'label', its number of rdfs:subClassOf objects
                      ('parents') and its 'lay_person' value are recorded
    :param hpo_graph: graph providing labels and rdfs:subClassOf triples
    :param tree: nested dict updated in place; the keys of ``path`` followed
                 by ``cls`` must already be nested inside it
    :param path: list of curies from the top of ``tree`` to the parent of
                 ``cls``; a copy is extended, the argument is left alone
    :return: None
    """
    branch = copy.copy(path)
    branch.append(cls)
    curie_util = CurieUtil(curie_map.get())

    def uri_ref(curie):
        return URIRef(curie_util.get_uri(curie))

    if cls not in hpo_terms:
        term_info = {'label': hpo_graph.label(uri_ref(cls))}
        hpo_terms[cls] = term_info
        parent_refs = hpo_graph.objects(uri_ref(cls), RDFS.subClassOf)
        term_info['parents'] = sum(1 for _ in parent_refs)
        term_info["lay_person"] = get_lay_person(cls, hpo_graph)

    # Follow the branch through the nested dict to reach the input class
    cursor = tree
    for key in branch:
        cursor = cursor[key]

    # Add each direct subclass under the input class and expand it in turn
    subclasses = hpo_graph.subjects(RDFS.subClassOf, uri_ref(branch[-1]))
    for sub_class in subclasses:
        sub_curie = curie_util.get_curie(sub_class).replace("OBO:HP_", "HP:")
        cursor[sub_curie] = {}
        hpo_to_tree(sub_curie, hpo_terms, hpo_graph, tree, branch)
コード例 #8
0
    def setUp(self):
        """
        Feed one sample record to CGD.add_disease_drug_variant_to_graph()
        and precompute the URIs and labels the tests compare against.
        """
        self.curie_map = curie_map.get()
        curie_util = CurieUtil(self.curie_map)

        def to_uri(curie):
            return URIRef(curie_util.get_uri(curie))

        # Fake credentials as these tests do not require a database connection
        self.cgd = CGD('foo', '******', '******')

        sample_row = (387, 'MLH1 any mutation', 13, 'Adenocarcinoma',
                      None, 'Colon', 'no response', 1,
                      '5FU-based adjuvant therapy', 'late trials', '20498393')
        test_data = (sample_row,)
        self.cgd.add_disease_drug_variant_to_graph(test_data)

        (variant_key, variant_label, diagnoses_key, diagnoses,
         specific_diagnosis, organ, relationship,
         drug_key, drug, therapy_status, pubmed_id) = test_data[0]

        # Identifiers derived from the sample record
        make_id = self.cgd.make_cgd_id
        source_id = "PMID:{0}".format(pubmed_id)
        variant_id = make_id('variant{0}'.format(variant_key))
        disease_id = make_id(
            'disease{0}{1}'.format(diagnoses_key, diagnoses))
        relationship_id = "RO:has_environment"
        disease_quality = "CGD:{0}".format(relationship).replace(" ", "_")
        has_quality_property = "BFO:0000159"  # not used below
        drug_id = make_id('drug{0}'.format(drug_key))
        disease_instance_id = make_id(
            'phenotype{0}{1}{2}'.format(diagnoses, variant_key, relationship))
        variant_disease_annot = make_id(
            "assoc{0}{1}".format(variant_key, diagnoses))

        # Set up URIs
        self.source_uri = to_uri(source_id)
        self.variant_uri = to_uri(variant_id)
        self.disease_uri = to_uri(disease_id)
        self.disease_ind_uri = to_uri(disease_instance_id)
        self.relationship_uri = to_uri(relationship_id)
        self.drug_uri = to_uri(drug_id)
        self.vd_annot_uri = to_uri(variant_disease_annot)
        self.disease_quality_uri = to_uri(disease_quality)

        # Labels
        self.variant_label = variant_label
        self.disease_label = diagnoses
        self.disease_instance_label = "{0} with {1} to therapy".format(
            diagnoses, relationship)
        self.drug_label = drug
コード例 #9
0
ファイル: CGD.py プロジェクト: zzygyx9119/mckb
    def _replace_entity(graph, old_id, new_id, bindings=None,
                        is_property=False):
        """
        Replace one ID with another throughout a graph.

        When ``is_property`` is False the entity is swapped wherever it is
        used as a subject and wherever it is used as an object (two SPARQL
        updates, in that order); otherwise it is swapped wherever it is used
        as a predicate (one SPARQL update).

        NOTE(review): there is no ``self`` parameter, so this is presumably
        decorated as a staticmethod above this definition - confirm.

        :param graph: rdflib.graph object, modified through graph.update()
        :param old_id: String curie, IRI, or literal to be replaced
        :param new_id: String curie, IRI, or literal to replace the old id
        :param bindings: Dict, dictionary of namespace prefixes passed on to
                         graph.update(); None (the default) means an empty
                         dictionary
        :param is_property: Boolean, is an id a property/predicate rather than
                            a class, individual, or literal
        :return: None
        """
        # A fresh dict per call instead of a shared mutable default argument
        if bindings is None:
            bindings = {}

        cu = CurieUtil(curie_map.get())
        old_uri = URIRef(cu.get_uri(old_id))
        new_uri = URIRef(cu.get_uri(new_id))

        # In the templates {0} is the old IRI and {1} the new one; every
        # literal SPARQL brace is doubled so str.format() leaves it alone.
        # (The predicate template used to contain "{?obj}", which format()
        # parsed as a replacement field and rejected with KeyError.)
        as_subject = """
            DELETE {{ <{0}> ?pred ?obj }}
            INSERT {{ <{1}> ?pred ?obj }}
            WHERE {{ <{0}> ?pred ?obj }}
            """
        as_object = """
            DELETE {{ ?sub ?pred <{0}> }}
            INSERT {{ ?sub ?pred <{1}> }}
            WHERE {{ ?sub ?pred <{0}> }}
            """
        as_predicate = """
            DELETE {{ ?sub <{0}> ?obj }}
            INSERT {{ ?sub <{1}> ?obj }}
            WHERE {{ ?sub <{0}> ?obj }}
            """

        # Identity check kept on purpose: only the literal False selects the
        # subject/object replacement, exactly as before.
        if is_property is False:
            templates = (as_subject, as_object)
        else:
            templates = (as_predicate,)

        for template in templates:
            sparql_update = template.format(old_uri, new_uri)
            graph.update(sparql_update, 'sparql', bindings)

        return
コード例 #10
0
    def test_associations(self):
        """
        Query the graph for the association built from the sample input.

        The sample input is expected to have produced:
        CGD:VariantID has_phenotype(RO:0002200) CGD:DiseaseInstance

        A CGD:AssociationID OBO:RO_0002558 Traceable Author Statement (ECO:0000033)
        A CGD:AssociationID dc:source PMID:20498393
        A CGD:AssociationID has_environment CGD:DrugID
        A CGD:AssociationID OBAN:association_has_subject CGD:VariantID
        A CGD:AssociationID OBAN:association_has_object_property has_phenotype
        A CGD:AssociationID OBAN:association_has_object CGD:DiseaseInstance
        """
        from dipper.utils.TestUtils import TestUtils

        # Make testutils object and load bindings
        curie_util = CurieUtil(self.curie_map)
        graph_env = TestUtils(self.cgd.graph)
        self.cgd.load_bindings()
        evidence_uri = URIRef(curie_util.get_uri('OBO:ECO_0000033'))

        # {0} is filled with the IRI of the has_environment relationship
        query_template = """
                       SELECT ?diseaseInd ?variant ?drug ?vdannot ?source ?evidence
                       WHERE {{
                           ?variant OBO:RO_0002200 ?diseaseInd .

                           ?vdannot a OBAN:association ;
                               OBO:RO_0002558 ?evidence ;
                               dc:source ?source ;
                               <{0}> ?drug ;
                               OBAN:association_has_object ?diseaseInd ;
                               OBAN:association_has_object_property OBO:RO_0002200 ;
                               OBAN:association_has_subject ?variant .
                       }}
                       """
        sparql_query = query_template.format(self.relationship_uri)

        # Expected Results: a single row, in SELECT order
        expected_row = [
            self.disease_ind_uri,
            self.variant_uri,
            self.drug_uri,
            self.vd_annot_uri,
            self.source_uri,
            evidence_uri,
        ]
        expected_results = [expected_row]

        # Query graph
        sparql_output = graph_env.query_graph(sparql_query)

        self.assertEqual(expected_results, sparql_output)
コード例 #11
0
ファイル: test_model.py プロジェクト: tegar9000/dipper-1
class ModelTestCase(unittest.TestCase):
    """Checks the triples written by Model.addIndividualToGraph() and
    Model.addComment()."""

    def setUp(self):
        self.model = Model(RDFGraph())
        self.cutil = CurieUtil(curie_map.get())
        expand = self.cutil.get_uri

        # stuff to make test triples
        self.test_cat_subj_curie = "MGI:1234"
        self.test_cat_subj = expand("MGI:1234")
        self.test_cat_default_pred = expand("biolink:category")
        self.test_named_indiv = expand("owl:NamedIndividual")
        self.test_label_pred = expand("rdfs:label")
        self.test_label = "some label"

        self.test_comment_IRI = expand("rdfs:comment")
        self.test_comment = 'bonus eruptus'

    def tearDown(self):
        # NOTE(review): setUp() stores the fixture in self.model and never
        # sets self.graph, so this only creates a new attribute - confirm
        # whether self.model was meant.
        self.graph = None

    def test_addIndividualToGraph_assign_label(self):
        """The label given to addIndividualToGraph() becomes the rdfs:label."""
        self.model.addIndividualToGraph(self.test_cat_subj_curie, "some label")

        pattern = (URIRef(self.test_cat_subj),
                   URIRef(self.test_label_pred),
                   None)
        found = list(self.model.graph.triples(pattern))

        self.assertEqual(len(found), 1, "method didn't assign label")
        label_value = found[0][2]
        self.assertEqual(str(label_value), self.test_label,
                         "method didn't assign correct label")

    def test_addIndividualToGraph_assign_type_named_individual(self):
        """The individual is linked to owl:NamedIndividual exactly once."""
        self.model.addIndividualToGraph(self.test_cat_subj_curie, "some label")

        pattern = (URIRef(self.test_cat_subj),
                   None,
                   URIRef(self.test_named_indiv))
        found = list(self.model.graph.triples(pattern))

        self.assertEqual(len(found), 1,
                         "method didn't assign type as named individual")

    def test_addIndividualToGraph_assign_category(self):
        """Passing ind_category yields exactly one category triple."""
        self.model.addIndividualToGraph(
            self.test_cat_subj_curie, "some label",
            ind_category=blv.terms['Genotype'])

        pattern = (URIRef(self.test_cat_subj),
                   URIRef(self.test_cat_default_pred),
                   None)
        found = list(self.model.graph.triples(pattern))

        self.assertEqual(len(found), 1, "method didn't assign category")

    def test_add_comment(self):
        """addComment() writes the comment text as an rdfs:comment literal."""
        self.model.addComment(self.test_cat_subj, self.test_comment)

        pattern = (URIRef(self.test_cat_subj),
                   URIRef(self.test_comment_IRI),
                   Literal(self.test_comment))
        found = list(self.model.graph.triples(pattern))

        self.assertEqual(len(found), 1, "method didn't assign comment")

    def test_add_comment_assign_subject_category(self):
        """Passing subject_category to addComment() yields one category triple."""
        self.model.addComment(
            self.test_cat_subj, self.test_comment,
            subject_category=blv.terms['Genotype'])

        pattern = (URIRef(self.test_cat_subj),
                   URIRef(self.test_cat_default_pred),
                   None)
        found = list(self.model.graph.triples(pattern))

        self.assertEqual(len(found), 1, "method didn't assign category")
コード例 #12
0
ファイル: test_impc.py プロジェクト: TomConlin/dipper
    def setUp(self):
        """Create the TestUtils helper, the sample input record and the
        curies/IRI that the tests refer to."""
        self.test_util = TestUtils()
        self.assoc_curie = 'MONARCH:test_association'
        self.eco_id = 'ECO:0000015'

        # Sample record; each value is annotated with its column number
        # and header name.
        self.test_set_1 = (
            'MGI:1920145',              # 01 marker_accession_id
            'Setd5',                    # 02 marker_symbol
            'WTSI',                     # 03 phenotyping_center
            'MEFW',                     # 04 colony_raw
            'male',                     # 05 sex
            'heterozygote',             # 06 zygosity
            'MGI:4432631',              # 07 allele_accession_id
            'Setd5<tm1a(EUCOMM)Wtsi>',  # 08 allele_symbol
            # 09 allele_name
            'targeted mutation 1a, Wellcome Trust Sanger Institute',
            'MGI:2159965',              # 10 strain_accession_id
            'C57BL/6N',                 # 11 strain_name
            'MGP',                      # 12 project_name
            # 13 project_fullname
            'Wellcome Trust Sanger Institute Mouse Genetics Project',
            'MGP Select Pipeline',      # 14 pipeline_name
            'MGP_001',                  # 15 pipeline_stable_id
            'MGP_XRY_001',              # 16 procedure_stable_id
            'X-ray',                    # 17 procedure_name
            'IMPC_XRY_008_001',         # 18 parameter_stable_id
            'Number of ribs right',     # 19 parameter_name
            'MP:0005390',               # 20 top_level_mp_term_id
            'skeleton phenotype',       # 21 top_level_mp_term_name
            'MP:0000480',               # 22 mp_term_id
            'increased rib number',     # 23 mp_term_name
            '1.637023E-010',            # 24 p_value
            '',                         # 25 percentage_change
            '8.885439E-007',            # 26 effect_size
            # 27 statistical_method
            'Wilcoxon rank sum test with continuity correction',
            'IMPC',                     # 28 resource_name
        )

        # Test curies that are otherwise generated within _add_evidence()
        # and _add_study_provenance(); these blank nodes are hardcoded as
        # NOT Skolemized
        self.study_curie = "_:study"
        self.evidence_curie = "_:evidence"

        # IRI for testing sparql output
        self.assoc_iri = URIRef(
            CurieUtil(curie_map.get()).get_uri(self.assoc_curie))
コード例 #13
0
ファイル: test_genotype.py プロジェクト: tegar9000/dipper-1
class GenotypeTestCase(unittest.TestCase):
    """Tests for the label and category triples written by Genotype."""

    def setUp(self):
        self.graph = RDFGraph()
        self.curie_map = curie_map.get()
        self.genotype = Genotype(self.graph)
        self.cutil = CurieUtil(self.curie_map)
        self.test_cat_pred = self.cutil.get_uri(blv.terms['category'])
        self.test_cat_genotype_category = self.cutil.get_uri(
            blv.terms['Genotype'])
        self.test_cat_background_category = self.cutil.get_uri(
            blv.terms['PopulationOfIndividualOrganisms'])

    def tearDown(self):
        self.genotype = None

    def _category_triples(self, curie, category_iri):
        """Return the graph's triples (curie, category predicate, category_iri)."""
        return list(
            self.graph.triples((URIRef(self.cutil.get_uri(curie)),
                                URIRef(self.test_cat_pred),
                                URIRef(category_iri))))

    def test_addGenotype(self):
        """addGenotype() must add an rdfs:label triple for the genotype."""
        gid = 'MGI:5515892'
        label = \
            'Pmp22<Tr-2J>/Pmp22<+> [C57BL/6J-Pmp22<Tr-2J>/GrsrJ]'
        self.genotype.addGenotype(gid, label)
        # self.cutil is built from the same curie map in setUp()
        self.assertIn(
            (URIRef(self.cutil.get_uri(gid)), RDFS['label'], Literal(label)),
            self.genotype.graph)

    def test_addGenomicBackgroundToGenotype_adds_genotype(self):
        """
        test that addGenomicBackgroundToGenotype() assigns the genotype
        category to the genotype
        """
        genotype_id = "GENO:0000002"
        background_id = "GENO:0000002"  # no idea what a good example background ID is
        self.genotype.addGenomicBackgroundToGenotype(
            background_id=background_id, genotype_id=genotype_id)

        geno_triples = self._category_triples(
            genotype_id, self.test_cat_genotype_category)
        # Without an assertion this test could never fail; the expectation
        # mirrors the one in the *_adds_categories test.
        self.assertEqual(
            len(geno_triples), 1,
            "addTriples() didn't make exactly 1 genotype category triple")

    def test_addGenomicBackgroundToGenotype_adds_categories(self):
        """
        test that addGenomicBackgroundToGenotype() correctly assigns
        subject/object category
        """
        genotype_id = "GENO:0000002"
        background_id = "GENO:0000002"  # no idea what a good example background ID is
        self.genotype.addGenomicBackgroundToGenotype(
            background_id=background_id, genotype_id=genotype_id)

        geno_triples = self._category_triples(
            genotype_id, self.test_cat_genotype_category)
        self.assertEqual(
            len(geno_triples), 1,
            "addTriples() didn't make exactly 1 genotype category triple")
        self.assertEqual(
            geno_triples[0][2], URIRef(self.test_cat_genotype_category),
            "addTriples() didn't assign the right genotype category")

        background_triples = self._category_triples(
            background_id, self.test_cat_background_category)
        self.assertEqual(
            len(background_triples), 1,
            "addTriples() didn't make exactly 1 background category triple")
        self.assertEqual(
            background_triples[0][2],
            URIRef(self.test_cat_background_category),
            "addTriples() didn't assign the right background category")

    # TODO: add a test_addParts(); the earlier commented-out draft was a
    # copy of the method under test (marked "does not compile"), not a test.
コード例 #14
0
ファイル: test_genotype.py プロジェクト: zzygyx9119/mckb
    def test_missense_variant_cdna_model(self):
        """
        Test missense variant with cdna information

        Loads test data set 2 through add_variant_info_to_graph() and checks,
        with a single sparql query, the following triples:

        CGD:VariantID is an instance of OBO:SO_0001059
        CGD:VariantID is an instance of OBO:SO_0001583
        CGD:VariantID has the label "ABL1 T315I missense mutation"
        CGD:VariantID is_sequence_variant_instance_of (OBO:GENO_0000408) NCBIGene:25
        CGD:VariantID has location (faldo:location) AminoAcidRegionID
        CGD:VariantID has location (faldo:location) CDNARegionID
        CGD:VariantID has location (faldo:location) ChromosomalRegionID
        CGD:VariantID OBO:GENO_reference_amino_acid "T"
        CGD:VariantID OBO:GENO_results_in_amino_acid_change "I"
        CGD:VariantID owl:sameAs dbSNP:rs121913459
        CGD:VariantID owl:sameAs COSMIC:12560
        CGD:VariantID RO:0002205 (transcribed_to) CCDS:35166.1

        CCDS:35166.1 is an instance of OBO:SO_0000233
        CCDS:35166.1 has the label "CCDS35166.1"
        CCDS:35166.1 OBO:RO_0002513 (translates_to) UniProtKB:P00519#P00519-1
        CCDS:35166.1 OBO:RO_0002513 (translates_to) NCBIProtein:NP_005148.2

        UniProtKB:P00519#P00519-1 owl:sameAs NCBIProtein:NP_005148.2

        UniProtKB:P00519#P00519-1 is an instance of OBO:SO_0000104 (polypeptide)
        UniProtKB:P00519#P00519-1 has the label "P00519#P00519-1"

        NCBIProtein:NP_005148.2 is an instance of OBO:SO_0000104 (polypeptide)
        NCBIProtein:NP_005148.2 has the label "NP_005148.2"

        NOTE(review): the list above names dbSNP:rs121913459 and the uniprot
        label "P00519#P00519-1", while the code below uses the curie
        "dbSNP:121913459" and queries for the label "P00519-1" - confirm
        which side is right.
        """
        from dipper.utils.TestUtils import TestUtils

        self.cgd.add_variant_info_to_graph(self.test_set_2)

        # Make testutils object and load bindings
        graph_env = TestUtils(self.cgd.graph)
        curie_util = CurieUtil(self.curie_map)
        self.cgd.load_bindings()

        def to_uri(curie):
            return URIRef(curie_util.get_uri(curie))

        # Only some of the fields of the first record are used below, but
        # the record is unpacked in full.
        (variant_key, variant_label, amino_acid_variant, amino_acid_position,
         transcript_id, transcript_priority, protein_variant_type,
         functional_impact, stop_gain_loss, transcript_gene,
         protein_variant_source, variant_gene, bp_pos, variant_cdna, cosmic_id,
         db_snp_id, genome_pos_start, genome_pos_end, ref_base, variant_base,
         primary_transcript_exons, primary_transcript_variant_sub_types,
         variant_type, chromosome, genome_build, build_version,
         build_date) = self.test_set_2[0]

        # Expected identifiers
        gene_id = self.cgd.gene_map[transcript_gene]
        ref_amino_acid = "T"
        altered_amino_acid = "I"
        db_snp_curie = "dbSNP:121913459"
        cosmic_curie = "COSMIC:12560"
        uniprot_curie = "UniProtKB:P00519#P00519-1"
        uniprot_id = "P00519#P00519-1"
        refseq_curie = "NCBIProtein:NP_005148.2"
        transcript_curie = "CCDS:35166.1"
        ccds_id = "35166.1"
        position = 315
        chromosome_curie = "hg19chr9"

        variant_id = self.cgd.make_cgd_id('variant{0}'.format(variant_key))

        # Region and coordinate ids
        aa_region_id = ":_{0}{1}{2}Region".format(
            position, position, uniprot_curie)
        cdna_region_id = ":_{0}Region".format(transcript_curie)
        chr_region_id = ":_{0}{1}Region-{2}-{3}".format(
            genome_build, chromosome, genome_pos_start, genome_pos_end)
        aa_coord_id = ":_{0}-{1}".format(uniprot_id, position)
        cdna_coord_id = ":_{0}-{1}".format(ccds_id, bp_pos)
        # (a "CHR:{0}-{1}" form of the next id was tried and disabled)
        chr_coord_id = ":_{0}-{1}".format(chromosome_curie, genome_pos_start)

        # URIs
        variant_uri = to_uri(variant_id)
        transcript_uri = to_uri(transcript_curie)
        gene_uri = to_uri(gene_id)
        db_snp_uri = to_uri(db_snp_curie)
        cosmic_uri = to_uri(cosmic_curie)
        uniprot_uri = to_uri(uniprot_curie)
        refseq_uri = to_uri(refseq_curie)
        aa_region_uri = to_uri(aa_region_id)
        cdna_region_uri = to_uri(cdna_region_id)
        chr_region_uri = to_uri(chr_region_id)
        aa_coord_uri = to_uri(aa_coord_id)
        cdna_coord_uri = to_uri(cdna_coord_id)
        chr_coord_uri = to_uri(chr_coord_id)

        query_template = """
                       SELECT ?cosmic ?gene ?aaRegion ?cdnaRegion ?chrRegion
                              ?dbSNP ?transcript ?uniprot ?refseq
                              ?aaCoord ?cdnaCoord ?chrCoord
                       WHERE {{
                           ?cosmic a OBO:SO_0001059;
                               a OBO:SO_0001583 ;
                               OBO:GENO_0000408 ?gene ;
                               faldo:location ?aaRegion ;
                               faldo:location ?cdnaRegion ;
                               faldo:location ?chrRegion ;
                               OBO:GENO_reference_amino_acid "{0}" ;
                               OBO:GENO_reference_nucleotide "{1}" ;
                               OBO:GENO_altered_nucleotide "{2}" ;
                               OBO:GENO_results_in_amino_acid_change "{3}" ;
                               owl:sameAs ?dbSNP ;
                               RO:0002205 ?transcript .

                           ?cosmic owl:sameAs ?dbSNP .

                           ?transcript a OBO:SO_0000233 ;
                               rdfs:label "{4}" ;
                               OBO:RO_0002513 ?uniprot ;
                               OBO:RO_0002513 ?refseq .

                           ?uniprot a OBO:SO_0000104 ;
                               rdfs:label "P00519-1" .

                           ?refseq a OBO:SO_0000104 ;
                               rdfs:label "NP_005148.2" .

                           ?refseq owl:sameAs ?uniprot .

                           ?aaRegion faldo:begin ?aaCoord .
                           ?cdnaRegion faldo:begin ?cdnaCoord .
                           ?chrRegion faldo:begin ?chrCoord .

                           ?aaCoord faldo:position {5} .
                           ?cdnaCoord faldo:position {6} .
                           ?chrCoord faldo:position {7} .

                           ?dbSNP rdfs:label "{8}" .
                       }}
                       """
        # Values for the placeholders {0}..{8}, in order
        query_values = (
            ref_amino_acid, ref_base, variant_base,
            altered_amino_acid, transcript_id, position,
            bp_pos, genome_pos_start, db_snp_id,
        )
        sparql_query = query_template.format(*query_values)

        # Expected Results: a single row, in SELECT order
        expected_row = [
            cosmic_uri, gene_uri, aa_region_uri, cdna_region_uri,
            chr_region_uri, db_snp_uri, transcript_uri, uniprot_uri,
            refseq_uri, aa_coord_uri, cdna_coord_uri, chr_coord_uri,
        ]
        expected_results = [expected_row]

        # Query graph
        sparql_output = graph_env.query_graph(sparql_query)

        self.assertEqual(expected_results, sparql_output)
コード例 #15
0
ファイル: GraphUtils.py プロジェクト: JervenBolleman/dipper
class GraphUtils:
    """
    Helpers for adding frequently used triples (classes, individuals,
    labels, synonyms, deprecations, ...) to an RDF graph and for writing
    the graph out.

    Identifiers are handed in as CURIE strings and are expanded through
    the CurieUtil built from the curie_map given to the constructor.
    """

    # FIXME - i've duplicated relationships in Assoc and here -
    #         pick one or the other and refactor
    # TODO -  refactor using the getNode() method to clear out the
    #         URIRef(cu.get_uri(<id>)) nonsense

    OWLCLASS = OWL['Class']
    OWLIND = OWL['NamedIndividual']
    OWLRESTRICTION = OWL['Restriction']
    # OWLPROP and OBJPROP are two names for the same term
    OWLPROP = OWL['ObjectProperty']
    OBJPROP = OWL['ObjectProperty']
    ANNOTPROP = OWL['AnnotationProperty']
    DATAPROP = OWL['DatatypeProperty']
    SUBCLASS = RDFS['subClassOf']
    PERSON = FOAF['Person']

    annotation_properties = {
        'replaced_by': 'IAO:0100001',
        'consider': 'OIO:consider',
        'hasExactSynonym': 'OIO:hasExactSynonym',
        'hasRelatedSynonym': 'OIO:hasRelatedSynonym',
        'definition': 'IAO:0000115',
        'has_xref': 'OIO:hasDbXref',
        'clique_leader': 'MONARCH:cliqueLeader'
    }

    object_properties = {
        'has_disposition': 'GENO:0000208',
        'has_phenotype': 'RO:0002200',
        'in_taxon': 'RO:0002162',
        'has_quality': 'RO:0000086',
        'has_qualifier': 'GENO:0000580',
        'towards': 'RO:0002503',
        'has_subject': ':hasSubject',
        'has_object': ':hasObject',
        'has_predicate': ':hasPredicate',
        'is_about': 'IAO:0000136',
        'has_member': 'RO:0002351',
        'member_of': 'RO:0002350',
        'involved_in': 'RO:0002331',
        'enables': 'RO:0002327',
        'derives_from': 'RO:0001000',
        'part_of': 'BFO:0000050',
        'has_part': 'BFO:0000051',
        'mentions': 'IAO:0000142',
        'model_of': 'RO:0003301',
        'has_gene_product': 'RO:0002205',
        'existence_starts_at': 'UBERON:existence_starts_at',
        'existence_starts_during': 'RO:0002488',
        'existence_ends_at': 'UBERON:existence_ends_at',
        'existence_ends_during': 'RO:0002492',
        'starts_with': 'RO:0002224',
        'starts_during': 'RO:0002091',
        'ends_during': 'RO:0002093',
        'ends_with': 'RO:0002230',
        'occurs_in': 'BFO:0000066',
        'has_environment_qualifier': 'GENO:0000580',
        'has_begin_stage_qualifier': 'GENO:0000630',
        'has_end_stage_qualifier': 'GENO:0000631',
        'correlates_with': 'RO:0002610',
        'substance_that_treats': 'RO:0002606',
        'is_marker_for': 'RO:0002607',
        'contributes_to': 'RO:0002326',
        'has_origin': 'GENO:0000643',
        'has_author': 'ERO:0000232',
        'dc:source': 'dc:source',
        'dc:evidence': 'dc:evidence',
        'has_evidence': 'RO:0002558',
        'causally_upstream_of_or_within': 'RO:0002418'
    }

    datatype_properties = {
        'position': 'faldo:position',
        'has_measurement': 'IAO:0000004',
    }

    # Union of the three dictionaries above, keyed by property name
    properties = annotation_properties.copy()
    properties.update(object_properties)
    properties.update(datatype_properties)

    def __init__(self, curie_map, materialize_bnodes=False):
        """
        :param curie_map: mapping used to build the CurieUtil; its entry
                          for the empty prefix ('') is used as the base
                          namespace in _getNode()
        :param materialize_bnodes: stored as self.nobnodes
        """
        self.curie_map = curie_map
        self.cu = CurieUtil(curie_map)         # TEC: what is cu really?
        self.nobnodes = materialize_bnodes

    def addClassToGraph(self, g, id, label, type=None, description=None):
        """
        Declare a node as an owl:Class.

        The node always gets (node, rdf:type, owl:Class).  In addition:
        * if a label is given, (node, rdfs:label, literal(label))
        * if a type is given,
            the node will be an rdfs:subClassOf that type
        * if a description is given,
            it is added as a DC description
        :param g: the graph to add the triples to
        :param id: identifier of the class
        :param label: label of the class, or None
        :param type: identifier of the parent class, or None
        :param description: description text, or None
        :return: the graph g

        """

        n = self.getNode(id)

        g.add((n, RDF['type'], self.OWLCLASS))
        if label is not None:
            g.add((n, RDFS['label'], Literal(label)))
        if type is not None:
            t = URIRef(self.cu.get_uri(type))
            g.add((n, self.SUBCLASS, t))
        if description is not None:
            g.add((n, DC['description'], Literal(description)))
        return g

    def addIndividualToGraph(self, g, id, label, type=None, description=None):
        """
        Add an individual to the graph.

        The node is typed with the given type, or with
        owl:NamedIndividual when no type is supplied.  Label and
        description are only added when they are not None.
        :param g: the graph to add the triples to
        :param id: identifier of the individual
        :param label: label of the individual, or None
        :param type: identifier of the class the individual belongs to
        :param description: description text, or None
        :return: the graph g

        """
        n = self.getNode(id)

        if label is not None:
            g.add((n, RDFS['label'], Literal(label)))
        if type is not None:
            t = self.getNode(type)
            g.add((n, RDF['type'], t))
        else:
            g.add((n, RDF['type'], self.OWLIND))
        if description is not None:
            g.add((n, DC['description'], Literal(description)))
        return g

    def addOWLPropertyClassRestriction(
            self, g, class_id, property_id, property_value):
        """
        Make class_id a subclass of an owl:Restriction of the form
        "property_id some property_value".
        :param g: the graph to add the triples to
        :param class_id: identifier of the restricted class
        :param property_id: identifier of the property
        :param property_value: identifier of the class used as filler
        :return: None

        """

        # make a blank node to hold the property restrictions
        # scrub the colons, they will make the ttl parsers choke
        nid = '_' + property_id.replace(':', '') \
            + property_value.replace(':', '')
        n = self.getNode(nid)

        g.add((n, RDF['type'], self.OWLRESTRICTION))
        g.add((n, OWL['onProperty'], self.getNode(property_id)))
        g.add((n, OWL['someValuesFrom'], self.getNode(property_value)))

        g.add((self.getNode(class_id), self.SUBCLASS, n))

    def addEquivalentClass(self, g, id1, id2):
        """
        Add (id1, owl:equivalentClass, id2).
        Nothing is added if either identifier cannot be turned into a node.
        """
        n1 = self.getNode(id1)
        n2 = self.getNode(id2)

        if n1 is not None and n2 is not None:
            g.add((n1, OWL['equivalentClass'], n2))

    def addSameIndividual(self, g, id1, id2):
        """
        Add (id1, owl:sameAs, id2).
        Nothing is added if either identifier cannot be turned into a node.
        """
        n1 = self.getNode(id1)
        n2 = self.getNode(id2)

        if n1 is not None and n2 is not None:
            g.add((n1, OWL['sameAs'], n2))

    def addPerson(self, graph, person_id, person_label):
        """
        Type person_id as a foaf:Person and, if person_label is not None,
        give it that rdfs:label.
        """
        person = self.getNode(person_id)
        graph.add((person, RDF['type'], self.PERSON))
        if person_label is not None:
            graph.add((person, RDFS['label'], Literal(person_label)))

    def addDeprecatedClass(self, g, oldid, newids=None):
        """
        Will mark the oldid as a deprecated class.
        if one newid is supplied, it will mark it as replaced by.
        if >1 newid is supplied, it will mark it with consider properties
        :param g:
        :param oldid: the class id to deprecate
        :param newids: the class idlist that is
                       the replacement(s) of the old class.  Not required.
        :return:

        """

        n1 = URIRef(self.cu.get_uri(oldid))
        g.add((n1, RDF['type'], self.OWLCLASS))

        self._addReplacementIds(g, oldid, newids)

    def addDeprecatedIndividual(self, g, oldid, newids=None):
        """
        Will mark the oldid as a deprecated individual.
        if one newid is supplied, it will mark it as replaced by.
        if >1 newid is supplied, it will mark it with consider properties
        :param g:
        :param oldid: the individual id to deprecate
        :param newids: the individual idlist that is the replacement(s) of
                       the old individual.  Not required.
        :return:

        """

        n1 = URIRef(self.cu.get_uri(oldid))
        g.add((n1, RDF['type'], self.OWLIND))

        self._addReplacementIds(g, oldid, newids)

    def _addReplacementIds(self, g, oldid, newids):
        """
        Flag oldid as owl:deprecated and point it at its replacements.

        A single replacement is linked with the 'replaced_by' property,
        several replacements are each linked with 'consider'.
        :param g: the graph to add the triples to
        :param oldid: the identifier being deprecated
        :param newids: list of replacement identifiers, or None
        :return: None

        """
        consider = URIRef(self.cu.get_uri(self.properties['consider']))
        replaced_by = URIRef(self.cu.get_uri(self.properties['replaced_by']))

        n1 = URIRef(self.cu.get_uri(oldid))
        # The datatype has to be looked up by its name; indexing the
        # namespace with the builtin ``bool`` type does not give xsd:boolean
        g.add((n1, OWL['deprecated'],
               Literal(True, datatype=XSD['boolean'])))

        if newids is not None:
            predicate = replaced_by if len(newids) == 1 else consider
            for newid in newids:
                # ids are stripped in every case so that a lone replacement
                # is treated the same way as one of several
                n = URIRef(self.cu.get_uri(newid.strip()))
                g.add((n1, predicate, n))

    def addSubclass(self, g, parentid, childid):
        """
        Add (childid, rdfs:subClassOf, parentid).
        """
        p = URIRef(self.cu.get_uri(parentid))
        c = URIRef(self.cu.get_uri(childid))
        g.add((c, self.SUBCLASS, p))

    def addType(self, graph, subject_id, type, type_is_literal=False):
        """
        Add (subject_id, rdf:type, type).
        With type_is_literal=True the type is added as a Literal
        instead of being resolved to a node.
        """
        # FIXME check this... i don't think a type should ever be a literal
        if type_is_literal is True:
            graph.add((self.getNode(subject_id), RDF['type'], Literal(type)))
        else:
            graph.add(
                (self.getNode(subject_id), RDF['type'], self.getNode(type)))

    def addLabel(self, graph, subject_id, label):
        """
        Add (subject_id, rdfs:label, Literal(label)).
        """
        graph.add(
            (self.getNode(subject_id), RDFS['label'], Literal(label)))

    def addSynonym(self, g, cid, synonym, synonym_type=None):
        """
        Add the synonym as a property of the class cid.
        Assume it is an exact synonym, unless otherwise specified
        :param g:
        :param cid: class id
        :param synonym: the literal synonym label
        :param synonym_type: the CURIE of the synonym type (not the URI)
        :return:

        """
        n = self.getNode(cid)
        if synonym_type is None:
            # default
            synonym_type = self.properties['hasExactSynonym']
        predicate = URIRef(self.cu.get_uri(synonym_type))

        g.add((n, predicate, Literal(synonym)))

    def addDefinition(self, g, cid, definition):
        """
        Add the definition (as a Literal) to cid using the 'definition'
        property.  Nothing is added when definition is None.
        """
        if definition is not None:
            n = self.getNode(cid)
            p = URIRef(self.cu.get_uri(self.properties['definition']))
            g.add((n, p, Literal(definition)))

    def addXref(self, g, cid, xrefid, xref_as_literal=False):
        """
        Link cid to xrefid with the 'has_xref' property; the xref is
        added as a Literal when xref_as_literal is True.
        """
        self.addTriple(
            g, cid, self.properties['has_xref'], xrefid, xref_as_literal)

    def addDepiction(self, g, subject_id, image_url):
        """
        Add (subject_id, foaf:depiction, Literal(image_url)).
        """
        g.add(
            (self.getNode(subject_id), FOAF['depiction'], Literal(image_url)))

    def addComment(self, g, subject_id, comment):
        """
        Add the whitespace-stripped comment under DC['comment'].
        """
        g.add(
            (self.getNode(subject_id), DC['comment'],
             Literal(comment.strip())))

    def addDescription(self, g, subject_id, description):
        """
        Add the whitespace-stripped description under DC['description'].
        """
        g.add(
            (self.getNode(subject_id), DC['description'],
             Literal(description.strip())))

    def addPage(self, g, subject_id, page_url):
        """
        Add (subject_id, foaf:page, Literal(page_url)).
        """
        g.add(
            (self.getNode(subject_id), FOAF['page'], Literal(page_url)))

    def addTitle(self, g, subject_id, title):
        """
        Add (subject_id, DC['title'], Literal(title)).
        """
        g.add(
            (self.getNode(subject_id), DC['title'], Literal(title)))

    def addMember(self, g, group_id, member_id):
        """
        Add (group_id, has_member, member_id).
        """
        self.addTriple(
            g, group_id, self.properties['has_member'], member_id)

    def addMemberOf(self, g, member_id, group_id):
        """
        Add (member_id, member_of, group_id).
        """
        self.addTriple(
            g, member_id, self.properties['member_of'], group_id)

    def addInvolvedIn(self, g, member_id, group_id):
        """
        Add (member_id, involved_in, group_id).
        """
        self.addTriple(
            g, member_id, self.properties['involved_in'], group_id)

    def write(self, graph, fileformat=None, file=None):
        """
         a basic graph writer (to stdout) for any of the sources.
         this will write raw triples in rdfxml, unless specified.
         to write turtle, specify fileformat='turtle'
         an optional file can be supplied instead of stdout
        :param graph: the graph to serialize
        :param fileformat: serialization format, 'rdfxml' when None
        :param file: path of the output file, stdout when None
        :return: None

        """
        if fileformat is None:
            fileformat = 'rdfxml'
        if file is not None:
            # the context manager closes the file even if serializing fails
            with open(file, 'wb') as filewriter:
                logger.info("Writing triples in %s to %s", fileformat, file)
                graph.serialize(filewriter, format=fileformat)
        else:
            serialized = graph.serialize(format=fileformat)
            # depending on the library version the result is bytes or str
            if isinstance(serialized, bytes):
                serialized = serialized.decode()
            print(serialized)

    def write_raw_triples(self, graph, file=None):
        """
         write every triple of the graph on a line of its own,
         as "subject predicate object" separated by single spaces.
         an optional file can be supplied instead of stdout
        :param graph: the graph whose triples are written
        :param file: path of the output file, stdout when None
        :return: None
        """
        filewriter = None
        if file is not None:
            filewriter = open(file, 'w')
            logger.info("Writing raw triples to %s", file)

        try:
            for (s, p, o) in graph:
                # file=None makes print() fall back to stdout
                print(' '.join((s, p, o)), file=filewriter)
        finally:
            if filewriter is not None:
                filewriter.close()

    def write_compact_triples(self, graph, file=None):
        """
        Will write out the raw triples,
        except it will replace the full uri with the curie prefix
        Not implemented yet: currently does nothing.
        :param graph:
        :param file:
        :return:
        """
        # TODO

        return

    def _getNode(self, id, materialize_bnode):
        """
        This is a wrapper for creating a node with a given identifier.
        If an id starts with an underscore, it assigns it to a BNode, otherwise
        it creates it with a standard URIRef. Alternatively,
        if materialize_bnode is True,
        it will add any nodes that would have been blank into the BASE space.
        An id that starts with a colon is placed in the BASE space as well.
        This will return None if it can't map the node properly
        (including when the id itself is None).
        :param id:
        :param materialize_bnode: put underscore ids into the BASE
                                  namespace instead of making a BNode
        :return: the node, or None
        """
        base = Namespace(self.curie_map.get(''))

        if id is None:
            logger.error("couldn't make URI for %s", id)
            return None

        if id.startswith('_'):
            if materialize_bnode is True:
                return base[id]
            # drop the leading underscore to make it cleaner
            return BNode(id[1:])

        if id.startswith(':'):  # do we need to remove embedded ID colons?
            return base[id[1:]]

        u = self.cu.get_uri(id)
        if u is None:
            logger.error("couldn't make URI for %s", id)
            return None
        return URIRef(u)

    def getNode(self, id, materialize_bnode=False):
        """
        Public entry point for _getNode().
        NOTE(review): self.nobnodes is not consulted here; callers that want
        materialized blank nodes have to pass materialize_bnode themselves.
        """

        return self._getNode(id, materialize_bnode)

    def addTriple(
            self, graph, subject_id, predicate_id, object,
            object_is_literal=False):
        """
        Add (subject_id, predicate_id, object) to the graph.
        Subject and predicate are resolved with getNode(); the object is
        wrapped in a Literal when object_is_literal is True and resolved
        with getNode() otherwise.
        """
        if object_is_literal is True:
            graph.add(
                (self.getNode(subject_id), self.getNode(predicate_id),
                 Literal(object)))
        else:
            graph.add(
                (self.getNode(subject_id), self.getNode(predicate_id),
                 self.getNode(object)))

    def loadObjectProperties(self, graph, op):
        """
        Given a graph, it will load the supplied object properties
        as owl['ObjectProperty'] types
        A convenience.
        Status: DEPRECATED.  See loadProperties().
        :param graph:
        :param op: a dictionary of object properties
        :return: None

        """
        self.loadProperties(graph, op, self.OBJPROP)

    def loadProperties(self, graph, op, property_type):
        """
        Given a graph, it will load the supplied object properties
        as the given property_type.
        An unsupported property_type is logged as an error and
        nothing is added.
        :param graph: a graph
        :param op: a dictionary of object properties
        :param property_type: one of OWL:(Annotation|Data|Object)Property
        :return: None

        """

        if property_type not in [self.OBJPROP, self.ANNOTPROP, self.DATAPROP]:
            logger.error(
                "bad property type assigned: %s, %s", property_type, op)
            return

        # only the identifiers (the dictionary values) end up in the graph
        for property_id in op.values():
            graph.add(
                (self.getNode(property_id), RDF['type'], property_type))

    def loadAllProperties(self, graph):
        """
        A convenience to load all stored properties
        (object, data, and annotation) into the supplied graph.
        :param graph:
        :return:

        """

        self.loadProperties(graph, self.object_properties, self.OBJPROP)
        self.loadProperties(graph, self.annotation_properties, self.ANNOTPROP)
        self.loadProperties(graph, self.datatype_properties, self.DATAPROP)

    def addOntologyDeclaration(self, graph, ontology_id):
        """
        Type ontology_id as an owl:Ontology.
        """

        graph.add((self.getNode(ontology_id), RDF['type'], OWL['Ontology']))

    def addOWLVersionIRI(self, graph, ontology_id, version_iri):
        """
        Add (ontology_id, owl:versionIRI, version_iri).
        """
        graph.add(
            (self.getNode(ontology_id), OWL['versionIRI'],
             self.getNode(version_iri)))

    def addOWLVersionInfo(self, graph, ontology_id, version_info):
        """
        Add (ontology_id, owl:versionInfo, Literal(version_info)).
        """
        graph.add(
            (self.getNode(ontology_id), OWL['versionInfo'],
             Literal(version_info)))

    def makeLeader(self, graph, node_id):
        """
        Add an annotation property to the given ```node_id```
        to be the clique_leader.
        This is a monarchism.
        :param graph:
        :param node_id:
        :return:
        """
        # xsd:boolean has to be looked up by name, not by the builtin type
        self.addTriple(
            graph, node_id, self.annotation_properties['clique_leader'],
            Literal(True, datatype=XSD['boolean']), True)
コード例 #16
0
ファイル: test_genotype.py プロジェクト: zzygyx9119/mckb
    def test_missense_variant_protein_model(self):
        """
        Missense variant for which only protein information is available.

        Test data set 1 is loaded with add_variant_info_to_graph() and the
        graph is queried for the following triples:

        CGD:VariantID is an instance of OBO:SO_0001059
        CGD:VariantID is an instance of OBO:SO_0001583
        CGD:VariantID has the label "CSF3R Q741X  missense mutation"
        CGD:VariantID is_sequence_variant_instance_of (OBO:GENO_0000408) NCBIGene:1441
        CGD:VariantID has location (faldo:location) CGD:RegionID
        CGD:VariantID OBO:GENO_reference_amino_acid "Q"
        CGD:VariantID OBO:GENO_results_in_amino_acid_change "X"
        CGD:VariantID RO:0002205 CCDS:413.1

        CCDS:413.1 is an instance of OBO:GENO_primary
        CCDS:413.1 has the label "CCDS413.1"
        """
        from dipper.utils.TestUtils import TestUtils

        self.cgd.add_variant_info_to_graph(self.test_set_1)

        # Query helper around the populated graph, then the bindings
        test_env = TestUtils(self.cgd.graph)
        curie_util = CurieUtil(self.curie_map)
        self.cgd.load_bindings()

        def as_uri(curie):
            return URIRef(curie_util.get_uri(curie))

        # First eleven columns of the first row; the underscore-prefixed
        # names are not used by this test
        first_row = self.test_set_1[0]
        (variant_key, variant_label, _amino_acid_variant,
         _amino_acid_position, transcript_id, _transcript_priority,
         _protein_variant_type, _functional_impact, _stop_gain_loss,
         transcript_gene, _protein_variant_source) = first_row[0:11]

        gene_id = self.cgd.gene_map[transcript_gene]
        ref_amino_acid = "Q"
        altered_amino_acid = "X"
        position = 741
        uniprot_curie = "UniProtKB:Q99062#Q99062-1"

        variant_id = self.cgd.make_cgd_id('variant{0}'.format(variant_key))
        transcript = "CCDS:413.1"
        region_id = ":_{0}{1}{2}Region".format(
            position, position, uniprot_curie)

        variant_uri = as_uri(variant_id)
        transcript_uri = as_uri(transcript)
        gene_uri = as_uri(gene_id)
        region_uri = as_uri(region_id)

        query = """
                       SELECT ?variant ?gene ?region ?transcript
                       WHERE {{
                           ?variant a OBO:SO_0001059;
                               a OBO:SO_0001583 ;
                               rdfs:label "{0}" ;
                               OBO:GENO_0000408 ?gene ;
                               faldo:location ?region ;
                               OBO:GENO_reference_amino_acid "{1}" ;
                               OBO:GENO_results_in_amino_acid_change "{2}" ;
                               RO:0002205 ?transcript .

                           ?transcript a OBO:SO_0000233 ;
                               rdfs:label "{3}" .
                       }}
                       """.format(
            variant_label, ref_amino_acid, altered_amino_acid, transcript_id)

        # One row, in the column order of the SELECT clause
        expected_row = [variant_uri, gene_uri, region_uri, transcript_uri]

        actual = test_env.query_graph(query)

        self.assertEqual([expected_row], actual)
コード例 #17
0
ファイル: test_genotype.py プロジェクト: zzygyx9119/mckb
    def test_chromosome_position_model(self):
        """
        Genomic positions of a variant.

        Test data set 2 is loaded with add_variant_info_to_graph(); the
        graph must then contain a faldo:Region whose begin and end
        positions carry the genomic start/end coordinates and reference
        the same chromosome.
        """
        from dipper.utils.TestUtils import TestUtils
        self.cgd.add_variant_info_to_graph(self.test_set_2)

        # Query helper around the populated graph, then the bindings
        test_env = TestUtils(self.cgd.graph)
        curie_util = CurieUtil(self.curie_map)
        self.cgd.load_bindings()

        def as_uri(curie):
            return URIRef(curie_util.get_uri(curie))

        # Full first row; the underscore-prefixed names are not used here
        (variant_key, _variant_label, _amino_acid_variant,
         _amino_acid_position, _transcript_id, _transcript_priority,
         _protein_variant_type, _functional_impact, _stop_gain_loss,
         _transcript_gene, _protein_variant_source, _variant_gene, _bp_pos,
         _variant_cdna, _cosmic_id, _db_snp_id, genome_pos_start,
         genome_pos_end, _ref_base, _variant_base,
         _primary_transcript_exons, _primary_transcript_variant_sub_types,
         _variant_type, chromosome, genome_build, _build_version,
         _build_date) = self.test_set_2[0]

        variant_id = self.cgd.make_cgd_id('variant{0}'.format(variant_key))

        chromosome_curie = ":MONARCH_hg19chr9"
        region_id = ":_{0}{1}Region-{2}-{3}".format(
            genome_build, chromosome, genome_pos_start, genome_pos_end)
        start_id = ":_hg19chr9-{0}".format(genome_pos_start)
        end_id = ":_hg19chr9-{0}".format(genome_pos_end)

        region_uri = as_uri(region_id)
        start_uri = as_uri(start_id)
        end_uri = as_uri(end_id)
        chromosome_uri = as_uri(chromosome_curie)

        query = """
                       SELECT ?region ?startPosition ?endPosition ?chromosome
                       WHERE {{
                           ?region a faldo:Region ;
                               faldo:begin ?startPosition ;
                               faldo:end ?endPosition .

                           ?startPosition a faldo:Position ;
                               faldo:position {0} ;
                               faldo:reference ?chromosome .

                           ?endPosition a faldo:Position ;
                               faldo:position {1} ;
                               faldo:reference ?chromosome .
                       }}
                       """.format(genome_pos_start, genome_pos_end)

        # One row, in the column order of the SELECT clause
        expected_row = [region_uri, start_uri, end_uri, chromosome_uri]

        actual = test_env.query_graph(query)

        self.assertEqual([expected_row], actual)
コード例 #18
0
ファイル: test_genotype.py プロジェクト: zzygyx9119/mckb
    def test_genome_build_chromosome_model(self):
        """
        Genome, genome build and chromosome modelling.

        Test data set 2 is loaded with add_variant_info_to_graph(); the
        graph is then queried for the genome and chromosome classes, the
        build, and the chromosome on that build.
        """
        from dipper.utils.TestUtils import TestUtils
        self.cgd.add_variant_info_to_graph(self.test_set_2)

        # Query helper around the populated graph, then the bindings
        test_env = TestUtils(self.cgd.graph)
        curie_util = CurieUtil(self.curie_map)
        self.cgd.load_bindings()

        def as_uri(curie):
            return URIRef(curie_util.get_uri(curie))

        # Identifiers and labels the graph is expected to contain
        genome, genome_label = ":9606genome", "Human genome"
        chromosome, chromosome_label = "CHR:9606chr9", "chr9 (Human)"
        build_curie, build_label = "UCSC:hg19", "hg19"
        chrom_on_build, chrom_build_label = ":MONARCH_hg19chr9", "chr9 (hg19)"

        genome_uri = as_uri(genome)
        chromosome_uri = as_uri(chromosome)
        build_uri = as_uri(build_curie)
        chrom_on_build_uri = as_uri(chrom_on_build)

        # The string statement below is a disabled variant of the query;
        # it is evaluated and discarded, only the query after it is run.
        '''
        sparql_query = """
                       SELECT ?genome ?chromosome ?build ?chromOnBuild
                       WHERE {{
                           ?genome a owl:Class ;
                               rdfs:label "{0}" ;
                               OBO:RO_0002162 OBO:NCBITaxon_9606 ;
                               OBO:RO_0002351 ?chromosome ;
                               rdfs:subClassOf OBO:SO_0001026 .

                           ?chromosome a owl:Class ;
                               rdfs:label "{1}" ;
                               OBO:RO_0002350 ?genome ;
                               rdfs:subClassOf OBO:SO_0000340 .

                           ?build a OBO:SO_0001505 ;
                               a ?genome ;
                               rdfs:label "{2}" ;
                               OBO:RO_0002351 ?chromOnBuild ;
                               rdfs:subClassOf ?genome .

                           ?chromOnBuild a ?chromosome ;
                               rdfs:label "{3}" ;
                               OBO:RO_0002350 ?build .
                       }}
                       """.format(genome_label, chromosome_label,
                                  build_label, chrom_build_label)
        '''
        query = """
                       SELECT ?genome ?chromosome ?build ?chromOnBuild
                       WHERE {{
                           ?genome a owl:Class ;
                               rdfs:label "{0}" ;
                               rdfs:subClassOf OBO:SO_0001026 .

                           ?chromosome a owl:Class ;
                               rdfs:label "{1}" ;
                               rdfs:subClassOf OBO:SO_0000340 .

                           ?build a OBO:SO_0001505 ;
                               a ?genome ;
                               rdfs:label "{2}" ;
                               OBO:RO_0002162 OBO:NCBITaxon_9606 ;
                               OBO:RO_0002351 ?chromOnBuild .

                           ?chromOnBuild a ?chromosome ;
                               a OBO:SO_0000340 ;
                               rdfs:label "{3}" ;
                               OBO:RO_0002350 ?build .
                       }}
                       """.format(
            genome_label, chromosome_label, build_label, chrom_build_label)

        # One row, in the column order of the SELECT clause
        expected_row = [
            genome_uri, chromosome_uri, build_uri, chrom_on_build_uri]

        actual = test_env.query_graph(query)

        self.assertEqual([expected_row], actual)
コード例 #19
0
ファイル: test_genotype.py プロジェクト: zzygyx9119/mckb
    def test_variant_position_region_model(self):
        """
        Position of a variant on its transcript.

        Test data set 2 is loaded with add_variant_info_to_graph() and the
        graph is queried for the following triples:

        CGD:RegionID is an instance of faldo:Region
        CGD:RegionID faldo:begin BothStrandPositionID
        CGD:RegionID faldo:end BothStrandPositionID

        CGD:BothStrandPositionID is an instance of faldo:BothStrandPosition
        CGD:BothStrandPositionID is an instance of faldo:Position
        CGD:BothStrandPositionID faldo:position 944
        CGD:BothStrandPositionID faldo:reference CGD:TranscriptID
        """
        from dipper.utils.TestUtils import TestUtils
        self.cgd.add_variant_info_to_graph(self.test_set_2)

        # Query helper around the populated graph, then the bindings
        test_env = TestUtils(self.cgd.graph)
        curie_util = CurieUtil(self.curie_map)
        self.cgd.load_bindings()

        def as_uri(curie):
            return URIRef(curie_util.get_uri(curie))

        # Full first row; the underscore-prefixed names are not used here
        (variant_key, _variant_label, _amino_acid_variant,
         _amino_acid_position, transcript_id, _transcript_priority,
         _protein_variant_type, _functional_impact, _stop_gain_loss,
         _transcript_gene, _protein_variant_source, _variant_gene, bp_pos,
         _variant_cdna, _cosmic_id, _db_snp_id, _genome_pos_start,
         _genome_pos_end, _ref_base, _variant_base,
         _primary_transcript_exons, _primary_transcript_variant_sub_types,
         _variant_type, _chromosome, _genome_build, _build_version,
         _build_date) = self.test_set_2[0]

        transcript_curie = self.cgd._make_transcript_curie(transcript_id)
        ccds_id = "35166.1"
        variant_id = self.cgd.make_cgd_id('variant{0}'.format(variant_key))

        region_id = ":_{0}Region".format(transcript_curie)
        both_strand_id = ":_{0}-{1}".format(ccds_id, bp_pos)

        region_uri = as_uri(region_id)
        both_strand_uri = as_uri(both_strand_id)
        ccds_uri = as_uri(transcript_curie)

        query = """
                       SELECT ?region ?bsPosition ?transcript
                       WHERE {{
                           ?region a faldo:Region ;
                               faldo:begin ?bsPosition ;
                               faldo:end ?bsPosition .

                           ?bsPosition a faldo:Position ;
                               faldo:position {0} ;
                               faldo:reference ?transcript .
                       }}
                       """.format(bp_pos)

        # One row, in the column order of the SELECT clause
        expected_row = [region_uri, both_strand_uri, ccds_uri]

        actual = test_env.query_graph(query)

        self.assertEqual([expected_row], actual)
コード例 #20
0
ファイル: test_genotype.py プロジェクト: zzygyx9119/mckb
    def test_amino_acid_position_region_model(self):
        """
        Position of an amino acid change on its protein.

        Test data set 1 is loaded with add_variant_info_to_graph() and the
        graph is queried for the following triples:

        CGD:RegionID is an instance of faldo:Region
        CGD:RegionID faldo:begin BothStrandPositionID
        CGD:RegionID faldo:end BothStrandPositionID

        CGD:BothStrandPositionID is an instance of faldo:BothStrandPosition
        CGD:BothStrandPositionID is an instance of faldo:Position
        CGD:BothStrandPositionID faldo:position 741
        CGD:BothStrandPositionID faldo:reference UniProtID
        """
        from dipper.utils.TestUtils import TestUtils
        self.cgd.add_variant_info_to_graph(self.test_set_1)

        # Query helper around the populated graph, then the bindings
        test_env = TestUtils(self.cgd.graph)
        curie_util = CurieUtil(self.curie_map)
        self.cgd.load_bindings()

        def as_uri(curie):
            return URIRef(curie_util.get_uri(curie))

        # First eleven columns of the first row; the underscore-prefixed
        # names are not used by this test
        first_row = self.test_set_1[0]
        (variant_key, _variant_label, _amino_acid_variant,
         _amino_acid_position, _transcript_id, _transcript_priority,
         _protein_variant_type, _functional_impact, _stop_gain_loss,
         _transcript_gene, _protein_variant_source) = first_row[0:11]

        position = 741
        variant_id = self.cgd.make_cgd_id('variant{0}'.format(variant_key))

        uniprot_curie = "UniProtKB:Q99062#Q99062-1"
        uniprot_id = "Q99062#Q99062-1"
        region_id = ":_{0}{1}{2}Region".format(
            position, position, uniprot_curie)
        both_strand_id = ":_{0}-{1}".format(uniprot_id, position)

        region_uri = as_uri(region_id)
        both_strand_uri = as_uri(both_strand_id)
        uniprot_uri = as_uri(uniprot_curie)

        query = """
                       SELECT ?region ?bsPosition ?protein
                       WHERE {{
                           ?region a faldo:Region ;
                               faldo:begin ?bsPosition ;
                               faldo:end ?bsPosition .

                           ?bsPosition a faldo:Position ;
                               faldo:position {0} ;
                               faldo:reference ?protein .
                       }}
                       """.format(position)

        # One row, in the column order of the SELECT clause
        expected_row = [region_uri, both_strand_uri, uniprot_uri]

        actual = test_env.query_graph(query)

        self.assertEqual([expected_row], actual)
コード例 #21
0
ファイル: test_impc.py プロジェクト: tegar9000/dipper-1
    def setUp(self):
        """
        Build the fixtures shared by the tests: a TestUtils helper, the
        association/evidence identifiers, one sample input row and the
        IRI of the test association.
        """
        self.test_util = TestUtils()
        self.assoc_curie = 'MONARCH:test_association'
        self.eco_id = 'ECO:0000015'

        # One input row; the trailing comment on each value is its column
        # number, followed by the column header
        self.test_set_1 = (
            'MGI:1920145',              # 01 marker_accession_id
            'Setd5',                    # 02 marker_symbol
            'WTSI',                     # 03 phenotyping_center
            'MEFW',                     # 04 colony_raw
            'male',                     # 05 sex
            'heterozygote',             # 06 zygosity
            'MGI:4432631',              # 07 allele_accession_id
            'Setd5<tm1a(EUCOMM)Wtsi>',  # 08 allele_symbol
            # 09 allele_name
            'targeted mutation 1a, Wellcome Trust Sanger Institute',
            'MGI:2159965',              # 10 strain_accession_id
            'C57BL/6N',                 # 11 strain_name
            'MGP',                      # 12 project_name
            # 13 project_fullname
            'Wellcome Trust Sanger Institute Mouse Genetics Project',
            'MGP Select Pipeline',      # 14 pipeline_name
            'MGP_001',                  # 15 pipeline_stable_id
            'MGP_XRY_001',              # 16 procedure_stable_id
            'X-ray',                    # 17 procedure_name
            'IMPC_XRY_008_001',         # 18 parameter_stable_id
            'Number of ribs right',     # 19 parameter_name
            'MP:0005390',               # 20 top_level_mp_term_id
            'skeleton phenotype',       # 21 top_level_mp_term_name
            'MP:0000480',               # 22 mp_term_id
            'increased rib number',     # 23 mp_term_name
            '1.637023E-010',            # 24 p_value
            '',                         # 25 percentage_change
            '8.885439E-007',            # 26 effect_size
            # 27 statistical_method
            'Wilcoxon rank sum test with continuity correction',
            'IMPC',                     # 28 resource_name
        )

        # Test curies; outside the tests these are generated within
        # _add_evidence() and _add_study_provenance().
        # These blank nodes are hardcoded as NOT Skolemized  ...
        self.study_curie = "_:study"
        self.evidence_curie = "_:evidence"

        # IRI for testing sparql output
        expander = CurieUtil(curie_map.get())
        self.assoc_iri = URIRef(expander.get_uri(self.assoc_curie))
コード例 #22
0
ファイル: test_rdfgraph.py プロジェクト: tegar9000/dipper-1
class RDFGraphTestCase(unittest.TestCase):
    def setUp(self):
        """
        Build a fresh RDFGraph and a CurieUtil, then expand the CURIEs
        that the category tests use into IRIs.
        """
        self.graph = RDFGraph()
        self.cutil = CurieUtil(curie_map.get())

        # every fixture IRI below is expanded through the same CurieUtil
        expand = self.cutil.get_uri

        # subject used by the test triples
        self.test_cat_subj = "http://www.google.com"

        # category predicates: the default one and a non-default alternative
        self.test_cat_default_pred = expand("biolink:category")
        self.test_cat_nondefault_pred = expand("rdf:type")

        # category objects: the default one and a non-default alternative
        self.test_cat_default_category = expand("biolink:NamedThing")
        self.test_cat_nondefault_category = expand("biolink:Gene")

        # predicate/object pair used for the object-category test
        self.test_cat_type = expand("rdf:type")
        self.test_cat_class = expand("rdf:class")

    def tearDown(self):
        """
        Drop the reference to the graph created in setUp().
        """
        self.graph = None

    def test_add_triple_makes_triple(self):
        """
        test that addTriple() makes at least one triple
        """
        self.graph.addTriple(
            subject_id=self.test_cat_subj,
            predicate_id="rdf:type",
            obj="rdf:class")
        # assertGreater reports the actual triple count on failure,
        # which assertTrue(len(...) > 0) would hide
        self.assertGreater(
            len(self.graph), 0, "addTriple() didn't make >=1 triple")

    def test_add_triple_subject_category_assignment(self):
        """
        test that addTriple() called with a subject_category emits exactly
        one category triple for the subject, pointing at that category
        """
        self.graph.addTriple(subject_id=self.test_cat_subj,
                             predicate_id="rdf:comment",
                             obj="website",
                             subject_category=self.test_cat_nondefault_category)

        # look up (subject, default category predicate, anything)
        category_pattern = (
            URIRef(self.test_cat_subj),
            URIRef(self.test_cat_default_pred),
            None,
        )
        matches = list(self.graph.triples(category_pattern))

        self.assertEqual(
            len(matches), 1,
            "addTriples() didn't make exactly one triple subject category")

        assigned_category = matches[0][2]
        self.assertEqual(
            assigned_category, URIRef(self.test_cat_nondefault_category),
            "addTriples() didn't assign the right triple subject category")

    def test_add_triple_object_category_assignment(self):
        """
        test that addTriple() called with an object_category emits exactly
        one category triple for the object, pointing at that category
        """
        self.graph.addTriple(
            subject_id=self.test_cat_subj,
            predicate_id=self.test_cat_type,
            obj=self.test_cat_class,
            object_category=self.test_cat_nondefault_category)

        # look up (object, default category predicate, anything)
        category_pattern = (
            URIRef(self.test_cat_class),
            URIRef(self.test_cat_default_pred),
            None,
        )
        matches = list(self.graph.triples(category_pattern))

        self.assertEqual(
            len(matches), 1,
            "addTriples() didn't make exactly one triple object category")

        assigned_category = matches[0][2]
        self.assertEqual(
            assigned_category, URIRef(self.test_cat_nondefault_category),
            "addTriples() didn't assign the right triple object category")

    def read_graph_from_turtle_file(self, f):
        """
        Parse the given turtle file into a new RDFGraph and assert that the
        resulting graph is not empty.  A simple parsing test.
        :param f: turtle file to parse
        :return: None

        """
        turtle_graph = RDFGraph()
        # the absolute path is only used for log and failure messages;
        # parsing is done on f exactly as it was passed in
        abs_path = os.path.abspath(f)

        logger.info("Testing reading turtle file from %s", abs_path)
        turtle_graph.parse(f, format="turtle")

        found = len(turtle_graph)
        logger.info('Found %s graph nodes in %s', found, abs_path)
        self.assertTrue(found > 0, "No nodes found in " + abs_path)

    def read_graph_into_owl(self, f):
        """
        test if the ttl can be parsed by owlparser
        this expects owltools to be accessible from commandline
        :param f: file of ttl
        :return: None
        """

        import subprocess

        # subprocess.call() hands back the exit status.  check_call() would
        # instead raise CalledProcessError on a non-zero exit, so the status
        # check and the error log below could never observe a failure.
        # stderr is folded into stdout.
        status = subprocess.call(["owltools", f], stderr=subprocess.STDOUT)

        # an exit status of zero means success
        if status != 0:
            logger.error('finished verifying with owltools with status %s',
                         status)
        self.assertEqual(status, 0)

    def test_make_category_triple_default(self):
        """
        test that _make_category_triple() called with only a subject adds a
        single triple using the default predicate and default category
        """
        self.graph._make_category_triple(self.test_cat_subj)

        everything = list(self.graph.triples((None, None, None)))
        self.assertEqual(
            len(everything), 1, "method didn't make exactly one triple")

        # (position in the triple, expected IRI, failure message)
        expectations = (
            (0, self.test_cat_subj, "didn't assign correct subject"),
            (1, self.test_cat_default_pred,
             "didn't assign correct predicate"),
            (2, self.test_cat_default_category,
             "didn't assign correct category"),
        )
        only_triple = everything[0]
        for position, expected_iri, message in expectations:
            self.assertEqual(
                only_triple[position], URIRef(expected_iri), message)

    def test_make_category_triple_non_default_category(self):
        """
        test that _make_category_triple() given an explicit category adds a
        single triple whose object is that category
        """
        self.graph._make_category_triple(
            self.test_cat_subj, self.test_cat_nondefault_category)
        everything = list(self.graph.triples((None, None, None)))

        self.assertEqual(
            len(everything), 1, "method didn't make exactly one triple")

        category_node = everything[0][2]
        self.assertEqual(
            URIRef(self.test_cat_nondefault_category), category_node,
            "didn't assign correct (non-default) category")

    def test_make_category_triple_non_default_pred(self):
        """
        test that method adds category triple to graph correctly (non default pred)
        """
        self.graph._make_category_triple(
            self.test_cat_subj,
            self.test_cat_default_category,
            predicate=self.test_cat_nondefault_pred)
        triples = list(self.graph.triples((None, None, None)))
        self.assertEqual(len(triples), 1,
                         "method didn't make exactly one triple")
        # position 1 of the triple is the predicate, so the failure message
        # has to talk about the predicate rather than the category
        self.assertEqual(URIRef(self.test_cat_nondefault_pred), triples[0][1],
                         "didn't assign correct (non-default) predicate")

    def test_make_category_triple_category_none_should_emit_named_thing(self):
        """
        test that _make_category_triple() called with category=None adds a
        single triple whose object is the default category
        """
        self.graph._make_category_triple(self.test_cat_subj, category=None)

        everything = list(self.graph.triples((None, None, None)))
        self.assertEqual(
            len(everything), 1, "method didn't make exactly one triple")

        category_node = everything[0][2]
        self.assertEqual(
            URIRef(self.test_cat_default_category), category_node,
            "didn't assign correct default category")

    def test_is_literal(self):
        """
        test that _is_literal() accepts a plain value and rejects a CURIE
        as well as http, https and ftp IRIs
        """
        self.assertTrue(self.graph._is_literal("1"))

        # none of these should be classified as a literal
        non_literals = (
            "foo:bar",
            "http://www.zombo.com/",
            "https://www.zombo.com/",
            "ftp://ftp.1000genomes.ebi.ac.uk/",
        )
        for candidate in non_literals:
            self.assertTrue(not self.graph._is_literal(candidate))