class TestGwasSNPModel(unittest.TestCase): """ Test the modelling of a SNP to trait association from sample GWAS catalog data """ def setUp(self): self.test_util = TestUtils() self.source = GWASCatalog('rdf_graph', True) self.source.graph = RDFGraph(True) # Reset graph self.source.graph.bind_all_namespaces() self.test_data = { 'snp_label': 'rs1491921-C', 'chrom_num': '5', 'chrom_pos': '21259029', 'context': 'intergenic_variant', 'allele_freq': '0.013', 'trait': 'Diisocyanate-induced asthma', 'trait_uri': 'http://www.ebi.ac.uk/efo/EFO_0006995, http://www.ebi.ac.uk/efo/EFO_0003949', 'pvalue': '0.0000007', 'merged': '0', 'snp_id_current': '1491921', 'mapped_gene': 'LOC102723561 - GUSBP1', 'snp_gene_nums': '', 'upstream_gene_num': '107986179', 'downstream_gene_num': '107986180', 'init_sample_desc': '74 European ancestry cases, 824 European ancestry controls', 'replicated_sample_desc': 'NA', 'platform': 'Illumina [1556551]', 'pubmed': '25918132' } def tearDown(self): self.source = None self.efo_ontology = None def test_snp_type_resolution(self): """ Given the label: rs1491921-C return dbSNP:rs1491921, snp """ self.assertTrue(len(list(self.source.graph)) == 0) variant_curie, variant_type = self.source._get_curie_and_type_from_id( self.test_data['snp_label']) self.assertEqual(variant_curie, "dbSNP:rs1491921") self.assertEqual(variant_type, 'snp') def test_snp_model(self): """ Test output model of _add_snp_to_graph() """ self.assertTrue(len(list(self.source.graph)) == 0) variant_curie, variant_type = self.source._get_curie_and_type_from_id( self.test_data['snp_label']) self.source._add_snp_to_graph( variant_curie, self.test_data['snp_label'], self.test_data['chrom_num'], self.test_data['chrom_pos'], self.test_data['context'], self.test_data['allele_freq']) triples = """ dbSNP:rs1491921 a OBO:SO_0000694, OBO:SO_0001628 ; rdfs:label "rs1491921-C" ; faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029-21259029-Region> ; OBO:RO_0002162 OBO:NCBITaxon_9606 ; dc:description "0.013 [risk allele frequency]" . <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029-21259029-Region> a faldo:Region ; faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> ; faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> . <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> a faldo:Position ; faldo:position 21259029 ; faldo:reference OBO:CHR_GRCh38chr5 . """ # To debug # print(self.source.graph.serialize(format="turtle").decode("utf-8")) # self.assertTrue(False) # dbg # LOG.debug( # "Reference graph: %s", # self.source.graph.serialize(format="turtle").decode("utf-8")) self.assertTrue(self.test_util.test_graph_equality(triples, self.source.graph)) def test_snp_gene_relation(self): """ test the _add_snp_gene_relation function :return: """ self.assertTrue(len(list(self.source.graph)) == 0) variant_curie, variant_type = self.source._get_curie_and_type_from_id( self.test_data['snp_label']) self.source._add_snp_gene_relation( variant_curie, self.test_data['snp_gene_nums'], self.test_data['upstream_gene_num'], self.test_data['downstream_gene_num']) triples = """ dbSNP:rs1491921 OBO:RO_0002528 NCBIGene:107986180 ; OBO:RO_0002529 NCBIGene:107986179 . """ self.assertTrue(self.test_util.test_graph_equality(triples, self.source.graph)) def test_deprecated_snp(self): """ test the _add_deprecated_snp :return: """ self.assertTrue(len(list(self.source.graph)) == 0) # fake data snp_id_current = '12345' merged = '1' variant_curie, variant_type = self.source._get_curie_and_type_from_id( self.test_data['snp_label']) self.source._add_deprecated_snp( variant_curie, snp_id_current, merged, self.test_data['chrom_num'], self.test_data['chrom_pos']) triples = """ dbSNP:rs1491921 a owl:NamedIndividual ; OBO:IAO_0100001 dbSNP:rs12345 ; owl:deprecated true . dbSNP:rs12345 MONARCH:cliqueLeader true . """ self.assertTrue(self.test_util.test_graph_equality(triples, self.source.graph)) def test_snp_trait_association(self): """ test the _add_variant_trait_association :return: """ self.assertTrue(len(list(self.source.graph)) == 0) efo_ontology = RDFGraph() LOG.info("Loading EFO ontology in separate rdf graph") efo_ontology.parse(self.source.files['efo']['url'], format='xml') efo_ontology.bind_all_namespaces() LOG.info("Finished loading EFO ontology") variant_curie, variant_type = self.source._get_curie_and_type_from_id( self.test_data['snp_label']) description = self.source._make_description( self.test_data['trait'], self.test_data['init_sample_desc'], self.test_data['replicated_sample_desc'], self.test_data['platform'], self.test_data['pvalue']) self.source._add_variant_trait_association( variant_curie, self.test_data['trait_uri'], efo_ontology, self.test_data['pubmed'], description) triples = """ MONARCH:bffc7a930c08cc8fe931 a OBAN:association ; dc:description "{0}" ; OBO:RO_0002558 OBO:ECO_0000213 ; dc:source PMID:25918132 ; OBAN:association_has_object EFO:0003949 ; OBAN:association_has_predicate RO:0003304 ; OBAN:association_has_subject dbSNP:rs1491921 . MONARCH:bff9b97458d67ed7f517 a OBAN:association ; dc:description "{0}" ; OBO:RO_0002558 OBO:ECO_0000213 ; dc:source PMID:25918132 ; OBAN:association_has_object EFO:0006995 ; OBAN:association_has_predicate RO:0003304 ; OBAN:association_has_subject dbSNP:rs1491921 . EFO:0003949 a owl:Class ; rdfs:label "eye color"^^xsd:string ; rdfs:subClassOf UPHENO:0001001 . dbSNP:rs1491921 RO:0003304 EFO:0003949, EFO:0006995 . PMID:25918132 a OBO:IAO_0000013 . """.format(description) # dbg # LOG.debug( # "Reference graph: %s", # self.source.graph.serialize(format="turtle").decode("utf-8")) self.assertTrue(self.test_util.test_graph_equality(triples, self.source.graph))
class TestGwasSNPModel(unittest.TestCase): """ Test the modelling of a SNP to trait association from sample GWAS catalog data """ def setUp(self): self.source = GWASCatalog('rdf_graph', True) self.test_data = { 'snp_label': 'rs1491921-C', 'chrom_num': '5', 'chrom_pos': '21259029', 'context': 'intergenic_variant', 'allele_freq': '0.013', 'trait': 'Diisocyanate-induced asthma', 'trait_uri': 'http://www.ebi.ac.uk/efo/EFO_0006995, http://www.ebi.ac.uk/efo/EFO_0003949', 'pvalue': '0.0000007', 'merged': '0', 'snp_id_current': '1491921', 'mapped_gene': 'LOC102723561 - GUSBP1', 'snp_gene_nums': '', 'upstream_gene_num': '107986179', 'downstream_gene_num': '107986180', 'init_sample_desc': '74 European ancestry cases, 824 European ancestry controls', 'replicated_sample_desc': 'NA', 'platform': 'Illumina [1556551]', 'pubmed': '25918132' } def tearDown(self): self.source = None self.efo_ontology = None def test_snp_type_resolution(self): """ Given the label: rs1491921-C return dbSNP:rs1491921, snp """ variant_curie, variant_type = \ self.source._get_curie_and_type_from_id(self.test_data['snp_label']) self.assertEqual(variant_curie, "dbSNP:rs1491921") self.assertEqual(variant_type, 'snp') def test_snp_model(self): """ Test output model of _add_snp_to_graph() """ variant_curie, variant_type = \ self.source._get_curie_and_type_from_id(self.test_data['snp_label']) self.source._add_snp_to_graph(variant_curie, self.test_data['snp_label'], self.test_data['chrom_num'], self.test_data['chrom_pos'], self.test_data['context'], self.test_data['allele_freq']) sparql_query = """ SELECT ?snp WHERE { ?snp a OBO:SO_0000694, OBO:SO_0001628 ; rdfs:label "rs1491921-C" ; faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029-21259029-Region> ; OBO:RO_0002162 OBO:NCBITaxon_9606 ; dc:description "0.013 [risk allele frequency]" . <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029-21259029-Region> a faldo:Region ; faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> ; faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> . <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> a faldo:Position ; faldo:position 21259029 ; faldo:reference OBO:CHR_GRCh38chr5 . } """ # To debug #print(self.source.graph.serialize(format="turtle").decode("utf-8")) #self.assertTrue(False) sparql_output = self.source.graph.query(sparql_query) # Test that query passes and returns one row results = list(sparql_output) expected = [(URIRef(self.source.graph._getNode("dbSNP:rs1491921")), )] self.assertEqual(results, expected) def test_snp_gene_relation(self): """ test the _add_snp_gene_relation function :return: """ variant_curie, variant_type = \ self.source._get_curie_and_type_from_id(self.test_data['snp_label']) self.source._add_snp_gene_relation( variant_curie, self.test_data['snp_gene_nums'], self.test_data['upstream_gene_num'], self.test_data['downstream_gene_num']) sparql_query = """ SELECT ?snp WHERE { ?snp OBO:RO_0002528 <http://www.ncbi.nlm.nih.gov/gene/107986180> ; OBO:RO_0002529 <http://www.ncbi.nlm.nih.gov/gene/107986179> . } """ sparql_output = self.source.graph.query(sparql_query) # Test that query passes and returns one row results = list(sparql_output) expected = [(URIRef(self.source.graph._getNode("dbSNP:rs1491921")), )] self.assertEqual(results, expected) def test_deprecated_snp(self): """ test the _add_deprecated_snp :return: """ #fake data snp_id_current = '12345' merged = '1' variant_curie, variant_type = \ self.source._get_curie_and_type_from_id(self.test_data['snp_label']) self.source._add_deprecated_snp(variant_curie, snp_id_current, merged, self.test_data['chrom_num'], self.test_data['chrom_pos']) sparql_query = """ SELECT ?snp WHERE { ?snp a owl:NamedIndividual ; OBO:IAO_0100001 dbSNP:rs12345 ; owl:deprecated true . dbSNP:rs12345 MONARCH:cliqueLeader true . } """ sparql_output = self.source.graph.query(sparql_query) # Test that query passes and returns one row results = list(sparql_output) expected = [(URIRef(self.source.graph._getNode("dbSNP:rs1491921")), )] self.assertEqual(results, expected) def test_snp_trait_association(self): """ test the _add_variant_trait_association :return: """ efo_ontology = RDFGraph() logger.info("Loading EFO ontology in separate rdf graph") efo_ontology.parse(self.source.files['efo']['url'], format='xml') efo_ontology.bind_all_namespaces() logger.info("Finished loading EFO ontology") variant_curie, variant_type = \ self.source._get_curie_and_type_from_id(self.test_data['snp_label']) description = self.source._make_description( self.test_data['trait'], self.test_data['init_sample_desc'], self.test_data['replicated_sample_desc'], self.test_data['platform'], self.test_data['pvalue']) self.source._add_variant_trait_association(variant_curie, self.test_data['trait_uri'], efo_ontology, self.test_data['pubmed'], description) sparql_query = """ SELECT ?snp WHERE {{ <https://monarchinitiative.org/MONARCH_b46cdf48950cb00d> a OBAN:association ; dc:description "{}" ; OBO:RO_0002558 OBO:ECO_0000213 ; dc:source PMID:25918132 ; OBAN:association_has_object EFO:0003949 ; OBAN:association_has_predicate OBO:RO_0002326 ; OBAN:association_has_subject ?snp . <https://monarchinitiative.org/MONARCH_70a05d8eb1c3d4b0> a OBAN:association ; OBO:RO_0002558 OBO:ECO_0000213 ; dc:source PMID:25918132 ; OBAN:association_has_object EFO:0006995 ; OBAN:association_has_predicate OBO:RO_0002326 ; OBAN:association_has_subject ?snp . EFO:0003949 a owl:Class ; rdfs:label "eye color"^^xsd:string ; rdfs:subClassOf UPHENO:0001001 . ?snp OBO:RO_0002326 EFO:0003949, EFO:0006995 . PMID:25918132 a OBO:IAO_0000013 . }} """.format(description) sparql_output = self.source.graph.query(sparql_query) # Test that query passes and returns one row results = list(sparql_output) expected = [(URIRef(self.source.graph._getNode("dbSNP:rs1491921")), )] self.assertEqual(results, expected)
class TestGwasSNPModel(unittest.TestCase): """ Test the modelling of a SNP to trait association from sample GWAS catalog data """ def setUp(self): self.source = GWASCatalog('rdf_graph', True) self.test_data = { 'snp_label': 'rs1491921-C', 'chrom_num': '5', 'chrom_pos': '21259029', 'context': 'intergenic_variant', 'allele_freq': '0.013', 'trait': 'Diisocyanate-induced asthma', 'trait_uri': 'http://www.ebi.ac.uk/efo/EFO_0006995, http://www.ebi.ac.uk/efo/EFO_0003949', 'pvalue': '0.0000007', 'merged': '0', 'snp_id_current': '1491921', 'mapped_gene': 'LOC102723561 - GUSBP1', 'snp_gene_nums': '', 'upstream_gene_num': '107986179', 'downstream_gene_num': '107986180', 'init_sample_desc': '74 European ancestry cases, 824 European ancestry controls', 'replicated_sample_desc': 'NA', 'platform': 'Illumina [1556551]', 'pubmed': '25918132' } def tearDown(self): self.source = None self.efo_ontology = None def test_snp_type_resolution(self): """ Given the label: rs1491921-C return dbSNP:rs1491921, snp """ variant_curie, variant_type = \ self.source._get_curie_and_type_from_id(self.test_data['snp_label']) self.assertEqual(variant_curie, "dbSNP:rs1491921") self.assertEqual(variant_type, 'snp') def test_snp_model(self): """ Test output model of _add_snp_to_graph() """ variant_curie, variant_type = \ self.source._get_curie_and_type_from_id(self.test_data['snp_label']) self.source._add_snp_to_graph( variant_curie, self.test_data['snp_label'], self.test_data['chrom_num'], self.test_data['chrom_pos'], self.test_data['context'], self.test_data['allele_freq']) sparql_query = """ SELECT ?snp WHERE { ?snp a OBO:SO_0000694, OBO:SO_0001628 ; rdfs:label "rs1491921-C" ; faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029-21259029-Region> ; OBO:RO_0002162 OBO:NCBITaxon_9606 ; dc:description "0.013 [risk allele frequency]" . <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029-21259029-Region> a faldo:Region ; faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> ; faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> . <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> a faldo:Position ; faldo:position 21259029 ; faldo:reference OBO:CHR_GRCh38chr5 . } """ # To debug #print(self.source.graph.serialize(format="turtle").decode("utf-8")) #self.assertTrue(False) sparql_output = self.source.graph.query(sparql_query) # Test that query passes and returns one row results = list(sparql_output) expected = [(URIRef(self.source.graph._getNode("dbSNP:rs1491921")),)] self.assertEqual(results, expected) def test_snp_gene_relation(self): """ test the _add_snp_gene_relation function :return: """ variant_curie, variant_type = \ self.source._get_curie_and_type_from_id(self.test_data['snp_label']) self.source._add_snp_gene_relation( variant_curie, self.test_data['snp_gene_nums'], self.test_data['upstream_gene_num'], self.test_data['downstream_gene_num']) sparql_query = """ SELECT ?snp WHERE { ?snp OBO:RO_0002528 <http://www.ncbi.nlm.nih.gov/gene/107986180> ; OBO:RO_0002529 <http://www.ncbi.nlm.nih.gov/gene/107986179> . } """ sparql_output = self.source.graph.query(sparql_query) # Test that query passes and returns one row results = list(sparql_output) expected = [(URIRef(self.source.graph._getNode("dbSNP:rs1491921")),)] self.assertEqual(results, expected) def test_deprecated_snp(self): """ test the _add_deprecated_snp :return: """ #fake data snp_id_current = '12345' merged = '1' variant_curie, variant_type = \ self.source._get_curie_and_type_from_id(self.test_data['snp_label']) self.source._add_deprecated_snp( variant_curie, snp_id_current, merged, self.test_data['chrom_num'], self.test_data['chrom_pos']) sparql_query = """ SELECT ?snp WHERE { ?snp a owl:NamedIndividual ; OBO:IAO_0100001 dbSNP:rs12345 ; owl:deprecated true . dbSNP:rs12345 MONARCH:cliqueLeader true . } """ sparql_output = self.source.graph.query(sparql_query) # Test that query passes and returns one row results = list(sparql_output) expected = [(URIRef(self.source.graph._getNode("dbSNP:rs1491921")),)] self.assertEqual(results, expected) def test_snp_trait_association(self): """ test the _add_variant_trait_association :return: """ efo_ontology = RDFGraph() logger.info("Loading EFO ontology in separate rdf graph") efo_ontology.parse(self.source.files['efo']['url'], format='xml') efo_ontology.bind_all_namespaces() logger.info("Finished loading EFO ontology") variant_curie, variant_type = \ self.source._get_curie_and_type_from_id(self.test_data['snp_label']) description = self.source._make_description( self.test_data['trait'], self.test_data['init_sample_desc'], self.test_data['replicated_sample_desc'], self.test_data['platform'], self.test_data['pvalue']) self.source._add_variant_trait_association( variant_curie, self.test_data['trait_uri'], efo_ontology, self.test_data['pubmed'], description) sparql_query = """ SELECT ?snp WHERE {{ <https://monarchinitiative.org/MONARCH_b46cdf48950cb00d> a OBAN:association ; dc:description "{}" ; OBO:RO_0002558 OBO:ECO_0000213 ; dc:source PMID:25918132 ; OBAN:association_has_object EFO:0003949 ; OBAN:association_has_predicate OBO:RO_0002326 ; OBAN:association_has_subject ?snp . <https://monarchinitiative.org/MONARCH_70a05d8eb1c3d4b0> a OBAN:association ; OBO:RO_0002558 OBO:ECO_0000213 ; dc:source PMID:25918132 ; OBAN:association_has_object EFO:0006995 ; OBAN:association_has_predicate OBO:RO_0002326 ; OBAN:association_has_subject ?snp . EFO:0003949 a owl:Class ; rdfs:label "eye color"^^xsd:string ; rdfs:subClassOf UPHENO:0001001 . ?snp OBO:RO_0002326 EFO:0003949, EFO:0006995 . PMID:25918132 a OBO:IAO_0000013 . }} """.format(description) sparql_output = self.source.graph.query(sparql_query) # Test that query passes and returns one row results = list(sparql_output) expected = [(URIRef(self.source.graph._getNode("dbSNP:rs1491921")),)] self.assertEqual(results, expected)