Ejemplo n.º 1
0
 def setUp(self):
     self.test_util = TestUtils()
     self.source = GWASCatalog('rdf_graph', True)
     self.source.graph = RDFGraph(True)  # Reset graph
     self.source.graph.bind_all_namespaces()
     self.test_data = {
         'snp_label': 'rs1491921-C',
         'chrom_num': '5',
         'chrom_pos': '21259029',
         'context': 'intergenic_variant',
         'allele_freq': '0.013',
         'trait': 'Diisocyanate-induced asthma',
         'trait_uri': 'http://www.ebi.ac.uk/efo/EFO_0006995, http://www.ebi.ac.uk/efo/EFO_0003949',
         'pvalue': '0.0000007',
         'merged': '0',
         'snp_id_current': '1491921',
         'mapped_gene': 'LOC102723561 - GUSBP1',
         'snp_gene_nums': '',
         'upstream_gene_num': '107986179',
         'downstream_gene_num': '107986180',
         'init_sample_desc': '74 European ancestry cases, 824 European ancestry controls',
         'replicated_sample_desc': 'NA',
         'platform': 'Illumina [1556551]',
         'pubmed': '25918132'
     }
Ejemplo n.º 2
0
 def setUp(self):
     self.test_util = TestUtils()
     self.source = GWASCatalog('rdf_graph', True)
     self.source.graph = RDFGraph(True)
     self.test_data = {
         'snp_label': 'rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?',
         'chrom_num': '9;9;9;9',
         'chrom_pos': '36998996;37002118;37000690;36997420',
         'context':
         'intron_variant; intron_variant; intron_variant; intron_variant',
         'allele_freq': 'NR',
         'trait': 'Intelligence',
         'trait_uri': 'http://www.ebi.ac.uk/efo/EFO_0004337',
         'pvalue': '0.00000004',
         'merged': '0',
         'snp_id_current': '',
         'mapped_gene': 'PAX5; PAX5; PAX5; PAX5',
         'snp_gene_nums': '',
         'upstream_gene_num': '107986179',
         'downstream_gene_num': '107986180',
         'init_sample_desc':
         '656 European ancestry individuals from ADHD families',
         'replicated_sample_desc': 'NA',
         'platform': 'Illumina [795637]',
         'pubmed': '22449649'
     }
Ejemplo n.º 3
0
class GWASCatalogTestCase(unittest.TestCase):
    """
    Test gwas catalog methods

    Input source data is generated with scripts/test-sets/gwas.sh
    All tests here are functional, and will fail for a variety of reasons,
    including new or updated data connected to an allele

    Note setUp and tearDown do not run before/after TestCase.subTest !
    need something similar to @pytest.mark.parametrize

    """
    def setUp(self):
        self.gwascatalog = GWASCatalog('rdf_graph', True)
        self.gwascatalog.graph = RDFGraph(True)

    def tearDown(self):
        self.gwascatalog = None

    def tearDownAndSetUp(self):
        """
        In lieu of the setUp and tearDown working for subTests
        """
        self.tearDown()
        self.setUp()

    def test_parse(self):
        """
        Runs WormBase.parse() and outputs dot file for each allele
        This is less of a unit test and more for viewing the
        output of an entire run on a single allele,
        dot files can be converted to images using
        scripts/dot-to-svg.sh
        """
        for variant in VARIANTS:
            LOG.debug(f"testing variant {variant}")
            with self.subTest(variant_id=variant):
                self.tearDownAndSetUp()
                self.gwascatalog.rawdir = RAW_PATH + '/' + variant
                self.gwascatalog.parse()
                dot_file_path = DOT_PATH + variant + ".dot"
                with open(dot_file_path, 'w') as dot_file:
                    rdf2dot(self.gwascatalog.graph, dot_file)

                # debug
                LOG.debug(
                    "Reference graph: %s",
                    self.gwascatalog.graph.serialize(
                        format="turtle").decode("utf-8"))

                reference_ttl = TTL_PATH + variant + '.ttl'

                self.assertTrue(
                    TestUtils.test_graph_equality(reference_ttl,
                                                  self.gwascatalog.graph))
Ejemplo n.º 4
0
class GWASCatalogTestCase(SourceTestCase):
    def setUp(self):
        self.source = GWASCatalog('rdf_graph', True)
        self.source.test_ids = self._get_conf()['test_ids']
        self.source.settestonly(True)
        self._setDirToSource()
        return

    def tearDown(self):
        self.source = None
        return
Ejemplo n.º 5
0
class GWASCatalogTestCase(SourceTestCase):

    def setUp(self):
        self.source = GWASCatalog()
        self.source.settestonly(True)
        self.source.setnobnodes(True)
        self._setDirToSource()
        return

    def tearDown(self):
        self.source = None
        return
Ejemplo n.º 6
0
class GWASCatalogTestCase(SourceTestCase):

    def setUp(self):
        self.source = GWASCatalog('rdf_graph', True)
        self.source.test_ids = self._get_conf()['test_ids']
        self.source.settestonly(True)
        self._setDirToSource()
        return

    def tearDown(self):
        self.source = None
        return
Ejemplo n.º 7
0
 def setUp(self):
     self.source = GWASCatalog('rdf_graph', True)
     self.test_data = {
         'snp_label': 'rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?',
         'chrom_num': '9;9;9;9',
         'chrom_pos': '36998996;37002118;37000690;36997420',
         'context': 'intron_variant; intron_variant; intron_variant; intron_variant',
         'allele_freq': 'NR',
         'trait': 'Intelligence',
         'trait_uri': 'http://www.ebi.ac.uk/efo/EFO_0004337',
         'pvalue': '0.00000004',
         'merged': '0',
         'snp_id_current': '',
         'mapped_gene': 'PAX5; PAX5; PAX5; PAX5',
         'snp_gene_nums': '',
         'upstream_gene_num': '107986179',
         'downstream_gene_num': '107986180',
         'init_sample_desc': '656 European ancestry individuals from ADHD families',
         'replicated_sample_desc': 'NA',
         'platform': 'Illumina [795637]',
         'pubmed': '22449649'
     }
Ejemplo n.º 8
0
 def setUp(self):
     self.source = GWASCatalog('rdf_graph', True)
     self.source.test_ids = self._get_conf()['test_ids']
     self.source.settestonly(True)
     self._setDirToSource()
     return
Ejemplo n.º 9
0
 def setUp(self):
     self.gwascatalog = GWASCatalog('rdf_graph', True)
     self.gwascatalog.graph = RDFGraph(True)
Ejemplo n.º 10
0
class TestGwasHaplotypeModel(unittest.TestCase):
    """
    Test the modelling of a  SNP to trait association
    from sample GWAS catalog data
    """

    def setUp(self):
        self.test_util = TestUtils()
        self.source = GWASCatalog('rdf_graph', True)
        self.source.graph = RDFGraph(True)
        self.test_data = {
            'snp_label': 'rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?',
            'chrom_num': '9;9;9;9',
            'chrom_pos': '36998996;37002118;37000690;36997420',
            'context': 'intron_variant; intron_variant; intron_variant; intron_variant',
            'allele_freq': 'NR',
            'trait': 'Intelligence',
            'trait_uri': 'http://www.ebi.ac.uk/efo/EFO_0004337',
            'pvalue': '0.00000004',
            'merged': '0',
            'snp_id_current': '',
            'mapped_gene': 'PAX5; PAX5; PAX5; PAX5',
            'snp_gene_nums': '',
            'upstream_gene_num': '107986179',
            'downstream_gene_num': '107986180',
            'init_sample_desc': '656 European ancestry individuals from ADHD families',
            'replicated_sample_desc': 'NA',
            'platform': 'Illumina [795637]',
            'pubmed': '22449649'
        }

    def tearDown(self):
        self.source = None

    def test_snp_model(self):
        """
        Test output model of _process_haplotype()
        self._process_haplotype(
            variant_curie, strongest_snp_risk_allele,
            chrom_num, chrom_pos, context,
            risk_allele_frequency, mapped_gene, so_ontology)
        """
        self.assertTrue(len(list(self.source.graph)) == 0)
        variant_curie, variant_type = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        so_ontology = RDFGraph()
        LOG.info("Loading SO ontology in separate rdf graph")
        so_ontology.parse(self.source.files['so']['url'], format='xml')
        so_ontology.bind_all_namespaces()
        LOG.info("Finished loading SO ontology")

        self.source._process_haplotype(
            variant_curie, self.test_data['snp_label'], self.test_data['chrom_num'],
            self.test_data['chrom_pos'], self.test_data['context'],
            self.test_data['allele_freq'], self.test_data['mapped_gene'], so_ontology)

        triples = """
:haplotype_bb627b1f64039b0f751a a OBO:GENO_0000871 ;
    rdfs:label "rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?" ;
    OBO:GENO_0000382 dbSNP:rs1329573,
        dbSNP:rs3758171,
        dbSNP:rs3824344,
        dbSNP:rs7020413 ;
    OBO:SO_0001627 HGNC:8619 ;
    OBO:RO_0002162 OBO:NCBITaxon_9606 .

dbSNP:rs1329573 a OBO:SO_0000694,
        SO:0001627 ;
    rdfs:label "rs1329573-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996-36998996-Region> ;
    OBO:SO_0001627 HGNC:8619 ;
    OBO:RO_0002162 OBO:NCBITaxon_9606 .

dbSNP:rs3758171 a OBO:SO_0000694,
        OBO:SO_0001627 ;
    rdfs:label "rs3758171-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420-36997420-Region> ;
    OBO:SO_0001627 HGNC:8619 ;
    OBO:RO_0002162 OBO:NCBITaxon_9606 .

dbSNP:rs3824344 a OBO:SO_0000694,
        OBO:SO_0001627 ;
    rdfs:label "rs3824344-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690-37000690-Region> ;
    OBO:SO_0001627 HGNC:8619 ;
    OBO:RO_0002162 OBO:NCBITaxon_9606 .

dbSNP:rs7020413 a OBO:SO_0000694,
        OBO:SO_0001627 ;
    rdfs:label "rs7020413-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118-37002118-Region> ;
    OBO:SO_0001627 HGNC:8619 ;
    OBO:RO_0002162 OBO:NCBITaxon_9606 .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420-36997420-Region> a faldo:Region ;
    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996-36998996-Region> a faldo:Region ;
    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690-37000690-Region> a faldo:Region ;
    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118-37002118-Region> a faldo:Region ;
    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> a faldo:Position ;
    faldo:position 36997420 ;
    faldo:reference OBO:CHR_GRCh38chr9 .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> a faldo:Position ;
    faldo:position 36998996 ;
    faldo:reference OBO:CHR_GRCh38chr9 .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> a faldo:Position ;
    faldo:position 37000690 ;
    faldo:reference OBO:CHR_GRCh38chr9 .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> a faldo:Position ;
    faldo:position 37002118 ;
    faldo:reference OBO:CHR_GRCh38chr9 .
        """

        # dbg
        # LOG.debug(
        #    "Reference graph: %s",
        #   self.source.graph.serialize(format="turtle").decode("utf-8"))

        #  Does not seem to acknowlage these constant triples 
        self.assertTrue(self.test_util.test_graph_equality(triples, self.source.graph))
Ejemplo n.º 11
0
class TestGwasSNPModel(unittest.TestCase):
    """
    Test the modelling of a  SNP to trait association
    from sample GWAS catalog data
    """

    def setUp(self):
        self.test_util = TestUtils()
        self.source = GWASCatalog('rdf_graph', True)
        self.source.graph = RDFGraph(True)  # Reset graph
        self.source.graph.bind_all_namespaces()
        self.test_data = {
            'snp_label': 'rs1491921-C',
            'chrom_num': '5',
            'chrom_pos': '21259029',
            'context': 'intergenic_variant',
            'allele_freq': '0.013',
            'trait': 'Diisocyanate-induced asthma',
            'trait_uri': 'http://www.ebi.ac.uk/efo/EFO_0006995, http://www.ebi.ac.uk/efo/EFO_0003949',
            'pvalue': '0.0000007',
            'merged': '0',
            'snp_id_current': '1491921',
            'mapped_gene': 'LOC102723561 - GUSBP1',
            'snp_gene_nums': '',
            'upstream_gene_num': '107986179',
            'downstream_gene_num': '107986180',
            'init_sample_desc': '74 European ancestry cases, 824 European ancestry controls',
            'replicated_sample_desc': 'NA',
            'platform': 'Illumina [1556551]',
            'pubmed': '25918132'
        }

    def tearDown(self):
        self.source = None
        self.efo_ontology = None

    def test_snp_type_resolution(self):
        """
        Given the label: rs1491921-C
        return dbSNP:rs1491921, snp
        """
        self.assertTrue(len(list(self.source.graph)) == 0)
        variant_curie, variant_type = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        self.assertEqual(variant_curie, "dbSNP:rs1491921")
        self.assertEqual(variant_type, 'snp')

    def test_snp_model(self):
        """
        Test output model of _add_snp_to_graph()
        """
        self.assertTrue(len(list(self.source.graph)) == 0)
        variant_curie, variant_type = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        self.source._add_snp_to_graph(
            variant_curie, self.test_data['snp_label'], self.test_data['chrom_num'],
            self.test_data['chrom_pos'], self.test_data['context'],
            self.test_data['allele_freq'])

        triples = """
    dbSNP:rs1491921 a OBO:SO_0000694, OBO:SO_0001628 ;
        rdfs:label "rs1491921-C" ;
        faldo:location  <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029-21259029-Region> ;
        OBO:RO_0002162 OBO:NCBITaxon_9606 ;
        dc:description "0.013 [risk allele frequency]" .

    <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029-21259029-Region> a faldo:Region ;
        faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> ;
        faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> .

    <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> a faldo:Position ;
        faldo:position 21259029 ;
        faldo:reference OBO:CHR_GRCh38chr5 .
"""
        # To debug
        # print(self.source.graph.serialize(format="turtle").decode("utf-8"))
        # self.assertTrue(False)

        # dbg
        # LOG.debug(
        #    "Reference graph: %s",
        #   self.source.graph.serialize(format="turtle").decode("utf-8"))

        self.assertTrue(self.test_util.test_graph_equality(triples, self.source.graph))

    def test_snp_gene_relation(self):
        """
        test the _add_snp_gene_relation function
        :return:
        """
        self.assertTrue(len(list(self.source.graph)) == 0)
        variant_curie, variant_type = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        self.source._add_snp_gene_relation(
            variant_curie, self.test_data['snp_gene_nums'],
            self.test_data['upstream_gene_num'],
            self.test_data['downstream_gene_num'])

        triples = """
        dbSNP:rs1491921 OBO:RO_0002528 NCBIGene:107986180 ;
            OBO:RO_0002529 NCBIGene:107986179 .
        """
        self.assertTrue(self.test_util.test_graph_equality(triples, self.source.graph))

    def test_deprecated_snp(self):
        """
        test the _add_deprecated_snp
        :return:
        """
        self.assertTrue(len(list(self.source.graph)) == 0)
        # fake data
        snp_id_current = '12345'
        merged = '1'

        variant_curie, variant_type = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        self.source._add_deprecated_snp(
            variant_curie, snp_id_current, merged,
            self.test_data['chrom_num'], self.test_data['chrom_pos'])

        triples = """
        dbSNP:rs1491921 a owl:NamedIndividual ;
            OBO:IAO_0100001 dbSNP:rs12345 ;
            owl:deprecated true .

        dbSNP:rs12345 MONARCH:cliqueLeader true .
        """
        self.assertTrue(self.test_util.test_graph_equality(triples, self.source.graph))

    def test_snp_trait_association(self):
        """
        test the _add_variant_trait_association
        :return:
        """
        self.assertTrue(len(list(self.source.graph)) == 0)
        efo_ontology = RDFGraph()
        LOG.info("Loading EFO ontology in separate rdf graph")
        efo_ontology.parse(self.source.files['efo']['url'], format='xml')
        efo_ontology.bind_all_namespaces()
        LOG.info("Finished loading EFO ontology")

        variant_curie, variant_type = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        description = self.source._make_description(
            self.test_data['trait'], self.test_data['init_sample_desc'],
            self.test_data['replicated_sample_desc'],
            self.test_data['platform'], self.test_data['pvalue'])

        self.source._add_variant_trait_association(
            variant_curie, self.test_data['trait_uri'], efo_ontology,
            self.test_data['pubmed'], description)

        triples = """


    MONARCH:bffc7a930c08cc8fe931 a OBAN:association ;
        dc:description "{0}" ;
        OBO:RO_0002558 OBO:ECO_0000213 ;
        dc:source PMID:25918132 ;
        OBAN:association_has_object EFO:0003949 ;
        OBAN:association_has_predicate RO:0003304 ;
        OBAN:association_has_subject dbSNP:rs1491921 .

    MONARCH:bff9b97458d67ed7f517 a OBAN:association ;
        dc:description "{0}" ;
        OBO:RO_0002558 OBO:ECO_0000213 ;
        dc:source PMID:25918132 ;
        OBAN:association_has_object EFO:0006995 ;
        OBAN:association_has_predicate RO:0003304 ;
        OBAN:association_has_subject dbSNP:rs1491921 .

    EFO:0003949 a owl:Class ;
        rdfs:label "eye color"^^xsd:string ;
        rdfs:subClassOf UPHENO:0001001 .

    dbSNP:rs1491921 RO:0003304 EFO:0003949,
            EFO:0006995 .

    PMID:25918132 a OBO:IAO_0000013 .
        """.format(description)

        # dbg
        # LOG.debug(
        #    "Reference graph: %s",
        #    self.source.graph.serialize(format="turtle").decode("utf-8"))
        self.assertTrue(self.test_util.test_graph_equality(triples, self.source.graph))
Ejemplo n.º 12
0
 def setUp(self):
     self.source = GWASCatalog('rdf_graph', True)
     self.source.test_ids = self._get_conf()['test_ids']
     self.source.settestonly(True)
     self._setDirToSource()
     return
Ejemplo n.º 13
0
class TestGwasHaplotypeModel(unittest.TestCase):
    """
    Test the modelling of a  SNP to trait association
    from sample GWAS catalog data
    """
    def setUp(self):
        self.test_util = TestUtils()
        self.source = GWASCatalog('rdf_graph', True)
        self.source.graph = RDFGraph(True)
        self.test_data = {
            'snp_label': 'rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?',
            'chrom_num': '9;9;9;9',
            'chrom_pos': '36998996;37002118;37000690;36997420',
            'context':
            'intron_variant; intron_variant; intron_variant; intron_variant',
            'allele_freq': 'NR',
            'trait': 'Intelligence',
            'trait_uri': 'http://www.ebi.ac.uk/efo/EFO_0004337',
            'pvalue': '0.00000004',
            'merged': '0',
            'snp_id_current': '',
            'mapped_gene': 'PAX5; PAX5; PAX5; PAX5',
            'snp_gene_nums': '',
            'upstream_gene_num': '107986179',
            'downstream_gene_num': '107986180',
            'init_sample_desc':
            '656 European ancestry individuals from ADHD families',
            'replicated_sample_desc': 'NA',
            'platform': 'Illumina [795637]',
            'pubmed': '22449649'
        }

    def tearDown(self):
        self.source = None

    def test_snp_model(self):
        """
        Test output model of _process_haplotype()
        self._process_haplotype(
            variant_curie, strongest_snp_risk_allele,
            chrom_num, chrom_pos, context,
            risk_allele_frequency, mapped_gene, so_ontology)
        """
        self.assertTrue(len(list(self.source.graph)) == 0)
        variant_curie, variant_type = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        so_ontology = RDFGraph()
        LOG.info("Loading SO ontology in separate rdf graph")
        so_ontology.parse(self.source.files['so']['url'], format='xml')
        so_ontology.bind_all_namespaces()
        LOG.info("Finished loading SO ontology")

        self.source._process_haplotype(
            variant_curie, self.test_data['snp_label'],
            self.test_data['chrom_num'], self.test_data['chrom_pos'],
            self.test_data['context'], self.test_data['allele_freq'],
            self.test_data['mapped_gene'], so_ontology)

        triples = """
:haplotype_bb627b1f64039b0f751a a SO:0001024 ;
    rdfs:label "rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?" ;
    GENO:0000382 dbSNP:rs1329573,
        dbSNP:rs3758171,
        dbSNP:rs3824344,
        dbSNP:rs7020413 ;
    GENO:0000418 HGNC:8619 ;
    RO:0002162 NCBITaxon:9606 .

dbSNP:rs1329573 a SO:0000694,
        SO:0001627 ;
    rdfs:label "rs1329573-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/b3fad5df82cdfb283329> ;
    GENO:0000418 HGNC:8619 ;
    RO:0002162 NCBITaxon:9606 .

dbSNP:rs3758171 a SO:0000694,
        SO:0001627 ;
    rdfs:label "rs3758171-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/b25a2da36647bdd71be3> ;
    GENO:0000418 HGNC:8619 ;
    RO:0002162 NCBITaxon:9606 .

dbSNP:rs3824344 a SO:0000694,
        OBO:SO_0001627 ;
    rdfs:label "rs3824344-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/b096a3e94e32fe23374a> ;
    GENO:0000418 HGNC:8619 ;
    RO:0002162 NCBITaxon:9606 .

dbSNP:rs7020413 a SO:0000694,
        SO:0001627 ;
    rdfs:label "rs7020413-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/bbb252d9b6cd02e9880a> ;
    GENO:0000418 HGNC:8619 ;
    RO:0002162 NCBITaxon:9606 .

<https://monarchinitiative.org/.well-known/genid/b25a2da36647bdd71be3> a faldo:Region ;
    rdfs:label "GRCh38chr9-36997420-36997420-Region";
    faldo:begin <https://monarchinitiative.org/.well-known/genid/b21985847fe0774084eb> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/b21985847fe0774084eb> .

<https://monarchinitiative.org/.well-known/genid/b3fad5df82cdfb283329> a faldo:Region ;
    rdfs:label "GRCh38chr9-36998996-36998996-Region";
    faldo:begin <https://monarchinitiative.org/.well-known/genid/b55051762f8d5a3dbeb5> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/b55051762f8d5a3dbeb5> .

<https://monarchinitiative.org/.well-known/genid/b096a3e94e32fe23374a> a faldo:Region ;
    rdfs:label "GRCh38chr9-37000690-37000690-Region";
    faldo:begin <https://monarchinitiative.org/.well-known/genid/b5d61dbc7958a979d046> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/b5d61dbc7958a979d046> .

<https://monarchinitiative.org/.well-known/genid/bbb252d9b6cd02e9880a> a faldo:Region ;
    rdfs:label "GRCh38chr9-37002118-37002118-Region";
    faldo:begin <https://monarchinitiative.org/.well-known/genid/bb870c3d7606a3e0fc3c> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/bb870c3d7606a3e0fc3c> .

<https://monarchinitiative.org/.well-known/genid/b21985847fe0774084eb> a faldo:Position ;
    rdfs:label "GRCh38chr9-36997420";
    faldo:position 36997420 ;
    faldo:reference OBO:CHR_GRCh38chr9 .

<https://monarchinitiative.org/.well-known/genid/b55051762f8d5a3dbeb5> a faldo:Position ;
    rdfs:label "GRCh38chr9-36998996";
    faldo:position 36998996 ;
    faldo:reference CHR:GRCh38chr9 .

<https://monarchinitiative.org/.well-known/genid/b5d61dbc7958a979d046> a faldo:Position ;
    rdfs:label "GRCh38chr9-37000690";
    faldo:position 37000690 ;
    faldo:reference CHR:GRCh38chr9 .

<https://monarchinitiative.org/.well-known/genid/bb870c3d7606a3e0fc3c> a faldo:Position ;
    rdfs:label "GRCh38chr9-37002118";
    faldo:position 37002118 ;
    faldo:reference CHR:GRCh38chr9 .
        """

        # dbg
        LOG.debug("Reference graph: %s",
                  self.source.graph.serialize(format="turtle").decode("utf-8"))

        self.assertTrue(
            self.test_util.test_graph_equality(triples, self.source.graph))
Ejemplo n.º 14
0
class TestGwasSNPModel(unittest.TestCase):
    """
    Test the modelling of a  SNP to trait association
    from sample GWAS catalog data
    """

    def setUp(self):
        self.source = GWASCatalog('rdf_graph', True)
        self.test_data = {
            'snp_label': 'rs1491921-C',
            'chrom_num': '5',
            'chrom_pos': '21259029',
            'context': 'intergenic_variant',
            'allele_freq': '0.013',
            'trait': 'Diisocyanate-induced asthma',
            'trait_uri': 'http://www.ebi.ac.uk/efo/EFO_0006995, http://www.ebi.ac.uk/efo/EFO_0003949',
            'pvalue': '0.0000007',
            'merged': '0',
            'snp_id_current': '1491921',
            'mapped_gene': 'LOC102723561 - GUSBP1',
            'snp_gene_nums': '',
            'upstream_gene_num': '107986179',
            'downstream_gene_num': '107986180',
            'init_sample_desc': '74 European ancestry cases, 824 European ancestry controls',
            'replicated_sample_desc': 'NA',
            'platform': 'Illumina [1556551]',
            'pubmed': '25918132'
        }

    def tearDown(self):
        self.source = None
        self.efo_ontology = None

    def test_snp_type_resolution(self):
        """
        Given the label: rs1491921-C
        return dbSNP:rs1491921, snp
        """
        variant_curie, variant_type = \
            self.source._get_curie_and_type_from_id(self.test_data['snp_label'])

        self.assertEqual(variant_curie, "dbSNP:rs1491921")
        self.assertEqual(variant_type, 'snp')

    def test_snp_model(self):
        """
        Test output model of _add_snp_to_graph()
        """
        variant_curie, variant_type = \
            self.source._get_curie_and_type_from_id(self.test_data['snp_label'])

        self.source._add_snp_to_graph(
            variant_curie, self.test_data['snp_label'], self.test_data['chrom_num'],
            self.test_data['chrom_pos'], self.test_data['context'],
            self.test_data['allele_freq'])

        sparql_query = """
            SELECT ?snp
            WHERE {
                ?snp a OBO:SO_0000694,
                    OBO:SO_0001628 ;
                    rdfs:label "rs1491921-C" ;
                    faldo:location
                    <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029-21259029-Region> ;
                    OBO:RO_0002162 OBO:NCBITaxon_9606 ;
                    dc:description "0.013 [risk allele frequency]" .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029-21259029-Region> a faldo:Region ;
                    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> ;
                    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> a faldo:Position ;
                    faldo:position 21259029 ;
                    faldo:reference OBO:CHR_GRCh38chr5 .
            }
        """
        # To debug
        #print(self.source.graph.serialize(format="turtle").decode("utf-8"))
        #self.assertTrue(False)

        sparql_output = self.source.graph.query(sparql_query)
        # Test that query passes and returns one row
        results = list(sparql_output)
        expected = [(URIRef(self.source.graph._getNode("dbSNP:rs1491921")),)]
        self.assertEqual(results, expected)

    def test_snp_gene_relation(self):
        """
        test the _add_snp_gene_relation function
        :return:
        """

        variant_curie, variant_type = \
            self.source._get_curie_and_type_from_id(self.test_data['snp_label'])

        self.source._add_snp_gene_relation(
            variant_curie, self.test_data['snp_gene_nums'],
            self.test_data['upstream_gene_num'],
            self.test_data['downstream_gene_num'])

        sparql_query = """
            SELECT ?snp
            WHERE {
                ?snp OBO:RO_0002528 <http://www.ncbi.nlm.nih.gov/gene/107986180> ;
                     OBO:RO_0002529 <http://www.ncbi.nlm.nih.gov/gene/107986179> .
            }
        """
        sparql_output = self.source.graph.query(sparql_query)
        # Test that query passes and returns one row
        results = list(sparql_output)
        expected = [(URIRef(self.source.graph._getNode("dbSNP:rs1491921")),)]
        self.assertEqual(results, expected)

    def test_deprecated_snp(self):
        """
        test the _add_deprecated_snp
        :return:
        """
        #fake data
        snp_id_current = '12345'
        merged = '1'

        variant_curie, variant_type = \
            self.source._get_curie_and_type_from_id(self.test_data['snp_label'])

        self.source._add_deprecated_snp(
            variant_curie, snp_id_current, merged,
            self.test_data['chrom_num'], self.test_data['chrom_pos'])

        sparql_query = """
            SELECT ?snp
            WHERE {
                ?snp a owl:NamedIndividual ;
                    OBO:IAO_0100001 dbSNP:rs12345 ;
                    owl:deprecated true .

                dbSNP:rs12345 MONARCH:cliqueLeader true .
            }
        """
        sparql_output = self.source.graph.query(sparql_query)
        # Test that query passes and returns one row
        results = list(sparql_output)
        expected = [(URIRef(self.source.graph._getNode("dbSNP:rs1491921")),)]
        self.assertEqual(results, expected)

    def test_snp_trait_association(self):
        """
        test the _add_variant_trait_association
        :return:
        """
        efo_ontology = RDFGraph()
        logger.info("Loading EFO ontology in separate rdf graph")
        efo_ontology.parse(self.source.files['efo']['url'], format='xml')
        efo_ontology.bind_all_namespaces()
        logger.info("Finished loading EFO ontology")

        variant_curie, variant_type = \
            self.source._get_curie_and_type_from_id(self.test_data['snp_label'])

        description = self.source._make_description(
            self.test_data['trait'], self.test_data['init_sample_desc'],
            self.test_data['replicated_sample_desc'],
            self.test_data['platform'], self.test_data['pvalue'])

        self.source._add_variant_trait_association(
            variant_curie, self.test_data['trait_uri'], efo_ontology,
            self.test_data['pubmed'], description)

        sparql_query = """
            SELECT ?snp
            WHERE {{
                <https://monarchinitiative.org/MONARCH_b46cdf48950cb00d> a OBAN:association ;
                    dc:description "{}" ;
                    OBO:RO_0002558 OBO:ECO_0000213 ;
                    dc:source PMID:25918132 ;
                    OBAN:association_has_object EFO:0003949 ;
                    OBAN:association_has_predicate OBO:RO_0002326 ;
                    OBAN:association_has_subject ?snp .

                <https://monarchinitiative.org/MONARCH_70a05d8eb1c3d4b0> a OBAN:association ;
                    OBO:RO_0002558 OBO:ECO_0000213 ;
                    dc:source PMID:25918132 ;
                    OBAN:association_has_object EFO:0006995 ;
                    OBAN:association_has_predicate OBO:RO_0002326 ;
                    OBAN:association_has_subject ?snp .

                EFO:0003949 a owl:Class ;
                    rdfs:label "eye color"^^xsd:string ;
                    rdfs:subClassOf UPHENO:0001001 .

                ?snp OBO:RO_0002326 EFO:0003949,
                        EFO:0006995 .

                PMID:25918132 a OBO:IAO_0000013 .
            }}
        """.format(description)
        sparql_output = self.source.graph.query(sparql_query)
        # Test that query passes and returns one row
        results = list(sparql_output)
        expected = [(URIRef(self.source.graph._getNode("dbSNP:rs1491921")),)]
        self.assertEqual(results, expected)
Ejemplo n.º 15
0
class TestGwasHaplotypeModel(unittest.TestCase):
    """
    Test the modelling of a  SNP to trait association
    from sample GWAS catalog data
    """

    def setUp(self):
        self.source = GWASCatalog('rdf_graph', True)
        self.test_data = {
            'snp_label': 'rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?',
            'chrom_num': '9;9;9;9',
            'chrom_pos': '36998996;37002118;37000690;36997420',
            'context': 'intron_variant; intron_variant; intron_variant; intron_variant',
            'allele_freq': 'NR',
            'trait': 'Intelligence',
            'trait_uri': 'http://www.ebi.ac.uk/efo/EFO_0004337',
            'pvalue': '0.00000004',
            'merged': '0',
            'snp_id_current': '',
            'mapped_gene': 'PAX5; PAX5; PAX5; PAX5',
            'snp_gene_nums': '',
            'upstream_gene_num': '107986179',
            'downstream_gene_num': '107986180',
            'init_sample_desc': '656 European ancestry individuals from ADHD families',
            'replicated_sample_desc': 'NA',
            'platform': 'Illumina [795637]',
            'pubmed': '22449649'
        }

    def tearDown(self):
        self.source = None

    def test_snp_model(self):
        """
        Test output model of _process_haplotype()
        self._process_haplotype(
                            variant_curie, strongest_snp_risk_allele,
                            chrom_num, chrom_pos, context,
                            risk_allele_frequency, mapped_gene, so_ontology)
        """
        variant_curie, variant_type = \
            self.source._get_curie_and_type_from_id(self.test_data['snp_label'])

        so_ontology = RDFGraph()
        logger.info("Loading SO ontology in separate rdf graph")
        so_ontology.parse(self.source.files['so']['url'], format='xml')
        so_ontology.bind_all_namespaces()
        logger.info("Finished loading SO ontology")

        self.source._process_haplotype(
            variant_curie, self.test_data['snp_label'], self.test_data['chrom_num'],
            self.test_data['chrom_pos'], self.test_data['context'],
            self.test_data['allele_freq'], self.test_data['mapped_gene'], so_ontology)

        sparql_query = """
            SELECT ?snp
            WHERE {
                :haplotype_bcb627b1f64039b0 a OBO:GENO_0000871 ;
                    rdfs:label "rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?" ;
                    OBO:GENO_0000382 ?snp,
                        dbSNP:rs3758171,
                        dbSNP:rs3824344,
                        dbSNP:rs7020413 ;
                OBO:GENO_0000418 <http://identifiers.org/hgnc/HGNC:8619> ;
                OBO:RO_0002162 OBO:NCBITaxon_9606 .

                ?snp a OBO:SO_0000694,
                        OBO:SO_0001627 ;
                    rdfs:label "rs1329573-?" ;
                    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996-36998996-Region> ;
                    OBO:GENO_0000418 <http://identifiers.org/hgnc/HGNC:8619> ;
                    OBO:RO_0002162 OBO:NCBITaxon_9606 .

                dbSNP:rs3758171 a OBO:SO_0000694,
                        OBO:SO_0001627 ;
                    rdfs:label "rs3758171-?" ;
                    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420-36997420-Region> ;
                    OBO:GENO_0000418 <http://identifiers.org/hgnc/HGNC:8619> ;
                    OBO:RO_0002162 OBO:NCBITaxon_9606 .

                dbSNP:rs3824344 a OBO:SO_0000694,
                        OBO:SO_0001627 ;
                    rdfs:label "rs3824344-?" ;
                    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690-37000690-Region> ;
                    OBO:GENO_0000418 <http://identifiers.org/hgnc/HGNC:8619> ;
                    OBO:RO_0002162 OBO:NCBITaxon_9606 .

                dbSNP:rs7020413 a OBO:SO_0000694,
                        OBO:SO_0001627 ;
                    rdfs:label "rs7020413-?" ;
                    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118-37002118-Region> ;
                    OBO:GENO_0000418 <http://identifiers.org/hgnc/HGNC:8619> ;
                    OBO:RO_0002162 OBO:NCBITaxon_9606 .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420-36997420-Region> a faldo:Region ;
                    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> ;
                    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996-36998996-Region> a faldo:Region ;
                    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> ;
                    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690-37000690-Region> a faldo:Region ;
                    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> ;
                    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118-37002118-Region> a faldo:Region ;
                    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> ;
                    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> a faldo:Position ;
                    faldo:position 36997420 ;
                    faldo:reference OBO:CHR_GRCh38chr9 .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> a faldo:Position ;
                    faldo:position 36998996 ;
                    faldo:reference OBO:CHR_GRCh38chr9 .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> a faldo:Position ;
                    faldo:position 37000690 ;
                    faldo:reference OBO:CHR_GRCh38chr9 .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> a faldo:Position ;
                    faldo:position 37002118 ;
                    faldo:reference OBO:CHR_GRCh38chr9 .
            }
        """
        sparql_output = self.source.graph.query(sparql_query)
        # Test that query passes and returns one row
        results = list(sparql_output)
        expected = [(URIRef(self.source.graph._getNode("dbSNP:rs1329573")),)]
        self.assertEqual(results, expected)
Ejemplo n.º 16
0
class TestGwasHaplotypeModel(unittest.TestCase):
    """
    Test the modelling of a  SNP to trait association
    from sample GWAS catalog data
    """
    def setUp(self):
        self.source = GWASCatalog('rdf_graph', True)
        self.test_data = {
            'snp_label': 'rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?',
            'chrom_num': '9;9;9;9',
            'chrom_pos': '36998996;37002118;37000690;36997420',
            'context':
            'intron_variant; intron_variant; intron_variant; intron_variant',
            'allele_freq': 'NR',
            'trait': 'Intelligence',
            'trait_uri': 'http://www.ebi.ac.uk/efo/EFO_0004337',
            'pvalue': '0.00000004',
            'merged': '0',
            'snp_id_current': '',
            'mapped_gene': 'PAX5; PAX5; PAX5; PAX5',
            'snp_gene_nums': '',
            'upstream_gene_num': '107986179',
            'downstream_gene_num': '107986180',
            'init_sample_desc':
            '656 European ancestry individuals from ADHD families',
            'replicated_sample_desc': 'NA',
            'platform': 'Illumina [795637]',
            'pubmed': '22449649'
        }

    def tearDown(self):
        self.source = None

    def test_snp_model(self):
        """
        Test output model of _process_haplotype()
        self._process_haplotype(
                            variant_curie, strongest_snp_risk_allele,
                            chrom_num, chrom_pos, context,
                            risk_allele_frequency, mapped_gene, so_ontology)
        """
        variant_curie, variant_type = \
            self.source._get_curie_and_type_from_id(self.test_data['snp_label'])

        so_ontology = RDFGraph()
        logger.info("Loading SO ontology in separate rdf graph")
        so_ontology.parse(self.source.files['so']['url'], format='xml')
        so_ontology.bind_all_namespaces()
        logger.info("Finished loading SO ontology")

        self.source._process_haplotype(
            variant_curie, self.test_data['snp_label'],
            self.test_data['chrom_num'], self.test_data['chrom_pos'],
            self.test_data['context'], self.test_data['allele_freq'],
            self.test_data['mapped_gene'], so_ontology)

        sparql_query = """
            SELECT ?snp
            WHERE {
                :haplotype_bcb627b1f64039b0 a OBO:GENO_0000871 ;
                    rdfs:label "rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?" ;
                    OBO:GENO_0000382 ?snp,
                        dbSNP:rs3758171,
                        dbSNP:rs3824344,
                        dbSNP:rs7020413 ;
                OBO:GENO_0000418 <http://identifiers.org/hgnc/HGNC:8619> ;
                OBO:RO_0002162 OBO:NCBITaxon_9606 .

                ?snp a OBO:SO_0000694,
                        OBO:SO_0001627 ;
                    rdfs:label "rs1329573-?" ;
                    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996-36998996-Region> ;
                    OBO:GENO_0000418 <http://identifiers.org/hgnc/HGNC:8619> ;
                    OBO:RO_0002162 OBO:NCBITaxon_9606 .

                dbSNP:rs3758171 a OBO:SO_0000694,
                        OBO:SO_0001627 ;
                    rdfs:label "rs3758171-?" ;
                    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420-36997420-Region> ;
                    OBO:GENO_0000418 <http://identifiers.org/hgnc/HGNC:8619> ;
                    OBO:RO_0002162 OBO:NCBITaxon_9606 .

                dbSNP:rs3824344 a OBO:SO_0000694,
                        OBO:SO_0001627 ;
                    rdfs:label "rs3824344-?" ;
                    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690-37000690-Region> ;
                    OBO:GENO_0000418 <http://identifiers.org/hgnc/HGNC:8619> ;
                    OBO:RO_0002162 OBO:NCBITaxon_9606 .

                dbSNP:rs7020413 a OBO:SO_0000694,
                        OBO:SO_0001627 ;
                    rdfs:label "rs7020413-?" ;
                    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118-37002118-Region> ;
                    OBO:GENO_0000418 <http://identifiers.org/hgnc/HGNC:8619> ;
                    OBO:RO_0002162 OBO:NCBITaxon_9606 .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420-36997420-Region> a faldo:Region ;
                    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> ;
                    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996-36998996-Region> a faldo:Region ;
                    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> ;
                    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690-37000690-Region> a faldo:Region ;
                    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> ;
                    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118-37002118-Region> a faldo:Region ;
                    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> ;
                    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> a faldo:Position ;
                    faldo:position 36997420 ;
                    faldo:reference OBO:CHR_GRCh38chr9 .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> a faldo:Position ;
                    faldo:position 36998996 ;
                    faldo:reference OBO:CHR_GRCh38chr9 .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> a faldo:Position ;
                    faldo:position 37000690 ;
                    faldo:reference OBO:CHR_GRCh38chr9 .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> a faldo:Position ;
                    faldo:position 37002118 ;
                    faldo:reference OBO:CHR_GRCh38chr9 .
            }
        """
        sparql_output = self.source.graph.query(sparql_query)
        # Test that query passes and returns one row
        results = list(sparql_output)
        expected = [(URIRef(self.source.graph._getNode("dbSNP:rs1329573")), )]
        self.assertEqual(results, expected)
Ejemplo n.º 17
0
class TestGwasSNPModel(unittest.TestCase):
    """
    Test the modelling of a  SNP to trait association
    from sample GWAS catalog data
    """
    def setUp(self):
        self.source = GWASCatalog('rdf_graph', True)
        self.test_data = {
            'snp_label': 'rs1491921-C',
            'chrom_num': '5',
            'chrom_pos': '21259029',
            'context': 'intergenic_variant',
            'allele_freq': '0.013',
            'trait': 'Diisocyanate-induced asthma',
            'trait_uri':
            'http://www.ebi.ac.uk/efo/EFO_0006995, http://www.ebi.ac.uk/efo/EFO_0003949',
            'pvalue': '0.0000007',
            'merged': '0',
            'snp_id_current': '1491921',
            'mapped_gene': 'LOC102723561 - GUSBP1',
            'snp_gene_nums': '',
            'upstream_gene_num': '107986179',
            'downstream_gene_num': '107986180',
            'init_sample_desc':
            '74 European ancestry cases, 824 European ancestry controls',
            'replicated_sample_desc': 'NA',
            'platform': 'Illumina [1556551]',
            'pubmed': '25918132'
        }

    def tearDown(self):
        self.source = None
        self.efo_ontology = None

    def test_snp_type_resolution(self):
        """
        Given the label: rs1491921-C
        return dbSNP:rs1491921, snp
        """
        variant_curie, variant_type = \
            self.source._get_curie_and_type_from_id(self.test_data['snp_label'])

        self.assertEqual(variant_curie, "dbSNP:rs1491921")
        self.assertEqual(variant_type, 'snp')

    def test_snp_model(self):
        """
        Test output model of _add_snp_to_graph()
        """
        variant_curie, variant_type = \
            self.source._get_curie_and_type_from_id(self.test_data['snp_label'])

        self.source._add_snp_to_graph(variant_curie,
                                      self.test_data['snp_label'],
                                      self.test_data['chrom_num'],
                                      self.test_data['chrom_pos'],
                                      self.test_data['context'],
                                      self.test_data['allele_freq'])

        sparql_query = """
            SELECT ?snp
            WHERE {
                ?snp a OBO:SO_0000694,
                    OBO:SO_0001628 ;
                    rdfs:label "rs1491921-C" ;
                    faldo:location
                    <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029-21259029-Region> ;
                    OBO:RO_0002162 OBO:NCBITaxon_9606 ;
                    dc:description "0.013 [risk allele frequency]" .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029-21259029-Region> a faldo:Region ;
                    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> ;
                    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> a faldo:Position ;
                    faldo:position 21259029 ;
                    faldo:reference OBO:CHR_GRCh38chr5 .
            }
        """
        # To debug
        #print(self.source.graph.serialize(format="turtle").decode("utf-8"))
        #self.assertTrue(False)

        sparql_output = self.source.graph.query(sparql_query)
        # Test that query passes and returns one row
        results = list(sparql_output)
        expected = [(URIRef(self.source.graph._getNode("dbSNP:rs1491921")), )]
        self.assertEqual(results, expected)

    def test_snp_gene_relation(self):
        """
        test the _add_snp_gene_relation function
        :return:
        """

        variant_curie, variant_type = \
            self.source._get_curie_and_type_from_id(self.test_data['snp_label'])

        self.source._add_snp_gene_relation(
            variant_curie, self.test_data['snp_gene_nums'],
            self.test_data['upstream_gene_num'],
            self.test_data['downstream_gene_num'])

        sparql_query = """
            SELECT ?snp
            WHERE {
                ?snp OBO:RO_0002528 <http://www.ncbi.nlm.nih.gov/gene/107986180> ;
                     OBO:RO_0002529 <http://www.ncbi.nlm.nih.gov/gene/107986179> .
            }
        """
        sparql_output = self.source.graph.query(sparql_query)
        # Test that query passes and returns one row
        results = list(sparql_output)
        expected = [(URIRef(self.source.graph._getNode("dbSNP:rs1491921")), )]
        self.assertEqual(results, expected)

    def test_deprecated_snp(self):
        """
        test the _add_deprecated_snp
        :return:
        """
        #fake data
        snp_id_current = '12345'
        merged = '1'

        variant_curie, variant_type = \
            self.source._get_curie_and_type_from_id(self.test_data['snp_label'])

        self.source._add_deprecated_snp(variant_curie, snp_id_current, merged,
                                        self.test_data['chrom_num'],
                                        self.test_data['chrom_pos'])

        sparql_query = """
            SELECT ?snp
            WHERE {
                ?snp a owl:NamedIndividual ;
                    OBO:IAO_0100001 dbSNP:rs12345 ;
                    owl:deprecated true .

                dbSNP:rs12345 MONARCH:cliqueLeader true .
            }
        """
        sparql_output = self.source.graph.query(sparql_query)
        # Test that query passes and returns one row
        results = list(sparql_output)
        expected = [(URIRef(self.source.graph._getNode("dbSNP:rs1491921")), )]
        self.assertEqual(results, expected)

    def test_snp_trait_association(self):
        """
        test the _add_variant_trait_association
        :return:
        """
        efo_ontology = RDFGraph()
        logger.info("Loading EFO ontology in separate rdf graph")
        efo_ontology.parse(self.source.files['efo']['url'], format='xml')
        efo_ontology.bind_all_namespaces()
        logger.info("Finished loading EFO ontology")

        variant_curie, variant_type = \
            self.source._get_curie_and_type_from_id(self.test_data['snp_label'])

        description = self.source._make_description(
            self.test_data['trait'], self.test_data['init_sample_desc'],
            self.test_data['replicated_sample_desc'],
            self.test_data['platform'], self.test_data['pvalue'])

        self.source._add_variant_trait_association(variant_curie,
                                                   self.test_data['trait_uri'],
                                                   efo_ontology,
                                                   self.test_data['pubmed'],
                                                   description)

        sparql_query = """
            SELECT ?snp
            WHERE {{
                <https://monarchinitiative.org/MONARCH_b46cdf48950cb00d> a OBAN:association ;
                    dc:description "{}" ;
                    OBO:RO_0002558 OBO:ECO_0000213 ;
                    dc:source PMID:25918132 ;
                    OBAN:association_has_object EFO:0003949 ;
                    OBAN:association_has_predicate OBO:RO_0002326 ;
                    OBAN:association_has_subject ?snp .

                <https://monarchinitiative.org/MONARCH_70a05d8eb1c3d4b0> a OBAN:association ;
                    OBO:RO_0002558 OBO:ECO_0000213 ;
                    dc:source PMID:25918132 ;
                    OBAN:association_has_object EFO:0006995 ;
                    OBAN:association_has_predicate OBO:RO_0002326 ;
                    OBAN:association_has_subject ?snp .

                EFO:0003949 a owl:Class ;
                    rdfs:label "eye color"^^xsd:string ;
                    rdfs:subClassOf UPHENO:0001001 .

                ?snp OBO:RO_0002326 EFO:0003949,
                        EFO:0006995 .

                PMID:25918132 a OBO:IAO_0000013 .
            }}
        """.format(description)
        sparql_output = self.source.graph.query(sparql_query)
        # Test that query passes and returns one row
        results = list(sparql_output)
        expected = [(URIRef(self.source.graph._getNode("dbSNP:rs1491921")), )]
        self.assertEqual(results, expected)
Ejemplo n.º 18
0
 def setUp(self):
     self.source = GWASCatalog()
     self.source.settestonly(True)
     self.source.setnobnodes(True)
     self._setDirToSource()
     return