Exemple #1
0
    def test_graph_equality(self, turtlish, graph):
        """
        Compare a graph against an expected set of triples given as Turtle.

        A prefix header is generated from the namespaces of a fresh
        RDFGraph (after bind_all_namespaces()) and prepended to
        ``turtlish`` before parsing, so callers only supply the triples.

        :param turtlish: String of triples in turtle
                         format without prefix header
        :param graph: Graph object to test against
        :return: Boolean, True if graphs contain same
                          set of triples
        """
        turtle_graph = RDFGraph()
        turtle_graph.bind_all_namespaces()
        prefixes = "\n".join(
            "@prefix {}: <{}> .".format(prefix, iri)
            for prefix, iri in turtle_graph.namespace_manager.namespaces()
        )

        # Put the caller's triples on their own line so the last @prefix
        # directive never runs straight into the first statement.
        turtle_string = prefixes + "\n" + turtlish
        mock_file = io.StringIO(turtle_string)
        turtle_graph.parse(mock_file, format="turtle")
        turtle_triples = set(turtle_graph)
        ref_triples = set(graph)
        equality = turtle_triples == ref_triples
        if not equality:
            # Left = expected but absent from graph; right = the reverse.
            logger.warning("Triples do not match\n"
                           "Left hand difference: {}\n"
                           "Right hand difference:{}".format(
                turtle_triples - ref_triples,
                ref_triples - turtle_triples
            ))
        return equality
Exemple #2
0
    def test_snp_trait_association(self):
        """
        Exercise _add_variant_trait_association and check, with a SPARQL
        query over the source graph, that the expected association
        pattern exists and binds ?snp to the expected node.
        :return:
        """
        efo_graph = RDFGraph()
        logger.info("Loading EFO ontology in separate rdf graph")
        efo_graph.parse(self.source.files['efo']['url'], format='xml')
        efo_graph.bind_all_namespaces()
        logger.info("Finished loading EFO ontology")

        test_data = self.test_data
        # Only the curie is needed below; the type is unpacked and ignored.
        variant_curie, _variant_type = \
            self.source._get_curie_and_type_from_id(test_data['snp_label'])

        description = self.source._make_description(
            test_data['trait'],
            test_data['init_sample_desc'],
            test_data['replicated_sample_desc'],
            test_data['platform'],
            test_data['pvalue'])

        self.source._add_variant_trait_association(
            variant_curie,
            test_data['trait_uri'],
            efo_graph,
            test_data['pubmed'],
            description)

        # Doubled braces survive str.format as literal SPARQL braces.
        query_template = """
            SELECT ?snp
            WHERE {{
                <https://monarchinitiative.org/MONARCH_b46cdf48950cb00d> a OBAN:association ;
                    dc:description "{}" ;
                    OBO:RO_0002558 OBO:ECO_0000213 ;
                    dc:source PMID:25918132 ;
                    OBAN:association_has_object EFO:0003949 ;
                    OBAN:association_has_predicate OBO:RO_0002326 ;
                    OBAN:association_has_subject ?snp .

                <https://monarchinitiative.org/MONARCH_70a05d8eb1c3d4b0> a OBAN:association ;
                    OBO:RO_0002558 OBO:ECO_0000213 ;
                    dc:source PMID:25918132 ;
                    OBAN:association_has_object EFO:0006995 ;
                    OBAN:association_has_predicate OBO:RO_0002326 ;
                    OBAN:association_has_subject ?snp .

                EFO:0003949 a owl:Class ;
                    rdfs:label "eye color"^^xsd:string ;
                    rdfs:subClassOf UPHENO:0001001 .

                ?snp OBO:RO_0002326 EFO:0003949,
                        EFO:0006995 .

                PMID:25918132 a OBO:IAO_0000013 .
            }}
        """
        sparql_query = query_template.format(description)
        rows = list(self.source.graph.query(sparql_query))

        # The query must succeed and return exactly one row: the SNP node.
        snp_node = self.source.graph._getNode("dbSNP:rs1491921")
        self.assertEqual(rows, [(URIRef(snp_node),)])
Exemple #3
0
    def __init__(self, identifier, title, url, description=None,
                 license_url=None, data_rights=None, graph_type=None,
                 file_handle=None):
        """
        Create the dataset graph and record the dataset metadata as triples.

        :param identifier: dataset identifier; stored as the dct:identifier
            literal and, prefixed with ':', used as the subject of every
            triple added here
        :param title: object of the dct:title triple
        :param url: object of the foaf:page triple
        :param description: optional description, added through the model
        :param license_url: optional object of the dct:license triple
        :param data_rights: optional literal for the dct:rights triple
        :param graph_type: None (default) builds RDFGraph(None, identifier),
            'rdf_graph' builds RDFGraph(), 'streamed_graph' builds a
            StreamedGraph constructed with file_handle
        :param file_handle: handed to StreamedGraph when graph_type is
            'streamed_graph'
        :raises ValueError: if graph_type is not one of the values above
        """
        if graph_type is None:
            self.graph = RDFGraph(None, identifier)
        elif graph_type == 'streamed_graph':
            self.graph = StreamedGraph(True, file_handle=file_handle)
        elif graph_type == 'rdf_graph':
            self.graph = RDFGraph()
        else:
            # Fail here with a clear message rather than later with an
            # AttributeError on the never-assigned self.graph.
            raise ValueError(
                "Unknown graph_type {!r}: expected None, 'streamed_graph' "
                "or 'rdf_graph'".format(graph_type))
        self.model = Model(self.graph)
        self.identifier = ':' + identifier
        self.version = None
        self.date_issued = None

        # The date_accessed value is later used as a literal of properties
        # such as dct:issued, which needs to conform to xsd:dateTime format.
        # TODO ... we need to have a talk about typed literals and SPARQL
        self.date_accessed = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')

        self.citation = set()
        self.license = license_url
        self.model.addType(self.identifier, 'dctypes:Dataset')
        # NOTE(review): the positional True presumably maps to
        # object_is_literal, as spelled out in the next call -- confirm.
        self.graph.addTriple(self.identifier, 'dct:title', title, True)
        self.graph.addTriple(
            self.identifier, 'dct:identifier',
            identifier, object_is_literal=True)
        self.graph.addTriple(self.identifier, 'foaf:page', url)
        # maybe in the future add the logo here:
        # schemaorg:logo <http://www.ebi.ac.uk/rdf/sites/ebi.ac.uk.rdf/files/resize/images/rdf/chembl_service_logo-146x48.gif> .

        # TODO add the licence info
        # FIXME: Temporarily making this an IF statement,
        #  can revert after all current resources are updated.
        if license_url is not None:
            self.graph.addTriple(
                self.identifier, 'dct:license', license_url)
        else:
            logger.debug('No license provided.')
        if data_rights is not None:
            self.graph.addTriple(
                self.identifier, 'dct:rights',
                data_rights, object_is_literal=True)
        else:
            logger.debug('No rights provided.')

        if description is not None:
            self.model.addDescription(self.identifier, description)
Exemple #4
0
    def test_parse(self):
        """
        Parse the source into a freshly reset graph and compare the
        result with the expected triples.
        """
        self.source.graph = RDFGraph(True)  # Reset graph
        triple_count = len(list(self.source.graph))
        self.assertTrue(triple_count == 0)

        self.source.parse()

        expected_turtle = """
        UNII:46U771ERWK RO:0002606 SNOMED:386761002 ;
            rdfs:subClassOf CHEBI:23367 .

        SNOMED:386761002 rdfs:label "Local anesthesia" ;
            rdfs:subClassOf MONDO:0000001 .

        UNII:46U771ERWK biolink:category biolink:ChemicalSubstance .
        SNOMED:386761002 biolink:category biolink:Disease .

        """

        # dbg: dump what the parser actually produced
        serialized = self.source.graph.serialize(format="turtle")
        logger.debug("Reference graph: %s", serialized.decode("utf-8"))

        graphs_match = self.test_util.test_graph_equality(
            expected_turtle, self.source.graph)
        self.assertTrue(graphs_match)
Exemple #5
0
    def testSGDParser(self):
        """
        Build an association from the first test record and check the
        exact contents of the resulting graph.
        """
        sgd = SGD('rdf_graph', True)
        sgd.graph = RDFGraph(True)
        record = self.test_set_1
        sgd.make_association(record)

        # The description is interpolated into the expected triples below.
        description = sgd._make_description(record)

        expected_template = """
        :MONARCH_ba748c98c0f167739128 a OBAN:association ;
            OBO:RO_0002558 OBO:APO_0000020 ;
            dc:description "{0}";
            dc:source PMID:21715656 ;
            OBAN:association_has_object MONARCH:APO_0000309APO_0000245 ;
            OBAN:association_has_predicate OBO:RO_0002200 ;
            OBAN:association_has_subject SGD:S000007268 .
            
        SGD:S000007268 rdfs:label "ATP6" ;
        RO:0002200 MONARCH:APO_0000309APO_0000245 .

        APO:0000020 rdfs:label "classical genetics" .

        PMID:21715656 a OBO:IAO_0000311 ;
        owl:sameAs SGD_REF:S000145858 .

        MONARCH:APO_0000309APO_0000245 rdfs:label "respiratory growth:decreased rate" ;
        rdfs:subClassOf UPHENO:0001001 .

        """
        triples = expected_template.format(description)

        # test exact contents of graph
        graphs_match = self.test_util.test_graph_equality(triples, sgd.graph)
        self.assertTrue(graphs_match)
Exemple #6
0
    def test_patient_phenotype_model(self):
        """
        Functional test for _parse_patient_phenotypes(): feed two mocked
        tab-separated rows and compare the graph with the expected triples.
        """
        udp = UDP('rdf_graph', True)
        udp.graph = RDFGraph(True)

        # The graph must start out empty.
        self.assertTrue(len(list(udp.graph)) == 0)

        rows = [
            "\t".join(('patient_1', 'HP:000001', 'yes')),
            "\t".join(('patient_1', 'HP:000002', 'no')),
        ]
        fake_handle = MagicMock()
        fake_handle.__iter__.return_value = iter(rows)
        fake_file = mock_open(mock=fake_handle)

        udp._parse_patient_phenotypes(fake_file)

        expected_turtle = """
        :patient_1 a foaf:Person ;
            rdfs:label "patient_1" ;
            RO:0002200 DOID:4,
              HP:000001 .
        """

        graphs_match = self.test_util.test_graph_equality(
            expected_turtle, udp.graph)
        self.assertTrue(graphs_match)
Exemple #7
0
    def test_evidence_model(self):
        """
        Functional test for _add_evidence(): add evidence built from
        fields 23-25 of the first test record and compare the graph with
        the expected triples.
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)  # Reset graph
        # The graph must start out empty.
        self.assertTrue(len(list(impc.graph)) == 0)

        p_value, percentage_change, effect_size = self.test_set_1[23:26]

        impc._add_evidence(
            self.assoc_curie,
            self.eco_id,
            p_value,
            percentage_change,
            effect_size,
            self.study_curie)

        expected_turtle = """
    :MONARCH_test_association SEPIO:0000007 <https://monarchinitiative.org/.well-known/genid/b097a98087df7a99> .

    <https://monarchinitiative.org/.well-known/genid/b097a98087df7a99> a ECO:0000015 ;
        SEPIO:0000084 <https://monarchinitiative.org/.well-known/genid/b89ee584330837c9>,
            <https://monarchinitiative.org/.well-known/genid/bc0eeccdea27a1d8> ;
        SEPIO:0000085 <https://monarchinitiative.org/.well-known/genid/study> .

    <https://monarchinitiative.org/.well-known/genid/bc0eeccdea27a1d8> a OBI:0000175 ;
        RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
        STATO:0000129 1.637023e-10 .

    <https://monarchinitiative.org/.well-known/genid/b89ee584330837c9> a STATO:0000085 ;
        RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
        STATO:0000129 "8.885439E-007" .
        """

        graphs_match = self.test_util.test_graph_equality(
            expected_turtle, impc.graph)
        self.assertTrue(graphs_match)
Exemple #8
0
    def test_gene_to_disease(self):
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-no-variant.xml'

        self.orphanet._process_diseasegene(limit=None)
        logger.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8"))
        expected_triples = """
MONARCH:b64684a0ea6ae59fdb09 a OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object Orphanet:938475 ;
    OBAN:association_has_predicate RO:0003304 ;
    OBAN:association_has_subject Orphanet:268061 .

Orphanet:268061 a owl:Class ;
    rdfs:label "KS1" ;
    RO:0003304 Orphanet:938475 ;
    dc:description "kinesin family member 7" ;
    oboInOwl:hasExactSynonym "KAS1" ;
    rdfs:subClassOf SO:0001217 .

Orphanet:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
        """
        self.assertTrue(
            self.test_util.test_graph_equality(expected_triples,
                                               self.orphanet.graph))
Exemple #9
0
    def test_assertion_model(self):
        """
        Functional test for _add_assertion_provenance(): add assertion
        provenance to an empty graph and compare with the expected triples.
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        triple_count = len(list(impc.graph))
        self.assertTrue(triple_count == 0)

        impc._add_assertion_provenance(self.assoc_curie, self.evidence_curie)

        expected_turtle = """
    MONARCH:test_association SEPIO:0000015 <https://monarchinitiative.org/.well-known/genid/bff92df374a88496> .
    <https://monarchinitiative.org/.well-known/genid/bff92df374a88496> a SEPIO:0000001 ;
        SEPIO:0000018 <https://www.mousephenotype.org/> ;
        SEPIO:0000111 <https://monarchinitiative.org/.well-known/genid/evidence>  .

    <https://www.mousephenotype.org/> a foaf:organization ;
        rdfs:label "International Mouse Phenotyping Consortium" .

        """
        # dbg: dump what was actually produced
        produced = impc.graph.serialize(format="turtle")
        logger.info("Assertion graph:\n %s\n", produced.decode("utf-8"))

        graphs_match = self.test_util.test_graph_equality(
            expected_turtle, impc.graph)
        self.assertTrue(graphs_match)
Exemple #10
0
 def setUp(self):
     """
     Create the test utilities, a GWASCatalog source with a freshly
     reset graph (all namespaces bound) and one record of test data.
     """
     self.test_util = TestUtils()
     self.source = GWASCatalog('rdf_graph', True)
     self.source.graph = RDFGraph(True)  # Reset graph
     self.source.graph.bind_all_namespaces()
     # Single-SNP record; trait_uri carries two comma-separated EFO IRIs.
     self.test_data = dict(
         snp_label='rs1491921-C',
         chrom_num='5',
         chrom_pos='21259029',
         context='intergenic_variant',
         allele_freq='0.013',
         trait='Diisocyanate-induced asthma',
         trait_uri='http://www.ebi.ac.uk/efo/EFO_0006995, http://www.ebi.ac.uk/efo/EFO_0003949',
         pvalue='0.0000007',
         merged='0',
         snp_id_current='1491921',
         mapped_gene='LOC102723561 - GUSBP1',
         snp_gene_nums='',
         upstream_gene_num='107986179',
         downstream_gene_num='107986180',
         init_sample_desc='74 European ancestry cases, 824 European ancestry controls',
         replicated_sample_desc='NA',
         platform='Illumina [1556551]',
         pubmed='25918132',
     )
Exemple #11
0
    def testFakeDataSet1(self):
        string_db = StringDB('rdf_graph', True)
        string_db.graph = RDFGraph(True)
        self.assertEqual(len(string_db.graph), 0)

        ensembl = Ensembl('rdf_graph', True)
        prot_map = ensembl.fetch_protein_gene_map('9606')

        [prot_map.update({k: ['ENSEMBL:' + prot_map[k]]}) for k in prot_map.keys()]

        print("Finished fetching ENSP IDs, fetched {} proteins".format(len(prot_map)))

        # just looking
        # for key in prot_map:
        #    if string_db.graph.curie_regexp.match(prot_map[key]) is None:
        #        print("INVALID curie for %s from %s", prot_map[key], key)

        dataframe = pd.DataFrame(data=self.test_set_1, columns=self.columns)

        string_db._process_protein_links(dataframe, prot_map, '9606')

        # g1 <interacts with> g2
        triples = """
ENSEMBL:ENSG00000001626 RO:0002434 ENSEMBL:ENSG00000004059 .
        """

        self.assertTrue(self.test_util.test_graph_equality(triples, string_db.graph))
Exemple #12
0
    def readGraphFromTurtleFile(self, f):
        """
        Parse the given turtle file into a new graph and assert that the
        graph is not empty.  A simple parsing test.
        :param f: path of the turtle file to read
        :return:

        """
        import os
        turtle_graph = RDFGraph()
        abs_path = os.path.abspath(f)
        logger.info("Testing reading turtle file from %s", abs_path)
        turtle_graph.parse(f, format="turtle")
        logger.info('Found %s graph nodes in %s', len(turtle_graph), abs_path)
        failure_msg = "No nodes found in " + abs_path
        self.assertTrue(len(turtle_graph) > 0, failure_msg)
Exemple #13
0
    def testEnsemblReactomeParser(self):
        """
        Add one component-to-pathway association built from the first
        test record and compare the graph with the expected triples.
        """
        reactome = Reactome('rdf_graph', True)
        reactome.graph = RDFGraph(True)
        self.assertTrue(len(list(reactome.graph)) == 0)

        eco_map = Reactome.get_eco_map(Reactome.map_files['eco_map'])

        # The record must hold exactly six fields; the IRI and the species
        # name are unpacked but not used by the call below.
        (gene, pathway_id, _pathway_iri, pathway_label, go_ecode,
         _species_name) = self.test_set_1

        reactome._add_component_pathway_association(
            eco_map, gene, 'ENSEMBL', pathway_id, 'REACT',
            pathway_label, go_ecode)

        expected_turtle = """
        ENSEMBL:ENSBTAP00000013354 RO:0002331 REACT:R-BTA-3000480 .
        
        :MONARCH_b582c188b7ec20016206 a OBAN:association ;
            OBO:RO_0002558 ECO:0000501 ;
            OBAN:association_has_object REACT:R-BTA-3000480 ;
            OBAN:association_has_predicate RO:0002331 ;
            OBAN:association_has_subject ENSEMBL:ENSBTAP00000013354 .

        REACT:R-BTA-3000480 a owl:Class ;
            rdfs:label "Scavenging by Class A Receptors" ;
            rdfs:subClassOf GO:0009987,
                PW:0000001 .
        """
        graphs_match = self.test_util.test_graph_equality(
            expected_turtle, reactome.graph)
        self.assertTrue(graphs_match)
Exemple #14
0
    def test_gene_to_disease(self):
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-no-variant.xml'

        self.orphanet._process_diseasegene(limit=None)
        LOG.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle") .decode("utf-8")
        )
        expected_triples = """
MONARCH:bdbeb077e365ddedda20 a OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object ORPHA:938475 ;
    OBAN:association_has_predicate RO:0004015 ;
    OBAN:association_has_subject ORPHA:268061 .

ORPHA:268061 RO:0004015 ORPHA:938475 ;
    oboInOwl:hasExactSynonym "KAS1" .

ORPHA:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
        """
        self.assertTrue(self.test_util.test_graph_equality(
            expected_triples, self.orphanet.graph))
        return
Exemple #15
0
    def test_germline_lof_variant_to_disease(self):
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-germline-lof.xml'

        self.orphanet._process_diseasegene(limit=None)
        logger.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8"))
        expected_triples = """
MONARCH:b53dada0eb229a75e705 OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object Orphanet:938475 ;
    OBAN:association_has_predicate RO:0003303 ;
    OBAN:association_has_subject <https://monarchinitiative.org/.well-known/genid/ba0884fb61004110> .

Orphanet:268061 a owl:Class ;
    rdfs:label "KS1" ;
    dc:description "kinesin family member 7" ;
    oboInOwl:hasExactSynonym "KAS1" ;
    rdfs:subClassOf SO:0001217 .

<https://monarchinitiative.org/.well-known/genid/ba0884fb61004110> a GENO:0000002 ;
    rdfs:label "germline loss of function variant of KS1" ;
    GENO:0000418 Orphanet:268061 ;
    RO:0003303 Orphanet:938475 ;
    :MONARCH_anonymous true ;
    :has_cell_origin GENO:0000900 ;
    :has_functional_consequence SO:0002054 .

Orphanet:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
        """
        self.assertTrue(
            self.test_util.test_graph_equality(expected_triples,
                                               self.orphanet.graph))
Exemple #16
0
    def test_germline_variant_to_disease(self):
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-germline.xml'

        self.orphanet._process_diseasegene(limit=None)
        LOG.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8")
        )
        expected_triples = """
MONARCH:ba2ac5d2153c70e2bb98 a OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object ORPHA:938475 ;
    OBAN:association_has_predicate RO:0004013 ;
    OBAN:association_has_subject HGNC:30497 .

ENSEMBL:ENSG00000166813 a owl:Class .

HGNC:30497 a owl:Class ;
    RO:0004013 ORPHA:938475 ;
    oboInOwl:hasExactSynonym "KAS1" ;
    owl:equivalentClass ENSEMBL:ENSG00000166813,
       ORPHA:268061 .

ORPHA:268061 a owl:Class .

ORPHA:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
        """
        self.assertTrue(self.test_util.test_graph_equality(
            expected_triples, self.orphanet.graph))
        return
Exemple #17
0
 def setUp(self):
     """
     Create the test utilities, a GWASCatalog source with a freshly
     reset graph and one multi-SNP record of test data.
     """
     self.test_util = TestUtils()
     self.source = GWASCatalog('rdf_graph', True)
     self.source.graph = RDFGraph(True)
     # Record whose SNP, position, context and gene fields each hold
     # four ';'-separated entries.
     self.test_data = dict(
         snp_label='rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?',
         chrom_num='9;9;9;9',
         chrom_pos='36998996;37002118;37000690;36997420',
         context=(
             'intron_variant; intron_variant; intron_variant; intron_variant'),
         allele_freq='NR',
         trait='Intelligence',
         trait_uri='http://www.ebi.ac.uk/efo/EFO_0004337',
         pvalue='0.00000004',
         merged='0',
         snp_id_current='',
         mapped_gene='PAX5; PAX5; PAX5; PAX5',
         snp_gene_nums='',
         upstream_gene_num='107986179',
         downstream_gene_num='107986180',
         init_sample_desc=(
             '656 European ancestry individuals from ADHD families'),
         replicated_sample_desc='NA',
         platform='Illumina [795637]',
         pubmed='22449649',
     )
Exemple #18
0
    def readGraphFromTurtleFile(self, f):
        """
        Load the given turtle file into a new graph and assert that
        something was read.  A simple parsing test.
        :param f: path of the turtle file to read
        :return:

        """
        import os
        full_path = os.path.abspath(f)
        parsed = RDFGraph()
        logger.info("Testing reading turtle file from %s", full_path)
        parsed.parse(f, format="turtle")
        logger.info('Found %s graph nodes in %s', len(parsed), full_path)
        has_content = len(parsed) > 0
        self.assertTrue(has_content, "No nodes found in " + full_path)
Exemple #19
0
class GeneralGraphTestCase(unittest.TestCase):
    """
    Sanity checks for RDFGraph: node creation for every configured curie
    prefix, plus helpers that validate turtle files.
    """

    def setUp(self):
        """Create a fresh graph and load the curie map for each test."""
        self.graph = RDFGraph()
        self.curie_map = curie_map.get()

    def tearDown(self):
        """Drop the reference to the graph created in setUp."""
        self.graph = None

    def test_curieprefixes(self):
        """
        This will ensure that we can create identifiers for all of the
        defined curie prefixes using the graph's _getNode() method
        :return:

        """
        # build one test id per curie prefix and resolve it to a node
        for prefix in self.curie_map:
            testid = prefix + ':testme'
            node = self.graph._getNode(testid)
            msg = (
                "prefix \"" + prefix
                + "\" has an error...can't create graph node")
            self.assertIsNotNone(node, msg)

    def readGraphFromTurtleFile(self, f):
        """
        This will read the specified file into a graph.  A simple parsing test.
        :param f: path of the turtle file to read
        :return:

        """
        import os
        vg = RDFGraph()
        p = os.path.abspath(f)
        logger.info("Testing reading turtle file from %s", p)
        vg.parse(f, format="turtle")
        logger.info('Found %s graph nodes in %s', len(vg), p)
        self.assertTrue(len(vg) > 0, "No nodes found in " + p)

    def readGraphIntoOWL(self, f):
        """
        test if the ttl can be parsed by owlparser
        this expects owltools to be accessible from commandline
        :param f: file of ttl
        :return:
        """

        import subprocess

        # subprocess.call returns the exit status, whereas check_call
        # raises CalledProcessError on a non-zero exit, which made the
        # status check below unreachable for failures.
        status = subprocess.call(["owltools", f], stderr=subprocess.STDOUT)
        # zero means success
        if status != 0:
            logger.error('finished verifying with owltools with status %s',
                         status)
        self.assertEqual(status, 0)
Exemple #20
0
    def test_provenance_model(self):
        """
        Functional test for _add_study_provenance()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        self.assertTrue(len(list(impc.graph)) == 0)

        (phenotyping_center, colony) = self.test_set_1[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_1[12:19]
        (statistical_method, resource_name) = self.test_set_1[26:28]

        impc._add_study_provenance(phenotyping_center, colony,
                                   project_fullname, pipeline_name,
                                   pipeline_stable_id, procedure_stable_id,
                                   procedure_name, parameter_stable_id,
                                   parameter_name, statistical_method,
                                   resource_name)

        # dbg
        LOG.info("Provenance graph as turtle:\n%s\n",
                 impc.graph.serialize(format="turtle").decode("utf-8"))

        triples = """
<https://monarchinitiative.org/.well-known/genid/bdd05a8ca155ddaf415e> a OBI:0000471 ;
  BFO:0000051 OBO:STATO_0000076,
      <https://www.mousephenotype.org/impress/protocol/175/15> ;
  BFO:0000050  IMPRESS-procedure:15 ,
      <http://www.sanger.ac.uk/science/data/mouse-genomes-project> ;
  SEPIO:0000114 <https://www.mousephenotype.org/impress/parameterontologies/1867/91> ;
  SEPIO:0000017 <http://www.sanger.ac.uk/>  .

<https://monarchinitiative.org/.well-known/genid/b0b26361b8687b5ad9ef> a owl:NamedIndividual ;
    rdfs:label "MEFW" .

<http://www.sanger.ac.uk/> a foaf:organization ;
    rdfs:label "WTSI" .

<http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
    rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" .

<https://www.mousephenotype.org/impress/parameterontologies/1867/91> a owl:NamedIndividual ;
    rdfs:label "Number of ribs right (X-ray)" .

IMPRESS-procedure:15 a owl:NamedIndividual ;
    rdfs:label "MGP Select Pipeline" .

<https://www.mousephenotype.org/impress/protocol/175/15> a owl:NamedIndividual ;
    rdfs:label "X-ray" .
"""

        # dbg
        LOG.debug("Reference graph: %s",
                  impc.graph.serialize(format="turtle").decode("utf-8"))
        self.assertTrue(self.test_util.test_graph_equality(
            triples, impc.graph))
Exemple #21
0
    def test_parse(self):
        """
        For every entry in RCVS, run clinvar_parse() with a mocked command
        line, load the N-Triples it writes and compare them with the
        reference turtle file for that entry.

        A dot rendering of the parsed graph is also written per entry.
        """
        for rcv in RCVS:
            output_nt = rcv + '.nt'
            input_xml = rcv + '.xml.gz'
            reference_ttl = TTL_PATH + rcv + '.ttl'
            with self.subTest(rcv=rcv):

                mock_args = [
                    "test_clinvar.py", "--inputdir", XML_PATH, "--filename",
                    input_xml, "--mapfile", MAP_FILE, "--destination", NT_PATH,
                    "--output", output_nt
                ]

                # Patch sys.argv only while the parser runs. A bare
                # patch(...).start() is never stopped and would leave the
                # mocked arguments in place after the test.
                with patch('sys.argv', mock_args):
                    clinvar_parse()
                query_graph = RDFGraph()
                query_graph.bind_all_namespaces()
                query_graph.parse(NT_PATH + output_nt, format='nt')

                # Read the reference verbatim. Joining readlines() with
                # "\n" doubled every line break, since each line already
                # ends with its own newline.
                with open(reference_ttl, 'r') as ref_fh:
                    ref_graph = ref_fh.read()

                # debug
                LOG.debug(
                    "Reference graph: %s",
                    query_graph.serialize(format="turtle").decode("utf-8"))

                # Convert output from ClinVar parse to dot
                dot_file_path = DOT_PATH + rcv + ".dot"
                with open(dot_file_path, 'w') as dot_file:
                    rdf2dot(query_graph, dot_file)

                self.assertTrue(
                    TestUtils.test_graph_equality(ref_graph, query_graph))
Exemple #22
0
 def setUp(self):
     """
     Create the test utilities, a CTD source with a freshly reset graph
     and one row of test data.
     """
     self.test_util = TestUtils()
     self.source = CTD('rdf_graph', True)
     self.source.graph = RDFGraph(True)
     row_head = ['Nicotine', 'D009538', '']
     row_tail = [
         'TOBACCO ADDICTION, SUSCEPTIBILITY TO', 'OMIM:188890',
         'therapeutic', '', '', '', '12345|56789',
     ]
     self.test_row = row_head + row_tail
Exemple #23
0
    def test_variant_model(self):
        """
        Functional test for _parse_patient_variants(): feed one mocked
        tab-separated variant row and compare the graph with the
        expected triples.
        """
        udp = UDP('rdf_graph', True)
        udp.graph = RDFGraph(True)
        # The graph must start out empty.
        self.assertTrue(len(list(udp.graph)) == 0)

        # 23 columns: 11 leading values, 7 empty ones, 5 trailing values.
        leading = [
            'patient_1', 'family_1', '1', 'HG19', '155230432', 'G', 'A',
            'Maternal', 'Biallelic', 'Non-synonymous;DOWNSTREAM', 'CLK2',
        ]
        blanks = [''] * 7
        trailing = [
            'Compound heterozygous', 'Heterozygous', '', '0.002747253', '',
        ]
        row = "\t".join(leading + blanks + trailing)

        fake_handle = MagicMock()
        fake_handle.__iter__.return_value = iter([row])
        fake_file = mock_open(mock=fake_handle)

        udp._parse_patient_variants(fake_file)

        expected_turtle = """
        :patient_1 GENO:0000222 <https://monarchinitiative.org/.well-known/genid/ba5f377fc8c95d4a6d7a> .

        <https://monarchinitiative.org/.well-known/genid/b41e8da0787b45e24c4f> a SO:0001059 ;
            rdfs:label "hg19chr1(CLK2):g.155230432G>A" ;
            GENO:0000418 HGNC:2069 ;
            RO:0002162 NCBITaxon:9606 ;
            owl:sameAs dbSNP:rs11557757 .

        <https://monarchinitiative.org/.well-known/genid/ba5f377fc8c95d4a6d7a> a GENO:0000000 ;
            rdfs:label "patient_1 genotype" ;
            GENO:0000382 <https://monarchinitiative.org/.well-known/genid/b41e8da0787b45e24c4f> .
        """

        graphs_match = self.test_util.test_graph_equality(
            expected_turtle, udp.graph)
        self.assertTrue(graphs_match)
Exemple #24
0
 def test_unmapped_disease_assoc_type(self):
     """
     Processing 'orph-no-mapping.xml', whose gene disease type is
     not mapped in translationtable/orphanet.yaml,
     must raise a ValueError
     """
     orphanet = self.orphanet
     orphanet.graph = RDFGraph()  # Reset graph
     orphanet.files['disease-gene']['file'] = 'orph-no-mapping.xml'
     with self.assertRaises(ValueError):
         self.orphanet._process_diseasegene(limit=None)
Exemple #25
0
    def test_graph_equality(turtlish, graph):
        """
        Compare a graph against an expected set of triples given as Turtle.

        A prefix header is generated from the namespaces of a fresh
        RDFGraph (after bind_all_namespaces()) and prepended to
        ``turtlish`` before parsing, so callers only supply the triples.

        :param turtlish: String of triples in turtle
                         format without prefix header
        :param graph: Graph object to test against
        :return: Boolean, True if graphs contain same
                          set of triples
        """
        turtle_graph = RDFGraph()
        turtle_graph.bind_all_namespaces()
        prefixes = "\n".join(
            "@prefix {}: <{}> .".format(prefix, iri)
            for prefix, iri in turtle_graph.namespace_manager.namespaces()
        )

        # Put the caller's triples on their own line so the last @prefix
        # directive never runs straight into the first statement.
        turtle_string = prefixes + "\n" + turtlish
        mock_file = io.StringIO(turtle_string)
        turtle_graph.parse(mock_file, format="turtle")
        turtle_triples = set(turtle_graph)
        ref_triples = set(graph)
        equality = turtle_triples == ref_triples
        if not equality:
            # Left = expected but absent from graph; right = the reverse.
            LOG.warning(
                "Triples do not match\n"
                "\tLeft hand difference: %s\n"
                "\tRight hand difference: %s",
                sorted(turtle_triples - ref_triples),
                sorted(ref_triples - turtle_triples))
        return equality
Exemple #26
0
    def test_snp_trait_association(self):
        """
        Run _add_variant_trait_association on the test record, then use a
        SPARQL query over the source graph to confirm the expected
        association pattern and the node bound to ?snp.
        :return:
        """
        efo_ontology = RDFGraph()
        logger.info("Loading EFO ontology in separate rdf graph")
        efo_ontology.parse(self.source.files['efo']['url'], format='xml')
        efo_ontology.bind_all_namespaces()
        logger.info("Finished loading EFO ontology")

        record = self.test_data
        # The helper returns a (curie, type) pair; only the curie is used.
        variant_curie, _unused_type = \
            self.source._get_curie_and_type_from_id(record['snp_label'])

        description = self.source._make_description(
            record['trait'], record['init_sample_desc'],
            record['replicated_sample_desc'],
            record['platform'], record['pvalue'])

        self.source._add_variant_trait_association(
            variant_curie, record['trait_uri'], efo_ontology,
            record['pubmed'], description)

        # Doubled braces survive str.format as literal SPARQL braces.
        query_template = """
            SELECT ?snp
            WHERE {{
                <https://monarchinitiative.org/MONARCH_b46cdf48950cb00d> a OBAN:association ;
                    dc:description "{}" ;
                    OBO:RO_0002558 OBO:ECO_0000213 ;
                    dc:source PMID:25918132 ;
                    OBAN:association_has_object EFO:0003949 ;
                    OBAN:association_has_predicate OBO:RO_0002326 ;
                    OBAN:association_has_subject ?snp .

                <https://monarchinitiative.org/MONARCH_70a05d8eb1c3d4b0> a OBAN:association ;
                    OBO:RO_0002558 OBO:ECO_0000213 ;
                    dc:source PMID:25918132 ;
                    OBAN:association_has_object EFO:0006995 ;
                    OBAN:association_has_predicate OBO:RO_0002326 ;
                    OBAN:association_has_subject ?snp .

                EFO:0003949 a owl:Class ;
                    rdfs:label "eye color"^^xsd:string ;
                    rdfs:subClassOf UPHENO:0001001 .

                ?snp OBO:RO_0002326 EFO:0003949,
                        EFO:0006995 .

                PMID:25918132 a OBO:IAO_0000013 .
            }}
        """
        sparql_output = self.source.graph.query(
            query_template.format(description))

        # The query must succeed and return exactly one row.
        found_rows = list(sparql_output)
        expected_node = URIRef(self.source.graph._getNode("dbSNP:rs1491921"))
        self.assertEqual(found_rows, [(expected_node, )])
Exemple #27
0
    def test_snp_trait_association(self):
        """
        Run _add_variant_trait_association on the test record and compare
        the resulting source graph with a reference set of turtle triples
        :return:
        """
        # the source graph is expected to be empty before the call
        preexisting = list(self.source.graph)
        self.assertTrue(len(preexisting) == 0)

        efo_ontology = RDFGraph()
        LOG.info("Loading EFO ontology in separate rdf graph")
        efo_ontology.parse(self.source.files['efo']['url'], format='xml')
        efo_ontology.bind_all_namespaces()
        LOG.info("Finished loading EFO ontology")

        # only the curie is needed here; the variant type is ignored
        variant_curie, _variant_type = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        description = self.source._make_description(
            self.test_data['trait'],
            self.test_data['init_sample_desc'],
            self.test_data['replicated_sample_desc'],
            self.test_data['platform'],
            self.test_data['pvalue'])

        self.source._add_variant_trait_association(
            variant_curie,
            self.test_data['trait_uri'],
            efo_ontology,
            self.test_data['pubmed'],
            description)

        # {0} is replaced by the generated description in both associations
        triples_template = """


    MONARCH:bffc7a930c08cc8fe931 a OBAN:association ;
        dc:description "{0}" ;
        OBO:RO_0002558 OBO:ECO_0000213 ;
        dc:source PMID:25918132 ;
        OBAN:association_has_object EFO:0003949 ;
        OBAN:association_has_predicate RO:0003304 ;
        OBAN:association_has_subject dbSNP:rs1491921 .

    MONARCH:bff9b97458d67ed7f517 a OBAN:association ;
        dc:description "{0}" ;
        OBO:RO_0002558 OBO:ECO_0000213 ;
        dc:source PMID:25918132 ;
        OBAN:association_has_object EFO:0006995 ;
        OBAN:association_has_predicate RO:0003304 ;
        OBAN:association_has_subject dbSNP:rs1491921 .

    EFO:0003949 a owl:Class ;
        rdfs:label "eye color"^^xsd:string ;
        rdfs:subClassOf UPHENO:0001001 .

    dbSNP:rs1491921 RO:0003304 EFO:0003949,
            EFO:0006995 .

    PMID:25918132 a OBO:IAO_0000013 .
        """
        triples = triples_template.format(description)

        # To inspect the produced graph while debugging, log
        # self.source.graph.serialize(format="turtle").decode("utf-8")
        graphs_match = self.test_util.test_graph_equality(
            triples, self.source.graph)
        self.assertTrue(graphs_match)
Exemple #28
0
    def testFakeDataSet2(self):
        """
        The dataset contains a deprecated protein ID which we expect
        to be filtered out by ensembl biomart, so processing it
        must leave the graph empty
        :return:
        """
        human_taxon = 9606

        string_db = StringDB('rdf_graph', True)
        string_db.graph = RDFGraph()
        # start from a graph without any triples
        self.assertEqual(len(string_db.graph), 0)

        protein_links = pd.DataFrame(self.test_set_2, columns=self.columns)
        string_db._process_protein_links(
            protein_links, self.protein_list, human_taxon)

        # nothing may have been added
        self.assertEqual(len(string_db.graph), 0)
Exemple #29
0
    def test_sex_specificity_model(self):
        """
        Process the evidence view into a fresh graph and compare the
        result with the reference triples below
        """
        self.mgi.graph = RDFGraph(True)  # Reset graph
        self.mgi._process_evidence_view(limit=None)

        serialized = self.mgi.graph.serialize(format="turtle").decode("utf-8")
        logger.debug("Reference graph: %s", serialized)

        expected_triples = """
        :association RO:0002558 ECO:0000006 ;
            dc:source J:74619 ;
            :has_sex_specificity PATO:0000384 .

        J:74619 a IAO:0000310 .
        """
        graphs_match = self.test_util.test_graph_equality(
            expected_triples, self.mgi.graph)
        self.assertTrue(graphs_match)
Exemple #30
0
    def setUp(self):
        """
        Build a Model over a fresh RDFGraph, a CurieUtil over the curie map,
        and the curies/IRIs/literals the tests use to make triples
        """
        self.model = Model(RDFGraph())
        self.cutil = CurieUtil(curie_map.get())

        # subject used for the category triples
        self.test_cat_subj_curie = "MGI:1234"
        self.test_cat_subj = self.cutil.get_uri(self.test_cat_subj_curie)

        # predicates / types, expanded from their curies
        self.test_cat_default_pred = self.cutil.get_uri("biolink:category")
        self.test_named_indiv = self.cutil.get_uri("owl:NamedIndividual")

        # label triple parts
        self.test_label_pred = self.cutil.get_uri("rdfs:label")
        self.test_label = "some label"

        # comment triple parts
        self.test_comment_IRI = self.cutil.get_uri("rdfs:comment")
        self.test_comment = 'bonus eruptus'
Exemple #31
0
    def test_germline_variant_to_disease(self):
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-germline.xml'

        self.orphanet._process_diseasegene(limit=None)
        logger.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8"))
        expected_triples = """
MONARCH:b2cd4dfacc21d0e28c39 a OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object Orphanet:938475 ;
    OBAN:association_has_predicate RO:0003303 ;
    OBAN:association_has_subject <https://monarchinitiative.org/.well-known/genid/b56f798350412a34> .

ENSEMBL:ENSG00000166813 a owl:Class .

HGNC:30497 a owl:Class .

Orphanet:268061 a owl:Class ;
    rdfs:label "KS1" ;
    dc:description "kinesin family member 7" ;
    oboInOwl:hasExactSynonym "KAS1" ;
    rdfs:subClassOf OBO:SO_0001217 ;
    owl:equivalentClass ENSEMBL:ENSG00000166813,
        HGNC:30497 .

<https://monarchinitiative.org/.well-known/genid/b56f798350412a34> a GENO:0000002 ;
    rdfs:label "germline variant of KS1" ;
    GENO:0000418 Orphanet:268061 ;
    RO:0003303 Orphanet:938475 ;
    :MONARCH_anonymous true ;
    :has_cell_origin GENO:0000900 .

Orphanet:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
        """
        self.assertTrue(
            self.test_util.test_graph_equality(expected_triples,
                                               self.orphanet.graph))
Exemple #32
0
    def test_germline_variant_to_disease(self):
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-germline.xml'

        self.orphanet._process_diseasegene(limit=None)
        LOG.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8")
        )
        expected_triples = """
MONARCH:b40e89f44906ccededb6 a OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object ORPHA:938475 ;
    OBAN:association_has_predicate RO:0003303 ;
    OBAN:association_has_subject <https://monarchinitiative.org/.well-known/genid/bc50c3aece4f4f161d4d> .

ENSEMBL:ENSG00000166813 a owl:Class .

HGNC:30497 a owl:Class .

HGNC:30497 a owl:Class ;
    rdfs:label "KS1" ;
    oboInOwl:hasExactSynonym "KAS1" ;
    rdfs:subClassOf OBO:SO_0001217 ;
    owl:equivalentClass ENSEMBL:ENSG00000166813,
        ORPHA:268061 .

<https://monarchinitiative.org/.well-known/genid/bc50c3aece4f4f161d4d> a GENO:0000002 ;
    rdfs:label "germline variant of KS1" ;
    GENO:0000418 HGNC:30497;
    RO:0003303 ORPHA:938475 ;
    :MONARCH_anonymous true ;
    :has_cell_origin GENO:0000900 .

ORPHA:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
        """
        self.assertTrue(self.test_util.test_graph_equality(
            expected_triples, self.orphanet.graph))
        return
Exemple #33
0
    def testFakeDataSet1(self):
        """
        Process test_set_1 with a protein-to-gene map fetched through
        Ensembl and compare the graph with the single expected triple
        """
        human_taxon = 9606

        string_db = StringDB('rdf_graph', True)
        string_db.graph = RDFGraph(True)
        # start from a graph without any triples
        self.assertEqual(len(string_db.graph), 0)

        ensembl = Ensembl('rdf_graph', True)
        prot_map = ensembl.fetch_protein_gene_map(human_taxon)
        # turn the mapped gene ids into ENSEMBL curies, in place
        for protein_id, gene_id in prot_map.items():
            prot_map[protein_id] = "ENSEMBL:{}".format(gene_id)

        fetched_count = len(prot_map.keys())
        print("Finished fetching ENSP IDs, fetched {} proteins".format(
            fetched_count))

        protein_links = pd.DataFrame(self.test_set_1, columns=self.columns)
        string_db._process_protein_links(protein_links, prot_map, human_taxon)

        triples = """
            ENSEMBL:ENSG00000001626 RO:0002434 ENSEMBL:ENSG00000004059 .
        """

        graphs_match = self.test_util.test_graph_equality(
            triples, string_db.graph)
        self.assertTrue(graphs_match)
Exemple #34
0
def main():
    """
    Load the HPO ontology, walk it from the root term with hpo_to_tree,
    then write the tree to 'hpo-tree.json' and the collected terms to
    'hpo-terms.tsv'
    """
    root = "HP:0000118"

    hpo = RDFGraph()
    hpo_terms = OrderedDict()

    hpo.parse("http://purl.obolibrary.org/obo/hp.owl", format='xml')
    hpo.bind_all_namespaces()
    hpo.bind("oboInOwl", "http://www.geneontology.org/formats/oboInOwl#")

    # the tree starts with just the root; hpo_to_tree fills it in
    tree = {root: {}}
    hpo_to_tree(root, hpo_terms, hpo, tree, [])

    with open('hpo-tree.json', 'w') as tree_file:
        json.dump(tree, tree_file)

    # one tab-separated row per term: id, label, lay names, parents
    with open('hpo-terms.tsv', 'w') as terms_file:
        for term_id, term in hpo_terms.items():
            label = term['label']
            lay_names = "|".join(term['lay_person'])
            terms_file.write("{0}\t{1}\t{2}\t{3}\n".format(
                term_id, label, lay_names, term['parents']))
Exemple #35
0
def main():
    """
    Load the HPO ontology, walk it from the root term with hpo_to_tree,
    then write the tree to 'hpo-tree.json' and the collected terms to
    'hpo-terms.tsv'
    """
    root = "HP:0000118"

    hpo = RDFGraph()
    hpo_terms = OrderedDict()

    hpo.parse("http://purl.obolibrary.org/obo/hp.owl", format='xml')
    hpo.bind_all_namespaces()
    hpo.bind("oboInOwl", "http://www.geneontology.org/formats/oboInOwl#")

    # the tree starts with just the root; hpo_to_tree fills it in
    tree = {root: {}}
    hpo_to_tree(root, hpo_terms, hpo, tree, [])

    with open('hpo-tree.json', 'w') as tree_file:
        json.dump(tree, tree_file)

    # one tab-separated row per term: id, label, lay names, parents
    row_template = "{0}\t{1}\t{2}\t{3}\n"
    with open('hpo-terms.tsv', 'w') as terms_file:
        for term_id, term in hpo_terms.items():
            label = term['label']
            lay_names = "|".join(term['lay_person'])
            terms_file.write(
                row_template.format(term_id, label, lay_names, term['parents']))
# For every ontology node, collect the annotations of the proteins returned
# by the 'disease2protein' query; nodes without annotations are skipped.
for n in wd_ontology.nodes():
    proteins = wd.canned_query('disease2protein', n)
    anns = [a for p in proteins for a in aset.annotations(p)]
    if not anns:
        continue
    print("{} {}".format(n, wd_ontology.label(n)))
    # one "annotation<TAB>label" row per annotation
    for a in anns:
        outfile.write("{}\t{}\n".format(a, go.label(a)))

# Service endpoints
SCIGRAPH_ONTOLOGY = 'https://scigraph-ontology-dev.monarchinitiative.org/scigraph/'
SCIGRAPH_DATA = 'https://scigraph-data-dev.monarchinitiative.org/scigraph/'
GOLR_URL = 'https://solr.monarchinitiative.org/solr/golr/select'

# Get mondo subset
sickle_cell_anemia = 'OMIM:603903'
disease_graph = RDFGraph()

sg = SciGraph(SCIGRAPH_ONTOLOGY)

# outgoing subClassOf edges, up to 25 steps
parent_graph = sg.neighbors(
    sickle_cell_anemia,
    {
        'depth': 25,
        'direction': 'OUTGOING',
        'relationshipType': 'subClassOf',
    })

# incoming subClassOf edges, up to 2 steps
child_graph = sg.neighbors(
    sickle_cell_anemia,
    {
        'depth': 2,
        'direction': 'INCOMING',
        'relationshipType': 'subClassOf',
    })

# equivalentClass edges, up to 10 steps
eq_graph = sg.neighbors(
    sickle_cell_anemia,
    {
        'depth': 10,
        'relationshipType': 'equivalentClass',
    })


def add_triples_from_bbop(bbop_graph, rdf_graph):
    """
    Walk the edges of bbop_graph and normalise predicates that contain
    no ':' (e.g. 'subClassOf' becomes 'rdfs:subClassOf').

    NOTE(review): the definition is cut off after the 'equivalentClass'
    branch in this listing; how rdf_graph is used is not visible here.

    :param bbop_graph: object exposing an ``edges`` iterable whose items
                       have a mutable ``predicate`` attribute
    :param rdf_graph: presumably the graph receiving the triples -- confirm
    """
    for e in bbop_graph.edges:
        # a predicate without ':' is a bare relationship name, not a curie
        if not re.search(r':', e.predicate):
            if e.predicate == 'subClassOf':
                e.predicate = 'rdfs:subClassOf'
            elif e.predicate == 'equivalentClass':
Exemple #37
0
    def test_snp_model(self):
        """
        Check the model written by _process_haplotype(), called as
        self._process_haplotype(
                            variant_curie, strongest_snp_risk_allele,
                            chrom_num, chrom_pos, context,
                            risk_allele_frequency, mapped_gene, so_ontology)
        by matching it with a SPARQL query that must bind exactly one SNP
        """
        # only the curie is needed here; the variant type is ignored
        variant_curie, _variant_type = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        so_ontology = RDFGraph()
        logger.info("Loading SO ontology in separate rdf graph")
        so_ontology.parse(self.source.files['so']['url'], format='xml')
        so_ontology.bind_all_namespaces()
        logger.info("Finished loading SO ontology")

        self.source._process_haplotype(
            variant_curie,
            self.test_data['snp_label'],
            self.test_data['chrom_num'],
            self.test_data['chrom_pos'],
            self.test_data['context'],
            self.test_data['allele_freq'],
            self.test_data['mapped_gene'],
            so_ontology)

        sparql_query = """
            SELECT ?snp
            WHERE {
                :haplotype_bcb627b1f64039b0 a OBO:GENO_0000871 ;
                    rdfs:label "rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?" ;
                    OBO:GENO_0000382 ?snp,
                        dbSNP:rs3758171,
                        dbSNP:rs3824344,
                        dbSNP:rs7020413 ;
                OBO:GENO_0000418 <http://identifiers.org/hgnc/HGNC:8619> ;
                OBO:RO_0002162 OBO:NCBITaxon_9606 .

                ?snp a OBO:SO_0000694,
                        OBO:SO_0001627 ;
                    rdfs:label "rs1329573-?" ;
                    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996-36998996-Region> ;
                    OBO:GENO_0000418 <http://identifiers.org/hgnc/HGNC:8619> ;
                    OBO:RO_0002162 OBO:NCBITaxon_9606 .

                dbSNP:rs3758171 a OBO:SO_0000694,
                        OBO:SO_0001627 ;
                    rdfs:label "rs3758171-?" ;
                    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420-36997420-Region> ;
                    OBO:GENO_0000418 <http://identifiers.org/hgnc/HGNC:8619> ;
                    OBO:RO_0002162 OBO:NCBITaxon_9606 .

                dbSNP:rs3824344 a OBO:SO_0000694,
                        OBO:SO_0001627 ;
                    rdfs:label "rs3824344-?" ;
                    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690-37000690-Region> ;
                    OBO:GENO_0000418 <http://identifiers.org/hgnc/HGNC:8619> ;
                    OBO:RO_0002162 OBO:NCBITaxon_9606 .

                dbSNP:rs7020413 a OBO:SO_0000694,
                        OBO:SO_0001627 ;
                    rdfs:label "rs7020413-?" ;
                    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118-37002118-Region> ;
                    OBO:GENO_0000418 <http://identifiers.org/hgnc/HGNC:8619> ;
                    OBO:RO_0002162 OBO:NCBITaxon_9606 .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420-36997420-Region> a faldo:Region ;
                    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> ;
                    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996-36998996-Region> a faldo:Region ;
                    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> ;
                    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690-37000690-Region> a faldo:Region ;
                    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> ;
                    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118-37002118-Region> a faldo:Region ;
                    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> ;
                    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> a faldo:Position ;
                    faldo:position 36997420 ;
                    faldo:reference OBO:CHR_GRCh38chr9 .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> a faldo:Position ;
                    faldo:position 36998996 ;
                    faldo:reference OBO:CHR_GRCh38chr9 .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> a faldo:Position ;
                    faldo:position 37000690 ;
                    faldo:reference OBO:CHR_GRCh38chr9 .

                <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> a faldo:Position ;
                    faldo:position 37002118 ;
                    faldo:reference OBO:CHR_GRCh38chr9 .
            }
        """
        # The query must succeed and yield a single row holding the SNP
        rows = list(self.source.graph.query(sparql_query))
        snp_node = URIRef(self.source.graph._getNode("dbSNP:rs1329573"))
        self.assertEqual(rows, [(snp_node,)])
Exemple #38
0
    def __init__(
            self,
            identifier,       # name? should be Archive url via Source
            title,
            url,
            ingest_desc=None,
            license_url=None,
            data_rights=None,
            graph_type='rdf_graph',     # rdf_graph, streamed_graph
            file_handle=None):
        """
        Create the graph that holds the dataset metadata and add the
        basic descriptive triples (type, title, identifier, page,
        license, rights, description).

        :param identifier: curie/IRI used as the subject of every triple
        :param title: dataset title; when None the identifier is used
        :param url: page of the dataset; no foaf:page triple when None
        :param ingest_desc: optional description added through the model
        :param license_url: optional license; logged when absent
        :param data_rights: optional rights statement, added as a literal
        :param graph_type: None, 'rdf_graph' or 'streamed_graph'
        :param file_handle: handed to StreamedGraph for 'streamed_graph'
        :raises ValueError: if graph_type is not one of the supported values
        """
        if graph_type is None:
            self.graph = RDFGraph(None, identifier)
        elif graph_type == 'streamed_graph':
            self.graph = StreamedGraph(True, identifier, file_handle=file_handle)
        elif graph_type == 'rdf_graph':
            self.graph = RDFGraph(True, identifier)
        else:
            # Without this branch self.graph stays unset and the failure
            # only surfaces below as an opaque AttributeError.
            raise ValueError(
                "Unsupported graph_type {!r}; expected None, 'rdf_graph' "
                "or 'streamed_graph'".format(graph_type))

        self.model = Model(self.graph)
        self.globaltt = self.graph.globaltt
        self.globaltcid = self.graph.globaltcid
        self.curie_map = self.graph.curie_map
        # TODO: move hard coded curies to translation table calls
        self.identifier = identifier
        # fall back to the identifier when no title was supplied
        self.title = identifier if title is None else title
        self.version = None
        self.date_issued = None

        # The date_accessed value is later used as a literal of properties
        # such as dcterms:issued, which needs to conform xsd:dateTime format.
        # TODO ... we need to have a talk about typed literals and SPARQL
        self.date_accessed = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')

        self.citation = set()
        self.license_url = license_url
        self.model.addType(self.identifier, 'dctypes:Dataset')
        # use self.title (not the raw argument) so the identifier fallback
        # above is what ends up in the graph when title is None
        self.graph.addTriple(
            self.identifier, 'dcterms:title', self.title, True)
        self.graph.addTriple(
            self.identifier, 'dcterms:identifier', identifier, True)
        if url is not None:
            self.graph.addTriple(self.identifier, 'foaf:page', url)
        # maybe in the future add the logo here:
        # schemaorg:logo  <uri>
        # TODO add the license info
        # FIXME:Temporarily making this in IF statement,
        #  can revert after all current resources are updated.
        if license_url is not None:
            self.graph.addTriple(
                self.identifier, 'dcterms:license', license_url)
        else:
            LOG.debug('No license provided.')
        if data_rights is not None:
            self.graph.addTriple(
                self.identifier, 'dcterms:rights',
                data_rights, object_is_literal=True)
        else:
            LOG.debug('No rights provided.')

        if ingest_desc is not None:
            self.model.addDescription(self.identifier, ingest_desc)
Exemple #39
0
    def test_snp_model(self):
        """
        Test output model of _process_haplotype()
        self._process_haplotype(
            variant_curie, strongest_snp_risk_allele,
            chrom_num, chrom_pos, context,
            risk_allele_frequency, mapped_gene, so_ontology)
        """
        self.assertTrue(len(list(self.source.graph)) == 0)
        variant_curie, variant_type = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        so_ontology = RDFGraph()
        LOG.info("Loading SO ontology in separate rdf graph")
        so_ontology.parse(self.source.files['so']['url'], format='xml')
        so_ontology.bind_all_namespaces()
        LOG.info("Finished loading SO ontology")

        self.source._process_haplotype(
            variant_curie, self.test_data['snp_label'], self.test_data['chrom_num'],
            self.test_data['chrom_pos'], self.test_data['context'],
            self.test_data['allele_freq'], self.test_data['mapped_gene'], so_ontology)

        triples = """
:haplotype_bb627b1f64039b0f751a a OBO:GENO_0000871 ;
    rdfs:label "rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?" ;
    OBO:GENO_0000382 dbSNP:rs1329573,
        dbSNP:rs3758171,
        dbSNP:rs3824344,
        dbSNP:rs7020413 ;
    OBO:SO_0001627 HGNC:8619 ;
    OBO:RO_0002162 OBO:NCBITaxon_9606 .

dbSNP:rs1329573 a OBO:SO_0000694,
        SO:0001627 ;
    rdfs:label "rs1329573-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996-36998996-Region> ;
    OBO:SO_0001627 HGNC:8619 ;
    OBO:RO_0002162 OBO:NCBITaxon_9606 .

dbSNP:rs3758171 a OBO:SO_0000694,
        OBO:SO_0001627 ;
    rdfs:label "rs3758171-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420-36997420-Region> ;
    OBO:SO_0001627 HGNC:8619 ;
    OBO:RO_0002162 OBO:NCBITaxon_9606 .

dbSNP:rs3824344 a OBO:SO_0000694,
        OBO:SO_0001627 ;
    rdfs:label "rs3824344-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690-37000690-Region> ;
    OBO:SO_0001627 HGNC:8619 ;
    OBO:RO_0002162 OBO:NCBITaxon_9606 .

dbSNP:rs7020413 a OBO:SO_0000694,
        OBO:SO_0001627 ;
    rdfs:label "rs7020413-?" ;
    faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118-37002118-Region> ;
    OBO:SO_0001627 HGNC:8619 ;
    OBO:RO_0002162 OBO:NCBITaxon_9606 .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420-36997420-Region> a faldo:Region ;
    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996-36998996-Region> a faldo:Region ;
    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690-37000690-Region> a faldo:Region ;
    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118-37002118-Region> a faldo:Region ;
    faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> ;
    faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> a faldo:Position ;
    faldo:position 36997420 ;
    faldo:reference OBO:CHR_GRCh38chr9 .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> a faldo:Position ;
    faldo:position 36998996 ;
    faldo:reference OBO:CHR_GRCh38chr9 .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> a faldo:Position ;
    faldo:position 37000690 ;
    faldo:reference OBO:CHR_GRCh38chr9 .

<https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> a faldo:Position ;
    faldo:position 37002118 ;
    faldo:reference OBO:CHR_GRCh38chr9 .
        """

        # dbg
        # LOG.debug(
        #    "Reference graph: %s",
        #   self.source.graph.serialize(format="turtle").decode("utf-8"))

        #  Does not seem to acknowlage these constant triples 
        self.assertTrue(self.test_util.test_graph_equality(triples, self.source.graph))
Exemple #40
0
class GeneralGraphTestCase(unittest.TestCase):
    """
    Generic graph checks: every curie prefix can produce a graph node,
    plus helpers that parse a turtle file and validate it with owltools.
    """

    def setUp(self):
        """Give each test a fresh graph and the curie map."""
        self.graph = RDFGraph()
        self.curie_map = curie_map.get()

    def tearDown(self):
        """Drop the graph so it is not carried over between tests."""
        self.graph = None

    def test_curieprefixes(self):
        """
        This will ensure that we can create identifiers for all of the
        defined curie prefixes using the graph's _getnode() method
        :return:

        """
        # make one test id per curie prefix and ask the graph for its node
        for prefix in self.curie_map:
            testid = prefix + ':testme'
            node = self.graph._getnode(testid)
            self.assertIsNotNone(
                node,
                "prefix \"" + prefix
                + "\" has an error...can't create graph node")

    def readGraphFromTurtleFile(self, f):
        """
        This will read the specified file into a graph.  A simple parsing test.
        :param f: path of the turtle file to parse
        :return:

        """
        import os
        vg = RDFGraph()
        # absolute path is only used for readable log/assert messages
        p = os.path.abspath(f)
        logger.info("Testing reading turtle file from %s", p)
        vg.parse(f, format="turtle")
        logger.info('Found %s graph nodes in %s', len(vg), p)
        self.assertGreater(len(vg), 0, "No nodes found in " + p)

    def readGraphIntoOWL(self, f):
        """
        test if the ttl can be parsed by owlparser
        this expects owltools to be accessible from commandline
        :param f: file of ttl
        :return:
        """
        import subprocess

        # subprocess.call returns the exit status instead of raising on a
        # non-zero one (as check_call does), so the status can be logged
        # and reported as an ordinary assertion failure.
        status = subprocess.call(["owltools", f], stderr=subprocess.STDOUT)
        # returns zero is success!
        if status != 0:
            logger.error(
                'finished verifying with owltools with status %s', status)
        self.assertEqual(status, 0)
Exemple #41
0
 def setUp(self):
     """Create a fresh RDFGraph and fetch the curie map for the test."""
     self.graph = RDFGraph()
     self.curie_map = curie_map.get()
Exemple #42
0
    def process_catalog(self, limit=None):
        """
        Read the tab-delimited catalog file and add every usable row
        to the graph.

        The EFO and SO ontologies are first parsed into their own graphs
        and handed to the helpers that take them.  Empty rows and rows
        whose column count differs from the header are skipped (the
        latter are logged as errors).

        :param limit: when not None (and not in test mode), stop after a
                      processed row whose reader line number exceeds it
        :return: None

        """
        raw = '/'.join((self.rawdir, self.files['catalog']['file']))
        LOG.info("Processing Data from %s", raw)
        efo_ontology = RDFGraph(False, "EFO")
        LOG.info("Loading EFO ontology in separate rdf graph")
        efo_ontology.parse(self.files['efo']['url'], format='xml')
        efo_ontology.bind_all_namespaces()
        LOG.info("Finished loading EFO ontology")

        so_ontology = RDFGraph(False, "SO")
        LOG.info("Loading SO ontology in separate rdf graph")
        so_ontology.parse(self.files['so']['url'], format='xml')
        so_ontology.bind_all_namespaces()
        LOG.info("Finished loading SO ontology")

        with open(raw, 'r', encoding="iso-8859-1") as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t')
            header = next(filereader, None)  # the header row
            # an empty file has no header; treat it as zero columns
            # (the reader is exhausted, so the loop below does nothing)
            header_len = len(header) if header is not None else 0
            LOG.info('header length:\t %i', header_len)

            for row in filereader:
                if not row:
                    continue

                if header_len != len(row):
                    # report the column COUNT (the row itself cannot be
                    # formatted with %i) and skip the row: unpacking it
                    # below would raise ValueError
                    LOG.error(
                        'BadRow: %i has %i columns',
                        filereader.line_num, len(row))
                    continue

                # column order of the catalog file; several columns are
                # unpacked only to document the layout
                (date_added_to_catalog,
                 pubmed_num,
                 first_author,
                 pub_date,
                 journal,
                 link,
                 study_name,
                 disease_or_trait,
                 initial_sample_description,
                 replicate_sample_description,
                 region,
                 chrom_num,
                 chrom_pos,
                 reported_gene_nums,
                 mapped_gene,
                 upstream_gene_num,
                 downstream_gene_num,
                 snp_gene_nums,
                 upstream_gene_distance,
                 downstream_gene_distance,
                 strongest_snp_risk_allele,
                 snps,
                 merged,
                 snp_id_current,
                 context,
                 intergenic_flag,
                 risk_allele_frequency,
                 pvalue,
                 pvalue_mlog,
                 pvalue_text,
                 or_or_beta,
                 confidence_interval_95,
                 platform_with_snps_passing_qc,
                 cnv_flag,
                 mapped_trait,
                 mapped_trait_uri,
                 study_accession,
                 GENOTYPING_TECHNOLOGY
                ) = row

                # NOTE(review): in test mode every row is skipped here,
                # so nothing below runs -- confirm this is intended
                if self.test_mode:
                    continue

                # Example row (tab separated in the file):
                # 06-May-2015  25917933
                #   Zai CC  20-Nov-2014  J Psychiatr Res
                #   http://europepmc.org/abstract/MED/25917933
                # A genome-wide association study of suicide severity
                #   scores in bipolar disorder.
                # Suicide in bipolar disorder
                # 959 European ancestry individuals  NA
                # 10p11.22  10  32704340  C10orf68, CCDC7, ITGB1  CCDC7
                # rs7079041-A  rs7079041  0  7079041  intron  0
                #   2E-6  5.698970

                variant_curie, variant_type = self._get_curie_and_type_from_id(
                    strongest_snp_risk_allele)

                if strongest_snp_risk_allele.strip() == '':
                    LOG.debug(
                        "No strongest SNP risk allele for %s:\n%s",
                        pubmed_num, str(row))
                    # still consider adding in the EFO terms
                    # for what the study measured?
                    continue

                if variant_type == 'snp':
                    self._add_snp_to_graph(
                        variant_curie, strongest_snp_risk_allele, chrom_num,
                        chrom_pos, context, risk_allele_frequency)

                    self._add_deprecated_snp(
                        variant_curie, snp_id_current, merged, chrom_num,
                        chrom_pos)

                    self._add_snp_gene_relation(
                        variant_curie, snp_gene_nums, upstream_gene_num,
                        downstream_gene_num)
                elif variant_type == 'haplotype':
                    self._process_haplotype(
                        variant_curie, strongest_snp_risk_allele, chrom_num,
                        chrom_pos, context, risk_allele_frequency, mapped_gene,
                        so_ontology)
                elif variant_type is None:
                    LOG.warning(
                        "There's a snp id i can't manage: %s",
                        strongest_snp_risk_allele)
                    continue

                description = self._make_description(
                    disease_or_trait, initial_sample_description,
                    replicate_sample_description,
                    platform_with_snps_passing_qc, pvalue)

                self._add_variant_trait_association(
                    variant_curie, mapped_trait_uri, efo_ontology,
                    pubmed_num, description)

                if not self.test_mode and (
                        limit is not None and filereader.line_num > limit):
                    break

        # TODO loop through the location hash,
        # and make all snps at that location equivalent
        for location in self.id_location_map:
            snp_ids = self.id_location_map[location]
            if len(snp_ids) > 1:
                LOG.info("%s has >1 snp id: %s", location, str(snp_ids))
Exemple #43
0
class Dataset:
    """
     this will produce the metadata about a dataset
     following the example laid out here:
     http://htmlpreview.github.io/?
     https://github.com/joejimbo/HCLSDatasetDescriptions/blob/master/Overview.html#appendix_1
     (mind the wrap)

    """

    def __init__(self, identifier, title, url, description=None,
                 license_url=None, data_rights=None, graph_type=None,
                 file_handle=None):
        if graph_type is None:
            self.graph = RDFGraph(None, identifier)  # 
        elif graph_type == 'streamed_graph':
            self.graph = StreamedGraph(True, file_handle=file_handle)
        elif graph_type == 'rdf_graph':
            self.graph = RDFGraph()
        self.model = Model(self.graph)
        self.identifier = ':' + identifier
        self.version = None
        self.date_issued = None

        # The data_accesed value is later used as an literal of properties
        # such as dct:issued, which needs to conform xsd:dateTime format.
        # TODO ... we need to have a talk about typed literals and SPARQL
        self.date_accessed = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')

        self.citation = set()
        self.license = license_url
        self.model.addType(self.identifier, 'dctypes:Dataset')
        self.graph.addTriple(self.identifier, 'dct:title', title, True)
        self.graph.addTriple(
            self.identifier, 'dct:identifier',
            identifier, object_is_literal=True)
        self.graph.addTriple(self.identifier, 'foaf:page', url)
        # maybe in the future add the logo here:
        # schemaorg:logo <http://www.ebi.ac.uk/rdf/sites/ebi.ac.uk.rdf/files/resize/images/rdf/chembl_service_logo-146x48.gif> .

        # TODO add the licence info
        # FIXME:Temporarily making this in IF statement,
        #  can revert after all current resources are updated.
        if license_url is not None:
            self.graph.addTriple(
                self.identifier, 'dct:license', license_url)
        else:
            logger.debug('No license provided.')
        if data_rights is not None:
            self.graph.addTriple(
                self.identifier, 'dct:rights',
                data_rights, object_is_literal=True)
        else:
            logger.debug('No rights provided.')

        if description is not None:
            self.model.addDescription(self.identifier, description)
        return

    def setVersion(self, date_issued, version_id=None):
        """
        Legacy function...
            should use the other set_* for version and date

        as of 2016-10-20  used in:
        
        dipper/sources/HPOAnnotations.py 139:
        dipper/sources/CTD.py             99:
        dipper/sources/BioGrid.py        100:        
        dipper/sources/MGI.py            255:
        dipper/sources/EOM.py             93:
        dipper/sources/Coriell.py        200:
        dipper/sources/MMRRC.py           77:

        # TODO set as deprecated
        
        :param date_issued:
        :param version_id:
        :return:

        """

        if date_issued is not None:
            self.set_date_issued(date_issued)
        elif version_id is not None:
            self.set_version_by_num(version_id)
        else:
            logger.error("date or version not set!")
            # TODO throw error
            return

        if version_id is not None:
            self.set_version_by_num(version_id)
        else:
            logger.info("set version to %s", self.version)
            self.set_version_by_date(date_issued)

        logger.info("set version to %s", self.version)

        return

    def set_date_issued(self, date_issued):

        self.date_issued = date_issued
        self.graph.addTriple(
            self.identifier, 'dct:issued', date_issued, object_is_literal=True)
        logger.info("setting date to %s", date_issued)

        return

    def set_version_by_date(self, date_issued=None):
        """
        This will set the version by the date supplied,
        the date already stored in the dataset description,
        or by the download date (today)
        :param date_issued:
        :return:
        """

        if date_issued is not None:
            d = date_issued
        elif self.date_issued is not None:
            d = self.date_issued
        else:
            d = self.date_accessed
            logger.info(
                "No date supplied for setting version; "
                "using download timestamp for date_issued")

        logger.info("setting version by date")
        self.set_version_by_num(d)

        return

    def set_version_by_num(self, version_num):

        self.version = self.identifier+version_num
        self.graph.addTriple(self.version, 'dct:isVersionOf', self.identifier)
        self.graph.addTriple(self.version, 'pav:version', version_num,
                             object_is_literal=True)

        logger.info("setting version to %s", self.version)

        # set the monarch-generated-version of the resource-version
        # TODO sync this up with the ontology version
        if version_num != self.date_accessed:
            dipperized_version = ':' + str(self.date_accessed)
            self.graph.addTriple(
                dipperized_version, 'dct:isVersionOf',
                self.version)
            self.graph.addTriple(
                dipperized_version, 'pav:version',
                self.date_accessed, object_is_literal=True)
            self.graph.addTriple(
                dipperized_version, 'dct:issued',
                self.date_accessed, object_is_literal=True,
                literal_type="xsd:dateTime")
        return


    def setFileAccessUrl(self, url, is_object_literal=False):
        self.graph.addTriple(self.identifier, 'dcat:accessURL',
                             url, is_object_literal)

    def getGraph(self):
        return self.graph

    def set_license(self, license):
        self.license = license
        return

    def get_license(self):

        return self.license

    def set_citation(self, citation_id):

        self.citation.add(citation_id)
        # TODO
        # model.addTriple(self.identifier, 'cito:citeAsAuthority', citation_id)

        return