예제 #1
0
파일: Dataset.py 프로젝트: lwinfree/dipper
    def __init__(self,
                 identifier,
                 title,
                 url,
                 description=None,
                 license_url=None,
                 data_rights=None,
                 graph_type=None,
                 file_handle=None):
        """
        Create the dataset node and write its core metadata triples.

        :param identifier: local dataset id; stored prefixed with ':' and
                           also written as the dct:identifier literal
        :param title: literal written as dct:title
        :param url: object of the foaf:page triple
        :param description: optional text handed to model.addDescription
        :param license_url: optional object of the dct:license triple
        :param data_rights: optional literal written as dct:rights
        :param graph_type: None or 'rdf_graph' selects an RDFGraph,
                           'streamed_graph' selects a StreamedGraph
        :param file_handle: forwarded to StreamedGraph only
        :raises ValueError: if graph_type is none of the values above
        """
        if graph_type is None or graph_type == 'rdf_graph':
            self.graph = RDFGraph()
        elif graph_type == 'streamed_graph':
            self.graph = StreamedGraph(True, file_handle=file_handle)
        else:
            # Fail fast: without a graph every statement below would break
            # with an opaque AttributeError on self.graph.
            raise ValueError(
                "Unsupported graph_type: {!r}".format(graph_type))
        self.model = Model(self.graph)
        self.identifier = ':' + identifier
        self.version = None
        self.date_issued = None

        # date_accessed is later used as the object literal of properties
        # such as dct:issued, so it has to follow the xsd:dateTime layout
        # (the earlier '%Y-%m-%d-%H-%M' layout did not).
        self.date_accessed = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')

        self.citation = set()
        self.license = license_url
        self.model.addType(self.identifier, 'dctypes:Dataset')
        self.graph.addTriple(self.identifier, 'dct:title', title, True)
        self.graph.addTriple(self.identifier,
                             'dct:identifier',
                             identifier,
                             object_is_literal=True)
        self.graph.addTriple(self.identifier, 'foaf:page', url)
        # maybe in the future add the logo here:
        # schemaorg:logo <http://www.ebi.ac.uk/rdf/sites/ebi.ac.uk.rdf/files/resize/images/rdf/chembl_service_logo-146x48.gif> .

        # TODO add the licence info
        # FIXME: temporarily guarded by an IF statement,
        #  can revert after all current resources are updated.
        if license_url is not None:
            self.graph.addTriple(self.identifier, 'dct:license', license_url)
        else:
            logger.debug('No license provided.')
        if data_rights is not None:
            self.graph.addTriple(self.identifier,
                                 'dct:rights',
                                 data_rights,
                                 object_is_literal=True)
        else:
            logger.debug('No rights provided.')

        if description is not None:
            self.model.addDescription(self.identifier, description)
예제 #2
0
    def test_germline_variant_to_disease(self):
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-germline.xml'

        self.orphanet._process_diseasegene(limit=None)
        LOG.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8")
        )
        expected_triples = """
MONARCH:ba2ac5d2153c70e2bb98 a OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object ORPHA:938475 ;
    OBAN:association_has_predicate RO:0004013 ;
    OBAN:association_has_subject HGNC:30497 .

ENSEMBL:ENSG00000166813 a owl:Class .

HGNC:30497 a owl:Class ;
    RO:0004013 ORPHA:938475 ;
    oboInOwl:hasExactSynonym "KAS1" ;
    owl:equivalentClass ENSEMBL:ENSG00000166813,
       ORPHA:268061 .

ORPHA:268061 a owl:Class .

ORPHA:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
        """
        self.assertTrue(self.test_util.test_graph_equality(
            expected_triples, self.orphanet.graph))
        return
예제 #3
0
 def setUp(self):
     """Create the GWAS Catalog source and a sample record with four SNPs."""
     self.test_util = TestUtils()
     catalog = GWASCatalog('rdf_graph', True)
     catalog.graph = RDFGraph(True)
     self.source = catalog
     self.test_data = dict(
         snp_label='rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?',
         chrom_num='9;9;9;9',
         chrom_pos='36998996;37002118;37000690;36997420',
         context=(
             'intron_variant; intron_variant; intron_variant; intron_variant'),
         allele_freq='NR',
         trait='Intelligence',
         trait_uri='http://www.ebi.ac.uk/efo/EFO_0004337',
         pvalue='0.00000004',
         merged='0',
         snp_id_current='',
         mapped_gene='PAX5; PAX5; PAX5; PAX5',
         snp_gene_nums='',
         upstream_gene_num='107986179',
         downstream_gene_num='107986180',
         init_sample_desc=(
             '656 European ancestry individuals from ADHD families'),
         replicated_sample_desc='NA',
         platform='Illumina [795637]',
         pubmed='22449649',
     )
예제 #4
0
    def test_parse(self):
        """
        Run the source's parse() against an empty graph and compare the
        result with the expected triples.
        """
        self.source.graph = RDFGraph(True)  # start from an empty graph
        triple_count = len(list(self.source.graph))
        self.assertTrue(triple_count == 0)

        self.source.parse()

        wanted = """
        UNII:46U771ERWK RO:0002606 SNOMED:386761002 ;
            rdfs:subClassOf CHEBI:23367 .

        SNOMED:386761002 rdfs:label "Local anesthesia" ;
            rdfs:subClassOf MONDO:0000001 .

        UNII:46U771ERWK biolink:category biolink:ChemicalSubstance .
        SNOMED:386761002 biolink:category biolink:Disease .

        """

        # debugging aid: dump what parse() produced
        turtle_dump = self.source.graph.serialize(format="turtle")
        logger.debug("Reference graph: %s", turtle_dump.decode("utf-8"))

        graphs_match = self.test_util.test_graph_equality(
            wanted, self.source.graph)
        self.assertTrue(graphs_match)
예제 #5
0
    def test_gene_to_disease(self):
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-no-variant.xml'

        self.orphanet._process_diseasegene(limit=None)
        LOG.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle") .decode("utf-8")
        )
        expected_triples = """
MONARCH:bdbeb077e365ddedda20 a OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object ORPHA:938475 ;
    OBAN:association_has_predicate RO:0004015 ;
    OBAN:association_has_subject ORPHA:268061 .

ORPHA:268061 RO:0004015 ORPHA:938475 ;
    oboInOwl:hasExactSynonym "KAS1" .

ORPHA:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
        """
        self.assertTrue(self.test_util.test_graph_equality(
            expected_triples, self.orphanet.graph))
        return
예제 #6
0
    def test_patient_phenotype_model(self):
        """
        Functional test for _parse_patient_phenotypes(): feed two mocked
        tab-separated rows and compare the graph with the expected triples.
        """
        udp = UDP('rdf_graph', True)
        udp.graph = RDFGraph(True)

        # the graph must start out empty
        triple_count = len(list(udp.graph))
        self.assertTrue(triple_count == 0)

        phenotype_rows = [
            'patient_1\tHP:000001\tyes',
            'patient_1\tHP:000002\tno'
        ]
        fake_data = MagicMock()
        fake_data.__iter__.return_value = iter(phenotype_rows)
        fake_file = mock_open(mock=fake_data)

        udp._parse_patient_phenotypes(fake_file)

        wanted = """
        :patient_1 a foaf:Person ;
            rdfs:label "patient_1" ;
            RO:0002200 DOID:4,
              HP:000001 .
        """

        graphs_match = self.test_util.test_graph_equality(wanted, udp.graph)
        self.assertTrue(graphs_match)
예제 #7
0
    def testSGDParser(self):
        """
        Build an association from test_set_1 and check the exact contents
        of the resulting graph.
        """
        record = self.test_set_1
        sgd = SGD('rdf_graph', True)
        sgd.graph = RDFGraph(True)
        sgd.make_association(record)

        # the description is generated by the source and spliced into
        # the expected turtle
        description = sgd._make_description(record)

        template = """
        :MONARCH_ba748c98c0f167739128 a OBAN:association ;
            OBO:RO_0002558 OBO:APO_0000020 ;
            dcterms:description "{0}";
            dcterms:source PMID:21715656 ;
            OBAN:association_has_object MONARCH:APO_0000309APO_0000245 ;
            OBAN:association_has_predicate OBO:RO_0002200 ;
            OBAN:association_has_subject SGD:S000007268 .

        SGD:S000007268 rdfs:label "ATP6" ;
        RO:0002200 MONARCH:APO_0000309APO_0000245 .

        APO:0000020 rdfs:label "classical genetics" .

        PMID:21715656 a OBO:IAO_0000311 ;
        owl:sameAs SGD_REF:S000145858 .

        MONARCH:APO_0000309APO_0000245 rdfs:label "respiratory growth:decreased rate" ;
        rdfs:subClassOf UPHENO:0001001 .

        """
        wanted = template.format(description)

        graphs_match = self.test_util.test_graph_equality(wanted, sgd.graph)
        self.assertTrue(graphs_match)
예제 #8
0
    def test_evidence_model(self):
        """
        Functional test for _add_evidence()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)  # Reset graph
        # Test graph is empty
        self.assertTrue(len(list(impc.graph)) == 0)

        (p_value, percentage_change, effect_size) = self.test_set_1[23:26]

        impc._add_evidence(
            self.assoc_curie, self.eco_id, p_value, percentage_change, effect_size,
            self.study_curie)

        triples = """
:MONARCH_test_association SEPIO:0000007 <https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> .

<https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> a ECO:0000015 ;
    SEPIO:0000084 <https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888>,
        <https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> ;
    SEPIO:0000085 <https://monarchinitiative.org/.well-known/genid/study> .

<https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> a OBI:0000175 ;
    RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
    STATO:0000129 1.637023e-10 .

<https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888> a STATO:0000085 ;
    RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
    STATO:0000129 "8.885439E-007" .
        """

        self.assertTrue(self.test_util.test_graph_equality(
            triples, impc.graph))
예제 #9
0
    def test_parse(self):
        """
        Run clinvar_parse() for every record in RCVS and compare the
        N-Triples it writes with the matching reference turtle file.

        Each record runs in its own subTest. The command line is supplied
        by patching sys.argv only for the duration of the parser call.
        """
        for rcv in RCVS:
            output_nt = rcv + '.nt'
            input_xml = rcv + '.xml.gz'
            reference_ttl = TTL_PATH + rcv + '.ttl'
            with self.subTest(rcv=rcv):

                mock_args = [
                    "test_clinvar.py", "--inputdir", XML_PATH, "--filename",
                    input_xml, "--mapfile", MAP_FILE, "--destination", NT_PATH,
                    "--output", output_nt
                ]

                # Scope the argv override to the parser call so it is undone
                # even when clinvar_parse() raises, instead of leaking into
                # later subtests and tests.
                with patch('sys.argv', mock_args):
                    clinvar_parse()

                query_graph = RDFGraph()
                query_graph.bind_all_namespaces()
                query_graph.parse(NT_PATH + output_nt, format='nt')

                # Read the reference verbatim; joining readlines() with
                # "\n" would double every line break in the file.
                with open(reference_ttl, 'r') as ref_fh:
                    ref_graph = ref_fh.read()

                # debug
                LOG.debug(
                    "Reference graph: %s",
                    query_graph.serialize(format="turtle").decode("utf-8"))

                # Write the parsed graph out as a dot file
                dot_file_path = DOT_PATH + rcv + ".dot"
                with open(dot_file_path, 'w') as dot_file:
                    rdf2dot(query_graph, dot_file)

                self.assertTrue(
                    TestUtils.test_graph_equality(ref_graph, query_graph))
예제 #10
0
    def test_graph_equality(turtlish, graph):
        """
        Compare the triples described by a headless turtle string with
        the triples held by a graph.

        A prefix header is generated from every namespace bound by
        bind_all_namespaces() and prepended to the string before parsing.
        Any mismatch is logged as a warning showing both set differences.

        :param turtlish: String of triples in turtle
                         format without prefix header
        :param graph: Graph object to test against
        :return: Boolean, True if graphs contain same
                          set of triples
        """
        turtle_graph = RDFGraph()
        turtle_graph.bind_all_namespaces()
        prefixes = "\n".join(
            "@prefix {}: <{}> .".format(prefix, namespace)
            for prefix, namespace
            in turtle_graph.namespace_manager.namespaces()
        )

        # Keep the generated header on its own line so the body is parsed
        # the same way no matter what character it starts with.
        turtle_string = prefixes + "\n" + turtlish
        turtle_graph.parse(io.StringIO(turtle_string), format="turtle")

        turtle_triples = set(turtle_graph)
        ref_triples = set(graph)
        equality = turtle_triples == ref_triples
        if not equality:
            LOG.warning(
                "Triples do not match\n"
                "\tLeft hand difference: %s\n"
                "\tRight hand difference: %s",
                sorted(turtle_triples - ref_triples),
                sorted(ref_triples - turtle_triples))
        return equality
예제 #11
0
    def test_assertion_model(self):
        """
        Functional test for _add_assertion_provenance()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        triple_count = len(list(impc.graph))
        self.assertTrue(triple_count == 0)

        impc._add_assertion_provenance(self.assoc_curie, self.evidence_curie)

        wanted = """
    MONARCH:test_association SEPIO:0000015 <https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> .
    <https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> a SEPIO:0000001 ;
        SEPIO:0000018 <https://www.mousephenotype.org/> ;
        SEPIO:0000111 <https://monarchinitiative.org/.well-known/genid/evidence>  .

    <https://www.mousephenotype.org/> a foaf:organization ;
        rdfs:label "International Mouse Phenotyping Consortium" .
        """
        # debugging aid: dump what was generated
        turtle_dump = impc.graph.serialize(format="turtle").decode("utf-8")
        LOG.info("Assertion graph:\n %s\n", turtle_dump)

        graphs_match = self.test_util.test_graph_equality(wanted, impc.graph)
        self.assertTrue(graphs_match)
예제 #12
0
    def testEnsemblReactomeParser(self):
        """
        Add one gene-to-pathway association from test_set_1 and compare
        the graph with the expected triples.
        """
        reactome = Reactome('rdf_graph', True)
        reactome.graph = RDFGraph(True)
        triple_count = len(list(reactome.graph))
        self.assertTrue(triple_count == 0)

        eco_map = Reactome.get_eco_map(Reactome.map_files['eco_map'])
        # the record must hold exactly six fields; two of them are not
        # needed by the call below
        (gene, pathway_id, _pathway_iri, pathway_label, go_ecode,
         _species_name) = self.test_set_1
        reactome._add_component_pathway_association(
            eco_map, gene, 'ENSEMBL', pathway_id, 'REACT', pathway_label,
            go_ecode)

        wanted = """
        ENSEMBL:ENSBTAP00000013354 RO:0002331 REACT:R-BTA-3000480 .
        
        :MONARCH_b582c188b7ec20016206 a OBAN:association ;
            OBO:RO_0002558 ECO:0000501 ;
            OBAN:association_has_object REACT:R-BTA-3000480 ;
            OBAN:association_has_predicate RO:0002331 ;
            OBAN:association_has_subject ENSEMBL:ENSBTAP00000013354 .

        REACT:R-BTA-3000480 a owl:Class ;
            rdfs:label "Scavenging by Class A Receptors" ;
            rdfs:subClassOf GO:0009987,
                PW:0000001 .
        """
        graphs_match = self.test_util.test_graph_equality(
            wanted, reactome.graph)
        self.assertTrue(graphs_match)
예제 #13
0
    def test_germline_lof_variant_to_disease(self):
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-germline-lof.xml'

        self.orphanet._process_diseasegene(limit=None)
        logger.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8"))
        expected_triples = """
MONARCH:b53dada0eb229a75e705 OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object Orphanet:938475 ;
    OBAN:association_has_predicate RO:0003303 ;
    OBAN:association_has_subject <https://monarchinitiative.org/.well-known/genid/ba0884fb61004110> .

Orphanet:268061 a owl:Class ;
    rdfs:label "KS1" ;
    dc:description "kinesin family member 7" ;
    oboInOwl:hasExactSynonym "KAS1" ;
    rdfs:subClassOf SO:0001217 .

<https://monarchinitiative.org/.well-known/genid/ba0884fb61004110> a GENO:0000002 ;
    rdfs:label "germline loss of function variant of KS1" ;
    GENO:0000418 Orphanet:268061 ;
    RO:0003303 Orphanet:938475 ;
    :MONARCH_anonymous true ;
    :has_cell_origin GENO:0000900 ;
    :has_functional_consequence SO:0002054 .

Orphanet:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
        """
        self.assertTrue(
            self.test_util.test_graph_equality(expected_triples,
                                               self.orphanet.graph))
예제 #14
0
    def test_germline_lof_variant_to_disease(self):
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-germline-lof.xml'

        self.orphanet._process_diseasegene(limit=None)
        LOG.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8"))
        expected_triples = """
MONARCH:b9ad1b0c562ad4db3f1e a OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object ORPHA:938475 ;
    OBAN:association_has_predicate RO:0004012 ;
    OBAN:association_has_subject ORPHA:268061 .

ORPHA:268061 RO:0004012 ORPHA:938475 ;
    oboInOwl:hasExactSynonym "KAS1" .

ORPHA:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
    
ECO:0000322 biolink:category biolink:EvidenceType .
ORPHA:268061 biolink:category biolink:Gene .
ORPHA:268061 biolink:category biolink:Genotype .
ORPHA:938475 biolink:category biolink:Disease .
    
MONARCH:b9ad1b0c562ad4db3f1e biolink:category biolink:Association .
        """
        self.assertTrue(
            self.test_util.test_graph_equality(expected_triples,
                                               self.orphanet.graph))
        return
예제 #15
0
    def test_gene_to_disease(self):
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-no-variant.xml'

        self.orphanet._process_diseasegene(limit=None)
        logger.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8"))
        expected_triples = """
MONARCH:b64684a0ea6ae59fdb09 a OBAN:association ;
    RO:0002558 ECO:0000322 ;
    OBAN:association_has_object Orphanet:938475 ;
    OBAN:association_has_predicate RO:0003304 ;
    OBAN:association_has_subject Orphanet:268061 .

Orphanet:268061 a owl:Class ;
    rdfs:label "KS1" ;
    RO:0003304 Orphanet:938475 ;
    dc:description "kinesin family member 7" ;
    oboInOwl:hasExactSynonym "KAS1" ;
    rdfs:subClassOf SO:0001217 .

Orphanet:938475 a owl:Class ;
    rdfs:label "too much unit testing disorder" .
        """
        self.assertTrue(
            self.test_util.test_graph_equality(expected_triples,
                                               self.orphanet.graph))
예제 #16
0
 def setUp(self):
     """Create the GWAS Catalog source and a sample record with one SNP."""
     self.test_util = TestUtils()

     catalog = GWASCatalog('rdf_graph', True)
     catalog.graph = RDFGraph(True)  # start from an empty graph
     self.source = catalog
     self.source.graph.bind_all_namespaces()

     record = {
         'snp_label': 'rs1491921-C',
         'chrom_num': '5',
         'chrom_pos': '21259029',
         'context': 'intergenic_variant',
         'allele_freq': '0.013',
         'trait': 'Diisocyanate-induced asthma',
         'trait_uri': 'http://www.ebi.ac.uk/efo/EFO_0006995, http://www.ebi.ac.uk/efo/EFO_0003949',
         'pvalue': '0.0000007',
         'merged': '0',
         'snp_id_current': '1491921',
         'mapped_gene': 'LOC102723561 - GUSBP1',
         'snp_gene_nums': '',
         'upstream_gene_num': '107986179',
         'downstream_gene_num': '107986180',
         'init_sample_desc': '74 European ancestry cases, 824 European ancestry controls',
         'replicated_sample_desc': 'NA',
         'platform': 'Illumina [1556551]',
         'pubmed': '25918132',
     }
     self.test_data = record
예제 #17
0
    def testFakeDataSet1(self):
        string_db = StringDB('rdf_graph', True)
        string_db.graph = RDFGraph(True)
        self.assertEqual(len(string_db.graph), 0)

        ensembl = Ensembl('rdf_graph', True)
        prot_map = ensembl.fetch_protein_gene_map('9606')

        [prot_map.update({k: ['ENSEMBL:' + prot_map[k]]}) for k in prot_map.keys()]

        print("Finished fetching ENSP IDs, fetched {} proteins".format(len(prot_map)))

        # just looking
        # for key in prot_map:
        #    if string_db.graph.curie_regexp.match(prot_map[key]) is None:
        #        print("INVALID curie for %s from %s", prot_map[key], key)

        dataframe = pd.DataFrame(data=self.test_set_1, columns=self.columns)

        string_db._process_protein_links(dataframe, prot_map, '9606')

        # g1 <interacts with> g2
        triples = """
ENSEMBL:ENSG00000001626 RO:0002434 ENSEMBL:ENSG00000004059 .
        """

        self.assertTrue(self.test_util.test_graph_equality(triples, string_db.graph))
예제 #18
0
    def test_snp_trait_association(self):
        """
        Exercise _add_variant_trait_association and verify the outcome
        with a SPARQL query that must return exactly one SNP row.
        :return:
        """
        sample = self.test_data

        efo_ontology = RDFGraph()
        logger.info("Loading EFO ontology in separate rdf graph")
        efo_ontology.parse(self.source.files['efo']['url'], format='xml')
        efo_ontology.bind_all_namespaces()
        logger.info("Finished loading EFO ontology")

        curie_and_type = self.source._get_curie_and_type_from_id(
            sample['snp_label'])
        variant_curie, _variant_type = curie_and_type

        description = self.source._make_description(
            sample['trait'],
            sample['init_sample_desc'],
            sample['replicated_sample_desc'],
            sample['platform'],
            sample['pvalue'])

        self.source._add_variant_trait_association(
            variant_curie, sample['trait_uri'], efo_ontology,
            sample['pubmed'], description)

        query_template = """
            SELECT ?snp
            WHERE {{
                <https://monarchinitiative.org/MONARCH_b46cdf48950cb00d> a OBAN:association ;
                    dc:description "{}" ;
                    OBO:RO_0002558 OBO:ECO_0000213 ;
                    dc:source PMID:25918132 ;
                    OBAN:association_has_object EFO:0003949 ;
                    OBAN:association_has_predicate OBO:RO_0002326 ;
                    OBAN:association_has_subject ?snp .

                <https://monarchinitiative.org/MONARCH_70a05d8eb1c3d4b0> a OBAN:association ;
                    OBO:RO_0002558 OBO:ECO_0000213 ;
                    dc:source PMID:25918132 ;
                    OBAN:association_has_object EFO:0006995 ;
                    OBAN:association_has_predicate OBO:RO_0002326 ;
                    OBAN:association_has_subject ?snp .

                EFO:0003949 a owl:Class ;
                    rdfs:label "eye color"^^xsd:string ;
                    rdfs:subClassOf UPHENO:0001001 .

                ?snp OBO:RO_0002326 EFO:0003949,
                        EFO:0006995 .

                PMID:25918132 a OBO:IAO_0000013 .
            }}
        """
        sparql_query = query_template.format(description)

        # The query has to succeed and yield a single row holding the SNP
        results = list(self.source.graph.query(sparql_query))
        snp_node = self.source.graph._getNode("dbSNP:rs1491921")
        expected = [(URIRef(snp_node), )]
        self.assertEqual(results, expected)
예제 #19
0
    def test_snp_trait_association(self):
        """
        Exercise _add_variant_trait_association and compare the source
        graph with the expected triples.
        :return:
        """
        triple_count = len(list(self.source.graph))
        self.assertTrue(triple_count == 0)

        sample = self.test_data

        efo_ontology = RDFGraph()
        LOG.info("Loading EFO ontology in separate rdf graph")
        efo_ontology.parse(self.source.files['efo']['url'], format='xml')
        efo_ontology.bind_all_namespaces()
        LOG.info("Finished loading EFO ontology")

        curie_and_type = self.source._get_curie_and_type_from_id(
            sample['snp_label'])
        variant_curie, _variant_type = curie_and_type

        description = self.source._make_description(
            sample['trait'],
            sample['init_sample_desc'],
            sample['replicated_sample_desc'],
            sample['platform'],
            sample['pvalue'])

        self.source._add_variant_trait_association(
            variant_curie,
            sample['trait_uri'],
            efo_ontology,
            sample['pubmed'],
            description)

        template = """


    MONARCH:bffc7a930c08cc8fe931 a OBAN:association ;
        dc:description "{0}" ;
        OBO:RO_0002558 OBO:ECO_0000213 ;
        dc:source PMID:25918132 ;
        OBAN:association_has_object EFO:0003949 ;
        OBAN:association_has_predicate RO:0003304 ;
        OBAN:association_has_subject dbSNP:rs1491921 .

    MONARCH:bff9b97458d67ed7f517 a OBAN:association ;
        dc:description "{0}" ;
        OBO:RO_0002558 OBO:ECO_0000213 ;
        dc:source PMID:25918132 ;
        OBAN:association_has_object EFO:0006995 ;
        OBAN:association_has_predicate RO:0003304 ;
        OBAN:association_has_subject dbSNP:rs1491921 .

    EFO:0003949 a owl:Class ;
        rdfs:label "eye color"^^xsd:string ;
        rdfs:subClassOf UPHENO:0001001 .

    dbSNP:rs1491921 RO:0003304 EFO:0003949,
            EFO:0006995 .

    PMID:25918132 a OBO:IAO_0000013 .
        """
        wanted = template.format(description)

        graphs_match = self.test_util.test_graph_equality(
            wanted, self.source.graph)
        self.assertTrue(graphs_match)
예제 #20
0
파일: test_impc.py 프로젝트: sgml/dipper
    def test_provenance_model(self):
        """
        Functional test for _add_study_provenance()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        self.assertTrue(len(list(impc.graph)) == 0)

        (phenotyping_center, colony) = self.test_set_1[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_1[12:19]
        (statistical_method, resource_name) = self.test_set_1[26:28]

        impc._add_study_provenance(phenotyping_center, colony,
                                   project_fullname, pipeline_name,
                                   pipeline_stable_id, procedure_stable_id,
                                   procedure_name, parameter_stable_id,
                                   parameter_name, statistical_method,
                                   resource_name)

        # dbg
        LOG.info("Provenance graph as turtle:\n%s\n",
                 impc.graph.serialize(format="turtle").decode("utf-8"))

        triples = """
<https://monarchinitiative.org/.well-known/genid/bdd05a8ca155ddaf415e> a OBI:0000471 ;
  BFO:0000051 OBO:STATO_0000076,
      <https://www.mousephenotype.org/impress/protocol/175/15> ;
  BFO:0000050  IMPRESS-procedure:15 ,
      <http://www.sanger.ac.uk/science/data/mouse-genomes-project> ;
  SEPIO:0000114 <https://www.mousephenotype.org/impress/parameterontologies/1867/91> ;
  SEPIO:0000017 <http://www.sanger.ac.uk/>  .

<https://monarchinitiative.org/.well-known/genid/b0b26361b8687b5ad9ef> a owl:NamedIndividual ;
    rdfs:label "MEFW" .

<http://www.sanger.ac.uk/> a foaf:organization ;
    rdfs:label "WTSI" .

<http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
    rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" .

<https://www.mousephenotype.org/impress/parameterontologies/1867/91> a owl:NamedIndividual ;
    rdfs:label "Number of ribs right (X-ray)" .

IMPRESS-procedure:15 a owl:NamedIndividual ;
    rdfs:label "MGP Select Pipeline" .

<https://www.mousephenotype.org/impress/protocol/175/15> a owl:NamedIndividual ;
    rdfs:label "X-ray" .
"""

        # dbg
        LOG.debug("Reference graph: %s",
                  impc.graph.serialize(format="turtle").decode("utf-8"))
        self.assertTrue(self.test_util.test_graph_equality(
            triples, impc.graph))
예제 #21
0
 def setUp(self):
     """Create the CTD source and one sample input row."""
     self.test_util = TestUtils()

     ctd = CTD('rdf_graph', True)
     ctd.graph = RDFGraph(True)
     self.source = ctd

     self.test_row = [
         'Nicotine',
         'D009538',
         '',
         'TOBACCO ADDICTION, SUSCEPTIBILITY TO',
         'OMIM:188890',
         'therapeutic',
         '',
         '',
         '',
         '12345|56789',
     ]
예제 #22
0
    def test_variant_model(self):
        """
        Functional test for _parse_patient_variants(): feed one mocked
        tab-separated row and compare the graph with the expected triples.
        """
        udp = UDP('rdf_graph', True)
        udp.graph = RDFGraph(True)
        # the graph must start out empty
        triple_count = len(list(udp.graph))
        self.assertTrue(triple_count == 0)

        fields = [
            'patient_1', 'family_1', '1', 'HG19', '155230432', 'G', 'A',
            'Maternal', 'Biallelic', 'Non-synonymous;DOWNSTREAM', 'CLK2',
            '', '', '', '', '', '', '',
            'Compound heterozygous', 'Heterozygous', '', '0.002747253', '',
        ]
        variant_rows = ["\t".join(fields)]

        fake_data = MagicMock()
        fake_data.__iter__.return_value = iter(variant_rows)
        fake_file = mock_open(mock=fake_data)

        udp._parse_patient_variants(fake_file)

        wanted = """
        :patient_1 GENO:0000222 <https://monarchinitiative.org/.well-known/genid/ba5f377fc8c95d4a6d7a> .

        <https://monarchinitiative.org/.well-known/genid/b41e8da0787b45e24c4f> a SO:0001059 ;
            rdfs:label "hg19chr1(CLK2):g.155230432G>A" ;
            GENO:0000418 HGNC:2069 ;
            RO:0002162 NCBITaxon:9606 ;
            owl:sameAs dbSNP:rs11557757 .

        <https://monarchinitiative.org/.well-known/genid/ba5f377fc8c95d4a6d7a> a GENO:0000000 ;
            rdfs:label "patient_1 genotype" ;
            GENO:0000382 <https://monarchinitiative.org/.well-known/genid/b41e8da0787b45e24c4f> .
        """

        graphs_match = self.test_util.test_graph_equality(wanted, udp.graph)
        self.assertTrue(graphs_match)
예제 #23
0
 def test_unmapped_disease_assoc_type(self):
     """
     Processing 'orph-no-mapping.xml', whose gene disease type is not
     mapped in translationtable/orphanet.yaml, must raise a ValueError.
     """
     source = self.orphanet
     source.graph = RDFGraph()  # start from an empty graph
     source.files['disease-gene']['file'] = 'orph-no-mapping.xml'
     with self.assertRaises(ValueError):
         self.orphanet._process_diseasegene(limit=None)
예제 #24
0
 def setUp(self):
     """Create the graph, Genotype helper and the category URIs under test."""
     graph = RDFGraph()
     self.graph = graph
     self.curie_map = curie_map.get()
     self.genotype = Genotype(graph)
     self.cutil = CurieUtil(self.curie_map)

     # resolve the biolink terms the assertions compare against
     to_uri = self.cutil.get_uri
     self.test_cat_pred = to_uri(blv.terms['category'])
     self.test_cat_genotype_category = to_uri(blv.terms['Genotype'])
     self.test_cat_background_category = to_uri(
         blv.terms['PopulationOfIndividualOrganisms'])
예제 #25
0
    def testFakeDataSet2(self):
        """
        test_set_2 holds a deprecated protein ID that is expected to be
        filtered out by ensembl biomart, so processing it must leave the
        graph empty.
        :return:
        """
        string_db = StringDB('rdf_graph', True)
        string_db.graph = RDFGraph()
        triples_before = len(string_db.graph)
        self.assertEqual(triples_before, 0)

        links = pd.DataFrame(data=self.test_set_2, columns=self.columns)
        string_db._process_protein_links(links, self.protein_list, 9606)

        triples_after = len(string_db.graph)
        self.assertEqual(triples_after, 0)
예제 #26
0
    def test_sex_specificity_model(self):
        """
        Run _process_evidence_view on an empty graph and compare the
        result with the expected triples.
        """
        mgi = self.mgi
        mgi.graph = RDFGraph(True)  # start from an empty graph
        mgi._process_evidence_view(limit=None)

        turtle_dump = mgi.graph.serialize(format="turtle").decode("utf-8")
        logger.debug("Reference graph: %s", turtle_dump)

        wanted = """
        :association RO:0002558 ECO:0000006 ;
            dc:source J:74619 ;
            :has_sex_specificity PATO:0000384 .

        J:74619 a IAO:0000310 .
        """
        graphs_match = self.test_util.test_graph_equality(wanted, mgi.graph)
        self.assertTrue(graphs_match)
예제 #27
0
    def read_graph_from_turtle_file(self, f):
        """
        Parse the turtle file f into a new graph and assert that the
        graph is not empty.  A simple parsing test.
        :param f: file location handed to os.path.abspath and to parse()
        :return: None

        """
        abs_path = os.path.abspath(f)
        parsed = RDFGraph()
        logger.info("Testing reading turtle file from %s", abs_path)
        parsed.parse(f, format="turtle")

        logger.info('Found %s graph nodes in %s', len(parsed), abs_path)
        failure_note = "No nodes found in " + abs_path
        self.assertTrue(len(parsed) > 0, failure_note)
예제 #28
0
    def setUp(self):
        """Create an empty graph and the URIs used to build test triples."""
        self.graph = RDFGraph()
        self.cutil = CurieUtil(curie_map.get())
        to_uri = self.cutil.get_uri

        # subject, predicates and categories for the test triples
        self.test_cat_subj = "http://www.google.com"
        self.test_cat_default_pred = to_uri("biolink:category")
        self.test_cat_nondefault_pred = to_uri("rdf:type")
        self.test_cat_default_category = to_uri("biolink:NamedThing")
        self.test_cat_nondefault_category = to_uri("biolink:Gene")
        self.test_cat_type = to_uri("rdf:type")
        self.test_cat_class = to_uri("rdf:class")
예제 #29
0
    def setUp(self):
        """Create a Model on an empty graph plus the values for test triples."""
        self.model = Model(RDFGraph())
        self.cutil = CurieUtil(curie_map.get())
        to_uri = self.cutil.get_uri

        # subject, predicates and literals for the test triples
        self.test_cat_subj_curie = "MGI:1234"
        self.test_cat_subj = to_uri("MGI:1234")
        self.test_cat_default_pred = to_uri("biolink:category")
        self.test_named_indiv = to_uri("owl:NamedIndividual")
        self.test_label_pred = to_uri("rdfs:label")
        self.test_label = "some label"

        self.test_comment_IRI = to_uri("rdfs:comment")
        self.test_comment = 'bonus eruptus'
예제 #30
0
    def test_graph_equality(turtlish, graph):
        """
        Compare the triples described by headless turtle with the
        triples held by a graph.

        A prefix header is generated from every namespace bound by
        bind_all_namespaces() and prepended before parsing.
        TestUtils.remove_ontology_axioms is applied to the graph before
        the comparison. Any mismatch is logged as a warning showing both
        set differences.

        :param turtlish: file path or string of triples in turtle
                         format without prefix header
        :param graph: Graph object to test against
        :return: Boolean, True if graphs contain same
                          set of triples
        :raises ValueError: if turtlish is neither a readable file path
                            nor a string
        """
        turtle_graph = RDFGraph()
        turtle_graph.bind_all_namespaces()
        prefixes = "\n".join(
            "@prefix {}: <{}> .".format(prefix, namespace)
            for prefix, namespace
            in turtle_graph.namespace_manager.namespaces()
        )
        headless_ttl = ''
        try:
            if Path(turtlish).exists():
                headless_ttl = Path(turtlish).read_text()
            else:
                raise OSError
        except (OSError, TypeError):
            # OSError: not a readable file (turtle text usually lands here).
            # TypeError: Path() rejected the argument type, which should
            # surface as the ValueError below rather than escape as-is.
            if isinstance(turtlish, str):
                headless_ttl = turtlish
            else:
                raise ValueError("turtlish must be filepath or string")

        # Keep the generated header on its own line so the body is parsed
        # the same way no matter what character it starts with.
        turtle_string = prefixes + "\n" + headless_ttl
        turtle_graph.parse(io.StringIO(turtle_string), format="turtle")

        TestUtils.remove_ontology_axioms(graph)

        turtle_triples = set(turtle_graph)
        ref_triples = set(graph)
        equality = turtle_triples == ref_triples
        if not equality:
            LOG.warning(
                "Triples do not match\n"
                "\tLeft hand difference: %s\n"
                "\tRight hand difference: %s",
                sorted(turtle_triples - ref_triples),
                sorted(ref_triples - turtle_triples))
        return equality