def setUp(self):
    """Build the fixture shared by the tests in this case.

    Stores the association and evidence-code curies, one sample input
    row, the hard-coded blank-node curies for study and evidence, and
    the association curie expanded to a URIRef.
    """
    self.assoc_curie = 'MONARCH:test_association'
    self.eco_id = 'ECO:0000015'

    # A single input record; values are kept in their source order.
    self.test_set_1 = (
        'MGI:1920145', 'Setd5', 'WTSI', 'MEFW', 'male',
        'heterozygote', 'MGI:4432631', 'Setd5<tm1a(EUCOMM)Wtsi>',
        'targeted mutation 1a, Wellcome Trust Sanger Institute',
        'MGI:2159965', 'C57BL/6N', 'MGP',
        'Wellcome Trust Sanger Institute Mouse Genetics Project',
        'MGP Select Pipeline', 'MGP_001', 'MGP_XRY_001', 'X-ray',
        'IMPC_XRY_008_001', 'Number of ribs right', 'MP:0005390',
        'skeleton phenotype', 'MP:0000480', 'increased rib number',
        '1.637023E-010', '', '8.885439E-007',
        'Wilcoxon rank sum test with continuity correction', 'IMPC')

    # Generate test curies, these are otherwise generated
    # within _add_evidence() and _add_study_provenance()
    self.study_curie = "_:study"
    self.evidence_curie = "_:evidence"

    # IRIs for testing sparql output
    expander = CurieUtil(curie_map.get())
    self.assoc_iri = URIRef(expander.get_uri(self.assoc_curie))
def hpo_to_tree(cls, hpo_terms, hpo_graph, tree, path):
    """Recursively add the subclasses of ``cls`` to a nested dict tree.

    :param cls: str, curie of the class being visited
    :param hpo_terms: dict keyed by curie; an entry holding 'label',
        'parents' (count of rdfs:subClassOf objects) and 'lay_person'
        is added for ``cls`` when it is not already present
    :param hpo_graph: graph queried for labels and subclass relations
    :param tree: nested dict that is extended in place; it must already
        contain the chain of keys given by ``path`` followed by ``cls``
    :param path: list of curies leading from the root down to the
        parent of ``cls``; it is copied, never modified
    :return: None
    """
    lineage = copy.copy(path)
    lineage.append(cls)

    expander = CurieUtil(curie_map.get())

    if cls not in hpo_terms:
        entry = {
            'label': hpo_graph.label(URIRef(expander.get_uri(cls)))
        }
        hpo_terms[cls] = entry
        superclasses = hpo_graph.objects(
            URIRef(expander.get_uri(cls)), RDFS.subClassOf)
        entry['parents'] = len(list(superclasses))
        entry["lay_person"] = get_lay_person(cls, hpo_graph)

    # Walk down the nested dicts until we reach the dict for cls
    branch = tree
    for step in lineage:
        branch = branch[step]

    cls_iri = URIRef(expander.get_uri(lineage[-1]))
    for child_iri in hpo_graph.subjects(RDFS.subClassOf, cls_iri):
        child = expander.get_curie(child_iri).replace("OBO:HP_", "HP:")
        branch[child] = {}
        hpo_to_tree(child, hpo_terms, hpo_graph, tree, lineage)
def test_addGenotype(self):
    """addGenotype() must put an rdfs:label triple for the genotype
    into the genotype's graph."""
    genotype_curie = 'MGI:5515892'
    genotype_label = \
        'Pmp22<Tr-2J>/Pmp22<+> [C57BL/6J-Pmp22<Tr-2J>/GrsrJ]'

    self.genotype.addGenotype(genotype_curie, genotype_label)

    expander = CurieUtil(self.curie_map)
    expected_triple = (
        URIRef(expander.get_uri(genotype_curie)),
        RDFS['label'],
        Literal(genotype_label))
    self.assertTrue(expected_triple in self.genotype.graph)
def setUp(self):
    """Create a fresh graph-backed Genotype and the expanded category
    IRIs the tests compare against."""
    self.curie_map = curie_map.get()
    self.cutil = CurieUtil(self.curie_map)

    self.graph = RDFGraph()
    self.genotype = Genotype(self.graph)

    # Expand the category terms once, up front.
    expand = self.cutil.get_uri
    self.test_cat_pred = expand(blv.terms['category'])
    self.test_cat_genotype_category = expand(blv.terms['Genotype'])
    self.test_cat_background_category = expand(
        blv.terms['PopulationOfIndividualOrganisms'])
def test_addGenotype(self):
    """addGenotype() must put an rdfs:label triple for the genotype
    into the genotype's graph."""
    # Imports are kept local to the test, as in the original.
    from rdflib.namespace import RDFS, URIRef
    from rdflib import Literal
    from dipper.utils.CurieUtil import CurieUtil

    genotype_curie = 'MGI:5515892'
    genotype_label = \
        'Pmp22<Tr-2J>/Pmp22<+> [C57BL/6J-Pmp22<Tr-2J>/GrsrJ]'

    expander = CurieUtil(self.curie_map)
    self.genotype.addGenotype(genotype_curie, genotype_label)

    expected_triple = (
        URIRef(expander.get_uri(genotype_curie)),
        RDFS['label'],
        Literal(genotype_label))
    self.assertTrue(expected_triple in self.genotype.graph)
def setUp(self):
    """Create an empty graph plus the IRIs used to build and check
    category test triples."""
    self.graph = RDFGraph()
    self.cutil = CurieUtil(curie_map.get())
    expand = self.cutil.get_uri

    # stuff to make test triples
    self.test_cat_subj = "http://www.google.com"
    self.test_cat_default_pred = expand("biolink:category")
    self.test_cat_nondefault_pred = expand("rdf:type")
    self.test_cat_default_category = expand("biolink:NamedThing")
    self.test_cat_nondefault_category = expand("biolink:Gene")
    self.test_cat_type = expand("rdf:type")
    self.test_cat_class = expand("rdf:class")
def setUp(self):
    """Load one disease/drug/variant row into a CGD instance and keep
    the URIs and labels the tests expect to find in its graph."""
    self.curie_map = curie_map.get()
    cu = CurieUtil(self.curie_map)

    def as_uri(curie):
        # Expand a curie and wrap the result as an rdflib URIRef.
        return URIRef(cu.get_uri(curie))

    # Fake credentials as these tests do not require a database connection
    self.cgd = CGD('foo', '******', '******')

    test_data = (
        (387, 'MLH1 any mutation', 13, 'Adenocarcinoma', None, 'Colon',
         'no response', 1, '5FU-based adjuvant therapy', 'late trials',
         '20498393'),
    )
    self.cgd.add_disease_drug_variant_to_graph(test_data)

    (variant_key, variant_label, diagnoses_key, diagnoses,
     specific_diagnosis, organ, relationship, drug_key, drug,
     therapy_status, pubmed_id) = test_data[0]

    # Identifiers built from the row
    source_id = "PMID:{0}".format(pubmed_id)
    variant_id = self.cgd.make_cgd_id('variant{0}'.format(variant_key))
    disease_id = self.cgd.make_cgd_id(
        'disease{0}{1}'.format(diagnoses_key, diagnoses))
    relationship_id = "RO:has_environment"
    disease_quality = "CGD:{0}".format(relationship).replace(" ", "_")
    has_quality_property = "BFO:0000159"  # not referenced again in setUp
    drug_id = self.cgd.make_cgd_id('drug{0}'.format(drug_key))
    disease_instance_id = self.cgd.make_cgd_id(
        'phenotype{0}{1}{2}'.format(diagnoses, variant_key, relationship))
    variant_disease_annot = self.cgd.make_cgd_id(
        "assoc{0}{1}".format(variant_key, diagnoses))

    # Set up URIs
    self.source_uri = as_uri(source_id)
    self.variant_uri = as_uri(variant_id)
    self.disease_uri = as_uri(disease_id)
    self.disease_ind_uri = as_uri(disease_instance_id)
    self.relationship_uri = as_uri(relationship_id)
    self.drug_uri = as_uri(drug_id)
    self.vd_annot_uri = as_uri(variant_disease_annot)
    self.disease_quality_uri = as_uri(disease_quality)

    # Labels
    self.variant_label = variant_label
    self.disease_label = diagnoses
    self.disease_instance_label = "{0} with {1} to therapy".format(
        diagnoses, relationship)
    self.drug_label = drug
def setUp(self):
    """Create a Model over an empty graph plus the subject, predicate
    and literal values used to build test triples."""
    self.model = Model(RDFGraph())
    self.cutil = CurieUtil(curie_map.get())
    expand = self.cutil.get_uri

    # stuff to make test triples
    self.test_cat_subj_curie = "MGI:1234"
    self.test_cat_subj = expand("MGI:1234")
    self.test_cat_default_pred = expand("biolink:category")
    self.test_named_indiv = expand("owl:NamedIndividual")
    self.test_label_pred = expand("rdfs:label")
    self.test_label = "some label"
    self.test_comment_IRI = expand("rdfs:comment")
    self.test_comment = 'bonus eruptus'
def test_associations(self):
    """
    Given the above sample input, produce the following:
    CGD:VariantID has_phenotype(RO:0002200) CGD:DiseaseInstance
    A CGD:AssociationID OBO:RO_0002558
        Traceable Author Statement (ECO:0000033)
    A CGD:AssociationID dc:source PMID:20498393
    A CGD:AssociationID has_environment CGD:DrugID
    A CGD:AssociationID OBAN:association_has_subject CGD:VariantID
    A CGD:AssociationID OBAN:association_has_object_property has_phenotype
    A CGD:AssociationID OBAN:association_has_object CGD:DiseaseInstance
    """
    from dipper.utils.TestUtils import TestUtils

    # Make testutils object and load bindings
    cu = CurieUtil(self.curie_map)
    test_env = TestUtils(self.cgd.graph)
    self.cgd.load_bindings()

    evidence_uri = URIRef(cu.get_uri('OBO:ECO_0000033'))

    query_template = """
        SELECT ?diseaseInd ?variant ?drug ?vdannot ?source ?evidence
        WHERE {{
            ?variant OBO:RO_0002200 ?diseaseInd .
            ?vdannot a OBAN:association ;
                OBO:RO_0002558 ?evidence ;
                dc:source ?source ;
                <{0}> ?drug ;
                OBAN:association_has_object ?diseaseInd ;
                OBAN:association_has_object_property OBO:RO_0002200 ;
                OBAN:association_has_subject ?variant .
        }}
        """
    sparql_query = query_template.format(self.relationship_uri)

    # Expected Results: one row, columns in SELECT order
    expected_row = [
        self.disease_ind_uri,
        self.variant_uri,
        self.drug_uri,
        self.vd_annot_uri,
        self.source_uri,
        evidence_uri,
    ]

    # Query graph
    sparql_output = test_env.query_graph(sparql_query)

    self.assertEqual([expected_row], sparql_output)
def _replace_entity(graph, old_id, new_id, bindings=None, is_property=False):
    """
    Replace entity in graph
    Replace one ID with another via SPARQL DELETE/INSERT updates.

    Both ids are expanded with CurieUtil.get_uri() and wrapped as URIRef
    before being substituted into the update templates.

    :param graph: rdflib.graph object, updated in place
    :param old_id: String identifier to be replaced
    :param new_id: String identifier to replace the old id
    :param bindings: Dict of namespace prefixes, passed as the third
                     positional argument of graph.update();
                     defaults to an empty dict
    :param is_property: Boolean, is the id a property/predicate rather
                        than a class, individual, or literal.
                        Only the literal value False selects the
                        subject/object replacement.
    :return: None
    """
    # A fresh dict per call; a `{}` default would be shared between calls.
    if bindings is None:
        bindings = {}

    cu = CurieUtil(curie_map.get())
    old_uri = URIRef(cu.get_uri(old_id))
    new_uri = URIRef(cu.get_uri(new_id))

    if is_property is False:
        # Rewrite triples where the entity is the subject ...
        sparql_update = \
            """
            DELETE {{
                <{0}> ?pred ?obj
            }}
            INSERT {{
                <{1}> ?pred ?obj
            }}
            WHERE {{
                <{0}> ?pred ?obj
            }}
            """.format(old_uri, new_uri)
        graph.update(sparql_update, 'sparql', bindings)

        # ... and then those where it is the object.
        sparql_update = \
            """
            DELETE {{
                ?sub ?pred <{0}>
            }}
            INSERT {{
                ?sub ?pred <{1}>
            }}
            WHERE {{
                ?sub ?pred <{0}>
            }}
            """.format(old_uri, new_uri)
        graph.update(sparql_update, 'sparql', bindings)
    else:
        # The WHERE clause previously contained `{?obj}`, which
        # str.format() parsed as a replacement field and raised KeyError,
        # so predicate replacement could never run.
        sparql_update = \
            """
            DELETE {{
                ?sub <{0}> ?obj
            }}
            INSERT {{
                ?sub <{1}> ?obj
            }}
            WHERE {{
                ?sub <{0}> ?obj
            }}
            """.format(old_uri, new_uri)
        graph.update(sparql_update, 'sparql', bindings)

    return
def setUp(self):
    """Build the fixture shared by the tests in this case.

    Creates a TestUtils helper, stores the association and evidence-code
    curies, one sample input row, the hard-coded blank-node curies for
    study and evidence, and the association curie expanded to a URIRef.
    """
    self.test_util = TestUtils()
    self.assoc_curie = 'MONARCH:test_association'
    self.eco_id = 'ECO:0000015'

    # One input row; each value is tagged with its column number and header.
    self.test_set_1 = (
        'MGI:1920145',  # 01 marker_accession_id
        'Setd5',  # 02 marker_symbol
        'WTSI',  # 03 phenotyping_center
        'MEFW',  # 04 colony_raw
        'male',  # 05 sex
        'heterozygote',  # 06 zygosity
        'MGI:4432631',  # 07 allele_accession_id
        'Setd5<tm1a(EUCOMM)Wtsi>',  # 08 allele_symbol
        'targeted mutation 1a, Wellcome Trust Sanger Institute',  # 09 allele_name
        'MGI:2159965',  # 10 strain_accession_id
        'C57BL/6N',  # 11 strain_name
        'MGP',  # 12 project_name
        'Wellcome Trust Sanger Institute Mouse Genetics Project',  # 13 project_fullname
        'MGP Select Pipeline',  # 14 pipeline_name
        'MGP_001',  # 15 pipeline_stable_id
        'MGP_XRY_001',  # 16 procedure_stable_id
        'X-ray',  # 17 procedure_name
        'IMPC_XRY_008_001',  # 18 parameter_stable_id
        'Number of ribs right',  # 19 parameter_name
        'MP:0005390',  # 20 top_level_mp_term_id
        'skeleton phenotype',  # 21 top_level_mp_term_name
        'MP:0000480',  # 22 mp_term_id
        'increased rib number',  # 23 mp_term_name
        '1.637023E-010',  # 24 p_value
        '',  # 25 percentage_change
        '8.885439E-007',  # 26 effect_size
        'Wilcoxon rank sum test with continuity correction',  # 27 statistical_method
        'IMPC',  # 28 resource_name
    )

    # Generate test curies, these are otherwise generated
    # within _add_evidence() and _add_study_provenance()
    # these blank nodes are hardcoded as NOT Skolemized ...
    self.study_curie = "_:study"
    self.evidence_curie = "_:evidence"

    # IRIs for testing sparql output
    expander = CurieUtil(curie_map.get())
    self.assoc_iri = URIRef(expander.get_uri(self.assoc_curie))
class RDFGraph(DipperGraph, ConjunctiveGraph):
    """
    Extends RDFLibs ConjunctiveGraph
    The goal of this class is wrap the creation
    of triples and manage creation of URIRef,
    Bnodes, and literals from an input curie
    """

    # prefix -> IRI mapping and the curie expander, shared by all instances
    curie_map = curie_map_class.get()
    curie_util = CurieUtil(curie_map)

    # make global translation table available outside the ingest
    with open(
            os.path.join(
                os.path.dirname(__file__),
                '../../translationtable/GLOBAL_TERMS.yaml')) as fhandle:
        globaltt = yaml.safe_load(fhandle)
        # inverse lookup of the translation table: value -> key
        globaltcid = {v: k for k, v in globaltt.items()}

    def __init__(self, are_bnodes_skized=True, identifier=None):
        """
        :param are_bnodes_skized: bool, when True blank nodes are
            skolemized into URIRefs by _getnode()
        :param identifier: passed through to the ConjunctiveGraph
            constructor
        """
        # print("in RDFGraph with id: ", identifier)
        super().__init__('IOMemory', identifier)
        self.are_bnodes_skized = are_bnodes_skized

        # Can be removed when this is resolved
        # https://github.com/RDFLib/rdflib/issues/632
        for pfx in ('OBO', ):  # , 'ORPHA'):
            self.bind(pfx, Namespace(self.curie_map[pfx]))

        # try adding them all
        # self.bind_all_namespaces()  # too much

    def addTriple(
            self, subject_id, predicate_id, obj,
            object_is_literal=None, literal_type=None):
        """
        Add one triple to the graph, creating the nodes from curies/IRIs.

        :param subject_id: str, curie or IRI of the subject
        :param predicate_id: str, curie or IRI of the predicate
        :param obj: the object; a curie/IRI string or a literal value
        :param object_is_literal: bool or None; when None it is inferred:
            a value matching self.curie_regexp or starting with an
            http/https/ftp scheme is a resource, anything else a literal
        :param literal_type: str or None, curie/IRI of the literal datatype
        :return: None
        """
        # try making an inference on the type of object if none is supplied
        if object_is_literal is None:
            if self.curie_regexp.match(obj) is not None or\
                    obj.split(':')[0].lower() in ('http', 'https', 'ftp'):
                object_is_literal = False
            else:
                object_is_literal = True

        if object_is_literal is True:
            if literal_type is not None and obj is not None:
                literal_type_iri = self._getnode(literal_type)
                self.add(
                    (self._getnode(subject_id), self._getnode(predicate_id),
                     Literal(obj, datatype=literal_type_iri)))
            elif obj is not None:
                self.add(
                    (self._getnode(subject_id), self._getnode(predicate_id),
                     Literal(obj)))
            else:
                LOG.warning("None as literal object for subj: %s and pred: %s",
                            subject_id, predicate_id)
                # get a sense of where the None is coming from
                # magic number here is "steps up the call stack"
                for call in range(2, 0, -1):
                    LOG.warning('\t%sfrom: %s', '\t' * call,
                                sys._getframe(call).f_code.co_name)

        elif obj is not None and obj != '':  # object is a resource
            self.add((self._getnode(subject_id), self._getnode(predicate_id),
                      self._getnode(obj)))
        else:
            LOG.warning("None/empty object IRI for subj: %s and pred: %s",
                        subject_id, predicate_id)
        return

    def skolemizeBlankNode(self, curie):
        """
        Turn a blank node curie into a URIRef under the curie_util base.

        :param curie: str, blank node id with a leading '_:' or '_'
        :return: URIRef
        """
        stripped_id = re.sub(r'^_:|^_', '', curie, 1)
        node = BNode(stripped_id).skolemize(self.curie_util.get_base())
        node = re.sub(r'rdflib/', '', node)  # remove string added by rdflib
        return URIRef(node)

    def _getnode(self, curie):  # convention is lowercase names
        """
        This is a wrapper for creating a URIRef or Bnode object
        with a given a curie or iri as a string.

        If an id starts with an underscore, it assigns it to a BNode, otherwise
        it creates it with a standard URIRef.
        Alternatively, self.skolemize_blank_node is True,
        it will skolemize the blank node

        :param curie: str identifier formatted as curie or iri
        :return: node: RDFLib URIRef or BNode object,
            or None when the curie cannot be expanded
        """
        node = None
        if curie[0] == '_':
            if self.are_bnodes_skized is True:
                node = self.skolemizeBlankNode(curie)
            else:  # delete the leading underscore to make it cleaner
                node = BNode(re.sub(r'^_:|^_', '', curie, 1))

        # Check if curie string is actually an IRI
        elif curie[:4] == 'http' or curie[:3] == 'ftp' or curie[:4] == 'jdbc':
            node = URIRef(curie)
        else:
            iri = RDFGraph.curie_util.get_uri(curie)
            if iri is not None:
                # reuse the expansion instead of looking it up a second time
                node = URIRef(iri)
                # Bind prefix map to graph
                prefix = curie.split(':')[0]
                mapped_iri = self.curie_map[prefix]
                # namespaces() yields (prefix, URIRef) pairs, so the bare
                # prefix string was never found and bind() ran every time.
                # Compare the whole pair; bind() still resolves the case of
                # a prefix already bound to a different namespace.
                if (prefix, URIRef(mapped_iri)) not in \
                        self.namespace_manager.namespaces():
                    self.bind(prefix, Namespace(mapped_iri))
            else:
                LOG.error("couldn't make URI for %s", curie)
        return node

    def bind_all_namespaces(self):
        """
        Results in the RDF @prefix directives for every ingest
        being added to this ingest.
        """
        for prefix in self.curie_map.keys():
            iri = self.curie_map[prefix]
            self.bind(prefix, Namespace(iri))
        return

    # serialize() conflicts between rdflib & Graph.serialize abstractmethod
    # GraphUtils expects the former.  (too bad there is no multiple dispatch)

    def serialize(  # rdflib version
            self, destination=None, format='turtle', base=None, encoding=None):
        """
        Serialize through ConjunctiveGraph.serialize().

        Only destination and format are forwarded;
        base and encoding are accepted but not passed on.
        """
        return ConjunctiveGraph.serialize(self, destination, format)
def __init__(self, curie_map):
    """Keep the prefix map and build a CurieUtil expander over it.

    :param curie_map: prefix map handed unchanged to CurieUtil
    """
    self.curie_map = curie_map
    self.cu = CurieUtil(curie_map)
class StreamedGraph(DipperGraph):
    """
    Stream rdf triples to file or stdout
    Assumes a downstream process will sort then uniquify triples

    Theoretically could support both ntriple, rdfxml formats, for now
    just support nt
    """

    # prefix -> IRI mapping and the curie expander, shared by all instances
    curie_map = curimap.get()
    curie_util = CurieUtil(curie_map)

    # NOTE: this path is relative to the current working directory
    with open('translationtable/GLOBAL_TERMS.yaml') as fhandle:
        globaltt = yaml.safe_load(fhandle).copy()
        # inverse lookup of the translation table: value -> key
        globaltcid = {v: k for k, v in globaltt.items()}

    def __init__(
            self, are_bnodes_skized=True, identifier=None, file_handle=None,
            fmt='nt'):
        """
        :param are_bnodes_skized: bool, when True blank nodes are skolemized
        :param identifier: stored as-is
        :param file_handle: writable text handle; when None triples are
            printed to stdout
        :param fmt: stored as-is
        """
        self.are_bnodes_skized = are_bnodes_skized
        self.fmt = fmt
        self.file_handle = file_handle
        self.identifier = identifier

    def addTriple(
            self, subject_id, predicate_id, obj,
            object_is_literal=None, literal_type=None):
        """
        Expand the identifiers and emit one triple through serialize().

        :param subject_id: str, curie or IRI of the subject
        :param predicate_id: str, curie or IRI of the predicate
        :param obj: the object; a curie/IRI string or a literal value
        :param object_is_literal: bool or None; when None it is inferred:
            a value matching self.curie_regexp or starting with an
            http/https/ftp scheme is a resource, anything else a literal
        :param literal_type: str or None, curie/IRI of the literal datatype
        :return: None
        """
        # try making an inference on the type of object if none is supplied
        if object_is_literal is None:
            if self.curie_regexp.match(obj) or\
                    obj.split(':')[0].lower() in ('http', 'https', 'ftp'):
                object_is_literal = False
            else:
                object_is_literal = True

        subject_iri = self._getnode(subject_id)
        predicate_iri = self._getnode(predicate_id)
        if not object_is_literal:
            obj = self._getnode(obj)

        if literal_type is not None:
            literal_type = self._getnode(literal_type)

        if obj is not None:
            self.serialize(
                subject_iri, predicate_iri, obj, object_is_literal,
                literal_type)
        else:
            LOG.warning("Null value passed as object")
        return

    def skolemizeBlankNode(self, curie):
        """
        Build a skolem IRI string for a '_:'-prefixed blank node curie.

        :param curie: str, blank node curie; the part after the first
            ':' is used as the id
        :return: str
        """
        # get_base() lives on CurieUtil; curie_map is a plain dict, so
        # calling it there raised AttributeError.
        base_iri = StreamedGraph.curie_util.get_base()
        curie_id = curie.split(':')[1]
        skolem_iri = "{0}.wellknown/genid/{1}".format(base_iri, curie_id)
        return skolem_iri

    def serialize(self, subject_iri, predicate_iri, obj,
                  object_is_literal=False, literal_type=None):
        """
        Format one triple as an N-Triples line and write it to
        self.file_handle, or print it when no handle was given.

        :param subject_iri: str
        :param predicate_iri: str
        :param obj: str IRI, or the literal value
        :param object_is_literal: bool
        :param literal_type: str or None, IRI of the literal datatype
        :raises TypeError: for an untyped, non-str literal whose type
            _getLiteralXSDType() does not recognise
        :return: None
        """
        # NOTE(review): a non-skolemized blank node ('_:x') is also
        # wrapped in angle brackets here - confirm that is intended.
        if not object_is_literal:
            triple = "<{}> <{}> <{}> .".format(
                subject_iri, predicate_iri, obj)
        elif literal_type is not None:
            triple = '<{}> <{}> {}^^<{}> .'.format(
                subject_iri, predicate_iri,
                self._quote_encode(str(obj)), literal_type)
        else:
            if isinstance(obj, str):
                triple = '<{}> <{}> {} .'.format(
                    subject_iri, predicate_iri, self._quote_encode(obj))
            else:
                lit_type = self._getLiteralXSDType(obj)
                # This used to test the builtin `type`, which is never
                # None, so unknown types were written out as ^^<None>.
                if lit_type is not None:
                    triple = '<{}> <{}> "{}"^^<{}> .'.format(
                        subject_iri, predicate_iri, obj, lit_type)
                else:
                    raise TypeError(
                        "Cannot determine type of {}".format(obj))

        if self.file_handle is None:
            print(triple)
        else:
            self.file_handle.write("{}\n".format(triple))

    def _getnode(self, curie):
        """
        Returns IRI, or blank node curie/iri depending on
        self.skolemize_blank_node setting
        :param curie: str id as curie or iri
        :return: str
        :raises TypeError: when the id is neither a blank node, an
            http/ftp IRI, nor a string with exactly one ':'
        """
        if re.match(r'^_:', curie):
            if self.are_bnodes_skized is True:
                node = self.skolemizeBlankNode(curie)
            else:
                node = curie
        elif re.match(r'^http|^ftp', curie):
            node = curie
        elif len(curie.split(':')) == 2:
            node = StreamedGraph.curie_util.get_uri(curie)
        else:
            raise TypeError("Cannot process curie {}".format(curie))
        return node

    def _getLiteralXSDType(self, literal):
        """
        This could be much more nuanced, but for now
        if a literal is not a str, determine if it's
        a xsd int or double
        :param literal:
        :return: str - xsd full iri, or None for any other type
        """
        if isinstance(literal, int):
            return self._getnode("xsd:integer")
        if isinstance(literal, float):
            return self._getnode("xsd:double")
        return None

    @staticmethod
    def _quote_encode(literal):
        """
        Copy of code in rdflib here:
        https://github.com/RDFLib/rdflib/blob/776b90be/
        rdflib/plugins/serializers/nt.py#L76
        :param literal: str
        :return: str, the literal escaped and wrapped in double quotes
        """
        return '"%s"' % literal.replace('\\', '\\\\')\
            .replace('\n', '\\n')\
            .replace('"', '\\"')\
            .replace('\r', '\\r')
class RDFGraph(ConjunctiveGraph, DipperGraph):
    """
    Extends RDFLibs ConjunctiveGraph
    The goal of this class is wrap the creation
    of triples and manage creation of URIRef,
    Bnodes, and literals from an input curie
    """

    curie_util = CurieUtil(curie_map.get())
    # the module-level curie_map object itself, not the dict it returns
    curie_map = curie_map

    def __init__(self, are_bnodes_skized=True):
        """
        :param are_bnodes_skized: bool, when True blank nodes are
            skolemized into URIRefs by _getNode()
        """
        super().__init__()
        self.are_bnodes_skized = are_bnodes_skized

        # Can be removed when this is resolved
        # https://github.com/RDFLib/rdflib/issues/632
        obo_map = curie_map.get()['OBO']
        self.bind('OBO', Namespace(obo_map))

    def addTriple(self, subject_id, predicate_id, obj,
                  object_is_literal=False, literal_type=None):
        """
        Add one triple to the graph, creating the nodes from curies/IRIs.

        :param subject_id: str, curie or IRI of the subject
        :param predicate_id: str, curie or IRI of the predicate
        :param obj: the object; a curie/IRI string or a literal value
        :param object_is_literal: bool, only the value True adds a Literal
        :param literal_type: str or None, curie/IRI of the literal datatype
        :return: None
        """
        if object_is_literal is True:
            if literal_type is not None and obj is not None:
                literal_type_iri = self._getNode(literal_type)
                self.add(
                    (self._getNode(subject_id), self._getNode(predicate_id),
                     Literal(obj, datatype=literal_type_iri)))
            elif obj is not None:
                self.add(
                    (self._getNode(subject_id), self._getNode(predicate_id),
                     Literal(obj)))
            else:
                # Logger.warn() is a deprecated alias of warning()
                logger.warning(
                    "None as literal object for subj: %s and pred: %s",
                    subject_id, predicate_id)
        elif obj is not None and obj != '':
            self.add((self._getNode(subject_id), self._getNode(predicate_id),
                      self._getNode(obj)))
        else:
            logger.warning(
                "None/empty object IRI for subj: %s and pred: %s",
                subject_id, predicate_id)
        return

    def skolemizeBlankNode(self, curie):
        """
        Turn a blank node curie into a URIRef under curie_map.get_base().

        :param curie: str, blank node id with a leading '_:' or '_'
        :return: URIRef
        """
        stripped_id = re.sub(r'^_:|^_', '', curie, 1)
        node = BNode(stripped_id).skolemize(self.curie_map.get_base())
        node = re.sub(r'rdflib/', '', node)  # drop the 'rdflib/' segment
        return URIRef(node)

    def _getNode(self, curie):
        """
        This is a wrapper for creating a URIRef or Bnode object
        with a given a curie or iri as a string.

        If an id starts with an underscore, it assigns it to a BNode, otherwise
        it creates it with a standard URIRef.
        Alternatively, self.skolemize_blank_node is True,
        it will skolemize the blank node

        :param curie: str identifier formatted as curie or iri
        :return: node: RDFLib URIRef or BNode object,
            or None when the curie cannot be expanded
        """
        node = None
        if re.match(r'^_', curie):
            if self.are_bnodes_skized is True:
                node = self.skolemizeBlankNode(curie)
            else:  # replace the leading underscore to make it cleaner
                node = BNode(re.sub(r'^_:|^_', '', curie, 1))
        # Check if curie actually an IRI
        elif re.match(r'^http|^ftp', curie):
            node = URIRef(curie)
        else:
            iri = RDFGraph.curie_util.get_uri(curie)
            if iri is not None:
                # reuse the expansion instead of looking it up a second time
                node = URIRef(iri)
                # Bind prefix map to graph
                prefix = curie.split(':')[0]
                mapped_iri = curie_map.get()[prefix]
                # namespaces() yields (prefix, URIRef) pairs, so the bare
                # prefix string was never found and bind() ran every time.
                # Compare the whole pair; bind() still resolves the case of
                # a prefix already bound to a different namespace.
                if (prefix, URIRef(mapped_iri)) not in \
                        self.namespace_manager.namespaces():
                    self.bind(prefix, Namespace(mapped_iri))
            else:
                logger.error("couldn't make URI for %s", curie)
        return node

    def bind_all_namespaces(self):
        """Bind every prefix in the curie map to this graph."""
        for prefix in curie_map.get().keys():
            iri = curie_map.get()[prefix]
            self.bind(prefix, Namespace(iri))
class RDFGraph(DipperGraph, ConjunctiveGraph):
    """
    Extends RDFLibs ConjunctiveGraph
    The goal of this class is wrap the creation
    of triples and manage creation of URIRef,
    Bnodes, and literals from an input curie
    """

    # prefix -> IRI mapping and the curie expander, shared by all instances
    curie_map = curie_map_class.get()
    curie_util = CurieUtil(curie_map)

    # make global translation table available outside the ingest
    with open(
            os.path.join(
                os.path.dirname(__file__),
                '../../translationtable/GLOBAL_TERMS.yaml')) as fhandle:
        globaltt = yaml.safe_load(fhandle)
        # inverse lookup of the translation table: value -> key
        globaltcid = {v: k for k, v in globaltt.items()}

    def __init__(self, are_bnodes_skized=True, identifier=None):
        """
        :param are_bnodes_skized: bool, when true blank nodes are
            turned into URIRefs by _getnode()
        :param identifier: passed through to the ConjunctiveGraph
            constructor
        """
        # print("in RDFGraph with id: ", identifier)
        super().__init__('IOMemory', identifier)
        self.are_bnodes_skized = are_bnodes_skized
        # prefixes seen by _getnode(); they are bound in serialize()
        self.prefixes = set()

        # Can be removed when this is resolved
        # https://github.com/RDFLib/rdflib/issues/632
        for pfx in ('OBO',):  # , 'ORPHA'):
            self.bind(pfx, Namespace(self.curie_map[pfx]))

    def _make_category_triple(
            self, subject, category, predicate=blv.terms['category']
    ):
        """
        add a triple to capture subject or object category (in CURIE form)
        that was passed to addTriple()

        Failures are logged as warnings and not re-raised (best effort).
        """
        try:
            self.add((
                self._getnode(subject),
                self._getnode(predicate),
                self._getnode(category)))
        except Exception:
            # was a bare `except:`, which also swallowed
            # KeyboardInterrupt and SystemExit
            LOG.warning(
                "Problem adding triple in _makeCategoryTriple for " +
                "subj: %s pred: %s obj(category): %s",
                subject, predicate, category)

    def _is_literal(self, thing):
        """
        make inference on type (literal or CURIE)

        A value matching self.curie_regexp, or whose text before the
        first ':' is http/https/ftp, is treated as a resource.

        return: logical
        """
        if self.curie_regexp.match(thing) is not None or\
                thing.split(':')[0].lower() in ('http', 'https', 'ftp'):
            object_is_literal = False
        else:
            object_is_literal = True

        return object_is_literal

    def addTriple(
            self,
            subject_id,
            predicate_id,
            obj,
            object_is_literal=None,
            literal_type=None,
            subject_category=None,
            object_category=None
    ):
        """
        Add one triple to the graph, plus optional category triples.

        :param subject_id: str, curie or IRI of the subject
        :param predicate_id: str, curie or IRI of the predicate
        :param obj: the object; a curie/IRI string or a literal value
        :param object_is_literal: bool or None; inferred with
            _is_literal() when None
        :param literal_type: str or None, curie/IRI of the literal datatype
        :param subject_category: str or None, category curie for the subject
        :param object_category: str or None, category curie for the object;
            only used when the object is not a literal
        :return: None
        """
        if object_is_literal is None:
            object_is_literal = self._is_literal(obj)

        # add triples for subject category info
        if subject_category is not None:
            self._make_category_triple(subject_id, subject_category)

        # add triples for obj category info, if obj is not a literal
        if not object_is_literal:
            if object_category is not None:
                self._make_category_triple(obj, object_category)
        else:  # emit warning if object category is given for a literal
            if object_category is not None:
                LOG.warning("I was given a category %s for obj: %s, " +
                            "which seems to be a literal!",
                            object_category, obj)

        if object_is_literal is True:
            if isinstance(obj, str):
                # reduce any ws to a space; the result of re.sub() used to
                # be discarded, leaving the literal untouched
                obj = re.sub(r'[\t\n\r\f\v]+', ' ', obj)

            if literal_type is not None and obj is not None \
                    and obj not in ("", " "):
                literal_type_iri = self._getnode(literal_type)
                self.add(
                    (self._getnode(subject_id), self._getnode(predicate_id),
                     Literal(obj, datatype=literal_type_iri)))
            elif obj is not None:
                # could attempt to infer a type here but there is no use case
                self.add((
                    self._getnode(subject_id), self._getnode(predicate_id),
                    Literal(obj)))
            else:
                LOG.warning(
                    "None as literal object for subj: %s and pred: %s",
                    subject_id, predicate_id)
                # get a sense of where the None is coming from
                # magic number here is "steps up the call stack"
                # TODO there may be easier/idiomatic ways to do this now
                for call in range(2, 0, -1):
                    LOG.warning(
                        '\t%sfrom: %s', '\t' * call,
                        sys._getframe(call).f_code.co_name)

        elif obj is not None and obj != '':  # object is a resource
            self.add((
                self._getnode(subject_id),
                self._getnode(predicate_id),
                self._getnode(obj)))
        else:
            LOG.warning(
                "None/empty object IRI for subj: %s and pred: %s",
                subject_id, predicate_id)

    def skolemizeBlankNode(self, curie):
        """
        Turn a blank node curie into a URIRef under the 'BNODE' namespace
        of the curie map.

        :param curie: str, blank node id with a leading '_:' or '_'
        :return: URIRef
        """
        stripped_id = re.sub(r'^_:|^_', '', curie, 1)
        return URIRef(self.curie_map['BNODE'] + stripped_id)

    def _getnode(self, curie):
        """
        This is a wrapper for creating a URIRef or Bnode object
        with a given a curie or iri as a string.

        If an id starts with an underscore, it assigns it to a BNode, otherwise
        it creates it with a standard URIRef.
        Alternatively, self.skolemize_blank_node is True,
        it will skolemize the blank node

        :param curie: str identifier formatted as curie or iri
        :return: node: RDFLib URIRef or BNode object,
            or None when the curie cannot be expanded
        """
        node = None
        if curie[0] == '_':
            if self.are_bnodes_skized:
                node = self.skolemizeBlankNode(curie)
            else:  # delete the leading underscore to make it cleaner
                node = BNode(re.sub(r'^_:|^_', '', curie, 1))

        # Check if curie string is actually an IRI
        elif curie[:4] == 'http' or curie[:3] == 'ftp' or curie[:4] == 'jdbc':
            node = URIRef(curie)
        else:
            iri = RDFGraph.curie_util.get_uri(curie)
            if iri is not None:
                node = URIRef(iri)
                # remember the prefix; it is bound to the graph in serialize()
                prefix = curie.split(':')[0]
                self.prefixes.add(prefix)
            else:
                LOG.error("couldn't make URI for %s", curie)
                # get a sense of where the CURIE-ish? thing is coming from
                # magic number here is "steps up the call stack"
                for call in range(3, 0, -1):
                    LOG.warning(
                        '\t%sfrom: %s', '\t' * call,
                        sys._getframe(call).f_code.co_name)
        return node

    def bind_all_namespaces(self):
        """
        Results in the RDF @prefix directives for every ingest
        being added to this ingest.
        """
        for prefix in self.curie_map.keys():
            iri = self.curie_map[prefix]
            self.bind(prefix, Namespace(iri))

    # serialize() conflicts between rdflib & Graph.serialize abstractmethod
    # GraphUtils expects the former.  (too bad there is no multiple dispatch)
    # rdflib version
    def serialize(
            self, destination=None, format='turtle', base=None, encoding=None
    ):
        """
        Bind every prefix collected by _getnode(), then serialize through
        ConjunctiveGraph.serialize().

        Only destination and format are forwarded;
        base and encoding are accepted but not passed on.
        """
        for prefix in self.prefixes:
            mapped_iri = self.curie_map[prefix]
            self.bind(prefix, Namespace(mapped_iri))
        return ConjunctiveGraph.serialize(self, destination, format)
class RDFGraph(ConjunctiveGraph, DipperGraph):
    """
    Extends RDFLibs ConjunctiveGraph
    The goal of this class is wrap the creation
    of triples and manage creation of URIRef,
    Bnodes, and literals from an input curie
    """

    # At class level this name becomes the prefix -> IRI dict; inside the
    # methods below the bare name `curie_map` is still the global object.
    curie_map = curie_map.get()
    curie_util = CurieUtil(curie_map)

    # make global translation table available outside the ingest
    # NOTE: this path is relative to the current working directory
    with open('translationtable/GLOBAL_TERMS.yaml') as fh:
        globaltt = yaml.safe_load(fh)
        # inverse lookup of the translation table: value -> key
        globaltcid = {v: k for k, v in globaltt.items()}

    def __init__(self, are_bnodes_skized=True, identifier=None):
        """
        :param are_bnodes_skized: bool, when True blank nodes are
            skolemized into URIRefs by _getNode()
        :param identifier: passed through to the ConjunctiveGraph
            constructor
        """
        # print("in RDFGraph with id: ", identifier)
        super().__init__('IOMemory', identifier)
        self.are_bnodes_skized = are_bnodes_skized

        # Can be removed when this is resolved
        # https://github.com/RDFLib/rdflib/issues/632
        obo_map = curie_map.get()['OBO']
        self.bind('OBO', Namespace(obo_map))

        # try adding them all
        # self.bind_all_namespaces()  # too much

    def addTriple(self, subject_id, predicate_id, obj,
                  object_is_literal=False, literal_type=None):
        """
        Add one triple to the graph, creating the nodes from curies/IRIs.

        :param subject_id: str, curie or IRI of the subject
        :param predicate_id: str, curie or IRI of the predicate
        :param obj: the object; a curie/IRI string or a literal value
        :param object_is_literal: bool, only the value True adds a Literal
        :param literal_type: str or None, curie/IRI of the literal datatype
        :return: None
        """
        if object_is_literal is True:
            if literal_type is not None and obj is not None:
                literal_type_iri = self._getNode(literal_type)
                self.add(
                    (self._getNode(subject_id), self._getNode(predicate_id),
                     Literal(obj, datatype=literal_type_iri)))
            elif obj is not None:
                self.add(
                    (self._getNode(subject_id), self._getNode(predicate_id),
                     Literal(obj)))
            else:
                logger.warning(
                    "None as literal object for subj: %s and pred: %s",
                    subject_id, predicate_id)
                # magic number 2 here is "steps up the stack"
                logger.warning(sys._getframe(2).f_code.co_name)
        elif obj is not None and obj != '':
            self.add((
                self._getNode(subject_id),
                self._getNode(predicate_id),
                self._getNode(obj)))
        else:
            logger.warning(
                "None/empty object IRI for subj: %s and pred: %s",
                subject_id, predicate_id)
        return

    def skolemizeBlankNode(self, curie):
        """
        Turn a blank node curie into a URIRef under the curie_util base.

        :param curie: str, blank node id with a leading '_:' or '_'
        :return: URIRef
        """
        stripped_id = re.sub(r'^_:|^_', '', curie, 1)
        node = BNode(stripped_id).skolemize(self.curie_util.get_base())
        node = re.sub(r'rdflib/', '', node)  # remove string added by rdflib
        return URIRef(node)

    def _getNode(self, curie):
        """
        This is a wrapper for creating a URIRef or Bnode object
        with a given a curie or iri as a string.

        If an id starts with an underscore, it assigns it to a BNode, otherwise
        it creates it with a standard URIRef.
        Alternatively, self.skolemize_blank_node is True,
        it will skolemize the blank node

        :param curie: str identifier formatted as curie or iri
        :return: node: RDFLib URIRef or BNode object,
            or None when the curie cannot be expanded
        """
        node = None
        if re.match(r'^_', curie):
            if self.are_bnodes_skized is True:
                node = self.skolemizeBlankNode(curie)
            else:  # delete the leading underscore to make it cleaner
                node = BNode(re.sub(r'^_:|^_', '', curie, 1))

        # Check if curie actually an IRI
        elif re.match(r'^http|^ftp', curie):
            node = URIRef(curie)
        else:
            iri = RDFGraph.curie_util.get_uri(curie)
            if iri is not None:
                # reuse the expansion instead of looking it up a second time
                node = URIRef(iri)
                # Bind prefix map to graph
                prefix = curie.split(':')[0]
                mapped_iri = curie_map.get()[prefix]
                # namespaces() yields (prefix, URIRef) pairs, so the bare
                # prefix string was never found and bind() ran every time.
                # Compare the whole pair; bind() still resolves the case of
                # a prefix already bound to a different namespace.
                if (prefix, URIRef(mapped_iri)) not in \
                        self.namespace_manager.namespaces():
                    self.bind(prefix, Namespace(mapped_iri))
            else:
                logger.error("couldn't make URI for %s", curie)
        return node

    def bind_all_namespaces(self):
        """Bind every prefix in the curie map to this graph."""
        for prefix in curie_map.get().keys():
            iri = curie_map.get()[prefix]
            self.bind(prefix, Namespace(iri))
def test_amino_acid_position_region_model(self):
    """
    Test modelling of amino acid positions
    Using test data set 1, and the function add_variant_info_to_graph()
    We want to test the following triples:

    CGD:RegionID is an instance of faldo:Region
    CGD:RegionID faldo:begin BothStrandPositionID
    CGD:RegionID faldo:end BothStrandPositionID

    CGD:BothStrandPositionID is an instance of faldo:BothStrandPosition
    CGD:BothStrandPositionID is an instance of faldo:Position
    CGD:BothStrandPositionID faldo:position 741
    CGD:BothStrandPositionID faldo:reference UniProtID
    """
    from dipper.utils.TestUtils import TestUtils

    self.cgd.add_variant_info_to_graph(self.test_set_1)

    # Make testutils object and load bindings
    test_env = TestUtils(self.cgd.graph)
    cu = CurieUtil(self.curie_map)
    self.cgd.load_bindings()

    # First eleven fields of the first row; only variant_key is used below.
    first_row = self.test_set_1[0]
    (variant_key, variant_label, amino_acid_variant, amino_acid_position,
     transcript_id, transcript_priority, protein_variant_type,
     functional_impact, stop_gain_loss, transcript_gene,
     protein_variant_source) = first_row[0:11]

    position = 741
    variant_id = self.cgd.make_cgd_id('variant{0}'.format(variant_key))
    uniprot_curie = "UniProtKB:Q99062#Q99062-1"
    uniprot_id = "Q99062#Q99062-1"

    region_id = ":_{0}{1}{2}Region".format(
        position, position, uniprot_curie)
    both_strand_id = ":_{0}-{1}".format(uniprot_id, position)

    # Expected Results: one row of (region, position, protein) URIs
    expected_results = [[
        URIRef(cu.get_uri(region_id)),
        URIRef(cu.get_uri(both_strand_id)),
        URIRef(cu.get_uri(uniprot_curie)),
    ]]

    query_template = """
        SELECT ?region ?bsPosition ?protein
        WHERE {{
            ?region a faldo:Region ;
                faldo:begin ?bsPosition ;
                faldo:end ?bsPosition .

            ?bsPosition a faldo:Position ;
                faldo:position {0} ;
                faldo:reference ?protein .
        }}
        """
    sparql_query = query_template.format(position)

    # Query graph
    sparql_output = test_env.query_graph(sparql_query)

    self.assertEqual(expected_results, sparql_output)
def test_missense_variant_cdna_model(self):
    """
    Check the triples produced for a missense variant with cDNA data.

    Test data set 2 is loaded with add_variant_info_to_graph(); the graph
    must then return exactly one row for a query that ties together:

      * the variant (bound to ?cosmic), typed OBO:SO_0001059 and
        OBO:SO_0001583, linked to its gene through OBO:GENO_0000408
      * three faldo:location regions (amino acid, cDNA, chromosomal),
        each with a faldo:begin coordinate at the expected position
      * the reference/altered amino acid and nucleotide literals
      * an owl:sameAs link to the dbSNP node, which carries a label
      * the transcript (OBO:SO_0000233) reached through RO:0002205, and
        the UniProt and RefSeq polypeptides (OBO:SO_0000104) it points
        to through OBO:RO_0002513, with RefSeq owl:sameAs UniProt
    """
    from dipper.utils.TestUtils import TestUtils

    self.cgd.add_variant_info_to_graph(self.test_set_2)

    # Wrap the populated graph for querying and load the prefix bindings
    graph_tester = TestUtils(self.cgd.graph)
    curie_util = CurieUtil(self.curie_map)
    self.cgd.load_bindings()

    def as_uri(curie):
        return URIRef(curie_util.get_uri(curie))

    # Underscore-prefixed fields are not used by this test; the full
    # unpack is kept so that a row of the wrong width still fails here.
    (variant_key, _label, _aa_variant, _aa_position, transcript_name,
     _priority, _protein_variant_type, _impact, _stop_gain_loss,
     gene_symbol, _protein_source, _variant_gene, cdna_pos,
     _variant_cdna, _cosmic_id, snp_label, chr_start, chr_end,
     ref_nt, alt_nt, _exons, _sub_types, _variant_type,
     chrom, build, _build_version, _build_date) = self.test_set_2[0]

    gene_curie = self.cgd.gene_map[gene_symbol]

    ref_residue = "T"
    alt_residue = "I"
    snp_curie = "dbSNP:121913459"
    cosmic_curie = "COSMIC:12560"
    protein_curie = "UniProtKB:P00519#P00519-1"
    protein_accession = "P00519#P00519-1"
    refseq_curie = "NCBIProtein:NP_005148.2"
    transcript_curie = "CCDS:35166.1"
    ccds_accession = "35166.1"
    aa_position = 315
    chrom_on_build = "hg19chr9"

    variant_curie = self.cgd.make_cgd_id('variant{0}'.format(variant_key))

    # Region identifiers
    aa_region_curie = ":_{0}{1}{2}Region".format(
        aa_position, aa_position, protein_curie)
    cdna_region_curie = ":_{0}Region".format(transcript_curie)
    chr_region_curie = ":_{0}{1}Region-{2}-{3}".format(
        build, chrom, chr_start, chr_end)

    # Coordinate identifiers
    aa_coord_curie = ":_{0}-{1}".format(protein_accession, aa_position)
    cdna_coord_curie = ":_{0}-{1}".format(ccds_accession, cdna_pos)
    # chr_coord_id = "CHR:{0}-{1}".format(chromosome_curie, genome_pos_start)
    chr_coord_curie = ":_{0}-{1}".format(chrom_on_build, chr_start)

    # The variant URI is resolved but is not part of the expected row
    as_uri(variant_curie)
    transcript_node = as_uri(transcript_curie)
    gene_node = as_uri(gene_curie)
    snp_node = as_uri(snp_curie)
    cosmic_node = as_uri(cosmic_curie)
    protein_node = as_uri(protein_curie)
    refseq_node = as_uri(refseq_curie)
    aa_region_node = as_uri(aa_region_curie)
    cdna_region_node = as_uri(cdna_region_curie)
    chr_region_node = as_uri(chr_region_curie)
    aa_coord_node = as_uri(aa_coord_curie)
    cdna_coord_node = as_uri(cdna_coord_curie)
    chr_coord_node = as_uri(chr_coord_curie)

    variant_query = (
        " SELECT ?cosmic ?gene ?aaRegion ?cdnaRegion ?chrRegion ?dbSNP"
        " ?transcript ?uniprot ?refseq ?aaCoord ?cdnaCoord ?chrCoord"
        " WHERE {{"
        " ?cosmic a OBO:SO_0001059;"
        " a OBO:SO_0001583 ;"
        " OBO:GENO_0000408 ?gene ;"
        " faldo:location ?aaRegion ;"
        " faldo:location ?cdnaRegion ;"
        " faldo:location ?chrRegion ;"
        ' OBO:GENO_reference_amino_acid "{0}" ;'
        ' OBO:GENO_reference_nucleotide "{1}" ;'
        ' OBO:GENO_altered_nucleotide "{2}" ;'
        ' OBO:GENO_results_in_amino_acid_change "{3}" ;'
        " owl:sameAs ?dbSNP ;"
        " RO:0002205 ?transcript ."
        " ?cosmic owl:sameAs ?dbSNP ."
        " ?transcript a OBO:SO_0000233 ;"
        ' rdfs:label "{4}" ;'
        " OBO:RO_0002513 ?uniprot ;"
        " OBO:RO_0002513 ?refseq ."
        " ?uniprot a OBO:SO_0000104 ;"
        ' rdfs:label "P00519-1" .'
        " ?refseq a OBO:SO_0000104 ;"
        ' rdfs:label "NP_005148.2" .'
        " ?refseq owl:sameAs ?uniprot ."
        " ?aaRegion faldo:begin ?aaCoord ."
        " ?cdnaRegion faldo:begin ?cdnaCoord ."
        " ?chrRegion faldo:begin ?chrCoord ."
        " ?aaCoord faldo:position {5} ."
        " ?cdnaCoord faldo:position {6} ."
        " ?chrCoord faldo:position {7} ."
        ' ?dbSNP rdfs:label "{8}" .'
        " }} "
    ).format(ref_residue, ref_nt, alt_nt, alt_residue, transcript_name,
             aa_position, cdna_pos, chr_start, snp_label)

    # Column order follows the SELECT clause above
    expected_rows = [[
        cosmic_node, gene_node, aa_region_node, cdna_region_node,
        chr_region_node, snp_node, transcript_node, protein_node,
        refseq_node, aa_coord_node, cdna_coord_node, chr_coord_node,
    ]]

    actual_rows = graph_tester.query_graph(variant_query)

    self.assertEqual(expected_rows, actual_rows)
def test_variant_position_region_model(self):
    """
    Check the FALDO region/position triples for a position on a transcript.

    Test data set 2 is loaded with add_variant_info_to_graph(); the graph
    must then return exactly one row for a query asking for:

      * a faldo:Region whose faldo:begin and faldo:end are the same node
      * that node typed as faldo:Position, with faldo:position equal to
        the base-pair position taken from the test row
      * that node's faldo:reference pointing at the transcript
    """
    from dipper.utils.TestUtils import TestUtils

    self.cgd.add_variant_info_to_graph(self.test_set_2)

    # Wrap the populated graph for querying and load the prefix bindings
    graph_tester = TestUtils(self.cgd.graph)
    curie_util = CurieUtil(self.curie_map)
    self.cgd.load_bindings()

    def as_uri(curie):
        return URIRef(curie_util.get_uri(curie))

    # Underscore-prefixed fields are not used by this test; the full
    # unpack is kept so that a row of the wrong width still fails here.
    (variant_key, _label, _aa_variant, _aa_position, transcript_name,
     _priority, _protein_variant_type, _impact, _stop_gain_loss,
     _transcript_gene, _protein_source, _variant_gene, cdna_pos,
     _variant_cdna, _cosmic_id, _snp_label, _chr_start, _chr_end,
     _ref_nt, _alt_nt, _exons, _sub_types, _variant_type,
     _chrom, _build, _build_version, _build_date) = self.test_set_2[0]

    transcript_curie = self.cgd._make_transcript_curie(transcript_name)
    ccds_accession = "35166.1"
    # The variant id is built but is not compared with the query output
    self.cgd.make_cgd_id('variant{0}'.format(variant_key))

    region_curie = ":_{0}Region".format(transcript_curie)
    point_curie = ":_{0}-{1}".format(ccds_accession, cdna_pos)

    # One expected row: region, shared begin/end position, transcript
    expected_rows = [[
        as_uri(region_curie),
        as_uri(point_curie),
        as_uri(transcript_curie),
    ]]

    region_query = (
        " SELECT ?region ?bsPosition ?transcript"
        " WHERE {{"
        " ?region a faldo:Region ;"
        " faldo:begin ?bsPosition ;"
        " faldo:end ?bsPosition ."
        " ?bsPosition a faldo:Position ;"
        " faldo:position {0} ;"
        " faldo:reference ?transcript ."
        " }} "
    ).format(cdna_pos)

    actual_rows = graph_tester.query_graph(region_query)

    self.assertEqual(expected_rows, actual_rows)
def _add_variant_trait_association(self, variant_id, mapped_trait_uri,
                                   efo_ontology, pubmed_id,
                                   description=None):
    """
    Associate a variant with every trait it has been mapped to.

    For each comma-separated trait IRI the trait is converted to a curie,
    optionally declared as a class in the graph (see below), and linked to
    the variant with a 'contributes_to' G2P association that cites the
    publication and carries evidence code ECO:0000213.

    A trait is declared as a class only when its curie starts with 'EFO'
    and the ontology places it under one of two roots:
    EFO:0000408 (declared with parent 'DOID:4') or
    EFO:0000651 (declared with parent 'UPHENO:0001001').

    :param variant_id: identifier of the variant (association subject)
    :param mapped_trait_uri: str, one or more trait IRIs separated by
        commas; blank input adds nothing to the graph
    :param efo_ontology: object exposing query(sparql) over the EFO
        ontology; presumably an rdflib graph - confirm against caller
    :param pubmed_id: str, appended to 'PMID:' to form the source curie
    :param description: optional text attached to each association
    :return: None
    """
    if self.testMode:
        g = self.testgraph
    else:
        g = self.graph
    model = Model(g)

    # Nothing to associate when no trait was mapped
    if not mapped_trait_uri.strip():
        return

    # Loop invariants: build the curie helper and source curie only once
    cu = CurieUtil(curie_map.get())
    pubmed_curie = 'PMID:' + pubmed_id

    # {0} is the trait curie, {1} the EFO root it must descend from
    label_query = (
        " SELECT ?trait"
        " WHERE {{"
        " {0} rdfs:subClassOf+ {1} ."
        " {0} rdfs:label ?trait ."
        " }} "
    )
    # (EFO root, parent class used when declaring the trait); the first
    # pair is the disease branch, the second the phenotype branch
    typed_roots = (
        ('EFO:0000408', 'DOID:4'),
        ('EFO:0000651', 'UPHENO:0001001'),
    )

    # make associations to the EFO terms; there can be >1
    for trait in mapped_trait_uri.split(','):
        trait = trait.strip()
        if not trait:
            # stray comma, e.g. "a,,b" or a trailing ","
            continue
        trait_id = cu.get_curie(trait)

        for efo_root, parent_class in typed_roots:
            # Materialise once; the first row's label names the class
            matches = list(
                efo_ontology.query(label_query.format(trait_id, efo_root)))
            if matches and trait_id.startswith('EFO'):
                model.addClassToGraph(
                    trait_id, matches[0][0], parent_class)

        ref = Reference(
            g, pubmed_curie, Reference.ref_types['journal_article'])
        ref.addRefToGraph()

        assoc = G2PAssoc(
            g, self.name, variant_id, trait_id,
            model.object_properties['contributes_to'])
        assoc.add_source(pubmed_curie)
        # combinatorial evidence
        # used in automatic assertion
        eco_id = 'ECO:0000213'
        assoc.add_evidence(eco_id)

        if description is not None:
            assoc.set_description(description)

        # FIXME score should get added to provenance/study
        # assoc.set_score(pvalue)
        assoc.add_association_to_graph()
def test_genome_build_chromosome_model(self):
    """
    Check how genomes, builds and chromosomes are modelled.

    Test data set 2 is loaded with add_variant_info_to_graph(); the graph
    must then return exactly one row binding the human genome class, the
    chromosome 9 class, the hg19 build and chromosome 9 on that build,
    each matched by label and by the types/relations in the query below.
    """
    from dipper.utils.TestUtils import TestUtils

    self.cgd.add_variant_info_to_graph(self.test_set_2)

    # Wrap the populated graph for querying and load the prefix bindings
    graph_tester = TestUtils(self.cgd.graph)
    curie_util = CurieUtil(self.curie_map)
    self.cgd.load_bindings()

    # (curie, rdfs:label) for each node, in SELECT column order
    genome_curie, genome_label = ":9606genome", "Human genome"
    chrom_curie, chrom_label = "CHR:9606chr9", "chr9 (Human)"
    build_curie, build_label = "UCSC:hg19", "hg19"
    chrom_build_curie, chrom_build_label = \
        ":MONARCH_hg19chr9", "chr9 (hg19)"

    expected_rows = [[
        URIRef(curie_util.get_uri(curie))
        for curie in (genome_curie, chrom_curie,
                      build_curie, chrom_build_curie)
    ]]

    # An alternative form of the query that is disabled (it was wrapped
    # in a string literal and never executed):
    #
    #   SELECT ?genome ?chromosome ?build ?chromOnBuild
    #   WHERE {{
    #       ?genome a owl:Class ;
    #           rdfs:label "{0}" ;
    #           OBO:RO_0002162 OBO:NCBITaxon_9606 ;
    #           OBO:RO_0002351 ?chromosome ;
    #           rdfs:subClassOf OBO:SO_0001026 .
    #       ?chromosome a owl:Class ;
    #           rdfs:label "{1}" ;
    #           OBO:RO_0002350 ?genome ;
    #           rdfs:subClassOf OBO:SO_0000340 .
    #       ?build a OBO:SO_0001505 ;
    #           a ?genome ;
    #           rdfs:label "{2}" ;
    #           OBO:RO_0002351 ?chromOnBuild ;
    #           rdfs:subClassOf ?genome .
    #       ?chromOnBuild a ?chromosome ;
    #           rdfs:label "{3}" ;
    #           OBO:RO_0002350 ?build .
    #   }}
    #   .format(genome_label, chromosome_label,
    #           build_label, chrom_build_label)

    build_query = (
        " SELECT ?genome ?chromosome ?build ?chromOnBuild"
        " WHERE {{"
        " ?genome a owl:Class ;"
        ' rdfs:label "{0}" ;'
        " rdfs:subClassOf OBO:SO_0001026 ."
        " ?chromosome a owl:Class ;"
        ' rdfs:label "{1}" ;'
        " rdfs:subClassOf OBO:SO_0000340 ."
        " ?build a OBO:SO_0001505 ;"
        " a ?genome ;"
        ' rdfs:label "{2}" ;'
        " OBO:RO_0002162 OBO:NCBITaxon_9606 ;"
        " OBO:RO_0002351 ?chromOnBuild ."
        " ?chromOnBuild a ?chromosome ;"
        " a OBO:SO_0000340 ;"
        ' rdfs:label "{3}" ;'
        " OBO:RO_0002350 ?build ."
        " }} "
    ).format(genome_label, chrom_label, build_label, chrom_build_label)

    actual_rows = graph_tester.query_graph(build_query)

    self.assertEqual(expected_rows, actual_rows)
def test_chromosome_position_model(self):
    """
    Check the FALDO region/position triples for a genomic location.

    Test data set 2 is loaded with add_variant_info_to_graph(); the graph
    must then return exactly one row for a query asking for:

      * a faldo:Region with a faldo:begin and a faldo:end node
      * both nodes typed as faldo:Position, carrying the start and end
        genome positions taken from the test row
      * both nodes sharing one faldo:reference, the chromosome on hg19
    """
    from dipper.utils.TestUtils import TestUtils

    self.cgd.add_variant_info_to_graph(self.test_set_2)

    # Wrap the populated graph for querying and load the prefix bindings
    graph_tester = TestUtils(self.cgd.graph)
    curie_util = CurieUtil(self.curie_map)
    self.cgd.load_bindings()

    def as_uri(curie):
        return URIRef(curie_util.get_uri(curie))

    # Underscore-prefixed fields are not used by this test; the full
    # unpack is kept so that a row of the wrong width still fails here.
    (variant_key, _label, _aa_variant, _aa_position, _transcript_id,
     _priority, _protein_variant_type, _impact, _stop_gain_loss,
     _transcript_gene, _protein_source, _variant_gene, _cdna_pos,
     _variant_cdna, _cosmic_id, _snp_label, chr_start, chr_end,
     _ref_nt, _alt_nt, _exons, _sub_types, _variant_type,
     chrom, build, _build_version, _build_date) = self.test_set_2[0]

    # The variant id is built but is not compared with the query output
    self.cgd.make_cgd_id('variant{0}'.format(variant_key))

    chrom_build_curie = ":MONARCH_hg19chr9"
    region_curie = ":_{0}{1}Region-{2}-{3}".format(
        build, chrom, chr_start, chr_end)
    begin_curie = ":_hg19chr9-{0}".format(chr_start)
    finish_curie = ":_hg19chr9-{0}".format(chr_end)

    # One expected row: region, begin, end, reference chromosome
    expected_rows = [[
        as_uri(region_curie),
        as_uri(begin_curie),
        as_uri(finish_curie),
        as_uri(chrom_build_curie),
    ]]

    position_query = (
        " SELECT ?region ?startPosition ?endPosition ?chromosome"
        " WHERE {{"
        " ?region a faldo:Region ;"
        " faldo:begin ?startPosition ;"
        " faldo:end ?endPosition ."
        " ?startPosition a faldo:Position ;"
        " faldo:position {0} ;"
        " faldo:reference ?chromosome ."
        " ?endPosition a faldo:Position ;"
        " faldo:position {1} ;"
        " faldo:reference ?chromosome ."
        " }} "
    ).format(chr_start, chr_end)

    actual_rows = graph_tester.query_graph(position_query)

    self.assertEqual(expected_rows, actual_rows)
def test_missense_variant_protein_model(self):
    """
    Check the triples for a missense variant with protein data only.

    Test data set 1 is loaded with add_variant_info_to_graph(); the graph
    must then return exactly one row for a query that requires:

      * the variant typed OBO:SO_0001059 and OBO:SO_0001583, carrying the
        label from the test row
      * the variant linked to its gene through OBO:GENO_0000408
      * a faldo:location pointing at the amino acid region
      * reference amino acid "Q" and resulting amino acid "X"
      * RO:0002205 pointing at the transcript, which is typed
        OBO:SO_0000233 and labelled with the transcript id from the row
    """
    from dipper.utils.TestUtils import TestUtils

    self.cgd.add_variant_info_to_graph(self.test_set_1)

    # Wrap the populated graph for querying and load the prefix bindings
    graph_tester = TestUtils(self.cgd.graph)
    curie_util = CurieUtil(self.curie_map)
    self.cgd.load_bindings()

    def as_uri(curie):
        return URIRef(curie_util.get_uri(curie))

    # Underscore-prefixed fields are not used by this test; the full
    # unpack is kept so that a row of the wrong width still fails here.
    (variant_key, label, _aa_variant, _aa_position, transcript_name,
     _priority, _protein_variant_type, _impact, _stop_gain_loss,
     gene_symbol, _protein_source) = self.test_set_1[0][0:11]

    gene_curie = self.cgd.gene_map[gene_symbol]
    ref_residue = "Q"
    alt_residue = "X"
    aa_position = 741
    protein_curie = "UniProtKB:Q99062#Q99062-1"
    variant_curie = self.cgd.make_cgd_id('variant{0}'.format(variant_key))
    transcript_curie = "CCDS:413.1"
    region_curie = ":_{0}{1}{2}Region".format(
        aa_position, aa_position, protein_curie)

    variant_node = as_uri(variant_curie)
    transcript_node = as_uri(transcript_curie)
    gene_node = as_uri(gene_curie)
    region_node = as_uri(region_curie)

    variant_query = (
        " SELECT ?variant ?gene ?region ?transcript"
        " WHERE {{"
        " ?variant a OBO:SO_0001059;"
        " a OBO:SO_0001583 ;"
        ' rdfs:label "{0}" ;'
        " OBO:GENO_0000408 ?gene ;"
        " faldo:location ?region ;"
        ' OBO:GENO_reference_amino_acid "{1}" ;'
        ' OBO:GENO_results_in_amino_acid_change "{2}" ;'
        " RO:0002205 ?transcript ."
        " ?transcript a OBO:SO_0000233 ;"
        ' rdfs:label "{3}" .'
        " }} "
    ).format(label, ref_residue, alt_residue, transcript_name)

    # Column order follows the SELECT clause above
    expected_rows = [[
        variant_node, gene_node, region_node, transcript_node,
    ]]

    actual_rows = graph_tester.query_graph(variant_query)

    self.assertEqual(expected_rows, actual_rows)