def testPhenotypeSearchMultipleQualifiers(self):
    """
    Search phenotypes with two qualifier ontology terms in a single
    request and check that at least one phenotype is returned.
    """
    searchRequest = protocol.SearchPhenotypesRequest()
    searchRequest.phenotype_association_set_id = \
        self.getPhenotypeAssociationSetId()
    # Build one OntologyTerm per qualifier id, in request order.
    qualifierTerms = []
    for qualifierId in (
            "http://purl.obolibrary.org/obo/PATO_0000396",
            "http://purl.obolibrary.org/obo/PATO_0000460"):
        qualifierTerm = protocol.OntologyTerm()
        qualifierTerm.term_id = qualifierId
        qualifierTerms.append(qualifierTerm)
    searchRequest.qualifiers.extend(qualifierTerms)
    response = self.sendSearchRequest(
        '/phenotypes/search',
        searchRequest,
        protocol.SearchPhenotypesResponse)
    self.assertGreater(len(response.phenotypes), 0)
def testToProtocolElement(self):
    """
    Pass a valid protocol.Individual through bioMetadata.Individual
    (populateFromJson then toProtocolElement) and compare timestamps,
    then check that malformed JSON raises InvalidJsonException.
    """
    dataset = datasets.Dataset('dataset1')
    sexTerm = protocol.OntologyTerm()
    sexTerm.term = "male genotypic sex"
    sexTerm.term_id = "PATO:0020001"
    # Write out a valid input
    print(protocol.toJsonDict(sexTerm))
    timestamp = "2016-05-19T21:00:19Z"
    validIndividual = protocol.Individual(
        name="test",
        created=timestamp,
        updated=timestamp,
        sex=sexTerm)
    testAttribute = validIndividual.attributes.attr['test']
    testAttribute.values.add().string_value = 'test-info'

    # Pass through protocol creation
    individual = bioMetadata.Individual(dataset, "test")
    individual.populateFromJson(protocol.toJson(validIndividual))
    gaIndividual = individual.toProtocolElement()

    # Verify elements exist
    for fieldName in ("created", "updated"):
        self.assertEqual(
            getattr(gaIndividual, fieldName),
            getattr(validIndividual, fieldName))

    # Invalid input should fail
    invalidIndividual = '{"bad:", "json"}'
    individual = bioMetadata.Individual(dataset, "test")
    with self.assertRaises(exceptions.InvalidJsonException):
        individual.populateFromJson(invalidIndividual)
def testPhenotypeSearchQualifiersSensitivity(self):
    """
    Search phenotypes with the single "sensitivity" qualifier term and
    check that at least one phenotype is returned.
    """
    sensitivityTerm = protocol.OntologyTerm()
    sensitivityTerm.term_id = "http://ohsu.edu/cgd/sensitivity"

    searchRequest = protocol.SearchPhenotypesRequest()
    searchRequest.phenotype_association_set_id = \
        self.getPhenotypeAssociationSetId()
    searchRequest.qualifiers.extend([sensitivityTerm])

    response = self.sendSearchRequest(
        '/phenotypes/search',
        searchRequest,
        protocol.SearchPhenotypesResponse)
    self.assertGreater(len(response.phenotypes), 0)
def _getFeatureById(self, featureId):
    """
    Find a feature and return its GA4GH representation, using the
    'native' id as featureId.

    :param featureId: native id of the feature; it is wrapped in an
        rdflib.URIRef for the detail lookup and stored as the id of
        the returned object.
    :return: a protocol.Feature filled in from the feature's details.
    """
    details = self._detailTuples([rdflib.URIRef(featureId)])
    # Register every predicate with an empty list first, then collect
    # the objects per predicate in a second pass over the details.
    objectsByPredicate = {detail['predicate']: [] for detail in details}
    for detail in details:
        objectsByPredicate[detail['predicate']].append(detail['object'])

    gaFeature = protocol.Feature()
    # Schema for feature only supports one type of `type`
    # here we default to first OBO defined
    oboType = next(
        (candidate for candidate in sorted(objectsByPredicate[TYPE])
         if "obolibrary" in candidate),
        None)
    if oboType is not None:
        typeTerm = protocol.OntologyTerm()
        typeTerm.term = self._featureTypeLabel(oboType)
        typeTerm.term_id = oboType
        gaFeature.feature_type.MergeFrom(typeTerm)

    gaFeature.id = featureId
    # Schema for feature only supports one type of `name` `symbol`
    # here we default to shortest for symbol and longest for name
    labelsByLength = sorted(objectsByPredicate[LABEL], key=len)
    gaFeature.gene_symbol = labelsByLength[0]
    gaFeature.name = labelsByLength[-1]

    # Every predicate/object pair is also exposed as a string attribute.
    gaFeature.attributes.MergeFrom(protocol.Attributes())
    for predicate, objects in objectsByPredicate.items():
        for value in sorted(objects):
            attribute = gaFeature.attributes.attr[predicate]
            attribute.values.add().string_value = value

    # Coordinates are only set when the location map knows the feature.
    if featureId in self._locationMap:
        location = self._locationMap[featureId]
        gaFeature.reference_name = location["chromosome"]
        gaFeature.start = location["begin"]
        gaFeature.end = location["end"]
    return gaFeature
def search_variant_annotations(
        self, variant_annotation_set_id, reference_name="",
        reference_id="", start=0, end=0, effects=None):
    """
    Returns an iterator over the Variant Annotations fulfilling the
    specified conditions from the specified VariantAnnotationSet.

    :param str variant_annotation_set_id: The ID of the
        :class:`ga4gh.protocol.VariantAnnotationSet` of interest.
    :param str reference_name: The name of the
        :class:`ga4gh.protocol.Reference` we wish to return variants from.
    :param str reference_id: Copied unchanged into the request's
        ``reference_id`` field.
    :param int start: Required. The beginning of the window (0-based,
        inclusive) for which overlapping variants should be returned.
        Genomic positions are non-negative integers less than reference
        length. Requests spanning the join of circular genomes are
        represented as two requests one on each side of the join
        (position 0).
    :param int end: Required. The end of the window (0-based, exclusive)
        for which overlapping variants should be returned.
    :param effects: Optional iterable of mappings; each one is expanded
        as keyword arguments to :class:`ga4gh.protocol.OntologyTerm` and
        added to the request's ``effects``. ``None`` (the default) adds
        no effects.
    :return: An iterator over the
        :class:`ga4gh.protocol.VariantAnnotation` objects defined by the
        query parameters.
    :rtype: iter
    :raises exceptions.ErrantRequestException: if any effect term ends
        up without a ``term_id``.
    """
    # A None sentinel replaces the former mutable ``[]`` default, which
    # was a single list object shared by every call.
    if effects is None:
        effects = ()
    request = protocol.SearchVariantAnnotationsRequest()
    request.variant_annotation_set_id = variant_annotation_set_id
    request.reference_name = reference_name
    request.reference_id = reference_id
    request.start = start
    request.end = end
    for effect in effects:
        request.effects.add().CopyFrom(protocol.OntologyTerm(**effect))
    # Validate only after all terms are built, so that construction
    # errors keep precedence over the missing-id error.
    for effect in request.effects:
        if not effect.term_id:
            raise exceptions.ErrantRequestException(
                "Each ontology term should have an id set")
    request.page_size = pb.int(self._page_size)
    return self._run_search_request(
        request, "variantannotations",
        protocol.SearchVariantAnnotationsResponse)
def getGaTermByName(self, name):
    """
    Returns a GA4GH OntologyTerm object by name.

    The term id is the first id returned by ``getTermIds(name)``, or the
    empty string when no id is returned.

    :param name: name of the ontology term, ex. "gene".
    :return: GA4GH OntologyTerm object.
    """
    candidateIds = self.getTermIds(name)
    # TODO what is the correct value when we have no mapping??
    # TODO add logging for missed term translation.
    # TODO what is the correct behaviour here when we have multiple
    # IDs matching a given name?
    resolvedId = candidateIds[0] if len(candidateIds) > 0 else ""
    gaTerm = protocol.OntologyTerm()
    gaTerm.term = name
    gaTerm.term_id = resolvedId
    return gaTerm
def _getRandomfeatureType(self, randomNumberGenerator):
    """
    Build an OntologyTerm for a feature type picked by the supplied
    generator from a fixed list of (term, term id) pairs.

    :param randomNumberGenerator: object whose ``choice`` method picks
        one element from a list.
    :return: a protocol.OntologyTerm with term and term_id set.
    """
    candidates = [
        ("gene", "SO:0000704"),
        ("exon", "SO:0000147"),
    ]
    featureTypeTerm = protocol.OntologyTerm()
    termName, termId = randomNumberGenerator.choice(candidates)
    featureTypeTerm.term = termName
    featureTypeTerm.term_id = termId
    return featureTypeTerm
def _toGA4GH(self, association, featureSets=None):
    """
    Given an association dict, return a
    protocol.FeaturePhenotypeAssociation.

    :param dict association: the association to convert. The keys read
        here are ``id``, ``feature``, ``feature_label``, ``phenotype``,
        ``phenotype_label``, ``environment``, ``environment_label``,
        ``evidence``, ``evidence_type`` and ``sources`` (a "|"-separated
        string). ``feature``, ``phenotype`` and ``environment`` are
        themselves indexed by key (``'id'``, and for the phenotype also
        the module-level ``TYPE`` and ``LABEL`` keys).
    :param featureSets: optional iterable of feature sets. For every
        feature set whose local id contains this object's local id, the
        feature id is replaced by that feature set's compound id.
        ``None`` (the default) means no feature sets.
    :return: the populated protocol.FeaturePhenotypeAssociation.
    """
    # A None sentinel replaces the former mutable ``[]`` default, which
    # was a single list object shared by every call.
    if featureSets is None:
        featureSets = ()

    # 1) map a GA4GH FeaturePhenotypeAssociation
    # from the association dict passed to us
    feature = association['feature']
    fpa = protocol.FeaturePhenotypeAssociation()
    fpa.id = association['id']
    feature_id = feature['id']
    for feature_set in featureSets:
        if self.getLocalId() in feature_set.getLocalId():
            feature_id = feature_set.getCompoundIdForFeatureId(feature_id)
    fpa.feature_ids.extend([feature_id])

    msg = 'Association: genotype:[{}] phenotype:[{}] environment:[{}] ' \
          'evidence:[{}] publications:[{}]'
    fpa.description = msg.format(
        association['feature_label'],
        association['phenotype_label'],
        association['environment_label'],
        self._getIdentifier(association['evidence']),
        association['sources'])

    # 2) map a GA4GH Evidence
    # from the association's phenotype & evidence
    evidence = protocol.Evidence()
    phenotype = association['phenotype']
    term = protocol.OntologyTerm()
    term.term = association['evidence_type']
    # NOTE(review): the evidence type's term_id is taken from the
    # phenotype id -- confirm this is intended.
    term.term_id = phenotype['id']
    evidence.evidence_type.MergeFrom(term)
    evidence.description = self._getIdentifier(association['evidence'])

    # 3) Store publications from the list of sources
    for source in association['sources'].split("|"):
        evidence.info['publications'].values.add().string_value = source
    fpa.evidence.extend([evidence])

    # 4) map environment (drug) to environmentalContext
    environmentalContext = protocol.EnvironmentalContext()
    environment = association['environment']
    environmentalContext.id = environment['id']
    environmentalContext.description = association['environment_label']
    term = protocol.OntologyTerm()
    term.term = environment['id']
    term.term_id = 'http://purl.obolibrary.org/obo/RO_0002606'
    environmentalContext.environment_type.MergeFrom(term)
    fpa.environmental_contexts.extend([environmentalContext])

    # 5) map the phenotype
    phenotypeInstance = protocol.PhenotypeInstance()
    term = protocol.OntologyTerm()
    term.term = phenotype[TYPE]
    term.term_id = phenotype['id']
    phenotypeInstance.type.MergeFrom(term)
    phenotypeInstance.description = phenotype[LABEL]
    phenotypeInstance.id = phenotype['id']
    fpa.phenotype.MergeFrom(phenotypeInstance)
    fpa.phenotype_association_set_id = self.getId()
    return fpa