def searchReads(
        self, readGroupIds, referenceId=None, start=None, end=None):
    """
    Searches the given read groups for reads, optionally limited to a
    region of one reference.

    :param readGroupIds: The IDs of the
        :class:`ga4gh.protocol.ReadGroup` objects of interest; every
        element is added to the request.
    :param str referenceId: Identifies the
        :class:`ga4gh.protocol.Reference` that returned reads must be
        mapped to.
    :param int start: The start position (0-based) of the query. If a
        reference is specified, this defaults to 0. Genomic positions
        are non-negative integers less than the reference length. A
        query spanning the join of a circular genome is expressed as
        two requests, one on each side of the join (position 0).
    :param int end: The end position (0-based, exclusive) of the query.
        If a reference is specified, this defaults to the reference's
        length.
    :return: An iterator over the :class:`ga4gh.protocol.ReadAlignment`
        objects matching the query.
    :rtype: iter
    """
    readsQuery = protocol.SearchReadsRequest()
    readsQuery.read_group_ids.extend(readGroupIds)
    # Optional values pass through the pb helpers before being stored
    # (presumably mapping None to the field default -- confirm in pb).
    readsQuery.reference_id = pb.string(referenceId)
    readsQuery.start = pb.int(start)
    readsQuery.end = pb.int(end)
    readsQuery.page_size = pb.int(self._pageSize)
    responseClass = protocol.SearchReadsResponse
    return self._runSearchRequest(readsQuery, "reads", responseClass)
def search_reads(
        self, read_group_ids, reference_id=None, start=None, end=None):
    """
    Searches the given read groups for reads, optionally limited to a
    region of one reference.

    :param read_group_ids: The IDs of the
        :class:`ga4gh.protocol.ReadGroup` objects of interest; every
        element is added to the request.
    :param str reference_id: Identifies the
        :class:`ga4gh.protocol.Reference` that returned reads must be
        mapped to.
    :param int start: The start position (0-based) of the query. If a
        reference is specified, this defaults to 0. Genomic positions
        are non-negative integers less than the reference length. A
        query spanning the join of a circular genome is expressed as
        two requests, one on each side of the join (position 0).
    :param int end: The end position (0-based, exclusive) of the query.
        If a reference is specified, this defaults to the reference's
        length.
    :return: An iterator over the :class:`ga4gh.protocol.ReadAlignment`
        objects matching the query.
    :rtype: iter
    """
    reads_query = protocol.SearchReadsRequest()
    reads_query.read_group_ids.extend(read_group_ids)
    # Optional values pass through the pb helpers before being stored
    # (presumably mapping None to the field default -- confirm in pb).
    reads_query.reference_id = pb.string(reference_id)
    reads_query.start = pb.int(start)
    reads_query.end = pb.int(end)
    reads_query.page_size = pb.int(self._page_size)
    response_class = protocol.SearchReadsResponse
    return self._run_search_request(reads_query, "reads", response_class)
def _gaFeatureForFeatureDbRecord(self, feature):
    """
    Builds a GA4GH protocol.Feature from one feature database record.

    :param feature: The DB row representing a feature; it is read both
        by key (``feature['id']``) and through ``feature.get``.
    :return: the corresponding GA4GH protocol.Feature object
    """
    protoFeature = protocol.Feature()
    protoFeature.id = self.getCompoundIdForFeatureId(feature['id'])
    parentId = feature.get('parent_id')
    if parentId:
        protoFeature.parent_id = self.getCompoundIdForFeatureId(parentId)
    else:
        protoFeature.parent_id = ""
    protoFeature.feature_set_id = self.getId()
    protoFeature.reference_name = pb.string(feature.get('reference_name'))
    protoFeature.start = pb.int(feature.get('start'))
    protoFeature.end = pb.int(feature.get('end'))
    protoFeature.name = pb.string(feature.get('name'))
    # Anything other than an explicit '-' is reported as positive strand.
    onNegativeStrand = feature.get('strand', '') == '-'
    protoFeature.strand = (
        protocol.NEG_STRAND if onNegativeStrand else protocol.POS_STRAND)
    # child_ids is stored as a JSON document of raw feature IDs.
    childIds = [
        self.getCompoundIdForFeatureId(childId)
        for childId in json.loads(feature['child_ids'])]
    protoFeature.child_ids.extend(childIds)
    featureType = self._ontology.getGaTermByName(feature['type'])
    protoFeature.feature_type.CopyFrom(featureType)
    attributes = json.loads(feature['attributes'])
    # TODO: Identify which values are ExternalIdentifiers and OntologyTerms
    for key in attributes:
        for value in attributes[key]:
            # vals[key] is only touched when there is a value to store.
            protoFeature.attributes.vals[key].values.add().string_value = (
                value)
    if 'gene_name' in attributes:
        geneNames = attributes['gene_name']
        if len(geneNames) > 0:
            # The first gene_name attribute doubles as the gene symbol.
            protoFeature.gene_symbol = pb.string(geneNames[0])
    return protoFeature
def search_features(
        self, feature_set_id=None, parent_id="", reference_name="",
        start=0, end=0, feature_types=None, name="", gene_symbol=""):
    """
    Returns the result of running a search_features method
    on a request with the passed-in parameters.

    :param str feature_set_id: ID of the feature Set being searched
    :param str parent_id: ID (optional) of the parent feature
    :param str reference_name: name of the reference to search
        (ex: "chr1")
    :param int start: search start position on reference
    :param int end: end position on reference
    :param feature_types: optional iterable of terms to limit the
        search by (ex: "gene"); None (the default) adds no terms
    :param str name: only return features with this name
    :param str gene_symbol: only return features on this gene
    :return: an iterator over Features as returned in the
        SearchFeaturesResponse object.
    """
    request = protocol.SearchFeaturesRequest()
    # NOTE(review): feature_set_id is stored unmodified even though it
    # defaults to None; other search methods route optional values
    # through pb.string() -- confirm whether None should be accepted.
    request.feature_set_id = feature_set_id
    request.parent_id = parent_id
    request.reference_name = reference_name
    request.name = name
    request.gene_symbol = gene_symbol
    request.start = start
    request.end = end
    # A None sentinel replaces the former mutable ``[]`` default so no
    # list object is shared between calls.
    if feature_types is not None:
        request.feature_types.extend(feature_types)
    request.page_size = pb.int(self._page_size)
    return self._run_search_request(
        request, "features", protocol.SearchFeaturesResponse)
def search_reference_sets(
        self, accession=None, md5checksum=None, assembly_id=None):
    """
    Searches for the ReferenceSets that satisfy the supplied filters.

    :param str accession: If not None, return the reference sets whose
        `accession` matches this string (case-sensitive, exact match).
    :param str md5checksum: If not None, return the reference sets
        whose `md5checksum` matches this string (case-sensitive, exact
        match). See :class:`ga4gh.protocol.ReferenceSet::md5checksum`
        for details.
    :param str assembly_id: If not None, return the reference sets
        whose `assembly_id` matches this string (case-sensitive, exact
        match).
    :return: An iterator over the :class:`ga4gh.protocol.ReferenceSet`
        objects matching the query.
    """
    reference_sets_query = protocol.SearchReferenceSetsRequest()
    # Each optional filter passes through pb.string() before storage.
    reference_sets_query.accession = pb.string(accession)
    reference_sets_query.md5checksum = pb.string(md5checksum)
    reference_sets_query.assembly_id = pb.string(assembly_id)
    reference_sets_query.page_size = pb.int(self._page_size)
    response_class = protocol.SearchReferenceSetsResponse
    return self._run_search_request(
        reference_sets_query, "referencesets", response_class)
def search_reference_sets(self, accession=None, md5checksum=None,
                          assembly_id=None):
    """
    Looks up ReferenceSets, narrowed by whichever filters are given.

    :param str accession: If not None, only reference sets whose
        `accession` equals this string (case-sensitive, exact match).
    :param str md5checksum: If not None, only reference sets whose
        `md5checksum` equals this string (case-sensitive, exact match).
        See :class:`ga4gh.protocol.ReferenceSet::md5checksum` for
        details.
    :param str assembly_id: If not None, only reference sets whose
        `assembly_id` equals this string (case-sensitive, exact match).
    :return: An iterator over the :class:`ga4gh.protocol.ReferenceSet`
        objects defined by the query parameters.
    """
    query = protocol.SearchReferenceSetsRequest()
    query.accession = pb.string(accession)
    query.md5checksum = pb.string(md5checksum)
    query.assembly_id = pb.string(assembly_id)
    query.page_size = pb.int(self._page_size)
    result_type = protocol.SearchReferenceSetsResponse
    return self._run_search_request(query, "referencesets", result_type)
def search_phenotype_association_sets(self, dataset_id):
    """
    Searches one dataset for its PhenotypeAssociationSets.

    :param dataset_id: stored unchanged as the request's ``dataset_id``.
    :return: the result of the paged "phenotype_association_sets"
        search (an iterator over the PhenotypeAssociationSets).
    """
    association_sets_query = protocol.SearchPhenotypeAssociationSetsRequest()
    association_sets_query.dataset_id = dataset_id
    association_sets_query.page_size = pb.int(self._page_size)
    response_class = protocol.SearchPhenotypeAssociationSetsResponse
    return self._run_search_request(
        association_sets_query, "phenotype_association_sets",
        response_class)
def search_phenotype_association_sets(self, dataset_id):
    """
    Searches one dataset for its PhenotypeAssociationSets.

    :param dataset_id: stored unchanged as the request's ``dataset_id``.
    :return: the result of the paged "phenotypeassociationsets" search
        (an iterator over the PhenotypeAssociationSets).
    """
    query = protocol.SearchPhenotypeAssociationSetsRequest()
    query.dataset_id = dataset_id
    query.page_size = pb.int(self._page_size)
    result_type = protocol.SearchPhenotypeAssociationSetsResponse
    return self._run_search_request(
        query, "phenotypeassociationsets", result_type)
def listReferenceBases(self, id_, start=0, end=None):
    """
    Fetches the bases of a reference page by page and returns them
    joined into a single string.

    This command does not conform to the patterns of the other search
    and get requests, and is implemented differently: pages are
    requested in a loop here until the server stops returning a
    next-page token.

    :param id_: passed to every page request to identify the reference.
    :param int start: start of the requested range (default 0).
    :param int end: end of the requested range; None is passed through
        pb.int() before being stored on the request.
    :return: the concatenation of the ``sequence`` of every page.
    :rtype: str
    """
    basesRequest = protocol.ListReferenceBasesRequest()
    basesRequest.start = pb.int(start)
    basesRequest.end = pb.int(end)
    # Pages are collected in a list and joined once at the end.
    pages = []
    while True:
        page = self._runListReferenceBasesPageRequest(id_, basesRequest)
        pages.append(page.sequence)
        nextToken = page.next_page_token
        # The token is copied onto the request even on the last page.
        basesRequest.page_token = nextToken
        if not nextToken:
            break
    return "".join(pages)
def searchDatasets(self):
    """
    Searches for every Dataset on the server.

    :return: An iterator over the :class:`ga4gh.protocol.Dataset`
        objects on the server.
    """
    datasetsQuery = protocol.SearchDatasetsRequest()
    datasetsQuery.page_size = pb.int(self._pageSize)
    responseClass = protocol.SearchDatasetsResponse
    return self._runSearchRequest(datasetsQuery, "datasets", responseClass)
def search_rna_quantification_sets(self, dataset_id):
    """
    Searches one dataset for its RnaQuantificationSet objects.

    :param dataset_id: stored unchanged as the request's ``dataset_id``.
    :return: the result of the paged "rnaquantificationsets" search (an
        iterator over the RnaQuantificationSet objects).
    """
    quantification_sets_query = protocol.SearchRnaQuantificationSetsRequest()
    quantification_sets_query.dataset_id = dataset_id
    quantification_sets_query.page_size = pb.int(self._page_size)
    response_class = protocol.SearchRnaQuantificationSetsResponse
    return self._run_search_request(
        quantification_sets_query, "rnaquantificationsets", response_class)
def search_datasets(self):
    """
    Searches for every Dataset on the server.

    :return: An iterator over the :class:`ga4gh.protocol.Dataset`
        objects on the server.
    """
    datasets_query = protocol.SearchDatasetsRequest()
    datasets_query.page_size = pb.int(self._page_size)
    response_class = protocol.SearchDatasetsResponse
    return self._run_search_request(
        datasets_query, "datasets", response_class)
def search_rna_quantifications(self, rna_quantification_set_id=""):
    """
    Searches one RnaQuantificationSet for its RnaQuantification objects.

    :param str rna_quantification_set_id: The ID of the
        :class:`ga4gh.protocol.RnaQuantificationSet` of interest;
        stored unchanged on the request (default: empty string).
    :return: the result of the paged "rnaquantifications" search (an
        iterator over the RnaQuantification objects).
    """
    quantifications_query = protocol.SearchRnaQuantificationsRequest()
    quantifications_query.rna_quantification_set_id = (
        rna_quantification_set_id)
    quantifications_query.page_size = pb.int(self._page_size)
    response_class = protocol.SearchRnaQuantificationsResponse
    return self._run_search_request(
        quantifications_query, "rnaquantifications", response_class)
def search_variants(self, variant_set_id, start=None, end=None,
                    reference_name=None, call_set_ids=None):
    """
    Searches one VariantSet for the variants overlapping a window.

    :param str variant_set_id: The ID of the
        :class:`ga4gh.protocol.VariantSet` of interest.
    :param int start: Required. The beginning of the window (0-based,
        inclusive) for which overlapping variants should be returned.
        Genomic positions are non-negative integers less than the
        reference length. A query spanning the join of a circular
        genome is expressed as two requests, one on each side of the
        join (position 0).
    :param int end: Required. The end of the window (0-based,
        exclusive) for which overlapping variants should be returned.
    :param str reference_name: The name of the
        :class:`ga4gh.protocol.Reference` to return variants from.
    :param list call_set_ids: Only return variant calls which belong to
        call sets with these IDs. If an empty array, returns variants
        without any call objects. If null, returns all variant calls.
    :return: An iterator over the :class:`ga4gh.protocol.Variant`
        objects matching the query.
    :rtype: iter
    """
    variants_query = protocol.SearchVariantsRequest()
    variants_query.reference_name = pb.string(reference_name)
    variants_query.start = pb.int(start)
    variants_query.end = pb.int(end)
    variants_query.variant_set_id = variant_set_id
    # NOTE(review): the list argument is routed through pb.string();
    # presumably that passes a list through untouched and turns None
    # into an empty value so nothing is added -- confirm in pb.
    selected_call_sets = pb.string(call_set_ids)
    variants_query.call_set_ids.extend(selected_call_sets)
    variants_query.page_size = pb.int(self._page_size)
    response_class = protocol.SearchVariantsResponse
    return self._run_search_request(
        variants_query, "variants", response_class)
def getFeatures(self, referenceName=None, start=None, end=None,
                pageToken=None, pageSize=None,
                featureTypes=None, parentId=None,
                name=None, geneSymbol=None):
    """
    Generator passed to runSearchRequest to fulfill a features search.

    The same filter arguments are handed to two data-source calls: one
    that counts the matching features and one that fetches the
    requested page of them.

    :param str referenceName: name of reference (ex: "chr1")
    :param start: castable to int, start position on reference
    :param end: castable to int, end position on reference
    :param pageToken: none or castable to int
    :param pageSize: none or castable to int
    :param featureTypes: array of str
    :param parentId: none or featureID of parent
    :param name: the name of the feature
    :param geneSymbol: the symbol for the gene the features are on
    :return: yields one ``(protocol.Feature, nextPageToken)`` pair per
        record, where nextPageToken is a string, or None for the last
        feature served out.
    """
    with self._db as dataSource:
        # featuresCount is needed to ensure that once the
        # request is fulfilled, no nextPageTokens past the
        # end of the actual dataset range are returned.
        featuresCount = dataSource.countFeaturesSearchInDb(
            referenceName=referenceName,
            start=start, end=end,
            parentId=parentId, featureTypes=featureTypes,
            name=name, geneSymbol=geneSymbol)
        featuresReturned = dataSource.searchFeaturesInDb(
            pageToken, pageSize,
            referenceName=referenceName,
            start=start, end=end,
            parentId=parentId, featureTypes=featureTypes,
            name=name, geneSymbol=geneSymbol)

    # NOTE(review): assumes featuresReturned is still usable once the
    # `with` block above has exited -- confirm against the data source.
    # pagination logic: None if last feature was returned,
    # else 1 + row number being returned (starting at row 0).
    if pageToken:
        # presumably pb.int() yields an integer here so the comparison
        # below is numeric -- confirm in the pb module.
        nextPageToken = pb.int(pageToken)
    else:
        nextPageToken = 0
    for featureRecord in featuresReturned:
        gaFeature = self._gaFeatureForFeatureDbRecord(featureRecord)
        if nextPageToken < featuresCount - 1:
            nextPageToken += 1
        else:
            # Last matching row reached: report that no page follows.
            nextPageToken = None
        # The token is handed out as a string, or None when exhausted.
        yield gaFeature, (
            str(nextPageToken)
            if nextPageToken is not None else None)
def searchFeatureSets(self, datasetId):
    """
    Searches one Dataset for its FeatureSets.

    :param str datasetId: The ID of the :class:`ga4gh.protocol.Dataset`
        of interest; stored unchanged on the request.
    :return: An iterator over the :class:`ga4gh.protocol.FeatureSet`
        objects matching the query.
    """
    featureSetsQuery = protocol.SearchFeatureSetsRequest()
    featureSetsQuery.dataset_id = datasetId
    featureSetsQuery.page_size = pb.int(self._pageSize)
    responseClass = protocol.SearchFeatureSetsResponse
    return self._runSearchRequest(
        featureSetsQuery, "featuresets", responseClass)
def toProtocolElement(self):
    """
    Converts this ReferenceSet into its GA4GH protocol representation.

    :return: a ``protocol.ReferenceSet`` populated from this object's
        getters.
    """
    protoReferenceSet = protocol.ReferenceSet()
    # Values wrapped in pb.string()/pb.int() are treated as optional.
    protoReferenceSet.assembly_id = pb.string(self.getAssemblyId())
    protoReferenceSet.description = pb.string(self.getDescription())
    protoReferenceSet.id = self.getId()
    protoReferenceSet.is_derived = self.getIsDerived()
    protoReferenceSet.md5checksum = self.getMd5Checksum()
    protoReferenceSet.ncbi_taxon_id = pb.int(self.getNcbiTaxonId())
    sourceAccessions = self.getSourceAccessions()
    protoReferenceSet.source_accessions.extend(sourceAccessions)
    protoReferenceSet.source_uri = pb.string(self.getSourceUri())
    # The protocol-level name is this object's local ID.
    protoReferenceSet.name = self.getLocalId()
    return protoReferenceSet
def search_feature_sets(self, dataset_id):
    """
    Searches one Dataset for its FeatureSets.

    :param str dataset_id: The ID of the
        :class:`ga4gh.protocol.Dataset` of interest; stored unchanged
        on the request.
    :return: An iterator over the :class:`ga4gh.protocol.FeatureSet`
        objects matching the query.
    """
    feature_sets_query = protocol.SearchFeatureSetsRequest()
    feature_sets_query.dataset_id = dataset_id
    feature_sets_query.page_size = pb.int(self._page_size)
    response_class = protocol.SearchFeatureSetsResponse
    return self._run_search_request(
        feature_sets_query, "featuresets", response_class)
def toProtocolElement(self):
    """
    Converts this Reference into its GA4GH protocol representation.

    :return: a ``protocol.Reference`` populated from this object's
        getters.
    """
    protoReference = protocol.Reference()
    protoReference.id = self.getId()
    protoReference.is_derived = self.getIsDerived()
    protoReference.length = self.getLength()
    protoReference.md5checksum = self.getMd5Checksum()
    protoReference.name = self.getName()
    protoReference.ncbi_taxon_id = self.getNcbiTaxonId()
    sourceAccessions = self.getSourceAccessions()
    protoReference.source_accessions.extend(sourceAccessions)
    # Only the source divergence is routed through a pb helper here.
    sourceDivergence = self.getSourceDivergence()
    protoReference.source_divergence = pb.int(sourceDivergence)
    protoReference.source_uri = self.getSourceUri()
    return protoReference
def search_variants(
        self, variant_set_id, start=None, end=None,
        reference_name=None, call_set_ids=None):
    """
    Looks up the variants of one VariantSet that overlap a window.

    :param str variant_set_id: The ID of the
        :class:`ga4gh.protocol.VariantSet` of interest.
    :param int start: Required. The beginning of the window (0-based,
        inclusive) for which overlapping variants should be returned.
        Genomic positions are non-negative integers less than the
        reference length. A query spanning the join of a circular
        genome is expressed as two requests, one on each side of the
        join (position 0).
    :param int end: Required. The end of the window (0-based,
        exclusive) for which overlapping variants should be returned.
    :param str reference_name: The name of the
        :class:`ga4gh.protocol.Reference` to return variants from.
    :param list call_set_ids: Only return variant calls which belong to
        call sets with these IDs. If an empty array, returns variants
        without any call objects. If null, returns all variant calls.
    :return: An iterator over the :class:`ga4gh.protocol.Variant`
        objects defined by the query parameters.
    :rtype: iter
    """
    query = protocol.SearchVariantsRequest()
    query.reference_name = pb.string(reference_name)
    query.start = pb.int(start)
    query.end = pb.int(end)
    query.variant_set_id = variant_set_id
    # NOTE(review): pb.string() is applied to a list argument here;
    # presumably it leaves a list untouched and maps None to an empty
    # value -- confirm in the pb module.
    wanted_call_sets = pb.string(call_set_ids)
    query.call_set_ids.extend(wanted_call_sets)
    query.page_size = pb.int(self._page_size)
    result_type = protocol.SearchVariantsResponse
    return self._run_search_request(query, "variants", result_type)
def search_variant_annotation_sets(self, variant_set_id):
    """
    Searches one variant set for its Annotation Sets.

    :param str variant_set_id: The ID of the
        :class:`ga4gh.protocol.VariantSet` of interest; stored
        unchanged on the request.
    :return: An iterator over the :class:`ga4gh.protocol.AnnotationSet`
        objects matching the query.
    """
    annotation_sets_query = protocol.SearchVariantAnnotationSetsRequest()
    annotation_sets_query.variant_set_id = variant_set_id
    annotation_sets_query.page_size = pb.int(self._page_size)
    response_class = protocol.SearchVariantAnnotationSetsResponse
    return self._run_search_request(
        annotation_sets_query, "variantannotationsets", response_class)
def searchVariantAnnotationSets(self, variantSetId):
    """
    Searches one variant set for its Annotation Sets.

    :param str variantSetId: The ID of the
        :class:`ga4gh.protocol.VariantSet` of interest; stored
        unchanged on the request.
    :return: An iterator over the :class:`ga4gh.protocol.AnnotationSet`
        objects matching the query.
    """
    annotationSetsQuery = protocol.SearchVariantAnnotationSetsRequest()
    annotationSetsQuery.variant_set_id = variantSetId
    annotationSetsQuery.page_size = pb.int(self._pageSize)
    responseClass = protocol.SearchVariantAnnotationSetsResponse
    return self._runSearchRequest(
        annotationSetsQuery, "variantannotationsets", responseClass)
def searchCallSets(self, variantSetId, name=None):
    """
    Searches one VariantSet for its CallSets, optionally by name.

    :param variantSetId: stored unchanged as the request's
        ``variant_set_id``.
    :param str name: Only CallSets matching the specified name will be
        returned.
    :return: An iterator over the :class:`ga4gh.protocol.CallSet`
        objects matching the query.
    """
    callSetsQuery = protocol.SearchCallSetsRequest()
    callSetsQuery.variant_set_id = variantSetId
    callSetsQuery.name = pb.string(name)
    callSetsQuery.page_size = pb.int(self._pageSize)
    responseClass = protocol.SearchCallSetsResponse
    return self._runSearchRequest(callSetsQuery, "callsets", responseClass)
def search_individuals(self, dataset_id, name=None):
    """
    Searches one dataset for Individuals, optionally by name.

    :param str dataset_id: The dataset to search within.
    :param str name: Only Individuals matching the specified name will
        be returned.
    :return: An iterator over the matching Individuals, as carried by
        ``protocol.SearchIndividualsResponse``.
    """
    individuals_query = protocol.SearchIndividualsRequest()
    individuals_query.dataset_id = dataset_id
    individuals_query.name = pb.string(name)
    individuals_query.page_size = pb.int(self._page_size)
    response_class = protocol.SearchIndividualsResponse
    return self._run_search_request(
        individuals_query, "individuals", response_class)
def searchReadGroupSets(self, datasetId, name=None):
    """
    Searches one Dataset for its ReadGroupSets, optionally by name.

    :param datasetId: stored unchanged as the request's ``dataset_id``.
    :param str name: Only ReadGroupSets matching the specified name
        will be returned.
    :return: An iterator over the :class:`ga4gh.protocol.ReadGroupSet`
        objects matching the query.
    :rtype: iter
    """
    readGroupSetsQuery = protocol.SearchReadGroupSetsRequest()
    readGroupSetsQuery.dataset_id = datasetId
    readGroupSetsQuery.name = pb.string(name)
    readGroupSetsQuery.page_size = pb.int(self._pageSize)
    responseClass = protocol.SearchReadGroupSetsResponse
    return self._runSearchRequest(
        readGroupSetsQuery, "readgroupsets", responseClass)
def searchIndividuals(self, datasetId, name=None):
    """
    Searches one dataset for Individuals, optionally by name.

    :param str datasetId: The dataset to search within.
    :param str name: Only Individuals matching the specified name will
        be returned.
    :return: An iterator over the matching Individuals, as carried by
        ``protocol.SearchIndividualsResponse``.
    """
    individualsQuery = protocol.SearchIndividualsRequest()
    individualsQuery.dataset_id = datasetId
    individualsQuery.name = pb.string(name)
    individualsQuery.page_size = pb.int(self._pageSize)
    responseClass = protocol.SearchIndividualsResponse
    return self._runSearchRequest(
        individualsQuery, "individuals", responseClass)
def search_variant_annotations(self, variant_annotation_set_id,
                               reference_name="", reference_id="",
                               start=0, end=0, effects=None):
    """
    Returns an iterator over the Variant Annotations fulfilling
    the specified conditions from the specified VariantSet.

    :param str variant_annotation_set_id: The ID of the
        :class:`ga4gh.protocol.VariantAnnotationSet` of interest.
    :param str reference_name: The name of the
        :class:`ga4gh.protocol.Reference` we wish to return variants
        from.
    :param str reference_id: stored unchanged as the request's
        ``reference_id``.
    :param int start: Required. The beginning of the window (0-based,
        inclusive) for which overlapping variants should be returned.
        Genomic positions are non-negative integers less than reference
        length. Requests spanning the join of circular genomes are
        represented as two requests one on each side of the join
        (position 0).
    :param int end: Required. The end of the window (0-based,
        exclusive) for which overlapping variants should be returned.
    :param effects: optional iterable of dicts, each giving the keyword
        arguments for one ``protocol.OntologyTerm``; None (the default)
        means no effect filter.
    :return: An iterator over the
        :class:`ga4gh.protocol.VariantAnnotation` objects defined by
        the query parameters.
    :rtype: iter
    :raises exceptions.BadRequestException: if any of the resulting
        ontology terms has no ``id`` set.
    """
    # A None sentinel replaces the former mutable ``[]`` default so no
    # list object is shared between calls.
    if effects is None:
        effects = []
    request = protocol.SearchVariantAnnotationsRequest()
    request.variant_annotation_set_id = variant_annotation_set_id
    request.reference_name = reference_name
    request.reference_id = reference_id
    request.start = start
    request.end = end
    for effect in effects:
        request.effects.add().CopyFrom(protocol.OntologyTerm(**effect))
    # Validation runs on the protobuf terms, after conversion.
    for term in request.effects:
        if not term.id:
            raise exceptions.BadRequestException(
                "Each ontology term should have an id set")
    request.page_size = pb.int(self._page_size)
    return self._run_search_request(
        request, "variantannotations",
        protocol.SearchVariantAnnotationsResponse)
def search_expression_levels(
        self, rna_quantification_id="", feature_ids=None, threshold=0.0):
    """
    Returns an iterator over the ExpressionLevel objects from the server

    :param str rna_quantification_id: The ID of the
        :class:`ga4gh.protocol.RnaQuantification` of interest.
    :param feature_ids: optional iterable of IDs of the
        :class:`ga4gh.protocol.Feature` objects of interest; None (the
        default) adds no feature IDs to the request.
    :param float threshold: Minimum expression of responses to return.
    :return: the result of the paged "expressionlevels" search.
    """
    request = protocol.SearchExpressionLevelsRequest()
    request.rna_quantification_id = rna_quantification_id
    # A None sentinel replaces the former mutable ``[]`` default so no
    # list object is shared between calls.
    if feature_ids is not None:
        request.feature_ids.extend(feature_ids)
    request.threshold = threshold
    request.page_size = pb.int(self._page_size)
    return self._run_search_request(
        request, "expressionlevels",
        protocol.SearchExpressionLevelsResponse)
def search_bio_samples(self, dataset_id, name=None, individual_id=None):
    """
    Searches one dataset for BioSamples, optionally filtered by name
    and/or individual.

    :param str dataset_id: The dataset to search within.
    :param str name: Only BioSamples matching the specified name will
        be returned.
    :param str individual_id: Only BioSamples matching this id will be
        returned.
    :return: An iterator over the :class:`ga4gh.protocol.BioSample`
        objects matching the query.
    """
    bio_samples_query = protocol.SearchBioSamplesRequest()
    bio_samples_query.dataset_id = dataset_id
    bio_samples_query.name = pb.string(name)
    bio_samples_query.individual_id = pb.string(individual_id)
    bio_samples_query.page_size = pb.int(self._page_size)
    response_class = protocol.SearchBioSamplesResponse
    return self._run_search_request(
        bio_samples_query, "biosamples", response_class)
def searchReadGroupSets(self, datasetId, name=None, bioSampleId=None):
    """
    Searches one Dataset for its ReadGroupSets, optionally filtered by
    name and/or bio sample.

    :param datasetId: stored unchanged as the request's ``dataset_id``.
    :param str name: Only ReadGroupSets matching the specified name
        will be returned.
    :param str bioSampleId: Only ReadGroups matching the specified
        bioSample will be included in the response.
    :return: An iterator over the :class:`ga4gh.protocol.ReadGroupSet`
        objects matching the query.
    :rtype: iter
    """
    readGroupSetsQuery = protocol.SearchReadGroupSetsRequest()
    readGroupSetsQuery.dataset_id = datasetId
    readGroupSetsQuery.name = pb.string(name)
    readGroupSetsQuery.bio_sample_id = pb.string(bioSampleId)
    readGroupSetsQuery.page_size = pb.int(self._pageSize)
    responseClass = protocol.SearchReadGroupSetsResponse
    return self._runSearchRequest(
        readGroupSetsQuery, "readgroupsets", responseClass)
def search_phenotype(
        self, phenotype_association_set_id=None, phenotype_id=None,
        description=None, type_=None, age_of_onset=None):
    """
    Returns an iterator over the Phenotypes from the server

    :param phenotype_association_set_id: stored unchanged as the
        request's ``phenotype_association_set_id``.
    :param phenotype_id: if truthy, stored as the request's ``id``.
    :param description: if truthy, stored as the request's
        ``description``.
    :param type_: if truthy, merged into the request's ``type`` field
        (assumed to be a protobuf message of the same type -- confirm
        against callers).
    :param age_of_onset: if truthy, stored as the request's
        ``age_of_onset``.
    :return: the result of the paged "phenotype" search.
    """
    request = protocol.SearchPhenotypesRequest()
    request.phenotype_association_set_id = phenotype_association_set_id
    if phenotype_id:
        request.id = phenotype_id
    if description:
        request.description = description
    if type_:
        # protobuf messages expose ``MergeFrom`` (capitalised, like
        # ``CopyFrom``); the former ``mergeFrom`` spelling is not part
        # of that API and failed as soon as a type was supplied.
        request.type.MergeFrom(type_)
    if age_of_onset:
        request.age_of_onset = age_of_onset
    request.page_size = pb.int(self._page_size)
    return self._run_search_request(
        request, "phenotype",
        protocol.SearchPhenotypesResponse)
def searchBioSamples(self, datasetId, name=None, individualId=None):
    """
    Searches one dataset for BioSamples, optionally filtered by name
    and/or individual.

    :param str datasetId: The dataset to search within.
    :param str name: Only BioSamples matching the specified name will
        be returned.
    :param str individualId: Only BioSamples matching this id will be
        returned.
    :return: An iterator over the :class:`ga4gh.protocol.BioSample`
        objects matching the query.
    """
    bioSamplesQuery = protocol.SearchBioSamplesRequest()
    bioSamplesQuery.dataset_id = datasetId
    bioSamplesQuery.name = pb.string(name)
    bioSamplesQuery.individual_id = pb.string(individualId)
    bioSamplesQuery.page_size = pb.int(self._pageSize)
    responseClass = protocol.SearchBioSamplesResponse
    return self._runSearchRequest(
        bioSamplesQuery, "biosamples", responseClass)
def search_genotype_phenotype(
        self, phenotype_association_set_id=None, feature_ids=None,
        phenotype_ids=None, evidence=None):
    """
    Searches the server for GenotypePhenotype associations.

    :param phenotype_association_set_id: stored unchanged as the
        request's ``phenotype_association_set_id``.
    :param feature_ids: if truthy, its elements are added to the
        request's ``feature_ids``.
    :param phenotype_ids: if truthy, its elements are added to the
        request's ``phenotype_ids``.
    :param evidence: if truthy, its elements are added to the request's
        ``evidence``.
    :return: the result of the paged "genotypephenotype" search.
    """
    associations_query = protocol.SearchGenotypePhenotypeRequest()
    associations_query.phenotype_association_set_id = (
        phenotype_association_set_id)
    # Empty or missing filters leave the repeated fields untouched.
    if feature_ids:
        associations_query.feature_ids.extend(feature_ids)
    if phenotype_ids:
        associations_query.phenotype_ids.extend(phenotype_ids)
    if evidence:
        associations_query.evidence.extend(evidence)
    associations_query.page_size = pb.int(self._page_size)
    log_message = "search_genotype_phenotype {}".format(associations_query)
    self._logger.debug(log_message)
    response_class = protocol.SearchGenotypePhenotypeResponse
    return self._run_search_request(
        associations_query, "genotypephenotype", response_class)
def search_variant_annotations(
        self, variant_annotation_set_id, reference_name="",
        reference_id="", start=0, end=0, effects=None):
    """
    Returns an iterator over the Variant Annotations fulfilling
    the specified conditions from the specified VariantSet.

    :param str variant_annotation_set_id: The ID of the
        :class:`ga4gh.protocol.VariantAnnotationSet` of interest.
    :param str reference_name: The name of the
        :class:`ga4gh.protocol.Reference` we wish to return variants
        from.
    :param str reference_id: stored unchanged as the request's
        ``reference_id``.
    :param int start: Required. The beginning of the window (0-based,
        inclusive) for which overlapping variants should be returned.
        Genomic positions are non-negative integers less than reference
        length. Requests spanning the join of circular genomes are
        represented as two requests one on each side of the join
        (position 0).
    :param int end: Required. The end of the window (0-based,
        exclusive) for which overlapping variants should be returned.
    :param effects: optional iterable of dicts, each giving the keyword
        arguments for one ``protocol.OntologyTerm``; None (the default)
        means no effect filter.
    :return: An iterator over the
        :class:`ga4gh.protocol.VariantAnnotation` objects defined by
        the query parameters.
    :rtype: iter
    :raises exceptions.BadRequestException: if any of the resulting
        ontology terms has no ``id`` set.
    """
    # A None sentinel replaces the former mutable ``[]`` default so no
    # list object is shared between calls.
    if effects is None:
        effects = []
    request = protocol.SearchVariantAnnotationsRequest()
    request.variant_annotation_set_id = variant_annotation_set_id
    request.reference_name = reference_name
    request.reference_id = reference_id
    request.start = start
    request.end = end
    for effect in effects:
        request.effects.add().CopyFrom(protocol.OntologyTerm(**effect))
    # Validation runs on the protobuf terms, after conversion.
    for term in request.effects:
        if not term.id:
            raise exceptions.BadRequestException(
                "Each ontology term should have an id set")
    request.page_size = pb.int(self._page_size)
    return self._run_search_request(
        request, "variantannotations",
        protocol.SearchVariantAnnotationsResponse)
def searchCallSets(self, variantSetId, name=None, bioSampleId=None):
    """
    Searches one VariantSet for its CallSets, optionally filtered by
    name and/or bio sample.

    :param str variantSetId: Find callsets belonging to the provided
        variant set.
    :param str name: Only CallSets matching the specified name will be
        returned.
    :param str bioSampleId: Only CallSets matching this id will be
        returned.
    :return: An iterator over the :class:`ga4gh.protocol.CallSet`
        objects matching the query.
    """
    callSetsQuery = protocol.SearchCallSetsRequest()
    callSetsQuery.variant_set_id = variantSetId
    callSetsQuery.name = pb.string(name)
    callSetsQuery.bio_sample_id = pb.string(bioSampleId)
    callSetsQuery.page_size = pb.int(self._pageSize)
    responseClass = protocol.SearchCallSetsResponse
    return self._runSearchRequest(callSetsQuery, "callsets", responseClass)
def search_expression_levels(self, rna_quantification_id="",
                             feature_ids=None, threshold=0.0):
    """
    Returns an iterator over the ExpressionLevel objects from the server

    :param str rna_quantification_id: The ID of the
        :class:`ga4gh.protocol.RnaQuantification` of interest.
    :param feature_ids: optional iterable of IDs of the
        :class:`ga4gh.protocol.Feature` objects of interest; None (the
        default) adds no feature IDs to the request.
    :param float threshold: Minimum expression of responses to return.
    :return: the result of the paged "expressionlevels" search.
    """
    request = protocol.SearchExpressionLevelsRequest()
    request.rna_quantification_id = rna_quantification_id
    # A None sentinel replaces the former mutable ``[]`` default so no
    # list object is shared between calls.
    if feature_ids is not None:
        request.feature_ids.extend(feature_ids)
    request.threshold = threshold
    request.page_size = pb.int(self._page_size)
    return self._run_search_request(
        request, "expressionlevels",
        protocol.SearchExpressionLevelsResponse)
def search_call_sets(self, variant_set_id, name=None, bio_sample_id=None):
    """
    Searches one VariantSet for its CallSets, optionally filtered by
    name and/or bio sample.

    :param str variant_set_id: Find callsets belonging to the provided
        variant set.
    :param str name: Only CallSets matching the specified name will be
        returned.
    :param str bio_sample_id: Only CallSets matching this id will be
        returned.
    :return: An iterator over the :class:`ga4gh.protocol.CallSet`
        objects matching the query.
    """
    call_sets_query = protocol.SearchCallSetsRequest()
    call_sets_query.variant_set_id = variant_set_id
    call_sets_query.name = pb.string(name)
    call_sets_query.bio_sample_id = pb.string(bio_sample_id)
    call_sets_query.page_size = pb.int(self._page_size)
    response_class = protocol.SearchCallSetsResponse
    return self._run_search_request(
        call_sets_query, "callsets", response_class)
def searchReferences(
        self, referenceSetId, accession=None, md5checksum=None):
    """
    Searches one ReferenceSet for References matching the filters.

    :param str referenceSetId: The ReferenceSet to search.
    :param str accession: If not None, return the references whose
        `accession` matches this string (case-sensitive, exact match).
    :param str md5checksum: If not None, return the references whose
        `md5checksum` matches this string (case-sensitive, exact
        match).
    :return: An iterator over the :class:`ga4gh.protocol.Reference`
        objects matching the query.
    """
    referencesQuery = protocol.SearchReferencesRequest()
    referencesQuery.reference_set_id = referenceSetId
    referencesQuery.accession = pb.string(accession)
    referencesQuery.md5checksum = pb.string(md5checksum)
    referencesQuery.page_size = pb.int(self._pageSize)
    responseClass = protocol.SearchReferencesResponse
    return self._runSearchRequest(
        referencesQuery, "references", responseClass)
def search_genotype_phenotype(
        self, phenotype_association_set_id=None, feature_ids=None,
        phenotype_ids=None, evidence=None):
    """
    Returns an iterator over the GenotypePhenotype associations from
    the server.

    :param phenotype_association_set_id: Copied as-is onto the
        request's ``phenotype_association_set_id`` field.
    :param feature_ids: Added to the request's ``feature_ids`` only
        when truthy.
    :param phenotype_ids: Added to the request's ``phenotype_ids`` only
        when truthy.
    :param evidence: Added to the request's ``evidence`` only when
        truthy.
    :return: Whatever ``self._run_search_request`` returns for the
        "featurephenotypeassociations" search.
    """
    query = protocol.SearchGenotypePhenotypeRequest()
    query.phenotype_association_set_id = phenotype_association_set_id
    # Each optional filter is appended to its repeated field only when
    # the caller supplied a non-empty value.
    optional_filters = (
        ("feature_ids", feature_ids),
        ("phenotype_ids", phenotype_ids),
        ("evidence", evidence),
    )
    for field_name, values in optional_filters:
        if values:
            getattr(query, field_name).extend(values)
    query.page_size = pb.int(self._page_size)
    message = "search_genotype_phenotype {}".format(query)
    self._logger.debug(message)
    return self._run_search_request(
        query, "featurephenotypeassociations",
        protocol.SearchGenotypePhenotypeResponse)
def search_references(
        self, reference_set_id, accession=None, md5checksum=None):
    """
    Searches the given ReferenceSet for References and returns an
    iterator over those that satisfy the supplied conditions.

    :param str reference_set_id: The ReferenceSet to search.
    :param str accession: If not None, only references whose
        `accession` matches this string (case-sensitive, exact match)
        are returned.
    :param str md5checksum: If not None, only references whose
        `md5checksum` matches this string (case-sensitive, exact match)
        are returned.
    :return: An iterator over the :class:`ga4gh.protocol.Reference`
        objects defined by the query parameters.
    """
    query = protocol.SearchReferencesRequest()
    query.reference_set_id = reference_set_id
    # Optional filters and the page size are passed through the pb helpers.
    query.accession = pb.string(accession)
    query.md5checksum = pb.string(md5checksum)
    query.page_size = pb.int(self._page_size)
    return self._run_search_request(
        query, "references", protocol.SearchReferencesResponse)
def search_phenotype(
        self, phenotype_association_set_id=None, phenotype_id=None,
        description=None, type_=None, age_of_onset=None):
    """
    Returns an iterator over the Phenotypes from the server

    :param phenotype_association_set_id: Copied as-is onto the
        request's ``phenotype_association_set_id`` field.
    :param phenotype_id: Set as the request's ``id`` when truthy.
    :param description: Set as the request's ``description`` when
        truthy.
    :param type_: Merged into the request's ``type`` sub-message when
        truthy.
    :param age_of_onset: Set as the request's ``age_of_onset`` when
        truthy.
    :return: Whatever ``self._run_search_request`` returns for the
        "phenotypes" search.
    """
    request = protocol.SearchPhenotypesRequest()
    request.phenotype_association_set_id = phenotype_association_set_id
    if phenotype_id:
        request.id = phenotype_id
    if description:
        request.description = description
    if type_:
        # Protobuf message methods are capitalised (MergeFrom, CopyFrom);
        # the previous ``mergeFrom`` spelling is not part of that API.
        request.type.MergeFrom(type_)
    if age_of_onset:
        request.age_of_onset = age_of_onset
    request.page_size = pb.int(self._page_size)
    return self._run_search_request(
        request, "phenotypes", protocol.SearchPhenotypesResponse)
def toProtocolElement(self):
    """
    Builds and returns the GA4GH protocol message that represents this
    ReadGroup.
    """
    # TODO: still very incomplete; the implementation needed to fill in
    # the remaining fields does not exist yet.
    element = protocol.ReadGroup()
    element.id = self.getId()
    element.created = self._creationTime
    element.updated = self._updateTime
    # The dataset is the parent container of this object's parent container.
    element.dataset_id = (
        self.getParentContainer().getParentContainer().getId())
    element.name = self.getLocalId()
    element.predicted_insert_size = pb.int(self.getPredictedInsertSize())
    refSet = self._parentContainer.getReferenceSet()
    element.sample_id = pb.string(self.getSampleId())
    # reference_set_id is only filled in when a reference set is available.
    if refSet is not None:
        element.reference_set_id = refSet.getId()
    element.stats.CopyFrom(self.getStats())
    element.programs.extend(self.getPrograms())
    element.description = pb.string(self.getDescription())
    element.experiment.CopyFrom(self.getExperiment())
    return element
def search_features(
        self, feature_set_id=None, parent_id="", reference_name="",
        start=0, end=0, feature_types=None, name="", gene_symbol=""):
    """
    Returns the result of running a search_features method
    on a request with the passed-in parameters.

    :param str feature_set_id: ID of the feature Set being searched
    :param str parent_id: ID (optional) of the parent feature
    :param str reference_name: name of the reference to search
        (ex: "chr1")
    :param int start: search start position on reference
    :param int end: end position on reference
    :param feature_types: array of terms to limit search by
        (ex: "gene"). ``None`` (the default) adds no terms to the
        request, exactly as an empty list would.
    :param str name: only return features with this name
    :param str gene_symbol: only return features on this gene
    :return: an iterator over Features as returned in the
        SearchFeaturesResponse object.
    """
    request = protocol.SearchFeaturesRequest()
    # NOTE(review): feature_set_id defaults to None and is assigned as-is;
    # confirm the request field accepts None or callers always pass an ID.
    request.feature_set_id = feature_set_id
    request.parent_id = parent_id
    request.reference_name = reference_name
    request.name = name
    request.gene_symbol = gene_symbol
    request.start = start
    request.end = end
    # A None default replaces the former shared mutable ``[]`` default;
    # any iterable the caller supplies is still forwarded unchanged.
    if feature_types is not None:
        request.feature_types.extend(feature_types)
    request.page_size = pb.int(self._page_size)
    return self._run_search_request(
        request, "features", protocol.SearchFeaturesResponse)
def toProtocolElement(self):
    """
    Builds and returns the GA4GH protocol message that represents this
    ReadGroup.
    """
    # TODO: still very incomplete; the implementation needed to fill in
    # the remaining fields does not exist yet.
    element = protocol.ReadGroup()
    element.id = self.getId()
    element.created = self._creationTime
    element.updated = self._updateTime
    # The dataset is the parent container of this object's parent container.
    element.dataset_id = (
        self.getParentContainer().getParentContainer().getId())
    element.name = self.getLocalId()
    element.predicted_insert_size = pb.int(self.getPredictedInsertSize())
    refSet = self._parentContainer.getReferenceSet()
    element.sample_name = pb.string(self.getSampleName())
    element.bio_sample_id = pb.string(self.getBioSampleId())
    # reference_set_id is only filled in when a reference set is available.
    if refSet is not None:
        element.reference_set_id = refSet.getId()
    element.stats.CopyFrom(self.getStats())
    element.programs.extend(self.getPrograms())
    element.description = pb.string(self.getDescription())
    element.experiment.CopyFrom(self.getExperiment())
    return element