Exemple #1
0
    def searchReads(
            self, readGroupIds, referenceId=None, start=None, end=None):
        """
        Searches the "reads" endpoint for the Reads in the given read
        groups that satisfy the supplied conditions.

        :param readGroupIds: The IDs of the
            :class:`ga4gh.protocol.ReadGroup` objects of interest; they
            are appended to the request's ``read_group_ids`` field.
        :param str referenceId: The name of the
            :class:`ga4gh.protocol.Reference` the returned reads must be
            mapped to.
        :param int start: The 0-based start position of the query. If a
            reference is specified, this defaults to 0. Genomic positions
            are non-negative integers less than the reference length.
            A request spanning the join of a circular genome is expressed
            as two requests, one on each side of the join (position 0).
        :param int end: The 0-based, exclusive end position of the query.
            If a reference is specified, this defaults to the reference's
            length.
        :return: An iterator over the
            :class:`ga4gh.protocol.ReadAlignment` objects selected by the
            query parameters.
        :rtype: iter
        """
        readsRequest = protocol.SearchReadsRequest()
        readsRequest.read_group_ids.extend(readGroupIds)
        readsRequest.reference_id = pb.string(referenceId)
        readsRequest.start = pb.int(start)
        readsRequest.end = pb.int(end)
        readsRequest.page_size = pb.int(self._pageSize)
        responseClass = protocol.SearchReadsResponse
        return self._runSearchRequest(readsRequest, "reads", responseClass)
Exemple #2
0
    def search_reads(
            self, read_group_ids, reference_id=None, start=None, end=None):
        """
        Searches the "reads" endpoint for the Reads in the given read
        groups that satisfy the supplied conditions.

        :param read_group_ids: The IDs of the
            :class:`ga4gh.protocol.ReadGroup` objects of interest; they
            are appended to the request's ``read_group_ids`` field.
        :param str reference_id: The name of the
            :class:`ga4gh.protocol.Reference` the returned reads must be
            mapped to.
        :param int start: The 0-based start position of the query. If a
            reference is specified, this defaults to 0. Genomic positions
            are non-negative integers less than the reference length.
            A request spanning the join of a circular genome is expressed
            as two requests, one on each side of the join (position 0).
        :param int end: The 0-based, exclusive end position of the query.
            If a reference is specified, this defaults to the reference's
            length.
        :return: An iterator over the
            :class:`ga4gh.protocol.ReadAlignment` objects selected by the
            query parameters.
        :rtype: iter
        """
        reads_request = protocol.SearchReadsRequest()
        reads_request.read_group_ids.extend(read_group_ids)
        reads_request.reference_id = pb.string(reference_id)
        reads_request.start = pb.int(start)
        reads_request.end = pb.int(end)
        reads_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchReadsResponse
        return self._run_search_request(
            reads_request, "reads", response_class)
Exemple #3
0
 def _gaFeatureForFeatureDbRecord(self, feature):
     """
     Converts a feature row from the database into a GA4GH Feature.

     :param feature: The DB row representing a feature; it is read with
         ``feature[...]`` and ``feature.get(...)``. Its 'child_ids' and
         'attributes' entries are decoded as JSON.
     :return: the corresponding GA4GH protocol.Feature object
     """
     result = protocol.Feature()
     result.id = self.getCompoundIdForFeatureId(feature['id'])
     # A missing or falsy parent is encoded as the empty string.
     result.parent_id = (
         self.getCompoundIdForFeatureId(feature['parent_id'])
         if feature.get('parent_id') else "")
     result.feature_set_id = self.getId()
     result.reference_name = pb.string(feature.get('reference_name'))
     result.start = pb.int(feature.get('start'))
     result.end = pb.int(feature.get('end'))
     result.name = pb.string(feature.get('name'))
     # Anything other than an explicit '-' is treated as positive strand.
     onNegativeStrand = feature.get('strand', '') == '-'
     result.strand = (
         protocol.NEG_STRAND if onNegativeStrand else protocol.POS_STRAND)
     childIds = json.loads(feature['child_ids'])
     result.child_ids.extend(
         [self.getCompoundIdForFeatureId(childId) for childId in childIds])
     featureType = self._ontology.getGaTermByName(feature['type'])
     result.feature_type.CopyFrom(featureType)
     attributes = json.loads(feature['attributes'])
     # TODO: Identify which values are ExternalIdentifiers and OntologyTerms
     for key in attributes:
         for value in attributes[key]:
             # The map entry is only touched when there is a value to
             # store, so keys with no values add nothing to the message.
             entry = result.attributes.vals[key].values.add()
             entry.string_value = value
     if 'gene_name' in attributes:
         geneNames = attributes['gene_name']
         if len(geneNames) > 0:
             # Only the first gene name is used as the gene symbol.
             result.gene_symbol = pb.string(geneNames[0])
     return result
 def _gaFeatureForFeatureDbRecord(self, feature):
     """
     Converts a feature row from the database into a GA4GH Feature.

     :param feature: The DB row representing a feature; it is read with
         ``feature[...]`` and ``feature.get(...)``. Its 'child_ids' and
         'attributes' entries are decoded as JSON.
     :return: the corresponding GA4GH protocol.Feature object
     """
     result = protocol.Feature()
     result.id = self.getCompoundIdForFeatureId(feature['id'])
     # A missing or falsy parent is encoded as the empty string.
     result.parent_id = (
         self.getCompoundIdForFeatureId(feature['parent_id'])
         if feature.get('parent_id') else "")
     result.feature_set_id = self.getId()
     result.reference_name = pb.string(feature.get('reference_name'))
     result.start = pb.int(feature.get('start'))
     result.end = pb.int(feature.get('end'))
     result.name = pb.string(feature.get('name'))
     # Anything other than an explicit '-' is treated as positive strand.
     onNegativeStrand = feature.get('strand', '') == '-'
     result.strand = (
         protocol.NEG_STRAND if onNegativeStrand else protocol.POS_STRAND)
     childIds = json.loads(feature['child_ids'])
     result.child_ids.extend(
         [self.getCompoundIdForFeatureId(childId) for childId in childIds])
     featureType = self._ontology.getGaTermByName(feature['type'])
     result.feature_type.CopyFrom(featureType)
     attributes = json.loads(feature['attributes'])
     # TODO: Identify which values are ExternalIdentifiers and OntologyTerms
     for key in attributes:
         for value in attributes[key]:
             # The map entry is only touched when there is a value to
             # store, so keys with no values add nothing to the message.
             entry = result.attributes.vals[key].values.add()
             entry.string_value = value
     if 'gene_name' in attributes:
         geneNames = attributes['gene_name']
         if len(geneNames) > 0:
             # Only the first gene name is used as the gene symbol.
             result.gene_symbol = pb.string(geneNames[0])
     return result
Exemple #5
0
    def search_features(
            self, feature_set_id=None, parent_id="", reference_name="",
            start=0, end=0, feature_types=None, name="", gene_symbol=""):
        """
        Returns the result of running a search_features method
        on a request with the passed-in parameters.

        :param str feature_set_id: ID of the feature Set being searched
        :param str parent_id: ID (optional) of the parent feature
        :param str reference_name: name of the reference to search
            (ex: "chr1")
        :param int start: search start position on reference
        :param int end: end position on reference
        :param feature_types: array of terms to limit search by (ex: "gene");
            ``None`` (the default) adds no terms to the request
        :param str name: only return features with this name
        :param str gene_symbol: only return features on this gene
        :return: an iterator over Features as returned in the
            SearchFeaturesResponse object.
        """
        request = protocol.SearchFeaturesRequest()
        # feature_set_id defaults to None, so it goes through pb.string like
        # the other None-default string arguments of the search methods in
        # this file (presumably pb.string maps None to the field default --
        # confirm against the pb module).
        request.feature_set_id = pb.string(feature_set_id)
        request.parent_id = parent_id
        request.reference_name = reference_name
        request.name = name
        request.gene_symbol = gene_symbol
        request.start = start
        request.end = end
        # A None default replaces the former shared mutable list default.
        if feature_types is not None:
            request.feature_types.extend(feature_types)
        request.page_size = pb.int(self._page_size)
        return self._run_search_request(
            request, "features",
            protocol.SearchFeaturesResponse)
Exemple #6
0
    def search_reference_sets(
            self, accession=None, md5checksum=None, assembly_id=None):
        """
        Searches the "referencesets" endpoint for the ReferenceSets that
        satisfy the supplied conditions.

        :param str accession: If not null, only reference sets whose
            `accession` equals this string (case-sensitive, exact match)
            are returned.
        :param str md5checksum: If not null, only reference sets whose
            `md5checksum` equals this string (case-sensitive, exact
            match) are returned. See
            :class:`ga4gh.protocol.ReferenceSet::md5checksum` for details.
        :param str assembly_id: If not null, only reference sets whose
            `assembly_id` equals this string (case-sensitive, exact
            match) are returned.
        :return: An iterator over the :class:`ga4gh.protocol.ReferenceSet`
            objects selected by the query parameters.
        """
        sets_request = protocol.SearchReferenceSetsRequest()
        sets_request.accession = pb.string(accession)
        sets_request.md5checksum = pb.string(md5checksum)
        sets_request.assembly_id = pb.string(assembly_id)
        sets_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchReferenceSetsResponse
        return self._run_search_request(
            sets_request, "referencesets", response_class)
Exemple #7
0
    def search_reference_sets(self,
                              accession=None,
                              md5checksum=None,
                              assembly_id=None):
        """
        Searches the "referencesets" endpoint for the ReferenceSets that
        satisfy the supplied conditions.

        :param str accession: If not null, only reference sets whose
            `accession` equals this string (case-sensitive, exact match)
            are returned.
        :param str md5checksum: If not null, only reference sets whose
            `md5checksum` equals this string (case-sensitive, exact
            match) are returned. See
            :class:`ga4gh.protocol.ReferenceSet::md5checksum` for details.
        :param str assembly_id: If not null, only reference sets whose
            `assembly_id` equals this string (case-sensitive, exact
            match) are returned.
        :return: An iterator over the :class:`ga4gh.protocol.ReferenceSet`
            objects selected by the query parameters.
        """
        sets_request = protocol.SearchReferenceSetsRequest()
        sets_request.accession = pb.string(accession)
        sets_request.md5checksum = pb.string(md5checksum)
        sets_request.assembly_id = pb.string(assembly_id)
        sets_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchReferenceSetsResponse
        return self._run_search_request(sets_request, "referencesets",
                                        response_class)
Exemple #8
0
 def search_phenotype_association_sets(self, dataset_id):
     """
     Searches for the PhenotypeAssociationSets on the server.

     :param dataset_id: Stored in the request's ``dataset_id`` field.
     :return: An iterator over the PhenotypeAssociationSets on the server.
     """
     sets_request = protocol.SearchPhenotypeAssociationSetsRequest()
     sets_request.dataset_id = dataset_id
     sets_request.page_size = pb.int(self._page_size)
     response_class = protocol.SearchPhenotypeAssociationSetsResponse
     # NOTE(review): the other search endpoints in this file are spelled
     # without underscores -- confirm this path matches the server route.
     return self._run_search_request(
         sets_request, "phenotype_association_sets", response_class)
Exemple #9
0
 def search_phenotype_association_sets(self, dataset_id):
     """
     Searches for the PhenotypeAssociationSets on the server.

     :param dataset_id: Stored in the request's ``dataset_id`` field.
     :return: An iterator over the PhenotypeAssociationSets on the server.
     """
     sets_request = protocol.SearchPhenotypeAssociationSetsRequest()
     sets_request.dataset_id = dataset_id
     sets_request.page_size = pb.int(self._page_size)
     response_class = protocol.SearchPhenotypeAssociationSetsResponse
     return self._run_search_request(
         sets_request, "phenotypeassociationsets", response_class)
Exemple #10
0
 def listReferenceBases(self, id_, start=0, end=None):
     """
     Fetches the bases from the server page by page and returns them
     joined into one string. This command does not conform to the
     patterns of the other search and get requests, and is implemented
     differently.

     :param id_: Passed unchanged to every page request.
     :param start: Stored (via ``pb.int``) in the request's ``start``.
     :param end: Stored (via ``pb.int``) in the request's ``end``.
     :return: The ``sequence`` of every page, concatenated in the order
         the pages were received.
     """
     basesRequest = protocol.ListReferenceBasesRequest()
     basesRequest.start = pb.int(start)
     basesRequest.end = pb.int(end)
     pages = []
     while True:
         page = self._runListReferenceBasesPageRequest(id_, basesRequest)
         pages.append(page.sequence)
         token = page.next_page_token
         # The token is copied onto the request even for the final page,
         # before deciding whether another page has to be fetched.
         basesRequest.page_token = token
         if not token:
             break
     return "".join(pages)
Exemple #11
0
    def searchDatasets(self):
        """
        Searches the "datasets" endpoint for the Datasets on the server.

        :return: An iterator over the :class:`ga4gh.protocol.Dataset`
            objects on the server.
        """
        datasetsRequest = protocol.SearchDatasetsRequest()
        datasetsRequest.page_size = pb.int(self._pageSize)
        responseClass = protocol.SearchDatasetsResponse
        return self._runSearchRequest(
            datasetsRequest, "datasets", responseClass)
Exemple #12
0
 def search_rna_quantification_sets(self, dataset_id):
     """
     Searches for the RnaQuantificationSet objects on the server.

     :param dataset_id: Stored in the request's ``dataset_id`` field.
     :return: An iterator over the RnaQuantificationSet objects from the
         server.
     """
     sets_request = protocol.SearchRnaQuantificationSetsRequest()
     sets_request.dataset_id = dataset_id
     sets_request.page_size = pb.int(self._page_size)
     response_class = protocol.SearchRnaQuantificationSetsResponse
     return self._run_search_request(
         sets_request, "rnaquantificationsets", response_class)
Exemple #13
0
 def search_rna_quantification_sets(self, dataset_id):
     """
     Searches for the RnaQuantificationSet objects on the server.

     :param dataset_id: Stored in the request's ``dataset_id`` field.
     :return: An iterator over the RnaQuantificationSet objects from the
         server.
     """
     sets_request = protocol.SearchRnaQuantificationSetsRequest()
     sets_request.dataset_id = dataset_id
     sets_request.page_size = pb.int(self._page_size)
     response_class = protocol.SearchRnaQuantificationSetsResponse
     return self._run_search_request(
         sets_request, "rnaquantificationsets", response_class)
Exemple #14
0
    def search_datasets(self):
        """
        Searches the "datasets" endpoint for the Datasets on the server.

        :return: An iterator over the :class:`ga4gh.protocol.Dataset`
            objects on the server.
        """
        datasets_request = protocol.SearchDatasetsRequest()
        datasets_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchDatasetsResponse
        return self._run_search_request(
            datasets_request, "datasets", response_class)
Exemple #15
0
 def listReferenceBases(self, id_, start=0, end=None):
     """
     Fetches the bases from the server page by page and returns them
     joined into one string. This command does not conform to the
     patterns of the other search and get requests, and is implemented
     differently.

     :param id_: Passed unchanged to every page request.
     :param start: Stored (via ``pb.int``) in the request's ``start``.
     :param end: Stored (via ``pb.int``) in the request's ``end``.
     :return: The ``sequence`` of every page, concatenated in the order
         the pages were received.
     """
     basesRequest = protocol.ListReferenceBasesRequest()
     basesRequest.start = pb.int(start)
     basesRequest.end = pb.int(end)
     pages = []
     while True:
         page = self._runListReferenceBasesPageRequest(id_, basesRequest)
         pages.append(page.sequence)
         token = page.next_page_token
         # The token is copied onto the request even for the final page,
         # before deciding whether another page has to be fetched.
         basesRequest.page_token = token
         if not token:
             break
     return "".join(pages)
Exemple #16
0
    def search_rna_quantifications(self, rna_quantification_set_id=""):
        """
        Searches for the RnaQuantification objects on the server.

        :param str rna_quantification_set_id: The ID of the
            :class:`ga4gh.protocol.RnaQuantificationSet` of interest.
        :return: An iterator over the RnaQuantification objects from the
            server.
        """
        quantifications_request = protocol.SearchRnaQuantificationsRequest()
        quantifications_request.rna_quantification_set_id = (
            rna_quantification_set_id)
        quantifications_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchRnaQuantificationsResponse
        return self._run_search_request(
            quantifications_request, "rnaquantifications", response_class)
Exemple #17
0
    def search_rna_quantifications(self, rna_quantification_set_id=""):
        """
        Searches for the RnaQuantification objects on the server.

        :param str rna_quantification_set_id: The ID of the
            :class:`ga4gh.protocol.RnaQuantificationSet` of interest.
        :return: An iterator over the RnaQuantification objects from the
            server.
        """
        quantifications_request = protocol.SearchRnaQuantificationsRequest()
        quantifications_request.rna_quantification_set_id = (
            rna_quantification_set_id)
        quantifications_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchRnaQuantificationsResponse
        return self._run_search_request(
            quantifications_request, "rnaquantifications", response_class)
Exemple #18
0
    def search_variants(self,
                        variant_set_id,
                        start=None,
                        end=None,
                        reference_name=None,
                        call_set_ids=None):
        """
        Searches the "variants" endpoint for the Variants of the given
        VariantSet that satisfy the supplied conditions.

        :param str variant_set_id: The ID of the
            :class:`ga4gh.protocol.VariantSet` of interest.
        :param int start: Required. The beginning of the window (0-based,
            inclusive) for which overlapping variants should be returned.
            Genomic positions are non-negative integers less than the
            reference length. A request spanning the join of a circular
            genome is expressed as two requests, one on each side of the
            join (position 0).
        :param int end: Required. The end of the window (0-based, exclusive)
            for which overlapping variants should be returned.
        :param str reference_name: The name of the
            :class:`ga4gh.protocol.Reference` to return variants from.
        :param list call_set_ids: Only return variant calls which belong to
            call sets with these IDs. If an empty array, returns variants
            without any call objects. If null, returns all variant calls.

        :return: An iterator over the :class:`ga4gh.protocol.Variant`
            objects selected by the query parameters.
        :rtype: iter
        """
        variants_request = protocol.SearchVariantsRequest()
        variants_request.reference_name = pb.string(reference_name)
        variants_request.start = pb.int(start)
        variants_request.end = pb.int(end)
        variants_request.variant_set_id = variant_set_id
        # NOTE(review): pb.string is applied to the list-or-None argument
        # here; presumably it swaps None for an empty value -- confirm.
        requested_call_sets = pb.string(call_set_ids)
        variants_request.call_set_ids.extend(requested_call_sets)
        variants_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchVariantsResponse
        return self._run_search_request(variants_request, "variants",
                                        response_class)
Exemple #19
0
    def getFeatures(self, referenceName=None, start=None, end=None,
                    pageToken=None, pageSize=None,
                    featureTypes=None, parentId=None,
                    name=None, geneSymbol=None):
        """
        Generator handed to runSearchRequest to fulfill a features search.

        :param str referenceName: name of reference (ex: "chr1")
        :param start: castable to int, start position on reference
        :param end: castable to int, end position on reference
        :param pageToken: none or castable to int
        :param pageSize: none or castable to int
        :param featureTypes: array of str
        :param parentId: none or featureID of parent
        :param name: the name of the feature
        :param geneSymbol: the symbol for the gene the features are on
        :return: yields one ``(protocol.Feature, nextPageToken)`` pair per
            matching row; the token is a string, or None for the last
            feature served out.
        """
        # The same filter criteria drive both the count and the search.
        criteria = dict(
            referenceName=referenceName,
            start=start, end=end,
            parentId=parentId, featureTypes=featureTypes,
            name=name, geneSymbol=geneSymbol)

        with self._db as db:
            # The total match count is needed so that, once the request
            # is fulfilled, no page token pointing past the end of the
            # actual dataset range is handed out.
            totalMatches = db.countFeaturesSearchInDb(**criteria)
            rows = db.searchFeaturesInDb(pageToken, pageSize, **criteria)

        # Pagination: the token is 1 + the row number being returned
        # (rows start at 0), or None once the last feature is returned.
        token = pb.int(pageToken) if pageToken else 0
        for row in rows:
            gaFeature = self._gaFeatureForFeatureDbRecord(row)
            if token < totalMatches - 1:
                token += 1
            else:
                token = None
            yield gaFeature, (None if token is None else str(token))
Exemple #20
0
    def searchFeatureSets(self, datasetId):
        """
        Searches the "featuresets" endpoint for the FeatureSets of the
        given Dataset.

        :param str datasetId: The ID of the
            :class:`ga4gh.protocol.Dataset` of interest.
        :return: An iterator over the :class:`ga4gh.protocol.FeatureSet`
            objects selected by the query parameters.
        """
        featureSetsRequest = protocol.SearchFeatureSetsRequest()
        featureSetsRequest.dataset_id = datasetId
        featureSetsRequest.page_size = pb.int(self._pageSize)
        responseClass = protocol.SearchFeatureSetsResponse
        return self._runSearchRequest(
            featureSetsRequest, "featuresets", responseClass)
Exemple #21
0
 def toProtocolElement(self):
     """
     Builds the GA4GH protocol message describing this ReferenceSet.

     :return: a protocol.ReferenceSet populated from this object's
         getters
     """
     referenceSet = protocol.ReferenceSet()
     referenceSet.assembly_id = pb.string(self.getAssemblyId())
     referenceSet.description = pb.string(self.getDescription())
     referenceSet.id = self.getId()
     referenceSet.is_derived = self.getIsDerived()
     referenceSet.md5checksum = self.getMd5Checksum()
     referenceSet.ncbi_taxon_id = pb.int(self.getNcbiTaxonId())
     sourceAccessions = self.getSourceAccessions()
     referenceSet.source_accessions.extend(sourceAccessions)
     referenceSet.source_uri = pb.string(self.getSourceUri())
     # The protocol-level name is this object's local ID.
     referenceSet.name = self.getLocalId()
     return referenceSet
Exemple #22
0
    def search_feature_sets(self, dataset_id):
        """
        Searches the "featuresets" endpoint for the FeatureSets of the
        given Dataset.

        :param str dataset_id: The ID of the
            :class:`ga4gh.protocol.Dataset` of interest.
        :return: An iterator over the :class:`ga4gh.protocol.FeatureSet`
            objects selected by the query parameters.
        """
        feature_sets_request = protocol.SearchFeatureSetsRequest()
        feature_sets_request.dataset_id = dataset_id
        feature_sets_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchFeatureSetsResponse
        return self._run_search_request(
            feature_sets_request, "featuresets", response_class)
Exemple #23
0
 def toProtocolElement(self):
     """
     Builds the GA4GH protocol message describing this Reference.

     :return: a protocol.Reference populated from this object's getters
     """
     message = protocol.Reference()
     message.id = self.getId()
     message.is_derived = self.getIsDerived()
     message.length = self.getLength()
     message.md5checksum = self.getMd5Checksum()
     message.name = self.getName()
     # NOTE(review): unlike source_divergence below, the taxon ID and
     # source URI are assigned without a pb wrapper -- confirm their
     # getters never return None.
     message.ncbi_taxon_id = self.getNcbiTaxonId()
     sourceAccessions = self.getSourceAccessions()
     message.source_accessions.extend(sourceAccessions)
     message.source_divergence = pb.int(self.getSourceDivergence())
     message.source_uri = self.getSourceUri()
     return message
Exemple #24
0
 def toProtocolElement(self):
     """
     Builds the GA4GH protocol message describing this ReferenceSet.

     :return: a protocol.ReferenceSet populated from this object's
         getters
     """
     referenceSet = protocol.ReferenceSet()
     referenceSet.assembly_id = pb.string(self.getAssemblyId())
     referenceSet.description = pb.string(self.getDescription())
     referenceSet.id = self.getId()
     referenceSet.is_derived = self.getIsDerived()
     referenceSet.md5checksum = self.getMd5Checksum()
     referenceSet.ncbi_taxon_id = pb.int(self.getNcbiTaxonId())
     sourceAccessions = self.getSourceAccessions()
     referenceSet.source_accessions.extend(sourceAccessions)
     referenceSet.source_uri = pb.string(self.getSourceUri())
     # The protocol-level name is this object's local ID.
     referenceSet.name = self.getLocalId()
     return referenceSet
Exemple #25
0
 def toProtocolElement(self):
     """
     Builds the GA4GH protocol message describing this Reference.

     :return: a protocol.Reference populated from this object's getters
     """
     message = protocol.Reference()
     message.id = self.getId()
     message.is_derived = self.getIsDerived()
     message.length = self.getLength()
     message.md5checksum = self.getMd5Checksum()
     message.name = self.getName()
     # NOTE(review): unlike source_divergence below, the taxon ID and
     # source URI are assigned without a pb wrapper -- confirm their
     # getters never return None.
     message.ncbi_taxon_id = self.getNcbiTaxonId()
     sourceAccessions = self.getSourceAccessions()
     message.source_accessions.extend(sourceAccessions)
     message.source_divergence = pb.int(self.getSourceDivergence())
     message.source_uri = self.getSourceUri()
     return message
Exemple #26
0
    def search_variants(
            self, variant_set_id, start=None, end=None, reference_name=None,
            call_set_ids=None):
        """
        Searches the "variants" endpoint for the Variants of the given
        VariantSet that satisfy the supplied conditions.

        :param str variant_set_id: The ID of the
            :class:`ga4gh.protocol.VariantSet` of interest.
        :param int start: Required. The beginning of the window (0-based,
            inclusive) for which overlapping variants should be returned.
            Genomic positions are non-negative integers less than the
            reference length. A request spanning the join of a circular
            genome is expressed as two requests, one on each side of the
            join (position 0).
        :param int end: Required. The end of the window (0-based, exclusive)
            for which overlapping variants should be returned.
        :param str reference_name: The name of the
            :class:`ga4gh.protocol.Reference` to return variants from.
        :param list call_set_ids: Only return variant calls which belong to
            call sets with these IDs. If an empty array, returns variants
            without any call objects. If null, returns all variant calls.

        :return: An iterator over the :class:`ga4gh.protocol.Variant`
            objects selected by the query parameters.
        :rtype: iter
        """
        variants_request = protocol.SearchVariantsRequest()
        variants_request.reference_name = pb.string(reference_name)
        variants_request.start = pb.int(start)
        variants_request.end = pb.int(end)
        variants_request.variant_set_id = variant_set_id
        # NOTE(review): pb.string is applied to the list-or-None argument
        # here; presumably it swaps None for an empty value -- confirm.
        requested_call_sets = pb.string(call_set_ids)
        variants_request.call_set_ids.extend(requested_call_sets)
        variants_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchVariantsResponse
        return self._run_search_request(
            variants_request, "variants", response_class)
Exemple #27
0
    def search_variant_annotation_sets(self, variant_set_id):
        """
        Searches the "variantannotationsets" endpoint for the Annotation
        Sets of the given variant set.

        :param str variant_set_id: The ID of the
            :class:`ga4gh.protocol.VariantSet` of interest.
        :return: An iterator over the :class:`ga4gh.protocol.AnnotationSet`
            objects selected by the query parameters.
        """
        annotation_sets_request = (
            protocol.SearchVariantAnnotationSetsRequest())
        annotation_sets_request.variant_set_id = variant_set_id
        annotation_sets_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchVariantAnnotationSetsResponse
        return self._run_search_request(
            annotation_sets_request, "variantannotationsets",
            response_class)
Exemple #28
0
    def searchVariantAnnotationSets(self, variantSetId):
        """
        Searches the "variantannotationsets" endpoint for the Annotation
        Sets of the given variant set.

        :param str variantSetId: The ID of the
            :class:`ga4gh.protocol.VariantSet` of interest.
        :return: An iterator over the :class:`ga4gh.protocol.AnnotationSet`
            objects selected by the query parameters.
        """
        annotationSetsRequest = (
            protocol.SearchVariantAnnotationSetsRequest())
        annotationSetsRequest.variant_set_id = variantSetId
        annotationSetsRequest.page_size = pb.int(self._pageSize)
        responseClass = protocol.SearchVariantAnnotationSetsResponse
        return self._runSearchRequest(
            annotationSetsRequest, "variantannotationsets", responseClass)
Exemple #29
0
    def searchCallSets(self, variantSetId, name=None):
        """
        Searches the "callsets" endpoint for the CallSets of the given
        VariantSet that satisfy the supplied conditions.

        :param variantSetId: Stored in the request's ``variant_set_id``
            field.
        :param str name: Only CallSets matching the specified name will
            be returned.
        :return: An iterator over the :class:`ga4gh.protocol.CallSet`
            objects selected by the query parameters.
        """
        callSetsRequest = protocol.SearchCallSetsRequest()
        callSetsRequest.variant_set_id = variantSetId
        callSetsRequest.name = pb.string(name)
        callSetsRequest.page_size = pb.int(self._pageSize)
        responseClass = protocol.SearchCallSetsResponse
        return self._runSearchRequest(
            callSetsRequest, "callsets", responseClass)
Exemple #30
0
    def search_individuals(self, dataset_id, name=None):
        """
        Searches the "individuals" endpoint for the Individuals that
        satisfy the supplied conditions.

        :param str dataset_id: The dataset to search within.
        :param str name: Only Individuals matching the specified name will
            be returned.
        :return: An iterator over the objects carried by the
            :class:`ga4gh.protocol.SearchIndividualsResponse` pages for
            the query parameters.
        """
        individuals_request = protocol.SearchIndividualsRequest()
        individuals_request.dataset_id = dataset_id
        individuals_request.name = pb.string(name)
        individuals_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchIndividualsResponse
        return self._run_search_request(
            individuals_request, "individuals", response_class)
Exemple #31
0
    def searchReadGroupSets(self, datasetId, name=None):
        """
        Searches the "readgroupsets" endpoint for the ReadGroupSets of the
        given Dataset that satisfy the supplied conditions.

        :param datasetId: Stored in the request's ``dataset_id`` field.
        :param str name: Only ReadGroupSets matching the specified name
            will be returned.
        :return: An iterator over the :class:`ga4gh.protocol.ReadGroupSet`
            objects selected by the query parameters.
        :rtype: iter
        """
        readGroupSetsRequest = protocol.SearchReadGroupSetsRequest()
        readGroupSetsRequest.dataset_id = datasetId
        readGroupSetsRequest.name = pb.string(name)
        readGroupSetsRequest.page_size = pb.int(self._pageSize)
        responseClass = protocol.SearchReadGroupSetsResponse
        return self._runSearchRequest(
            readGroupSetsRequest, "readgroupsets", responseClass)
Exemple #32
0
    def searchIndividuals(self, datasetId, name=None):
        """
        Searches the "individuals" endpoint for the Individuals that
        satisfy the supplied conditions.

        :param str datasetId: The dataset to search within.
        :param str name: Only Individuals matching the specified name will
            be returned.
        :return: An iterator over the objects carried by the
            :class:`ga4gh.protocol.SearchIndividualsResponse` pages for
            the query parameters.
        """
        individualsRequest = protocol.SearchIndividualsRequest()
        individualsRequest.dataset_id = datasetId
        individualsRequest.name = pb.string(name)
        individualsRequest.page_size = pb.int(self._pageSize)
        responseClass = protocol.SearchIndividualsResponse
        return self._runSearchRequest(
            individualsRequest, "individuals", responseClass)
Exemple #33
0
    def search_variant_annotations(self,
                                   variant_annotation_set_id,
                                   reference_name="",
                                   reference_id="",
                                   start=0,
                                   end=0,
                                   effects=None):
        """
        Returns an iterator over the Variant Annotations fulfilling
        the specified conditions from the specified VariantSet.

        :param str variant_annotation_set_id: The ID of the
            :class:`ga4gh.protocol.VariantAnnotationSet` of interest.
        :param str reference_name: The name of the
            :class:`ga4gh.protocol.Reference` we wish to return variants from.
        :param str reference_id: Stored in the request's ``reference_id``
            field.
        :param int start: Required. The beginning of the window (0-based,
            inclusive) for which overlapping variants should be returned.
            Genomic positions are non-negative integers less than reference
            length. Requests spanning the join of circular genomes are
            represented as two requests one on each side of the join
            (position 0).
        :param int end: Required. The end of the window (0-based, exclusive)
            for which overlapping variants should be returned.
        :param effects: An iterable of dicts, each expanded as keyword
            arguments into a :class:`ga4gh.protocol.OntologyTerm`. ``None``
            (the default) or an empty iterable adds no effects.

        :return: An iterator over the
            :class:`ga4gh.protocol.VariantAnnotation` objects
            defined by the query parameters.
        :rtype: iter
        :raises exceptions.BadRequestException: if any supplied effect
            yields an ontology term without an id.
        """
        request = protocol.SearchVariantAnnotationsRequest()
        request.variant_annotation_set_id = variant_annotation_set_id
        request.reference_name = reference_name
        request.reference_id = reference_id
        request.start = start
        request.end = end
        # A None default replaces the former shared mutable list default.
        for effect in effects or ():
            request.effects.add().CopyFrom(protocol.OntologyTerm(**effect))
        # Validation runs only after every term has been built, so a
        # malformed effect dict is reported before a missing id.
        if any(not term.id for term in request.effects):
            raise exceptions.BadRequestException(
                "Each ontology term should have an id set")
        request.page_size = pb.int(self._page_size)
        return self._run_search_request(
            request, "variantannotations",
            protocol.SearchVariantAnnotationsResponse)
Exemple #34
0
    def search_expression_levels(
            self, rna_quantification_id="", feature_ids=None, threshold=0.0):
        """
        Returns an iterator over the ExpressionLevel objects from the server

        :param str rna_quantification_id: The ID of the
            :class:`ga4gh.protocol.RnaQuantification` of interest.
        :param feature_ids: Optional iterable of IDs of the
            :class:`ga4gh.protocol.Feature` of interest. ``None`` (the
            default) adds no feature IDs to the request.
        :param float threshold: Minimum expression of responses to return.
        :return: An iterator over the ExpressionLevel objects returned for
            the request.
        """
        request = protocol.SearchExpressionLevelsRequest()
        request.rna_quantification_id = rna_quantification_id
        # A None sentinel replaces the former mutable ``[]`` default.
        if feature_ids is not None:
            request.feature_ids.extend(feature_ids)
        request.threshold = threshold
        request.page_size = pb.int(self._page_size)
        return self._run_search_request(
            request, "expressionlevels",
            protocol.SearchExpressionLevelsResponse)
Exemple #35
0
    def search_bio_samples(self, dataset_id, name=None, individual_id=None):
        """
        Searches a dataset for BioSamples, optionally filtered by name
        and by individual.

        :param str dataset_id: The dataset to search within.
        :param str name: Only BioSamples matching the specified name will
            be returned.
        :param str individual_id: Only BioSamples matching this id will
            be returned.
        :return: An iterator over the :class:`ga4gh.protocol.BioSample`
            objects defined by the query parameters.
        """
        search_request = protocol.SearchBioSamplesRequest()
        search_request.dataset_id = dataset_id
        search_request.name = pb.string(name)
        search_request.individual_id = pb.string(individual_id)
        search_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchBioSamplesResponse
        return self._run_search_request(
            search_request, "biosamples", response_class)
Exemple #36
0
    def searchReadGroupSets(self, datasetId, name=None, bioSampleId=None):
        """
        Searches the specified Dataset for ReadGroupSets matching the
        given conditions.

        :param str datasetId: The dataset to search within.
        :param str name: Only ReadGroupSets matching the specified name
            will be returned.
        :param str bioSampleId: Only ReadGroups matching the specified
            bioSample will be included in the response.
        :return: An iterator over the :class:`ga4gh.protocol.ReadGroupSet`
            objects defined by the query parameters.
        :rtype: iter
        """
        searchRequest = protocol.SearchReadGroupSetsRequest()
        searchRequest.dataset_id = datasetId
        searchRequest.name = pb.string(name)
        searchRequest.bio_sample_id = pb.string(bioSampleId)
        searchRequest.page_size = pb.int(self._pageSize)
        responseClass = protocol.SearchReadGroupSetsResponse
        return self._runSearchRequest(
            searchRequest, "readgroupsets", responseClass)
Exemple #37
0
 def search_phenotype(
         self, phenotype_association_set_id=None, phenotype_id=None,
         description=None, type_=None, age_of_onset=None):
     """
     Returns an iterator over the Phenotypes from the server

     Optional filters are copied into the request only when truthy.

     :param phenotype_association_set_id: Assigned as-is to the request's
         ``phenotype_association_set_id`` field.
     :param phenotype_id: If truthy, assigned to the request's ``id``
         field.
     :param description: If truthy, assigned to the request's
         ``description`` field.
     :param type_: If truthy, a protocol message merged into the
         request's ``type`` field.
     :param age_of_onset: If truthy, assigned to the request's
         ``age_of_onset`` field.
     :return: An iterator over the Phenotype objects returned for the
         request.
     """
     request = protocol.SearchPhenotypesRequest()
     # NOTE(review): the default of None is assigned unconditionally here;
     # confirm whether callers are expected to always pass a set id.
     request.phenotype_association_set_id = phenotype_association_set_id
     if phenotype_id:
         request.id = phenotype_id
     if description:
         request.description = description
     if type_:
         # Protobuf messages expose ``MergeFrom`` (capitalised); the
         # lower-case ``mergeFrom`` spelling is not a message method and
         # raised AttributeError whenever a type filter was supplied.
         request.type.MergeFrom(type_)
     if age_of_onset:
         request.age_of_onset = age_of_onset
     request.page_size = pb.int(self._page_size)
     return self._run_search_request(
         request, "phenotype",
         protocol.SearchPhenotypesResponse)
Exemple #38
0
    def searchBioSamples(self, datasetId, name=None, individualId=None):
        """
        Searches a dataset for BioSamples, optionally filtered by name
        and by individual.

        :param str datasetId: The dataset to search within.
        :param str name: Only BioSamples matching the specified name will
            be returned.
        :param str individualId: Only BioSamples matching this id will
            be returned.
        :return: An iterator over the :class:`ga4gh.protocol.BioSample`
            objects defined by the query parameters.
        """
        searchRequest = protocol.SearchBioSamplesRequest()
        searchRequest.dataset_id = datasetId
        searchRequest.name = pb.string(name)
        searchRequest.individual_id = pb.string(individualId)
        searchRequest.page_size = pb.int(self._pageSize)
        responseClass = protocol.SearchBioSamplesResponse
        return self._runSearchRequest(
            searchRequest, "biosamples", responseClass)
Exemple #39
0
 def search_genotype_phenotype(
         self, phenotype_association_set_id=None, feature_ids=None,
         phenotype_ids=None, evidence=None):
     """
     Searches the server for GenotypePhenotype associations and returns
     an iterator over them.

     :param phenotype_association_set_id: Assigned as-is to the request's
         ``phenotype_association_set_id`` field.
     :param feature_ids: If truthy, appended to the request's
         ``feature_ids``.
     :param phenotype_ids: If truthy, appended to the request's
         ``phenotype_ids``.
     :param evidence: If truthy, appended to the request's ``evidence``.
     """
     search_request = protocol.SearchGenotypePhenotypeRequest()
     search_request.phenotype_association_set_id = (
         phenotype_association_set_id)
     # Each optional filter extends the request field of the same name;
     # empty or missing filters leave the request untouched.
     optional_filters = (
         ("feature_ids", feature_ids),
         ("phenotype_ids", phenotype_ids),
         ("evidence", evidence),
     )
     for field_name, values in optional_filters:
         if values:
             getattr(search_request, field_name).extend(values)
     search_request.page_size = pb.int(self._page_size)
     self._logger.debug(
         "search_genotype_phenotype {}".format(search_request))
     response_class = protocol.SearchGenotypePhenotypeResponse
     return self._run_search_request(
         search_request, "genotypephenotype", response_class)
Exemple #40
0
    def search_variant_annotations(
            self, variant_annotation_set_id, reference_name="",
            reference_id="", start=0, end=0, effects=None):
        """
        Returns an iterator over the Variant Annotations fulfilling
        the specified conditions from the specified VariantSet.

        :param str variant_annotation_set_id: The ID of the
            :class:`ga4gh.protocol.VariantAnnotationSet` of interest.
        :param str reference_name: The name of the
            :class:`ga4gh.protocol.Reference` we wish to return variants from.
        :param str reference_id: Copied as-is into the request's
            ``reference_id`` field.
        :param int start: Required. The beginning of the window (0-based,
            inclusive) for which overlapping variants should be returned.
            Genomic positions are non-negative integers less than reference
            length. Requests spanning the join of circular genomes are
            represented as two requests one on each side of the join
            (position 0).
        :param int end: Required. The end of the window (0-based, exclusive)
            for which overlapping variants should be returned.
        :param effects: Optional iterable of dicts. Each dict holds the
            keyword arguments for one :class:`ga4gh.protocol.OntologyTerm`,
            and every resulting term must have a non-empty ``id``. ``None``
            (the default) adds no effects to the request.

        :return: An iterator over the
            :class:`ga4gh.protocol.VariantAnnotation` objects
            defined by the query parameters.
        :rtype: iter
        :raises exceptions.BadRequestException: If any supplied ontology
            term has no id set.
        """
        # A None sentinel replaces the former mutable ``[]`` default so no
        # list object is shared between calls.
        if effects is None:
            effects = ()
        request = protocol.SearchVariantAnnotationsRequest()
        request.variant_annotation_set_id = variant_annotation_set_id
        request.reference_name = reference_name
        request.reference_id = reference_id
        request.start = start
        request.end = end
        # First copy every term into the request, then validate them all,
        # so errors surface in the same order as before.
        for effect in effects:
            request.effects.add().CopyFrom(protocol.OntologyTerm(**effect))
        for effect in request.effects:
            if not effect.id:
                raise exceptions.BadRequestException(
                    "Each ontology term should have an id set")
        request.page_size = pb.int(self._page_size)
        return self._run_search_request(
            request, "variantannotations",
            protocol.SearchVariantAnnotationsResponse)
Exemple #41
0
    def searchCallSets(self, variantSetId, name=None, bioSampleId=None):
        """
        Searches the specified VariantSet for CallSets matching the
        given conditions.

        :param str variantSetId: Find callsets belonging to the
            provided variant set.
        :param str name: Only CallSets matching the specified name will
            be returned.
        :param str bioSampleId: Only CallSets matching this id will
            be returned.
        :return: An iterator over the :class:`ga4gh.protocol.CallSet`
            objects defined by the query parameters.
        """
        searchRequest = protocol.SearchCallSetsRequest()
        searchRequest.variant_set_id = variantSetId
        searchRequest.name = pb.string(name)
        searchRequest.bio_sample_id = pb.string(bioSampleId)
        searchRequest.page_size = pb.int(self._pageSize)
        responseClass = protocol.SearchCallSetsResponse
        return self._runSearchRequest(
            searchRequest, "callsets", responseClass)
Exemple #42
0
    def search_expression_levels(self,
                                 rna_quantification_id="",
                                 feature_ids=None,
                                 threshold=0.0):
        """
        Returns an iterator over the ExpressionLevel objects from the server

        :param str rna_quantification_id: The ID of the
            :class:`ga4gh.protocol.RnaQuantification` of interest.
        :param feature_ids: Optional iterable of IDs of the
            :class:`ga4gh.protocol.Feature` of interest. ``None`` (the
            default) adds no feature IDs to the request.
        :param float threshold: Minimum expression of responses to return.
        :return: An iterator over the ExpressionLevel objects returned for
            the request.
        """
        request = protocol.SearchExpressionLevelsRequest()
        request.rna_quantification_id = rna_quantification_id
        # A None sentinel replaces the former mutable ``[]`` default.
        if feature_ids is not None:
            request.feature_ids.extend(feature_ids)
        request.threshold = threshold
        request.page_size = pb.int(self._page_size)
        return self._run_search_request(
            request, "expressionlevels",
            protocol.SearchExpressionLevelsResponse)
Exemple #43
0
    def search_call_sets(self, variant_set_id, name=None, bio_sample_id=None):
        """
        Searches the specified VariantSet for CallSets matching the
        given conditions.

        :param str variant_set_id: Find callsets belonging to the
            provided variant set.
        :param str name: Only CallSets matching the specified name will
            be returned.
        :param str bio_sample_id: Only CallSets matching this id will
            be returned.
        :return: An iterator over the :class:`ga4gh.protocol.CallSet`
            objects defined by the query parameters.
        """
        search_request = protocol.SearchCallSetsRequest()
        search_request.variant_set_id = variant_set_id
        search_request.name = pb.string(name)
        search_request.bio_sample_id = pb.string(bio_sample_id)
        search_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchCallSetsResponse
        return self._run_search_request(
            search_request, "callsets", response_class)
Exemple #44
0
    def searchReferences(
            self, referenceSetId, accession=None, md5checksum=None):
        """
        Searches the specified ReferenceSet for References matching the
        given conditions.

        :param str referenceSetId: The ReferenceSet to search.
        :param str accession: If not None, return the references for which the
            `accession` matches this string (case-sensitive, exact match).
        :param str md5checksum: If not None, return the references for which
            the `md5checksum` matches this string (case-sensitive, exact
            match).
        :return: An iterator over the :class:`ga4gh.protocol.Reference`
            objects defined by the query parameters.
        """
        searchRequest = protocol.SearchReferencesRequest()
        searchRequest.reference_set_id = referenceSetId
        searchRequest.accession = pb.string(accession)
        searchRequest.md5checksum = pb.string(md5checksum)
        searchRequest.page_size = pb.int(self._pageSize)
        responseClass = protocol.SearchReferencesResponse
        return self._runSearchRequest(
            searchRequest, "references", responseClass)
Exemple #45
0
 def search_genotype_phenotype(self,
                               phenotype_association_set_id=None,
                               feature_ids=None,
                               phenotype_ids=None,
                               evidence=None):
     """
     Searches the server for GenotypePhenotype associations and returns
     an iterator over them.

     :param phenotype_association_set_id: Assigned as-is to the request's
         ``phenotype_association_set_id`` field.
     :param feature_ids: If truthy, appended to the request's
         ``feature_ids``.
     :param phenotype_ids: If truthy, appended to the request's
         ``phenotype_ids``.
     :param evidence: If truthy, appended to the request's ``evidence``.
     """
     search_request = protocol.SearchGenotypePhenotypeRequest()
     search_request.phenotype_association_set_id = (
         phenotype_association_set_id)
     # Each optional filter extends the request field of the same name;
     # empty or missing filters leave the request untouched.
     optional_filters = (
         ("feature_ids", feature_ids),
         ("phenotype_ids", phenotype_ids),
         ("evidence", evidence),
     )
     for field_name, values in optional_filters:
         if values:
             getattr(search_request, field_name).extend(values)
     search_request.page_size = pb.int(self._page_size)
     self._logger.debug(
         "search_genotype_phenotype {}".format(search_request))
     response_class = protocol.SearchGenotypePhenotypeResponse
     return self._run_search_request(
         search_request, "featurephenotypeassociations", response_class)
Exemple #46
0
    def search_references(
            self, reference_set_id, accession=None, md5checksum=None):
        """
        Searches the specified ReferenceSet for References matching the
        given conditions.

        :param str reference_set_id: The ReferenceSet to search.
        :param str accession: If not None, return the references for which the
            `accession` matches this string (case-sensitive, exact match).
        :param str md5checksum: If not None, return the references for which
            the `md5checksum` matches this string (case-sensitive, exact
            match).
        :return: An iterator over the :class:`ga4gh.protocol.Reference`
            objects defined by the query parameters.
        """
        search_request = protocol.SearchReferencesRequest()
        search_request.reference_set_id = reference_set_id
        search_request.accession = pb.string(accession)
        search_request.md5checksum = pb.string(md5checksum)
        search_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchReferencesResponse
        return self._run_search_request(
            search_request, "references", response_class)
Exemple #47
0
 def search_phenotype(self,
                      phenotype_association_set_id=None,
                      phenotype_id=None,
                      description=None,
                      type_=None,
                      age_of_onset=None):
     """
     Returns an iterator over the Phenotypes from the server

     Optional filters are copied into the request only when truthy.

     :param phenotype_association_set_id: Assigned as-is to the request's
         ``phenotype_association_set_id`` field.
     :param phenotype_id: If truthy, assigned to the request's ``id``
         field.
     :param description: If truthy, assigned to the request's
         ``description`` field.
     :param type_: If truthy, a protocol message merged into the
         request's ``type`` field.
     :param age_of_onset: If truthy, assigned to the request's
         ``age_of_onset`` field.
     :return: An iterator over the Phenotype objects returned for the
         request.
     """
     request = protocol.SearchPhenotypesRequest()
     # NOTE(review): the default of None is assigned unconditionally here;
     # confirm whether callers are expected to always pass a set id.
     request.phenotype_association_set_id = phenotype_association_set_id
     if phenotype_id:
         request.id = phenotype_id
     if description:
         request.description = description
     if type_:
         # Protobuf messages expose ``MergeFrom`` (capitalised); the
         # lower-case ``mergeFrom`` spelling is not a message method and
         # raised AttributeError whenever a type filter was supplied.
         request.type.MergeFrom(type_)
     if age_of_onset:
         request.age_of_onset = age_of_onset
     request.page_size = pb.int(self._page_size)
     return self._run_search_request(request, "phenotypes",
                                     protocol.SearchPhenotypesResponse)
Exemple #48
0
 def toProtocolElement(self):
     """
     Builds and returns the GA4GH protocol message describing this
     ReadGroup.
     """
     # TODO this is very incomplete, but we don't have the
     # implementation to fill out the rest of the fields currently
     message = protocol.ReadGroup()
     message.id = self.getId()
     message.created = self._creationTime
     message.updated = self._updateTime
     # The dataset is the parent of this object's parent container.
     owningDataset = self.getParentContainer().getParentContainer()
     message.dataset_id = owningDataset.getId()
     message.name = self.getLocalId()
     message.predicted_insert_size = pb.int(
         self.getPredictedInsertSize())
     parentReferenceSet = self._parentContainer.getReferenceSet()
     message.sample_id = pb.string(self.getSampleId())
     # reference_set_id is left unset when the parent has no reference set.
     if parentReferenceSet is not None:
         message.reference_set_id = parentReferenceSet.getId()
     message.stats.CopyFrom(self.getStats())
     message.programs.extend(self.getPrograms())
     message.description = pb.string(self.getDescription())
     message.experiment.CopyFrom(self.getExperiment())
     return message
Exemple #49
0
    def search_features(self,
                        feature_set_id=None,
                        parent_id="",
                        reference_name="",
                        start=0,
                        end=0,
                        feature_types=None,
                        name="",
                        gene_symbol=""):
        """
        Returns the result of running a search_features method
        on a request with the passed-in parameters.

        :param str feature_set_id: ID of the feature Set being searched
        :param str parent_id: ID (optional) of the parent feature
        :param str reference_name: name of the reference to search
            (ex: "chr1")
        :param int start: search start position on reference
        :param int end: end position on reference
        :param feature_types: optional iterable of terms to limit search
            by (ex: "gene"). ``None`` (the default) adds no feature types
            to the request.
        :param str name: only return features with this name
        :param str gene_symbol: only return features on this gene
        :return: an iterator over Features as returned in the
            SearchFeaturesResponse object.
        """
        request = protocol.SearchFeaturesRequest()
        request.feature_set_id = feature_set_id
        request.parent_id = parent_id
        request.reference_name = reference_name
        request.name = name
        request.gene_symbol = gene_symbol
        request.start = start
        request.end = end
        # A None sentinel replaces the former mutable ``[]`` default.
        if feature_types is not None:
            request.feature_types.extend(feature_types)
        request.page_size = pb.int(self._page_size)
        return self._run_search_request(request, "features",
                                        protocol.SearchFeaturesResponse)
Exemple #50
0
 def toProtocolElement(self):
     """
     Builds and returns the GA4GH protocol message describing this
     ReadGroup.
     """
     # TODO this is very incomplete, but we don't have the
     # implementation to fill out the rest of the fields currently
     message = protocol.ReadGroup()
     message.id = self.getId()
     message.created = self._creationTime
     message.updated = self._updateTime
     # The dataset is the parent of this object's parent container.
     owningDataset = self.getParentContainer().getParentContainer()
     message.dataset_id = owningDataset.getId()
     message.name = self.getLocalId()
     message.predicted_insert_size = pb.int(
         self.getPredictedInsertSize())
     parentReferenceSet = self._parentContainer.getReferenceSet()
     message.sample_name = pb.string(self.getSampleName())
     message.bio_sample_id = pb.string(self.getBioSampleId())
     # reference_set_id is left unset when the parent has no reference set.
     if parentReferenceSet is not None:
         message.reference_set_id = parentReferenceSet.getId()
     message.stats.CopyFrom(self.getStats())
     message.programs.extend(self.getPrograms())
     message.description = pb.string(self.getDescription())
     message.experiment.CopyFrom(self.getExperiment())
     return message