Esempio n. 1
0
 def toProtocolElement(self):
     """
     Builds a protocol.Dataset message from this object's id, local id,
     description and attributes.
     """
     gaDataset = protocol.Dataset()
     gaDataset.id = self.getId()
     # The text fields are routed through pb.string before assignment.
     localId = self.getLocalId()
     gaDataset.name = pb.string(localId)
     description = self.getDescription()
     gaDataset.description = pb.string(description)
     self.serializeAttributes(gaDataset)
     return gaDataset
Esempio n. 2
0
 def _gaFeatureForFeatureDbRecord(self, feature):
     """
     Converts a feature database record into a GA4GH protocol Feature.

     :param feature: The DB Row representing a feature
     :return: the corresponding GA4GH protocol.Feature object
     """
     gaFeature = protocol.Feature()
     toCompoundId = self.getCompoundIdForFeatureId
     gaFeature.id = toCompoundId(feature['id'])
     # A missing or falsy parent id is reported as the empty string.
     parentId = feature.get('parent_id')
     gaFeature.parent_id = toCompoundId(parentId) if parentId else ""
     gaFeature.feature_set_id = self.getId()
     gaFeature.reference_name = pb.string(feature.get('reference_name'))
     gaFeature.start = pb.int(feature.get('start'))
     gaFeature.end = pb.int(feature.get('end'))
     gaFeature.name = pb.string(feature.get('name'))
     # Anything other than '-' defaults to the positive strand.
     onNegativeStrand = feature.get('strand', '') == '-'
     gaFeature.strand = (
         protocol.NEG_STRAND if onNegativeStrand else protocol.POS_STRAND)
     # child_ids is stored as a JSON-encoded list of feature ids.
     childIds = json.loads(feature['child_ids'])
     gaFeature.child_ids.extend(
         [toCompoundId(childId) for childId in childIds])
     ontologyTerm = self._ontology.getGaTermByName(feature['type'])
     gaFeature.feature_type.CopyFrom(ontologyTerm)
     attributes = json.loads(feature['attributes'])
     # TODO: Identify which values are ExternalIdentifiers and OntologyTerms
     for attrName in attributes:
         for attrValue in attributes[attrName]:
             entry = gaFeature.attributes.attr[attrName].values.add()
             entry.string_value = attrValue
     # The first gene_name attribute value, if any, is the gene symbol.
     geneNames = attributes['gene_name'] if 'gene_name' in attributes else []
     if len(geneNames) > 0:
         gaFeature.gene_symbol = pb.string(geneNames[0])
     return gaFeature
Esempio n. 3
0
    def search_reference_sets(
            self, accession=None, md5checksum=None, assembly_id=None):
        """
        Searches for the ReferenceSets that satisfy the given filters and
        returns an iterator over them.

        :param str accession: If not null, return the reference sets for which
            the `accession` matches this string (case-sensitive, exact match).
        :param str md5checksum: If not null, return the reference sets for
            which the `md5checksum` matches this string (case-sensitive, exact
            match). See :class:`ga4gh.protocol.ReferenceSet::md5checksum` for
            details.
        :param str assembly_id: If not null, return the reference sets for
            which the `assembly_id` matches this string (case-sensitive,
            exact match).
        :return: An iterator over the :class:`ga4gh.protocol.ReferenceSet`
            objects defined by the query parameters.
        """
        search_request = protocol.SearchReferenceSetsRequest()
        # Every filter is optional, so each goes through the pb helpers.
        search_request.accession = pb.string(accession)
        search_request.md5checksum = pb.string(md5checksum)
        search_request.assembly_id = pb.string(assembly_id)
        search_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchReferenceSetsResponse
        return self._run_search_request(
            search_request, "referencesets", response_class)
Esempio n. 4
0
 def toProtocolElement(self):
     """
     Builds a protocol.Dataset message from this object's id, local id,
     description and attributes.
     """
     protocolDataset = protocol.Dataset()
     protocolDataset.id = self.getId()
     # The text fields are routed through pb.string before assignment.
     name = self.getLocalId()
     protocolDataset.name = pb.string(name)
     description = self.getDescription()
     protocolDataset.description = pb.string(description)
     self.serializeAttributes(protocolDataset)
     return protocolDataset
Esempio n. 5
0
 def _gaFeatureForFeatureDbRecord(self, feature):
     """
     Builds the GA4GH protocol Feature for one feature database record.

     :param feature: The DB Row representing a feature
     :return: the corresponding GA4GH protocol.Feature object
     """
     gaFeature = protocol.Feature()
     makeCompoundId = self.getCompoundIdForFeatureId
     gaFeature.id = makeCompoundId(feature['id'])
     # A missing or falsy parent id is reported as the empty string.
     rawParentId = feature.get('parent_id')
     if not rawParentId:
         gaFeature.parent_id = ""
     else:
         gaFeature.parent_id = makeCompoundId(rawParentId)
     gaFeature.feature_set_id = self.getId()
     gaFeature.reference_name = pb.string(feature.get('reference_name'))
     gaFeature.start = pb.int(feature.get('start'))
     gaFeature.end = pb.int(feature.get('end'))
     gaFeature.name = pb.string(feature.get('name'))
     # default to positive strand unless the record says '-'
     if feature.get('strand', '') != '-':
         gaFeature.strand = protocol.POS_STRAND
     else:
         gaFeature.strand = protocol.NEG_STRAND
     # child_ids is stored as a JSON-encoded list of feature ids.
     rawChildIds = json.loads(feature['child_ids'])
     gaFeature.child_ids.extend(
         [makeCompoundId(rawChildId) for rawChildId in rawChildIds])
     typeTerm = self._ontology.getGaTermByName(feature['type'])
     gaFeature.feature_type.CopyFrom(typeTerm)
     attributes = json.loads(feature['attributes'])
     # TODO: Identify which values are ExternalIdentifiers and OntologyTerms
     for name in attributes:
         for value in attributes[name]:
             newValue = gaFeature.attributes.attr[name].values.add()
             newValue.string_value = value
     # The first gene_name attribute value, if any, is the gene symbol.
     if 'gene_name' in attributes:
         geneNames = attributes['gene_name']
         if len(geneNames) > 0:
             gaFeature.gene_symbol = pb.string(geneNames[0])
     return gaFeature
Esempio n. 6
0
 def toProtocolElement(self):
     """
     Builds the GA4GH protocol ReadGroup message for this ReadGroup.
     """
     # TODO this is very incomplete, but we don't have the
     # implementation to fill out the rest of the fields currently
     gaReadGroup = protocol.ReadGroup()
     gaReadGroup.id = self.getId()
     gaReadGroup.created = self._creationTime
     gaReadGroup.updated = self._updateTime
     # The dataset is taken to be the parent of this object's parent.
     parentContainer = self.getParentContainer()
     owningDataset = parentContainer.getParentContainer()
     gaReadGroup.dataset_id = owningDataset.getId()
     gaReadGroup.name = self.getLocalId()
     insertSize = self.getPredictedInsertSize()
     gaReadGroup.predicted_insert_size = pb.int(insertSize)
     refSet = self._parentContainer.getReferenceSet()
     gaReadGroup.sample_name = pb.string(self.getSampleName())
     gaReadGroup.biosample_id = pb.string(self.getBiosampleId())
     # reference_set_id is only filled in when a reference set exists.
     if refSet is not None:
         gaReadGroup.reference_set_id = refSet.getId()
     stats = self.getStats()
     gaReadGroup.stats.CopyFrom(stats)
     programs = self.getPrograms()
     gaReadGroup.programs.extend(programs)
     gaReadGroup.description = pb.string(self.getDescription())
     experiment = self.getExperiment()
     gaReadGroup.experiment.CopyFrom(experiment)
     self.serializeAttributes(gaReadGroup)
     return gaReadGroup
Esempio n. 7
0
 def toProtocolElement(self):
     """
     Converts this ReadGroup into its GA4GH protocol.ReadGroup message.
     """
     # TODO this is very incomplete, but we don't have the
     # implementation to fill out the rest of the fields currently
     message = protocol.ReadGroup()
     message.id = self.getId()
     message.created = self._creationTime
     message.updated = self._updateTime
     # The dataset is taken to be the parent of this object's parent.
     grandParent = self.getParentContainer().getParentContainer()
     message.dataset_id = grandParent.getId()
     message.name = self.getLocalId()
     predictedInsertSize = self.getPredictedInsertSize()
     message.predicted_insert_size = pb.int(predictedInsertSize)
     parentReferenceSet = self._parentContainer.getReferenceSet()
     message.sample_name = pb.string(self.getSampleName())
     message.biosample_id = pb.string(self.getBiosampleId())
     # reference_set_id is only filled in when a reference set exists.
     if parentReferenceSet is not None:
         message.reference_set_id = parentReferenceSet.getId()
     readStats = self.getStats()
     message.stats.CopyFrom(readStats)
     programList = self.getPrograms()
     message.programs.extend(programList)
     message.description = pb.string(self.getDescription())
     gaExperiment = self.getExperiment()
     message.experiment.CopyFrom(gaExperiment)
     self.serializeAttributes(message)
     return message
Esempio n. 8
0
 def toProtocolElement(self):
     """
     Builds a protocol.Dataset message from this object's id, local id,
     description and info entries.
     """
     gaDataset = protocol.Dataset()
     gaDataset.id = self.getId()
     localId = self.getLocalId()
     gaDataset.name = pb.string(localId)
     description = self.getDescription()
     gaDataset.description = pb.string(description)
     # NOTE(review): keys come from getInfo() while values are read from
     # self._info; confirm that both refer to the same mapping.
     for infoKey in self.getInfo():
         infoValues = gaDataset.info[infoKey].values
         infoValues.extend(_encodeValue(self._info[infoKey]))
     return gaDataset
Esempio n. 9
0
 def toProtocolElement(self):
     """
     Builds the GA4GH protocol ReferenceSet message for this ReferenceSet.
     """
     gaReferenceSet = protocol.ReferenceSet()
     # Optional values are passed through the pb helpers before assignment.
     assemblyId = self.getAssemblyId()
     gaReferenceSet.assembly_id = pb.string(assemblyId)
     description = self.getDescription()
     gaReferenceSet.description = pb.string(description)
     gaReferenceSet.id = self.getId()
     gaReferenceSet.is_derived = self.getIsDerived()
     gaReferenceSet.md5checksum = self.getMd5Checksum()
     taxonId = self.getNcbiTaxonId()
     gaReferenceSet.ncbi_taxon_id = pb.int(taxonId)
     accessions = self.getSourceAccessions()
     gaReferenceSet.source_accessions.extend(accessions)
     sourceUri = self.getSourceUri()
     gaReferenceSet.source_uri = pb.string(sourceUri)
     gaReferenceSet.name = self.getLocalId()
     return gaReferenceSet
Esempio n. 10
0
 def testToProtocolElement(self):
     """
     Round-trips an Individual through populateFromJson and
     toProtocolElement, then checks that malformed JSON is rejected.
     """
     dataset = datasets.Dataset('dataset1')
     sexTerm = protocol.OntologyTerm()
     sexTerm.term = "male genotypic sex"
     sexTerm.id = "PATO:0020001"
     sexTerm.source_name = "PATO"
     sexTerm.source_version = pb.string("2015-11-18")
     # Write out a valid input
     print(protocol.toJsonDict(sexTerm))
     timestamp = "2016-05-19T21:00:19Z"
     expected = protocol.Individual(
         name="test", created=timestamp, updated=timestamp, sex=sexTerm)
     expected.info['test'].values.add().string_value = 'test-info'
     # pass through protocol creation
     individual = bioMetadata.Individual(dataset, "test")
     individual.populateFromJson(protocol.toJson(expected))
     actual = individual.toProtocolElement()
     # Only the two timestamps are compared after the round trip.
     self.assertEqual(actual.created, expected.created)
     self.assertEqual(actual.updated, expected.updated)
     # Invalid input
     malformedJson = '{"bad:", "json"}'
     individual = bioMetadata.Individual(dataset, "test")
     # Should fail
     with self.assertRaises(exceptions.InvalidJsonException):
         individual.populateFromJson(malformedJson)
Esempio n. 11
0
    def search_reads(
            self, read_group_ids, reference_id=None, start=None, end=None):
        """
        Searches the given read groups for Reads that satisfy the given
        filters and returns an iterator over them.

        :param list read_group_ids: The IDs of the
            :class:`ga4gh.protocol.ReadGroup` objects of interest; the
            request's read_group_ids field is extended with these values.
        :param str reference_id: The name of the
            :class:`ga4gh.protocol.Reference` we wish to return reads
            mapped to.
        :param int start: The start position (0-based) of this query. If a
            reference is specified, this defaults to 0. Genomic positions are
            non-negative integers less than reference length. Requests spanning
            the join of circular genomes are represented as two requests one on
            each side of the join (position 0).
        :param int end: The end position (0-based, exclusive) of this query.
            If a reference is specified, this defaults to the reference's
            length.
        :return: An iterator over the
            :class:`ga4gh.protocol.ReadAlignment` objects defined by
            the query parameters.
        :rtype: iter
        """
        search_request = protocol.SearchReadsRequest()
        search_request.read_group_ids.extend(read_group_ids)
        # The remaining filters are optional and go through the pb helpers.
        search_request.reference_id = pb.string(reference_id)
        search_request.start = pb.int(start)
        search_request.end = pb.int(end)
        search_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchReadsResponse
        return self._run_search_request(
            search_request, "reads", response_class)
Esempio n. 12
0
 def getSerializedResponse(self):
     """
     Stores the next page token on the SearchResponse built by this
     SearchResponseBuilder and returns that response serialized for the
     configured return mimetype.
     """
     pageToken = pb.string(self._nextPageToken)
     self._protoObject.next_page_token = pageToken
     return protocol.serialize(self._protoObject, self._return_mimetype)
Esempio n. 13
0
 def getSerializedResponse(self):
     """
     Stores the next page token on the SearchResponse built by this
     SearchResponseBuilder and returns that response converted with
     protocol.toJson.
     """
     pageToken = pb.string(self._nextPageToken)
     self._protoObject.next_page_token = pageToken
     return protocol.toJson(self._protoObject)
Esempio n. 14
0
    def search_read_group_sets(self, dataset_id, name=None, biosample_id=None):
        """
        Searches the given Dataset for ReadGroupSets that satisfy the given
        filters and returns an iterator over them.

        :param str dataset_id: The ID of the dataset to search within; it is
            placed on the request unchanged.
        :param str name: Only ReadGroupSets matching the specified name
            will be returned.
        :param str biosample_id: Only ReadGroups matching the specified
            biosample will be included in the response.
        :return: An iterator over the :class:`ga4gh.protocol.ReadGroupSet`
            objects defined by the query parameters.
        :rtype: iter
        """
        search_request = protocol.SearchReadGroupSetsRequest()
        search_request.dataset_id = dataset_id
        # The optional filters go through the pb helpers.
        search_request.name = pb.string(name)
        search_request.biosample_id = pb.string(biosample_id)
        search_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchReadGroupSetsResponse
        return self._run_search_request(
            search_request, "readgroupsets", response_class)
Esempio n. 15
0
 def toProtocolElement(self, tier=0):
     """
     Builds the GA4GH protocol ReferenceSet message for this ReferenceSet.

     :param tier: accepted but not used anywhere in this method.
     """
     gaReferenceSet = protocol.ReferenceSet()
     assemblyId = self.getAssemblyId()
     gaReferenceSet.assembly_id = pb.string(assemblyId)
     description = self.getDescription()
     gaReferenceSet.description = pb.string(description)
     gaReferenceSet.id = self.getId()
     gaReferenceSet.is_derived = self.getIsDerived()
     gaReferenceSet.md5checksum = self.getMd5Checksum()
     # The species is parsed into an OntologyTerm via JSON; only its
     # term_id and term are copied onto the message.
     if self.getSpecies():
         speciesJson = json.dumps(self.getSpecies())
         speciesTerm = protocol.fromJson(speciesJson, protocol.OntologyTerm)
         gaReferenceSet.species.term_id = speciesTerm.term_id
         gaReferenceSet.species.term = speciesTerm.term
     accessions = self.getSourceAccessions()
     gaReferenceSet.source_accessions.extend(accessions)
     sourceUri = self.getSourceUri()
     gaReferenceSet.source_uri = pb.string(sourceUri)
     gaReferenceSet.name = self.getLocalId()
     self.serializeAttributes(gaReferenceSet)
     return gaReferenceSet
Esempio n. 16
0
 def toProtocolElement(self):
     """
     Converts this ReferenceSet into its GA4GH protocol.ReferenceSet
     message.
     """
     message = protocol.ReferenceSet()
     assemblyId = self.getAssemblyId()
     message.assembly_id = pb.string(assemblyId)
     description = self.getDescription()
     message.description = pb.string(description)
     message.id = self.getId()
     message.is_derived = self.getIsDerived()
     message.md5checksum = self.getMd5Checksum()
     # The species is parsed into an OntologyTerm via JSON; only its
     # term_id and term are copied onto the message.
     if self.getSpecies():
         encodedSpecies = json.dumps(self.getSpecies())
         parsedTerm = protocol.fromJson(
             encodedSpecies, protocol.OntologyTerm)
         message.species.term_id = parsedTerm.term_id
         message.species.term = parsedTerm.term
     sourceAccessions = self.getSourceAccessions()
     message.source_accessions.extend(sourceAccessions)
     sourceUri = self.getSourceUri()
     message.source_uri = pb.string(sourceUri)
     message.name = self.getLocalId()
     self.serializeAttributes(message)
     return message
Esempio n. 17
0
    def search_bio_samples(self, dataset_id, name=None, individual_id=None):
        """
        Searches the given dataset for BioSamples that satisfy the given
        filters and returns an iterator over them.

        :param str dataset_id: The dataset to search within.
        :param str name: Only BioSamples matching the specified name will
            be returned.
        :param str individual_id: Only BioSamples matching this
            id will be returned.
        :return: An iterator over the :class:`ga4gh.protocol.BioSample`
            objects defined by the query parameters.
        """
        search_request = protocol.SearchBioSamplesRequest()
        search_request.dataset_id = dataset_id
        # The optional filters go through the pb helpers.
        search_request.name = pb.string(name)
        search_request.individual_id = pb.string(individual_id)
        search_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchBioSamplesResponse
        return self._run_search_request(
            search_request, "biosamples", response_class)
Esempio n. 18
0
    def search_call_sets(self, variant_set_id, name=None, bio_sample_id=None):
        """
        Searches the given VariantSet for CallSets that satisfy the given
        filters and returns an iterator over them.

        :param str variant_set_id: Find callsets belonging to the
            provided variant set.
        :param str name: Only CallSets matching the specified name will
            be returned.
        :param str bio_sample_id: Only CallSets matching this id will
            be returned.
        :return: An iterator over the :class:`ga4gh.protocol.CallSet`
            objects defined by the query parameters.
        """
        search_request = protocol.SearchCallSetsRequest()
        search_request.variant_set_id = variant_set_id
        # The optional filters go through the pb helpers.
        search_request.name = pb.string(name)
        search_request.bio_sample_id = pb.string(bio_sample_id)
        search_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchCallSetsResponse
        return self._run_search_request(
            search_request, "callsets", response_class)
Esempio n. 19
0
    def search_variants(self,
                        variant_set_id,
                        start=None,
                        end=None,
                        reference_name=None,
                        call_set_ids=None):
        """
        Returns an iterator over the Variants fulfilling the specified
        conditions from the specified VariantSet.

        :param str variant_set_id: The ID of the
            :class:`ga4gh.protocol.VariantSet` of interest.
        :param int start: Required. The beginning of the window (0-based,
            inclusive) for which overlapping variants should be returned.
            Genomic positions are non-negative integers less than reference
            length. Requests spanning the join of circular genomes are
            represented as two requests one on each side of the join
            (position 0).
        :param int end: Required. The end of the window (0-based, exclusive)
            for which overlapping variants should be returned.
        :param str reference_name: The name of the
            :class:`ga4gh.protocol.Reference` we wish to return variants from.
        :param list call_set_ids: Only return variant calls which belong to
            call sets with these IDs. If an empty array, returns variants
            without any call objects. If null, returns all variant calls.
            Note that this method adds nothing to the request's call_set_ids
            field in either the None or the empty-list case.

        :return: An iterator over the :class:`ga4gh.protocol.Variant` objects
            defined by the query parameters.
        :rtype: iter
        """
        request = protocol.SearchVariantsRequest()
        request.reference_name = pb.string(reference_name)
        request.start = pb.int(start)
        request.end = pb.int(end)
        request.variant_set_id = variant_set_id
        # call_set_ids is a list, not a string: skip it explicitly when it is
        # None rather than routing it through the string helper pb.string and
        # relying on that helper's None substitute being an empty iterable.
        if call_set_ids is not None:
            request.call_set_ids.extend(call_set_ids)
        request.page_size = pb.int(self._page_size)
        return self._run_search_request(request, "variants",
                                        protocol.SearchVariantsResponse)
Esempio n. 20
0
 def toProtocolElement(self):
     """
     Builds the protocol.FeatureSet message that represents this
     FeatureSet.
     """
     message = protocol.FeatureSet()
     message.id = self.getId()
     parentContainer = self.getParentContainer()
     message.dataset_id = parentContainer.getId()
     referenceSetId = self._referenceSet.getId()
     message.reference_set_id = pb.string(referenceSetId)
     message.name = self._name
     message.source_uri = self._sourceUri
     # Every entry of self._info is copied into the message's info map.
     for infoKey in self._info:
         infoValues = message.info[infoKey].values
         infoValues.extend(self._info[infoKey])
     return message
Esempio n. 21
0
    def search_references(
            self, reference_set_id, accession=None, md5checksum=None):
        """
        Searches the given ReferenceSet for References that satisfy the
        given filters and returns an iterator over them.

        :param str reference_set_id: The ReferenceSet to search.
        :param str accession: If not None, return the references for which the
            `accession` matches this string (case-sensitive, exact match).
        :param str md5checksum: If not None, return the references for which
            the `md5checksum` matches this string (case-sensitive, exact
            match).
        :return: An iterator over the :class:`ga4gh.protocol.Reference`
            objects defined by the query parameters.
        """
        search_request = protocol.SearchReferencesRequest()
        search_request.reference_set_id = reference_set_id
        # The optional filters go through the pb helpers.
        search_request.accession = pb.string(accession)
        search_request.md5checksum = pb.string(md5checksum)
        search_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchReferencesResponse
        return self._run_search_request(
            search_request, "references", response_class)
Esempio n. 22
0
 def toProtocolElement(self):
     """
     Builds the protocol.FeatureSet message that represents this
     FeatureSet.
     """
     message = protocol.FeatureSet()
     message.id = self.getId()
     parentContainer = self.getParentContainer()
     message.dataset_id = parentContainer.getId()
     referenceSetId = self._referenceSet.getId()
     message.reference_set_id = pb.string(referenceSetId)
     message.name = self._name
     message.source_uri = self._sourceUri
     attributes = self.getAttributes()
     # Each attribute is encoded with protocol.encodeValue and appended to
     # the message's attribute map under the same key.
     for attrName in attributes:
         attrValues = message.attributes.attr[attrName].values
         attrValues.extend(protocol.encodeValue(attributes[attrName]))
     return message
Esempio n. 23
0
 def toProtocolElement(self):
     """
     Converts this FeatureSet into the corresponding protocol.FeatureSet
     message.
     """
     protocolFeatureSet = protocol.FeatureSet()
     protocolFeatureSet.id = self.getId()
     owner = self.getParentContainer()
     protocolFeatureSet.dataset_id = owner.getId()
     refSetId = self._referenceSet.getId()
     protocolFeatureSet.reference_set_id = pb.string(refSetId)
     protocolFeatureSet.name = self._name
     protocolFeatureSet.source_uri = self._sourceUri
     attributes = self.getAttributes()
     # Each attribute is encoded with protocol.encodeValue and appended to
     # the message's attribute map under the same key.
     for key in attributes:
         target = protocolFeatureSet.attributes.attr[key].values
         target.extend(protocol.encodeValue(attributes[key]))
     return protocolFeatureSet
Esempio n. 24
0
 def getExperiment(self):
     """
     Builds the GA4GH protocol Experiment message for this read group.
     """
     gaExperiment = protocol.Experiment()
     gaExperiment.id = self.getExperimentId()
     instrumentModel = self.getInstrumentModel()
     gaExperiment.instrument_model = pb.string(instrumentModel)
     sequencingCenter = self.getSequencingCenter()
     gaExperiment.sequencing_center = pb.string(sequencingCenter)
     description = self.getExperimentDescription()
     gaExperiment.description = pb.string(description)
     gaExperiment.library = pb.string(self.getLibrary())
     gaExperiment.platform_unit = pb.string(self.getPlatformUnit())
     # Both message timestamps are taken from the same stored value.
     timestamp = self._iso8601
     gaExperiment.message_create_time = timestamp
     gaExperiment.message_update_time = timestamp
     gaExperiment.run_time = pb.string(self.getRunTime())
     return gaExperiment
Esempio n. 25
0
 def getExperiment(self):
     """
     Assembles the protocol.Experiment message describing this read
     group's experiment.
     """
     message = protocol.Experiment()
     message.id = self.getExperimentId()
     model = self.getInstrumentModel()
     message.instrument_model = pb.string(model)
     center = self.getSequencingCenter()
     message.sequencing_center = pb.string(center)
     experimentDescription = self.getExperimentDescription()
     message.description = pb.string(experimentDescription)
     message.library = pb.string(self.getLibrary())
     message.platform_unit = pb.string(self.getPlatformUnit())
     # Both message timestamps are taken from the same stored value.
     messageTime = self._iso8601
     message.message_create_time = messageTime
     message.message_update_time = messageTime
     message.run_time = pb.string(self.getRunTime())
     return message
Esempio n. 26
0
    def getGaTermByName(self, name):
        """
        Builds a GA4GH OntologyTerm object for the given term name.

        :param name: name of the ontology term, ex. "gene".
        :return: GA4GH OntologyTerm object.
        """
        # TODO what is the correct value when we have no mapping??
        matchingIds = self.getTermIds(name)
        # TODO what is the correct behaviour here when we have multiple
        # IDs matching a given name? For now the first one is used.
        # TODO add logging for missed term translation (the "" case).
        termId = matchingIds[0] if len(matchingIds) > 0 else ""
        gaTerm = protocol.OntologyTerm()
        gaTerm.term = name
        gaTerm.id = termId
        gaTerm.source_name = self._sourceName
        gaTerm.source_version = pb.string(self._sourceVersion)
        return gaTerm
Esempio n. 27
0
 def testString(self):
     """
     Checks that pb.string maps None to pb.DEFAULT_STRING and returns a
     real string unchanged.
     """
     defaultString = pb.DEFAULT_STRING
     fromNone = pb.string(None)
     self.assertEqual(defaultString, fromNone)
     fromValue = pb.string('A')
     self.assertEqual('A', fromValue)
Esempio n. 28
0
 def testString(self):
     """
     Verifies the two pb.string cases: None yields pb.DEFAULT_STRING and
     'A' is passed through unchanged.
     """
     expectedDefault = pb.DEFAULT_STRING
     actualDefault = pb.string(None)
     self.assertEqual(expectedDefault, actualDefault)
     passedThrough = pb.string('A')
     self.assertEqual('A', passedThrough)