def load_transcripts(sequence, sample_id=None, group_id=None, source=None,
                     threshold=None):
    """Turn a sequence of exon lines into transcript models for one sample.

    The lines are parsed with ``sambamba.depth_output``, grouped per
    transcript with ``groupby_tx`` and summarised with ``tx_stat``. The
    transcript models are produced lazily: nothing is computed per
    transcript until the returned ``models`` iterator is consumed.

    Args:
        sequence (sequence): list of chanjo bed lines
        sample_id (Optional[str]): unique sample id; when omitted it is
            read from the ``'sampleName'`` field of the first exon of the
            first transcript group
        group_id (Optional[str]): id to group samples
        source (Optional[str]): path to coverage source (BAM/Sambamba)
        threshold (Optional[int]): completeness level to disqualify exons,
            forwarded unchanged to ``tx_stat``

    Returns:
        Result: iterator of transcript models, number of transcripts
            processed, and the sample model
    """
    parsed_exons = sambamba.depth_output(sequence)
    tx_groups = groupby_tx(parsed_exons, sambamba=True)

    # Lazy (transcript id, statistics) pairs; evaluated only on iteration.
    stats_by_tx = (
        (tx_id, tx_stat(tx_id, tx_exons, threshold=threshold))
        for tx_id, tx_exons in iteritems(tx_groups)
    )

    if sample_id is None:
        # Guess the sample id from the very first exon that was grouped.
        # NOTE(review): with no transcript groups at all, ``next`` raises
        # StopIteration here -- confirm callers never pass empty input.
        first_group = next(iter(itervalues(tx_groups)))
        sample_id = first_group[0]['sampleName']

    sample_obj = Sample(id=sample_id, group_id=group_id, source=source)
    models = (
        make_model(sample_obj, tx_id, stat)
        for tx_id, stat in stats_by_tx
    )
    return Result(models=models, count=len(tx_groups), sample=sample_obj)
def gene(self, *gene_ids):
    r"""Collect aggregate statistics per sample for the requested genes.

    For every gene id, the exons linked to the gene are looked up, the
    base query is restricted to those exons and ``self.means`` is used to
    compute the metrics for each sample.

    Args:
        \*gene_ids (List[str]): gene ids

    Returns:
        iterable of dict: one entry per sample (the values of an internal
            mapping as yielded by ``itervalues``), each shaped like
            ``{'sample_id': ..., 'genes': {gene_id: metrics}}``

    Raises:
        AttributeError: if a gene id has no exons in the database
    """
    by_sample = {}
    for gene_id in gene_ids:
        logger.debug('figure out which transcripts the gene belongs to')
        gene_exon_objs = self.gene_exons(gene_id).all()
        if not gene_exon_objs:
            message = "gene id not in database: {}".format(gene_id)
            raise AttributeError(message)

        exon_ids = [exon.exon_id for exon in gene_exon_objs]
        exon_filter = Exon.exon_id.in_(exon_ids)
        query = self.query().filter(exon_filter)

        for sample_id, data in self.means(query):
            # Create the per-sample record on first sight, then attach
            # this gene's metrics to it.
            record = by_sample.setdefault(
                sample_id, {'sample_id': sample_id, 'genes': {}})
            record['genes'][gene_id] = data

    return itervalues(by_sample)
def gene(self, *gene_ids):
    r"""Report per-sample aggregate metrics for one or more genes.

    Each gene id is resolved to its exons; the metrics returned by
    ``self.means`` for a query limited to those exons are then stored
    under the gene id in the matching sample's record.

    Args:
        \*gene_ids (List[str]): gene ids

    Returns:
        iterable of dict: the per-sample records (as yielded by
            ``itervalues``); every record holds ``'sample_id'`` and a
            ``'genes'`` dict keyed by gene id

    Raises:
        AttributeError: if no exons are found for one of the gene ids
    """
    results = {}
    for gene_id in gene_ids:
        logger.debug('figure out which transcripts the gene belongs to')
        found_exons = self.gene_exons(gene_id).all()
        if not found_exons:
            raise AttributeError(
                "gene id not in database: {}".format(gene_id))

        ids_for_gene = [found.exon_id for found in found_exons]
        query = self.query().filter(Exon.exon_id.in_(ids_for_gene))

        for sample_id, data in self.means(query):
            sample_entry = results.get(sample_id)
            if sample_entry is None:
                # First metrics seen for this sample: start its record.
                sample_entry = {'sample_id': sample_id, 'genes': {}}
                results[sample_id] = sample_entry
            sample_entry['genes'][gene_id] = data

    return itervalues(results)