Beispiel #1
0
def load_transcripts(sequence,
                     sample_id=None,
                     group_id=None,
                     source=None,
                     threshold=None):
    """Process a sequence of exon lines into transcript models.

    The lines are parsed with ``sambamba.depth_output``, grouped per
    transcript with ``groupby_tx`` and summarised with ``tx_stat``.

    Args:
        sequence (sequence): list of chanjo bed lines
        sample_id (Optional[str]): unique sample id; when omitted it is read
            from the ``sampleName`` field of the first exon in the first
            transcript group
        group_id (Optional[str]): id to group samples
        source (Optional[str]): path to coverage source (BAM/Sambamba)
        threshold (Optional[int]): completeness level to disqualify exons

    Returns:
        Result: ``models`` is a lazy generator of ``make_model`` results (one
        per transcript), ``count`` is the number of transcript groups and
        ``sample`` is the ``Sample`` object shared by all models

    Raises:
        ValueError: if ``sample_id`` is not given and the input produced no
            transcripts to guess it from
    """
    exons = sambamba.depth_output(sequence)
    transcripts = groupby_tx(exons, sambamba=True)
    # Lazy on purpose: statistics are computed only when the models are
    # consumed by the caller.
    raw_stats = ((tx_id, tx_stat(tx_id, tx_exons, threshold=threshold))
                 for tx_id, tx_exons in iteritems(transcripts))

    if sample_id is None:
        if not transcripts:
            # An unguarded next() on an empty iterator would leak a bare
            # StopIteration, which can silently end an enclosing iteration.
            raise ValueError(
                "unable to guess sample id: no transcripts in input")
        sample_id = next(iter(itervalues(transcripts)))[0]['sampleName']
    sample_obj = Sample(id=sample_id, group_id=group_id, source=source)

    models = (make_model(sample_obj, tx_id, raw_stat)
              for tx_id, raw_stat in raw_stats)
    return Result(models=models, count=len(transcripts), sample=sample_obj)
Beispiel #2
0
def load_transcripts(sequence, sample_id=None, group_id=None, source=None,
                     threshold=None):
    """Process a sequence of exon lines into transcript models.

    The lines are parsed with ``sambamba.depth_output``, grouped per
    transcript with ``groupby_tx`` and summarised with ``tx_stat``.

    Args:
        sequence (sequence): list of chanjo bed lines
        sample_id (Optional[str]): unique sample id; when omitted it is read
            from the ``sampleName`` field of the first exon in the first
            transcript group
        group_id (Optional[str]): id to group samples
        source (Optional[str]): path to coverage source (BAM/Sambamba)
        threshold (Optional[int]): completeness level to disqualify exons

    Returns:
        Result: ``models`` is a lazy generator of ``make_model`` results (one
        per transcript), ``count`` is the number of transcript groups and
        ``sample`` is the ``Sample`` object shared by all models

    Raises:
        ValueError: if ``sample_id`` is not given and the input produced no
            transcripts to guess it from
    """
    exons = sambamba.depth_output(sequence)
    transcripts = groupby_tx(exons, sambamba=True)
    # Kept as a generator so per-transcript statistics are only computed
    # when the caller iterates over the models.
    raw_stats = ((tx_id, tx_stat(tx_id, tx_exons, threshold=threshold))
                 for tx_id, tx_exons in iteritems(transcripts))

    if sample_id is None:
        if not transcripts:
            # Fail with a descriptive error instead of letting next() leak a
            # bare StopIteration out of this function.
            raise ValueError(
                "unable to guess sample id: no transcripts in input")
        sample_id = next(iter(itervalues(transcripts)))[0]['sampleName']
    sample_obj = Sample(id=sample_id, group_id=group_id, source=source)

    models = (make_model(sample_obj, tx_id, raw_stat) for tx_id, raw_stat
              in raw_stats)
    return Result(models=models, count=len(transcripts), sample=sample_obj)
Beispiel #3
0
    def gene(self, *gene_ids):
        r"""Collect per-sample metrics for each of the requested genes.

        For every gene id the matching exons are fetched through
        ``self.gene_exons``, the base query is narrowed to those exon ids and
        the ``(sample_id, data)`` pairs yielded by ``self.means`` are stored
        under ``result[sample_id]['genes'][gene_id]``.

        Args:
            \*gene_ids (List[str]): gene ids

        Returns:
            The values of the per-sample mapping as produced by
            ``itervalues``; each value is a dict with the keys
            ``'sample_id'`` and ``'genes'``.

        Raises:
            AttributeError: if no exons are found for one of the gene ids
        """
        per_sample = {}
        for gene_id in gene_ids:
            logger.debug('figure out which transcripts the gene belongs to')
            matched_ids = [row.exon_id
                           for row in self.gene_exons(gene_id).all()]
            # No exon ids means the lookup returned nothing for this gene.
            if not matched_ids:
                message = "gene id not in database: {}".format(gene_id)
                raise AttributeError(message)

            base_query = self.query()
            gene_query = base_query.filter(Exon.exon_id.in_(matched_ids))
            for sample_id, data in self.means(gene_query):
                # Create the sample entry the first time the sample is seen.
                record = per_sample.setdefault(
                    sample_id, {'sample_id': sample_id, 'genes': {}})
                record['genes'][gene_id] = data

        return itervalues(per_sample)
Beispiel #4
0
    def gene(self, *gene_ids):
        r"""Gather aggregate metrics per sample for the given genes.

        Each gene id is resolved to its exons via ``self.gene_exons``; the
        base query is then restricted to those exon ids and passed to
        ``self.means``, whose ``(sample_id, data)`` pairs are filed under
        the sample's ``'genes'`` dict keyed by gene id.

        Args:
            \*gene_ids (List[str]): gene ids

        Returns:
            The values of the per-sample mapping as produced by
            ``itervalues``; each value is a dict with the keys
            ``'sample_id'`` and ``'genes'``.

        Raises:
            AttributeError: if no exons are found for one of the gene ids
        """
        by_sample = {}
        for gene_id in gene_ids:
            logger.debug('figure out which transcripts the gene belongs to')
            exon_ids = [found.exon_id
                        for found in self.gene_exons(gene_id).all()]
            # An empty id list means the exon lookup came back empty.
            if not exon_ids:
                raise AttributeError(
                    "gene id not in database: {}".format(gene_id))

            restricted = self.query().filter(Exon.exon_id.in_(exon_ids))
            for sample_id, data in self.means(restricted):
                entry = by_sample.get(sample_id)
                if entry is None:
                    # First result for this sample: start its record.
                    entry = {'sample_id': sample_id, 'genes': {}}
                    by_sample[sample_id] = entry
                entry['genes'][gene_id] = data

        return itervalues(by_sample)