def process(sequence, sample_id=None, group_id=None, source=None,
            threshold=None):
    """Turn Sambamba exon lines into per-transcript models for one sample.

    Args:
        sequence (sequence): list of chanjo bed lines
        sample_id (Optional[str]): unique sample id; when omitted it is read
            from the 'sampleName' field of the first exon of the first
            transcript group
        group_id (Optional[str]): id to group samples
        source (Optional[str]): path to coverage source (BAM/Sambamba)
        threshold (Optional[int]): completeness level to disqualify exons

    Returns:
        Result: lazy iterator of transcript models, the number of
            transcripts processed, and the sample model
    """
    parsed_rows = sambamba.depth_output(sequence)
    transcripts = groupby_tx(parsed_rows, sambamba=True)

    if sample_id is None:
        # Fall back on the sample name recorded in the coverage output.
        first_group = next(itervalues(transcripts))
        sample_id = first_group[0]['sampleName']

    sample_obj = Sample(id=sample_id, group_id=group_id, source=source)

    # Stats and models are computed lazily, one transcript at a time, as the
    # consumer iterates over the result.
    models = (
        make_model(
            sample_obj,
            tx_id,
            tx_stat(tx_id, tx_exons, threshold=threshold),
        )
        for tx_id, tx_exons in iteritems(transcripts)
    )
    return Result(models=models, count=len(transcripts), sample=sample_obj)
def test_depth_output_exon():
    """A single exon-level Sambamba line is parsed into exactly one row."""
    header = ("# chrom\tchromStart\tchromEnd\tF3\tF4\tF5\tF6\tF7\t"
              "readCount\tmeanCoverage\tpercentage10\tpercentage20"
              "\tpercentage100\tsampleName")
    record = ("1\t69089\t70007\t1-69090-70007\t0\t+\tCCDS30547.1\tOR4F5\t"
              "232\t25.4946\t57.9521\t36.0566\t5.55556\tADM992A10\t")

    # Exhaust the parser output so the number of rows can be checked.
    results = list(sambamba.depth_output([header, record]))

    assert len(results) == 1
    exon = results[0]
    assert exon['sampleName'] == 'ADM992A10'
    assert exon['chromStart'] == 69089
def test_depth_output_exon():
    """Parse one exon line and verify its sample name and start position."""
    header_line = (
        "# chrom\tchromStart\tchromEnd\tF3\tF4\tF5\tF6\tF7\t"
        "readCount\tmeanCoverage\tpercentage10\tpercentage20"
        "\tpercentage100\tsampleName"
    )
    exon_line = (
        "1\t69089\t70007\t1-69090-70007\t0\t+\tCCDS30547.1\tOR4F5\t"
        "232\t25.4946\t57.9521\t36.0566\t5.55556\tADM992A10\t"
    )
    expected = (("sampleName", "ADM992A10"), ("chromStart", 69089))

    parsed = list(sambamba.depth_output([header_line, exon_line]))

    assert len(parsed) == 1
    first_row = parsed[0]
    for key, value in expected:
        assert first_row[key] == value
def load_sambamba(chanjo_db, bed_iterable, sample_id=None, group_id=None):
    """Load Sambamba BED output from a stream.

    Rows are parsed, converted by ``sambamba_rows`` and added to the store
    one by one; the store is saved after every 10,000 records and once more
    when the input is exhausted.

    Args:
        chanjo_db: store object exposing ``session``, ``add`` and ``save``
        bed_iterable (iterable): lines of Sambamba output
        sample_id (Optional[str]): forwarded to ``sambamba_rows``
        group_id (Optional[str]): forwarded to ``sambamba_rows``
    """
    rows = sambamba.depth_output(bed_iterable)
    stats = sambamba_rows(chanjo_db.session, rows, sample_id=sample_id,
                          group_id=group_id)

    # Initialise the counter so the final log call works even when the input
    # yields no records (the loop variable would otherwise be unbound).
    count = 0
    # Count from 1 so the logged number is the number of records processed
    # rather than the last zero-based index.
    for count, stat in enumerate(stats, 1):
        chanjo_db.add(stat)
        if count % 10000 == 0:
            chanjo_db.save()
            logger.info("processed %s annotations", count)

    # Persist whatever was added since the last periodic save.
    chanjo_db.save()
    logger.info("processed %s annotations", count)
def test_depth_output_gene():
    """A gene-level Sambamba line is parsed into one fully typed row."""
    header = ("# chrom\tchromStart\tchromEnd\tF3\treadCount\tmeanCoverage\t"
              "percentage10\tpercentage20\tpercentage100\tsampleName")
    record = ("1\t69089\t70007\tOR4F5\t232\t25.4946\t57.9521\t36.0566\t"
              "5.55556\tADM992A10\t")

    # Exhaust the parser output so the number of rows can be checked.
    results = list(sambamba.depth_output([header, record]))

    assert len(results) == 1
    gene = results[0]
    assert gene['chrom'] == '1'
    assert gene['chromStart'] == 69089
    assert gene['chromEnd'] == 70007
    assert gene['extraFields'] == ['OR4F5']
    assert gene['readCount'] == 232
    assert gene['meanCoverage'] == 25.4946
    assert gene['thresholds'][20] == 36.0566
    assert gene['sampleName'] == 'ADM992A10'
def test_depth_output_gene():
    """Parse one gene-level line and verify every field of the result."""
    header_line = (
        "# chrom\tchromStart\tchromEnd\tF3\treadCount\tmeanCoverage\t"
        "percentage10\tpercentage20\tpercentage100\tsampleName"
    )
    gene_line = (
        "1\t69089\t70007\tOR4F5\t232\t25.4946\t57.9521\t36.0566\t"
        "5.55556\tADM992A10\t"
    )
    # Plain top-level fields, checked in the order they are listed.
    expected_fields = (
        ("chrom", "1"),
        ("chromStart", 69089),
        ("chromEnd", 70007),
        ("extraFields", ["OR4F5"]),
        ("readCount", 232),
        ("meanCoverage", 25.4946),
    )

    parsed = list(sambamba.depth_output([header_line, gene_line]))

    assert len(parsed) == 1
    first_row = parsed[0]
    for key, value in expected_fields:
        assert first_row[key] == value
    # Completeness values are nested under 'thresholds', keyed by level.
    assert first_row["thresholds"][20] == 36.0566
    assert first_row["sampleName"] == "ADM992A10"
def setup(self):
    """Create and initialise a store, then parse the shared BED lines."""
    # NOTE(review): 'sqlite://' is presumably an in-memory database -
    # confirm against the Store implementation.
    store = Store('sqlite://')
    self.store = store
    store.set_up()

    # `bed_lines` comes from the enclosing scope.
    parsed_rows = parse_sambamba.depth_output(bed_lines)
    self.row_data = parsed_rows