Esempio n. 1
0
def process(sequence,
            sample_id=None,
            group_id=None,
            source=None,
            threshold=None):
    """Turn a sequence of Sambamba exon lines into transcript models.

    Args:
        sequence (sequence): list of chanjo bed lines
        sample_id (Optional[str]): unique sample id; when omitted it is read
            from the ``sampleName`` field of the first exon in the first
            transcript group
        group_id (Optional[str]): id to group samples
        source (Optional[str]): path to coverage source (BAM/Sambamba)
        threshold (Optional[int]): completeness level to disqualify exons

    Returns:
        Result: lazy iterator of transcript models, the number of transcript
            groups, and the sample model
    """
    parsed_rows = sambamba.depth_output(sequence)
    by_transcript = groupby_tx(parsed_rows, sambamba=True)
    tx_items = iteritems(by_transcript)

    if sample_id is None:
        # Fall back to the sample name recorded in the coverage data itself.
        first_group = next(itervalues(by_transcript))
        sample_id = first_group[0]['sampleName']
    sample_obj = Sample(id=sample_id, group_id=group_id, source=source)

    # Stats and models are computed on demand, one transcript at a time.
    models = (
        make_model(sample_obj, tx_id,
                   tx_stat(tx_id, tx_exons, threshold=threshold))
        for tx_id, tx_exons in tx_items
    )
    return Result(models=models, count=len(by_transcript), sample=sample_obj)
Esempio n. 2
0
def test_depth_output_exon():
    """Parse a single exon-level Sambamba row and check selected fields."""
    header = ("# chrom\tchromStart\tchromEnd\tF3\tF4\tF5\tF6\tF7\t"
              "readCount\tmeanCoverage\tpercentage10\tpercentage20"
              "\tpercentage100\tsampleName")
    exon_row = ("1\t69089\t70007\t1-69090-70007\t0\t+\tCCDS30547.1\tOR4F5\t"
                "232\t25.4946\t57.9521\t36.0566\t5.55556\tADM992A10\t")

    results = list(sambamba.depth_output([header, exon_row]))

    # The header line must not produce a record of its own.
    assert len(results) == 1
    parsed = results[0]
    assert parsed['sampleName'] == 'ADM992A10'
    assert parsed['chromStart'] == 69089
Esempio n. 3
0
def test_depth_output_exon():
    """Check that one exon-level Sambamba line parses into one record."""
    header_line = (
        "# chrom\tchromStart\tchromEnd\tF3\tF4\tF5\tF6\tF7\t"
        "readCount\tmeanCoverage\tpercentage10\tpercentage20"
        "\tpercentage100\tsampleName"
    )
    data_line = (
        "1\t69089\t70007\t1-69090-70007\t0\t+\tCCDS30547.1\tOR4F5\t"
        "232\t25.4946\t57.9521\t36.0566\t5.55556\tADM992A10\t"
    )

    results = list(sambamba.depth_output([header_line, data_line]))

    # Only the data line should yield a record.
    assert len(results) == 1
    record = results[0]
    assert record["sampleName"] == "ADM992A10"
    assert record["chromStart"] == 69089
Esempio n. 4
0
def load_sambamba(chanjo_db, bed_iterable, sample_id=None, group_id=None):
    """Load Sambamba BED output from a stream.

    Parses the BED lines, converts them with ``sambamba_rows`` and adds each
    resulting object to the store, saving in batches and once more at the end.

    Args:
        chanjo_db: store exposing ``session``, ``add()`` and ``save()``
        bed_iterable (iterable): lines of Sambamba BED output
        sample_id (Optional[str]): forwarded to ``sambamba_rows``
        group_id (Optional[str]): forwarded to ``sambamba_rows``
    """
    rows = sambamba.depth_output(bed_iterable)
    stats = sambamba_rows(chanjo_db.session,
                          rows,
                          sample_id=sample_id,
                          group_id=group_id)

    # Pre-initialise the counter: if `stats` yields nothing the loop variable
    # would otherwise be unbound when the final log line runs.
    processed = 0
    # Count from 1 so the logged number is the number of items handled.
    for processed, stat in enumerate(stats, start=1):
        chanjo_db.add(stat)
        # Save and report progress after every 10000 items.
        if processed % 10000 == 0:
            chanjo_db.save()
            logger.info("processed %s annotations", processed)

    # Final save covers whatever was added since the last batch.
    chanjo_db.save()
    logger.info("processed %s annotations", processed)
Esempio n. 5
0
def test_depth_output_gene():
    """Parse a single gene-level Sambamba row and check every parsed field."""
    header = ("# chrom\tchromStart\tchromEnd\tF3\treadCount\tmeanCoverage\t"
              "percentage10\tpercentage20\tpercentage100\tsampleName")
    gene_row = ("1\t69089\t70007\tOR4F5\t232\t25.4946\t57.9521\t36.0566\t"
                "5.55556\tADM992A10\t")

    results = list(sambamba.depth_output([header, gene_row]))

    # The header line must not produce a record of its own.
    assert len(results) == 1
    parsed = results[0]
    assert parsed['chrom'] == '1'
    assert parsed['chromStart'] == 69089
    assert parsed['chromEnd'] == 70007
    assert parsed['extraFields'] == ['OR4F5']
    assert parsed['readCount'] == 232
    assert parsed['meanCoverage'] == 25.4946
    assert parsed['thresholds'][20] == 36.0566
    assert parsed['sampleName'] == 'ADM992A10'
Esempio n. 6
0
def test_depth_output_gene():
    """Check all parsed fields of one gene-level Sambamba line."""
    header_line = (
        "# chrom\tchromStart\tchromEnd\tF3\treadCount\tmeanCoverage\t"
        "percentage10\tpercentage20\tpercentage100\tsampleName"
    )
    data_line = (
        "1\t69089\t70007\tOR4F5\t232\t25.4946\t57.9521\t36.0566\t"
        "5.55556\tADM992A10\t"
    )

    results = list(sambamba.depth_output([header_line, data_line]))

    # Only the data line should yield a record.
    assert len(results) == 1
    record = results[0]
    assert record["chrom"] == "1"
    assert record["chromStart"] == 69089
    assert record["chromEnd"] == 70007
    assert record["extraFields"] == ["OR4F5"]
    assert record["readCount"] == 232
    assert record["meanCoverage"] == 25.4946
    assert record["thresholds"][20] == 36.0566
    assert record["sampleName"] == "ADM992A10"
Esempio n. 7
0
 def setup(self):
     """Create and set up a store, then parse the BED fixture lines.

     The store is bound to the 'sqlite://' URI (presumably an in-memory
     database -- confirm against ``Store``). ``bed_lines`` is defined
     outside this method.
     """
     database = Store('sqlite://')
     self.store = database
     database.set_up()
     self.row_data = parse_sambamba.depth_output(bed_lines)