Esempio n. 1
0
def get_stats(blastfile, strict=False):
    """Collect alignment summary statistics from `blastfile`.

    Every line of the file is parsed with `BlastLine`. For each parsed
    record the query and subject intervals are stored with their start/stop
    coordinates put in ascending order, and the gap count, identical-base
    count and hit length are accumulated.

    Args:
        blastfile: Path of the file to read; its basename is stored in the
            returned object.
        strict: When true, half of the summed gap count is subtracted from
            both the query and the reference covered totals.

    Returns:
        An `AlignStats` built from (basename, query covered, reference
        covered, query span, reference span, identical count, AL50).
    """
    from jcvi.utils.range import range_union, range_span
    from .pyblast import BlastLine

    logging.debug("Report stats on `%s`" % blastfile)
    ref_ivs = []
    qry_ivs = []
    identicals = 0
    ngaps = 0
    alignlens = []

    # The context manager closes the handle even if a line fails to parse.
    with open(blastfile) as fp:
        for row in fp:
            c = BlastLine(row)

            # Intervals are stored as (name, low, high) regardless of the
            # orientation reported on the line.
            qstart, qstop = c.qstart, c.qstop
            if qstart > qstop:
                qstart, qstop = qstop, qstart
            qry_ivs.append((c.query, qstart, qstop))

            sstart, sstop = c.sstart, c.sstop
            if sstart > sstop:
                sstart, sstop = sstop, sstart
            ref_ivs.append((c.subject, sstart, sstop))

            ngaps += c.ngaps
            # Identical positions = hit length minus mismatches and gaps.
            identicals += c.hitlen - c.nmismatch - c.ngaps
            alignlens.append(c.hitlen)

    qrycovered = range_union(qry_ivs)
    refcovered = range_union(ref_ivs)
    if strict:
        # We discount gaps in counting covered bases, since we
        # did not track individually gaps in qry and ref, we assume
        # the gaps are opened evenly in the two sequences
        # NOTE(review): on Python 3 `/` is true division, so the covered
        # totals become floats here -- confirm whether `//` was intended.
        qrycovered -= ngaps / 2
        refcovered -= ngaps / 2
    qryspan = range_span(qry_ivs)
    refspan = range_span(ref_ivs)
    # Only the middle element of the returned triple is used.
    _, AL50, _ = calculate_A50(alignlens)
    filename = op.basename(blastfile)
    alignstats = AlignStats(
        filename, qrycovered, refcovered, qryspan, refspan, identicals, AL50
    )

    return alignstats
Esempio n. 2
0
def get_stats(blastfile, strict=False):
    """Summarize the alignments listed in `blastfile`.

    Each line is turned into a `BlastLine`; its query and subject intervals
    (with start <= stop enforced) are gathered, together with running totals
    of gaps and identical positions and the list of hit lengths.

    Args:
        blastfile: Path of the file to read. Only its basename is kept in
            the result.
        strict: If true, subtract half of the total gap count from each of
            the query and reference covered totals.

    Returns:
        An `AlignStats` constructed from the basename, the query/reference
        covered totals, the query/reference spans, the identical count and
        the AL50 value.
    """
    from jcvi.utils.range import range_union, range_span
    from .pyblast import BlastLine

    logging.debug("Report stats on `%s`" % blastfile)
    ref_ivs = []
    qry_ivs = []
    identicals = 0
    ngaps = 0
    alignlens = []

    # Read inside a `with` block so the file handle is always released.
    with open(blastfile) as fp:
        for row in fp:
            c = BlastLine(row)

            # Normalize the query interval to ascending coordinates.
            qstart, qstop = c.qstart, c.qstop
            if qstart > qstop:
                qstart, qstop = qstop, qstart
            qry_ivs.append((c.query, qstart, qstop))

            # Same normalization for the subject interval.
            sstart, sstop = c.sstart, c.sstop
            if sstart > sstop:
                sstart, sstop = sstop, sstart
            ref_ivs.append((c.subject, sstart, sstop))

            ngaps += c.ngaps
            # Positions that are neither mismatches nor gaps.
            identicals += c.hitlen - c.nmismatch - c.ngaps
            alignlens.append(c.hitlen)

    qrycovered = range_union(qry_ivs)
    refcovered = range_union(ref_ivs)
    if strict:
        # We discount gaps in counting covered bases, since we
        # did not track individually gaps in qry and ref, we assume
        # the gaps are opened evenly in the two sequences
        # NOTE(review): `/` yields a float on Python 3 -- verify that a
        # fractional covered total is acceptable to AlignStats.
        qrycovered -= ngaps / 2
        refcovered -= ngaps / 2
    qryspan = range_span(qry_ivs)
    refspan = range_span(ref_ivs)
    # calculate_A50 returns a triple; only its second element is needed.
    _, AL50, _ = calculate_A50(alignlens)
    filename = op.basename(blastfile)
    alignstats = AlignStats(filename, qrycovered, refcovered,
                            qryspan, refspan, identicals, AL50)

    return alignstats
Esempio n. 3
0
def test_range_span(ranges, expected):
    """Check that `range_span(ranges)` equals `expected`.

    Both arguments are supplied by the caller (presumably a pytest
    parametrization that is not visible here -- TODO confirm).
    """
    from jcvi.utils.range import range_span

    actual = range_span(ranges)
    assert actual == expected