예제 #1
0
def scoresForPosition(ai, pos):
    """
    Build the per-read score matrix for every single-base mutation at `pos`.

    The first column is all zeros and stands for "no mutation"; each
    following column is `ai.ReadLLs(mut)` for one mutation produced by
    `allSingleBaseMutations(str(ai), positions=[pos])`, in the order that
    helper yields them.  The stacked result is cast to float32.

    NOTE(review): this presumes `ai.ReadLLs(mut)` returns one value per
    read (i.e. `ai.NumReads()` entries) so that the columns line up --
    confirm against the integrator API.
    """
    template = str(ai)
    candidates = allSingleBaseMutations(template, positions=[pos])

    # Column 0 is the zero baseline for the unmutated template.
    columns = [[0] * ai.NumReads()]
    for candidate in candidates:
        columns.append(ai.ReadLLs(candidate))

    return np.column_stack(columns).astype(np.float32)
예제 #2
0
def variantsFromConsensus(refWindow, refSequenceInWindow, cssSequenceInWindow,
                          cssQvInWindow=None, siteCoverage=None, aligner="affine",
                          ai=None):
    """
    Compare the consensus and the reference in this window, returning
    a list of variants.

    Uses the integrator to identify heterozygous variants.

    Parameters:
      refWindow            -- 3-tuple (refId, refStart, refEnd); passed
                              through to `variantsFromAlignment`.
      refSequenceInWindow  -- reference sequence for the window.
      cssSequenceInWindow  -- consensus sequence for the window.
      cssQvInWindow        -- optional per-base consensus confidences; must
                              be given together with `siteCoverage`.
      siteCoverage         -- optional per-site coverage; must be given
                              together with `cssQvInWindow`.
      aligner              -- accepted for interface compatibility; not
                              used inside this function.
      ai                   -- optional integrator.  When given, `str(ai)`
                              must equal `cssSequenceInWindow`, and
                              heterozygous sites are folded into the
                              consensus before it is aligned.

    Returns whatever `variantsFromAlignment` returns for the (possibly
    diploid) consensus.

    Raises AssertionError if exactly one of `cssQvInWindow` /
    `siteCoverage` is supplied, or if `ai` does not match the consensus.
    """
    assert (cssQvInWindow is None) == (siteCoverage is None)  # Both or none

    # The unpacked values are not used below; the unpacking is kept so a
    # malformed window still fails here rather than further downstream.
    refId, refStart, refEnd = refWindow

    if ai is not None:
        #
        # Hunting diploid variants:
        # 1. find confident heterozygous sites;
        # 2. build a "diploid consensus" using IUPAC encoding
        #    for het sites; mark cssQv accordingly
        # 3. align diploid consensus to reference
        # 4. extract and decorate variants
        #
        assert str(ai) == cssSequenceInWindow
        iupacMutations = []  # List of (Mutation, confidence)
        for pos in xrange(0, ai.Length()):
            ds = cc.IsSiteHeterozygous(scoresForPosition(ai, pos), 40)
            if not ds:
                continue
            # Index 0 is the "no mutation" allele, mirroring the zero
            # baseline column that scoresForPosition puts first.
            muts = [None] + list(allSingleBaseMutations(cssSequenceInWindow,
                                                        positions=[pos]))
            mut0 = muts[ds.Allele0]
            mut1 = muts[ds.Allele1]
            cssBase = cssSequenceInWindow[pos]
            packedMut = packMuts(cssBase, mut0, mut1)
            iupacMutations.append((packedMut, 40))

        # Create diploidCss by applying mutations, meanwhile updating the
        # confidence vector accordingly.
        diploidCss = cc.ApplyMutations([pair[0] for pair in iupacMutations],
                                       cssSequenceInWindow)

        # Only touch the confidence vector when the caller supplied one.
        # Previously a missing cssQvInWindow crashed here with a TypeError
        # (slice assignment on / len() of None).
        if cssQvInWindow is not None:
            diploidQv = list(cssQvInWindow)  # copy; leave caller's data alone
            for (mut, conf) in iupacMutations:
                # NOTE(review): assumes each packed mutation spans exactly
                # as many bases as it replaces (a substitution), so that no
                # running offset is needed; the length assertion below
                # catches violations.
                diploidQv[mut.Start():mut.End()] = [conf]
            assert len(diploidCss) == len(diploidQv)
            cssQvInWindow = diploidQv

        cssSequenceInWindow = diploidCss

    return variantsFromAlignment(refWindow,
                                 refSequenceInWindow, cssSequenceInWindow,
                                 cssQvInWindow, siteCoverage)