def scoresForPosition(ai, pos):
    """
    Build the per-read score matrix for the single-base mutations at `pos`.

    The result has one row per read (`ai.NumReads()` rows).  Column 0 is
    all zeros and stands for "no mutation"; each following column holds
    `ai.ReadLLs(mut)` for one mutation yielded by
    `allSingleBaseMutations(str(ai), positions=[pos])`, in that order.
    The matrix is returned as a float32 numpy array.
    """
    candidates = allSingleBaseMutations(str(ai), positions=[pos])

    # The leading zero column is the unmutated template.
    # NOTE(review): presumably ReadLLs scores are relative to it -- confirm.
    columns = [[0] * ai.NumReads()]
    for candidate in candidates:
        columns.append(ai.ReadLLs(candidate))

    return np.column_stack(columns).astype(np.float32)
def variantsFromConsensus(refWindow, refSequenceInWindow, cssSequenceInWindow,
                          cssQvInWindow=None, siteCoverage=None, aligner="affine",
                          ai=None):
    """
    Compare the consensus and the reference in this window, returning
    a list of variants.

    If an integrator `ai` is given, it is used to identify heterozygous
    sites; those sites are rewritten in the consensus (and, when QVs are
    available, in the confidence vector) before the consensus is handed
    to `variantsFromAlignment`.

    Arguments:
      refWindow            -- (refId, refStart, refEnd) triple
      refSequenceInWindow  -- reference sequence for the window
      cssSequenceInWindow  -- consensus sequence for the window; must equal
                              `str(ai)` when `ai` is given
      cssQvInWindow        -- per-base consensus confidences, or None
      siteCoverage         -- per-site coverage, or None; must be None
                              exactly when `cssQvInWindow` is None
      aligner              -- accepted for interface compatibility; not
                              used by this function
      ai                   -- integrator used for diploid calling, or None
                              to skip diploid calling

    Returns whatever `variantsFromAlignment` returns for the (possibly
    diploid) consensus.
    """
    assert (cssQvInWindow is None) == (siteCoverage is None)  # Both or none

    # The components are not used below; the unpacking is kept so that a
    # window that is not a 3-tuple still fails here, as it always has.
    refId, refStart, refEnd = refWindow

    if ai is not None:
        #
        # Hunting diploid variants:
        # 1. find confident heterozygous sites;
        # 2. build a "diploid consensus" using IUPAC encoding
        #    for het sites; mark cssQv accordingly
        # 3. align diploid consensus to reference
        # 4. extract and decorate variants
        #
        assert str(ai) == cssSequenceInWindow

        # Single value used both as the second argument of
        # cc.IsSiteHeterozygous and as the confidence recorded for each
        # heterozygous call.
        hetConfidence = 40

        iupacMutations = []  # List of (Mutation, confidence)
        for pos in range(ai.Length()):
            ds = cc.IsSiteHeterozygous(scoresForPosition(ai, pos), hetConfidence)
            if not ds:
                continue
            # Index 0 is the "no mutation" allele, matching the leading
            # zero column produced by scoresForPosition.
            muts = [None] + list(allSingleBaseMutations(cssSequenceInWindow,
                                                        positions=[pos]))
            packedMut = packMuts(cssSequenceInWindow[pos],
                                 muts[ds.Allele0],
                                 muts[ds.Allele1])
            iupacMutations.append((packedMut, hetConfidence))

        # Create diploidCss by applying mutations, meanwhile updating the
        # confidence vector accordingly.
        diploidCss = cc.ApplyMutations([mut for (mut, _) in iupacMutations],
                                       cssSequenceInWindow)

        if cssQvInWindow is None:
            # No confidences were supplied, so there is nothing to update
            # or to length-check; previously this path raised TypeError.
            diploidQv = None
        else:
            diploidQv = list(cssQvInWindow)
            # NOTE(review): positions are used as-is, i.e. no offset is
            # applied for earlier length-changing mutations (the original
            # offset variable was never updated from 0).  Confirm whether
            # packed mutations can change the sequence length.
            for (mut, conf) in iupacMutations:
                diploidQv[mut.Start():mut.End()] = [conf]
            assert len(diploidCss) == len(diploidQv)

        cssSequenceInWindow = diploidCss
        cssQvInWindow = diploidQv

    variants = variantsFromAlignment(refWindow, refSequenceInWindow,
                                     cssSequenceInWindow,
                                     cssQvInWindow, siteCoverage)
    return variants