Example #1
0
    'differ. Quitting.', file=sys.stderr)
    exit(1)
# Drop every alignment column in which the reference has a gap and the
# consensus has either a gap or the '?' character: such columns carry no
# information for either sequence. The builtin zip is used rather than
# itertools.izip (which exists only in Python 2) so that this runs on both
# major Python versions; like izip, it stops at the end of the shorter string.
# Kept bases are collected in lists and joined once at the end, instead of
# growing two strings one character at a time.
KeptConsensusBases = []
KeptRefBases = []
for ConsensusBase, RefBase in zip(ConsensusAsString, RefAsString):
    if RefBase == '-' and ConsensusBase in ('-', '?'):
        continue
    KeptConsensusBases.append(ConsensusBase)
    KeptRefBases.append(RefBase)
NewConsensusAsString = ''.join(KeptConsensusBases)
NewRefAsString = ''.join(KeptRefBases)
ConsensusAsString = NewConsensusAsString
RefAsString = NewRefAsString
ref.seq = Seq.Seq(RefAsString)

# Replaces gaps that border "no coverage" by "no coverage".
ConsensusAsString = PropagateNoCoverageChar(ConsensusAsString)

# Check that the consensus ID does not clash with the ID of any sequence in
# the file of other sequences to be added. The file is read inside a 'with'
# block so that its handle is closed as soon as the IDs have been collected.
with open(args.OtherSeqsToBeAdded) as OtherSeqsHandle:
    IDsOfSeqsToBeAdded = [seq.id for seq in
                          SeqIO.parse(OtherSeqsHandle, 'fasta')]
if consensus.id in IDsOfSeqsToBeAdded:
    print('A sequence in',
          args.OtherSeqsToBeAdded,
          'is called',
          consensus.id + ', like the consensus in',
          args.SeqPairWithMissingCov +
          '. Rename to avoid such a clash. Quitting.',
          file=sys.stderr)
    # sys.exit rather than the site-provided exit(), which is intended for
    # interactive use and is absent when Python runs without the site module.
    sys.exit(1)

# Align
Example #2
0
        # Choose what to append to the consensus for the current position.
        # NOTE(review): the enclosing loop and the definitions of
        # CallMostCommon, BasesWithMaxCount, MaxCount, counts and coverage are
        # outside this excerpt - confirm their meaning against the full file.
        if CallMostCommon:
            BaseToCall = CallAmbigBaseIfNeeded(BasesWithMaxCount, coverage)

        else:
            # The threshold count is the fraction args.MinFracToCall of the
            # coverage at this position.
            CountToCallBase = coverage * args.MinFracToCall
            # This next 'if' would also be covered by the 'else', but the explicit
            # 'if' scope is faster and is what is usually needed.
            # (Presumably MaxCount * len(BasesWithMaxCount) is the combined
            # count of all bases tied for the highest count - TODO confirm.)
            if MaxCount * len(BasesWithMaxCount) >= CountToCallBase:
                BaseToCall = CallAmbigBaseIfNeeded(BasesWithMaxCount, coverage)
            else:
                BaseToCall = CallEnoughBases(counts, CountToCallBase, coverage)

        # Extend the consensus by whatever was called for this position.
        consensus += BaseToCall

# Replaces gaps that border "no coverage" by "no coverage".
consensus = PropagateNoCoverageChar(consensus)

# Skip positions at which the ref has a gap and the consensus has a gap or
# missing cov ('?'). The builtin zip is used rather than itertools.izip (which
# exists only in Python 2) so that this runs on both major Python versions;
# like izip, it stops at the end of the shorter sequence. Kept bases are
# collected in lists and joined once, instead of growing two strings one
# character at a time.
if not args.ref_seq_missing:
    KeptConsensusBases = []
    KeptRefBases = []
    for ConsensusBase, RefBase in zip(consensus, RefSeq):
        if RefBase == GapChar and ConsensusBase in ('?', GapChar):
            continue
        KeptConsensusBases.append(ConsensusBase)
        KeptRefBases.append(RefBase)
    NewConsensus = ''.join(KeptConsensusBases)
    NewRefSeq = ''.join(KeptRefBases)
    consensus = NewConsensus
    RefSeq = NewRefSeq
Example #3
0
        # Handle one base (SeqBase) of the sequence being added.
        # NOTE(review): the enclosing loop and the code that sets SeqBase and
        # RefInMainHasUniqueInsertion are outside this excerpt.
        RefPosition = PositionInSeqToAdd
        if not RefInMainHasUniqueInsertion:
            # The base is kept: append it to the output sequence, append the
            # corresponding base of the reference from the pair, and advance
            # the position counter of the final alignment.
            SeqToAdd_WithGaps += SeqBase
            ReferenceWithoutInsertions += RefSeqFromPair[PositionInSeqToAdd]
            PositionInFinalAln += 1
            # Log line: final-alignment position, RefPosition + 1, base.
            TranslationRecord += '\n' +str(PositionInFinalAln) +',' +\
            str(RefPosition+1) +',' +SeqBase
        else:
            # The base is not added to the output; the log line carries '-'
            # in place of a final-alignment position.
            TranslationRecord += '\n-,' + str(RefPosition + 1) + ',' + SeqBase
        PositionInSeqToAdd += 1

# Write the accumulated translation record to the log file, if one was
# specified. None is tested by identity, the idiomatic check for "option not
# given".
if args.log_file is not None:
    with open(args.log_file, 'w') as f:
        f.write(TranslationRecord)

FinalSeqToAdd = PropagateNoCoverageChar(SeqToAdd_WithGaps)


# Thanks Stackoverflow:
def insert_newlines(string, every=FastaSeqLineLength):
    """Return `string` broken into lines of at most `every` characters.

    The string is cut into consecutive pieces of `every` characters (the last
    piece may be shorter) and the pieces are joined with newline characters.
    No trailing newline is added, and an empty string is returned unchanged.
    """
    pieces = (string[start:start + every]
              for start in range(0, len(string), every))
    return '\n'.join(pieces)


print('>' + SeqToAddName)
print(insert_newlines(FinalSeqToAdd))
#print('>'+RefSeqName+'_UniqueInsertionsExcised')
#print(insert_newlines(ReferenceWithoutInsertions))
Example #4
0
# For every position of the alignment, count how many contigs span it. A
# contig spans everything from its start to its end inclusive, so gaps inside
# a contig count as coverage, while positions between contigs stay at 0.
ContigCoverageByPosition = [0] * AlignmentLength
for start, end in ContigStartsAndEnds.values():
  for position in range(start, end + 1):
    ContigCoverageByPosition[position] += 1

# Compare contigs to consensus to count positions in the four categories
# ('cats') described in the help above. Convert all bases to upper case, and
# replace any gap char that neighbours a '?' char in the consensus by a '?'.
if ConsensusName != None:
  categories = []
  ConsensusSeq = ConsensusSeq.upper()
  ConsensusSeq = PropagateNoCoverageChar(ConsensusSeq)
  FlattenedContigsSeq = FlattenedContigsSeq.upper()
  for ContigName in ContigDict:
    ContigDict[ContigName] = ContigDict[ContigName].upper()

  if CheckContigSNPs:
    NumPosLongestContigCorrect = 0
    NumPosLongestContigIncorrect = 0
    for pos in range(0,AlignmentLength):
      if ContigCoverageByPosition[pos] < 2:
        continue
      ContigBases = []
      for ContigName, ContigSeq in ContigDict.items():
        StartOfContig, EndOfContig = ContigStartsAndEnds[ContigName]
        if StartOfContig <= pos <= EndOfContig:
          ContigBase = ContigSeq[pos]
Example #5
0
            BaseToCall = CallAmbigBaseIfNeeded(BasesWithMaxCount, coverage)

        else:
            CountToCallBase = coverage * args.MinFracToCall
            # This next 'if' would also be covered by the 'else', but the explicit
            # 'if' scope is faster and is what is usually needed.
            if MaxCount * len(BasesWithMaxCount) >= CountToCallBase:
                BaseToCall = CallAmbigBaseIfNeeded(BasesWithMaxCount, coverage)
            else:
                BaseToCall = CallEnoughBases(counts, CountToCallBase, coverage)

        consensus += BaseToCall

# Replaces gaps that border "no coverage" by "no coverage", unless the user
# asked for such gaps to be kept.
if not args.keep_gaps_by_missing:
    consensus = PropagateNoCoverageChar(consensus)

# Skip positions at which the ref has a gap and the consensus has a gap or
# missing cov ('?'). Kept bases are collected in lists and joined once at the
# end: growing two strings one character at a time is not guaranteed to be
# linear-time. zip stops at the end of the shorter sequence.
if not args.ref_seq_missing:
    KeptConsensusBases = []
    KeptRefBases = []
    for ConsensusBase, RefBase in zip(consensus, RefSeq):
        if RefBase == GapChar and ConsensusBase in ('?', GapChar):
            continue
        KeptConsensusBases.append(ConsensusBase)
        KeptRefBases.append(RefBase)
    NewConsensus = ''.join(KeptConsensusBases)
    NewRefSeq = ''.join(KeptRefBases)
    consensus = NewConsensus
    RefSeq = NewRefSeq