def compare_mapping(oufilnam1, oufilnam2):
    """Check that two CIGAR output files describe the same mappings.

    Records are read from both files in lockstep.  The run is aborted via
    exit() when
      - the read names of corresponding records differ,
      - corresponding records differ although both have a mapping
        quality above 5,
      - the two files hold a different number of records, or
      - the number of records is not 20,000.

    oufilnam1 -- name of the first CIGAR file (A)
    oufilnam2 -- name of the second CIGAR file (B)
    """
    from formats import Cigar, openFile

    ctr1 = 0
    ctr2 = 0
    infil1 = openFile(oufilnam1, 'r')
    try:
        infil2 = openFile(oufilnam2, 'r')
        try:
            cig1 = Cigar()
            cig2 = Cigar()
            # A true return value of Cigar.next() ends the reading loop.
            while True:
                if cig1.next(infil1):
                    # A has no further record.  Probe B once so that a
                    # surplus record in B is reflected in ctr2 and caught
                    # by the count comparison below.
                    if not cig2.next(infil2):
                        ctr2 += 1
                    break
                ctr1 += 1
                if cig2.next(infil2):
                    break
                ctr2 += 1
                if cig1.qnam != cig2.qnam:
                    exit("readnames don't match: '%s' vs '%s'" %
                         (cig1.qnam, cig2.qnam))
                # Differences are tolerated unless both mappings have a
                # mapping quality above 5.
                if cig1 != cig2 and cig1.mapq > 5 and cig2.mapq > 5:
                    exit("mappings don't match for read '%s'" % cig1.qnam)
        finally:
            # Close the files on every path, including exit().
            infil2.close()
    finally:
        infil1.close()

    if ctr1 != ctr2:
        exit("Expected the same number of mates, got %i (A) and %i (B)" %
             (ctr1, ctr2))

    if ctr1 != 20000:
        exit("Expected 20,000 reads, got %i." % ctr1)
def cmpCigarFiles(cigfilA, cigfilB, is_verbose=True):
    """Compare two CIGAR files pair by pair.

    Pairs of records are fetched from both files with getNextCigarPair()
    until a fetch from either file reports failure.  Corresponding mates
    that differ are printed when is_verbose is set; if both differing
    mates have a mapping quality above the module-level MAPQ_THRESH the
    run is aborted via exit().

    cigfilA    -- name of the first CIGAR file
    cigfilB    -- name of the second CIGAR file
    is_verbose -- print every pair of non-matching records

    Returns a tuple (isOK, ctr): isOK is true if the fetch that ended the
    loop also signalled end of file, ctr is the number of pairs compared.
    """
    from formats import Cigar, openFile, getNextCigarPair

    cigA1 = Cigar()
    cigA2 = Cigar()
    cigB1 = Cigar()
    cigB2 = Cigar()

    filA = openFile(cigfilA, 'r')
    try:
        filB = openFile(cigfilB, 'r')
        try:
            ctr = 0
            while True:
                (isOK, isEOF) = getNextCigarPair(filA, cigA1, cigA2)
                if not isOK:
                    break
                (isOK, isEOF) = getNextCigarPair(filB, cigB1, cigB2)
                if not isOK:
                    break

                # First mates are compared before second mates.
                for (cigA, cigB) in ((cigA1, cigB1), (cigA2, cigB2)):
                    if cigA != cigB:
                        if is_verbose:
                            # Single parenthesised argument: same output
                            # as the print statement.
                            print("Not matching:\n%s\n%s" %
                                  (cigA.lin, cigB.lin))
                        if cigA.mapq > MAPQ_THRESH and \
                           cigB.mapq > MAPQ_THRESH:
                            exit("Discrepancy:\n%s\n%s" %
                                 (cigA.lin, cigB.lin))
                ctr += 1
        finally:
            # Both files are closed on every path, including exit().
            filB.close()
    finally:
        filA.close()

    # A failed fetch that coincides with end of file is a regular stop.
    if not isOK and isEOF:
        isOK = True

    return isOK, ctr
def assess_mapping(oufilnam):
    """Check pair count and number of non-proper mates in a CIGAR file.

    Pairs are fetched with getNextCigarPair() for as long as it reports
    success.  Every mate whose mapping class is not 'A' is counted.  The
    run is aborted via exit() unless exactly 10,000 pairs were found and
    the count stays within the module-level MAXNUM_NONPROPER_PAIRS.

    oufilnam -- name of the CIGAR file to assess
    """
    from formats import Cigar, getNextCigarPair, openFile

    cigfil = openFile(oufilnam, 'r')
    mate1 = Cigar()
    mate2 = Cigar()
    n_pairs = 0
    n_nonproper = 0

    while True:
        (fetched, _at_eof) = getNextCigarPair(cigfil, mate1, mate2)
        if not fetched:
            break
        n_pairs += 1
        # Each mate is tested separately, so one pair can add up to two.
        for mate in (mate1, mate2):
            if mate.mapcls != 'A':
                n_nonproper += 1

    cigfil.close()

    if n_pairs != 10000:
        exit("Found %i pairs, but expected 10,000." % n_pairs)

    if n_nonproper > MAXNUM_NONPROPER_PAIRS:
        exit("Found %i non-proper pairs. Expected max. %i" %
             (n_nonproper, MAXNUM_NONPROPER_PAIRS))
def prep_fasta(filnam):
    """Write the sequences of the module-level READSEQS to a file.

    Every element of every entry in READSEQS is formatted with '%c' and
    the results are written to the file back to back, without any
    separator added by this function.

    filnam -- name of the file to (over)write
    """
    from testdata import openFile

    oufil = openFile(filnam, 'w')
    try:
        for seq in READSEQS:
            # One write per sequence instead of one per character.
            oufil.write("".join("%c" % ch for ch in seq))
    finally:
        # Close the file even if formatting or writing fails.
        oufil.close()
def checkLabels(cigfilnam, label_pairs, mateno_check=True):
    """Check the mapping class labels of consecutive read pairs.

    For every entry of label_pairs one pair of records is fetched from
    the CIGAR file with getNextCigarPair().  The mapping classes of the
    two mates must equal the first and second element of the entry; an
    expected label of '?' accepts any class.  The run is aborted via
    exit() if a pair cannot be fetched, if end of file is signalled, or
    if a label does not match.

    cigfilnam    -- name of the CIGAR file
    label_pairs  -- sequence of expected (label mate 1, label mate 2)
    mateno_check -- passed through to getNextCigarPair()
    """
    from formats import Cigar, openFile, getNextCigarPair

    cigA = Cigar()
    cigB = Cigar()

    infil = openFile(cigfilnam)
    try:
        for lb in label_pairs:
            (isOK, isEOF) = getNextCigarPair(infil, cigA, cigB, mateno_check)
            if (not isOK) or isEOF:
                exit("missing lines in cigar file %s" % cigfilnam)
            if (cigA.mapcls != lb[0] and lb[0] != '?') or \
               (cigB.mapcls != lb[1] and lb[1] != '?'):
                exit("unexpeced cigar mapping labels %s%s (%s) for read pair %s" %
                     (cigA.mapcls, cigB.mapcls, lb, cigA.qnam))
    finally:
        # The file used to be left open; close it on every path.
        infil.close()
def compare_result(cigfilnam):
    """Compare the records of a CIGAR file with the expected RESULTS.

    One record is read per entry of the module-level RESULTS.  An entry
    is indexed as (query start, query end, sense, subject name, subject
    start, subject end, score).  The run is aborted via sys.exit() at the
    first record that is flagged as not ok or deviates from its entry.

    cigfilnam -- name of the CIGAR file
    """
    from formats import Cigar, openFile
    from sys import exit

    def deviates(rec, expected):
        # The tests run in a fixed order and stop at the first failure, so
        # no further attribute is read once a record has been rejected.
        if not rec.ok:
            return True
        if rec.qseg != (expected[0], expected[1]):
            return True
        if rec.sense != expected[2]:
            return True
        if rec.snam != expected[3]:
            return True
        if rec.sseg != (expected[4], expected[5]):
            return True
        return rec.swatscor != expected[6]

    record = Cigar()
    cigfil = openFile(cigfilnam)

    for expected in RESULTS:
        record.next(cigfil)
        if deviates(record, expected):
            exit("Unexpected result for read '%s'" % record.qnam)

    cigfil.close()