cig.sseg != (result[4], result[5]) or \ cig.swatscor != result[6]: exit("Unexpected result for read '%s'" % cig.qnam) infil.close() def prep_fasta(filnam): from testdata import openFile infil = openFile(filnam, 'w') for seq in READSEQS: for i in range(len(seq)): infil.write("%c" % seq[i]) infil.close() if __name__ == '__main__': from testdata import DataFiles df = DataFiles() #refnam = df.unpack(REFSEQ) #refnam = df.addTMP(REFSEQ) refnam = df.joinData(REFSEQ) indexnam = df.addIndex(TMPFIL_PREFIX) readfilnam = df.addTMP(TMPFIL_PREFIX + ".fq") oufilnam = df.addTMP(TMPFIL_PREFIX + ".cig") prep_fasta(readfilnam) smalt_index(df,indexnam, refnam, KMER, NSKIP) smalt_map(df,oufilnam, indexnam, readfilnam) compare_result(oufilnam) df.cleanup()
def smalt_map(df, oufilnam, indexnam, readfil, typ="fastq", matefil=""):
    """Invoke the 'map' sub-command of the program under test.

    The argument vector handed to ``df.call`` is::

        PROGNAM map -r -1 -F <typ> -o <oufilnam> <indexnam> <readfil> <matefil>

    ``matefil`` is always appended, so with the default value an empty
    string is passed as the last argument.  ``df`` only needs to provide a
    ``call(args, message)`` method; "when mapping" is the message passed
    along with the command.
    """
    # Function-local imports kept from the original; neither name is used
    # in this function body.
    from sys import exit
    from subprocess import call

    option_part = (PROGNAM, 'map', '-r', '-1', '-F', typ, '-o', oufilnam)
    file_part = (indexnam, readfil, matefil)
    df.call(option_part + file_part, "when mapping")


if __name__ == '__main__':
    from testdata import DataFiles, areFilesIdentical

    df = DataFiles()

    # Input files that ship with the test data.
    refnam = df.joinData(REF_FASTA_NAME)
    readnamA = df.joinData(READ_PREFIX + "_nonam_1.fq.gz")
    readnamB = df.joinData(READ_PREFIX + "_nonam_2.fq.gz")
    bamnam = df.joinData(READ_PREFIX + ".bam")
    samnam = df.unpack(READ_PREFIX + ".sam")

    # Index and output files produced by this script.
    indexnam = df.addIndex(TMPFIL_PREFIX)
    oufilnam_fastq = df.addTMP(TMPFIL_PREFIX + "fastq.sam")
    oufilnam_bam = df.addTMP(TMPFIL_PREFIX + "bam.sam")
    oufilnam_sam = df.addTMP(TMPFIL_PREFIX + "sam.sam")
    oufilnam2_sam = df.addTMP(TMPFIL_PREFIX + "sam2.sam")

    smalt_index(df, indexnam, refnam, KMER, NSKIP)

    # Paired FASTQ input: the second mate is passed as 'matefil'.
    smalt_map(df, oufilnam_fastq, indexnam, readnamA, "fastq", readnamB)
    # BAM input: no separate mate file, so 'matefil' keeps its default.
    smalt_map(df, oufilnam_bam, indexnam, bamnam, "bam")
if cmp(cig1,cig2) and cig1.mapq > 5 and cig2.mapq > 5: exit("mappings don't match for read '%s'" % \ cig1.qnam) infil2.close() infil1.close() if ctr1 != ctr1: exit("Expected the same number of mates, got %i (A) and %i (B)" % \ ctr1, ctr2) if ctr1 != 20000: exit("Expected 20,000 reads, got %i." % ctr1) if __name__ == '__main__': from testdata import DataFiles df = DataFiles() refnam = df.joinData(REF_FASTA_NAME) readnamA = df.joinData(READ_PREFIX + "_1.fq.gz") readnamB = df.joinData(READ_PREFIX + "_2.fq.gz") indexnam = df.addIndex(TMPFIL_PREFIX) samplenam1 = df.addTMP(TMPFIL_PREFIX + ".1.txt") samplenam2 = df.addTMP(TMPFIL_PREFIX + ".2.txt") oufilnam1 = df.addTMP(TMPFIL_PREFIX + ".1.cig") oufilnam2 = df.addTMP(TMPFIL_PREFIX + ".2.cig") oufilnam3 = df.addTMP(TMPFIL_PREFIX + ".3.cig") smalt_check(df,readnamA, readnamB) smalt_index(df,indexnam, refnam, KMER, NSKIP)
okflgs[expected_tup.index(observed_tup)] = True linctr = linctr + 1 for i in range(n_tup): if not okflgs[i]: print "ERROR: could not find tuple: ", expected_tup[i] allok = False if not allok: exit("ERROR when checking. Test not passed.") if __name__ == '__main__': from testdata import DataFiles df = DataFiles() refnam = df.joinData(REF_FASTA_NAME) indexnam = df.addIndex(TMPFIL_PREFIX) readfilA = df.addTMP(TMPFIL_PREFIX + "_1.fa") readfilB = df.addTMP(TMPFIL_PREFIX + "_2.fa") oufilnam = df.addTMP(TMPFIL_PREFIX + ".cig") makeFastqPair(readfilA, readfilB) smalt_index(df, indexnam, refnam, KMER, NSKIP) smalt_map(df, oufilnam, indexnam, readfilA, readfilB, ['-p']) checkOutput(oufilnam, MAPPED_CIGAR) df.cleanup() exit(0)
if cigA2 != cigB2: if is_verbose: print "Not matching:\n%s\n%s" % (cigA2.lin, cigB2.lin) if cigA2.mapq > MAPQ_THRESH and cigB2.mapq > MAPQ_THRESH: exit("Discrepancy:\n%s\n%s" % (cigA2.lin, cigB2.lin)) ctr = ctr + 1 if not isOK and isEOF: isOK = True return isOK, ctr if __name__ == '__main__': from testdata import DataFiles, areFilesIdentical df = DataFiles() refnam = df.joinData(DATA[0]) readfilnamA = df.joinData(DATA[1]) readfilnamB = df.joinData(DATA[2]) n_pairs_expected = DATA[3] indexnam = df.addIndex(TMPFIL_PREFIX) oufilnam_ref = df.addTMP(TMPFIL_PREFIX + ".n0.cig") oufilnam_thread = df.addTMP(TMPFIL_PREFIX + ".n%i.cig" % NTHREADS) smalt_index(df, indexnam, refnam, KMER, NSKIP); smalt_map(df, oufilnam_ref, indexnam, readfilnamA, readfilnamB); smalt_map(df, oufilnam_thread, indexnam, readfilnamA, readfilnamB, ["-n", "%i" % NTHREADS, "-O"]);
nmtag = sam.tags["NM"] if nmtag: tagstr = "NM:%s:%s" % nmtag if tagstr != rdat[2]: df.exitErr("Unexpected tag '%s' (expected '%s')" % (tagstr, rdat[2])) linctr = linctr + 1 if linctr < len(readdat): df.exitErr("CIGAR strings incomplete"); infil.close() if __name__ == '__main__': from testdata import DataFiles df = DataFiles() reffilnam = df.addTMP(TMPFIL_PREFIX + "ref.fa") writeFASTAref(reffilnam) indexnam = df.addIndex(TMPFIL_PREFIX) smalt_index(df, indexnam, reffilnam, KMER, NSKIP) readfilnam = df.addTMP(TMPFIL_PREFIX + "read.fa") mate1filnam = df.addTMP(TMPFIL_PREFIX_PAIRED + "mate1.fa") mate2filnam = df.addTMP(TMPFIL_PREFIX_PAIRED + "mate2.fa") writeFASTAreads(READSEQ, readfilnam) writeFASTAreadPairs(READSEQ_PAIR, mate1filnam, mate2filnam) samoufilnam = df.addTMP(TMPFIL_PREFIX + "out.sam")
def smalt_map(df, oufilnam, indexnam, readfil, matefil, typ="fastq", flags=None):
    """Invoke the 'map' sub-command of the program under test.

    The argument vector handed to ``df.call`` is::

        PROGNAM map [flags...] -f <typ> -o <oufilnam> <indexnam> <readfil> <matefil>

    Parameters:
        df       -- object providing ``call(args, message)``.
        oufilnam -- value passed after ``-o``.
        indexnam -- first positional argument after the options.
        readfil  -- second positional argument.
        matefil  -- third positional argument.
        typ      -- value passed after ``-f`` (default "fastq").
        flags    -- optional sequence of extra command-line arguments that
                    are inserted directly after 'map'.  ``None`` (the
                    default) or an empty sequence adds nothing.
    """
    # 'flags' defaults to None rather than a shared list object so the
    # default can never be mutated across calls.
    tup = [PROGNAM, 'map']
    if flags:
        tup.extend(flags)
    tup.extend(['-f', typ, '-o', oufilnam, indexnam, readfil, matefil])
    df.call(tup, "when mapping")


if __name__ == '__main__':
    from testdata import DataFiles

    df = DataFiles()

    # Input files that ship with the test data.
    refnam = df.joinData(FNAM_REF)
    readnamA = df.joinData(FNAM_READ1)
    readnamB = df.joinData(FNAM_READ2)

    # Index and output file produced by this script.
    indexnam = df.addIndex(TMPFIL_PREFIX)
    oufilnam = df.addTMP(TMPFIL_PREFIX + ".sam")

    smalt_index(df, indexnam, refnam, KMER, NSKIP)
    # The script makes no check of the output file itself; df.call is
    # presumably responsible for reporting a failed run -- confirm in
    # testdata.
    smalt_map(df, oufilnam, indexnam, readnamA, readnamB, "sam", ["-x"])

    #print "Test ok."
    df.cleanup()
    exit()
tagstr = "NM:%s:%s" % nmtag if tagstr != rdat[2]: df.exitErr("Unexpected tag '%s' (expected '%s')" % (tagstr, rdat[2])) linctr = linctr + 1 if linctr < len(readdat): df.exitErr("CIGAR strings incomplete") infil.close() if __name__ == '__main__': from testdata import DataFiles df = DataFiles() reffilnam = df.addTMP(TMPFIL_PREFIX + "ref.fa") writeFASTAref(reffilnam) indexnam = df.addIndex(TMPFIL_PREFIX) smalt_index(df, indexnam, reffilnam, KMER, NSKIP) readfilnam = df.addTMP(TMPFIL_PREFIX + "read.fa") mate1filnam = df.addTMP(TMPFIL_PREFIX_PAIRED + "mate1.fa") mate2filnam = df.addTMP(TMPFIL_PREFIX_PAIRED + "mate2.fa") writeFASTAreads(READSEQ, readfilnam) writeFASTAreadPairs(READSEQ_PAIR, mate1filnam, mate2filnam) samoufilnam = df.addTMP(TMPFIL_PREFIX + "out.sam") bamoufilnam = df.addTMP(TMPFIL_PREFIX + "out.bam")
exit("Discrepancy:\n%s\n%s" % (cigA1.lin, cigB1.lin)) if cigA2 != cigB2: if is_verbose: print "Not matching:\n%s\n%s" % (cigA2.lin, cigB2.lin) if cigA2.mapq > MAPQ_THRESH and cigB2.mapq > MAPQ_THRESH: exit("Discrepancy:\n%s\n%s" % (cigA2.lin, cigB2.lin)) ctr = ctr + 1 if not isOK and isEOF: isOK = True return isOK, ctr if __name__ == '__main__': from testdata import DataFiles, areFilesIdentical df = DataFiles() refnam = df.joinData(DATA[0]) readfilnamA = df.joinData(DATA[1]) readfilnamB = df.joinData(DATA[2]) n_pairs_expected = DATA[3] indexnam = df.addIndex(TMPFIL_PREFIX) oufilnam_ref = df.addTMP(TMPFIL_PREFIX + ".n0.cig") oufilnam_thread = df.addTMP(TMPFIL_PREFIX + ".n%i.cig" % NTHREADS) smalt_index(df, indexnam, refnam, KMER, NSKIP) smalt_map(df, oufilnam_ref, indexnam, readfilnamA, readfilnamB) smalt_map(df, oufilnam_thread, indexnam, readfilnamA, readfilnamB, ["-n", "%i" % NTHREADS, "-O"])
from subprocess import call tup = [PROGNAM, 'map'] if len(flags) > 0: tup.extend(flags) tup.extend([ '-f', typ, '-o', oufilnam, indexnam, readfil, matefil]) df.call(tup, "when mapping") if __name__ == '__main__': from testdata import DataFiles df = DataFiles() refnam = df.joinData(FNAM_REF) readnamA = df.joinData(FNAM_READ1) readnamB = df.joinData(FNAM_READ2) indexnam = df.addIndex(TMPFIL_PREFIX) oufilnam = df.addTMP(TMPFIL_PREFIX + ".sam") smalt_index(df,indexnam, refnam, KMER, NSKIP) smalt_map(df,oufilnam, indexnam, readnamA, readnamB, "sam", ["-x"]) #print "Test ok." df.cleanup() exit()
exit("Unexpected result for read '%s'" % cig.qnam) infil.close() def prep_fasta(filnam): from testdata import openFile infil = openFile(filnam, 'w') for seq in READSEQS: for i in range(len(seq)): infil.write("%c" % seq[i]) infil.close() if __name__ == '__main__': from testdata import DataFiles df = DataFiles() #refnam = df.unpack(REFSEQ) #refnam = df.addTMP(REFSEQ) refnam = df.joinData(REFSEQ) indexnam = df.addIndex(TMPFIL_PREFIX) readfilnam = df.addTMP(TMPFIL_PREFIX + ".fq") oufilnam = df.addTMP(TMPFIL_PREFIX + ".cig") prep_fasta(readfilnam) smalt_index(df, indexnam, refnam, KMER, NSKIP) smalt_map(df, oufilnam, indexnam, readfilnam) compare_result(oufilnam) df.cleanup()
from sys import exit from subprocess import call tup = (PROGNAM, 'map', '-r', '-1', '-F', typ, '-o', oufilnam, indexnam, readfil, matefil) df.call(tup, "when mapping") if __name__ == '__main__': from testdata import DataFiles, areFilesIdentical df = DataFiles() refnam = df.joinData(REF_FASTA_NAME) readnamA = df.joinData(READ_PREFIX + "_nonam_1.fq.gz") readnamB = df.joinData(READ_PREFIX + "_nonam_2.fq.gz") bamnam = df.joinData(READ_PREFIX + ".bam") samnam = df.unpack(READ_PREFIX + ".sam") indexnam = df.addIndex(TMPFIL_PREFIX) oufilnam_fastq = df.addTMP(TMPFIL_PREFIX + "fastq.sam") oufilnam_bam = df.addTMP(TMPFIL_PREFIX + "bam.sam") oufilnam_sam = df.addTMP(TMPFIL_PREFIX + "sam.sam") oufilnam2_sam = df.addTMP(TMPFIL_PREFIX + "sam2.sam") smalt_index(df,indexnam, refnam, KMER, NSKIP) smalt_map(df,oufilnam_fastq, indexnam, readnamA, "fastq", readnamB) smalt_map(df,oufilnam_bam, indexnam, bamnam, "bam")
def process(df, indexnam, oufilnam, readnamA, readnamB, option_label_pairs,
            mateno_check=True):
    """Map the read pair once per entry of 'option_label_pairs' and check it.

    Each entry must unpack into two items: the first is handed to
    ``smalt_map`` as its last argument, the second is handed to
    ``checkLabels`` together with 'mateno_check'.  Every run writes to the
    same 'oufilnam', which is checked before the next run starts.
    """
    for entry in option_label_pairs:
        map_option, expected_labels = entry
        smalt_map(df, oufilnam, indexnam, readnamA, readnamB, map_option)
        checkLabels(oufilnam, expected_labels, mateno_check)


if __name__ == '__main__':
    from testdata import DataFiles

    df = DataFiles()

    # Input files that ship with the test data.
    refnam = df.joinData(REF_FASTA_NAME)
    readnamA = df.joinData(READ_PREFIX + "_1.fq")
    readnamB = df.joinData(READ_PREFIX + "_2.fq")

    # Index, output and temporary read files registered by this script.
    indexnam = df.addIndex(TMPFIL_PREFIX)
    oufilnam = df.addTMP(TMPFIL_PREFIX + ".cig")
    readnamRCA = df.addTMP(TMPFIL_PREFIX + "rc_1.fq")
    readnamRCB = df.addTMP(TMPFIL_PREFIX + "rc_2.fq")

    smalt_index(df, indexnam, refnam, KMER, NSKIP)

    # First pass: reads in their original orientation.
    process(df, indexnam, oufilnam, readnamA, readnamB, OPTION_LABEL_PAIRS_ORIG)

    #print "reverse complement 2nd read ..."
if sam.cigar != res[2]: df.exitErr("ERROR: wrong CIGAR string '%s' (target:'%s')" % (sam.cigar, res[2])) (typ, fld) = sam.tags["NM"] edist = int(fld) if edist != res[3]: df.exitErr("ERROR: wrong edit distance %i (target: %i)" % (edist, res[3])) infil.close() return if __name__ == '__main__': from testdata import DataFiles df = DataFiles() indexnam = df.addIndex(TMPFIL_PREFIX) reffilnam = makeFASTAfile(df, "REF", REFSEQ) readfilnam = makeFASTAfile(df, "READ", READS) rc_readfilnam = df.addTMP(TMPFIL_PREFIX + "RC.fa") samfilnam = df.addTMP(TMPFIL_PREFIX + ".sam") rc_samfilnam = df.addTMP(TMPFIL_PREFIX + "RC.sam") smalt_index(df, indexnam, reffilnam, KMER, NSKIP) smalt_map(df, samfilnam, indexnam, readfilnam) checkSAM(df, samfilnam, RESULTS) reverseComplement(df, readfilnam, rc_readfilnam) smalt_map(df, rc_samfilnam, indexnam, rc_readfilnam) checkSAM(df, samfilnam, RESULTS)
['-f', 'cigar', '-o', oufilnam, indexnam, readfil, matefil] ) df.call(tup, "when mapping") def process(df, indexnam, oufilnam, readnamA, readnamB, option_label_pairs, mateno_check=True): for (option, label_pairs) in option_label_pairs: smalt_map(df,oufilnam, indexnam, readnamA, readnamB, option) checkLabels(oufilnam, label_pairs, mateno_check) if __name__ == '__main__': from testdata import DataFiles df = DataFiles() refnam = df.joinData(REF_FASTA_NAME) readnamA = df.joinData(READ_PREFIX + "_1.fq") readnamB = df.joinData(READ_PREFIX + "_2.fq") indexnam = df.addIndex(TMPFIL_PREFIX) oufilnam = df.addTMP(TMPFIL_PREFIX + ".cig") readnamRCA = df.addTMP(TMPFIL_PREFIX + "rc_1.fq") readnamRCB = df.addTMP(TMPFIL_PREFIX + "rc_2.fq") smalt_index(df,indexnam, refnam, KMER, NSKIP) process(df,indexnam, oufilnam, readnamA, readnamB, OPTION_LABEL_PAIRS_ORIG) #print "reverse complement 2nd read ..." reverseComplement(df,readnamB, readnamRCB)