Esempio n. 1
0
               cig.sseg != (result[4], result[5]) or \
               cig.swatscor != result[6]:
            exit("Unexpected result for read '%s'" % cig.qnam)
             
    infil.close()
    
def prep_fasta(filnam):
    """Write every entry of READSEQS, back to back, to the file `filnam`.

    Each element of an entry is formatted with "%c", so an entry must be a
    string or a sequence of single characters / character codes.  Nothing is
    inserted between entries; any line structure has to be part of READSEQS
    itself.

    Args:
        filnam: Name of the file to create; it is opened in 'w' mode with
            testdata.openFile.
    """
    from testdata import openFile

    infil = openFile(filnam, 'w')
    try:
        for seq in READSEQS:
            # One write per entry rather than one per character.
            infil.write("".join(["%c" % ch for ch in seq]))
    finally:
        # Release the handle even when formatting or writing fails.
        infil.close()
    
if __name__ == '__main__':
    # Test driver: runs only when this file is executed as a script.
    from testdata import DataFiles

    # DataFiles comes from the project's testdata module (not visible here);
    # presumably it resolves data paths and keeps track of files for cleanup().
    df = DataFiles()
    #refnam = df.unpack(REFSEQ)
    #refnam = df.addTMP(REFSEQ)
    refnam = df.joinData(REFSEQ)
    # Index, read and output names all derive from TMPFIL_PREFIX.
    indexnam = df.addIndex(TMPFIL_PREFIX)
    readfilnam = df.addTMP(TMPFIL_PREFIX + ".fq")
    oufilnam = df.addTMP(TMPFIL_PREFIX + ".cig")
    # Write READSEQS to the read file before it is used as mapping input.
    prep_fasta(readfilnam)
    # smalt_index, smalt_map and compare_result are defined outside this
    # block; the order (index, map, compare) matters because each step
    # consumes the file named by the previous one.
    smalt_index(df,indexnam, refnam, KMER, NSKIP)
    smalt_map(df,oufilnam, indexnam, readfilnam)
    compare_result(oufilnam)
    df.cleanup()
Esempio n. 2
0

def smalt_map(df, oufilnam, indexnam, readfil, typ="fastq", matefil=""):
    """Run the `map` sub-command of PROGNAM through `df.call`.

    The command handed to `df.call` is the tuple
    (PROGNAM, 'map', '-r', '-1', '-F', typ, '-o', oufilnam, indexnam,
    readfil, matefil), together with the label "when mapping".

    Args:
        df: Object providing `call(cmd, label)`.
        oufilnam: Value passed after '-o'.
        indexnam: First positional argument of the command.
        readfil: Second positional argument of the command.
        typ: Value passed after '-F' (default "fastq").
        matefil: Last positional argument of the command (default "").
    """
    # NOTE(review): with the default matefil="" an empty string is still
    # appended as the final argument -- confirm df.call / PROGNAM accept it.
    tup = (PROGNAM, 'map', '-r', '-1', '-F', typ, '-o', oufilnam, indexnam,
           readfil, matefil)

    df.call(tup, "when mapping")


if __name__ == '__main__':
    # Test driver: runs only when this file is executed as a script.
    from testdata import DataFiles, areFilesIdentical

    # DataFiles comes from the project's testdata module (not visible here);
    # presumably joinData/unpack resolve packaged inputs while
    # addIndex/addTMP register files created by the test -- verify there.
    df = DataFiles()
    refnam = df.joinData(REF_FASTA_NAME)
    readnamA = df.joinData(READ_PREFIX + "_nonam_1.fq.gz")
    readnamB = df.joinData(READ_PREFIX + "_nonam_2.fq.gz")
    bamnam = df.joinData(READ_PREFIX + ".bam")
    samnam = df.unpack(READ_PREFIX + ".sam")
    indexnam = df.addIndex(TMPFIL_PREFIX)

    # One output name per input format that is mapped.
    oufilnam_fastq = df.addTMP(TMPFIL_PREFIX + "fastq.sam")
    oufilnam_bam = df.addTMP(TMPFIL_PREFIX + "bam.sam")
    oufilnam_sam = df.addTMP(TMPFIL_PREFIX + "sam.sam")
    oufilnam2_sam = df.addTMP(TMPFIL_PREFIX + "sam2.sam")

    # smalt_index is defined outside this block.
    smalt_index(df, indexnam, refnam, KMER, NSKIP)
    # Two FASTQ files: typ="fastq", second file passed as matefil.
    smalt_map(df, oufilnam_fastq, indexnam, readnamA, "fastq", readnamB)
    # Single BAM file: typ="bam", matefil left at its default "".
    smalt_map(df, oufilnam_bam, indexnam, bamnam, "bam")
Esempio n. 3
0
        if cmp(cig1,cig2) and cig1.mapq > 5 and cig2.mapq > 5:
            exit("mappings don't match for read '%s'" % \
                 cig1.qnam)
    infil2.close()
    infil1.close()

    if ctr1 != ctr1:
        exit("Expected the same number of mates, got %i (A) and %i (B)" % \
             ctr1, ctr2)
    if ctr1 != 20000:
        exit("Expected 20,000 reads, got %i." % ctr1)
    
if __name__ == '__main__':
    # Test driver: runs only when this file is executed as a script.
    from testdata import DataFiles
    
    # DataFiles comes from the project's testdata module (not visible here);
    # presumably joinData resolves packaged inputs while addIndex/addTMP
    # register files created by the test -- verify there.
    df = DataFiles()
    
    refnam = df.joinData(REF_FASTA_NAME)
    readnamA = df.joinData(READ_PREFIX + "_1.fq.gz")
    readnamB = df.joinData(READ_PREFIX + "_2.fq.gz")
    indexnam = df.addIndex(TMPFIL_PREFIX)

    # Names for two ".txt" files and three ".cig" files, all derived from
    # TMPFIL_PREFIX.
    samplenam1 = df.addTMP(TMPFIL_PREFIX + ".1.txt")
    samplenam2 = df.addTMP(TMPFIL_PREFIX + ".2.txt")
    
    oufilnam1 = df.addTMP(TMPFIL_PREFIX + ".1.cig")
    oufilnam2 = df.addTMP(TMPFIL_PREFIX + ".2.cig")
    oufilnam3 = df.addTMP(TMPFIL_PREFIX + ".3.cig")

    # smalt_check and smalt_index are defined outside this block; the read
    # pair is checked before the index is built.
    smalt_check(df,readnamA, readnamB)
    smalt_index(df,indexnam, refnam, KMER, NSKIP)
Esempio n. 4
0
        okflgs[expected_tup.index(observed_tup)] = True
        linctr = linctr + 1

    for i in range(n_tup):
        if not okflgs[i]:
            print "ERROR: could not find tuple: ", expected_tup[i]
            allok = False

    if not allok:
        exit("ERROR when checking. Test not passed.")


if __name__ == '__main__':
    # Test driver: runs only when this file is executed as a script.
    from testdata import DataFiles

    # DataFiles comes from the project's testdata module (not visible here);
    # presumably joinData resolves packaged inputs while addIndex/addTMP
    # register files created by the test -- verify there.
    df = DataFiles()

    refnam = df.joinData(REF_FASTA_NAME)
    indexnam = df.addIndex(TMPFIL_PREFIX)
    readfilA = df.addTMP(TMPFIL_PREFIX + "_1.fa")
    readfilB = df.addTMP(TMPFIL_PREFIX + "_2.fa")
    oufilnam = df.addTMP(TMPFIL_PREFIX + ".cig")

    # NOTE(review): the read files carry a ".fa" suffix although the helper
    # is called makeFastqPair -- confirm which format is actually written.
    makeFastqPair(readfilA, readfilB)
    # makeFastqPair, smalt_index, smalt_map and checkOutput are defined
    # outside this block; ['-p'] is passed as the sixth argument of smalt_map.
    smalt_index(df, indexnam, refnam, KMER, NSKIP)
    smalt_map(df, oufilnam, indexnam, readfilA, readfilB, ['-p'])
    # The mapping output is compared against MAPPED_CIGAR.
    checkOutput(oufilnam, MAPPED_CIGAR)
    df.cleanup()

    # Explicit zero exit status once every step has completed.
    exit(0)
Esempio n. 5
0
        if cigA2 != cigB2:
            if is_verbose:
                print "Not matching:\n%s\n%s" % (cigA2.lin, cigB2.lin) 
            if cigA2.mapq > MAPQ_THRESH and cigB2.mapq > MAPQ_THRESH:
                exit("Discrepancy:\n%s\n%s" % (cigA2.lin, cigB2.lin))
        ctr = ctr + 1
    if not isOK and isEOF:
        isOK = True
    return isOK, ctr
            
        
        
if __name__ == '__main__':
    # Test driver: runs only when this file is executed as a script.
    from testdata import DataFiles, areFilesIdentical

    # DataFiles comes from the project's testdata module (not visible here).
    df = DataFiles()
    # DATA is read as (reference, first read file, second read file,
    # expected number of pairs).
    refnam = df.joinData(DATA[0])
    readfilnamA = df.joinData(DATA[1])
    readfilnamB = df.joinData(DATA[2])
    n_pairs_expected = DATA[3]
    
    indexnam = df.addIndex(TMPFIL_PREFIX)
    # Two output names: ".n0.cig" for the run without extra flags and
    # ".n<NTHREADS>.cig" for the run that passes "-n NTHREADS".
    oufilnam_ref = df.addTMP(TMPFIL_PREFIX + ".n0.cig")
    oufilnam_thread = df.addTMP(TMPFIL_PREFIX + ".n%i.cig" % NTHREADS)
    
    # smalt_index and smalt_map are defined outside this block.
    smalt_index(df, indexnam, refnam, KMER, NSKIP);

    # Same inputs mapped twice; only the second call adds the flag list
    # ["-n", NTHREADS, "-O"] (presumably thread count and ordered output --
    # confirm against the mapper's option list).
    smalt_map(df, oufilnam_ref, indexnam, readfilnamA, readfilnamB);
    smalt_map(df, oufilnam_thread, indexnam, readfilnamA, readfilnamB,
              ["-n", "%i" % NTHREADS, "-O"]);
Esempio n. 6
0
        nmtag = sam.tags["NM"]
        if nmtag:
            tagstr = "NM:%s:%s" % nmtag
            if tagstr != rdat[2]:
                df.exitErr("Unexpected tag '%s' (expected '%s')" % (tagstr, rdat[2]))
        linctr = linctr + 1

    if linctr < len(readdat):
        df.exitErr("CIGAR strings incomplete");
        
    infil.close()
    
if __name__ == '__main__':
    # Test driver: runs only when this file is executed as a script.
    from testdata import DataFiles

    # DataFiles comes from the project's testdata module (not visible here);
    # presumably addTMP/addIndex register files created by the test.
    df = DataFiles()
    
    # The reference is generated by the test itself rather than read from
    # packaged data (writeFASTAref is defined outside this block).
    reffilnam = df.addTMP(TMPFIL_PREFIX + "ref.fa")
    writeFASTAref(reffilnam)

    
    indexnam = df.addIndex(TMPFIL_PREFIX)
    smalt_index(df, indexnam, reffilnam, KMER, NSKIP)

    # One file for READSEQ and two mate files for READSEQ_PAIR; the mate
    # files use TMPFIL_PREFIX_PAIRED instead of TMPFIL_PREFIX.
    readfilnam = df.addTMP(TMPFIL_PREFIX + "read.fa")
    mate1filnam = df.addTMP(TMPFIL_PREFIX_PAIRED + "mate1.fa")
    mate2filnam = df.addTMP(TMPFIL_PREFIX_PAIRED + "mate2.fa")
    writeFASTAreads(READSEQ, readfilnam)
    writeFASTAreadPairs(READSEQ_PAIR, mate1filnam, mate2filnam)
    
    samoufilnam = df.addTMP(TMPFIL_PREFIX + "out.sam")
Esempio n. 7
0
def smalt_map(df, oufilnam, indexnam, readfil, matefil, typ="fastq", flags=None):
    """Run the `map` sub-command of PROGNAM through `df.call`.

    The command handed to `df.call` is the list
    [PROGNAM, 'map', <flags...>, '-f', typ, '-o', oufilnam, indexnam,
    readfil, matefil], together with the label "when mapping".

    Args:
        df: Object providing `call(cmd, label)`.
        oufilnam: Value passed after '-o'.
        indexnam: First positional argument of the command.
        readfil: Second positional argument of the command.
        matefil: Third positional argument of the command.
        typ: Value passed after '-f' (default "fastq").
        flags: Optional sequence of extra arguments inserted directly after
            'map'.  None or an empty sequence adds nothing.
    """
    tup = [PROGNAM, 'map']
    if flags:
        tup.extend(flags)
    tup.extend(['-f', typ, '-o', oufilnam, indexnam, readfil, matefil])
    df.call(tup, "when mapping")


if __name__ == '__main__':
    # Test driver: runs only when this file is executed as a script.
    from testdata import DataFiles

    # DataFiles comes from the project's testdata module (not visible here);
    # presumably joinData resolves packaged inputs while addIndex/addTMP
    # register files created by the test -- verify there.
    df = DataFiles()

    refnam = df.joinData(FNAM_REF)
    readnamA = df.joinData(FNAM_READ1)
    readnamB = df.joinData(FNAM_READ2)
    indexnam = df.addIndex(TMPFIL_PREFIX)
    oufilnam = df.addTMP(TMPFIL_PREFIX + ".sam")

    # smalt_index is defined outside this block.
    smalt_index(df, indexnam, refnam, KMER, NSKIP)
    # "sam" is the value passed after '-f'; ["-x"] is inserted right after
    # 'map' on the command line (see smalt_map).
    smalt_map(df, oufilnam, indexnam, readnamA, readnamB, "sam", ["-x"])

    # NOTE(review): the output file is not inspected in this block; the run
    # appears to count as passed once both calls return.
    #print "Test ok."

    df.cleanup()
    exit()
Esempio n. 8
0
            tagstr = "NM:%s:%s" % nmtag
            if tagstr != rdat[2]:
                df.exitErr("Unexpected tag '%s' (expected '%s')" %
                           (tagstr, rdat[2]))
        linctr = linctr + 1

    if linctr < len(readdat):
        df.exitErr("CIGAR strings incomplete")

    infil.close()


if __name__ == '__main__':
    # Test driver: runs only when this file is executed as a script.
    from testdata import DataFiles

    # DataFiles comes from the project's testdata module (not visible here);
    # presumably addTMP/addIndex register files created by the test.
    df = DataFiles()

    # The reference is generated by the test itself rather than read from
    # packaged data (writeFASTAref is defined outside this block).
    reffilnam = df.addTMP(TMPFIL_PREFIX + "ref.fa")
    writeFASTAref(reffilnam)

    indexnam = df.addIndex(TMPFIL_PREFIX)
    smalt_index(df, indexnam, reffilnam, KMER, NSKIP)

    # One file for READSEQ and two mate files for READSEQ_PAIR; the mate
    # files use TMPFIL_PREFIX_PAIRED instead of TMPFIL_PREFIX.
    readfilnam = df.addTMP(TMPFIL_PREFIX + "read.fa")
    mate1filnam = df.addTMP(TMPFIL_PREFIX_PAIRED + "mate1.fa")
    mate2filnam = df.addTMP(TMPFIL_PREFIX_PAIRED + "mate2.fa")
    writeFASTAreads(READSEQ, readfilnam)
    writeFASTAreadPairs(READSEQ_PAIR, mate1filnam, mate2filnam)

    # Output names for a ".sam" and a ".bam" result.
    samoufilnam = df.addTMP(TMPFIL_PREFIX + "out.sam")
    bamoufilnam = df.addTMP(TMPFIL_PREFIX + "out.bam")
Esempio n. 9
0
                exit("Discrepancy:\n%s\n%s" % (cigA1.lin, cigB1.lin))
        if cigA2 != cigB2:
            if is_verbose:
                print "Not matching:\n%s\n%s" % (cigA2.lin, cigB2.lin)
            if cigA2.mapq > MAPQ_THRESH and cigB2.mapq > MAPQ_THRESH:
                exit("Discrepancy:\n%s\n%s" % (cigA2.lin, cigB2.lin))
        ctr = ctr + 1
    if not isOK and isEOF:
        isOK = True
    return isOK, ctr


if __name__ == '__main__':
    # Test driver: runs only when this file is executed as a script.
    from testdata import DataFiles, areFilesIdentical

    # DataFiles comes from the project's testdata module (not visible here).
    df = DataFiles()
    # DATA is read as (reference, first read file, second read file,
    # expected number of pairs).
    refnam = df.joinData(DATA[0])
    readfilnamA = df.joinData(DATA[1])
    readfilnamB = df.joinData(DATA[2])
    n_pairs_expected = DATA[3]

    indexnam = df.addIndex(TMPFIL_PREFIX)
    # Two output names: ".n0.cig" for the run without extra flags and
    # ".n<NTHREADS>.cig" for the run that passes "-n NTHREADS".
    oufilnam_ref = df.addTMP(TMPFIL_PREFIX + ".n0.cig")
    oufilnam_thread = df.addTMP(TMPFIL_PREFIX + ".n%i.cig" % NTHREADS)

    # smalt_index and smalt_map are defined outside this block.
    smalt_index(df, indexnam, refnam, KMER, NSKIP)

    # Same inputs mapped twice; only the second call adds the flag list
    # ["-n", NTHREADS, "-O"] (presumably thread count and ordered output --
    # confirm against the mapper's option list).
    smalt_map(df, oufilnam_ref, indexnam, readfilnamA, readfilnamB)
    smalt_map(df, oufilnam_thread, indexnam, readfilnamA, readfilnamB,
              ["-n", "%i" % NTHREADS, "-O"])
Esempio n. 10
0
    from subprocess import call
 
    tup = [PROGNAM, 'map']
    if len(flags) > 0:
        tup.extend(flags)
    tup.extend([
           '-f', typ,
           '-o', oufilnam,
           indexnam,
           readfil, matefil])
    df.call(tup, "when mapping")

if __name__ == '__main__':
    # Test driver: runs only when this file is executed as a script.
    from testdata import DataFiles
    
    # DataFiles comes from the project's testdata module (not visible here);
    # presumably joinData resolves packaged inputs while addIndex/addTMP
    # register files created by the test -- verify there.
    df = DataFiles()
    
    refnam = df.joinData(FNAM_REF)
    readnamA = df.joinData(FNAM_READ1)
    readnamB = df.joinData(FNAM_READ2)
    indexnam = df.addIndex(TMPFIL_PREFIX)
    oufilnam = df.addTMP(TMPFIL_PREFIX + ".sam")
    
    # smalt_index is defined outside this block.
    smalt_index(df,indexnam, refnam, KMER, NSKIP)
    # "sam" and ["-x"] are the sixth and seventh arguments of smalt_map; the
    # visible part of that function places them after '-f' and after 'map'.
    smalt_map(df,oufilnam, indexnam, readnamA, readnamB, "sam", ["-x"])
    
    # NOTE(review): the output file is not inspected in this block; the run
    # appears to count as passed once both calls return.
    #print "Test ok."
    
    df.cleanup()
    exit()
Esempio n. 11
0
            exit("Unexpected result for read '%s'" % cig.qnam)

    infil.close()


def prep_fasta(filnam):
    """Write every entry of READSEQS, back to back, to the file `filnam`.

    Each element of an entry is formatted with "%c", so an entry must be a
    string or a sequence of single characters / character codes.  Nothing is
    inserted between entries; any line structure has to be part of READSEQS
    itself.

    Args:
        filnam: Name of the file to create; it is opened in 'w' mode with
            testdata.openFile.
    """
    from testdata import openFile

    infil = openFile(filnam, 'w')
    try:
        for seq in READSEQS:
            # One write per entry rather than one per character.
            infil.write("".join(["%c" % ch for ch in seq]))
    finally:
        # Release the handle even when formatting or writing fails.
        infil.close()


if __name__ == '__main__':
    # Test driver: runs only when this file is executed as a script.
    from testdata import DataFiles

    # DataFiles comes from the project's testdata module (not visible here);
    # presumably it resolves data paths and keeps track of files for cleanup().
    df = DataFiles()
    #refnam = df.unpack(REFSEQ)
    #refnam = df.addTMP(REFSEQ)
    refnam = df.joinData(REFSEQ)
    # Index, read and output names all derive from TMPFIL_PREFIX.
    indexnam = df.addIndex(TMPFIL_PREFIX)
    readfilnam = df.addTMP(TMPFIL_PREFIX + ".fq")
    oufilnam = df.addTMP(TMPFIL_PREFIX + ".cig")
    # Write READSEQS to the read file before it is used as mapping input.
    prep_fasta(readfilnam)
    # smalt_index, smalt_map and compare_result are defined outside this
    # block; the order (index, map, compare) matters because each step
    # consumes the file named by the previous one.
    smalt_index(df, indexnam, refnam, KMER, NSKIP)
    smalt_map(df, oufilnam, indexnam, readfilnam)
    compare_result(oufilnam)
    df.cleanup()
Esempio n. 12
0
    from sys import exit
    from subprocess import call
    
    tup = (PROGNAM, 'map',
           '-r', '-1',
           '-F', typ,
           '-o', oufilnam,
           indexnam,
           readfil, matefil)

    df.call(tup, "when mapping")

if __name__ == '__main__':
    # Test driver: runs only when this file is executed as a script.
    from testdata import DataFiles, areFilesIdentical
    
    # DataFiles comes from the project's testdata module (not visible here);
    # presumably joinData/unpack resolve packaged inputs while
    # addIndex/addTMP register files created by the test -- verify there.
    df = DataFiles()
    refnam = df.joinData(REF_FASTA_NAME)
    readnamA = df.joinData(READ_PREFIX + "_nonam_1.fq.gz")
    readnamB = df.joinData(READ_PREFIX + "_nonam_2.fq.gz")
    bamnam = df.joinData(READ_PREFIX + ".bam")
    samnam = df.unpack(READ_PREFIX + ".sam")
    indexnam = df.addIndex(TMPFIL_PREFIX)
    
    # One output name per input format that is mapped.
    oufilnam_fastq = df.addTMP(TMPFIL_PREFIX + "fastq.sam")
    oufilnam_bam = df.addTMP(TMPFIL_PREFIX + "bam.sam")
    oufilnam_sam = df.addTMP(TMPFIL_PREFIX + "sam.sam")
    oufilnam2_sam = df.addTMP(TMPFIL_PREFIX + "sam2.sam")
    
    # smalt_index is defined outside this block.
    smalt_index(df,indexnam, refnam, KMER, NSKIP)
    # Two FASTQ files: "fastq" is the value that ends up after '-F' and
    # readnamB is passed as the mate file.
    smalt_map(df,oufilnam_fastq, indexnam, readnamA, "fastq", readnamB)
    # Single BAM file: no mate file is supplied.
    smalt_map(df,oufilnam_bam, indexnam, bamnam, "bam")
Esempio n. 13
0
def process(df, indexnam, oufilnam, readnamA, readnamB, option_label_pairs,
            mateno_check=True):
    """Map the read pair once per option and verify the labels each time.

    For every (option, labels) pair in `option_label_pairs`, smalt_map is
    called with `option` as its sixth positional argument and `oufilnam` as
    the output name; checkLabels is then called on that same file with the
    pair's labels and `mateno_check`.  The output name is reused for every
    pair.

    Args:
        df: Passed through to smalt_map as its first argument.
        indexnam: Index name passed through to smalt_map.
        oufilnam: Output file name, written by smalt_map and read by
            checkLabels.
        readnamA: First read file name.
        readnamB: Second read file name.
        option_label_pairs: Iterable of 2-item (option, labels) entries.
        mateno_check: Forwarded unchanged to checkLabels (default True).
    """
    for pair in option_label_pairs:
        map_option, expected_labels = pair
        smalt_map(df, oufilnam, indexnam, readnamA, readnamB, map_option)
        checkLabels(oufilnam, expected_labels, mateno_check)


if __name__ == '__main__':
    # Test driver: runs only when this file is executed as a script.
    from testdata import DataFiles

    # DataFiles comes from the project's testdata module (not visible here);
    # presumably joinData resolves packaged inputs while addIndex/addTMP
    # register files created by the test -- verify there.
    df = DataFiles()

    refnam = df.joinData(REF_FASTA_NAME)
    readnamA = df.joinData(READ_PREFIX + "_1.fq")
    readnamB = df.joinData(READ_PREFIX + "_2.fq")
    indexnam = df.addIndex(TMPFIL_PREFIX)

    # One shared output name plus names for two "rc_" read files.
    oufilnam = df.addTMP(TMPFIL_PREFIX + ".cig")
    readnamRCA = df.addTMP(TMPFIL_PREFIX + "rc_1.fq")
    readnamRCB = df.addTMP(TMPFIL_PREFIX + "rc_2.fq")

    # smalt_index is defined outside this block.
    smalt_index(df, indexnam, refnam, KMER, NSKIP)
    # Map and check the unmodified read pair for every entry of
    # OPTION_LABEL_PAIRS_ORIG.
    process(df, indexnam, oufilnam, readnamA, readnamB,
            OPTION_LABEL_PAIRS_ORIG)

    #print "reverse complement 2nd read ..."
Esempio n. 14
0
        if sam.cigar != res[2]:
            df.exitErr("ERROR: wrong CIGAR string '%s' (target:'%s')" % (sam.cigar, res[2]))

        (typ, fld) = sam.tags["NM"]
        edist = int(fld)
        if edist != res[3]:
            df.exitErr("ERROR: wrong edit distance %i (target: %i)" % (edist, res[3]))
                          
    infil.close()
    return

if __name__ == '__main__':
    # Test driver: runs only when this file is executed as a script.
    from testdata import DataFiles
    
    # DataFiles comes from the project's testdata module (not visible here).
    df = DataFiles()

    indexnam = df.addIndex(TMPFIL_PREFIX)
    # makeFASTAfile is defined outside this block; it returns the file names
    # used below for the reference (REFSEQ) and the reads (READS).
    reffilnam = makeFASTAfile(df, "REF", REFSEQ)
    readfilnam = makeFASTAfile(df, "READ", READS)
    rc_readfilnam = df.addTMP(TMPFIL_PREFIX + "RC.fa")
    samfilnam = df.addTMP(TMPFIL_PREFIX + ".sam")
    rc_samfilnam = df.addTMP(TMPFIL_PREFIX + "RC.sam")
    
    # First pass: map the reads as written and check the output against
    # RESULTS.
    smalt_index(df, indexnam, reffilnam, KMER, NSKIP)
    smalt_map(df, samfilnam, indexnam, readfilnam)
    checkSAM(df, samfilnam, RESULTS)
    
    # Second pass: map the reads produced by reverseComplement into
    # rc_samfilnam.
    reverseComplement(df, readfilnam, rc_readfilnam)
    smalt_map(df, rc_samfilnam, indexnam, rc_readfilnam)
    # NOTE(review): this re-checks samfilnam from the first pass, so the
    # output in rc_samfilnam is never examined -- probably meant to be
    # checkSAM(df, rc_samfilnam, RESULTS); confirm RESULTS applies to it.
    checkSAM(df, samfilnam, RESULTS)
Esempio n. 15
0
        ['-f', 'cigar',
         '-o', oufilnam,
         indexnam,
         readfil, matefil]
        )
    df.call(tup, "when mapping")

def process(
        df,
        indexnam,
        oufilnam,
        readnamA,
        readnamB,
        option_label_pairs,
        mateno_check=True):
    """Map the read pair once per option and verify the labels each time.

    For every (option, labels) pair in `option_label_pairs`, smalt_map is
    called with `option` as its sixth positional argument and `oufilnam` as
    the output name; checkLabels is then called on that same file with the
    pair's labels and `mateno_check`.  The output name is reused for every
    pair.

    Args:
        df: Passed through to smalt_map as its first argument.
        indexnam: Index name passed through to smalt_map.
        oufilnam: Output file name, written by smalt_map and read by
            checkLabels.
        readnamA: First read file name.
        readnamB: Second read file name.
        option_label_pairs: Iterable of 2-item (option, labels) entries.
        mateno_check: Forwarded unchanged to checkLabels (default True).
    """
    for pair in option_label_pairs:
        map_option, expected_labels = pair
        smalt_map(df, oufilnam, indexnam, readnamA, readnamB, map_option)
        checkLabels(oufilnam, expected_labels, mateno_check)

if __name__ == '__main__':
    # Test driver: runs only when this file is executed as a script.
    from testdata import DataFiles
    
    # DataFiles comes from the project's testdata module (not visible here);
    # presumably joinData resolves packaged inputs while addIndex/addTMP
    # register files created by the test -- verify there.
    df = DataFiles()
    
    refnam = df.joinData(REF_FASTA_NAME)
    readnamA = df.joinData(READ_PREFIX + "_1.fq")
    readnamB = df.joinData(READ_PREFIX + "_2.fq")
    indexnam = df.addIndex(TMPFIL_PREFIX)
    
    # One shared output name plus names for two "rc_" read files.
    oufilnam = df.addTMP(TMPFIL_PREFIX + ".cig")
    readnamRCA = df.addTMP(TMPFIL_PREFIX + "rc_1.fq")
    readnamRCB = df.addTMP(TMPFIL_PREFIX + "rc_2.fq")
    
    # smalt_index is defined outside this block.
    smalt_index(df,indexnam, refnam, KMER, NSKIP)
    # Map and check the unmodified read pair for every entry of
    # OPTION_LABEL_PAIRS_ORIG.
    process(df,indexnam, oufilnam, readnamA, readnamB, OPTION_LABEL_PAIRS_ORIG)

    #print "reverse complement 2nd read ..."
    # reverseComplement is defined outside this block; it is given the second
    # read file and the "rc_2.fq" name registered above.
    reverseComplement(df,readnamB, readnamRCB)