def chopAndAlign(G3, f2, noisyReads, motherGenome, parameterRobot):
    """Recover one segment per item of ``G3``, dump it to disk and export it.

    For every item in ``G3`` the segment is rebuilt with
    ``readAns.reportRecovSeq`` from the item's ``nodeIndexList``.  Each
    recovered segment is then written to ``rec_<index>.txt`` and passed to
    ``compare.outputToFastaFiles`` together with ``motherGenome``.

    Args:
        G3: iterable of objects exposing a ``nodeIndexList`` attribute.
        f2: cluster mapping, forwarded untouched to ``reportRecovSeq``.
        noisyReads: the noisy reads, forwarded untouched to
            ``reportRecovSeq``.
        motherGenome: reference genome, forwarded to
            ``compare.outputToFastaFiles``.
        parameterRobot: parameter object; ``defaultFolder`` is used as a raw
            string prefix for the output path (no separator is inserted, so
            it has to end with one if it names a directory).

    Returns:
        None.  The results are the files written and whatever
        ``compare.outputToFastaFiles`` produces (per the original notes:
        FASTA output and dot plots against the real genome).
    """
    # Single-argument print(...) yields the same text under the Python 2
    # print statement and the Python 3 print function.
    print("Hi, I am working NOW")

    # Chop and fill in: one recovered sequence per graph item.
    segmentList = [
        readAns.reportRecovSeq(eachitem.nodeIndexList, f2, noisyReads,
                               parameterRobot)
        for eachitem in G3
    ]

    print("len(segmentList) %s" % (len(segmentList),))

    # Create the per-segment text dump and the FASTA / dot plot output.
    for index, eachSegment in enumerate(segmentList):
        recovPath = parameterRobot.defaultFolder + "rec_" + str(index) + ".txt"

        # The context manager closes the file even when a write fails.
        with open(recovPath, 'w') as frecov:
            frecov.write("".join(str(eachbase) for eachbase in eachSegment))

        print("index, eachSegment[0:10] %s %s" % (index, eachSegment[0:10]))
        compare.outputToFastaFiles(eachSegment, motherGenome, parameterRobot,
                                   index)
def ECUnitTest(Nin, G, Lin, foldername="", bridgingDepth=20, msaWidth=20):
    """Run the back half of the assembly pipeline on data stored in a folder.

    Builds a parameter object, loads the raw data, the cluster mapping and
    the graph from files under ``foldername``, then runs repeat resolution,
    MSA resolution, ``eulerCycle.findEC`` and sequence recovery, and finally
    compares the recovered genome with the loaded reference.

    Args:
        Nin: stored on the parameter object as ``N``.
        G: stored on the parameter object as ``G``.
        Lin: stored on the parameter object as ``L``.
        foldername: raw string prefix for every input file and also the
            parameter object's ``defaultFolder`` (no separator is added).
        bridgingDepth: stored on the parameter object (default 20).
        msaWidth: stored on the parameter object (default 20).

    Returns:
        The ``(numMistakes, success)`` pair produced by
        ``compare.subAlignCompare``.
    """
    # NOTE(review): `logging` is expected to be the project's own module here;
    # the standard library logging module has no parameterObj.
    settings = logging.parameterObj()
    settings.bridgingDepth = bridgingDepth
    settings.msaWidth = msaWidth
    settings.N = Nin
    settings.G = G
    settings.L = Lin
    settings.p = 0.015
    settings.indel = True
    settings.defaultFolder = foldername
    settings.threshold = 5
    settings.liid = 48

    snpRate = 0.001

    # Read the sizes back from the parameter object, as the original did.
    genomeLen, readCount, readLen = settings.G, settings.N, settings.L
    rawDataPrefix = foldername + "UnitTest"
    motherGen, _cleanReads, noisyReads = logging.rawDataLoad(
        rawDataPrefix, genomeLen, readCount, readLen, "dn")

    clusterMap = logging.fmapfusedLoad(foldername + 'clusteredGroup2.csv')
    loadedGraph = logging.loadGraph(foldername + 'basicMapping.csv',
                                    foldername + 'seqMapping.txt',
                                    'simple')
    checkCondensingBasic(loadedGraph, [loadedGraph[0]], "simple")

    bridgedGraph = bridgeResolve.resolveRepeats(clusterMap, loadedGraph,
                                                settings)
    checkCondensingBasic(bridgedGraph, [bridgedGraph[0]], "MB")

    # The original carried a disabled alternative here that used the bridged
    # graph directly instead of running MSAresolve.
    msaGraph = alignmentBridge.MSAresolve(clusterMap, bridgedGraph,
                                          noisyReads, snpRate, settings)

    nodeOrder = eulerCycle.findEC(msaGraph)
    recoveredGenome = readAns.reportRecovSeq(nodeOrder, clusterMap,
                                             noisyReads, settings)

    mistakeCount, succeeded = compare.subAlignCompare(recoveredGenome,
                                                      motherGen, settings)
    return mistakeCount, succeeded
def chopAndAlign(G3, f2, noisyReads, motherGenome, parameterRobot):
    """Recover one segment per item of ``G3``, dump it to disk and export it.

    For every item in ``G3`` the segment is rebuilt with
    ``readAns.reportRecovSeq`` from the item's ``nodeIndexList``.  Each
    recovered segment is then written to ``rec_<index>.txt`` and passed to
    ``compare.outputToFastaFiles`` together with ``motherGenome``.

    Args:
        G3: iterable of objects exposing a ``nodeIndexList`` attribute.
        f2: cluster mapping, forwarded untouched to ``reportRecovSeq``.
        noisyReads: the noisy reads, forwarded untouched to
            ``reportRecovSeq``.
        motherGenome: reference genome, forwarded to
            ``compare.outputToFastaFiles``.
        parameterRobot: parameter object; ``defaultFolder`` is used as a raw
            string prefix for the output path (no separator is inserted, so
            it has to end with one if it names a directory).

    Returns:
        None.  The results are the files written and whatever
        ``compare.outputToFastaFiles`` produces (per the original notes:
        FASTA output and dot plots against the real genome).
    """
    # Single-argument print(...) yields the same text under the Python 2
    # print statement and the Python 3 print function.
    print("Hi, I am working NOW")

    # Chop and fill in: one recovered sequence per graph item.
    segmentList = [
        readAns.reportRecovSeq(eachitem.nodeIndexList, f2, noisyReads,
                               parameterRobot)
        for eachitem in G3
    ]

    print("len(segmentList) %s" % (len(segmentList),))

    # Create the per-segment text dump and the FASTA / dot plot output.
    for index, eachSegment in enumerate(segmentList):
        recovPath = parameterRobot.defaultFolder + "rec_" + str(index) + ".txt"

        # The context manager closes the file even when a write fails.
        with open(recovPath, 'w') as frecov:
            frecov.write("".join(str(eachbase) for eachbase in eachSegment))

        print("index, eachSegment[0:10] %s %s" % (index, eachSegment[0:10]))
        compare.outputToFastaFiles(eachSegment, motherGenome, parameterRobot,
                                   index)
def runAssembler(snpRate, typeOfGen, detail, parameterRobot):
    """Generate data, run every assembly stage and score the result.

    Stages, in order: ``dataGen.generateData``, ``logging.rawDataLoad``,
    ``cluster.groupIndelNoisyKmers``, ``graphForm.getSeqGraph``,
    ``branchClear.clearResidual``, ``bridgeResolve.resolveRepeats``,
    ``alignmentBridge.MSAresolve``, ``eulerCycle.findEC``,
    ``readAns.reportRecovSeq`` and ``compare.subAlignCompare``.

    Args:
        snpRate: forwarded to ``alignmentBridge.MSAresolve``.
        typeOfGen: forwarded to ``dataGen.generateData``.
        detail: forwarded to ``dataGen.generateData``.
        parameterRobot: parameter object handed to every stage; its
            ``defaultFolder``, ``G``, ``N`` and ``L`` attributes are also
            used to reload the raw data.

    Returns:
        The ``(numMistakes, success)`` pair produced by
        ``compare.subAlignCompare``.
    """
    # Example configuration left by the original author:
    # N, G, L, p,K,snpRate, typeOfGen, detail = 100, 100, 10000, 0.01,30,0.001, 'r', "1000"

    # The generated values are superseded by the reload right below; the call
    # (and its three-way unpack) is kept exactly as in the original.
    # NOTE(review): presumably generateData is needed for its side effects --
    # confirm.
    _genGenome, _genReads, _genNoisy = dataGen.generateData(
        typeOfGen, detail, parameterRobot)

    rawDataPrefix = parameterRobot.defaultFolder + "UnitTest"
    referenceGenome, _cleanReads, readsWithNoise = logging.rawDataLoad(
        rawDataPrefix, parameterRobot.G, parameterRobot.N, parameterRobot.L,
        "dn")

    kmerClusters = cluster.groupIndelNoisyKmers(readsWithNoise,
                                                parameterRobot)
    seqGraph, _startList, kmerClusters = graphForm.getSeqGraph(
        kmerClusters, readsWithNoise, parameterRobot)
    cleanClusters, cleanGraph = branchClear.clearResidual(
        kmerClusters, seqGraph, parameterRobot)
    bridgedGraph = bridgeResolve.resolveRepeats(cleanClusters, cleanGraph,
                                                parameterRobot)

    # The original carried a disabled alternative here that used the bridged
    # graph directly instead of running MSAresolve.
    msaGraph = alignmentBridge.MSAresolve(cleanClusters, bridgedGraph,
                                          readsWithNoise, snpRate,
                                          parameterRobot)

    nodeOrder = eulerCycle.findEC(msaGraph)
    recoveredGenome = readAns.reportRecovSeq(nodeOrder, cleanClusters,
                                             readsWithNoise, parameterRobot)

    mistakeCount, succeeded = compare.subAlignCompare(
        recoveredGenome, referenceGenome, parameterRobot)
    return mistakeCount, succeeded


# Target of the new code : Fast Assemble and assembly in optimal amount of information
# NOTE(review): the disabled timing driver below passes seven arguments and so
# predates the current four-parameter runAssembler signature.
#t0 = time.time()
#N, G, L, p,snpRate, typeOfGen, detail = 1000, 10000,200, 0.015, 0.001 ,'m', "500-300-50"
#runAssembler(N, G, L, p,snpRate, typeOfGen, detail)
#print "Time (sec) :", time.time() - t0
def runAssembler(snpRate, typeOfGen, detail, parameterRobot):
    """Generate data, run every assembly stage and score the result.

    Stages, in order: ``dataGen.generateData``, ``logging.rawDataLoad``,
    ``cluster.groupIndelNoisyKmers``, ``graphForm.getSeqGraph``,
    ``branchClear.clearResidual``, ``bridgeResolve.resolveRepeats``,
    ``alignmentBridge.MSAresolve``, ``eulerCycle.findEC``,
    ``readAns.reportRecovSeq`` and ``compare.subAlignCompare``.

    Args:
        snpRate: forwarded to ``alignmentBridge.MSAresolve``.
        typeOfGen: forwarded to ``dataGen.generateData``.
        detail: forwarded to ``dataGen.generateData``.
        parameterRobot: parameter object handed to every stage; its
            ``defaultFolder``, ``G``, ``N`` and ``L`` attributes are also
            used to reload the raw data.

    Returns:
        The ``(numMistakes, success)`` pair produced by
        ``compare.subAlignCompare``.
    """
    # Example configuration left by the original author:
    # N, G, L, p,K,snpRate, typeOfGen, detail = 100, 100, 10000, 0.01,30,0.001, 'r', "1000"

    # The generated values are superseded by the reload right below; the call
    # (and its three-way unpack) is kept exactly as in the original.
    # NOTE(review): presumably generateData is needed for its side effects --
    # confirm.
    _genGenome, _genReads, _genNoisy = dataGen.generateData(
        typeOfGen, detail, parameterRobot)

    rawDataPrefix = parameterRobot.defaultFolder + "UnitTest"
    referenceGenome, _cleanReads, readsWithNoise = logging.rawDataLoad(
        rawDataPrefix, parameterRobot.G, parameterRobot.N, parameterRobot.L,
        "dn")

    kmerClusters = cluster.groupIndelNoisyKmers(readsWithNoise,
                                                parameterRobot)
    seqGraph, _startList, kmerClusters = graphForm.getSeqGraph(
        kmerClusters, readsWithNoise, parameterRobot)
    cleanClusters, cleanGraph = branchClear.clearResidual(
        kmerClusters, seqGraph, parameterRobot)
    bridgedGraph = bridgeResolve.resolveRepeats(cleanClusters, cleanGraph,
                                                parameterRobot)

    # The original carried a disabled alternative here that used the bridged
    # graph directly instead of running MSAresolve.
    msaGraph = alignmentBridge.MSAresolve(cleanClusters, bridgedGraph,
                                          readsWithNoise, snpRate,
                                          parameterRobot)

    nodeOrder = eulerCycle.findEC(msaGraph)
    recoveredGenome = readAns.reportRecovSeq(nodeOrder, cleanClusters,
                                             readsWithNoise, parameterRobot)

    mistakeCount, succeeded = compare.subAlignCompare(
        recoveredGenome, referenceGenome, parameterRobot)
    return mistakeCount, succeeded


# Target of the new code : Fast Assemble and assembly in optimal amount of information
# NOTE(review): the disabled timing driver below passes seven arguments and so
# predates the current four-parameter runAssembler signature.
#t0 = time.time()
#N, G, L, p,snpRate, typeOfGen, detail = 1000, 10000,200, 0.015, 0.001 ,'m', "500-300-50"
#runAssembler(N, G, L, p,snpRate, typeOfGen, detail)
#print "Time (sec) :", time.time() - t0
def ECUnitTest(Nin, G, Lin, foldername="", bridgingDepth=20, msaWidth=20):
    """Run the back half of the assembly pipeline on data stored in a folder.

    Builds a parameter object, loads the raw data, the cluster mapping and
    the graph from files under ``foldername``, then runs repeat resolution,
    MSA resolution, ``eulerCycle.findEC`` and sequence recovery, and finally
    compares the recovered genome with the loaded reference.

    Args:
        Nin: stored on the parameter object as ``N``.
        G: stored on the parameter object as ``G``.
        Lin: stored on the parameter object as ``L``.
        foldername: raw string prefix for every input file and also the
            parameter object's ``defaultFolder`` (no separator is added).
        bridgingDepth: stored on the parameter object (default 20).
        msaWidth: stored on the parameter object (default 20).

    Returns:
        The ``(numMistakes, success)`` pair produced by
        ``compare.subAlignCompare``.
    """
    # NOTE(review): `logging` is expected to be the project's own module here;
    # the standard library logging module has no parameterObj.
    settings = logging.parameterObj()
    settings.bridgingDepth = bridgingDepth
    settings.msaWidth = msaWidth
    settings.N = Nin
    settings.G = G
    settings.L = Lin
    settings.p = 0.015
    settings.indel = True
    settings.defaultFolder = foldername
    settings.threshold = 5
    settings.liid = 48

    snpRate = 0.001

    # Read the sizes back from the parameter object, as the original did.
    genomeLen, readCount, readLen = settings.G, settings.N, settings.L
    rawDataPrefix = foldername + "UnitTest"
    motherGen, _cleanReads, noisyReads = logging.rawDataLoad(
        rawDataPrefix, genomeLen, readCount, readLen, "dn")

    clusterMap = logging.fmapfusedLoad(foldername + 'clusteredGroup2.csv')
    loadedGraph = logging.loadGraph(foldername + 'basicMapping.csv',
                                    foldername + 'seqMapping.txt',
                                    'simple')
    checkCondensingBasic(loadedGraph, [loadedGraph[0]], "simple")

    bridgedGraph = bridgeResolve.resolveRepeats(clusterMap, loadedGraph,
                                                settings)
    checkCondensingBasic(bridgedGraph, [bridgedGraph[0]], "MB")

    # The original carried a disabled alternative here that used the bridged
    # graph directly instead of running MSAresolve.
    msaGraph = alignmentBridge.MSAresolve(clusterMap, bridgedGraph,
                                          noisyReads, snpRate, settings)

    nodeOrder = eulerCycle.findEC(msaGraph)
    recoveredGenome = readAns.reportRecovSeq(nodeOrder, clusterMap,
                                             noisyReads, settings)

    mistakeCount, succeeded = compare.subAlignCompare(recoveredGenome,
                                                      motherGen, settings)
    return mistakeCount, succeeded