def ECUnitTest(Nin, G, Lin, foldername="", bridgingDepth=20, msaWidth=20):
    """End-to-end Euler-cycle unit test.

    Loads the saved reads, fused fmap and basic graph from `foldername`,
    resolves repeats, runs MSA bridging, recovers the sequence via an Euler
    cycle and compares it against the mother genome.

    Returns (numMistakes, success) from compare.subAlignCompare.
    """
    params = logging.parameterObj()
    params.bridgingDepth = bridgingDepth
    params.msaWidth = msaWidth
    params.N, params.G, params.L, params.p = Nin, G, Lin, 0.015
    params.indel = True
    params.defaultFolder = foldername
    params.threshold = 5
    params.liid = 48
    snpRate, typeOfGen, detail = 0.001, 'm', "500-200-50"
    # Rebind G and pull N, L back out of the parameter object.
    G, N, L = params.G, params.N, params.L
    motherGen, reads, noisyReads = logging.rawDataLoad(
        foldername + "UnitTest", G, N, L, "dn")
    fusedMap = logging.fmapfusedLoad(foldername + 'clusteredGroup2.csv')
    baseGraph = logging.loadGraph(foldername + 'basicMapping.csv',
                                  foldername + 'seqMapping.txt', 'simple')
    checkCondensingBasic(baseGraph, [baseGraph[0]], "simple")
    bridgedGraph = bridgeResolve.resolveRepeats(fusedMap, baseGraph, params)
    checkCondensingBasic(bridgedGraph, [bridgedGraph[0]], "MB")
    msaGraph = alignmentBridge.MSAresolve(fusedMap, bridgedGraph, noisyReads,
                                          snpRate, params)
    recovSeq = eulerCycle.findEC(msaGraph)
    recovGen = readAns.reportRecovSeq(recovSeq, fusedMap, noisyReads, params)
    numMistakes, success = compare.subAlignCompare(recovGen, motherGen, params)
    return numMistakes, success
def dataGenUnitTest(): print "dataGenUnitTest" dummyParameters = logging.parameterObj() dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p, typeOfGen, detail = 100, 10000,100, 0.01, 't', "500" dummyParameters.indel = True motherGen, reads, noisyReads = dataGen.generateData(typeOfGen,detail,dummyParameters)
def batchProcessingGenomeSegTest(): print "Batch Processing LNK Test" headerName = "largeScaleTest/" os.system("mkdir " + headerName) logging.savingGenomeSegmentFile(headerName) listOfNLKDataPts = logging.loadingGenomeSegmentFile(headerName) numberOfRounds = 1 for testPoint, roundNum in zip(listOfNLKDataPts, range(len(listOfNLKDataPts))): folderName = headerName + "sample_point_" + str(roundNum) os.system("mkdir " + folderName) [ G, N, L, p, epsilon, K, liid, threshold, NKcov, Nbridge, Ncov, ratio, numberOfClusterRounds, brachingDepth, bridgingDepth, msaWidth, clusterRounds, fingerPrint, clusterRatio, startindex, endindex ] = testPoint print "G", G parameterRobot = logging.parameterObj( G, N, L, p, epsilon, K, liid, threshold, NKcov, Nbridge, Ncov, ratio, numberOfClusterRounds, brachingDepth, bridgingDepth, msaWidth, folderName, clusterRounds, fingerPrint, clusterRatio) snpRate, typeOfGen, detail = 0.001, 'd', "genome.fasta-" + str( startindex) + "-" + str(endindex) parameterRobot.indel = True temptime = time.time() batchProcessingUnitTest(numberOfRounds, parameterRobot, snpRate, typeOfGen, detail) print "time per sample point ", time.time() - temptime
def batchProcessingLNKTest(): print "Batch Processing LNK Test" headerName = "synthetic_reads/" os.system("mkdir " + headerName) logging.savingLNKFile(headerName) listOfNLKDataPts = logging.loadingLNKFile(headerName) numberOfRounds = 1 listOfNLKDataPts = [listOfNLKDataPts[0]] for testPoint, roundNum in zip(listOfNLKDataPts, range(len(listOfNLKDataPts))): folderName = headerName + "sample_point_" + str(roundNum) os.system("mkdir " + folderName) snpRate, typeOfGen, detail = 0.001, 'm', "500-200-50" [ G, N, L, p, epsilon, K, liid, threshold, NKcov, Nbridge, Ncov, ratio, numberOfClusterRounds, brachingDepth, bridgingDepth, msaWidth, clusterRounds, fingerPrint, clusterRatio ] = testPoint parameterRobot = logging.parameterObj( G, N, L, p, epsilon, K, liid, threshold, NKcov, Nbridge, Ncov, ratio, numberOfClusterRounds, brachingDepth, bridgingDepth, msaWidth, folderName, clusterRounds, fingerPrint, clusterRatio) parameterRobot.indel = True batchProcessingUnitTest(numberOfRounds, parameterRobot, snpRate, typeOfGen, detail)
def branchClearingUnitTest(G, N, L, K, liid, threshold, foldername="", branchingDepth=20):
    """Build the sequence graph from a saved clustered fmapping, clear
    residual branches, and sanity-check the condensed result."""
    params = logging.parameterObj()
    params.defaultFolder = foldername
    params.brachingDepth = branchingDepth  # sic: project-wide attribute spelling
    params.K, params.liid, params.threshold = K, liid, threshold
    params.G, params.N, params.L = G, N, L
    motherGen, reads, noisyReads = logging.rawDataLoad(
        foldername + "UnitTest", G, N, L, "dn")
    clusteredMap = logging.fmappingLoad(foldername + 'clusteredGroup.csv')
    seqGraph, startList, fmapping = graphForm.getSeqGraph(
        clusteredMap, noisyReads, params)
    clusteredMap, seqGraph = branchClear.clearResidual(
        clusteredMap, seqGraph, params)
    clearedGraph = seqGraph
    graphForm.debugSeqGraph(clearedGraph)
    checkCondensingBasic2(clearedGraph, [clearedGraph[0]], "simple")
def branchClearingUnitTest(G, N, L, K, liid, threshold, foldername="", branchingDepth=20):
    """Build the sequence graph from a saved clustered fmapping, clear
    residual branches, and sanity-check the condensed result.

    NOTE(review): this file defines branchClearingUnitTest twice; this later
    definition shadows the earlier one at import time.
    """
    dummyParameters = logging.parameterObj()
    dummyParameters.defaultFolder = foldername
    dummyParameters.brachingDepth = branchingDepth  # sic: project-wide attribute spelling
    dummyParameters.K, dummyParameters.liid, dummyParameters.threshold = K, liid, threshold
    dummyParameters.G, dummyParameters.N, dummyParameters.L = G, N, L
    motherGen, reads, noisyReads = logging.rawDataLoad(foldername + "UnitTest", G, N, L, "dn")
    returnfmapping = logging.fmappingLoad(foldername + 'clusteredGroup.csv')
    G1, startList, fmapping = graphForm.getSeqGraph(returnfmapping, noisyReads, dummyParameters)
    #checkCondensingBasic(G1, startList, "simple")
    returnfmapping, G1 = branchClear.clearResidual(returnfmapping, G1, dummyParameters)
    G2 = G1
    graphForm.debugSeqGraph(G2)
    #G2 = G1
    checkCondensingBasic2(G2, [G2[0]], "simple")
def dataGenUnitTest():
    """Smoke test for dataGen.generateData on a small synthetic setup.

    NOTE(review): this file defines dataGenUnitTest twice; this later
    definition shadows the earlier one at import time.
    """
    print "dataGenUnitTest"
    dummyParameters = logging.parameterObj()
    dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p, typeOfGen, detail = 100, 10000, 100, 0.01, 't', "500"
    dummyParameters.indel = True
    motherGen, reads, noisyReads = dataGen.generateData(
        typeOfGen, detail, dummyParameters)
def overallTestRun():
    """Run the full assembler pipeline on a fixed genome segment with a
    hard-coded parameter set; returns (numMistakes, success).

    ("oneLine.fasta-0-1440371" was an earlier input for this run.)
    """
    # Fixed sample-point parameters, grouped for readability.
    G, N, L = 10000, 83, 1189
    p, epsilon = 0.01, 0.1
    K, liid, threshold = 600, 32, 8
    NKcov, Nbridge, Ncov, ratio = 508, 377, 417, 1.21822542
    numberOfClusterRounds, brachingDepth, bridgingDepth, msaWidth = 0, 0, 0, 0
    folderName = ""
    parameterRobot = logging.parameterObj(
        G, N, L, p, epsilon, K, liid, threshold, NKcov, Nbridge, Ncov,
        ratio, numberOfClusterRounds, brachingDepth, bridgingDepth,
        msaWidth, folderName)
    snpRate, typeOfGen, detail = 0.001, 'd', "genome.fasta-50000-60000"
    parameterRobot.clusterRounds, parameterRobot.fingerPrint, parameterRobot.clusterRatio = 2, 6, 1
    numMistakes, success = assemblerMain.runAssembler(
        snpRate, typeOfGen, detail, parameterRobot)
    return numMistakes, success
def graphFormUnitTest2(N, G, L, folderName=""):
    """Rebuild the sequence graph from a saved clustered fmapping and the
    saved noisy unit-test reads."""
    params = logging.parameterObj()
    params.defaultFolder = folderName
    clusteredMap = logging.fmappingLoad(params.defaultFolder + 'clusteredGroup.csv')
    params.N, params.G, params.L, params.p = N, G, L, 0.015
    typeOfGen, detail = 'm', "500-200-50"
    motherGen, reads, noisyReads = logging.rawDataLoad(
        params.defaultFolder + "UnitTest",
        params.G, params.N, params.L, "dn")
    seqGraph, startList, fmapping = graphForm.getSeqGraph(
        clusteredMap, noisyReads, params)
def clusteringTestingOfK(G, N, L, folderName, K, threshold, liid):
    """Cluster the saved noisy reads using the given K-mer length,
    threshold and liid settings (fast mode)."""
    motherGen, reads, noisyReads = logging.rawDataLoad(
        folderName + "UnitTest", G, N, L, 'a')
    params = logging.parameterObj()
    params.brachingDepth = 20  # sic: project-wide attribute spelling
    params.clusterRounds, params.fingerPrint, params.clusterRatio = 2, 6, 2
    params.N, params.L, params.K = N, L, K
    params.G, params.threshold, params.liid = G, threshold, liid
    cluster.groupNoisyKmers(noisyReads, params, 'fast')
def clusteringTestingOfK(G, N, L, folderName, K, threshold, liid):
    """Cluster the saved noisy reads using the given K-mer length,
    threshold and liid settings (fast mode).

    NOTE(review): this file defines clusteringTestingOfK twice; this later
    definition shadows the earlier one at import time.
    """
    motherGen, reads, noisyReads = logging.rawDataLoad(folderName + "UnitTest", G, N, L, 'a')
    dummyParameters = logging.parameterObj()
    dummyParameters.brachingDepth = 20  # sic: project-wide attribute spelling
    dummyParameters.clusterRounds, dummyParameters.fingerPrint, dummyParameters.clusterRatio = 2, 6, 2
    dummyParameters.N, dummyParameters.L, dummyParameters.K, dummyParameters.G, dummyParameters.threshold, dummyParameters.liid = N, L, K, G, threshold, liid
    cluster.groupNoisyKmers(noisyReads, dummyParameters, 'fast')
def graphFormUnitTest2(N, G, L, folderName=""):
    """Rebuild the sequence graph from a saved clustered fmapping and the
    saved noisy unit-test reads.

    NOTE(review): this file defines graphFormUnitTest2 twice; this later
    definition shadows the earlier one at import time.
    """
    dummyParameters = logging.parameterObj()
    dummyParameters.defaultFolder = folderName
    returnfmapping = logging.fmappingLoad(dummyParameters.defaultFolder + 'clusteredGroup.csv')
    # typeOfGen and detail are assigned here but never used below.
    dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p, typeOfGen, detail = N, G, L, 0.015, 'm', "500-200-50"
    motherGen, reads, noisyReads = logging.rawDataLoad(
        dummyParameters.defaultFolder + "UnitTest", dummyParameters.G,
        dummyParameters.N, dummyParameters.L, "dn")
    G1, startList, fmapping = graphForm.getSeqGraph(returnfmapping, noisyReads,
                                                    dummyParameters)
def overallTestRun():
    """Run the full assembler pipeline on a fixed genome segment with a
    hard-coded parameter set; returns (numMistakes, success).

    NOTE(review): this file defines overallTestRun twice; this later
    definition shadows the earlier one at import time.
    """
    #"oneLine.fasta-0-1440371"
    G, N, L, p, epsilon, K, liid, threshold, NKcov, Nbridge, Ncov, ratio, numberOfClusterRounds, brachingDepth, bridgingDepth, msaWidth, folderName = 10000, 83, 1189, 0.01, 0.1, 600, 32, 8, 508, 377, 417, 1.21822542, 0, 0, 0, 0, ""
    parameterRobot = logging.parameterObj(
        G, N, L, p, epsilon, K, liid, threshold, NKcov, Nbridge, Ncov, ratio,
        numberOfClusterRounds, brachingDepth, bridgingDepth, msaWidth, folderName)
    snpRate, typeOfGen, detail = 0.001, 'd', "genome.fasta-50000-60000"
    parameterRobot.clusterRounds, parameterRobot.fingerPrint, parameterRobot.clusterRatio = 2, 6, 1
    #snpRate, typeOfGen , detail = 0.001,'m', "500-200-50"
    numMistakes, success = assemblerMain.runAssembler(snpRate, typeOfGen, detail,
                                                      parameterRobot)
    return numMistakes, success
def resolveRepeatsUnitTest(foldername= ""): var = raw_input("Enter something: ") print "you entered ", var f2= logging.fmapfusedLoad(foldername+'clusteredGroup2.csv') G2 = logging.loadGraph(foldername+'basicMapping.csv', foldername+'seqMapping.txt', 'simple') checkCondensingBasic(G2, [G2[0]], "simple") dummyParameters = logging.parameterObj() dummyParameters.bridgingDepth = 10 G3 = bridgeResolve.resolveRepeats(f2,G2,dummyParameters) checkCondensingBasic(G3, [G3[0]], "MB")
def resolveRepeatsUnitTest(foldername=""):
    """Pause for user input, then load the fused fmap and basic graph, run
    repeat resolution, and check condensing before and after.

    NOTE(review): this file defines resolveRepeatsUnitTest twice; this later
    definition shadows the earlier one at import time.
    """
    var = raw_input("Enter something: ")  # manual pause so files can be inspected
    print "you entered ", var
    f2 = logging.fmapfusedLoad(foldername + 'clusteredGroup2.csv')
    G2 = logging.loadGraph(foldername + 'basicMapping.csv',
                           foldername + 'seqMapping.txt', 'simple')
    checkCondensingBasic(G2, [G2[0]], "simple")
    dummyParameters = logging.parameterObj()
    dummyParameters.bridgingDepth = 10
    G3 = bridgeResolve.resolveRepeats(f2, G2, dummyParameters)
    checkCondensingBasic(G3, [G3[0]], "MB")
def graphFormUnitTest(N, G, L, folderName =""): print "dataGenUnitTest" dummyParameters = logging.parameterObj() dummyParameters.defaultFolder = folderName dummyParameters.liid = 40 dummyParameters.K = 40 dummyParameters.threshold = 6 dummyParameters.p = 0.015 dummyParameters.indel = True dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p, typeOfGen, detail = N, G, L, 0.015, 'm', "500-200-50" # motherGen, reads, noisyReads = dataGen.generateData( typeOfGen,detail,dummyParameters) motherGen, reads, noisyReads = logging.rawDataLoad(dummyParameters.defaultFolder+"UnitTest",dummyParameters.G,dummyParameters.N,dummyParameters.L, "dn") returnfmapping= cluster.groupIndelNoisyKmers(noisyReads, dummyParameters, "fast")
def testBandedClustering(): dummyParameters = logging.parameterObj() #dummyParameters.defaultFolder = "" dummyParameters.liid = 40 dummyParameters.K = 40 dummyParameters.threshold = 6 dummyParameters.p = 0.015 dummyParameters.indel = True dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p, typeOfGen, detail = 600, 10000,200, 0.015, 'm', "500-200-50" startFingerPrint, endFingerPrint, read1, read2 = [] , [] , [] , [] read1= np.zeros(200, dtype= np.int64) read2 = np.zeros(200, dtype= np.int64) for i in range(200): read1[i] = random.randint(1,4) read2[i] = random.randint(1,4) for i in range(50): read2[i] = 4 read1[150+i] = 4 print read1, read2 startFingerPrint, endFingerPrint = [160,8] ,[ 190,40] score , returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj =cluster.SWAlignmentBanded(startFingerPrint, endFingerPrint,read1 ,read2 , dummyParameters) print "score", score cluster.printSeq(returnalignedSeq1) cluster.printSeq(returnalignedSeq2) print "starti, startj, endi , endj : ", starti, startj, endi , endj score , returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj =cluster.SWAlignment(read1, read2 , dummyParameters) print "score", score cluster.printSeq(returnalignedSeq1) cluster.printSeq(returnalignedSeq2) print "starti, startj, endi , endj : ", starti, startj, endi , endj
def clusterUnitTest2():
    """Cluster indel-noisy k-mers of the saved clusterReadsUnitTest reads."""
    # Unit Test 1 : Generate 30 reads with 10 copies with noise , length being 20
    motherGen, reads, noisyReads = logging.rawDataLoad(
        "clusterReadsUnitTest", 10, 300, 40, "dn")
    robot = logging.parameterObj()
    robot.N, robot.L, robot.G = 300, 40, 10000
    robot.liid, robot.K = 30, 30
    robot.threshold = 5
    robot.p = 0.01
    robot.indel = True
    cluster.groupIndelNoisyKmers(noisyReads, robot, "fast")
def MSAResolverEpeatUnitTest(Nin, Lin, foldername=""):
    """Load the fused fmap and basic graph, resolve repeats, then run MSA
    bridging against the saved noisy reads.

    Bug fix: the fused fmap is now loaded from `foldername` like every other
    artifact in this function (it previously ignored the folder and read
    'clusteredGroup2.csv' from the current working directory).
    """
    f2 = logging.fmapfusedLoad(foldername + 'clusteredGroup2.csv')
    G2 = logging.loadGraph(foldername + 'basicMapping.csv',
                           foldername + 'seqMapping.txt', 'simple')
    #checkCondensingBasic(G2, [G2[0]], "simple")
    dummyParameters = logging.parameterObj()
    dummyParameters.bridgingDepth = 5
    dummyParameters.msaWidth = 20
    G3 = bridgeResolve.resolveRepeats(f2, G2, dummyParameters)
    #checkCondensingBasic(G3, [G3[0]], "MB")
    N, G, L, p, snpRate, typeOfGen, detail = Nin, 10000, Lin, 0.015, 0.001, 'm', "500-300-50"
    motherGen, reads, noisyReads = logging.rawDataLoad(
        foldername + "UnitTest", G, N, L, "dn")
    alignmentBridge.MSAresolve(f2, G3, noisyReads, snpRate, dummyParameters)
def clusterUnitTest2():
    """Cluster indel-noisy k-mers of the saved clusterReadsUnitTest reads.

    NOTE(review): this file defines clusterUnitTest2 twice; this later
    definition shadows the earlier one at import time.
    """
    # Unit Test 1 : Generate 30 reads with 10 copies with noise , length being 20
    motherGen, reads, noisyReads = logging.rawDataLoad("clusterReadsUnitTest", 10, 300, 40, "dn")
    parameterRobot = logging.parameterObj()
    parameterRobot.N = 300
    parameterRobot.L = 40
    parameterRobot.G = 10000
    parameterRobot.liid = 30
    parameterRobot.K = 30
    parameterRobot.threshold = 5
    parameterRobot.p = 0.01
    parameterRobot.indel = True
    cluster.groupIndelNoisyKmers(noisyReads, parameterRobot, "fast")
def segmentChopAndTestTest(Nin, G, Lin, foldername="", bridgingDepth=20, msaWidth=20):
    """Interactively prune the loaded graph, then chop it into segments and
    align them against the mother genome.

    Bug fix: the call to interactiveNodeRemoval referenced the undefined
    name `folderName`; the parameter is spelled `foldername`.
    """
    dummyParameters = logging.parameterObj()
    dummyParameters.bridgingDepth = bridgingDepth
    dummyParameters.msaWidth = msaWidth
    dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p = Nin, G, Lin, 0.015
    dummyParameters.indel = True
    dummyParameters.defaultFolder = foldername
    dummyParameters.threshold = 5
    dummyParameters.liid = 48
    snpRate, typeOfGen, detail = 0.001, 'm', "500-200-50"
    G, N, L = dummyParameters.G, dummyParameters.N, dummyParameters.L
    motherGen, reads, noisyReads = logging.rawDataLoad(
        foldername + "UnitTest", G, N, L, "dn")
    #G3, f2 = interactiveNodeRemoval(foldername, "skip")
    G3, f2 = interactiveNodeRemoval(foldername, "desiredOpt")
    chopAndAlign(G3, f2, noisyReads, motherGen, dummyParameters)
def graphFormUnitTest(N, G, L, folderName=""):
    """Load the saved noisy reads and cluster their indel-noisy k-mers.

    NOTE(review): this file defines graphFormUnitTest twice; this later
    definition shadows the earlier one at import time.
    """
    print "dataGenUnitTest"  # banner string matches dataGenUnitTest's
    dummyParameters = logging.parameterObj()
    dummyParameters.defaultFolder = folderName
    dummyParameters.liid = 40
    dummyParameters.K = 40
    dummyParameters.threshold = 6
    dummyParameters.p = 0.015
    dummyParameters.indel = True
    # p is re-set to the same value; typeOfGen/detail are unused below.
    dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p, typeOfGen, detail = N, G, L, 0.015, 'm', "500-200-50"
    # motherGen, reads, noisyReads = dataGen.generateData( typeOfGen,detail,dummyParameters)
    motherGen, reads, noisyReads = logging.rawDataLoad(
        dummyParameters.defaultFolder + "UnitTest", dummyParameters.G,
        dummyParameters.N, dummyParameters.L, "dn")
    returnfmapping = cluster.groupIndelNoisyKmers(noisyReads, dummyParameters,
                                                  "fast")
def MSAResolverEpeatUnitTest(Nin, Lin, foldername=""):
    """Load the fused fmap and basic graph, resolve repeats, then run MSA
    bridging against the saved noisy reads.

    Bug fix: the fused fmap is now loaded from `foldername` like every other
    artifact in this function (it previously ignored the folder and read
    'clusteredGroup2.csv' from the current working directory).
    """
    f2 = logging.fmapfusedLoad(foldername + 'clusteredGroup2.csv')
    G2 = logging.loadGraph(foldername + 'basicMapping.csv',
                           foldername + 'seqMapping.txt', 'simple')
    #checkCondensingBasic(G2, [G2[0]], "simple")
    dummyParameters = logging.parameterObj()
    dummyParameters.bridgingDepth = 5
    dummyParameters.msaWidth = 20
    G3 = bridgeResolve.resolveRepeats(f2, G2, dummyParameters)
    #checkCondensingBasic(G3, [G3[0]], "MB")
    N, G, L, p, snpRate, typeOfGen, detail = Nin, 10000, Lin, 0.015, 0.001, 'm', "500-300-50"
    motherGen, reads, noisyReads = logging.rawDataLoad(
        foldername + "UnitTest", G, N, L, "dn")
    alignmentBridge.MSAresolve(f2, G3, noisyReads, snpRate, dummyParameters)
def testBandedClustering(): dummyParameters = logging.parameterObj() #dummyParameters.defaultFolder = "" dummyParameters.liid = 40 dummyParameters.K = 40 dummyParameters.threshold = 6 dummyParameters.p = 0.015 dummyParameters.indel = True dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p, typeOfGen, detail = 600, 10000, 200, 0.015, 'm', "500-200-50" startFingerPrint, endFingerPrint, read1, read2 = [], [], [], [] read1 = np.zeros(200, dtype=np.int64) read2 = np.zeros(200, dtype=np.int64) for i in range(200): read1[i] = random.randint(1, 4) read2[i] = random.randint(1, 4) for i in range(50): read2[i] = 4 read1[150 + i] = 4 print read1, read2 startFingerPrint, endFingerPrint = [160, 8], [190, 40] score, returnalignedSeq1, returnalignedSeq2, starti, startj, endi, endj = cluster.SWAlignmentBanded( startFingerPrint, endFingerPrint, read1, read2, dummyParameters) print "score", score cluster.printSeq(returnalignedSeq1) cluster.printSeq(returnalignedSeq2) print "starti, startj, endi , endj : ", starti, startj, endi, endj score, returnalignedSeq1, returnalignedSeq2, starti, startj, endi, endj = cluster.SWAlignment( read1, read2, dummyParameters) print "score", score cluster.printSeq(returnalignedSeq1) cluster.printSeq(returnalignedSeq2) print "starti, startj, endi , endj : ", starti, startj, endi, endj
def batchProcessingGenomeSegTest():
    """Run batchProcessingUnitTest once for every genome-segment sample
    point produced by logging.loadingGenomeSegmentFile, timing each run.

    NOTE(review): this file defines batchProcessingGenomeSegTest twice; this
    later definition shadows the earlier one at import time.
    """
    print "Batch Processing LNK Test"
    headerName = "largeScaleTest/"
    os.system("mkdir " + headerName)
    logging.savingGenomeSegmentFile(headerName)
    listOfNLKDataPts = logging.loadingGenomeSegmentFile(headerName)
    numberOfRounds = 1
    for testPoint, roundNum in zip(listOfNLKDataPts, range(len(listOfNLKDataPts))):
        folderName = headerName + "sample_point_" + str(roundNum)
        os.system("mkdir " + folderName)
        [G, N, L, p, epsilon, K, liid, threshold, NKcov, Nbridge, Ncov, ratio,
         numberOfClusterRounds, brachingDepth, bridgingDepth, msaWidth,
         clusterRounds, fingerPrint, clusterRatio, startindex, endindex] = testPoint
        print "G", G
        parameterRobot = logging.parameterObj(
            G, N, L, p, epsilon, K, liid, threshold, NKcov, Nbridge, Ncov,
            ratio, numberOfClusterRounds, brachingDepth, bridgingDepth,
            msaWidth, folderName, clusterRounds, fingerPrint, clusterRatio)
        # 'd' = load a genome slice from a FASTA file by index range.
        snpRate, typeOfGen, detail = 0.001, 'd', "genome.fasta-" + str(startindex) + "-" + str(endindex)
        parameterRobot.indel = True
        temptime = time.time()
        batchProcessingUnitTest(numberOfRounds, parameterRobot, snpRate, typeOfGen, detail)
        print "time per sample point ", time.time() - temptime
def ECUnitTest(Nin, G, Lin, foldername="", bridgingDepth=20, msaWidth=20):
    """End-to-end Euler-cycle unit test: load saved reads/graphs, resolve
    repeats, run MSA bridging, recover the sequence and compare it to the
    mother genome.  Returns (numMistakes, success).

    NOTE(review): this file defines ECUnitTest twice; this later definition
    shadows the earlier one at import time.
    """
    dummyParameters = logging.parameterObj()
    dummyParameters.bridgingDepth = bridgingDepth
    dummyParameters.msaWidth = msaWidth
    dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p = Nin, G, Lin, 0.015
    dummyParameters.indel = True
    dummyParameters.defaultFolder = foldername
    dummyParameters.threshold = 5
    dummyParameters.liid = 48
    snpRate, typeOfGen, detail = 0.001, 'm', "500-200-50"
    # Trailing comma makes the RHS a 3-tuple; harmless but stray.
    G, N, L = dummyParameters.G, dummyParameters.N, dummyParameters.L,
    motherGen, reads, noisyReads = logging.rawDataLoad(foldername + "UnitTest", G, N, L, "dn")
    f2 = logging.fmapfusedLoad(foldername + 'clusteredGroup2.csv')
    G2 = logging.loadGraph(foldername + 'basicMapping.csv',
                           foldername + 'seqMapping.txt', 'simple')
    checkCondensingBasic(G2, [G2[0]], "simple")
    G3 = bridgeResolve.resolveRepeats(f2, G2, dummyParameters)
    checkCondensingBasic(G3, [G3[0]], "MB")
    G4 = alignmentBridge.MSAresolve(f2, G3, noisyReads, snpRate, dummyParameters)
    #G4 = G3
    #checkCondensingBasic(G4, [G4[0]], "MB")
    recovSeq = eulerCycle.findEC(G4)
    recovGen = readAns.reportRecovSeq(recovSeq, f2, noisyReads, dummyParameters)
    numMistakes, success = compare.subAlignCompare(recovGen, motherGen, dummyParameters)
    return numMistakes, success
def testChecking(folderName, N, L, G ): #N, L, G = 1345, 1000, 50000 f2 = open(folderName+"rec.txt", 'r') temp2 = f2.read() recov = np.zeros(len(temp2), dtype = np.int32) j =0 while j <len(temp2): if temp2[j] != '-': recov[j] = int(temp2[j]) j = j+1 else: recov[j] = -1 j = j+2 f2.close() dummyParameters = logging.parameterObj() dummyParameters.defaultFolder = folderName dummyParameters.G = G motherGen, reads, noisyReads = logging.rawDataLoad(folderName+"UnitTest",G,N,L, 'd') print len(recov) , len(motherGen) numberMistakes, success = compare.subAlignCompare(recov, motherGen,dummyParameters)
def dataGenUnitTest2(): print "dataGenUnitTest2" dummyParameters = logging.parameterObj() dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p, typeOfGen, detail = 100, 10000,100, 0.01, 't', "500" G,N,L = dummyParameters.G, dummyParameters.N, dummyParameters.L motherGen, reads, noisyReads = logging.rawDataLoad("UnitTest",G,N,L, 'd') print reads print noisyReads baseList = [] for indexN in range(N): for indexL in range(L): if reads[indexN][indexL] == noisyReads[indexN][indexL]: baseList.append(0) else: baseList.append(1) print "Error Probability", np.mean(baseList) print "numberOfBasesGet ", len(baseList)
def dataGenUnitTest2():
    """Load the saved 'UnitTest' reads and print the empirical per-base
    substitution rate between clean and noisy reads.

    NOTE(review): this file defines dataGenUnitTest2 twice; this later
    definition shadows the earlier one at import time.
    """
    print "dataGenUnitTest2"
    dummyParameters = logging.parameterObj()
    # typeOfGen/detail are assigned but unused below.
    dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p, typeOfGen, detail = 100, 10000, 100, 0.01, 't', "500"
    G, N, L = dummyParameters.G, dummyParameters.N, dummyParameters.L
    motherGen, reads, noisyReads = logging.rawDataLoad("UnitTest", G, N, L, 'd')
    print reads
    print noisyReads
    baseList = []
    for indexN in range(N):
        for indexL in range(L):
            if reads[indexN][indexL] == noisyReads[indexN][indexL]:
                baseList.append(0)
            else:
                baseList.append(1)
    print "Error Probability", np.mean(baseList)
    print "numberOfBasesGet ", len(baseList)
def batchProcessingLNKTest():
    """Run batchProcessingUnitTest on LNK sample points (restricted here to
    only the first data point).

    NOTE(review): this file defines batchProcessingLNKTest twice; this later
    definition shadows the earlier one at import time.
    """
    print "Batch Processing LNK Test"
    headerName = "synthetic_reads/"
    os.system("mkdir " + headerName)
    logging.savingLNKFile(headerName)
    listOfNLKDataPts = logging.loadingLNKFile(headerName)
    numberOfRounds = 1
    listOfNLKDataPts = [listOfNLKDataPts[0]]  # only exercise the first sample point
    for testPoint, roundNum in zip(listOfNLKDataPts, range(len(listOfNLKDataPts))):
        folderName = headerName + "sample_point_" + str(roundNum)
        os.system("mkdir " + folderName)
        snpRate, typeOfGen, detail = 0.001, 'm', "500-200-50"
        [G, N, L, p, epsilon, K, liid, threshold, NKcov, Nbridge, Ncov, ratio,
         numberOfClusterRounds, brachingDepth, bridgingDepth, msaWidth,
         clusterRounds, fingerPrint, clusterRatio] = testPoint
        parameterRobot = logging.parameterObj(
            G, N, L, p, epsilon, K, liid, threshold, NKcov, Nbridge, Ncov,
            ratio, numberOfClusterRounds, brachingDepth, bridgingDepth,
            msaWidth, folderName, clusterRounds, fingerPrint, clusterRatio)
        parameterRobot.indel = True
        batchProcessingUnitTest(numberOfRounds, parameterRobot, snpRate, typeOfGen, detail)
def testChecking(folderName, N, L, G):
    """Parse a recovered-sequence dump (rec.txt) and compare it against the
    saved mother genome.  A '-' entry decodes to -1 and consumes two
    characters of the dump.

    NOTE(review): this file defines testChecking twice; this later
    definition shadows the earlier one at import time.
    """
    #N, L, G = 1345, 1000, 50000
    f2 = open(folderName + "rec.txt", 'r')
    temp2 = f2.read()
    recov = np.zeros(len(temp2), dtype=np.int32)
    j = 0
    while j < len(temp2):
        if temp2[j] != '-':
            recov[j] = int(temp2[j])
            j = j + 1
        else:
            recov[j] = -1
            j = j + 2  # skip the '-' and the character after it
    f2.close()
    dummyParameters = logging.parameterObj()
    dummyParameters.defaultFolder = folderName
    dummyParameters.G = G
    motherGen, reads, noisyReads = logging.rawDataLoad(folderName + "UnitTest", G, N, L, 'd')
    print len(recov), len(motherGen)
    numberMistakes, success = compare.subAlignCompare(recov, motherGen, dummyParameters)
def segmentChopAndTestTest(Nin, G, Lin, foldername="", bridgingDepth=20, msaWidth=20):
    """Interactively prune the loaded graph, then chop it into segments and
    align them against the mother genome.

    Bug fix: the call to interactiveNodeRemoval referenced the undefined
    name `folderName`; the parameter is spelled `foldername`.
    """
    dummyParameters = logging.parameterObj()
    dummyParameters.bridgingDepth = bridgingDepth
    dummyParameters.msaWidth = msaWidth
    dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p = Nin, G, Lin, 0.015
    dummyParameters.indel = True
    dummyParameters.defaultFolder = foldername
    dummyParameters.threshold = 5
    dummyParameters.liid = 48
    snpRate, typeOfGen, detail = 0.001, 'm', "500-200-50"
    G, N, L = dummyParameters.G, dummyParameters.N, dummyParameters.L
    motherGen, reads, noisyReads = logging.rawDataLoad(
        foldername + "UnitTest", G, N, L, "dn")
    #G3, f2 = interactiveNodeRemoval(foldername, "skip")
    G3, f2 = interactiveNodeRemoval(foldername, "desiredOpt")
    chopAndAlign(G3, f2, noisyReads, motherGen, dummyParameters)
def interactiveNodeRemoval(foldername, modeOfOpt = "skip"):
    '''
    Apply a scripted sequence of graph-editing commands to the basic graph
    loaded from `foldername`, then resolve repeats on the edited result.

    Supported commands (examples):
        delnode 1002
        deledge 1004, 1028
        addedge 1023 , 3434
        fusenode 1438104
        view

    modeOfOpt selects a canned command list: "skip" just loads the graph;
    "desiredOpt" replays a hard-coded editing session.  (Any other value
    leaves varList undefined -- presumably never used that way; verify.)

    Returns (G3, f2): the repeat-resolved graph and the fused fmap.

    NOTE(review): the `assert(1==2)` after resolveRepeats aborts this
    function unconditionally -- it looks like a deliberate debug stop, and
    the `G3 = G2` line after it is unreachable.  This earlier definition is
    shadowed by a later duplicate in this file.
    '''
    G2 = []
    if modeOfOpt == "skip":
        varList = ["start none", ""]
    elif modeOfOpt == "desiredOpt":
        varList = ["start none", "delnode 734288", "delnode 1716432",
                   "fusenode 1438104", "fusenode 1166060", "view", ""]
    var = varList.pop(0)
    # The loop ends when it reaches the empty-string sentinel command.
    while len(var) > 0:
        command = var.split()
        if command[0] == "start":
            print "Start"
            G2 = logging.loadGraph(foldername + 'basicMapping.csv',
                                   foldername + 'seqMapping.txt', 'simple')
        elif command[0] == "delnode":
            # Detach the named node from all neighbours, clear it, condense.
            print "To delete node"
            currentNodeIndex = int(command[1])
            print "currentNodeIndex", currentNodeIndex
            for eachnode in G2:
                if eachnode.nodeIndex == currentNodeIndex:
                    currentNode = eachnode
            for eachitem in currentNode.listOfPrevNodes:
                eachitem.listOfNextNodes.remove(currentNode)
            currentNode.listOfPrevNodes = []
            for eachitem in currentNode.listOfNextNodes:
                eachitem.listOfPrevNodes.remove(currentNode)
            currentNode.listOfNextNodes = []
            currentNode.nodeIndexList = []
            graphForm.condenseGraph(G2)
        elif command[0] == "deledge":
            # Remove the directed edge node1 -> node2.
            node1 = int(command[1])
            node2 = int(command[2])
            startNode, endNode = [], []
            for eachnode in G2:
                if eachnode.nodeIndex == node1:
                    startNode = eachnode
                if eachnode.nodeIndex == node2:
                    endNode = eachnode
            startNode.listOfNextNodes.remove(endNode)
            endNode.listOfPrevNodes.remove(startNode)
            print "To insert nodes"
        elif command[0] == "addedge":
            # Add the directed edge node1 -> node2.
            node1 = int(command[1])
            node2 = int(command[2])
            startNode, endNode = [], []
            for eachnode in G2:
                if eachnode.nodeIndex == node1:
                    startNode = eachnode
                if eachnode.nodeIndex == node2:
                    endNode = eachnode
            startNode.listOfNextNodes.append(endNode)
            endNode.listOfPrevNodes.append(startNode)
        elif command[0] == "fusenode":
            # Splice the node out by reconnecting its single predecessor
            # (or single successor) directly to the nodes on the other side.
            myNodeIndex = int(command[1])
            for eachnode in G2:
                if eachnode.nodeIndex == myNodeIndex:
                    currentNode = eachnode
            if len(currentNode.listOfPrevNodes) == 1:
                prevNode = currentNode.listOfPrevNodes[0]
                for eachnextnode in currentNode.listOfNextNodes:
                    eachnextnode.listOfPrevNodes.remove(currentNode)
                    eachnextnode.listOfPrevNodes.append(prevNode)
                    prevNode.listOfNextNodes.append(eachnextnode)
                prevNode.listOfNextNodes.remove(currentNode)
                currentNode.listOfNextNodes = []
                currentNode.listOfPrevNodes = []
                currentNode.nodeIndexList = []
            elif len(currentNode.listOfNextNodes) == 1:
                nextNode = currentNode.listOfNextNodes[0]
                for eachprevnode in currentNode.listOfPrevNodes:
                    eachprevnode.listOfNextNodes.remove(currentNode)
                    eachprevnode.listOfNextNodes.append(nextNode)
                    nextNode.listOfPrevNodes.append(eachprevnode)
                nextNode.listOfPrevNodes.remove(currentNode)
                currentNode.listOfNextNodes = []
                currentNode.listOfPrevNodes = []
                currentNode.nodeIndexList = []
        elif command[0] == "view":
            G2 = graphForm.newCondensingStep(G2)
            G2 = G2[0]
            checkCondensingBasic(G2, [G2[0]], "simple")
        #var = raw_input("Enter Operations: ")
        var = varList.pop(0)
    dummyParameters = logging.parameterObj()
    dummyParameters.bridgingDepth = 20
    print "Loading fmap and graph "
    f2 = logging.fmapfusedLoad(foldername + 'clusteredGroup2.csv')
    G3 = bridgeResolve.resolveRepeats(f2, G2, dummyParameters)
    assert (1 == 2)  # debug stop: everything below is unreachable
    G3 = G2
    print "Done Loading the fmap and graph"
    #checkCondensingBasic(G3, [G3[0]], "MB")
    return G3, f2
def interactiveNodeRemoval(foldername, modeOfOpt="skip"):
    '''
    Apply a scripted sequence of graph-editing commands to the basic graph
    loaded from `foldername`, then resolve repeats on the edited result.

    Supported commands (examples):
        delnode 1002
        deledge 1004, 1028
        addedge 1023 , 3434
        fusenode 1438104
        view

    modeOfOpt selects a canned command list: "skip" just loads the graph;
    "desiredOpt" replays a hard-coded editing session.  (Any other value
    leaves varList undefined -- presumably never used that way; verify.)

    Returns (G3, f2): the repeat-resolved graph and the fused fmap.

    NOTE(review): the `assert (1 == 2)` after resolveRepeats aborts this
    function unconditionally -- it looks like a deliberate debug stop, and
    the `G3 = G2` line after it is unreachable.  This later definition
    shadows the earlier duplicate in this file.
    '''
    G2 = []
    if modeOfOpt == "skip":
        varList = ["start none", ""]
    elif modeOfOpt == "desiredOpt":
        varList = [
            "start none", "delnode 734288", "delnode 1716432",
            "fusenode 1438104", "fusenode 1166060", "view", ""
        ]
    var = varList.pop(0)
    # The loop ends when it reaches the empty-string sentinel command.
    while len(var) > 0:
        command = var.split()
        if command[0] == "start":
            print "Start"
            G2 = logging.loadGraph(foldername + 'basicMapping.csv',
                                   foldername + 'seqMapping.txt', 'simple')
        elif command[0] == "delnode":
            # Detach the named node from all neighbours, clear it, condense.
            print "To delete node"
            currentNodeIndex = int(command[1])
            print "currentNodeIndex", currentNodeIndex
            for eachnode in G2:
                if eachnode.nodeIndex == currentNodeIndex:
                    currentNode = eachnode
            for eachitem in currentNode.listOfPrevNodes:
                eachitem.listOfNextNodes.remove(currentNode)
            currentNode.listOfPrevNodes = []
            for eachitem in currentNode.listOfNextNodes:
                eachitem.listOfPrevNodes.remove(currentNode)
            currentNode.listOfNextNodes = []
            currentNode.nodeIndexList = []
            graphForm.condenseGraph(G2)
        elif command[0] == "deledge":
            # Remove the directed edge node1 -> node2.
            node1 = int(command[1])
            node2 = int(command[2])
            startNode, endNode = [], []
            for eachnode in G2:
                if eachnode.nodeIndex == node1:
                    startNode = eachnode
                if eachnode.nodeIndex == node2:
                    endNode = eachnode
            startNode.listOfNextNodes.remove(endNode)
            endNode.listOfPrevNodes.remove(startNode)
            print "To insert nodes"
        elif command[0] == "addedge":
            # Add the directed edge node1 -> node2.
            node1 = int(command[1])
            node2 = int(command[2])
            startNode, endNode = [], []
            for eachnode in G2:
                if eachnode.nodeIndex == node1:
                    startNode = eachnode
                if eachnode.nodeIndex == node2:
                    endNode = eachnode
            startNode.listOfNextNodes.append(endNode)
            endNode.listOfPrevNodes.append(startNode)
        elif command[0] == "fusenode":
            # Splice the node out by reconnecting its single predecessor
            # (or single successor) directly to the nodes on the other side.
            myNodeIndex = int(command[1])
            for eachnode in G2:
                if eachnode.nodeIndex == myNodeIndex:
                    currentNode = eachnode
            if len(currentNode.listOfPrevNodes) == 1:
                prevNode = currentNode.listOfPrevNodes[0]
                for eachnextnode in currentNode.listOfNextNodes:
                    eachnextnode.listOfPrevNodes.remove(currentNode)
                    eachnextnode.listOfPrevNodes.append(prevNode)
                    prevNode.listOfNextNodes.append(eachnextnode)
                prevNode.listOfNextNodes.remove(currentNode)
                currentNode.listOfNextNodes = []
                currentNode.listOfPrevNodes = []
                currentNode.nodeIndexList = []
            elif len(currentNode.listOfNextNodes) == 1:
                nextNode = currentNode.listOfNextNodes[0]
                for eachprevnode in currentNode.listOfPrevNodes:
                    eachprevnode.listOfNextNodes.remove(currentNode)
                    eachprevnode.listOfNextNodes.append(nextNode)
                    nextNode.listOfPrevNodes.append(eachprevnode)
                nextNode.listOfPrevNodes.remove(currentNode)
                currentNode.listOfNextNodes = []
                currentNode.listOfPrevNodes = []
                currentNode.nodeIndexList = []
        elif command[0] == "view":
            G2 = graphForm.newCondensingStep(G2)
            G2 = G2[0]
            checkCondensingBasic(G2, [G2[0]], "simple")
        #var = raw_input("Enter Operations: ")
        var = varList.pop(0)
    dummyParameters = logging.parameterObj()
    dummyParameters.bridgingDepth = 20
    print "Loading fmap and graph "
    f2 = logging.fmapfusedLoad(foldername + 'clusteredGroup2.csv')
    G3 = bridgeResolve.resolveRepeats(f2, G2, dummyParameters)
    assert (1 == 2)  # debug stop: everything below is unreachable
    G3 = G2
    print "Done Loading the fmap and graph"
    #checkCondensingBasic(G3, [G3[0]], "MB")
    return G3, f2