def ECUnitTest(Nin, G, Lin, foldername="", bridgingDepth=20, msaWidth=20):
    """Replay the saved-graph pipeline and compare the result with the reference.

    Loads the raw data, the fused mapping and the graph stored under
    ``foldername``, then calls bridgeResolve.resolveRepeats,
    alignmentBridge.MSAresolve, eulerCycle.findEC and readAns.reportRecovSeq
    in that order before comparing with compare.subAlignCompare.

    Args:
        Nin, G, Lin: stored on the parameter object as N, G and L and
            forwarded to logging.rawDataLoad.
        foldername: prefix prepended to every file name that is loaded.
        bridgingDepth, msaWidth: stored on the parameter object.

    Returns:
        The (numMistakes, success) pair produced by compare.subAlignCompare.
    """
    params = logging.parameterObj()
    for attrName, attrValue in (("bridgingDepth", bridgingDepth),
                                ("msaWidth", msaWidth),
                                ("N", Nin),
                                ("G", G),
                                ("L", Lin),
                                ("p", 0.015),
                                ("indel", True),
                                ("defaultFolder", foldername),
                                ("threshold", 5),
                                ("liid", 48)):
        setattr(params, attrName, attrValue)

    snpRate = 0.001
    motherGen, reads, noisyReads = logging.rawDataLoad(
        foldername + "UnitTest", params.G, params.N, params.L, "dn")

    fusedMap = logging.fmapfusedLoad(foldername + 'clusteredGroup2.csv')
    loadedGraph = logging.loadGraph(foldername + 'basicMapping.csv',
                                    foldername + 'seqMapping.txt', 'simple')
    checkCondensingBasic(loadedGraph, [loadedGraph[0]], "simple")

    bridgedGraph = bridgeResolve.resolveRepeats(fusedMap, loadedGraph, params)
    checkCondensingBasic(bridgedGraph, [bridgedGraph[0]], "MB")

    msaGraph = alignmentBridge.MSAresolve(fusedMap, bridgedGraph, noisyReads,
                                          snpRate, params)

    recovSeq = eulerCycle.findEC(msaGraph)
    recovGen = readAns.reportRecovSeq(recovSeq, fusedMap, noisyReads, params)

    numMistakes, success = compare.subAlignCompare(recovGen, motherGen, params)
    return numMistakes, success
def dataGenUnitTest():
    print "dataGenUnitTest"
    dummyParameters = logging.parameterObj()
    dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p, typeOfGen, detail = 100, 10000,100, 0.01, 't', "500"
    dummyParameters.indel = True
    
    motherGen, reads, noisyReads = dataGen.generateData(typeOfGen,detail,dummyParameters)
Example #3
0
def batchProcessingGenomeSegTest():
    print "Batch Processing LNK Test"
    headerName = "largeScaleTest/"
    os.system("mkdir " + headerName)

    logging.savingGenomeSegmentFile(headerName)
    listOfNLKDataPts = logging.loadingGenomeSegmentFile(headerName)
    numberOfRounds = 1

    for testPoint, roundNum in zip(listOfNLKDataPts,
                                   range(len(listOfNLKDataPts))):
        folderName = headerName + "sample_point_" + str(roundNum)
        os.system("mkdir " + folderName)

        [
            G, N, L, p, epsilon, K, liid, threshold, NKcov, Nbridge, Ncov,
            ratio, numberOfClusterRounds, brachingDepth, bridgingDepth,
            msaWidth, clusterRounds, fingerPrint, clusterRatio, startindex,
            endindex
        ] = testPoint
        print "G", G
        parameterRobot = logging.parameterObj(
            G, N, L, p, epsilon, K, liid, threshold, NKcov, Nbridge, Ncov,
            ratio, numberOfClusterRounds, brachingDepth, bridgingDepth,
            msaWidth, folderName, clusterRounds, fingerPrint, clusterRatio)
        snpRate, typeOfGen, detail = 0.001, 'd', "genome.fasta-" + str(
            startindex) + "-" + str(endindex)
        parameterRobot.indel = True
        temptime = time.time()
        batchProcessingUnitTest(numberOfRounds, parameterRobot, snpRate,
                                typeOfGen, detail)
        print "time per sample point ", time.time() - temptime
Example #4
0
def batchProcessingLNKTest():
    print "Batch Processing LNK Test"
    headerName = "synthetic_reads/"
    os.system("mkdir " + headerName)

    logging.savingLNKFile(headerName)
    listOfNLKDataPts = logging.loadingLNKFile(headerName)
    numberOfRounds = 1
    listOfNLKDataPts = [listOfNLKDataPts[0]]

    for testPoint, roundNum in zip(listOfNLKDataPts,
                                   range(len(listOfNLKDataPts))):
        folderName = headerName + "sample_point_" + str(roundNum)
        os.system("mkdir " + folderName)

        snpRate, typeOfGen, detail = 0.001, 'm', "500-200-50"
        [
            G, N, L, p, epsilon, K, liid, threshold, NKcov, Nbridge, Ncov,
            ratio, numberOfClusterRounds, brachingDepth, bridgingDepth,
            msaWidth, clusterRounds, fingerPrint, clusterRatio
        ] = testPoint
        parameterRobot = logging.parameterObj(
            G, N, L, p, epsilon, K, liid, threshold, NKcov, Nbridge, Ncov,
            ratio, numberOfClusterRounds, brachingDepth, bridgingDepth,
            msaWidth, folderName, clusterRounds, fingerPrint, clusterRatio)
        parameterRobot.indel = True

        batchProcessingUnitTest(numberOfRounds, parameterRobot, snpRate,
                                typeOfGen, detail)
Example #5
0
def branchClearingUnitTest(G, N, L, K, liid, threshold, foldername="",
                           branchingDepth=20):
    """Rebuild the sequence graph from saved clustering output and clear it.

    Reads the raw data and ``clusteredGroup.csv`` stored under ``foldername``,
    builds the graph with graphForm.getSeqGraph, passes it through
    branchClear.clearResidual, dumps it with graphForm.debugSeqGraph and
    finally hands it to checkCondensingBasic2.

    Args:
        G, N, L: forwarded to logging.rawDataLoad and stored on the
            parameter object.
        K, liid, threshold: stored on the parameter object.
        foldername: prefix prepended to every file name that is loaded.
        branchingDepth: stored on the parameter object as ``brachingDepth``.
    """
    params = logging.parameterObj()
    # "brachingDepth" (sic): the attribute spelling is kept exactly as the
    # original sets it.
    for attrName, attrValue in (("defaultFolder", foldername),
                                ("brachingDepth", branchingDepth),
                                ("K", K),
                                ("liid", liid),
                                ("threshold", threshold),
                                ("G", G),
                                ("N", N),
                                ("L", L)):
        setattr(params, attrName, attrValue)

    motherGen, reads, noisyReads = logging.rawDataLoad(
        foldername + "UnitTest", G, N, L, "dn")

    savedMapping = logging.fmappingLoad(foldername + 'clusteredGroup.csv')
    seqGraph, startList, fmapping = graphForm.getSeqGraph(
        savedMapping, noisyReads, params)

    savedMapping, seqGraph = branchClear.clearResidual(
        savedMapping, seqGraph, params)

    graphForm.debugSeqGraph(seqGraph)
    checkCondensingBasic2(seqGraph, [seqGraph[0]], "simple")
def branchClearingUnitTest(G, N, L, K, liid, threshold, foldername="",
                           branchingDepth=20):
    """Load saved clustering output, build the graph and clear residual branches.

    Steps, in order: logging.rawDataLoad, logging.fmappingLoad,
    graphForm.getSeqGraph, branchClear.clearResidual,
    graphForm.debugSeqGraph, checkCondensingBasic2. All files are looked up
    under ``foldername``. Nothing is returned.

    Args:
        G, N, L: forwarded to logging.rawDataLoad and stored on the
            parameter object.
        K, liid, threshold: stored on the parameter object.
        foldername: prefix prepended to every file name that is loaded.
        branchingDepth: stored on the parameter object as ``brachingDepth``.
    """
    settings = logging.parameterObj()
    settings.defaultFolder = foldername
    # "brachingDepth" (sic): attribute spelling kept as in the original.
    settings.brachingDepth = branchingDepth
    settings.K = K
    settings.liid = liid
    settings.threshold = threshold
    settings.G = G
    settings.N = N
    settings.L = L

    rawPrefix = foldername + "UnitTest"
    motherGen, reads, noisyReads = logging.rawDataLoad(rawPrefix, G, N, L,
                                                       "dn")

    clusterMap = logging.fmappingLoad(foldername + 'clusteredGroup.csv')
    graph, startList, fmapping = graphForm.getSeqGraph(clusterMap, noisyReads,
                                                       settings)

    clusterMap, graph = branchClear.clearResidual(clusterMap, graph, settings)

    graphForm.debugSeqGraph(graph)
    checkCondensingBasic2(graph, [graph[0]], "simple")
Example #7
0
def dataGenUnitTest():
    print "dataGenUnitTest"
    dummyParameters = logging.parameterObj()
    dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p, typeOfGen, detail = 100, 10000, 100, 0.01, 't', "500"
    dummyParameters.indel = True

    motherGen, reads, noisyReads = dataGen.generateData(
        typeOfGen, detail, dummyParameters)
def overallTestRun():
    """Run assemblerMain.runAssembler once with a hard-coded configuration.

    Returns:
        The (numMistakes, success) pair produced by runAssembler.
    """
    # Alternative input noted by the original author: "oneLine.fasta-0-1440371"
    robotArgs = (
        10000,        # G
        83,           # N
        1189,         # L
        0.01,         # p
        0.1,          # epsilon
        600,          # K
        32,           # liid
        8,            # threshold
        508,          # NKcov
        377,          # Nbridge
        417,          # Ncov
        1.21822542,   # ratio
        0,            # numberOfClusterRounds
        0,            # brachingDepth
        0,            # bridgingDepth
        0,            # msaWidth
        "",           # folderName
    )
    parameterRobot = logging.parameterObj(*robotArgs)
    parameterRobot.clusterRounds = 2
    parameterRobot.fingerPrint = 6
    parameterRobot.clusterRatio = 1

    # Alternative run noted by the original author: 0.001, 'm', "500-200-50"
    snpRate = 0.001
    typeOfGen = 'd'
    detail = "genome.fasta-50000-60000"
    numMistakes, success = assemblerMain.runAssembler(snpRate, typeOfGen,
                                                      detail, parameterRobot)
    return numMistakes, success
def graphFormUnitTest2(N, G, L, folderName=""):
    """Build the sequence graph from saved clustering output.

    Loads ``clusteredGroup.csv`` and the raw data from ``folderName`` and
    feeds them to graphForm.getSeqGraph. The result is discarded and nothing
    is returned.

    Args:
        N, G, L: stored on the parameter object and forwarded to
            logging.rawDataLoad.
        folderName: prefix prepended to the loaded file names.
    """
    params = logging.parameterObj()
    params.defaultFolder = folderName
    savedMapping = logging.fmappingLoad(params.defaultFolder +
                                        'clusteredGroup.csv')

    params.N = N
    params.G = G
    params.L = L
    params.p = 0.015

    rawPrefix = params.defaultFolder + "UnitTest"
    motherGen, reads, noisyReads = logging.rawDataLoad(
        rawPrefix, params.G, params.N, params.L, "dn")
    G1, startList, fmapping = graphForm.getSeqGraph(savedMapping, noisyReads,
                                                    params)
def clusteringTestingOfK(G, N, L, folderName, K, threshold, liid):
    """Run cluster.groupNoisyKmers in 'fast' mode on saved raw data.

    Args:
        G, N, L: forwarded to logging.rawDataLoad and stored on the
            parameter object.
        folderName: prefix of the ``UnitTest`` raw data to load.
        K, threshold, liid: stored on the parameter object.
    """
    motherGen, reads, noisyReads = logging.rawDataLoad(
        folderName + "UnitTest", G, N, L, 'a')

    params = logging.parameterObj()
    # "brachingDepth" (sic): attribute spelling kept as in the original.
    for attrName, attrValue in (("brachingDepth", 20),
                                ("clusterRounds", 2),
                                ("fingerPrint", 6),
                                ("clusterRatio", 2),
                                ("N", N),
                                ("L", L),
                                ("K", K),
                                ("G", G),
                                ("threshold", threshold),
                                ("liid", liid)):
        setattr(params, attrName, attrValue)

    cluster.groupNoisyKmers(noisyReads, params, 'fast')
Example #11
0
def clusteringTestingOfK(G, N, L, folderName, K, threshold, liid):
    """Cluster the saved noisy reads with cluster.groupNoisyKmers ('fast').

    The raw data is read with logging.rawDataLoad from
    ``folderName + "UnitTest"`` using mode 'a'. Nothing is returned.

    Args:
        G, N, L: forwarded to logging.rawDataLoad and stored on the
            parameter object.
        folderName: prefix of the raw data to load.
        K, threshold, liid: stored on the parameter object.
    """
    rawPrefix = folderName + "UnitTest"
    motherGen, reads, noisyReads = logging.rawDataLoad(rawPrefix, G, N, L, 'a')

    settings = logging.parameterObj()
    # "brachingDepth" (sic): attribute spelling kept as in the original.
    settings.brachingDepth = 20
    settings.clusterRounds = 2
    settings.fingerPrint = 6
    settings.clusterRatio = 2

    settings.N = N
    settings.L = L
    settings.K = K
    settings.G = G
    settings.threshold = threshold
    settings.liid = liid

    cluster.groupNoisyKmers(noisyReads, settings, 'fast')
Example #12
0
def graphFormUnitTest2(N, G, L, folderName=""):
    """Feed saved clustering output and raw reads to graphForm.getSeqGraph.

    Both ``clusteredGroup.csv`` and the ``UnitTest`` raw data are looked up
    under ``folderName``. The graph that comes back is not used and nothing
    is returned.

    Args:
        N, G, L: stored on the parameter object and forwarded to
            logging.rawDataLoad.
        folderName: prefix prepended to the loaded file names.
    """
    settings = logging.parameterObj()
    settings.defaultFolder = folderName
    folder = settings.defaultFolder
    clusterMap = logging.fmappingLoad(folder + 'clusteredGroup.csv')

    for attrName, attrValue in (("N", N), ("G", G), ("L", L), ("p", 0.015)):
        setattr(settings, attrName, attrValue)

    motherGen, reads, noisyReads = logging.rawDataLoad(
        settings.defaultFolder + "UnitTest", settings.G, settings.N,
        settings.L, "dn")
    G1, startList, fmapping = graphForm.getSeqGraph(clusterMap, noisyReads,
                                                    settings)
Example #13
0
def overallTestRun():
    """Invoke assemblerMain.runAssembler with a fixed parameter set.

    Returns:
        The (numMistakes, success) pair produced by runAssembler.
    """
    # Alternative input noted by the original author: "oneLine.fasta-0-1440371"
    G, N, L = 10000, 83, 1189
    p, epsilon = 0.01, 0.1
    K, liid, threshold = 600, 32, 8
    NKcov, Nbridge, Ncov = 508, 377, 417
    ratio = 1.21822542
    numberOfClusterRounds = 0
    brachingDepth = 0
    bridgingDepth = 0
    msaWidth = 0
    folderName = ""

    parameterRobot = logging.parameterObj(
        G, N, L, p, epsilon, K, liid, threshold, NKcov, Nbridge, Ncov, ratio,
        numberOfClusterRounds, brachingDepth, bridgingDepth, msaWidth,
        folderName)
    for attrName, attrValue in (("clusterRounds", 2), ("fingerPrint", 6),
                                ("clusterRatio", 1)):
        setattr(parameterRobot, attrName, attrValue)

    # Alternative run noted by the original author: 0.001, 'm', "500-200-50"
    numMistakes, success = assemblerMain.runAssembler(
        0.001, 'd', "genome.fasta-50000-60000", parameterRobot)
    return numMistakes, success
def resolveRepeatsUnitTest(foldername= ""):    
    var = raw_input("Enter something: ")
    print "you entered ", var
    f2= logging.fmapfusedLoad(foldername+'clusteredGroup2.csv') 

    G2 = logging.loadGraph(foldername+'basicMapping.csv', foldername+'seqMapping.txt', 'simple')
    
    checkCondensingBasic(G2, [G2[0]], "simple")
    dummyParameters = logging.parameterObj()
    dummyParameters.bridgingDepth = 10
    
    G3 = bridgeResolve.resolveRepeats(f2,G2,dummyParameters)
    
    
    checkCondensingBasic(G3, [G3[0]], "MB")
Example #15
0
def resolveRepeatsUnitTest(foldername=""):
    var = raw_input("Enter something: ")
    print "you entered ", var
    f2 = logging.fmapfusedLoad(foldername + 'clusteredGroup2.csv')

    G2 = logging.loadGraph(foldername + 'basicMapping.csv',
                           foldername + 'seqMapping.txt', 'simple')

    checkCondensingBasic(G2, [G2[0]], "simple")
    dummyParameters = logging.parameterObj()
    dummyParameters.bridgingDepth = 10

    G3 = bridgeResolve.resolveRepeats(f2, G2, dummyParameters)

    checkCondensingBasic(G3, [G3[0]], "MB")
def graphFormUnitTest(N, G, L, folderName =""):
    print "dataGenUnitTest"
    dummyParameters = logging.parameterObj()
    dummyParameters.defaultFolder = folderName
    dummyParameters.liid = 40
    dummyParameters.K = 40
    dummyParameters.threshold = 6
    dummyParameters.p = 0.015
    dummyParameters.indel = True


    dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p, typeOfGen, detail = N, G, L, 0.015, 'm', "500-200-50" 
    
   # motherGen, reads, noisyReads = dataGen.generateData( typeOfGen,detail,dummyParameters)    
    motherGen, reads, noisyReads = logging.rawDataLoad(dummyParameters.defaultFolder+"UnitTest",dummyParameters.G,dummyParameters.N,dummyParameters.L, "dn")
    
    returnfmapping= cluster.groupIndelNoisyKmers(noisyReads, dummyParameters,  "fast")
def testBandedClustering():
    dummyParameters = logging.parameterObj()
    #dummyParameters.defaultFolder = ""
    dummyParameters.liid = 40
    dummyParameters.K = 40
    dummyParameters.threshold = 6
    dummyParameters.p = 0.015
    dummyParameters.indel = True
    
    dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p, typeOfGen, detail = 600, 10000,200, 0.015, 'm', "500-200-50" 
    
    
    startFingerPrint, endFingerPrint, read1, read2 = [] , [] , [] , []

    read1= np.zeros(200, dtype= np.int64)
    read2 = np.zeros(200, dtype= np.int64)
    
    for i in range(200):
        read1[i] = random.randint(1,4)
        read2[i] = random.randint(1,4)
    
    
    for i in range(50):
        read2[i] = 4
        read1[150+i] = 4
    
    
    print read1, read2
    startFingerPrint, endFingerPrint = [160,8] ,[ 190,40]  
    
    
    score , returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj =cluster.SWAlignmentBanded(startFingerPrint, endFingerPrint,read1 ,read2 , dummyParameters)
    print "score", score 
    cluster.printSeq(returnalignedSeq1)
    cluster.printSeq(returnalignedSeq2)
    
    print "starti, startj, endi , endj : ", starti, startj, endi , endj


    score , returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj =cluster.SWAlignment(read1, read2 , dummyParameters)
    print "score", score 
    cluster.printSeq(returnalignedSeq1)
    cluster.printSeq(returnalignedSeq2)
    
    print "starti, startj, endi , endj : ", starti, startj, endi , endj
Example #18
0
def clusterUnitTest2():
    """Run cluster.groupIndelNoisyKmers ('fast') on the saved cluster test reads.

    The raw data is loaded from "clusterReadsUnitTest" with (10, 300, 40,
    "dn"); the parameter object carries N=300, L=40, G=10000.
    NOTE(review): G differs between the load call (10) and the parameter
    object (10000) - confirm which is intended.
    """
    motherGen, reads, noisyReads = logging.rawDataLoad(
        "clusterReadsUnitTest", 10, 300, 40, "dn")

    parameterRobot = logging.parameterObj()
    for attrName, attrValue in (("N", 300), ("L", 40), ("G", 10000),
                                ("liid", 30), ("K", 30), ("threshold", 5),
                                ("p", 0.01), ("indel", True)):
        setattr(parameterRobot, attrName, attrValue)

    cluster.groupIndelNoisyKmers(noisyReads, parameterRobot, "fast")
def MSAResolverEpeatUnitTest(Nin, Lin, foldername=""):
    """Run resolveRepeats followed by MSAresolve on data saved in a folder.

    Loads the fused mapping, the graph and the raw data from ``foldername``,
    calls bridgeResolve.resolveRepeats (bridgingDepth 5, msaWidth 20) and
    passes the result to alignmentBridge.MSAresolve. Nothing is returned.

    Args:
        Nin, Lin: forwarded to logging.rawDataLoad as N and L (G is fixed at
            10000).
        foldername: prefix prepended to every file name that is loaded.
    """
    # Fix: the fused mapping used to be read from the current directory even
    # when a folder was given, unlike every other file loaded here. With the
    # default foldername="" the path is unchanged.
    f2 = logging.fmapfusedLoad(foldername + 'clusteredGroup2.csv')
    G2 = logging.loadGraph(foldername + 'basicMapping.csv',
                           foldername + 'seqMapping.txt', 'simple')

    dummyParameters = logging.parameterObj()
    dummyParameters.bridgingDepth = 5
    dummyParameters.msaWidth = 20

    G3 = bridgeResolve.resolveRepeats(f2, G2, dummyParameters)

    G, snpRate = 10000, 0.001
    motherGen, reads, noisyReads = logging.rawDataLoad(
        foldername + "UnitTest", G, Nin, Lin, "dn")

    alignmentBridge.MSAresolve(f2, G3, noisyReads, snpRate, dummyParameters)
def clusterUnitTest2():
    """Cluster the saved "clusterReadsUnitTest" reads with indel support.

    Loads the raw data with logging.rawDataLoad("clusterReadsUnitTest", 10,
    300, 40, "dn") and hands the noisy reads to cluster.groupIndelNoisyKmers
    in "fast" mode.
    NOTE(review): the load call passes 10 where the parameter object sets
    G = 10000 - confirm which value is intended.
    """
    rawArgs = ("clusterReadsUnitTest", 10, 300, 40, "dn")
    motherGen, reads, noisyReads = logging.rawDataLoad(*rawArgs)

    robot = logging.parameterObj()
    robot.N, robot.L, robot.G = 300, 40, 10000
    robot.liid, robot.K = 30, 30
    robot.threshold, robot.p = 5, 0.01
    robot.indel = True

    cluster.groupIndelNoisyKmers(noisyReads, robot, "fast")
def segmentChopAndTestTest(Nin, G, Lin, foldername="", bridgingDepth=20,
                           msaWidth=20):
    """Apply the scripted node edits to the saved graph, then chop and align.

    Loads the raw data from ``foldername``, obtains the edited graph and the
    fused mapping from interactiveNodeRemoval in "desiredOpt" mode, and
    passes everything to chopAndAlign. Nothing is returned.

    Args:
        Nin, G, Lin: stored on the parameter object as N, G and L and
            forwarded to logging.rawDataLoad.
        foldername: prefix prepended to every file name that is loaded.
        bridgingDepth, msaWidth: stored on the parameter object.
    """
    dummyParameters = logging.parameterObj()
    dummyParameters.bridgingDepth = bridgingDepth
    dummyParameters.msaWidth = msaWidth
    dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p = Nin, G, Lin, 0.015
    dummyParameters.indel = True
    dummyParameters.defaultFolder = foldername
    dummyParameters.threshold = 5
    dummyParameters.liid = 48

    G, N, L = dummyParameters.G, dummyParameters.N, dummyParameters.L
    motherGen, reads, noisyReads = logging.rawDataLoad(
        foldername + "UnitTest", G, N, L, "dn")

    # Fix: the parameter is spelled ``foldername``; the previous
    # ``folderName`` was not defined in this function.
    # Use mode "skip" instead of "desiredOpt" to load the graph unedited.
    G3, f2 = interactiveNodeRemoval(foldername, "desiredOpt")

    chopAndAlign(G3, f2, noisyReads, motherGen, dummyParameters)
Example #22
0
def graphFormUnitTest(N, G, L, folderName=""):
    print "dataGenUnitTest"
    dummyParameters = logging.parameterObj()
    dummyParameters.defaultFolder = folderName
    dummyParameters.liid = 40
    dummyParameters.K = 40
    dummyParameters.threshold = 6
    dummyParameters.p = 0.015
    dummyParameters.indel = True

    dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p, typeOfGen, detail = N, G, L, 0.015, 'm', "500-200-50"

    # motherGen, reads, noisyReads = dataGen.generateData( typeOfGen,detail,dummyParameters)
    motherGen, reads, noisyReads = logging.rawDataLoad(
        dummyParameters.defaultFolder + "UnitTest", dummyParameters.G,
        dummyParameters.N, dummyParameters.L, "dn")

    returnfmapping = cluster.groupIndelNoisyKmers(noisyReads, dummyParameters,
                                                  "fast")
Example #23
0
def MSAResolverEpeatUnitTest(Nin, Lin, foldername=""):
    """Run resolveRepeats followed by MSAresolve on data saved in a folder.

    Loads the fused mapping, the graph and the raw data from ``foldername``,
    calls bridgeResolve.resolveRepeats (bridgingDepth 5, msaWidth 20) and
    passes the result to alignmentBridge.MSAresolve. Nothing is returned.

    Args:
        Nin, Lin: forwarded to logging.rawDataLoad as N and L (G is fixed at
            10000).
        foldername: prefix prepended to every file name that is loaded.
    """
    # Fix: the fused mapping used to be read from the current directory even
    # when a folder was given, unlike every other file loaded here. With the
    # default foldername="" the path is unchanged.
    f2 = logging.fmapfusedLoad(foldername + 'clusteredGroup2.csv')
    G2 = logging.loadGraph(foldername + 'basicMapping.csv',
                           foldername + 'seqMapping.txt', 'simple')

    dummyParameters = logging.parameterObj()
    dummyParameters.bridgingDepth = 5
    dummyParameters.msaWidth = 20

    G3 = bridgeResolve.resolveRepeats(f2, G2, dummyParameters)

    G, snpRate = 10000, 0.001
    motherGen, reads, noisyReads = logging.rawDataLoad(
        foldername + "UnitTest", G, Nin, Lin, "dn")

    alignmentBridge.MSAresolve(f2, G3, noisyReads, snpRate, dummyParameters)
Example #24
0
def testBandedClustering():
    dummyParameters = logging.parameterObj()
    #dummyParameters.defaultFolder = ""
    dummyParameters.liid = 40
    dummyParameters.K = 40
    dummyParameters.threshold = 6
    dummyParameters.p = 0.015
    dummyParameters.indel = True

    dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p, typeOfGen, detail = 600, 10000, 200, 0.015, 'm', "500-200-50"

    startFingerPrint, endFingerPrint, read1, read2 = [], [], [], []

    read1 = np.zeros(200, dtype=np.int64)
    read2 = np.zeros(200, dtype=np.int64)

    for i in range(200):
        read1[i] = random.randint(1, 4)
        read2[i] = random.randint(1, 4)

    for i in range(50):
        read2[i] = 4
        read1[150 + i] = 4

    print read1, read2
    startFingerPrint, endFingerPrint = [160, 8], [190, 40]

    score, returnalignedSeq1, returnalignedSeq2, starti, startj, endi, endj = cluster.SWAlignmentBanded(
        startFingerPrint, endFingerPrint, read1, read2, dummyParameters)
    print "score", score
    cluster.printSeq(returnalignedSeq1)
    cluster.printSeq(returnalignedSeq2)

    print "starti, startj, endi , endj : ", starti, startj, endi, endj

    score, returnalignedSeq1, returnalignedSeq2, starti, startj, endi, endj = cluster.SWAlignment(
        read1, read2, dummyParameters)
    print "score", score
    cluster.printSeq(returnalignedSeq1)
    cluster.printSeq(returnalignedSeq2)

    print "starti, startj, endi , endj : ", starti, startj, endi, endj
def batchProcessingGenomeSegTest():
    print "Batch Processing LNK Test"
    headerName = "largeScaleTest/"
    os.system("mkdir "+headerName)
    
    logging.savingGenomeSegmentFile(headerName)
    listOfNLKDataPts = logging.loadingGenomeSegmentFile(headerName)
    numberOfRounds = 1
    
    for testPoint,roundNum in  zip(listOfNLKDataPts, range(len(listOfNLKDataPts))):
        folderName = headerName+"sample_point_" + str(roundNum)
        os.system("mkdir " + folderName )
                
        [G, N, L, p, epsilon, K, liid, threshold,NKcov, Nbridge, Ncov, ratio, numberOfClusterRounds,brachingDepth,bridgingDepth,msaWidth,clusterRounds, fingerPrint, clusterRatio, startindex, endindex ] = testPoint
        print "G", G
        parameterRobot = logging.parameterObj(G, N, L, p, epsilon, K, liid, threshold,NKcov, Nbridge, Ncov, ratio, numberOfClusterRounds,brachingDepth,bridgingDepth,msaWidth,folderName,clusterRounds, fingerPrint, clusterRatio )
        snpRate, typeOfGen , detail = 0.001,'d', "genome.fasta-"+str(startindex)+"-"+str(endindex) 
        parameterRobot.indel = True
        temptime = time.time()
        batchProcessingUnitTest(numberOfRounds,parameterRobot,snpRate, typeOfGen , detail)
        print "time per sample point ",  time.time() - temptime
Example #26
0
def ECUnitTest(Nin, G, Lin, foldername="", bridgingDepth=20, msaWidth=20):
    """Run the post-clustering pipeline on saved data and score the result.

    Everything is loaded from ``foldername``: the raw data, the fused
    mapping and the graph. The graph is passed through
    bridgeResolve.resolveRepeats and alignmentBridge.MSAresolve, traversed
    with eulerCycle.findEC, turned into a sequence by
    readAns.reportRecovSeq and compared with the loaded reference.

    Args:
        Nin, G, Lin: stored on the parameter object as N, G and L and
            forwarded to logging.rawDataLoad.
        foldername: prefix prepended to every file name that is loaded.
        bridgingDepth, msaWidth: stored on the parameter object.

    Returns:
        The (numMistakes, success) pair produced by compare.subAlignCompare.
    """
    settings = logging.parameterObj()
    settings.bridgingDepth = bridgingDepth
    settings.msaWidth = msaWidth
    settings.N = Nin
    settings.G = G
    settings.L = Lin
    settings.p = 0.015
    settings.indel = True
    settings.defaultFolder = foldername
    settings.threshold = 5
    settings.liid = 48

    snpRate = 0.001
    rawPrefix = foldername + "UnitTest"
    motherGen, reads, noisyReads = logging.rawDataLoad(
        rawPrefix, settings.G, settings.N, settings.L, "dn")

    f2 = logging.fmapfusedLoad(foldername + 'clusteredGroup2.csv')
    baseGraph = logging.loadGraph(foldername + 'basicMapping.csv',
                                  foldername + 'seqMapping.txt', 'simple')
    checkCondensingBasic(baseGraph, [baseGraph[0]], "simple")

    repeatFree = bridgeResolve.resolveRepeats(f2, baseGraph, settings)
    checkCondensingBasic(repeatFree, [repeatFree[0]], "MB")

    finalGraph = alignmentBridge.MSAresolve(f2, repeatFree, noisyReads,
                                            snpRate, settings)

    cycle = eulerCycle.findEC(finalGraph)
    recovered = readAns.reportRecovSeq(cycle, f2, noisyReads, settings)

    numMistakes, success = compare.subAlignCompare(recovered, motherGen,
                                                   settings)
    return numMistakes, success
def testChecking(folderName, N, L, G ):
    #N, L, G =     1345, 1000, 50000
    f2 = open(folderName+"rec.txt", 'r')
    temp2 = f2.read()
    recov = np.zeros(len(temp2), dtype = np.int32)

    j =0 
    while j <len(temp2):
        if temp2[j] != '-':
            recov[j] = int(temp2[j])
            j = j+1 
        else:
            recov[j] = -1 
            j = j+2
    f2.close()
    dummyParameters = logging.parameterObj()
    dummyParameters.defaultFolder = folderName 
    dummyParameters.G = G
    motherGen, reads, noisyReads = logging.rawDataLoad(folderName+"UnitTest",G,N,L, 'd')
    
    print len(recov) , len(motherGen)
    numberMistakes, success = compare.subAlignCompare(recov, motherGen,dummyParameters)
def dataGenUnitTest2():
    print "dataGenUnitTest2"
    dummyParameters = logging.parameterObj()
    dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p, typeOfGen, detail = 100, 10000,100, 0.01, 't', "500"    
    G,N,L = dummyParameters.G, dummyParameters.N, dummyParameters.L
    
    motherGen, reads, noisyReads = logging.rawDataLoad("UnitTest",G,N,L, 'd')
    
    print reads 
    print noisyReads
    
    baseList = [] 
    
    
    for indexN in range(N):
        for indexL in range(L):
            if reads[indexN][indexL] == noisyReads[indexN][indexL]:
                baseList.append(0)
            else:
                baseList.append(1)
                
    print "Error Probability", np.mean(baseList)
    print "numberOfBasesGet ", len(baseList)
Example #29
0
def dataGenUnitTest2():
    print "dataGenUnitTest2"
    dummyParameters = logging.parameterObj()
    dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p, typeOfGen, detail = 100, 10000, 100, 0.01, 't', "500"
    G, N, L = dummyParameters.G, dummyParameters.N, dummyParameters.L

    motherGen, reads, noisyReads = logging.rawDataLoad("UnitTest", G, N, L,
                                                       'd')

    print reads
    print noisyReads

    baseList = []

    for indexN in range(N):
        for indexL in range(L):
            if reads[indexN][indexL] == noisyReads[indexN][indexL]:
                baseList.append(0)
            else:
                baseList.append(1)

    print "Error Probability", np.mean(baseList)
    print "numberOfBasesGet ", len(baseList)
def batchProcessingLNKTest():
    print "Batch Processing LNK Test"
    headerName = "synthetic_reads/"
    os.system("mkdir "+headerName)
    
    logging.savingLNKFile(headerName)
    listOfNLKDataPts = logging.loadingLNKFile(headerName)
    numberOfRounds = 1
    listOfNLKDataPts = [listOfNLKDataPts[0]]

    
    
    for testPoint,roundNum in  zip(listOfNLKDataPts, range(len(listOfNLKDataPts))):
        folderName = headerName+"sample_point_" + str(roundNum)
        os.system("mkdir " + folderName )
        

        snpRate, typeOfGen , detail = 0.001,'m', "500-200-50" 
        [G, N, L, p, epsilon, K, liid, threshold,NKcov, Nbridge, Ncov, ratio, numberOfClusterRounds,brachingDepth,bridgingDepth,msaWidth,clusterRounds, fingerPrint, clusterRatio ] = testPoint
        parameterRobot = logging.parameterObj(G, N, L, p, epsilon, K, liid, threshold,NKcov, Nbridge, Ncov, ratio, numberOfClusterRounds,brachingDepth,bridgingDepth,msaWidth,folderName,clusterRounds, fingerPrint, clusterRatio )
        parameterRobot.indel = True
 
        batchProcessingUnitTest(numberOfRounds,parameterRobot,snpRate, typeOfGen , detail)
Example #31
0
def testChecking(folderName, N, L, G):
    #N, L, G =     1345, 1000, 50000
    f2 = open(folderName + "rec.txt", 'r')
    temp2 = f2.read()
    recov = np.zeros(len(temp2), dtype=np.int32)

    j = 0
    while j < len(temp2):
        if temp2[j] != '-':
            recov[j] = int(temp2[j])
            j = j + 1
        else:
            recov[j] = -1
            j = j + 2
    f2.close()
    dummyParameters = logging.parameterObj()
    dummyParameters.defaultFolder = folderName
    dummyParameters.G = G
    motherGen, reads, noisyReads = logging.rawDataLoad(folderName + "UnitTest",
                                                       G, N, L, 'd')

    print len(recov), len(motherGen)
    numberMistakes, success = compare.subAlignCompare(recov, motherGen,
                                                      dummyParameters)
Example #32
0
def segmentChopAndTestTest(Nin,
                           G,
                           Lin,
                           foldername="",
                           bridgingDepth=20,
                           msaWidth=20):
    """Apply the scripted node edits to the saved graph, then chop and align.

    Loads the raw data from ``foldername``, obtains the edited graph and the
    fused mapping from interactiveNodeRemoval in "desiredOpt" mode, and
    passes everything to chopAndAlign. Nothing is returned.

    Args:
        Nin, G, Lin: stored on the parameter object as N, G and L and
            forwarded to logging.rawDataLoad.
        foldername: prefix prepended to every file name that is loaded.
        bridgingDepth, msaWidth: stored on the parameter object.
    """
    dummyParameters = logging.parameterObj()
    dummyParameters.bridgingDepth = bridgingDepth
    dummyParameters.msaWidth = msaWidth
    dummyParameters.N, dummyParameters.G, dummyParameters.L, dummyParameters.p = Nin, G, Lin, 0.015
    dummyParameters.indel = True
    dummyParameters.defaultFolder = foldername
    dummyParameters.threshold = 5
    dummyParameters.liid = 48

    G, N, L = dummyParameters.G, dummyParameters.N, dummyParameters.L
    motherGen, reads, noisyReads = logging.rawDataLoad(
        foldername + "UnitTest", G, N, L, "dn")

    # Fix: the parameter is spelled ``foldername``; the previous
    # ``folderName`` was not defined in this function.
    # Use mode "skip" instead of "desiredOpt" to load the graph unedited.
    G3, f2 = interactiveNodeRemoval(foldername, "desiredOpt")

    chopAndAlign(G3, f2, noisyReads, motherGen, dummyParameters)
def interactiveNodeRemoval(foldername, modeOfOpt = "skip"):
     
    '''
    Replay a scripted list of graph-editing commands on the saved graph.

    Commands are whitespace-separated strings processed one at a time until
    an empty string is reached. Recognised commands:

    start <anything>    load the graph from foldername
    delnode 1002        detach node 1002 from all neighbours, then condense
    deledge 1004 1028   remove the edge 1004 -> 1028
    addedge 1023 3434   add the edge 1023 -> 3434
    fusenode 1438104    bypass a node that has exactly one predecessor or
                        exactly one successor
    view                condense the graph and pass it to checkCondensingBasic

    modeOfOpt selects the script: "skip" only loads the graph, "desiredOpt"
    runs a fixed list of delnode/fusenode/view commands. Any other value
    leaves varList unassigned, so the first pop below raises.

    Returns G3, f2 (graph and fused mapping) - but see the assert near the
    end, which stops the function before the return in a normal run.
    '''
    
    G2 = []
    if modeOfOpt == "skip":
        varList = ["start none", ""]
    elif modeOfOpt == "desiredOpt":
        varList = ["start none", "delnode 734288", "delnode 1716432", "fusenode 1438104", "fusenode 1166060" , "view", ""] 
    
    var = varList.pop(0)
    
    # The trailing "" in each script terminates this loop.
    while len(var) > 0 :
        
        command = var.split()
        if command[0] == "start":   
            print "Start"
            G2 = logging.loadGraph(foldername+'basicMapping.csv', foldername+'seqMapping.txt', 'simple')
                
        elif command[0] == "delnode":
            print "To delete node"
            currentNodeIndex = int(command[1])
            print "currentNodeIndex", currentNodeIndex
            # Linear search by nodeIndex. If nothing matches, currentNode is
            # either unbound or still the node from an earlier command.
            for eachnode in G2: 
                if eachnode.nodeIndex == currentNodeIndex:
                    currentNode = eachnode
                    
            # Unlink the node from its predecessors ...
            for eachitem in currentNode.listOfPrevNodes:
                eachitem.listOfNextNodes.remove(currentNode)
            currentNode.listOfPrevNodes = []
            
            # ... and from its successors.
            for eachitem in currentNode.listOfNextNodes:
                eachitem.listOfPrevNodes.remove(currentNode)
            currentNode.listOfNextNodes = []
            
            currentNode.nodeIndexList = [] 
            graphForm.condenseGraph(G2)    
                          
        elif command[0] == "deledge":
            node1 = int(command[1])
            node2 = int(command[2])
            # The [] placeholders remain if an index is not found, in which
            # case the .listOfNextNodes access below fails.
            startNode, endNode = [] , []
            for eachnode in G2: 
                if eachnode.nodeIndex == node1:
                    startNode = eachnode
                     
                if eachnode.nodeIndex == node2:
                    endNode = eachnode
                    
            startNode.listOfNextNodes.remove(endNode)
            endNode.listOfPrevNodes.remove(startNode)
                    
            # NOTE(review): this message is printed after an edge deletion.
            print "To insert nodes"
        elif command[0] == "addedge" : 
            node1 = int(command[1])
            node2 = int(command[2])
            startNode, endNode = [] , []
            for eachnode in G2: 
                if eachnode.nodeIndex == node1:
                    startNode = eachnode
                if eachnode.nodeIndex == node2:
                    endNode = eachnode
            
            startNode.listOfNextNodes.append(endNode)
            endNode.listOfPrevNodes.append(startNode)
        elif command[0] == "fusenode":
            myNodeIndex = int(command[1])
            for eachnode in G2:
                if eachnode.nodeIndex == myNodeIndex:
                    currentNode = eachnode
            # Single predecessor: connect it directly to every successor of
            # currentNode, then empty currentNode.
            if len(currentNode.listOfPrevNodes) == 1:
                prevNode = currentNode.listOfPrevNodes[0]
                for eachnextnode in currentNode.listOfNextNodes:
                    eachnextnode.listOfPrevNodes.remove(currentNode)
                    eachnextnode.listOfPrevNodes.append(prevNode)
                    prevNode.listOfNextNodes.append(eachnextnode)
                
                prevNode.listOfNextNodes.remove(currentNode)
                currentNode.listOfNextNodes = [] 
                currentNode.listOfPrevNodes =[]
                currentNode.nodeIndexList = [] 
            
            # Otherwise, single successor: connect every predecessor of
            # currentNode directly to it. With neither, nothing is changed.
            elif len(currentNode.listOfNextNodes) ==1 :
                nextNode = currentNode.listOfNextNodes[0]
                for eachprevnode in currentNode.listOfPrevNodes:
                    eachprevnode.listOfNextNodes.remove(currentNode)
                    eachprevnode.listOfNextNodes.append(nextNode)
                    nextNode.listOfPrevNodes.append(eachprevnode)
                
                nextNode.listOfPrevNodes.remove(currentNode)
                currentNode.listOfNextNodes = []
                currentNode.listOfPrevNodes = []
                currentNode.nodeIndexList = [] 
            
        elif command[0] == "view":
            # Only the first element of newCondensingStep's result is kept.
            G2 = graphForm.newCondensingStep(G2)
            G2 = G2[0]
            checkCondensingBasic(G2, [G2[0]], "simple")
        # Interactive input is disabled; commands come from varList instead.
        #var = raw_input("Enter Operations: ")
        var = varList.pop(0)
    
        

    dummyParameters = logging.parameterObj()
    dummyParameters.bridgingDepth = 20
    
    print "Loading fmap and graph "
    f2= logging.fmapfusedLoad(foldername+'clusteredGroup2.csv') 
    
    G3 = bridgeResolve.resolveRepeats(f2,G2,dummyParameters)
    # NOTE(review): this assertion always fails, so unless Python runs with
    # -O the function stops here and the lines below never execute.
    assert(1==2)
    
    # Discards the resolveRepeats result above and returns the edited graph.
    G3 = G2
    print "Done Loading the fmap and graph"
    
    #checkCondensingBasic(G3, [G3[0]], "MB")
    
    return  G3,f2
Example #34
0
def _findNodeByIndex(graph, nodeIndex):
    """Return the node of ``graph`` whose ``nodeIndex`` attribute matches.

    The whole graph is scanned and, if several nodes share the index, the
    last one in iteration order wins.

    Raises:
        ValueError: if no node in ``graph`` carries ``nodeIndex``.
    """
    match = None
    for node in graph:
        if node.nodeIndex == nodeIndex:
            match = node
    if match is None:
        # Fail loudly instead of silently reusing the node selected by an
        # earlier command (or hitting an unbound local on the first one).
        raise ValueError("no node with index %d in the graph" % nodeIndex)
    return match


def _detachNode(node):
    """Drop every edge touching ``node`` and empty its ``nodeIndexList``."""
    for predecessor in node.listOfPrevNodes:
        predecessor.listOfNextNodes.remove(node)
    node.listOfPrevNodes = []

    for successor in node.listOfNextNodes:
        successor.listOfPrevNodes.remove(node)
    node.listOfNextNodes = []

    node.nodeIndexList = []


def _fuseNode(node):
    """Bypass ``node`` by wiring its neighbours directly to each other.

    If ``node`` has exactly one predecessor, each successor is re-linked to
    that predecessor. Otherwise, if it has exactly one successor, each
    predecessor is re-linked to that successor. In either case ``node`` ends
    up with no edges and an empty ``nodeIndexList``. If neither condition
    holds, nothing is changed.
    """
    if len(node.listOfPrevNodes) == 1:
        keeper = node.listOfPrevNodes[0]
        # Iterate over a snapshot: the loop appends to keeper's successor
        # list, which is this very list when the node has a self-loop.
        for successor in list(node.listOfNextNodes):
            successor.listOfPrevNodes.remove(node)
            successor.listOfPrevNodes.append(keeper)
            keeper.listOfNextNodes.append(successor)
        keeper.listOfNextNodes.remove(node)
    elif len(node.listOfNextNodes) == 1:
        keeper = node.listOfNextNodes[0]
        # Snapshot for the same aliasing reason as above.
        for predecessor in list(node.listOfPrevNodes):
            predecessor.listOfNextNodes.remove(node)
            predecessor.listOfNextNodes.append(keeper)
            keeper.listOfPrevNodes.append(predecessor)
        keeper.listOfPrevNodes.remove(node)
    else:
        return

    node.listOfNextNodes = []
    node.listOfPrevNodes = []
    node.nodeIndexList = []


def interactiveNodeRemoval(foldername, modeOfOpt="skip"):
    '''
    Run a scripted sequence of graph-editing commands, then load the fmap.

    Commands are taken from a fixed list selected by ``modeOfOpt`` (they are
    not read from the user). Each command is a whitespace-separated string;
    commas between arguments are tolerated. Supported commands:

    start <anything>   load the graph from foldername + 'basicMapping.csv'
                       and foldername + 'seqMapping.txt'
    delnode 1002       remove all edges of the node, then call
                       graphForm.condenseGraph on the graph
    deledge 1004, 1028 remove the edge from the first node to the second
    addedge 1023 , 3434  add an edge from the first node to the second
    fusenode 1438104   bypass the node (see _fuseNode)
    view               replace the graph by the first element returned by
                       graphForm.newCondensingStep and pass it to
                       checkCondensingBasic

    Any other command word is ignored. An empty string ends the script.

    Parameters:
        foldername: path prefix (string) prepended to the data file names.
        modeOfOpt: "skip" (only load the graph) or "desiredOpt" (load the
            graph and apply a hard-coded list of edits).

    Returns:
        (G3, f2), where G3 is the edited graph and f2 is the result of
        logging.fmapfusedLoad on foldername + 'clusteredGroup2.csv'.

    Raises:
        ValueError: if modeOfOpt is not recognised, or if a command names a
            node index that is not in the graph.
        AssertionError: always, when assertions are enabled (see the note
            next to the assert below).
    '''
    if modeOfOpt == "skip":
        varList = ["start none", ""]
    elif modeOfOpt == "desiredOpt":
        varList = [
            "start none", "delnode 734288", "delnode 1716432",
            "fusenode 1438104", "fusenode 1166060", "view", ""
        ]
    else:
        # Previously this fell through and failed later with an unrelated
        # unbound-variable error on varList.
        raise ValueError(
            "unknown modeOfOpt %r; expected 'skip' or 'desiredOpt'"
            % (modeOfOpt,))

    # Single-argument parenthesised prints produce the same output as the
    # Python 2 print statement and also parse under Python 3.
    G2 = []
    for var in varList:
        if len(var) == 0:
            # The empty string is the end-of-script marker.
            break

        # Accept "a, b" as well as "a b" so the documented examples parse.
        command = var.replace(",", " ").split()
        action = command[0]

        if action == "start":
            print("Start")
            G2 = logging.loadGraph(foldername + 'basicMapping.csv',
                                   foldername + 'seqMapping.txt', 'simple')

        elif action == "delnode":
            print("To delete node")
            currentNodeIndex = int(command[1])
            print("currentNodeIndex %s" % (currentNodeIndex,))
            _detachNode(_findNodeByIndex(G2, currentNodeIndex))
            graphForm.condenseGraph(G2)

        elif action == "deledge":
            startNode = _findNodeByIndex(G2, int(command[1]))
            endNode = _findNodeByIndex(G2, int(command[2]))
            startNode.listOfNextNodes.remove(endNode)
            endNode.listOfPrevNodes.remove(startNode)

            # NOTE(review): this message is printed after deleting an edge;
            # the wording is kept unchanged -- confirm whether it belongs here.
            print("To insert nodes")

        elif action == "addedge":
            startNode = _findNodeByIndex(G2, int(command[1]))
            endNode = _findNodeByIndex(G2, int(command[2]))
            startNode.listOfNextNodes.append(endNode)
            endNode.listOfPrevNodes.append(startNode)

        elif action == "fusenode":
            _fuseNode(_findNodeByIndex(G2, int(command[1])))

        elif action == "view":
            G2 = graphForm.newCondensingStep(G2)
            G2 = G2[0]
            checkCondensingBasic(G2, [G2[0]], "simple")

    dummyParameters = logging.parameterObj()
    dummyParameters.bridgingDepth = 20

    print("Loading fmap and graph ")
    f2 = logging.fmapfusedLoad(foldername + 'clusteredGroup2.csv')

    G3 = bridgeResolve.resolveRepeats(f2, G2, dummyParameters)
    # NOTE(review): this assertion can never hold, so with assertions enabled
    # the function stops here and never returns; under ``python -O`` it is
    # skipped. It looks like a leftover debugging stop -- confirm intent
    # before removing. Behaviour is deliberately left unchanged.
    assert (1 == 2)

    # The result of resolveRepeats is discarded; the edited graph is returned.
    G3 = G2
    print("Done Loading the fmap and graph")

    return G3, f2