Example #1
0
def _copy_to_c_int_array(values, count, depth):
    """Copy the first ``count`` items of ``values`` into a new ``clubgen_c.intArray``.

    When ``depth`` is non-zero every item is reduced modulo ``2**depth``
    before it is stored; a depth of 0 copies the items unchanged.
    """
    c_array = clubgen_c.intArray(count)
    if depth == 0:
        for idx in range(count):
            c_array[idx] = values[idx]
    else:
        modulus = 2 ** depth
        for idx in range(count):
            c_array[idx] = values[idx] % modulus
    return c_array


def GenAlignment(soi_seq, ts_seq, soi, ts, parameters):
    """Align one sequence pair with ``clubgen_c.GenAlignment_c`` and wrap the result.

    ``soi`` and ``ts`` are copied into ``clubgen_c.intArray`` buffers (reduced
    modulo ``2**soi_seq.dataDepths[0]`` when that depth is non-zero), the C
    routine is called, and the (start, end, shift) records found in the three
    output arrays are turned into ``LocalAlignment`` objects that are added to
    a new ``Alignment(soi_seq, ts_seq)``.

    Args:
        soi_seq: object providing ``length`` and ``dataDepths``; also handed to
            ``Alignment``.
        ts_seq: handed to ``Alignment`` only.
        soi: indexable values for the sequence of interest; the first
            ``soi_seq.length`` items are used.
        ts: indexable values for the test sequence; all ``len(ts)`` items are
            used.
        parameters: mapping that must contain ``'GLOBAL_SIG_LEVEL'``,
            ``'LOCAL_SIG_LEVEL'``, ``'LOCAL_WINDOW'``, ``'LOCAL_BRIDGE_WIDTH'``
            and ``'COMB_SPEED'``.

    Returns:
        The ``Alignment``. It carries no local alignments when the first
        output record has ``start == end``.
    """
    lensoi = int(soi_seq.length)
    lents = int(len(ts))
    depth = soi_seq.dataDepths[0]

    c_soi = _copy_to_c_int_array(soi, lensoi, depth)
    c_ts = _copy_to_c_int_array(ts, lents, depth)

    MAX_MATCH = 100
    MAX_INLINE = 3
    # Output buffers for the C call, zeroed so that an untouched record
    # reads as start == end (the "no more records" marker used below).
    c_localStart = clubgen_c.intArray(MAX_MATCH)
    c_localEnd = clubgen_c.intArray(MAX_MATCH)
    c_localShift = clubgen_c.intArray(MAX_MATCH)
    for i in range(MAX_MATCH):
        c_localStart[i] = 0
        c_localEnd[i] = 0
        c_localShift[i] = 0

    clubgen_c.GenAlignment_c(c_soi, c_ts, lensoi, lents,
                             parameters['GLOBAL_SIG_LEVEL'],
                             parameters['LOCAL_SIG_LEVEL'],
                             parameters['LOCAL_WINDOW'],
                             MAX_MATCH, MAX_INLINE,
                             parameters['LOCAL_BRIDGE_WIDTH'],
                             parameters['COMB_SPEED'],
                             c_localStart, c_localEnd, c_localShift)

    theAlignment = Alignment(soi_seq, ts_seq)
    locWin = parameters['LOCAL_WINDOW']

    i = 0
    # A record with start == end terminates the list.
    while i < MAX_MATCH and c_localStart[i] != c_localEnd[i]:
        SOIrange = []
        TSrange = []

        # Consecutive records that share a shift are collected into one
        # LocalAlignment.  The `i + 1 < MAX_MATCH` guard keeps the look-ahead
        # inside the allocated arrays; without it the last record would be
        # compared against index MAX_MATCH, one past the end.
        while (i + 1 < MAX_MATCH
               and c_localShift[i] == c_localShift[i + 1]
               and c_localStart[i] != c_localEnd[i]):
            t3, t4 = cFindRanges(c_localStart[i], c_localEnd[i], c_localShift[i],
                                 lensoi, lents, locWin)
            SOIrange.append(t3)
            TSrange.append(t4)
            i += 1

        # The record the scan stopped on always contributes its ranges too.
        t3, t4 = cFindRanges(c_localStart[i], c_localEnd[i], c_localShift[i],
                             lensoi, lents, locWin)
        SOIrange.append(t3)
        TSrange.append(t4)

        shift = c_localShift[i]
        soiStart = max(0, shift - lents + locWin)
        soiEnd = min(lensoi - 1, shift + locWin - 1)
        tsStart = max(0, lents - locWin - shift)
        tsEnd = lents - 1 - max(0, locWin + shift - lensoi)

        locAlign = LocalAlignment(SOIrange, TSrange, [soiStart, soiEnd], [tsStart, tsEnd])
        theAlignment.AddLocalAlign(locAlign)  # adds the local alignment to the total alignment
        logging.debug('Added align! ' + str(SOIrange))
        i += 1

    return theAlignment
Example #2
0
def GenClubsC(seqOfInt, testSeqs, parameters):
    """Align ``seqOfInt`` against every test sequence with ``clubgen_c.TesterAlign_c``.

    The sequence of interest and all test sequences (one copy per reading
    frame when ``seqOfInt.readingFrames`` is not 1) are cut into rows of 1000
    values, zero-padded, and written into ``clubgen_c.int_array_1000`` buffers.
    After the C call, ``alignmentInfo`` is read as one slot of ``MAX_MATCH``
    five-integer records per (reading frame, test sequence); each slot is
    decoded into an ``Alignment`` that is added to the returned
    ``SetOfAlignments``.

    Args:
        seqOfInt: object providing ``length``, ``seq``, ``readingFrames``,
            ``sequenceType`` and ``dataDepths``.
        testSeqs: indexable collection of objects providing ``length``,
            ``seq`` and ``name``.
        parameters: mapping that must contain ``'GLOBAL_SIG_LEVEL'``,
            ``'LOCAL_SIG_LEVEL'``, ``'LOCAL_WINDOW'``, ``'LOCAL_BRIDGE_WIDTH'``
            and ``'COMB_SPEED'``.

    Returns:
        A ``SetOfAlignments`` with one ``Alignment`` per slot.
    """
    import clubgen_c

    PART_LEN = 1000  # row width fixed by clubgen_c.int_array_1000
    REC_LEN = 5      # integers per record in alignmentInfo

    def fill_row(parts, row, values, offset, count, modulus=None):
        # Write values[offset:offset + count] into `row` of `parts`, reducing
        # each value modulo `modulus` when one is given, then zero-pad the
        # rest of the row.  A negative count is clamped so the padding loop
        # never starts at a negative column.
        count = max(0, count)
        for col in range(count):
            value = values[col + offset]
            if modulus is not None:
                value = value % modulus
            clubgen_c.a_set(row, col, int(value), parts)
        for col in range(count, PART_LEN):
            clubgen_c.a_set(row, col, 0, parts)

    readingFrames = seqOfInt.readingFrames
    numSeqs = len(testSeqs)
    # 'AA' sequences are copied unchanged; everything else is reduced
    # modulo 2**dataDepths[0].
    if seqOfInt.sequenceType == 'AA':
        modulus = None
    else:
        modulus = 2 ** seqOfInt.dataDepths[0]

    # build up sequence of interest
    # `//` keeps these counts integral on Python 3 as well as Python 2.
    soiPartTotal = seqOfInt.length // PART_LEN + 1
    soiParts = clubgen_c.int_array_1000(soiPartTotal)
    if readingFrames > 1:
        soiseq = seqOfInt.seq[0]
    else:
        soiseq = seqOfInt.seq
    for i in range(soiPartTotal):
        fill_row(soiParts, i, soiseq, PART_LEN * i,
                 min(PART_LEN, seqOfInt.length - PART_LEN * i), modulus)

    # tsPartInfo[row] is the running index (frame-major, then sequence) of the
    # test sequence that owns that row.
    tsPartInfo = []
    tsPartNum = 0
    if readingFrames == 1:
        for j in range(numSeqs):
            tmpPart = testSeqs[j].length // PART_LEN + 1
            tsPartInfo.extend([tsPartNum] * tmpPart)
            tsPartNum += 1
    else:
        for k in range(readingFrames):
            for j in range(numSeqs):
                tmpPart = len(testSeqs[j].seq[k]) // PART_LEN + 1
                tsPartInfo.extend([tsPartNum] * tmpPart)
                tsPartNum += 1
    tsPartTotal = len(tsPartInfo)

    thePart = -1
    counter = 0  # index of the current row within its owning sequence
    tsParts = clubgen_c.int_array_1000(tsPartTotal)

    if readingFrames == 1:
        for i in range(tsPartTotal):
            if thePart == tsPartInfo[i]:
                counter += 1
            else:
                thePart = tsPartInfo[i]
                counter = 0
            testSeq = testSeqs[tsPartInfo[i]]
            fill_row(tsParts, i, testSeq.seq, PART_LEN * counter,
                     min(PART_LEN, testSeq.length - PART_LEN * counter), modulus)
    else:
        # The multi-frame path always applies the modulus, whatever the
        # sequence type.
        frameModulus = 2 ** seqOfInt.dataDepths[0]
        for k in range(readingFrames):
            # NOTE(review): this indexing assumes every reading frame
            # contributes the same number of rows -- confirm against callers.
            frameOffset = k * tsPartTotal // readingFrames
            for i in range(tsPartTotal // readingFrames):
                frameSeq = testSeqs[tsPartInfo[i] % numSeqs].seq[k]
                infoLen = len(frameSeq)
                if thePart == tsPartInfo[i + frameOffset]:
                    counter += 1
                else:
                    thePart = tsPartInfo[i + frameOffset]
                    counter = 0
                fill_row(tsParts, i + frameOffset, frameSeq, PART_LEN * counter,
                         min(PART_LEN, infoLen - PART_LEN * counter), frameModulus)

    cTsPartInfo = clubgen_c.intArray(tsPartTotal)
    for i in range(tsPartTotal):
        cTsPartInfo[i] = tsPartInfo[i]

    # make vector that contains all of the info
    MAX_MATCH = 20
    MAX_INLINE = 5
    slotLen = MAX_MATCH * REC_LEN
    numSlots = numSeqs * readingFrames
    alignmentInfo = clubgen_c.intArray(slotLen * numSlots)
    for i in range(slotLen * numSlots):
        alignmentInfo[i] = 0

    if readingFrames == 1:
        tsLens = clubgen_c.intArray(numSeqs)
        for i in range(numSeqs):
            tsLens[i] = testSeqs[i].length
    else:
        tsLens = clubgen_c.intArray(numSeqs * readingFrames)
        for k in range(readingFrames):
            for i in range(numSeqs):
                tsLens[i + k * numSeqs] = len(testSeqs[i].seq[k])

    clubgen_c.TesterAlign_c(soiPartTotal, soiParts, tsPartTotal, tsParts,
                            cTsPartInfo, alignmentInfo, seqOfInt.length, tsLens,
                            parameters['GLOBAL_SIG_LEVEL'],
                            parameters['LOCAL_SIG_LEVEL'],
                            parameters['LOCAL_WINDOW'],
                            MAX_MATCH, MAX_INLINE,
                            parameters['LOCAL_BRIDGE_WIDTH'],
                            parameters['COMB_SPEED'])

    alignments = SetOfAlignments(seqOfInt)

    i = 0        # offset of the current record inside the current slot
    seqOn = 0    # current slot
    rframe = 0   # reading frame of the current slot

    locWin = parameters['LOCAL_WINDOW']
    theAlignment = Alignment(seqOfInt, testSeqs[seqOn])
    while True:
        base = seqOn * slotLen
        # Record layout as read here: [start, end, shift, soi part, ts part].
        # A record with start == end ends the slot.  Running off the end of a
        # completely full slot is treated the same way, so the alignment is
        # still stored and the remaining slots are still decoded.
        if i < slotLen and alignmentInfo[base + i] != alignmentInfo[base + i + 1]:
            testSeq = testSeqs[seqOn % numSeqs]
            soiPart = alignmentInfo[base + i + 3]
            tsPart = alignmentInfo[base + i + 4]
            lensoi = min(PART_LEN, seqOfInt.length - PART_LEN * soiPart)
            # NOTE(review): uses testSeq.length even when readingFrames > 1,
            # whereas tsLens uses len(seq[k]) in that case -- confirm intended.
            lents = min(PART_LEN, testSeq.length - PART_LEN * tsPart)
            soiShift = PART_LEN * soiPart
            tsShift = PART_LEN * tsPart

            SOIrange = []
            TSrange = []

            # Consecutive records sharing a shift go into one LocalAlignment.
            # The `i + REC_LEN < slotLen` guard stops the look-ahead from
            # reading the next slot (or past the array for the last slot).
            while (i + REC_LEN < slotLen
                   and alignmentInfo[base + i + 2] == alignmentInfo[base + i + 2 + REC_LEN]
                   and alignmentInfo[base + i] != alignmentInfo[base + i + 1]):
                t3, t4 = cFindRanges(alignmentInfo[base + i], alignmentInfo[base + i + 1],
                                     alignmentInfo[base + i + 2],
                                     lensoi, lents, locWin, soiShift, tsShift)
                SOIrange.append(t3)
                TSrange.append(t4)
                i += REC_LEN
                logging.debug('alignmentInfo: ' + ','.join(
                    [str(alignmentInfo[base + i + field]) for field in range(REC_LEN)]))

            # The record the scan stopped on always contributes its ranges too.
            t3, t4 = cFindRanges(alignmentInfo[base + i], alignmentInfo[base + i + 1],
                                 alignmentInfo[base + i + 2],
                                 lensoi, lents, locWin, soiShift, tsShift)
            SOIrange.append(t3)
            TSrange.append(t4)

            shift = alignmentInfo[base + i + 2]
            soiStart = max(0, shift - lents + locWin) + soiShift
            soiEnd = min(lensoi - 1, shift + locWin - 1) + soiShift
            tsStart = max(0, lents - locWin - shift) + tsShift
            tsEnd = lents - 1 - max(0, locWin + shift - lensoi) + tsShift

            logging.debug('Added align! ' + str(SOIrange) + ' to ' + str(TSrange) + ' of ' + testSeq.name + ' rf ' + str(rframe))
            locAlign = LocalAlignment(SOIrange, TSrange, [soiStart, soiEnd], [tsStart, tsEnd], rframe)
            theAlignment.AddLocalAlign(locAlign)  # adds the local alignment to the total alignment
            i += REC_LEN
        else:
            alignments.AddAlign(theAlignment)
            if seqOn >= numSlots - 1:
                break
            i = 0
            seqOn += 1
            # Slots are ordered frame-major, so wrapping around the test
            # sequences means the next reading frame has started.
            if seqOn % numSeqs == 0:
                rframe += 1
            theAlignment = Alignment(seqOfInt, testSeqs[seqOn % numSeqs])

    return alignments