def GenAlignment(soi_seq, ts_seq, soi, ts, parameters):
    """Build the Alignment between one sequence of interest and one test sequence.

    The encoded sequences are copied into C int arrays, handed to
    ``clubgen_c.GenAlignment_c``, and the match records that call writes
    into the start/end/shift output arrays are converted into
    ``LocalAlignment`` objects.

    Parameters:
        soi_seq: sequence-of-interest object; ``length`` and ``dataDepths``
            are read here and the object is passed on to ``Alignment``.
        ts_seq: test-sequence object; only passed on to ``Alignment``.
        soi: indexable encoded symbols of the sequence of interest
            (the first ``soi_seq.length`` entries are read).
        ts: indexable encoded symbols of the test sequence.
        parameters: mapping providing 'GLOBAL_SIG_LEVEL', 'LOCAL_SIG_LEVEL',
            'LOCAL_WINDOW', 'LOCAL_BRIDGE_WIDTH' and 'COMB_SPEED'.

    Returns:
        The ``Alignment`` for the pair. It carries no local alignments when
        the first match record is empty (start == end).
    """
    lensoi = int(soi_seq.length)
    lents = int(len(ts))
    breakpoints = soi_seq.dataDepths

    def to_c_array(values, count):
        # Copy `count` symbols into a fresh C int array. When the first data
        # depth is non-zero only the value modulo 2**depth is kept.
        c_values = clubgen_c.intArray(count)
        if breakpoints[0] == 0:
            for idx in range(count):
                c_values[idx] = values[idx]
        else:
            modulus = 2**breakpoints[0]
            for idx in range(count):
                c_values[idx] = values[idx] % modulus
        return c_values

    c_soi = to_c_array(soi, lensoi)
    c_ts = to_c_array(ts, lents)

    MAX_MATCH = 100
    MAX_INLINE = 3

    # Output buffers filled by the C routine; zeroed first so that unused
    # records read as empty (start == end).
    c_localStart = clubgen_c.intArray(MAX_MATCH)
    c_localEnd = clubgen_c.intArray(MAX_MATCH)
    c_localShift = clubgen_c.intArray(MAX_MATCH)
    for idx in range(MAX_MATCH):
        c_localStart[idx] = 0
        c_localEnd[idx] = 0
        c_localShift[idx] = 0

    clubgen_c.GenAlignment_c(c_soi, c_ts, lensoi, lents,
                             parameters['GLOBAL_SIG_LEVEL'],
                             parameters['LOCAL_SIG_LEVEL'],
                             parameters['LOCAL_WINDOW'],
                             MAX_MATCH, MAX_INLINE,
                             parameters['LOCAL_BRIDGE_WIDTH'],
                             parameters['COMB_SPEED'],
                             c_localStart, c_localEnd, c_localShift)

    theAlignment = Alignment(soi_seq, ts_seq)
    if c_localStart[0] == c_localEnd[0]:
        # The C routine reported no matches at all.
        return theAlignment

    locWin = parameters['LOCAL_WINDOW']
    i = 0
    # Records are consumed until the first empty one (start == end).
    while i < MAX_MATCH and c_localStart[i] != c_localEnd[i]:
        SOIrange = []
        TSrange = []
        # Consecutive records sharing one shift are merged into a single
        # LocalAlignment. The `i + 1 < MAX_MATCH` guard keeps the look-ahead
        # inside the allocated buffer; the C array wrapper is not assumed to
        # bounds-check.
        # NOTE(review): the group can end on an empty record (for example a
        # zero-initialised slot following a shift-0 match); that record is
        # still converted below, as before - confirm this is intended.
        while (i + 1 < MAX_MATCH
               and c_localShift[i] == c_localShift[i + 1]
               and c_localStart[i] != c_localEnd[i]):
            t3, t4 = cFindRanges(c_localStart[i], c_localEnd[i],
                                 c_localShift[i], lensoi, lents, locWin)
            SOIrange.append(t3)
            TSrange.append(t4)
            i += 1

        # Last record of the group.
        t3, t4 = cFindRanges(c_localStart[i], c_localEnd[i],
                             c_localShift[i], lensoi, lents, locWin)
        SOIrange.append(t3)
        TSrange.append(t4)

        # Overall extents derived from the shift, window and lengths only.
        shift = c_localShift[i]
        soiStart = max(0, shift - lents + locWin)
        soiEnd = min(lensoi - 1, shift + locWin - 1)
        tsStart = max(0, lents - locWin - shift)
        tsEnd = lents - 1 - max(0, locWin + shift - lensoi)

        locAlign = LocalAlignment(SOIrange, TSrange,
                                  [soiStart, soiEnd], [tsStart, tsEnd])
        theAlignment.AddLocalAlign(locAlign)
        logging.debug('Added align! ' + str(SOIrange))
        i += 1

    return theAlignment
def GenClubsC(seqOfInt, testSeqs, parameters):
    """Align one sequence of interest against many test sequences in one C call.

    The sequence of interest and every test sequence (once per reading
    frame) are cut into 1000-symbol rows, zero padded, and passed to
    ``clubgen_c.TesterAlign_c``. The integer records that call writes into
    ``alignmentInfo`` are then converted into one ``Alignment`` per
    (reading frame, test sequence) pair.

    Parameters:
        seqOfInt: sequence-of-interest object; ``length``, ``seq``,
            ``readingFrames``, ``sequenceType`` and ``dataDepths`` are read.
        testSeqs: sequence of test-sequence objects; ``length``, ``seq`` and
            ``name`` are read.
        parameters: mapping providing 'GLOBAL_SIG_LEVEL', 'LOCAL_SIG_LEVEL',
            'LOCAL_WINDOW', 'LOCAL_BRIDGE_WIDTH' and 'COMB_SPEED'.

    Returns:
        A ``SetOfAlignments`` holding one ``Alignment`` per (reading frame,
        test sequence) pair; it is empty when ``testSeqs`` is empty.
    """
    import clubgen_c

    PART_LEN = 1000  # symbols per row of a clubgen_c.int_array_1000
    FIELDS = 5       # ints per match record in alignmentInfo
    MAX_MATCH = 20
    MAX_INLINE = 5

    numSeqs = len(testSeqs)
    frames = seqOfInt.readingFrames
    if numSeqs == 0:
        # Nothing to align against (this used to raise IndexError later on).
        return SetOfAlignments(seqOfInt)

    def fill_row(parts, row, symbols, start, total_len, modulus):
        # Copy symbols[start:start + PART_LEN] (clipped to total_len) into
        # one row and zero the remaining columns. When `modulus` is given,
        # only the value modulo it is stored.
        used = min(PART_LEN, total_len - start)
        for col in range(used):
            value = symbols[start + col]
            if modulus is not None:
                value = value % modulus
            clubgen_c.a_set(row, col, int(value), parts)
        for col in range(used, PART_LEN):
            clubgen_c.a_set(row, col, 0, parts)

    # ---- sequence of interest -------------------------------------------
    # Floor division keeps the row counts integral on Python 2 and 3.
    soiPartTotal = seqOfInt.length // PART_LEN + 1
    soiParts = clubgen_c.int_array_1000(soiPartTotal)
    if frames > 1:
        soiseq = seqOfInt.seq[0]
    else:
        soiseq = seqOfInt.seq
    # 'AA' sequences are stored unmasked; everything else is reduced modulo
    # 2**dataDepths[0].
    if seqOfInt.sequenceType == 'AA':
        soiModulus = None
    else:
        soiModulus = 2**seqOfInt.dataDepths[0]
    for row in range(soiPartTotal):
        fill_row(soiParts, row, soiseq, PART_LEN * row, seqOfInt.length,
                 soiModulus)

    # ---- test sequences: map every row to the part number owning it ------
    tsPartTotal = 0
    tsPartInfo = []
    tsPartNum = 0
    if frames == 1:
        for seqIdx in range(numSeqs):
            tmpPart = testSeqs[seqIdx].length // PART_LEN + 1
            tsPartTotal += tmpPart
            for _ in range(tmpPart):
                tsPartInfo.append(tsPartNum)
            tsPartNum += 1
    else:
        for k in range(frames):
            for seqIdx in range(numSeqs):
                tmpPart = len(testSeqs[seqIdx].seq[k]) // PART_LEN + 1
                tsPartTotal += tmpPart
                for _ in range(tmpPart):
                    tsPartInfo.append(tsPartNum)
                tsPartNum += 1

    # ---- test sequences: fill the rows -----------------------------------
    # `thePart`/`counter` track which part the current row belongs to and
    # how many rows of that part have already been written.
    thePart = -1
    counter = 0
    tsParts = clubgen_c.int_array_1000(tsPartTotal)
    if frames == 1:
        if seqOfInt.sequenceType == 'AA':
            tsModulus = None
        else:
            tsModulus = 2**seqOfInt.dataDepths[0]
        for row in range(tsPartTotal):
            if thePart == tsPartInfo[row]:
                counter += 1
            else:
                thePart = tsPartInfo[row]
                counter = 0
            source = testSeqs[tsPartInfo[row]]
            fill_row(tsParts, row, source.seq, PART_LEN * counter,
                     source.length, tsModulus)
    else:
        # With several reading frames the symbols are always masked,
        # whatever the sequence type.
        tsModulus = 2**seqOfInt.dataDepths[0]
        for k in range(frames):
            rowsPerFrame = tsPartTotal // frames
            frameOffset = k * tsPartTotal // frames
            for i in range(rowsPerFrame):
                frameSeq = testSeqs[tsPartInfo[i] % numSeqs].seq[k]
                if thePart == tsPartInfo[i + frameOffset]:
                    counter += 1
                else:
                    thePart = tsPartInfo[i + frameOffset]
                    counter = 0
                fill_row(tsParts, i + frameOffset, frameSeq,
                         PART_LEN * counter, len(frameSeq), tsModulus)

    cTsPartInfo = clubgen_c.intArray(tsPartTotal)
    for i in range(tsPartTotal):
        cTsPartInfo[i] = tsPartInfo[i]

    # ---- output buffer: MAX_MATCH records of FIELDS ints per block -------
    blockLen = MAX_MATCH * FIELDS
    totalBlocks = numSeqs * frames
    alignmentInfo = clubgen_c.intArray(blockLen * totalBlocks)
    for i in range(blockLen * totalBlocks):
        alignmentInfo[i] = 0

    if frames == 1:
        tsLens = clubgen_c.intArray(numSeqs)
        for i in range(numSeqs):
            tsLens[i] = testSeqs[i].length
    else:
        tsLens = clubgen_c.intArray(numSeqs * frames)
        for k in range(frames):
            for i in range(numSeqs):
                tsLens[i + k * numSeqs] = len(testSeqs[i].seq[k])

    clubgen_c.TesterAlign_c(soiPartTotal, soiParts, tsPartTotal, tsParts,
                            cTsPartInfo, alignmentInfo, seqOfInt.length,
                            tsLens,
                            parameters['GLOBAL_SIG_LEVEL'],
                            parameters['LOCAL_SIG_LEVEL'],
                            parameters['LOCAL_WINDOW'],
                            MAX_MATCH, MAX_INLINE,
                            parameters['LOCAL_BRIDGE_WIDTH'],
                            parameters['COMB_SPEED'])

    # ---- convert the records into Alignment objects -----------------------
    # Record fields: 0 and 1 are compared to detect an empty record and are
    # passed with field 2 to cFindRanges (presumably start, end and shift);
    # fields 3 and 4 are the SOI and test-sequence row indices.
    alignments = SetOfAlignments(seqOfInt)
    locWin = parameters['LOCAL_WINDOW']
    for seqOn in range(totalBlocks):
        testSeq = testSeqs[seqOn % numSeqs]
        rframe = seqOn // numSeqs
        base = seqOn * blockLen
        theAlignment = Alignment(seqOfInt, testSeq)

        i = 0
        # Read records until the first empty one or the end of this block.
        # A completely full block is still added below instead of being
        # dropped together with every later sequence.
        while (i < blockLen
               and alignmentInfo[base + i] != alignmentInfo[base + i + 1]):
            soiPart = alignmentInfo[base + i + 3]
            tsPart = alignmentInfo[base + i + 4]
            lensoi = min(PART_LEN, seqOfInt.length - PART_LEN * soiPart)
            lents = min(PART_LEN, testSeq.length - PART_LEN * tsPart)
            soiShift = PART_LEN * soiPart
            tsShift = PART_LEN * tsPart
            SOIrange = []
            TSrange = []

            # Merge consecutive records that share one shift. The first
            # guard keeps the look-ahead inside this sequence's block rather
            # than reading the next block or past the end of the buffer.
            # NOTE(review): the group can end on an empty record (a zeroed
            # slot following a shift-0 match); it is still converted below,
            # as before - confirm this is intended.
            while (i + FIELDS < blockLen
                   and alignmentInfo[base + i + 2]
                   == alignmentInfo[base + i + FIELDS + 2]
                   and alignmentInfo[base + i]
                   != alignmentInfo[base + i + 1]):
                t3, t4 = cFindRanges(alignmentInfo[base + i],
                                     alignmentInfo[base + i + 1],
                                     alignmentInfo[base + i + 2],
                                     lensoi, lents, locWin,
                                     soiShift, tsShift)
                SOIrange.append(t3)
                TSrange.append(t4)
                i += FIELDS

            logging.debug('alignmentInfo: ' + ','.join(
                [str(alignmentInfo[base + i + f]) for f in range(FIELDS)]))

            # Last record of the group.
            shift = alignmentInfo[base + i + 2]
            t3, t4 = cFindRanges(alignmentInfo[base + i],
                                 alignmentInfo[base + i + 1],
                                 shift, lensoi, lents, locWin,
                                 soiShift, tsShift)
            SOIrange.append(t3)
            TSrange.append(t4)

            # Overall extents, offset back into whole-sequence coordinates.
            soiStart = max(0, shift - lents + locWin) + soiShift
            soiEnd = min(lensoi - 1, shift + locWin - 1) + soiShift
            tsStart = max(0, lents - locWin - shift) + tsShift
            tsEnd = lents - 1 - max(0, locWin + shift - lensoi) + tsShift

            logging.debug('Added align! ' + str(SOIrange) + ' to '
                          + str(TSrange) + ' of ' + testSeq.name
                          + ' rf ' + str(rframe))
            locAlign = LocalAlignment(SOIrange, TSrange,
                                      [soiStart, soiEnd], [tsStart, tsEnd],
                                      rframe)
            theAlignment.AddLocalAlign(locAlign)
            i += FIELDS

        alignments.AddAlign(theAlignment)

    return alignments