def profileTargetsHistoAS(tccList, cName, name='boxplot'):
    '''Box-plot sense and antisense profiles around each target's highest peak.

    For every tcc in tccList the highest peak is looked up on the tcc itself
    and on its antisense counterpart (cg.convertToAS). A target is skipped
    when either lookup returns None. The profiles returned by
    svs.profileAroundPoint are pooled per coordinate and passed to
    plot.boxPlotHistoAS.

    tccList -- iterable of tcc values accepted by cg.tccSplit
    cName   -- configuration name forwarded to cgPeaks.stretch and
               svs.profileAroundPoint
    name    -- forwarded to plot.boxPlotHistoAS (default 'boxplot')
    '''
    # Not called `range`, so the builtin stays usable inside this function.
    profileSpan = 50
    histDict = {}    # {coord: [profile value, ...]} for the sense strand
    histDictAS = {}  # same layout for the antisense strand

    for tcc in tccList:
        chrom, strand, start, end = cg.tccSplit(tcc)

        # Highest peak (sense)
        tccStretch = cgPeaks.stretch(tcc, cName)
        tccStretch.createPeaks(span=2)
        highestCoord = tccStretch.getHighestPeak()
        if highestCoord is None:
            continue

        # Highest peak (antisense)
        tccStretch = cgPeaks.stretch(cg.convertToAS(tcc), cName)
        tccStretch.createPeaks(span=2)
        highestCoordAS = tccStretch.getHighestPeak()
        if highestCoordAS is None:
            continue

        # Profile around the sense peak
        zPoint = cg.makeTcc(chrom, strand, highestCoord, end)
        cProfile = svs.profileAroundPoint(zPoint, profileSpan, cName, ratio=True)
        for coord in cProfile:
            # setdefault creates the list on first sight of a coordinate
            # without a catch-all except that would hide unrelated errors.
            histDict.setdefault(coord, []).append(cProfile[coord])

        # Profile around the antisense version of the same point, with
        # ratioCoord set to the antisense peak coordinate.
        zPoint = cg.convertToAS(zPoint)
        cProfile = svs.profileAroundPoint(zPoint, profileSpan, cName, ratio=True,
                                          ratioCoord=highestCoordAS)
        for coord in cProfile:
            histDictAS.setdefault(coord, []).append(cProfile[coord])

    plot.boxPlotHistoAS(histDict, histDictAS, name=name)
def extendPeakTest(tcc, pRange, minVal, maxAvgNoise, minPeakLength, maxPeakLength, cName):
    '''Extend a peak outward from its centre and return the extended tcc, or False.

    The profile around tcc (stepVectorScan.profileAroundPoint, ratio=True) is
    walked left and right from offset 0 while values stay above minVal. The
    resulting stretch is accepted when

      * minPeakLength < stretch length < maxPeakLength (both exclusive), and
      * the mean profile value over all offsets outside the stretch is
        below maxAvgNoise.

    Returns cg.makeTcc(chrom, strand, peakPosition + start, peakPosition + end)
    on success, otherwise False. False is also returned when the stretch
    fills the whole window, so there is no outside region to average.
    Progress is printed to stdout.
    '''
    chrom, strand, peakPosition, end = cg.tccSplit(tcc)
    cProfile = stepVectorScan.profileAroundPoint(tcc, pRange, cName, ratio=True)

    # Offsets relative to the peak, both ordered from the middle outward.
    leftRange = list(range(-1, -pRange, -1))
    rightRange = list(range(1, pRange))

    # Left edge: stays at the outermost offset unless a low value stops the walk.
    startFinal = leftRange[-1]
    for i in leftRange:
        if cProfile[i] > minVal:
            print(' extending stretch')
        else:
            print(' end of stretch L')
            startFinal = i + 1
            break

    # Right edge, same idea.
    endFinal = rightRange[-1]
    for i in rightRange:
        if cProfile[i] > minVal:
            print(' extending stretch')
        else:
            print(' end of stretch R')
            endFinal = i - 1
            break

    peakLength = endFinal - startFinal + 1

    # Average expression over everything in the window outside the stretch.
    low = startFinal
    high = endFinal
    lowRange = list(range(1 - pRange, low))
    highRange = list(range(high + 1, pRange))
    totalLength = len(lowRange) + len(highRange)
    print('%s %s %s %s %s %s' % (totalLength, pRange, low, high, lowRange, highRange))

    if totalLength == 0:
        # Nothing outside the stretch to average. The original reached this
        # result through a bare except around the division.
        return False

    noiseExpression = 0
    for i in lowRange:
        noiseExpression += cProfile[i]
    for i in highRange:
        noiseExpression += cProfile[i]
    avgNoise = noiseExpression / float(totalLength)

    # Filter out peaks that look a certain way.
    if (minPeakLength < peakLength < maxPeakLength) and (avgNoise < maxAvgNoise):
        goodTcc = cg.makeTcc(chrom, strand, peakPosition + startFinal, peakPosition + endFinal)
        print('*KEEPER')
        return goodTcc
    return False
def profileTargetsHistoAS(tccList, cName, name='boxplot'):
    '''Box-plot sense and antisense profiles around each target's highest peak.

    For every tcc in tccList the highest peak is looked up on the tcc itself
    and on its antisense counterpart (cg.convertToAS). A target is skipped
    when either lookup returns None. The profiles returned by
    svs.profileAroundPoint are pooled per coordinate and passed to
    plot.boxPlotHistoAS.

    tccList -- iterable of tcc values accepted by cg.tccSplit
    cName   -- configuration name forwarded to cgPeaks.stretch and
               svs.profileAroundPoint
    name    -- forwarded to plot.boxPlotHistoAS (default 'boxplot')
    '''
    # Not called `range`, so the builtin stays usable inside this function.
    profileSpan = 50
    histDict = {}    # {coord: [profile value, ...]} for the sense strand
    histDictAS = {}  # same layout for the antisense strand

    for tcc in tccList:
        chrom, strand, start, end = cg.tccSplit(tcc)

        # Highest peak (sense)
        tccStretch = cgPeaks.stretch(tcc, cName)
        tccStretch.createPeaks(span=2)
        highestCoord = tccStretch.getHighestPeak()
        if highestCoord is None:
            continue

        # Highest peak (antisense)
        tccStretch = cgPeaks.stretch(cg.convertToAS(tcc), cName)
        tccStretch.createPeaks(span=2)
        highestCoordAS = tccStretch.getHighestPeak()
        if highestCoordAS is None:
            continue

        # Profile around the sense peak
        zPoint = cg.makeTcc(chrom, strand, highestCoord, end)
        cProfile = svs.profileAroundPoint(zPoint, profileSpan, cName, ratio=True)
        for coord in cProfile:
            # setdefault creates the list on first sight of a coordinate
            # without a catch-all except that would hide unrelated errors.
            histDict.setdefault(coord, []).append(cProfile[coord])

        # Profile around the antisense version of the same point, with
        # ratioCoord set to the antisense peak coordinate.
        zPoint = cg.convertToAS(zPoint)
        cProfile = svs.profileAroundPoint(zPoint, profileSpan, cName, ratio=True,
                                          ratioCoord=highestCoordAS)
        for coord in cProfile:
            histDictAS.setdefault(coord, []).append(cProfile[coord])

    plot.boxPlotHistoAS(histDict, histDictAS, name=name)
def profileTargetsHisto(tccList, cName, name='boxplot'):
    '''Box-plot the profiles around the highest peak of each target.

    For every tcc in tccList the highest peak is looked up with
    cgPeaks.stretch; targets for which getHighestPeak() returns None are
    skipped. The profile returned by svs.profileAroundPoint (called with 200
    and ratio=True) is pooled per coordinate and passed to plot.boxPlotHisto.

    tccList -- iterable of tcc values accepted by cg.tccSplit
    cName   -- configuration name forwarded to cgPeaks.stretch and
               svs.profileAroundPoint
    name    -- forwarded to plot.boxPlotHisto (default 'boxplot')
    '''
    histDict = {}  # {coord: [profile value, ...]}

    for tcc in tccList:
        chrom, strand, start, end = cg.tccSplit(tcc)

        # Highest peak
        tccStretch = cgPeaks.stretch(tcc, cName)
        tccStretch.createPeaks(span=2)
        highestCoord = tccStretch.getHighestPeak()
        if highestCoord is None:
            continue

        # Profile around that point
        zPoint = cg.makeTcc(chrom, strand, highestCoord, end)
        cProfile = svs.profileAroundPoint(zPoint, 200, cName, ratio=True)
        for coord in cProfile:
            # setdefault creates the list on first sight of a coordinate
            # without a catch-all except that would hide unrelated errors.
            histDict.setdefault(coord, []).append(cProfile[coord])

    plot.boxPlotHisto(histDict, name=name)
def profileTargetsHisto(tccList, cName, name='boxplot'):
    '''Box-plot the profiles around the highest peak of each target.

    For every tcc in tccList the highest peak is looked up with
    cgPeaks.stretch; targets for which getHighestPeak() returns None are
    skipped. The profile returned by svs.profileAroundPoint (called with 200
    and ratio=True) is pooled per coordinate and passed to plot.boxPlotHisto.

    tccList -- iterable of tcc values accepted by cg.tccSplit
    cName   -- configuration name forwarded to cgPeaks.stretch and
               svs.profileAroundPoint
    name    -- forwarded to plot.boxPlotHisto (default 'boxplot')
    '''
    histDict = {}  # {coord: [profile value, ...]}

    for tcc in tccList:
        chrom, strand, start, end = cg.tccSplit(tcc)

        # Highest peak
        tccStretch = cgPeaks.stretch(tcc, cName)
        tccStretch.createPeaks(span=2)
        highestCoord = tccStretch.getHighestPeak()
        if highestCoord is None:
            continue

        # Profile around that point
        zPoint = cg.makeTcc(chrom, strand, highestCoord, end)
        cProfile = svs.profileAroundPoint(zPoint, 200, cName, ratio=True)
        for coord in cProfile:
            # setdefault creates the list on first sight of a coordinate
            # without a catch-all except that would hide unrelated errors.
            histDict.setdefault(coord, []).append(cProfile[coord])

    plot.boxPlotHisto(histDict, name=name)
def parallelMakePeaks(tcc, cName, minExpression):
    '''Scan one tcc for short "roof" peaks and write the keepers to a file.

    Peaks are created with cgPeaks.stretch(tcc, cName).createPeaks(span=1,
    minVal=int(minExpression)). For every peak x a ratio profile over offsets
    1-pRange .. pRange-1 (pRange = 40) is scanned for the longest run of
    values above 0.70 that is terminated by a lower value. A peak is written
    when

      * the run length is 1..4,
      * the profile value one position outside each end of the run is < 0.20,
      * the mean profile value outside the run and its flanks is < 0.3.

    Output goes to 'out/peakData.<tcc>.<minExpression>.<assembly>', one tcc
    per line. Verbose progress is printed to stdout.
    '''
    conf = c.getConfig(cName)
    outName = 'out/peakData.%s.%s.%s' % (tcc, minExpression, conf.conf['assembly'])

    pRange = 40   # half-width of the profile window around each peak
    minVal = .70  # profile value a position must exceed to count as roof
    extend = 1    # distance outside the roof at which the drop is sampled

    # `with` closes the file even if one of the project calls below raises.
    with open(outName, 'w') as f:
        print('scanning range %s' % (tcc,))
        chrom, strand, start, end = cg.tccSplit(tcc)

        peaks = cgPeaks.stretch(tcc, cName)
        peaks.createPeaks(span=1, minVal=int(minExpression))
        print('len peaks %s' % len(peaks.peaks))

        endCheck = 0
        for x in peaks.peaks:
            print('%s %s' % (x, endCheck))
            # NOTE: the original carried a disabled "skip if x < endCheck"
            # check here (inside a string literal). It remains disabled;
            # endCheck is only tracked and printed.

            rTcc = cg.makeTcc(chrom, strand, x, x + 1)
            cProfile = stepVectorScan.profileAroundPoint(rTcc, pRange, cName, ratio=True)

            # Longest terminated run above minVal. A run still open at the
            # end of the window is never recorded.
            highest = 0
            stretch = 0
            startCurrent = None
            startFinal = None
            endFinal = None
            for i in range(1 - pRange, pRange):
                print('  %s %s' % (x + i, cProfile[i]))
                if cProfile[i] > minVal:
                    print(' extending stretch')
                    stretch += 1
                    if startCurrent is None:
                        startCurrent = i
                    continue
                if stretch > 0:
                    print('end of stretch')
                    if stretch > highest:
                        # This run ended and beats the previous best.
                        highest = stretch
                        startFinal = startCurrent
                        endFinal = i - 1
                startCurrent = None
                stretch = 0

            # Compare against None: offset 0 (the peak itself) is a valid
            # start or end, and the old truthiness test rejected such roofs.
            if startFinal is None or endFinal is None:
                print('no start and end of peak')
                continue

            low = startFinal - extend
            high = endFinal + extend
            if not (low > (1 - pRange) and high < pRange):
                print('out of range')
                continue
            val = [float(cProfile[low]), float(cProfile[high])]

            print('%s %s %s %s' % (low, high, x, endFinal))
            endCheck = x + endFinal

            # Average expression outside [low, high]. low > 1 - pRange
            # guarantees at least one offset, so the division is safe.
            noiseExpression = 0
            lowRange = range(1 - pRange, low)
            highRange = range(high + 1, pRange)
            totalLength = len(lowRange) + len(highRange)
            for i in lowRange:
                noiseExpression += cProfile[i]
            for i in highRange:
                noiseExpression += cProfile[i]
            avgNoise = noiseExpression / float(totalLength)

            # Filter out peaks that look a certain way.
            print('%s %s %s %s' % (highest, val[0], val[1], avgNoise))
            if 0 < highest < 5 and val[0] < 0.20 and val[1] < .20 and avgNoise < .3:
                goodTcc = cg.makeTcc(chrom, strand, x + low, x + high)
                print('*KEEPER')
                f.write('%s\n' % goodTcc)

    print('DONE %s' % (tcc,))
def makePeakInput(cName, minExpression=2000):
    '''Scan every acceptable chromosome for roof-shaped peaks and write them out.

    Each chromosome in cg.returnChromLengthDict(assembly) that is listed in
    cg.acceptableChroms is scanned on strands '1' and '-1', in windows
    delimited by consecutive values of rangePoints(1, length, 1000). Peaks
    are created per window with createPeaks(span=3, minVal=minExpression).
    For each peak x a ratio profile over offsets 1-pRange .. pRange-1
    (pRange = 30) is scanned for the longest run above 0.80 that is
    terminated by a lower value. A peak is written when the run length is
    15..25 and the profile value 4 positions outside each end is < 0.2.

    Output goes to 'peakData.<minExpression>', one tcc per line.
    '''
    # Result unused; call kept in case loading the main config has side
    # effects -- TODO confirm and drop.
    c.getConfig('Main.conf')
    conf = c.getConfig(cName)
    assembly = conf.conf['assembly']
    chromLens = cg.returnChromLengthDict(assembly)

    pRange = 30   # half-width of the profile window around each peak
    minVal = .80  # profile value a position must exceed to count as roof
    flank = 4     # distance outside the roof at which the drop is sampled

    # `with` closes the file even if one of the project calls below raises.
    with open('peakData.%s' % minExpression, 'w') as f:
        for chrom in chromLens:
            if chrom not in cg.acceptableChroms:
                continue
            for strand in ['1', '-1']:
                print('Getting Peaks for  %s %s' % (chrom, strand))
                prevI = 0
                endCheck = 0
                for windowEnd in rangePoints(1, chromLens[chrom], 1000):
                    if windowEnd == 1:
                        prevI = windowEnd
                        continue
                    tcc = cg.makeTcc(chrom, strand, prevI, windowEnd)
                    prevI = windowEnd

                    peaks = cgPeaks.stretch(tcc, cName)
                    peaks.createPeaks(span=3, minVal=minExpression)

                    for x in peaks.peaks:
                        # Skip peaks inside the last in-range region.
                        if x < endCheck:
                            continue

                        rTcc = cg.makeTcc(chrom, strand, x, x + 1)
                        cProfile = stepVectorScan.profileAroundPoint(rTcc, pRange, cName, ratio=True)

                        # Longest terminated run above minVal. A run still
                        # open at the end of the window is never recorded.
                        highest = 0
                        stretch = 0
                        startCurrent = None
                        startFinal = None
                        endFinal = None
                        for offset in range(1 - pRange, pRange):
                            if cProfile[offset] > minVal:
                                stretch += 1
                                if startCurrent is None:
                                    startCurrent = offset
                                continue
                            if stretch > highest:
                                # This run ended and beats the previous best.
                                highest = stretch
                                startFinal = startCurrent
                                endFinal = offset - 1
                            startCurrent = None
                            stretch = 0

                        # Compare against None: offset 0 (the peak itself)
                        # is a valid start or end, and the old truthiness
                        # test rejected such roofs.
                        if startFinal is None or endFinal is None:
                            continue

                        low = startFinal - flank
                        high = endFinal + flank
                        if not (low > (1 - pRange) and high < pRange):
                            continue
                        val = [float(cProfile[low]), float(cProfile[high])]

                        endCheck = x + high

                        # Filter out peaks that look a certain way:
                        # roof length, then drop values.
                        if 14 < highest < 26 and val[0] < 0.2 and val[1] < .2:
                            goodTcc = cg.makeTcc(chrom, strand, x + low, x + high)
                            f.write('%s\n' % goodTcc)
def findPeaks(pType, cName=None):
    '''Pick the best roof-shaped peak per hairpin and record it in the prediction file.

    pType selects the prediction file from the configuration: 'E' uses
    conf.conf['resultsExonsSorted'], anything else uses
    conf.conf['resultsIntronsSorted'].

    For every hairpin returned by getHairpins.getHairpins, all peak
    coordinates inside the hairpin are examined. Around each peak a ratio
    profile over offsets 1-pRange .. pRange-1 (pRange = 30) is scanned for
    the longest run above 0.80 that is terminated by a lower value, and the
    profile is sampled 4 positions outside each end of that run. Among peaks
    with 14 < run length < 26 and 0.0 < max(drop values) < 0.2, the one
    whose run length is closest to 22 wins (first one on ties).

    The prediction file is then rewritten in place, with the peak summary
    (or 'None') passed to cg.appendToLine(line, peakInfo, 13) for each line.
    '''
    # Result unused; call kept in case constructing the config has side
    # effects -- TODO confirm and drop.
    c.cgConfig('Main.conf')
    conf = c.getConfig(cName)
    if pType == 'E':
        predName = conf.conf['resultsExonsSorted']
    else:
        predName = conf.conf['resultsIntronsSorted']
    print(predName)

    # CID -> [hairpin tcc, best combo or the string 'None']
    cHairs = getHairpins.getHairpins(predName)
    peakDict = {}
    for CID in cHairs:
        peakDict[CID] = [cHairs[CID], 'None']

    timer = cg.cgTimer()
    timer.start()

    # Put peaks in memory: chrom -> strand -> {peak coordinate: 0}
    print('Creating peak data')
    peaks = {}
    for CID in cHairs:
        tcc = cHairs[CID]
        chrom, strand, start, end = cg.tccSplit(tcc)
        strandPeaks = peaks.setdefault(chrom, {}).setdefault(strand, {})

        peakStretch = cgPeaks.stretch(tcc, cName)
        peakStretch.createPeaks()
        for peakCoord in peakStretch.peaks:
            strandPeaks[peakCoord] = 0
    print(timer.split())

    print('finding best combos')
    pRange = 30   # half-width of the profile window around each peak
    minVal = .80  # profile value a position must exceed to count as roof
    flank = 4     # distance outside the roof at which the drop is sampled
    bestCombos = []
    for CID in peakDict:
        tcc = peakDict[CID][0]
        fields = cg.ss(tcc, ':')
        chrom = fields[0]
        strand = fields[1]
        start = int(fields[2])
        end = int(fields[3])

        # All known peak coordinates inside this hairpin.
        tccPeaks = [i for i in range(start, end + 1) if i in peaks[chrom][strand]]

        peakCombos = []
        for x in tccPeaks:
            rTcc = cg.makeTcc(chrom, strand, x, x + 1)

            # Profile value at offset 0 with ratio=False. Formerly stored
            # in a local called `max`, which shadowed the builtin.
            peakValue = stepVectorScan.profileAroundPoint(rTcc, 1, cName, ratio=False)[0]

            # Ratio profile used to measure the roof.
            cProfile = stepVectorScan.profileAroundPoint(rTcc, pRange, cName, ratio=True)

            # Longest terminated run above minVal. A run still open at the
            # end of the window is never recorded.
            highest = 0
            stretch = 0
            startCurrent = None
            startFinal = None
            endFinal = None
            for i in range(1 - pRange, pRange):
                if cProfile[i] > minVal:
                    stretch += 1
                    if startCurrent is None:
                        startCurrent = i
                    continue
                if stretch > highest:
                    # This run ended and beats the previous best.
                    highest = stretch
                    startFinal = startCurrent
                    endFinal = i - 1
                startCurrent = None
                stretch = 0

            # Drop values default to 1.0, which fails the < 0.2 filter below.
            # Compare against None: offset 0 (the peak itself) is a valid
            # start or end, and the old truthiness test rejected such roofs.
            val = [1.0, 1.0]
            if startFinal is not None and endFinal is not None:
                low = startFinal - flank
                high = endFinal + flank
                if low > (1 - pRange) and high < pRange:
                    val = [float(cProfile[low]), float(cProfile[high])]

            # Slots 2-4 are the placeholder 'S'; the layout is kept because
            # the combo is printed and formatted by index below.
            peakCombos.append([tcc, x, 'S', 'S', 'S', peakValue, highest, val])

        # Find the best combo.
        topCombo = None
        for combo in peakCombos:
            roofLength = combo[6]
            dropValue = max(combo[7])
            # NOTE(review): a drop value of exactly 0.0 is excluded here,
            # unlike the plain "< 0.2" tests elsewhere -- confirm intent.
            if not (14 < roofLength < 26 and 0.0 < dropValue < 0.2):
                continue
            # The original comment said "nearest 20", but the code targets 22.
            if topCombo is None or math.fabs(22 - roofLength) < math.fabs(22 - topCombo[6]):
                topCombo = combo

        if topCombo:
            peakDict[CID][1] = topCombo
            bestCombos.append(topCombo)
            print(bestCombos[-1])

    print(timer.split())

    # Now update predFile (SLOT 13): read everything, then rewrite in place.
    newLines = []
    with open(predName, 'r') as predFile:
        for line in predFile:
            CID = cg.ss(line)[7]
            top = peakDict[CID][1]
            if top == 'None':
                peakInfo = 'None'
            else:
                peakInfo = '%s:%s:%s:%s:%s:%s' % (str(top[1])[-3:], 'S',
                                                  str(top[4]).split('.')[0],
                                                  top[5], top[6], top[7])
            newLines.append(cg.appendToLine(line, peakInfo, 13))

    with open(predName, 'w') as predFile:
        predFile.writelines(newLines)
def roofPeakTest(tcc, pRange, minRoofVal, maxAvgNoise, maxDropVal, extend, minPeakLength, maxPeakLength, cName):
    '''Test whether the peak at tcc has a roof of acceptable length with a drop on both sides.

    The ratio profile around tcc is walked left and right from offset 0
    while values stay above minRoofVal; that stretch is the roof. The peak
    passes when

      * the roof plus `extend` positions on each side fits strictly inside
        the window (1 - pRange, pRange),
      * at least one of the `extend` positions just left of the roof AND at
        least one just right of it has a profile value below maxDropVal,
      * minPeakLength <= roof length <= maxPeakLength (inclusive), and
      * avgNoise < maxAvgNoise. avgNoise is fixed at 0.0 in this version;
        the noise computation was commented out in the original.

    Returns cg.makeTcc(chrom, strand, peakPosition + start, peakPosition + end)
    on success, otherwise False. Diagnostics are printed to stdout.

    Note: extend does not extend the coordinates into the final tcc, it is
    only used for declaring the peak.
    '''
    chrom, strand, peakPosition, end = cg.tccSplit(tcc)
    cProfile = stepVectorScan.profileAroundPoint(tcc, pRange, cName, ratio=True)

    # Offsets relative to the peak, both ordered from the middle outward.
    leftRange = list(range(-1, -pRange, -1))
    rightRange = list(range(1, pRange))

    # Left edge; startFinalE is the profile value that ended the roof
    # (0 if the roof reached the edge of the window).
    startFinal = leftRange[-1]
    startFinalE = 0
    for i in leftRange:
        if cProfile[i] > minRoofVal:
            continue
        startFinal = i + 1
        startFinalE = cProfile[i]
        break

    # Right edge; the default only holds if the roof reaches the window end.
    endFinal = rightRange[-1]
    endFinalE = 0
    for i in rightRange:
        if cProfile[i] > minRoofVal:
            continue
        endFinal = i - 1
        endFinalE = cProfile[i]
        break

    peakLength = endFinal - startFinal + 1

    extend = int(extend)
    low = startFinal - extend
    high = endFinal + extend
    if not (low > (1 - pRange) and high < pRange):
        print('out of range')
        return False

    # A side passes if any value within `extend` positions is below maxDropVal.
    flankOffsets = list(range(1, extend + 1))
    leftDrop = [float(cProfile[startFinal - k]) for k in flankOffsets]
    rightDrop = [float(cProfile[endFinal + k]) for k in flankOffsets]
    leftDropPass = [bool(v < maxDropVal) for v in leftDrop]
    rightDropPass = [bool(v < maxDropVal) for v in rightDrop]
    if not (any(leftDropPass) and any(rightDropPass)):
        print('dropVal Fail dropLeft dropRight')
        print('%s %s %s %s %s %s' % (startFinal, endFinal, leftDrop, rightDrop,
                                     leftDropPass, rightDropPass))
        return False

    # Noise filtering is disabled in this version, so only maxAvgNoise <= 0
    # can fail the comparison below.
    avgNoise = 0.0

    # The original printed this summary with a trailing comma, so the verdict
    # followed on the same line; the two are joined here for identical output.
    summary = '%s %s %s %s %s' % (startFinal, startFinalE, endFinal, endFinalE, peakLength)
    goodTcc = cg.makeTcc(chrom, strand, peakPosition + startFinal, peakPosition + endFinal)
    if (minPeakLength <= peakLength <= maxPeakLength) and (avgNoise < maxAvgNoise):
        print('%s  *KEEPER %s %s %s %s' % (summary, goodTcc, peakLength, avgNoise, maxAvgNoise))
        return goodTcc
    print('%s  reason %s %s %s %s' % (summary, goodTcc, peakLength, avgNoise, maxAvgNoise))
    return False
def parallelMakePeaks(tcc, cName, minExpression):
    '''Scan one tcc for short "roof" peaks and write the keepers to a file.

    Peaks are created with cgPeaks.stretch(tcc, cName).createPeaks(span=1,
    minVal=int(minExpression)). For every peak x a ratio profile over offsets
    1-pRange .. pRange-1 (pRange = 40) is scanned for the longest run of
    values above 0.70 that is terminated by a lower value. A peak is written
    when

      * the run length is 1..4,
      * the profile value one position outside each end of the run is < 0.20,
      * the mean profile value outside the run and its flanks is < 0.3.

    Output goes to 'out/peakData.<tcc>.<minExpression>.<assembly>', one tcc
    per line. Verbose progress is printed to stdout.
    '''
    conf = c.getConfig(cName)
    outName = 'out/peakData.%s.%s.%s' % (tcc, minExpression, conf.conf['assembly'])

    pRange = 40   # half-width of the profile window around each peak
    minVal = .70  # profile value a position must exceed to count as roof
    extend = 1    # distance outside the roof at which the drop is sampled

    # `with` closes the file even if one of the project calls below raises.
    with open(outName, 'w') as f:
        print('scanning range %s' % (tcc,))
        chrom, strand, start, end = cg.tccSplit(tcc)

        peaks = cgPeaks.stretch(tcc, cName)
        peaks.createPeaks(span=1, minVal=int(minExpression))
        print('len peaks %s' % len(peaks.peaks))

        endCheck = 0
        for x in peaks.peaks:
            print('%s %s' % (x, endCheck))
            # NOTE: the original carried a disabled "skip if x < endCheck"
            # check here (inside a string literal). It remains disabled;
            # endCheck is only tracked and printed.

            rTcc = cg.makeTcc(chrom, strand, x, x + 1)
            cProfile = stepVectorScan.profileAroundPoint(rTcc, pRange, cName, ratio=True)

            # Longest terminated run above minVal. A run still open at the
            # end of the window is never recorded.
            highest = 0
            stretch = 0
            startCurrent = None
            startFinal = None
            endFinal = None
            for i in range(1 - pRange, pRange):
                print('  %s %s' % (x + i, cProfile[i]))
                if cProfile[i] > minVal:
                    print(' extending stretch')
                    stretch += 1
                    if startCurrent is None:
                        startCurrent = i
                    continue
                if stretch > 0:
                    print('end of stretch')
                    if stretch > highest:
                        # This run ended and beats the previous best.
                        highest = stretch
                        startFinal = startCurrent
                        endFinal = i - 1
                startCurrent = None
                stretch = 0

            # Compare against None: offset 0 (the peak itself) is a valid
            # start or end, and the old truthiness test rejected such roofs.
            if startFinal is None or endFinal is None:
                print('no start and end of peak')
                continue

            low = startFinal - extend
            high = endFinal + extend
            if not (low > (1 - pRange) and high < pRange):
                print('out of range')
                continue
            val = [float(cProfile[low]), float(cProfile[high])]

            print('%s %s %s %s' % (low, high, x, endFinal))
            endCheck = x + endFinal

            # Average expression outside [low, high]. low > 1 - pRange
            # guarantees at least one offset, so the division is safe.
            noiseExpression = 0
            lowRange = range(1 - pRange, low)
            highRange = range(high + 1, pRange)
            totalLength = len(lowRange) + len(highRange)
            for i in lowRange:
                noiseExpression += cProfile[i]
            for i in highRange:
                noiseExpression += cProfile[i]
            avgNoise = noiseExpression / float(totalLength)

            # Filter out peaks that look a certain way.
            print('%s %s %s %s' % (highest, val[0], val[1], avgNoise))
            if 0 < highest < 5 and val[0] < 0.20 and val[1] < .20 and avgNoise < .3:
                goodTcc = cg.makeTcc(chrom, strand, x + low, x + high)
                print('*KEEPER')
                f.write('%s\n' % goodTcc)

    print('DONE %s' % (tcc,))
def makePeakInput(cName, minExpression=2000):
    '''Scan every acceptable chromosome for roof-shaped peaks and write them out.

    Each chromosome in cg.returnChromLengthDict(assembly) that is listed in
    cg.acceptableChroms is scanned on strands '1' and '-1', in windows
    delimited by consecutive values of rangePoints(1, length, 1000). Peaks
    are created per window with createPeaks(span=3, minVal=minExpression).
    For each peak x a ratio profile over offsets 1-pRange .. pRange-1
    (pRange = 30) is scanned for the longest run above 0.80 that is
    terminated by a lower value. A peak is written when the run length is
    15..25 and the profile value 4 positions outside each end is < 0.2.

    Output goes to 'peakData.<minExpression>', one tcc per line.
    '''
    # Result unused; call kept in case loading the main config has side
    # effects -- TODO confirm and drop.
    c.getConfig('Main.conf')
    conf = c.getConfig(cName)
    assembly = conf.conf['assembly']
    chromLens = cg.returnChromLengthDict(assembly)

    pRange = 30   # half-width of the profile window around each peak
    minVal = .80  # profile value a position must exceed to count as roof
    flank = 4     # distance outside the roof at which the drop is sampled

    # `with` closes the file even if one of the project calls below raises.
    with open('peakData.%s' % minExpression, 'w') as f:
        for chrom in chromLens:
            if chrom not in cg.acceptableChroms:
                continue
            for strand in ['1', '-1']:
                print('Getting Peaks for  %s %s' % (chrom, strand))
                prevI = 0
                endCheck = 0
                for windowEnd in rangePoints(1, chromLens[chrom], 1000):
                    if windowEnd == 1:
                        prevI = windowEnd
                        continue
                    tcc = cg.makeTcc(chrom, strand, prevI, windowEnd)
                    prevI = windowEnd

                    peaks = cgPeaks.stretch(tcc, cName)
                    peaks.createPeaks(span=3, minVal=minExpression)

                    for x in peaks.peaks:
                        # Skip peaks inside the last in-range region.
                        if x < endCheck:
                            continue

                        rTcc = cg.makeTcc(chrom, strand, x, x + 1)
                        cProfile = stepVectorScan.profileAroundPoint(rTcc, pRange, cName, ratio=True)

                        # Longest terminated run above minVal. A run still
                        # open at the end of the window is never recorded.
                        highest = 0
                        stretch = 0
                        startCurrent = None
                        startFinal = None
                        endFinal = None
                        for offset in range(1 - pRange, pRange):
                            if cProfile[offset] > minVal:
                                stretch += 1
                                if startCurrent is None:
                                    startCurrent = offset
                                continue
                            if stretch > highest:
                                # This run ended and beats the previous best.
                                highest = stretch
                                startFinal = startCurrent
                                endFinal = offset - 1
                            startCurrent = None
                            stretch = 0

                        # Compare against None: offset 0 (the peak itself)
                        # is a valid start or end, and the old truthiness
                        # test rejected such roofs.
                        if startFinal is None or endFinal is None:
                            continue

                        low = startFinal - flank
                        high = endFinal + flank
                        if not (low > (1 - pRange) and high < pRange):
                            continue
                        val = [float(cProfile[low]), float(cProfile[high])]

                        endCheck = x + high

                        # Filter out peaks that look a certain way:
                        # roof length, then drop values.
                        if 14 < highest < 26 and val[0] < 0.2 and val[1] < .2:
                            goodTcc = cg.makeTcc(chrom, strand, x + low, x + high)
                            f.write('%s\n' % goodTcc)
def roofPeakTest(tcc, pRange, minRoofVal, maxAvgNoise, maxDropVal, extend, minPeakLength, maxPeakLength, cName):
    '''Return the roof tcc for the peak at tcc, or False if it is rejected.

    The ratio profile around tcc is walked outward from offset 0 in both
    directions while values stay above minRoofVal. The roof is kept when the
    profile value `extend` positions outside each end is below maxDropVal,
    minPeakLength < roof length < maxPeakLength (exclusive), and the mean
    profile value outside the roof and its flanks is below maxAvgNoise.

    Note: extend does not extend the coordinates into the final tcc, it is
    only used for declaring the peak.
    '''
    chrom, strand, peakPosition, end = cg.tccSplit(tcc)
    cProfile = stepVectorScan.profileAroundPoint(tcc, pRange, cName, ratio=True)

    # Offsets relative to the peak, both ordered from the middle outward.
    leftOffsets = range(1 - pRange, 0)[::-1]
    rightOffsets = range(1, pRange)

    # Left edge of the roof.
    startFinal = leftOffsets[-1]
    for offset in leftOffsets:
        if cProfile[offset] > minRoofVal:
            print(' extending stretch')
            continue
        print(' end of stretch L')
        startFinal = offset + 1
        break

    # Right edge; the default only holds if the roof reaches the window end.
    endFinal = rightOffsets[-1]
    for offset in rightOffsets:
        if cProfile[offset] > minRoofVal:
            print(' extending stretch')
            continue
        print(' end of stretch R')
        endFinal = offset - 1
        break

    peakLength = endFinal - startFinal + 1

    # Drop check, sampled `extend` positions outside the roof on each side.
    extend = int(extend)
    low = startFinal - extend
    high = endFinal + extend
    if not (low > (1 - pRange) and high < pRange):
        print('out of range')
        return False
    leftDrop = float(cProfile[low])
    rightDrop = float(cProfile[high])
    if not (leftDrop < maxDropVal and rightDrop < maxDropVal):
        return False

    # Mean expression over the offsets outside [low, high], summed in the
    # same left-then-right order as before.
    noiseOffsets = list(range(1 - pRange, low)) + list(range(high + 1, pRange))
    noiseExpression = 0
    for offset in noiseOffsets:
        noiseExpression += cProfile[offset]
    avgNoise = noiseExpression / float(len(noiseOffsets))

    # Filter out peaks that look a certain way.
    lengthOk = minPeakLength < peakLength < maxPeakLength
    if not (lengthOk and avgNoise < maxAvgNoise):
        return False

    goodTcc = cg.makeTcc(chrom, strand, peakPosition + startFinal, peakPosition + endFinal)
    print('*KEEPER')
    return goodTcc
def findPeaks(pType, cName=None):
    '''Pick the best roof-shaped peak per hairpin and record it in the prediction file.

    pType selects the prediction file from the configuration: 'E' uses
    conf.conf['resultsExonsSorted'], anything else uses
    conf.conf['resultsIntronsSorted'].

    For every hairpin returned by getHairpins.getHairpins, all peak
    coordinates inside the hairpin are examined. Around each peak a ratio
    profile over offsets 1-pRange .. pRange-1 (pRange = 30) is scanned for
    the longest run above 0.80 that is terminated by a lower value, and the
    profile is sampled 4 positions outside each end of that run. Among peaks
    with 14 < run length < 26 and 0.0 < max(drop values) < 0.2, the one
    whose run length is closest to 22 wins (first one on ties).

    The prediction file is then rewritten in place, with the peak summary
    (or 'None') passed to cg.appendToLine(line, peakInfo, 13) for each line.
    '''
    # Result unused; call kept in case constructing the config has side
    # effects -- TODO confirm and drop.
    c.cgConfig('Main.conf')
    conf = c.getConfig(cName)
    if pType == 'E':
        predName = conf.conf['resultsExonsSorted']
    else:
        predName = conf.conf['resultsIntronsSorted']
    print(predName)

    # CID -> [hairpin tcc, best combo or the string 'None']
    cHairs = getHairpins.getHairpins(predName)
    peakDict = {}
    for CID in cHairs:
        peakDict[CID] = [cHairs[CID], 'None']

    timer = cg.cgTimer()
    timer.start()

    # Put peaks in memory: chrom -> strand -> {peak coordinate: 0}
    print('Creating peak data')
    peaks = {}
    for CID in cHairs:
        tcc = cHairs[CID]
        chrom, strand, start, end = cg.tccSplit(tcc)
        strandPeaks = peaks.setdefault(chrom, {}).setdefault(strand, {})

        peakStretch = cgPeaks.stretch(tcc, cName)
        peakStretch.createPeaks()
        for peakCoord in peakStretch.peaks:
            strandPeaks[peakCoord] = 0
    print(timer.split())

    print('finding best combos')
    pRange = 30   # half-width of the profile window around each peak
    minVal = .80  # profile value a position must exceed to count as roof
    flank = 4     # distance outside the roof at which the drop is sampled
    bestCombos = []
    for CID in peakDict:
        tcc = peakDict[CID][0]
        fields = cg.ss(tcc, ':')
        chrom = fields[0]
        strand = fields[1]
        start = int(fields[2])
        end = int(fields[3])

        # All known peak coordinates inside this hairpin.
        tccPeaks = [i for i in range(start, end + 1) if i in peaks[chrom][strand]]

        peakCombos = []
        for x in tccPeaks:
            rTcc = cg.makeTcc(chrom, strand, x, x + 1)

            # Profile value at offset 0 with ratio=False. Formerly stored
            # in a local called `max`, which shadowed the builtin.
            peakValue = stepVectorScan.profileAroundPoint(rTcc, 1, cName, ratio=False)[0]

            # Ratio profile used to measure the roof.
            cProfile = stepVectorScan.profileAroundPoint(rTcc, pRange, cName, ratio=True)

            # Longest terminated run above minVal. A run still open at the
            # end of the window is never recorded.
            highest = 0
            stretch = 0
            startCurrent = None
            startFinal = None
            endFinal = None
            for i in range(1 - pRange, pRange):
                if cProfile[i] > minVal:
                    stretch += 1
                    if startCurrent is None:
                        startCurrent = i
                    continue
                if stretch > highest:
                    # This run ended and beats the previous best.
                    highest = stretch
                    startFinal = startCurrent
                    endFinal = i - 1
                startCurrent = None
                stretch = 0

            # Drop values default to 1.0, which fails the < 0.2 filter below.
            # Compare against None: offset 0 (the peak itself) is a valid
            # start or end, and the old truthiness test rejected such roofs.
            val = [1.0, 1.0]
            if startFinal is not None and endFinal is not None:
                low = startFinal - flank
                high = endFinal + flank
                if low > (1 - pRange) and high < pRange:
                    val = [float(cProfile[low]), float(cProfile[high])]

            # Slots 2-4 are the placeholder 'S'; the layout is kept because
            # the combo is printed and formatted by index below.
            peakCombos.append([tcc, x, 'S', 'S', 'S', peakValue, highest, val])

        # Find the best combo.
        topCombo = None
        for combo in peakCombos:
            roofLength = combo[6]
            dropValue = max(combo[7])
            # NOTE(review): a drop value of exactly 0.0 is excluded here,
            # unlike the plain "< 0.2" tests elsewhere -- confirm intent.
            if not (14 < roofLength < 26 and 0.0 < dropValue < 0.2):
                continue
            # The original comment said "nearest 20", but the code targets 22.
            if topCombo is None or math.fabs(22 - roofLength) < math.fabs(22 - topCombo[6]):
                topCombo = combo

        if topCombo:
            peakDict[CID][1] = topCombo
            bestCombos.append(topCombo)
            print(bestCombos[-1])

    print(timer.split())

    # Now update predFile (SLOT 13): read everything, then rewrite in place.
    newLines = []
    with open(predName, 'r') as predFile:
        for line in predFile:
            CID = cg.ss(line)[7]
            top = peakDict[CID][1]
            if top == 'None':
                peakInfo = 'None'
            else:
                peakInfo = '%s:%s:%s:%s:%s:%s' % (str(top[1])[-3:], 'S',
                                                  str(top[4]).split('.')[0],
                                                  top[5], top[6], top[7])
            newLines.append(cg.appendToLine(line, peakInfo, 13))

    with open(predName, 'w') as predFile:
        predFile.writelines(newLines)