Ejemplo n.º 1
0
def makeWig(fN, assembly, format=None, name=None):
    '''format assumes bowtie
	suitible for medium mapped files.
	takes longer.'''
    #assume bowtie
    if not format: format = 'Bowtie'
    parserFunction = returnParserFunction(format)
    if not name: name = cg.getBaseFileName(fN, naked=True)
    lDict = cg.returnChromLengthDict(assembly)

    for chrom in lDict:
        if not chrom in cg.acceptableChroms: continue
        for strand in ['1', '-1']:
            f = open(fN, 'r')
            #create hitmap of chrom and strand
            print chrom, strand, 'hitmap'
            hitDict = {}
            for line in f:

                lChrom, lStrand, start, end = cg.tccSplit(parserFunction(line))
                lStrand = str(lStrand)
                start = int(start)
                end = int(end)
                if chrom == lChrom and strand == lStrand:
                    for i in range(start, end + 1):
                        try:
                            hitDict[i] += 1
                        except KeyError:
                            hitDict[i] = 1

            #write results to wig file
            writeWigFromHitDict(hitDict, assembly)
Ejemplo n.º 2
0
def makePeakInputQ(cName, minExpression=2000):
    '''Uses shell script and qsub to get peaks quickly'''

    mConf = c.getConfig('Main.conf')
    conf = c.getConfig(cName)

    assembly = conf.conf['assembly']

    tccList = []

    chromLens = cg.returnChromLengthDict(assembly)

    for chrom in chromLens:
        if chrom not in cg.acceptableChroms: continue
        for strand in ['1', '-1']:
            print 'Getting Peaks for ', chrom, strand
            prevI = 0
            for i in rangePoints(1, chromLens[chrom], 30):
                if i == 1:
                    prevI = i
                    continue

                start = prevI
                end = i
                prevI = i

                tcc = cg.makeTcc(chrom, strand, start, end)

                log = 'logs/o-' + str(start)
                elog = 'logs/e-%s-%s-%s-%s' % (chrom, strand, start, end)
                subprocess.Popen([
                    'qsub', '-V', '-cwd', '-e', elog, '-o', log, '-l',
                    'mem=3G', '-l', 'rt=3600', 'q.sh', tcc, cName,
                    str(minExpression)
                ]).wait()
Ejemplo n.º 3
0
def makePeakInputQ(cName, minExpression = 2000):
	'''Uses shell script and qsub to get peaks quickly'''
	
	mConf = c.getConfig('Main.conf')
	conf = c.getConfig(cName)
	
	assembly = conf.conf['assembly']
	
	tccList = []
	
	chromLens = cg.returnChromLengthDict(assembly)
	
	for chrom in chromLens:
		if chrom not in cg.acceptableChroms: continue
		for strand in ['1','-1']:
			print 'Getting Peaks for ', chrom, strand
			prevI = 0
			for i in rangePoints(1, chromLens[chrom], 30):
				if i == 1:
					prevI = i
					continue
				
				start = prevI
				end = i
				prevI = i
				
				tcc = cg.makeTcc(chrom, strand, start, end)
								
				log = 'logs/o-' + str(start)
				elog = 'logs/e-%s-%s-%s-%s' % (chrom, strand, start, end)
				subprocess.Popen(['qsub', '-V', '-cwd', '-e', elog, '-o', log, '-l', 'mem=3G', '-l', 'rt=3600', 'q.sh', tcc, cName, str(minExpression)]).wait()
Ejemplo n.º 4
0
def makeWig(fN, assembly, format = None, name = None):
	
	'''format assumes bowtie
	suitible for medium mapped files.
	takes longer.'''
	#assume bowtie
	if not format: format = 'Bowtie'
	parserFunction = returnParserFunction(format)
	if not name: name = cg.getBaseFileName(fN, naked = True)
	lDict = cg.returnChromLengthDict(assembly)
	
	
	for chrom in lDict:
		if not chrom in cg.acceptableChroms: continue
		for strand in ['1', '-1']:
			f = open(fN, 'r')
			#create hitmap of chrom and strand
			print chrom, strand, 'hitmap'
			hitDict = {}
			for line in f:
				
				lChrom, lStrand, start, end = cg.tccSplit(parserFunction(line))
				lStrand = str(lStrand)
				start = int(start)
				end = int(end)
				if chrom == lChrom and strand == lStrand:
					for i in range(start, end + 1):
						try:
							hitDict[i] += 1
						except KeyError:
							hitDict[i] = 1
			
			#write results to wig file
			writeWigFromHitDict(hitDict, assembly)
Ejemplo n.º 5
0
def updateWigLength(fN, assembly):
	
	chromLengths = cg.returnChromLengthDict(assembly)
	f = open(fN, 'r')
	header = f.readline() #header
	
	lineDict = {} # chr : []
	for line in f:
		chrom = line.split('\t')[0]
		if chrom not in lineDict:
			lineDict[chrom] = []
			lineDict[chrom].append(line)
		else:
			lineDict[chrom].append(line)
	f.close()
	
	for chrom in lineDict:
		print 'extending', chrom
		
		chromLength = chromLengths[chrom]
		lastChromLine = lineDict[chrom][-1]
		lastValue = int(lastChromLine.split('\t')[2])
		print lastValue
		if lastValue < chromLength:
			print '  ', lastValue, chromLength
			lineDict[chrom].append('%s\t%s\t%s\t0.000000\n' % (chrom, lastValue, chromLength))
			print '  updated'
		
	f = open(fN, 'w')
	f.write(header)
	for chrom in lineDict:
		f.writelines(lineDict[chrom])
	f.close()
def writeWigDictToWig(wigDict, chrom, strand, assembly, name, outDir, blankValue = 0):
    '''Write a {coord: value} dict for one chromosome/strand as wig blocks.

    Output file: <outDir>/<name>.<chrom>.<strand>.wig, one
    "chrom<TAB>start<TAB>end<TAB>value" line per block.  Runs of consecutive
    coordinates sharing the same value are merged into one block whose end is
    the last coordinate + 1.  The stretch before the first coordinate, every
    gap between runs and the stretch up to the chromosome end are written
    with blankValue.  An empty wigDict yields a single blank block covering
    the whole chromosome.

    NOTE(review): assembly is accepted but the chromosome length is always
    looked up for 'hg19' -- confirm whether that is intended.
    '''
    #init
    coords = sorted(wigDict.keys())
    lDict = bioLibCG.returnChromLengthDict('hg19')
    chromEnd = lDict[chrom]

    outFN = outDir + '/%s.%s.%s.wig' % (name, chrom, strand)
    lineFmt = '%s\t%s\t%s\t%s\n'
    with open(outFN, 'w') as f:
        if not coords:
            # nothing covered: the whole chromosome is one blank block
            f.write(lineFmt % (chrom, 0, chromEnd, blankValue))
            return

        #write first blank line
        f.write(lineFmt % (chrom, 0, coords[0], blankValue))

        blockStart = prevCoord = coords[0]
        prevValue = wigDict[prevCoord]
        for coord in coords[1:]:
            currValue = wigDict[coord]
            adjacent = (coord - 1 == prevCoord)
            if not (adjacent and currValue == prevValue):
                #finish the running block
                f.write(lineFmt % (chrom, blockStart, prevCoord + 1, prevValue))
                if not adjacent:
                    #blank block for the gap
                    f.write(lineFmt % (chrom, prevCoord + 1, coord, blankValue))
                #init next block
                blockStart = coord
            prevCoord = coord
            prevValue = currValue

        #write last block and last blank block line
        # prevCoord is the final coordinate here, also when wigDict holds a
        # single entry and the loop above never ran
        f.write(lineFmt % (chrom, blockStart, prevCoord + 1, prevValue))
        f.write(lineFmt % (chrom, prevCoord + 1, chromEnd, blankValue))
Ejemplo n.º 7
0
def writeWigFromHitDict(hitDict, assembly, name, directory=None):
    '''Write one bedGraph-style wig file per chromosome/strand of a hit dict.

    hitDict   -- {chrom: {strand: {coord: value}}}.  NOTE: an entry
                 {chromLength: 0} is inserted into every inner dict.
    assembly  -- assembly name passed to cg.returnChromLengthDict
    name      -- base name used in the track line and the output file names
    directory -- output directory; the 'wigs' entry of Main.conf when omitted

    Files are named <directory>/<name>.<chrom>.<strand>.wig.  Consecutive
    coordinates with equal values are merged into a single block; gaps
    between covered coordinates are written as explicit zero blocks.
    '''
    mConf = c.getConfig('Main.conf')
    if not directory: directory = mConf.conf['wigs']
    # NOTE(review): this fallback passes the empty name itself to
    # getBaseFileName; no file name is available here -- confirm intent.
    if not name: name = cg.getBaseFileName(name, naked=True)
    lDict = cg.returnChromLengthDict(assembly)

    cg.clearDirectory(directory, overwrite=False)
    blockFmt = '%s\t%s\t%s\t%s\n'
    #write results to wig file
    for chrom in hitDict:
        for strand in hitDict[chrom]:
            coordVals = hitDict[chrom][strand]
            outFN = directory + '/%s.%s.%s.wig' % (name, chrom, strand)
            # 'with' closes the file even if a lookup below raises
            with open(outFN, 'w') as oF:
                oF.write('track type=bedGraph name=%s.%s.%s\n' %
                         (name, chrom, strand))

                # zero-valued entry at the chromosome length: reaching it
                # flushes the preceding block; the block still open when the
                # loop ends is never written
                chromEnd = lDict[chrom]
                coordVals[chromEnd] = 0
                keys = sorted(coordVals)

                prevVal = 0
                prevCoord = 0
                blockStart = 0
                blockEnd = 1
                for key in keys:
                    val = coordVals[key]
                    contiguous = (prevCoord == key - 1)

                    if not (contiguous and val == prevVal):
                        #write old block
                        oF.write(blockFmt % (chrom, blockStart, blockEnd, prevVal))
                        if not contiguous:
                            #write zero block covering the gap
                            oF.write(blockFmt % (chrom, blockEnd, key, 0))
                        #start new block
                        blockStart = key

                    # either extends the current block or ends the new one
                    blockEnd = key + 1
                    prevVal = val
                    prevCoord = key
Ejemplo n.º 8
0
def makeWigMem(fN, assembly, format=None, name=None, directory=None):
    '''Build an in-memory hit map from a mapped-read file and write wig files.

    One position is counted per read ("wig for degradome"): start + 20 for
    strand '1', start for any other strand.  Only chromosomes listed in
    cg.acceptableChroms are kept.  The first line of the file is always
    skipped as a header, and lines whose parse raises AttributeError are
    ignored.  The whole hit map is held in memory, so this is suited to
    small mapped files.

    fN        -- path of the mapped-read file
    assembly  -- assembly name, forwarded to writeWigFromHitDict
    format    -- parser name for returnParserFunction; 'Bowtie' when omitted
    name      -- output base name; derived from fN when omitted
    directory -- output directory, forwarded to writeWigFromHitDict
    '''
    if not name: name = cg.getBaseFileName(fN, naked=True)
    if not format: format = 'Bowtie'
    parserFunction = returnParserFunction(format)

    lDict = cg.returnChromLengthDict(assembly)  # not used below; call kept as-is

    #create hitmap of chrom and strand
    hitDict = {}  #format = chr: { strand : { coord : value
    # 'with' closes the handle even if a line fails to convert
    with open(fN, 'r') as f:
        f.readline()  #header...file might not have one but its one read...
        for line in f:
            try:
                lChrom, lStrand, start, end = cg.tccSplit(parserFunction(line))
            except AttributeError:
                continue
            lStrand = str(lStrand)
            start = int(start)
            end = int(end)
            if lChrom not in cg.acceptableChroms:
                continue

            #wig for degradome
            if lStrand == '1':
                i = start + 20
            else:
                i = start

            strandHits = hitDict.setdefault(lChrom, {}).setdefault(lStrand, {})
            strandHits[i] = strandHits.get(i, 0) + 1

    #write results to wig file
    writeWigFromHitDict(hitDict, assembly, name, directory)
Ejemplo n.º 9
0
def makeWigMem(fN, assembly, format = None, name = None, directory = None):
    '''Build an in-memory hit map from a mapped-read file and write wig files.

    One position is counted per read ("wig for degradome"): start + 20 for
    strand '1', start for any other strand.  Only chromosomes listed in
    cg.acceptableChroms are kept.  The first line of the file is always
    skipped as a header, and lines whose parse raises AttributeError are
    ignored.  The whole hit map is held in memory, so this is suited to
    small mapped files.

    fN        -- path of the mapped-read file
    assembly  -- assembly name, forwarded to writeWigFromHitDict
    format    -- parser name for returnParserFunction; 'Bowtie' when omitted
    name      -- output base name; derived from fN when omitted
    directory -- output directory, forwarded to writeWigFromHitDict
    '''
    if not name: name = cg.getBaseFileName(fN, naked = True)
    if not format: format = 'Bowtie'
    parserFunction = returnParserFunction(format)

    lDict = cg.returnChromLengthDict(assembly)  # not used below; call kept as-is

    #create hitmap of chrom and strand
    hitDict = {} #format = chr: { strand : { coord : value
    # 'with' closes the handle even if a line fails to convert
    with open(fN, 'r') as f:
        f.readline() #header...file might not have one but its one read...
        for line in f:
            try:
                lChrom, lStrand, start, end = cg.tccSplit(parserFunction(line))
            except AttributeError:
                continue
            lStrand = str(lStrand)
            start = int(start)
            end = int(end)
            if lChrom not in cg.acceptableChroms:
                continue

            #wig for degradome
            if lStrand == '1':
                i = start + 20
            else:
                i = start

            strandHits = hitDict.setdefault(lChrom, {}).setdefault(lStrand, {})
            strandHits[i] = strandHits.get(i, 0) + 1

    #write results to wig file
    writeWigFromHitDict(hitDict, assembly, name, directory)
Ejemplo n.º 10
0
def writeWigFromHitDict(hitDict, assembly, name, directory = None):
    '''Write one bedGraph-style wig file per chromosome/strand of a hit dict.

    hitDict   -- {chrom: {strand: {coord: value}}}.  NOTE: an entry
                 {chromLength: 0} is inserted into every inner dict.
    assembly  -- assembly name passed to cg.returnChromLengthDict
    name      -- base name used in the track line and the output file names
    directory -- output directory; the 'wigs' entry of Main.conf when omitted

    Files are named <directory>/<name>.<chrom>.<strand>.wig.  Consecutive
    coordinates with equal values are merged into a single block; gaps
    between covered coordinates are written as explicit zero blocks.
    '''
    mConf = c.getConfig('Main.conf')
    if not directory: directory = mConf.conf['wigs']
    # NOTE(review): this fallback passes the empty name itself to
    # getBaseFileName; no file name is available here -- confirm intent.
    if not name: name = cg.getBaseFileName(name, naked = True)
    lDict = cg.returnChromLengthDict(assembly)

    cg.clearDirectory(directory, overwrite = False)
    blockFmt = '%s\t%s\t%s\t%s\n'
    #write results to wig file
    for chrom in hitDict:
        for strand in hitDict[chrom]:
            coordVals = hitDict[chrom][strand]
            outFN = directory + '/%s.%s.%s.wig' % (name, chrom, strand)
            # 'with' closes the file even if a lookup below raises
            with open(outFN, 'w') as oF:
                oF.write('track type=bedGraph name=%s.%s.%s\n' % (name, chrom, strand))

                # zero-valued entry at the chromosome length: reaching it
                # flushes the preceding block; the block still open when the
                # loop ends is never written
                chromEnd = lDict[chrom]
                coordVals[chromEnd] = 0
                keys = sorted(coordVals)

                prevVal = 0
                prevCoord = 0
                blockStart = 0
                blockEnd = 1
                for key in keys:
                    val = coordVals[key]
                    contiguous = (prevCoord == key - 1)

                    if not (contiguous and val == prevVal):
                        #write old block
                        oF.write(blockFmt % (chrom, blockStart, blockEnd, prevVal))
                        if not contiguous:
                            #write zero block covering the gap
                            oF.write(blockFmt % (chrom, blockEnd, key, 0))
                        #start new block
                        blockStart = key

                    # either extends the current block or ends the new one
                    blockEnd = key + 1
                    prevVal = val
                    prevCoord = key
Ejemplo n.º 11
0
def writeSetToWig(wigSet, chrom, strand, assembly, name, outDir):
    '''Write a set of covered coordinates for one chromosome/strand as a 0/1 track.

    Output file: <outDir>/<name>.<chrom>.<strand>.wig, one
    "chrom<TAB>start<TAB>end<TAB>value" line per block.  Runs of consecutive
    coordinates become blocks with value 1 (end = last coordinate + 1); the
    stretch before the first coordinate, every gap and the stretch up to the
    chromosome end become blocks with value 0.  An empty wigSet yields a
    single zero block covering the whole chromosome.

    NOTE(review): assembly is accepted but the chromosome length is always
    looked up for 'hg19' -- confirm whether that is intended.
    '''
    #init
    coords = sorted(wigSet)
    lDict = bioLibCG.returnChromLengthDict('hg19')
    chromEnd = lDict[chrom]

    outFN = outDir + '/%s.%s.%s.wig' % (name, chrom, strand)
    lineFmt = '%s\t%s\t%s\t%s\n'
    with open(outFN, 'w') as f:
        if not coords:
            # nothing covered: the whole chromosome is one zero block
            f.write(lineFmt % (chrom, 0, chromEnd, 0))
            return

        #write first 0 line
        f.write(lineFmt % (chrom, 0, coords[0], 0))

        blockStart = prevCoord = coords[0]
        for coord in coords[1:]:
            if coord - 1 != prevCoord:
                #finish last block, write zero block, start another block
                f.write(lineFmt % (chrom, blockStart, prevCoord + 1, 1))
                f.write(lineFmt % (chrom, prevCoord + 1, coord, 0))
                blockStart = coord
            prevCoord = coord

        #write last block and last 0 block line
        # prevCoord is the final coordinate here, also when wigSet holds a
        # single entry and the loop above never ran
        f.write(lineFmt % (chrom, blockStart, prevCoord + 1, 1))
        f.write(lineFmt % (chrom, prevCoord + 1, chromEnd, 0))
Ejemplo n.º 12
0
def spacerDistData(tranFN, outFN):
    '''chr strand tranStart tranEnd'''

    chrom_length = bioLibCG.returnChromLengthDict('hg19')

    chrom_strand_iSet = {}
    for chrom in chrom_length:
        for strand in ('+', '-'):
            chrom_strand_iSet.setdefault(chrom,
                                         {}).setdefault(strand, IntervalSet())

    print 'making intervals'
    f = open(tranFN, 'r')
    for line in f:
        ls = line.strip().split('\t')
        tranStart, tranEnd = int(ls[3]), int(ls[4])
        strand = ls[2]
        chrom = ls[1]

        chrom_strand_iSet[chrom][strand].add(Interval(tranStart, tranEnd))

    f.close()

    spacerData = []
    print 'creating spacer data'
    for chrom in chrom_strand_iSet:
        for strand in chrom_strand_iSet[chrom]:
            iSet = chrom_strand_iSet[chrom][strand]
            for i, interv in enumerate(iSet):
                if interv == iSet[-1]: break
                nextInterv = iSet[i + 1]
                seperation = nextInterv.lower_bound - interv.upper_bound
                spacerData.append(seperation)

    f = open(outFN, 'w')
    outLines = [str(x) + '\n' for x in spacerData]
    f.writelines(outLines)
    f.close()
Ejemplo n.º 13
0
def spacerDistData(tranFN, outFN):
    '''chr strand tranStart tranEnd'''

    chrom_length = bioLibCG.returnChromLengthDict('hg19')

    chrom_strand_iSet = {}
    for chrom in chrom_length:
        for strand in ('+', '-'):
            chrom_strand_iSet.setdefault(chrom, {}).setdefault(strand, IntervalSet())

    print 'making intervals'
    f = open(tranFN, 'r')
    for line in f:
        ls = line.strip().split('\t')
        tranStart, tranEnd = int(ls[3]), int(ls[4])
        strand = ls[2]
        chrom = ls[1]

        chrom_strand_iSet[chrom][strand].add(Interval(tranStart, tranEnd))

    f.close()
    
    spacerData = []
    print 'creating spacer data'
    for chrom in chrom_strand_iSet:
        for strand in chrom_strand_iSet[chrom]:
            iSet = chrom_strand_iSet[chrom][strand]
            for i, interv in enumerate(iSet):
                if interv == iSet[-1]: break
                nextInterv = iSet[i + 1]
                seperation = nextInterv.lower_bound - interv.upper_bound
                spacerData.append(seperation)

    f = open(outFN, 'w')
    outLines = [str(x) + '\n' for x in spacerData]
    f.writelines(outLines)
    f.close()
Ejemplo n.º 14
0
def makePeakInput(cName, minExpression=2000):

    mConf = c.getConfig('Main.conf')
    conf = c.getConfig(cName)

    assembly = conf.conf['assembly']

    tccList = []

    chromLens = cg.returnChromLengthDict(assembly)
    f = open('peakData.%s' % minExpression, 'w')
    for chrom in chromLens:
        if chrom not in cg.acceptableChroms: continue
        for strand in ['1', '-1']:
            print 'Getting Peaks for ', chrom, strand
            prevI = 0
            endCheck = 0
            for i in rangePoints(1, chromLens[chrom], 1000):
                if i == 1:
                    prevI = i
                    continue

                start = prevI
                end = i
                prevI = i

                tcc = cg.makeTcc(chrom, strand, start, end)
                #print 'scanning range', tcc
                peaks = cgPeaks.stretch(tcc, cName)
                peaks.createPeaks(span=3, minVal=minExpression)

                for x in peaks.peaks:

                    if x < endCheck:
                        continue

                    #scan a 30 bp range around this point and find the best roof...
                    pRange = 30
                    rTcc = cg.makeTcc(chrom, strand, x, x + 1)

                    #now make profile for roof...
                    cProfile = stepVectorScan.profileAroundPoint(rTcc,
                                                                 pRange,
                                                                 cName,
                                                                 ratio=True)

                    #now get highest stretch length and the rNext coord.
                    minVal = .80
                    highest = 0
                    stretch = 0
                    startCurrent = None
                    startFinal = None
                    endFinal = None
                    for i in range(1 - pRange, pRange):
                        if cProfile[i] > minVal:
                            stretch += 1
                            if startCurrent == None:
                                startCurrent = i
                        else:
                            if stretch > 0:
                                if stretch > highest:  #stretch ended and was higher than previous
                                    highest = stretch
                                    endFinal = i - 1
                                    startFinal = startCurrent
                                    startCurrent = None
                                else:
                                    startCurrent = None
                            stretch = 0

                    #get +/- 4 value...
                    val = [1.0, 1.0]
                    if (startFinal) and (endFinal):
                        low = startFinal - 4
                        high = endFinal + 4
                        if low > (1 - pRange) and high < pRange:
                            val[0] = float(cProfile[startFinal - 4])
                            val[1] = float(cProfile[endFinal + 4])
                        else:
                            continue
                    else:
                        continue

                    endCheck = x + high

                    #filter out peaks that look a certain way.
                    if 14 < highest < 26:  #rooflength
                        if val[0] < 0.2 and val[1] < .2:  #drop values
                            goodTcc = cg.makeTcc(chrom, strand, x + low,
                                                 x + high)
                            #print goodTcc
                            f.write('%s\n' % goodTcc)
    f.close()
Ejemplo n.º 15
0
def makeWigMem(fN, assembly, format = None, name = None, directory = None, degWig = False, switchStrand = True, normalized = False):
	'''format assumes bowtie
	suitible for small mapped files.
        switch strand does not switch the strands, it just makes sure if the data is backwards (HeLa) that it will 
        put the peak in the right spot'''
	
        print 'degWig Value', degWig
        print 'switch strands?', switchStrand
	if not name: name = cg.getBaseFileName(fN, naked = True)
	if not format: format = 'Bowtie'
	parserFunction = returnParserFunction(format)
	
	lDict = cg.returnChromLengthDict(assembly)
	f = open(fN, 'r')
	f.readline() #header...file might not have one but its one read...
	
	#create hitmap of chrom and strand
	hitDict = {} #format = chr: { strand : { coord : value 
	for line in f:
                lChrom, lStrand, start, end = cg.tccSplit(parserFunction(line))
		lStrand = str(lStrand)
		start = int(start)
		end = int(end)
                numPlacesMapped = int(line.strip().split('\t')[6])
                numPlacesMapped += 1
                readCount = 1
                if normalized:
                    readCount = float(readCount)/numPlacesMapped

		if lChrom in cg.acceptableChroms:
                        
                        if degWig:
                                #wig for degradome NOTE:!!! change lStrand == '1' to '-1' for Bracken!
                                if switchStrand:
                                    if lStrand == '1':
                                            i = start + (end - start)
                                    else:
                                            i = start + 1
                                else:                                            
                                    if lStrand == '-1':
                                            i = start + (end - start)
                                    else:
                                            i = start + 1


                                hitDict.setdefault(lChrom, {}).setdefault(lStrand, {})
                                hitDict[lChrom][lStrand][i] = hitDict[lChrom][lStrand].get(i, 0) + readCount
                        else:

                                #wig for regular
                                for i in range(start, end):
                                        try:
                                                hitDict[lChrom][lStrand][i] += readCount 
                                        except KeyError:
                                                if lChrom not in hitDict:
                                                        hitDict[lChrom] = {}
                                                if lStrand not in hitDict[lChrom]:
                                                        hitDict[lChrom][lStrand] = {}
                                                hitDict[lChrom][lStrand][i] = readCount

	f.close()
	
	#write results to wig file
	writeWigFromHitDict(hitDict, assembly, name, directory)
Ejemplo n.º 16
0
def writeWigDictToWig(wigDict,
                      chrom,
                      strand,
                      assembly,
                      name,
                      outDir,
                      blankValue=0):
    '''Write a {coord: value} dict for one chromosome/strand as wig blocks.

    Output file: <outDir>/<name>.<chrom>.<strand>.wig, one
    "chrom<TAB>start<TAB>end<TAB>value" line per block.  Runs of consecutive
    coordinates sharing the same value are merged into one block whose end is
    the last coordinate + 1.  The stretch before the first coordinate, every
    gap between runs and the stretch up to the chromosome end are written
    with blankValue.  An empty wigDict yields a single blank block covering
    the whole chromosome.

    NOTE(review): assembly is accepted but the chromosome length is always
    looked up for 'hg19' -- confirm whether that is intended.
    '''
    #init
    coords = sorted(wigDict.keys())
    lDict = bioLibCG.returnChromLengthDict('hg19')
    chromEnd = lDict[chrom]

    outFN = outDir + '/%s.%s.%s.wig' % (name, chrom, strand)
    lineFmt = '%s\t%s\t%s\t%s\n'
    with open(outFN, 'w') as f:
        if not coords:
            # nothing covered: the whole chromosome is one blank block
            f.write(lineFmt % (chrom, 0, chromEnd, blankValue))
            return

        #write first blank line
        f.write(lineFmt % (chrom, 0, coords[0], blankValue))

        blockStart = prevCoord = coords[0]
        prevValue = wigDict[prevCoord]
        for coord in coords[1:]:
            currValue = wigDict[coord]
            adjacent = (coord - 1 == prevCoord)
            if not (adjacent and currValue == prevValue):
                #finish the running block
                f.write(lineFmt % (chrom, blockStart, prevCoord + 1, prevValue))
                if not adjacent:
                    #blank block for the gap
                    f.write(lineFmt % (chrom, prevCoord + 1, coord, blankValue))
                #init next block
                blockStart = coord
            prevCoord = coord
            prevValue = currValue

        #write last block and last blank block line
        # prevCoord is the final coordinate here, also when wigDict holds a
        # single entry and the loop above never ran
        f.write(lineFmt % (chrom, blockStart, prevCoord + 1, prevValue))
        f.write(lineFmt % (chrom, prevCoord + 1, chromEnd, blankValue))
Ejemplo n.º 17
0
def makeWigMem(fN,
               assembly,
               format=None,
               name=None,
               directory=None,
               degWig=False,
               switchStrand=True,
               normalized=False):
    '''format assumes bowtie
	suitible for small mapped files.
        switch strand does not switch the strands, it just makes sure if the data is backwards (HeLa) that it will 
        put the peak in the right spot'''

    print 'degWig Value', degWig
    print 'switch strands?', switchStrand
    if not name: name = cg.getBaseFileName(fN, naked=True)
    if not format: format = 'Bowtie'
    parserFunction = returnParserFunction(format)

    lDict = cg.returnChromLengthDict(assembly)
    f = open(fN, 'r')
    f.readline()  #header...file might not have one but its one read...

    #create hitmap of chrom and strand
    hitDict = {}  #format = chr: { strand : { coord : value
    for line in f:
        lChrom, lStrand, start, end = cg.tccSplit(parserFunction(line))
        lStrand = str(lStrand)
        start = int(start)
        end = int(end)
        numPlacesMapped = int(line.strip().split('\t')[6])
        numPlacesMapped += 1
        readCount = 1
        if normalized:
            readCount = float(readCount) / numPlacesMapped

        if lChrom in cg.acceptableChroms:

            if degWig:
                #wig for degradome NOTE:!!! change lStrand == '1' to '-1' for Bracken!
                if switchStrand:
                    if lStrand == '1':
                        i = start + (end - start)
                    else:
                        i = start + 1
                else:
                    if lStrand == '-1':
                        i = start + (end - start)
                    else:
                        i = start + 1

                hitDict.setdefault(lChrom, {}).setdefault(lStrand, {})
                hitDict[lChrom][lStrand][i] = hitDict[lChrom][lStrand].get(
                    i, 0) + readCount
            else:

                #wig for regular
                for i in range(start, end):
                    try:
                        hitDict[lChrom][lStrand][i] += readCount
                    except KeyError:
                        if lChrom not in hitDict:
                            hitDict[lChrom] = {}
                        if lStrand not in hitDict[lChrom]:
                            hitDict[lChrom][lStrand] = {}
                        hitDict[lChrom][lStrand][i] = readCount

    f.close()

    #write results to wig file
    writeWigFromHitDict(hitDict, assembly, name, directory)
Ejemplo n.º 18
0
def makePeakInput(cName, minExpression = 2000):
	
	mConf = c.getConfig('Main.conf')
	conf = c.getConfig(cName)
	
	assembly = conf.conf['assembly']
	
	tccList = []
	
	chromLens = cg.returnChromLengthDict(assembly)
	f = open('peakData.%s' % minExpression, 'w')
	for chrom in chromLens:
		if chrom not in cg.acceptableChroms: continue
		for strand in ['1', '-1']:
			print 'Getting Peaks for ', chrom, strand
			prevI = 0
			endCheck = 0
			for i in rangePoints(1, chromLens[chrom], 1000):
				if i == 1:
					prevI = i
					continue
				
				start = prevI
				end = i
				prevI = i
				
				tcc = cg.makeTcc(chrom, strand, start, end)
				#print 'scanning range', tcc
				peaks = cgPeaks.stretch(tcc, cName)
				peaks.createPeaks(span = 3, minVal = minExpression)
				
				for x in peaks.peaks:
					
					if x < endCheck:
						continue
				
					#scan a 30 bp range around this point and find the best roof...
					pRange = 30
					rTcc = cg.makeTcc(chrom, strand, x, x + 1)
					
	
					#now make profile for roof...
					cProfile = stepVectorScan.profileAroundPoint(rTcc, pRange, cName, ratio = True)
					
					
					
					#now get highest stretch length and the rNext coord.
					minVal = .80
					highest = 0
					stretch = 0
					startCurrent = None
					startFinal = None
					endFinal = None
					for i in range(1 - pRange, pRange):
						if cProfile[i] > minVal:
							stretch += 1
							if startCurrent == None:
								startCurrent = i
						else:
							if stretch > 0:
								if stretch > highest: #stretch ended and was higher than previous
									highest = stretch
									endFinal = i - 1
									startFinal = startCurrent
									startCurrent = None
								else:
									startCurrent = None
							stretch = 0
					
					#get +/- 4 value...
					val = [1.0, 1.0]
					if (startFinal) and (endFinal):
						low = startFinal - 4
						high = endFinal + 4
						if low > (1 - pRange) and high < pRange:
								val[0] = float(cProfile[startFinal - 4])
								val[1] = float(cProfile[endFinal + 4])
						else:
							continue
					else:
						continue
					
					endCheck = x + high
					
					#filter out peaks that look a certain way.
					if 14 < highest < 26: #rooflength
						if val[0] < 0.2 and val[1] < .2: #drop values
							goodTcc = cg.makeTcc(chrom, strand, x + low, x + high)
							#print goodTcc
							f.write('%s\n' % goodTcc)
	f.close()