Python tccFileToList 예제들, compareData.tccFileToList Python 예제들

예제 #1

0

파일 보기

파일: bioLibCG.py 프로젝트: combiochem/ResearchScripts

def convertTccFileToGff(tccFileName):
	"""Convert a tcc file and write the result to ``<tccFileName>.gff``.

	The entries are loaded with ``compare.tccFileToList(tccFileName, 0)``,
	passed through ``convertTccListToBed`` and every resulting line is
	written unchanged to the output file.

	NOTE(review): the BED conversion helper is used although the output is
	named ``.gff`` -- confirm this is intended.
	"""
	tccList = compare.tccFileToList(tccFileName, 0)
	gffList = convertTccListToBed(tccList)

	# The context manager closes the file even if a write raises.
	with open(tccFileName + '.gff', 'w') as gffFile:
		for line in gffList:
			gffFile.write(line)

예제 #2

0

파일 보기

파일: bioLibCG.py 프로젝트: combiochem/ResearchScripts

def convertTccFileToBed(tccFileName):
	"""Convert a tcc file and write the result to ``<tccFileName>.bed``.

	The entries are loaded with ``compare.tccFileToList(tccFileName, 0)``,
	converted with ``convertTccListToBed`` and every resulting line is
	written unchanged to the output file.
	"""
	tccList = compare.tccFileToList(tccFileName, 0)
	bedList = convertTccListToBed(tccList)

	# The context manager closes the file even if a write raises.
	with open(tccFileName + '.bed', 'w') as bedFile:
		for line in bedList:
			bedFile.write(line)

예제 #3

0

파일 보기

파일: bioLibCG.py 프로젝트: combiochem/ResearchScripts

def convertTccFileToBed(tccFileName):
    """Convert a tcc file and write the result to ``<tccFileName>.bed``.

    The entries are loaded with ``compare.tccFileToList(tccFileName, 0)``,
    converted with ``convertTccListToBed`` and every resulting line is
    written unchanged to the output file.
    """
    tccList = compare.tccFileToList(tccFileName, 0)
    bedList = convertTccListToBed(tccList)

    # The context manager closes the file even if a write raises.
    with open(tccFileName + '.bed', 'w') as bedFile:
        for line in bedList:
            bedFile.write(line)

예제 #4

0

파일 보기

파일: bioLibCG.py 프로젝트: combiochem/ResearchScripts

def convertTccFileToGff(tccFileName):
    """Convert a tcc file and write the result to ``<tccFileName>.gff``.

    The entries are loaded with ``compare.tccFileToList(tccFileName, 0)``,
    passed through ``convertTccListToBed`` and every resulting line is
    written unchanged to the output file.

    NOTE(review): the BED conversion helper is used although the output is
    named ``.gff`` -- confirm this is intended.
    """
    tccList = compare.tccFileToList(tccFileName, 0)
    gffList = convertTccListToBed(tccList)

    # The context manager closes the file even if a write raises.
    with open(tccFileName + '.gff', 'w') as gffFile:
        for line in gffList:
            gffFile.write(line)

예제 #5

0

파일 보기

파일: filterOutKnown.py 프로젝트: sknyx/ResearchScripts

def filterOut(cName = None):
	"""Write the predictions rejected by the known-annotation filter to a file.

	cName is handed to ``cgConfig.getConfig``; the resulting configuration
	supplies the 'resultsRaw' input, the 'knownDirectory' to filter against
	and the 'matureOverlaps' output path. One entry is written per line.
	"""
	conf = cgConfig.getConfig(cName)

	predictionList = compare.tccFileToList(conf.conf['resultsRaw'], 1)
	# Passing True makes filterOutTccs return the entries that were filtered
	# out, rather than the list with those entries removed.
	overlapped = compare.filterOutTccs(predictionList, conf.conf['knownDirectory'], True)

	# The original never closed this handle, so buffered output could be
	# lost; the context manager flushes and closes it on every exit path.
	with open(conf.conf['matureOverlaps'], 'w') as matureOverlaps:
		for tcc in overlapped:
			matureOverlaps.write(tcc + '\n')

예제 #6

0

파일 보기

파일: filterOutKnown.py 프로젝트: JasonAng/ResearchScripts

def filterOut(cName=None):
    """Write the predictions rejected by the known-annotation filter to a file.

    cName is handed to ``cgConfig.getConfig``; the resulting configuration
    supplies the "resultsRaw" input, the "knownDirectory" to filter against
    and the "matureOverlaps" output path. One entry is written per line.
    """
    conf = cgConfig.getConfig(cName)

    predictionList = compare.tccFileToList(conf.conf["resultsRaw"], 1)
    # Passing True makes filterOutTccs return the entries that were filtered
    # out, rather than the list with those entries removed.
    overlapped = compare.filterOutTccs(
        predictionList, conf.conf["knownDirectory"], True
    )

    # The original never closed this handle, so buffered output could be
    # lost; the context manager flushes and closes it on every exit path.
    with open(conf.conf["matureOverlaps"], "w") as matureOverlaps:
        for tcc in overlapped:
            matureOverlaps.write(tcc + "\n")

예제 #7

0

파일 보기

파일: splitExonsIntrons.py 프로젝트: JasonAng/ResearchScripts

def splitExonsIntrons(cName = None):
	mConf = c.cgConfig('Main.conf')
	conf = c.getConfig(cName)
	
	#init
	organism = conf.conf['organism']
	minOverlap = 50
	cHairs = getHairpins.getHairpins() #CID: HAIRPIN
	exonList = compare.tccFileToList('%sExons.tcc' % organism, 0)
	hairpins = []
	for CID in cHairs:
		hairpins.append(cHairs[CID])
	
	print 'checking overlaps'
	#check which hairpins overlap exons and by how much
	exonOverlapped = compare.compareTwoTcc(hairpins, exonList, 1, amount = True)
	print '  ', len(exonOverlapped)
	
	print 'removing partial introns'
	#remove the ones that didn't overlap more than X:
	remList = []
	for tcc, oAmount in exonOverlapped:
		if oAmount < minOverlap:
			remList.append([tcc, oAmount])
	
	for item in remList:
		exonOverlapped.remove(item)
	print '  ', len(exonOverlapped), 'out of', len(cHairs.keys())
		
	#get CIDs of exons
	exonCIDs = []
	for tcc, oAmount in exonOverlapped:
		for CID in cHairs:
			if cHairs[CID] == tcc:
				exonCIDs.append(str(CID))
	
	
	#Open sorted predictions and write lines with CIDs to respective files
	predFile = open(conf.conf['resultsSorted'], 'r')
	exonFile = open(conf.conf['resultsSorted'] + '.exons', 'w')
	intronFile = open(conf.conf['resultsSorted'] + '.introns', 'w')
	for line in predFile:
		if line.split('\t')[7] in exonCIDs:
			exonFile.write(line)
		else:
			intronFile.write(line)
	predFile.close()
	exonFile.close()
	intronFile.close()

예제 #8

0

파일 보기

파일: splitExonsIntrons.py 프로젝트: sknyx/ResearchScripts

def splitExonsIntrons(cName=None):
    mConf = c.cgConfig('Main.conf')
    conf = c.getConfig(cName)

    #init
    organism = conf.conf['organism']
    minOverlap = 50
    cHairs = getHairpins.getHairpins()  #CID: HAIRPIN
    exonList = compare.tccFileToList('%sExons.tcc' % organism, 0)
    hairpins = []
    for CID in cHairs:
        hairpins.append(cHairs[CID])

    print 'checking overlaps'
    #check which hairpins overlap exons and by how much
    exonOverlapped = compare.compareTwoTcc(hairpins, exonList, 1, amount=True)
    print '  ', len(exonOverlapped)

    print 'removing partial introns'
    #remove the ones that didn't overlap more than X:
    remList = []
    for tcc, oAmount in exonOverlapped:
        if oAmount < minOverlap:
            remList.append([tcc, oAmount])

    for item in remList:
        exonOverlapped.remove(item)
    print '  ', len(exonOverlapped), 'out of', len(cHairs.keys())

    #get CIDs of exons
    exonCIDs = []
    for tcc, oAmount in exonOverlapped:
        for CID in cHairs:
            if cHairs[CID] == tcc:
                exonCIDs.append(str(CID))

    #Open sorted predictions and write lines with CIDs to respective files
    predFile = open(conf.conf['resultsSorted'], 'r')
    exonFile = open(conf.conf['resultsSorted'] + '.exons', 'w')
    intronFile = open(conf.conf['resultsSorted'] + '.introns', 'w')
    for line in predFile:
        if line.split('\t')[7] in exonCIDs:
            exonFile.write(line)
        else:
            intronFile.write(line)
    predFile.close()
    exonFile.close()
    intronFile.close()

예제 #9

0

파일 보기

파일: mirrorPeaks.py 프로젝트: sknyx/ResearchScripts

import cgPeaks
import compareData as compare
import math
import bioLibCG as cg

knowns = compare.tccFileToList('mouseKnownMirs.tcc', 0)

eLevels = []
for known in knowns:

    chrom, strand, start, end = cg.tccSplit(known, True)  #text...
    if strand == '1':
        strand = '-1'
    else:
        strand = '1'
    oppTcc = cg.makeTcc(chrom, strand, start, end)

    knownStretch = cgPeaks.stretch(known)
    knownStretch.createPeaks(1, 20)
    kPos = knownStretch.getHighestPeak()
    if kPos: eLevels.append(knownStretch.profile[kPos])

    oppStretch = cgPeaks.stretch(oppTcc)
    oppStretch.createPeaks(1, 20)
    oPos = oppStretch.getHighestPeak()

    if oPos and kPos:
        #determine if they are close enough to be considered mirrored...
        if math.fabs(int(kPos) - int(oPos)) < 12:
            print known, oPos, kPos, oppStretch.profile[
                oPos], knownStretch.profile[kPos]

예제 #10

0

파일 보기

파일: CONTROL.py 프로젝트: JasonAng/ResearchScripts

import profileTargets
import compareData as compare

# Read the entries of 'ago.200.tcc' into a list.
# NOTE(review): the meaning of the second argument (0) is not visible here.
tccList = compare.tccFileToList('ago.200.tcc', 0)

# Run the target profiling on that list with the 'agoProfile.conf' settings.
profileTargets.profileTargets(tccList, 'agoProfile.conf', dir = 'ago', min = 30)
#profileTargets.profileTargetsHistoAS(tccList, 'agoProfile.conf', name = 'agoNEG')

예제 #11

0

파일 보기

import compareData as compare
import bioLibCG as cg

exonList = compare.tccFileToList('allExons.tcc', 0)

print cg.getTccListTotalLength(exonList)
nonOverlap = compare.collapseOverlaps(exonList)
print cg.getTccListTotalLength(nonOverlap)

o = open('mouseExons.tcc', 'w')
for tcc in nonOverlap:
    o.write(tcc + '\n')
o.close()

예제 #12

0

파일 보기

파일: intronNoisy.py 프로젝트: JasonAng/ResearchScripts

def intronNoisy(cName = None):
	mConf = c.cgConfig('Main.conf')
	conf = c.getConfig(cName)
	
	#init
	cHairs = getHairpins.getHairpins(conf.conf['resultsIntrons']) #CID: HAIRPIN
	organism = conf.conf['organism']
	exonList = compare.tccFileToList('%sExons.tcc' % organism, 0)
	slide = 1000
	
	#make prediction overlap hitmap
	predMap = {}
	predList = []
	for CID in cHairs:
		hPin = cHairs[CID]
		predList.append(hPin)
	
	#collapse Overlaps
	print ' collapsing predictions'
	predList = compare.collapseOverlaps(predList)
	print ' collapsing exons'
	exonList = compare.collapseOverlaps(exonList)
	
	
	#collect levels for each hairpin region
	cidLevels = {}
	for CID in cHairs:
		print CID
		hPin = cHairs[CID]
		chrom = ss(hPin, ':')[0]
		strand = ss(hPin, ':')[1]
		start = int(ss(hPin, ':')[2])
		end = int(ss(hPin, ':')[3])
		
		scanStart = start - slide
		scanEnd = end + slide
		
		scanRange = []
		scanRange.append('%s:%s:%s:%s' % (chrom, strand, scanStart, start))
		scanRange.append('%s:%s:%s:%s' % (chrom, strand, end, scanEnd))
		
		print scanRange
		scanRange = compare.subtractTwoTccLists(scanRange, predList)
		scanRange = compare.subtractTwoTccLists(scanRange, exonList)
			
		levels = []
		
		print '  Retrieving Expression levels:', cg.getTccListTotalLength(scanRange)
		levels = []
		
		
		hPinLevels = stepVectorScan.scanVectorsHist(scanRange, cName)
		for hPin in hPinLevels:
			levels.extend(hPinLevels[hPin])
		
			
		cidLevels[CID] = levels
		
	#output levels to file
	
	#find longest
	longest = 0
	for CID in cidLevels:
		length = len(cidLevels[CID])
		if length > longest:
			longest = length
	
	sortedKeys = cidLevels.keys()
	sortedKeys.sort()
	
	newLines = []
	for j in range(0, longest): #how many lines are there
		newLine = []
		for CID in sortedKeys:
			if len(cidLevels[CID]) > j:# add it
				newLine.append(str(cidLevels[CID][j]))
			else:
				newLine.append('NA')
	
		newLines.append('\t'.join(newLine) + '\n')
	
	outFileN = conf.conf['intronNoiseData']
	outFile = open(outFileN, 'w')
	outFile.write('\t'.join(sortedKeys) + '\n')
	outFile.writelines(newLines)
	outFile.close()

예제 #13

0

파일 보기

파일: CONTROL.py 프로젝트: sknyx/ResearchScripts

import profileTargets
import compareData as compare

# Read the entries of 'ago.200.tcc' into a list.
# NOTE(review): the meaning of the second argument (0) is not visible here.
tccList = compare.tccFileToList('ago.200.tcc', 0)

# Run the target profiling on that list with the 'agoProfile.conf' settings.
profileTargets.profileTargets(tccList, 'agoProfile.conf', dir='ago', min=30)
#profileTargets.profileTargetsHistoAS(tccList, 'agoProfile.conf', name = 'agoNEG')

예제 #14

0

파일 보기

파일: singlePeakProfile.py 프로젝트: JasonAng/ResearchScripts

#given tcc, return best peak combo
import bioLibCG as cg
import cgConfig as c
import wigValue
import compareData as compare

#init
# mConf is loaded from 'Main.conf'; conf is created with no arguments.
mConf = c.cgConfig('Main.conf')
conf = c.cgConfig()
pRange = 100



# NOTE(review): this hard-coded list is overwritten by the next statement and
# therefore has no effect; it looks like a leftover from debugging.
tccList = ['chr3:-1:96042576:96042685', 'chr3:-1:96042576:96042685']
tccList = compare.tccFileToList('mouseKnownMirs.tcc', 0)
timer = cg.cgTimer()
timer.start()

#put peaks in memory
print 'loading peak data'
# presumably lists the files under mConf 'wigMouse' whose names end in
# '.peaks' -- TODO confirm against cg.recurseDir
peakFilesNames = cg.recurseDir(mConf.conf['wigMouse'], end = '.peaks')
peaks = {} # chr:peak:value
for pN in peakFilesNames:
	chrom = pN.strip().split('.')[4]
	strand = pN.strip().split('.')[2]
	
	#init dictionary
	if chrom not in peaks:
		peaks[chrom] = {}
	
	if strand not in peaks[chrom]:

예제 #15

0

파일 보기

파일: processData.py 프로젝트: JasonAng/ResearchScripts

import compareData as compare
import bioLibCG as cg

exonList = compare.tccFileToList('allExons.tcc', 0)

print cg.getTccListTotalLength(exonList)
nonOverlap = compare.collapseOverlaps(exonList)
print cg.getTccListTotalLength(nonOverlap)

o = open('mouseExons.tcc', 'w')
for tcc in nonOverlap:
	o.write(tcc + '\n')
o.close()

예제 #16

0

파일 보기

파일: testing.py 프로젝트: JasonAng/ResearchScripts

'''For all things testing'''
import bioLibCG as cg
import compareData as compare

# NOTE(review): the path below contains a literal tab character between
# 'ensemblData' and '.dblColonDash' -- confirm whether that is intended.
fileName = '/u/home8/gxxiao/chrisgre/scripts/FilterKnownMirs/ensemblHumanData/ensemblData	.dblColonDash'
dcdList = compare.tccFileToList(fileName, 0)

# Convert the loaded entries with cg.convertDcdToTcc and print each result.
tccList = cg.convertDcdToTcc(dcdList)

for x in tccList:
	print x

예제 #17

0

파일 보기

##Clusters are based off of overlapping neighbors, if you have an overlapping neighbor than you are part of that cluster.
import bioLibCG as cg
import subprocess
import compareData as compare
import cgConfig


#Start Timer
timer = cg.cgTimer()
timer.start()

#Get list of mature tccs
conf = cgConfig.returnConfDict()
# NOTE(review): this hard-coded path is replaced by conf['resultsRaw'] on the
# next line, so it has no effect.
finalMirFileName = '/u/home8/gxxiao/chrisgre/projects/PipeRuns/LanderHuman/out/LanderHuman-s3k8b17.ALL.FINAL.mirs.tsv'
finalMirFileName = conf['resultsRaw']
matureTccs = compare.tccFileToList(finalMirFileName, 1) # list of all mature micro in tcc
print 'List getting', timer.split()


#make connections dict
matureConnections = compare.makeConnectionsDict(matureTccs)
print 'Make connections:', timer.split()

#Now have to define Clusters...
# clusters collects the clusters built later in the script; addedList records
# the items that have already been assigned to one.
clusters = []
addedList = []

#I don't think python passes by reference? also I think this function is in the middle because it uses a global variable :P
def createClusters(item = None, mode = None):
		
	if item in addedList:

예제 #18

0

파일 보기

파일: intronNoisy.py 프로젝트: sknyx/ResearchScripts

def intronNoisy(cName=None):
    mConf = c.cgConfig('Main.conf')
    conf = c.getConfig(cName)

    #init
    cHairs = getHairpins.getHairpins(
        conf.conf['resultsIntrons'])  #CID: HAIRPIN
    organism = conf.conf['organism']
    exonList = compare.tccFileToList('%sExons.tcc' % organism, 0)
    slide = 1000

    #make prediction overlap hitmap
    predMap = {}
    predList = []
    for CID in cHairs:
        hPin = cHairs[CID]
        predList.append(hPin)

    #collapse Overlaps
    print ' collapsing predictions'
    predList = compare.collapseOverlaps(predList)
    print ' collapsing exons'
    exonList = compare.collapseOverlaps(exonList)

    #collect levels for each hairpin region
    cidLevels = {}
    for CID in cHairs:
        print CID
        hPin = cHairs[CID]
        chrom = ss(hPin, ':')[0]
        strand = ss(hPin, ':')[1]
        start = int(ss(hPin, ':')[2])
        end = int(ss(hPin, ':')[3])

        scanStart = start - slide
        scanEnd = end + slide

        scanRange = []
        scanRange.append('%s:%s:%s:%s' % (chrom, strand, scanStart, start))
        scanRange.append('%s:%s:%s:%s' % (chrom, strand, end, scanEnd))

        print scanRange
        scanRange = compare.subtractTwoTccLists(scanRange, predList)
        scanRange = compare.subtractTwoTccLists(scanRange, exonList)

        levels = []

        print '  Retrieving Expression levels:', cg.getTccListTotalLength(
            scanRange)
        levels = []

        hPinLevels = stepVectorScan.scanVectorsHist(scanRange, cName)
        for hPin in hPinLevels:
            levels.extend(hPinLevels[hPin])

        cidLevels[CID] = levels

    #output levels to file

    #find longest
    longest = 0
    for CID in cidLevels:
        length = len(cidLevels[CID])
        if length > longest:
            longest = length

    sortedKeys = cidLevels.keys()
    sortedKeys.sort()

    newLines = []
    for j in range(0, longest):  #how many lines are there
        newLine = []
        for CID in sortedKeys:
            if len(cidLevels[CID]) > j:  # add it
                newLine.append(str(cidLevels[CID][j]))
            else:
                newLine.append('NA')

        newLines.append('\t'.join(newLine) + '\n')

    outFileN = conf.conf['intronNoiseData']
    outFile = open(outFileN, 'w')
    outFile.write('\t'.join(sortedKeys) + '\n')
    outFile.writelines(newLines)
    outFile.close()

예제 #19

0

파일 보기

파일: defineClusters.py 프로젝트: sknyx/ResearchScripts

def defineClusters(cName=None):
    #Start Timer
    timer = cg.cgTimer()
    timer.start()

    #Get list of mature tccs
    conf = cgConfig.getConfig(cName)  #passed or default
    finalMirFileName = conf.conf['resultsRaw']
    matureTccs = compare.tccFileToList(finalMirFileName,
                                       1)  # list of all mature micro in tcc
    print 'List getting', timer.split()

    #make connections dict
    matureConnections = compare.makeConnectionsDict(matureTccs)
    print 'Make connections:', timer.split()

    #Now have to define Clusters...
    clusters = []
    addedList = []

    #I don't think python passes by reference? also I think this function is in the middle because it uses a global variable :P
    def createClusters(item=None, mode=None):

        if item in addedList:
            return 0
        elif mode == "top":
            clusters.append([item])
            addedList.append(
                item)  ##creates new cluster with the item already stored in it
            for connectedItem in matureConnections[item]:
                createClusters(connectedItem, "neighbor")
        elif mode == "neighbor":
            clusters[-1].append(
                item)  #add this item to the last cluster created
            addedList.append(item)
            for connectedItem in matureConnections[item]:
                createClusters(connectedItem, "neighbor")

    for tcc in matureTccs:
        createClusters(tcc, "top")

    print 'Make Clusters', timer.split()

    #Sort Clusters.
    sortedClusters = []

    for cluster in clusters:
        sortedClusters.append(cg.sortTccList(cluster))

    print 'Sort Clusters:', timer.split()

    #Output sorted cluster file
    clusterFileName = conf.conf['sortedClusters']
    clusterFile = open(clusterFileName, 'w')
    for cluster in sortedClusters:
        for hit in cluster:
            clusterFile.write('%s,' % hit)
        clusterFile.write('\n')
    clusterFile.close()
    '''
	#re-create sortedClusters list:
	clusterFileName = 'sortedClusters.data'
	clusterFile = open(clusterFileName, 'r')
	sortedClusters = []
	
	
	for line in clusterFile:
		sortedClusters.append([])
		line = line.strip()[0:-1] #take off last comma ;P
		for hit in (line.strip().split(',')):
			sortedClusters[-1].append(hit)
	'''

    print 'Store intermediate data:', timer.split()

    #output hitsAround file
    outputFile = open(conf.conf['hitsPerFrame'], 'w')

    frameLength = 200
    frameShift = 1
    for cluster in sortedClusters:
        #grab first and last coordinate from cluster, for each cluster deduce how many theoretical microRNAs were in hitScope
        clusterChrom = cluster[0].split(":")[0]
        clusterStrand = cluster[0].split(":")[1]
        firstCoord = int(cluster[0].split(":")[2])
        #print cluster[-1]
        lastCoord = int(cluster[-1].split(":")[3])

        startCoord = firstCoord
        while startCoord < lastCoord:
            #count how many hits there are in this range
            rangeStart = startCoord - (frameLength / 2)
            rangeEnd = startCoord + (frameLength / 2)
            rangeTcc = '%s:%s:%s:%s' % (clusterChrom, clusterStrand,
                                        rangeStart, rangeEnd)
            overlappedList = compare.compareTwoTcc([rangeTcc], cluster, 2)
            hitCount = len(overlappedList)

            #output
            outputFile.write('%s\t%s\n' % (rangeTcc, hitCount))
            startCoord = startCoord + frameShift  #check overlap with range
    outputFile.close()

    print 'Output Hits per Frame:', timer.split()
    print 'Overall Time:', timer.report()

예제 #20

0

파일 보기

파일: defineClusters.py 프로젝트: JasonAng/ResearchScripts

def defineClusters(cName = None):
	#Start Timer
	timer = cg.cgTimer()
	timer.start()
	
	#Get list of mature tccs
	conf = cgConfig.getConfig(cName) #passed or default
	finalMirFileName = conf.conf['resultsRaw']
	matureTccs = compare.tccFileToList(finalMirFileName, 1) # list of all mature micro in tcc
	print 'List getting', timer.split()
	
	
	#make connections dict
	matureConnections = compare.makeConnectionsDict(matureTccs)
	print 'Make connections:', timer.split()
	
	#Now have to define Clusters...
	clusters = []
	addedList = []
	
	#I don't think python passes by reference? also I think this function is in the middle because it uses a global variable :P
	def createClusters(item = None, mode = None):
			
		if item in addedList:
			return 0
		elif mode == "top":
			clusters.append([item])
			addedList.append(item) ##creates new cluster with the item already stored in it
			for connectedItem in matureConnections[item]:
				createClusters(connectedItem, "neighbor")
		elif mode == "neighbor":
			clusters[-1].append(item) #add this item to the last cluster created
			addedList.append(item)
			for connectedItem in matureConnections[item]:
				createClusters(connectedItem, "neighbor")
		
	for tcc in matureTccs:
		createClusters(tcc, "top")
	
	print 'Make Clusters', timer.split()
	
	
	#Sort Clusters.
	sortedClusters = []
	
	for cluster in clusters:
		sortedClusters.append(cg.sortTccList(cluster))
	
	print 'Sort Clusters:', timer.split()
	
	
	#Output sorted cluster file
	clusterFileName = conf.conf['sortedClusters']
	clusterFile = open(clusterFileName, 'w')
	for cluster in sortedClusters:
		for hit in cluster:
			clusterFile.write('%s,' % hit)
		clusterFile.write('\n')
	clusterFile.close()
	
	'''
	#re-create sortedClusters list:
	clusterFileName = 'sortedClusters.data'
	clusterFile = open(clusterFileName, 'r')
	sortedClusters = []
	
	
	for line in clusterFile:
		sortedClusters.append([])
		line = line.strip()[0:-1] #take off last comma ;P
		for hit in (line.strip().split(',')):
			sortedClusters[-1].append(hit)
	'''
	
	
	print 'Store intermediate data:', timer.split()
	
	
	#output hitsAround file
	outputFile = open(conf.conf['hitsPerFrame'], 'w')
	
	frameLength = 200
	frameShift = 1
	for cluster in sortedClusters:
		#grab first and last coordinate from cluster, for each cluster deduce how many theoretical microRNAs were in hitScope
		clusterChrom = cluster[0].split(":")[0]
		clusterStrand = cluster[0].split(":")[1]
		firstCoord = int(cluster[0].split(":")[2])
		#print cluster[-1]
		lastCoord = int(cluster[-1].split(":")[3])
		
		
		startCoord = firstCoord
		while startCoord < lastCoord:
			#count how many hits there are in this range
			rangeStart = startCoord - (frameLength/2)
			rangeEnd = startCoord + (frameLength/2)
			rangeTcc = '%s:%s:%s:%s' % (clusterChrom, clusterStrand, rangeStart, rangeEnd)
			overlappedList = compare.compareTwoTcc([rangeTcc], cluster, 2)
			hitCount = len(overlappedList) 
			
			#output 
			outputFile.write('%s\t%s\n' % (rangeTcc, hitCount))
			startCoord = startCoord + frameShift #check overlap with range
	outputFile.close()
	
	print 'Output Hits per Frame:', timer.split()
	print 'Overall Time:', timer.report()

예제 #21

0

파일 보기

파일: input.py 프로젝트: JasonAng/ResearchScripts

import compareData as compare

# Read the entries of 'snos.tcc' into a list.
tccList = compare.tccFileToList('snos.tcc', 0)

# Pass the list through compare.collapseOverlaps and print each result.
collapsed = compare.collapseOverlaps(tccList)

for tcc in collapsed:
	print tcc

예제 #22

0

파일 보기

파일: getPeakTypes.py 프로젝트: sknyx/ResearchScripts

import cgGenes
import compareData as compare
import cgConfig as c

cName = 'mm9.conf'
mConf = c.getConfig('Main.conf')
conf = c.getConfig(cName)
organism = conf.conf['organism']
geneSetFolder = mConf.conf['geneSets%s' % organism]
genes = cgGenes.createGeneSetFromFile(geneSetFolder + '/allTransciptsType.tsv')
peakTccs = compare.tccFileToList('peakData.500.mm9', 0)


tOverlaps = genes.transcriptOverlaps(peakTccs)
typeDict = {}
for transcript in tOverlaps:
	if transcript.type not in typeDict:
		typeDict[transcript.type] = 1
	else:
		typeDict[transcript.type] += 1

#count the amounts of each type for each transcript
amount = {}
for gene in genes.genes:
	for t in gene.transcripts:
		if t.type in amount:
			amount[t.type] += 1
		else:
			amount[t.type] = 1

print 'Total Peaks:', len(peakTccs)

예제 #23

0

파일 보기

파일: mirrorPeaks.py 프로젝트: JasonAng/ResearchScripts

import cgPeaks
import compareData as compare
import math
import bioLibCG as cg

knowns = compare.tccFileToList("mouseKnownMirs.tcc", 0)

eLevels = []
for known in knowns:

    chrom, strand, start, end = cg.tccSplit(known, True)  # text...
    if strand == "1":
        strand = "-1"
    else:
        strand = "1"
    oppTcc = cg.makeTcc(chrom, strand, start, end)

    knownStretch = cgPeaks.stretch(known)
    knownStretch.createPeaks(1, 20)
    kPos = knownStretch.getHighestPeak()
    if kPos:
        eLevels.append(knownStretch.profile[kPos])

    oppStretch = cgPeaks.stretch(oppTcc)
    oppStretch.createPeaks(1, 20)
    oPos = oppStretch.getHighestPeak()

    if oPos and kPos:
        # determine if they are close enough to be considered mirrored...
        if math.fabs(int(kPos) - int(oPos)) < 12:
            print known, oPos, kPos, oppStretch.profile[oPos], knownStretch.profile[kPos]

예제 #24

0

파일 보기

파일: input.py 프로젝트: sknyx/ResearchScripts

import compareData as compare

# Read the entries of 'snos.tcc' into a list.
tccList = compare.tccFileToList('snos.tcc', 0)

# Pass the list through compare.collapseOverlaps and print each result.
collapsed = compare.collapseOverlaps(tccList)

for tcc in collapsed:
    print tcc

예제 #25

0

파일 보기

파일: noncodingBin.py 프로젝트: sknyx/ResearchScripts

#get results that are only noncoding

import bioLibCG as cg
import compareData as compare
predName = '/home/chrisgre/projects/NoncodingMouse/results/NCmouse-s3k8b17.bothNCandC.results'
keepList = compare.tccFileToList('keepNoncoding.tcc', 0)
predList = compare.tccFileToList(predName, 1)

keepers = compare.compareTwoTcc(predList, keepList, 1)
print len(keepers)

#now go back through pred file and create a new file with only lines that have noncoding in them

predFile = open(predName, 'r')
outFile = open('NCmouse.noncoding.results', 'w')

predLines = predFile.readlines()
predFile.close()
newLines = {}
for keeper in keepers:
	for line in predLines:
		if keeper in line:
			newLines[line] = 1

for line in newLines:
	outFile.write(line)

예제 #26

0

파일 보기

파일: testing.py 프로젝트: sknyx/ResearchScripts

'''For all things testing'''
import bioLibCG as cg
import compareData as compare

# NOTE(review): the path below contains a literal tab character between
# 'ensemblData' and '.dblColonDash' -- confirm whether that is intended.
fileName = '/u/home8/gxxiao/chrisgre/scripts/FilterKnownMirs/ensemblHumanData/ensemblData	.dblColonDash'
dcdList = compare.tccFileToList(fileName, 0)

# Convert the loaded entries with cg.convertDcdToTcc and print each result.
tccList = cg.convertDcdToTcc(dcdList)

for x in tccList:
    print x