コード例 #1
0
def createTrackInDir(dirName):
	'''Every Q function has a corresponding shell script
	Make wig file for all mapped files, for all organisms'''
	
	wrapperShell = '/home/chrisgre/scripts/mapping/createTrack.sh'
	
	mainConf = c.cgConfig('Main.conf')
	metaFileName = mainConf.conf['metaFileName']

	for file in cg.recurseDir(dirName, end = '.mapped'):
						
		#check if mouse or human
		baseFName = cg.getBaseFileName(file)
		baseFName = baseFName.split('.')[0]
		
		metaDict = cg.getMetaFileDict(metaFileName)
		
		org = 'None'
		if baseFName in metaDict:
			if metaDict[baseFName][1] == 'NONE':
				print '  NO ORG KNOWN FOR', file
				continue
			else:
				org = metaDict[baseFName][1]
				print '  USING ORG', org, file
				
		#check if there is an organism, must check due to files not in metaFile
		if org == 'None':
			print '  NO org (not in meta file)', file
			continue
			
		while True:
			#submit job if there are less than ten
			if clusterCheck.queryNumJobsQ('chrisgre') < 1000:
				#subprocess.Popen(['qsub', '-q', 'xiao', '-l', 'mem=4G', '-V', '-o', mainConf.conf['outLog'], '-e', mainConf.conf['errorLog'], wrapperShell, file, org ])
				subprocess.Popen(['qsub', '-V', '-o', mainConf.conf['outLog'], '-e', mainConf.conf['errorLog'], wrapperShell, file, org ])
				#time.sleep(.5) #give it time to update qstat
				break
			else:#wait 10 secs...
				time.sleep(20)
コード例 #2
0
#how much density for tissue?
import cgConfig
import bioLibCG as cg

#load the shared main config and the per-run config
mainConf = cgConfig.cgConfig('Main.conf')
conf = cgConfig.cgConfig()

#prediction results -- this file must already be sorted!
predFile = open(conf.conf['resultsSorted'], 'r')

#one id per prediction line: first tab-separated column, cut at its first '.'
mirIDs = []
for x in predFile:
	firstColumn = x.strip().split('\t')[0]
	mirIDs.append(firstColumn.split('.')[0])

#make lib:tissue dict
#lib is getBaseFileName(key, naked = True); tissue is metaDict[key][3]
metaDict = cg.getMetaFileDict(mainConf.conf['metaFileName'])


#make lib:hits dict, grouped under the id line that precedes each run of
#tab-indented "lib<TAB>hits" lines
densityFile = open('/home/chrisgre/scripts/readDensity/individual.densities.data', 'r')
tissueHits = {}
cID = 'NONE'
for line in densityFile:
	if not line.startswith('\t'):
		#id line: open a fresh lib:hits dict for it
		cID = line.strip()
		tissueHits[cID] = {}
		continue

	#lib: hit line belonging to the current id
	fields = line.strip().split('\t')
	l = fields[0]
	hits = int(fields[1])
	tissueHits[cID][l] = hits

#tissue: hits
tissueHist = {}
for mirID in mirIDs:
コード例 #3
0
import bioLibCG as cg
import cgConfig

#init
# per-run config first, then the shared main config
conf = cgConfig.cgConfig()
mainConf = cgConfig.cgConfig('Main.conf')

# settings taken from the per-run config
pFileName = conf.conf['results']
smallFileName = conf.conf['smallAnalysis']
organism = conf.conf['organism']

# Collect the meta file keys whose entry at index 1 equals the configured
# organism.  NOTE: THESE ARE THE NAKED BASE FILE NAMES!
metaDict = cg.getMetaFileDict(mainConf.conf['metaFileName'])
organismFileList = []
for baseFName in metaDict:
    if not metaDict[baseFName][1] == organism:
        continue
    organismFileList.append(baseFName)

# small hits for each prediction are accumulated in this dictionary
currID = 'NONE'
pCount = {}
smallFile = open(smallFileName, 'r')

for line in smallFile:
    if '\t' not in line:  #This is the line with the id in it -->  store another ID
        currID = line.strip()
    else:  #this line contains library and count info  --> add
        lib = line.strip().split('\t')[0]
        count = int(line.strip().split('\t')[1])

        if cg.getBaseFileName(lib, naked=True) in organismFileList:
            if currID in pCount:
コード例 #4
0
def createMultiTrack(dirName, organism):
	'''merge all mapped tracks in directory and create a single wig file'''
	mainConf = c.cgConfig('Main.conf')
	metaFileName = mainConf.conf['metaFileName']
	
	fileList = []
	for file in cg.recurseDir(dirName, end = '.mapped'):
						
		#check if mouse or human SHOULD PUT INTO A STD FUNCTION FOR META FILE
		#check if mouse or human
		baseFName = cg.getBaseFileName(file, naked= True)
		
		metaDict = cg.getMetaFileDict(metaFileName)
		
		org = 'None'
		if baseFName in metaDict:
			if metaDict[baseFName][1] == 'NONE':
				print '  NO ORG KNOWN FOR', file
				continue
			elif not metaDict[baseFName][1] == organism:
				print '  NOT ORGANISM RUNNING', file
				continue
			else:
				org = metaDict[baseFName][1]
				print '  USING ORG', org, file
			
		#check if there is an organism, must check due to files not in metaFile
		if org == 'None':
			print '  NO org (not in meta file)', file
			continue
		
		#only make wig file for organism asked for
		if not org == organism:
			continue
		
		#if it is right organism and has mapped file then add
		fileList.append(file)
	
	
	#make merged wig
	if organism == 'human':
		chroms = cg.humanChromosomes
		assembly = 'hg19'
	elif organism == 'mouse':
		chroms = cg.mouseChromosomes
		assembly = 'mm9'
	elif organism == 'zebrafish':
		chroms = cg.zebrafishChromosomes
		assembly = 'danRer6'
	
	print 'Making Bed File vectors'
	cvg = HTSeq.GenomicArray(chroms, stranded=True, typecode='i')
	for fName in fileList:
		alignment_file = HTSeq.BowtieReader(fName)
		for alngt in alignment_file:
			if alngt.aligned:
				cvg.add_value( 1, alngt.iv ) #iv is the genomic interval..

	bedNamePos = dirName + '/Merge.' + organism + '.1.wig'
	bedNameNeg = dirName + '/Merge.' + organism + '.-1.wig'
	
	print 'Writing Bed File'
	cvg.write_bedgraph_file(bedNamePos, "+" )
	cvg.write_bedgraph_file(bedNameNeg, "-" )

	#Now extend it
	updateWigLength(bedNamePos, assembly)
	updateWigLength(bedNameNeg, assembly)
	
	#Now Sort it.
	cgSort.wigSort(bedNamePos)
	cgSort.wigSort(bedNameNeg)