コード例 #1
0
ファイル: extract_hairpins.py プロジェクト: jrgreen7/SMIRP
# Step one: strip newlines from the input fasta, then convert DNA to RNA.
# Both intermediate files live under data/tmp/.
fixedFasta = 'data/tmp/'+inPath+'.fixed'
rnaFasta = 'data/tmp/'+inPath+'.rna'
FastaOps.remove_newlines('data/'+inPath, fixedFasta)
FastaOps.convert_DNA_to_RNA(fixedFasta, rnaFasta)

# Step two: split the RNA fasta for multi-threaded processing.
FastaOps.split_fasta(rnaFasta, numThreads)

# Step three: create one worker per index 0..numThreads-1, named
# "<inPath>.<index>.rna", start them all, then wait for every one to finish.
threadPath = inPath
threadExt = 'rna'
threads = [myThread(threadPath+'.'+str(i)+'.'+threadExt) for i in range(numThreads)]
for worker in threads:
	worker.start()
for worker in threads:
	worker.join()

# Merge the per-thread outputs into one ".rnanrhairpins" file, then run
# FastaOps.remove_AU on it (third argument 5) to produce ".hairpins.noAU".
FastaOps.merge_fasta('data/tmp/'+inPath+'.rnanrhairpins', numThreads)
FastaOps.remove_AU('data/tmp/'+inPath+'.rnanrhairpins', 'data/tmp/'+inPath+'.hairpins.noAU', 5)

noAUPath = 'data/tmp/'+inPath+'.hairpins.noAU'
nrPath = 'data/'+inPath+'.nr.hairpins'
if clusterSim > 0.0:
	# Pass an argument list: a single command string without shell=True is
	# treated as the program name, so the tool would never be launched.
	# NOTE(review): clusterSim itself is not forwarded to cdhit-est (e.g. as
	# an identity threshold option) -- confirm the tool default is intended.
	call(['cdhit-est', '-i', noAUPath, '-o', nrPath])
else:
	# No clustering requested: copy the filtered file as-is. An argument list
	# avoids the shell, so unusual characters in inPath cannot break the command.
	call(['cp', noAUPath, nrPath])

# Optional down-selection: only runs when numHairpins is non-zero.
if numHairpins != 0:
	# Output goes to "<nr.hairpins path>.<numHairpins>".
	outFile = open('data/'+inPath+'.nr.hairpins.'+str(numHairpins), 'w')
	# NOTE(review): the read handle is never closed explicitly.
	inLines = open('data/'+inPath+'.nr.hairpins', 'r').readlines()
	# Each entry joins two consecutive lines (i and i+1); presumably a fasta
	# header followed by its sequence line -- TODO confirm the file layout.
	inData = []
	# NOTE(review): with the stop value len(inLines)-2, an even number of
	# lines leaves the final pair uncollected -- confirm this is intended.
	for i in range(0,len(inLines)-2,2):
		inData.append(inLines[i]+inLines[i+1])

	# Taken only when fewer entries are requested than were collected; the
	# body of this branch is not visible in this excerpt.
	if numHairpins < len(inData):
コード例 #2
0
ファイル: extract_hairpins.py プロジェクト: CU-BIC/SMIRP
# Step one: turn the fasta into something that RNALfold will work with
fixedFasta = 'data/tmp/' + inPath + '.fixed'
FastaOps.remove_newlines('data/' + inPath, fixedFasta)
# Step two: split the fasta for multi-threaded processing
FastaOps.split_fasta(fixedFasta, numThreads)
# Step three: create one worker per index 0..numThreads-1, named
# "<inPath>.<index>.fixed", start them all, then wait for every one to finish.
threadPath = inPath
threadExt = 'fixed'
threads = [
    myThread(threadPath + '.' + str(i) + '.' + threadExt)
    for i in range(numThreads)
]
for thread in threads:
    thread.start()
for thread in threads:
    thread.join()

# Merge the per-thread outputs, then publish the merged file under data/.
mergedHairpins = 'data/tmp/' + inPath + '.fixednrhairpins'
FastaOps.merge_fasta(mergedHairpins, numThreads)
# Argument list instead of a shell string: inPath is passed to cp verbatim,
# so spaces or shell metacharacters in it cannot split or alter the command.
call(['cp', mergedHairpins, 'data/' + inPath + '.nr.hairpins'])
# Parenthesised form prints the same text under Python 2 and Python 3.
print("DONE!")

# print "Finding all folds in "+self.inPath+" with RNALfold."
# call('progs/ViennaRNA-1.8.5/Progs/RNALfold -d2 -noLP -L 120 < data/tmp/'+inPath+'.fixed > data/tmp/'+inPath+'.folds', shell=True)

# print "Filtering folds  in "+self.inPath+" down to hairpins."
# FoldOps.filter_hairpins('data/tmp/'+inPath+'.folds', 'data/tmp/'+inPath+'.hairpins')

# FileConversion.RNAL_to_fasta('data/tmp/'+inPath+'.hairpins', 'data/tmp/folds_from_'+inPath)

# print "Removing redundant hairpins from "+self.inPath+"."
# sl = SequenceList()
コード例 #3
0
ファイル: test.py プロジェクト: jrgreen7/SMIRP
import classes.FastaOperations as fo
from classes.SequenceList import *

# Disabled split step; the loop below reads the ten numbered
# 'data/folds_from_AHGY01.fa.<n>' files (presumably its output -- confirm).
# fo.split_fasta('data/folds_from_AHGY01.fa', 10)
for part in range(10):
	# Load one part, drop redundant sequences, write the result back out
	# under the matching numbered '.nr.hairpins.<n>' name.
	sequences = SequenceList()
	sequences.load_fasta('data/folds_from_AHGY01.fa.'+str(part))
	sequences.remove_redundant()
	sequences.export_fasta('data/AHGY01.fa.nr.hairpins'+'.'+str(part))
	# Ten parts, so each finished part accounts for 10% of the work.
	percentDone = (part+1)*10
	print(str(percentDone)+'% complete removing redundant hairpins')
# Combine the ten per-part outputs into 'data/AHGY01.fa.nr.hairpins'.
fo.merge_fasta('data/AHGY01.fa.nr.hairpins', 10)