FastaOps.remove_newlines('data/'+inPath, 'data/tmp/'+inPath+'.fixed') FastaOps.convert_DNA_to_RNA('data/tmp/'+inPath+'.fixed', 'data/tmp/'+inPath+'.rna') # Step two: split the fasta for mutli-threaded processing FastaOps.split_fasta('data/tmp/'+inPath+'.rna', numThreads) # Step three: Launch threads threadPath = inPath threadExt = 'rna' threads = [] for i in range(numThreads): threads.append(myThread(threadPath+'.'+str(i)+'.'+threadExt)) for thread in threads: thread.start() for thread in threads: thread.join() FastaOps.merge_fasta('data/tmp/'+inPath+'.rnanrhairpins', numThreads) FastaOps.remove_AU('data/tmp/'+inPath+'.rnanrhairpins', 'data/tmp/'+inPath+'.hairpins.noAU', 5) if clusterSim > 0.0: call('cdhit-est -i data/tmp/'+inPath+'.hairpins.noAU -o data/'+inPath+'.nr.hairpins') else: call('cp data/tmp/'+inPath+'.hairpins.noAU data/'+inPath+'.nr.hairpins', shell=True) if numHairpins != 0: outFile = open('data/'+inPath+'.nr.hairpins.'+str(numHairpins), 'w') inLines = open('data/'+inPath+'.nr.hairpins', 'r').readlines() inData = [] for i in range(0,len(inLines)-2,2): inData.append(inLines[i]+inLines[i+1]) if numHairpins < len(inData):
# Pipeline: normalise the input fasta, split it into numThreads pieces, run
# one myThread worker per piece, merge the per-piece results and publish the
# merged file as data/<inPath>.nr.hairpins.

# Step one: turn the fasta into something that RNALfold will work with
fixedPath = 'data/tmp/' + inPath + '.fixed'
FastaOps.remove_newlines('data/' + inPath, fixedPath)

# Step two: split the fasta for multi-threaded processing
FastaOps.split_fasta(fixedPath, numThreads)

# Step three: Launch threads
# Each worker is handed the name '<inPath>.<index>.fixed'; presumably this is
# the piece written by split_fasta -- TODO confirm against myThread.
threadPath = inPath
threadExt = 'fixed'
threads = [myThread(threadPath + '.' + str(i) + '.' + threadExt)
           for i in range(numThreads)]
for thread in threads:
    thread.start()
# Wait for every worker to finish before merging.
for thread in threads:
    thread.join()

mergedPath = 'data/tmp/' + inPath + '.fixednrhairpins'
FastaOps.merge_fasta(mergedPath, numThreads)

# Copy with shutil instead of running `cp` through a shell: the path is not
# re-parsed by the shell (spaces/metacharacters in inPath are safe), it does
# not depend on a `cp` binary, and a failed copy raises instead of being
# silently ignored before "DONE!" is printed.
import shutil
shutil.copyfile(mergedPath, 'data/' + inPath + '.nr.hairpins')
print("DONE!")

# print "Finding all folds in "+self.inPath+" with RNALfold."
# call('progs/ViennaRNA-1.8.5/Progs/RNALfold -d2 -noLP -L 120 < data/tmp/'+inPath+'.fixed > data/tmp/'+inPath+'.folds', shell=True)
# print "Filtering folds in "+self.inPath+" down to hairpins."
# FoldOps.filter_hairpins('data/tmp/'+inPath+'.folds', 'data/tmp/'+inPath+'.hairpins')
# FileConversion.RNAL_to_fasta('data/tmp/'+inPath+'.hairpins', 'data/tmp/folds_from_'+inPath)
# print "Removing redundant hairpins from "+self.inPath+"."
# sl = SequenceList()
# Script: load each numbered piece of the AHGY01 fold fasta, call
# remove_redundant() on it, export the result, then merge the exported pieces.
import classes.FastaOperations as fo
from classes.SequenceList import *

# Number of pieces the fold file is processed in. It drives the loop, the
# progress percentage and the final merge, so it is defined exactly once.
NUM_CHUNKS = 10
FOLDS_PATH = 'data/folds_from_AHGY01.fa'
OUT_PATH = 'data/AHGY01.fa.nr.hairpins'

# The split step is disabled; the numbered input pieces are expected to exist
# already. Re-enable the call below to regenerate them.
# fo.split_fasta(FOLDS_PATH, NUM_CHUNKS)
for i in range(NUM_CHUNKS):
    suffix = '.' + str(i)
    sl = SequenceList()
    sl.load_fasta(FOLDS_PATH + suffix)
    sl.remove_redundant()
    sl.export_fasta(OUT_PATH + suffix)
    # Integer percentage of pieces finished; with NUM_CHUNKS == 10 this is the
    # same 10, 20, ..., 100 sequence as before.
    print(str((i + 1) * 100 // NUM_CHUNKS) + '% complete removing redundant hairpins')

fo.merge_fasta(OUT_PATH, NUM_CHUNKS)