def preprocess(self, buildAnnotations=True):
    """Build the combined genome, GFF and protein files plus their indexes.

    Steps, in order:
      1. ``fasta.go`` combines the genome files under ``self.genome_dir``
         into a single genome fasta file, which is then indexed and loaded.
      2. ``intergene.go`` and ``annotation.go`` are run on
         ``self.genome_dir`` (the latter receives the loaded genome index).
      3. Every ``.gff`` file found under ``self.genome_dir`` is concatenated
         into ``self.gff``.
      4. Every ``.fna`` file found under ``self.genome_dir`` is concatenated
         into a temporary file, reformatted into ``self.faa`` by
         ``faa.reformat`` and indexed.

    Args:
        buildAnnotations: Currently unused. The conditional annotation step
            it once controlled is disabled; the parameter is kept so that
            existing callers keep working.
    """

    def _append_files(extension, outhandle):
        # Copy every file below self.genome_dir whose extension matches
        # ``extension`` into ``outhandle``, closing each input when done.
        for root, _subfolders, files in os.walk(self.genome_dir):
            for fname in files:
                if os.path.splitext(fname)[1] != extension:
                    continue
                with open(os.path.join(root, fname)) as inhandle:
                    shutil.copyfileobj(inhandle, outhandle)

    # Parenthesised form prints the same text under the Python 2 print
    # statement and the Python 3 print function.
    print("Preprocessing")

    # Combine all genome files into a single genome fasta file
    # https://github.com/mortonjt/Boa/blob/master/src/format/fasta.py
    fasta.go(self.genome_dir,
             self.all_fasta,
             self.all_faidx,
             self.six_fasta,
             self.six_faidx)
    indexer = fasta.Indexer(self.all_fasta, self.all_faidx)
    indexer.index()
    indexer.load()

    # https://github.com/mortonjt/Boa/blob/master/src/genome/intergene.py
    intergene.go(self.genome_dir, self.intergenes)
    # https://github.com/mortonjt/Boa/blob/master/src/annotation/annotation.py
    annotation.go(self.genome_dir, self.annotated_genes, index_obj=indexer)

    # Combine all gff files together
    with open(self.gff, 'w') as outhandle:
        _append_files(".gff", outhandle)

    # NOTE(review): files with the ".fna" extension are gathered into the
    # temporary ".faa" file here -- confirm that this extension is intended.
    tmpfile = "tmp%d.faa" % os.getpid()
    try:
        with open(tmpfile, 'w') as outhandle:
            _append_files(".fna", outhandle)
        faa.reformat(tmpfile, self.faa)
    finally:
        # Do not leave the scratch file behind, even if reformatting failed.
        if os.path.exists(tmpfile):
            os.remove(tmpfile)

    faaindex = fasta.Indexer(self.faa, self.faaidx)
    faaindex.index()
def test(self):
    """Check that ``GFF.call_orfs`` reports the expected query identifiers.

    ``self.faa`` is reformatted in place (through a temporary file that is
    renamed over it), indexed and loaded. A ``GFF`` object is then built,
    its database index created, and ``call_orfs`` is run with
    ``self.queries``. The identifier half of each returned pair must match
    the expected tuples, in any order.
    """
    genome_description = 'Mesorhizobium opportunistum WSM2075, complete genome'
    correct_queries = [
        ('CP002279.1', 'toxin.fa.cluster2.fa', 0, 0, 1, 51, 100,
         genome_description),
        ('CP002279.1', 'transport.fa.cluster2.fa', 0, 0, 1, 1551, 2000,
         genome_description),
        ('CP002279.1', 'transport.fa.cluster2.fa', 0, 0, 1, 3551, 4000,
         genome_description),
    ]

    # Reformat the protein fasta file, then swap the result into place.
    scratch_path = "tmp%d.faa" % os.getpid()
    faa.reformat(self.faa, scratch_path)
    os.rename(scratch_path, self.faa)

    protein_index = fasta.Indexer(self.faa, self.faaidx)
    protein_index.index()
    protein_index.load()

    # Only the database index is built; the indextree() call stays disabled.
    gff_records = GFF(self.gff, self.outfasta, self.fasta, self.faidx, False)
    gff_records.indexdb()

    hits = gff_records.call_orfs(self.queries, protein_index)
    # Single parenthesised argument: same output as the Python 2 statement.
    print(hits)

    # Each hit is unpacked as an (identifier, sequence) pair.
    ids, _seqs = zip(*hits)
    self.assertItemsEqual(ids, correct_queries)
def preprocess(self):
    """Build the combined genome, GFF and protein files plus their indexes.

    Steps, in order:
      1. ``fasta.go`` combines the genome files under ``self.genome_dir``
         into a single genome fasta file, which is then indexed and loaded.
      2. ``intergene.go`` and ``annotation.go`` are run on
         ``self.genome_dir`` (the latter receives the loaded genome index).
      3. Every ``.gff`` file found under ``self.genome_dir`` is concatenated
         into ``self.gff``.
      4. Every ``.faa`` file found under ``self.genome_dir`` is concatenated
         into a temporary file, reformatted into ``self.faa`` by
         ``faa.reformat`` and indexed.
    """

    def _append_files(extension, outhandle):
        # Copy every file below self.genome_dir whose extension matches
        # ``extension`` into ``outhandle``, closing each input when done.
        for root, _subfolders, files in os.walk(self.genome_dir):
            for fname in files:
                if os.path.splitext(fname)[1] != extension:
                    continue
                with open(os.path.join(root, fname)) as inhandle:
                    shutil.copyfileobj(inhandle, outhandle)

    # Parenthesised form prints the same text under the Python 2 print
    # statement and the Python 3 print function.
    print("Preprocessing")

    # Combine all genome files into a single genome fasta file
    fasta.go(self.genome_dir,
             self.all_fasta,
             self.all_faidx,
             self.six_fasta,
             self.six_faidx)
    indexer = fasta.Indexer(self.all_fasta, self.all_faidx)
    indexer.index()
    indexer.load()

    intergene.go(self.genome_dir, self.intergenes)
    annotation.go(self.genome_dir, self.annotated_genes, index_obj=indexer)

    # Combine all gff files together
    with open(self.gff, 'w') as outhandle:
        _append_files(".gff", outhandle)

    # Gather the protein files in a scratch file before reformatting them.
    tmpfile = "tmp%d.faa" % os.getpid()
    try:
        with open(tmpfile, 'w') as outhandle:
            _append_files(".faa", outhandle)
        faa.reformat(tmpfile, self.faa)
    finally:
        # Do not leave the scratch file behind, even if reformatting failed.
        if os.path.exists(tmpfile):
            os.remove(tmpfile)

    faaindex = fasta.Indexer(self.faa, self.faaidx)
    faaindex.index()