def setUp(self):
    """Prepare input paths, output names and search parameters for a test.

    Reads the ``BACFINDER_HOME`` environment variable (a ``KeyError`` is
    raised if it is not set), derives the example-genome and bacteriocin
    directories from it, runs ``annotation.go`` and ``intergene.go`` on the
    example directory, and creates the ``intermediate`` working directory
    when it does not already exist.
    """
    # --- Locations derived from the installation root -------------------
    home = os.environ['BACFINDER_HOME']
    example_dir = home + "/example/Streptococcus_pyogenes"
    bacteriocin_dir = home + "/bacteriocins"

    self.root = home
    self.exampledir = example_dir
    self.bacdir = bacteriocin_dir

    # --- Annotated genes, built from the example directory --------------
    self.annotated_genes = "test_genes.fa"
    print("Example dir %s" % example_dir)
    annotation.go(example_dir, self.annotated_genes, types=['.gbk'])
    self.genome_files = test_modules.getFNA(example_dir)

    # --- Reference inputs shipped in the bacteriocin directory ----------
    self.bacteriocins = bacteriocin_dir + "/bagel.fa"
    self.genes = bacteriocin_dir + "/genes.fa"

    # --- Intergenic regions, built from the example directory -----------
    self.intergenes = "test_intergenes.fa"
    intergene.go(example_dir, self.intergenes)

    # --- Output file names ----------------------------------------------
    self.bacteriocinsOut = "test_out_bacteriocins.txt"
    # NOTE(review): an assignment to self.filteredOut is commented out in
    # the original, so that attribute is not set here -- confirm no test
    # relies on it.
    self.annotationsOut = "neighbor_genes.txt"

    # --- Scratch directory for intermediate files -----------------------
    scratch = "intermediate"
    self.intermediate = scratch
    if not os.path.exists(scratch):
        os.mkdir(scratch)

    # --- Search parameters ----------------------------------------------
    self.gene_evalue = 1e-06
    self.bac_evalue = 1e-06
    self.num_threads = 10
    self.formatdb = True
    # NOTE(review): self.gene_radius (50000) is commented out in the
    # original and therefore not set here.
    self.bacteriocin_radius = 50000
    self.verbose = True
    self.keep_tmp = False
def preprocess(self, buildAnnotations=True):
    """Build the combined inputs needed by the later pipeline stages.

    Steps, in order:

    1. ``fasta.go`` is run on ``self.genome_dir`` with the ``all_fasta`` /
       ``all_faidx`` / ``six_fasta`` / ``six_faidx`` paths, then a
       ``fasta.Indexer`` over ``self.all_fasta`` is indexed and loaded.
    2. ``intergene.go`` and ``annotation.go`` are run on
       ``self.genome_dir``; the annotation step receives the indexer.
    3. Every ``.gff`` file found under ``self.genome_dir`` is concatenated
       into ``self.gff``.
    4. Every ``.fna`` file found under ``self.genome_dir`` is concatenated
       into a per-process temporary file, which ``faa.reformat`` turns
       into ``self.faa``; ``self.faa`` is then indexed.

    Parameters:
        buildAnnotations: Currently unused. The conditional that consulted
            it was disabled when the annotation/intergene calls were made
            unconditional ("changed to annotation - Nafiz"); the parameter
            is kept so existing callers keep working.
    """
    print("Preprocessing")

    # Combine all genome files into a single genome fasta file.
    # https://github.com/mortonjt/Boa/blob/master/src/format/fasta.py
    fasta.go(self.genome_dir,
             self.all_fasta,
             self.all_faidx,
             self.six_fasta,
             self.six_faidx)
    indexer = fasta.Indexer(self.all_fasta, self.all_faidx)  # class in fasta.py
    indexer.index()
    indexer.load()

    # https://github.com/mortonjt/Boa/blob/master/src/genome/intergene.py
    intergene.go(self.genome_dir, self.intergenes)
    # https://github.com/mortonjt/Boa/blob/master/src/annotation/annotation.py
    annotation.go(self.genome_dir, self.annotated_genes, index_obj=indexer)

    # Combine all gff files together.  Both the output and every input are
    # opened in `with` blocks so no handle is leaked, even if a copy fails.
    with open(self.gff, 'w') as outhandle:
        for root, _, files in os.walk(self.genome_dir):
            for fname in files:
                if os.path.splitext(fname)[1] == ".gff":
                    with open(os.path.join(root, fname)) as inhandle:
                        shutil.copyfileobj(inhandle, outhandle)

    # The pid keeps concurrent runs in the same directory from colliding.
    tmpfile = "tmp%d.faa" % os.getpid()
    try:
        with open(tmpfile, 'w') as outhandle:
            for root, _, files in os.walk(self.genome_dir):
                for fname in files:
                    # NOTE(review): this collects ".fna" files into a file
                    # that is reformatted as .faa; confirm ".fna" (rather
                    # than ".faa") is the intended extension here.
                    if os.path.splitext(fname)[1] == ".fna":
                        with open(os.path.join(root, fname)) as inhandle:
                            shutil.copyfileobj(inhandle, outhandle)
        faa.reformat(tmpfile, self.faa)
    finally:
        # Remove the scratch file even when reformatting fails.
        if os.path.exists(tmpfile):
            os.remove(tmpfile)

    faaindex = fasta.Indexer(self.faa, self.faaidx)
    faaindex.index()
def preprocess(self):
    """Build the combined inputs needed by the later pipeline stages.

    Steps, in order:

    1. ``fasta.go`` is run on ``self.genome_dir`` with the ``all_fasta`` /
       ``all_faidx`` / ``six_fasta`` / ``six_faidx`` paths, then a
       ``fasta.Indexer`` over ``self.all_fasta`` is indexed and loaded.
    2. ``intergene.go`` and ``annotation.go`` are run on
       ``self.genome_dir``; the annotation step receives the indexer.
    3. Every ``.gff`` file found under ``self.genome_dir`` is concatenated
       into ``self.gff``.
    4. Every ``.faa`` file found under ``self.genome_dir`` is concatenated
       into a per-process temporary file, which ``faa.reformat`` turns
       into ``self.faa``; ``self.faa`` is then indexed.
    """
    print("Preprocessing")

    # Combine all genome files into a single genome fasta file.
    fasta.go(self.genome_dir,
             self.all_fasta,
             self.all_faidx,
             self.six_fasta,
             self.six_faidx)
    indexer = fasta.Indexer(self.all_fasta, self.all_faidx)
    indexer.index()
    indexer.load()

    intergene.go(self.genome_dir, self.intergenes)
    annotation.go(self.genome_dir, self.annotated_genes, index_obj=indexer)

    # Combine all gff files together.  Both the output and every input are
    # opened in `with` blocks so no handle is leaked, even if a copy fails.
    with open(self.gff, 'w') as outhandle:
        for root, _, files in os.walk(self.genome_dir):
            for fname in files:
                if os.path.splitext(fname)[1] == ".gff":
                    with open(os.path.join(root, fname)) as inhandle:
                        shutil.copyfileobj(inhandle, outhandle)

    # The pid keeps concurrent runs in the same directory from colliding.
    tmpfile = "tmp%d.faa" % os.getpid()
    try:
        # Gather every protein fasta file into one scratch file.
        with open(tmpfile, 'w') as outhandle:
            for root, _, files in os.walk(self.genome_dir):
                for fname in files:
                    if os.path.splitext(fname)[1] == ".faa":
                        with open(os.path.join(root, fname)) as inhandle:
                            shutil.copyfileobj(inhandle, outhandle)
        faa.reformat(tmpfile, self.faa)
    finally:
        # Remove the scratch file even when reformatting fails.
        if os.path.exists(tmpfile):
            os.remove(tmpfile)

    faaindex = fasta.Indexer(self.faa, self.faaidx)
    faaindex.index()