Beispiel #1
0
    def preprocess(self,buildAnnotations=True):
        """Prepare the combined genome, annotation, GFF and protein files.

        Steps, in order:
          1. ``fasta.go`` over ``self.genome_dir`` (per the original comment,
             this combines all genome files into a single genome fasta file),
             then index and load ``self.all_fasta``.
          2. ``intergene.go`` and ``annotation.go`` over ``self.genome_dir``.
          3. Concatenate every ``.gff`` file under ``self.genome_dir`` into
             ``self.gff``.
          4. Concatenate every ``.fna`` file under ``self.genome_dir`` into a
             temporary file, pass it through ``faa.reformat`` to produce
             ``self.faa``, and index the result.

        Args:
            buildAnnotations: Accepted for backward compatibility; it is not
                read anywhere in this method.
        """

        def _concatenate(extension, destination):
            # Write the contents of every file under genome_dir whose name
            # ends in `extension` into `destination`, in os.walk order.
            destination_abs = os.path.abspath(destination)
            with open(destination, 'w') as merged:
                for root, _subdirs, files in os.walk(self.genome_dir):
                    for fname in files:
                        if os.path.splitext(fname)[1] != extension:
                            continue
                        absfile = os.path.join(root, fname)
                        # Never feed the output file back into itself when it
                        # happens to live under genome_dir.
                        if os.path.abspath(absfile) == destination_abs:
                            continue
                        # Close each input promptly instead of leaking one
                        # descriptor per matching file.
                        with open(absfile) as source:
                            shutil.copyfileobj(source, merged)

        print("Preprocessing")
        #Combine all genome files into a single genome fasta file
        #https://github.com/mortonjt/Boa/blob/master/src/format/fasta.py
        fasta.go(self.genome_dir,
                 self.all_fasta,
                 self.all_faidx,
                 self.six_fasta,
                 self.six_faidx)
        indexer = fasta.Indexer(self.all_fasta,self.all_faidx)
        indexer.index()
        indexer.load()

        #https://github.com/mortonjt/Boa/blob/master/src/genome/intergene.py
        intergene.go(self.genome_dir,self.intergenes)

        #https://github.com/mortonjt/Boa/blob/master/src/annotation/annotation.py
        annotation.go(self.genome_dir,self.annotated_genes,index_obj=indexer)

        #Combine all gff files together
        _concatenate(".gff", self.gff)

        # NOTE(review): files ending in .fna are gathered into a temp file
        # named *.faa and handed to faa.reformat -- confirm that .fna (and
        # not .faa) is the intended input extension here.
        tmpfile = "tmp%d.faa"%(os.getpid())
        try:
            _concatenate(".fna", tmpfile)
            faa.reformat(tmpfile,self.faa)
        finally:
            # Remove the scratch file even when concatenation or
            # reformatting fails part-way through.
            if os.path.exists(tmpfile):
                os.remove(tmpfile)

        faaindex = fasta.Indexer(self.faa,self.faaidx)
        faaindex.index()
Beispiel #2
0
 def test(self):
     """Check that ``GFF.call_orfs`` reports the expected query records.

     The protein fasta at ``self.faa`` is first rewritten in place via
     ``faa.reformat`` and indexed; the GFF database is then built and
     queried, and the first element of every returned hit is compared
     (order-insensitively) against the expected records.
     """
     # Reformat into a scratch file, then move it over the original path.
     scratch = "tmp%d.faa"%(os.getpid())
     faa.reformat(self.faa,scratch)
     os.rename(scratch,self.faa)

     protein_index = fasta.Indexer(self.faa,self.faaidx)
     protein_index.index()
     protein_index.load()

     annotations = GFF(self.gff,self.outfasta,self.fasta,self.faidx,False)
     annotations.indexdb()
     orf_hits = annotations.call_orfs(self.queries,protein_index)
     print(orf_hits)

     # Each hit unpacks into exactly two parts; only the first is asserted on.
     hit_ids,_hit_seqs = zip(*orf_hits)

     genome_desc = 'Mesorhizobium opportunistum WSM2075, complete genome'
     expected = [
         ('CP002279.1','toxin.fa.cluster2.fa',0,0,1,51,100,genome_desc),
         ('CP002279.1','transport.fa.cluster2.fa',0,0,1,1551,2000,genome_desc),
         ('CP002279.1','transport.fa.cluster2.fa',0,0,1,3551,4000,genome_desc),
     ]
     self.assertItemsEqual(hit_ids,expected)
Beispiel #3
0
 def preprocess(self):
     """Prepare the combined genome, annotation, GFF and protein files.

     Steps, in order:
       1. ``fasta.go`` over ``self.genome_dir`` (per the original comment,
          this combines all genome files into a single genome fasta file),
          then index and load ``self.all_fasta``.
       2. ``intergene.go`` and ``annotation.go`` over ``self.genome_dir``.
       3. Concatenate every ``.gff`` file under ``self.genome_dir`` into
          ``self.gff``.
       4. Concatenate every ``.faa`` file under ``self.genome_dir`` into a
          temporary file, pass it through ``faa.reformat`` to produce
          ``self.faa``, and index the result.
     """

     def _concatenate(extension, destination):
         # Write the contents of every file under genome_dir whose name
         # ends in `extension` into `destination`, in os.walk order.
         destination_abs = os.path.abspath(destination)
         with open(destination, 'w') as merged:
             for root, _subdirs, files in os.walk(self.genome_dir):
                 for fname in files:
                     if os.path.splitext(fname)[1] != extension:
                         continue
                     absfile = os.path.join(root, fname)
                     # The scratch file itself ends in .faa; skip it so it is
                     # never copied into itself when the working directory
                     # lies under genome_dir.
                     if os.path.abspath(absfile) == destination_abs:
                         continue
                     # Close each input promptly instead of leaking one
                     # descriptor per matching file.
                     with open(absfile) as source:
                         shutil.copyfileobj(source, merged)

     print("Preprocessing")
     #Combine all genome files into a single genome fasta file
     fasta.go(self.genome_dir,
              self.all_fasta,
              self.all_faidx,
              self.six_fasta,
              self.six_faidx)
     indexer = fasta.Indexer(self.all_fasta,self.all_faidx)
     indexer.index()
     indexer.load()
     intergene.go(self.genome_dir,self.intergenes)
     annotation.go(self.genome_dir,self.annotated_genes,index_obj=indexer)

     #Combine all gff files together
     _concatenate(".gff", self.gff)

     tmpfile = "tmp%d.faa"%(os.getpid())
     try:
         _concatenate(".faa", tmpfile)
         faa.reformat(tmpfile,self.faa)
     finally:
         # Remove the scratch file even when concatenation or reformatting
         # fails part-way through.
         if os.path.exists(tmpfile):
             os.remove(tmpfile)

     faaindex = fasta.Indexer(self.faa,self.faaidx)
     faaindex.index()