def _main(self):
    """Extract one promoter sequence per genome and tabulate the motifs found.

    Loads records from ``self.genomedir`` via ``load_dir`` (format ``'gb'``,
    file names matching ``.*\\.gb$``), extracts a promoter sequence for every
    genome whose id is a key of ``self.promoters``, feeds the sequences to
    ``SchemaFinder.find`` and writes each reported motif with its count to
    ``FC-promoter.mfs`` (a relative path, so it lands in the working
    directory).

    Returns:
        1 when ``load_dir`` yields nothing; otherwise falls off the end and
        returns ``None``.
    """
    # Raw string: the backslash belongs to the regex, not to a string escape.
    genomes = load_dir(self.abort_event, self.genomedir, 'gb', r'.*\.gb$')
    if not genomes:
        return 1

    pseqs = {}
    for g in genomes:
        # Skip genomes without a promoter definition and ids already handled.
        if g.id not in self.promoters or g.id in pseqs:
            continue
        pseqs[g.id] = self.promoters[g.id].extract(g)
        # Echo the id and the extracted sequence, followed by a blank line.
        # Single-argument parenthesised prints behave the same as the
        # Python 2 print statement.
        print(g.id)
        print(pseqs[g.id])
        print('')

    finder = SchemaFinder()
    repo = finder.find(pseqs.values())

    with open('FC-promoter.mfs', 'w') as out:
        writer = csv.writer(out)
        writer.writerow(('motif', 'count'))
        for pi in repo.get_all():
            writer.writerow((pi, repo.count(pi)))
from BioUtils.Tools.Output import user_message
from BioUtils.SeqUtils import load_files, load_dir

_pid = os.getpid()

# Route interrupt, terminate and quit signals to the same handler.
for _signum in (signal.SIGINT, signal.SIGTERM, signal.SIGQUIT):
    signal.signal(_signum, sig_handler)

abort_event = Event()

with user_message('Loading genomes...', '\n'):
    genomes_dir = u'/home/allis/Documents/INMI/Aerobic-CODH/genomes/'
    genome_names = [
        'Thermococcus_barophilus_Ch5-complete.gb',
        'Thermococcus_onnurineus_NA1-complete-genome.gb',
        'Thermococcus_sp._ES1.gb',
        'Thermococcus-DS1-preliminary.gb',
    ]
    genomes = load_dir(abort_event, genomes_dir, 'gb', r'.*\.gb')
    # NOTE(review): the original layout was lost; confirm this check belongs
    # inside the `with` block rather than directly after it.
    if not genomes:
        sys.exit(1)
    # Disabled alternative that loads only the files named in genome_names:
    # load_files(abort_event, [os.path.join(genomes_dir, f) for f in genome_names], 'gb')

hmm = u'/home/allis/Documents/INMI/Aerobic-CODH/COX-EC/COX-EC_1.2.99.2_CoxL.hmm'
hmmer = Hmmer(abort_event)

# Search every genome with the HMM; genomes with results are written to
# '<name>.gb' and the first item of each result entry is printed.
for g in genomes:
    results = hmmer.hmmsearch_genome(hmm, g, table=11, decorate=True)
    if not results:
        continue
    SeqIO.write(g, '%s.gb' % g.name, 'gb')
    print('=' * 80)
    # One formatted argument keeps the "name description" line identical to
    # the two-item Python 2 print statement.
    print('%s %s' % (g.name, g.description))
    for fi in results:
        print(results[fi][0])