def main(args):
    """Run the emapper pipeline: sequence search, annotation, result listing.

    Reads its configuration from ``args`` (output, output_dir, scratch_dir,
    resume, override, no_search, no_annot, no_refine, mode, db, input,
    annotate_hits_table, predict_ortho). Changes the process working
    directory to ``args.output_dir`` and, when set, to ``args.scratch_dir``.

    Raises:
        emapperException: output files already exist and neither
            ``args.resume`` nor ``args.override`` was given.
        IOError: ``args.annotate_hits_table`` is set but does not exist.

    NOTE(review): a second ``def main(args)`` appears later in this file;
    being defined last, it is the one the module-level name ends up bound to.
    """
    # print(...) is always called with a single argument below, so each call
    # produces the same output whether print is a statement or a function.

    # Output and intermediate files
    hmm_hits_file = "%s.emapper.hmm_hits" % args.output
    seed_orthologs_file = "%s.emapper.seed_orthologs" % args.output
    annot_file = "%s.emapper.annotations" % args.output
    orthologs_file = "%s.emapper.predict_orthologs" % args.output

    if args.no_search:
        output_files = [annot_file]
    elif args.no_annot:
        output_files = [hmm_hits_file, seed_orthologs_file]
    else:
        output_files = [hmm_hits_file, seed_orthologs_file, annot_file]
    # NOTE(review): orthologs_file is not part of output_files, so it is not
    # covered by the resume/override check nor copied back from scratch.

    # Convert to absolute paths before changing directory: relative paths
    # would otherwise be resolved against the new working directory.
    if args.annotate_hits_table:
        args.annotate_hits_table = os.path.abspath(args.annotate_hits_table)
    output_dir = os.path.abspath(args.output_dir)

    # Force user to decide what to do with existing files
    os.chdir(output_dir)
    if (any(pexists(fname) for fname in output_files)
            and not args.resume and not args.override):
        print("Output files detected in disk. Use --resume or --override to continue")
        raise emapperException()

    if args.override:
        for outf in output_files:
            silent_rm(outf)

    print('#  %s' % (get_version(),))
    print('# ./emapper.py  %s' % ' '.join(sys.argv[1:]))

    if args.scratch_dir:
        # If resuming and using --scratch_dir, transfer existing files.
        if args.resume:
            for f in output_files:
                if pexists(f):
                    print(" Copying input file %s to scratch dir %s" % (
                        f, args.scratch_dir))
                    shutil.copy(f, args.scratch_dir)

        # Change working dir
        os.chdir(args.scratch_dir)

    # Step 1. Sequence search
    if not args.no_search:
        if args.mode == 'diamond':
            dump_diamond_matches(args.input, seed_orthologs_file, args)

        elif args.mode == 'hmmer':
            host, port, dbpath, scantype, idmap = setup_hmm_search(args)

            # Start HMM SCANNING sequences (if requested)
            if not pexists(hmm_hits_file) or args.override:
                dump_hmm_matches(args.input, hmm_hits_file, dbpath, port,
                                 scantype, idmap, args)

            if not args.no_refine and (not pexists(seed_orthologs_file)
                                       or args.override):
                if args.db == 'viruses':
                    print('Skipping seed ortholog detection in "viruses" database')
                elif args.db in EGGNOG_DATABASES:
                    refine_matches(args.input, seed_orthologs_file,
                                   hmm_hits_file, args)
                else:
                    print('refined hits not available for custom hmm databases.')

    # Step 2. Annotation
    if not args.no_annot:
        annota.connect()
        if args.annotate_hits_table:
            if not os.path.exists(args.annotate_hits_table):
                raise IOError(errno.ENOENT, os.strerror(errno.ENOENT),
                              args.annotate_hits_table)
            annotate_hits_file(args.annotate_hits_table, annot_file,
                               hmm_hits_file, args)
        elif args.db == 'viruses':
            raw_annot_file = hmm_hits_file + '.annotations'
            annotate_hmm_matches(hmm_hits_file, raw_annot_file, args)
            # Rewrite the HMM annotation table into the annotations file
            # layout; both handles are closed even if a line is malformed.
            with open(annot_file, 'w') as OUT:
                with open(raw_annot_file) as hits:
                    for line in hits:
                        if line.startswith('#') or not line.strip():
                            continue
                        # Exactly 14 tab-separated columns are expected;
                        # anything else raises ValueError here.
                        (query, hitname, level, evalue, sum_score,
                         query_length, hmmfrom, hmmto, seqfrom, seqto,
                         q_coverage, nm, desc, cats) = line.split("\t")

                        if hitname != '-' and hitname != 'ERROR':
                            fields = (query, hitname, evalue, sum_score,
                                      '', '', '', 'viruses',
                                      hitname + "@viruses",
                                      "%s|%s|%s" % (hitname, evalue, sum_score),
                                      cats.replace('\n', ''),
                                      desc.replace('\n', ' '))
                            OUT.write('\t'.join(map(str, fields)) + '\n')
        else:
            annotate_hits_file(seed_orthologs_file, annot_file,
                               hmm_hits_file, args)

    if args.predict_ortho:
        orthology.connect()
        dump_orthologs(seed_orthologs_file, orthologs_file, args)

    # If running in scratch, copy result files to the real output dir.
    # Each existing output file is copied (not just the annotations file),
    # and the destination is the absolute output dir because the working
    # directory is now the scratch dir. Scratch copies are left in place:
    # the listing below still looks them up in the current directory.
    if args.scratch_dir:
        for fname in output_files:
            if pexists(fname):
                print(" Copying result file %s from scratch to %s" % (
                    fname, args.output_dir))
                shutil.copy(fname, output_dir)

    # Finalize and exit
    print(colorify('Done', 'green'))
    print(colorify('Result files:', 'yellow'))
    for f in output_files:
        if pexists(f):
            print(" %s" % (f))

    print('Total time: %g secs' % (time.time() - _total_time))

    if args.mode == 'hmmer':
        print(get_citation(['hmmer']))
    elif args.mode == 'diamond':
        print(get_citation(['diamond']))

    shutdown_server()
def main(args):
    """Run the emapper pipeline: sequence search, annotation, result listing.

    Reads its configuration from ``args`` (output, output_dir, scratch_dir,
    resume, override, no_search, no_annot, no_refine, mode, db, input,
    annotate_hits_table). Changes the process working directory to
    ``args.output_dir`` and, when set, to ``args.scratch_dir``.

    Raises:
        emapperException: output files already exist and neither
            ``args.resume`` nor ``args.override`` was given.

    NOTE(review): an earlier ``def main(args)`` exists in this file; this
    later definition is the one the module-level name ends up bound to.
    """
    # print(...) is always called with a single argument below, so each call
    # produces the same output whether print is a statement or a function.

    # Output and intermediate files
    hmm_hits_file = "%s.emapper.hmm_hits" % args.output
    seed_orthologs_file = "%s.emapper.seed_orthologs" % args.output
    annot_file = "%s.emapper.annotations" % args.output

    if args.no_search:
        output_files = [annot_file]
    elif args.no_annot:
        output_files = [hmm_hits_file, seed_orthologs_file]
    else:
        output_files = [hmm_hits_file, seed_orthologs_file, annot_file]

    # Convert to absolute paths before changing directory: relative paths
    # would otherwise be resolved against the new working directory.
    if args.annotate_hits_table:
        args.annotate_hits_table = os.path.abspath(args.annotate_hits_table)
    output_dir = os.path.abspath(args.output_dir)

    # Force user to decide what to do with existing files
    os.chdir(output_dir)
    if (any(pexists(fname) for fname in output_files)
            and not args.resume and not args.override):
        print("Output files detected in disk. Use --resume or --override to continue")
        raise emapperException()

    if args.override:
        for outf in output_files:
            silent_rm(outf)

    print('#  %s' % (get_version(),))
    print('# ./emapper.py  %s' % ' '.join(sys.argv[1:]))

    if args.scratch_dir:
        # If resuming and using --scratch_dir, transfer existing files.
        if args.resume:
            for f in output_files:
                if pexists(f):
                    print(" Copying input file %s to scratch dir %s" % (
                        f, args.scratch_dir))
                    shutil.copy(f, args.scratch_dir)

        # Change working dir
        os.chdir(args.scratch_dir)

    # Step 1. Sequence search
    if not args.no_search:
        if args.mode == 'diamond':
            dump_diamond_matches(args.input, seed_orthologs_file, args)

        elif args.mode == 'hmmer':
            host, port, dbpath, scantype, idmap = setup_hmm_search(args)

            # Start HMM SCANNING sequences (if requested)
            if not pexists(hmm_hits_file) or args.override:
                dump_hmm_matches(args.input, hmm_hits_file, dbpath, port,
                                 scantype, idmap, args)

            if not args.no_refine and (not pexists(seed_orthologs_file)
                                       or args.override):
                if args.db == 'viruses':
                    print('Skipping seed ortholog detection in "viruses" database')
                elif args.db in EGGNOG_DATABASES:
                    refine_matches(args.input, seed_orthologs_file,
                                   hmm_hits_file, args)
                else:
                    print('refined hits not available for custom hmm databases.')

    # Step 2. Annotation
    if not args.no_annot:
        annota.connect()
        if args.annotate_hits_table:
            annotate_hits_file(args.annotate_hits_table, annot_file,
                               hmm_hits_file, args)
        elif args.db == 'viruses':
            raw_annot_file = hmm_hits_file + '.annotations'
            annotate_hmm_matches(hmm_hits_file, raw_annot_file, args)
            # Rewrite the HMM annotation table into the annotations file
            # layout; both handles are closed even if a line is malformed.
            with open(annot_file, 'w') as OUT:
                with open(raw_annot_file) as hits:
                    for line in hits:
                        if line.startswith('#') or not line.strip():
                            continue
                        # Exactly 14 tab-separated columns are expected;
                        # anything else raises ValueError here.
                        (query, hitname, level, evalue, sum_score,
                         query_length, hmmfrom, hmmto, seqfrom, seqto,
                         q_coverage, nm, desc, cats) = line.split("\t")

                        if hitname != '-' and hitname != 'ERROR':
                            fields = (query, hitname, evalue, sum_score,
                                      '', '', '', 'viruses',
                                      hitname + "@viruses",
                                      "%s|%s|%s" % (hitname, evalue, sum_score),
                                      cats.replace('\n', ''),
                                      desc.replace('\n', ' '))
                            OUT.write('\t'.join(map(str, fields)) + '\n')
        else:
            annotate_hits_file(seed_orthologs_file, annot_file,
                               hmm_hits_file, args)

    # If running in scratch, copy result files to the real output dir.
    # Each existing output file is copied (not just the annotations file),
    # and the destination is the absolute output dir because the working
    # directory is now the scratch dir. Scratch copies are left in place:
    # the listing below still looks them up in the current directory.
    if args.scratch_dir:
        for fname in output_files:
            if pexists(fname):
                print(" Copying result file %s from scratch to %s" % (
                    fname, args.output_dir))
                shutil.copy(fname, output_dir)

    # Finalize and exit
    print(colorify('Done', 'green'))
    print(colorify('Result files:', 'yellow'))
    for f in output_files:
        if pexists(f):
            print(" %s" % (f))

    print('Total time: %g secs' % (time.time() - _total_time))

    if args.mode == 'hmmer':
        print(get_citation(['hmmer']))
    elif args.mode == 'diamond':
        print(get_citation(['diamond']))

    shutdown_server()