def dump_orthologs(seed_orthologs_file, orthologs_file, args):
    """Predict orthologs for every seed hit and write them to a TSV file.

    Copied from predict_orthologs.py.

    Args:
        seed_orthologs_file: Handed to ``iter_hit_lines`` together with
            ``args`` to produce the work items for the worker pool.
        orthologs_file: Path of the output file. It is opened in ``"w"``
            mode, so any existing content is replaced.
        args: Options object. Reads ``predict_output_format``
            (``"per_query"`` or ``"per_species"``), ``target_taxa`` and
            ``cpu``; sets ``args._expanded_target_taxa`` as a side effect.

    Raises:
        ValueError: If ``args.predict_output_format`` is not one of the two
            supported formats.
    """
    # Resolve the header first so that an unsupported format fails with a
    # clear message before the output file is created or truncated.
    if args.predict_output_format == "per_query":
        ortholog_header = ("#Query", "Orthologs")
    elif args.predict_output_format == "per_species":
        ortholog_header = ("#Query", "Species", "Orthologs")
    else:
        raise ValueError(
            "Unsupported predict_output_format: %r"
            % (args.predict_output_format,))

    # The context manager guarantees the output is flushed and closed even
    # if a worker or the writer raises.
    with open(orthologs_file, "w") as OUT:
        OUT.write("\t".join(ortholog_header) + "\n")

        if args.target_taxa != 'all':
            args._expanded_target_taxa = orthology.normalize_target_taxa(
                args.target_taxa)
        else:
            # report orthologs from any species by default
            args._expanded_target_taxa = None

        pool = multiprocessing.Pool(args.cpu)
        try:
            hit_lines = iter_hit_lines(seed_orthologs_file, args)
            for result in pool.imap(find_orthologs_per_hit, hit_lines):
                if result:
                    write_orthologs_in_file(result, OUT, args)
        finally:
            # Always release the worker processes, including on error.
            pool.terminate()
def _annotate_hit_line(arguments):
    """Annotate a single seed-ortholog hit line.

    Args:
        arguments: A ``(line, args)`` pair. ``line`` is a tab-separated
            record whose first four columns are query name, best hit name,
            e-value and score. ``args`` is the options object; this function
            reads ``seed_ortholog_score``, ``seed_ortholog_evalue``,
            ``tax_scope``, ``target_taxa``, ``target_orthologs``,
            ``excluded_taxa``, ``go_evidence`` and ``go_excluded``.

    Returns:
        ``None`` when the line is skipped: blank or ``#`` comment line, best
        hit of ``'-'`` or ``'ERROR'``, hit outside the score/e-value
        thresholds, no orthologous groups for the hit, or (with
        ``tax_scope == "auto"``) no level of ``TAXONOMIC_RESOLUTION`` among
        the matched levels.

        Otherwise the tuple ``(query_name, best_hit_name, best_hit_evalue,
        best_hit_score, annotations, annot_level_max, swallowest_level,
        match_nogs, orthologs)``. ``annotations`` is the result of
        ``annota.summarize_annotations`` or ``{}`` when there are no
        orthologs; ``orthologs`` is ``None`` when the ortholog lookup failed.
    """
    annota.connect()
    line, args = arguments

    if not line.strip() or line.startswith('#'):
        return None

    # Build a real list: a ``map`` object is not indexable on Python 3.
    r = [field.strip() for field in line.split('\t')]
    query_name = r[0]
    best_hit_name = r[1]
    if best_hit_name == '-' or best_hit_name == 'ERROR':
        return None

    best_hit_evalue = float(r[2])
    best_hit_score = float(r[3])
    if (best_hit_score < args.seed_ortholog_score
            or best_hit_evalue > args.seed_ortholog_evalue):
        return None

    match_nogs = annota.get_member_ogs(best_hit_name)
    if not match_nogs:
        return None

    # Collect, for every matched group, the levels LEVEL_PARENTS lists for
    # the level named after the "@" in the group identifier.
    match_levels = set()
    for nog in match_nogs:
        match_levels.update(LEVEL_PARENTS[nog.split("@")[1]])

    # Matched level with the greatest LEVEL_DEPTH value.
    swallowest_level = max(match_levels & set(LEVEL_DEPTH.keys()),
                           key=lambda x: LEVEL_DEPTH[x])

    annot_levels = set()
    if args.tax_scope == "auto":
        # Use the first level of TAXONOMIC_RESOLUTION that was matched.
        for level in TAXONOMIC_RESOLUTION:
            if level in match_levels:
                annot_levels.add(level)
                annot_level_max = LEVEL_NAMES.get(level, level)
                break
        else:
            # No candidate level matched: there is nothing to annotate at,
            # so skip the hit instead of failing later on an unbound name.
            return None
    else:
        annot_levels.add(args.tax_scope)
        annot_level_max = LEVEL_NAMES.get(args.tax_scope, args.tax_scope)

    if args.target_taxa != 'all':
        target_taxa = orthology.normalize_target_taxa(args.target_taxa)
    else:
        target_taxa = None

    try:
        all_orthologies = annota.get_member_orthologs(
            best_hit_name, target_taxa=target_taxa,
            target_levels=annot_levels)
    except Exception:
        # Best effort: a failed lookup is reported as a hit without
        # orthologs (and therefore without annotations), not as a crash.
        orthologs = None
    else:
        orthologs = sorted(all_orthologies[args.target_orthologs])
        if args.excluded_taxa:
            excluded_prefix = "%s." % args.excluded_taxa
            orthologs = [o for o in orthologs
                         if not o.startswith(excluded_prefix)]

    if orthologs:
        annotations = annota.summarize_annotations(
            orthologs,
            target_go_ev=args.go_evidence,
            excluded_go_ev=args.go_excluded)
    else:
        annotations = {}

    return (query_name, best_hit_name, best_hit_evalue, best_hit_score,
            annotations, annot_level_max, swallowest_level, match_nogs,
            orthologs)
def annotate_hit_line(arguments):
    """Annotate a single seed-ortholog hit line with name, GO, KEGG and BiGG.

    Args:
        arguments: A ``(line, args)`` pair. ``line`` is a tab-separated
            record whose first four columns are query name, best hit name,
            e-value and score. ``args`` is the options object; this function
            reads ``seed_ortholog_score``, ``seed_ortholog_evalue``,
            ``tax_scope``, ``target_taxa``, ``target_orthologs``,
            ``excluded_taxa``, ``go_evidence`` and ``go_excluded``.

    Returns:
        ``None`` when the line is skipped: blank or ``#`` comment line, best
        hit of ``'-'`` or ``'ERROR'``, hit outside the score/e-value
        thresholds, no orthologous groups for the hit, or (with
        ``tax_scope == "auto"``) no level of ``TAXONOMIC_RESOLUTION`` among
        the matched levels.

        Otherwise the tuple ``(query_name, best_hit_name, best_hit_evalue,
        best_hit_score, best_name, gos, kegg, bigg, annot_level_max,
        match_nogs, orthologs)``. ``best_name`` is the most common name
        among the orthologs when it occurs at least twice, else ``''``.
    """
    annota.connect()
    line, args = arguments

    if not line.strip() or line.startswith('#'):
        return None

    # Build a real list: a ``map`` object is not indexable on Python 3.
    r = [field.strip() for field in line.split('\t')]
    query_name = r[0]
    best_hit_name = r[1]
    if best_hit_name == '-' or best_hit_name == 'ERROR':
        return None

    best_hit_evalue = float(r[2])
    best_hit_score = float(r[3])
    if (best_hit_score < args.seed_ortholog_score
            or best_hit_evalue > args.seed_ortholog_evalue):
        return None

    match_nogs = annota.get_member_ogs(best_hit_name)
    if not match_nogs:
        return None

    # The level is the part after the "@" in each group identifier.
    match_levels = {nog.split("@")[1] for nog in match_nogs}

    if args.tax_scope == "auto":
        # Use the first level of TAXONOMIC_RESOLUTION that was matched.
        for level in TAXONOMIC_RESOLUTION:
            if level in match_levels:
                annot_levels = set(LEVEL_CONTENT.get(level, [level]))
                annot_levels.add(level)
                annot_level_max = "%s[%d]" % (level, len(annot_levels))
                break
        else:
            # No candidate level matched: there is nothing to annotate at,
            # so skip the hit instead of failing later on an unbound name.
            return None
    else:
        annot_levels = set(
            LEVEL_CONTENT.get(args.tax_scope, [args.tax_scope]))
        annot_levels.add(args.tax_scope)
        annot_level_max = "%s[%d]" % (args.tax_scope, len(annot_levels))

    if args.target_taxa != 'all':
        target_taxa = orthology.normalize_target_taxa(args.target_taxa)
    else:
        target_taxa = None

    all_orthologies = annota.get_member_orthologs(
        best_hit_name, target_taxa=target_taxa, target_levels=annot_levels)
    orthologs = sorted(all_orthologies[args.target_orthologs])
    if args.excluded_taxa:
        excluded_prefix = "%s." % args.excluded_taxa
        orthologs = [o for o in orthologs
                     if not o.startswith(excluded_prefix)]

    best_name = ''
    if orthologs:
        pname, gos, kegg, bigg = annota.summarize_annotations(
            orthologs,
            target_go_ev=args.go_evidence,
            excluded_go_ev=args.go_excluded)
        if pname:
            # Only trust a name that at least two orthologs agree on.
            # NOTE(review): pname is presumably a collections.Counter, given
            # the most_common() call - confirm against summarize_annotations.
            name_candidate, freq = pname.most_common(1)[0]
            if freq >= 2:
                best_name = name_candidate
    else:
        gos = set()
        kegg = set()
        bigg = set()

    return (query_name, best_hit_name, best_hit_evalue, best_hit_score,
            best_name, gos, kegg, bigg, annot_level_max, match_nogs,
            orthologs)