Beispiel #1
0
def dump_orthologs(seed_orthologs_file, orthologs_file, args):
    """Predict orthologs for each seed hit and write them as a TSV file.

    A header line is written first; then every line yielded by
    ``iter_hit_lines(seed_orthologs_file, args)`` is processed by
    ``find_orthologs_per_hit`` in a pool of ``args.cpu`` worker processes,
    and each truthy result is handed to ``write_orthologs_in_file``.

    Args:
        seed_orthologs_file: input passed through to ``iter_hit_lines``.
        orthologs_file: path of the output file (overwritten).
        args: options object. Reads ``predict_output_format`` ("per_query"
            or "per_species"), ``target_taxa`` and ``cpu``; sets
            ``args._expanded_target_taxa`` as a side effect.

    Raises:
        ValueError: if ``args.predict_output_format`` is not one of the two
            supported values.
    """
    # Copied from predict_orthologs.py
    if args.predict_output_format == "per_query":
        ortholog_header = ("#Query", "Orthologs")
    elif args.predict_output_format == "per_species":
        ortholog_header = ("#Query", "Species", "Orthologs")
    else:
        # Fail early with a clear message instead of a NameError on the
        # header line after the output file was already created.
        raise ValueError("Unsupported predict_output_format: %r" %
                         (args.predict_output_format,))

    if args.target_taxa != 'all':
        args._expanded_target_taxa = orthology.normalize_target_taxa(
            args.target_taxa)
    else:
        # report orthologs from any species by default
        args._expanded_target_taxa = None

    # The context manager guarantees the output is flushed and closed, even
    # when a worker or the writer raises.
    with open(orthologs_file, "w") as OUT:
        OUT.write("\t".join(ortholog_header) + "\n")

        pool = multiprocessing.Pool(args.cpu)
        try:
            for result in pool.imap(find_orthologs_per_hit,
                                    iter_hit_lines(seed_orthologs_file, args)):
                if result:
                    write_orthologs_in_file(result, OUT, args)
        finally:
            # Always release the worker processes, also on error.
            pool.terminate()
Beispiel #2
0
def _annotate_hit_line(arguments):
    """Annotate one seed-ortholog hit line.

    Args:
        arguments: a ``(line, args)`` pair. ``line`` is a tab-separated
            record whose first four fields are query name, best hit name,
            best hit e-value and best hit score. ``args`` is the options
            object; this function reads ``seed_ortholog_score``,
            ``seed_ortholog_evalue``, ``tax_scope``, ``target_taxa``,
            ``target_orthologs``, ``excluded_taxa``, ``go_evidence`` and
            ``go_excluded``.

    Returns:
        ``None`` when the line is blank or a comment, the hit is ``-`` or
        ``ERROR``, the hit fails the score/e-value thresholds, the hit
        belongs to no orthologous group, or (in ``auto`` scope) none of the
        levels in ``TAXONOMIC_RESOLUTION`` matches. Otherwise the tuple
        ``(query_name, best_hit_name, best_hit_evalue, best_hit_score,
        annotations, annot_level_max, swallowest_level, match_nogs,
        orthologs)``. ``orthologs`` is ``None`` if the ortholog lookup
        raised, and ``annotations`` is ``{}`` when there are no orthologs.
    """
    annota.connect()
    line, args = arguments

    if not line.strip() or line.startswith('#'):
        return None
    # Build a real list (not a lazy ``map`` object) so the fields can be
    # indexed regardless of the Python version.
    r = [field.strip() for field in line.split('\t')]

    query_name = r[0]
    best_hit_name = r[1]
    if best_hit_name == '-' or best_hit_name == 'ERROR':
        return None

    best_hit_evalue = float(r[2])
    best_hit_score = float(r[3])
    if (best_hit_score < args.seed_ortholog_score
            or best_hit_evalue > args.seed_ortholog_evalue):
        return None

    match_nogs = annota.get_member_ogs(best_hit_name)
    if not match_nogs:
        return None

    # Collect every level listed in LEVEL_PARENTS for the level part
    # ("<og>@<level>") of each matching group.
    match_levels = set()
    for nog in match_nogs:
        match_levels.update(LEVEL_PARENTS[nog.split("@")[1]])

    # Matching level with the largest LEVEL_DEPTH value.
    swallowest_level = sorted(match_levels & set(LEVEL_DEPTH.keys()),
                              key=lambda x: LEVEL_DEPTH[x],
                              reverse=True)[0]

    annot_levels = set()
    if args.tax_scope == "auto":
        # Pick the first level, in TAXONOMIC_RESOLUTION order, that the hit
        # belongs to.
        for level in TAXONOMIC_RESOLUTION:
            if level in match_levels:
                annot_levels.add(level)
                annot_level_max = LEVEL_NAMES.get(level, level)
                break
        else:
            # No usable level: previously this left ``annot_level_max``
            # unbound and crashed at the return statement. Treat the hit as
            # not annotatable, like the other early exits above.
            return None
    else:
        annot_levels.add(args.tax_scope)
        annot_level_max = LEVEL_NAMES.get(args.tax_scope, args.tax_scope)

    if args.target_taxa != 'all':
        target_taxa = orthology.normalize_target_taxa(args.target_taxa)
    else:
        target_taxa = None

    try:
        all_orthologies = annota.get_member_orthologs(
            best_hit_name, target_taxa=target_taxa, target_levels=annot_levels)
    except Exception:
        # Best effort: a failed lookup yields a result without orthologs or
        # annotations instead of aborting the whole run.
        orthologs = None
    else:
        orthologs = sorted(all_orthologies[args.target_orthologs])
        if args.excluded_taxa:
            # Ortholog names are matched on the "<excluded_taxa>." prefix.
            excluded_prefix = "%s." % args.excluded_taxa
            orthologs = [
                o for o in orthologs if not o.startswith(excluded_prefix)
            ]

    if orthologs:
        annotations = annota.summarize_annotations(
            orthologs,
            target_go_ev=args.go_evidence,
            excluded_go_ev=args.go_excluded)
    else:
        annotations = {}

    return (query_name, best_hit_name, best_hit_evalue, best_hit_score,
            annotations, annot_level_max, swallowest_level, match_nogs,
            orthologs)
Beispiel #3
0
def annotate_hit_line(arguments):
    """Annotate one seed-ortholog hit line.

    Args:
        arguments: a ``(line, args)`` pair. ``line`` is a tab-separated
            record whose first four fields are query name, best hit name,
            best hit e-value and best hit score. ``args`` is the options
            object; this function reads ``seed_ortholog_score``,
            ``seed_ortholog_evalue``, ``tax_scope``, ``target_taxa``,
            ``target_orthologs``, ``excluded_taxa``, ``go_evidence`` and
            ``go_excluded``.

    Returns:
        ``None`` when the line is blank or a comment, the hit is ``-`` or
        ``ERROR``, the hit fails the score/e-value thresholds, the hit
        belongs to no orthologous group, or (in ``auto`` scope) none of the
        levels in ``TAXONOMIC_RESOLUTION`` matches. Otherwise the tuple
        ``(query_name, best_hit_name, best_hit_evalue, best_hit_score,
        best_name, gos, kegg, bigg, annot_level_max, match_nogs,
        orthologs)``. ``best_name`` is ``''`` and the three annotation sets
        are empty when no orthologs remain after filtering.
    """
    annota.connect()
    line, args = arguments

    if not line.strip() or line.startswith('#'):
        return None
    # Build a real list (not a lazy ``map`` object) so the fields can be
    # indexed regardless of the Python version.
    r = [field.strip() for field in line.split('\t')]

    query_name = r[0]
    best_hit_name = r[1]
    if best_hit_name == '-' or best_hit_name == 'ERROR':
        return None

    best_hit_evalue = float(r[2])
    best_hit_score = float(r[3])
    if (best_hit_score < args.seed_ortholog_score
            or best_hit_evalue > args.seed_ortholog_evalue):
        return None

    match_nogs = annota.get_member_ogs(best_hit_name)
    if not match_nogs:
        return None

    # Level part of each "<og>@<level>" group name.
    match_levels = {nog.split("@")[1] for nog in match_nogs}
    if args.tax_scope == "auto":
        # Pick the first level, in TAXONOMIC_RESOLUTION order, that the hit
        # belongs to.
        for level in TAXONOMIC_RESOLUTION:
            if level in match_levels:
                scope = level
                break
        else:
            # No usable level: previously this left ``annot_levels`` unbound
            # and crashed in the ortholog lookup. Treat the hit as not
            # annotatable, like the other early exits above.
            return None
    else:
        scope = args.tax_scope

    # Expand the chosen scope through LEVEL_CONTENT (falling back to the
    # scope alone) and always include the scope itself.
    annot_levels = set(LEVEL_CONTENT.get(scope, [scope]))
    annot_levels.add(scope)
    annot_level_max = "%s[%d]" % (scope, len(annot_levels))

    if args.target_taxa != 'all':
        target_taxa = orthology.normalize_target_taxa(args.target_taxa)
    else:
        target_taxa = None

    all_orthologies = annota.get_member_orthologs(best_hit_name,
                                                  target_taxa=target_taxa,
                                                  target_levels=annot_levels)

    orthologs = sorted(all_orthologies[args.target_orthologs])

    if args.excluded_taxa:
        # Ortholog names are matched on the "<excluded_taxa>." prefix.
        excluded_prefix = "%s." % args.excluded_taxa
        orthologs = [o for o in orthologs if not o.startswith(excluded_prefix)]

    # Defaults used when there is nothing to summarize.
    best_name = ''
    gos, kegg, bigg = set(), set(), set()

    if orthologs:
        pname, gos, kegg, bigg = annota.summarize_annotations(
            orthologs,
            target_go_ev=args.go_evidence,
            excluded_go_ev=args.go_excluded)

        if pname:
            # Only report a name that at least two orthologs agree on.
            name_candidate, freq = pname.most_common(1)[0]
            if freq >= 2:
                best_name = name_candidate

    return (query_name, best_hit_name, best_hit_evalue, best_hit_score,
            best_name, gos, kegg, bigg, annot_level_max, match_nogs, orthologs)