Ejemplo n.º 1
0
    elif opts.slim_out == "all":
        only_direct = False
    else:
        p.print_help()
        sys.exit(1)

    # load DAGs
    # Two GODag objects are built: one from `obo_file` and one from
    # `slim_obo_file` (presumably the full ontology and its slim subset --
    # both paths are defined outside this excerpt).
    go_dag = GODag(obo_file)
    goslim_dag = GODag(slim_obo_file)

    # in case a single term is given as input:
    # the term is checked against `go_dag`, mapped with mapslim, and the
    # selected slim terms are printed as one ';'-separated line.
    if opts.term:
        # Unknown term: report on stderr and terminate with exit status 1.
        if opts.term not in go_dag:
            print(("term %s not found!" % opts.term), file=sys.stderr)
            sys.exit(1)
        # mapslim's result is unpacked into two collections, named here as
        # the direct slim ancestors and all slim ancestors of the term.
        direct_anc, all_anc = mapslim(opts.term, go_dag, goslim_dag)
        # output either all or only direct slims, depending on user command
        if only_direct:
            slim_terms_str = ";".join(direct_anc)
        else:
            slim_terms_str = ";".join(all_anc)
        print(slim_terms_str)

    # in case a association file is given as input
    if opts.ass_file_name:
        assert os.path.exists(opts.ass_file_name), ("file %s not found!"
                                                    % opts.ass_file_name)
        assocs = read_associations(opts.ass_file_name)
        for protein_product, go_terms in assocs.items():
            all_direct_anc = set()
            all_covered_anc = set()
Ejemplo n.º 2
0
    # For each probe term: the pair (direct slim ancestors, all slim
    # ancestors) that mapslim is expected to return.
    expected_results = {
        'GO:0000005': (
            {'GO:0000002', 'GO:0000003'},
            {'GO:0000001', 'GO:0000002', 'GO:0000003'},
        ),
        'GO:0000006': (
            {'GO:0000003'},
            {'GO:0000001', 'GO:0000003'},
        ),
        'GO:0000007': (
            {'GO:0000004'},
            {'GO:0000001', 'GO:0000003', 'GO:0000004'},
        ),
        'GO:0000008': (
            {'GO:0000003'},
            {'GO:0000001', 'GO:0000003'},
        ),
        'GO:0000009': (
            {'GO:0000004'},
            {'GO:0000001', 'GO:0000003', 'GO:0000004'},
        ),
        'GO:0000010': (
            {'GO:0000002', 'GO:0000003'},
            {'GO:0000001', 'GO:0000002', 'GO:0000003'},
        ),
    }

    # Flipped to False as soon as any term yields an unexpected mapping.
    tests_succeed = True

    for go_term, (exp_direct, exp_all) in expected_results.items():
        sys.stderr.write("Testing for term '{}' ...\n".format(go_term))
        direct_anc, all_anc = mapslim(go_term, go_dag, goslim_dag)
        # Both ancestor sets have to match for the term to count as passed.
        matches = direct_anc == exp_direct and all_anc == exp_all
        if matches:
            sys.stderr.write("success!\n")
        else:
            tests_succeed = False
            sys.stderr.write("failed.\n")

    # The exit status mirrors the overall outcome: 0 on success, 1 otherwise.
    if tests_succeed:
        print("All test passed successfully!")
        sys.exit(0)
    else:
        sys.stderr.write("[ERROR] At least one test failed.\n")
        sys.exit(1)
        # Per-protein accumulators: the GO accessions annotated on `protein`,
        # every GO-slim ancestor reached from them, and the subset of those
        # ancestors that is not a direct slim ancestor of the term mapped.
        all_go_accs_in_a_protein = set()
        all_goslim_anc_accs_in_a_protein = set()
        all_goslim_covered_anc = set()

        go_accs = set(interpro_go.loc[interpro_go['Protein Accession'] == protein]['GO Accession'])
        for go_acc in go_accs:
            # One cell can carry several accessions joined by '|'; null cells
            # carry no annotation and are skipped.
            if not pd.isnull(go_acc):
                all_go_accs_in_a_protein |= set(go_acc.split('|'))

        if all_go_accs_in_a_protein:
            for go_term in all_go_accs_in_a_protein:
                # Accessions missing from the loaded ontology are ignored.
                if go_term not in go:
                    continue

                if USE_SLIM:
                    direct_anc, all_anc = mapslim(go_term, go, goslim)
                    all_goslim_anc_accs_in_a_protein |= all_anc
                    all_goslim_covered_anc |= (all_anc - direct_anc)

                query_term = go.query_term(go_term)
                # DataFrame.append was removed in pandas 2.0; pd.concat with
                # ignore_index=True is its documented replacement.
                output_table = pd.concat(
                    [
                        output_table,
                        pd.DataFrame({
                            'Protein Accession': [protein],
                            'GO Category': [query_term.namespace],
                            'GO Accession': [go_term],
                            'GO Description': [query_term.name],
                            'GO Level': [query_term.level],
                        }),
                    ],
                    ignore_index=True,
                )

            if USE_SLIM:
                if ONLY_DIRECT:
                    # Keep only slim terms that were never merely "covered",
                    # i.e. drop every ancestor recorded as non-direct above.
                    all_goslim_direct_anc_accs_in_a_protein = all_goslim_anc_accs_in_a_protein - all_goslim_covered_anc
                    for goslim_term in all_goslim_direct_anc_accs_in_a_protein:
                        query_goslim_term = goslim.query_term(goslim_term)
                        output_slim_table = pd.concat(
                            [
                                output_slim_table,
                                pd.DataFrame({
                                    'Protein Accession': [protein],
                                    'GO Category': [query_goslim_term.namespace],
                                    'GOSlim Accession': [goslim_term],
                                    'GOSlim Description': [query_goslim_term.name],
                                    'GOSlim Level': [query_goslim_term.level],
                                }),
                            ],
                            ignore_index=True,
                        )
                else:
                    for goslim_term in all_goslim_anc_accs_in_a_protein:
                        query_goslim_term = goslim.query_term(goslim_term)
Ejemplo n.º 4
0
from goatools.mapslim import mapslim
from collections import Counter

# Ontology graphs, going by the file names: the full GO plus the generic and
# metagenomics slims.
go_dag = GODag('/home/gstupp/goatools/go.obo')
goslim_dag = GODag('/home/gstupp/goatools3/goslim_generic.obo')
goslim_meta = GODag('/home/gstupp/goatools3/goslim_metagenomics.obo')

# Run every record yielded by the DTASelect parser through get_domains.
ana_DTA = '/home/gstupp/01_2015_mass_spec/H1_11082014/1108_Gly1_2014_12_15_15_29205/dtaselect_results_sfp0.01_p2/DTASelect-filter.txt'
parser = blazmass_tools.dta_select_parser(ana_DTA, small=True)
ps = [get_domains(record) for record in parser]

# Union of all GO terms over the records that carry a 'set_go' entry.
set_go = set(chain.from_iterable(
    rec['set_go'] for rec in ps if rec['set_go'] is not None))

# Attach to each record the slim terms its GO terms map to directly
# (element [0] of mapslim's result); records without GO terms get None.
for p in ps:
    if not p['set_go']:
        p['go_slim'] = None
        continue
    known_terms = (term for term in p['set_go'] if term in go_dag)
    p['go_slim'] = set(chain.from_iterable(
        mapslim(term, go_dag, goslim_meta)[0] for term in known_terms))

# Count in how many records each slim term occurs and fetch its name.
go_slim = Counter(chain.from_iterable(
    rec['go_slim'] for rec in ps if rec['go_slim']))
labels = {term: go_dag.query_term(term).name for term in go_slim}
# Bare expression: names of the ten most frequent slim terms. The value is
# only shown when this runs in an interactive session.
[labels[term] for term, _count in go_slim.most_common(10)]

import plot_tools

# One colour per distinct slim term, evenly spaced over the jet colormap.
cmap = plt.cm.jet
colors = cmap(np.linspace(0.0, 1.0, num=len(go_slim)))