# Imports assumed by the snippets below; the project-specific helpers
# (generate_run_filters, all_who_pass_run_filters, score2dict, read_multi_fastas,
# switches_differ, switch_symm_changer, are_docs_from_diff_clusters, Result, ...)
# live in the in-house modules that the standalone scripts further down import.
import os
import re
import pickle
import networkx as nx


def post_pred_cliques(args):
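    """Collect designs that pass the run filters, then search for maximum
    cliques of mutually compatible designs.

    Passing results are cached in ./all_data.obj. Two designs are connected
    in the compatibility graph when their cohesin switches differ by at least
    args['diff_by'] positions and their dockerin switches (including the
    symmetry-related form) differ by at least args['doc_diff_by'].
    """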

    run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3})

    if not os.path.isfile('./all_data.obj'):
        sc_files = [a for a in os.listdir('./') if '.score' in a]
        cohs_seqs = read_multi_fastas('/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/recliques_4Nov/all_cohs.fasta')
        docs_seqs = read_multi_fastas('/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/recliques_4Nov/all_docs.fasta')
        results = []
        for sc_file in sc_files:
            seq_name = '_'.join(sc_file.split('_')[1:8])
            coh_name = seq_name+'.pdb.gz.A'
            doc_name = seq_name+'.pdb.gz.B'
            sc_dict = score2dict(sc_file)
            ynum = re.search('y[0-9]{3}', sc_file).group(0)  # 'y###' series tag (currently unused)
            passed, failed = all_who_pass_run_filters(args, sc_dict, run_filters)
            if len(passed) >= args['purples_threshold']:
                r = Result(seq_name, cohs_seqs[coh_name], docs_seqs[doc_name], len(passed))
                results.append(r)
        with open('./all_data.obj', 'wb') as fout:
            pickle.dump(results, fout)
    else:
        with open('./all_data.obj', 'rb') as fin:
            results = pickle.load(fin)

    if not os.path.isfile('./graph.obj'):  # NOTE: graph.obj is never written below, so this branch always runs
        result_dict = {i+1: r for i, r in enumerate(results)}
        G = nx.Graph()
        G.add_nodes_from(result_dict.keys())
        # nodes_iter() is the NetworkX 1.x API; on NetworkX >= 2 use G.nodes()
        for n1 in G.nodes_iter():
            for n2 in G.nodes_iter():
                if n1 != n2:
                    coh_sw_1, coh_sw_2 = result_dict[n1].coh_switch, result_dict[n2].coh_switch
                    doc_sw_1, doc_sw_2 = result_dict[n1].doc_switch, result_dict[n2].doc_switch
                    doc_wt_1, doc_wt_2 = result_dict[n1].doc_wt, result_dict[n2].doc_wt
                    doc_diff = 1 if are_docs_from_diff_clusters(doc_wt_1, doc_wt_2) else 0
                    symm_switch = switch_symm_changer(doc_sw_2)
                    if switches_differ({'diff_by': args['diff_by']}, coh_sw_1, coh_sw_2) >= args['diff_by'] and \
                            switches_differ({'diff_by': args['doc_diff_by']}, doc_sw_1, doc_sw_2) + doc_diff >= args['doc_diff_by'] and \
                            switches_differ({'diff_by': args['doc_diff_by']}, doc_sw_1, symm_switch) + doc_diff >= args['doc_diff_by']:
                        G.add_edge(n1, n2)
                        print('adding edge\n', result_dict[n1], '\n', result_dict[n2])
                    else:
                        print('NOT\n', result_dict[n1], '\n', result_dict[n2])
        cliques = list(nx.find_cliques(G))
        max_len = max(len(a) for a in cliques)
        max_cliques = [a for a in cliques if len(a) == max_len]
        for clq in max_cliques:
            print(clq, '\n', '\n'.join([str(result_dict[a]) for a in clq]))
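
# Usage sketch (illustrative thresholds; assumes the CWD holds the .score files):
# post_pred_cliques({'purples_threshold': 10, 'diff_by': 4, 'doc_diff_by': 3})
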
def main():
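    """Shepherd the LSF run folders: process folders that finished, kill and
    process folders that already collected >= 50 passing designs, and leave
    the rest running.
    """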
    global log
    original_pwd = os.getcwd()
    running = get_running_folders()
    pending = get_pending_folders()
    run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3})
    for runner in running:
        os.chdir(runner)
        if is_folder_finished(runner):
            print('processing %s' % runner)
            log += 'folder is finished, processing %s\n' % runner.split('/')[-2]
            process_folder({'folder': runner, 'force_process': False, 'remove_pdbs': False})
            move_to_processed(runner, running, 0)
        else:
            score_dict = folder_scores(runner)
            passed, failed = all_who_pass_run_filters({}, score_dict, run_filters)
            log += 'passed %i, failed %i\n' % (len(passed), len(failed))
            if len(passed) >= 50:
                print('found enough passed, stopping folder %s' % runner.split('/')[-1])
                log += 'found enough passed, stopping folder\n'
                bkill_folder(runner)
                process_folder({'folder': runner, 'force_process': True, 'remove_pdbs': False})
                move_to_processed(runner, running, len(passed))
            else:
                print('not enough passed, leaving %s running; found %i passed and %i failed' %
                      (runner.split('/')[-2], len(passed), len(failed)))
                log += 'not enough passed, leaving %s running; found %i passed and %i failed\n' % \
                       (runner.split('/')[-2], len(passed), len(failed))
        os.chdir(original_pwd)

    # for pender in pending:
    #     os.chdir(pender)
    #     lsf_status, pends = how_many_queue()
    #     if lsf_status['fleishman'] < 12000:
    #         print('found %i jobs in fleishman, submitting %s' % (lsf_status['fleishman'], pender))
    #         log += 'found %i jobs in fleishman, submitting %s\n' % (lsf_status['fleishman'], pender)
    #         submit_folder(pender)
    #         move_pender_to_runner(pending, pender)
    #     os.chdir(original_pwd)

    # lsf_status, pends = how_many_queue()
    # if lsf_status['new-all.q'] <= 2000:
    #     bswitch_pends(pends, 2000-lsf_status['new-all.q'])
    os.chdir(original_pwd)


def creat_coh_doc_purples():
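    """For every cohesin/dockerin pair, count the designs that pass the run
    filters (the 'purples'); the nested {coh: {doc: count}} dict is cached in
    analysed.obj.
    """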
    results_path = '/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/reclique_18Nov/cliques_prediction/results/'
    sc_files = [a for a in os.listdir(results_path) if '.score' in a]
    run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3,
                                             'hbonds': -10.})

    if not os.path.isfile(results_path+'analysed.obj'):
        coh_doc_purples = {}
        for sc_file in sc_files:
            sc_dict = score2dict(results_path+sc_file)
            passed, failed = all_who_pass_run_filters({}, sc_dict, run_filters)
            coh_name = what_coh(sc_file, args={'naming': 'coh_on_doc'})
            doc_name = what_doc(sc_file, args={'naming': 'coh_on_doc'})
            coh_doc_purples.setdefault(coh_name, {})[doc_name] = len(passed)
        with open(results_path + 'analysed.obj', 'wb') as fout:
            pickle.dump(coh_doc_purples, fout)

    else:
        print("reading coh_doc_purples")
        with open(results_path + 'analysed.obj', 'rb') as fin:
            coh_doc_purples = pickle.load(fin)
    return coh_doc_purples
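
# Example read (hypothetical names): creat_coh_doc_purples()['coh_x']['doc_y']
# gives the number of designs for that pair that passed every filter.
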
#!/usr/bin/env python3.5
import os, shutil
from DoCohResultProcessor import generate_run_filters, all_who_pass_run_filters
from RosettaFilter import score2dict


topath = '/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/recliques_4Nov/clique_6_pdbs/mini_diagonal_11Nov/'
run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3,
                                         'hbonds': 10})
all_docs = [a for a in os.listdir(topath) if 'doc_' in a]
fout = open(topath+'minidiagonal_full_names.txt', 'w')
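
# Walk every doc_* directory under topath; for each design sub-directory that
# holds a Rosetta .sc file, count the designs passing the run filters and,
# when more than 5 pass, record the directory name and copy one
# representative pdb.gz into minidiagonal_pdbs.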

for doc in all_docs:
    all_dirs = os.listdir(topath + doc)
    for design_dir in all_dirs:
        try:
            sc_file = [a for a in os.listdir(topath + doc + '/' + design_dir) if a[-3:] == '.sc']
            if sc_file:
                sc_dict = score2dict(topath + doc + '/' + design_dir + '/' + sc_file[0])
                passed, failed = all_who_pass_run_filters({}, sc_dict, run_filters)
                if len(passed) > 5:
                    fout.write('%s\t%i\n' % (design_dir, len(passed)))
                    shutil.copy(topath + doc + '/' + design_dir + '/' + list(sc_dict.keys())[0] + '.pdb.gz',
                                topath + 'minidiagonal_pdbs')
        except OSError:  # entry is not a directory, or an expected file is missing
            print('no folder', design_dir)
fout.close()

# def analyse_minidiagonal(args):
#     with open('../minidiagonal.txt', 'w') as fout:
#         run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3,
#                                                  'hbonds': 10})
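
# Fragment of a larger mode dispatcher. The branch below loads pickled
# identity-sorted cliques and prints the deduplicated structure names behind
# every switch in every clique; the 'bins_diagonal' branch bins switches taken
# from the diagonal and caches them in bins.obj.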
        with open('iden_clq.obj', 'rb') as iden_file:
            iden_cliques = pickle.load(iden_file)
        iden_cliques.sort(key=lambda k: (k['doc_iden_avg'], k['coh_iden_avg']))
        with open('switches.obj', 'rb') as sw_in:
            switches = pickle.load(sw_in)
        all_of_them = []
        for clique in iden_cliques:
            for swi_str in clique['clique']:
                for struct in switches[swi_str]:
                    all_of_them.append(struct[:-2])  # strip the 2-char chain tag ('.A'/'.B')
        all_of_them = list(set(all_of_them))
        for i in all_of_them:
            print(i)

    elif args['mode'] == 'bins_diagonal':
        run_filters = generate_run_filters()
        coh_seq_dict = read_multi_fastas(args['coh_seqs_file'])
        doc_seq_dict = read_multi_fastas(args['doc_seqs_file'])

        if not os.path.isfile('bins.obj'):
            bins = swithces_from_diagonal(args, run_filters, coh_seq_dict, doc_seq_dict)
            with open('bins.obj', 'wb') as b_file:
                pickle.dump(bins, b_file)
        else:
            with open('bins.obj', 'rb') as b_file:
                bins = pickle.load(b_file)

        # for k, v in bins.items():
        #     print(k, v)

        if not os.path.isfile('best_clq.obj'):