def swithces_from_diagonal(args, run_filters, coh_seq_dict, doc_seq_dict):
    """Collect a Result for every diagonal design whose score file passes.

    :param args: run arguments (uses 'score_dir' and 'purples_threshold')
    :param run_filters: run filters
    :param coh_seq_dict: {name: AASeq()} of cohesins
    :param doc_seq_dict: {name: AASeq()} of dockerins
    :return: {design_name: Result} for score files with more than
             args['purples_threshold'] passing models
    """
    results = {}
    sc_files = [a for a in os.listdir(args['score_dir']) if a[-6:] == '.score']
    for sc_file in sc_files:
        score_dict = score2dict(sc_file)
        passed, failed = all_who_pass_run_filters(args, score_dict, run_filters)
        if len(passed) <= args['purples_threshold']:
            continue
        # score-file names carry a date tag ('_11.10' or '_12.10') that the
        # sequence dictionaries do not; strip it to recover the proper names
        date_tag = next((t for t in ('_11.10', '_12.10') if t in sc_file), None)
        if date_tag is None:
            # bug fix: a file with neither tag previously raised NameError
            # on the coh_seq reference below
            continue
        name = sc_file[4:-6]
        coh_seq = coh_seq_dict[(name + '_0001.pdb.A').replace(date_tag, '')]
        doc_seq = doc_seq_dict[(name + '_0001.pdb.B').replace(date_tag, '')]
        results[name] = Result(name, coh_seq, doc_seq, len(passed))
    return results
def post_pred_cliques(args):
    """Build a graph of passing designs and print all maximum cliques.

    Nodes are designs whose score file has at least args['purples_threshold']
    passing models. Two nodes are connected when their cohesin switches differ
    by at least args['diff_by'] AND their dockerin switches (including the
    symmetry-related form of the second dockerin) differ by at least
    args['doc_diff_by'], with a +1 bonus when the dockerin wild-types come
    from different clusters. Results are cached in ./all_data.obj.
    """
    run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400, 'shape': 0.6,
                                             'packstat': 0.6, 'buried_2': 3})
    if not os.path.isfile('./all_data.obj'):
        sc_files = [a for a in os.listdir('./') if '.score' in a]
        cohs_seqs = read_multi_fastas('/home/labs/fleishman/jonathaw/no_backup/designs/'
                                      'multi_docs_15Oct/recliques_4Nov/all_cohs.fasta')
        docs_seqs = read_multi_fastas('/home/labs/fleishman/jonathaw/no_backup/designs/'
                                      'multi_docs_15Oct/recliques_4Nov/all_docs.fasta')
        results = []
        for sc_file in sc_files:
            seq_name = '_'.join(sc_file.split('_')[1:8])
            coh_name = seq_name + '.pdb.gz.A'
            doc_name = seq_name + '.pdb.gz.B'
            sc_dict = score2dict(sc_file)
            # bug fix: dropped the unused 'ynum' regex lookup, which raised
            # AttributeError whenever a score-file name had no 'y###' tag
            passed, failed = all_who_pass_run_filters(args, sc_dict, run_filters)
            if len(passed) >= args['purples_threshold']:
                results.append(Result(seq_name, cohs_seqs[coh_name],
                                      docs_seqs[doc_name], len(passed)))
        with open('./all_data.obj', 'wb') as fout:
            pickle.dump(results, fout)
    else:
        with open('./all_data.obj', 'rb') as fin:
            results = pickle.load(fin)
    if not os.path.isfile('./graph.obj'):
        # NOTE(review): graph.obj is tested for but never written anywhere in
        # this function — confirm whether the graph was meant to be pickled
        result_dict = {i + 1: r for i, r in enumerate(results)}
        G = nx.Graph()
        G.add_nodes_from(result_dict.keys())
        for n1 in G.nodes_iter():
            for n2 in G.nodes_iter():
                if n1 == n2:
                    continue
                coh_sw_1, coh_sw_2 = result_dict[n1].coh_switch, result_dict[n2].coh_switch
                doc_sw_1, doc_sw_2 = result_dict[n1].doc_switch, result_dict[n2].doc_switch
                doc_wt_1, doc_wt_2 = result_dict[n1].doc_wt, result_dict[n2].doc_wt
                # +1 towards the required dockerin difference when the
                # wild-type dockerins come from different clusters
                doc_diff = 1 if are_docs_from_diff_clusters(doc_wt_1, doc_wt_2) else 0
                symm_switch = switch_symm_changer(doc_sw_2)
                if switches_differ({'diff_by': args['diff_by']}, coh_sw_1, coh_sw_2) >= args['diff_by'] and \
                        switches_differ({'diff_by': args['doc_diff_by']}, doc_sw_1, doc_sw_2) + doc_diff >= args['doc_diff_by'] and \
                        switches_differ({'diff_by': args['doc_diff_by']}, doc_sw_1, symm_switch) + doc_diff >= args['doc_diff_by']:
                    G.add_edge(n1, n2)
                    print('adding edge\n', result_dict[n1], '\n', result_dict[n2])
                else:
                    print('NOT\n', result_dict[n1], '\n', result_dict[n2])
        cliques = list(nx.find_cliques(G))
        max_len = max(len(a) for a in cliques)
        max_cliques = [a for a in cliques if len(a) == max_len]
        for clq in max_cliques:
            print(clq, '\n', '\n'.join(str(result_dict[a]) for a in clq))
def swithces_from_diagonal(args, run_filters, coh_seq_dict, doc_seq_dict):
    """Collect a Result for every diagonal design whose score file passes.

    :param args: run arguments (uses 'score_dir' and 'purples_threshold')
    :param run_filters: run filters
    :param coh_seq_dict: {name: AASeq()} of cohesins
    :param doc_seq_dict: {name: AASeq()} of dockerins
    :return: {design_name: Result} for score files with more than
             args['purples_threshold'] passing models
    """
    results = {}
    for sc_file in os.listdir(args['score_dir']):
        if not sc_file.endswith('.score'):
            continue
        score_dict = score2dict(sc_file)
        passed, failed = all_who_pass_run_filters(args, score_dict, run_filters)
        if len(passed) <= args['purples_threshold']:
            continue
        # strip the date tag ('_11.10' / '_12.10') from the score name to get
        # the keys used by the sequence dictionaries
        if '_11.10' in sc_file:
            date_tag = '_11.10'
        elif '_12.10' in sc_file:
            date_tag = '_12.10'
        else:
            # bug fix: files with neither tag previously hit a NameError
            # because coh_seq/doc_seq were never assigned
            continue
        design = sc_file[4:-6]
        coh_key = (design + '_0001.pdb.A').replace(date_tag, '')
        doc_key = (design + '_0001.pdb.B').replace(date_tag, '')
        results[design] = Result(design, coh_seq_dict[coh_key],
                                 doc_seq_dict[doc_key], len(passed))
    return results
def main():
    """Poll running job folders: process finished ones, stop and process
    folders that already have >= 50 passing models, and leave the rest alone.

    Appends progress messages to the module-level ``log`` string.
    """
    global log
    original_pwd = os.getcwd()
    running = get_running_folders()
    # 'pending' is only consumed by the disabled submission loop below;
    # kept so the queue state is still fetched the same way
    pending = get_pending_folders()
    run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400, 'shape': 0.6,
                                             'packstat': 0.6, 'buried_2': 3})
    for runner in running:
        os.chdir(runner)
        if is_folder_finished(runner):
            print('processing %s' % runner)
            log += 'folder is finished, processing %s\n' % runner.split('/')[-2]
            process_folder({'folder': runner, 'force_process': False, 'remove_pdbs': False})
            move_to_processed(runner, running, 0)
        else:
            score_dict = folder_scores(runner)
            passed, failed = all_who_pass_run_filters({}, score_dict, run_filters)
            # bug fix: this entry was missing its trailing newline, fusing it
            # with the next log line
            log += 'passed %i, failed %i\n' % (len(passed), len(failed))
            if len(passed) >= 50:
                print('found enough passed, stopping folder %s' % runner.split('/')[-1])
                log += 'found enough passed, stopping folder\n'
                bkill_folder(runner)
                process_folder({'folder': runner, 'force_process': True, 'remove_pdbs': False})
                move_to_processed(runner, running, len(passed))
            else:
                print('not enough finished, letting him be %s, found %i passed and %i failed'
                      % (runner.split('/')[-2], len(passed), len(failed)))
                log += 'not enough finished, letting him be %s, found %i passed and %i failed\n' % \
                       (runner.split('/')[-2], len(passed), len(failed))
        os.chdir(original_pwd)
    # for pender in pending:
    #     os.chdir(pender)
    #     lsf_status, pends = how_many_queue()
    #     if lsf_status['fleishman'] < 12000:
    #         print('found %i jobs in fleishman, submitting %s' % (lsf_status['fleishman'], pender))
    #         log += 'found %i jobs in fleishman, submitting %s\n' % (lsf_status['fleishman'], pender)
    #         submit_folder(pender)
    #         move_pender_to_runner(pending, pender)
    #     os.chdir(original_pwd)
    # lsf_status, pends = how_many_queue()
    # if lsf_status['new-all.q'] <= 2000:
    #     bswitch_pends(pends, 2000-lsf_status['new-all.q'])
    os.chdir(original_pwd)
def creat_coh_doc_purples():
    """Return {coh_name: {doc_name: purples}} over all result score files.

    'purples' is the number of models that pass the run filters for each
    cohesin/dockerin pair. The finished table is cached as analysed.obj next
    to the score files and reloaded on subsequent calls.
    """
    results_path = ('/home/labs/fleishman/jonathaw/no_backup/designs/'
                    'multi_docs_15Oct/reclique_18Nov/cliques_prediction/results/')
    run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6,
                                             'packstat': 0.6, 'buried_2': 3,
                                             'hbonds': -10.})
    if os.path.isfile(results_path + 'analysed.obj'):
        print("reading coh_doc_purples")
        # bug fix: pickle handles were opened with bare open() and never
        # closed; context managers guarantee closure
        with open(results_path + 'analysed.obj', 'rb') as fin:
            return pickle.load(fin)
    coh_doc_purples = {}
    for sc_file in (a for a in os.listdir(results_path) if '.score' in a):
        sc_dict = score2dict(results_path + sc_file)
        passed, failed = all_who_pass_run_filters({}, sc_dict, run_filters)
        coh_name = what_coh(sc_file, args={'naming': 'coh_on_doc'})
        doc_name = what_doc(sc_file, args={'naming': 'coh_on_doc'})
        coh_doc_purples.setdefault(coh_name, {})[doc_name] = len(passed)
    with open(results_path + 'analysed.obj', 'wb') as fout:
        pickle.dump(coh_doc_purples, fout)
    return coh_doc_purples
# Scan every doc_* directory under the mini-diagonal folder, record designs
# with more than 5 passing models, and copy one representative pdb.gz of each
# into minidiagonal_pdbs.
# NOTE(review): the original text contained two overlapping copies of this
# script (the second starting with an orphan '})'); reconstructed as one.
topath = ('/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/'
          'recliques_4Nov/clique_6_pdbs/mini_diagonal_11Nov/')
run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6,
                                         'packstat': 0.6, 'buried_2': 3, 'hbonds': 10})
all_docs = [a for a in os.listdir(topath) if 'doc_' in a]
# 'with' guarantees the report file is closed even if a copy fails
with open(topath + 'minidiagonal_full_names.txt', 'w') as fout:
    for doc in all_docs:
        for folder in os.listdir(topath + doc):
            try:
                sc_file = [a for a in os.listdir(topath + doc + '/' + folder)
                           if a[-3:] == '.sc']
                if sc_file:
                    sc_dict = score2dict(topath + doc + '/' + folder + '/' + sc_file[0])
                    passed, failed = all_who_pass_run_filters({}, sc_dict, run_filters)
                    if len(passed) > 5:
                        fout.write('%s\t%i\n' % (folder, len(passed)))
                        shutil.copy(topath + doc + '/' + folder + '/' +
                                    list(sc_dict.keys())[0] + '.pdb.gz',
                                    topath + 'minidiagonal_pdbs')
            # bug fix: was a bare 'except:' that silently swallowed every
            # error; narrowed to filesystem errors, which is what the
            # 'no folder' message describes
            except OSError:
                print('no folder', folder)
def post_pred_cliques(args):
    """Build a graph of passing designs and print all maximum cliques.

    Nodes are designs with at least args['purples_threshold'] passing models;
    edges connect designs whose cohesin and dockerin switches (including the
    symmetry-related dockerin switch) differ by the configured amounts.
    Parsed results are cached in ./all_data.obj.
    """
    run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400, 'shape': 0.6,
                                             'packstat': 0.6, 'buried_2': 3})
    if os.path.isfile('./all_data.obj'):
        with open('./all_data.obj', 'rb') as fin:
            results = pickle.load(fin)
    else:
        cohs_seqs = read_multi_fastas('/home/labs/fleishman/jonathaw/no_backup/designs/'
                                      'multi_docs_15Oct/recliques_4Nov/all_cohs.fasta')
        docs_seqs = read_multi_fastas('/home/labs/fleishman/jonathaw/no_backup/designs/'
                                      'multi_docs_15Oct/recliques_4Nov/all_docs.fasta')
        results = []
        for sc_file in (a for a in os.listdir('./') if '.score' in a):
            seq_name = '_'.join(sc_file.split('_')[1:8])
            sc_dict = score2dict(sc_file)
            # bug fix: removed the unused 'ynum' regex search, which raised
            # AttributeError on any score-file name lacking a 'y###' tag
            passed, failed = all_who_pass_run_filters(args, sc_dict, run_filters)
            if len(passed) >= args['purples_threshold']:
                results.append(Result(seq_name,
                                      cohs_seqs[seq_name + '.pdb.gz.A'],
                                      docs_seqs[seq_name + '.pdb.gz.B'],
                                      len(passed)))
        with open('./all_data.obj', 'wb') as fout:
            pickle.dump(results, fout)
    if not os.path.isfile('./graph.obj'):
        # NOTE(review): graph.obj is checked but never written here — confirm
        # whether pickling the graph was intended
        result_dict = {i + 1: r for i, r in enumerate(results)}
        G = nx.Graph()
        G.add_nodes_from(result_dict.keys())
        for n1 in G.nodes_iter():
            for n2 in G.nodes_iter():
                if n1 == n2:
                    continue
                r1, r2 = result_dict[n1], result_dict[n2]
                # docs from different clusters count as one extra difference
                doc_diff = 1 if are_docs_from_diff_clusters(r1.doc_wt, r2.doc_wt) else 0
                symm_switch = switch_symm_changer(r2.doc_switch)
                if switches_differ({'diff_by': args['diff_by']}, r1.coh_switch, r2.coh_switch) >= args['diff_by'] and \
                        switches_differ({'diff_by': args['doc_diff_by']}, r1.doc_switch, r2.doc_switch) + doc_diff >= args['doc_diff_by'] and \
                        switches_differ({'diff_by': args['doc_diff_by']}, r1.doc_switch, symm_switch) + doc_diff >= args['doc_diff_by']:
                    G.add_edge(n1, n2)
                    print('adding edge\n', r1, '\n', r2)
                else:
                    print('NOT\n', r1, '\n', r2)
        cliques = list(nx.find_cliques(G))
        max_len = max(len(a) for a in cliques)
        for clq in [a for a in cliques if len(a) == max_len]:
            print(clq, '\n', '\n'.join(str(result_dict[a]) for a in clq))
def main():
    """Poll running job folders: process finished ones, stop and process
    folders that already have >= 50 passing models, and leave the rest alone.

    Appends progress messages to the module-level ``log`` string.
    """
    global log
    original_pwd = os.getcwd()
    running = get_running_folders()
    # only used by the disabled pending-submission loop below
    pending = get_pending_folders()
    run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400, 'shape': 0.6,
                                             'packstat': 0.6, 'buried_2': 3})
    for runner in running:
        os.chdir(runner)
        if is_folder_finished(runner):
            print('processing %s' % runner)
            log += 'folder is finished, processing %s\n' % runner.split('/')[-2]
            process_folder({'folder': runner,
                            'force_process': False,
                            'remove_pdbs': False})
            move_to_processed(runner, running, 0)
        else:
            score_dict = folder_scores(runner)
            passed, failed = all_who_pass_run_filters({}, score_dict, run_filters)
            # bug fix: log entry previously lacked its trailing newline
            log += 'passed %i, failed %i\n' % (len(passed), len(failed))
            if len(passed) >= 50:
                print('found enough passed, stopping folder %s' % runner.split('/')[-1])
                log += 'found enough passed, stopping folder\n'
                bkill_folder(runner)
                process_folder({'folder': runner,
                                'force_process': True,
                                'remove_pdbs': False})
                move_to_processed(runner, running, len(passed))
            else:
                print('not enough finished, letting him be %s, found %i passed and %i failed'
                      % (runner.split('/')[-2], len(passed), len(failed)))
                log += 'not enough finished, letting him be %s, found %i passed and %i failed\n' % \
                       (runner.split('/')[-2], len(passed), len(failed))
        os.chdir(original_pwd)
    # for pender in pending:
    #     os.chdir(pender)
    #     lsf_status, pends = how_many_queue()
    #     if lsf_status['fleishman'] < 12000:
    #         print('found %i jobs in fleishman, submitting %s' % (lsf_status['fleishman'], pender))
    #         log += 'found %i jobs in fleishman, submitting %s\n' % (lsf_status['fleishman'], pender)
    #         submit_folder(pender)
    #         move_pender_to_runner(pending, pender)
    #     os.chdir(original_pwd)
    # lsf_status, pends = how_many_queue()
    # if lsf_status['new-all.q'] <= 2000:
    #     bswitch_pends(pends, 2000-lsf_status['new-all.q'])
    os.chdir(original_pwd)