def post_pred_cliques(args):
    """Build a graph over filter-passing designs and print its maximum cliques.

    Reads every '*.score' file in the CWD, keeps designs whose number of
    passing structures reaches args['purples_threshold'], then connects two
    designs when their coh/doc "switches" differ by at least args['diff_by'] /
    args['doc_diff_by'] (the doc switch is also compared against the
    symmetry-flipped switch of the other design).  Parsed results are cached
    in ./all_data.obj; all maximum cliques of the graph are printed.
    """
    run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400,
                                             'shape': 0.6, 'packstat': 0.6,
                                             'buried_2': 3})
    if not os.path.isfile('./all_data.obj'):
        sc_files = [a for a in os.listdir('./') if '.score' in a]
        cohs_seqs = read_multi_fastas(
            '/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/recliques_4Nov/all_cohs.fasta')
        docs_seqs = read_multi_fastas(
            '/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/recliques_4Nov/all_docs.fasta')
        results = []
        for sc_file in sc_files:
            seq_name = '_'.join(sc_file.split('_')[1:8])
            coh_name = seq_name + '.pdb.gz.A'
            doc_name = seq_name + '.pdb.gz.B'
            sc_dict = score2dict(sc_file)
            # NOTE(review): ynum is never used afterwards; kept because
            # re.search(...).group(0) raises AttributeError when the 'yNNN'
            # tag is absent, i.e. it doubles as a filename sanity check —
            # TODO confirm that is intended.
            ynum = re.search('y[0-9]{3}', sc_file).group(0)
            passed, failed = all_who_pass_run_filters(args, sc_dict, run_filters)
            if len(passed) >= args['purples_threshold']:
                r = Result(seq_name, cohs_seqs[coh_name], docs_seqs[doc_name],
                           len(passed))
                results.append(r)
        with open('./all_data.obj', 'wb') as fout:
            pickle.dump(results, fout)
    else:
        with open('./all_data.obj', 'rb') as fin:
            results = pickle.load(fin)
    if not os.path.isfile('./graph.obj'):
        # node ids are 1-based indices into results
        result_dict = {i + 1: r for i, r in enumerate(results)}
        G = nx.Graph()
        # was a side-effect list comprehension [G.add_node(a) for a in ...]
        G.add_nodes_from(result_dict.keys())
        # Both ordered pairs (n1, n2) and (n2, n1) are scanned on purpose:
        # the condition below is not symmetric in its arguments (symm_switch
        # is derived from n2 only), and nx.Graph ignores duplicate edges.
        for n1 in G.nodes_iter():
            for n2 in G.nodes_iter():
                if n1 != n2:
                    coh_sw_1, coh_sw_2 = result_dict[n1].coh_switch, result_dict[n2].coh_switch
                    doc_sw_1, doc_sw_2 = result_dict[n1].doc_switch, result_dict[n2].doc_switch
                    doc_wt_1, doc_wt_2 = result_dict[n1].doc_wt, result_dict[n2].doc_wt
                    # designs from different doc clusters get one extra difference point
                    doc_diff = 1 if are_docs_from_diff_clusters(doc_wt_1, doc_wt_2) else 0
                    symm_switch = switch_symm_changer(doc_sw_2)
                    if switches_differ({'diff_by': args['diff_by']}, coh_sw_1, coh_sw_2) >= args['diff_by'] and \
                            switches_differ({'diff_by': args['doc_diff_by']}, doc_sw_1, doc_sw_2) + doc_diff >= args['doc_diff_by'] and \
                            switches_differ({'diff_by': args['doc_diff_by']}, doc_sw_1, symm_switch) + doc_diff >= args['doc_diff_by']:
                        G.add_edge(n1, n2)
                        print('adding edge\n', result_dict[n1], '\n', result_dict[n2])
                    else:
                        print('NOT\n', result_dict[n1], '\n', result_dict[n2])
        cliques = [a for a in nx.find_cliques(G)]
        max_len = max([len(a) for a in cliques])
        max_cliques = [a for a in cliques if len(a) == max_len]
        for clq in max_cliques:
            print(clq, '\n', '\n'.join([str(result_dict[a]) for a in clq]))
def main():
    """Run one monitoring pass over all running LSF job folders.

    Finished folders are processed and archived; unfinished folders are
    killed early (bkill) once 50 designs pass the filters, otherwise left
    running.  Side effects: chdir's into each folder (restoring the
    original cwd), may kill LSF jobs, moves folders between lists, and
    appends status lines to the module-level `log` string.
    """
    global log
    original_pwd = os.getcwd()
    running = get_running_folders()
    pending = get_pending_folders()
    run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400,
                                             'shape': 0.6, 'packstat': 0.6,
                                             'buried_2': 3})
    for runner in running:
        os.chdir(runner)
        if is_folder_finished(runner):
            # all jobs done: post-process the folder and archive it
            print('processing %s' % runner)
            log += 'folder is finished, processing %s\n' % runner.split('/')[-2]
            process_folder({'folder': runner, 'force_process': False,
                            'remove_pdbs': False})
            move_to_processed(runner, running, 0)
        else:
            score_dict = folder_scores(runner)
            passed, failed = all_who_pass_run_filters({}, score_dict, run_filters)
            log += 'passed %i, failed %i' % (len(passed), len(failed))
            if len(passed) >= 50:
                # enough passing designs already: stop the remaining jobs early
                print('found enough passed, stopping folder %s' % runner.split('/')[-1])
                log += 'found enough passed, stopping folder\n'
                bkill_folder(runner)
                process_folder({'folder': runner, 'force_process': True,
                                'remove_pdbs': False})
                move_to_processed(runner, running, len(passed))
            else:
                print('not enough finished, letting him be %s, found %i passed and %i failed' %
                      (runner.split('/')[-2], len(passed), len(failed)))
                log += 'not enough finished, letting him be %s, found %i passed and %i failed\n' % \
                       (runner.split('/')[-2], len(passed), len(failed))
        # NOTE(review): reconstructed as the last statement of the loop body
        # (original line breaks were lost) — restores cwd after each folder
        os.chdir(original_pwd)
    # for pender in pending:
    #     os.chdir(pender)
    #     lsf_status, pends = how_many_queue()
    #     if lsf_status['fleishman'] < 12000:
    #         print('found %i jobs in fleishman, submitting %s' % (lsf_status['fleishman'], pender))
    #         log += 'found %i jobs in fleishman, submitting %s\n' % (lsf_status['fleishman'], pender)
    #         submit_folder(pender)
    #         move_pender_to_runner(pending, pender)
    #     os.chdir(original_pwd)
    # lsf_status, pends = how_many_queue()
    # if lsf_status['new-all.q'] <= 2000:
    #     bswitch_pends(pends, 2000-lsf_status['new-all.q'])
    os.chdir(original_pwd)
def creat_coh_doc_purples():
    """Count filter-passing ("purple") designs for every coh/doc pair.

    Scans all '*.score' files under `results_path`, applies the run filters,
    and returns a nested dict {coh_name: {doc_name: n_passed}}.  The result
    is cached in '<results_path>analysed.obj' and re-read on later calls.
    """
    results_path = '/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/reclique_18Nov/cliques_prediction/results/'
    sc_files = [a for a in os.listdir(results_path) if '.score' in a]
    run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400,
                                             'shape': 0.6, 'packstat': 0.6,
                                             'buried_2': 3, 'hbonds': -10.})
    if not os.path.isfile(results_path + 'analysed.obj'):
        coh_doc_purples = {}
        for sc_file in sc_files:
            sc_dict = score2dict(results_path + sc_file)
            passed, failed = all_who_pass_run_filters({}, sc_dict, run_filters)
            coh_name = what_coh(sc_file, args={'naming': 'coh_on_doc'})
            doc_name = what_doc(sc_file, args={'naming': 'coh_on_doc'})
            # setdefault replaces the original `if coh_name not in ...keys()` init
            coh_doc_purples.setdefault(coh_name, {})[doc_name] = len(passed)
        # original passed an anonymous open() to pickle.dump, leaking the
        # handle; `with` closes it deterministically
        with open(results_path + 'analysed.obj', 'wb') as fout:
            pickle.dump(coh_doc_purples, fout)
    else:
        print("reading coh_doc_purples")
        with open(results_path + 'analysed.obj', 'rb') as fin:
            coh_doc_purples = pickle.load(fin)
    return coh_doc_purples
#!/usr/bin/env python3.5
# Collect "mini diagonal" designs: for every doc_* folder under `topath`,
# keep sub-folders whose score file has more than 5 filter-passing
# structures, record them in minidiagonal_full_names.txt and copy one
# representative .pdb.gz into minidiagonal_pdbs.
import os, shutil
from DoCohResultProcessor import generate_run_filters, all_who_pass_run_filters
from RosettaFilter import score2dict

topath = '/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/recliques_4Nov/clique_6_pdbs/mini_diagonal_11Nov/'
run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3,
                                         'hbonds': 10})
all_docs = [a for a in os.listdir(topath) if 'doc_' in a]
# `with` guarantees the report file is flushed and closed even on error
# (original used fout = open(...) ... fout.close())
with open(topath + 'minidiagonal_full_names.txt', 'w') as fout:
    for doc in all_docs:
        all_dirs = os.listdir(topath + doc)
        for dir in all_dirs:
            try:
                sc_file = [a for a in os.listdir(topath + doc + '/' + dir) if a[-3:] == '.sc']
                if sc_file:
                    sc_dict = score2dict(topath + doc + '/' + dir + '/' + sc_file[0])
                    passed, failed = all_who_pass_run_filters({}, sc_dict, run_filters)
                    if len(passed) > 5:
                        fout.write('%s\t%i\n' % (dir, len(passed)))
                        shutil.copy(topath + doc + '/' + dir + '/' + list(sc_dict.keys())[0] + '.pdb.gz',
                                    topath + 'minidiagonal_pdbs')
            # was a bare `except:`; OSError covers listdir on non-directories
            # and copy failures without also hiding programming errors or
            # swallowing KeyboardInterrupt
            except OSError:
                print('no folder', dir)
# def analyse_minidiagonal(args):
#     with open('../minidiagonal.txt', 'w') as fout:
#         run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3,
#                                                  'hbonds': 10})
# NOTE(review): truncated duplicate of the mini-diagonal script above (a
# reformatted copy); it ends mid-`try` block with no `except` — incomplete
# as pasted, kept verbatim rather than guessed at.
#!/usr/bin/env python3.5 import os, shutil from DoCohResultProcessor import generate_run_filters, all_who_pass_run_filters from RosettaFilter import score2dict topath = '/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/recliques_4Nov/clique_6_pdbs/mini_diagonal_11Nov/' run_filters = generate_run_filters( args={ 'ddg': 24.0, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 3, 'hbonds': 10 }) all_docs = [a for a in os.listdir(topath) if 'doc_' in a] fout = open(topath + 'minidiagonal_full_names.txt', 'w') for doc in all_docs: all_dirs = os.listdir(topath + doc) for dir in all_dirs: try: sc_file = [ a for a in os.listdir(topath + doc + '/' + dir) if a[-3:] == '.sc' ] if sc_file: sc_dict = score2dict(topath + doc + '/' + dir + '/' + sc_file[0]) passed, failed = all_who_pass_run_filters({}, sc_dict, run_filters)
# NOTE(review): fragment from inside a larger mode-dispatch function — it
# contains an `elif args['mode'] == 'bins_diagonal':` branch whose matching
# `if` is not visible in this chunk, and it ends on a bodiless
# `if not os.path.isfile('best_clq.obj'):`.  Incomplete as pasted; kept
# verbatim rather than guessed at.
with open('iden_clq.obj', 'rb') as iden_file: iden_cliques = pickle.load(iden_file) iden_cliques.sort(key=lambda k: (k['doc_iden_avg'], k['coh_iden_avg'])) with open('switches.obj', 'rb') as sw_in: switches = pickle.load(sw_in) all_of_them = [] for clique in iden_cliques: for swi_str in clique['clique']: for struct in switches[swi_str]: all_of_them.append(struct[:-2]) all_of_them = list(set(all_of_them)) for i in all_of_them: print(i) elif args['mode'] == 'bins_diagonal': run_filters = generate_run_filters() coh_seq_dict = read_multi_fastas(args['coh_seqs_file']) doc_seq_dict = read_multi_fastas(args['doc_seqs_file']) if not os.path.isfile('bins.obj'): bins = swithces_from_diagonal(args, run_filters, coh_seq_dict, doc_seq_dict) with open('bins.obj', 'wb') as b_file: pickle.dump(bins, b_file) else: with open('bins.obj', 'rb') as b_file: bins = pickle.load(b_file) # for k, v in bins.items(): # print(k, v) if not os.path.isfile('best_clq.obj'):
def post_pred_cliques(args):
    """Build a graph over filter-passing designs and print its maximum cliques.

    Reads every '*.score' file in the CWD, keeps designs whose number of
    passing structures reaches args['purples_threshold'], then connects two
    designs whose coh/doc "switches" differ enough (the doc switch is also
    compared against the symmetry-flipped switch of the other design).
    Parsed results are cached in ./all_data.obj.
    """
    run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400,
                                             'shape': 0.6, 'packstat': 0.6,
                                             'buried_2': 3})
    if not os.path.isfile('./all_data.obj'):
        sc_files = [a for a in os.listdir('./') if '.score' in a]
        cohs_seqs = read_multi_fastas(
            '/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/recliques_4Nov/all_cohs.fasta')
        docs_seqs = read_multi_fastas(
            '/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/recliques_4Nov/all_docs.fasta')
        results = []
        for sc_file in sc_files:
            seq_name = '_'.join(sc_file.split('_')[1:8])
            coh_name = seq_name + '.pdb.gz.A'
            doc_name = seq_name + '.pdb.gz.B'
            sc_dict = score2dict(sc_file)
            # ynum is unused below; the search doubles as a filename check —
            # .group(0) raises AttributeError when 'yNNN' is missing
            # (TODO confirm that is intended)
            ynum = re.search('y[0-9]{3}', sc_file).group(0)
            passed, failed = all_who_pass_run_filters(args, sc_dict, run_filters)
            if len(passed) >= args['purples_threshold']:
                r = Result(seq_name, cohs_seqs[coh_name], docs_seqs[doc_name],
                           len(passed))
                results.append(r)
        with open('./all_data.obj', 'wb') as fout:
            pickle.dump(results, fout)
    else:
        with open('./all_data.obj', 'rb') as fin:
            results = pickle.load(fin)
    if not os.path.isfile('./graph.obj'):
        # node ids are 1-based indices into results
        result_dict = {i + 1: r for i, r in enumerate(results)}
        G = nx.Graph()
        # side-effect list comprehension: adds one node per result index
        [G.add_node(a) for a in result_dict.keys()]
        # both ordered pairs (n1, n2) and (n2, n1) are scanned: the condition
        # below is not symmetric (symm_switch is derived from n2 only), and
        # nx.Graph ignores duplicate edges
        for n1 in G.nodes_iter():  # nodes_iter: NetworkX 1.x API
            for n2 in G.nodes_iter():
                if n1 != n2:
                    coh_sw_1, coh_sw_2 = result_dict[n1].coh_switch, result_dict[n2].coh_switch
                    doc_sw_1, doc_sw_2 = result_dict[n1].doc_switch, result_dict[n2].doc_switch
                    doc_wt_1, doc_wt_2 = result_dict[n1].doc_wt, result_dict[n2].doc_wt
                    # designs from different doc clusters get one extra difference point
                    doc_diff = 1 if are_docs_from_diff_clusters(doc_wt_1, doc_wt_2) else 0
                    symm_switch = switch_symm_changer(doc_sw_2)
                    if switches_differ({'diff_by': args['diff_by']}, coh_sw_1, coh_sw_2) >= args['diff_by'] and \
                            switches_differ({'diff_by': args['doc_diff_by']}, doc_sw_1, doc_sw_2) + doc_diff >= args['doc_diff_by'] and \
                            switches_differ({'diff_by': args['doc_diff_by']}, doc_sw_1, symm_switch) + doc_diff >= args['doc_diff_by']:
                        G.add_edge(n1, n2)
                        print('adding edge\n', result_dict[n1], '\n', result_dict[n2])
                    else:
                        print('NOT\n', result_dict[n1], '\n', result_dict[n2])
        cliques = [a for a in nx.find_cliques(G)]
        max_len = max([len(a) for a in cliques])
        max_cliques = [a for a in cliques if len(a) == max_len]
        for clq in max_cliques:
            print(clq, '\n', '\n'.join([str(result_dict[a]) for a in clq]))
# NOTE(review): duplicate of the mode-dispatch fragment above — an `elif
# args['mode'] == 'bins_diagonal':` branch whose matching `if` is not
# visible in this chunk.  Incomplete as pasted; kept verbatim rather than
# guessed at.
with open('iden_clq.obj', 'rb') as iden_file: iden_cliques = pickle.load(iden_file) iden_cliques.sort(key=lambda k: (k['doc_iden_avg'], k['coh_iden_avg'])) with open('switches.obj', 'rb') as sw_in: switches = pickle.load(sw_in) all_of_them = [] for clique in iden_cliques: for swi_str in clique['clique']: for struct in switches[swi_str]: all_of_them.append(struct[:-2]) all_of_them = list(set(all_of_them)) for i in all_of_them: print(i) elif args['mode'] == 'bins_diagonal': run_filters = generate_run_filters() coh_seq_dict = read_multi_fastas(args['coh_seqs_file']) doc_seq_dict = read_multi_fastas(args['doc_seqs_file']) if not os.path.isfile('bins.obj'): bins = swithces_from_diagonal(args, run_filters, coh_seq_dict, doc_seq_dict) with open('bins.obj', 'wb') as b_file: pickle.dump(bins, b_file) else: with open('bins.obj', 'rb') as b_file: bins = pickle.load(b_file) # for k, v in bins.items(): # print(k, v)
def main():
    """Run one monitoring pass over all running LSF job folders.

    Finished folders are processed and archived; unfinished folders are
    killed early (bkill) once 50 designs pass the filters, otherwise left
    running.  Side effects: chdir's into each folder (restoring the
    original cwd), may kill LSF jobs, moves folders between lists, and
    appends status lines to the module-level `log` string.
    """
    global log
    original_pwd = os.getcwd()
    running = get_running_folders()
    pending = get_pending_folders()
    run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400,
                                             'shape': 0.6, 'packstat': 0.6,
                                             'buried_2': 3})
    for runner in running:
        os.chdir(runner)
        if is_folder_finished(runner):
            # all jobs done: post-process the folder and archive it
            print('processing %s' % runner)
            log += 'folder is finished, processing %s\n' % runner.split('/')[-2]
            process_folder({'folder': runner, 'force_process': False,
                            'remove_pdbs': False})
            move_to_processed(runner, running, 0)
        else:
            score_dict = folder_scores(runner)
            passed, failed = all_who_pass_run_filters({}, score_dict, run_filters)
            log += 'passed %i, failed %i' % (len(passed), len(failed))
            if len(passed) >= 50:
                # enough passing designs already: stop the remaining jobs early
                print('found enough passed, stopping folder %s' % runner.split('/')[-1])
                log += 'found enough passed, stopping folder\n'
                bkill_folder(runner)
                process_folder({'folder': runner, 'force_process': True,
                                'remove_pdbs': False})
                move_to_processed(runner, running, len(passed))
            else:
                print('not enough finished, letting him be %s, found %i passed and %i failed' %
                      (runner.split('/')[-2], len(passed), len(failed)))
                log += 'not enough finished, letting him be %s, found %i passed and %i failed\n' % \
                       (runner.split('/')[-2], len(passed), len(failed))
        # NOTE(review): reconstructed as the last statement of the loop body
        # (original line breaks were lost) — restores cwd after each folder
        os.chdir(original_pwd)
    # for pender in pending:
    #     os.chdir(pender)
    #     lsf_status, pends = how_many_queue()
    #     if lsf_status['fleishman'] < 12000:
    #         print('found %i jobs in fleishman, submitting %s' % (lsf_status['fleishman'], pender))
    #         log += 'found %i jobs in fleishman, submitting %s\n' % (lsf_status['fleishman'], pender)
    #         submit_folder(pender)
    #         move_pender_to_runner(pending, pender)
    #     os.chdir(original_pwd)
    # lsf_status, pends = how_many_queue()
    # if lsf_status['new-all.q'] <= 2000:
    #     bswitch_pends(pends, 2000-lsf_status['new-all.q'])
    os.chdir(original_pwd)