def swithces_from_diagonal(args, run_filters, coh_seq_dict, doc_seq_dict):
    """Collect a Result for every diagonal design whose score file passes.

    :param args: run arguments (uses 'score_dir' and 'purples_threshold')
    :param run_filters: run filters
    :param coh_seq_dict: {name: AASeq()} of cohesins
    :param doc_seq_dict: {name: AASeq()} of dockerins
    :return: {design_name: Result} for score files with more than
             args['purples_threshold'] passing models
    """
    results = {}
    sc_files = [a for a in os.listdir(args['score_dir']) if a[-6:] == '.score']
    for sc_file in sc_files:
        score_dict = score2dict(sc_file)
        passed, failed = all_who_pass_run_filters(args, score_dict, run_filters)
        if len(passed) <= args['purples_threshold']:
            continue
        # score-file names carry a date tag ('_11.10' or '_12.10') that the
        # sequence dictionaries do not; strip it to recover the proper names
        date_tag = next((t for t in ('_11.10', '_12.10') if t in sc_file), None)
        if date_tag is None:
            # bug fix: a file with neither tag previously raised NameError
            # on the coh_seq reference below
            continue
        name = sc_file[4:-6]
        coh_seq = coh_seq_dict[(name + '_0001.pdb.A').replace(date_tag, '')]
        doc_seq = doc_seq_dict[(name + '_0001.pdb.B').replace(date_tag, '')]
        results[name] = Result(name, coh_seq, doc_seq, len(passed))
    return results
def post_pred_cliques(args):
    """Build a graph of passing designs and print all maximum cliques.

    Nodes are designs whose score file has at least args['purples_threshold']
    passing models. Two nodes are connected when their cohesin switches differ
    by at least args['diff_by'] AND their dockerin switches (including the
    symmetry-related form of the second dockerin) differ by at least
    args['doc_diff_by'], with a +1 bonus when the dockerin wild-types come
    from different clusters. Results are cached in ./all_data.obj.
    """
    run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400, 'shape': 0.6,
                                             'packstat': 0.6, 'buried_2': 3})
    if not os.path.isfile('./all_data.obj'):
        sc_files = [a for a in os.listdir('./') if '.score' in a]
        cohs_seqs = read_multi_fastas('/home/labs/fleishman/jonathaw/no_backup/designs/'
                                      'multi_docs_15Oct/recliques_4Nov/all_cohs.fasta')
        docs_seqs = read_multi_fastas('/home/labs/fleishman/jonathaw/no_backup/designs/'
                                      'multi_docs_15Oct/recliques_4Nov/all_docs.fasta')
        results = []
        for sc_file in sc_files:
            seq_name = '_'.join(sc_file.split('_')[1:8])
            coh_name = seq_name + '.pdb.gz.A'
            doc_name = seq_name + '.pdb.gz.B'
            sc_dict = score2dict(sc_file)
            # bug fix: dropped the unused 'ynum' regex lookup, which raised
            # AttributeError whenever a score-file name had no 'y###' tag
            passed, failed = all_who_pass_run_filters(args, sc_dict, run_filters)
            if len(passed) >= args['purples_threshold']:
                results.append(Result(seq_name, cohs_seqs[coh_name],
                                      docs_seqs[doc_name], len(passed)))
        with open('./all_data.obj', 'wb') as fout:
            pickle.dump(results, fout)
    else:
        with open('./all_data.obj', 'rb') as fin:
            results = pickle.load(fin)
    if not os.path.isfile('./graph.obj'):
        # NOTE(review): graph.obj is tested for but never written anywhere in
        # this function — confirm whether the graph was meant to be pickled
        result_dict = {i + 1: r for i, r in enumerate(results)}
        G = nx.Graph()
        G.add_nodes_from(result_dict.keys())
        for n1 in G.nodes_iter():
            for n2 in G.nodes_iter():
                if n1 == n2:
                    continue
                coh_sw_1, coh_sw_2 = result_dict[n1].coh_switch, result_dict[n2].coh_switch
                doc_sw_1, doc_sw_2 = result_dict[n1].doc_switch, result_dict[n2].doc_switch
                doc_wt_1, doc_wt_2 = result_dict[n1].doc_wt, result_dict[n2].doc_wt
                # +1 towards the required dockerin difference when the
                # wild-type dockerins come from different clusters
                doc_diff = 1 if are_docs_from_diff_clusters(doc_wt_1, doc_wt_2) else 0
                symm_switch = switch_symm_changer(doc_sw_2)
                if switches_differ({'diff_by': args['diff_by']}, coh_sw_1, coh_sw_2) >= args['diff_by'] and \
                        switches_differ({'diff_by': args['doc_diff_by']}, doc_sw_1, doc_sw_2) + doc_diff >= args['doc_diff_by'] and \
                        switches_differ({'diff_by': args['doc_diff_by']}, doc_sw_1, symm_switch) + doc_diff >= args['doc_diff_by']:
                    G.add_edge(n1, n2)
                    print('adding edge\n', result_dict[n1], '\n', result_dict[n2])
                else:
                    print('NOT\n', result_dict[n1], '\n', result_dict[n2])
        cliques = list(nx.find_cliques(G))
        max_len = max(len(a) for a in cliques)
        max_cliques = [a for a in cliques if len(a) == max_len]
        for clq in max_cliques:
            print(clq, '\n', '\n'.join(str(result_dict[a]) for a in clq))
def swithces_from_diagonal(args, run_filters, coh_seq_dict, doc_seq_dict):
    """Collect a Result for every diagonal design whose score file passes.

    :param args: run arguments (uses 'score_dir' and 'purples_threshold')
    :param run_filters: run filters
    :param coh_seq_dict: {name: AASeq()} of cohesins
    :param doc_seq_dict: {name: AASeq()} of dockerins
    :return: {design_name: Result} for score files with more than
             args['purples_threshold'] passing models
    """
    results = {}
    for sc_file in os.listdir(args['score_dir']):
        if not sc_file.endswith('.score'):
            continue
        score_dict = score2dict(sc_file)
        passed, failed = all_who_pass_run_filters(args, score_dict, run_filters)
        if len(passed) <= args['purples_threshold']:
            continue
        # strip the date tag ('_11.10' / '_12.10') from the score name to get
        # the keys used by the sequence dictionaries
        if '_11.10' in sc_file:
            date_tag = '_11.10'
        elif '_12.10' in sc_file:
            date_tag = '_12.10'
        else:
            # bug fix: files with neither tag previously hit a NameError
            # because coh_seq/doc_seq were never assigned
            continue
        design = sc_file[4:-6]
        coh_key = (design + '_0001.pdb.A').replace(date_tag, '')
        doc_key = (design + '_0001.pdb.B').replace(date_tag, '')
        results[design] = Result(design, coh_seq_dict[coh_key],
                                 doc_seq_dict[doc_key], len(passed))
    return results
def main():
    """Poll running job folders: process finished ones, stop and process
    folders that already have >= 50 passing models, and leave the rest alone.

    Appends progress messages to the module-level ``log`` string.
    """
    global log
    original_pwd = os.getcwd()
    running = get_running_folders()
    # 'pending' is only consumed by the disabled submission loop below;
    # kept so the queue state is still fetched the same way
    pending = get_pending_folders()
    run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400, 'shape': 0.6,
                                             'packstat': 0.6, 'buried_2': 3})
    for runner in running:
        os.chdir(runner)
        if is_folder_finished(runner):
            print('processing %s' % runner)
            log += 'folder is finished, processing %s\n' % runner.split('/')[-2]
            process_folder({'folder': runner, 'force_process': False, 'remove_pdbs': False})
            move_to_processed(runner, running, 0)
        else:
            score_dict = folder_scores(runner)
            passed, failed = all_who_pass_run_filters({}, score_dict, run_filters)
            # bug fix: this entry was missing its trailing newline, fusing it
            # with the next log line
            log += 'passed %i, failed %i\n' % (len(passed), len(failed))
            if len(passed) >= 50:
                print('found enough passed, stopping folder %s' % runner.split('/')[-1])
                log += 'found enough passed, stopping folder\n'
                bkill_folder(runner)
                process_folder({'folder': runner, 'force_process': True, 'remove_pdbs': False})
                move_to_processed(runner, running, len(passed))
            else:
                print('not enough finished, letting him be %s, found %i passed and %i failed'
                      % (runner.split('/')[-2], len(passed), len(failed)))
                log += 'not enough finished, letting him be %s, found %i passed and %i failed\n' % \
                       (runner.split('/')[-2], len(passed), len(failed))
        os.chdir(original_pwd)
    # for pender in pending:
    #     os.chdir(pender)
    #     lsf_status, pends = how_many_queue()
    #     if lsf_status['fleishman'] < 12000:
    #         print('found %i jobs in fleishman, submitting %s' % (lsf_status['fleishman'], pender))
    #         log += 'found %i jobs in fleishman, submitting %s\n' % (lsf_status['fleishman'], pender)
    #         submit_folder(pender)
    #         move_pender_to_runner(pending, pender)
    #     os.chdir(original_pwd)
    # lsf_status, pends = how_many_queue()
    # if lsf_status['new-all.q'] <= 2000:
    #     bswitch_pends(pends, 2000-lsf_status['new-all.q'])
    os.chdir(original_pwd)
def creat_coh_doc_purples():
    """Return {coh_name: {doc_name: purples}} over all result score files.

    'purples' is the number of models that pass the run filters for each
    cohesin/dockerin pair. The finished table is cached as analysed.obj next
    to the score files and reloaded on subsequent calls.
    """
    results_path = ('/home/labs/fleishman/jonathaw/no_backup/designs/'
                    'multi_docs_15Oct/reclique_18Nov/cliques_prediction/results/')
    run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6,
                                             'packstat': 0.6, 'buried_2': 3,
                                             'hbonds': -10.})
    if os.path.isfile(results_path + 'analysed.obj'):
        print("reading coh_doc_purples")
        # bug fix: pickle handles were opened with bare open() and never
        # closed; context managers guarantee closure
        with open(results_path + 'analysed.obj', 'rb') as fin:
            return pickle.load(fin)
    coh_doc_purples = {}
    for sc_file in (a for a in os.listdir(results_path) if '.score' in a):
        sc_dict = score2dict(results_path + sc_file)
        passed, failed = all_who_pass_run_filters({}, sc_dict, run_filters)
        coh_name = what_coh(sc_file, args={'naming': 'coh_on_doc'})
        doc_name = what_doc(sc_file, args={'naming': 'coh_on_doc'})
        coh_doc_purples.setdefault(coh_name, {})[doc_name] = len(passed)
    with open(results_path + 'analysed.obj', 'wb') as fout:
        pickle.dump(coh_doc_purples, fout)
    return coh_doc_purples
# Scan every doc_* directory under the mini-diagonal folder, record designs
# with more than 5 passing models, and copy one representative pdb.gz of each
# into minidiagonal_pdbs.
# NOTE(review): the original text contained two overlapping copies of this
# script (the second starting with an orphan '})'); reconstructed as one.
topath = ('/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/'
          'recliques_4Nov/clique_6_pdbs/mini_diagonal_11Nov/')
run_filters = generate_run_filters(args={'ddg': 24.0, 'sasa': 1400, 'shape': 0.6,
                                         'packstat': 0.6, 'buried_2': 3, 'hbonds': 10})
all_docs = [a for a in os.listdir(topath) if 'doc_' in a]
# 'with' guarantees the report file is closed even if a copy fails
with open(topath + 'minidiagonal_full_names.txt', 'w') as fout:
    for doc in all_docs:
        for folder in os.listdir(topath + doc):
            try:
                sc_file = [a for a in os.listdir(topath + doc + '/' + folder)
                           if a[-3:] == '.sc']
                if sc_file:
                    sc_dict = score2dict(topath + doc + '/' + folder + '/' + sc_file[0])
                    passed, failed = all_who_pass_run_filters({}, sc_dict, run_filters)
                    if len(passed) > 5:
                        fout.write('%s\t%i\n' % (folder, len(passed)))
                        shutil.copy(topath + doc + '/' + folder + '/' +
                                    list(sc_dict.keys())[0] + '.pdb.gz',
                                    topath + 'minidiagonal_pdbs')
            # bug fix: was a bare 'except:' that silently swallowed every
            # error; narrowed to filesystem errors, which is what the
            # 'no folder' message describes
            except OSError:
                print('no folder', folder)
def post_pred_cliques(args):
    """Build a graph of passing designs and print all maximum cliques.

    Nodes are designs with at least args['purples_threshold'] passing models;
    edges connect designs whose cohesin and dockerin switches (including the
    symmetry-related dockerin switch) differ by the configured amounts.
    Parsed results are cached in ./all_data.obj.
    """
    run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400, 'shape': 0.6,
                                             'packstat': 0.6, 'buried_2': 3})
    if os.path.isfile('./all_data.obj'):
        with open('./all_data.obj', 'rb') as fin:
            results = pickle.load(fin)
    else:
        cohs_seqs = read_multi_fastas('/home/labs/fleishman/jonathaw/no_backup/designs/'
                                      'multi_docs_15Oct/recliques_4Nov/all_cohs.fasta')
        docs_seqs = read_multi_fastas('/home/labs/fleishman/jonathaw/no_backup/designs/'
                                      'multi_docs_15Oct/recliques_4Nov/all_docs.fasta')
        results = []
        for sc_file in (a for a in os.listdir('./') if '.score' in a):
            seq_name = '_'.join(sc_file.split('_')[1:8])
            sc_dict = score2dict(sc_file)
            # bug fix: removed the unused 'ynum' regex search, which raised
            # AttributeError on any score-file name lacking a 'y###' tag
            passed, failed = all_who_pass_run_filters(args, sc_dict, run_filters)
            if len(passed) >= args['purples_threshold']:
                results.append(Result(seq_name,
                                      cohs_seqs[seq_name + '.pdb.gz.A'],
                                      docs_seqs[seq_name + '.pdb.gz.B'],
                                      len(passed)))
        with open('./all_data.obj', 'wb') as fout:
            pickle.dump(results, fout)
    if not os.path.isfile('./graph.obj'):
        # NOTE(review): graph.obj is checked but never written here — confirm
        # whether pickling the graph was intended
        result_dict = {i + 1: r for i, r in enumerate(results)}
        G = nx.Graph()
        G.add_nodes_from(result_dict.keys())
        for n1 in G.nodes_iter():
            for n2 in G.nodes_iter():
                if n1 == n2:
                    continue
                r1, r2 = result_dict[n1], result_dict[n2]
                # docs from different clusters count as one extra difference
                doc_diff = 1 if are_docs_from_diff_clusters(r1.doc_wt, r2.doc_wt) else 0
                symm_switch = switch_symm_changer(r2.doc_switch)
                if switches_differ({'diff_by': args['diff_by']}, r1.coh_switch, r2.coh_switch) >= args['diff_by'] and \
                        switches_differ({'diff_by': args['doc_diff_by']}, r1.doc_switch, r2.doc_switch) + doc_diff >= args['doc_diff_by'] and \
                        switches_differ({'diff_by': args['doc_diff_by']}, r1.doc_switch, symm_switch) + doc_diff >= args['doc_diff_by']:
                    G.add_edge(n1, n2)
                    print('adding edge\n', r1, '\n', r2)
                else:
                    print('NOT\n', r1, '\n', r2)
        cliques = list(nx.find_cliques(G))
        max_len = max(len(a) for a in cliques)
        for clq in [a for a in cliques if len(a) == max_len]:
            print(clq, '\n', '\n'.join(str(result_dict[a]) for a in clq))
def main():
    """Poll running job folders: process finished ones, stop and process
    folders that already have >= 50 passing models, and leave the rest alone.

    Appends progress messages to the module-level ``log`` string.
    """
    global log
    original_pwd = os.getcwd()
    running = get_running_folders()
    # only used by the disabled pending-submission loop below
    pending = get_pending_folders()
    run_filters = generate_run_filters(args={'ddg': 25.0, 'sasa': 1400, 'shape': 0.6,
                                             'packstat': 0.6, 'buried_2': 3})
    for runner in running:
        os.chdir(runner)
        if is_folder_finished(runner):
            print('processing %s' % runner)
            log += 'folder is finished, processing %s\n' % runner.split('/')[-2]
            process_folder({'folder': runner,
                            'force_process': False,
                            'remove_pdbs': False})
            move_to_processed(runner, running, 0)
        else:
            score_dict = folder_scores(runner)
            passed, failed = all_who_pass_run_filters({}, score_dict, run_filters)
            # bug fix: log entry previously lacked its trailing newline
            log += 'passed %i, failed %i\n' % (len(passed), len(failed))
            if len(passed) >= 50:
                print('found enough passed, stopping folder %s' % runner.split('/')[-1])
                log += 'found enough passed, stopping folder\n'
                bkill_folder(runner)
                process_folder({'folder': runner,
                                'force_process': True,
                                'remove_pdbs': False})
                move_to_processed(runner, running, len(passed))
            else:
                print('not enough finished, letting him be %s, found %i passed and %i failed'
                      % (runner.split('/')[-2], len(passed), len(failed)))
                log += 'not enough finished, letting him be %s, found %i passed and %i failed\n' % \
                       (runner.split('/')[-2], len(passed), len(failed))
        os.chdir(original_pwd)
    # for pender in pending:
    #     os.chdir(pender)
    #     lsf_status, pends = how_many_queue()
    #     if lsf_status['fleishman'] < 12000:
    #         print('found %i jobs in fleishman, submitting %s' % (lsf_status['fleishman'], pender))
    #         log += 'found %i jobs in fleishman, submitting %s\n' % (lsf_status['fleishman'], pender)
    #         submit_folder(pender)
    #         move_pender_to_runner(pending, pender)
    #     os.chdir(original_pwd)
    # lsf_status, pends = how_many_queue()
    # if lsf_status['new-all.q'] <= 2000:
    #     bswitch_pends(pends, 2000-lsf_status['new-all.q'])
    os.chdir(original_pwd)