def _clique_member_result(member, coh_seqs, doc_seqs, original_names):
    """
    Build a fresh ``Result`` for a single clique member.

    :param member: indexable design; ``member[0]`` keys ``coh_seqs`` and
        ``member[1]`` keys ``doc_seqs``
    :param coh_seqs: mapping of cohesin name to sequence
    :param doc_seqs: mapping of dockerin name to sequence
    :param original_names: mapping of ``<doc name>.pdb.gz`` to its original name
    :return: the ``Result`` object for this member
    """
    # characters 10-13 of the original name are passed on as ``doc_wt``
    # (presumably the wild-type dockerin identifier - confirm against the
    # translation file format)
    wt_doc = original_names[member[1] + '.pdb.gz'][10:14]
    return Result(member, coh_seqs[member[0]], doc_seqs[member[1]], 1, j=True, doc_wt=wt_doc)


def analyse_cliques(cliques):
    """
    Print a pairwise analysis of the long cliques and show heat maps.

    Cliques are grouped by length. Every clique of length 10 down to 7 is
    compared member-against-member (cohesin switches, dockerin switches,
    symmetry-changed dockerin switches and dockerin backbone clusters) and
    the per-clique results are printed. Afterwards the cohesins and
    dockerins taking part in those cliques are listed, and a prediction
    heat map is shown for every clique in the longest length group.

    :param cliques: list of cliques; each clique is a sequence of members,
        where ``member[0]`` is a cohesin name and ``member[1]`` a dockerin name
    :return: None, prints an analysis
    """
    stab_dir = ('/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/'
                'reclique_18Nov/stabilisation/all_stabilised/')
    coh_seqs = read_multi_fastas(stab_dir + 'all_j_st_cohs.fasta', '_st.A')
    doc_seqs = read_multi_fastas(stab_dir + 'all_j_st_docs.fasta', '_st.B')
    original_names = parse_name_translation(
        '/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/'
        'recliques_4Nov/clique_6_pdbs/mini_diagonal_11Nov/minidiagonal_pdbs/'
        'translate_names.txt')

    # Lengths 1..7 always get a bucket (so the longest group below is at
    # least 7); any other length gets its bucket on demand instead of
    # raising KeyError.
    clqs_by_len = {k: [] for k in range(1, 8)}
    for clq in cliques:
        clqs_by_len.setdefault(len(clq), []).append(clq)

    designs_in_all_clqs = []
    # only cliques of length 10, 9, 8 and 7 are analysed pairwise, longest first
    for length in range(10, 6, -1):
        for clq in clqs_by_len.get(length, []):
            coh_diffs, doc_diffs, doc_diffs_symm, doc_bb_diffs = [], [], [], []
            print('\n\n\nfor clq', clq)
            for mem1 in clq:
                designs_in_all_clqs.append(mem1)
                res1 = _clique_member_result(mem1, coh_seqs, doc_seqs, original_names)
                res1_doc_symm = switch_symm_changer(res1.doc_switch)
                for mem2 in clq:
                    if mem1 == mem2:
                        continue
                    # a fresh Result per pair, so nothing is shared between comparisons
                    res2 = _clique_member_result(mem2, coh_seqs, doc_seqs, original_names)

                    bb_diff = are_docs_from_diff_clusters(res1.doc_wt, res2.doc_wt)
                    coh_diff = switches_differ({}, res1.coh_switch, res2.coh_switch)
                    doc_diff = switches_differ({}, res1.doc_switch, res2.doc_switch)
                    doc_symm_diff = switches_differ({}, res1_doc_symm, res2.doc_switch)

                    doc_bb_diffs.append(bb_diff)
                    coh_diffs.append(coh_diff)
                    doc_diffs.append(doc_diff)
                    doc_diffs_symm.append(doc_symm_diff)

                    print('results', res1)
                    print('results', res2)
                    print('docs diff', doc_diff)
                    print('doc symm diff', doc_symm_diff)
                    print('doc BB dif', bb_diff)
                    print('coh diff', coh_diff)

            print('for clq %r found the following results:' % clq)
            print('doc_bb_diffs', doc_bb_diffs)
            print('doc_diffs', doc_diffs)
            print('doc_diffs_symm', doc_diffs_symm)
            print('coh_diffs', coh_diffs)
            # backbone differences count 1 each when truthy; the switch
            # differences are summed as returned by switches_differ
            print('total', sum([1 for a in doc_bb_diffs if a] + doc_diffs + doc_diffs_symm + coh_diffs))

    # sorted so that the printed lists are reproducible between runs
    all_cohs = sorted({a[0] for a in designs_in_all_clqs})
    all_docs = sorted({a[1] for a in designs_in_all_clqs})
    print('these are all the cohs i need: %s, total %i' % (', '.join(all_cohs), len(all_cohs)))
    print('these are all the docs i need: %s, total %i' % (', '.join(all_docs), len(all_docs)))
    print('LONGEST CLIQUES FOUND ARE %i' % max((len(clq) for clq in cliques), default=0))

    coh_doc_purples = creat_coh_doc_purples()
    for clq in clqs_by_len[max(clqs_by_len)]:
        print('clq', clq)
        cohs = [a[0] for a in clq]
        docs = [a[1] for a in clq]
        df = pd.DataFrame(index=docs, columns=cohs)
        for coh in cohs:
            for doc in docs:
                # single .loc assignment: chained ``df[coh][doc] = ...`` is not
                # guaranteed to write into ``df`` itself
                df.loc[doc, coh] = coh_doc_purples[coh][doc]
        show_prediction_heat_map(df)
def add_flanks(args):
    """
    Add the flanking residues to a cohesin or dockerin sequence.

    :param args: dict with the keys ``'type'`` (``'coh'`` or ``'doc'``) and
        ``'seq'`` (the sequence to flank); for ``'doc'`` also ``'name'``,
        the design name used to look up the original dockerin
    :return: the flanked sequence, or None if ``args['type']`` is neither
        ``'coh'`` nor ``'doc'``
    :raises KeyError: for ``'doc'`` when the name is missing from the
        translation file or its dockerin has no entry in the flanks table
    """
    if args['type'] == 'coh':
        # cohesins always get the same flanks, no lookup required
        return 'D' + args['seq'] + 'NAT'

    if args['type'] == 'doc':
        doc_flanks = {
            '1ohz': {'start': 'ESSSVLL', 'end': 'RVIDKFPVAENP'},
            '2vn5': {'start': 'V', 'end': 'SKLPSN'},
            '3ul4': {'start': 'V', 'end': ''},
            '4dh2': {'start': 'WNK', 'end': 'NSAPTF'},
            '5new': {'start': '', 'end': 'Y'},
        }
        # the translation file is only needed for dockerins, so it is
        # parsed here rather than on every call
        name_path = '/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/reclique_18Nov/stabilisation/'
        translation = parse_name_translation(name_path + 'translate_names.txt')
        # characters 10-13 of the original name select the flanks entry
        doc_name = translation[args['name'] + '.pdb.gz'][10:14]
        flanks = doc_flanks[doc_name]
        return flanks['start'] + args['seq'] + flanks['end']

    # unknown type: no flanks are defined
    return None