Example #1
0
def _result_for_member(member, coh_seqs, doc_seqs, original_names):
    """Build the ``Result`` for one clique member, indexed as (coh name, doc name)."""
    # characters 10-13 of the translated name are passed on as ``doc_wt``
    # (presumably the wild-type dockerin PDB id -- confirm against translate_names.txt)
    wt_doc = original_names[member[1] + '.pdb.gz'][10:14]
    return Result(member, coh_seqs[member[0]], doc_seqs[member[1]], 1, j=True, doc_wt=wt_doc)


def analyse_cliques(cliques):
    """
    Print a pairwise comparison of the members of every clique of length 7-10,
    list the cohesin/dockerin names those cliques use, and show a prediction
    heat map for each of the longest cliques found.

    :param cliques: list of cliques; every clique is a sequence of members and
        every member is indexed as member[0] (key into the cohesin sequences)
        and member[1] (key into the dockerin sequences)
    :return: None, prints an analysis
    """
    stab_dir = ('/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/'
                'reclique_18Nov/stabilisation/all_stabilised/')
    coh_seqs = read_multi_fastas(stab_dir + 'all_j_st_cohs.fasta', '_st.A')
    doc_seqs = read_multi_fastas(stab_dir + 'all_j_st_docs.fasta', '_st.B')
    original_names = parse_name_translation('/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/recliques_4Nov/clique_6_pdbs/mini_diagonal_11Nov/minidiagonal_pdbs/translate_names.txt')

    # group by length without pre-seeding keys: any clique length is accepted and
    # max(clqs_by_len) is the longest length that actually occurs
    clqs_by_len = {}
    for clq in cliques:
        clqs_by_len.setdefault(len(clq), []).append(clq)

    designs_in_all_clqs = []
    # only the long cliques (10 down to 7 members) get the pairwise analysis
    for length in range(10, 6, -1):
        for clq in clqs_by_len.get(length, []):
            coh_diffs, doc_diffs, doc_diffs_symm, doc_bb_diffs = [], [], [], []
            print('\n\n\nfor clq', clq)
            designs_in_all_clqs.extend(clq)
            # build each member's Result once instead of once per compared pair
            results = [_result_for_member(mem, coh_seqs, doc_seqs, original_names) for mem in clq]
            for mem1, res1 in zip(clq, results):
                res1_doc_symm = switch_symm_changer(res1.doc_switch)
                for mem2, res2 in zip(clq, results):
                    if mem1 == mem2:
                        continue
                    # every ordered pair is visited, so each unordered pair is counted twice
                    doc_bb_diff = are_docs_from_diff_clusters(res1.doc_wt, res2.doc_wt)
                    coh_diff = switches_differ({}, res1.coh_switch, res2.coh_switch)
                    doc_diff = switches_differ({}, res1.doc_switch, res2.doc_switch)
                    doc_symm_diff = switches_differ({}, res1_doc_symm, res2.doc_switch)
                    doc_bb_diffs.append(doc_bb_diff)
                    coh_diffs.append(coh_diff)
                    doc_diffs.append(doc_diff)
                    doc_diffs_symm.append(doc_symm_diff)
                    print('results', res1)
                    print('results', res2)
                    print('docs diff', doc_diff)
                    print('doc symm diff', doc_symm_diff)
                    print('doc BB dif', doc_bb_diff)
                    print('coh diff', coh_diff)
            print('for clq %r found the following results:' % clq)
            print('doc_bb_diffs', doc_bb_diffs)
            print('doc_diffs', doc_diffs)
            print('doc_diffs_symm', doc_diffs_symm)
            print('coh_diffs', coh_diffs)
            # backbone diffs count 1 per truthy entry, the switch diffs are summed as they are
            print('total', sum([1 for a in doc_bb_diffs if a] + doc_diffs + doc_diffs_symm + coh_diffs))

    # sorted so the printed lists are reproducible between runs
    all_cohs = sorted({a[0] for a in designs_in_all_clqs})
    all_docs = sorted({a[1] for a in designs_in_all_clqs})
    print('these are all the cohs i need: %s, total %i' % (', '.join(all_cohs), len(all_cohs)))
    print('these are all the docs i need: %s, total %i' % (', '.join(all_docs), len(all_docs)))

    if not clqs_by_len:
        # no cliques were given, so there is no longest clique to report or plot
        return
    longest = max(clqs_by_len)
    print('LONGEST CLIQUES FOUND ARE %i' % longest)
    coh_doc_purples = creat_coh_doc_purples()
    for clq in clqs_by_len[longest]:
        print('clq', clq)
        cohs = [a[0] for a in clq]
        docs = [a[1] for a in clq]
        df = pd.DataFrame(index=docs, columns=cohs)
        for coh in cohs:
            for doc in docs:
                # a single .loc write; chained df[coh][doc] = ... may assign to a copy
                df.loc[doc, coh] = coh_doc_purples[coh][doc]
        show_prediction_heat_map(df)
Example #2
0
def add_flanks(args):
    """
    Return a design sequence with its flanking residues added.

    :param args: dict read for 'type' and 'seq'; when 'type' is 'doc' the key
        'name' is read as well and looked up (with '.pdb.gz' appended) in the
        name-translation file
    :return: for 'coh', 'D' + seq + 'NAT'; for 'doc', seq wrapped in the
        start/end flanks registered for characters 10-13 of the translated name;
        None for any other type
    :raises KeyError: if a 'doc' name is missing from the translation table or
        its 4-character id has no entry in the flanks table
    """
    seq_type = args['type']
    if seq_type == 'coh':
        return 'D' + args['seq'] + 'NAT'
    if seq_type != 'doc':
        # unknown types are passed over, the caller gets None
        return None

    doc_flanks = {
        '1ohz': {'start': 'ESSSVLL', 'end': 'RVIDKFPVAENP'},
        '2vn5': {'start': 'V', 'end': 'SKLPSN'},
        '3ul4': {'start': 'V', 'end': ''},
        '4dh2': {'start': 'WNK', 'end': 'NSAPTF'},
        '5new': {'start': '', 'end': 'Y'},
    }
    # the translation file is only needed for dockerins, so it is read only on this path
    name_path = '/home/labs/fleishman/jonathaw/no_backup/designs/multi_docs_15Oct/reclique_18Nov/stabilisation/'
    translation = parse_name_translation(name_path + 'translate_names.txt')
    doc_name = translation[args['name'] + '.pdb.gz'][10:14]
    flanks = doc_flanks[doc_name]
    return flanks['start'] + args['seq'] + flanks['end']