def show_predcition_matrix(prediction: dict) -> None:
    """
    Plot observed versus predicted cohesin-dockerin binding as a colour coded grid.

    Every cell holds one of four codes, which is also printed inside the cell:
    0 - observed no, predicted no; 1 - observed yes, predicted yes;
    2 - observed no, predicted yes; 3 - observed yes, predicted no.

    :param prediction: nested mapping, prediction[cohesin][dockerin] -> 0/1, with an entry for every
        pair returned by binding_data()
    :return: None, the figure is displayed with plt.show()
    """
    # obs_pre[observed][predicted] -> cell code
    obs_pre = {0: {0: 0, 1: 2}, 1: {0: 3, 1: 1}}
    binding_ = binding_data()
    # rows are dockerins (taken from the first cohesin's entry), columns are cohesins
    # NOTE(review): assumes every cohesin lists the same dockerins as the first one - confirm
    df = pd.DataFrame(data=0, index=list(binding_.values())[0].keys(), columns=binding_.keys())
    for coh, doc_ in binding_.items():
        for doc, obs in doc_.items():
            # one .loc call writes into df itself; chained df[coh][doc] = ... may only modify a temporary
            df.loc[doc, coh] = obs_pre[obs][prediction[coh][doc]]

    # convert once, the same array serves both the heat map and the cell labels
    grid = array(df)
    n_rows, n_cols = grid.shape

    plt.figure()
    axis = plt.gca()
    cmap = colors.ListedColormap(['white', 'cornflowerblue', 'red', 'darkorange'])
    # one colour bin per code 0..3
    bounds = [-0.5, 0.5, 1.5, 2.5, 3.5]
    norm = colors.BoundaryNorm(bounds, cmap.N)
    plt.pcolor(grid, cmap=cmap, norm=norm, edgecolors='k', linewidth=2)
    for y in range(n_rows):
        for x in range(n_cols):
            if grid[y, x] >= 0:
                # +0.5 puts the label at the centre of the cell
                plt.text(x+0.5, y+0.5, grid[y, x], horizontalalignment='center', verticalalignment='center')
    plt.yticks(arange(0.5, len(df.index), 1), df.index)
    plt.xticks(arange(0.5, len(df.columns), 1), df.columns, rotation=70)
    plt.xlabel('Cohesin name', style='oblique')
    plt.ylabel('Dockerin name', style='oblique')
    axis.set_aspect('equal')
    plt.title('Cohesin dockerin cross binding')
    plt.suptitle('0: obs no pred no, 1: obs yes, pred yes\n2: obs no pred yes, 3: obs yes pred no')
    plt.show()
def postdictions_summary(args):
    """
    Summarise the postdiction score files as a dockerin x cohesin heat map of prediction outcomes.

    Every file in the results directory whose name contains '.score' is parsed and filtered with the
    run filters. A pair counts as a predicted binder when at least 10 entries pass the filters. The
    prediction is compared with the observed binding and stored as 0 (TN), 1 (TP), 2 (FP) or 3 (FN);
    pairs without a score file stay at -100 (NA). The table is printed and then plotted.

    File names are expected to look like '...all_<cohesin>_on_<dockerin>_<n>.<n>.score'.

    :param args: run arguments, passed on unchanged to all_who_pass_run_filters
    :return: None
    """
    from matplotlib import colors
    from _binding_data import binding_data
    # obs_pre[observed][predicted] -> outcome code
    obs_pre = {False: {False: 0, True: 2}, True: {False: 3, True: 1}}
    binding_dict = binding_data()
    results_root = '/home/labs/fleishman/jonathaw/no_backup/postdiction_new/results/'
    # thresholds tried before the current ones:
    # {'ddg': -16, 'sasa': 1200, 'shape': 0.5, 'packstat': 0.5, 'buried_2': 30, 'hbonds': 12}
    # {'ddg': 12, 'sasa': 1400, 'shape': 0.45, 'packstat': 0.45, 'buried_2': 2, 'hbonds': 4}
    # {'ddg': 14, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 2, 'hbonds': 4} # harsh
    args_ = {'ddg': 16, 'sasa': 1400, 'shape': 0.6, 'packstat': 0.6, 'buried_2': 2, 'hbonds': 6}
    run_filters = generate_run_filters(args_)
    sc_files = [a for a in os.listdir(results_root) if '.score' in a]
    # raw string, so the backslashes reach the regex engine instead of being invalid string escapes
    score_suffix = re.compile(r'_[0-9]{1,2}\.[0-9]{1,2}\.score')
    min_passed = 10  # number of passing entries needed to call a pair a predicted binder

    results, cohs, docs = {}, [], []
    # progress bar: print an empty "{   }" frame, step back into it, fill one character per file
    bar_width = len(sc_files)
    sys.stdout.write("{%s}" % (" " * bar_width))
    sys.stdout.flush()
    sys.stdout.write("\b" * (bar_width+1))
    for sc_file in sc_files:
        sc_dict = score2dict(results_root+sc_file)
        passed, failed = all_who_pass_run_filters(args, sc_dict, run_filters)
        coh_name = sc_file.split('all_')[1].split('_on_')[0]
        doc_name = score_suffix.split(sc_file.split('_on_')[1])[0]
        if coh_name not in results:
            results[coh_name] = {}
            cohs.append(coh_name)
        if doc_name not in docs:
            docs.append(doc_name)
        results[coh_name][doc_name] = obs_pre[binding_dict[coh_name][doc_name]][len(passed) >= min_passed]
        sys.stdout.write("%s" % random.choice(['!', '@', '#', '$', '%', '^', '&', '*', '(', ')']))
        sys.stdout.flush()
    sys.stdout.write("}\n")

    # move data to data frame, -100 marks pairs that have no score file
    df = pd.DataFrame(columns=sorted(docs), index=sorted(cohs), data=-100)
    for coh, doc_dict in results.items():
        for doc, res in doc_dict.items():
            # one .loc call writes into df itself; chained df[doc][coh] = ... may only modify a temporary
            df.loc[coh, doc] = res

    # dockerins as rows, cohesins as columns
    df = df.transpose()
    print(df)
    axis = plt.gca()
    cmap = colors.ListedColormap(['white', 'red', 'blue', 'green', 'yellow'])
    # first bin catches the -100 placeholder, then one bin per outcome code 0..3
    bounds = [-101, -0.5, 0.5, 1.5, 2.5, 3.5]
    norm = colors.BoundaryNorm(bounds, cmap.N)
    heatmap = plt.pcolor(np.array(df), cmap=cmap, norm=norm, edgecolors='k', linewidth=2)
    plt.yticks(np.arange(0.5, len(df.index), 1), [official_names(a) for a in df.index])
    plt.xticks(np.arange(0.5, len(df.columns), 1), [official_names(a) for a in df.columns], rotation=70)
    axis.set_aspect('equal')

    legend = plt.colorbar(heatmap)
    # legend.ax.set_yticklabels(['NA', 'TN', 'TP', 'FP', 'FN'])
    legend.ax.get_yaxis().set_ticks([])
    for j, lab in enumerate(['NA', 'TN', 'TP', 'FP', 'FN']):
        # positions are fractions of the colorbar height (centres of five equal segments), so they are
        # given in axes coordinates rather than in the data coordinates of the colorbar
        legend.ax.text(.5, (2 * j + 1) / 10.0, lab, ha='center', va='center', transform=legend.ax.transAxes)
    legend.ax.get_yaxis().labelpad = 15
    plt.suptitle(str(args_))
    plt.show()
def parse_binding_data() -> pd.DataFrame:
    """
    Gather every known cohesin-dockerin pair with its aligned sequences and binding outcome.

    Rows come from three sources, in this order: Rachel's binding data (outcome stored as is),
    Vered's binding data (outcome is True when the stored value equals 1), and a fixed list of
    names that are added as binding pairs with the same name on both sides.

    :return: data frame 'coh_name', 'doc_name', 'coh_seq', 'doc_seq', 'binders', rows labelled from 1
    """
    from _binding_data import binding_data
    aln_dir = '/home/labs/fleishman/jonathaw/decision_tree/'
    coh_seqs = read_multi_fastas(aln_dir+'cohesins_from_rachel_and_vered.fasta_aln', suffix_to_remove='/',
                                 lower=True)
    doc_seqs = read_multi_fastas(aln_dir+'dockerins_from_rachel_and_vered.fasta_aln', suffix_to_remove='/',
                                 lower=True)
    rachel = binding_data()
    vered = parse_vered_binding()
    # presumably PDB entries of solved complexes; each name keys both sequence dicts - verify
    self_pairs = ['1ohz', '2b59', '2ozn', '2vn5', '2y3n', '3ul4', '4fl4', '4fl5', '4dh2', '4uyp', '5new']

    def rows():
        # yields one table row at a time, in the order described in the docstring
        for coh, partners in rachel.items():
            for doc, outcome in partners.items():
                yield [coh, doc, coh_seqs[coh], doc_seqs[doc], outcome]
        for coh, partners in vered.items():
            for doc, outcome in partners.items():
                yield [coh, doc, coh_seqs[coh], doc_seqs[doc], outcome == 1]
        for name in self_pairs:
            yield [name, name, coh_seqs[name], doc_seqs[name], True]

    table = pd.DataFrame(columns=['coh_name', 'doc_name', 'coh_seq', 'doc_seq', 'binders'])
    n_rows = 0
    for n_rows, row in enumerate(rows(), start=1):
        table.loc[n_rows] = row
    print('there are %i rows in the data' % n_rows)
    return table
def parse_binding_data() -> pd.DataFrame:
    """
    Build a feature table with one row per cohesin-dockerin pair found in binding_data().

    Columns are 'coh', 'doc', then one column per (position, amino acid) for 2 cohesin and 3 dockerin
    core positions, one column per (position, type) for 18 cohesin and 7 dockerin rim positions, and
    finally 'binding' (1 when the pair's entry is truthy, otherwise 0).

    :return: the feature data frame, rows labelled from 1
    """
    # sequences used to be read from cohs_specific_pos.fasta / docs_specific_pos.fasta under root_path
    cohs, docs = retrive_relevant_poses()
    data = binding_data()

    header = ['coh', 'doc']
    header.extend('core coh %i %s' % (pos, aa) for pos in (1, 2) for aa in aa2num.keys())
    header.extend('core doc %i %s' % (pos, aa) for pos in (1, 2, 3) for aa in aa2num.keys())
    header.extend('rim coh %i %s' % (pos, t) for pos in range(1, 19) for t in types)
    header.extend('rim doc %i %s' % (pos, t) for pos in range(1, 8) for t in types)
    header.append('binding')

    table = pd.DataFrame(columns=header)
    row_id = 1
    for coh, partners in data.items():
        # NOTE(review): get_seq is read as an attribute, not called - presumably a property; confirm
        coh_seq = cohs[coh].get_seq
        for doc, binds in partners.items():
            doc_seq = docs[doc].get_seq
            label = 1 if binds else 0
            # seqs2row supplies the feature values that sit between the two names and the label
            table.loc[row_id] = [coh, doc] + seqs2row(coh_seq, doc_seq) + [label]
            row_id += 1
    return table
# Example #5
0
def parse_binding_data() -> pd.DataFrame:
    """
    Turn the binding data into a per-pair feature data frame.

    The first two columns name the cohesin and the dockerin, the last column ('binding') is 1 for a
    truthy entry in binding_data() and 0 otherwise. The columns in between are named after core
    positions combined with the keys of aa2num and rim positions combined with the entries of types.

    :return: data frame with one row per cohesin-dockerin pair, rows labelled from 1
    """
    # earlier version read cohs_specific_pos.fasta / docs_specific_pos.fasta from root_path instead
    cohs, docs = retrive_relevant_poses()
    data = binding_data()

    core_coh = ['core coh %i %s' % (i, aa) for i in [1, 2] for aa in aa2num.keys()]
    core_doc = ['core doc %i %s' % (i, aa) for i in [1, 2, 3] for aa in aa2num.keys()]
    rim_coh = ['rim coh %i %s' % (i, t) for i in range(1, 19, 1) for t in types]
    rim_doc = ['rim doc %i %s' % (i, t) for i in range(1, 8, 1) for t in types]
    column_names = ['coh', 'doc'] + core_coh + core_doc + rim_coh + rim_doc + ['binding']

    frame = pd.DataFrame(columns=column_names)
    next_label = 1
    for coh_name, doc_results in data.items():
        # NOTE(review): get_seq is accessed without a call - presumably a property; confirm
        coh_seq = cohs[coh_name].get_seq
        for doc_name, bound in doc_results.items():
            doc_seq = docs[doc_name].get_seq
            features = seqs2row(coh_seq, doc_seq)
            frame.loc[next_label] = [coh_name, doc_name] + features + [1 if bound else 0]
            next_label += 1
    return frame
# Example #6
0
def show_predcition_matrix(prediction: dict) -> None:
    """
    Show a grid comparing observed and predicted cohesin-dockerin binding.

    Cell codes (coloured and printed in each cell):
    0 - observed no, predicted no; 1 - observed yes, predicted yes;
    2 - observed no, predicted yes; 3 - observed yes, predicted no.

    :param prediction: nested mapping, prediction[cohesin][dockerin] -> 0/1, with an entry for every
        pair returned by binding_data()
    :return: None, the figure is displayed with plt.show()
    """
    # obs_pre[observed][predicted] -> cell code
    obs_pre = {0: {0: 0, 1: 2}, 1: {0: 3, 1: 1}}
    binding_ = binding_data()
    # dockerins (from the first cohesin's entry) are the rows, cohesins the columns
    # NOTE(review): assumes every cohesin lists the same dockerins as the first one - confirm
    df = pd.DataFrame(data=0,
                      index=list(binding_.values())[0].keys(),
                      columns=binding_.keys())
    for coh, doc_ in binding_.items():
        for doc, obs in doc_.items():
            # a single .loc write reaches df itself, whereas chained
            # df[coh][doc] = ... may only modify a temporary object
            df.loc[doc, coh] = obs_pre[obs][prediction[coh][doc]]

    # convert once and reuse for both the heat map and the cell labels
    grid = array(df)
    n_rows, n_cols = grid.shape

    plt.figure()
    axis = plt.gca()
    cmap = colors.ListedColormap(
        ['white', 'cornflowerblue', 'red', 'darkorange'])
    # one colour bin per code 0..3
    bounds = [-0.5, 0.5, 1.5, 2.5, 3.5]
    norm = colors.BoundaryNorm(bounds, cmap.N)
    plt.pcolor(grid,
               cmap=cmap,
               norm=norm,
               edgecolors='k',
               linewidth=2)
    for y in range(n_rows):
        for x in range(n_cols):
            if grid[y, x] >= 0:
                # +0.5 centres the label inside its cell
                plt.text(x + 0.5,
                         y + 0.5,
                         grid[y, x],
                         horizontalalignment='center',
                         verticalalignment='center')
    plt.yticks(arange(0.5, len(df.index), 1), df.index)
    plt.xticks(arange(0.5, len(df.columns), 1), df.columns, rotation=70)
    plt.xlabel('Cohesin name', style='oblique')
    plt.ylabel('Dockerin name', style='oblique')
    axis.set_aspect('equal')
    plt.title('Cohesin dockerin cross binding')
    plt.suptitle(
        '0: obs no pred no, 1: obs yes, pred yes\n2: obs no pred yes, 3: obs yes pred no'
    )
    plt.show()