def erbb2_mutants(args):
    """
    Plot Rosetta's predicted ddG against experimental values for ErbB2 mutants.

    Reads the experimental mutation table with ``parse_erbb2_exp_table``,
    scans the hard-coded results directory for per-mutant ``.score`` files,
    and stores ``mutant a_ddg - wild-type a_ddg`` in a new ``rosetta`` column
    of the experimental table. Rows that have no Rosetta value (or any other
    NaN) are dropped, and the remainder is shown as an annotated scatter plot.

    :param args: accepted for interface uniformity with sibling entry points;
        not read by this function.
    :return: None. The table is printed and the plot is shown interactively.
    """
    base_dir = "/home/labs/fleishman/jonathaw/elazaridis/fold_and_dock/" + \
        "erbb2/mutations/"
    score_dir = base_dir + "all_results/"
    exp_table = base_dir + "general_data/mut_table.txt"

    exp_df = parse_erbb2_exp_table(exp_table)

    # Wild-type reference value that every mutant is measured against.
    # NOTE(review): get_term_by_threshold(df, 'score', 5, 'a_ddg', 'mean')
    # presumably averages a_ddg over the best 5% of models by score - confirm
    # against Rf.
    wt_df = Rf.score_file2df(score_dir + "all_erbb2v4_wt_28Feb.score")
    wt_ddg = Rf.get_term_by_threshold(wt_df, 'score', 5, 'a_ddg', 'mean')

    exp_df['rosetta'] = np.nan
    for sc_file in os.listdir(score_dir):
        # Only mutant score files; the wild-type file was handled above.
        if '.score' not in sc_file or 'wt' in sc_file:
            continue
        df = Rf.score_file2df(score_dir + sc_file)
        ddg = Rf.get_term_by_threshold(df, 'score', 5, 'a_ddg', 'mean')

        # The third '_'-separated token of the file name is the mutation,
        # written as <wt residue><position><mutant residue>.
        name = sc_file.split('_')[2]
        wt, pos, mut = name[0], int(name[1:-1]), name[-1]

        # Boolean-mask assignment must go through .loc; DataFrame.set_value
        # is a scalar label-based setter and no longer exists in pandas >= 1.0.
        mask = ((exp_df['pos'] == pos) & (exp_df['wt'] == wt) &
                (exp_df['mut'] == mut))
        exp_df.loc[mask, 'rosetta'] = ddg - wt_ddg

    print(exp_df)
    # Keep only mutants that have both an experimental and a Rosetta value.
    exp_df = exp_df.dropna()
    print(exp_df.to_string())

    plt.scatter(exp_df['rosetta'], exp_df['exp'])
    plt.ylabel('experimental ∆∆G')
    plt.xlabel('rosetta ∆∆G')
    plt.axhline(0)
    plt.axvline(0)
    for _, row in exp_df.iterrows():
        plt.annotate('%s%i%s' % (row['wt'], row['pos'], row['mut']),
                     (row['rosetta'], row['exp']))
    plt.show()


def _plot_regression_panel(ax, data, x_col, y_col, title):
    """
    Draw one scatter panel with a least-squares line and its R^2 on ``ax``.

    :param ax: matplotlib axes to draw on.
    :param data: DataFrame holding ``x_col`` and ``y_col`` with no NaN values.
    :param x_col: column used as the predictor (x axis).
    :param y_col: column used as the response (y axis).
    :param title: panel title.
    """
    x = data[x_col].astype(float).values.reshape(-1, 1)
    y = data[y_col].astype(float).values

    model = linear_model.LinearRegression()
    model.fit(x, y)
    # Evaluate the fitted line over the observed x range.
    line_x = np.linspace(x.min(), x.max())
    line_y = model.predict(line_x[:, np.newaxis])
    r2 = r2_score(y, model.predict(x))

    ax.scatter(x.ravel(), y)
    ax.plot(line_x, line_y)
    ax.set_title(title)
    ax.text(0.8, 0.1, r'$R^2=%.2f$' % r2, fontsize=15,
            horizontalalignment='center', verticalalignment='center',
            transform=ax.transAxes)
    ax.axhline(0, color='k')
    ax.axvline(0, color='k')


def mutant_table(args: dict):
    """
    a function to find and display the correlation between ResSolv and
    MPFrameWork and experimental results from both Doung 2006 and Assaf

    For every ``.score`` file in the ResSolv results directory and in the
    MPFrameWork directory, the file's ``a_ddg`` value minus the matching
    wild-type value is written into the ``rs`` or ``mp`` column of the
    experimental table, on the row whose ``name`` equals the mutation encoded
    in the file name. The table is printed and logged, then plotted.

    :param args: dict with the keys
        ``'dir'``    - sub-directory of ``mutant_results`` holding the ResSolv
                       score files (also where the PDF is written),
        ``'logger'`` - object exposing ``log(obj)``,
        ``'all4'``   - truthy: 2x2 grid of both score functions against both
                       experimental columns (``dstbl``, ``Doung``); falsy:
                       two panels against ``dstbl`` only, saved to
                       ``dsTbL_alone.pdf``.
    :return: None.
    :raises IndexError: if no wild-type ``.score`` file exists in the ResSolv
        directory.
    """
    results_root = ('/home/labs/fleishman/jonathaw/elazaridis/'
                    'fold_and_dock/gpa/mutant_results/')
    scores_dir = '%s%s' % (results_root, args['dir'])
    mp_dir = results_root + 'mpframework_18Dec/'
    main_df = pd.read_csv(results_root + "experimental_results.tsv",
                          sep=r'\s+')

    # Wild-type reference values, one per score function.
    wt_beta_score_file = [a for a in os.listdir(scores_dir)
                          if 'wt' in a and '.score' in a][0]
    wt_beta_df = Rf.score_file2df(scores_dir + '/' + wt_beta_score_file)
    wt_beta_ddg = Rf.get_term_by_threshold(wt_beta_df, 'score', 5, 'a_ddg',
                                           'mean')
    wt_mp_df = Rf.score_file2df(
        '%sall_gpav1_wt_mpframework_25Oct.score' % mp_dir)
    wt_mp_ddg = Rf.get_term_by_threshold(wt_mp_df, 'score', 5, 'a_ddg',
                                         'mean')

    # Make sure both prediction columns exist even if no score file matches,
    # so the column selections further down cannot raise KeyError.
    for column in ('rs', 'mp'):
        if column not in main_df.columns:
            main_df[column] = np.nan

    score_files = [a for a in os.listdir(scores_dir) + os.listdir(mp_dir)
                   if '.score' in a]
    for sc_file in score_files:
        # The file name, not the directory it was listed from, decides which
        # score function (and therefore which directory) a file belongs to.
        is_mp = 'mpframework' in sc_file
        df = Rf.score_file2df(
            os.path.join(mp_dir if is_mp else scores_dir, sc_file))

        # Third '_'-separated token is <wt residue><position><mutant residue>.
        name = sc_file.split('_')[2]
        if '16Mar' in sc_file:
            # NOTE(review): the 16Mar files are shifted by +72, presumably to
            # match the numbering used in the experimental table - confirm.
            name = '%s%i%s' % (name[0], int(name[1:-1]) + 72, name[-1])

        min_ddg = Rf.get_term_by_threshold(df, 'score', 5, 'a_ddg', 'mean')
        column, wt_ddg = ('mp', wt_mp_ddg) if is_mp else ('rs', wt_beta_ddg)
        # Boolean-mask assignment must go through .loc; DataFrame.set_value
        # is a scalar label-based setter and no longer exists in pandas >= 1.0.
        main_df.loc[main_df['name'] == name, column] = min_ddg - wt_ddg

    print(main_df)
    args['logger'].log(main_df)

    if args['all4']:
        fig = plt.figure(figsize=(10, 10), facecolor='w')
        panels = [(scfxn, exp) for scfxn in ('rs', 'mp')
                  for exp in ('dstbl', 'Doung')]
        for i, (scfxn, exp) in enumerate(panels, start=1):
            ax = plt.subplot(2, 2, i)
            # The regression cannot be fitted on NaN, so use only the rows
            # where both the prediction and this experiment are present.
            pair_df = main_df[[scfxn, exp]].dropna(how='any')
            scfxn_name = 'ResSolv' if scfxn == 'rs' else 'MPFrameWork'
            exp_name = 'Doung 2006' if exp == 'Doung' else r'dsT$\beta$L'
            _plot_regression_panel(ax, pair_df, scfxn, exp,
                                   '%s Vs. %s' % (scfxn_name, exp_name))
            # Only the bottom-left panel carries the shared axis labels.
            if i == 3:
                ax.set_xlabel('Rosetta ∆∆G', fontsize=18)
                ax.set_ylabel('Experimental ∆∆G', fontsize=18)
        plt.show()

    else:
        fig = plt.figure(facecolor='w')

        ax1 = plt.subplot(1, 2, 1)
        rs_df = main_df[['name', 'dstbl', 'rs']].dropna(how='any')
        _plot_regression_panel(ax1, rs_df, 'rs', 'dstbl',
                               '%s Vs. %s' % ('ResSolv', r'dsT$\beta$L'))
        ax1.set_xlabel('Rosetta ∆∆G', fontsize=18)
        ax1.set_ylabel(r'dsT$\beta$L experimental results', fontsize=18)
        for x, y, n in zip(rs_df['rs'], rs_df['dstbl'], rs_df['name']):
            ax1.annotate(n, (x, y))

        ax2 = plt.subplot(1, 2, 2)
        mp_df = main_df[['name', 'dstbl', 'mp']].dropna(how='any')
        _plot_regression_panel(ax2, mp_df, 'mp', 'dstbl',
                               '%s Vs. %s' % ('MPFrameWork', r'dsT$\beta$L'))

        # Save before show(): once a blocking show() returns the figure has
        # been torn down and savefig would write an empty page.
        fig.savefig('%s/dsTbL_alone.pdf' % scores_dir)
        plt.show()