# Standard-library and third-party imports used by the snippets below; the
# project-specific helpers (the Rf score-file utilities, Logger, PointLabel,
# get_rmsds_from_table, and the analyse_*/slide_ddg/side_by_side modes) are
# assumed to be imported from the surrounding project.
import argparse
import time

import numpy as np
import matplotlib.pyplot as plt


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-mode', default='q')
    parser.add_argument('-pc_old')
    parser.add_argument('-score_old')
    parser.add_argument('-obj_old')
    parser.add_argument('-pc_new')
    parser.add_argument('-score_new')
    parser.add_argument('-obj_new')
    parser.add_argument('-pc_wj')
    parser.add_argument('-score_wj')
    parser.add_argument('-obj_wj')
    parser.add_argument('-sc')
    parser.add_argument('-mp_sc')
    parser.add_argument('-rs_sc')
    parser.add_argument('-pc')
    parser.add_argument('-mp_pc')
    parser.add_argument('-rs_pc')
    parser.add_argument('-span_threshold', default=0.3, type=float)
    parser.add_argument('-names', default=None, help='file list of names to show')
    parser.add_argument('-log_path', default='./', help='path to place log file')
    parser.add_argument('-percent', type=int, default=100)
    # NOTE: argparse's type=bool treats any non-empty string (even 'False') as True,
    # so a plain store_true flag is used instead
    parser.add_argument('-best', default=False, action='store_true')
    parser.add_argument('-terms', nargs='+', default=['score', 'a_shape', 'a_pack', 'a_ddg', 'res_solv'])
    parser.add_argument('-threshold', type=int, default=5)
    parser.add_argument('-show', default='show')

    args = vars(parser.parse_args())
    args['logger'] = Logger('logger_%s.log' % time.strftime("%d.%0-m"), args['log_path'])

    if args['mode'] == 'old':
        analyse_old(args)

    elif args['mode'] == 'new':
        analyse_new(args)

    elif args['mode'] == 'wj':
        analyse_wj(args)

    elif args['mode'] == 'q':
        quick_rmsd_total(args)

    elif args['mode'] == 'slider':
        slide_ddg(args)

    elif args['mode'] == 's_by_s':
        side_by_side(args)

    elif args['mode'] == 'test':
        sc_df = Rf.score_file2df(args['sc'])
        new = Rf.get_best_of_best(sc_df)

    else:
        print('no mode')
    args['logger'].close()
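
# A minimal sketch of the Logger helper assumed throughout (the real class may
# do more, e.g. echo messages to stdout); only the constructor and the
# log()/create_header()/close() methods used above are shown.
import os

class LoggerSketch:
    def __init__(self, file_name, path='./'):
        # open the log file inside the given directory
        self.fout = open(os.path.join(path, file_name), 'w')

    def log(self, msg, skip_stamp=False):
        # prefix each message with a time stamp unless explicitly skipped
        stamp = '' if skip_stamp else time.strftime('%H:%M:%S ')
        self.fout.write(stamp + msg + '\n')

    def create_header(self, msg):
        self.fout.write('\n=== %s ===\n' % msg)

    def close(self):
        self.fout.close()
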
def quick_rmsd_total(args):

    y_axis_term = 'score'

    sc_df = Rf.score_file2df(args['sc'], args['names'])
    args['logger'].log('found %i structs in sc_df' % len(sc_df))
    pc_df = get_rmsds_from_table(args['pc'])
    args['logger'].log('found %i structs in pc' % len(pc_df))
    sc_df = sc_df.merge(pc_df, on='description')

    # if 'a_hha' in sc_df.columns:
        # sc_df['angle'] = sc_df['a_hha'] > 0

    args['logger'].log('left with %i in merged df' % len(sc_df))

    args['logger'].log('examining %s with span_topo threshold %f' % (args['sc'], args['span_threshold']))
    fig, ax = plt.subplots()

    if args['best']:
        sc_df = sc_df[sc_df['a_tms_span_fa'] > 0.5 ]
        threshold = np.percentile(sc_df[y_axis_term], args['percent'])
        sc_df = sc_df[ sc_df[y_axis_term] < threshold ]
        sc_df = sc_df[ sc_df['a_span_topo'] >= 0.99 ]
        sc_df_pass = Rf.get_best_of_best(sc_df, args['terms'], args['threshold'])
        sc_df_fail = sc_df[ ~sc_df['description'].isin( sc_df_pass['description'] ) ]
        args['logger'].log('%i models returned from BEST' % len(sc_df_pass))
    else:
        args['logger'].log('total of %i models in score' % len(sc_df))
        sc_df = sc_df[sc_df['a_tms_span_fa'] > 0.5]
        args['logger'].log('%i models pass tms_span' % len(sc_df))
        threshold = np.percentile(sc_df[y_axis_term], args['percent'])
        sc_df = sc_df[ sc_df[y_axis_term] < threshold ]
        args['logger'].log('for percent %.2f found threshold to be %.2f and %i structures pass it' % (args['percent'], threshold, len(sc_df)))
        sc_df = sc_df[sc_df['a_shape'] >= 0.6]
        sc_df = sc_df[sc_df['a_sasa'] > 700]
        args['logger'].log('%i passed sasa 700' % len(sc_df))
        sc_df = sc_df[sc_df['a_ddg'] < -5]
        args['logger'].log('%i passed ddg' % len(sc_df))
        # sc_df = sc_df[sc_df['a_pack'] > 0.6]
        sc_df = sc_df[sc_df['a_unsat'] < 1]
        args['logger'].log('%i passed unsat' % len(sc_df))
        sc_df['pass'] = sc_df['a_span_topo'] > args['span_threshold']
        sc_df = sc_df[sc_df['a_res_solv'] < -10]
        args['logger'].log('%i passed res_solv -10' % len(sc_df))

        sc_df_pass = sc_df[sc_df['a_span_topo'] > args['span_threshold']]
        args['logger'].log('%i models passed span_topo threshold' % len(sc_df_pass))
        sc_df_fail = sc_df[sc_df['a_span_topo'] <= args['span_threshold']]
        args['logger'].log('%i models failed span_topo threshold' % len(sc_df_fail))

    # ax.scatter(sc_df_fail['rmsd_calc'].values, sc_df_fail['score'].values, color='r', marker='.')

    # x/y values of the passing models (the previous np.ndarray(buffer=...) calls
    # used the wrong length and an unsafe constructor; .values already gives arrays)
    x_array = sc_df_pass['pc_rmsd'].values
    y_array = sc_df_pass[y_axis_term].values
    if 'a_hha' in sc_df.columns:
        ax.scatter(sc_df_pass['pc_rmsd'].values, sc_df_pass[y_axis_term].values, marker='o',
                c=sc_df_pass['a_hha'].values, picker=True, cmap=plt.cm.coolwarm)
    else:
        ax.scatter(sc_df_pass['pc_rmsd'].values, sc_df_pass[y_axis_term].values, marker='o',
                c=sc_df_pass['a_span_topo'].values, picker=True, cmap=plt.cm.coolwarm)

    # min_energy = np.nanmin(list(sc_df_pass['score'].values)+list(sc_df_fail['score'].values))
    min_energy = np.nanmin(list(sc_df_pass[y_axis_term].values))
    max_energy = np.nanmax(list(sc_df_pass[y_axis_term].values))
    plt.ylim([min_energy - 1, max_energy + 1])
    plt.xlim([0, 15])
    plt.title(args['sc']+'_pass')

    z_score, rmsd_threshold = Rf.get_z_score_by_rmsd_percent(sc_df_pass)
    plt.text(0.75, 0.2, "Zscore=%.2f" % z_score, transform=ax.transAxes)
    plt.axvline(rmsd_threshold)
    # if 'a_hha' in sc_df.columns:
        # ax.scatter(sc_df_fail['pc_rmsd'].values, sc_df_fail[y_axis_term].values, marker='x',
                # c=sc_df_fail['a_hha'].values, picker=True, cmap=plt.cm.coolwarm, s=5, alpha=90)#, markersize=200)
    # else:
        # ax.scatter(sc_df_fail['pc_rmsd'].values, sc_df_fail[y_axis_term].values, marker='x',
                # c=sc_df_fail['a_span_topo'].values, picker=True, cmap=plt.cm.coolwarm, s=5, alpha=90)#, markersize=200)

    # af = PrintLabel(sc_df_pass, 'rmsd_calc', 'score', ['description', 'pass'])
    # fig.canvas.mpl_connect('button_press_event', af)
    point_label_cols = list(set(args['terms'] + ['description', 'a_sasa', 'a_res_solv', 'a_pack', 'a_span_topo', 'a_ddg', 'fa_elec']))
    pl = PointLabel(sc_df_pass, ax, fig, 'pc_rmsd', y_axis_term, point_label_cols,
                    args['logger']) # a_shape ???
    fig.canvas.mpl_connect('pick_event', pl.onpick)
    plt.xlabel('RMSD')
    plt.ylabel(y_axis_term)
    if args['show'] == 'show':
        # fig.canvas.mpl_connect('pick_event', on_pick3)
        # cursor = FollowDotCursor(ax, sc_df_pass['pc_rmsd'], sc_df_pass[y_axis_term])
        plt.show()
    else:
        plt.savefig('%s.png' % args['sc'].split('.score')[0])
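
# Hypothetical sketch of get_rmsds_from_table(), which quick_rmsd_total() assumes
# returns a DataFrame with at least 'description' and 'pc_rmsd' columns for the
# merge above; the real parser may read a different layout.
import pandas as pd

def get_rmsds_from_table_sketch(path):
    # assumed: a whitespace-separated table with a header row naming the columns
    df = pd.read_csv(path, sep=r'\s+')
    return df[['description', 'pc_rmsd']]
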
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-sc', type=str, help='score file')
    parser.add_argument('-percent', type=float, default=5, help='percent (1-100) best scoring to get')
    parser.add_argument('-filter', type=str, default='score', help='filter or score term to use')
    parser.add_argument('-num', default=10, type=int, help='use if you want a number of results, not better than percentile')
    parser.add_argument('-mode', default='%')
    parser.add_argument('-over_under', type=str, default='under', help='under/over score should be over/under threshold')
    parser.add_argument('-result', type=str, default=None, help='should the names be written to a file separate from the log file')
    parser.add_argument('-terms', nargs='+', default=['score', 'a_shape', 'a_pack', 'a_ddg', 'res_solv'])
    parser.add_argument('-thresholds', nargs='+', type=float)
    parser.add_argument('-percentile', default=10, type=int)
    args = vars(parser.parse_args())

    logger = Logger('top_%.1f_%s.log' % (args['percent'], args['filter']))

    # read in the score file, determine the threshold for the percentile
    sc_df = Rf.score_file2df(args['sc'])
    score = sc_df[args['filter']]


    if args['mode'] == '%':
        threshold = np.percentile(score, args['percent'])
        logger.log('for percent %.1f of filter %s, found threshold to be %.2f' % (args['percent'], args['filter'], threshold))

        # create a df for lines that pass the threshold, either over or above it...
        if args['over_under'] == 'over':
            pass_df = sc_df[sc_df[args['filter']] >= threshold]
        elif args['over_under'] == 'under':
            pass_df = sc_df[sc_df[args['filter']] <= threshold]

    elif args['mode'] == 'num':
        sc_df.sort_values(args['filter'], inplace=True)
        pass_df = sc_df.head(args['num'])

    elif args['mode'] == 'best_of_best':
        threshold = np.percentile(score, args['percent'])
        sc_df = sc_df[sc_df[args['filter']] <= threshold]
        pass_df = Rf.get_best_of_best(sc_df, args['terms'], args['percentile'])

    elif args['mode'] == 'thresholds':
        for term, thrs in zip(args['terms'], args['thresholds']):
            if term in ['a_sasa', 'a_pack', 'a_shape', 'a_tms_span_fa',
                        'a_tms_span', 'a_span_topo']:
                sc_df = sc_df[sc_df[term] > thrs]
            elif term in ['a_mars', 'a_ddg', 'score', 'total_score',
                          'a_res_solv', 'a_span_ins']:
                sc_df = sc_df[sc_df[term] < thrs]
        # the percentile is taken over the original score distribution, so it only
        # needs to be computed once, after all term thresholds have been applied
        threshold = np.percentile(score, args['percent'])
        pass_df = sc_df[sc_df[args['filter']] < threshold]

    # output the names (description) of models that pass the threshold, either to the logger file, or to a separate file
    if args['result'] is None:
        logger.create_header('models passing the threshold:')
        for idx, row in pass_df.iterrows():
            logger.log('%s %f' % (row['description'], row['score']), skip_stamp=True)
    else:
        with open(args['result'], 'w+') as fout:
            for name in pass_df['description']:
                fout.write(name + '\n')
    logger.close()
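
# Hypothetical sketch of the Rf.get_best_of_best() helper called above (the real
# implementation in the project's Rf module may differ): keep only the models
# that fall inside the best `percentile` of every requested term, using the same
# higher-is-better / lower-is-better split as the 'thresholds' mode.
import numpy as np
import pandas as pd

def get_best_of_best_sketch(df, terms=('score',), percentile=10):
    higher_better = {'a_sasa', 'a_pack', 'a_shape', 'a_tms_span_fa',
                     'a_tms_span', 'a_span_topo'}
    mask = pd.Series(True, index=df.index)
    for term in terms:
        if term in higher_better:
            cut = np.percentile(df[term], 100 - percentile)
            mask &= df[term] >= cut
        else:
            cut = np.percentile(df[term], percentile)
            mask &= df[term] <= cut
    return df[mask]
# e.g. get_best_of_best_sketch(sc_df, ['score', 'a_ddg', 'a_pack'], 10) keeps the
# models that are simultaneously in the best decile of all three terms.
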
def quick_rmsd_total(args):

    y_axis_term = 'score'

    sc_df = Rf.score_file2df(args['sc'], args['names'])
    args['logger'].log('found %i structs in sc_df' % len(sc_df))
    pc_df = get_rmsds_from_table(args['pc'])
    args['logger'].log('found %i structs in pc' % len(pc_df))
    sc_df = sc_df.merge(pc_df, on='description')

    if 'a_hha' in sc_df.columns:
        sc_df['angle'] = sc_df['a_hha'] > 0

    args['logger'].log('left with %i in merged df' % len(sc_df))

    args['logger'].log('examining %s with span_topo threshold %f' % (args['sc'], args['span_threshold']))
    fig, ax = plt.subplots()

    if args['best']:
        # sc_df = sc_df[ sc_df['a_span_topo'] >= 0.95 ]
        sc_df_pass = Rf.get_best_of_best(sc_df, args['terms'], args['threshold'])
        sc_df_fail = sc_df[ ~sc_df['description'].isin( sc_df_pass['description'] ) ]
        args['logger'].log('%i models returned from BEST' % len(sc_df_pass))
    else:
        args['logger'].log('total of %i models in score' % len(sc_df))
        sc_df = sc_df[sc_df['a_tms_span_fa'] > 0.5]
        args['logger'].log('%i models pass tms_span' % len(sc_df))
        threshold = np.percentile(sc_df[y_axis_term], args['percent'])
        sc_df = sc_df[ sc_df[y_axis_term] < threshold ]
        args['logger'].log('for percent %.2f found threshold to be %.2f and %i structures pass it' % (args['percent'], threshold, len(sc_df)))
        sc_df = sc_df[sc_df['a_shape'] >= 0.6]
        # sc_df = sc_df[sc_df['a_sasa'] > 900]
        sc_df = sc_df[sc_df['a_ddg'] < -6]
        args['logger'].log('%i passed ddg' % len(sc_df))
        # sc_df = sc_df[sc_df['a_pack'] > 0.6]
        sc_df = sc_df[sc_df['a_unsat'] < 1]
        args['logger'].log('%i passed unsat' % len(sc_df))
        sc_df['pass'] = sc_df['a_span_topo'] > args['span_threshold']

        sc_df_pass = sc_df[sc_df['a_span_topo'] > args['span_threshold']]
        args['logger'].log('%i models passed span_topo threshold' % len(sc_df_pass))
        sc_df_fail = sc_df[sc_df['a_span_topo'] <= args['span_threshold']]
        args['logger'].log('%i models failed span_topo threshold' % len(sc_df_fail))

    # ax.scatter(sc_df_fail['rmsd_calc'].values, sc_df_fail['score'].values, color='r', marker='.')

    if 'a_hha' in sc_df.columns:
        ax.scatter(sc_df_pass['pc_rmsd'].values, sc_df_pass[y_axis_term].values, marker='o',
                c=sc_df_pass['a_hha'].values, picker=True, cmap=plt.cm.coolwarm)
    else:
        ax.scatter(sc_df_pass['pc_rmsd'].values, sc_df_pass[y_axis_term].values, marker='o',
                c=sc_df_pass['a_span_topo'].values, picker=True, cmap=plt.cm.coolwarm)

    # min_energy = np.nanmin(list(sc_df_pass['score'].values)+list(sc_df_fail['score'].values))
    min_energy = np.nanmin(list(sc_df_pass[y_axis_term].values))
    max_energy = np.nanmax(list(sc_df_pass[y_axis_term].values))
    plt.ylim([min_energy - 1, max_energy + 1])
    plt.xlim([0, 30])
    plt.title(args['sc']+'_pass')

    # if 'a_hha' in sc_df.columns:
        # ax.scatter(sc_df_fail['pc_rmsd'].values, sc_df_fail[y_axis_term].values, marker='x',
                # c=sc_df_fail['a_hha'].values, picker=True, cmap=plt.cm.coolwarm, s=5, alpha=90)#, markersize=200)
    # else:
        # ax.scatter(sc_df_fail['pc_rmsd'].values, sc_df_fail[y_axis_term].values, marker='x',
                # c=sc_df_fail['a_span_topo'].values, picker=True, cmap=plt.cm.coolwarm, s=5, alpha=90)#, markersize=200)

    # af = PrintLabel(sc_df_pass, 'rmsd_calc', 'score', ['description', 'pass'])
    # fig.canvas.mpl_connect('button_press_event', af)
    point_label_cols = list(set(args['terms'] + ['description', 'a_sasa', 'a_res_solv', 'a_pack', 'a_span_topo', 'a_ddg', 'fa_elec']))
    pl = PointLabel(sc_df_pass, ax, fig, 'pc_rmsd', y_axis_term, point_label_cols, 
                    args['logger']) # a_shape ???
    fig.canvas.mpl_connect('pick_event', pl.onpick)
    # print('for pass')
    # print_best_scores(sc_df_pass, 'score', percentile=0.05)
    # print('for. fail')
    # print_best_scores(sc_df_fail, 'score', percentile=0.05)
    plt.xlabel('RMSD')
    plt.ylabel(y_axis_term)
    if args['show'] == 'show':
        plt.show()
    else:
        plt.savefig('%s.png' % args['sc'].split('.score')[0])
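
# Hypothetical sketch of the PointLabel pick-event handler wired up above; the
# real class may display on-plot annotations instead. Because the scatter was
# built directly from sc_df_pass's values, the picked indices map positionally
# onto the DataFrame rows, so iloc can recover the model behind each picked point.
class PointLabelSketch:
    def __init__(self, df, ax, fig, x_col, y_col, cols, logger):
        self.df, self.ax, self.fig = df, ax, fig
        self.x_col, self.y_col, self.cols, self.logger = x_col, y_col, cols, logger

    def onpick(self, event):
        # event.ind holds the indices of the picked points in the collection
        for i in event.ind:
            row = self.df.iloc[i]
            self.logger.log(', '.join('%s=%s' % (c, row[c])
                                      for c in self.cols if c in row))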