def _plot_scores(results, methods, method_colours):
    """Build a box-plot figure showing every run's score for each method.

    Args:
        results: Table read through ``iterrows()`` and column lookup; it must
            provide a ``runid`` column and one column per method.
        methods: Method names to plot; ordered with ``plotter.sort_methods``.
        method_colours: Mapping from method name to the colour value handed
            to ``plotter.format_colour``.

    Returns:
        A dict with ``data`` (one vertical box trace per method) and
        ``layout`` entries.

    Raises:
        AssertionError: If any plotted method has a ``MISSING`` score.
    """
    ordered = list(plotter.sort_methods(methods))
    labels = {name: plotter.HAPPY_METHOD_NAMES.get(name, name) for name in ordered}

    # First pass: gather (and validate) the data for every method before any
    # trace is built.
    scores_by_method = {}
    runids_by_method = {}
    for name in ordered:
        scores = np.array([record[name] for _, record in results.iterrows()])
        # The message lists the run IDs whose score for this method is MISSING.
        assert not np.any(scores == MISSING), '%s is missing %s' % (
            name,
            ','.join(results['runid'][results[name] == MISSING]),
        )
        scores_by_method[name] = scores
        runids_by_method[name] = [record['runid'] for _, record in results.iterrows()]

    # Second pass: one box trace per method, points drawn beside the box.
    alpha = 0.5
    score_traces = []
    for name in scores_by_method:
        marker = {
            'outliercolor': plotter.format_colour(method_colours[name], alpha),
            'color': plotter.format_colour(method_colours[name], alpha),
            'size': 12,
        }
        outline = {
            'color': plotter.format_colour(method_colours[name]),
        }
        score_traces.append({
            'type': 'box',
            'y': scores_by_method[name],
            'text': runids_by_method[name],
            'name': labels[name],
            'orientation': 'v',
            'boxpoints': 'all',
            'marker': marker,
            'line': outline,
            'jitter': 0.6,
            'pointpos': 2.0,
        })

    # The y-range is scaled by the largest 'q2' statistic that
    # calc_violin_stats reports over the non-empty traces.
    upper_stats = []
    for trace in score_traces:
        if len(trace['y']) > 0:
            upper_stats.append(calc_violin_stats(trace['y'])['q2'])
    max_x = max(upper_stats)

    y_axis = {
        'zeroline': True,
        'zerolinewidth': 2,
        'zerolinecolor': 'rgba(0,0,0,0.5)',
        'range': (-0.15 * max_x, 1.05 * max_x),
        'title_text': 'VAF reconstruction loss<br>(bits / mutation / tissue sample)',
    }
    return {
        'data': score_traces,
        'layout': {
            'yaxis': y_axis,
            'showlegend': False,
        },
    }
def _plot_success_rates(results, methods, method_colours, K_vals, S_vals):
    """Plot each method's success rate against S, one subplot per K.

    A run counts as a success for a method when that method's value in the
    run's row is not ``MISSING``. The success rate for a (K, S) pair is the
    fraction of rows with those ``K`` and ``S`` values that succeeded.

    Args:
        results: Table read through ``iterrows()``; rows must provide ``K``,
            ``S`` and one entry per method.
        methods: Method names to plot; ordered with ``plotter.sort_methods``.
        method_colours: Mapping from method name to the colour value handed
            to ``plotter.format_colour``.
        K_vals: ``K`` values to plot, one subplot column each.
        S_vals: ``S`` values placed along the (categorical) x-axis.

    Returns:
        The figure produced by ``plotly.subplots.make_subplots`` with one
        scatter trace per (K, method) pair.
    """
    # Materialize the ordering: it is iterated once for the label map and
    # then again for every K, which would silently yield nothing on later
    # passes if `sort_methods` returned a one-shot iterator.
    M_sorted = list(plotter.sort_methods(methods))
    M_happy = {M: plotter.HAPPY_METHOD_NAMES.get(M, M) for M in M_sorted}

    fig = plotly.subplots.make_subplots(
        rows=1,
        cols=len(K_vals),
        subplot_titles=[plotter.pluralize(K, 'subclone') for K in K_vals],
        x_title='Tissue samples',
    )

    # Walk the table once; the (K, S) filtering below does not depend on the
    # method, so it is done per K rather than per (K, method).
    all_rows = [row for _, row in results.iterrows()]

    for Kidx, K in enumerate(K_vals):
        rows_by_S = {
            S: [row for row in all_rows if row['S'] == S and row['K'] == K]
            for S in S_vals
        }
        # Skip (K, S) pairs with no runs instead of dividing by zero.
        S_present = [S for S in S_vals if len(rows_by_S[S]) > 0]

        for M in M_sorted:
            Y = np.array([
                sum(1 for row in rows_by_S[S] if row[M] != MISSING) / len(rows_by_S[S])
                for S in S_present
            ])
            fig.add_trace(
                {
                    'type': 'scatter',
                    'x': [str(S) for S in S_present],
                    'y': Y,
                    'name': M_happy[M],
                    'line': {
                        'color': plotter.format_colour(method_colours[M]),
                        'width': 4,
                    },
                    'marker': {
                        'size': 14
                    },
                    # Only the first subplot contributes legend entries, so
                    # each method appears in the legend once.
                    'showlegend': Kidx == 0,
                },
                row=1,
                col=Kidx + 1,
            )

    fig.update_xaxes(
        tickangle=0,
        type='category',
    )
    # Hide tick labels everywhere, then re-enable them on the first column.
    fig.update_yaxes(showticklabels=False)
    fig.update_yaxes(
        title='Success rate',
        # Plotly tick formats are d3-format specifiers: '.0%' renders the
        # 0..1 fractions as whole percentages.
        tickformat='.0%',
        showticklabels=True,
        col=1,
    )
    return fig
def _plot_S_comparison(results, methods, method_colours, K_vals, S_vals, score_type):
    """Plot each method's median score against S, one subplot per K.

    For every (K, method, S) combination the non-``MISSING`` scores are
    summarized by their median, with asymmetric error bars reaching down to
    the 0.25 quantile and up to the 0.75 quantile. S values without any
    score for a method are left out of that method's trace.

    Args:
        results: Table read through ``iterrows()``; rows must provide ``K``,
            ``S`` and one entry per method.
        methods: Method names to plot; ordered with ``plotter.sort_methods``.
        method_colours: Mapping from method name to the colour value handed
            to ``plotter.format_colour``.
        K_vals: ``K`` values to plot, one subplot column each.
        S_vals: Candidate ``S`` values for the x-axis.
        score_type: Key into ``AXIS_TITLES`` selecting the y-axis title.

    Returns:
        The figure produced by ``plotly.subplots.make_subplots``.
    """
    ordered = plotter.sort_methods(methods)
    labels = {name: plotter.HAPPY_METHOD_NAMES.get(name, name) for name in ordered}

    panel_titles = [plotter.pluralize(K, 'subclone') for K in K_vals]
    fig = plotly.subplots.make_subplots(
        rows=1,
        cols=len(K_vals),
        subplot_titles=panel_titles,
        x_title='Tissue samples',
        shared_yaxes=True,
    )

    for column, K in enumerate(K_vals, start=1):
        for name in ordered:
            q25 = {}
            q75 = {}
            centre = {}
            for S in S_vals:
                kept = []
                for _, record in results.iterrows():
                    if record['S'] == S and record['K'] == K and record[name] != MISSING:
                        kept.append(record[name])
                scores = np.array(kept)
                if len(scores) == 0:
                    # Nothing to summarize for this S; leave it off the trace.
                    continue
                q25[S] = np.quantile(scores, 0.25)
                q75[S] = np.quantile(scores, 0.75)
                centre[S] = np.median(scores)

            plotted_S = sorted(centre)
            # Error bars are given as distances from the median.
            above = [q75[S] - centre[S] for S in plotted_S]
            below = [centre[S] - q25[S] for S in plotted_S]
            trace = {
                'type': 'scatter',
                'x': [str(S) for S in plotted_S],
                'y': [centre[S] for S in plotted_S],
                'error_y': {
                    'type': 'data',
                    'symmetric': False,
                    'array': above,
                    'arrayminus': below,
                },
                'name': labels[name],
                'line': {
                    'color': plotter.format_colour(method_colours[name]),
                    'width': 4,
                },
                'marker': {'size': 14},
                # Legend entries come from the first subplot only.
                'showlegend': column == 1,
            }
            fig.add_trace(trace, row=1, col=column)

    fig.update_xaxes(tickangle=45, type='category')
    fig.update_yaxes(title=AXIS_TITLES[score_type]['S_comparison'], col=1)
    return fig
def _plot_single_vs_others(results, single, methods, method_colours, score_type):
    """Box-plot every other method's score minus the score of ``single``.

    Only rows where neither ``single`` nor the compared method is
    ``MISSING`` contribute. Methods with no such rows get no trace.

    Args:
        results: Table read through ``iterrows()``; rows must provide
            ``runid`` and one entry per method.
        single: The reference method; must be a member of ``methods``.
        methods: All candidate method names.
        method_colours: Mapping from method name to the colour value handed
            to ``plotter.format_colour``.
        score_type: Key into ``AXIS_TITLES`` selecting the y-axis title
            template, which is %-formatted with the display name of ``single``.

    Returns:
        A dict with ``data`` (box traces) and ``layout`` entries.

    Raises:
        AssertionError: If ``single`` is not in ``methods``.
    """
    assert single in methods
    rivals = plotter.sort_methods(set(methods) - set((single, )))

    traces = []
    for rival in rivals:
        labelled_diffs = []
        for _, record in results.iterrows():
            if MISSING in (record[single], record[rival]):
                continue
            labelled_diffs.append((record['runid'], record[rival] - record[single]))
        if len(labelled_diffs) == 0:
            continue

        runids, diffs = zip(*labelled_diffs)
        marker = {
            'outliercolor': plotter.format_colour(method_colours[rival], 0.5),
            'color': plotter.format_colour(method_colours[rival], 0.5),
        }
        outline = {
            'color': plotter.format_colour(method_colours[rival]),
        }
        traces.append({
            'type': 'box',
            'y': diffs,
            'text': runids,
            'name': plotter.HAPPY_METHOD_NAMES.get(rival, rival),
            'boxpoints': 'all',
            'marker': marker,
            'line': outline,
            'jitter': 0.6,
            'pointpos': 2.0,
        })

    reference_label = plotter.HAPPY_METHOD_NAMES.get(single, single)
    y_axis = {
        'title': AXIS_TITLES[score_type]['single_vs_others'] % reference_label,
        'zeroline': True,
        'zerolinewidth': 1,
        'zerolinecolor': 'rgba(0,0,0,0.3)',
    }
    return {
        'data': traces,
        'layout': {
            'yaxis': y_axis,
        },
    }
def _plot_single_vs_others(results, single, methods, method_colours, score_type=None):
    """Box-plot every other method's score minus the score of ``single``.

    Only rows where neither ``single`` nor the compared method is
    ``MISSING`` contribute. Methods with no such rows get no trace.

    Args:
        results: Table read through ``iterrows()``; rows must provide
            ``runid`` and one entry per method.
        single: The reference method; must be a member of ``methods``.
        methods: All candidate method names.
        method_colours: Mapping from method name to the colour value handed
            to ``plotter.format_colour``.
        score_type: Optional key into ``AXIS_TITLES``. When given, the y-axis
            title comes from ``AXIS_TITLES[score_type]['single_vs_others']``;
            when omitted, the VAF-reconstruction-loss title is used. The
            parameter is optional so that existing four-argument calls keep
            working while five-argument calls are also accepted.

    Returns:
        A dict with ``data`` (box traces) and ``layout`` entries.

    Raises:
        AssertionError: If ``single`` is not in ``methods``.
    """
    assert single in methods
    others = plotter.sort_methods(set(methods) - set((single, )))

    traces = []
    for M in others:
        points = [
            (row['runid'], row[M] - row[single])
            for idx, row in results.iterrows()
            if MISSING not in (row[single], row[M])
        ]
        if len(points) == 0:
            continue
        runids, Y = zip(*points)
        traces.append({
            'type': 'box',
            'y': Y,
            'text': runids,
            'name': plotter.HAPPY_METHOD_NAMES.get(M, M),
            'boxpoints': 'all',
            'marker': {
                'outliercolor': plotter.format_colour(method_colours[M], 0.5),
                'color': plotter.format_colour(method_colours[M], 0.5),
            },
            'line': {
                'color': plotter.format_colour(method_colours[M]),
            },
            'jitter': 0.6,
            'pointpos': 2.0,
        })

    if score_type is None:
        title_template = 'VAF reconstruction loss relative to %s<br>(bits / mutation / tissue sample)'
    else:
        title_template = AXIS_TITLES[score_type]['single_vs_others']
    # The scores are relative to `single`, so the title must name `single`,
    # not the loop variable left over from the last compared method (which is
    # also undefined when there are no other methods).
    title = title_template % plotter.HAPPY_METHOD_NAMES.get(single, single)

    fig = {
        'data': traces,
        'layout': {
            'yaxis': {
                'title': title,
                'zeroline': True,
                'zerolinewidth': 2,
                'zerolinecolor': 'rgba(0,0,0,0.5)',
            },
        },
    }
    return fig
def make_boxes(results, methods, single):
    """Create one ``go.Box`` per other method, showing its score minus ``single``'s.

    Only rows where neither ``single`` nor the compared method is
    ``MISSING`` contribute; methods with no such rows are skipped. Each
    trace name includes the number of contributing runs.

    Args:
        results: Table read through ``iterrows()``; rows must provide
            ``runid`` and one entry per method.
        methods: All candidate method names.
        single: The reference method; must be a member of ``methods``.

    Returns:
        A non-empty list of ``go.Box`` traces.

    Raises:
        AssertionError: If ``single`` is not in ``methods`` or if no trace
            could be built.
    """
    assert single in methods
    rivals = plotter.sort_methods(set(methods) - set((single, )))

    boxes = []
    for rival in rivals:
        labelled_diffs = []
        for _, record in results.iterrows():
            if MISSING in (record[single], record[rival]):
                continue
            labelled_diffs.append((record['runid'], record[rival] - record[single]))
        if len(labelled_diffs) == 0:
            continue

        runids, diffs = zip(*labelled_diffs)
        label = '%s (%s runs)' % (
            plotter.HAPPY_METHOD_NAMES.get(rival, rival),
            len(labelled_diffs),
        )
        box = go.Box(
            y=diffs,
            text=runids,
            name=label,
            boxpoints='all',
            jitter=0.3,
            pointpos=1.8,
        )
        boxes.append(box)

    assert len(boxes) > 0
    return boxes
def _plot_scores(results, methods, method_colours, score_type, use_same_x_limit=True):
    """Build a per-K grid of success-rate bars and horizontal score boxes.

    One subplot row is created for each distinct ``K`` in ``results``. The
    left column holds a bar per method giving the fraction of that row's runs
    whose value is not ``MISSING``. The right column holds a horizontal box
    per method over the non-``MISSING`` scores.

    Args:
        results: Table read through ``iterrows()`` and ``results['K']``; rows
            must provide ``K``, ``runid`` and one entry per method.
        methods: Method names to plot; ordered with ``plotter.sort_methods``
            and then reversed.
        method_colours: Mapping from method name to the colour value handed
            to ``plotter.format_colour``.
        score_type: Key into ``AXIS_TITLES`` selecting the score-axis title.
        use_same_x_limit: When true, every score subplot uses the same x-range
            (overall minimum to overall maximum limit).

    Returns:
        The figure produced by ``plotly.subplots.make_subplots``.

    Raises:
        AssertionError: If a computed upper x-limit is below the lower one.
    """
    subclone_counts = sorted(pd.unique(results['K']))
    num_rows = len(subclone_counts)
    ordered = list(reversed(plotter.sort_methods(methods)))
    labels = {name: plotter.HAPPY_METHOD_NAMES.get(name, name) for name in ordered}

    score_traces = []
    success_traces = []
    for panel, K in enumerate(subclone_counts):
        panel_rows = [record for _, record in results.iterrows() if record['K'] == K]

        # Per-method summaries for this K.
        failed_frac = {}
        scores = {}
        run_names = {}
        for name in ordered:
            num_failed = sum(1 for record in panel_rows if record[name] == MISSING)
            failed_frac[name] = num_failed / len(panel_rows)
            solved = [record for record in panel_rows if record[name] != MISSING]
            scores[name] = [record[name] for record in solved]
            run_names[name] = [record['runid'] for record in solved]

        boxes = []
        for name in ordered:
            boxes.append({
                'type': 'box',
                'boxmean': False,
                'x': scores[name],
                'text': run_names[name],
                'name': labels[name],
                'marker_color': plotter.format_colour(method_colours[name]),
                'orientation': 'h',
                'width': 0.8,
                # Hack: only the first row's traces appear in the legend;
                # otherwise each method would be listed once per row. This
                # relies on every row showing the same methods in the same
                # colours -- if that ever changes, the legend will be wrong.
                'showlegend': panel == 0,
            })
        score_traces.append(boxes)

        fill_colours = [plotter.format_colour(method_colours[name], 0.5) for name in ordered]
        edge_colours = [plotter.format_colour(method_colours[name]) for name in ordered]
        success_traces.append({
            'type': 'bar',
            'x': [1 - failed_frac[name] for name in ordered],
            'y': [labels[name] for name in ordered],
            'marker': {
                'color': fill_colours,
                'line': {
                    'width': 2,
                    'color': edge_colours,
                },
            },
            'orientation': 'h',
            'showlegend': False,
        })

    fig = plotly.subplots.make_subplots(
        rows=num_rows,
        cols=2,
        column_widths=[0.2, 0.8],
        shared_xaxes=True,
        shared_yaxes=True,
        row_titles=['%s subclones' % K for K in subclone_counts],
        horizontal_spacing=0.03,
        vertical_spacing=0.03,
    )
    for row_num, (boxes, bars) in enumerate(zip(score_traces, success_traces), start=1):
        fig.add_trace(bars, col=1, row=row_num)
        for box in boxes:
            fig.add_trace(box, col=2, row=row_num)
    # The success-rate axis runs from 1 on the left to 0 on the right.
    fig.update_xaxes(range=(1, 0), col=1)

    def _nonempty(traces):
        # Traces for methods with no scores cannot contribute an axis limit.
        return [trace for trace in traces if len(trace['x']) > 0]

    # Whiskers run from `d1` (lower) to `d2` (higher).
    # NOTE(review): the upper limit below reads the 'q2' entry of
    # calc_violin_stats, not `d2` -- confirm which statistic is intended.
    upper_limits = []
    for boxes in score_traces:
        upper_limits.append(
            max([calc_violin_stats(trace['x'])['q2'] for trace in _nonempty(boxes)])
        )
    lower_limits = []
    for boxes in score_traces:
        lower_limits.append(min([np.min(trace['x']) for trace in _nonempty(boxes)]))
    max_x = np.array(upper_limits)
    min_x = np.array(lower_limits)

    if use_same_x_limit:
        max_x[:] = np.max(max_x)
        min_x[:] = np.min(min_x)

    for row_num, (low, high) in enumerate(zip(min_x, max_x), start=1):
        assert high >= low
        span = high - low
        # Pad each end of the range by 5% of its width.
        fig.update_xaxes(
            range=(low - 0.05*span, high + 0.05*span),
            col=2,
            row=row_num,
        )

    fig.update_xaxes(title_text='Success rate', row=num_rows, col=1)
    fig.update_xaxes(tickformat='.0%', col=1)
    fig.update_xaxes(
        zeroline=True,
        zerolinewidth=1,
        zerolinecolor='rgba(0,0,0,0.3)',
        col=2,
    )
    fig.update_xaxes(
        title_text=AXIS_TITLES[score_type]['all_scores'],
        row=num_rows,
        col=2,
    )
    fig.update_layout(showlegend=True, legend={'traceorder': 'reversed'})
    return fig
def main():
    """Command-line entry point: load results, build every figure, write them out.

    Reads the results file with ``plotter.load_results``, prepares it with
    ``plotter.munge`` and ``plotter.augment``, and builds four kinds of figure:
    overall scores, success rates, the S comparison, and one
    "<method>_vs_others" figure per method. It then applies the chosen layout
    template to each and hands them, with their export dimensions, to
    ``plotter.write_figs``.
    """
    parser = argparse.ArgumentParser(
        # Replaces a placeholder string that was shown verbatim in --help.
        description='Plot method scores, success rates and per-method comparisons from a results file',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('--template', default='seaborn')
    parser.add_argument('--score-type', required=True, choices=('mutrel', 'mutphi', 'mutdistl1', 'mutdistl2', 'cputime', 'walltime'))
    parser.add_argument('--baseline')
    parser.add_argument('--hide-methods', default='', help='Comma-separated method names to leave out of every plot')
    parser.add_argument('results_fn')
    parser.add_argument('plot_fn')
    args = parser.parse_args()

    _make_axis_titles()
    results, methods = plotter.load_results(args.results_fn)
    plotter.munge(results, methods, args.baseline)
    for key in ('K', 'S'):
        results = plotter.augment(results, key)

    # Tolerate whitespace around names and stray commas, so that
    # `--hide-methods "a, b"` hides both methods instead of silently
    # failing to match " b".
    hidden = {name.strip() for name in args.hide_methods.split(',') if name.strip()}
    methods = set(methods) - hidden
    method_colours = plotter.choose_method_colours(methods)

    figs = {
        'scores': _plot_scores(
            results,
            methods - set(('mle_unconstrained',)),
            method_colours,
            args.score_type
        ),
        'success_rate': _plot_success_rates(
            results,
            methods - set(('mle_unconstrained', 'pwgs_supervars', 'lichee', 'pairtree_clustrel')),
            method_colours,
            (3, 10),
            (1, 3, 10),
        ),
        'S_comparison': _plot_S_comparison(
            results,
            methods - set(('mle_unconstrained', 'citup', 'pastri')),
            method_colours,
            (3, 10, 30),
            (1, 3, 10, 30, 100),
            args.score_type
        ),
    }
    # Per-figure dimensions passed through to plotter.write_figs.
    export_dims = {
        'success_rate': (400, 485),
        'scores': (700, 850),
        'S_comparison': (500, 485),
    }

    for M in plotter.sort_methods(methods):
        # No "vs. others" figure is made with mle_unconstrained as the
        # reference, though it still appears among the compared methods.
        if M == 'mle_unconstrained':
            continue
        name = f'{M}_vs_others'
        figs[name] = _plot_single_vs_others(
            results,
            M,
            methods,
            method_colours,
            args.score_type
        )
        export_dims[name] = (600, 485)

    for F in figs.values():
        if 'layout' not in F:
            F['layout'] = {}
        F['layout']['template'] = args.template
    plotter.write_figs(figs, args.plot_fn, export_dims)