def _calc_eta_stats(truth_fn):
    """Report the size and per-row maxima of the stored ``eta`` matrix.

    ``eta`` is read from the results file at ``truth_fn``. Its first row is
    excluded from every returned statistic.
    NOTE(review): presumably row 0 is the root/normal population and the
    columns are samples -- confirm against the truth-file writer.

    Returns:
        A tuple ``(K, S, biggest_eta)``: ``K`` and ``S`` are the two
        dimensions of ``eta`` without its first row, and ``biggest_eta``
        holds the maximum along axis 1 for each of those remaining rows.
    """
    eta = resultserializer.Results(truth_fn).get('eta')
    num_rows, num_cols = eta[1:].shape
    row_maxima = np.max(eta, axis=1)
    return (num_rows, num_cols, row_maxima[1:])
def main():
    """Write an HTML page of diversity-index results for one tree.

    Reads sample names, clusters, and the ``phi``/``struct`` entries selected
    by ``--tree-index`` from ``results_fn``, computes ``eta``, and then, for
    the ' BM'/' CNS' and ' BM'/' Spleen' sample pairings, appends a heading
    plus the output of ``_process_di`` to the page saved at ``html_out_fn``.
    """
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('--tree-index', type=int, default=0)
    for positional in ('results_fn', 'html_out_fn'):
        parser.add_argument(positional)
    args = parser.parse_args()

    results = resultserializer.Results(args.results_fn)
    sampnames = results.get('sampnames')
    clusters = results.get('clusters')
    phi = results.get('phi')[args.tree_index]
    struct = results.get('struct')[args.tree_index]

    # Unpacking also enforces that phi is two-dimensional.
    _, num_samps = phi.shape
    assert len(sampnames) == num_samps
    eta = util.calc_eta(struct, phi)

    pairs_by_tissue = {
        'CNS': stephutil.find_samp_pairs(sampnames, ' BM', ' CNS'),
        'Spleen': stephutil.find_samp_pairs(sampnames, ' BM', ' Spleen'),
    }

    # Gather the page fragments first and concatenate them once at the end.
    fragments = [
        '<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>',
        '<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css">',
    ]
    for tissue, samp_pairs in pairs_by_tissue.items():
        di_results = _calc_di(eta, clusters, struct, sampnames, samp_pairs)
        fragments.append(f'<h1>{tissue}</h1>')
        fragments.append(_process_di(di_results))
    html = ''.join(fragments)

    with open(args.html_out_fn, 'w') as outf:
        # Trailing newline matches what print() would have emitted.
        outf.write(html + '\n')
def main():
    """Compute mutphi from a results file and print its score.

    Every tree in the results shares the same clustering, which is prefixed
    with an empty cluster before being passed on. With ``--only-best`` only
    the first tree's ``phi``, clustering, and ``llh`` are used, and its count
    is set to 1.
    """
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('--only-best', action='store_true')
    parser.add_argument('pairtree_ssm_fn')
    parser.add_argument('results_fn')
    args = parser.parse_args()

    results = resultserializer.Results(args.results_fn)
    phi = results.get('phi')
    clusters = [[]] + results.get('clusters')
    llh = results.get('llh')
    counts = results.get('count')
    # One reference to the shared clustering per tree.
    clusterings = [clusters] * len(llh)

    if args.only_best:
        phi, clusterings, llh, counts = (
            [phi[0]],
            [clusterings[0]],
            [llh[0]],
            [1],
        )

    mphi = mutphi.calc_mutphi(
        phi,
        llh,
        clusterings,
        args.pairtree_ssm_fn,
        counts,
    )
    print(score(mphi.stats))
def main():
    """Convert Pairtree results into a neutree file.

    Loads the results at ``pairtree_results_fn`` and the ``garbage`` entry of
    the params file at ``params_fn``, passes both to ``convert``, and saves
    the outcome to ``neutree_fn``.
    """
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    for positional in ('pairtree_results_fn', 'params_fn', 'neutree_fn'):
        parser.add_argument(positional)
    args = parser.parse_args()

    results = resultserializer.Results(args.pairtree_results_fn)
    garbage = inputparser.load_params(args.params_fn)['garbage']
    neutree.save(convert(results, garbage), args.neutree_fn)
def write_truth(structs, phi, clusters, garbage, results_fn):
    """Save ground-truth trees to ``results_fn`` as a results file.

    With ``N = len(structs)``, the stored fields are:
      * ``struct``: ``structs`` as given
      * ``count``: an array of ``N`` ones
      * ``phi``: ``phi`` repeated ``N`` times in a new array
      * ``llh``: an array of ``N`` zeros
      * ``prob``: an array of ``N`` entries, each ``1 / N``
      * ``clusters`` and ``garbage``: as given
    """
    num_trees = len(structs)
    # Order matters only in that it mirrors the sequence of add() calls.
    fields = (
        ('struct', structs),
        ('count', np.ones(num_trees)),
        ('phi', np.array([phi] * num_trees)),
        ('llh', np.zeros(num_trees)),
        ('prob', np.ones(num_trees) / num_trees),
        ('clusters', clusters),
        ('garbage', garbage),
    )

    results = resultserializer.Results(results_fn)
    for key, value in fields:
        results.add(key, value)
    results.save()
def main():
    """Build a mutation-level relation matrix from the cluster posterior.

    The ``clustrel_posterior`` stored in the results is passed through
    ``perturb_clustrel``, expanded to a mutrel using the clusters (prefixed
    with an empty cluster), extended with garbage variants, checked to cover
    exactly the clustered-plus-garbage variant IDs, and saved to
    ``clustrel_mutrel_fn``.
    """
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('pairtree_results_fn')
    parser.add_argument('clustrel_mutrel_fn')
    args = parser.parse_args()

    results = resultserializer.Results(args.pairtree_results_fn)
    clusters = [[]] + list(results.get('clusters'))
    garbage = list(results.get('garbage'))

    # Every variant ID that must appear in the final mutrel.
    expected_vids = {
        vid
        for members in results.get('clusters')
        for vid in members
    }
    expected_vids.update(garbage)

    clustrel = perturb_clustrel(results.get_mutrel('clustrel_posterior'))
    mutrel = evalutil.make_mutrel_from_clustrel(clustrel, clusters)
    mutrel = evalutil.add_garbage(mutrel, garbage)
    assert set(mutrel.vids) == expected_vids

    evalutil.save_sorted_mutrel(mutrel, args.clustrel_mutrel_fn)
def main():
    """Write a baseline per-mutation statistic derived from one tree's phi.

    Builds a membership matrix from the clusters (prefixed with an empty
    cluster), multiplies it by the ``phi`` entry selected by ``--tree-index``,
    and writes the product as a ``Mutstat`` to ``baseline_mutdist_fn``, using
    the stored sample names as assays.
    """
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('--tree-index', type=int, default=0)
    parser.add_argument('results_fn')
    parser.add_argument('baseline_mutdist_fn')
    args = parser.parse_args()

    results = resultserializer.Results(args.results_fn)
    padded_clusters = [[]] + results.get('clusters')
    vids, membership = util.make_membership_mat(padded_clusters)

    tree_phi = results.get('phi')[args.tree_index]
    mphi = np.dot(membership, tree_phi)

    baseline = mutstat.Mutstat(
        stats=mphi,
        vids=vids,
        assays=results.get('sampnames'),
    )
    mutstat.write(baseline, args.baseline_mutdist_fn)
def _calc_tree_stats(truth_fn):
    """Collect per-row statistics for the tree stored at ``truth_fn``.

    Returns:
        A tuple ``(df, polyprimary)``. ``df`` is a DataFrame with columns
        ``phi_std``, ``phi_mean``, and ``largest_eta`` (each computed along
        axis 1 with the first row dropped) plus ``depth`` and ``num_pops``
        from the sibling helpers. ``polyprimary`` is true when more than one
        entry of the stored structure equals 0.
        NOTE(review): presumably an entry of 0 marks a child of the root --
        confirm against the structure encoding.
    """
    truth = resultserializer.Results(truth_fn)
    eta, phi, struct = (
        truth.get(key) for key in ('eta', 'phi', 'structure')
    )

    spread = np.std(phi, axis=1)
    centre = np.mean(phi, axis=1)
    depth = _calc_depth(struct)
    num_pops = _calc_num_pops(struct)

    columns = {
        'phi_std': spread[1:],
        'phi_mean': centre[1:],
        'largest_eta': np.max(eta, axis=1)[1:],
        'depth': depth,
        'num_pops': num_pops,
    }
    df = pd.DataFrame(columns)

    num_zero_entries = np.sum(struct == 0)
    return (df, num_zero_entries > 1)
def main():
    """Print a JSON object holding the concordance result for one tree.

    Loads variants from ``ssm_fn`` (minus the garbage listed in the results),
    optionally parses a truth file given via ``--truth``, computes ``eta``
    for the tree selected by ``--tree-index``, and evaluates
    ``_calc_concord`` over the combined ' BM'/' CNS' and ' BM'/' Spleen'
    sample pairs. The output is ``{"concord": ...}`` on stdout.
    """
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('--tree-index', type=int, default=0)
    parser.add_argument('--truth', dest='truth_fn')
    parser.add_argument('ssm_fn')
    parser.add_argument('results_fn')
    args = parser.parse_args()

    variants = inputparser.load_ssms(args.ssm_fn)
    truth = _parse_truth(args.truth_fn) if args.truth_fn else {}

    results = resultserializer.Results(args.results_fn)
    sampnames = results.get('sampnames')
    clusters = results.get('clusters')
    garbage = results.get('garbage')
    variants = inputparser.remove_garbage(variants, garbage)

    phi = results.get('phi')[args.tree_index]
    struct = results.get('struct')[args.tree_index]
    # Unpacking also enforces that phi is two-dimensional.
    _, num_samps = phi.shape
    assert len(sampnames) == num_samps
    eta = util.calc_eta(struct, phi)

    all_pairs = (
        stephutil.find_samp_pairs(sampnames, ' BM', ' CNS')
        + stephutil.find_samp_pairs(sampnames, ' BM', ' Spleen')
    )
    concord = _calc_concord(
        variants,
        clusters,
        eta,
        sampnames,
        all_pairs,
        truth,
    )

    print(json.dumps({'concord': concord}))
def main():
    """Render the HTML report for a single tree from a results file.

    Command-line inputs are the SSM file, params file, results file, discord
    file, and output HTML path. The sections written are chosen via
    ``--plot`` / ``--omit-plots`` from ``all_plot_choices``; the page always
    starts with ``write_header`` and ends with ``write_footer``.

    NOTE(review): the block nesting below was reconstructed from
    whitespace-flattened source. In particular, the ``--tree-json`` write is
    placed inside the ``'tree'`` branch because ``tree_struct`` is only
    defined there -- confirm against the original layout.
    """
    # Every section name accepted by --plot / --omit-plots.
    all_plot_choices = set((
        'tree',
        'pairwise_separate',
        'pairwise_mle',
        'vaf_matrix',
        'phi',
        'phi_hat',
        'phi_interleaved',
        'cluster_stats',
        'eta',
        'diversity_indices',
    ))
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--seed', type=int)
    parser.add_argument('--tree-index', type=int, default=0)
    # Both plot-selection options parse a comma-separated string into a set.
    parser.add_argument('--plot',
                        dest='plot_choices',
                        type=lambda s: set(s.split(',')),
                        help='Things to plot; by default, plot everything')
    parser.add_argument('--omit-plots',
                        dest='omit_plots',
                        type=lambda s: set(s.split(',')),
                        help='Things to omit from plotting; overrides --plot')
    parser.add_argument('--runid')
    parser.add_argument(
        '--reorder-subclones',
        action='store_true',
        help=
        'Reorder subclones according to depth-first search through tree structure'
    )
    parser.add_argument(
        '--tree-json',
        dest='tree_json_fn',
        help=
        'Additional external file in which to store JSON, which is already stored statically in the HTML file'
    )
    parser.add_argument('--phi-orientation',
                        choices=('samples_as_rows', 'populations_as_rows'),
                        default='populations_as_rows')
    parser.add_argument(
        '--remove-normal',
        action='store_true',
        help=
        'Remove normal (non-cancerous) population 0 from tree, phi, and eta plots.'
    )
    parser.add_argument('ssm_fn')
    parser.add_argument('params_fn')
    parser.add_argument('results_fn')
    parser.add_argument('discord_fn')
    parser.add_argument('html_out_fn')
    args = parser.parse_args()

    # Have NumPy raise on these floating-point errors instead of warning.
    np.seterr(divide='raise', invalid='raise', over='raise')

    # Seed both RNGs only when a seed was supplied.
    if args.seed is not None:
        random.seed(args.seed)
        np.random.seed(args.seed)

    plot_choices = _choose_plots(args.plot_choices, args.omit_plots,
                                 all_plot_choices)

    results = resultserializer.Results(args.results_fn)
    variants = inputparser.load_ssms(args.ssm_fn)
    params = inputparser.load_params(args.params_fn)
    discord = _parse_discord(args.discord_fn)

    # These keys are indexed by tree; keep only the entry for the chosen tree.
    data = {
        K: results.get(K)[args.tree_index]
        for K in (
            'struct',
            'count',
            'llh',
            'prob',
            'phi',
        )
    }
    # The remaining entries are taken whole rather than indexed by tree.
    data['garbage'] = results.get('garbage')
    data['clusters'] = results.get('clusters')
    data['samples'] = params['samples']
    data['clustrel_posterior'] = results.get_mutrel('clustrel_posterior')

    if args.reorder_subclones:
        data, params = _reorder_subclones(data, params)

    # Work out which sample indices stay visible; None when nothing is hidden.
    if 'hidden_samples' in params:
        hidden = set(params['hidden_samples'])
        # Hidden samples must all exist, and at least one sample must remain.
        assert hidden.issubset(set(
            data['samples'])) and len(hidden) < len(data['samples'])
        visible_sampidxs = [
            idx for idx, samp in enumerate(data['samples'])
            if samp not in hidden
        ]
    else:
        visible_sampidxs = None

    # Optional colour settings from the params file, with sanity checks.
    samp_colours = params.get('samp_colours', None)
    pop_colours = params.get('pop_colours', None)
    if samp_colours is not None:
        # The first element of each entry must name a known sample.
        assert set([S[0] for S in samp_colours]).issubset(data['samples'])
    if pop_colours is not None:
        # Expect one more colour than there are entries in the structure.
        assert len(pop_colours) == len(data['struct']) + 1

    # Build supervariants, then turn the mapping into a list in sorted-ID order.
    supervars = clustermaker.make_cluster_supervars(data['clusters'], variants)
    supervars = [supervars[vid] for vid in common.sort_vids(supervars.keys())]

    with open(args.html_out_fn, 'w') as outf:
        write_header(args.runid, args.tree_index, outf)

        if 'tree' in plot_choices:
            tree_struct = util.make_tree_struct(
                data['struct'],
                data['count'],
                data['llh'],
                data['prob'],
                data['phi'],
                supervars,
                data['clusters'],
                data['samples'],
            )
            tree_struct['discord'] = discord
            # The eta/diversity/phi sub-plots are passed as boolean flags here
            # rather than being written by separate calls in this function.
            _write_tree_html(
                tree_struct,
                args.tree_index,
                visible_sampidxs,
                samp_colours,
                pop_colours,
                'eta' in plot_choices,
                'diversity_indices' in plot_choices,
                'phi' in plot_choices,
                'phi_hat' in plot_choices,
                'phi_interleaved' in plot_choices,
                args.phi_orientation,
                args.remove_normal,
                outf,
            )
            # Optionally also dump the same tree structure to a separate file.
            if args.tree_json_fn is not None:
                _write_tree_json(tree_struct, args.tree_json_fn)

        if 'vaf_matrix' in plot_choices:
            vaf_plotter.plot_vaf_matrix(
                data['clusters'],
                variants,
                supervars,
                data['garbage'],
                data['phi'],
                data['samples'],
                should_correct_vaf=True,
                outf=outf,
            )

        if 'pairwise_mle' in plot_choices:
            relation_plotter.plot_ml_relations(data['clustrel_posterior'], outf)

        if 'pairwise_separate' in plot_choices:
            relation_plotter.plot_separate_relations(
                data['clustrel_posterior'], outf)

        if 'cluster_stats' in plot_choices:
            write_cluster_stats(data['clusters'], data['garbage'], supervars,
                                variants, outf)

        write_footer(outf)