def main():
    """Fit phi with every fitting method for two hard-coded parent vectors.

    Reads the SSM file and the params file named on the command line, builds
    supervariants and superclusters from the params' ``clusters`` entry, and
    then, for each parent vector, converts it to an adjacency structure with
    ``_parents2adj``, prints a header via ``print_init`` and prints the fit
    produced by each of the ``projection``, ``rprop`` and ``graddesc`` methods
    via ``print_method``.
    """
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('ssm_fn')
    parser.add_argument('params_fn')
    args = parser.parse_args()

    variants = inputparser.load_ssms(args.ssm_fn)
    params = inputparser.load_params(args.params_fn)
    clusters = params['clusters']

    supervars = clustermaker.make_cluster_supervars(clusters, variants)
    superclusters = clustermaker.make_superclusters(supervars)
    # Add empty initial cluster, which serves as tree root.
    superclusters.insert(0, [])

    # Fixed settings handed to the fitter on every call.
    iterations = 1000
    parallel = 0
    methods = ('projection', 'rprop', 'graddesc')
    parents = [[0, 0, 0], [0, 1, 2]]
    # Sometimes the `projection` fitter will return zeros, which result in an
    # LLH of -inf if the number of variant reads `V` is non-zero, since
    # `Binom(X=V > 0, | N=V+R, p=0) = 0`. To avoid this, set a floor of 1e-6
    # on phi values.
    phi_floor = 1e-6

    for P in parents:
        adj = _parents2adj(P)
        print_init(supervars, adj)
        for method in methods:
            # Only phi is reported; the second return value is not used here.
            phi, _eta = phi_fitter._fit_phis(
                adj, superclusters, supervars, method, iterations, parallel)
            phi = np.maximum(phi_floor, phi)
            print_method(method, phi, supervars)
        # NOTE(review): the source formatting was collapsed; the blank line is
        # assumed to separate parent vectors rather than methods -- confirm.
        print()
def sort_clusters_by_vaf(clusters, variants):
    """Return ``clusters`` reordered by decreasing mean supervariant VAF.

    A supervariant is built for every cluster with
    ``clustermaker.make_cluster_supervars``; the supervariant for the cluster
    at position ``idx`` is looked up under the key ``'S<idx>'``. Each
    supervariant's ``'vaf'`` entry is averaged along axis 1 of the stacked
    array, and the clusters are returned in order of that average, largest
    first.
    """
    supervar_map = clustermaker.make_cluster_supervars(clusters, variants)

    vaf_rows = []
    for idx in range(len(supervar_map)):
        vaf_rows.append(supervar_map['S%s' % idx]['vaf'])

    per_cluster_mean = np.array(vaf_rows).mean(axis=1)
    # argsort is ascending, so negate the means to rank the largest first.
    ranking = np.argsort(-per_cluster_mean)
    return [clusters[pos] for pos in ranking]
def main():
    """Write SNV and cluster files from an SSM file and a params file.

    With ``--use-supervars`` the records written are the cluster
    supervariants, grouped by their superclusters, and the garbage set is
    empty. Otherwise the raw variants are written, grouped by the params'
    ``clusters`` entry, with the params' ``garbage`` entry as the garbage set.
    Output goes to the ``citup_*`` paths given on the command line via
    ``write_snvs`` and ``write_clusters``.
    """
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('--use-supervars', action='store_true')
    for positional in (
        'ssm_fn',
        'params_fn',
        'citup_snv_fn',
        'citup_vid_fn',
        'citup_clusters_fn',
    ):
        parser.add_argument(positional)
    args = parser.parse_args()

    variants = inputparser.load_ssms(args.ssm_fn)
    params = inputparser.load_params(args.params_fn)
    clusters = params['clusters']

    # Pick what gets written; the two write calls are the same either way.
    if args.use_supervars:
        records = clustermaker.make_cluster_supervars(clusters, variants)
        groups = clustermaker.make_superclusters(records)
        garbage = set()
    else:
        records = variants
        groups = clusters
        garbage = set(params['garbage'])

    write_snvs(records, garbage, args.citup_snv_fn, args.citup_vid_fn)
    write_clusters(records, garbage, groups, args.citup_clusters_fn)
def main():
    """Write allele-count and proposal matrices built from supervariants.

    The allele-count file receives the ``var_reads`` matrix labelled ``A`` and
    the ``total_reads`` matrix labelled ``D``. The proposal file receives
    ``Alpha`` and ``Beta``, which start as fresh extractions of ``var_reads``
    and ``total_reads`` respectively; with ``--uniform-proposal`` they are
    overwritten with 1 and 2. Both proposal matrices are cut to their first
    ``C_max`` rows before being written.
    """
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('--uniform-proposal', action='store_true')
    for positional in (
        'ssm_fn',
        'params_fn',
        'pastri_allele_counts_fn',
        'pastri_proposal_fn',
    ):
        parser.add_argument(positional)
    args = parser.parse_args()

    variants = inputparser.load_ssms(args.ssm_fn)
    params = inputparser.load_params(args.params_fn)
    supervars = clustermaker.make_cluster_supervars(params['clusters'], variants)

    # Alpha/Beta are extracted separately from the count matrices so that the
    # in-place fill below cannot touch what is written as A/D.
    var_reads = extract_matrix(supervars, 'var_reads')
    total_reads = extract_matrix(supervars, 'total_reads')
    alpha = extract_matrix(supervars, 'var_reads')
    beta = extract_matrix(supervars, 'total_reads')

    if args.uniform_proposal:
        alpha[:] = 1
        beta[:] = 2

    # NOTE(review): the source formatting was collapsed; the truncation is
    # assumed to apply whether or not --uniform-proposal is set -- confirm.
    C_max = 15
    alpha = alpha[:C_max, ]
    beta = beta[:C_max, ]

    write_matrices(
        ('A', var_reads),
        ('D', total_reads),
        outfn=args.pastri_allele_counts_fn,
    )
    write_matrices(
        ('Alpha', alpha),
        ('Beta', beta),
        outfn=args.pastri_proposal_fn,
    )
def main():
    """Write an input file built from supervariant read counts.

    Extracts the ``var_reads`` and ``ref_reads`` matrices of the cluster
    supervariants, checks that both extractions report the same variant IDs,
    and passes the IDs, the params' ``samples`` entry and both matrices to
    ``_write_inputs`` together with the output path ``calder_input_fn``.
    """
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    for positional in ('ssm_fn', 'params_fn', 'calder_input_fn'):
        parser.add_argument(positional)
    args = parser.parse_args()

    variants = inputparser.load_ssms(args.ssm_fn)
    params = inputparser.load_params(args.params_fn)
    supervars = clustermaker.make_cluster_supervars(params['clusters'], variants)

    vids, var_reads = extract_matrix(supervars, 'var_reads')
    ref_vids, ref_reads = extract_matrix(supervars, 'ref_reads')
    # Both matrices must describe the same variants for the output to line up.
    assert vids == ref_vids

    _write_inputs(
        vids,
        params['samples'],
        var_reads,
        ref_reads,
        args.calder_input_fn,
    )
def main():
    """Write an SSM file and a params file to the ``pwgs_*`` output paths.

    The variants written are those loaded from ``ssm_fn``, or, with
    ``--use-supervars``, the cluster supervariants built from them. The params
    output receives only the ``samples`` entry of the loaded params.
    """
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    # argparse derives the attribute name `use_supervars` from the flag itself.
    parser.add_argument('--use-supervars', action='store_true')
    for positional in ('ssm_fn', 'params_fn', 'pwgs_ssm_fn', 'pwgs_params_fn'):
        parser.add_argument(positional)
    args = parser.parse_args()

    loaded_variants = inputparser.load_ssms(args.ssm_fn)
    params = inputparser.load_params(args.params_fn)

    if args.use_supervars:
        to_write = clustermaker.make_cluster_supervars(
            params['clusters'], loaded_variants)
    else:
        to_write = loaded_variants

    write_ssms(to_write, args.pwgs_ssm_fn)
    write_params(params['samples'], args.pwgs_params_fn)
def main():
    """Render the requested plots for one tree into a single HTML file.

    Loads the results, SSM, params and discord files named on the command
    line, picks the per-tree entries at ``--tree-index``, and writes a header,
    each section selected through ``--plot`` / ``--omit-plots``, and a footer
    to ``html_out_fn``. When the tree section is written and ``--tree-json``
    is given, the tree structure is also passed to ``_write_tree_json``.
    """
    all_plot_choices = {
        'tree',
        'pairwise_separate',
        'pairwise_mle',
        'vaf_matrix',
        'phi',
        'phi_hat',
        'phi_interleaved',
        'cluster_stats',
        'eta',
        'diversity_indices',
    }

    def _comma_set(text):
        # Turn a comma-separated command-line value into a set of names.
        return set(text.split(','))

    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('--seed', type=int)
    parser.add_argument('--tree-index', type=int, default=0)
    parser.add_argument(
        '--plot',
        dest='plot_choices',
        type=_comma_set,
        help='Things to plot; by default, plot everything',
    )
    parser.add_argument(
        '--omit-plots',
        type=_comma_set,
        help='Things to omit from plotting; overrides --plot',
    )
    parser.add_argument('--runid')
    parser.add_argument(
        '--reorder-subclones',
        action='store_true',
        help='Reorder subclones according to depth-first search through tree structure',
    )
    parser.add_argument(
        '--tree-json',
        dest='tree_json_fn',
        help='Additional external file in which to store JSON, which is already stored statically in the HTML file',
    )
    parser.add_argument(
        '--phi-orientation',
        choices=('samples_as_rows', 'populations_as_rows'),
        default='populations_as_rows',
    )
    parser.add_argument(
        '--remove-normal',
        action='store_true',
        help='Remove normal (non-cancerous) population 0 from tree, phi, and eta plots.',
    )
    for positional in (
        'ssm_fn',
        'params_fn',
        'results_fn',
        'discord_fn',
        'html_out_fn',
    ):
        parser.add_argument(positional)
    args = parser.parse_args()

    # Make NumPy raise on these floating-point conditions.
    np.seterr(divide='raise', invalid='raise', over='raise')

    if args.seed is not None:
        random.seed(args.seed)
        np.random.seed(args.seed)

    plot_choices = _choose_plots(
        args.plot_choices, args.omit_plots, all_plot_choices)

    results = resultserializer.Results(args.results_fn)
    variants = inputparser.load_ssms(args.ssm_fn)
    params = inputparser.load_params(args.params_fn)
    discord = _parse_discord(args.discord_fn)

    # These entries are indexed by tree; keep only the selected tree's value.
    data = {}
    for key in ('struct', 'count', 'llh', 'prob', 'phi'):
        data[key] = results.get(key)[args.tree_index]
    data['garbage'] = results.get('garbage')
    data['clusters'] = results.get('clusters')
    data['samples'] = params['samples']
    data['clustrel_posterior'] = results.get_mutrel('clustrel_posterior')

    if args.reorder_subclones:
        data, params = _reorder_subclones(data, params)

    visible_sampidxs = None
    if 'hidden_samples' in params:
        hidden = set(params['hidden_samples'])
        sample_names = data['samples']
        # Hidden samples must all exist, and at least one sample must remain.
        assert hidden.issubset(set(sample_names)) and len(hidden) < len(sample_names)
        visible_sampidxs = [
            pos for pos, name in enumerate(sample_names) if name not in hidden
        ]

    samp_colours = params.get('samp_colours', None)
    pop_colours = params.get('pop_colours', None)
    if samp_colours is not None:
        assert {entry[0] for entry in samp_colours}.issubset(data['samples'])
    if pop_colours is not None:
        assert len(pop_colours) == len(data['struct']) + 1

    supervar_map = clustermaker.make_cluster_supervars(data['clusters'], variants)
    supervars = [supervar_map[vid] for vid in common.sort_vids(supervar_map.keys())]

    with open(args.html_out_fn, 'w') as outf:
        write_header(args.runid, args.tree_index, outf)

        if 'tree' in plot_choices:
            tree_struct = util.make_tree_struct(
                data['struct'],
                data['count'],
                data['llh'],
                data['prob'],
                data['phi'],
                supervars,
                data['clusters'],
                data['samples'],
            )
            tree_struct['discord'] = discord
            _write_tree_html(
                tree_struct,
                args.tree_index,
                visible_sampidxs,
                samp_colours,
                pop_colours,
                'eta' in plot_choices,
                'diversity_indices' in plot_choices,
                'phi' in plot_choices,
                'phi_hat' in plot_choices,
                'phi_interleaved' in plot_choices,
                args.phi_orientation,
                args.remove_normal,
                outf,
            )
            # NOTE(review): the source formatting was collapsed; the JSON dump
            # is placed under the tree branch because `tree_struct` is only
            # defined there -- confirm.
            if args.tree_json_fn is not None:
                _write_tree_json(tree_struct, args.tree_json_fn)

        if 'vaf_matrix' in plot_choices:
            vaf_plotter.plot_vaf_matrix(
                data['clusters'],
                variants,
                supervars,
                data['garbage'],
                data['phi'],
                data['samples'],
                should_correct_vaf=True,
                outf=outf,
            )

        if 'pairwise_mle' in plot_choices:
            relation_plotter.plot_ml_relations(data['clustrel_posterior'], outf)

        if 'pairwise_separate' in plot_choices:
            relation_plotter.plot_separate_relations(
                data['clustrel_posterior'], outf)

        if 'cluster_stats' in plot_choices:
            write_cluster_stats(
                data['clusters'], data['garbage'], supervars, variants, outf)

        write_footer(outf)