Example #1
0
def _calc_eta_stats(truth_fn):
    """Summarise the `eta` matrix stored in the results file `truth_fn`.

    Row 0 of `eta` is left out of every returned quantity.

    Returns:
        A tuple `(K, S, biggest_eta)`, where `K` and `S` are the row and
        column counts of `eta` once row 0 is dropped, and `biggest_eta`
        holds the maximum over columns for each of those remaining rows.
    """
    eta = resultserializer.Results(truth_fn).get('eta')

    num_rows, num_cols = eta[1:].shape
    # Reduce over the full matrix first, then discard the entry for row 0.
    row_maxima = np.max(eta, axis=1)
    return (num_rows, num_cols, row_maxima[1:])
Example #2
0
def main():
    """Write an HTML page with one `_calc_di` section per tissue pairing.

    Reads sample names, clusters, and the `phi` / `struct` entries at
    `--tree-index` from `results_fn`, derives `eta` from them, and then, for
    the ' BM' vs ' CNS' and ' BM' vs ' Spleen' sample pairs, appends a
    heading plus the markup returned by `_process_di` to the page saved at
    `html_out_fn`.
    """
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('--tree-index', type=int, default=0)
    for positional in ('results_fn', 'html_out_fn'):
        parser.add_argument(positional)
    args = parser.parse_args()

    results = resultserializer.Results(args.results_fn)
    sampnames = results.get('sampnames')
    clusters = results.get('clusters')

    tree_idx = args.tree_index
    phi = results.get('phi')[tree_idx]
    struct = results.get('struct')[tree_idx]
    # phi must be two-dimensional, with one column per sample name.
    _, num_samps = phi.shape
    assert len(sampnames) == num_samps
    eta = util.calc_eta(struct, phi)

    # (section title, sample-name suffix paired against ' BM')
    sections = (
        ('CNS', ' CNS'),
        ('Spleen', ' Spleen'),
    )
    pairs = {
        title: stephutil.find_samp_pairs(sampnames, ' BM', suffix)
        for title, suffix in sections
    }

    fragments = [
        '<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>',
        '<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css">',
    ]
    for title, samp_pairs in pairs.items():
        di_results = _calc_di(eta, clusters, struct, sampnames, samp_pairs)
        fragments.append(f'<h1>{title}</h1>')
        fragments.append(_process_di(di_results))

    with open(args.html_out_fn, 'w') as outf:
        print(''.join(fragments), file=outf)
Example #3
0
def main():
    """Compute mutphi for the trees in a results file and print its score.

    Every tree is paired with the same clustering (the stored clusters with
    an empty cluster prepended). With `--only-best`, only the first tree is
    used and it is given a count of 1.
    """
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('--only-best', action='store_true')
    parser.add_argument('pairtree_ssm_fn')
    parser.add_argument('results_fn')
    args = parser.parse_args()

    results = resultserializer.Results(args.results_fn)
    phi = results.get('phi')
    clusters = [[]] + results.get('clusters')
    llh = results.get('llh')
    counts = results.get('count')
    # One reference to the shared clustering per stored log-likelihood.
    clusterings = [clusters] * len(llh)

    if args.only_best:
        # Keep only the first entry of each per-tree sequence.
        phi, clusterings, llh = [phi[0]], [clusterings[0]], [llh[0]]
        counts = [1]

    mphi = mutphi.calc_mutphi(
        phi,
        llh,
        clusterings,
        args.pairtree_ssm_fn,
        counts,
    )
    print(score(mphi.stats))
Example #4
0
def main():
  """Convert a Pairtree results file into a neutree file.

  Loads the results from `pairtree_results_fn` and the `garbage` entry of
  the parameters in `params_fn`, passes both to `convert`, and saves the
  outcome to `neutree_fn`.
  """
  parser = argparse.ArgumentParser(
    description='LOL HI THERE',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
  )
  for positional in ('pairtree_results_fn', 'params_fn', 'neutree_fn'):
    parser.add_argument(positional)
  args = parser.parse_args()

  results = resultserializer.Results(args.pairtree_results_fn)
  garbage = inputparser.load_params(args.params_fn)['garbage']
  neutree.save(convert(results, garbage), args.neutree_fn)
Example #5
0
def write_truth(structs, phi, clusters, garbage, results_fn):
  """Save a set of tree structures to `results_fn` as a results file.

  Each of the N entries in `structs` is stored alongside a copy of the same
  `phi`, a log-likelihood of 0, a count of 1, and a probability of 1/N.
  `clusters` and `garbage` are stored unchanged.
  """
  num_trees = len(structs)
  # Order matters here only in that it mirrors the order fields are added.
  fields = (
    ('struct', structs),
    ('count', np.ones(num_trees)),
    ('phi', np.array([phi] * num_trees)),
    ('llh', np.zeros(num_trees)),
    ('prob', np.ones(num_trees) / num_trees),
    ('clusters', clusters),
    ('garbage', garbage),
  )

  results = resultserializer.Results(results_fn)
  for key, value in fields:
    results.add(key, value)
  results.save()
def main():
    """Turn a perturbed cluster-relation posterior into a saved mutrel.

    Reads the `clustrel_posterior` mutrel from `pairtree_results_fn`, passes
    it through `perturb_clustrel`, expands it to a mutrel over the clusters
    (with an empty cluster prepended), adds the garbage variants, and saves
    the sorted result to `clustrel_mutrel_fn`.
    """
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    for positional in ('pairtree_results_fn', 'clustrel_mutrel_fn'):
        parser.add_argument(positional)
    args = parser.parse_args()

    results = resultserializer.Results(args.pairtree_results_fn)
    clusters = [[]] + list(results.get('clusters'))
    garbage = list(results.get('garbage'))
    # Every variant ID that must appear in the final mutrel.
    expected_vids = {vid for cluster in results.get('clusters') for vid in cluster}
    expected_vids.update(garbage)

    perturbed = perturb_clustrel(results.get_mutrel('clustrel_posterior'))
    mutrel = evalutil.make_mutrel_from_clustrel(perturbed, clusters)
    mutrel = evalutil.add_garbage(mutrel, garbage)
    assert set(mutrel.vids) == expected_vids
    evalutil.save_sorted_mutrel(mutrel, args.clustrel_mutrel_fn)
Example #7
0
def main():
  """Write a baseline mutstat built from one tree's phi values.

  The membership matrix of the clusters (with an empty cluster prepended)
  is multiplied by the `phi` entry at `--tree-index`, and the product is
  written to `baseline_mutdist_fn` together with the variant IDs and the
  sample names.
  """
  parser = argparse.ArgumentParser(
    description='LOL HI THERE',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
  )
  parser.add_argument('--tree-index', type=int, default=0)
  for positional in ('results_fn', 'baseline_mutdist_fn'):
    parser.add_argument(positional)
  args = parser.parse_args()

  results = resultserializer.Results(args.results_fn)

  clusters = [[]] + results.get('clusters')
  vids, membership = util.make_membership_mat(clusters)
  tree_phi = results.get('phi')[args.tree_index]
  mphi = np.dot(membership, tree_phi)

  baseline = mutstat.Mutstat(
    stats=mphi,
    vids=vids,
    assays=results.get('sampnames'),
  )
  mutstat.write(baseline, args.baseline_mutdist_fn)
Example #8
0
def _calc_tree_stats(truth_fn):
    """Collect per-row tree statistics from the results file `truth_fn`.

    Returns:
        A tuple `(df, polyprimary)`. `df` is a DataFrame with columns
        `phi_std`, `phi_mean` and `largest_eta` (row-wise std / mean of
        `phi` and row-wise max of `eta`, each with row 0 dropped), plus
        `depth` and `num_pops` as returned by `_calc_depth` and
        `_calc_num_pops`. `polyprimary` is true when more than one entry
        of the structure equals 0.
    """
    truth = resultserializer.Results(truth_fn)
    eta, phi, struct = (truth.get(key) for key in ('eta', 'phi', 'structure'))

    row_std = np.std(phi, axis=1)
    row_mean = np.mean(phi, axis=1)
    depth = _calc_depth(struct)
    num_pops = _calc_num_pops(struct)
    row_max_eta = np.max(eta, axis=1)

    columns = {
        'phi_std': row_std[1:],
        'phi_mean': row_mean[1:],
        'largest_eta': row_max_eta[1:],
        'depth': depth,
        'num_pops': num_pops,
    }
    df = pd.DataFrame(columns)

    num_zero_entries = np.sum(struct == 0)
    return (df, num_zero_entries > 1)
Example #9
0
def main():
    """Print, as JSON, the concordance computed for BM sample pairs.

    Loads variants from `ssm_fn` (minus the garbage listed in `results_fn`),
    derives `eta` from the `phi` / `struct` entries at `--tree-index`, and
    passes them with the ' BM' vs ' CNS' and ' BM' vs ' Spleen' sample pairs
    to `_calc_concord`. When `--truth` is given, its parsed contents are
    passed along too; otherwise an empty dict is used.
    """
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('--tree-index', type=int, default=0)
    parser.add_argument('--truth', dest='truth_fn')
    for positional in ('ssm_fn', 'results_fn'):
        parser.add_argument(positional)
    args = parser.parse_args()

    variants = inputparser.load_ssms(args.ssm_fn)
    truth = _parse_truth(args.truth_fn) if args.truth_fn else {}

    results = resultserializer.Results(args.results_fn)
    sampnames = results.get('sampnames')
    clusters = results.get('clusters')
    garbage = results.get('garbage')
    variants = inputparser.remove_garbage(variants, garbage)

    tree_idx = args.tree_index
    phi = results.get('phi')[tree_idx]
    struct = results.get('struct')[tree_idx]
    # phi must be two-dimensional, with one column per sample name.
    _, num_samps = phi.shape
    assert len(sampnames) == num_samps
    eta = util.calc_eta(struct, phi)

    all_pairs = (
        stephutil.find_samp_pairs(sampnames, ' BM', ' CNS')
        + stephutil.find_samp_pairs(sampnames, ' BM', ' Spleen')
    )

    concord = _calc_concord(
        variants,
        clusters,
        eta,
        sampnames,
        all_pairs,
        truth,
    )
    print(json.dumps({'concord': concord}))
def main():
    """Write the HTML report for one tree of a results file.

    Parses the command line, loads the results, variants, parameters and
    discord data, gathers the per-tree fields at `--tree-index`, and then
    writes a header, each plot selected by `_choose_plots`, and a footer to
    `html_out_fn`. The tree JSON is additionally written to `--tree-json`
    when that option is given and the tree plot is selected.
    """
    all_plot_choices = {
        'tree',
        'pairwise_separate',
        'pairwise_mle',
        'vaf_matrix',
        'phi',
        'phi_hat',
        'phi_interleaved',
        'cluster_stats',
        'eta',
        'diversity_indices',
    }

    def _comma_separated_set(text):
        # Converts e.g. 'tree,phi' into {'tree', 'phi'}.
        return set(text.split(','))

    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('--seed', type=int)
    parser.add_argument('--tree-index', type=int, default=0)
    parser.add_argument(
        '--plot',
        dest='plot_choices',
        type=_comma_separated_set,
        help='Things to plot; by default, plot everything',
    )
    parser.add_argument(
        '--omit-plots',
        dest='omit_plots',
        type=_comma_separated_set,
        help='Things to omit from plotting; overrides --plot',
    )
    parser.add_argument('--runid')
    parser.add_argument(
        '--reorder-subclones',
        action='store_true',
        help='Reorder subclones according to depth-first search through tree structure',
    )
    parser.add_argument(
        '--tree-json',
        dest='tree_json_fn',
        help='Additional external file in which to store JSON, which is already stored statically in the HTML file',
    )
    parser.add_argument(
        '--phi-orientation',
        choices=('samples_as_rows', 'populations_as_rows'),
        default='populations_as_rows',
    )
    parser.add_argument(
        '--remove-normal',
        action='store_true',
        help='Remove normal (non-cancerous) population 0 from tree, phi, and eta plots.',
    )
    for positional in (
            'ssm_fn',
            'params_fn',
            'results_fn',
            'discord_fn',
            'html_out_fn',
    ):
        parser.add_argument(positional)
    args = parser.parse_args()

    # Turn these floating-point conditions into exceptions.
    np.seterr(divide='raise', invalid='raise', over='raise')

    seed = args.seed
    if seed is not None:
        random.seed(seed)
        np.random.seed(seed)

    plot_choices = _choose_plots(
        args.plot_choices,
        args.omit_plots,
        all_plot_choices,
    )

    results = resultserializer.Results(args.results_fn)
    variants = inputparser.load_ssms(args.ssm_fn)
    params = inputparser.load_params(args.params_fn)
    discord = _parse_discord(args.discord_fn)

    # Fields indexed by tree: keep only the entry for the requested tree.
    data = {}
    for key in ('struct', 'count', 'llh', 'prob', 'phi'):
        data[key] = results.get(key)[args.tree_index]
    # Fields shared by all trees.
    data.update({
        'garbage': results.get('garbage'),
        'clusters': results.get('clusters'),
        'samples': params['samples'],
        'clustrel_posterior': results.get_mutrel('clustrel_posterior'),
    })
    if args.reorder_subclones:
        data, params = _reorder_subclones(data, params)

    visible_sampidxs = None
    if 'hidden_samples' in params:
        hidden = set(params['hidden_samples'])
        # Hidden samples must all exist, and at least one sample must
        # remain visible.
        assert hidden.issubset(set(data['samples'])) \
            and len(hidden) < len(data['samples'])
        visible_sampidxs = [
            idx for idx, samp in enumerate(data['samples'])
            if samp not in hidden
        ]

    samp_colours = params.get('samp_colours')
    pop_colours = params.get('pop_colours')
    if samp_colours is not None:
        assert {entry[0] for entry in samp_colours}.issubset(data['samples'])
    if pop_colours is not None:
        assert len(pop_colours) == len(data['struct']) + 1

    supervars_by_vid = clustermaker.make_cluster_supervars(
        data['clusters'],
        variants,
    )
    supervars = [
        supervars_by_vid[vid]
        for vid in common.sort_vids(supervars_by_vid.keys())
    ]

    with open(args.html_out_fn, 'w') as outf:
        write_header(args.runid, args.tree_index, outf)

        if 'tree' in plot_choices:
            tree_struct = util.make_tree_struct(
                data['struct'],
                data['count'],
                data['llh'],
                data['prob'],
                data['phi'],
                supervars,
                data['clusters'],
                data['samples'],
            )
            tree_struct['discord'] = discord

            _write_tree_html(
                tree_struct,
                args.tree_index,
                visible_sampidxs,
                samp_colours,
                pop_colours,
                'eta' in plot_choices,
                'diversity_indices' in plot_choices,
                'phi' in plot_choices,
                'phi_hat' in plot_choices,
                'phi_interleaved' in plot_choices,
                args.phi_orientation,
                args.remove_normal,
                outf,
            )
            # tree_struct exists only when the tree plot is selected, so
            # the optional JSON dump lives inside this branch.
            if args.tree_json_fn is not None:
                _write_tree_json(tree_struct, args.tree_json_fn)

        if 'vaf_matrix' in plot_choices:
            vaf_plotter.plot_vaf_matrix(
                data['clusters'],
                variants,
                supervars,
                data['garbage'],
                data['phi'],
                data['samples'],
                should_correct_vaf=True,
                outf=outf,
            )

        if 'pairwise_mle' in plot_choices:
            relation_plotter.plot_ml_relations(
                data['clustrel_posterior'],
                outf,
            )
        if 'pairwise_separate' in plot_choices:
            relation_plotter.plot_separate_relations(
                data['clustrel_posterior'],
                outf,
            )
        if 'cluster_stats' in plot_choices:
            write_cluster_stats(
                data['clusters'],
                data['garbage'],
                supervars,
                variants,
                outf,
            )

        write_footer(outf)