Example #1
0
def main():
    """Convert PhyloWGS inputs plus a hand-built tree into Pairtree inputs."""
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--discard-garbage',
                        dest='discard_garbage',
                        action='store_true')
    parser.add_argument('--handbuilt', dest='handbuilt_fn', required=True)
    for fn_arg in ('pwgs_ssm_fn', 'pwgs_params_fn', 'pairtree_ssm_fn',
                   'pairtree_params_fn'):
        parser.add_argument(fn_arg)
    args = parser.parse_args()

    hb = load_handbuilt(args.handbuilt_fn, 'handbuilt.xeno')
    clusters = convert_clusters(hb['clusters'])
    garbage = hb['garbage']
    # Since we remove the empty first cluster, the indexing on `structure` is
    # offset: cluster `i` is represented by `i + 1` in `structure`. That's okay.
    parents = convert_adjl_to_parents(hb['structure'])

    pwgs_params = inputparser.load_params(args.pwgs_params_fn)
    variants = load_phylowgs(args.pwgs_ssm_fn)
    if args.discard_garbage:
        remove_garbage(variants, garbage)
        variants, clusters = make_varids_contiguous(variants, garbage,
                                                    clusters)
        garbage = []

    inputparser.write_ssms(variants, args.pairtree_ssm_fn)
    write_pairtree_params(pwgs_params['samples'], garbage, clusters, parents,
                          args.pairtree_params_fn)
Example #2
0
def main():
  """Write CITUP input files (SNVs, variant IDs, clusters) from Pairtree inputs."""
  parser = argparse.ArgumentParser(
    description='LOL HI THERE',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter
  )
  parser.add_argument('--use-supervars', action='store_true')
  for fn_arg in ('ssm_fn', 'params_fn', 'citup_snv_fn', 'citup_vid_fn',
                 'citup_clusters_fn'):
    parser.add_argument(fn_arg)
  args = parser.parse_args()

  variants = inputparser.load_ssms(args.ssm_fn)
  params = inputparser.load_params(args.params_fn)
  clusters = params['clusters']

  if args.use_supervars:
    # Supervariants are built from the clustered variants, so there is no
    # garbage left to exclude.
    svars = clustermaker.make_cluster_supervars(clusters, variants)
    sclusters = clustermaker.make_superclusters(svars)
    garbage = set()
    write_snvs(svars, garbage, args.citup_snv_fn, args.citup_vid_fn)
    write_clusters(svars, garbage, sclusters, args.citup_clusters_fn)
  else:
    garbage = set(params['garbage'])
    write_snvs(variants, garbage, args.citup_snv_fn, args.citup_vid_fn)
    write_clusters(variants, garbage, clusters, args.citup_clusters_fn)
Example #3
0
def main():
    """Fit phis for two fixed parent vectors with each available fitter and
    print the results for side-by-side comparison.

    Fix: removed the unused local `M = len(superclusters)` (dead code).
    """
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('ssm_fn')
    parser.add_argument('params_fn')
    args = parser.parse_args()

    variants = inputparser.load_ssms(args.ssm_fn)
    params = inputparser.load_params(args.params_fn)
    clusters = params['clusters']

    supervars = clustermaker.make_cluster_supervars(clusters, variants)
    superclusters = clustermaker.make_superclusters(supervars)
    # Add empty initial cluster, which serves as tree root.
    superclusters.insert(0, [])

    iterations = 1000
    parallel = 0  # Run phi fitting serially.

    # Two hand-picked parent vectors (presumably a star tree and a linear
    # chain -- depends on `_parents2adj` semantics; confirm).
    parents = [[0, 0, 0], [0, 1, 2]]
    for P in parents:
        adj = _parents2adj(P)
        print_init(supervars, adj)
        for method in ('projection', 'rprop', 'graddesc'):
            phi, eta = phi_fitter._fit_phis(adj, superclusters, supervars,
                                            method, iterations, parallel)
            # Sometimes the `projection` fitter will return zeros, which result in an
            # LLH of -inf if the number of variant reads `V` is non-zero, since
            # `Binom(X=V > 0, | N=V+R, p=0) = 0`. To avoid this, set a floor of 1e-6
            # on phi values.
            phi = np.maximum(1e-6, phi)
            print_method(method, phi, supervars)
            print()
Example #4
0
def main():
  """Generate SSM/params file pairs restricted to sample subsets of the
  requested sizes."""
  parser = argparse.ArgumentParser(
    description='LOL HI THERE',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter
  )
  parser.add_argument('--counts', required=True)
  for arg_name in ('in_ssm_fn', 'in_params_fn', 'out_base'):
    parser.add_argument(arg_name)
  args = parser.parse_args()

  # Fixed seed so subset selection is reproducible across runs.
  random.seed(1337)

  counts = [int(token) for token in args.counts.split(',')]
  # Requested subset sizes must be distinct.
  assert len(counts) == len(set(counts))
  ssms = inputparser.load_ssms(args.in_ssm_fn)
  params = inputparser.load_params(args.in_params_fn)
  sampnames = params['samples']

  # Always include diagnosis sample, on assumption we're working with
  # SJbALL022609 from Steph for the paper congraph figure.
  subsets = _select_samp_subsets(sampnames, counts, all_must_include=['D'])
  for subset in subsets:
    idxs = _find_idxs(sampnames, subset)
    subset_ssms = _filter_ssms(ssms, idxs)
    subset_params = dict(params)
    subset_params['samples'] = subset

    out_base = '%s_S%s' % (args.out_base, len(subset))
    inputparser.write_ssms(subset_ssms, out_base + '.ssm')
    with open(out_base + '.params.json', 'w') as outf:
      json.dump(subset_params, outf)
Example #5
0
def main():
    """Write PASTRI allele-count and proposal-distribution input files from
    Pairtree inputs."""
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--uniform-proposal', action='store_true')
    for fn_arg in ('ssm_fn', 'params_fn', 'pastri_allele_counts_fn',
                   'pastri_proposal_fn'):
        parser.add_argument(fn_arg)
    args = parser.parse_args()

    variants = inputparser.load_ssms(args.ssm_fn)
    params = inputparser.load_params(args.params_fn)
    supervars = clustermaker.make_cluster_supervars(params['clusters'],
                                                    variants)

    var_reads = extract_matrix(supervars, 'var_reads')
    total_reads = extract_matrix(supervars, 'total_reads')
    # Proposal Beta parameters default to the observed read counts.
    alpha = extract_matrix(supervars, 'var_reads')
    beta = extract_matrix(supervars, 'total_reads')
    if args.uniform_proposal:
        alpha[:] = 1
        beta[:] = 2

    # Truncate the proposal to at most `C_max` clusters (presumably a PASTRI
    # limitation -- confirm).
    C_max = 15
    alpha = alpha[:C_max, ]
    beta = beta[:C_max, ]

    write_matrices(('A', var_reads), ('D', total_reads),
                   outfn=args.pastri_allele_counts_fn)
    write_matrices(('Alpha', alpha), ('Beta', beta),
                   outfn=args.pastri_proposal_fn)
Example #6
0
def main():
    """Convert PhyloWGS results into a Neutree archive."""
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--use-supervars', action='store_true')
    # This takes Pairtree rather than PWGS inputs, which seems a little weird,
    # but it's okay -- the PWGS inputs are the supervariants, but we need to
    # know which variants correspond to each cluster in the original Pairtree
    # inputs.
    parser.add_argument('tree_summary', help='JSON-formatted tree summaries')
    parser.add_argument('mutation_list',
                        help='JSON-formatted list of mutations')
    parser.add_argument(
        'mutation_assignment',
        help='JSON-formatted list of SSMs and CNVs assigned to each subclone')
    parser.add_argument('pairtree_params_fn')
    parser.add_argument('neutree_fn')
    args = parser.parse_args()

    results = ResultLoader(args.tree_summary, args.mutation_list,
                           args.mutation_assignment)
    if args.use_supervars:
        params = inputparser.load_params(args.pairtree_params_fn)
        base_clusters, garbage = params['clusters'], params['garbage']
    else:
        base_clusters, garbage = None, []

    ntree = convert_results(results, base_clusters, garbage,
                            args.use_supervars)
    neutree.save(ntree, args.neutree_fn)
Example #7
0
def write_results(clusters, garbage, params_fn_orig, params_fn_modified):
  """Copy a params file, replacing its `clusters` and `garbage` entries.

  The old keys are removed before reassignment, which pushes both to the end
  of the JSON object's key order.
  """
  params = inputparser.load_params(params_fn_orig)
  for key in ('clusters', 'garbage'):
    params.pop(key, None)
  params['clusters'] = clusters
  params['garbage'] = garbage

  with open(params_fn_modified, 'w') as outf:
    json.dump(params, outf)
Example #8
0
def main():
  """Convert a Pairtree results archive into a Neutree archive."""
  parser = argparse.ArgumentParser(
    description='LOL HI THERE',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter
  )
  for arg_name in ('pairtree_results_fn', 'params_fn', 'neutree_fn'):
    parser.add_argument(arg_name)
  args = parser.parse_args()

  results = resultserializer.Results(args.pairtree_results_fn)
  params = inputparser.load_params(args.params_fn)
  neutree.save(convert(results, params['garbage']), args.neutree_fn)
Example #9
0
def main():
    """Load SSM/params inputs and hand them to `convert` with an output
    directory."""
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    for arg_name in ('ssm_fn', 'params_fn', 'out_dir'):
        parser.add_argument(arg_name)
    args = parser.parse_args()

    variants = inputparser.load_ssms(args.ssm_fn)
    params = inputparser.load_params(args.params_fn)

    convert(variants, params['samples'], args.out_dir)
Example #10
0
def main():
  """Compare two SSM/params input pairs given as four positional CLI args:
  ssm1 params1 ssm2 params2."""
  ssmfns = (sys.argv[1], sys.argv[3])
  paramfns = (sys.argv[2], sys.argv[4])

  ssms = [inputparser.load_ssms(fn) for fn in ssmfns]
  samps = [inputparser.load_params(fn)['samples'] for fn in paramfns]

  # Only the first dataset's sample names get renamed.
  for idx in (0,):
    samps[idx] = _rename(samps[idx])

  _compare(ssms, samps)
Example #11
0
def main():
    """Convert CITUP results into a Neutree archive."""
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--use-supervars', action='store_true')
    parser.add_argument('--citup-clusters')
    for fn_arg in ('citup_result_fn', 'citup_vid_fn', 'pairtree_params_fn',
                   'neutree_fn'):
        parser.add_argument(fn_arg)
    args = parser.parse_args()

    params = inputparser.load_params(args.pairtree_params_fn)
    results = load_results(args.citup_result_fn, args.citup_vid_fn,
                           args.citup_clusters, params['clusters'],
                           args.use_supervars)
    write_neutree(results, params['garbage'], args.neutree_fn)
Example #12
0
def _process(ssmfn, jsonfn, order):
    """Permute the sample columns of an SSM/params pair, rewriting both files
    in place.

    `order` is a comma-separated permutation of all sample indices.
    """
    params = inputparser.load_params(jsonfn)
    ssms = inputparser.load_ssms(ssmfn)

    perm = [int(token) for token in order.split(',')]
    num_samps = len(params['samples'])
    # `perm` must be a permutation of 0..N-1, and the SSM arrays must have one
    # entry per sample.
    assert set(perm) == set(range(num_samps))
    assert len(next(iter(ssms.values()))['var_reads']) == num_samps

    params['samples'] = [params['samples'][idx] for idx in perm]
    for vid in ssms:
        for key in ('var_reads', 'ref_reads', 'total_reads', 'vaf', 'omega_v'):
            ssms[vid][key] = ssms[vid][key][perm]

    with open(jsonfn, 'w') as outf:
        json.dump(params, outf)
    inputparser.write_ssms(ssms, ssmfn)
Example #13
0
def main():
  """Impute missing mutphi entries and rewrite the mutphi file sorted."""
  parser = argparse.ArgumentParser(
    description='LOL HI THERE',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter
  )
  for fn_arg in ('ssm_fn', 'params_fn', 'mutphi_fn'):
    parser.add_argument(fn_arg)
  args = parser.parse_args()

  params = inputparser.load_params(args.params_fn)
  orig_mphi = mutphi.load_mutphi(args.mutphi_fn)
  mphi = sort_mutphi(impute(args.ssm_fn, params, orig_mphi))
  mutphi.write_mutphi(mphi, args.mutphi_fn)

  # NOTE(review): these scores are computed but never printed or returned --
  # presumably leftover debugging; preserved as-is.
  old, new = score(orig_mphi.logprobs), score(mphi.logprobs)
Example #14
0
def convert(sampid, params_fn, trees_fn, neutree_fn):
    """Convert PASTRI results for one sample into a Neutree archive.

    Writes nothing if PASTRI produced no trees.
    """
    adjms, llhs, phis, clusterings = pastri_util.load_results(
        sampid, params_fn, trees_fn)
    if len(adjms) == 0:
        return
    structs = [util.convert_adjmatrix_to_parents(adjm) for adjm in adjms]
    params = inputparser.load_params(params_fn)
    ntree = neutree.Neutree(
        structs=structs,
        phis=phis,
        # Each tree is counted once.
        counts=np.ones(len(structs)),
        logscores=llhs,
        clusterings=clusterings,
        garbage=params['garbage'],
    )
    neutree.save(ntree, neutree_fn)
Example #15
0
def convert(params_fn, calder_mats_fn, calder_trees_fn, neutree_fn):
    """Convert CALDER matrices and trees into a single-tree Neutree archive."""
    params = inputparser.load_params(params_fn)

    mats, row_labels, col_labels = _load_mats(calder_mats_fn)
    # The first `Fhat` row label is the sample header; the remainder are
    # supervariant IDs, which must already be in canonical sorted order.
    assert row_labels['Fhat'][0] == 'samples'
    svids = row_labels['Fhat'][1:]
    assert svids == common.sort_vids(svids)

    struct = _load_struct(svids, calder_trees_fn)
    tree = neutree.Neutree(
        structs=[struct],
        phis=[mats['Fhat']],
        counts=np.array([1]),
        logscores=np.array([0.]),
        clusterings=[params['clusters']],
        garbage=params['garbage'],
    )
    neutree.save(tree, neutree_fn)
Example #16
0
def main():
    """Pickle the adjacency matrix, clusters, and variant ID lists derived
    from a params file."""
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('params_fn')
    parser.add_argument('pickle_fn')
    args = parser.parse_args()

    params = inputparser.load_params(args.params_fn)
    clusters = params['clusters']
    payload = {
        'adjm': util.convert_parents_to_adjmatrix(params['structure']),
        'clusters': clusters,
        # Flatten the clustering to list every non-garbage variant.
        'vids_good': [vid for cluster in clusters for vid in cluster],
        'vids_garbage': params['garbage'],
    }
    with open(args.pickle_fn, 'wb') as outf:
        pickle.dump(payload, outf)
Example #17
0
def main():
  """Score mutdist stats files against a params file and print two CSV rows:
  names, then matching scores."""
  parser = argparse.ArgumentParser(
    description='LOL HI THERE',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter
  )
  parser.add_argument('-p', dest='p', type=float, required=True)
  parser.add_argument('--params', dest='paramsfn', required=True)
  parser.add_argument('mutdists', nargs='+')
  args = parser.parse_args()

  params = inputparser.load_params(args.paramsfn)
  mutdists = mutstat.load_mutstats(args.mutdists)
  mutdists = mutstat.remove_garbage(mutdists, params['garbage'])
  mutstat.check_incomplete(mutdists, params['clusters'])

  names, scores = mutstat.score_mutstats(
    mutdists,
    _score=lambda stats: score(stats, args.p),
  )
  print(*names, sep=',')
  print(*(scores[name] for name in names), sep=',')
Example #18
0
def main():
    """Write LICHeE SNV and cluster input files from Pairtree inputs."""
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # NOTE(review): this flag is parsed but never read below -- possibly
    # copy-pasted from a sibling converter; kept for CLI compatibility.
    parser.add_argument('--uniform-proposal', action='store_true')
    for fn_arg in ('ssm_fn', 'params_fn', 'lichee_snv_fn',
                   'lichee_cluster_fn'):
        parser.add_argument(fn_arg)
    args = parser.parse_args()

    variants = inputparser.load_ssms(args.ssm_fn)
    params = inputparser.load_params(args.params_fn)
    garbage = set(params['garbage'])

    snv_indices = write_snvs(variants, params['samples'], garbage,
                             args.lichee_snv_fn)
    write_clusters(variants, params['clusters'], snv_indices,
                   args.lichee_cluster_fn)
Example #19
0
def main():
    """Overwrite the LLHs in a Pairtree results archive with negated scores
    from a params file, after verifying both hold the same tree structures."""
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('params_fn')
    parser.add_argument('pairtree_results_fn')
    args = parser.parse_args()

    params = inputparser.load_params(args.params_fn)
    archive = np.load(args.pairtree_results_fn, allow_pickle=True)
    pairtree_results = {key: archive[key] for key in archive}

    # Structures must match pairwise before the LLHs are replaced.
    assert len(pairtree_results['struct']) == len(params['structures'])
    for struct1, struct2 in zip(pairtree_results['struct'],
                                params['structures']):
        assert np.array_equal(np.array(struct1), np.array(struct2))

    # Scores are negated to form LLHs.
    pairtree_results['llh'] = -1 * np.array(params['scores'])
    np.savez_compressed(args.pairtree_results_fn, **pairtree_results)
Example #20
0
def _process(ssmfn, jsonfn, to_remove):
  """Drop the given sample indices from an SSM/params pair, rewriting both
  files in place.

  `to_remove` is a comma-separated list of sample indices; at least one
  sample must remain.
  """
  params = inputparser.load_params(jsonfn)
  ssms = inputparser.load_ssms(ssmfn)

  removed = {int(token) for token in to_remove.split(',')}
  all_samps = set(range(len(params['samples'])))
  assert removed.issubset(all_samps)
  kept = sorted(all_samps - removed)
  assert len(kept) > 0

  params['samples'] = [params['samples'][idx] for idx in kept]
  for vid in ssms:
    for key in ('var_reads', 'ref_reads', 'total_reads', 'vaf', 'omega_v'):
      ssms[vid][key] = ssms[vid][key][kept]

  with open(jsonfn, 'w') as outf:
    json.dump(params, outf)
  inputparser.write_ssms(ssms, ssmfn)
Example #21
0
def main():
    """Score mutphi stats files against a params file and print two CSV rows:
    names, then matching scores."""
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--params', dest='paramsfn', required=True)
    parser.add_argument('mutphis', nargs='+')
    args = parser.parse_args()

    params = inputparser.load_params(args.paramsfn)
    # Loading skips the built-in inf check; we run `_check_infs` ourselves so
    # that NaN logprobs (which PASTRI sometimes emits) can be handled.
    mphis = mutstat.load_mutstats(args.mutphis, check_inf=False)
    _check_infs(mphis)
    mphis = mutstat.remove_garbage(mphis, params['garbage'])
    mutstat.check_incomplete(mphis, params['clusters'])

    names, scores = mutstat.score_mutstats(mphis, _score=score)
    print(*names, sep=',')
    print(*(scores[name] for name in names), sep=',')
Example #22
0
def main():
    """Write PhyloWGS SSM and params files from Pairtree inputs, optionally
    collapsing each cluster into a single supervariant."""
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--use-supervars', dest='use_supervars',
                        action='store_true')
    for fn_arg in ('ssm_fn', 'params_fn', 'pwgs_ssm_fn', 'pwgs_params_fn'):
        parser.add_argument(fn_arg)
    args = parser.parse_args()

    variants = inputparser.load_ssms(args.ssm_fn)
    params = inputparser.load_params(args.params_fn)

    if args.use_supervars:
        variants = clustermaker.make_cluster_supervars(params['clusters'],
                                                       variants)
    write_ssms(variants, args.pwgs_ssm_fn)
    write_params(params['samples'], args.pwgs_params_fn)
Example #23
0
def main():
    """Write the CALDER input file from Pairtree SSM/params inputs."""
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    for fn_arg in ('ssm_fn', 'params_fn', 'calder_input_fn'):
        parser.add_argument(fn_arg)
    args = parser.parse_args()

    variants = inputparser.load_ssms(args.ssm_fn)
    params = inputparser.load_params(args.params_fn)
    supervars = clustermaker.make_cluster_supervars(params['clusters'],
                                                   variants)

    vids1, var_reads = extract_matrix(supervars, 'var_reads')
    vids2, ref_reads = extract_matrix(supervars, 'ref_reads')
    # Both matrices must be row-indexed by the same variant IDs.
    assert vids1 == vids2

    _write_inputs(vids1, params['samples'], var_reads, ref_reads,
                  args.calder_input_fn)
Example #24
0
def main():
    """Flag variants with implausibly high phi_hat as garbage, emit an updated
    params file, and print summary statistics."""
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--phi-hat-threshold', type=float, default=1 - 1e-2, help='Blah')
    parser.add_argument('--quantile', type=float, default=0.5, help='Blah')
    parser.add_argument('--print-bad-data', action='store_true')
    for fn_arg in ('in_ssm_fn', 'in_params_fn', 'out_params_fn'):
        parser.add_argument(fn_arg)
    args = parser.parse_args()

    np.set_printoptions(
        linewidth=400, precision=3, threshold=sys.maxsize, suppress=True)
    np.seterr(divide='raise', invalid='raise', over='raise')

    ssms = inputparser.load_ssms(args.in_ssm_fn)
    params = inputparser.load_params(args.in_params_fn)
    ssms = inputparser.remove_garbage(ssms, params['garbage'])

    bad_vids, bad_samp_prop = _remove_bad(
        ssms, args.phi_hat_threshold, args.quantile, args.print_bad_data)
    bad_ssm_prop = len(bad_vids) / len(ssms)
    if len(bad_vids) > 0:
        # Merge newly-detected garbage into the existing garbage list, then
        # write the updated params. Nothing is written when no variants are bad.
        params['garbage'] = common.sort_vids(params['garbage'] + bad_vids)
        with open(args.out_params_fn, 'w') as outf:
            json.dump(params, outf)

    # Emit `key=value` stats lines.
    for key, val in (
        ('bad_ssms', common.sort_vids(bad_vids)),
        ('bad_samp_prop', '%.3f' % bad_samp_prop),
        ('bad_ssm_prop', '%.3f' % bad_ssm_prop),
    ):
        print('%s=%s' % (key, val))
def main():
    """Render an HTML report of Pairtree results (tree, VAF matrix, pairwise
    relations, cluster stats) for a single tree index."""
    # The full menu of plot names accepted by --plot / --omit-plots.
    all_plot_choices = set((
        'tree',
        'pairwise_separate',
        'pairwise_mle',
        'vaf_matrix',
        'phi',
        'phi_hat',
        'phi_interleaved',
        'cluster_stats',
        'eta',
        'diversity_indices',
    ))
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--seed', type=int)
    parser.add_argument('--tree-index', type=int, default=0)
    parser.add_argument('--plot',
                        dest='plot_choices',
                        type=lambda s: set(s.split(',')),
                        help='Things to plot; by default, plot everything')
    parser.add_argument('--omit-plots',
                        dest='omit_plots',
                        type=lambda s: set(s.split(',')),
                        help='Things to omit from plotting; overrides --plot')
    parser.add_argument('--runid')
    parser.add_argument(
        '--reorder-subclones',
        action='store_true',
        help=
        'Reorder subclones according to depth-first search through tree structure'
    )
    parser.add_argument(
        '--tree-json',
        dest='tree_json_fn',
        help=
        'Additional external file in which to store JSON, which is already stored statically in the HTML file'
    )
    parser.add_argument('--phi-orientation',
                        choices=('samples_as_rows', 'populations_as_rows'),
                        default='populations_as_rows')
    parser.add_argument(
        '--remove-normal',
        action='store_true',
        help=
        'Remove normal (non-cancerous) population 0 from tree, phi, and eta plots.'
    )
    parser.add_argument('ssm_fn')
    parser.add_argument('params_fn')
    parser.add_argument('results_fn')
    parser.add_argument('discord_fn')
    parser.add_argument('html_out_fn')
    args = parser.parse_args()

    # Make numerical problems fatal rather than silent warnings.
    np.seterr(divide='raise', invalid='raise', over='raise')

    # Seed both RNGs so plots are reproducible when a seed is supplied.
    if args.seed is not None:
        random.seed(args.seed)
        np.random.seed(args.seed)

    plot_choices = _choose_plots(args.plot_choices, args.omit_plots,
                                 all_plot_choices)

    results = resultserializer.Results(args.results_fn)
    variants = inputparser.load_ssms(args.ssm_fn)
    params = inputparser.load_params(args.params_fn)
    discord = _parse_discord(args.discord_fn)

    # Per-tree quantities are indexed by --tree-index; the remaining keys
    # below are shared across all trees in the results archive.
    data = {
        K: results.get(K)[args.tree_index]
        for K in (
            'struct',
            'count',
            'llh',
            'prob',
            'phi',
        )
    }
    data['garbage'] = results.get('garbage')
    data['clusters'] = results.get('clusters')
    data['samples'] = params['samples']
    data['clustrel_posterior'] = results.get_mutrel('clustrel_posterior')
    if args.reorder_subclones:
        data, params = _reorder_subclones(data, params)

    # Optional `hidden_samples` in params restricts which sample columns the
    # tree plots display; it must be a proper subset of the samples.
    if 'hidden_samples' in params:
        hidden = set(params['hidden_samples'])
        assert hidden.issubset(set(
            data['samples'])) and len(hidden) < len(data['samples'])
        visible_sampidxs = [
            idx for idx, samp in enumerate(data['samples'])
            if samp not in hidden
        ]
    else:
        visible_sampidxs = None

    # Optional colour overrides from params; validated against the data.
    samp_colours = params.get('samp_colours', None)
    pop_colours = params.get('pop_colours', None)
    if samp_colours is not None:
        assert set([S[0] for S in samp_colours]).issubset(data['samples'])
    if pop_colours is not None:
        # One colour per population, including the root (hence the +1).
        assert len(pop_colours) == len(data['struct']) + 1

    # Collapse each cluster to a supervariant, ordered by canonical variant ID.
    supervars = clustermaker.make_cluster_supervars(data['clusters'], variants)
    supervars = [supervars[vid] for vid in common.sort_vids(supervars.keys())]

    with open(args.html_out_fn, 'w') as outf:
        write_header(args.runid, args.tree_index, outf)

        if 'tree' in plot_choices:
            tree_struct = util.make_tree_struct(
                data['struct'],
                data['count'],
                data['llh'],
                data['prob'],
                data['phi'],
                supervars,
                data['clusters'],
                data['samples'],
            )
            tree_struct['discord'] = discord

            _write_tree_html(
                tree_struct,
                args.tree_index,
                visible_sampidxs,
                samp_colours,
                pop_colours,
                'eta' in plot_choices,
                'diversity_indices' in plot_choices,
                'phi' in plot_choices,
                'phi_hat' in plot_choices,
                'phi_interleaved' in plot_choices,
                args.phi_orientation,
                args.remove_normal,
                outf,
            )
            # Optionally mirror the tree JSON to a standalone file.
            if args.tree_json_fn is not None:
                _write_tree_json(tree_struct, args.tree_json_fn)

        if 'vaf_matrix' in plot_choices:
            vaf_plotter.plot_vaf_matrix(
                data['clusters'],
                variants,
                supervars,
                data['garbage'],
                data['phi'],
                data['samples'],
                should_correct_vaf=True,
                outf=outf,
            )

        if 'pairwise_mle' in plot_choices:
            relation_plotter.plot_ml_relations(data['clustrel_posterior'],
                                               outf)
        if 'pairwise_separate' in plot_choices:
            relation_plotter.plot_separate_relations(
                data['clustrel_posterior'], outf)
        if 'cluster_stats' in plot_choices:
            write_cluster_stats(data['clusters'], data['garbage'], supervars,
                                variants, outf)

        write_footer(outf)
Example #26
0
def load_results(sampid, params_fn, trees_fn):
    """Load preliminary trees for `sampid` and convert them into results,
    using the directory containing `trees_fn` for output."""
    params = inputparser.load_params(params_fn)
    outdir = os.path.dirname(trees_fn)
    prelim = load_prelim_trees(trees_fn)
    return convert_results(sampid, prelim, params['clusters'], outdir)
Example #27
0
def main():
    """Compute the log-likelihood of a fixed clustering under one of two
    clustering models ('pairwise' or 'linfreq') and print the raw LLH plus a
    normalized version."""
    parser = argparse.ArgumentParser(
        description='LOL HI THERE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--concentration',
        dest='logconc',
        type=float,
        default=-2,
        help=
        'log10(alpha) for Chinese restaurant process. The larger this is, the stronger the preference for more clusters.'
    )
    parser.add_argument('--parallel',
                        dest='parallel',
                        type=int,
                        default=1,
                        help='Number of tasks to run in parallel')
    parser.add_argument(
        '--prior',
        type=float,
        default=0.25,
        help=
        'Pairwise coclustering prior probability. Used only for --model=pairwise or --model=both.'
    )
    parser.add_argument('--model',
                        choices=('pairwise', 'linfreq'),
                        required=True,
                        help='Clustering model to use')
    parser.add_argument('ssm_fn')
    parser.add_argument('params_fn')
    args = parser.parse_args()

    variants = inputparser.load_ssms(args.ssm_fn)
    params = inputparser.load_params(args.params_fn)
    clusters = params['clusters']
    garbage = params.get('garbage', [])
    variants = inputparser.remove_garbage(variants, garbage)

    # M: number of non-garbage variants; S: number of samples (length of any
    # variant's per-sample read-count vector).
    M = len(variants)
    S = len(list(variants.values())[0]['var_reads'])
    logconc = _normalize_logconc(args.logconc, S)

    if args.model == 'pairwise':
        vids, Z = cluster_pairwise._convert_clustering_to_assignment(clusters)
        logprior = _make_coclust_logprior(args.prior, S)
        mutrel_posterior, mutrel_evidence = pairwise.calc_posterior(
            variants, logprior, 'mutation', args.parallel)
        # The posterior's variant ordering must match the assignment's.
        assert vids == mutrel_posterior.vids
        log_clust_probs, log_notclust_probs = cluster_pairwise._make_coclust_probs(
            mutrel_posterior)
        llh = cluster_pairwise._calc_llh(Z, log_clust_probs,
                                         log_notclust_probs, logconc)
    elif args.model == 'linfreq':
        vids1, V, T, T_prime, omega = inputparser.load_read_counts(variants)
        vids2, Z = cluster_pairwise._convert_clustering_to_assignment(clusters)
        # Read counts and cluster assignment must agree on variant order.
        assert vids1 == vids2

        # Beta distribution prior for phi
        phi_alpha0 = 1.
        phi_beta0 = 1.
        llh = cluster_linfreq._calc_llh(V, T_prime, Z, phi_alpha0, phi_beta0,
                                        logconc)
    else:
        raise Exception('Unknown model')

    # Negative LLH normalized per variant, per sample, in bits (log base 2).
    nlglh = -llh / (M * S * np.log(2))
    print(llh, nlglh)
Example #28
0
def extract_assignment(paramsfn):
  """Return (cluster count, variant IDs, cluster assignment) for the
  clustering stored in a params file."""
  clusters = inputparser.load_params(paramsfn)['clusters']
  vids, assign = convert_clustering_to_assignment(clusters)
  return (len(clusters), vids, assign)