Esempio n. 1
def make_full_posterior(evidence, logprior):
    """Build the posterior mutrel for `evidence` under `logprior`.

    `logprior` is first passed through `_complete_logprior`; the posterior
    relations are then computed from `evidence.rels` by
    `_calc_posterior_full`.

    Args:
        evidence: object exposing `vids` and `rels` (presumably a
            `mutrel.Mutrel` of pairwise evidence -- confirm against callers).
        logprior: prior specification accepted by `_complete_logprior`.

    Returns:
        A `mutrel.Mutrel` whose `vids` are those of `evidence` and whose
        `rels` hold the computed posterior.
    """
    logprior = _complete_logprior(logprior)
    posterior = mutrel.Mutrel(
        # Keep the variant IDs alongside the relations, as `merge_variants`
        # does when it builds its posterior from the same helper.
        vids=evidence.vids,
        rels=_calc_posterior_full(evidence.rels, logprior),
    )
    return posterior
Esempio n. 2
def make_mutrel_from_trees_and_unique_clusterings(structs, llhs, clusterings):
    """Average per-tree mutrels, weighting each tree by its softmaxed LLH.

    Relative to `make_mutrel_from_trees_and_single_clustering`, this function
    is slower and more memory intensive, but also more flexible. It differs in
    two ways:

    1. It doesn't assume that the user has already computed counts for all
       unique samples -- i.e., it allows duplicate samples.

    2. It allows unique clusterings for every sample.

    Args:
        structs: one tree structure per sample, in the form accepted by
            `util.convert_parents_to_adjmatrix`.
        llhs: one log-likelihood per sample; converted to weights with
            `util.softmax`.
        clusterings: one clustering per sample.

    Returns:
        A `mutrel.Mutrel` holding the weighted sum of the per-sample mutrels.
    """
    assert len(structs) == len(llhs) == len(clusterings)
    weights = util.softmax(llhs)
    vids = None

    for struct, clustering, weight in zip(structs, clusterings, weights):
        adjm = util.convert_parents_to_adjmatrix(struct)
        mrel = make_mutrel_from_cluster_adj(adjm, clustering)
        if vids is None:
            # First sample fixes the variant ordering and the tensor shape.
            vids = mrel.vids
            soft_mutrel = np.zeros(mrel.rels.shape)
        else:
            # Every later sample must describe the same variants in the same
            # order, or the element-wise accumulation below is meaningless.
            assert mrel.vids == vids
        soft_mutrel += weight * mrel.rels

    soft_mutrel = fix_rounding_errors(soft_mutrel)
    return mutrel.Mutrel(vids=vids, rels=soft_mutrel)
Esempio n. 3
def make_mutrel_from_trees_and_single_clustering(structs, llhs, counts,
                                                 clustering):
    """Build a mutrel from weighted trees that all share one clustering.

    A cluster-level relation tensor is accumulated across trees and expanded
    to mutation level once at the end via `make_mutrel_from_clustrel`.

    Args:
        structs: unique tree structures, in the form accepted by
            `util.convert_parents_to_adjmatrix`.
        llhs: log-likelihood of each unique structure.
        counts: number of times each unique structure was sampled.
        clustering: the single clustering shared by every tree.

    Returns:
        The mutation-level mutrel produced by `make_mutrel_from_clustrel`.
    """
    # Oftentimes, we will have many samples of the same adjacency matrix paired
    # with the same clustering. This will produce the same mutrel. As computing
    # the mutrel from adjm + clustering is expensive, we want to avoid repeating
    # this unnecessarily. Instead, we just modify the associated weight of the
    # pairing to reflect this.
    #
    # Observe that if we have `C` copies of the LLH `W`, we obtain
    # equivalent post-softmax linear-space weights under either of the following
    # two methods:
    # 1. (naive) Represent the associated samples `C` separate times in the softmax
    # 2. (smart) Set `W' = W + log(C)`, as `exp(W') = Cexp(W)`
    weights = util.softmax(llhs + np.log(counts))
    vids = None

    for struct, weight in zip(structs, weights):
        adjm = util.convert_parents_to_adjmatrix(struct)
        crel = make_clustrel_from_cluster_adj(adjm)

        if vids is None:
            # First tree fixes the cluster ordering and the tensor shape.
            vids = crel.vids
            soft_clustrel = np.zeros(crel.rels.shape)
        else:
            # Later trees must use the same cluster IDs in the same order.
            assert crel.vids == vids
        soft_clustrel += weight * crel.rels

    soft_clustrel = fix_rounding_errors(soft_clustrel)
    clustrel = mutrel.Mutrel(rels=soft_clustrel, vids=vids)
    mrel = make_mutrel_from_clustrel(clustrel, clustering)
    return mrel
Esempio n. 4
def make_mutrel_from_clustrel(clustrel, clusters, check_sanity=True):
    """Expand a cluster-level relation tensor to mutation level.

    Each mutation pair inherits the relation values of the cluster pair its
    members belong to, computed per model as
    `membership . clustrel . membership^T`.

    Args:
        clustrel: object whose `rels` is a `(K, K, NUM_MODELS)` tensor.
        clusters: sequence of `K` clusters, passed to
            `util.make_membership_mat`.
        check_sanity: kept for interface compatibility; this body does not
            consult it (the post-hoc check is disabled, see below).

    Returns:
        A `mutrel.Mutrel` with the `vids` reported by
        `util.make_membership_mat` and an `(M, M, NUM_MODELS)` `rels` tensor.
    """
    # K: number of non-empty clusters
    K = len(clusters)
    assert clustrel.rels.shape == (K, K, NUM_MODELS)

    vids, membership = util.make_membership_mat(clusters)
    # M: number of rows in the membership matrix, one per entry of `vids`
    M = len(membership)
    assert len(vids) == M
    assert membership.shape == (M, K)

    mrel = np.zeros((M, M, NUM_MODELS))

    for modelidx in range(NUM_MODELS):
        mut_vs_cluster = np.dot(membership,
                                clustrel.rels[:, :, modelidx])  # MxK
        mrel[:, :, modelidx] = np.dot(mut_vs_cluster, membership.T)  # MxM
    # Disable check to improve performance. Since this is called for each tree
    # (for methods that don't have a fixed clustering), it can be prohibitively
    # slow -- it was consuming >50% of the total runtime for LICHeE's output
    # conversion.

    return mutrel.Mutrel(vids=vids, rels=mrel)
Esempio n. 5
def make_clustrel_from_cluster_adj(cluster_adj):
    """Derive the pairwise cluster relation tensor from a tree adjacency matrix.

    * `K` = # of clusters (including empty first cluster)

    Args:
        cluster_adj: a `KxK` adjacency matrix, where `cluster_adj[a,b] = 1`
            iff `a = b` or `b` is a child of `a`.

    Returns:
        A `mutrel.Mutrel` whose `vids` are `S1..SK` and whose `rels` is a
        `KxKxNUM_MODELS` binary relation tensor with exactly one model set
        per cluster pair.
    """
    K = len(cluster_adj)
    assert cluster_adj.shape == (K, K)
    cluster_anc = util.make_ancestral_from_adj(cluster_adj)
    # The first cluster must be ancestral to every cluster. Check this before
    # the diagonal is cleared, as row 0 still includes its own entry here.
    assert np.all(1 == cluster_anc[0])
    # In determining A_B relations, don't want to set mutations (i,j), where i
    # and j are in same cluster, to 1.
    np.fill_diagonal(cluster_anc, 0)

    clustrel = np.zeros((K, K, NUM_MODELS))
    clustrel[:, :, Models.cocluster] = np.eye(K)
    clustrel[:, :, Models.A_B] = cluster_anc
    clustrel[:, :, Models.B_A] = clustrel[:, :, Models.A_B].T

    # Any pair not yet claimed by one of the three models above lies on
    # different branches of the tree.
    existing = (Models.cocluster, Models.A_B, Models.B_A)
    already_filled = np.sum(clustrel[:, :, existing], axis=2)
    clustrel[already_filled == 0, Models.diff_branches] = 1

    # Each cluster pair must carry exactly one relation.
    assert np.array_equal(np.ones((K, K)), np.sum(clustrel, axis=2))
    vids = ['S%s' % (idx + 1) for idx in range(K)]
    clustrel = mutrel.Mutrel(vids=vids, rels=clustrel)
    return clustrel
Esempio n. 6
def merge_variants(to_merge, evidence, logprior):
  """Collapse groups of variants into single merged variants.

  For every group in `to_merge`, a new variant is appended whose evidence row
  is the sum of the rows of the group's members. Once all groups have been
  processed, the original members are removed and the posterior is recomputed
  from the reduced evidence.

  Args:
    to_merge: iterable of groups, each an iterable of variant indices into
      `evidence.vids`. Groups must be disjoint.
    evidence: object exposing `vids` (a list) and `rels`.
    logprior: prior forwarded to `_calc_posterior_full`.

  Returns:
    A `(posterior, evidence)` tuple, both over the merged variant set.
  """
  assert np.all(np.array([V for group in to_merge for V in group]) < len(evidence.vids))
  already_merged = set()

  for vidxs in to_merge:
    vidxs = set(vidxs)
    # A variant may belong to at most one merge group.
    assert len(vidxs & already_merged) == 0

    M_old = len(evidence.vids)
    merged_vid = ','.join([evidence.vids[V] for V in vidxs])
    new_vids = evidence.vids + [merged_vid]

    # Grow the tensor by one variant, keeping all existing pairwise evidence.
    new_evidence = mutrel.init_mutrel(new_vids)
    new_evidence.rels[:-1,:-1] = evidence.rels

    merged_row = np.sum(np.array([evidence.rels[V] for V in vidxs]), axis=0)
    assert merged_row.shape == (M_old, NUM_MODELS)
    # The column is the row seen from the other variant's side, so the two
    # directional models trade places.
    merged_col = np.copy(merged_row)
    merged_col[:,Models.A_B] = merged_row[:,Models.B_A]
    merged_col[:,Models.B_A] = merged_row[:,Models.A_B]

    new_evidence.rels[-1,:-1] = merged_row
    new_evidence.rels[:-1,-1] = merged_col
    # The merged variant's relation to itself: only `cocluster` is given a
    # finite value (presumably these are log-space values -- confirm).
    new_evidence.rels[-1,-1,:] = -np.inf
    new_evidence.rels[-1,-1,Models.cocluster] = 0

    already_merged |= vidxs
    evidence = new_evidence

  # Originals are dropped only after every group is merged, so the indices in
  # `to_merge` remain valid throughout the loop.
  evidence = mutrel.remove_variants_by_vidx(evidence, already_merged)
  posterior = mutrel.Mutrel(
    vids = evidence.vids,
    rels = _calc_posterior_full(evidence.rels, logprior),
  )
  return (posterior, evidence)
Esempio n. 7
def load_mutrels(mutrel_args):
    """Load named mutrels from `NAME=PATH` arguments.

    Args:
        mutrel_args: iterable of strings of the form `NAME=PATH`. Only the
            first `=` separates the name from the path.

    Returns:
        A dict mapping each name to a `mutrel.Mutrel` loaded from its path,
        or to `None` when the path does not exist.

    Raises:
        AssertionError: if the same name appears more than once.
    """
    mutrels = {}
    for mutrel_arg in mutrel_args:
        mutrel_name, mutrel_path = mutrel_arg.split('=', 1)
        assert mutrel_name not in mutrels, '%s is duplicate' % mutrel_name
        if os.path.exists(mutrel_path):
            mrel = np.load(mutrel_path)
            # NOTE(review): the 'rels' key mirrors the 'vids' key used here;
            # confirm it against whatever writes these files.
            mutrels[mutrel_name] = mutrel.Mutrel(vids=mrel['vids'],
                                                 rels=mrel['rels'])
        else:
            # Missing files are recorded rather than treated as errors, so
            # callers can tell which names were requested.
            mutrels[mutrel_name] = None
    return mutrels
Esempio n. 8
 def get_mutrel(self, name):
   """Fetch the `<name>_vids` and `<name>_rels` entries and wrap them in a Mutrel."""
   vids_key = '%s_vids' % name
   rels_key = '%s_rels' % name
   # Both entries are requested in a single `get_many` call, vids first.
   fetched = self.get_many([vids_key, rels_key])
   return mutrel.Mutrel(vids=fetched[vids_key], rels=fetched[rels_key])