Esempio n. 1
0
def make_full_posterior(evidence, logprior):
    '''
    Build a posterior mutrel from pairwise evidence and a log-prior.

    The supplied `logprior` is first passed through `_complete_logprior`, and
    the result is combined with `evidence.rels` by `_calc_posterior_full`.

    Arguments:
    `evidence`: object exposing `.vids` and `.rels`
    `logprior`: log-prior specification accepted by `_complete_logprior`

    Returns:
    a `mutrel.Mutrel` carrying the same `vids` as `evidence` and the computed
    posterior as `rels`
    '''
    full_logprior = _complete_logprior(logprior)
    posterior_rels = _calc_posterior_full(evidence.rels, full_logprior)
    return mutrel.Mutrel(vids=evidence.vids, rels=posterior_rels)
Esempio n. 2
0
def make_mutrel_from_trees_and_unique_clusterings(structs, llhs, clusterings):
    '''
    Average per-tree mutrels into one soft mutrel, weighting each tree by the
    softmax of its log-likelihood.

    Compared with `make_mutrel_from_trees_and_single_clustering`, this is
    slower and uses more memory, but is more flexible in two ways:

    1. Duplicate samples are allowed -- the caller need not have collapsed
    identical samples into counts beforehand.

    2. Every sample may come with its own clustering.

    Arguments:
    `structs`: one parent vector per sampled tree
    `llhs`: one log-likelihood per sampled tree
    `clusterings`: one clustering per sampled tree

    All three must have the same length. Every tree must yield the same `vids`.
    '''
    assert len(structs) == len(llhs) == len(clusterings)
    tree_weights = util.softmax(llhs)
    vids = None

    for parents, clusters, tree_weight in zip(structs, clusterings,
                                              tree_weights):
        tree_mrel = make_mutrel_from_cluster_adj(
            util.convert_parents_to_adjmatrix(parents),
            clusters,
        )
        if vids is not None:
            # Every subsequent tree must describe the same variants, in the
            # same order, as the first.
            assert tree_mrel.vids == vids
        else:
            # First tree: it fixes the variant IDs and the accumulator shape.
            vids = tree_mrel.vids
            weighted_sum = np.zeros(tree_mrel.rels.shape)
        weighted_sum += tree_weight * tree_mrel.rels

    return mutrel.Mutrel(
        vids=vids,
        rels=fix_rounding_errors(weighted_sum),
    )
Esempio n. 3
0
def make_mutrel_from_trees_and_single_clustering(structs, llhs, counts,
                                                 clustering):
    '''
    Average per-tree cluster relations into a soft clustrel, then expand it to
    a mutrel using the single shared `clustering`.

    Arguments:
    `structs`: one parent vector per unique tree
    `llhs`: one log-likelihood per unique tree
    `counts`: how many times each unique tree was sampled
    `clustering`: the clustering shared by all trees
    '''
    # The same tree is frequently sampled many times with the same clustering,
    # and each copy would yield an identical (and expensive to compute)
    # relation tensor. Rather than recompute it, fold the multiplicity into the
    # weight: `C` copies of log-likelihood `W` contribute `C * exp(W)` to the
    # softmax, which is exactly `exp(W + log(C))`.
    log_weights = llhs + np.log(counts)
    tree_weights = util.softmax(log_weights)
    vids = None

    for parents, tree_weight in zip(structs, tree_weights):
        tree_crel = make_clustrel_from_cluster_adj(
            util.convert_parents_to_adjmatrix(parents))

        if vids is not None:
            assert tree_crel.vids == vids
        else:
            # First tree: it fixes the cluster IDs and the accumulator shape.
            vids = tree_crel.vids
            weighted_sum = np.zeros(tree_crel.rels.shape)
        weighted_sum += tree_weight * tree_crel.rels

    averaged = mutrel.Mutrel(rels=fix_rounding_errors(weighted_sum), vids=vids)
    return make_mutrel_from_clustrel(averaged, clustering)
Esempio n. 4
0
def make_mutrel_from_clustrel(clustrel, clusters, check_sanity=True):
    '''
    Expand a cluster-level relation tensor into a mutation-level one.

    * `K` = number of clusters in `clusters`
    * `M` = number of rows in the membership matrix built from `clusters`

    Arguments:
    `clustrel`: object whose `.rels` is a `K x K x NUM_MODELS` tensor
    `clusters`: sequence of `K` clusters, passed to `util.make_membership_mat`
    `check_sanity`: accepted for interface compatibility; this function does
    not currently read it

    Returns:
    a `mutrel.Mutrel` whose `rels` has shape `M x M x NUM_MODELS`
    '''
    mutrel.check_posterior_sanity(clustrel.rels)
    num_clusters = len(clusters)
    assert clustrel.rels.shape == (num_clusters, num_clusters, NUM_MODELS)

    vids, membership = util.make_membership_mat(clusters)
    num_muts = len(membership)
    assert len(vids) == num_muts
    assert membership.shape == (num_muts, num_clusters)

    expanded = np.zeros((num_muts, num_muts, NUM_MODELS))
    membership_t = membership.T
    for model in range(NUM_MODELS):
        # (M x K) . (K x K) -> M x K, then . (K x M) -> M x M
        mut_vs_cluster = np.dot(membership, clustrel.rels[:, :, model])
        expanded[:, :, model] = np.dot(mut_vs_cluster, membership_t)

    # The output is deliberately not run through
    # `mutrel.check_posterior_sanity`: this function is invoked once per tree
    # for methods without a fixed clustering, and the check took >50% of total
    # runtime when converting LICHeE's output.
    return mutrel.Mutrel(vids=vids, rels=expanded)
Esempio n. 5
0
def make_clustrel_from_cluster_adj(cluster_adj):
    '''
    Convert a cluster adjacency matrix into a binary cluster relation tensor.

    * `K` = # of clusters (including empty first cluster)

    Arguments:
    `cluster_adj`: a `KxK` adjacency matrix, where `cluster_adj[a,b] = 1` iff
    `a = b` or `b` is a child of `a`

    Returns:
    a `mutrel.Mutrel` with IDs `S1..SK` whose `rels` is a `K x K x NUM_MODELS`
    tensor in which every `(a, b)` pair has exactly one model set to 1
    '''
    K = len(cluster_adj)
    assert cluster_adj.shape == (K, K)

    ancestry = util.make_ancestral_from_adj(cluster_adj)
    # The first cluster must be ancestral to everything.
    assert np.all(1 == ancestry[0])
    # A cluster is not its own ancestor for A_B purposes: pairs within one
    # cluster are handled by the cocluster model instead.
    np.fill_diagonal(ancestry, 0)

    rels = np.zeros((K, K, NUM_MODELS))
    rels[:, :, Models.cocluster] = np.eye(K)
    rels[:, :, Models.A_B] = ancestry
    rels[:, :, Models.B_A] = rels[:, :, Models.A_B].T

    # Any pair not yet claimed by one of the models above lies on different
    # branches.
    claimed_models = (Models.cocluster, Models.A_B, Models.B_A)
    claimed = np.sum(rels[:, :, claimed_models], axis=2)
    rels[claimed == 0, Models.diff_branches] = 1

    assert np.array_equal(np.ones((K, K)), np.sum(rels, axis=2))
    cluster_ids = ['S%s' % (idx + 1) for idx in range(K)]
    result = mutrel.Mutrel(vids=cluster_ids, rels=rels)
    mutrel.check_posterior_sanity(result.rels)
    return result
Esempio n. 6
0
def merge_variants(to_merge, evidence, logprior):
  '''
  Merge groups of variants into single combined variants and recompute the
  posterior.

  For each group, a new variant is appended whose ID is the comma-joined IDs
  of the group's members and whose evidence row is the sum of the members'
  rows. After all groups are processed, the original members are removed.

  Arguments:
  `to_merge`: iterable of groups, each an iterable of variant indices into
  `evidence.vids`; groups must be pairwise disjoint
  `evidence`: object exposing `.vids` (a list) and `.rels`
  `logprior`: passed unchanged to `_calc_posterior_full`

  Returns:
  `(posterior, evidence)`, where `evidence` is the merged evidence and
  `posterior` is a `mutrel.Mutrel` computed from it
  '''
  assert np.all(np.array([V for group in to_merge for V in group]) < len(evidence.vids))
  consumed = set()

  for group in to_merge:
    members = set(group)
    assert members.isdisjoint(consumed)

    num_old = len(evidence.vids)
    merged_vid = ','.join([evidence.vids[V] for V in members])

    # Grow the evidence by one variant, copying over everything known so far.
    # Merged variants are only ever appended, so the indices in `to_merge`
    # keep pointing at the original variants throughout the loop.
    expanded = mutrel.init_mutrel(evidence.vids + [merged_vid])
    expanded.rels[:-1,:-1] = evidence.rels

    row = np.sum(np.array([evidence.rels[V] for V in members]), axis=0)
    assert row.shape == (num_old, NUM_MODELS)
    # The column is the row seen from the other variant's side, so the two
    # directional models trade places.
    col = np.copy(row)
    col[:,Models.A_B] = row[:,Models.B_A]
    col[:,Models.B_A] = row[:,Models.A_B]

    expanded.rels[-1,:-1] = row
    expanded.rels[:-1,-1] = col
    # The merged variant relative to itself: only cocluster is permitted.
    expanded.rels[-1,-1,:] = -np.inf
    expanded.rels[-1,-1,Models.cocluster] = 0

    consumed.update(members)
    evidence = expanded

  evidence = mutrel.remove_variants_by_vidx(evidence, consumed)
  posterior_rels = _calc_posterior_full(evidence.rels, logprior)
  posterior = mutrel.Mutrel(vids=evidence.vids, rels=posterior_rels)
  return (posterior, evidence)
Esempio n. 7
0
def load_mutrels(mutrel_args):
    '''
    Load named mutrels from `NAME=PATH` arguments.

    Arguments:
    `mutrel_args`: iterable of strings of the form `NAME=PATH`. Only the first
    `=` separates name from path, so the path itself may contain `=`.

    Returns:
    a dict mapping each name to a `mutrel.Mutrel` built from the `vids` and
    `rels` entries of the archive at `PATH`, or to `None` when `PATH` does not
    exist

    Raises:
    `AssertionError` if the same name is given more than once
    `ValueError` if an argument contains no `=`
    '''
    mutrels = {}
    for mutrel_arg in mutrel_args:
        mutrel_name, mutrel_path = mutrel_arg.split('=', 1)
        assert mutrel_name not in mutrels, '%s is duplicate' % mutrel_name
        if not os.path.exists(mutrel_path):
            mutrels[mutrel_name] = None
            continue
        # `np.load` on an .npz archive returns an `NpzFile` that keeps the
        # underlying file open. Use it as a context manager so the handle is
        # released as soon as both arrays have been read; the arrays are fully
        # materialized on access and stay valid after the archive is closed.
        with np.load(mutrel_path) as archive:
            mutrels[mutrel_name] = mutrel.Mutrel(vids=archive['vids'],
                                                 rels=archive['rels'])
    return mutrels
Esempio n. 8
0
 def get_mutrel(self, name):
   '''
   Fetch the mutrel stored under `name`.

   The variant IDs and relation tensor are looked up under the keys
   `<name>_vids` and `<name>_rels` via a single `self.get_many` call.

   Returns:
   a `mutrel.Mutrel` built from the two fetched values
   '''
   keys = []
   for suffix in ('vids', 'rels'):
     keys.append('%s_%s' % (name, suffix))
   fetched = self.get_many(keys)
   vids = fetched['%s_vids' % name]
   rels = fetched['%s_rels' % name]
   return mutrel.Mutrel(vids=vids, rels=rels)