# Example 1 (score: 0)
def update_V_random_grid_pairwise(model, num_points=10):
    """Metropolis-Hastings update of V for a random feature pair via a random grid.

    Lays out `num_points` candidate points along a random direction through the
    current value of the two selected rows of V, samples one in proportion to a
    distance-weighted posterior, and accepts or rejects the move with an MH test.
    """
    if model.params.K < 2:
        return

    # Choose two distinct features whose V rows are updated jointly.
    ka, kb = np.random.choice(model.params.K, 2, replace=False)

    params = model.params.copy()

    old = params.V[[ka, kb]].flatten()

    D = params.D
    dim = 2 * D

    # Uniform random direction on the unit sphere in R^dim.
    e = scipy.stats.multivariate_normal.rvs(np.zeros(dim), np.eye(dim))
    e /= np.linalg.norm(e)

    # Random step length. NOTE(review): gamma.rvs(1, 1) sets loc=1 (second
    # positional is loc, not scale) — confirm a shifted Gamma(1) was intended.
    r = scipy.stats.gamma.rvs(1, 1)

    grid = np.arange(1, num_points + 1)

    # Forward candidates: increasing multiples of r along e from the old value.
    ys = old[np.newaxis, :] + grid[:, np.newaxis] * r * e[np.newaxis, :]

    log_p_new = np.zeros(num_points)

    for i, y in enumerate(ys):
        params.V[[ka, kb]] = y.reshape((2, D))
        log_p_new[i] = model.joint_dist.log_p(model.data, params)

    # Abort the move if every candidate is impossible or any density is NaN.
    if np.all(np.isneginf(log_p_new)) or np.any(np.isnan(log_p_new)):
        return

    try:
        idx = discrete_rvs(
            np.exp(0.5 * np.log(grid) + log_normalize(log_p_new)))
    except ValueError:
        return

    new = ys[idx]

    # Reverse-move candidates: stepping back from the proposed point.
    xs = new[np.newaxis, :] - grid[:, np.newaxis] * r * e[np.newaxis, :]

    log_p_old = np.zeros(num_points)

    for i, x in enumerate(xs):
        params.V[[ka, kb]] = x.reshape((2, D))
        log_p_old[i] = model.joint_dist.log_p(model.data, params)

    accepted = do_metropolis_hastings_accept_reject(
        log_sum_exp(log_p_new), log_sum_exp(log_p_old), 0, 0)

    params.V[[ka, kb]] = (new if accepted else old).reshape((2, D))

    model.params = params
# Example 2 (score: 0)
def update_V_random_grid(model, num_points=10):
    """Metropolis-Hastings update of the whole V matrix via a random grid.

    Lays out `num_points` candidate points along a random direction through the
    current (flattened) V, samples one in proportion to a distance-weighted
    posterior, and accepts or rejects the move with an MH test.

    Fix: adds the degenerate-weight guards already used by
    `update_V_random_grid_pairwise` — an all -inf / NaN grid, or a ValueError
    from `discrete_rvs`, now aborts the move instead of crashing.
    """
    if model.params.K < 2:
        return

    params = model.params.copy()

    old = params.V.flatten()

    K, D = params.V.shape

    dim = K * D

    # Uniform random direction on the unit sphere in R^dim.
    e = scipy.stats.multivariate_normal.rvs(np.zeros(dim), np.eye(dim))

    e /= np.linalg.norm(e)

    # Random step length. NOTE(review): gamma.rvs(1, 1) sets loc=1 (second
    # positional is loc, not scale) — confirm a shifted Gamma(1) was intended.
    r = scipy.stats.gamma.rvs(1, 1)

    grid = np.arange(1, num_points + 1)

    # Forward candidates: increasing multiples of r along e from the old value.
    ys = old[np.newaxis, :] + grid[:, np.newaxis] * r * e[np.newaxis, :]

    log_p_new = np.zeros(num_points)

    for i in range(num_points):
        params.V = ys[i].reshape((K, D))

        log_p_new[i] = model.joint_dist.log_p(model.data, params)

    # Guard (consistent with update_V_random_grid_pairwise): abort when the
    # proposal weights are degenerate rather than crashing in discrete_rvs.
    if np.all(np.isneginf(log_p_new)) or np.any(np.isnan(log_p_new)):
        return

    try:
        idx = discrete_rvs(
            np.exp(0.5 * np.log(grid) + log_normalize(log_p_new)))

    except ValueError:
        return

    new = ys[idx]

    # Reverse-move candidates: stepping back from the proposed point.
    xs = new[np.newaxis, :] - grid[:, np.newaxis] * r * e[np.newaxis, :]

    log_p_old = np.zeros(num_points)

    for i in range(num_points):
        params.V = xs[i].reshape((K, D))

        log_p_old[i] = model.joint_dist.log_p(model.data, params)

    if do_metropolis_hastings_accept_reject(log_sum_exp(log_p_new),
                                            log_sum_exp(log_p_old), 0, 0):
        params.V = new.reshape((K, D))

    else:
        params.V = old.reshape((K, D))

    model.params = params
# Example 3 (score: 0)
def get_exact_posterior(model):
    """Compute the exact posterior over all binary Z matrices by enumeration.

    Returns a dict mapping each flattened Z (as a tuple of 0/1 entries) to its
    normalized posterior probability.

    NOTE: mutates `model.params.Z` as a side effect — it is left set to the
    last enumerated matrix.
    """
    log_p = []

    Zs = []

    for Z in get_all_binary_matrices(model.params.K, model.data.shape[0]):
        Zs.append(tuple(Z.flatten()))

        model.params.Z = Z

        log_p.append(model.log_p)

    p = np.exp(log_normalize(np.array(log_p)))

    # dict accepts the zip iterator directly; the intermediate list() was redundant.
    return dict(zip(Zs, p))
# Example 4 (score: 0)
def do_row_gibbs_update(cols, data, dist, feat_probs, params, row_idx, Zs):
    """Gibbs-sample row `row_idx` of Z from the candidate configurations `Zs`.

    Scores each candidate by its Bernoulli feature prior (restricted to `cols`)
    plus the row data likelihood, then samples one via the Gumbel trick and
    writes it into `params.Z`. Returns the mutated `params`.
    """
    log_on = np.log(feat_probs[cols])
    log_off = np.log(1 - feat_probs[cols])

    num_states = len(Zs)
    log_p = np.zeros(num_states)

    for state in range(num_states):
        # Score the candidate in place so the likelihood sees the full matrix.
        params.Z[row_idx] = Zs[state]
        prior = np.sum(Zs[state, cols] * log_on) + \
            np.sum((1 - Zs[state, cols]) * log_off)
        log_p[state] = prior + dist.log_p_row(data, params, row_idx)

    chosen = discrete_rvs_gumbel_trick(log_normalize(log_p))

    params.Z[row_idx] = Zs[chosen]

    return params
# Example 5 (score: 0)
def get_sample_data_point(a,
                          b,
                          cn_major,
                          cn_minor,
                          cn_normal=2,
                          error_rate=1e-3,
                          tumour_content=1.0):
    """Build a SampleDataPoint enumerating mutational genotypes for a variant.

    Args:
        a: Variant allele count.
        b: Reference (total - variant) allele count.
        cn_major: Major copy number of the tumour segment.
        cn_minor: Minor copy number of the tumour segment.
        cn_normal: Copy number of the normal population.
        error_rate: Sequencing error rate used as the baseline VAF.
        tumour_content: Fraction of tumour cells in the sample.

    Returns a SampleDataPoint carrying the candidate (cn, mu) states and a
    uniform log prior over them.
    """
    cn_total = cn_major + cn_minor

    cn = []

    mu = []

    log_pi = []

    # Consider all possible mutational genotypes consistent with mutation
    # before the CN change: the variant may sit on 1..cn_major copies.
    for x in range(1, cn_major + 1):
        cn.append((cn_normal, cn_normal, cn_total))

        mu.append((error_rate, error_rate, min(1 - error_rate, x / cn_total)))

        log_pi.append(0)

    # Genotype for mutation AFTER the CN change (single mutant copy), if not
    # already covered above. (Original comment said "before" — the code
    # clearly handles the after-CN case.)
    mutation_after_cn = (cn_normal, cn_total, cn_total)

    if mutation_after_cn not in cn:
        cn.append(mutation_after_cn)

        mu.append((error_rate, error_rate, min(1 - error_rate, 1 / cn_total)))

        log_pi.append(0)

    # Fix: np.int / np.float were deprecated aliases for the builtins and were
    # removed in NumPy 1.24 — using int / float preserves the original dtypes.
    cn = np.array(cn, dtype=int)

    mu = np.array(mu, dtype=float)

    # Uniform prior over the enumerated genotype states.
    log_pi = log_normalize(np.array(log_pi, dtype=np.float64))

    return SampleDataPoint(int(a), int(b), cn, mu, log_pi, tumour_content)
# Example 6 (score: 0)
    def _propose_split(self, anchors, features, model, V, Z, Z_target=None):
        """Propose splitting feature ``features[0]`` into two new features.

        The two anchor rows are deterministically assigned one new feature
        each; the remaining members of the old feature are allocated by
        sequential sampling over three states (feature a only, feature b only,
        both). If ``Z_target`` is given, the allocation is forced to replay
        the target assignment instead of being sampled — presumably used to
        score the reverse move (TODO confirm with caller).

        Returns:
            (V_new, Z_new, log_q) where log_q is the accumulated log
            probability of the sampled (or replayed) allocation sequence.
        """
        # Feature being split.
        k_m = features[0]

        i, j = anchors

        _, D = V.shape

        N, K = Z.shape

        # New parameters with one extra feature; the two split halves occupy
        # the last two positions (-1 and -2).
        V_new = np.zeros((K + 1, D), dtype=V.dtype)

        Z_new = np.zeros((N, K + 1), dtype=Z.dtype)

        idx = 0

        # Copy every feature except those listed for replacement by the split.
        for k in range(K):
            if k in features:
                continue

            V_new[idx] = V[k]

            Z_new[:, idx] = Z[:, k]

            idx += 1

        # Randomly partition the old feature's loadings between the halves.
        weight = np.random.random(D)

        V_new[-1] = weight * V[k_m]

        V_new[-2] = (1 - weight) * V[k_m]

        # Each anchor seeds one of the two new features.
        Z_new[i, -1] = 1

        Z_new[j, -2] = 1

        # Remaining members of the split feature, visited in random order.
        # NOTE(review): np.squeeze yields a 0-d array when exactly one row has
        # Z[:, k_m] == 1, making list() fail — confirm callers guarantee the
        # anchors are both members so at least two indices exist.
        active_set = list(np.squeeze(np.where(Z[:, k_m] == 1)))

        active_set.remove(i)

        active_set.remove(j)

        np.random.shuffle(active_set)

        # Accumulated log proposal probability of the allocation sequence.
        log_q = 0

        # Reused 3-vector of log weights for the allocation states.
        log_p = np.zeros(3)

        params = model.params.copy()

        params.V = V_new

        # params.Z aliases Z_new from here on (see the assert at the end), so
        # writes to params.Z below are also visible through Z_new.
        params.Z = Z_new

        # Rows considered so far; starts at 2 for the two anchors.
        N_prev = 2

        for idx in active_set:  # + [i, j]:
            if idx not in [i, j]:
                N_prev += 1

            # Current membership counts of new features a (-1) and b (-2).
            m_a = np.sum(Z_new[:, -1])

            m_b = np.sum(Z_new[:, -2])

            # State 0: row joins feature a only.
            params.Z[idx, -1] = 1

            params.Z[idx, -2] = 0

            log_p[0] = np.log(m_a) + np.log(N_prev -
                                            m_b) + model.data_dist.log_p_row(
                                                model.data, params, idx)

            # State 1: row joins feature b only.
            params.Z[idx, -1] = 0

            params.Z[idx, -2] = 1

            log_p[1] = np.log(N_prev -
                              m_a) + np.log(m_b) + model.data_dist.log_p_row(
                                  model.data, params, idx)

            # State 2: row joins both features.
            params.Z[idx, -1] = 1

            params.Z[idx, -2] = 1

            log_p[2] = np.log(m_a) + np.log(m_b) + model.data_dist.log_p_row(
                model.data, params, idx)

            log_p = log_normalize(log_p)

            if Z_target is None:
                # Free run: sample the allocation state for this row.
                state = discrete_rvs(np.exp(log_p))

            else:
                # Forced run: replay the allocation recorded in Z_target.
                if np.all(Z_target[idx] == np.array([1, 0])):
                    state = 0

                elif np.all(Z_target[idx] == np.array([0, 1])):
                    state = 1

                elif np.all(Z_target[idx] == np.array([1, 1])):
                    state = 2

                else:
                    raise Exception('Invalid')

            # Commit the chosen state to Z_new (and thus params.Z).
            if state == 0:
                Z_new[idx, -1] = 1

                Z_new[idx, -2] = 0

            elif state == 1:
                Z_new[idx, -1] = 0

                Z_new[idx, -2] = 1

            elif state == 2:
                Z_new[idx, -1] = 1

                Z_new[idx, -2] = 1

            else:
                raise Exception('Invalid state')

            log_q += log_p[state]

        # The in-place updates above rely on params.Z aliasing Z_new.
        assert Z_new is params.Z

        return V_new, Z_new, log_q