Esempio n. 1
0
def test_log_marginal_likelyhood_exact():
    """Compare the scorer's log marginal likelihood with a direct evaluation.

    For several small count vectors and (alpha, beta) pairs, the value
    returned by ``LogMarginalLikelyhoodComputer.log_marginal_likelyhood`` must
    agree (within ``np.allclose`` tolerance) with the formula evaluated
    directly by ``exact_function``.
    """
    def exact_function(counts, alpha, beta):
        """Return log(beta**alpha * G(cs + alpha) /
        (G(alpha) * prod(c!) * (ns + beta)**(cs + alpha))),
        where G is the gamma function, cs = sum(counts), ns = len(counts).
        """
        # Use math.factorial: the ``np.math`` alias was deprecated in
        # NumPy 1.25 and removed in NumPy 2.0, so ``np.math.factorial``
        # raises AttributeError on current releases. ``int(c)`` converts
        # NumPy integer scalars to plain Python ints first.
        counts_facproduct = functools.reduce(
            operator.mul, (math.factorial(int(c)) for c in counts), 1)
        cs = sum(counts)
        ns = len(counts)
        return np.log((beta**alpha) * math.gamma(cs + alpha) /
                      (math.gamma(alpha) * counts_facproduct *
                       ((ns + beta)**(cs + alpha))))

    # (counts, alpha, beta) triples to check.
    cases = [
        ([0], 3, 5),
        ([0, 1], 3, 5),
        ([4, 0, 1, 3], 5, 2),
        ([4, 0, 1, 3], 1, 1),
    ]
    for values, alpha, beta in cases:
        # Separate arrays for the scorer and the reference computation, so
        # neither can be affected by the other touching its input.
        scorer = pasio.LogMarginalLikelyhoodComputer(np.array(values), alpha,
                                                     beta, None)
        assert np.allclose(scorer.log_marginal_likelyhood(),
                           exact_function(np.array(values), alpha, beta))
Esempio n. 2
0
def test_stat_split_into_segments_square():
    """Check ``SquareSplitter`` against a brute-force single-split search.

    For five random two-regime Poisson samples (rate 15 then rate 20, 100
    points each), the splitter's score must be at least as good as the best
    score obtainable with at most one split, the brute-force split point
    must be among the splitter's split points, and the splitter's reported
    score must equal the score recomputed from its split points.
    """
    def make_scorer(counts, split_candidates=None):
        # Scorer factory with alpha = beta = 1.
        return pasio.LogMarginalLikelyhoodComputer(counts, 1, 1,
                                                   split_candidates)

    def best_single_split(counts, scorer_factory):
        # Try every index as a single split point; keep the best total score.
        # Index 0 is the starting value, used when no split beats the
        # unsplit score.
        scorer = scorer_factory(counts)
        top_score = scorer.score(0, len(counts))
        top_index = 0
        for index in range(len(counts)):
            candidate = scorer.score(stop=index) + scorer.score(start=index)
            if candidate > top_score:
                top_score, top_index = candidate, index
        return top_score, top_index

    np.random.seed(4)
    for _ in range(5):
        low_rate = np.random.poisson(15, 100)
        high_rate = np.random.poisson(20, 100)
        counts = np.concatenate([low_rate, high_rate])

        optimal = pasio.SquareSplitter().split(counts, make_scorer)
        single_score, single_index = best_single_split(counts, make_scorer)

        assert optimal[0] >= single_score
        assert single_index in optimal[1]

        recomputed = pasio.compute_score_from_splits(counts, optimal[1],
                                                     make_scorer)
        assert np.allclose(optimal[0], recomputed)

        # NOTE(review): best_single_split never returns None as the index
        # (it starts at 0), so the first branch below looks unreachable --
        # confirm the intended "no split" sentinel.
        if single_index is None:
            assert optimal[1] == [0, 200]
        else:
            assert abs(single_index - 100) < 10
Esempio n. 3
0
def test_benchmark_log_marginal_likehood(benchmark):
    """Benchmark ``compute_log_marginal_likelyhood2`` on 100 Poisson counts.

    The input is 50 draws with rate 200 followed by 50 draws with rate 20;
    the random generator is not seeded here.
    """
    high_rate = np.random.poisson(200, 50)
    low_rate = np.random.poisson(20, 50)
    sample = np.concatenate([high_rate, low_rate])

    likelihood_scorer = pasio.LogMarginalLikelyhoodComputer(sample, 1, 1)

    # The benchmark's return value is not inspected.
    benchmark(compute_log_marginal_likelyhood2, likelihood_scorer,
              len(sample))
Esempio n. 4
0
def test_benchmark_segmentation_long(benchmark):
    """Benchmark ``segmentation`` on 1000 seeded Poisson counts.

    The input is 500 draws with rate 15 followed by 500 draws with rate 20,
    generated after seeding NumPy's global generator with 2.
    """
    def make_scorer(counts, split_candidates=None):
        # Scorer factory with alpha = beta = 1.
        return pasio.LogMarginalLikelyhoodComputer(counts, 1, 1,
                                                   split_candidates)

    np.random.seed(2)
    first_half = np.random.poisson(15, 500)
    second_half = np.random.poisson(20, 500)
    sample = np.concatenate([first_half, second_half])

    # The benchmark's return value is not inspected.
    benchmark(segmentation, sample, make_scorer)
Esempio n. 5
0
def test_suffixes_scores():
    """``all_suffixes_self_score(stop)`` must match per-start ``self_score``.

    Checked on a seeded two-regime Poisson sample (stop = 150) and on a
    small hand-written int64 vector (stop = len(counts) - 1).
    """
    def assert_suffixes_match(scorer, stop):
        # Reference: one self_score call for every start in [0, stop).
        expected = [scorer.self_score(start, stop) for start in range(stop)]
        assert np.allclose(scorer.all_suffixes_self_score(stop),
                           np.array(expected))

    np.random.seed(2)
    low_rate = np.random.poisson(15, 100)
    high_rate = np.random.poisson(20, 100)
    random_counts = np.concatenate([low_rate, high_rate])
    assert_suffixes_match(
        pasio.LogMarginalLikelyhoodComputer(random_counts, 1, 1), 150)

    fixed_counts = np.array(
        [0, 0, 1, 0, 0, 2, 2, 2, 10, 11, 100, 1, 0, 0, 1, 0], dtype='int64')
    assert_suffixes_match(
        pasio.LogMarginalLikelyhoodComputer(fixed_counts, 1, 1),
        len(fixed_counts) - 1)
Esempio n. 6
0
def test_suffixes_scores_with_candidates():
    """Suffix scores with split candidates must match the unrestricted ones.

    A scorer built with ``split_candidates`` is asked for suffix scores up to
    ``len(candidates) - 1``; the result must equal the unrestricted scorer's
    suffix scores (up to ``full_stop``) picked at all candidates but the last.
    """
    def assert_candidates_match(counts, candidates, full_stop):
        plain = pasio.LogMarginalLikelyhoodComputer(counts, 1, 1)
        restricted = pasio.LogMarginalLikelyhoodComputer(
            counts, 1, 1, split_candidates=candidates)
        # Unrestricted scores, selected at every candidate except the last.
        selected = plain.all_suffixes_self_score(full_stop)[candidates[:-1]]
        from_candidates = restricted.all_suffixes_self_score(
            len(candidates) - 1)
        assert np.allclose(selected, from_candidates)

    np.random.seed(2)

    assert_candidates_match(np.arange(1, 10),
                            np.array([0, 1, 3, 4, 5, 6, 7, 8, 9]), 9)

    low_rate = np.random.poisson(15, 100)
    high_rate = np.random.poisson(20, 100)
    assert_candidates_match(np.concatenate([low_rate, high_rate]),
                            np.array([0, 1, 10, 20, 21, 30, 40, 149]), 149)