Example 1
import networkx as nx
from collections import Counter

import rfutils

# immutably and sample_string_for_tree are assumed to be importable from the
# module under test.
def test_sample_string_for_tree():
    t = nx.DiGraph([(0, 1), (1, 2), (2, 3)])
    nomi_samples = [
        immutably(sample_string_for_tree)(t, [2]) for _ in range(1000)
    ]

    mi_samples = [  # MI 1
        immutably(sample_string_for_tree)(t, [0, 1]) for _ in range(1000)
    ]

    # When self-information is high, samples become sparser, so more samples
    # are needed to get a distribution with the right MI.
    mi_samples2 = [  # MI 1
        immutably(sample_string_for_tree)(t, [2, 1]) for _ in range(10000)
    ]

    mi_samples3 = [  # MI 2
        immutably(sample_string_for_tree)(t, [0, 2]) for _ in range(1000)
    ]

    assert len(set(mi_samples)) < len(set(nomi_samples))

    def is_close(x, y, tol):
        return abs(x - y) < tol

    import rfutils.entropy
    nomi_mi = rfutils.entropy.mutual_information(
        Counter(rfutils.flatmap(lambda s: rfutils.sliding(s, 2),
                                nomi_samples)))
    assert is_close(nomi_mi, 0, 0.01)

    mi_mi = rfutils.entropy.mutual_information(
        Counter(rfutils.flatmap(lambda s: rfutils.sliding(s, 2), mi_samples)))
    assert is_close(mi_mi, 1, 0.01)

    mi2_mi = rfutils.entropy.mutual_information(
        Counter(rfutils.flatmap(lambda s: rfutils.sliding(s, 2), mi_samples2)))
    assert is_close(mi2_mi, 1, 0.01)

    mi3_mi = rfutils.entropy.mutual_information(
        Counter(rfutils.flatmap(lambda s: rfutils.sliding(s, 2), mi_samples3)))
    assert is_close(mi3_mi, 2, 0.1)
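
For context, the test feeds a Counter of bigram pairs (built via rfutils.sliding over each sample) into rfutils.entropy.mutual_information. A minimal sketch of a plug-in mutual-information estimate over such counts, assuming the function accepts a Counter keyed by (x, y) pairs (an assumption; the real API may differ):

from collections import Counter
from math import log2

def mutual_information(pair_counts):
    # Plug-in estimate of I(X; Y) in bits from a Counter of (x, y) pairs.
    # (Assumed behavior of rfutils.entropy.mutual_information.)
    total = sum(pair_counts.values())
    px = Counter()
    py = Counter()
    for (x, y), c in pair_counts.items():
        px[x] += c
        py[y] += c
    return sum(
        (c / total) * log2(c * total / (px[x] * py[y]))
        for (x, y), c in pair_counts.items()
    )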
Example 2
def is_monotonic(cmp, xs):
    # True iff cmp holds for every pair of consecutive elements of xs.
    try:
        return all(cmp(x, y) for x, y in sliding(xs, 2))
    except StopIteration:  # TODO: fix sliding so short inputs don't need special-casing
        return True
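
A hedged usage sketch, assuming sliding(xs, 2) yields consecutive (x, y) pairs:

import operator

# Non-decreasing under <=, but not strictly increasing under <.
assert is_monotonic(operator.le, [1, 2, 2, 3])
assert not is_monotonic(operator.lt, [1, 2, 2, 3])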
Example 3
def skipgrams(xs, k):
    # Yield (first, last) pairs of elements separated by exactly k intervening items.
    for gram in sliding(xs, k + 2):
        yield gram[0], gram[-1]
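
A hedged usage sketch, assuming sliding yields length-(k + 2) windows as indexable sequences:

# With k=1, each yielded pair skips exactly one intervening element.
assert list(skipgrams("abcde", 1)) == [("a", "c"), ("b", "d"), ("c", "e")]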