def test_sample_string_for_tree():
    """End-to-end check that sample_string_for_tree produces strings whose
    adjacent-symbol mutual information matches the MI structure requested
    for a 4-node path graph."""
    chain = nx.DiGraph([(0, 1), (1, 2), (2, 3)])

    def draw(mi_sites, count):
        # Sample `count` strings, shielding the shared tree from mutation.
        return [immutably(sample_string_for_tree)(chain, mi_sites)
                for _ in range(count)]

    # Keep the sampling order identical to the original test so any
    # dependence on global RNG state is preserved.
    no_mi = draw([2], 1000)
    one_mi = draw([0, 1], 1000)          # MI 1
    # When self-information is high, samples become sparser, so more samples
    # are needed to get a distribution with the right MI.
    one_mi_sparse = draw([2, 1], 10000)  # MI 1
    two_mi = draw([0, 2], 1000)          # MI 2

    # Introducing MI between symbols shrinks the set of distinct strings.
    assert len(set(one_mi)) < len(set(no_mi))

    def within(value, target, tol):
        # Absolute-tolerance closeness check.
        return abs(value - target) < tol

    import rfutils.entropy

    def bigram_mi(samples):
        # Empirical mutual information over adjacent-symbol (bigram) counts.
        pairs = Counter(rfutils.flatmap(lambda s: rfutils.sliding(s, 2), samples))
        return rfutils.entropy.mutual_information(pairs)

    assert within(bigram_mi(no_mi), 0, 0.01)
    assert within(bigram_mi(one_mi), 1, 0.01)
    assert within(bigram_mi(one_mi_sparse), 1, 0.01)
    assert within(bigram_mi(two_mi), 2, 0.1)
def is_monotonic(cmp, xs):
    """Return True iff cmp(x, y) holds for every adjacent pair (x, y) of xs."""
    try:
        for left, right in sliding(xs, 2):
            if not cmp(left, right):
                return False
        return True
    except StopIteration:
        # TODO fix sliding so this doesn't need to be special:
        # sequences too short to form a pair are trivially monotonic.
        return True
def skipgrams(xs, k):
    """Yield (first, last) pairs of every window of xs with k items between them."""
    window_size = k + 2
    for window in sliding(xs, window_size):
        first, last = window[0], window[-1]
        yield first, last