Example #1
def test_mixture_distribution2():
    # Test when pmfs have different lengths.
    d = dit.Distribution(['A', 'B'], [0.5, 0.5])
    d2 = dit.Distribution(['A', 'B'], [1, 0], sort=True, trim=True)

    # Fails when it checks that all pmfs have the same length.
    assert_raises(ValueError, dit.mixture_distribution2, [d, d2], [0.5, 0.5])
Example #2
def test_mixture_distribution5():
    # Incompatible sample spaces.
    d1 = dit.Distribution(['A', 'B'], [0.5, 0.5])
    d2 = dit.Distribution(['B', 'C'], [0.5, 0.5])
    d3 = dit.mixture_distribution([d1, d2], [0.5, 0.5], merge=True)
    pmf = np.array([0.25, 0.5, 0.25])
    assert_true(np.allclose(pmf, d3.pmf))
Example #3
def test_mixture_distribution():
    d = dit.Distribution(['A', 'B'], [0.5, 0.5])
    d2 = dit.Distribution(['A', 'B'], [1, 0])
    pmf = np.array([0.75, 0.25])

    d3 = dit.mixture_distribution([d, d2], [0.5, 0.5])
    npt.assert_allclose(pmf, d3.pmf)
Example #4
def marginal_maxent_dists(dist,
                          k_max=None,
                          maxiters=1000,
                          tol=1e-3,
                          verbose=False):
    """
    Return the marginal-constrained maximum entropy distributions.

    Parameters
    ----------
    dist : distribution
        The distribution used to constrain the maxent distributions.
    k_max : int
        The maximum order to calculate.

    """
    dist = prepare_dist(dist)

    n_variables = dist.outcome_length()
    symbols = dist.alphabet[0]

    if k_max is None:
        k_max = n_variables

    outcomes = list(dist._sample_space)

    # Optimization for the k=0 and k=1 cases is slow since you have to optimize
    # the full space, and we already know the answer in these cases.

    # This is safe since the distribution must be dense.
    k0 = dit.Distribution(outcomes, [1] * len(outcomes),
                          base='linear',
                          validate=False)
    k0.normalize()

    k1 = dit.product_distribution(dist)

    dists = [k0, k1]
    for k in range(k_max + 1):
        if verbose:
            print(
                "Constraining maxent dist to match {0}-way marginals.".format(
                    k))

        if k in [0, 1, n_variables]:
            continue

        kwargs = {'maxiters': maxiters, 'tol': tol, 'verbose': verbose}
        pmf_opt, opt = marginal_maxent(dist, k, **kwargs)
        d = dit.Distribution(outcomes, pmf_opt)
        d.make_sparse()
        dists.append(d)

    # Matching the all-way marginal just reproduces the distribution itself. Again,
    # this is a time-saving decision, even though the optimization should be fast.
    if k_max == n_variables:
        dists.append(dist)

    return dists
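
For context, Example #26 below calls the packaged version of this routine through the public dit API. A minimal usage sketch along those lines (relying only on dit.algorithms.marginal_maxent_dists and dit.shannon.entropy, not on the private helpers above):

import dit

# Giant-bit distribution: three perfectly correlated binary variables.
d = dit.Distribution(['000', '111'], [0.5, 0.5])

# One maxent distribution per constrained marginal order k = 0, 1, ..., n.
maxent_dists = dit.algorithms.marginal_maxent_dists(d)
for k, dk in enumerate(maxent_dists):
    print(k, dit.shannon.entropy(dk))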
Example #5
def test_mixture_distribution_weights():
    d = dit.Distribution(['A', 'B'], [0.5, 0.5])
    d2 = dit.Distribution(['A', 'B'], [1, 0])

    with pytest.raises(ditException):
        dit.mixture_distribution([d, d2], [1])
    with pytest.raises(ditException):
        dit.mixture_distribution2([d, d2], [1])
Example #6
def test_mixture_distribution_log():
    d = dit.Distribution(['A', 'B'], [0.5, 0.5])
    d2 = dit.Distribution(['A', 'B'], [1, 0])
    d.set_base(2)
    d2.set_base(2)
    weights = np.log2(np.array([0.5, 0.5]))
    pmf = np.log2(np.array([0.75, 0.25]))

    d3 = dit.mixture_distribution([d, d2], weights)
    npt.assert_allclose(pmf, d3.pmf)
Example #7
def test_mixture_distribution3():
    # Sample spaces are compatible.
    # But pmfs have a different order.
    d = dit.Distribution(['A', 'B'], [0.5, 0.5])
    d2 = dit.Distribution(['B', 'A'], [1, 0], sort=False, trim=False, sparse=False)
    pmf = np.array([0.25, 0.75])

    d3 = dit.mixture_distribution([d, d2], [0.5, 0.5])
    assert np.allclose(pmf, d3.pmf)
    d3 = dit.mixture_distribution2([d, d2], [0.5, 0.5])
    assert not np.allclose(pmf, d3.pmf)
Example #8
def test_mixture_distribution4():
    # Sample spaces are compatible.
    # But the pmfs have different lengths and orders.
    d = dit.Distribution(['A', 'B'], [0.5, 0.5])
    d2 = dit.Distribution(['B', 'A'], [1, 0], sort=False, trim=False, sparse=True)
    d2.make_sparse(trim=True)
    pmf = np.array([0.25, 0.75])

    d3 = dit.mixture_distribution([d, d2], [0.5, 0.5])
    assert np.allclose(pmf, d3.pmf)
    with pytest.raises(ValueError):
        dit.mixture_distribution2([d, d2], [0.5, 0.5])
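
Read together, the mixture tests above illustrate the difference between the two entry points: dit.mixture_distribution aligns outcomes through the sample space before averaging, while dit.mixture_distribution2 mixes the raw pmf arrays positionally and therefore needs equal-length, identically ordered pmfs. A small sketch of that distinction (the expected values follow the pattern of the tests above):

import dit

d1 = dit.Distribution(['A', 'B'], [0.5, 0.5])
# Same sample space, but outcomes listed in the opposite order.
d2 = dit.Distribution(['B', 'A'], [0.4, 0.6], sort=False, trim=False, sparse=False)

# Outcome-aware mixing: P(A) = 0.5*0.5 + 0.5*0.6 = 0.55
print(dit.mixture_distribution([d1, d2], [0.5, 0.5]).pmf)
# Positional mixing of the pmf arrays, which here swaps the two probabilities.
print(dit.mixture_distribution2([d1, d2], [0.5, 0.5]).pmf)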
Example #9
def auto_bright_nonlin(img,
                       epochs,
                       transform_factor=0.5,
                       sigma=0.8,
                       mean_thresh=2,
                       mean_reduction=0.9):
    """
    TODO: Transform multiple images simultaneously (e.g. Before and After) per Roy's request

    Try a sliding-window approach and maximize entropy for each window. Windows can't be too small or too large.
    :param img: numpy array/obj of the image you want to transform
    :param epochs: hyperparameter for number of transformations
    :param transform_factor: hyperparameter for rate of exponential transformation
    :param sigma: gaussian filter hyperparameter
    :param mean_thresh: hyperparameter controlling sensitivity of intensity cutoff
    :param mean_reduction: hyperparameter for reducing the lowest intensity pixels
    :return best_img: maximum entropy image
    """
    # normalize pixels between 0 and 1
    img = np.array(img).astype(float)  # np.float was removed from NumPy; the builtin float is equivalent here
    img *= 1 / np.max(img)

    # calculate initial entropy of the image
    counts, bins = np.histogram(img)
    count_frac = [count / np.sum(counts) for count in counts]
    d = dit.Distribution(list(map(str, range(len(counts)))), count_frac)
    entropy_loss = [entropy(d)]
    d_entropy = 1  # arbitrary
    imgs = [
        img
    ]  # holds all images so that we can choose the one with the best entropy
    for i in range(epochs):
        # remove low intensity pixels
        img[img <= mean_thresh * np.mean(img)] *= mean_reduction
        img = gf(img, sigma=sigma)
        img = img**(1 - (transform_factor * d_entropy))
        img[img == np.inf] = 1  # clip infinities at 1
        imgs.append(img)
        counts, bins = np.histogram(img)
        count_frac = [count / np.sum(counts) for count in counts]
        d = dit.Distribution(list(map(str, range(len(counts)))), count_frac)
        entropy_loss.append(entropy(d))
        d_entropy = entropy_loss[-1] - entropy_loss[-2]
        if i % 10 == 0:
            print('Finished: ', 100 * i / epochs, '%')

    print('Best entropy: ', max(entropy_loss), 'at ix ',
          entropy_loss.index(max(entropy_loss)))
    best_img = imgs[entropy_loss.index(max(entropy_loss))]
    best_img = gf(best_img, sigma=sigma)
    return best_img, entropy_loss
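
A hedged usage sketch for auto_bright_nonlin: the snippet above does not show its imports, so this assumes gf is scipy.ndimage.gaussian_filter, entropy is dit.shannon.entropy, and that the function is defined in the same session; the input image here is synthetic stand-in data.

import numpy as np
import dit
from dit.shannon import entropy
from scipy.ndimage import gaussian_filter as gf

# Stand-in for a real grayscale image.
rng = np.random.default_rng(0)
img = rng.random((128, 128))

best_img, entropy_loss = auto_bright_nonlin(img, epochs=50)
print('best entropy:', max(entropy_loss))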
Example #10
def test_RVFunctions_from_mapping1():
    d = dit.Distribution(['00', '01', '10', '11'], [1 / 4] * 4)
    bf = dit.RVFunctions(d)
    mapping = {'00': '0', '01': '1', '10': '1', '11': '0'}
    d = dit.insert_rvf(d, bf.from_mapping(mapping))
    outcomes = ('000', '011', '101', '110')
    assert_equal(d.outcomes, outcomes)
Example #11
def test_rvfunctions1():
    # Smoke test with strings
    d = dit.Distribution(['00', '01', '10', '11'], [1 / 4] * 4)
    bf = dit.RVFunctions(d)
    d = dit.insert_rvf(d, bf.xor([0, 1]))
    d = dit.insert_rvf(d, bf.xor([1, 2]))
    assert_equal(d.outcomes, ('0000', '0110', '1011', '1101'))
Example #12
def prepare_cluster_distribution(Net,
                                 InputsX,
                                 InputsY,
                                 Outputs,
                                 set_names=False):
    '''Generates a dit Distribution of three random variables where each variable represents a group of N>=1 original variables.'''
    start, stop = Net.calc_all_updates()
    Nodes = list(Net.nodeDict.keys())
    M = start.shape[0]

    inpX = np.zeros(M)
    for node_idx, node in enumerate(InputsX):
        inpX += (node_idx + 1) * start[:, Nodes.index(node)]

    inpY = np.zeros(M)
    for node_idx, node in enumerate(InputsY):
        inpY += (node_idx + 1) * start[:, Nodes.index(node)]

    outp = np.zeros(M)
    for node_idx, node in enumerate(Outputs):
        outp += (node_idx + 1) * stop[:, Nodes.index(node)]

    res = np.vstack((np.vstack((inpX, inpY)), outp))
    res = [tuple(res[:, i].astype(int)) for i in range(M)]
    c = Counter(res)
    states = list(c.keys())
    probs = list(c.values())
    d = dit.Distribution(states, [p / sum(probs) for p in probs])

    if set_names:
        d.set_rv_names(["Inp1", "Inp2", "Outp"])

    return res, d
Example #13
def test_RVFunctions_from_partition():
    d = dit.Distribution(['00', '01', '10', '11'], [1 / 4] * 4)
    bf = dit.RVFunctions(d)
    partition = (('00', '11'), ('01', '10'))
    d = dit.insert_rvf(d, bf.from_partition(partition))
    outcomes = ('000', '011', '101', '110')
    assert_equal(d.outcomes, outcomes)
Example #14
def test_rvfunctions2():
    # Smoke test with int tuples
    d = dit.Distribution([(0,0), (0,1), (1,0), (1,1)], [1/4]*4)
    bf = dit.RVFunctions(d)
    d = dit.insert_rvf(d, bf.xor([0,1]))
    d = dit.insert_rvf(d, bf.xor([1,2]))
    assert d.outcomes == ((0,0,0,0), (0,1,1,0), (1,0,1,1), (1,1,0,1))
Example #15
def test_distribution_from_bayesnet_error():
    # Test distribution_from_bayesnet with functions and distributions.
    # This is not allowed and should fail.

    x = nx.DiGraph()
    x.add_edge('A', 'C')
    x.add_edge('B', 'C')

    d = dit.example_dists.Xor()
    sample_space = d._sample_space

    def uniform(node_val, parents):
        return 0.5

    unif = dit.Distribution('01', [.5, .5])
    unif.set_rv_names('A')

    x.node['C']['dist'] = uniform
    x.node['A']['dist'] = unif
    x.node['B']['dist'] = uniform

    assert_raises(Exception,
                  dit.distribution_from_bayesnet,
                  x,
                  sample_space=sample_space)
Example #16
def find_clusters(Net):
    '''
    Finds clusters in the network by first calculating the attractor states and then finding sets of nodes that have
    low joint entropy.

    Output: list of tuples, where the first entry is a tuple of node indices and the second entry is the joint entropy
    '''

    a_start, a_stop = identify_attactors(Net)
    nodes = Net.NodeIDs
    # calculate the joint attractor distribution
    d = dit.Distribution(a_start, [1 / len(a_start)] * len(a_start))

    entropies = []
    clusters = []
    for pair in combinations(range(len(nodes)), 2):
        # find all pairs that have entropy lower than 1
        H = dit.shannon.entropy(d.marginal(pair))
        if H <= 1:
            entropies.append((pair, dit.shannon.entropy(d.marginal(pair))))

    for pair in entropies:
        # successively add nodes to the pairs to find larger clusters with low entropy
        a = find_cluster_containing_pair(Net, a_start, pair[0])
        clusters.append(tuple(a[0][0]))
        clusters = list(set(clusters))

    return [(c, dit.shannon.entropy(d.marginal(c))) for c in clusters]
Example #17
def test_RVFunctions_from_mapping2():
    d = dit.Distribution([(0, 0), (0, 1), (1, 0), (1, 1)], [1 / 4] * 4)
    bf = dit.RVFunctions(d)
    mapping = {(0, 0): 0, (0, 1): 1, (1, 0): 1, (1, 1): 0}
    d = dit.insert_rvf(d, bf.from_mapping(mapping, force=True))
    outcomes = ((0, 0, 0), (0, 1, 1), (1, 0, 1), (1, 1, 0))
    assert_equal(d.outcomes, outcomes)
Example #18
def save_values():
        for j in interval_lengths:
            loc = 'hours-perk-week'+str(j)
            temp = np.linspace(0,40,(40//j +1))
            temp = temp.astype(int)
            print(loc)
            df[loc] = pd.cut(df['hours-per-week'],bins = temp)
            df[loc] = df[loc].cat.add_categories('>40').fillna('>40').astype(str)
            selected_columns = [
                'income',
                'education',
                'sex',
                'race',
                'occupation',
                'age-group',
                loc
            ]
            # take all samples with attributes that we're interested in
            data_array = list(map(lambda r: tuple(r[k] for k in selected_columns), df.to_dict("records")))
            # create distribution from the samples with uniform distribution
            dist_census = dit.Distribution(data_array, [1. / df.shape[0] ] * df.shape[0])
            # set variable aliases on the distribution
            dist_census.set_rv_names("".join(rvs_names))
            rvs_to_name = dict(zip(rvs_names, selected_columns))
            decomp_S_HA = information_decomposition(rvs_to_name,dist_census, 'S', 'HA',)
            decomp_S_HE = information_decomposition(rvs_to_name,dist_census, 'S', 'HE')
            decomp_S_HR = information_decomposition(rvs_to_name,dist_census, 'S', 'HR')
            decomp_S_HG = information_decomposition(rvs_to_name,dist_census, 'S', 'HG')
            decomp_S_HO = information_decomposition(rvs_to_name,dist_census, 'S', 'HO')
            data_vals[j] = [decomp_S_HA,decomp_S_HE,decomp_S_HR,decomp_S_HG,decomp_S_HO,]
        bar_graph(data_vals)
Example #19
def max_synergistic_nudge(old_X: dit.Distribution,
                          YgivenX: np.ndarray,
                          eps: float = 0.01):
    base = old_X.get_base()
    new_X = old_X.copy(base=base)
    old_X.make_dense()
    rvs = old_X.get_rv_names()
    outcomes = old_X.outcomes
    if len(rvs) < 3:
        return max_global_nudge(old_X, YgivenX, eps)

    nudge, _ = max_nudge(old_X.copy('linear'),
                         YgivenX,
                         eps=eps,
                         nudge_type='synergistic_old')
    #  print("synergistic eps",sum(abs(nudge)), eps, old_X.outcome_length())
    if base == 'linear':
        perform_nudge(new_X, nudge)
    else:
        log_nudge, sign = np.log(np.abs(nudge)), np.sign(nudge)
        perform_log_nudge(new_X, log_nudge, sign)
    dct = {o: new_X[o] if o in new_X.outcomes else 0.0 for o in outcomes}
    #print(outcomes, dct)
    new_X = dit.Distribution(dct)
    new_X.set_rv_names(rvs)
    return new_X
Example #20
def BEC_joint(epsilon):
    """
    The joint distribution for the binary erasure channel at channel capacity.

    Parameters
    ----------
    epsilon : float
        The probability with which the input is erased.

    """
    pX = dit.Distribution(['0', '1'], [1 / 2, 1 / 2])
    pYgX0 = dit.Distribution(['0', '1', 'e'], [1 - epsilon, 0, epsilon])
    pYgX1 = dit.Distribution(['0', '1', 'e'], [0, 1 - epsilon, epsilon])
    pYgX = [pYgX0, pYgX1]
    pXY = dit.joint_from_factors(pX, pYgX, strict=False)
    return pXY
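
A quick sanity check on the construction above: with the uniform input used here, the mutual information of the joint should equal the erasure channel capacity 1 - epsilon (a sketch, assuming BEC_joint is defined as above):

import dit

eps = 0.25
pXY = BEC_joint(eps)
print(dit.shannon.mutual_information(pXY, [0], [1]))  # expect 1 - eps = 0.75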
Example #21
def max_global_nudge(old_X: dit.Distribution,
                     YgivenX: np.ndarray,
                     eps: float = 0.01):
    base = old_X.get_base()
    new_X = old_X.copy(base=base)
    old_X.make_dense()
    rvs = old_X.get_rv_names()
    outcomes = old_X.outcomes

    nudge, _ = max_nudge(old_X.copy('linear'),
                         YgivenX,
                         eps=eps,
                         nudge_type='global')

    #  print("global eps",sum(abs(nudge)), eps, old_X.outcome_length())
    if base == 'linear':
        perform_nudge(new_X, nudge)
    else:
        # print(nudge)
        log_nudge, sign = np.log(np.abs(nudge)), np.sign(nudge)
        # print(log_nudge, sign)
        # log_nudge[log_nudge == -np.inf] = 0
        # print("converted to log nudge",nudge, log_nudge, sign)
        perform_log_nudge(new_X, log_nudge, sign)

    dct = {o: new_X[o] if o in new_X.outcomes else 0.0 for o in outcomes}
    #print(outcomes, dct)
    new_X = dit.Distribution(dct)
    new_X.set_rv_names(rvs)
    return new_X
Example #22
def individual_nudge(old_X: dit.Distribution,
                     eps: float = 0.01,
                     rvs_other=None) -> dit.Distribution:
    mask = old_X._mask
    base = old_X.get_base()
    if old_X.outcome_length() == 1:
        return global_nudge(old_X, eps)
    outcomes = old_X.outcomes
    rv_names = old_X.get_rv_names()

    if rvs_other is None:
        rvs = old_X.get_rv_names()
        rvs_other = np.random.choice(rvs, len(rvs) - 1, replace=False)

    X_other, Xi_given_Xother = old_X.condition_on(rvs_other)
    nudge_size = len(Xi_given_Xother[0])

    if base == 'linear':
        nudge = generate_nudge(nudge_size, eps / len(Xi_given_Xother))
        for Xi in Xi_given_Xother:
            perform_nudge(Xi, nudge)
    else:
        nudge, sign = generate_log_nudge(nudge_size, eps)
        for Xi in Xi_given_Xother:
            perform_log_nudge(Xi, nudge, sign)
    new_X = dit.joint_from_factors(X_other, Xi_given_Xother).copy(base)
    #add back any missing outcomes
    dct = {o: new_X[o] if o in new_X.outcomes else 0.0 for o in outcomes}
    #print(outcomes, dct)
    new_X = dit.Distribution(dct)
    new_X.set_rv_names(rv_names)
    new_X.make_dense()
    new_X._mask = mask
    return new_X
Example #23
def convert_samples_to_dit(samples, node, neighbors, model="ising"):
    selected_samples = samples.loc[:, neighbors + [node]]
    n_vars = len(neighbors + [node])
    new_columns = list(selected_samples.columns)
    new_columns.remove(node)
    new_columns.append(node)
    selected_samples = selected_samples.reindex(columns=new_columns)
    uniques = selected_samples.groupby(selected_samples.columns.to_list()) \
                                    .size() \
                                    .reset_index(name="count")

    #Normalize the counts
    uniques["count"] = uniques["count"] / uniques["count"].sum()
    #Convert to dict
    states = {}
    if model == "ising":
        states = {
            state: 0
            for state in itertools.product([-1, 1], repeat=n_vars)
        }
    else:
        states = {
            state: 0
            for state in itertools.product([0, 1], repeat=n_vars)
        }
    for row in uniques.values:
        states[tuple(row[:-1])] = row[-1]

    d = dit.Distribution(states)
    d.set_rv_names(get_vars(d.outcome_length()))
    return d
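
A hedged usage sketch for convert_samples_to_dit; it assumes the get_vars helper it calls for naming the variables is available in scope, and uses a tiny hypothetical set of spin samples:

import pandas as pd

samples = pd.DataFrame({'A': [1, -1, 1, -1],
                        'B': [1, 1, -1, -1],
                        'C': [1, -1, -1, 1]})

# Joint distribution over (A, B, C) estimated from the sample counts.
d = convert_samples_to_dit(samples, node='C', neighbors=['A', 'B'], model='ising')
print(d)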
Example #24
def test_expanded_samplespace3():
    """Expand a sample space without unioning the alphabets."""
    outcomes = ['01a', '10a']
    pmf = [1 / 2, 1 / 2]
    d = dit.Distribution(outcomes, pmf, sample_space=outcomes)
    d2 = dit.algorithms.expanded_samplespace(d, union=False)
    ss_ = ['00a', '01a', '10a', '11a']
    assert list(d2.sample_space()) == ss_
Example #25
def __CreateDD(self, comb, prob):
    dd = dit.Distribution(comb, prob)
    nRV = len(dd.rvs)
    dd.set_rv_names(range(nRV))
    self.iRV_now = list(range(nRV // 2))
    self.iRV_next = list(range(nRV // 2, nRV))
    self.nRV = nRV // 2
    self.dd = dd
Example #26
def example_C():
    # Giant bit, perfect correlation.

    # Note: this doesn't converge if we do it with n=4, e.g. '1111', '0000'.
    outcomes = ['111', '000']
    d = dit.Distribution(outcomes, [.5, .5])
    maxent_dists = dit.algorithms.marginal_maxent_dists(d)
    print_output(d, maxent_dists)
Example #27
def test_rvfunctions_toolarge():
    letters = 'abcd'
    outcomes = itertools.product(letters, repeat=3)
    outcomes = list(map(''.join, outcomes))
    d = dit.Distribution(outcomes, [1 / 64] * 64, validate=False)
    rvf = dit.RVFunctions(d)
    partition = [(d.outcomes[i], ) for i in range(len(d))]
    assert_raises(NotImplementedError, rvf.from_partition, partition)
Example #28
def moment_maxent_dists(dist, symbol_map, k_max=None, jitter=True,
                        with_replacement=True, show_progress=True):
    """
    Return the moment-constrained maximum entropy distributions.

    Parameters
    ----------
    dist : distribution
        The distribution used to constrain the maxent distributions.
    symbol_map : iterable
        A list whose elements are the real values that each state is assigned
        while calculating moments. Typical values are [-1, 1] or [0, 1].
    k_max : int
        The maximum order to calculate.
    jitter : bool | float
        When `True` or a float, we perturb the distribution slightly before
        proceeding. This can sometimes help with convergence.
    with_replacement : bool
        If `True`, then variables are selected for moments with replacement.
        The standard Ising model selects without replacement.
    show_progress : bool
        If `True`, show convergence progress to stdout.

    """
    dist = prepare_dist(dist)

    if jitter:
        # This is sometimes necessary. If your distribution does not have
        # full support then convergence can be difficult to come by.
        dist.pmf = dit.math.pmfops.jittered(dist.pmf)

    n_variables = dist.outcome_length()
    symbols = dist.alphabet[0]

    if k_max is None:
        k_max = n_variables

    outcomes = list(dist._product(symbols, repeat=n_variables))

    if with_replacement:
        text = 'with replacement'
    else:
        text = 'without replacement'

    dists = []
    for k in range(k_max + 1):
        msg = "Constraining maxent dist to match {0}-way moments, {1}."
        print()
        print(msg.format(k, text))
        print()
        opt = MomentMaximumEntropy(dist, k, symbol_map, with_replacement=with_replacement)
        pmf_opt = opt.optimize(show_progress=show_progress)
        pmf_opt = pmf_opt.reshape(pmf_opt.shape[0])
        d = dit.Distribution(outcomes, pmf_opt)
        dists.append(d)

    return dists
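
A hedged usage sketch for moment_maxent_dists above; it assumes the prepare_dist helper and the MomentMaximumEntropy optimizer it relies on are importable from the same module:

import dit

# XOR distribution over three binary variables.
d = dit.Distribution(['000', '011', '101', '110'], [0.25] * 4)

# Constrain moments up to second order, mapping symbols '0' -> -1 and '1' -> +1.
dists = moment_maxent_dists(d, symbol_map=[-1, 1], k_max=2, show_progress=False)
for k, dk in enumerate(dists):
    print(k, dit.shannon.entropy(dk))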
Example #29
def CorrelatedAND(r):
    Px = dit.Distribution(['000', '010', '100', '111'],
                          [1 - a - b + r, b - r, a - r, r])
    lat = pid.PID_SD(Px)
    return np.array([
        r,
        lat.get_partial(((), )) / lat._total,
        lat.get_partial(((0, ), (1, ))) / lat._total,
    ])
Example #30
def test_rvfunctions3():
    # Smoke test strings with from_hexes
    outcomes = ['000', '001', '010', '011', '100', '101', '110', '111']
    pmf = [1 / 8] * 8
    d = dit.Distribution(outcomes, pmf)
    bf = dit.RVFunctions(d)
    d = dit.insert_rvf(d, bf.from_hexes('27'))
    outcomes = ('0000', '0010', '0101', '0110', '1000', '1010', '1100', '1111')
    assert_equal(d.outcomes, outcomes)