Beispiel #1
0
def est_MI_JVHW_F(X, Y, N):
    """Estimate the mutual information I(X;Y) as H(Y) - H(Y|X).

    The conditional entropy H(Y|X) is accumulated one X label at a time:
    for every label x in 0..N-1, the samples of Y whose X label equals x
    are passed to est_entro_JVHW and the result is weighted by the
    empirical frequency of that label. The estimate is clipped at zero.

    Input:
    ----- X, Y: two vectors or matrices with the same size, which can only
                contain integers. Only the first column of X (as returned
                by formalize) is used to group the samples.
    ----- N: number of X labels to iterate over; labels outside
             0..N-1 (after the shift below) are ignored.
    Output:
    ----- est: max(0, H(Y) - H(Y|X)) as computed with est_entro_JVHW.
    """
    X, Y, _ = formalize(X, Y)

    # NOTE(review): the "- 1" presumably converts 1-based labels produced by
    # formalize into 0-based ones -- confirm against formalize.
    labels = X[:, 0] - 1
    n_samples = float(labels.shape[0])

    H_y_x = 0
    for x_i in range(N):
        mask = labels == x_i
        count = mask.sum()
        if count == 0:
            continue
        # The weight P(X = x_i) must come from the same mask that selects the
        # Y samples; otherwise each conditional entropy is scaled by the
        # frequency of a different label.
        H_y_x += (count / n_samples) * est_entro_JVHW(
            Y[mask].reshape(-1, 1))

    return np.maximum(0, est_entro_JVHW(Y) - H_y_x)
Beispiel #2
0
def est_MI_JVHW(X, Y):
    """Estimate mutual information (in bits) with the JVHW entropy estimator.

    The estimate is formed as H(X) + H(Y) - H(X,Y), where every entropy is
    obtained from est_entro_JVHW, and is clipped below at zero. For vector
    inputs the result is the scalar estimate of I(X;Y); for matrix inputs it
    is a vector with one estimate per pair of corresponding columns.

    Input:
    ----- X, Y: two vectors or matrices (in numpy.array type) with the same
    size, which can only contain integers
    Output:
    ----- est: the estimate of the mutual information (in bits) between the
    input vectors, or between each pair of corresponding columns of the input
    matrices. If the shapes differ, a message is printed and None is returned.
    """
    shapes_match = X.shape == Y.shape
    if not shapes_match:
        print('Input arguments X and Y should be of the same size!')
        return None

    # Relabel Y with consecutive codes 0..n_y-1 so that (X, Y) pairs can be
    # packed into a single integer per sample.
    y_values, y_codes = np.unique(Y, return_inverse=True)
    y_codes = y_codes.reshape(Y.shape)
    n_y = len(y_values)

    h_x = est_entro_JVHW(X)
    h_y = est_entro_JVHW(y_codes)
    # Each distinct (X, Y) pair maps to a distinct integer X * n_y + code.
    h_xy = est_entro_JVHW(X * n_y + y_codes)

    return np.maximum(h_x + h_y - h_xy, 0)
Beispiel #3
0
def est_MI_JVHW(X, Y):
    """Estimate the mutual information I(X;Y) with the JVHW estimator.

    Returns a scalar estimate when both X and Y are vectors, and a row
    vector with one estimate per pair of corresponding columns when they
    are matrices.
    Input:
    ----- X, Y: two vectors or matrices with the same size, which can only
                contain integers.
    Output:
    ----- est: the estimate of the mutual information between the input
               vectors, or between each pair of corresponding columns of
               the input matrices, clipped below at zero. The output data
               type is double.
    """
    X, Y, XY = formalize(X, Y)

    # I(X,Y) = H(X) + H(Y) - H(X,Y)
    entropy_x = est_entro_JVHW(X)
    entropy_y = est_entro_JVHW(Y)
    entropy_joint = est_entro_JVHW(XY)
    mutual_info = entropy_x + entropy_y - entropy_joint

    return np.maximum(0, mutual_info)
Beispiel #4
0
    # One mean-absolute-error slot per (S, n) configuration for each estimator.
    JVHW_err = np.zeros(num)
    MLE_err = np.zeros(num)

    # Two uniform random draws, reused below as the Beta distribution parameters.
    twonum = np.random.rand(2, 1)
    for i in range(num):
        S = record_S[i]
        n = record_n[i]
        print("S = {0}, n = {1}".format(int(S), int(n)))

        # Draw int(S) Beta-distributed weights and normalize them to sum to 1.
        dist = np.random.beta(twonum[0], twonum[1], int(S))
        dist /= dist.sum()

        true_S[i] = entropy_true(dist)
        # NOTE(review): presumably draws mc_times sample sets of size n from
        # dist -- confirm against randsmpl.
        samp = randsmpl(dist, int(n), mc_times)

        record_JVHW = est_entro_JVHW(samp)
        record_MLE = est_entro_MLE(samp)

        # Mean absolute deviation of each estimator's output from true_S[i].
        JVHW_err[i] = np.mean(np.abs(record_JVHW - true_S[i]))
        MLE_err[i] = np.mean(np.abs(record_MLE - true_S[i]))

    # Plot both error curves against the ratio S / n.
    plt.plot(record_S / record_n,
             JVHW_err,
             'b-s',
             linewidth=2,
             markerfacecolor='b')
    plt.plot(record_S / record_n,
             MLE_err,
             'r-.o',
             linewidth=2,
             markerfacecolor='r')
            # Flush the output file once nindices has advanced by more than
            # 1000 since last_index_write was last updated.
            if nindices - last_index_write > 1000:
                outfile.flush()
                last_index_write = nindices

            # Progress message (first 50 characters of the current line) each
            # time nindices exceeds counter * 1000000.
            if nindices > counter * 1000000:
                print("{nindices}: {line}".format(nindices=nindices,
                                                  line=line[:50]))
                counter += 1

        # NOTE(review): counter is also incremented by the progress check
        # above, which uses a 1000000 scale, while this threshold uses
        # 100000 -- confirm the shared counter is intended.
        if (args.jvhw or args.pml) and len(indices) > counter * 100000:
            info = "{filename}, N = {N}".format(filename=filename,
                                                N=len(indices))

            if args.jvhw:
                # Only the first element of the JVHW result is reported.
                jvhw_estimate = est_entro_JVHW(indices)[0]
                info += ", JVHW estimate = {jvhw}".format(jvhw=jvhw_estimate)

            if args.pml:
                pml_estimate = estimate_entropy_PML_approximate(indices)
                info += ", PML estimate = {pml}".format(pml=pml_estimate)

            # Report the estimates on stdout and in the output file.
            print(info)
            outfile.write(info + "\n")
            outfile.flush()
            counter += 1

    datafile.close()

outfile.close()
Beispiel #6
0
if __name__ == '__main__':
    # Compare the mean absolute error of the JVHW and MLE entropy estimators
    # over `num` alphabet sizes S, each with n = ceil(C * S / log(S)) samples,
    # averaging over `mc_times` Monte Carlo repetitions per configuration.
    C = 1
    num = 15
    mc_times = 20
    record_S = np.ceil(np.logspace(2, 5, num))
    record_n = np.ceil(C * record_S / np.log(record_S))
    true_S = np.zeros(num)
    JVHW_S = np.zeros(num)
    MLE_S = np.zeros(num)
    # Two uniform random draws, reused as the Beta distribution parameters.
    twonum = np.random.rand(2, 1)
    for i in range(num):
        # np.ceil returns floats; the sampler size arguments must be ints.
        S = int(record_S[i])
        n = int(record_n[i])

        # Random probability vector of length S.
        dist = np.random.beta(twonum[0], twonum[1], S)
        dist = dist / dist.sum()
        true_S[i] = entropy_true(dist)

        # The distribution is fixed for this configuration, so build the
        # sampler once instead of once per Monte Carlo repetition.
        sampler = rv_discrete(values=(np.arange(S), dist))
        record_H = np.zeros(mc_times)
        record_MLE = np.zeros(mc_times)
        for mc in range(mc_times):
            samp = sampler.rvs(size=n)
            record_MLE[mc] = est_entro_MLE(samp)
            record_H[mc] = est_entro_JVHW(samp)

        JVHW_S[i] = np.mean(np.abs(record_H - true_S[i]))
        MLE_S[i] = np.mean(np.abs(record_MLE - true_S[i]))

    plot_JVHW, = plt.plot(record_S / record_n, JVHW_S, 'b-s', linewidth=2.0,
                          label='JVHW estimator')
    plot_MLE, = plt.plot(record_S / record_n, MLE_S, 'r-.o', linewidth=2.0,
                         label='MLE')
    plt.xlabel('S/n')
    plt.ylabel('Mean Absolute Error')
    plt.legend(handles=[plot_JVHW, plot_MLE], loc=2)
    plt.show()